Last active
May 12, 2020 17:35
-
-
Save south1907/d9358a91cbd1c3aab33655f13d35ca25 to your computer and use it in GitHub Desktop.
Get the Facebook comment-plugin comments for a single film on Phimmoi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import time | |
# Wall-clock reference captured when the script starts; the end of the script
# subtracts it from the finish time to compute the total run duration.
start_time = time.time()

# HTTP headers sent with every paginated comment request.
headers = {
    'cookie': 'fr=0shZ5eUbOjaYtgLs0..BeuWpJ...1.0.BeuWpJ.',
}

# Value sent as the 'limit' form field of each pager request.
limit = 20
def get_id_film(url_film):
    """Look up the Facebook comment-plugin object ID for a film page.

    Requests Facebook's ``feedback.php`` plugin page with ``href`` set to
    ``url_film`` and extracts the value of the ``"targetFBID"`` field from
    the returned text.

    Args:
        url_film: URL of the film page the comment plugin is attached to.

    Returns:
        The ``targetFBID`` value as a string.

    Raises:
        ValueError: If the response has no well-formed ``"targetFBID"`` entry.
        requests.RequestException: If the HTTP request fails or times out.
    """
    marker = '"targetFBID":"'
    plugin_comment_root = 'https://www.facebook.com/plugins/feedback.php'

    # Request the plugin page with param href = url_film. The timeout keeps
    # the script from hanging forever on an unresponsive server.
    response = requests.get(
        plugin_comment_root,
        params={'href': url_film},
        timeout=30,
    )
    resp = response.text

    # Locate the targetFBID marker. str.find returns -1 when it is absent,
    # which must not be turned into a bogus start offset.
    marker_index = resp.find(marker)
    if marker_index == -1:
        raise ValueError('targetFBID not found in response for ' + url_film)
    start_index = marker_index + len(marker)

    # The ID runs up to the next double quote.
    end_index = resp.find('"', start_index)
    if end_index == -1:
        raise ValueError('unterminated targetFBID in response for ' + url_film)

    return resp[start_index:end_index]
def get_page(film_id, after_cursor=''):
    """Fetch one page of comments for a Facebook comment-plugin object.

    Posts to the plugin's ``pager/reverse_time`` endpoint using the
    module-level ``headers`` and ``limit``, then walks the ``idMap`` of the
    JSON payload and collects the entries whose ``type`` is ``'comment'``.

    Args:
        film_id: Facebook object ID of the plugin (as a string; it is
            concatenated into the URL).
        after_cursor: Pagination cursor from the previous page; ``''`` for
            the first request.

    Returns:
        A dict with:
            'data': list of dicts with keys ``comment_user``,
                ``comment_content``, ``comment_timestamp`` and ``film_id``.
            'next': the payload's ``afterCursor`` value.
            'film_name': ``name`` of the first ``'ogobject'`` entry seen, or
                ``''`` if the page has none.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON after the prefix
            is stripped.
        KeyError: If the payload lacks an expected field.
    """
    data = {
        'after_cursor': after_cursor,
        'limit': limit,
        '__a': '1',
    }
    url = ('https://www.facebook.com/plugins/comments/async/'
           + film_id + '/pager/reverse_time/')
    response = requests.post(url, headers=headers, data=data, timeout=30)

    # The first 9 characters of the body are skipped before JSON parsing
    # (presumably Facebook's "for (;;);" guard prefix -- TODO confirm).
    res_obj = json.loads(response.text[9:])
    payload = res_obj['payload']
    id_map = payload['idMap']

    comments = []
    film_name = ''
    # Iterate by key (not .values()) so the behavior is unchanged for
    # whatever container type idMap is deserialized into.
    for key in id_map:
        item = id_map[key]
        item_type = item['type']
        # Entries of type 'user' also occur but nothing from them is used.
        if item_type == 'ogobject':
            # Keep only the first object name encountered.
            if film_name == '':
                film_name = item['name']
        elif item_type == 'comment':
            comments.append({
                'comment_user': item['authorID'],
                'comment_content': item['body']['text'],
                'comment_timestamp': item['timestamp'],
                'film_id': film_id,
            })

    return {
        'data': comments,
        'next': payload['afterCursor'],
        'film_name': film_name,
    }
def get_all_of_film(url_film):
    """Collect every comment of a film by paging through the comment plugin.

    Resolves the plugin object ID with ``get_id_film`` and then calls
    ``get_page`` repeatedly, following the returned cursor, until a page
    comes back empty or no further cursor is provided.

    Args:
        url_film: URL of the film page whose comments should be fetched.

    Returns:
        A dict with:
            'data': list of all comment dicts gathered from every page.
            'film_id': the plugin object ID returned by ``get_id_film``.
    """
    results = []
    after_cursor = ''
    film_id = get_id_film(url_film)
    while True:
        print('after_cursor: ' + str(after_cursor))
        res = get_page(film_id, after_cursor)
        # No comments left: stop.
        if not res['data']:
            break
        # Otherwise accumulate and move on to the next page via the cursor.
        results += res['data']
        after_cursor = res['next']
        # An empty or missing cursor means there is nothing further to ask
        # for; sending '' again would request the first page once more and
        # the loop would never terminate.
        if not after_cursor:
            break
    return {
        'data': results,
        'film_id': film_id,
    }
# Film page whose plugin comments are exported.
url_film = 'http://www.phimmoi.net/phim/spongebob-bot-bien-dao-tau-9881/'
all_data = get_all_of_film(url_film)

# The dump uses ensure_ascii=False, so non-ASCII comment text is written as-is.
# Open the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which may be unable to represent those characters.
with open(all_data['film_id'] + '.json', 'w', encoding='utf-8') as outfile:
    json.dump(all_data['data'], outfile, indent=4, ensure_ascii=False)

# Elapsed wall-clock time since start_time was captured at the top of the file.
end_time = time.time()
total_time = end_time - start_time
# print(total_time)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment