-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_reaction.py
71 lines (62 loc) · 2.44 KB
/
get_reaction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: UTF-8 -*-
from time import sleep
from random import randint
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import pickle
import os
import glob
import re
import pandas as pd
# Credentials for the Facebook login. They can be supplied through the
# FB_ACCOUNT / FB_PASSWORD environment variables so real secrets do not have
# to be written into this file; the placeholders below are the fallback.
account = os.environ.get("FB_ACCOUNT", "your email account")
pwd = os.environ.get("FB_PASSWORD", "your password")

# Start Firefox without a visible window, driven by the bundled geckodriver.
firefox_options = Options()
firefox_options.add_argument('--headless')
driver = webdriver.Firefox(executable_path="../driver/geckodriver", options=firefox_options)

try:
    driver.get("https://m.facebook.com/")
    # type in account and pwd
    # NOTE(review): the two CSS selectors below are chains of short generated
    # class names; they presumably target the password field and the login
    # button and are likely to change with the page markup -- confirm.
    driver.find_element_by_id("m_login_email").send_keys(account)
    driver.find_element_by_css_selector(".bl.bm.bo.bp").send_keys(pwd)
    driver.find_element_by_css_selector(".n.t.o.bz.br.ca").click()
    # Fixed pause after submitting the form (presumably to let the
    # post-login page load before the first scrape request).
    sleep(3)
except BaseException:
    # Do not leave an orphaned headless browser process behind when the
    # login sequence fails or is interrupted; the error is re-raised as is.
    driver.quit()
    raise
def Get_Reaction(driver, page, post_id, delay_range=(1, 2)):
    """Collect the reaction counts of one post from its reaction browser page.

    Opens the mobile "reaction profile browser" URL for ``post_id``, reads the
    ``href`` of every element with class ``ba`` (presumably the per-reaction
    tab links -- confirm against the live page) and extracts the count and the
    reaction-type code from each link's query string.

    Args:
        driver: Selenium WebDriver; must provide ``get`` and
            ``find_elements_by_class_name``.
        page: Page name, used only in the progress message.
        post_id: Post identifier (string) appended to the base URL.
        delay_range: Inclusive ``(low, high)`` bounds, in whole seconds, of the
            random pause taken after loading the page. Defaults to ``(1, 2)``.

    Returns:
        dict mapping ``'Total'`` and the type codes ``'1', '2', '3', '4', '7',
        '8'`` to their counts (``0`` when no link for that type was found).
        A type code outside that list is added under its own key. Counts are
        ``int`` whenever the value in the URL is numeric; otherwise the raw
        text is kept.
    """
    base_url = "https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier="
    driver.get(base_url + post_id)
    # Random pause after the page load, before reading the links.
    sleep(randint(*delay_range))

    hrefs = [el.get_attribute('href') for el in driver.find_elements_by_class_name('ba')]

    reaction_types = ['Total', '1', '2', '3', '4', '7', '8']
    react_dict = {reaction_type: 0 for reaction_type in reaction_types}

    for href in hrefs:
        # Elements without an href, or links carrying no count parameter,
        # hold no reaction data -- skip them instead of crashing on None.
        if not href:
            continue
        count_match = re.search(r"count=([^&#]*)", href)
        if count_match is None:
            continue

        # The capture stops at the first '&', so parameters that follow the
        # count are never swallowed into the value.
        raw_count = count_match.group(1)
        try:
            r_count = int(raw_count)
        except ValueError:
            r_count = raw_count

        # A link with a "type=<code>&total..." pair belongs to one reaction
        # type; a link with only a count is treated as the overall total.
        type_match = re.search(r"type=([^&#]*)&total", href)
        r_type = type_match.group(1) if type_match is not None else 'Total'
        react_dict[r_type] = r_count

    print('posts ' + page + '/' + post_id + ' is done!')
    return react_dict
# Reaction-type codes (the dict keys produced by Get_Reaction) paired with the
# column label each one gets in the output CSV, in output column order.
reaction_columns = [
    ('1', 'Like'),
    ('2', 'Love'),
    ('3', 'Wow'),
    ('4', 'Haha'),
    ('7', 'Sad'),
    ('8', 'Angry'),
    ('Total', 'Total'),
]
new_header = [label for _, label in reaction_columns]


def _build_reaction_frame(reaction_list, post_ids):
    """Turn per-post reaction dicts into a DataFrame with labelled columns.

    Columns are assigned by reaction code rather than by position, so the
    result does not depend on how pandas orders the keys of a list of dicts.
    A code not listed in ``reaction_columns`` is kept as an extra column named
    after the raw code (missing values filled with 0) instead of breaking the
    header assignment. An empty ``reaction_list`` yields a header-only frame.

    Args:
        reaction_list: One dict per post, as returned by Get_Reaction.
        post_ids: Post ids in the same order as ``reaction_list``.

    Returns:
        DataFrame with columns ``Post_Id``, then ``new_header``, then any
        extra reaction codes in order of first appearance.
    """
    known_keys = [key for key, _ in reaction_columns]
    extra_keys = []
    for react_dict in reaction_list:
        for key in react_dict:
            if key not in known_keys and key not in extra_keys:
                extra_keys.append(key)

    all_keys = known_keys + extra_keys
    rows = [[react_dict.get(key, 0) for key in all_keys] for react_dict in reaction_list]
    frame = pd.DataFrame(rows, columns=new_header + extra_keys)
    frame.insert(0, 'Post_Id', list(post_ids))
    return frame


try:
    # to_csv does not create missing directories; make sure the output folder
    # exists before any scraping time is spent.
    os.makedirs('reaction_files', exist_ok=True)

    # get files dir
    csv_list = glob.glob("postid_files/*.csv")
    for f in csv_list:
        # read posts data from csv file
        post_df = pd.read_csv(f, encoding='utf-8-sig')
        # Post ids are taken from the first column on its own; going through
        # post_df.values would upcast integer ids to float whenever every
        # column of the file is numeric.
        post_ids = post_df.iloc[:, 0].tolist()

        # Page name = file name up to its last underscore. basename handles
        # either path separator; a name without '_' falls back to the stem.
        file_name = os.path.basename(f)
        page = file_name.rpartition('_')[0] or os.path.splitext(file_name)[0]

        reaction_list = []
        for raw_id in post_ids:
            print('***************post***************')
            post_id = str(raw_id)
            react_dict = Get_Reaction(driver, page, post_id)
            reaction_list.append(react_dict)
            print(react_dict)
            # Random pause between consecutive posts.
            sleep(randint(1, 3))

        # change lists of reaction dict into dataframe with named columns.
        df = _build_reaction_frame(reaction_list, post_ids)
        df.to_csv('reaction_files/' + page + '_reaction.csv', encoding='utf-8-sig', index=False)
finally:
    # Always shut the browser down, including when a file or post fails.
    driver.quit()