-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_update_handler.py
93 lines (54 loc) · 2.24 KB
/
data_update_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import json
from reddit_scraper import SubRedditScraper
from DB import DB
from prepare_data_for_DB import get_dictionary_from_comment_json, get_dictionary_from_user_json
class DataUpdateHandler:
    """Pull new data for one subreddit from the scraper and hand it to the DB.

    Attributes:
        reddit_scraper: ``SubRedditScraper`` created for the subreddit name
            given at construction.
        DB: ``DB`` instance used to look up stored post ids and insert posts.
    """

    def __init__(self, subredditName):
        """Create the scraper for *subredditName* and instantiate ``DB``."""
        self.reddit_scraper = SubRedditScraper(subredditName)
        self.DB = DB()

    def update_user(self, username):
        """Fetch the scraper's data for *username* and convert it for storage.

        Always returns ``None``. When the scraper result has no usable
        ``'data'`` entry the user is skipped silently.
        """
        user_data = self.reddit_scraper.get_users_data(username)
        try:
            user_data = user_data['data']
        except (LookupError, TypeError):
            # Missing 'data' entry, or a payload that cannot be subscripted
            # at all (e.g. None): nothing to convert for this user.
            return
        # TODO: the converted dictionaries are not written to the DB yet.
        # TODO: handle suspended accounts ('is_suspended' in the user JSON).
        user_dict, user_dict_other = get_dictionary_from_user_json(user_data)

    def get_last_valid_post_id(self, limit=2):
        """Return the most recent stored post id the scraper still accepts.

        Starting with the ``limit``-th row returned by
        ``DB.get_last_post_id(limit)``, each candidate id is passed to
        ``get_new_posts``; the first one whose response lists at least one
        child is returned. ``limit`` grows by one per attempt so that every
        attempt looks one row further into the DB result.
        NOTE(review): presumably rows are ordered newest first -- confirm
        against ``DB.get_last_post_id``.

        Args:
            limit: 1-based position of the first row to try (default 2, i.e.
                the row after the one ``get_last_post_id()`` puts first).

        Returns:
            The first accepted post id, or ``None`` when the DB returns
            ``None`` or runs out of rows before a candidate is accepted.
        """
        while True:
            rows = self.DB.get_last_post_id(limit)
            if rows is None:
                return None
            try:
                candidate = rows[limit - 1][0]
            except IndexError:
                # Fewer than `limit` rows are stored: every candidate failed.
                return None
            response = self.reddit_scraper.get_new_posts(candidate)
            if response.json()['data']['children']:
                return candidate
            limit += 1

    def update_posts(self, last_post_id):
        """Fetch posts newer than *last_post_id* and insert them into the DB.

        Args:
            last_post_id: id to fetch after, or ``None`` to fetch without a
                starting point.
        """
        new_posts = self.reddit_scraper.get_new_posts(last_post_id)
        # An empty listing may mean the post behind last_post_id was deleted,
        # so retry from an older stored id that the scraper still accepts.
        if last_post_id is not None and not new_posts.json()['data']['children']:
            fallback_id = self.get_last_valid_post_id()
            if fallback_id is not None:
                new_posts = self.reddit_scraper.get_new_posts(fallback_id)
        self.DB.insert_posts_to_db(new_posts)
        # TODO: call update_comments for every inserted post.

    def update_comments(self, post_url, post_id):
        """Fetch the comments of one post and refresh every user found in them.

        Args:
            post_url: passed to the scraper's ``get_post_comments``.
            post_id: passed twice to ``get_dictionary_from_comment_json``.
        """
        new_comments = self.reddit_scraper.get_post_comments(post_url)
        comment_dicts, user_list = get_dictionary_from_comment_json(
            new_comments, post_id, post_id, 1, [], []
        )
        # TODO: insert comment_dicts into the 'comments' table.
        for username in user_list:
            self.update_user(username)

    def start_post_update(self, firsttime):
        """Run a post update, either from scratch or from the last stored id.

        Args:
            firsttime: when truthy, fetch without a starting id. Otherwise
                continue from the first id returned by
                ``DB.get_last_post_id()``; if the DB has no usable id,
                nothing is fetched.
        """
        if firsttime:
            self.update_posts(None)
            return
        rows = self.DB.get_last_post_id()
        if not rows:
            # None or an empty result: there is no stored id to continue from.
            return
        last_post_id = rows[0][0]
        if last_post_id is not None:
            self.update_posts(last_post_id)