-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreddit_comment_fetch.py
40 lines (36 loc) · 1.44 KB
/
reddit_comment_fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import praw
import pandas as pd
import time
# Module-level PRAW client used by the functions below.  The angle-bracket
# strings are template placeholders to be filled in before running.
reddit = praw.Reddit(
    user_agent='u/<USER ID> <VERSION> <BOT NAME>',
    client_secret='<YOUR CLIENT SECRET>',
    client_id='<YOUR CLIENT ID>',
)
def save_comments(data, filename):
    """Write *data* to ``<filename>_comments.csv`` as a single-column table.

    Args:
        data: Values to store; they are loaded into a DataFrame whose only
            column is named ``comments``.
        filename: Prefix of the output file; ``_comments.csv`` is appended.
    """
    output_path = f'{filename}_comments.csv'
    frame = pd.DataFrame(data, columns=['comments'])
    frame.to_csv(output_path)
def traverse_data(posts, filename):
    """Collect high-scoring comment bodies from each post and save them to CSV.

    For every post the full comment tree is expanded, the comments are ranked
    by score (highest first), and the bodies at ranks 3-32 (slice ``[2:32]``)
    are added to a running list.  The list is checkpointed through
    ``save_comments`` after every 100 successfully processed posts and is
    written once more after the last post, so a batch whose size is not a
    multiple of 100 (or that had failures) still reaches disk.

    Errors while handling a single post are printed and that post is skipped;
    the remaining posts are still processed.

    Args:
        posts: Iterable of objects exposing an ``id`` attribute that is passed
            to ``reddit.submission``.
        filename: Prefix handed to ``save_comments`` and echoed in the
            progress messages.
    """
    processed = 0
    saved_total = 0  # len(all_comments) at the most recent successful save
    all_comments = []
    for post in posts:
        try:
            submission = reddit.submission(id=post.id)
            # limit=None resolves every "load more comments" placeholder.
            submission.comments.replace_more(limit=None)
            ranked = sorted(
                submission.comments.list(),
                key=lambda comment: comment.score,
                reverse=True,
            )
            # NOTE(review): the two highest-scoring comments are skipped and at
            # most the next 30 are kept; the reason for skipping is not evident
            # from this file -- confirm before changing the slice.
            all_comments.extend([comment.body for comment in ranked[2:32]])
            processed += 1
            # Checkpoint after each full hundred of successful posts.
            if processed % 100 == 0:
                save_comments(all_comments, filename)
                saved_total = len(all_comments)
                print(f'Saved {len(all_comments)} comments for {filename}')
        except Exception as e:
            # Best-effort: report the failure and move on to the next post.
            print(f'Error: {e}')
        finally:
            # Delay after every attempt, including failed ones, to avoid
            # hitting Reddit's API rate limit.
            time.sleep(1)
    # Final flush: persist anything gathered since the last checkpoint.  Skipped
    # when nothing new was collected so an empty run does not overwrite a file.
    if len(all_comments) > saved_total:
        try:
            save_comments(all_comments, filename)
            print(f'Saved {len(all_comments)} comments for {filename}')
        except Exception as e:
            print(f'Error: {e}')
if __name__ == '__main__':
    # Subreddits to harvest; each name is also used as the output file prefix.
    target_names = [
        'AskTeenGirls',
        'AskTeenBoys',
        'youngadults',
        'college',
        'LifeAfterSchool',
        'AskWomenOver30',
        'AskMenOver30',
    ]
    for name in target_names:
        # Request up to 100 of the subreddit's top posts and collect their
        # comments.
        top_posts = reddit.subreddit(name).top(limit=100)
        traverse_data(top_posts, name)