-
-
Save kennethreitz/4997e2b191afd941b54f8183d4c17e36 to your computer and use it in GitHub Desktop.
dumb reddit scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import praw | |
# Placeholder credentials for the Reddit API client; every value is left
# blank here and has to be filled in before the script can authenticate.
_reddit_credentials = {
    'client_id': '',
    'client_secret': '',
    'password': '',
    'user_agent': '',
    'username': '',
}

# Shared client used by all scraping functions in this module.
reddit = praw.Reddit(**_reddit_credentials)
def get_comment_body():
    """Append the body of every comment in the hot threads to a text file.

    The subreddit is taken from the module-level ``sub`` variable, and the
    submission ids to visit come from ``get_subreddit_id``. For each
    submission the comment tree is flattened and every comment body is
    appended to ``subreddits.txt``, one body per write, each terminated by
    a newline.

    The file is opened in append mode, so repeated runs keep adding to it.
    It is created even when no comment is found.

    Returns:
        None. The only result is the data appended to ``subreddits.txt``.
    """
    threads = get_subreddit_id(sub)
    # Open the file once for the whole run instead of once per comment, and
    # pin the encoding: comment bodies often contain non-ASCII characters
    # that the platform default encoding cannot always represent.
    with open('subreddits.txt', 'a', encoding='utf-8') as outfile:
        for thread in threads:
            submission = reddit.submission(id=thread)
            # limit=0 drops the "load more comments" placeholders so that
            # .list() only yields real comments (see PRAW docs).
            submission.comments.replace_more(limit=0)
            for comment in submission.comments.list():
                # The newline keeps consecutive bodies from running together.
                outfile.write(comment.body + '\n')
def get_subreddit_id(sub):
    """Collect the ids of submissions from a subreddit's "hot" listing.

    Args:
        sub: Name of the subreddit, passed straight to ``reddit.subreddit``.

    Returns:
        A list with the ``id`` of each submission yielded by
        ``hot(limit=25)``, in listing order.
    """
    hot_listing = reddit.subreddit(sub).hot(limit=25)
    return [post.id for post in hot_listing]
def get_user_comments(redditor, tfile=False):
    """Append a redditor's newest comments to a file on disk.

    Args:
        redditor: Username handed to ``reddit.redditor``.
        tfile: When true, comment bodies are appended as plain text to
            ``usercomments.txt``, one newline-terminated body per comment.
            Otherwise each comment is appended to ``comment.json`` as a
            ``{comment_id: body}`` JSON object on its own line.

    Returns:
        None. The only result is the data appended to the chosen file.

    The JSON output is newline-delimited: writing the objects back to back
    with no separator (``{...}{...}``) produces a file that ``json.load``
    cannot read, whereas one object per line can be parsed line by line.
    """
    path = 'usercomments.txt' if tfile else 'comment.json'
    # Open once for the whole listing rather than once per comment; the
    # explicit encoding avoids failures on non-ASCII comment text where the
    # platform default is not UTF-8.
    with open(path, 'a', encoding='utf-8') as outfile:
        # limit=None: no explicit cap is requested from PRAW.
        for comment in reddit.redditor(redditor).comments.new(limit=None):
            if tfile:
                outfile.write(comment.body + '\n')
            else:
                json.dump({comment.id: comment.body}, outfile)
                outfile.write('\n')
def subreddit_to_json():
    """Append the comments of each hot thread to ``data.json``.

    The subreddit is taken from the module-level ``sub`` variable, and the
    submission ids to visit come from ``get_subreddit_id``. For every
    submission a list of ``{comment_id: body}`` objects is built from the
    flattened comment tree and written as one JSON array on its own line.

    The file is opened in append mode, so repeated runs keep adding lines.
    Each line is an independent JSON document: arrays written back to back
    with no separator (``[...][...]``) cannot be read by ``json.load``.

    Returns:
        None. The only result is the data appended to ``data.json``.
    """
    threads = get_subreddit_id(sub)
    # Open once for the whole run instead of once per thread.
    with open('data.json', 'a', encoding='utf-8') as outfile:
        for thread in threads:
            submission = reddit.submission(id=thread)
            # limit=0 drops the "load more comments" placeholders so that
            # .list() only yields real comments (see PRAW docs).
            submission.comments.replace_more(limit=0)
            data = [
                {comment.id: comment.body}
                for comment in submission.comments.list()
            ]
            json.dump(data, outfile)
            # Line break separates this thread's array from the next one.
            outfile.write('\n')
# Targets for this run. ``sub`` has to remain a module-level name because
# get_comment_body() and subreddit_to_json() read it as a global.
sub, name = 'learnpython', 'danceprometheus'

# Run the three scrapes in order: thread comments as text, the user's
# comments as JSON (tfile left at its default), then thread comments as JSON.
get_comment_body()
get_user_comments(name)
subreddit_to_json()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment