Skip to content

Instantly share code, notes, and snippets.

@raymonstah
Created November 30, 2014 23:20
Show Gist options
  • Save raymonstah/c9d27c559a5078dc5f71 to your computer and use it in GitHub Desktop.
Save raymonstah/c9d27c559a5078dc5f71 to your computer and use it in GitHub Desktop.
A Reddit imgur ripper. Avoids downloading the same files again when run multiple times.
#!/usr/local/bin/python3
# Raymond Ho
# November 29, 2014
# Downloads imgur uploads from reddit and saves them in a new directory.
# Rerun every so often to download new pictures without downloading the
# same pictures multiple times. A text file keeps track of what has
# already been downloaded.
import requests # To access Reddit
import urllib.request # To download files
import re # To find RegEx,
import os # To check if directory exists
# Direct imgur image links. The dots are escaped so they only match a
# literal '.', and both http and https links are accepted.
_IMGUR_LINK = re.compile(r'https?://i\.imgur\.com/\w+\.(?:jpg|gif|png)')


def rip_from_reddit(subreddit='MechanicalKeyboards', post_limit=100):
    """
    Download imgur images linked from a subreddit that were not seen before.

    Fetches the subreddit's JSON listing, extracts direct i.imgur.com image
    links and saves every link that is not in the module-level ``seen_pics``
    list into the directory ``reddit_<subreddit>``. A link is appended to
    the text file named by the module-level ``file_name`` only after its
    download succeeded, so a failed download is retried on the next run.

    subreddit  -- name of the subreddit to scan.
    post_limit -- value of the ``limit`` query parameter sent to Reddit.

    Returns the list of links downloaded during this call (empty when the
    request failed or nothing new was found).
    """
    url = 'http://www.reddit.com/r/' + subreddit + '/.json?' + \
        'limit=' + str(post_limit)
    # The User-Agent has to travel as a request header; handing the tuple
    # to ``auth=`` would send it as HTTP Basic credentials instead.
    headers = {'User-Agent': 'Raymonds redditrip'}
    r = requests.get(url, headers=headers, timeout=30)
    if not r.ok:
        print('Reddit request failed with HTTP status', r.status_code)
        return []

    # The same link can show up several times in one listing, so remember
    # what was queued already in addition to what earlier runs recorded.
    known = set(seen_pics)
    unseen_pics = []
    for link in _IMGUR_LINK.findall(r.text):
        if link not in known:
            known.add(link)
            unseen_pics.append(link)

    if not unseen_pics:
        print('No new files to download.'
              '\nCheck subreddit spelling / Increase limit.')
        return []

    # Create the target directory if it doesn't exist.
    pic_dir = 'reddit_' + subreddit.lower()
    os.makedirs(pic_dir, exist_ok=True)

    downloaded = []
    with open(file_name, 'a') as f:
        for pic in unseen_pics:
            print('Downloading..', pic)
            target = os.path.join(pic_dir, pic.rsplit('/', 1)[-1])
            try:
                urllib.request.urlretrieve(pic, target)
            except OSError as err:
                # URLError/HTTPError derive from OSError. Skip this picture
                # but keep going; it stays unrecorded so a later run retries.
                print('Failed to download', pic, '-', err)
                continue
            f.write(pic + '\n')
            downloaded.append(pic)
    return downloaded
if __name__ == '__main__':
    # rip_from_reddit() reads both of these as module-level globals.
    file_name = 'redditlinks.txt'
    seen_pics = []
    # Read the log into a list to see what was already downloaded.
    try:
        with open(file_name, 'r') as f:
            seen_pics = [line.strip() for line in f]
    except FileNotFoundError:
        # First run: create an empty log and close the handle right away
        # instead of leaving it open.
        with open(file_name, 'w'):
            pass
    rip_from_reddit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment