Created
September 4, 2014 22:48
Revisions
-
rjmackay created this gist
Sep 4, 2014. There are no files selected for viewing
# Based on http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
# Modified to clean up filename
"""Download a user's Twitpic images, naming each file after its caption.

For every page of the Twitpic "users/show" listing, each image is saved in
the current directory under a file name derived from its message (falling
back to the image's short id), and the file's access/modification times are
set from the image's timestamp.
"""
import urllib
import json
import collections
import contextlib
import time
import os
import re

try:
    # Python 2 module layout (what this script was originally written for).
    import urllib2
    import HTMLParser

    urlopen = urllib2.urlopen
    urlretrieve = urllib.urlretrieve
    # Decoder for HTML entities such as "&amp;".
    unescape = HTMLParser.HTMLParser().unescape
except ImportError:
    # Python 3 equivalents of the same three callables.
    import html
    from urllib.request import urlopen, urlretrieve

    unescape = html.unescape

# Target Page
twitpic_api = "http://api.twitpic.com/2/users/show.json?username=rjmackay&page="


def build_file_name(item, max_length):
    """Return the local file name for one image record.

    Args:
        item: Mapping with the keys ``short_id``, ``message`` and ``type``,
            as found in the ``images`` list of the API response.
        max_length: Maximum number of characters of the message to use,
            leaving room for the extension.

    Returns:
        The lower-cased message with spaces turned into hyphens and every
        character other than ASCII letters and hyphens removed, followed by
        ``"."`` and the image type. If nothing is left after cleaning, the
        image's short id is used as the base name instead.
    """
    twitpic_id = item['short_id']
    # A missing/null message is treated like an empty one so that the
    # short id fallback below kicks in instead of crashing on ``None``.
    twitpic_title = item["message"] or ""

    # Replace / (which can't be used in a file name) with a similar looking character
    twitpic_title = twitpic_title.replace('/', u'\u2044')
    twitpic_title = twitpic_title[:max_length]

    twitpic_file_name = unescape(twitpic_title).lower().replace(" ", "-")
    twitpic_file_name = re.sub(r'[^A-Za-z-]', '', twitpic_file_name)
    if not twitpic_file_name:
        twitpic_file_name = twitpic_id

    return twitpic_file_name + "." + item["type"]


def main():
    """Walk the listing pages and download every image found."""
    # Maximum filename length
    # Last 4 characters will be .jpg or .png etc
    max_length = os.statvfs('.').f_namemax - 4

    # Get the data about the target page
    for page in range(1, 100):
        # str() is required: concatenating the int page number directly
        # raises TypeError.
        print("page " + str(page))

        # Close the HTTP response as soon as the JSON has been read.
        with contextlib.closing(urlopen(twitpic_api + str(page))) as response:
            twitpic_data = json.load(response)

        # Get the info about each image on the page
        twitpic_images = twitpic_data.get("images")
        if not twitpic_images:
            # No images on this page: stop instead of requesting the
            # remaining page numbers.
            break

        for item in twitpic_images:
            twitpic_time = time.mktime(
                time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
            twitpic_file_url = "http://twitpic.com/show/full/" + item['short_id']
            twitpic_file_name = build_file_name(item, max_length)
            print(twitpic_file_name)

            # Save the file
            urlretrieve(twitpic_file_url, twitpic_file_name)

            # Set the file time
            os.utime(twitpic_file_name, (twitpic_time, twitpic_time))


if __name__ == "__main__":
    main()