Skip to content

Instantly share code, notes, and snippets.

@rjmackay
Created September 4, 2014 22:48

Revisions

  1. rjmackay created this gist Sep 4, 2014.
    51 changes: 51 additions & 0 deletions export.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,51 @@
    # Based on http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
    # Modified to clean up filename

    import urllib
    import urllib2
    import json
    import collections
    import HTMLParser
    import time
    import os
    import re

    # Longest file-name stem the script will produce: the per-name limit the
    # filesystem reports for the current directory, minus 4 characters that
    # are held back for an extension such as ".jpg" or ".png".
    _fs_limits = os.statvfs('.')
    max_length = _fs_limits.f_namemax - 4

    # Listing endpoint for the account being exported; a page number is
    # appended to this string for each request.
    twitpic_api = "http://api.twitpic.com/2/users/show.json?username=rjmackay&page="

    # Shared parser instance, used to decode HTML entities
    h = HTMLParser.HTMLParser()

    # Walk the paginated image listing and download every image it references.
    #
    # For each image the caption is turned into a lower-case, hyphenated,
    # letters-only file name (falling back to the image's short id when
    # nothing usable is left), the full-size image is saved under that name in
    # the current directory, and the file's access/modification times are set
    # to the image's timestamp.

    # Names already written during this run. Captions that clean up to the
    # same name would otherwise overwrite each other silently.
    saved_names = set()

    for page in range(1, 100):
        # str() is required here: "page " + page raises TypeError for an int.
        print("page " + str(page))

        # Close the HTTP response explicitly; the object returned by
        # urllib2.urlopen is not a context manager in Python 2.
        response = urllib2.urlopen(twitpic_api + str(page))
        try:
            twitpic_data = json.load(response)
        finally:
            response.close()

        # Get the info about each image on the page
        twitpic_images = twitpic_data.get("images")
        if not twitpic_images:
            # NOTE(review): presumably a page with no images means we are
            # past the end of the listing -- confirm against the API. Stopping
            # here avoids requesting the remaining pages for nothing.
            break

        for item in twitpic_images:
            twitpic_id = item['short_id']
            # A null or missing caption is treated as empty, so the file ends
            # up named after the image id below.
            twitpic_title = item.get("message") or ""
            # Replace / (which can't be used in a file name) with a similar looking character
            twitpic_title = twitpic_title.replace('/', u'\u2044')
            twitpic_title = twitpic_title[:max_length]
            twitpic_file_type = item["type"]
            # time.mktime interprets the parsed timestamp as local time.
            twitpic_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
            twitpic_file_url = "http://twitpic.com/show/full/" + twitpic_id

            # Decode HTML entities, lower-case, hyphenate spaces, then drop
            # every character that is not an ASCII letter or a hyphen.
            twitpic_file_stem = h.unescape(twitpic_title).lower().replace(" ", "-")
            twitpic_file_stem = re.sub(r'[^A-Za-z-]', '', twitpic_file_stem)
            if not twitpic_file_stem:
                twitpic_file_stem = twitpic_id

            twitpic_file_name = twitpic_file_stem + "." + twitpic_file_type
            if twitpic_file_name in saved_names:
                # Same cleaned caption as an earlier image in this run: make
                # the name unique with the image id, trimming the stem so the
                # result still fits within max_length.
                id_suffix = "-" + twitpic_id
                twitpic_file_stem = twitpic_file_stem[:max_length - len(id_suffix)] + id_suffix
                twitpic_file_name = twitpic_file_stem + "." + twitpic_file_type
            saved_names.add(twitpic_file_name)
            print(twitpic_file_name)

            # Save the file
            urllib.urlretrieve(twitpic_file_url, twitpic_file_name)
            # Set the file time
            os.utime(twitpic_file_name, (twitpic_time, twitpic_time))