Created
December 24, 2012 21:45
-
-
Save tdsmith/4370781 to your computer and use it in GitHub Desktop.
A little script to dump your Twitter .zip archive into an Evernote notebook on OS X. For, you know, cloudiness. It loads tweets at about one per second, so don't expect it to be super-zippy. Run `pip install appscript` if you haven't yet; that will also give you aem.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json, appscript, aem, sys, os, glob, datetime | |
def usage(called_name):
    """Print a two-line command-line usage summary to standard output.

    Args:
        called_name: The name the script was invoked as (normally
            ``sys.argv[0]``); it is substituted into both lines.
    """
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and as a function call on Python 3, so the script runs on both.
    print('Usage: %s <path to unzipped twitter archive> <name of Evernote notebook>' % called_name)
    print('e.g., %s tweets/ "My Tweets"' % called_name)
def _parse_tweet_date(created_at):
    """Convert a tweet's ``created_at`` string into a naive ``datetime``.

    Two layouts are accepted:

    * ``'Wed Dec 19 20:20:32 +0000 2012'``
    * ``'2013-02-28 19:38:05 +0000'``

    The UTC-offset token is discarded, so the result carries no tzinfo.

    Raises:
        ValueError: if the string matches neither layout.
    """
    # Erase timezone info: drop any token that looks like '+0000' / '-0500'.
    tokens = [tok for tok in created_at.split(' ')
              if not (tok[:1] in ('+', '-') and tok[1:].isdigit())]
    datestring = ' '.join(tokens)
    for fmt in ('%a %b %d %H:%M:%S %Y', '%Y-%m-%d %H:%M:%S'):
        try:
            return datetime.datetime.strptime(datestring, fmt)
        except ValueError:
            continue
    raise ValueError('Unrecognised created_at value: %r' % (created_at,))


def main(tweet_path, notebook_name):
    """Load every tweet of an unzipped Twitter archive into a new Evernote notebook.

    Each tweet becomes one note: the note body is the tweet's JSON, the title
    is the tweet text, and the creation date is taken from ``created_at``.

    Args:
        tweet_path: Directory of the unzipped archive; tweet files are looked
            up under ``data/js/tweets/*js`` inside it.
        notebook_name: Name of the notebook to create. It must not exist yet.

    Raises:
        Exception: if ``tweet_path`` is not a directory, the notebook already
            exists, or no tweet files are found.
        aem.findapp.ApplicationNotFoundError: if Evernote cannot be located.
        ValueError: if a tweet's ``created_at`` is in an unknown layout.
    """
    if not os.path.isdir(tweet_path):
        raise Exception("%s: not a directory" % tweet_path)

    try:
        en = appscript.app('Evernote')
    except aem.findapp.ApplicationNotFoundError:
        print('Are you sure Evernote is installed? :(')
        raise  # bare raise keeps the original traceback

    existing_names = [notebook.name() for notebook in en.notebooks.get()]
    if notebook_name in existing_names:
        raise Exception('Notebook %s already exists' % notebook_name)

    filespec = os.path.join(tweet_path, 'data/js/tweets/*js')
    twitter_files = glob.glob(filespec)
    if not twitter_files:
        raise Exception('Could not find any files matching %s' % filespec)

    # Only create the notebook once all the cheap validation has passed.
    nb = en.create_notebook(notebook_name)
    for filename in twitter_files:
        # The first line of each file is skipped before JSON decoding --
        # presumably a JavaScript assignment prefix; verify against an archive.
        with open(filename, 'rb') as f:
            buf = f.readlines()[1:]
        try:
            # Decode explicitly so this works on both Python 2 and 3.
            archive = json.loads(b''.join(buf).decode('utf-8'))
        except Exception:
            print('Uh oh! Hit a rough patch in %s' % filename)
            raise
        for tweet in archive:
            date = _parse_tweet_date(tweet['created_at'])
            en.create_note(with_text=json.dumps(tweet), title=tweet['text'],
                           notebook=nb, created=date)
if __name__ == '__main__':
    # Expect exactly two arguments: the archive directory and the notebook name.
    if len(sys.argv) != 3:
        usage(sys.argv[0])
        sys.exit(1)
    try:
        main(sys.argv[1], sys.argv[2])
    except Exception as e:
        # Any failure is reported as usage text followed by the error itself,
        # and the script exits with a non-zero status.
        usage(sys.argv[0])
        print(e)
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Tim and thanks for sharing this snippet,
I keep on running into the following error when trying to import my archive:
time data '2013-02-28 19:38:05 +0000 +0000' does not match format '%a %b %d %H:%M:%S %Y'
Here's the data of the troublesome tweet:
This is not the first tweet, so I guess the prior tweets worked fine, and I don't see any difference in the date string :(
Could you help me out here?