Created
January 21, 2017 14:12
-
-
Save jassey/df9ad5ef01d16c02d68d9a8c17c75b73 to your computer and use it in GitHub Desktop.
Convert Twitter CSV data to JSON and extract more detail (hashtags, mentioned users, reply target) from the tweet text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# twitter csv process
# written by @jiyang_viz
#
# requires:
#   https://github.com/edburnett/twitter-text-python
#
# download the csv file from:
#   https://github.com/bpb27/political_twitter_archive/tree/master/realdonaldtrump
#
import json
import csv
from ttp import ttp
from dateutil import parser as date_parser


def _write_json_lines(path, rows):
    """Write each dict in *rows* to *path* as one JSON object per line."""
    # json.dumps keeps its default ensure_ascii=True, so the output stays
    # pure ASCII exactly as before; the encoding is given only to be explicit.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(row) + '\n' for row in rows)


# read csv to a list of dicts (one per row, keyed by the csv header names)
# newline='' is what the csv module requires so that line breaks embedded in
# quoted fields (tweet text can span several lines) are parsed correctly.
# NOTE(review): assumes the archive csv is UTF-8 encoded -- confirm.
with open('realdonaldtrump.csv', 'r', newline='', encoding='utf-8') as f:
    data = list(csv.DictReader(f, delimiter=','))

# write to json file (same fields as csv)
# This must happen before the loop below, which adds keys to the same dicts.
_write_json_lines('realdonaldtrump.json', data)

# get more info from text message
parser = ttp.Parser()
for item in data:
    result = parser.parse(item['text'])
    item['tags'] = result.tags
    item['users'] = result.users
    item['reply'] = result.reply
    # str() of the parsed datetime, e.g. "2016-12-07 19:37:32+00:00"
    item['tweet_time'] = str(date_parser.parse(item['created_at']))

# write to json file (more fields)
_write_json_lines('realdonaldtrump_more.json', data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example:
{
"tags": [
"ThankYouTour2016",
"MAGA"
],
"is_retweet": false,
"favorite_count": 15424,
"id_str": "806583438748815361",
"text": "Join me tomorrow in Des Moines, Iowa with Vice President-Elect @mike_pence - at 7:00pm!\n#ThankYouTour2016 #MAGA… https://t.co/Geq6sT70IT",
"reply": null,
"tweet_time": "2016-12-07 19:37:32+00:00",
"users": [
"mike_pence"
],
"retweet_count": 3877,
"in_reply_to_screen_name": null,
"source": "Twitter for iPhone",
"created_at": "Wed Dec 07 19:37:32 +0000 2016"
}