Created
April 8, 2017 07:33
-
-
Save ShivangiM/626e74cc0190cf467f35c0eec7cbd759 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"How stress controls hemoglobin levels in blood https://t.co/D4ffkErZ4b\n", | |
"Trump will very likely fire either his grandchildren's father, or Steve Bannon. Who will it be?\n", | |
"Hospitals put your data at risk, study finds https://t.co/l4Q0AFz4he https://t.co/GChWceFnDw\n", | |
".@Boeing and ULA demo their Emergency Egress System for crewed space launches https://t.co/V0XwdPNpQK https://t.co/stGN6yfQjB\n", | |
"Engrams and circuits crucial for systems consolidation of a memory | If you have money to access Science ... https://t.co/Q4CUt8AyLc\n", | |
"A NASA infrared look at the Southern Indian Ocean’s 15th tropical cyclone https://t.co/uqXIWmVlki https://t.co/DYgUC5JaNB\n", | |
"Weekly Roundup: Apple to reset the Mac Pro, Tesla most valuable U.S. automaker https://t.co/O8XissmJvI\n", | |
"Instead of doubling its data center footprint, Google built its own computer chip for running deep neural networks https://t.co/oJyjVdXzik\n", | |
"RT @tejasdkulkarni: Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowl…\n", | |
"RT @CMU_Robotics: RI Seminar: Sergey Levine : Deep Robotic Learning: https://t.co/a0A2tlesfP via @YouTube\n" | |
] | |
} | |
], | |
"source": [ | |
"import tweepy\n", | |
"from tweepy import OAuthHandler\n", | |
"\n", | |
"#The Twitter API credentials\n", | |
"\n", | |
"consumer_key = ''\n", | |
"consumer_secret = ''\n", | |
"access_token = ''\n", | |
"access_secret = ''\n", | |
"\n", | |
" \n", | |
"auth = OAuthHandler(consumer_key, consumer_secret)\n", | |
"auth.set_access_token(access_token, access_secret)\n", | |
" \n", | |
"api = tweepy.API(auth)\n", | |
"\n", | |
"for status in tweepy.Cursor(api.home_timeline).items(10):\n", | |
" # Process a single status\n", | |
" print(status.text) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'created_at': 'Sat Apr 08 07:30:28 +0000 2017', 'id': 850611788693549056, 'id_str': '850611788693549056', 'text': \"#Sirens used for weather emergencies in #Dallas are going off. It's a malfunction, no emergency. Crews working to fix. Don't call 911.\", 'truncated': False, 'entities': {'hashtags': [{'text': 'Sirens', 'indices': [0, 7]}, {'text': 'Dallas', 'indices': [40, 47]}], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 824126001936474113, 'id_str': '824126001936474113', 'name': 'Alternative NOAA', 'screen_name': 'altNOAA', 'location': 'Silver Spring, MD', 'description': 'The Unofficial \"Resistance\" team of the NOAA. Account not tax payer subsidized. The NOAA studies the oceans, and the atmosphere to understand our planet. #MASA', 'url': 'https://t.co/DrD566fhpI', 'entities': {'url': {'urls': [{'url': 'https://t.co/DrD566fhpI', 'expanded_url': 'http://noaa.gov', 'display_url': 'noaa.gov', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 149265, 'friends_count': 250, 'listed_count': 1775, 'created_at': 'Wed Jan 25 05:25:25 +0000 2017', 'favourites_count': 785, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2247, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/824126001936474113/1485456978', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:26:37 +0000 2017', 'id': 850610820832776196, 'id_str': '850610820832776196', 'text': 'How stress controls hemoglobin levels in blood https://t.co/D4ffkErZ4b', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/D4ffkErZ4b', 'expanded_url': 'https://scienmag.com/?p=1527016', 'display_url': 'scienmag.com/?p=1527016', 'indices': [47, 70]}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 'id_str': '2782211491', 'name': 'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 2, 'favorite_count': 2, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:26:18 +0000 2017', 'id': 850610738863497217, 'id_str': '850610738863497217', 'text': \"Trump will very likely fire either his grandchildren's father, or Steve Bannon. Who will it be?\", 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 824126001936474113, 'id_str': '824126001936474113', 'name': 'Alternative NOAA', 'screen_name': 'altNOAA', 'location': 'Silver Spring, MD', 'description': 'The Unofficial \"Resistance\" team of the NOAA. Account not tax payer subsidized. The NOAA studies the oceans, and the atmosphere to understand our planet. #MASA', 'url': 'https://t.co/DrD566fhpI', 'entities': {'url': {'urls': [{'url': 'https://t.co/DrD566fhpI', 'expanded_url': 'http://noaa.gov', 'display_url': 'noaa.gov', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 149265, 'friends_count': 250, 'listed_count': 1775, 'created_at': 'Wed Jan 25 05:25:25 +0000 2017', 'favourites_count': 785, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2247, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/824126001936474113/1485456978', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 2, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:23:44 +0000 2017', 'id': 850610094379331586, 'id_str': '850610094379331586', 'text': 'Hospitals put your data at risk, study finds https://t.co/l4Q0AFz4he https://t.co/GChWceFnDw', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/l4Q0AFz4he', 'expanded_url': 'https://scienmag.com/?p=1527733', 'display_url': 'scienmag.com/?p=1527733', 'indices': [45, 68]}], 'media': [{'id': 850610091539779584, 'id_str': '850610091539779584', 'indices': [69, 92], 'media_url': 'http://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'url': 'https://t.co/GChWceFnDw', 'display_url': 'pic.twitter.com/GChWceFnDw', 'expanded_url': 'https://twitter.com/scienmag/status/850610094379331586/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 999, 'resize': 'fit'}, 'small': {'w': 490, 'h': 680, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'large': {'w': 720, 'h': 999, 'resize': 'fit'}}}]}, 'extended_entities': {'media': [{'id': 850610091539779584, 'id_str': '850610091539779584', 'indices': [69, 92], 'media_url': 'http://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'url': 'https://t.co/GChWceFnDw', 'display_url': 'pic.twitter.com/GChWceFnDw', 'expanded_url': 'https://twitter.com/scienmag/status/850610094379331586/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 999, 'resize': 'fit'}, 'small': {'w': 490, 'h': 680, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'large': {'w': 720, 'h': 999, 'resize': 'fit'}}}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 'id_str': '2782211491', 'name': 'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:15:52 +0000 2017', 'id': 850608112096509952, 'id_str': '850608112096509952', 'text': '.@Boeing and ULA demo their Emergency Egress System for crewed space launches https://t.co/V0XwdPNpQK https://t.co/stGN6yfQjB', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'Boeing', 'name': 'The Boeing Company', 'id': 25103967, 'id_str': '25103967', 'indices': [1, 8]}], 'urls': [{'url': 'https://t.co/V0XwdPNpQK', 'expanded_url': 'http://tcrn.ch/2nxQ5S1', 'display_url': 'tcrn.ch/2nxQ5S1', 'indices': [78, 101]}], 'media': [{'id': 850607935507972096, 'id_str': '850607935507972096', 'indices': [102, 125], 'media_url': 'http://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'url': 'https://t.co/stGN6yfQjB', 'display_url': 'pic.twitter.com/stGN6yfQjB', 'expanded_url': 'https://twitter.com/TechCrunch/status/850608112096509952/video/1', 'type': 'photo', 'sizes': {'small': {'w': 680, 'h': 383, 'resize': 'fit'}, 'medium': {'w': 1200, 'h': 675, 'resize': 'fit'}, 'large': {'w': 1280, 'h': 720, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}}}]}, 'extended_entities': {'media': [{'id': 850607935507972096, 'id_str': '850607935507972096', 'indices': [102, 125], 'media_url': 'http://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'url': 'https://t.co/stGN6yfQjB', 'display_url': 'pic.twitter.com/stGN6yfQjB', 'expanded_url': 'https://twitter.com/TechCrunch/status/850608112096509952/video/1', 'type': 'video', 'sizes': {'small': {'w': 680, 'h': 383, 'resize': 'fit'}, 'medium': {'w': 1200, 'h': 675, 'resize': 'fit'}, 'large': {'w': 1280, 'h': 720, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}}, 'video_info': {'aspect_ratio': [16, 9], 'duration_millis': 43377, 'variants': [{'bitrate': 832000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/vid/640x360/LM3uDnS4VK0_AXpQ.mp4'}, {'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/pl/qbcsnyEVjyxpCvF0.m3u8'}, {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/vid/1280x720/MfsjvH_gIjdnTA6A.mp4'}, {'bitrate': 320000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/vid/320x180/6b7lqyx2aE-0R_WA.mp4'}]}, 'additional_media_info': {'title': 'ULA demo its Emergency Egress System', 'description': 'Follow @TechCrunch', 'call_to_actions': {'visit_site': {'url': 'http://www.techcrunch.com/video'}}, 'embeddable': True, 'monetizable': True}}]}, 'source': '<a href=\"http://snappytv.com\" rel=\"nofollow\">SnappyTV.com</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 816653, 'id_str': '816653', 'name': 'TechCrunch', 'screen_name': 'TechCrunch', 'location': 'San Francisco, CA', 'description': 'Breaking technology news, analysis, and opinions from TechCrunch. The number one guide for all things tech. Got a tip? Let us know [email protected]', 'url': 'https://t.co/b5Oyx12qGG', 'entities': {'url': {'urls': [{'url': 'https://t.co/b5Oyx12qGG', 'expanded_url': 'http://techcrunch.com', 'display_url': 'techcrunch.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8800086, 'friends_count': 898, 'listed_count': 108905, 'created_at': 'Wed Mar 07 01:27:09 +0000 2007', 'favourites_count': 1906, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 153517, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '149500', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/816653/1490894597', 'profile_link_color': '097000', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDFFCC', 'profile_text_color': '222222', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 37, 'favorite_count': 10, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:15:02 +0000 2017', 'id': 850607904730296320, 'id_str': '850607904730296320', 'text': 'Engrams and circuits crucial for systems consolidation of a memory | If you have money to access Science ... https://t.co/Q4CUt8AyLc', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/Q4CUt8AyLc', 'expanded_url': 'http://science.sciencemag.org/content/356/6333/73', 'display_url': 'science.sciencemag.org/content/356/63…', 'indices': [109, 132]}]}, 'source': '<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 29843511, 'id_str': '29843511', 'name': 'Nando de Freitas', 'screen_name': 'NandoDF', 'location': 'London, England', 'description': 'Researching intelligence to understand what we are and to find ways to harness it wisely.', 'url': 'https://t.co/KJYJNVKZE9', 'entities': {'url': {'urls': [{'url': 'https://t.co/KJYJNVKZE9', 'expanded_url': 'http://www.cs.ox.ac.uk/people/nando.defreitas/', 'display_url': 'cs.ox.ac.uk/people/nando.d…', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 22713, 'friends_count': 186, 'listed_count': 623, 'created_at': 'Wed Apr 08 22:41:09 +0000 2009', 'favourites_count': 2265, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2683, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': '022330', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/29843511/1457616654', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'A8C7F7', 'profile_sidebar_fill_color': 'C0DFEC', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 1, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:12:06 +0000 2017', 'id': 850607166495051776, 'id_str': '850607166495051776', 'text': 'A NASA infrared look at the Southern Indian Ocean’s 15th tropical cyclone https://t.co/uqXIWmVlki https://t.co/DYgUC5JaNB', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/uqXIWmVlki', 'expanded_url': 'https://scienmag.com/?p=1527728', 'display_url': 'scienmag.com/?p=1527728', 'indices': [74, 97]}], 'media': [{'id': 850607163332603905, 'id_str': '850607163332603905', 'indices': [98, 121], 'media_url': 'http://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'url': 'https://t.co/DYgUC5JaNB', 'display_url': 'pic.twitter.com/DYgUC5JaNB', 'expanded_url': 'https://twitter.com/scienmag/status/850607166495051776/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 556, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'small': {'w': 680, 'h': 525, 'resize': 'fit'}, 'large': {'w': 720, 'h': 556, 'resize': 'fit'}}}]}, 'extended_entities': {'media': [{'id': 850607163332603905, 'id_str': '850607163332603905', 'indices': [98, 121], 'media_url': 'http://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'url': 'https://t.co/DYgUC5JaNB', 'display_url': 'pic.twitter.com/DYgUC5JaNB', 'expanded_url': 'https://twitter.com/scienmag/status/850607166495051776/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 556, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'small': {'w': 680, 'h': 525, 'resize': 'fit'}, 'large': {'w': 720, 'h': 556, 'resize': 'fit'}}}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 'id_str': '2782211491', 'name': 'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 6, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:12:03 +0000 2017', 'id': 850607151462711297, 'id_str': '850607151462711297', 'text': 'Weekly Roundup: Apple to reset the Mac Pro, Tesla most valuable U.S. automaker https://t.co/O8XissmJvI', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/O8XissmJvI', 'expanded_url': 'http://tcrn.ch/2nTd2jA', 'display_url': 'tcrn.ch/2nTd2jA', 'indices': [79, 102]}]}, 'source': '<a href=\"http://www.socialflow.com\" rel=\"nofollow\">SocialFlow</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 816653, 'id_str': '816653', 'name': 'TechCrunch', 'screen_name': 'TechCrunch', 'location': 'San Francisco, CA', 'description': 'Breaking technology news, analysis, and opinions from TechCrunch. The number one guide for all things tech. Got a tip? Let us know [email protected]', 'url': 'https://t.co/b5Oyx12qGG', 'entities': {'url': {'urls': [{'url': 'https://t.co/b5Oyx12qGG', 'expanded_url': 'http://techcrunch.com', 'display_url': 'techcrunch.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8800086, 'friends_count': 898, 'listed_count': 108905, 'created_at': 'Wed Mar 07 01:27:09 +0000 2007', 'favourites_count': 1906, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 153517, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '149500', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/816653/1490894597', 'profile_link_color': '097000', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDFFCC', 'profile_text_color': '222222', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 15, 'favorite_count': 13, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:10:34 +0000 2017', 'id': 850606780954628096, 'id_str': '850606780954628096', 'text': 'Instead of doubling its data center footprint, Google built its own computer chip for running deep neural networks https://t.co/oJyjVdXzik', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/oJyjVdXzik', 'expanded_url': 'http://bit.ly/2nTZVil', 'display_url': 'bit.ly/2nTZVil', 'indices': [115, 138]}]}, 'source': '<a href=\"http://www.socialflow.com\" rel=\"nofollow\">SocialFlow</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1344951, 'id_str': '1344951', 'name': 'WIRED', 'screen_name': 'WIRED', 'location': 'San Francisco/New York', 'description': 'WIRED is where tomorrow is realized.', 'url': 'http://t.co/AbRkBRjcYo', 'entities': {'url': {'urls': [{'url': 'http://t.co/AbRkBRjcYo', 'expanded_url': 'http://WIRED.com', 'display_url': 'WIRED.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8525967, 'friends_count': 278, 'listed_count': 89501, 'created_at': 'Sat Mar 17 09:57:25 +0000 2007', 'favourites_count': 1715, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': True, 'statuses_count': 75916, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '000000', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/734548027/5e5ee18945d22a9e2d9971208971bfdd.jpeg', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/734548027/5e5ee18945d22a9e2d9971208971bfdd.jpeg', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/615598832726970372/jsK-gBSt_normal.png', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/615598832726970372/jsK-gBSt_normal.png', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/1344951/1490210731', 'profile_link_color': '99DCF0', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'EEEEEE', 'profile_text_color': '000000', 'profile_use_background_image': False, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 41, 'favorite_count': 64, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n", | |
"{'created_at': 'Sat Apr 08 07:09:33 +0000 2017', 'id': 850606523440967680, 'id_str': '850606523440967680', 'text': 'RT @tejasdkulkarni: Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowl…', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'tejasdkulkarni', 'name': 'Tejas Kulkarni', 'id': 56872711, 'id_str': '56872711', 'indices': [3, 18]}], 'urls': []}, 'source': '<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 29843511, 'id_str': '29843511', 'name': 'Nando de Freitas', 'screen_name': 'NandoDF', 'location': 'London, England', 'description': 'Researching intelligence to understand what we are and to find ways to harness it wisely.', 'url': 'https://t.co/KJYJNVKZE9', 'entities': {'url': {'urls': [{'url': 'https://t.co/KJYJNVKZE9', 'expanded_url': 'http://www.cs.ox.ac.uk/people/nando.defreitas/', 'display_url': 'cs.ox.ac.uk/people/nando.d…', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 22713, 'friends_count': 186, 'listed_count': 623, 'created_at': 'Wed Apr 08 22:41:09 +0000 2009', 'favourites_count': 2265, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2683, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': '022330', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/29843511/1457616654', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'A8C7F7', 'profile_sidebar_fill_color': 'C0DFEC', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Fri Apr 07 09:46:46 +0000 2017', 'id': 850283699312762880, 'id_str': '850283699312762880', 'text': 'Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowledge by Spelke et al', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 56872711, 'id_str': '56872711', 'name': 'Tejas Kulkarni', 'screen_name': 'tejasdkulkarni', 'location': '', 'description': 'Research Scientist at Google DeepMind. AI PhD from MIT. I want to understand how the mind works.', 'url': 'http://t.co/qNnX25VwDk', 'entities': {'url': {'urls': [{'url': 'http://t.co/qNnX25VwDk', 'expanded_url': 'http://tejask.com', 'display_url': 'tejask.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1731, 'friends_count': 267, 'listed_count': 51, 'created_at': 'Wed Jul 15 00:44:25 +0000 2009', 'favourites_count': 254, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 139, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/722316259132645376/kHn7-6gJ_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/722316259132645376/kHn7-6gJ_normal.jpg', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': True, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 3, 'favorite_count': 26, 'favorited': False, 'retweeted': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 3, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n" | |
] | |
} | |
], | |
"source": [ | |
"for status in tweepy.Cursor(api.home_timeline).items(10):\n", | |
" # Process a single status\n", | |
" print(status._json) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from tweepy import Stream\n", | |
"from tweepy.streaming import StreamListener\n", | |
"import time\n", | |
" \n", | |
"class MyListener(StreamListener):\n", | |
" \n", | |
" def on_data(self, data):\n", | |
" try:\n", | |
" with open('python.json', 'a') as f:\n", | |
" f.write(data)\n", | |
" return True\n", | |
" except BaseException as e:\n", | |
" print(str(e))\n", | |
" return True\n", | |
" \n", | |
" def on_error(self, status):\n", | |
" print(status)\n", | |
" return True\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\n", | |
" \"created_at\": \"Sun Mar 19 10:25:43 +0000 2017\",\n", | |
" \"id\": 843408135171792896,\n", | |
" \"id_str\": \"843408135171792896\",\n", | |
" \"text\": \"#AI Consulting Roles-We're Hiring #Artificial Intelligence experts #machinelearning #python...\\u2026 https://t.co/ivmKoKwC6T\",\n", | |
" \"display_text_range\": [\n", | |
" 0,\n", | |
" 140\n", | |
" ],\n", | |
" \"source\": \"<a href=\\\"http://linkis.com\\\" rel=\\\"nofollow\\\">Linkis: turn sharing into growth</a>\",\n", | |
" \"truncated\": true,\n", | |
" \"in_reply_to_status_id\": null,\n", | |
" \"in_reply_to_status_id_str\": null,\n", | |
" \"in_reply_to_user_id\": null,\n", | |
" \"in_reply_to_user_id_str\": null,\n", | |
" \"in_reply_to_screen_name\": null,\n", | |
" \"user\": {\n", | |
" \"id\": 4481835072,\n", | |
" \"id_str\": \"4481835072\",\n", | |
" \"name\": \"DIGR\",\n", | |
" \"screen_name\": \"digr_io\",\n", | |
" \"location\": \"St. Louis, MO, United States\",\n", | |
" \"url\": \"http://bit.ly/digr\",\n", | |
" \"description\": \"Helping customers deploy #Analytics #DataManagement #BigData #DataViz @Tableau @Talend @XtremeData @SnowflakeDB. Founded by @axelrod_eric\",\n", | |
" \"protected\": false,\n", | |
" \"verified\": false,\n", | |
" \"followers_count\": 2538,\n", | |
" \"friends_count\": 4111,\n", | |
" \"listed_count\": 1298,\n", | |
" \"favourites_count\": 640,\n", | |
" \"statuses_count\": 11859,\n", | |
" \"created_at\": \"Mon Dec 07 03:40:21 +0000 2015\",\n", | |
" \"utc_offset\": -18000,\n", | |
" \"time_zone\": \"Central Time (US & Canada)\",\n", | |
" \"geo_enabled\": false,\n", | |
" \"lang\": \"en\",\n", | |
" \"contributors_enabled\": false,\n", | |
" \"is_translator\": false,\n", | |
" \"profile_background_color\": \"C0DEED\",\n", | |
" \"profile_background_image_url\": \"http://abs.twimg.com/images/themes/theme1/bg.png\",\n", | |
" \"profile_background_image_url_https\": \"https://abs.twimg.com/images/themes/theme1/bg.png\",\n", | |
" \"profile_background_tile\": false,\n", | |
" \"profile_link_color\": \"1DA1F2\",\n", | |
" \"profile_sidebar_border_color\": \"C0DEED\",\n", | |
" \"profile_sidebar_fill_color\": \"DDEEF6\",\n", | |
" \"profile_text_color\": \"333333\",\n", | |
" \"profile_use_background_image\": true,\n", | |
" \"profile_image_url\": \"http://pbs.twimg.com/profile_images/673709429200564225/XrIRC-ip_normal.png\",\n", | |
" \"profile_image_url_https\": \"https://pbs.twimg.com/profile_images/673709429200564225/XrIRC-ip_normal.png\",\n", | |
" \"default_profile\": true,\n", | |
" \"default_profile_image\": false,\n", | |
" \"following\": null,\n", | |
" \"follow_request_sent\": null,\n", | |
" \"notifications\": null\n", | |
" },\n", | |
" \"geo\": null,\n", | |
" \"coordinates\": null,\n", | |
" \"place\": null,\n", | |
" \"contributors\": null,\n", | |
" \"is_quote_status\": false,\n", | |
" \"extended_tweet\": {\n", | |
" \"full_text\": \"#AI Consulting Roles-We're Hiring #Artificial Intelligence experts #machinelearning #python... https://t.co/cs51fgWqTW by #alevergara78 https://t.co/Ug0OpxTacU\",\n", | |
" \"display_text_range\": [\n", | |
" 0,\n", | |
" 135\n", | |
" ],\n", | |
" \"entities\": {\n", | |
" \"hashtags\": [\n", | |
" {\n", | |
" \"text\": \"AI\",\n", | |
" \"indices\": [\n", | |
" 0,\n", | |
" 3\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"Artificial\",\n", | |
" \"indices\": [\n", | |
" 34,\n", | |
" 45\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"machinelearning\",\n", | |
" \"indices\": [\n", | |
" 67,\n", | |
" 83\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"python\",\n", | |
" \"indices\": [\n", | |
" 84,\n", | |
" 91\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"alevergara78\",\n", | |
" \"indices\": [\n", | |
" 122,\n", | |
" 135\n", | |
" ]\n", | |
" }\n", | |
" ],\n", | |
" \"urls\": [\n", | |
" {\n", | |
" \"url\": \"https://t.co/cs51fgWqTW\",\n", | |
" \"expanded_url\": \"http://ln.is/FMYLB\",\n", | |
" \"display_url\": \"ln.is/FMYLB\",\n", | |
" \"indices\": [\n", | |
" 95,\n", | |
" 118\n", | |
" ]\n", | |
" }\n", | |
" ],\n", | |
" \"user_mentions\": [],\n", | |
" \"symbols\": [],\n", | |
" \"media\": [\n", | |
" {\n", | |
" \"id\": 843408132487315457,\n", | |
" \"id_str\": \"843408132487315457\",\n", | |
" \"indices\": [\n", | |
" 136,\n", | |
" 159\n", | |
" ],\n", | |
" \"media_url\": \"http://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n", | |
" \"media_url_https\": \"https://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n", | |
" \"url\": \"https://t.co/Ug0OpxTacU\",\n", | |
" \"display_url\": \"pic.twitter.com/Ug0OpxTacU\",\n", | |
" \"expanded_url\": \"https://twitter.com/digr_io/status/843408135171792896/photo/1\",\n", | |
" \"type\": \"photo\",\n", | |
" \"sizes\": {\n", | |
" \"large\": {\n", | |
" \"w\": 1200,\n", | |
" \"h\": 675,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"small\": {\n", | |
" \"w\": 680,\n", | |
" \"h\": 383,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"medium\": {\n", | |
" \"w\": 1200,\n", | |
" \"h\": 675,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"thumb\": {\n", | |
" \"w\": 150,\n", | |
" \"h\": 150,\n", | |
" \"resize\": \"crop\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" ]\n", | |
" },\n", | |
" \"extended_entities\": {\n", | |
" \"media\": [\n", | |
" {\n", | |
" \"id\": 843408132487315457,\n", | |
" \"id_str\": \"843408132487315457\",\n", | |
" \"indices\": [\n", | |
" 136,\n", | |
" 159\n", | |
" ],\n", | |
" \"media_url\": \"http://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n", | |
" \"media_url_https\": \"https://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n", | |
" \"url\": \"https://t.co/Ug0OpxTacU\",\n", | |
" \"display_url\": \"pic.twitter.com/Ug0OpxTacU\",\n", | |
" \"expanded_url\": \"https://twitter.com/digr_io/status/843408135171792896/photo/1\",\n", | |
" \"type\": \"photo\",\n", | |
" \"sizes\": {\n", | |
" \"large\": {\n", | |
" \"w\": 1200,\n", | |
" \"h\": 675,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"small\": {\n", | |
" \"w\": 680,\n", | |
" \"h\": 383,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"medium\": {\n", | |
" \"w\": 1200,\n", | |
" \"h\": 675,\n", | |
" \"resize\": \"fit\"\n", | |
" },\n", | |
" \"thumb\": {\n", | |
" \"w\": 150,\n", | |
" \"h\": 150,\n", | |
" \"resize\": \"crop\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" ]\n", | |
" }\n", | |
" },\n", | |
" \"retweet_count\": 0,\n", | |
" \"favorite_count\": 0,\n", | |
" \"entities\": {\n", | |
" \"hashtags\": [\n", | |
" {\n", | |
" \"text\": \"AI\",\n", | |
" \"indices\": [\n", | |
" 0,\n", | |
" 3\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"Artificial\",\n", | |
" \"indices\": [\n", | |
" 34,\n", | |
" 45\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"machinelearning\",\n", | |
" \"indices\": [\n", | |
" 67,\n", | |
" 83\n", | |
" ]\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"python\",\n", | |
" \"indices\": [\n", | |
" 84,\n", | |
" 91\n", | |
" ]\n", | |
" }\n", | |
" ],\n", | |
" \"urls\": [\n", | |
" {\n", | |
" \"url\": \"https://t.co/ivmKoKwC6T\",\n", | |
" \"expanded_url\": \"https://twitter.com/i/web/status/843408135171792896\",\n", | |
" \"display_url\": \"twitter.com/i/web/status/8\\u2026\",\n", | |
" \"indices\": [\n", | |
" 96,\n", | |
" 119\n", | |
" ]\n", | |
" }\n", | |
" ],\n", | |
" \"user_mentions\": [],\n", | |
" \"symbols\": []\n", | |
" },\n", | |
" \"favorited\": false,\n", | |
" \"retweeted\": false,\n", | |
" \"possibly_sensitive\": false,\n", | |
" \"filter_level\": \"low\",\n", | |
" \"lang\": \"en\",\n", | |
" \"timestamp_ms\": \"1489919143847\"\n", | |
"}\n" | |
] | |
} | |
], | |
"source": [ | |
"import json\n", | |
" \n", | |
"with open('python.json', 'r') as f:\n", | |
" line = f.readline() # read only the first tweet/line\n", | |
" tweet = json.loads(line) # load it as Python dict\n", | |
" print(json.dumps(tweet, indent=4)) # pretty-print" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['RT', '@', 'marcobonzanini', ':', 'just', 'an', 'example', '!', ':', 'D', 'http', ':', '//example.com', '#', 'NLP']\n" | |
] | |
} | |
], | |
"source": [ | |
"from nltk.tokenize import word_tokenize\n", | |
" \n", | |
"tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'\n", | |
"print(word_tokenize(tweet))\n", | |
"# ['RT', '@', 'marcobonzanini', ':', 'just', 'an', 'example', '!', ':', 'D', 'http', ':', '//example.com', '#', 'NLP']\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
" \n", | |
"emoticons_str = r\"\"\"\n", | |
" (?:\n", | |
" [:=;] # Eyes\n", | |
" [oO\\-]? # Nose (optional)\n", | |
" [D\\)\\]\\(\\]/\\\\OpP] # Mouth\n", | |
" )\"\"\"\n", | |
" \n", | |
"regex_str = [\n", | |
" emoticons_str,\n", | |
" r'<[^>]+>', # HTML tags\n", | |
" r'(?:@[\\w_]+)', # @-mentions\n", | |
" r\"(?:\\#+[\\w_]+[\\w\\'_\\-]*[\\w_]+)\", # hash-tags\n", | |
" r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-f][0-9a-f]))+', # URLs\n", | |
" \n", | |
" r'(?:(?:\\d+,?)+(?:\\.?\\d+)?)', # numbers\n", | |
" r\"(?:[a-z][a-z'\\-_]+[a-z])\", # words with - and '\n", | |
" r'(?:[\\w_]+)', # other words\n", | |
" r'(?:\\S)' # anything else\n", | |
"]\n", | |
" \n", | |
"tokens_re = re.compile(r'('+'|'.join(regex_str)+')', re.VERBOSE | re.IGNORECASE)\n", | |
"emoticon_re = re.compile(r'^'+emoticons_str+'$', re.VERBOSE | re.IGNORECASE)\n", | |
" \n", | |
"def tokenize(s):\n", | |
" return tokens_re.findall(s)\n", | |
" \n", | |
"def preprocess(s, lowercase=False):\n", | |
" tokens = tokenize(s)\n", | |
" if lowercase:\n", | |
" tokens = [token if emoticon_re.search(token) else token.lower() for token in tokens]\n", | |
" return tokens\n", | |
" \n", | |
"tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'\n", | |
"print(preprocess(tweet))\n", | |
"# ['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"with open('python.json', 'r') as f:\n", | |
" for line in f:\n", | |
" tweet = json.loads(line)\n", | |
" tokens = preprocess(tweet['text'])\n", | |
" #do_something_else(tokens)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['RT', '@ipfconline1', ':', 'The', 'Guide', 'to', 'Understand', '#MachineLearning', 'Algorithms', 'With', 'The', 'R', '&', 'amp', ';', '#Python', 'Codes', 'to', 'Run', 'Them', '.', 'https://t.co/POABbz2So5', '…']\n" | |
] | |
} | |
], | |
"source": [ | |
"print(tokens)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment