Skip to content

Instantly share code, notes, and snippets.

@seanlinehan
Created September 13, 2015 03:14

Revisions

  1. seanlinehan created this gist Sep 13, 2015.
    26 changes: 26 additions & 0 deletions find_baseline.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,26 @@
    #!/usr/bin/env python
    from collections import defaultdict

    # Convenience method
    def get_intersection(users):
        """Return the followers shared by every entry in *users*.

        Args:
            users: Iterable of ``(followed_id, followers)`` pairs. Only the
                second element of each pair (the follower collection) is used.

        Returns:
            A ``set`` holding the followers that appear in every pair's
            follower collection, or an empty set when *users* is empty.
        """
        # Build a real list: on Python 3 ``map`` yields a lazy iterator that
        # cannot be indexed, so the previous ``[0]`` lookup raised TypeError.
        follower_lists = [entry[1] for entry in users]
        if not follower_lists:
            # Nothing to intersect; avoid an IndexError on empty input.
            return set()
        # Seed with the first list and intersect with the remaining ones.
        return set(follower_lists[0]).intersection(*follower_lists[1:])

    # Load the raw data fetched from Twitter: one "followed,follower" pair per line
    with open('twitter_ids.txt') as fo:
        ids = fo.read().splitlines()

    # Aggregate each followed account's followers into a list keyed by that account
    follower_sets = defaultdict(list)
    for pair in ids:
        if not pair.strip():
            # Skip blank lines; they would otherwise break the two-value unpack below
            continue
        followed, follower = pair.split(',')
        follower_sets[followed].append(follower)

    # Find the top 10 most-followed people, largest follower list first.
    # ``items()`` exists on both Python 2 and 3, whereas ``iteritems()`` is Python 2 only.
    most_followers = sorted(follower_sets.items(), key=lambda i: len(i[1]), reverse=True)[:10]
    # Find the people that follow all of them
    common_followers = get_intersection(most_followers)
    # Count them
    num_common_followers = len(common_followers)