Created
March 14, 2019 21:45
-
-
Save victorkohler/5060654ed76536e3662ae1304b02c48f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-----------------------
# FIND SIMILAR ARTISTS
#-----------------------
def find_similar_artists(artist=None, num_items=10):
    """Find the artists most similar to a given artist.

    Each candidate's score is the dot product between its latent item
    vector and the query artist's item vector, plus the candidate's item
    bias. The query artist itself is not excluded from the ranking.

    Args:
        artist (str): The name of the artist we want to find similar
            artists for. Must match a value in ``item_lookup.artist``.
        num_items (int): How many similar artists we want to return.

    Returns:
        similar (pandas.DataFrame): DataFrame with ``num_items`` rows and the
            columns ``artist`` and ``score``, ordered by descending score.

    Raises:
        ValueError: If ``artist`` is not found in ``item_lookup``.
    """
    # Grab our Item matrix V and the item bias from the trained model.
    # (The user matrix is not needed for item-to-item similarity.)
    item_vecs = get_variable(graph, session, 'item_factors')
    item_bi = get_variable(graph, session, 'item_bias').reshape(-1)

    # Resolve the artist name to its item id. Fail with a clear message
    # instead of the opaque TypeError that int() on an empty Series gives.
    matches = item_lookup.loc[item_lookup.artist == artist, 'artist_id']
    if matches.empty:
        raise ValueError(
            'Artist {!r} not found in item_lookup'.format(artist))
    # If a name maps to several ids, use the first one, mirroring how
    # names are resolved with .iloc[0] further down.
    item_id = int(matches.iloc[0])

    # Score every artist against the query artist's item vector.
    item_vec = item_vecs[item_id]
    scores = (item_vecs.dot(item_vec) + item_bi).reshape(-1)

    # Indices of the num_items highest scores, best first.
    top_idx = np.argsort(scores)[::-1][:num_items]

    # Use the lookup table to translate indices back to artist names.
    # NOTE(review): artist_id is compared as a string here, so item_lookup
    # presumably stores ids as strings -- confirm against where it is built.
    artists = [
        item_lookup.artist.loc[item_lookup.artist_id == str(idx)].iloc[0]
        for idx in top_idx
    ]

    similar = pd.DataFrame({'artist': artists, 'score': scores[top_idx]})
    return similar
# Example: show the default number of artists most similar to 'beyoncé'.
print(find_similar_artists('beyoncé'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment