Last active
June 12, 2019 20:24
-
-
Save victorkohler/11630be55ebfce4ad0674bb1983bc478 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_train_instances():
    """Expand every observed (user, item) pair with sampled negative items.

    Reads the module-level names ``uids``, ``iids``, ``items`` and
    ``num_neg``. For each pair produced by ``zip(uids, iids)`` one row
    labelled 1 is emitted, followed by ``num_neg`` rows labelled 0 whose
    item index is drawn with ``np.random.randint(len(items))`` and
    re-drawn until the (user, index) pair is not among the observed pairs.

    NOTE(review): the sampled value is an index in ``range(len(items))``;
    this presumably matches the id space used in ``iids`` -- confirm.

    Returns:
        user_input (list): The user of every emitted row.
        item_input (list): The item of every emitted row, positives and
            sampled negatives interleaved.
        labels (list): 1 for an observed interaction, 0 for a sampled one.
    """
    observed = set(zip(uids, iids))
    user_input = []
    item_input = []
    labels = []

    for user_id, pos_item in zip(uids, iids):
        # The observed interaction itself is the positive example.
        user_input.append(user_id)
        item_input.append(pos_item)
        labels.append(1)

        # Followed by num_neg items this user has not interacted with.
        for _ in range(num_neg):
            while True:
                neg_item = np.random.randint(len(items))
                if (user_id, neg_item) not in observed:
                    break
            user_input.append(user_id)
            item_input.append(neg_item)
            labels.append(0)

    return user_input, item_input, labels
def random_mini_batches(U, I, L, mini_batch_size=256):
    """Shuffle the interactions and cut them into consecutive mini batches.

    The three inputs are passed together to the ``shuffle`` callable in
    scope (presumably ``sklearn.utils.shuffle``, which would permute all
    three consistently -- confirm against the file's imports). The
    shuffled sequences are then sliced into chunks of ``mini_batch_size``;
    a final shorter chunk is added when ``len(U)`` is not an exact multiple.

    Args:
        U (list): All users for every interaction.
        I (list): All items for every interaction.
        L (list): All labels for every interaction.
        mini_batch_size (int): Number of interactions per batch.

    Returns:
        mini_batches (list): A list of (users, items, labels) tuples,
            one per batch: [(u, i, l), (u, i, l) ...]
    """
    perm_U, perm_I, perm_L = shuffle(U, I, L)
    total = len(U)
    full_count, leftover = divmod(total, mini_batch_size)

    def _slice_batch(start, stop):
        # Take the same window out of all three shuffled sequences.
        return (perm_U[start:stop], perm_I[start:stop], perm_L[start:stop])

    mini_batches = [
        _slice_batch(n * mini_batch_size, n * mini_batch_size + mini_batch_size)
        for n in range(full_count)
    ]

    # Whatever does not fill a complete batch goes into one last, smaller batch.
    if leftover != 0:
        mini_batches.append(_slice_batch(full_count * mini_batch_size, total))

    return mini_batches
def get_hits(k_ranked, holdout):
    """Return 1 when some element of ``k_ranked`` equals ``holdout``, else 0."""
    found = any(candidate == holdout for candidate in k_ranked)
    return 1 if found else 0
def eval_rating(idx, test_ratings, test_negatives, K):
    """Score one test user's candidate items and check for a top-K hit.

    The candidates are the negative items stored at ``test_negatives[idx]``
    plus the holdout item from ``test_ratings[idx]``. All candidates are
    scored in a single ``session.run`` call on ``output_layer`` and the
    holdout counts as a hit when it is among the K highest scores.

    Relies on the module-level ``session``, ``output_layer``, ``user`` and
    ``item`` objects (the TensorFlow session, output tensor and input
    placeholders, per the original comments -- defined elsewhere).

    Args:
        idx (int): Position of the test pair to evaluate.
        test_ratings (list): Test set (user, item) pairs; the item is
            the holdout.
        test_negatives (list): One list of negative items per test pair,
            aligned with ``test_ratings``. It is not modified.
        K (int): Number of top recommendations to consider.

    Returns:
        int: 1 if the holdout appeared in the top K predicted items,
            0 if not.
    """
    user_idx = test_ratings[idx][0]
    holdout = test_ratings[idx][1]

    # Copy before appending: appending to test_negatives[idx] directly would
    # mutate the caller's data, so repeated evaluation of the same structure
    # would keep growing the candidate list with duplicate holdout entries.
    items = list(test_negatives[idx])
    items.append(holdout)

    # Column vectors: the same user repeated once per candidate item.
    predict_user = np.full(len(items), user_idx, dtype='int32').reshape(-1, 1)
    np_items = np.array(items).reshape(-1, 1)

    # Feed user and items into the graph and flatten the scores to a list.
    predictions = session.run([output_layer], feed_dict={user: predict_user, item: np_items})
    scores = predictions[0].flatten().tolist()

    # Map each candidate item id to its predicted score; indexing (rather
    # than zip) keeps a loud failure if fewer scores than items come back.
    map_item_score = {
        current_item: scores[position]
        for position, current_item in enumerate(items)
    }

    # The K item ids with the highest scores.
    k_ranked = heapq.nlargest(K, map_item_score, key=map_item_score.get)

    return get_hits(k_ranked, holdout)
def evaluate(df_neg, K=10):
    """Compute the per-user top-K hit indicator over the whole test set.

    Test pairs are read from the module-level ``df_test`` dataframe
    (columns ``user_id`` and ``item_id``). The first column of ``df_neg``
    is dropped and each remaining row is used as the list of negative
    items for the test pair at the same row position.

    Args:
        df_neg (dataframe): dataframe containing our holdout items
            and the randomly sampled negative interactions for each
            (user, item) holdout pair.
        K (int): The 'K' number of ranked predictions we want
            our holdout item to be present in.

    Returns:
        hits (list): One entry per test pair: 1 if the holdout was
            present in the K highest ranked predictions, 0 if not.
    """
    users = df_test['user_id'].values.tolist()
    holdouts = df_test['item_id'].values.tolist()
    test_ratings = list(zip(users, holdouts))

    # Everything after the first column is the negative-item list per row.
    negatives_only = df_neg.drop(df_neg.columns[0], axis=1)
    test_negatives = negatives_only.values.tolist()

    # Score every test pair with eval_rating and collect the 0/1 results.
    return [
        eval_rating(row, test_ratings, test_negatives, K)
        for row in range(len(test_ratings))
    ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment