@shengch02
Created January 3, 2017 17:59
(Python) Explore various evaluation metrics: accuracy, confusion matrix, precision, recall. Explore how various metrics can be combined to produce a cost of making an error. Explore precision and recall curves.
#explore precision and recall
import pandas as pd
import numpy as np
#the dataset consists of baby product reviews on Amazon.com
import sframe
products = sframe.SFrame('amazon_baby.gl/')
#clean the original data: remove punctuation, fill in N/A, remove neutral sentiment,
# perform a train/test split, produce word count matrix
def remove_punctuation(text):
    import string
    return text.translate(None, string.punctuation)
products['review_clean'] = products['review'].apply(remove_punctuation)
products = products.fillna('review', '')
#ignore all reviews with rating=3, and classify the reviews as positive or negative
products = products[products['rating'] != 3]
products['sentiment'] = products['rating'].apply(lambda rating: +1 if rating > 3 else -1)
#split dataset into training and test sets
train_data, test_data = products.random_split(0.8, seed=1)
#calculate the word-count-matrix
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(token_pattern=r'\b\w+\b')
train_matrix = vectorizer.fit_transform(train_data['review_clean'])
test_matrix = vectorizer.transform(test_data['review_clean'])
print train_matrix
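#a quick sanity check (not in the original gist): the size of the learned
#vocabulary and the matrix shape are easier to read than the raw sparse dump
print 'vocabulary size: %d' % len(vectorizer.vocabulary_)
print 'train matrix shape:', train_matrix.shape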
#scikit learn, LogisticRegression(), training
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(train_matrix, train_data['sentiment'])
#measure accuracy
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=test_data['sentiment'].to_numpy(),
                          y_pred=model.predict(test_matrix))
print 'Test Accuracy: %s' % accuracy
#use the majority class classifier as a baseline
baseline = len(test_data[test_data['sentiment']==1])/float(len(test_data))
print 'Baseline accuracy : %s' % baseline
#confusion matrix
from sklearn.metrics import confusion_matrix
cmat = confusion_matrix(y_true=test_data['sentiment'].to_numpy(),
                        y_pred=model.predict(test_matrix),
                        labels=model.classes_)
print 'target_label | predicted_label | count'
print '--------------------------------------'
for i, target_label in enumerate(model.classes_):
    for j, predicted_label in enumerate(model.classes_):
        print '{0:^13} | {1:^15} | {2:5d}'.format(target_label,
                                                  predicted_label, cmat[i,j])
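#the gist description mentions combining error counts into a cost of making
#an error; a minimal sketch, assuming hypothetical costs of $100 per false
#positive and $1 per false negative (the dollar figures are illustrative)
false_positives = cmat[0, 1] #true label -1, predicted +1 (model.classes_ is [-1, +1])
false_negatives = cmat[1, 0] #true label +1, predicted -1
cost = 100.*false_positives + 1.*false_negatives
print 'Total cost of mistakes: %s' % cost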
#compute the precision of the logistic regression classifier
from sklearn.metrics import precision_score
precision = precision_score(y_true=test_data['sentiment'].to_numpy(),
                            y_pred=model.predict(test_matrix))
#recall
from sklearn.metrics import recall_score
recall = recall_score(y_true=test_data['sentiment'].to_numpy(),
                      y_pred=model.predict(test_matrix))
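print 'Precision: %s' % precision #fraction of predicted positives that are truly positive
print 'Recall: %s' % recall #fraction of true positives the classifier finds
#sanity check (not in the original gist): both metrics can be recovered from
#the confusion-matrix counts computed above
tp = cmat[1, 1] #true label +1, predicted +1
print 'TP/(TP+FP) = %s' % (tp/float(tp + cmat[0, 1])) #should match precision
print 'TP/(TP+FN) = %s' % (tp/float(tp + cmat[1, 0])) #should match recall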
#precision-recall tradeoff
def apply_threshold(probabilities, threshold):
    return sframe.SArray(probabilities).apply(lambda x: +1 if x > threshold else -1)
probabilities = model.predict_proba(test_matrix)[:,1]
y_pred_05 = apply_threshold(probabilities, 0.5)
y_pred_09 = apply_threshold(probabilities, 0.9)
print sum(y_pred_05==1) #28745
print sum(y_pred_09==1) #25070
print precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_05) #0.9494
print precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_09) #0.9815
print recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_05) #0.9714
print recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_09) #0.8758
#precision-recall curve
threshold_values = np.linspace(0.5, 1, num=101)
print threshold_values
precision_all = []
recall_all = []
for threshold in threshold_values:
    pred = apply_threshold(probabilities, threshold)
    precision_all.append(precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=pred))
    recall_all.append(recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=pred))
precision_all[100] = 1.0 #precision is undefined at threshold 1.0 (no positive predictions), so set it to 1 by convention; 0.710 is the smallest threshold value that achieves precision 0.965
#8208 false negatives
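#a sketch (not in the original gist) to locate that threshold programmatically
#instead of inspecting the computed values by hand
qualifying = [t for t, p in zip(threshold_values, precision_all) if p >= 0.965]
print 'smallest threshold with precision >= 0.965: %s' % min(qualifying)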
import matplotlib.pyplot as plt
def plot_pr_curve(precision, recall, title):
    plt.rcParams['figure.figsize'] = 7, 5
    plt.locator_params(axis='x', nbins=5)
    plt.plot(precision, recall, 'b-', linewidth=2.0)
    plt.title(title)
    plt.xlabel('Precision')
    plt.ylabel('Recall')
    plt.show()
plot_pr_curve(precision_all, recall_all, 'Precision-recall curve (all reviews)')
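#scikit-learn can compute the same curve directly from the probabilities; a
#minimal sketch using sklearn.metrics.precision_recall_curve, which evaluates
#every distinct score as a threshold instead of a fixed grid
from sklearn.metrics import precision_recall_curve
precision_sk, recall_sk, thresholds_sk = precision_recall_curve(
    test_data['sentiment'].to_numpy(), probabilities)
plot_pr_curve(precision_sk, recall_sk, 'Precision-recall curve (sklearn)')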
#evaluate specific search terms
baby_reviews = test_data[test_data['name'].apply(lambda x: 'baby' in x.lower())]
baby_matrix = vectorizer.transform(baby_reviews['review_clean'])
probabilities = model.predict_proba(baby_matrix)[:,1]
precision_baby = []
recall_baby = []
for threshold in threshold_values:
    pred = apply_threshold(probabilities, threshold)
    precision_baby.append(precision_score(y_true=baby_reviews['sentiment'].to_numpy(), y_pred=pred))
    recall_baby.append(recall_score(y_true=baby_reviews['sentiment'].to_numpy(), y_pred=pred))
precision_baby[100] = 1.0 #precision is again undefined at threshold 1.0; 0.735 is the smallest threshold value that achieves precision 0.965
plot_pr_curve(precision_baby, recall_baby, "Precision-Recall (Baby)")
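#a sketch (not in the original gist) overlaying the two curves to compare the
#'baby' subset against the full test set on a single plot
plt.plot(precision_all, recall_all, 'b-', linewidth=2.0, label='all reviews')
plt.plot(precision_baby, recall_baby, 'r-', linewidth=2.0, label='baby reviews')
plt.xlabel('Precision')
plt.ylabel('Recall')
plt.legend(loc='lower left')
plt.title('Precision-recall: all vs. baby reviews')
plt.show()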