Skip to content

Instantly share code, notes, and snippets.

@Idan707
Created January 14, 2018 09:07
Show Gist options
  • Save Idan707/79757e9099e02ff9a90d85d33d5dd666 to your computer and use it in GitHub Desktop.
Save Idan707/79757e9099e02ff9a90d85d33d5dd666 to your computer and use it in GitHub Desktop.
get_model_results
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline, make_pipeline
def find_best_threshold(thresholds, fpr, tpr):
    """Return the ROC threshold closest to the ideal corner (fpr=0, tpr=1).

    Each position ``i`` of the three inputs describes one operating point
    of a ROC curve. The point with the smallest squared Euclidean
    distance to (fpr=0, tpr=1) is selected; on ties the earliest
    position wins.

    Args:
        thresholds: Sequence of candidate decision thresholds.
        fpr: False-positive rate at each threshold (same length).
        tpr: True-positive rate at each threshold (same length).

    Returns:
        The element of ``thresholds`` at the selected position.
    """
    fpr_tpr = pd.DataFrame({'thresholds': thresholds, 'fpr': fpr, 'tpr': tpr})
    # Squared distance is enough: the square root would not change the argmin.
    fpr_tpr['dist'] = fpr_tpr['fpr'] ** 2 + (fpr_tpr['tpr'] - 1) ** 2
    # `.loc` replaces the `.ix` indexer, which no longer exists in pandas >= 1.0.
    return fpr_tpr.loc[fpr_tpr['dist'].idxmin(), 'thresholds']
def get_model_results(model, train, test, y_train, y_test):
    """Plot train/test ROC curves and print a test classification report.

    The decision threshold is the one returned by ``find_best_threshold``
    for the test ROC curve. Test samples are labelled positive when their
    score is greater than or equal to that threshold, and the resulting
    labels are passed to ``classification_report``. The figure title shows
    the ROC AUC computed on the test set.

    Requires ``plt`` (used as ``matplotlib.pyplot``) to be available in
    the enclosing scope.

    Args:
        model: Fitted estimator exposing ``predict_proba``; column 1 of its
            output is used as the score.
        train: Training features, passed to ``model.predict_proba``.
        test: Test features, passed to ``model.predict_proba``.
        y_train: True labels for ``train``.
        y_test: True labels for ``test``.

    Returns:
        None. The function draws on a new figure and prints the report.
    """
    test_scores = model.predict_proba(test)[:, 1]
    test_fpr, test_tpr, test_thresholds = roc_curve(y_test, test_scores)
    threshold = find_best_threshold(test_thresholds, test_fpr, test_tpr)
    # roc_curve evaluates each threshold as `score >= threshold`; use the
    # same comparison so the report matches the selected operating point.
    predictions = test_scores >= threshold
    roc_auc = roc_auc_score(y_test, test_scores)

    train_scores = model.predict_proba(train)[:, 1]
    train_fpr, train_tpr, _ = roc_curve(y_train, train_scores)

    plt.figure()
    plt.plot(test_fpr, test_tpr, label='test')
    plt.plot(train_fpr, train_tpr, label='train')
    plt.plot([0, 1], [0, 1], 'r--', label='random guess')
    plt.title("area under the ROC curve = {}".format(roc_auc), fontsize=18)
    plt.legend()

    print(classification_report(y_test, predictions))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment