Idan707 · January 14, 2018 09:07
diff --git a/get_model_results.py b/get_model_results.py
 from sklearn.metrics import confusion_matrix, classification_report
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.metrics import roc_auc_score, roc_curve

 def find_best_threshold(thresholds, fpr, tpr):
    """
    Return the ROC threshold whose operating point is closest to the ideal corner.

    The ideal classifier sits at (fpr=0, tpr=1). For each candidate threshold the
    squared Euclidean distance from its (fpr, tpr) point to that corner is
    computed, and the threshold with the smallest distance is returned.

    Parameters:
        thresholds: candidate decision thresholds.
        fpr: false-positive rate at each threshold (same length as thresholds).
        tpr: true-positive rate at each threshold (same length as thresholds).

    Returns:
        The element of `thresholds` whose (fpr, tpr) point is nearest to (0, 1).
        When several points are equally close, the first one is returned.
    """
    fpr_tpr = pd.DataFrame({'thresholds': thresholds, 'fpr': fpr, 'tpr': tpr})
    # Squared distance is sufficient: taking the square root would not change
    # which row is the minimum.
    fpr_tpr['dist'] = fpr_tpr['fpr'] ** 2 + (fpr_tpr['tpr'] - 1) ** 2
    # `DataFrame.ix` was removed in pandas 1.0. idxmin() returns an index
    # *label*, so label-based `.loc` is the correct replacement.
    return fpr_tpr.loc[fpr_tpr['dist'].idxmin(), 'thresholds']


 def get_model_results(model, train, test, y_train, y_test):
    """
    Plot train and test ROC curves for a fitted classifier and print a test report.

    The decision threshold is chosen on the test ROC curve with
    find_best_threshold, test predictions are derived from it, and a
    classification report for those predictions is printed. The figure title
    shows the area under the test ROC curve.

    Parameters:
        model: fitted estimator exposing predict_proba; column 1 of its output
            is used as the score (the positive class for a binary
            scikit-learn classifier).
        train: training features, passed to model.predict_proba.
        test: test features, passed to model.predict_proba.
        y_train: true labels for `train`.
        y_test: true labels for `test`.

    Returns:
        None. The function draws on a new matplotlib figure and prints to stdout.
    """
    test_scores = model.predict_proba(test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, test_scores)
    threshold = find_best_threshold(thresholds, fpr, tpr)
    # roc_curve treats a sample as positive when score >= threshold, so the
    # comparison must be inclusive for the predictions to match the selected
    # (fpr, tpr) operating point.
    predictions = test_scores >= threshold

    plt.figure()
    plt.plot(fpr, tpr, label='test')
    roc_auc = roc_auc_score(y_test, test_scores)

    # The train curve is drawn for comparison only; its thresholds are unused.
    train_scores = model.predict_proba(train)[:, 1]
    train_fpr, train_tpr, _ = roc_curve(y_train, train_scores)
    plt.plot(train_fpr, train_tpr, label='train')

    plt.plot([0, 1], [0, 1], 'r--', label='random guess')
    plt.title("area under the ROC curve = {}".format(roc_auc), fontsize=18)
    print(classification_report(y_test, predictions))
    plt.legend()
	from sklearn.metrics import confusion_matrix, classification_report
	from sklearn.linear_model import LogisticRegression
	from sklearn.pipeline import Pipeline, make_pipeline
	from sklearn.metrics import roc_auc_score, roc_curve

	def find_best_threshold(thresholds, fpr, tpr):
		"""
		Return the ROC threshold whose operating point is closest to the ideal corner.

		The ideal classifier sits at (fpr=0, tpr=1). For each candidate threshold the
		squared Euclidean distance from its (fpr, tpr) point to that corner is
		computed, and the threshold with the smallest distance is returned.

		Parameters:
			thresholds: candidate decision thresholds.
			fpr: false-positive rate at each threshold (same length as thresholds).
			tpr: true-positive rate at each threshold (same length as thresholds).

		Returns:
			The element of `thresholds` whose (fpr, tpr) point is nearest to (0, 1).
			When several points are equally close, the first one is returned.
		"""
		fpr_tpr = pd.DataFrame({'thresholds': thresholds, 'fpr': fpr, 'tpr': tpr})
		# Squared distance is sufficient: taking the square root would not change
		# which row is the minimum. (The `**` operators had been lost in this copy.)
		fpr_tpr['dist'] = fpr_tpr['fpr'] ** 2 + (fpr_tpr['tpr'] - 1) ** 2
		# `DataFrame.ix` was removed in pandas 1.0. idxmin() returns an index
		# *label*, so label-based `.loc` is the correct replacement.
		return fpr_tpr.loc[fpr_tpr['dist'].idxmin(), 'thresholds']


	def get_model_results(model, train, test, y_train, y_test):
		"""
		Plot train and test ROC curves for a fitted classifier and print a test report.

		The decision threshold is chosen on the test ROC curve with
		find_best_threshold, test predictions are derived from it, and a
		classification report for those predictions is printed. The figure title
		shows the area under the test ROC curve.

		Parameters:
			model: fitted estimator exposing predict_proba; column 1 of its output
				is used as the score (the positive class for a binary
				scikit-learn classifier).
			train: training features, passed to model.predict_proba.
			test: test features, passed to model.predict_proba.
			y_train: true labels for `train`.
			y_test: true labels for `test`.

		Returns:
			None. The function draws on a new matplotlib figure and prints to stdout.
		"""
		test_scores = model.predict_proba(test)[:, 1]
		fpr, tpr, thresholds = roc_curve(y_test, test_scores)
		threshold = find_best_threshold(thresholds, fpr, tpr)
		# roc_curve treats a sample as positive when score >= threshold, so the
		# comparison must be inclusive for the predictions to match the selected
		# (fpr, tpr) operating point.
		predictions = test_scores >= threshold

		plt.figure()
		plt.plot(fpr, tpr, label='test')
		roc_auc = roc_auc_score(y_test, test_scores)

		# The train curve is drawn for comparison only; its thresholds are unused.
		train_scores = model.predict_proba(train)[:, 1]
		train_fpr, train_tpr, _ = roc_curve(y_train, train_scores)
		plt.plot(train_fpr, train_tpr, label='train')

		plt.plot([0, 1], [0, 1], 'r--', label='random guess')
		plt.title("area under the ROC curve = {}".format(roc_auc), fontsize=18)
		print(classification_report(y_test, predictions))
		plt.legend()