###########################################
# Suppress matplotlib user warnings
# Necessary for newer versions of matplotlib
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
#
# Display inline matplotlib plots with IPython
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
###########################################

import matplotlib.pyplot as pl
import numpy as np
from sklearn.tree import DecisionTreeRegressor
# sklearn.learning_curve and sklearn.cross_validation were removed in
# scikit-learn 0.20; their contents now live in sklearn.model_selection.
from sklearn.model_selection import ShuffleSplit, validation_curve
def ModelComplexity(X, y):
    """ Calculates the performance of the model as model complexity increases.
        The training and validation scores are then plotted. """

    # Create 10 cross-validation sets for training and testing
    # (ShuffleSplit now takes n_splits rather than the sample count and n_iter)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)

    # Vary the max_depth parameter from 1 to 10
    max_depth = np.arange(1, 11)

    # Calculate the training and validation scores for each max_depth
    train_scores, test_scores = validation_curve(DecisionTreeRegressor(), X, y,
        param_name="max_depth", param_range=max_depth, cv=cv, scoring='r2')

    # Find the mean and standard deviation for smoothing
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    # Plot the validation curve
    pl.figure(figsize=(7, 5))
    pl.title('Decision Tree Regressor Complexity Performance')
    pl.plot(max_depth, train_mean, 'o-', color='r', label='Training Score')
    pl.plot(max_depth, test_mean, 'o-', color='g', label='Validation Score')
    pl.fill_between(max_depth, train_mean - train_std,
                    train_mean + train_std, alpha=0.15, color='r')
    pl.fill_between(max_depth, test_mean - test_std,
                    test_mean + test_std, alpha=0.15, color='g')

    # Visual aesthetics
    pl.legend(loc='lower right')
    pl.xlabel('Maximum Depth')
    pl.ylabel('Score')
    pl.ylim([-0.05, 1.05])
    pl.show()
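

# --- Usage sketch (not part of the original gist) -------------------------
# A minimal example of calling ModelComplexity. The synthetic dataset built
# with make_regression is an assumption purely for illustration, since the
# gist does not include any data loading; substitute your own feature matrix
# X and target vector y.
if __name__ == "__main__":
    from sklearn.datasets import make_regression

    # Generate a small regression problem (200 samples, 6 features)
    X, y = make_regression(n_samples=200, n_features=6, noise=10.0,
                           random_state=0)

    # Plot training vs. validation R^2 as the tree's max_depth grows 1..10
    ModelComplexity(X, y)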