Created
September 16, 2014 05:14
-
-
Save ageitgey/c40fba50b6fece4ee1e7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: tune an SGD linear regressor with an exhaustive grid search.
#
# The script loads a small in-memory data set, holds out 30% of it for a final
# check, grid-searches the regularization settings of SGDRegressor on the
# remaining rows, and prints the winning estimator, its coefficients, and its
# score on the held-out rows.
import numpy as np
from sklearn.linear_model import SGDRegressor
# Not used by this toy example, but kept available: SGD is sensitive to feature
# scaling, so real-world features should normally be standardized first.
from sklearn.preprocessing import StandardScaler  # noqa: F401

try:
    # scikit-learn >= 0.18 keeps these helpers in sklearn.model_selection.
    from sklearn.model_selection import GridSearchCV, train_test_split
except ImportError:
    # Older releases only ship the legacy modules (removed in 0.20).
    from sklearn.cross_validation import train_test_split
    from sklearn.grid_search import GridSearchCV

# Load your starting coefficient data as an array called X. This can be as big
# as your computer's memory. If that's still not big enough, you can load it in
# segments and use partial_fit instead of fit.
# Replace this with code to load data from a CSV or something.
X = np.array([
    [4, 3, 1, 0, 1],
    [5, 2, 1, 0, 1],
    [4, 2, 1, 1, 1],
    [3, 1, 0, 1, 1],
    [1, 1, 0, 1, 1],
    [4, 3, 1, 0, 1],
    [5, 2, 1, 0, 1],
    [4, 2, 1, 1, 1],
    [3, 1, 0, 1, 1],
    [1, 1, 0, 1, 1],
    [4, 3, 1, 0, 1],
    [5, 2, 1, 0, 1],
    [4, 2, 1, 1, 1],
    [3, 1, 0, 1, 1],
    [1, 1, 0, 1, 1],
])

# Load the "answers" to your equation into a vector named y (one per row of X).
y = np.array([4, 6, 6, 3, 1, 4, 6, 6, 3, 1, 4, 6, 6, 3, 1])

# To find the best parameters for the SGD regressor, we are just going to try
# them all and see which get the most accurate results.
# To do that, we need to split the data into a training set and a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Create the model. Shuffling is important with SGD because the order you see
# each array row affects the output. The elastic-net penalty is required for
# 'l1_ratio' to have any effect; with the default 'l2' penalty it is ignored
# and every l1_ratio value in the grid would produce the same model.
model = SGDRegressor(shuffle=True, penalty='elasticnet')

# A list of param ranges we want to guess and check and a list of values to try
# for each one.
param_grid = [{
    'alpha': [0.1, 0.3, 0.01, 0.03, 0.001, 0.003],
    'l1_ratio': [.05, .15, .5, .7, .9, .95, .99, 1],
}]

# Try each param pair and pick the best! This might take several min with a big
# data set. n_jobs=-1 uses every available core instead of assuming there are 8.
gs = GridSearchCV(model, param_grid, n_jobs=-1, verbose=1)
gs.fit(X_train, y_train)

# gs.best_estimator_ is an SGDRegressor instance refit on the whole training
# set with the best-scoring parameter combination.
print("Best estimator:")
print(gs.best_estimator_)
print(gs.best_estimator_.coef_)

# Finally use the held-out rows: score the best estimator on data it never saw
# during the search (for a regressor this is the R^2 coefficient).
print("Score on held-out test set:")
print(gs.score(X_test, y_test))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment