ageitgey · September 16, 2014 05:14
diff --git a/gistfile1.py b/gistfile1.py
 import numpy as np
 from sklearn import grid_search
 from sklearn import cross_validation
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import SGDRegressor

 # Training inputs live in X, one row of coefficients per sample. X may be as
 # large as your machine's memory allows; past that point, feed the data in
 # segments through partial_fit rather than fit.

 # Placeholder samples -- swap this for code that loads real data (e.g. a CSV).
 _sample_rows = (
     (4, 3, 1, 0, 1),
     (5, 2, 1, 0, 1),
     (4, 2, 1, 1, 1),
     (3, 1, 0, 1, 1),
     (1, 1, 0, 1, 1),
 )
 # The five sample rows are repeated three times (15 samples in total); each
 # entry of X is its own list object.
 X = [list(row) for row in _sample_rows * 3]

 # y holds the known "answer" for each row of X, in the same order.
 y = [4, 6, 6, 3, 1] * 3

 # To find the best parameters for the SGD regressor, we are just going to try
 # them all and see which get the most accurate results.
 # To do that, we split the data into a training set (70%) and a test set (30%).
 # Only the training portion is handed to the grid search below; X_test and
 # y_test are not used by the code shown here.
 X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

 # Create the model. Shuffling is important with SGD because the order you see
 # each array row affects the output.
 # penalty='elasticnet' is needed for the l1_ratio search below to mean
 # anything: with the default 'l2' penalty SGDRegressor ignores l1_ratio, so
 # every l1_ratio candidate would fit the very same model.
 model = SGDRegressor(shuffle=True, penalty='elasticnet')

 # A list of param ranges we want to guess and check and a list of values to try
 # for each one. The grid search evaluates every alpha / l1_ratio combination.
 param_grid = [{
       'alpha': [0.1, 0.3, 0.01, 0.03, 0.001, 0.003],
       'l1_ratio': [.05, .15, .5, .7, .9, .95, .99, 1]
       }]

 # Try each param pair and pick the best! This might take several min with a big data set.
 gs = grid_search.GridSearchCV(model, param_grid, n_jobs=8, verbose=1)
 gs.fit(X_train, y_train)

 # gs.best_estimator_ is an SGDRegressor instance configured with the
 # best-scoring params found by the search.
 print("Best estimator:")
 print(gs.best_estimator_)
 print(gs.best_estimator_.coef_)
	import numpy as np
	from sklearn import grid_search
	from sklearn import cross_validation
	from sklearn.preprocessing import StandardScaler
	from sklearn.linear_model import SGDRegressor

	# Training inputs live in X, one row of coefficients per sample. X may be as
	# large as your machine's memory allows; past that point, feed the data in
	# segments through partial_fit rather than fit.

	# Placeholder samples -- swap this for code that loads real data (e.g. a CSV).
	_sample_rows = (
	    (4, 3, 1, 0, 1),
	    (5, 2, 1, 0, 1),
	    (4, 2, 1, 1, 1),
	    (3, 1, 0, 1, 1),
	    (1, 1, 0, 1, 1),
	)
	# The five sample rows are repeated three times (15 samples in total); each
	# entry of X is its own list object.
	X = [list(row) for row in _sample_rows * 3]

	# y holds the known "answer" for each row of X, in the same order.
	y = [4, 6, 6, 3, 1] * 3

	# To find the best parameters for the SGD regressor, we are just going to try
	# them all and see which get the most accurate results.
	# To do that, we split the data into a training set (70%) and a test set (30%).
	# Only the training portion is handed to the grid search below; X_test and
	# y_test are not used by the code shown here.
	X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

	# Create the model. Shuffling is important with SGD because the order you see
	# each array row affects the output.
	# penalty='elasticnet' is needed for the l1_ratio search below to mean
	# anything: with the default 'l2' penalty SGDRegressor ignores l1_ratio, so
	# every l1_ratio candidate would fit the very same model.
	model = SGDRegressor(shuffle=True, penalty='elasticnet')

	# A list of param ranges we want to guess and check and a list of values to try
	# for each one. The grid search evaluates every alpha / l1_ratio combination.
	param_grid = [{
	    'alpha': [0.1, 0.3, 0.01, 0.03, 0.001, 0.003],
	    'l1_ratio': [.05, .15, .5, .7, .9, .95, .99, 1]
	}]

	# Try each param pair and pick the best! This might take several min with a big data set.
	gs = grid_search.GridSearchCV(model, param_grid, n_jobs=8, verbose=1)
	gs.fit(X_train, y_train)

	# gs.best_estimator_ is an SGDRegressor instance configured with the
	# best-scoring params found by the search.
	print("Best estimator:")
	print(gs.best_estimator_)
	print(gs.best_estimator_.coef_)
No results found