Changes to the pull request to fix some of its problems
# -*- coding: utf-8 -*-
"""
This module optimizes the hyperparameters of a DBN using the hyperopt library.
Check out the library here: https://github.com/hyperopt/hyperopt

Example run:
  python hyperopt_exampleMNIST.py --trainSize=10000 --path=...

REFERENCES
[1] Bergstra, James, Dan Yamins, and David D. Cox. "Hyperopt: A python library for optimizing the hyperparameters of machine learning algorithms." (2013).
[2] Larochelle, Hugo, et al. "Exploring strategies for training deep neural networks." The Journal of Machine Learning Research 10 (2009): 1-40.
"""
__credits__ = ["Walter H. L. Pinaya", "Mihaela Rosca"]

import argparse

import hyperopt
import numpy as np
import theano
from sklearn import cross_validation
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support

from lib import deepbelief as db
from lib.activationfunctions import Sigmoid
from lib.common import *

from read import readmnist

theanoFloat = theano.config.floatX
parser = argparse.ArgumentParser(description='digit recognition')
parser.add_argument('--trainSize', type=int, default=10000,
                    help='the number of training cases to be considered')
parser.add_argument('--path', dest='path', default="MNIST",
                    help="the path to the MNIST files")
args = parser.parse_args()

def objective(hyperparameters):
  """
  Objective function for hyperopt's fmin to minimize.

  Arguments
  ---------
  hyperparameters: list of hyperparameters to be optimized
    hyperparameters[0]: the number of layers of the network
    hyperparameters[1]: the sizes of the individual layers
    hyperparameters[2]: learning rate for pretraining
    hyperparameters[3]: learning rate for discriminative training
    hyperparameters[4]: the maximum value the momentum is allowed to reach during training
    hyperparameters[5]: the dropout used for the visible layers during discriminative training

  Returns
  -------
  results: a dictionary reporting the DBN's performance for the given hyperparameters
  """
  global runCounter
  runCounter += 1
  print 'run number', runCounter
  print 'running algorithm for', hyperparameters
  nrLayers = hyperparameters[0]
  hiddenNeuronsPerLayer = hyperparameters[1]
  unsupervisedLearningRate = hyperparameters[2]
  supervisedLearningRate = hyperparameters[3]
  momentumMax = hyperparameters[4]
  visibleDropout = hyperparameters[5]

  # Hyperparameters of the DBN which will not be optimized.
  activationFunction = Sigmoid()
  # Dropout values are keep probabilities, so 1.0 means no dropout during pretraining.
  rbmHiddenDropout = 1.0
  rbmVisibleDropout = 1.0
  weightDecayL1 = 0
  weightDecayL2 = 0
  preTrainEpochs = 1
  maxEpochs = 10  # For better results try more training epochs
  hiddenDropout = 0.5
  hiddenLayers = []
  # Using a constant-width net as in [2]
  for i in range(0, nrLayers - 2):
    hiddenLayers.append(hiddenNeuronsPerLayer)
  dbnLayers = hiddenLayers
  dbnLayers.insert(0, 784)
  dbnLayers.append(10)
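  # Example: nrLayers=4 and hiddenNeuronsPerLayer=1000 give
  # dbnLayers = [784, 1000, 1000, 10].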
  nrFolds = 5
  nrInstances = len(trainLabel)
  kf = cross_validation.KFold(nrInstances, n_folds=nrFolds)
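  # Each KFold split yields (train indices, test indices); a fresh DBN is
  # trained and evaluated on every fold.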
  cvAucRoc = []
  cvPrecision = []
  cvRecall = []
  cvFscore = []
  for training, testing in kf:
    # Train the net
    net = db.DBN(nrLayers, dbnLayers,
                 binary=False,
                 unsupervisedLearningRate=unsupervisedLearningRate,
                 supervisedLearningRate=supervisedLearningRate,
                 momentumMax=momentumMax,
                 activationFunction=activationFunction,
                 rbmActivationFunctionVisible=activationFunction,
                 rbmActivationFunctionHidden=activationFunction,
                 nesterovMomentum=True,
                 rbmNesterovMomentum=True,
                 rmsprop=True,
                 hiddenDropout=hiddenDropout,
                 visibleDropout=visibleDropout,
                 rbmHiddenDropout=rbmHiddenDropout,
                 rbmVisibleDropout=rbmVisibleDropout,
                 weightDecayL1=weightDecayL1,
                 weightDecayL2=weightDecayL2,
                 preTrainEpochs=preTrainEpochs)
    net.train(trainData[training], trainLabel[training],
              maxEpochs=maxEpochs, validation=False)

    # Performance evaluation of the network
    probabilities, predicted = net.classify(trainData[testing])
    vectorPredicted = labelsToVectors(predicted, 10)
    testLabels = trainLabel[testing]
    cvAucRoc.append(roc_auc_score(testLabels, probabilities))
    pre, rec, fsc, _ = precision_recall_fscore_support(testLabels, vectorPredicted,
                                                       average='macro')
    cvPrecision.append(pre)
    cvRecall.append(rec)
    cvFscore.append(fsc)
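  # fmin minimizes 'loss', so 1 - mean AUC is used to make maximizing the AUC
  # the optimization target; the extra keys below are stored in the Trials
  # object and can be inspected after the experiment.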
  results = {
      'loss': 1 - np.mean(cvAucRoc),
      'status': hyperopt.STATUS_OK,
      'classifier_precision': {'type': float, 'value': np.mean(cvPrecision)},
      'classifier_recall': {'type': float, 'value': np.mean(cvRecall)},
      'classifier_fscore': {'type': float, 'value': np.mean(cvFscore)}
  }
  return results

def main():
  import random
  print 'FIXING RANDOMNESS'
  random.seed(6)
  np.random.seed(6)

  global runCounter
  runCounter = 0

  trainingSize = args.trainSize
  trainVectors, trainLabels =\
      readmnist.read(0, trainingSize, bTrain=True, path=args.path)
  print trainVectors[0].shape

  trainVectors, trainLabels = shuffle(trainVectors, trainLabels)
  trainingScaledVectors = trainVectors / 255.0
  vectorLabels = labelsToVectors(trainLabels, 10)

  global trainData
  trainData = trainingScaledVectors
  global trainLabel
  trainLabel = vectorLabels
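  # The globals above are needed because hyperopt's objective receives only
  # the sampled hyperparameters, so the training data must be shared this way.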

  # Defining a search space (a stochastic argument-sampling program)
  nrLayersList = [4, 5]
  hiddenNeuronsPerLayerList = [500, 750, 1000, 1250]
  space = (
      hyperopt.hp.choice('nrLayers', nrLayersList),
      hyperopt.hp.choice('hiddenNeuronsPerLayer', hiddenNeuronsPerLayerList),
      hyperopt.hp.loguniform('unsupervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.loguniform('supervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.uniform('momentumMax', 0.5, 0.99),
      hyperopt.hp.uniform('visibleDropout', 0.5, 0.9)
  )
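  # hp.choice samples from a discrete list, hp.loguniform samples values whose
  # logarithm is uniformly distributed (suitable for learning rates spanning
  # several orders of magnitude), and hp.uniform samples uniformly from a range.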

  # The Trials object records all of the return values computed during the experiment.
  trials = hyperopt.Trials()

  # Minimize the objective function over the hyperparameter space.
  # algo: the search algorithm
  # max_evals: allow up to this many function evaluations before returning;
  #   increase it for a more thorough search
  best = hyperopt.fmin(objective,
                       space,
                       algo=hyperopt.tpe.suggest,
                       # Note: the quality of the optimization depends on max_evals;
                       # for better results set it to a higher value so that the
                       # algorithm can explore more of the search space
                       max_evals=10,
                       trials=trials)

  print 'Best Parameters'
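  # For hp.choice parameters fmin returns the index of the chosen option,
  # so map the indices back to the actual values before printing.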
  best['nrLayers'] = nrLayersList[best['nrLayers']]
  best['hiddenNeuronsPerLayer'] = hiddenNeuronsPerLayerList[best['hiddenNeuronsPerLayer']]
  print best

  for i in range(0, runCounter):
    print 'results for trial', i
    print trials.results[i]

  print 'trial losses'
  print trials.losses()

if __name__ == '__main__':
  main()