Changes to the pull request to solve some of the problems with it
# -*- coding: utf-8 -*-
"""
This module creates an optimization of hyper-parameters of a DBN using hyperopt library
Check out the library here: https://github.com/hyperopt/hyperopt).
example run:
python hyperopt_exampleMNIST.py --trainSize=10000 --path=...
REFERENCES
[1] - Bergstra, James, Dan Yamins, and David D. Cox. "Hyperopt: A python library for optimizing the hyperparameters of machine learning algorithms." (2013).
[2] - Larochelle, Hugo, et al. "Exploring strategies for training deep neural networks." The Journal of Machine Learning Research 10 (2009): 1-40.
"""
__credits__ = ["Walter H. L. Pinaya", "Mihaela Rosca"]
import argparse
import hyperopt
import numpy as np
import theano
from sklearn import cross_validation
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
from lib import deepbelief as db
from lib.activationfunctions import Sigmoid
from lib.common import *
from read import readmnist
theanoFloat = theano.config.floatX
parser = argparse.ArgumentParser(description='digit recognition')
parser.add_argument('--trainSize', type=int, default=10000,
                    help='the number of training cases to be considered')
parser.add_argument('--path', dest='path', default="MNIST",
                    help="the path to the MNIST files")
args = parser.parse_args()
def objective(hyperparameters):
  """
  Objective function for hyperopt's fmin to minimize.

  Arguments
  ---------
  hyperparameters: list of hyperparameters to be optimized
    hyperparameters[0]: the number of layers of the network
    hyperparameters[1]: the size of each hidden layer
    hyperparameters[2]: learning rate for pretraining
    hyperparameters[3]: learning rate for discriminative training
    hyperparameters[4]: the maximum value momentum is allowed to reach during training
    hyperparameters[5]: the dropout used for the visible layers during discriminative training

  Returns
  -------
  results: a dictionary reporting the DBN performance for the given hyperparameters
  """
  global runCounter
  runCounter += 1
  print 'run number', runCounter
  print 'running algorithm for', hyperparameters
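
  # The unpacking order below must match the order of the search space tuple
  # defined in main().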
  nrLayers = hyperparameters[0]
  hiddenNeuronsPerLayer = hyperparameters[1]
  unsupervisedLearningRate = hyperparameters[2]
  supervisedLearningRate = hyperparameters[3]
  momentumMax = hyperparameters[4]
  visibleDropout = hyperparameters[5]

  # Hyperparameters of the DBN which will not be optimized.
  activationFunction = Sigmoid()
  rbmHiddenDropout = 1.0
  rbmVisibleDropout = 1.0
  weightDecayL1 = 0
  weightDecayL2 = 0
  preTrainEpochs = 1
  maxEpochs = 10  # for better results, train for more epochs
  hiddenDropout = 0.5

  # Use a constant-width network, as in [2].
  hiddenLayers = []
  for i in range(0, nrLayers - 2):
    hiddenLayers.append(hiddenNeuronsPerLayer)

  dbnLayers = hiddenLayers
  dbnLayers.insert(0, 784)
  dbnLayers.append(10)
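  # dbnLayers is now [784, h, ..., h, 10]: MNIST inputs have 784 pixels and
  # there are 10 digit classes.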

  nrFolds = 5
  kf = cross_validation.KFold(len(trainLabel), n_folds=nrFolds)
  cvAucRoc = []
  cvPrecision = []
  cvRecall = []
  cvFscore = []
  for training, testing in kf:
    # Train the net.
    net = db.DBN(nrLayers, dbnLayers,
                 binary=False,
                 unsupervisedLearningRate=unsupervisedLearningRate,
                 supervisedLearningRate=supervisedLearningRate,
                 momentumMax=momentumMax,
                 activationFunction=activationFunction,
                 rbmActivationFunctionVisible=activationFunction,
                 rbmActivationFunctionHidden=activationFunction,
                 nesterovMomentum=True,
                 rbmNesterovMomentum=True,
                 rmsprop=True,
                 hiddenDropout=hiddenDropout,
                 visibleDropout=visibleDropout,
                 rbmHiddenDropout=rbmHiddenDropout,
                 rbmVisibleDropout=rbmVisibleDropout,
                 weightDecayL1=weightDecayL1,
                 weightDecayL2=weightDecayL2,
                 preTrainEpochs=preTrainEpochs)
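    # validation=False: no validation set is split off during training; the
    # held-out fold is evaluated below instead.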
    net.train(trainData[training], trainLabel[training],
              maxEpochs=maxEpochs, validation=False)

    # Performance evaluation of the network.
    probabilities, predicted = net.classify(trainData[testing])
    vectorPredicted = labelsToVectors(predicted, 10)
    testLabels = trainLabel[testing]
    cvAucRoc.append(roc_auc_score(testLabels, probabilities))
    pre, rec, fsc, _ = precision_recall_fscore_support(testLabels, vectorPredicted,
                                                       average='macro')
    cvPrecision.append(pre)
    cvRecall.append(rec)
    cvFscore.append(fsc)
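
  # hyperopt minimizes the loss, so maximizing the mean AUC across folds is
  # expressed as minimizing 1 - mean(AUC).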
  results = {
      'loss': 1 - np.mean(cvAucRoc),
      'status': hyperopt.STATUS_OK,
      'classifier_precision': {'type': float, 'value': np.mean(cvPrecision)},
      'classifier_recall': {'type': float, 'value': np.mean(cvRecall)},
      'classifier_fscore': {'type': float, 'value': np.mean(cvFscore)}
  }
  return results


def main():
  import random
  print 'FIXING RANDOMNESS'
  random.seed(6)
  np.random.seed(6)

  global runCounter
  runCounter = 0

  training = args.trainSize
  trainVectors, trainLabels =\
      readmnist.read(0, training, bTrain=True, path=args.path)
  print trainVectors[0].shape

  trainVectors, trainLabels = shuffle(trainVectors, trainLabels)
  trainingScaledVectors = trainVectors / 255.0
  vectorLabels = labelsToVectors(trainLabels, 10)
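
  # Expose the preprocessed data as globals so that objective(), which
  # hyperopt calls with only the sampled hyperparameters, can reach it.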
  global trainData
  trainData = trainingScaledVectors
  global trainLabel
  trainLabel = vectorLabels

  # Define the search space (a stochastic argument-sampling program).
  nrLayersList = [4, 5]
  hiddenNeuronsPerLayerList = [500, 750, 1000, 1250]
  space = (
      hyperopt.hp.choice('nrLayers', nrLayersList),
      hyperopt.hp.choice('hiddenNeuronsPerLayer', hiddenNeuronsPerLayerList),
      hyperopt.hp.loguniform('unsupervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.loguniform('supervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.uniform('momentumMax', 0.5, 0.99),
      hyperopt.hp.uniform('visibleDropout', 0.5, 0.9)
  )
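  # The learning rates are drawn with hp.loguniform, i.e. uniformly on a log
  # scale, the usual choice when the plausible range spans several orders of
  # magnitude.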

  # The Trials object records the return values of every evaluation so that
  # they can be inspected after the experiment.
  trials = hyperopt.Trials()

  # Minimize the objective function over the hyperparameter space.
  #   algo      : the search algorithm (here, Tree-structured Parzen Estimators)
  #   max_evals : allow up to this many function evaluations before returning.
  # The quality of the optimization depends on max_evals; for better results,
  # increase it so that the algorithm can explore more of the search space.
  best = hyperopt.fmin(objective,
                       space,
                       algo=hyperopt.tpe.suggest,
                       max_evals=10,
                       trials=trials)

  print 'Best Parameters'
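  # fmin reports hp.choice parameters as indices into the choice lists, so map
  # them back to the actual values before printing.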
  best['nrLayers'] = nrLayersList[best['nrLayers']]
  best['hiddenNeuronsPerLayer'] = hiddenNeuronsPerLayerList[best['hiddenNeuronsPerLayer']]
  print best

  for i in range(0, runCounter):
    print 'results for trial', i
    print trials.results[i]

  print 'trial losses'
  print trials.losses()


if __name__ == '__main__':
  main()