Changes to the pull request to solve some of the problems with it
# -*- coding: utf-8 -*-
"""
This module creates an optimization of hyper-parameters of a DBN using hyperopt library
Check out the library here: https://github.com/hyperopt/hyperopt).
example run:
python hyperopt_exampleMNIST.py --trainSize=10000 --path=...
REFERENCES
[1] - Bergstra, James, Dan Yamins, and David D. Cox. "Hyperopt: A python library for optimizing the hyperparameters of machine learning algorithms." (2013).
[2] - Larochelle, Hugo, et al. "Exploring strategies for training deep neural networks." The Journal of Machine Learning Research 10 (2009): 1-40.
"""
__credits__ = ["Walter H. L. Pinaya", "Mihaela Rosca"]
import argparse
import hyperopt
import numpy as np
import theano
from sklearn import cross_validation
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
from lib import deepbelief as db
from lib.activationfunctions import Sigmoid
from lib.common import *
from read import readmnist
theanoFloat = theano.config.floatX
parser = argparse.ArgumentParser(description='digit recognition')
parser.add_argument('--trainSize', type=int, default=10000,
                    help='the number of training cases to be considered')
parser.add_argument('--path', dest='path', default="MNIST",
                    help="the path to the MNIST files")
args = parser.parse_args()
def objective(hyperparameters):
  """
  Objective function for hyperopt's fmin to minimize.

  Arguments
  ---------
  hyperparameters: list of hyperparameters to be optimized
    hyperparameters[0]: the number of layers of the network
    hyperparameters[1]: the size of each hidden layer
    hyperparameters[2]: learning rate for pretraining
    hyperparameters[3]: learning rate for discriminative training
    hyperparameters[4]: the maximum value momentum is allowed to reach during training
    hyperparameters[5]: the dropout used for the visible layers during discriminative training

  Returns
  -------
  results: a dictionary reporting the DBN performance for the given hyperparameters
  """
  global runCounter
  runCounter += 1
  print 'run number', runCounter
  print 'running algorithm for', hyperparameters
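
  # The unpacking order below must match the order of the search space tuple
  # defined in main().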
  nrLayers = hyperparameters[0]
  hiddenNeuronsPerLayer = hyperparameters[1]
  unsupervisedLearningRate = hyperparameters[2]
  supervisedLearningRate = hyperparameters[3]
  momentumMax = hyperparameters[4]
  visibleDropout = hyperparameters[5]

  # Hyperparameters of the DBN which will not be optimized.
  activationFunction = Sigmoid()
  rbmHiddenDropout = 1.0
  rbmVisibleDropout = 1.0
  weightDecayL1 = 0
  weightDecayL2 = 0
  preTrainEpochs = 1
  maxEpochs = 10  # for better results, train for more epochs
  hiddenDropout = 0.5

  # Use a constant-width network, as in [2].
  hiddenLayers = []
  for i in range(0, nrLayers - 2):
    hiddenLayers.append(hiddenNeuronsPerLayer)

  dbnLayers = hiddenLayers
  dbnLayers.insert(0, 784)
  dbnLayers.append(10)
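  # dbnLayers is now [784, h, ..., h, 10]: MNIST inputs have 784 pixels and
  # there are 10 digit classes.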

  nrFolds = 5
  kf = cross_validation.KFold(len(trainLabel), n_folds=nrFolds)
  cvAucRoc = []
  cvPrecision = []
  cvRecall = []
  cvFscore = []
  for training, testing in kf:
    # Train the net.
    net = db.DBN(nrLayers, dbnLayers,
                 binary=False,
                 unsupervisedLearningRate=unsupervisedLearningRate,
                 supervisedLearningRate=supervisedLearningRate,
                 momentumMax=momentumMax,
                 activationFunction=activationFunction,
                 rbmActivationFunctionVisible=activationFunction,
                 rbmActivationFunctionHidden=activationFunction,
                 nesterovMomentum=True,
                 rbmNesterovMomentum=True,
                 rmsprop=True,
                 hiddenDropout=hiddenDropout,
                 visibleDropout=visibleDropout,
                 rbmHiddenDropout=rbmHiddenDropout,
                 rbmVisibleDropout=rbmVisibleDropout,
                 weightDecayL1=weightDecayL1,
                 weightDecayL2=weightDecayL2,
                 preTrainEpochs=preTrainEpochs)
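    # validation=False: no validation set is split off during training; the
    # held-out fold is evaluated below instead.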
    net.train(trainData[training], trainLabel[training],
              maxEpochs=maxEpochs, validation=False)

    # Performance evaluation of the network.
    probabilities, predicted = net.classify(trainData[testing])
    vectorPredicted = labelsToVectors(predicted, 10)
    testLabels = trainLabel[testing]
    cvAucRoc.append(roc_auc_score(testLabels, probabilities))
    pre, rec, fsc, _ = precision_recall_fscore_support(testLabels, vectorPredicted,
                                                       average='macro')
    cvPrecision.append(pre)
    cvRecall.append(rec)
    cvFscore.append(fsc)
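
  # hyperopt minimizes the loss, so maximizing the mean AUC across folds is
  # expressed as minimizing 1 - mean(AUC).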
  results = {
      'loss': 1 - np.mean(cvAucRoc),
      'status': hyperopt.STATUS_OK,
      'classifier_precision': {'type': float, 'value': np.mean(cvPrecision)},
      'classifier_recall': {'type': float, 'value': np.mean(cvRecall)},
      'classifier_fscore': {'type': float, 'value': np.mean(cvFscore)}
  }
  return results


def main():
  import random
  print 'FIXING RANDOMNESS'
  random.seed(6)
  np.random.seed(6)

  global runCounter
  runCounter = 0

  training = args.trainSize
  trainVectors, trainLabels =\
      readmnist.read(0, training, bTrain=True, path=args.path)
  print trainVectors[0].shape

  trainVectors, trainLabels = shuffle(trainVectors, trainLabels)
  trainingScaledVectors = trainVectors / 255.0
  vectorLabels = labelsToVectors(trainLabels, 10)
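
  # Expose the preprocessed data as globals so that objective(), which
  # hyperopt calls with only the sampled hyperparameters, can reach it.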
  global trainData
  trainData = trainingScaledVectors
  global trainLabel
  trainLabel = vectorLabels

  # Define the search space (a stochastic argument-sampling program).
  nrLayersList = [4, 5]
  hiddenNeuronsPerLayerList = [500, 750, 1000, 1250]
  space = (
      hyperopt.hp.choice('nrLayers', nrLayersList),
      hyperopt.hp.choice('hiddenNeuronsPerLayer', hiddenNeuronsPerLayerList),
      hyperopt.hp.loguniform('unsupervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.loguniform('supervisedLearningRate', np.log(1e-5), np.log(1e-1)),
      hyperopt.hp.uniform('momentumMax', 0.5, 0.99),
      hyperopt.hp.uniform('visibleDropout', 0.5, 0.9)
  )
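  # The learning rates are drawn with hp.loguniform, i.e. uniformly on a log
  # scale, the usual choice when the plausible range spans several orders of
  # magnitude.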

  # The Trials object records the return values of every evaluation so that
  # they can be inspected after the experiment.
  trials = hyperopt.Trials()

  # Minimize the objective function over the hyperparameter space.
  #   algo      : the search algorithm (here, Tree-structured Parzen Estimators)
  #   max_evals : allow up to this many function evaluations before returning.
  # The quality of the optimization depends on max_evals; for better results,
  # increase it so that the algorithm can explore more of the search space.
  best = hyperopt.fmin(objective,
                       space,
                       algo=hyperopt.tpe.suggest,
                       max_evals=10,
                       trials=trials)

  print 'Best Parameters'
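  # fmin reports hp.choice parameters as indices into the choice lists, so map
  # them back to the actual values before printing.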
  best['nrLayers'] = nrLayersList[best['nrLayers']]
  best['hiddenNeuronsPerLayer'] = hiddenNeuronsPerLayerList[best['hiddenNeuronsPerLayer']]
  print best

  for i in range(0, runCounter):
    print 'results for trial', i
    print trials.results[i]

  print 'trial losses'
  print trials.losses()


if __name__ == '__main__':
  main()