Last active
November 25, 2017 21:22
-
-
Save dela3499/51f6c205fb1896e4f214b3340ace7b18 to your computer and use it in GitHub Desktop.
Ideas to improve Keras
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
1. For the fit and evaluate methods, make verbose=2 the default (rather than verbose=1). | |
I ignore the checkmarks produced by verbose=1, and they seem to freeze the Jupyter notebook. Frustrating. | |
2. Save IMDB as pickle upon download, rather than npz. Loading it into memory then takes around 6 seconds rather than 60 seconds. | |
""" | |
### Usual imports | |
import pandas as pd | |
import numpy as np | |
import keras | |
from keras.layers import Dense | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
### Utility functions | |
# MNIST loader. Create a single dictionary, rather than 4 variables in a particular order. | |
def load_mnist():
    """Fetch MNIST via Keras and repackage the four arrays into one dict.

    Keys: 'training_images', 'training_labels', 'test_images', 'test_labels'.
    Returning a dict avoids having to remember the positional order of the
    tuples that keras.datasets.mnist.load_data() returns.
    """
    train, test = keras.datasets.mnist.load_data()
    return {
        'training_images': train[0],
        'training_labels': train[1],
        'test_images': test[0],
        'test_labels': test[1],
    }
def summarize(xs):
    """Return a DataFrame describing each array in the dict *xs*.

    One column per dict key; rows are 'shape', 'value_range' (min, max),
    and 'value_type' (dtype). Intended for a dict of training/test
    samples & labels such as the one produced by load_mnist().
    """
    summary = {}
    for name, arr in xs.items():
        summary[name] = {
            'shape': arr.shape,
            'value_range': (arr.min(), arr.max()),
            'value_type': arr.dtype,
        }
    return pd.DataFrame(summary)
def compile_net(hidden_layers, kind):
    """Build and compile a Sequential net, inferring output layer and loss.

    Parameters
    ----------
    hidden_layers : iterable of Keras layers, added in order before the
        automatically-appended output layer.
    kind : int or str
        An integer is taken as the number of classes of a multiclass
        problem (softmax output, categorical crossentropy loss).
        'binary' produces a single sigmoid unit with binary crossentropy.

    Returns
    -------
    The compiled model (rmsprop optimizer, accuracy metric). Also prints
    the model summary as a side effect.

    Raises
    ------
    ValueError if *kind* is neither an int nor 'binary'. (The original
    silently fell through an `else: pass` and then hit a NameError on the
    unbound `loss` variable at compile time.)
    """
    net = keras.models.Sequential()
    for layer in hidden_layers:
        net.add(layer)
    if isinstance(kind, int):
        # Multiclass: one softmax unit per class.
        net.add(Dense(kind, activation='softmax'))
        loss = 'categorical_crossentropy'
    elif kind == 'binary':
        net.add(Dense(1, activation='sigmoid'))
        loss = 'binary_crossentropy'
    else:
        # Fail fast with a clear message instead of NameError below.
        raise ValueError(
            "Unsupported kind: {!r}; expected an int or 'binary'".format(kind)
        )
    net.compile(
        optimizer = 'rmsprop',
        metrics = ['accuracy'],
        loss = loss
    )
    net.summary()
    return net
def fit(net, data, validation_fraction=None, verbose = 2, **kwargs):
    """Call net.fit with saner defaults (verbose=2, which doesn't freeze notebooks).

    Parameters
    ----------
    net : model exposing a Keras-style .fit(x, y, ...) method.
    data : dict with keys 'x_train' and 'y_train' ('x_test'/'y_test' may be
        present but are not used here).
    validation_fraction : float or None
        If given, the FIRST n = int(len(x_train) * fraction) samples are
        held out as validation data and the rest are used for training.
    verbose : passed through to net.fit (default 2; see module docstring).
    **kwargs : forwarded to net.fit.

    Returns whatever net.fit returns (a History object for Keras models).
    """
    # `is None` (identity), not `== None` (PEP 8); float() cast was redundant.
    if validation_fraction is None:
        return net.fit(data['x_train'], data['y_train'], verbose=verbose, **kwargs)
    n = int(len(data['x_train']) * validation_fraction)
    x_train = data['x_train'][n:]
    y_train = data['y_train'][n:]
    x_validation = data['x_train'][:n]
    y_validation = data['y_train'][:n]
    return net.fit(x_train, y_train, verbose=verbose,
                   validation_data=(x_validation, y_validation), **kwargs)
def evaluate(net, x, y, verbose = 2, **kwargs):
    """Identical to net.evaluate, but defaulting to verbose=2 (see module docstring)."""
    return net.evaluate(x, y, verbose=verbose, **kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment