from lasagne.layers import (ConcatLayer, DenseLayer, DropoutLayer, InputLayer,
                            LSTMLayer, SliceLayer, get_all_params)
from lasagne.nonlinearities import sigmoid
import lasagne
import numpy
import theano
import theano.tensor as T
def build_network(input_shape, input_var, num_units):
    nnet = dict()
    nnet["input"] = InputLayer(input_shape, input_var, name="input")
    # Optionally introduce some noise on the input
    # nnet["noise"] = DropoutLayer(nnet["input"], p=0.5, name="dropout")
    # The mask flags the valid time steps of each sequence; its static shape
    # is (batchsize, seqlen), i.e. the first two dimensions of the input
    nnet["mask"] = InputLayer(shape=input_shape[:2], name="mask")
    # Process the sequence forward and backward
    nnet["forward_lstm"] = LSTMLayer(nnet["input"], num_units=num_units,
                                     mask_input=nnet["mask"], name="forward_lstm")
    nnet["backward_lstm"] = LSTMLayer(nnet["input"], num_units=num_units,
                                      mask_input=nnet["mask"], backwards=True,
                                      name="backward_lstm")
    # Keep only the final hidden state of each direction. The forward pass
    # ends at the last time step; with backwards=True, Lasagne reverses the
    # output back to input order, so the backward pass's final state sits at
    # index 0.
    nnet["forward_slice"] = SliceLayer(nnet["forward_lstm"], -1, 1, name="forward_slice")
    nnet["backward_slice"] = SliceLayer(nnet["backward_lstm"], 0, 1, name="backward_slice")
    # Concatenate the final states of the forward and backward passes
    nnet["concat"] = ConcatLayer([nnet["forward_slice"], nnet["backward_slice"]], name="concat")
    nnet["output"] = DenseLayer(nnet["concat"], num_units=1, nonlinearity=sigmoid, name="output")
    return nnet
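
# A quick sanity check (an addition, not part of the original gist): printing
# each layer's symbolic output shape with lasagne.layers.get_output_shape
# verifies the wiring before compiling anything. For input_shape=(None, 1024,
# 350) and num_units=25, the two slices report (None, 25), the concatenation
# (None, 50) and the output (None, 1). The helper name is illustrative.
def print_network_shapes(nnet):
    for name in ("input", "mask", "forward_slice", "backward_slice",
                 "concat", "output"):
        print(name, lasagne.layers.get_output_shape(nnet[name]))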
# Build and shuffle an easy dataset: predict 1 for all-ones sequences and 0
# for all-zeros sequences. Cast to floatX so the arrays match the dtype of
# the symbolic inputs.
indices = numpy.arange(150)
numpy.random.shuffle(indices)
inputs = numpy.concatenate((numpy.ones((75, 1024, 350)),
                            numpy.zeros((75, 1024, 350))))[indices].astype(theano.config.floatX)
targets = numpy.concatenate((numpy.ones((75, 1), dtype=numpy.int32),
                             numpy.zeros((75, 1), dtype=numpy.int32)))[indices]
masks = numpy.ones((150, 1024), dtype=theano.config.floatX)
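
# Aside: every toy sequence here uses all 1024 time steps, so the mask is all
# ones. For variable-length data one would zero-pad the inputs and mark only
# the valid steps. A minimal illustrative helper (not part of the original
# gist; the name make_mask is an assumption):
def make_mask(lengths, maxlen=1024):
    # Entry (i, t) is 1.0 for valid time steps t < lengths[i], else 0.0
    mask = numpy.zeros((len(lengths), maxlen), dtype=theano.config.floatX)
    for i, length in enumerate(lengths):
        mask[i, :length] = 1.0
    return mask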
def iterate_minibatch():
    # Yield the 150-sample dataset in three minibatches of 50
    for i in range(3):
        yield inputs[i*50:(i+1)*50], targets[i*50:(i+1)*50], masks[i*50:(i+1)*50]
def main(num_units):
    input_var = T.tensor3("inputs")
    input_shape = (None, 1024, 350)
    target_var = T.imatrix("targets")
    network = build_network(input_shape, input_var, num_units)
    # Clip the predictions away from 0 and 1 so the cross-entropy never
    # evaluates log(0)
    TOL = 1e-6
    prediction = lasagne.layers.get_output(network["output"])
    pred_clipped = T.clip(prediction, TOL, 1.0 - TOL)
    loss = lasagne.objectives.binary_crossentropy(pred_clipped, target_var)
    loss = loss.mean()
    acc = T.mean(T.eq(T.round(prediction), target_var), dtype=theano.config.floatX)
    params = get_all_params(network["output"], trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.1, momentum=0.5)
    mask_var = network["mask"].input_var
    train_fn = theano.function([input_var, target_var, mask_var], [loss, acc], updates=updates)
    num_epochs = 50
    for epoch in range(num_epochs):
        # In each epoch, do a full pass over the training data
        train_err = 0
        train_acc = 0
        train_batches = 0
        for batch_inputs, batch_targets, batch_masks in iterate_minibatch():
            err, batch_acc = train_fn(batch_inputs, batch_targets, batch_masks)
            train_err += err
            train_acc += batch_acc
            train_batches += 1
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
if __name__ == "__main__":
    main(num_units=25)