""" Deep Auto-Encoder implementation
	
	An auto-encoder works as follows:

	Data of dimension k is reduced to a lower dimension j using a matrix multiplication:
	softmax(W*x + b)  = x'
	
	where W is matrix from R^k --> R^j

	A reconstruction matrix W' maps back from R^j --> R^k

	so our reconstruction function is softmax'(W' * x' + b') 

	Now the point of the auto-encoder is to create a reduction matrix (values for W, b) 
	that is "good" at reconstructing  the original data. 

	Thus we want to minimize  ||softmax'(W' * (softmax(W *x+ b)) + b')  - x||

	A deep auto-encoder is nothing more than stacking successive layers of these reductions.
"""
import tensorflow as tf
import numpy as np
import math
import random

def create(x, layer_sizes):

	# Build the encoding layers
	next_layer_input = x

	encoding_matrices = []
	for dim in layer_sizes:
		input_dim = int(next_layer_input.get_shape()[1])

		# Initialize W using random values in interval [-1/sqrt(n) , 1/sqrt(n)]
		W = tf.Variable(tf.random_uniform([input_dim, dim], -1.0 / math.sqrt(input_dim), 1.0 / math.sqrt(input_dim)))

		# Initialize b to zero
		b = tf.Variable(tf.zeros([dim]))

		# We are going to use tied-weights so store the W matrix for later reference.
		encoding_matrices.append(W)

		output = tf.nn.tanh(tf.matmul(next_layer_input,W) + b)

		# the input into the next layer is the output of this layer
		next_layer_input = output

	# The fully encoded x value is now stored in the next_layer_input
	encoded_x = next_layer_input

	# build the reconstruction layers by reversing the reductions
	layer_sizes.reverse()
	encoding_matrices.reverse()


	for i, dim in enumerate(layer_sizes[1:] + [ int(x.get_shape()[1])]) :
		# we are using tied weights, so just lookup the encoding matrix for this step and transpose it
		W = tf.transpose(encoding_matrices[i])
		b = tf.Variable(tf.zeros([dim]))
		output = tf.nn.tanh(tf.matmul(next_layer_input,W) + b)
		next_layer_input = output

	# the fully encoded and reconstructed value of x is here:
	reconstructed_x = next_layer_input

	return {
		'encoded': encoded_x,
		'decoded': reconstructed_x,
		'cost' : tf.sqrt(tf.reduce_mean(tf.square(x-reconstructed_x)))
	}

def simple_test():
	sess = tf.Session()
	x = tf.placeholder("float", [None, 4])
	autoencoder = create(x, [2])
	init = tf.initialize_all_variables()
	sess.run(init)
	train_step = tf.train.GradientDescentOptimizer(0.01).minimize(autoencoder['cost'])


	# Our dataset consists of two centers with gaussian noise w/ sigma = 0.1
	c1 = np.array([0,0,0.5,0])
	c2 = np.array([0.5,0,0,0])

	# do 1000 training steps
	for i in range(2000):
		# make a batch of 100:
		batch = []
		for j in range(100):
			# pick a random centroid
			if (random.random() > 0.5):
				vec = c1
			else:
				vec = c2
  			batch.append(np.random.normal(vec, 0.1))
		sess.run(train_step, feed_dict={x: np.array(batch)})
		if i % 100 == 0:
			print i, " cost", sess.run(autoencoder['cost'], feed_dict={x: batch})


def deep_test():
	sess = tf.Session()
	start_dim = 5
	x = tf.placeholder("float", [None, start_dim])
	autoencoder = create(x, [4, 3, 2])
	init = tf.initialize_all_variables()
	sess.run(init)
	train_step = tf.train.GradientDescentOptimizer(0.5).minimize(autoencoder['cost'])


	# Our dataset consists of two centers with gaussian noise w/ sigma = 0.1
	c1 = np.zeros(start_dim)
	c1[0] = 1

	print c1

	c2 = np.zeros(start_dim)
	c2[1] = 1

	# do 1000 training steps
	for i in range(5000):
		# make a batch of 100:
		batch = []
		for j in range(1):
			# pick a random centroid
			if (random.random() > 0.5):
				vec = c1
			else:
				vec = c2
  			batch.append(np.random.normal(vec, 0.1))
		sess.run(train_step, feed_dict={x: np.array(batch)})
		if i % 100 == 0:
			print i, " cost", sess.run(autoencoder['cost'], feed_dict={x: batch})
			print i, " original", batch[0]
			print i, " decoded", sess.run(autoencoder['decoded'], feed_dict={x: batch})
			
if __name__ == '__main__':
	deep_test()