Created
February 6, 2018 16:36
-
-
Save jad2192/46b04f3297801b82d0185eb0c0d9a327 to your computer and use it in GitHub Desktop.
N-Tuples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class N_Tuple_Classifier_fast(object):
    '''N-tuple classifier for binarized image data (e.g. 28x28 MNIST digits).

    Each "tuple" is a random subset of pixel positions. Training samples are
    projected onto every tuple with tensor ops; a test image is scored per
    class by its pixel-overlap counts against those stored projections.
    '''

    def __init__(self, pixel_percentage=0.1, num_tuples=100, pixel_tolerance=0.3,
                 warm_start=None, n_pixels=784):
        '''pixel_percentage: fraction of the n_pixels positions each tuple keeps.
               For example the default 0.1 randomly chooses 78 of 784 pixels.
           num_tuples: number of unique tuples onto which to project.
           pixel_tolerance: the classifier requires binary 0-1 pixel values;
               this is the (post /256 scaling) intensity threshold at or below
               which a pixel is set to zero.
           warm_start: pre-set (tuple matrix, frequency table) pair if you want
               to start with a pre-trained model.
               NOTE(review): predict() reads self.projs, which a warm start
               does not supply — a warm-started model still needs fit() before
               it can predict; confirm intended warm-start workflow.
           n_pixels: pixels per image (784 for 28x28 MNIST).'''
        self.M = num_tuples
        self.pt = pixel_tolerance
        self.n_pixels = n_pixels
        self.class_ind = {}
        if warm_start is None:
            keep = int(n_pixels * pixel_percentage)  # pixels kept per tuple
            self.tuples = np.zeros((num_tuples, n_pixels), dtype='i8')
            for m in range(self.M):
                # Mark `keep` distinct random pixel positions for tuple m.
                chosen = np.random.choice(np.arange(n_pixels), size=keep,
                                          replace=False)
                self.tuples[m][chosen] = 1
            self.projs = None
        else:
            self.tuples = warm_start[0]
            self.freq_table = warm_start[1]

    def fit(self, data):
        '''Store per-tuple projections of the training set.

        data: array of shape (n_samples, n_pixels + 1) where column 0 is the
        integer class label (0-9) and the remaining columns are pixel values.'''
        start_t = d_timer()
        labs = data[:, 0]
        for k in range(10):
            # np.where returns a 1-tuple holding the index array; consumers
            # must unwrap element [0] to get the actual indices.
            self.class_ind[k] = np.where(labs == k)
        # Transform each image to a binary vector by cutting off pixel
        # intensities (scaled by /256) that fall at or below pixel_tolerance.
        data_bin = np.asanyarray((data[:, 1:] / 256) > self.pt, dtype='i8')
        # Project all the data onto all tuples simultaneously via tensor ops.
        data_tensor = np.ones((self.M, data_bin.shape[0], data_bin.shape[1]),
                              dtype='i8')
        data_tensor = np.einsum('ij,kij->kij', data_bin, data_tensor, dtype='i8')
        # Result: tensor of shape (M, N, n_pixels) = (num_tup, num_samp, n_pix)
        # where projs[m, n, :] is sample n masked by the m-th tuple set.
        self.projs = np.einsum('ij,ikj->ikj', self.tuples, data_tensor,
                               dtype='i8')
        print('Model fit, time spent: ', d_timer() - start_t, ' s')

    def predict(self, X):
        '''Predict the class label (int 0-9) of a flat pixel vector X of
        length n_pixels.'''
        s = d_timer()
        X_bin = np.asanyarray(X / 256 > self.pt, dtype='i8')
        # Projection of the test image onto each tuple: shape (M, n_pixels).
        X_proj = np.einsum('i,ji->ji', X_bin, self.tuples, dtype='i8')
        # proj_test[m, n, j] == 1 iff pixel j is on in both the test image's
        # and training sample n's projection onto tuple m.
        proj_test = np.einsum('ij,ikj->ikj', X_proj, self.projs, dtype='i8')
        ext = np.ones(proj_test.shape, dtype='i8')
        ext = np.einsum('ij,ikj->ikj', X_proj, ext, dtype='i8')
        # comb[m, n] counts positions where both projections are on: a
        # per-tuple overlap score between the test image and training sample
        # n, used to indirectly compute the counts and make a prediction.
        comb = np.asanyarray((proj_test + ext) == 2, dtype='i8').sum(axis=-1)
        prob = np.zeros(10)
        for k in range(10):
            # BUG FIX: np.where() returns a tuple, so len() of it was always
            # 1 — normalize by the actual number of class-k training samples.
            cur_ix = self.class_ind[k][0]
            if cur_ix.size:  # guard against classes absent from training data
                prob[k] = comb[:, cur_ix].sum() / cur_ix.size
        print('Prediction took: ', d_timer() - s, ' s')
        return prob.argmax()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment