//
//  nn.cpp
//
//  To compile: g++ -o nn nn.cpp -std=c++11
//  To run: ./nn
//  Created by Sergei Bugrov on 4/20/18.
//  Copyright © 2017 Sergei Bugrov. All rights reserved.
//  Download dataset from: https://drive.google.com/file/d/1OdtwXHf_-2T0aS9HLBnxU3o-72mklCZY/view?usp=sharing
#include <iostream>
#include <vector>
#include <cmath>
#include <fstream>
#include <sstream>
#include <string>
#include <random>
#include <algorithm>  // max_element, generate
#include <iterator>   // distance
#include <cstdlib>    // rand, strtof

using namespace std;
void print ( const vector <float>& m, int n_rows, int n_columns ) {
    /* "Couts" the input vector as an n_rows x n_columns matrix.
     Inputs:
     m: vector, matrix of size n_rows x n_columns
     n_rows: int, number of rows in the matrix m
     n_columns: int, number of columns in the matrix m
     */
    for( int i = 0; i != n_rows; ++i ) {
        for( int j = 0; j != n_columns; ++j ) {
            cout << m[ i * n_columns + j ] << " ";
        }
        cout << '\n';
    }
    cout << endl;
}
int argmax ( const vector <float>& m ) {
    /* Returns the index of the largest element of m. */
    return distance(m.begin(), max_element(m.begin(), m.end()));
}
vector <float> relu (const vector <float>& z) {
    /* Elementwise ReLU: returns max(0, z[i]) for every element of z. */
    int size = static_cast<int>(z.size());
    vector <float> output;
    for( int i = 0; i < size; ++i ) {
        if (z[i] < 0){
            output.push_back(0.0);
        }
        else output.push_back(z[i]);
    }
    return output;
}
vector <float> reluPrime (const vector <float>& z) {
    /* Elementwise ReLU derivative: returns 1 if z[i] > 0, else 0. */
    int size = static_cast<int>(z.size());
    vector <float> output;
    for( int i = 0; i < size; ++i ) {
        if (z[i] <= 0){
            output.push_back(0.0);
        }
        else output.push_back(1.0);
    }
    return output;
}
static vector<float> random_vector(const int size)
{
    /* Returns a vector of `size` floats drawn uniformly from [0.0, 0.05). */
    random_device rd;
    mt19937 gen(rd());
    uniform_real_distribution<float> distribution(0.0, 0.05);
    vector<float> data(size);
    generate(data.begin(), data.end(), [&]() { return distribution(gen); });
    return data;
}
vector <float> softmax (const vector <float>& z, const int dim) {
    /* Row-wise softmax: z is treated as a matrix whose rows have `dim` elements.
     Each row is shifted by its maximum before exponentiation for numerical stability,
     then normalized so that every row sums to 1. */
    const int zsize = static_cast<int>(z.size());
    vector <float> out;
    for (int i = 0; i != zsize; i += dim) {
        vector <float> foo;
        for (int j = 0; j != dim; ++j) {
            foo.push_back(z[i + j]);
        }
        float max_foo = *max_element(foo.begin(), foo.end());
        for (int j = 0; j != dim; ++j) {
            foo[j] = exp(foo[j] - max_foo);
        }
        float sum_of_elems = 0.0;
        for (int j = 0; j != dim; ++j) {
            sum_of_elems = sum_of_elems + foo[j];
        }
        for (int j = 0; j != dim; ++j) {
            out.push_back(foo[j]/sum_of_elems);
        }
    }
    return out;
}
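// Illustrative example (not part of the original program): for a single row
// z = {1, 2, 3}, softmax(z, 3) returns roughly {0.090, 0.245, 0.665} — the
// shifted exponentials {e^-2, e^-1, e^0} normalized so they sum to 1.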
vector <float> sigmoid_d (const vector <float>& m1) {
    /* Returns the value of the sigmoid function derivative f'(x) = f(x)(1 - f(x)),
     where f(x) is the sigmoid function and m1 is assumed to already hold f(x).
     Input: m1, a vector.
     Output: x(1 - x) for every element of the input matrix m1.
     (Not used by the training loop below, which uses ReLU activations.)
     */
    const unsigned long VECTOR_SIZE = m1.size();
    vector <float> output (VECTOR_SIZE);
    for( unsigned i = 0; i != VECTOR_SIZE; ++i ) {
        output[ i ] = m1[ i ] * (1 - m1[ i ]);
    }
    return output;
}
vector <float> sigmoid (const vector <float>& m1) {
    /* Returns the value of the sigmoid function f(x) = 1/(1 + e^-x).
     Input: m1, a vector.
     Output: 1/(1 + e^-x) for every element of the input matrix m1.
     */
    const unsigned long VECTOR_SIZE = m1.size();
    vector <float> output (VECTOR_SIZE);
    for( unsigned i = 0; i != VECTOR_SIZE; ++i ) {
        output[ i ] = 1 / (1 + exp(-m1[ i ]));
    }
    return output;
}
vector <float> operator+(const vector <float>& m1, const vector <float>& m2){
    /* Returns the elementwise sum of two vectors.
     Inputs:
     m1: a vector
     m2: a vector
     Output: a vector, sum of the vectors m1 and m2.
     */
    const unsigned long VECTOR_SIZE = m1.size();
    vector <float> sum (VECTOR_SIZE);
    for (unsigned i = 0; i != VECTOR_SIZE; ++i){
        sum[i] = m1[i] + m2[i];
    }
    return sum;
}
vector <float> operator-(const vector <float>& m1, const vector <float>& m2){
    /* Returns the difference between two vectors.
     Inputs:
     m1: vector
     m2: vector
     Output: vector, m1 - m2, difference between two vectors m1 and m2.
     */
    const unsigned long VECTOR_SIZE = m1.size();
    vector <float> difference (VECTOR_SIZE);
    for (unsigned i = 0; i != VECTOR_SIZE; ++i){
        difference[i] = m1[i] - m2[i];
    }
    return difference;
}
vector <float> operator*(const vector <float>& m1, const vector <float>& m2){
    /* Returns the product of two vectors (elementwise multiplication).
     Inputs:
     m1: vector
     m2: vector
     Output: vector, m1 * m2, product of two vectors m1 and m2
     */
    const unsigned long VECTOR_SIZE = m1.size();
    vector <float> product (VECTOR_SIZE);
    for (unsigned i = 0; i != VECTOR_SIZE; ++i){
        product[i] = m1[i] * m2[i];
    }
    return product;
}
vector <float> operator*(const float m1, const vector <float>& m2){
    /* Returns the product of a scalar and a vector (each element of m2 multiplied by m1).
     Inputs:
     m1: float
     m2: vector
     Output: vector, m1 * m2, the vector m2 scaled by m1
     */
    const unsigned long VECTOR_SIZE = m2.size();
    vector <float> product (VECTOR_SIZE);
    for (unsigned i = 0; i != VECTOR_SIZE; ++i){
        product[i] = m1 * m2[i];
    }
    return product;
}
vector <float> operator/(const vector <float>& m2, const float m1){
    /* Returns the vector m2 divided elementwise by the scalar m1.
     Inputs:
     m2: vector
     m1: float
     Output: vector, m2 / m1, each element of m2 divided by m1
     */
    const unsigned long VECTOR_SIZE = m2.size();
    vector <float> quotient (VECTOR_SIZE);
    for (unsigned i = 0; i != VECTOR_SIZE; ++i){
        quotient[i] = m2[i] / m1;
    }
    return quotient;
}
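// Illustrative example (not part of the original program): with a = {1, 2} and b = {3, 4},
// a + b = {4, 6}, a - b = {-2, -2}, a * b = {3, 8}, 2.0f * a = {2, 4}, and a / 2.0f = {0.5, 1}.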
vector <float> transpose (float *m, const int C, const int R) {
    /* Returns the transpose of the input matrix.
     Inputs:
     m: pointer to the input matrix (row-major), of size C x R
     C: int, number of rows in the input matrix
     R: int, number of columns in the input matrix
     Output: vector, the R x C transpose mT of the input matrix m
     */
    vector <float> mT (C*R);
    for(int n = 0; n != C*R; n++) {
        int i = n/C;
        int j = n%C;
        mT[n] = m[R*j + i];
    }
    return mT;
}
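// Illustrative example (not part of the original program): the 2 x 3 row-major matrix
// {1, 2, 3,
//  4, 5, 6}
// passed as transpose(&m[0], 2, 3) yields the 3 x 2 matrix {1, 4, 2, 5, 3, 6}.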
vector <float> dot (const vector <float>& m1, const vector <float>& m2, const int m1_rows, const int m1_columns, const int m2_columns) {
    /* Returns the product of two matrices: m1 x m2.
     Inputs:
     m1: vector, left matrix of size m1_rows x m1_columns
     m2: vector, right matrix of size m1_columns x m2_columns (the number of rows in the right matrix
     must be equal to the number of the columns in the left one)
     m1_rows: int, number of rows in the left matrix m1
     m1_columns: int, number of columns in the left matrix m1
     m2_columns: int, number of columns in the right matrix m2
     Output: vector, m1 * m2, product of the two matrices m1 and m2, a matrix of size m1_rows x m2_columns
     */
    vector <float> output (m1_rows*m2_columns);
    for( int row = 0; row != m1_rows; ++row ) {
        for( int col = 0; col != m2_columns; ++col ) {
            output[ row * m2_columns + col ] = 0.f;
            for( int k = 0; k != m1_columns; ++k ) {
                output[ row * m2_columns + col ] += m1[ row * m1_columns + k ] * m2[ k * m2_columns + col ];
            }
        }
    }
    return output;
}
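// Illustrative example (not part of the original program): for A = {1, 2, 3, 4} read as a
// 2 x 2 row-major matrix and B = {5, 6, 7, 8} read as a 2 x 2 row-major matrix,
// dot(A, B, 2, 2, 2) returns {19, 22, 43, 50}, i.e. the matrix {{19, 22}, {43, 50}}.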
vector<string> split(const string &s, char delim) {
    /* Splits the string s on the delimiter delim and returns the resulting tokens. */
    stringstream ss(s);
    string item;
    vector<string> tokens;
    while (getline(ss, item, delim)) {
        tokens.push_back(item);
    }
    return tokens;
}
int main(int argc, const char * argv[]) {
    string line;
    vector<string> line_v;
    cout << "Loading data ...\n";
    vector<float> X_train;
    vector<float> y_train;
    ifstream myfile ("train.txt");
    if (myfile.is_open())
    {
        while ( getline (myfile, line) )
        {
            line_v = split(line, '\t');
            // The first column is the digit label; one-hot encode it into y_train.
            int digit = static_cast<int>(strtof((line_v[0]).c_str(), 0));
            for (int i = 0; i < 10; ++i) {
                if (i == digit)
                {
                    y_train.push_back(1.);
                }
                else y_train.push_back(0.);
            }
            // The remaining columns are the pixel values.
            int size = static_cast<int>(line_v.size());
            for (int i = 1; i < size; ++i) {
                X_train.push_back(strtof((line_v[i]).c_str(), 0));
            }
        }
        // Scale pixel values from [0, 255] to [0, 1].
        X_train = X_train/255.0;
        myfile.close();
    }
    else cout << "Unable to open file" << '\n';
    int xsize = static_cast<int>(X_train.size());
    int ysize = static_cast<int>(y_train.size());
    // Some hyperparameters for the NN
    int BATCH_SIZE = 256;
    float lr = .01/BATCH_SIZE;
    // Random initialization of the weights of a 784-128-64-10 network
    vector <float> W1 = random_vector(784*128);
    vector <float> W2 = random_vector(128*64);
    vector <float> W3 = random_vector(64*10);
    cout << "Training the model ...\n";
    for (int i = 0; i < 10000; ++i) {
        // Building batches of input variables (X) and labels (y):
        // a contiguous block of BATCH_SIZE examples starting at a random index
        // (train.txt is assumed to contain 42,000 rows).
        int randindx = rand() % (42000-BATCH_SIZE);
        vector<float> b_X;
        vector<float> b_y;
        for (int j = randindx*784; j < (randindx+BATCH_SIZE)*784; ++j){
            b_X.push_back(X_train[j]);
        }
        for (int k = randindx*10; k < (randindx+BATCH_SIZE)*10; ++k){
            b_y.push_back(y_train[k]);
        }
        // Feed forward
        vector<float> a1 = relu(dot( b_X, W1, BATCH_SIZE, 784, 128 ));
        vector<float> a2 = relu(dot( a1, W2, BATCH_SIZE, 128, 64 ));
        vector<float> yhat = softmax(dot( a2, W3, BATCH_SIZE, 64, 10 ), 10);
        // Back propagation
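        // Note (standard result, not stated in the original comments): for a softmax
        // output layer trained with cross-entropy loss, the gradient of the loss with
        // respect to the pre-softmax logits is simply (yhat - y), which dyhat holds below.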
        vector<float> dyhat = (yhat - b_y);
        // dW3 = a2.T * dyhat
        vector<float> dW3 = dot(transpose( &a2[0], BATCH_SIZE, 64 ), dyhat, 64, BATCH_SIZE, 10);
        // dz2 = dyhat * W3.T * relu'(a2)
        vector<float> dz2 = dot(dyhat, transpose( &W3[0], 64, 10 ), BATCH_SIZE, 10, 64) * reluPrime(a2);
        // dW2 = a1.T * dz2
        vector<float> dW2 = dot(transpose( &a1[0], BATCH_SIZE, 128 ), dz2, 128, BATCH_SIZE, 64);
        // dz1 = dz2 * W2.T * relu'(a1)
        vector<float> dz1 = dot(dz2, transpose( &W2[0], 128, 64 ), BATCH_SIZE, 64, 128) * reluPrime(a1);
        // dW1 = X.T * dz1
        vector<float> dW1 = dot(transpose( &b_X[0], BATCH_SIZE, 784 ), dz1, 784, BATCH_SIZE, 128);
        // Updating the parameters
        W3 = W3 - lr * dW3;
        W2 = W2 - lr * dW2;
        W1 = W1 - lr * dW1;
        if ((i+1) % 100 == 0){
            // Every 100 iterations, report predictions for the first 10 examples of the
            // current batch and the squared-error loss averaged over the batch.
            cout << "-----------------------------------------------Iteration " << i+1 << "--------------------------------------------------" << "\n";
            cout << "Predictions:" << "\n";
            print ( yhat, 10, 10 );
            cout << "Ground truth:" << "\n";
            print ( b_y, 10, 10 );
            vector<float> loss_m = yhat - b_y;
            float loss = 0.0;
            for (int k = 0; k < BATCH_SIZE*10; ++k){
                loss += loss_m[k]*loss_m[k];
            }
            cout << " Loss " << loss/BATCH_SIZE << "\n";
            cout << "--------------------------------------------End of Iteration------------------------------------------------" << "\n";
        }
    }
    return 0;
}