
Akshay L Chandra (acl21)

@acl21
acl21 / adam.py
Created September 27, 2019 02:38
Adam
def do_adam():
    w, b, eta, max_epochs = 1, 1, 0.01, 100
    m_w, m_b, v_w, v_b, eps, beta1, beta2 = 0, 0, 0, 0, 1e-8, 0.9, 0.99
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in data:
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # exponentially decaying average of past gradients (first moment)
        m_w = beta1 * m_w + (1 - beta1) * dw
        m_b = beta1 * m_b + (1 - beta1) * db
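
The gist preview stops after the first-moment update. A minimal sketch of how an Adam epoch typically continues from here (second-moment update, bias correction, parameter step) is shown below; it assumes data is the list of (x, y) pairs (e.g. zip(X, Y) from the gradient_descent.py setup further down) and uses i + 1 as the bias-correction exponent, which is an assumption rather than part of the gist.

        # sketch of the rest of the epoch (not shown in the preview)
        v_w = beta2 * v_w + (1 - beta2) * dw**2       # second moment
        v_b = beta2 * v_b + (1 - beta2) * db**2
        m_w_hat = m_w / (1 - beta1**(i + 1))          # bias-corrected moments
        m_b_hat = m_b / (1 - beta1**(i + 1))
        v_w_hat = v_w / (1 - beta2**(i + 1))
        v_b_hat = v_b / (1 - beta2**(i + 1))
        w = w - (eta / (np.sqrt(v_w_hat) + eps)) * m_w_hat
        b = b - (eta / (np.sqrt(v_b_hat) + eps)) * m_b_hat
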
@acl21
acl21 / rmsprop.py
Last active September 27, 2019 02:44
RMSProp
def do_rmsprop():
    w, b, eta = init_w, init_b, 0.1
    v_w, v_b, beta, eps = 0, 0, 0.9, 1e-8
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # exponentially decaying average of squared gradients
        v_w = beta * v_w + (1 - beta) * dw**2
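
The preview cuts off after the v_w update. The matching v_b update and the scaled parameter step would presumably look like the sketch below; placing eps inside the square root mirrors the AdaGrad snippet and is an assumption here.

        # sketch of the rest of the epoch (not shown in the preview)
        v_b = beta * v_b + (1 - beta) * db**2
        w = w - (eta / np.sqrt(v_w + eps)) * dw
        b = b - (eta / np.sqrt(v_b + eps)) * db
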
@acl21
acl21 / adagrad.py
Created September 27, 2019 01:34
AdaGrad
def do_adagrad():
    w, b, eta = init_w, init_b, 0.1
    v_w, v_b, eps = 0, 0, 1e-8
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # accumulate squared gradients; the history only grows
        v_w = v_w + dw**2
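
The preview stops after the v_w accumulation. A minimal sketch of the remaining steps, with the learning rate scaled down by the accumulated history, follows; the exact placement of eps is assumed.

        # sketch of the rest of the epoch (not shown in the preview)
        v_b = v_b + db**2
        w = w - (eta / np.sqrt(v_w + eps)) * dw
        b = b - (eta / np.sqrt(v_b + eps)) * db
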
@acl21
acl21 / line_search.py
Created September 26, 2019 19:29
Line Search Gradient Descent
def do_line_search_gradient_descent():
    w, b, etas, max_epochs = init_w, init_b, [0.1, 0.5, 1.0, 5.0, 10.0], 100
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        min_error = 100000  # some large value
        best_w, best_b = w, b
        for eta in etas:
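
The preview ends at the for eta in etas: header. A plausible body for that search over candidate step sizes, assuming the error helper from the gradient_descent.py gist below, is sketched here; it keeps whichever step size gives the lowest error for this epoch's gradient.

            # sketch of the loop body (not shown in the preview)
            tmp_w = w - eta * dw
            tmp_b = b - eta * db
            if error(tmp_w, tmp_b) < min_error:
                best_w, best_b = tmp_w, tmp_b
                min_error = error(tmp_w, tmp_b)
        w, b = best_w, best_b   # keep the best candidate found by the line search
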
@acl21
acl21 / mini_batch.py
Created May 15, 2019 19:18
Mini-Batch Gradient Descent
def do_mini_batch_gradient_descent():
    w, b, eta = init_w, init_b, 1.0
    mini_batch_size, num_points_seen = 2, 0
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
            num_points_seen += 1
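
The preview ends just after the point counter. The step that distinguishes mini-batch from batch and stochastic gradient descent, an update once every mini_batch_size points followed by a reset of the accumulators, would presumably follow, roughly as sketched:

            # sketch of the rest of the inner loop (not shown in the preview)
            if num_points_seen % mini_batch_size == 0:
                w = w - eta * dw     # one mini-batch accumulated: take a step
                b = b - eta * db
                dw, db = 0, 0        # reset the accumulators for the next mini-batch
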
@acl21
acl21 / stochastic_gradient.py
Last active May 15, 2019 18:24
Stochastic Gradient Descent
def do_stochastic_gradient_descent():
    w, b, eta, max_epochs = init_w, init_b, 1.0, 100
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
            # update after every single point, not once per epoch
            w = w - eta * dw
            b = b - eta * db
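
As written, dw and db keep accumulating across the epoch even though the parameters are updated after every point. A strict per-point variant would use only the current point's gradient at each step, for example:

        for x, y in zip(X, Y):
            dw, db = grad_w(w, b, x, y), grad_b(w, b, x, y)   # gradient of this point only
            w = w - eta * dw
            b = b - eta * db
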
@acl21
acl21 / gradient_descent.py
Last active May 15, 2019 18:03
Gradient Descent
def do_gradient_descent():
    w, b, eta, max_epochs = init_w, init_b, 1.0, 100
    for i in range(max_epochs):
        dw, db = 0, 0
        #############################
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        #############################
        w = w - eta * dw
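
The preview cuts off before the bias update; the epoch would presumably end with the matching step for b:

        b = b - eta * db   # matching update for the bias (cut off in the preview)
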
@acl21
acl21 / nesterov-accelerated.py
Last active October 14, 2020 09:57
Nesterov Accelerated Gradient Descent
def do_nesterov_accelerated_gradient_descent():
    w, b, eta = init_w, init_b, 1.0
    prev_v_w, prev_v_b, gamma = 0, 0, 0.9
    for i in range(max_epochs):
        dw, db = 0, 0
        # do partial update
        v_w = gamma * prev_v_w
        v_b = gamma * prev_v_b
        for x, y in zip(X, Y):
            # calculate gradients after partial update
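
The preview stops inside the inner loop. A minimal sketch of how the look-ahead gradient and the full update typically continue from here, consistent with the partial update above, is:

            # sketch of the rest (not shown in the preview): gradient at the look-ahead point
            dw += grad_w(w - v_w, b - v_b, x, y)
            db += grad_b(w - v_w, b - v_b, x, y)
        # full update using the gradient measured at the look-ahead position
        v_w = gamma * prev_v_w + eta * dw
        v_b = gamma * prev_v_b + eta * db
        w = w - v_w
        b = b - v_b
        prev_v_w, prev_v_b = v_w, v_b
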
@acl21
acl21 / momentum-gd.py
Last active May 15, 2019 19:04
Momentum-Based Gradient Descent
def do_momentum_gradient_descent():
    w, b, eta = init_w, init_b, 1.0
    prev_v_w, prev_v_b, gamma = 0, 0, 0.9
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # velocity: history term plus the current gradient step
        v_w = gamma * prev_v_w + eta * dw
        v_b = gamma * prev_v_b + eta * db
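
The preview stops after the velocity update. The parameter step and the bookkeeping for the next epoch would presumably follow, roughly as:

        # sketch of the rest of the epoch (not shown in the preview)
        w = w - v_w
        b = b - v_b
        prev_v_w, prev_v_b = v_w, v_b   # carry the velocity into the next epoch
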
@acl21
acl21 / gradient_descent.py
Last active June 25, 2021 21:45
Gradient Descent
import numpy as np

X = [0.5, 2.5]
Y = [0.2, 0.9]

def f(w, b, x):
    # sigmoid neuron
    return 1.0 / (1.0 + np.exp(-(w*x + b)))

def error(w, b):
    err = 0.0
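
The preview stops at the start of error. For the snippets above to run, this setup would also need the squared-error sum, the grad_w/grad_b helpers for the sigmoid neuron, and the shared init_w, init_b, and max_epochs. A minimal sketch consistent with the definitions shown follows; the initial values and epoch count are assumptions, not part of the gist.

    # sketch of the rest of the setup (not shown in the preview)
    for x, y in zip(X, Y):
        fx = f(w, b, x)
        err += 0.5 * (fx - y) ** 2        # squared error summed over the data points
    return err

def grad_w(w, b, x, y):
    fx = f(w, b, x)
    return (fx - y) * fx * (1 - fx) * x   # d(err)/dw for the sigmoid neuron

def grad_b(w, b, x, y):
    fx = f(w, b, x)
    return (fx - y) * fx * (1 - fx)       # d(err)/db

init_w, init_b, max_epochs = -2, -2, 1000  # assumed starting point and epoch budget
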