This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_adam():
    """Fit (w, b) with the Adam optimizer: momentum (first moment) plus
    RMSProp-style scaling (second moment), both bias-corrected.

    Uses the module-level dataset X, Y and gradient helpers grad_w/grad_b.
    Returns the final (w, b).
    """
    w, b, eta, max_epochs = 1, 1, 0.01, 100
    # m_* = first-moment (mean) accumulators, v_* = second-moment accumulators.
    m_w, m_b, v_w, v_b, eps, beta1, beta2 = 0, 0, 0, 0, 1e-8, 0.9, 0.99
    for i in range(max_epochs):
        dw, db = 0, 0
        # Original iterated an undefined name `data`; use zip(X, Y) like
        # every other optimizer in this file.
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # Exponentially decaying averages of the gradient and its square.
        m_w = beta1 * m_w + (1 - beta1) * dw
        m_b = beta1 * m_b + (1 - beta1) * db
        # NOTE(review): everything below was truncated in the scraped source;
        # reconstructed from the standard Adam update rule.
        v_w = beta2 * v_w + (1 - beta2) * dw**2
        v_b = beta2 * v_b + (1 - beta2) * db**2
        # Bias-corrected estimates kept in separate variables so the running
        # accumulators are not corrupted across epochs.
        m_w_hat = m_w / (1 - beta1 ** (i + 1))
        m_b_hat = m_b / (1 - beta1 ** (i + 1))
        v_w_hat = v_w / (1 - beta2 ** (i + 1))
        v_b_hat = v_b / (1 - beta2 ** (i + 1))
        w = w - (eta / (np.sqrt(v_w_hat) + eps)) * m_w_hat
        b = b - (eta / (np.sqrt(v_b_hat) + eps)) * m_b_hat
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_rmsprop():
    """Fit (w, b) with RMSProp: scale each step by an exponentially decaying
    average of squared gradients.

    Uses module-level X, Y, init_w, init_b, max_epochs, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta = init_w, init_b, 0.1
    v_w, v_b, beta, eps = 0, 0, 0.9, 1e-8
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        v_w = beta * v_w + (1 - beta) * dw**2
        # NOTE(review): lines below were truncated in the scraped source;
        # reconstructed from the standard RMSProp update rule.
        v_b = beta * v_b + (1 - beta) * db**2
        # eps keeps the denominator nonzero on the first steps.
        w = w - (eta / (np.sqrt(v_w) + eps)) * dw
        b = b - (eta / (np.sqrt(v_b) + eps)) * db
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_adagrad():
    """Fit (w, b) with AdaGrad: per-parameter learning rate shrinks with the
    running sum of squared gradients.

    Uses module-level X, Y, init_w, init_b, max_epochs, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta = init_w, init_b, 0.1
    v_w, v_b, eps = 0, 0, 1e-8
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # Monotonically growing history of squared gradients.
        v_w = v_w + dw**2
        # NOTE(review): lines below were truncated in the scraped source;
        # reconstructed from the standard AdaGrad update rule.
        v_b = v_b + db**2
        w = w - (eta / (np.sqrt(v_w) + eps)) * dw
        b = b - (eta / (np.sqrt(v_b) + eps)) * db
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_line_search_gradient_descent():
    """Gradient descent where each epoch tries every step size in `etas`
    and keeps the one giving the lowest error (a crude line search).

    Uses module-level X, Y, init_w, init_b, grad_w, grad_b, error.
    Returns the final (w, b).
    """
    w, b, etas, max_epochs = init_w, init_b, [0.1, 0.5, 1.0, 5.0, 10.0], 100
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        min_error = 100000  # some large value
        best_w, best_b = w, b
        # NOTE(review): the eta-search body was truncated in the scraped
        # source; reconstructed as the standard try-every-eta line search.
        for eta in etas:
            tmp_w = w - eta * dw
            tmp_b = b - eta * db
            err = error(tmp_w, tmp_b)  # evaluate once, not twice
            if err < min_error:
                min_error = err
                best_w, best_b = tmp_w, tmp_b
        w, b = best_w, best_b
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_mini_batch_gradient_descent():
    """Gradient descent that updates (w, b) after every `mini_batch_size`
    points instead of after the full pass.

    Uses module-level X, Y, init_w, init_b, max_epochs, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta = init_w, init_b, 1.0
    mini_batch_size, num_points_seen = 2, 0
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
            num_points_seen += 1
            # NOTE(review): the step below was truncated in the scraped
            # source; reconstructed as the standard mini-batch update.
            if num_points_seen % mini_batch_size == 0:
                w = w - eta * dw
                b = b - eta * db
                dw, db = 0, 0  # reset accumulators for the next batch
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_stochastic_gradient_descent():
    """Stochastic gradient descent: update (w, b) after every single point.

    Note that dw/db are never reset between points within an epoch (matching
    the original code), so each step uses the running gradient sum.
    Uses module-level X, Y, init_w, init_b, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta, max_epochs = init_w, init_b, 1.0, 100
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
            # Per-point (stochastic) update, inside the data loop.
            w = w - eta * dw
            b = b - eta * db
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_gradient_descent():
    """Vanilla (full-batch) gradient descent on (w, b).

    Uses module-level X, Y, init_w, init_b, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta, max_epochs = init_w, init_b, 1.0, 100
    for i in range(max_epochs):
        dw, db = 0, 0
        #############################
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        #############################
        w = w - eta * dw
        # NOTE(review): the bias update was truncated in the scraped source;
        # restored to mirror the weight update.
        b = b - eta * db
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_nesterov_accelerated_gradient_descent():
    """Nesterov accelerated gradient: take the momentum step first, then
    measure the gradient at the look-ahead point.

    Uses module-level X, Y, init_w, init_b, max_epochs, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta = init_w, init_b, 1.0
    prev_v_w, prev_v_b, gamma = 0, 0, 0.9
    for i in range(max_epochs):
        dw, db = 0, 0
        # do partial update (momentum look-ahead)
        v_w = gamma * prev_v_w
        v_b = gamma * prev_v_b
        for x, y in zip(X, Y):
            # calculate gradients after partial update
            # NOTE(review): the remainder of this function was truncated in
            # the scraped source; reconstructed from the standard NAG update.
            dw += grad_w(w - v_w, b - v_b, x, y)
            db += grad_b(w - v_w, b - v_b, x, y)
        # now do the full update with the look-ahead gradients
        v_w = gamma * prev_v_w + eta * dw
        v_b = gamma * prev_v_b + eta * db
        w = w - v_w
        b = b - v_b
        prev_v_w, prev_v_b = v_w, v_b
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def do_momentum_gradient_descent():
    """Gradient descent with classical momentum: each step adds gamma times
    the previous step to the current gradient step.

    Uses module-level X, Y, init_w, init_b, max_epochs, grad_w, grad_b.
    Returns the final (w, b).
    """
    w, b, eta = init_w, init_b, 1.0
    prev_v_w, prev_v_b, gamma = 0, 0, 0.9
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        v_w = gamma * prev_v_w + eta * dw
        v_b = gamma * prev_v_b + eta * db
        # NOTE(review): the lines below were truncated in the scraped source;
        # reconstructed from the standard momentum update.
        w = w - v_w
        b = b - v_b
        prev_v_w, prev_v_b = v_w, v_b  # carry the velocity to the next epoch
    return w, b
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# Toy 1-D dataset: two (x, y) points to be fitted by a single sigmoid neuron.
X = [0.5, 2.5]
Y = [0.2, 0.9]

def f(w, b, x):
    """Sigmoid neuron output 1 / (1 + e^-(w*x + b)) for input x."""
    return 1.0 / (1.0 + np.exp(-(w * x + b)))
def error(w, b):
    """Total squared error of the sigmoid fit f(w, b, x) over the dataset.

    NOTE(review): the loop body was truncated in the scraped source;
    reconstructed as the standard 0.5 * (prediction - target)^2 sum used
    with these sigmoid-neuron demos — confirm against the full gist.
    """
    err = 0.0
    for x, y in zip(X, Y):
        fx = f(w, b, x)
        err += 0.5 * (fx - y) ** 2
    return err
NewerOlder