import numpy as np
from dataset import *
from copy import deepcopy
import matplotlib.pyplot as plt
from tqdm import tqdm
%load_ext autoreload
%autoreload 2
def logistic(z):
    return 1./( 1. + np.exp(-z) )

def logistic_grad(z):
    y = logistic(z)
    return y*(1. - y)
def categorical_ce(y, t):
    return -np.sum(t * np.log(y))

def categorical_ce_grad(y, t):
    return -t / y
def softmax(z):
    expz = np.exp(z)
    sumz = np.sum(expz)
    return expz / sumz
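# Aside (a sketch, not used below): np.exp overflows for large logits. A
# numerically stable variant subtracts max(z) before exponentiating, which
# leaves the result unchanged because the shift cancels in the ratio.
# `softmax_stable` is an illustrative name only.
def softmax_stable(z):
    expz = np.exp(z - np.max(z))
    return expz / np.sum(expz)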
def softmax_grad(z, s):
    # s (grad from above) should have only one non-zero element
    idx = np.nonzero(s)[0][0]  # Find index of non-zero element in s
    y = softmax(z)
    kronecker = np.zeros_like(s)
    kronecker[idx] = 1.
    return s[idx]*y[idx]*(kronecker-y)
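# Quick sanity check on the functions above: compare the analytic gradient of
# the cross-entropy-of-softmax against central finite differences on a random
# logit vector. `finite_diff`, `z_test` and `t_test` are illustrative helpers,
# not used elsewhere in the notebook.
def finite_diff(f, z, eps=1e-6):
    g = np.zeros_like(z)
    for i in range(len(z)):
        zp, zm = z.copy(), z.copy()
        zp[i] += eps
        zm[i] -= eps
        g[i] = (f(zp) - f(zm)) / (2.*eps)
    return g

z_test = np.random.normal(size=3)
t_test = np.array([0., 1., 0.])
analytic = softmax_grad(z_test, categorical_ce_grad(softmax(z_test), t_test))
numeric = finite_diff(lambda z: categorical_ce(softmax(z), t_test), z_test)
print(np.allclose(analytic, numeric))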
ds = Annuli(n=1000, noise=1.)
ds.plot()
# or...
#ds = UClasses(n=1000)
#ds.plot()
X = ds.inputs()
X[:5]
array([[ 0.50914045,  0.13859321],
       [ 0.0576307 , -0.00659747],
       [ 0.74830528,  0.26346251],
       [-0.24609827, -0.87633696],
       [-0.53753603, -0.09685067]])
T = ds.targets()
T[:5]
array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.]])
# How many neurons do we want in the hidden layer?
hidden_dim = 10
# Set up the connection weights and biases
# input -> hidden
W1 = np.random.normal(size=(hidden_dim, ds.input_dim))
b1 = np.zeros((hidden_dim,))
# hidden -> output
W2 = np.random.normal(size=(ds.n_classes, hidden_dim))
b2 = np.zeros((ds.n_classes,))
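# Optional variant (a sketch, not applied here): scaling the initial weights by
# 1/sqrt(fan_in) tends to keep the pre-activations in the logistic's sensitive range.
# W1 = np.random.normal(size=(hidden_dim, ds.input_dim)) / np.sqrt(ds.input_dim)
# W2 = np.random.normal(size=(ds.n_classes, hidden_dim)) / np.sqrt(hidden_dim)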
x = X[0] # input
t = T[0] # target
alpha = W1@x + b1 # input -> alpha
h = logistic(alpha) # alpha -> h
beta = W2@h + b2 # h -> beta
y = softmax(beta) # beta -> y
# Compute the loss between y (output) and t (target)
loss = categorical_ce(y, t)
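# In equation form, the forward pass above computes
#   alpha = W1 x + b1,   h = sigma(alpha),
#   beta  = W2 h + b2,   y = softmax(beta),
#   loss  = -sum_k t_k log y_k
# where sigma is the logistic function.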
# Run the inputs through the network
y = []
for x,t in zip(X,T):
    alpha = W1@x + b1
    h = logistic(alpha)
    beta = W2@h + b2
    y.append(softmax(beta))
# Plot the dataset, using the network's outputs as labels
ds.plot(labels=y);
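# The same forward pass can also be written without the Python loop by stacking
# the whole dataset; a sketch for reference (shapes assumed: X is (N, input_dim),
# W1 is (hidden_dim, input_dim), W2 is (n_classes, hidden_dim)). Not used below.
def forward_all(X, W1, b1, W2, b2):
    A = X @ W1.T + b1                                # (N, hidden_dim)
    H = logistic(A)                                  # (N, hidden_dim)
    B = H @ W2.T + b2                                # (N, n_classes)
    expB = np.exp(B - B.max(axis=1, keepdims=True))  # row-wise softmax
    return expB / expB.sum(axis=1, keepdims=True)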
# Feedforward pass first
x = X[0] # input
t = T[0] # target
alpha = W1@x + b1 # input -> alpha
h = logistic(alpha) # alpha -> h
beta = W2@h + b2 # h -> beta
y = softmax(beta) # beta -> y
# Compute the loss between y (output) and t (target)
loss = categorical_ce(y, t)
# Gradient of loss w.r.t. y
dloss_dy = categorical_ce_grad(y, t)
# Gradient of loss w.r.t. beta
dloss_dbeta = softmax_grad(beta, dloss_dy)
# Gradient of loss w.r.t. W2
dloss_dW2 = np.outer(dloss_dbeta, h)
# Gradient of loss w.r.t. b2
dloss_db2 = dloss_dbeta
# Gradient of loss w.r.t. h
dloss_dh = W2.T @ dloss_dbeta
# Gradient of loss w.r.t. alpha
dloss_dalpha = dloss_dh * logistic_grad(alpha)
# Gradient of loss w.r.t. W1
dloss_dW1 = np.outer(dloss_dalpha, x)
# Gradient of loss w.r.t. b1
dloss_db1 = dloss_dalpha
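# Written out, the backward pass above is just the chain rule (for a one-hot t):
#   dloss/dbeta  = y - t                              (softmax + cross-entropy)
#   dloss/dW2    = dloss/dbeta h^T,    dloss/db2 = dloss/dbeta
#   dloss/dalpha = (W2^T dloss/dbeta) * sigma'(alpha)
#   dloss/dW1    = dloss/dalpha x^T,   dloss/db1 = dloss/dalpha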
print(beta)
print(dloss_dbeta)
[ 0.54110102 -0.78924242 -1.30526665]
[ 0.70313807  0.18590017 -0.88903824]
print(f'{W1} \n')
print(dloss_dW1)
[[-0.55517567 -0.60343372]
 [ 0.93826216 -0.46268204]
 [ 0.44757177 -0.34370798]
 [-0.16619815 -2.00738823]
 [-0.53361728  3.57107671]
 [ 0.02343947  1.14840791]
 [-1.27741013  0.86057669]
 [-0.24788056  0.20720883]
 [-1.82013999 -0.22142245]
 [ 1.06336751  0.5737013 ]]

[[-0.0190782  -0.00519328]
 [-0.14372557 -0.03912356]
 [ 0.30395695  0.08274017]
 [ 0.1075365   0.02927253]
 [ 0.0052815   0.00143768]
 [-0.02893441 -0.00787624]
 [ 0.02614044  0.00711569]
 [-0.07583874 -0.02064408]
 [ 0.09778822  0.02661895]
 [ 0.09511591  0.02589152]]
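# Quick check on the backpropagated gradient (an aside, not used later): nudge
# one entry of W1 and compare the resulting change in the loss to dloss_dW1.
eps = 1e-6
W1_pert = W1.copy()
W1_pert[0, 0] += eps
h_pert = logistic(W1_pert @ x + b1)
loss_pert = categorical_ce(softmax(W2 @ h_pert + b2), t)
print((loss_pert - loss) / eps, dloss_dW1[0, 0])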
n_epochs = 500
kappa = 1.  # learning rate (step size for the weight updates)
losses = []
for epoch in tqdm(range(n_epochs)):
    # Accumulators for the gradients, summed over the whole dataset
    dloss_dW1 = np.zeros_like(W1)
    dloss_db1 = np.zeros_like(b1)
    dloss_dW2 = np.zeros_like(W2)
    dloss_db2 = np.zeros_like(b2)
    loss = 0.

    for x,t in zip(X,T):
        # Forward pass
        alpha = W1@x + b1
        h = logistic(alpha)
        beta = W2@h + b2
        y = softmax(beta)

        # Compute the loss
        loss += categorical_ce(y, t)

        # Backpropagate the gradient of the loss
        # Gradient from loss -> beta
        dloss_dy = categorical_ce_grad(y, t)
        dloss_dbeta = softmax_grad(beta, dloss_dy)

        # Gradient of loss w.r.t. W2 and b2
        dloss_dW2 += np.outer(dloss_dbeta, h)
        dloss_db2 += dloss_dbeta

        # Backpropagate: beta -> h -> alpha
        dloss_dh = W2.T @ dloss_dbeta
        dloss_dalpha = dloss_dh * logistic_grad(alpha)

        # Gradient of loss w.r.t. W1 and b1
        dloss_dW1 += np.outer(dloss_dalpha, x)
        dloss_db1 += dloss_dalpha

    # Update the weights and biases (gradient-descent step on the mean gradient)
    W1 -= kappa * dloss_dW1 / len(X)
    b1 -= kappa * dloss_db1 / len(X)
    W2 -= kappa * dloss_dW2 / len(X)
    b2 -= kappa * dloss_db2 / len(X)

    losses.append(loss / len(X))
plt.plot(losses); plt.xlabel('Epoch'); plt.ylabel('Loss');
100%|██████████| 500/500 [00:21<00:00, 23.79it/s]
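# The loop above takes one full-batch gradient step per epoch. A mini-batch
# variant of the same update is sketched below (batch_size and the shuffle are
# illustrative choices; the arrays are copied so the weights trained above are
# left untouched). Not run here.
def train_minibatch(X, T, W1, b1, W2, b2, kappa=1., n_epochs=100, batch_size=32):
    W1, b1, W2, b2 = W1.copy(), b1.copy(), W2.copy(), b2.copy()
    for epoch in range(n_epochs):
        order = np.random.permutation(len(X))
        for start in range(0, len(X), batch_size):
            idx = order[start:start + batch_size]
            dW1 = np.zeros_like(W1); db1 = np.zeros_like(b1)
            dW2 = np.zeros_like(W2); db2 = np.zeros_like(b2)
            for x, t in zip(X[idx], T[idx]):
                alpha = W1 @ x + b1
                h = logistic(alpha)
                beta = W2 @ h + b2
                dbeta = softmax_grad(beta, categorical_ce_grad(softmax(beta), t))
                dW2 += np.outer(dbeta, h); db2 += dbeta
                dalpha = (W2.T @ dbeta) * logistic_grad(alpha)
                dW1 += np.outer(dalpha, x); db1 += dalpha
            # Average the gradients over the batch and step
            W1 -= kappa * dW1 / len(idx); b1 -= kappa * db1 / len(idx)
            W2 -= kappa * dW2 / len(idx); b2 -= kappa * db2 / len(idx)
    return W1, b1, W2, b2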
# Run the inputs through the network
Y = []
for x,t in zip(X,T):
    alpha = W1@x + b1
    h = logistic(alpha)
    beta = W2@h + b2
    Y.append(softmax(beta))
print(f'Target: {T[0]}')
print(f'Output: {Y[0]}')
# Plot the dataset using the network's output as labels
ds.plot(labels=Y);
Target: [0. 0. 1.]
Output: [0.22907674 0.5291186  0.24180465]
# Count correct
correct = 0
for t,y in zip(T,Y):
    k = np.argmax(t)
    j = np.argmax(y)
    if k==j:
        correct += 1
# Print accuracy as percent
print(f'Accuracy = {correct/len(ds)*100:.1f}%')
Accuracy = 94.4%
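# The same tally can be done in one line with argmax over the stacked outputs
# (a small aside; np.array(Y) assumes Y is the list of outputs collected above).
accuracy = np.mean(np.argmax(np.array(Y), axis=1) == np.argmax(T, axis=1))
print(f'Accuracy = {accuracy*100:.1f}%')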