In [4]:
import numpy as np

In [1]:
def numerical_gradient(f, x):
 h = 1e-4 # 0.0001
 grad = np.zeros_like(x)
 
 for idx in range(x.size):
 tmp_val = x[idx]
 # f(x+h)
 x[idx] = tmp_val + h
 fxh1 = f(x)
 #f(x-h)
 x[idx] = tmp_val - h
 fxh2 = f(x)
 
 grad[idx] = (fxh1 - fxh2) / (2*h)
 x[idx] = tmp_val # restore
 
 return grad

In [2]:
def function_2(x):
 return x[0]**2 + x[1]**2

In [5]:
numerical_gradient(function_2, np.array([3.0, 4.0]))

array([ 6., 8.])

In [6]:
numerical_gradient(function_2, np.array([0.0, 2.0]))

array([ 0., 4.])

In [7]:
numerical_gradient(function_2, np.array([3.0, 0.0]))

array([ 6., 0.])

In [8]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
 x = init_x
 
 for i in range(step_num):
 grad = numerical_gradient(f, x)
 x -= lr * grad
 
 return x

In [12]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)

array([ -6.11110793e-10, 8.14814391e-10])

In [13]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100)

array([ -2.58983747e+13, -1.29524862e+12])

In [14]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100)

array([-2.99999994, 3.99999992])

In [15]:
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

In [21]:
class SimpleNet:
 def __init__(self):
 self.W = np.random.randn(2, 3) # Gaussian Distribution
 
 def predict(self, x):
 return np.dot(x, self.W)
 
 def loss(self, x, t):
 z = self.predict(x)
 y = softmax(z)
 loss = cross_entropy_error(y, t)
 
 return loss

In [22]:
net = SimpleNet()

In [23]:
print(net.W)

[[ 1.57097266 -0.05146236 1.43420101]
 [ 0.02408947 0.49283932 2.22659858]]


In [24]:
x = np.array([0.6, 0.9])
p = net.predict(x)
p

array([ 0.96426412, 0.41267797, 2.86445932])

In [25]:
t = np.array([0, 0, 1])
net.loss(x, t)

0.21162096444663459

In [26]:
def f(W):
 return net.loss(x, t)

In [27]:
dW = numerical_gradient(f, net.W)
dW

array([[ 0.07261079, 0.04182638, -0.11443717],
 [ 0.10891618, 0.06273958, -0.17165576]])

In [28]:
from common.functions import *

In [29]:
class TwoLayerNet:
 def __init__(self, input_size, hidden_size, output_size, 
 weight_init_std=0.01):
 # init weights
 self.params = {}
 self.params['W1'] = weight_init_std * \
 np.random.randn(input_size, hidden_size)
 self.params['b1'] = np.zeros(hidden_size)
 self.params['W2'] = weight_init_std * \
 np.random.randn(hidden_size, output_size)
 self.params['b2'] = np.zeros(output_size)
 
 def predict(self, x):
 W1, W2 = self.params['W1'], self.params['W2']
 b1, b2 = self.params['b1'], self.params['b2']
 
 a1 = np.dot(x, W1) + b1
 z1 = sigmoid(a1)
 a2 = np.dot(z1, W2) + b2
 y = softmax(a2)
 
 return y
 
 def loss(self, x, t):
 y = self.predict(x)
 
 return cross_entropy_error(y, t)
 
 def accuracy(self, x, t):
 y = self.predict(x)
 y = np.argmax(y, axis=1)
 t = np.argmax(t, axis=1)
 
 accuracy = np.sum(y == t) / float(x.shape[0])
 return accuracy
 
 def numerical_gradient(self, x, t):
 loss_W = lambda W: self.loss(x, t)
 
 grads = {}
 grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
 grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
 grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
 grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
 
 return grads


In [30]:
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

In [31]:
net.params['W1'].shape

(784, 100)

In [32]:
net.params['b1'].shape

(100,)

In [33]:
net.params['W2'].shape

(100, 10)

In [34]:
net.params['b2'].shape

(10,)

In [35]:
x = np.random.rand(100, 784)
y = net.predict(x)

In [36]:
y

array([[ 0.0929079 , 0.10545567, 0.1011332 , 0.10213417, 0.100048 ,
 0.09859305, 0.09589364, 0.1050598 , 0.09630168, 0.10247288],
 [ 0.09301324, 0.10553867, 0.10122337, 0.10208667, 0.09934299,
 0.09852167, 0.09627567, 0.10514601, 0.09635366, 0.10249805],
 [ 0.09320152, 0.10543257, 0.10134838, 0.10227634, 0.09925217,
 0.09846688, 0.09630402, 0.10498337, 0.0964722 , 0.10226256],
 [ 0.09266878, 0.10569335, 0.10127206, 0.10266394, 0.09956575,
 0.09868581, 0.09608076, 0.10515584, 0.09590158, 0.10231212],
 [ 0.09305302, 0.10550946, 0.10122928, 0.10210634, 0.09984373,
 0.09886459, 0.09600813, 0.1050553 , 0.09597674, 0.1023534 ],
 [ 0.09331485, 0.10531989, 0.10143237, 0.10204317, 0.09978208,
 0.09857535, 0.09602764, 0.10528953, 0.09594862, 0.10226651],
 [ 0.0928379 , 0.10561995, 0.10143794, 0.1022184 , 0.09939102,
 0.09871511, 0.09615843, 0.10507205, 0.0965413 , 0.10200789],
 [ 0.09281353, 0.10542469, 0.10155212, 0.10231861, 0.09961348,
 0.09841559, 0.09607444, 0.10513555, 0.09629261, 0.102359

In [37]:
x = np.random.rand(100, 784)
t = np.random.rand(100, 10)

In [38]:
grads = net.numerical_gradient(x, t)

In [39]:
grads['W1'].shape

(784, 100)

In [40]:
grads['b1'].shape

(100,)

In [41]:
grads['W2'].shape

(100, 10)

In [42]:
grads['b2'].shape

(10,)

In [43]:
from dataset.mnist import load_mnist

In [44]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

In [45]:
train_loss_list = []

In [48]:
train_acc_list = []
test_acc_list = []

In [46]:
iters_num = 10000
batch_size = 100
learning_rate = 0.1

In [51]:
x_train.shape

(60000, 784)

In [53]:
iter_per_epoch = max(x_train.shape[0] / batch_size, 1)

In [49]:
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

In [55]:
for i in range(iters_num):
 batch_mask = np.random.choice(x_train.shape[0], batch_size)
 x_batch = x_train[batch_mask]
 t_batch = t_train[batch_mask]
 
 grad = network.numerical_gradient(x_batch, t_batch)
 
 for key in ('W1', 'b1', 'W2', 'b2'):
 network.params[key] -= learning_rate * grad[key]
 
 loss = network.loss(x_batch, t_batch)
 train_loss_list.append(loss)
 
 if i % iter_per_epoch == 0:
 train_acc = network.accuracy(x_train, t_train)
 test_acc = network.accuracy(x_test, t_test)
 train_acc_list.append(train_acc)
 test_acc_list.append(test_acc)
 print("train acc, test acc | {}, {}".format(train_acc, test_acc))

AxisError: axis 1 is out of bounds for array of dimension 1