# 誤差逆伝播法
## 単純なレイヤの実装
### 乗算レイヤの実装

In [1]:
import numpy as np
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` stores both operands because the gradient with respect to
    one operand is the upstream gradient multiplied by the other operand.
    """

    def __init__(self):
        # Operands of the most recent forward call (None until forward runs).
        self.x = None
        self.y = None

    def forward(self, x, y):
        """Remember ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The operands are swapped: ``dx`` uses the stored ``y`` and ``dy``
        uses the stored ``x``.
        """
        return dout * self.y, dout * self.x

In [1]:
# coding: utf-8
import numpy as np
from mymodule.layer_naive import *

# Unit price, quantity and tax multiplier for the apple purchase.
apple = 100
apple_num = 2
tax = 1.1

# One multiplication node per product in the price calculation.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: price = (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

# Backward pass: visit the nodes in the reverse order of the forward pass.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Report the total price and the gradient of the price w.r.t. each input.
for label, value in (
    ("price:", int(price)),
    ("dApple:", dapple),
    ("dApple_num:", int(dapple_num)),
    ("dTax:", dtax),
):
    print(label, value)

price: 220
dApple: 2.2
dApple_num: 110
dTax: 200


### 加算レイヤの実装

In [2]:
import numpy as np
class AddLayer:
    """Addition node of a computational graph.

    The node is stateless: the backward pass needs nothing from the
    forward pass.
    """

    def __init__(self):
        # Nothing to initialise.
        pass

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; both equal the upstream gradient times 1."""
        return dout * 1, dout * 1

In [2]:
# coding: utf-8
import numpy as np
from mymodule.layer_naive import *

# Unit prices, quantities and tax multiplier for the purchase.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# Nodes of the computational graph.
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass; the numbers (1)-(4) mark the evaluation order.
apple_price = mul_apple_layer.forward(apple, apple_num)  # (1)
orange_price = mul_orange_layer.forward(orange, orange_num)  # (2)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)  # (3)
price = mul_tax_layer.forward(all_price, tax)  # (4)

# Backward pass; the same nodes are visited in reverse order, (4) down to (1).
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)  # (4)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)  # (3)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # (2)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)  # (1)

# Report the total price and the gradient of the price w.r.t. each input.
for label, value in (
    ("price:", int(price)),
    ("dApple:", dapple),
    ("dApple_num:", int(dapple_num)),
    ("dOrange:", dorange),
    ("dOrange_num:", int(dorange_num)),
    ("dTax:", dtax),
):
    print(label, value)

price: 715
dApple: 2.2
dApple_num: 110
dOrange: 3.3000000000000003
dOrange_num: 165
dTax: 650


## 活性化関数レイヤの実装
### ReLUレイヤ

In [5]:
import numpy as np
# Demonstrate the boolean mask that the ReLU layer builds:
# True marks every element that is less than or equal to zero.
x = np.array([[1.0, -0.5],
              [-2.0, 3.0]])
mask = np.less_equal(x, 0)
print(mask)

[[False True]
 [ True False]]


In [3]:
import numpy as np
class Relu:
    """ReLU activation layer.

    ``forward`` zeroes every element that is ``<= 0`` and remembers where
    those elements were; ``backward`` blocks the gradient at the same
    positions.
    """

    def __init__(self):
        # Boolean array set by forward(): True where the input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive elements set to 0.

        ``x`` itself is left untouched.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the masked positions set to 0.

        The result is a new array. Writing into ``dout`` directly would
        silently modify an array the caller may still be using.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

### Sigmoidレイヤ

In [6]:
import numpy as np
class Sigmoid:
    """Sigmoid activation layer.

    The forward output is cached because the backward pass is expressed
    entirely in terms of that output.
    """

    def __init__(self):
        # Output of the most recent forward call (None until forward runs).
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, cache the result and return it.

        NOTE(review): ``sigmoid`` is not defined in this cell; it must be
        provided by the surrounding module or session.
        """
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached output."""
        y = self.out
        return dout * (1.0 - y) * y

## Affine / Softmax レイヤの実装
### Affineレイヤ

In [7]:
import numpy as np
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    Inputs with more than two axes are accepted: everything after the
    first axis is flattened in ``forward`` and the original shape is
    restored on the gradient returned by ``backward``.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b

        # Set by forward(): the flattened input and its original shape.
        self.x = None
        self.original_x_shape = None

        # Set by backward(): gradients of the weight and bias parameters.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``np.dot(x, W) + b`` for ``x`` flattened to two axes."""
        # Tensor support: keep the incoming shape, then collapse every
        # axis after the first one into a single axis.
        self.original_x_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)
        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. the input."""
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        # Tensor support: give the gradient the shape the input had.
        return grad_input.reshape(self.original_x_shape)

### Softmax-With-Lossレイヤ

In [8]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into one layer.

    NOTE(review): ``softmax`` and ``cross_entropy_error`` are not defined
    in this cell; they must be provided by the surrounding module or
    session.
    """

    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # teacher (target) data

    def forward(self, x, t):
        """Store ``t`` and ``softmax(x)``, then return the loss.

        The loss is ``cross_entropy_error(softmax(x), t)`` and is also
        kept in ``self.loss``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the input of ``forward``.

        The gradient is ``(y - t) / batch_size`` scaled by the upstream
        gradient ``dout``. ``t`` may be one-hot (same number of elements
        as ``y``) or an array of integer class indices. With the default
        ``dout=1`` the values equal the unscaled gradient.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # teacher data is one-hot encoded
            dx = (self.y - self.t) / batch_size
        else:
            # Teacher data holds class indices: subtract 1 at the target
            # class of each row, which equals y - one_hot(t).
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        # Apply the upstream gradient instead of silently ignoring it.
        return dx * dout

## 誤差逆伝播法の実装

In [9]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # 親ディレクトリのファイルをインポートするための設定
import numpy as np
from mymodule.layers import *
from mymodule.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``; the Affine layers are built from those same
    arrays.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        # Initialise the parameters: scaled standard-normal weights and
        # zero biases. W1 is drawn before W2.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # Build the layers in the order the forward pass visits them.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` and return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the last layer for input ``x`` and teacher data ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches ``t``.

        ``t`` is used as-is when one-dimensional; otherwise its row-wise
        argmax is taken as the label.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients from the module-level ``numerical_gradient`` helper.

        ``x`` is the input data and ``t`` the teacher data. The result is
        a dict keyed like ``self.params``.
        """
        def loss_W(W):
            # The argument is ignored; the loss is always evaluated on (x, t).
            return self.loss(x, t)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return the gradients computed by backpropagation.

        The result is a dict keyed like ``self.params``.
        """
        # Forward pass, so every layer holds the state for this batch.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the layers in
        # reverse order.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the parameter gradients stored on the two Affine layers.
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

### 誤差逆伝播法の勾配確認

In [5]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # 親ディレクトリのファイルをインポートするための設定
import numpy as np
from mymodule.mnist import load_mnist
from mymodule.two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# The first three training samples are enough for the comparison.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# For each parameter, print the mean absolute difference between the
# two gradients.
for key, numeric in grad_numerical.items():
    diff = np.average(np.abs(grad_backprop[key] - numeric))
    print(key + ":" + str(diff))

W1:2.65339253248e-13
b1:7.42792047488e-13
W2:8.93400580132e-13
b2:1.20348178645e-10


### 誤差逆伝播法を使った学習

In [7]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)

import numpy as np
from dataset.mnist import load_mnist
from mymodule.two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Loss after every iteration; accuracies once per epoch.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Floor division keeps this an integer, so `i % iter_per_epoch == 0` below
# fires once per epoch even when train_size is not a multiple of
# batch_size. With true division the float remainder would be zero only
# at i == 0.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch (np.random.choice samples with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation;
    # network.numerical_gradient(x_batch, t_batch) can be substituted here.
    grad = network.gradient(x_batch, t_batch)

    # Parameter update: in-place step against the gradient.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate on the full training and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.06215 0.0625
0.902966666667 0.9061
0.921516666667 0.9251
0.934316666667 0.9365
0.94705 0.9473
0.952183333333 0.9506
0.956283333333 0.9543
0.961233333333 0.9567
0.964783333333 0.9606
0.966466666667 0.9612
0.970933333333 0.964
0.9713 0.9669
0.97195 0.965
0.974666666667 0.9663
0.976066666667 0.9687
0.9772 0.9691
0.97815 0.9685
