In [1]:
import numpy as np

In [2]:
class SGD:
    """Plain stochastic gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        # Step size multiplied into every gradient.
        self.lr = lr

    def update(self, params, grads):
        """Apply one descent step to every entry of ``params``.

        ``params`` and ``grads`` are mappings indexed by the same keys; only
        the keys of ``params`` are visited. The step is applied with ``-=``,
        so NumPy array values are modified in place.
        """
        step = self.lr
        for name in params:
            params[name] -= step * grads[name]

In [3]:
class Momentum:
    """Gradient descent with a per-parameter velocity (momentum) term."""

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        # Velocity buffers keyed like ``params``; built on the first update().
        self.v = None

    def update(self, params, grads):
        """Advance the velocities, then move the parameters by them.

        For each key of ``params``:
        ``v = momentum * v - lr * grad`` followed by ``param += v``.
        The velocity buffers start as zeros shaped like the parameters seen
        on the first call.
        """
        if self.v is None:
            self.v = {name: np.zeros_like(value) for name, value in params.items()}

        for name in params:
            velocity = self.momentum * self.v[name] - self.lr * grads[name]
            self.v[name] = velocity
            params[name] += velocity

In [4]:
class AdaGrad:
    """AdaGrad: per-element learning rates scaled by accumulated squared gradients."""

    def __init__(self, lr=0.01, eps=1e-7):
        """Store the hyper-parameters.

        lr  -- base learning rate.
        eps -- small constant added to the square root of the accumulator so
               the division is defined while the accumulator is still zero.
               Defaults to 1e-7, the value previously hard-coded in update().
        """
        self.lr = lr
        self.eps = eps
        # Running sum of squared gradients per parameter; built on the first update().
        self.h = None

    def update(self, params, grads):
        """Accumulate squared gradients and apply the scaled step.

        For each key of ``params``:
        ``h += grad * grad`` followed by
        ``param -= lr * grad / (sqrt(h) + eps)``.
        Both operations use augmented assignment, so NumPy arrays are
        modified in place.
        """
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + self.eps)

In [5]:
from dataset import mnist

In [15]:
# Load MNIST; the loader's result is unpacked as a (train, test) pair of
# two-element tuples (presumably inputs and labels, going by the names).
# NOTE(review): load_mnist() is called with its defaults — confirm in
# dataset.mnist that those defaults (scaling, flattening, label encoding)
# are what the rest of the notebook expects.
(x_train, t_train), (x_test, t_test) = mnist.load_mnist()

In [16]:
# Sanity check: shape of the training inputs right after loading.
x_train.shape

(60000, 784)

In [17]:
# Sanity check: shape of the training labels right after loading.
t_train.shape

(60000,)

In [18]:
# Sanity check: shape of the test inputs.
x_test.shape

(10000, 784)

In [19]:
# Sanity check: shape of the test labels.
t_test.shape

(10000,)

In [20]:
# Fraction of the loaded training samples to hold out for validation.
validation_rate = 0.20
# Number of held-out samples; int() truncates any fractional part.
validation_num = int(validation_rate * x_train.shape[0])

In [21]:
from common.util import shuffle_dataset

In [22]:
# Shuffle before the split below takes the leading rows as the validation set.
# NOTE(review): shuffle_dataset is defined in common.util and not visible here —
# presumably it applies one permutation to both arrays; verify. No random seed
# is set anywhere in the notebook, so the resulting split is not reproducible.
x_train, t_train = shuffle_dataset(x_train, t_train)

In [23]:
# The first `validation_num` rows become the validation set; the remainder
# stays as training data.
# NOTE: x_train / t_train are overwritten here, so re-running this cell
# carves another `validation_num` rows off the already-reduced arrays.
x_val, x_train = x_train[:validation_num], x_train[validation_num:]
t_val, t_train = t_train[:validation_num], t_train[validation_num:]

In [24]:
# Sanity check: shape of the validation inputs after the split.
x_val.shape

(12000, 784)

In [25]:
# Sanity check: shape of the validation labels after the split.
t_val.shape

(12000,)

In [26]:
# Sanity check: shape of the remaining training inputs after the split.
x_train.shape

(48000, 784)

In [27]:
# Sanity check: shape of the remaining training labels after the split.
t_train.shape

(48000,)