{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Optimizers and an MNIST train/validation split\n",
    "\n",
    "This notebook defines three parameter-update rules (`SGD`, `Momentum`, `AdaGrad`), loads MNIST through `dataset.mnist.load_mnist`, and holds out 20% of the training set as a validation split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "from common.util import shuffle_dataset\n",
    "from dataset import mnist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 0  # applied right before the shuffle so the split cell can be re-run\n",
    "validation_rate = 0.20  # fraction of the training set held out for validation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimizers\n",
    "\n",
    "Each optimizer exposes `update(params, grads)`, where both arguments are mappings keyed by parameter name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGD:\n",
    "    \"\"\"Plain gradient descent: ``param -= lr * grad``.\"\"\"\n",
    "\n",
    "    def __init__(self, lr=0.01):\n",
    "        self.lr = lr\n",
    "\n",
    "    def update(self, params, grads):\n",
    "        \"\"\"Step every entry of ``params`` using the same-keyed entry of ``grads``.\"\"\"\n",
    "        for key in params:\n",
    "            params[key] -= self.lr * grads[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Momentum:\n",
    "    \"\"\"Gradient descent with velocity: ``v = momentum * v - lr * grad; param += v``.\"\"\"\n",
    "\n",
    "    def __init__(self, lr=0.01, momentum=0.9):\n",
    "        self.lr = lr\n",
    "        self.momentum = momentum\n",
    "        self.v = None  # per-parameter velocity, created on first use\n",
    "\n",
    "    def update(self, params, grads):\n",
    "        \"\"\"Advance each velocity by one step and add it to the matching parameter.\"\"\"\n",
    "        if self.v is None:\n",
    "            self.v = {}\n",
    "\n",
    "        for key in params:\n",
    "            # Allocate per key, not once for the first call's keys, so a parameter\n",
    "            # that first appears later starts at zero velocity instead of KeyError.\n",
    "            if key not in self.v:\n",
    "                self.v[key] = np.zeros_like(params[key])\n",
    "\n",
    "            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]\n",
    "            params[key] += self.v[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AdaGrad:\n",
    "    \"\"\"AdaGrad: divides each step by the root of the accumulated squared gradients.\"\"\"\n",
    "\n",
    "    def __init__(self, lr=0.01, eps=1e-7):\n",
    "        self.lr = lr\n",
    "        self.eps = eps  # added to the denominator so a zero accumulator cannot divide by zero\n",
    "        self.h = None  # per-parameter sum of squared gradients, created on first use\n",
    "\n",
    "    def update(self, params, grads):\n",
    "        \"\"\"Accumulate squared gradients and apply the scaled step to ``params``.\"\"\"\n",
    "        if self.h is None:\n",
    "            self.h = {}\n",
    "\n",
    "        for key in params:\n",
    "            # Allocate per key so parameters added after the first call are handled.\n",
    "            if key not in self.h:\n",
    "                self.h[key] = np.zeros_like(params[key])\n",
    "\n",
    "            self.h[key] += grads[key] * grads[key]\n",
    "            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + self.eps)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load MNIST\n",
    "\n",
    "The full training arrays keep their own names (`x_train_full`, `t_train_full`) so later cells never overwrite them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "(x_train_full, t_train_full), (x_test, t_test) = mnist.load_mnist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(60000, 784)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train_full.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(60000,)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_train_full.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 784)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000,)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hold out a validation split\n",
    "\n",
    "The first `validation_num` shuffled samples become the validation set and the rest become the training set. Both are derived from `x_train_full` / `t_train_full`, so re-running the cell below always produces the same split sizes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): assumes shuffle_dataset draws from NumPy's global RNG; if so,\n",
    "# seeding here makes the split reproducible -- confirm against common.util.\n",
    "np.random.seed(SEED)\n",
    "\n",
    "validation_num = int(x_train_full.shape[0] * validation_rate)\n",
    "x_shuffled, t_shuffled = shuffle_dataset(x_train_full, t_train_full)\n",
    "\n",
    "# Slice from the shuffled copies of the full arrays rather than from x_train\n",
    "# itself, so re-running this cell never shrinks the training set a second time.\n",
    "x_val, t_val = x_shuffled[:validation_num], t_shuffled[:validation_num]\n",
    "x_train, t_train = x_shuffled[validation_num:], t_shuffled[validation_num:]\n",
    "\n",
    "assert len(x_val) == len(t_val) == validation_num\n",
    "assert len(x_train) + len(x_val) == len(x_train_full)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12000, 784)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_val.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12000,)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_val.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(48000, 784)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(48000,)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_train.shape"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}