{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "abstract type AbstractLayer end" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mutable struct MulLayer{T<:AbstractFloat} <: AbstractLayer\n", " x::T\n", " y::T\n", " (::Type{MulLayer{T}})() where {T<:AbstractFloat} = new{T}()\n", "end" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "forward (generic function with 2 methods)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function forward(self::MulLayer{T}, x::T, y::T) where {T<:AbstractFloat}\n", " self.x = x\n", " self.y = y\n", " out = x * y\n", " return out\n", "end\n", "@inline forward(lyr::MulLayer{T}, x, y) where T = forward(lyr, T(x), T(y))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "backward (generic function with 2 methods)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function backward(self::MulLayer{T}, dout::T) where {T<:AbstractFloat}\n", " dx = dout * self.y\n", " dy = dout * self.x\n", " return dx, dy\n", "end\n", "@inline backward(lyr::MulLayer{T}, dout) where T = backward(lyr, T(dout))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apple = 100\n", "apple_num = 2\n", "tax = 1.1" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MulLayer{Float32}(7.0f-45, 0.0f0)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mul_apple_layer = MulLayer{Float32}()\n", "mul_tax_layer = MulLayer{Float32}()" ] }, { "cell_type": "code", "execution_count": 7, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "220.0f0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apple_price = forward(mul_apple_layer, apple, apple_num)\n", "price = forward(mul_tax_layer, apple_price, tax)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2.2f0, 110.0f0, 200.0f0)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dprice = 1\n", "dapple_price, dtax = backward(mul_tax_layer, dprice)\n", "dapple, dapple_num = backward(mul_apple_layer, dapple_price)\n", "(dapple, dapple_num, dtax)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "backward (generic function with 4 methods)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mutable struct AddLayer{T<:AbstractFloat} <: AbstractLayer end\n", "\n", "function forward(self::AddLayer{T}, x::T, y::T) where {T<:AbstractFloat}\n", " out = x + y\n", " return out\n", "end\n", "@inline forward(lyr::AddLayer{T}, x, y) where T = forward(lyr, T(x), T(y))\n", " \n", "function backward(self::AddLayer{T}, dout::T) where {T<:AbstractFloat}\n", " dx = dout * 1\n", " dy = dout * 1\n", " return dx, dy\n", "end\n", "@inline backward(lyr::AddLayer{T}, dout) where T = backward(lyr, T(dout))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.1" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apple = 100\n", "apple_num = 2\n", "orange = 150\n", "orange_num = 3\n", "tax = 1.1" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MulLayer{Float32}(1.160496f20, 4.5761f-41)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mul_apple_layer = MulLayer{Float32}()\n", 
"mul_orange_layer = MulLayer{Float32}()\n", "add_apple_orange_layer = AddLayer{Float32}()\n", "mul_tax_layer = MulLayer{Float32}()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "715.0f0" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apple_price = forward(mul_apple_layer, apple, apple_num)\n", "orange_price = forward(mul_orange_layer, orange, orange_num)\n", "all_price = forward(add_apple_orange_layer, apple_price, orange_price)\n", "price = forward(mul_tax_layer, all_price, tax)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2.2f0, 110.0f0, 3.3000002f0, 165.0f0, 650.0f0)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dprice = 1\n", "dall_price, dtax = backward(mul_tax_layer, dprice)\n", "dapple_price, dorange_price = backward(add_apple_orange_layer, dall_price)\n", "dorange, dorange_num = backward(mul_orange_layer, dorange_price)\n", "dapple, dapple_num = backward(mul_apple_layer, dapple_price)\n", "(dapple, dapple_num, dorange, dorange_num, dtax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Relu" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mutable struct ReluLayer{T<:AbstractFloat} <: AbstractLayer\n", " mask::AbstractArray{Bool}\n", " (::Type{ReluLayer{T}})() where {T} = new{T}()\n", "end" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "forward (generic function with 5 methods)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function forward(self::ReluLayer{T}, x::AbstractArray{T}) where {T<:AbstractFloat}\n", " mask = self.mask = (x .<= 0)\n", " out = copy(x)\n", " out[mask] .= zero(T)\n", " out\n", "end" ] }, { "cell_type": "code", 
"execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "backward (generic function with 5 methods)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function backward(self::ReluLayer{T}, dout::AbstractArray{T}) where {T<:AbstractFloat}\n", " dout[self.mask] .= zero(T)\n", " dout\n", "end" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2×2 Array{Float32,2}:\n", " 1.0 0.0\n", " 0.0 3.0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "relulyr = ReluLayer{Float32}()\n", "forward(relulyr, Float32[1.0 -0.5; -2.0 3.0])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2×2 Array{Bool,2}:\n", " false true\n", " true false" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "relulyr.mask" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2×2 Array{Float32,2}:\n", " 1.0 0.0\n", " 0.0 1.0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "backward(relulyr, Float32[1.0 1.0; 1.0 1.0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sigmoid" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mutable struct SigmoidLayer{T<:AbstractFloat} <: AbstractLayer\n", " out::AbstractArray{T}\n", " (::Type{SigmoidLayer{T}})() where {T} = new{T}()\n", "end" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "forward (generic function with 6 methods)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function forward(self::SigmoidLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}\n", " self.out = one(T) ./ (one(T) .+ 
exp.(.-x))\n", "end" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "backward (generic function with 6 methods)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function backward(self::SigmoidLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}\n", " dout .* (one(T) .- self.out) .* self.out\n", "end" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11-element Array{Float32,1}:\n", " 0.00669285\n", " 0.0179862 \n", " 0.0474259 \n", " 0.119203 \n", " 0.268941 \n", " 0.5 \n", " 0.731059 \n", " 0.880797 \n", " 0.952574 \n", " 0.982014 \n", " 0.993307 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sigmoidlyr = SigmoidLayer{Float32}()\n", "forward(sigmoidlyr, Float32[-5:5;])" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11-element Array{Float32,1}:\n", " 0.00664806\n", " 0.0176627 \n", " 0.0451767 \n", " 0.104994 \n", " 0.196612 \n", " 0.25 \n", " 0.196612 \n", " 0.104994 \n", " 0.0451767 \n", " 0.0176627 \n", " 0.00664803" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "backward(sigmoidlyr, ones(Float32,11))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.6.2 バッチ版 Affine レイヤ" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mutable struct AffineLayer{T<:AbstractFloat} <: AbstractLayer\n", " W::AbstractMatrix{T}\n", " b::AbstractVector{T}\n", " x::AbstractArray{T}\n", " dW::AbstractMatrix{T}\n", " db::AbstractVector{T}\n", " function (::Type{AffineLayer})(W::AbstractMatrix{T}, b::AbstractVector{T}) where {T}\n", " lyr = new{T}()\n", " lyr.W = W\n", " lyr.b = b\n", " lyr\n", " end\n", "end" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "forward (generic function with 7 methods)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function forward(self::AffineLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}\n", " self.x = x\n", " self.W * x .+ self.b\n", "end" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "_sumvec (generic function with 3 methods)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function backward(self::AffineLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}\n", " dx = self.W' * dout\n", " self.dW = dout * self.x'\n", " self.db = _sumvec(dout)\n", " dx\n", "end\n", "@inline _sumvec{T}(dout::AbstractVector{T}) = dout\n", "@inline _sumvec{T}(dout::AbstractMatrix{T}) = vec(mapslices(sum, dout, 2))\n", "@inline _sumvec{T,N}(dout::AbstractArray{T,N}) = vec(mapslices(sum, dout, 2:N))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.6.3 Softmax-with-Loss レイヤ" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "softmax (generic function with 2 methods)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function softmax(a::AbstractVector{T}) where {T<:AbstractFloat}\n", " c = maximum(a) # オーバーフロー対策\n", " exp_a = exp.(a .- c)\n", " exp_a ./ sum(exp_a)\n", "end\n", "\n", "function softmax(a::AbstractMatrix{T}) where {T<:AbstractFloat}\n", " mapslices(softmax, a, 1)\n", "end" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "crossentropyerror (generic function with 3 methods)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function crossentropyerror(y::Vector, t::Vector)\n", " δ = 1e-7 # アンダーフロー対策\n", " # -sum(t .* log.(y .+ δ))\n", " -(t ⋅ log.(y .+ 
δ))\n", "end\n", "function crossentropyerror(y::Matrix, t::Matrix)\n", " batch_size = size(y, 2)\n", " δ = 1e-7 # small offset so log never sees 0\n", " # -sum(t .* log(y .+ δ)) / batch_size\n", " -vecdot(t, log.(y .+ δ)) / batch_size\n", "end\n", "# Mean cross-entropy of a batch `y` (one column per sample) against 0-based class labels `t`.\n", "function crossentropyerror(y::Matrix, t::Vector)\n", "    batch_size = size(y, 2)\n", "    δ = 1e-7 # small offset so log never sees 0\n", "    # δ must be added inside the log; adding it to the log values does not prevent log(0) = -Inf.\n", "    -sum([log(y[t[i]+1, i] + δ) for i=1:batch_size]) / batch_size\n", "end" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mutable struct SoftmaxWithLossLayer{T<:AbstractFloat} <: AbstractLayer\n", " loss::T\n", " y::AbstractArray{T}\n", " t::AbstractArray{T}\n", " (::Type{SoftmaxWithLossLayer{T}})() where {T} = new{T}()\n", "end" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "forward (generic function with 8 methods)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function forward(self::SoftmaxWithLossLayer{T}, x::AbstractArray{T}, t::AbstractArray{T}) where {T<:AbstractFloat}\n", " self.t = t\n", " y = self.y = softmax(x)\n", " self.loss = crossentropyerror(y, t)\n", "end" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "_swlvec (generic function with 2 methods)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function backward(lyr::SoftmaxWithLossLayer{T}, dout::T=one(T)) where {T<:AbstractFloat}\n", " dout .* _swlvec(lyr.y, lyr.t)\n", "end\n", "@inline _swlvec(y::AbstractArray{T}, t::AbstractVector{T}) where {T<:AbstractFloat} = y .- t\n", "@inline _swlvec(y::AbstractArray{T}, t::AbstractMatrix{T}) where {T<:AbstractFloat} = (y .- t) / size(t)[2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.7.2 誤差逆伝播法に対応したニューラルネットワークの実装" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ 
"mutable struct TwoLayerNet{T<:AbstractFloat}\n", " a1lyr::AffineLayer{T}\n", " relu1lyr::ReluLayer\n", " a2lyr::AffineLayer{T}\n", " softmaxlyr::SoftmaxWithLossLayer{T}\n", "end\n", "\n", "function (::Type{TwoLayerNet{T}})(input_size::Int, hidden_size::Int, output_size::Int,\n", " weight_init_std::T=T(0.01)) where {T<:AbstractFloat}\n", " W1 = weight_init_std .* randn(T, hidden_size, input_size)\n", " b1 = zeros(T, hidden_size)\n", " W2 = weight_init_std .* randn(T, output_size, hidden_size)\n", " b2 = zeros(T, output_size)\n", " a1lyr = AffineLayer(W1, b1)\n", " relu1lyr = ReluLayer{T}()\n", " a2lyr = AffineLayer(W2, b2)\n", " softmaxlyr = SoftmaxWithLossLayer{T}()\n", " TwoLayerNet(a1lyr, relu1lyr, a2lyr, softmaxlyr)\n", "end" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "predict (generic function with 1 method)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function predict(net::TwoLayerNet{T}, x::AbstractArray{T}) where {T<:AbstractFloat}\n", " a1 = forward(net.a1lyr, x)\n", " z1 = forward(net.relu1lyr, a1)\n", " a2 = forward(net.a2lyr, z1)\n", " # softmax(a2)\n", " a2\n", "end" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "loss (generic function with 1 method)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function loss(net::TwoLayerNet{T}, x::AbstractArray{T}, t::AbstractArray{T}) where {T<:AbstractFloat}\n", " y = predict(net, x)\n", " forward(net.softmaxlyr, y, t)\n", "end" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "accuracy (generic function with 1 method)" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function accuracy(net::TwoLayerNet{T}, x::AbstractArray{T}, t::AbstractArray{T}) where {T<:AbstractFloat}\n", " y = 
vec(mapslices(indmax, predict(net, x), 1))\n", " if ndims(t) > 1 t = vec(mapslices(indmax, t, 1)) end\n", " mean(y .== t)\n", "end" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "struct TwoLayerNetGrads{T}\n", " W1::AbstractMatrix{T}\n", " b1::AbstractVector{T}\n", " W2::AbstractMatrix{T}\n", " b2::AbstractVector{T}\n", "end" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "function Base.gradient(net::TwoLayerNet{T}, x::AbstractArray{T}, t::AbstractArray{T}) where {T<:AbstractFloat}\n", " # forward\n", " loss(net, x, t)\n", " # backward\n", " dout = one(T)\n", " dz2 = backward(net.softmaxlyr, dout)\n", " da2 = backward(net.a2lyr, dz2)\n", " dz1 = backward(net.relu1lyr, da2)\n", " da1 = backward(net.a1lyr, dz1)\n", " TwoLayerNetGrads(net.a1lyr.dW, net.a1lyr.db, net.a2lyr.dW, net.a2lyr.db)\n", "end" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "applygradient! 
(generic function with 1 method)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "function applygradient!(net::TwoLayerNet{T}, grads::TwoLayerNetGrads{T}, learning_rate::T) where {T<:AbstractFloat}\n", " net.a1lyr.W -= learning_rate .* grads.W1\n", " net.a1lyr.b -= learning_rate .* grads.b1\n", " net.a2lyr.W -= learning_rate .* grads.W2\n", " net.a2lyr.b -= learning_rate .* grads.b2\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.7.3 誤差逆伝播法の勾配確認" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LoadMnist" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "include(\"dataset/load_mnist.jl\")" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x00 0x01 … 0x00 0x00; 0x00 0x00 … 0x00 0x00; … ; 0x00 0x00 … 0x00 0x01; 0x00 0x00 … 0x00 0x00]), (Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], UInt8[0x00 0x00 … 0x00 0x00; 0x00 0x00 … 0x00 0x00; … ; 0x00 0x00 … 0x00 0x00; 0x00 0x00 … 0x00 0x00]))" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(x_train, t_train), (x_test, t_test) = LoadMnist.load_mnist(;\n", " normalize=true, flatten=true, one_hot_label=true)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(10, 60000)" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "size(t_train)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TwoLayerNet{Float32}(AffineLayer{Float32}(Float32[0.00318988 0.00485958 … 0.00432392 -0.00266472; 0.00109636 0.00661024 … 0.0103529 0.00247615; … ; -0.000739207 0.0207509 
… -6.73199f-5 0.0100186; 0.011015 0.0093262 … 0.00189109 -0.0164121], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], #undef, #undef, #undef), ReluLayer{Float32}(#undef), AffineLayer{Float32}(Float32[0.00970453 -0.00344387 … -0.00698218 0.00117947; 0.00413081 0.000712915 … -0.00246677 -0.00750007; … ; 0.00190007 -0.00820374 … 0.00863559 -0.0117912; -0.0139849 0.00173204 … -0.000736556 0.00177398], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], #undef, #undef, #undef), SoftmaxWithLossLayer{Float32}(7.6892265f20, #undef, #undef))" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "network = TwoLayerNet{Float32}(784, 50, 10)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "784×3 Array{Float32,2}:\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " ⋮ \n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_batch = x_train[:, 1:3]" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10×3 Array{Float32,2}:\n", " 0.0 1.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 1.0\n", " 1.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0\n", " 0.0 0.0 0.0" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t_batch = Matrix{Float32}(t_train[:, 1:3])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "10×3 Array{Float32,2}:\n", " -0.0011316 -0.0014604 -0.000707305\n", " -0.00289681 0.0045625 0.00177277 \n", " -0.000804678 -0.00404845 0.00219935 \n", " 0.00101501 0.00697281 0.000847883\n", " -0.00777562 -0.00674609 -0.00108261 \n", " -0.00570799 -0.0101445 -0.000680426\n", " -0.00433399 -0.00503931 -0.000889094\n", " 0.00661214 0.00467918 0.00023113 \n", " -0.00730404 -0.00778527 0.000421932\n", " 0.00122218 -0.00650803 -0.00211031 " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict(network, x_batch)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.303789029815617" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loss(network, x_batch, t_batch)\n", "# loss(network, x_batch, Matrix{Float32}(t_batch))" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TwoLayerNetGrads{Float32}(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Float32[-0.00222329, 0.00170545, -0.00132295, 0.0, -0.00491989, 0.0, 0.00502056, 0.0, 0.00248478, 0.00177194 … -0.00875813, 0.00109634, 0.00871906, -0.00546616, 0.000614705, -0.000547221, 0.00571311, 0.00147637, -0.00292858, 0.0], Float32[0.00182903 -0.0109375 … -0.0491178 0.0; 0.00183358 0.00400405 … 0.0101247 0.0; … ; 0.0018311 0.0039746 … 0.0100359 0.0; 0.00182647 0.00399781 … 0.0100695 0.0], Float32[-0.233289, 0.10027, 0.100066, 0.100451, -0.233698, -0.233729, 0.0998127, 0.100541, 0.0996665, 0.0999085])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grad_backprop = gradient(network, x_batch, t_batch)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5.7.4 誤差逆伝播法を使った学習" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"TwoLayerNet{Float32}(AffineLayer{Float32}(Float32[-0.0145858 0.0112641 … -0.00181048 -0.00982263; -0.00418988 0.000436219 … 0.0169963 -0.00666518; … ; -0.00234774 -0.0183891 … -0.0095593 -0.00152059; -0.0075102 0.00935047 … 0.000530805 -0.0067455], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], #undef, #undef, #undef), ReluLayer{Float32}(#undef), AffineLayer{Float32}(Float32[-0.00512687 -0.0145102 … -0.00207356 0.0202991; -0.00433898 0.0068244 … 0.00575347 -0.0205735; … ; -0.0189138 0.0150627 … -0.0172908 0.000555763; 0.00474322 -0.0054643 … -0.00044954 -0.0116008], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], #undef, #undef, #undef), SoftmaxWithLossLayer{Float32}(7.6892265f20, #undef, #undef))" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "network = TwoLayerNet{Float32}(784, 50, 10)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "iters_num = 10000;\n", "train_size = size(x_train, 2); # => 60000\n", "batch_size = 100;\n", "learning_rate = Float32(0.1);" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "train_loss_list = Float32[];\n", "train_acc_list = Float32[];\n", "test_acc_list = Float32[];" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "600" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iter_per_epoch = max(train_size ÷ batch_size, 1)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0: train_acc=0.15588333333333335 / test_acc=0.1581\n", "600: train_acc=0.9048833333333334 / test_acc=0.9073\n", "1200: train_acc=0.92345 / test_acc=0.925\n", "1800: train_acc=0.9356833333333333 / test_acc=0.9366\n", "2400: 
train_acc=0.9428166666666666 / test_acc=0.9426\n", "3000: train_acc=0.951 / test_acc=0.9468\n", "3600: train_acc=0.9546666666666667 / test_acc=0.9517\n", "4200: train_acc=0.9602 / test_acc=0.9552\n", "4800: train_acc=0.9620666666666666 / test_acc=0.9577\n", "5400: train_acc=0.96585 / test_acc=0.9614\n", "6000: train_acc=0.9685833333333334 / test_acc=0.9629\n", "6600: train_acc=0.9716166666666667 / test_acc=0.9634\n", "7200: train_acc=0.9726833333333333 / test_acc=0.9635\n", "7800: train_acc=0.9729 / test_acc=0.965\n", "8400: train_acc=0.9748666666666667 / test_acc=0.9662\n", "9000: train_acc=0.9767666666666667 / test_acc=0.9692\n", "9600: train_acc=0.9782333333333333 / test_acc=0.9665\n" ] } ], "source": [ "for i = 1:iters_num\n", " batch_mask = rand(1:train_size, batch_size)\n", " x_batch = x_train[:, batch_mask]\n", " t_batch = Matrix{Float32}(t_train[:, batch_mask])\n", " \n", " # 誤差逆伝播法によって勾配を求める\n", " grads = gradient(network, x_batch, t_batch)\n", " \n", " # 更新\n", " applygradient!(network, grads, learning_rate)\n", " \n", " _loss = loss(network, x_batch, t_batch)\n", " push!(train_loss_list, _loss)\n", "\n", " if i % iter_per_epoch == 1\n", " train_acc = accuracy(network, x_train, Matrix{Float32}(t_train))\n", " test_acc = accuracy(network, x_test, Matrix{Float32}(t_test))\n", " push!(train_acc_list, train_acc)\n", " push!(test_acc_list, test_acc)\n", " println(\"$(i-1): train_acc=$(train_acc) / test_acc=$(test_acc)\")\n", " end\n", "end" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "final: train_acc=0.9794666666666667 / test_acc=0.9689\n" ] } ], "source": [ "final_train_acc = accuracy(network, x_train, Matrix{Float32}(t_train))\n", "final_test_acc = accuracy(network, x_test, Matrix{Float32}(t_test))\n", "push!(train_acc_list, final_train_acc)\n", "push!(test_acc_list, final_test_acc)\n", "println(\"final: train_acc=$(final_train_acc) / test_acc=$(final_test_acc)\")" ] 
}, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PyPlot" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using PyPlot\n", "const plt = PyPlot" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "PyPlot.Figure(PyObject )" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "1-element Array{Any,1}:\n", " PyObject " ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(1:length(train_loss_list), train_loss_list)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "PyPlot.Figure(PyObject )" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "PyObject " ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x1 = [1:length(train_acc_list);]\n", "plt.plot(x1, train_acc_list, label=\"train_acc\")\n", "plt.plot(x1, test_acc_list, label=\"test_acc\")\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Julia 0.6.0", "language": "julia", "name": "julia-0.6" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "0.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }