{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Theano 实例:卷积神经网络"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using gpu device 1: Tesla C2075 (CNMeM is disabled)\n"
     ]
    }
   ],
   "source": [
    "import theano\n",
    "import theano.tensor as T\n",
    "from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams\n",
    "import numpy as np\n",
    "from load import mnist\n",
    "\n",
    "srng = RandomStreams()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从前一节导入有用的函数:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def floatX(X):\n",
    "    \"\"\"Convert X to a NumPy array using Theano's configured float dtype.\"\"\"\n",
    "    return np.asarray(X, dtype=theano.config.floatX)\n",
    "\n",
    "def init_weights(shape):\n",
    "    \"\"\"Return a shared variable of the given shape filled with randn() * 0.01.\"\"\"\n",
    "    return theano.shared(floatX(np.random.randn(*shape) * 0.01))\n",
    "\n",
    "def rectify(X):\n",
    "    \"\"\"Rectified linear unit: element-wise maximum of X and 0.\"\"\"\n",
    "    return T.maximum(X, 0.)\n",
    "\n",
    "def softmax(X):\n",
    "    \"\"\"Row-wise softmax of a matrix.\n",
    "\n",
    "    The row maximum is subtracted before exponentiation so exp() cannot overflow.\n",
    "    \"\"\"\n",
    "    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))\n",
    "    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')\n",
    "\n",
    "def dropout(X, p=0.):\n",
    "    \"\"\"Apply dropout with drop probability p; X is returned unchanged when p == 0.\n",
    "\n",
    "    Kept elements are divided by the retain probability (1 - p).\n",
    "    \"\"\"\n",
    "    if p > 0:\n",
    "        retain_prob = 1 - p\n",
    "        # Binary mask: each element is 1 with probability retain_prob.\n",
    "        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)\n",
    "        X /= retain_prob\n",
    "    return X\n",
    "\n",
    "def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):\n",
    "    \"\"\"Build the RMSprop update list for `params` with respect to `cost`.\n",
    "\n",
    "    For every parameter a shared accumulator holds a running average of the\n",
    "    squared gradient (decay `rho`); the gradient is divided by the square\n",
    "    root of that average (plus `epsilon`) before the step of size `lr`.\n",
    "\n",
    "    Returns a list of (shared_variable, new_value) pairs suitable for the\n",
    "    `updates` argument of theano.function.\n",
    "    \"\"\"\n",
    "    grads = T.grad(cost=cost, wrt=params)\n",
    "    updates = []\n",
    "    for p, g in zip(params, grads):\n",
    "        # Accumulator starts as zeros with the same shape/dtype as the parameter.\n",
    "        acc = theano.shared(p.get_value() * 0.)\n",
    "        acc_new = rho * acc + (1 - rho) * g ** 2\n",
    "        gradient_scaling = T.sqrt(acc_new + epsilon)\n",
    "        g = g / gradient_scaling\n",
    "        updates.append((acc, acc_new))\n",
    "        updates.append((p, p - lr * g))\n",
    "    return updates"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "与前一节不同,我们使用卷积神经网络来实现这次的模型,为此,我们需要导入 2 维的卷积和池化函数:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from theano.tensor.nnet.conv import conv2d\n",
    "from theano.tensor.signal.downsample import max_pool_2d"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`conv2d` 函数接受两个输入:\n",
    "\n",
    "- 对应输入的 `4D` 张量,其形状如下:\n",
    "    \n",
    "    `[mini-batch size, number of feature maps at layer m-1, image height, image width]`\n",
    "    \n",
    "    \n",
    "- 对应参数矩阵的 `4D` 张量,其形状如下:\n",
    "    \n",
    "    `[number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]`\n",
    "\n",
    "为了对图像使用卷积,我们需要将图像转化为原始的 `28 × 28` 大小,同时添加一维表示图像的通道数(黑白图像为 1):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "trX, teX, trY, teY = mnist(onehot=True)\n",
    "\n",
    "trX = trX.reshape(-1, 1, 28, 28)\n",
    "teX = teX.reshape(-1, 1, 28, 28)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "注意,对于 `reshape` 方法,传入的参数是 `-1` 表示该维的维度将根据其他参数自动计算。\n",
    "\n",
    "模型首先进行三层卷积加池化操作,然后在第三层的输出中加一个全连接层,最后在第四层加上一个 `softmax` 层:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):\n",
    "    \"\"\"Three conv + max-pool stages, one fully connected layer, then softmax.\n",
    "\n",
    "    X is the 4D input batch; w, w2, w3 are the convolution filters, w4 the\n",
    "    fully connected weights and w_o the output weights.  p_drop_conv is the\n",
    "    dropout probability applied after each pooling stage, p_drop_hidden the\n",
    "    one applied after the fully connected layer.\n",
    "\n",
    "    The shape comments below assume a mini-batch of 128 images of 28 x 28.\n",
    "    Returns (l1, l2, l3, l4, pyx), where pyx holds the class probabilities.\n",
    "    \"\"\"\n",
    "    # X:  128 * 1 * 28 * 28\n",
    "    # w:  32 * 1 * 3 * 3\n",
    "    # full mode\n",
    "    # l1a: 128 * 32 * (28 + 3 - 1) * (28 + 3 - 1)\n",
    "    l1a = rectify(conv2d(X, w, border_mode='full'))\n",
    "    # l1a: 128 * 32 * 30 * 30\n",
    "    # ignore_border False\n",
    "    # l1:  128 * 32 * (30 / 2) * (30 / 2)\n",
    "    l1 = max_pool_2d(l1a, (2, 2), ignore_border=False)\n",
    "    l1 = dropout(l1, p_drop_conv)\n",
    "\n",
    "    # l1:  128 * 32 * 15 * 15\n",
    "    # w2:  64 * 32 * 3 * 3\n",
    "    # valid mode\n",
    "    # l2a: 128 * 64 * (15 - 3 + 1) * (15 - 3 + 1)\n",
    "    l2a = rectify(conv2d(l1, w2))\n",
    "    # l2a: 128 * 64 * 13 * 13\n",
    "    # l2:  128 * 64 * (13 / 2 + 1) * (13 / 2 + 1)\n",
    "    l2 = max_pool_2d(l2a, (2, 2), ignore_border=False)\n",
    "    l2 = dropout(l2, p_drop_conv)\n",
    "\n",
    "    # l2:  128 * 64 * 7 * 7\n",
    "    # w3:  128 * 64 * 3 * 3\n",
    "    # l3a: 128 * 128 * (7 - 3 + 1) * (7 - 3 + 1)\n",
    "    l3a = rectify(conv2d(l2, w3))\n",
    "    # l3a: 128 * 128 * 5 * 5\n",
    "    # l3b: 128 * 128 * (5 / 2 + 1) * (5 / 2 + 1)\n",
    "    l3b = max_pool_2d(l3a, (2, 2), ignore_border=False)\n",
    "    # l3b: 128 * 128 * 3 * 3\n",
    "    # l3:  128 * (128 * 3 * 3)\n",
    "    l3 = T.flatten(l3b, outdim=2)\n",
    "    l3 = dropout(l3, p_drop_conv)\n",
    "\n",
    "    # l3: 128 * (128 * 3 * 3)\n",
    "    # w4: (128 * 3 * 3) * 625\n",
    "    # l4: 128 * 625\n",
    "    l4 = rectify(T.dot(l3, w4))\n",
    "    l4 = dropout(l4, p_drop_hidden)\n",
    "\n",
    "    # l4:  128 * 625\n",
    "    # w_o: 625 * 10\n",
    "    # pyx: 128 * 10\n",
    "    pyx = softmax(T.dot(l4, w_o))\n",
    "    return l1, l2, l3, l4, pyx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "定义符号变量:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = T.ftensor4()\n",
    "Y = T.fmatrix()\n",
    "\n",
    "w = init_weights((32, 1, 3, 3))\n",
    "w2 = init_weights((64, 32, 3, 3))\n",
    "w3 = init_weights((128, 64, 3, 3))\n",
    "w4 = init_weights((128 * 3 * 3, 625))\n",
    "w_o = init_weights((625, 10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用带 `dropout` 的模型进行训练:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, w_o, 0.2, 0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用不带 `dropout` 的模型进行预测:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, w_o, 0., 0.)\n",
    "y_x = T.argmax(py_x, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "定义损失函数和迭代规则:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))\n",
    "params = [w, w2, w3, w4, w_o]\n",
    "updates = RMSprop(cost, params, lr=0.001)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "开始训练:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "iter 001, 0.917\n",
      "iter 002, 0.974\n",
      "iter 003, 0.983\n",
      "iter 004, 0.984\n",
      "iter 005, 0.987\n",
      "iter 006, 0.989\n",
      "iter 007, 0.991\n",
      "iter 008, 0.993\n",
      "iter 009, 0.991\n",
      "iter 010, 0.992\n",
      "iter 011, 0.993\n",
      "iter 012, 0.992\n",
      "iter 013, 0.992\n",
      "iter 014, 0.992\n",
      "iter 015, 0.993\n",
      "iter 016, 0.992\n",
      "iter 017, 0.994\n",
      "iter 018, 0.993\n",
      "iter 019, 0.993\n",
      "iter 020, 0.994\n",
      "iter 021, 0.993\n",
      "iter 022, 0.993\n",
      "iter 023, 0.993\n",
      "iter 024, 0.992\n",
      "iter 025, 0.994\n",
      "iter 026, 0.993\n",
      "iter 027, 0.994\n",
      "iter 028, 0.993\n",
      "iter 029, 0.993\n",
      "iter 030, 0.994\n",
      "iter 031, 0.994\n",
      "iter 032, 0.993\n",
      "iter 033, 0.994\n",
      "iter 034, 0.994\n",
      "iter 035, 0.994\n",
      "iter 036, 0.994\n",
      "iter 037, 0.994\n",
      "iter 038, 0.993\n",
      "iter 039, 0.994\n",
      "iter 040, 0.994\n",
      "iter 041, 0.994\n",
      "iter 042, 0.994\n",
      "iter 043, 0.995\n",
      "iter 044, 0.994\n",
      "iter 045, 0.994\n",
      "iter 046, 0.994\n",
      "iter 047, 0.995\n",
      "iter 048, 0.994\n",
      "iter 049, 0.994\n",
      "iter 050, 0.995\n"
     ]
    }
   ],
   "source": [
    "train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)\n",
    "predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)\n",
    "\n",
    "batch_size = 128\n",
    "\n",
    "for i in range(50):\n",
    "    # Iterate over batch start offsets so the final, shorter batch is trained on too\n",
    "    # (slicing past the end of the array simply yields the remaining samples).\n",
    "    for start in range(0, len(trX), batch_size):\n",
    "        end = start + batch_size\n",
    "        # Use a separate name so the symbolic `cost` expression is not overwritten.\n",
    "        batch_cost = train(trX[start:end], trY[start:end])\n",
    "    print(\"iter {:03d}, {:.3f}\".format(i + 1, np.mean(np.argmax(teY, axis=1) == predict(teX))))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}