{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# 多层感知机的从零开始实现\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:36:54.565830Z", "start_time": "2019-07-02T20:36:51.010297Z" }, "attributes": { "classes": [], "id": "", "n": "9" } }, "outputs": [], "source": [ "import d2l\n", "from mxnet import np, npx, gluon\n", "# NOTE(review): npx.set_np() presumably switches MXNet to its NumPy-compatible mode - confirm.\n", "npx.set_np()\n", "\n", "# Mini-batch size handed to the Fashion-MNIST loader; the loader's result is\n", "# unpacked into a training iterator and a test iterator.\n", "batch_size = 256\n", "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "初始模型参数。" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:36:54.575953Z", "start_time": "2019-07-02T20:36:54.568798Z" }, "attributes": { "classes": [], "id": "", "n": "3" } }, "outputs": [], "source": [ "# Layer widths used below: input features, output units, hidden units.\n", "# NOTE(review): 784 and 10 presumably match 28x28 images and 10 classes - confirm.\n", "num_inputs, num_outputs, num_hiddens = 784, 10, 256\n", "\n", "# Weights are sampled from a normal distribution with scale 0.01; biases start at zero.\n", "W1 = np.random.normal(scale=0.01, size=(num_inputs, num_hiddens))\n", "b1 = np.zeros(num_hiddens)\n", "W2 = np.random.normal(scale=0.01, size=(num_hiddens, num_outputs))\n", "b2 = np.zeros(num_outputs)\n", "params = [W1, b1, W2, b2]\n", "\n", "# attach_grad() is invoked on every parameter before training.\n", "# NOTE(review): presumably this allocates the gradient buffers autograd writes to - confirm.\n", "for param in params:\n", " param.attach_grad()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "激活函数。" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:36:54.580360Z", "start_time": "2019-07-02T20:36:54.577434Z" }, "attributes": { "classes": [], "id": "", "n": "4" } }, "outputs": [], "source": [ "def relu(X):\n", " \"\"\"Return the element-wise maximum of X and 0 (the ReLU activation).\"\"\"\n", " return np.maximum(X, 0)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "定义模型。" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:36:54.586617Z", "start_time": "2019-07-02T20:36:54.582210Z" }, "attributes": { "classes": [], "id": "", "n": "5" 
} }, "outputs": [], "source": [ "def net(X):\n", " \"\"\"Forward pass of the two-layer perceptron.\n", "\n", " X is reshaped to (-1, num_inputs), passed through a ReLU hidden layer\n", " (W1, b1) and then an affine output layer (W2, b2). The value returned is\n", " that affine output; no softmax is applied inside this function.\n", " \"\"\"\n", " X = X.reshape((-1, num_inputs))\n", " H = relu(np.dot(X, W1) + b1)\n", " return np.dot(H, W2) + b2" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "训练。" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:37:18.081449Z", "start_time": "2019-07-02T20:36:54.588505Z" }, "attributes": { "classes": [], "id": "", "n": "7" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# NOTE(review): this loss presumably applies softmax itself, matching the raw\n", "# scores returned by net - confirm against the Gluon loss documentation.\n", "loss = gluon.loss.SoftmaxCrossEntropyLoss()\n", "num_epochs, lr = 10, 0.5\n", "# The updater is a callback taking batch_size and running d2l.sgd on params\n", "# with learning rate lr; its batch_size argument shadows the global of the same name.\n", "d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, \n", " lambda batch_size: d2l.sgd(params, lr, batch_size))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "预测。" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-07-02T20:37:18.363043Z", "start_time": "2019-07-02T20:37:18.082991Z" }, "scrolled": true }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Call d2l.predict_ch3 on the network and the test iterator; the saved output is a figure.\n", "d2l.predict_ch3(net, test_iter)" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }