{ "cells": [ { "cell_type": "markdown", "id": "7c3fb25b", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "# Multilayer Perceptrons\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "cec00ab5", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:40.016291Z", "iopub.status.busy": "2023-08-18T19:38:40.015244Z", "iopub.status.idle": "2023-08-18T19:38:43.744461Z", "shell.execute_reply": "2023-08-18T19:38:43.741144Z" }, "origin_pos": 3, "tab": [ "pytorch" ] }, "outputs": [], "source": [ "%matplotlib inline\n", "import torch\n", "from d2l import torch as d2l" ] }, { "cell_type": "markdown", "id": "76090679", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Let's briefly survey some common ones\n", "ReLU provides a very simple nonlinear transformation" ] }, { "cell_type": "code", "execution_count": 2, "id": "5881b40d", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:43.749925Z", "iopub.status.busy": "2023-08-18T19:38:43.748740Z", "iopub.status.idle": "2023-08-18T19:38:44.145067Z", "shell.execute_reply": "2023-08-18T19:38:44.143027Z" }, "origin_pos": 8, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)\n", "y = torch.relu(x)\n", "d2l.plot(x.detach(), y.detach(), 'x', 'relu(x)', figsize=(5, 2.5))" ] }, { "cell_type": "code", "execution_count": 3, "id": "1d36fda0", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:44.152064Z", "iopub.status.busy": "2023-08-18T19:38:44.150649Z", "iopub.status.idle": "2023-08-18T19:38:44.554556Z", "shell.execute_reply": "2023-08-18T19:38:44.552066Z" }, "origin_pos": 13, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y.backward(torch.ones_like(x), retain_graph=True)\n", "d2l.plot(x.detach(), x.grad, 'x', 'grad of relu', figsize=(5, 2.5))" ] }, { "cell_type": "markdown", "id": "7df9db18", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "The *sigmoid function* transforms those inputs\n", "to outputs that lie on the interval (0, 1)" ] }, { "cell_type": "code", "execution_count": 4, "id": "f75a16d6", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:44.563423Z", "iopub.status.busy": "2023-08-18T19:38:44.559371Z", "iopub.status.idle": "2023-08-18T19:38:44.872663Z", "shell.execute_reply": "2023-08-18T19:38:44.870181Z" }, "origin_pos": 18, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y = torch.sigmoid(x)\n", "d2l.plot(x.detach(), y.detach(), 'x', 'sigmoid(x)', figsize=(5, 2.5))" ] }, { "cell_type": "code", "execution_count": 5, "id": "c4cc9493", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:44.878332Z", "iopub.status.busy": "2023-08-18T19:38:44.877535Z", "iopub.status.idle": "2023-08-18T19:38:45.347828Z", "shell.execute_reply": "2023-08-18T19:38:45.346820Z" }, "origin_pos": 23, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "x.grad.data.zero_()\n", "y.backward(torch.ones_like(x),retain_graph=True)\n", "d2l.plot(x.detach(), x.grad, 'x', 'grad of sigmoid', figsize=(5, 2.5))" ] }, { "cell_type": "markdown", "id": "84a05d74", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "The tanh (hyperbolic tangent)\n", "function also squashes its inputs\n", "between $-1$ and $1$" ] }, { "cell_type": "code", "execution_count": 6, "id": "59cc5dc6", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:45.353701Z", "iopub.status.busy": "2023-08-18T19:38:45.352955Z", "iopub.status.idle": "2023-08-18T19:38:45.679220Z", "shell.execute_reply": "2023-08-18T19:38:45.677327Z" }, "origin_pos": 28, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y = torch.tanh(x)\n", "d2l.plot(x.detach(), y.detach(), 'x', 'tanh(x)', figsize=(5, 2.5))" ] }, { "cell_type": "code", "execution_count": 7, "id": "fef45e56", "metadata": { "execution": { "iopub.execute_input": "2023-08-18T19:38:45.684265Z", "iopub.status.busy": "2023-08-18T19:38:45.683296Z", "iopub.status.idle": "2023-08-18T19:38:46.004038Z", "shell.execute_reply": "2023-08-18T19:38:46.001327Z" }, "origin_pos": 33, "tab": [ "pytorch" ] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "x.grad.data.zero_()\n", "y.backward(torch.ones_like(x),retain_graph=True)\n", "d2l.plot(x.detach(), x.grad, 'x', 'grad of tanh', figsize=(5, 2.5))" ] } ], "metadata": { "celltoolbar": "Slideshow", "language_info": { "name": "python" }, "required_libs": [], "rise": { "autolaunch": true, "enable_chalkboard": true, "overlay": "

", "scroll": true } }, "nbformat": 4, "nbformat_minor": 5 }