{
"cells": [
{
"cell_type": "markdown",
"id": "d31d58d9",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"# Implementation of Multilayer Perceptrons\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "87926c3b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:41:22.340655Z",
"iopub.status.busy": "2023-08-18T19:41:22.340381Z",
"iopub.status.idle": "2023-08-18T19:41:25.449640Z",
"shell.execute_reply": "2023-08-18T19:41:25.448607Z"
},
"origin_pos": 3,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"import torch\n",
"from torch import nn\n",
"from d2l import torch as d2l"
]
},
{
"cell_type": "markdown",
"id": "3cc10e54",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Implement an MLP\n",
"with one hidden layer and 256 hidden units"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bcccd30d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:41:25.459844Z",
"iopub.status.busy": "2023-08-18T19:41:25.459149Z",
"iopub.status.idle": "2023-08-18T19:41:25.472859Z",
"shell.execute_reply": "2023-08-18T19:41:25.471738Z"
},
"origin_pos": 12,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"class MLPScratch(d2l.Classifier):\n",
" def __init__(self, num_inputs, num_outputs, num_hiddens, lr, sigma=0.01):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" self.W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * sigma)\n",
" self.b1 = nn.Parameter(torch.zeros(num_hiddens))\n",
" self.W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * sigma)\n",
" self.b2 = nn.Parameter(torch.zeros(num_outputs))"
]
},
{
"cell_type": "markdown",
"id": "5891e5e6",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Implement the ReLU activation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2af157bb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:41:25.477923Z",
"iopub.status.busy": "2023-08-18T19:41:25.477044Z",
"iopub.status.idle": "2023-08-18T19:41:25.482976Z",
"shell.execute_reply": "2023-08-18T19:41:25.481963Z"
},
"origin_pos": 17,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"def relu(X):\n",
" a = torch.zeros_like(X)\n",
" return torch.max(X, a)"
]
},
{
"cell_type": "markdown",
"id": "c679c4c1",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"Implement our model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7438b40c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:41:25.492513Z",
"iopub.status.busy": "2023-08-18T19:41:25.491685Z",
"iopub.status.idle": "2023-08-18T19:41:25.498375Z",
"shell.execute_reply": "2023-08-18T19:41:25.497344Z"
},
"origin_pos": 21,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"@d2l.add_to_class(MLPScratch)\n",
"def forward(self, X):\n",
" X = X.reshape((-1, self.num_inputs))\n",
" H = relu(torch.matmul(X, self.W1) + self.b1)\n",
" return torch.matmul(H, self.W2) + self.b2"
]
},
{
"cell_type": "markdown",
"id": "18681986",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"The training loop for MLPs\n",
"is exactly the same as for softmax regression"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "82d57362",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:41:25.502740Z",
"iopub.status.busy": "2023-08-18T19:41:25.502096Z",
"iopub.status.idle": "2023-08-18T19:42:19.146140Z",
"shell.execute_reply": "2023-08-18T19:42:19.144962Z"
},
"origin_pos": 23,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"