{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "07abee6f",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "# Convolutions for Images\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "07444b49",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:01.908966Z",
     "iopub.status.busy": "2023-08-18T19:41:01.908168Z",
     "iopub.status.idle": "2023-08-18T19:41:05.382412Z",
     "shell.execute_reply": "2023-08-18T19:41:05.381355Z"
    },
    "origin_pos": 3,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "from d2l import torch as d2l"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd1878b1",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "Cross-correlation operation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5e550b4f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.386933Z",
     "iopub.status.busy": "2023-08-18T19:41:05.386003Z",
     "iopub.status.idle": "2023-08-18T19:41:05.393401Z",
     "shell.execute_reply": "2023-08-18T19:41:05.392355Z"
    },
    "origin_pos": 8,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "def corr2d(X, K):  \n",
    "    \"\"\"Compute 2D cross-correlation.\"\"\"\n",
    "    h, w = K.shape\n",
    "    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
    "    return Y"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2613d6a9",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Validate the output of the above implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7845059c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.397828Z",
     "iopub.status.busy": "2023-08-18T19:41:05.397122Z",
     "iopub.status.idle": "2023-08-18T19:41:05.427898Z",
     "shell.execute_reply": "2023-08-18T19:41:05.426544Z"
    },
    "origin_pos": 12,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[19., 25.],\n",
       "        [37., 43.]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
    "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
    "corr2d(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00c12716",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Implement a two-dimensional convolutional layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "74d09a7c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.432542Z",
     "iopub.status.busy": "2023-08-18T19:41:05.431776Z",
     "iopub.status.idle": "2023-08-18T19:41:05.444669Z",
     "shell.execute_reply": "2023-08-18T19:41:05.443731Z"
    },
    "origin_pos": 15,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "class Conv2D(nn.Module):\n",
    "    def __init__(self, kernel_size):\n",
    "        super().__init__()\n",
    "        self.weight = nn.Parameter(torch.rand(kernel_size))\n",
    "        self.bias = nn.Parameter(torch.zeros(1))\n",
    "\n",
    "    def forward(self, x):\n",
    "        return corr2d(x, self.weight) + self.bias"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "24c71c37",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "A simple application of a convolutional layer:\n",
    "detecting the edge of an object in an image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3b5cdda0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.448555Z",
     "iopub.status.busy": "2023-08-18T19:41:05.447775Z",
     "iopub.status.idle": "2023-08-18T19:41:05.456977Z",
     "shell.execute_reply": "2023-08-18T19:41:05.455824Z"
    },
    "origin_pos": 19,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
       "        [1., 1., 0., 0., 0., 0., 1., 1.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = torch.ones((6, 8))\n",
    "X[:, 2:6] = 0\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c64588b6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.463518Z",
     "iopub.status.busy": "2023-08-18T19:41:05.462855Z",
     "iopub.status.idle": "2023-08-18T19:41:05.467300Z",
     "shell.execute_reply": "2023-08-18T19:41:05.466485Z"
    },
    "origin_pos": 23,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "K = torch.tensor([[1.0, -1.0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e1d625a",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "We detect $1$ for the edge from white to black\n",
    "and $-1$ for the edge from black to white"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7287547f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.472001Z",
     "iopub.status.busy": "2023-08-18T19:41:05.471414Z",
     "iopub.status.idle": "2023-08-18T19:41:05.478644Z",
     "shell.execute_reply": "2023-08-18T19:41:05.477751Z"
    },
    "origin_pos": 25,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
       "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Y = corr2d(X, K)\n",
    "Y"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6e055bb",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "The kernel `K` only detects vertical edges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c5803e8e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.482194Z",
     "iopub.status.busy": "2023-08-18T19:41:05.481611Z",
     "iopub.status.idle": "2023-08-18T19:41:05.489355Z",
     "shell.execute_reply": "2023-08-18T19:41:05.488493Z"
    },
    "origin_pos": 27,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0.]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corr2d(X.t(), K)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b90666d",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Learn the kernel that generated `Y` from `X`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "cc5935f0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.493184Z",
     "iopub.status.busy": "2023-08-18T19:41:05.492373Z",
     "iopub.status.idle": "2023-08-18T19:41:05.588165Z",
     "shell.execute_reply": "2023-08-18T19:41:05.586875Z"
    },
    "origin_pos": 30,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch 2, loss 16.481\n",
      "epoch 4, loss 5.069\n",
      "epoch 6, loss 1.794\n",
      "epoch 8, loss 0.688\n",
      "epoch 10, loss 0.274\n"
     ]
    }
   ],
   "source": [
    "conv2d = nn.LazyConv2d(1, kernel_size=(1, 2), bias=False)\n",
    "\n",
    "X = X.reshape((1, 1, 6, 8))\n",
    "Y = Y.reshape((1, 1, 6, 7))\n",
    "lr = 3e-2\n",
    "\n",
    "for i in range(10):\n",
    "    Y_hat = conv2d(X)\n",
    "    l = (Y_hat - Y) ** 2\n",
    "    conv2d.zero_grad()\n",
    "    l.sum().backward()\n",
    "    conv2d.weight.data[:] -= lr * conv2d.weight.grad\n",
    "    if (i + 1) % 2 == 0:\n",
    "        print(f'epoch {i + 1}, loss {l.sum():.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16459fcb",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Take a look at the kernel tensor we learned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4ab76f3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-08-18T19:41:05.593720Z",
     "iopub.status.busy": "2023-08-18T19:41:05.592926Z",
     "iopub.status.idle": "2023-08-18T19:41:05.601680Z",
     "shell.execute_reply": "2023-08-18T19:41:05.600494Z"
    },
    "origin_pos": 35,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.0398, -0.9328]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conv2d.weight.data.reshape((1, 2))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "language_info": {
   "name": "python"
  },
  "required_libs": [],
  "rise": {
   "autolaunch": true,
   "enable_chalkboard": true,
   "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
   "scroll": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}