{ "cells": [ { "cell_type": "markdown", "id": "e092f1f8-8b39-4859-9fe5-744fc0a942e7", "metadata": {}, "source": [ "# 1. Construct an image X with diagonal edges." ] }, { "cell_type": "code", "execution_count": 4, "id": "26884bae-ff3b-47a6-be5d-dc1c0ca36b2c", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[1., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 1., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 1., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 1., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 1., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 1., 0., 0.]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "def corr2d(X,K):\n", " h,w = K.shape\n", " Y = torch.zeros(X.shape[0]-h+1, X.shape[1]-w+1)\n", " for i in range(Y.shape[0]):\n", " for j in range(Y.shape[1]):\n", " Y[i, j] = (X[i:i+h, j:j+w]*K).sum()\n", " return Y\n", "\n", "K = torch.tensor([[1.0,-1.0]])\n", "X = torch.eye(6,8)\n", "X" ] }, { "cell_type": "markdown", "id": "ebecd2d5-b4dc-461f-bd73-a69003993f24", "metadata": {}, "source": [ "## 1.1 What happens if you apply the kernel K in this section to it?" ] }, { "cell_type": "code", "execution_count": 8, "id": "6b71bb54-14fd-40a9-b80b-7c1fd47f546f", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[ 1., 0., 0., 0., 0., 0., 0.],\n", " [-1., 1., 0., 0., 0., 0., 0.],\n", " [ 0., -1., 1., 0., 0., 0., 0.],\n", " [ 0., 0., -1., 1., 0., 0., 0.],\n", " [ 0., 0., 0., -1., 1., 0., 0.],\n", " [ 0., 0., 0., 0., -1., 1., 0.]])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corr2d(X, K)" ] }, { "cell_type": "markdown", "id": "0906b0b8-246c-44d5-b757-89d11fecd908", "metadata": {}, "source": [ "## 1.2 What happens if you transpose X?" ] }, { "cell_type": "code", "execution_count": 9, "id": "b480fd36-3a9a-4b39-bac1-0e0737c819ef", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[ 1., 0., 0., 0., 0.],\n", " [-1., 1., 0., 0., 0.],\n", " [ 0., -1., 1., 0., 0.],\n", " [ 0., 0., -1., 1., 0.],\n", " [ 0., 0., 0., -1., 1.],\n", " [ 0., 0., 0., 0., -1.],\n", " [ 0., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 0.]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corr2d(X.T, K)" ] }, { "cell_type": "markdown", "id": "9ed7718b-5571-4671-8edc-2986f3b96590", "metadata": {}, "source": [ "## 1.3 What happens if you transpose K?" ] }, { "cell_type": "code", "execution_count": 7, "id": "c25f175c-8607-415c-a90c-73c6e9010266", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[ 1., -1., 0., 0., 0., 0., 0., 0.],\n", " [ 0., 1., -1., 0., 0., 0., 0., 0.],\n", " [ 0., 0., 1., -1., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 1., -1., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 1., -1., 0., 0.]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corr2d(X, K.T)" ] }, { "cell_type": "markdown", "id": "f3f456cd-9892-4208-ba81-d235523d0e31", "metadata": {}, "source": [ "# 2. Design some kernels manually." ] }, { "cell_type": "markdown", "id": "ee31f43b-47a2-4cfd-b1d5-3e194b64689a", "metadata": {}, "source": [ "## 2.1 Given a directional vector $\\vec{v}=(v_1,v_2)$, derive an edge-detection kernel that detects edges orthogonal to $\\vec{v}$, i.e., edges in the direction $(v_2,-v_1)$." 
] }, { "cell_type": "code", "execution_count": 400, "id": "edcd439e-512b-430e-af9f-ccb5e6f290a7", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 2, loss 10.199\n", "epoch 4, loss 5.911\n", "epoch 6, loss 4.149\n", "epoch 8, loss 3.191\n", "epoch 10, loss 2.631\n", "epoch 12, loss 2.293\n", "epoch 14, loss 2.085\n", "epoch 16, loss 1.954\n" ] }, { "data": { "text/plain": [ "tensor([[ 0.3847, -0.3420, 0.2657],\n", " [-0.1117, 0.3634, -0.0246],\n", " [ 0.1967, -0.1634, 0.2337]])" ] }, "execution_count": 400, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pic = torch.roll(X, shifts=(1,),dims=(1,))+torch.roll(X, shifts=(2,),dims=(1,))+X\n", "Y = X+torch.roll(X, shifts=(2,),dims=(1,))\n", "# Construct a two-dimensional convolutional layer with 1 output channel and a\n", "# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here\n", "conv2d = nn.LazyConv2d(1, kernel_size=(3, 3), bias=False,padding=1)\n", "\n", "# The two-dimensional convolutional layer uses four-dimensional input and\n", "# output in the format of (example, channel, height, width), where the batch\n", "# size (number of examples in the batch) and the number of channels are both 1\n", "train_X = pic.reshape((1, 1, pic.shape[0], pic.shape[1]))\n", "train_Y = Y.reshape((1, 1, Y.shape[0], Y.shape[1]))\n", "lr = 1e-2 # Learning rate\n", "\n", "for i in range(16):\n", " Y_hat = conv2d(train_X)\n", " l = (Y_hat - train_Y) ** 2\n", " conv2d.zero_grad()\n", " l.sum().backward()\n", " # Update the kernel\n", " conv2d.weight.data[:] -= lr * conv2d.weight.grad\n", " if (i + 1) % 2 == 0:\n", " print(f'epoch {i + 1}, loss {l.sum():.3f}')\n", "conv2d.weight.data.squeeze()" ] }, { "cell_type": "code", "execution_count": 399, "id": "835fc1ec-9498-4d9a-a1af-d3c5159ac932", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0.9362, 0.3084, 0.9182, -0.1221, 0.2900, 0.0000],\n", " [-0.1041, 0.9362, 0.3084, 0.9182, -0.1221, 0.2900],\n", " [ 0.2157, -0.1041, 0.9362, 0.3084, 0.9182, -0.1221],\n", " [ 0.0000, 0.2157, -0.1041, 0.9362, 0.3084, 0.9182]])" ] }, "execution_count": 399, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corr2d(pic,conv2d.weight.data.squeeze())" ] }, { "cell_type": "code", "execution_count": 270, "id": "ecd0519f-3cb7-4ca6-b061-55271e43f86e", "metadata": { "tags": [] }, "outputs": [], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torchvision.transforms.functional as tF\n", "def stat_theta(r):\n", " v = [1,0]\n", " theta = math.acos(F.cosine_similarity(torch.tensor(r).type(torch.float32),torch.tensor(v).type(torch.float32),dim=0))/math.pi*180\n", " return theta\n", "\n", "def gen_K(v):\n", " v = torch.tensor(v, dtype=torch.float32) # Replace v1 and v2 with your values\n", " u = v / torch.norm(v)\n", " # Create the edge-detection kernel along the direction (v2, -v1)\n", " K = torch.tensor([[-u[1], u[0]],[-u[0], -u[1]]], dtype=torch.float32)\n", " return K\n", "\n", "def test(r,a):\n", " theta = stat_theta(r)\n", " K = gen_K(r)\n", " print(K)\n", " b = tF.rotate(a.reshape(1,1,a.shape[0],-1,),angle=theta).reshape(a.shape[0],-1)\n", " print(b)\n", " print(corr2d(b,K))" ] }, { "cell_type": "code", "execution_count": 278, "id": "959dd02a-59a4-4d96-8f04-6e15585bd75b", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[-0., 1.],\n", " [-1., -0.]])\n", "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 
0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.]])\n", "tensor([[ 0., -1., 0., 0., 0., 1., 0.],\n", " [ 0., -1., 0., 0., 0., 1., 0.],\n", " [ 0., -1., 0., 0., 0., 1., 0.],\n", " [ 0., -1., 0., 0., 0., 1., 0.],\n", " [ 0., -1., 0., 0., 0., 1., 0.]])\n" ] } ], "source": [ "a = torch.ones((6, 8))\n", "a[:, 2:6] = 0\n", "test([1,0],a)" ] }, { "cell_type": "code", "execution_count": 273, "id": "92764c3f-3782-43f7-b3bc-99c3e8d57e12", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[-1., 0.],\n", " [-0., -1.]])\n", "tensor([[0., 1., 1., 1., 1., 1., 1., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 0., 0., 0., 0., 0., 0., 0.],\n", " [0., 1., 1., 1., 1., 1., 1., 0.]])\n", "tensor([[ 0., -1., -1., -1., -1., -1., -1.],\n", " [ 0., 0., 0., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 0., 0., 0.],\n", " [-1., -1., -1., -1., -1., -1., 0.]])\n" ] } ], "source": [ "test([0,1],a)" ] }, { "cell_type": "code", "execution_count": 274, "id": "c2ebc6bb-03cb-4c7b-95f8-3fca73faf68a", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[-0.7071, 0.7071],\n", " [-0.7071, -0.7071]])\n", "tensor([[0., 0., 0., 0., 1., 1., 1., 0.],\n", " [0., 0., 0., 0., 0., 1., 1., 1.],\n", " [1., 0., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 0., 1.],\n", " [1., 1., 1., 0., 0., 0., 0., 0.],\n", " [0., 1., 1., 1., 0., 0., 0., 0.]])\n", "tensor([[ 0.0000, 0.0000, 0.0000, 0.7071, -0.7071, -1.4142, -2.1213],\n", " [-0.7071, 0.0000, 0.0000, 0.0000, 0.7071, -0.7071, -1.4142],\n", " [-2.1213, -0.7071, 0.0000, 0.0000, 0.0000, 0.7071, -0.7071],\n", " [-1.4142, -2.1213, -0.7071, 0.0000, 0.0000, 0.0000, 0.7071],\n", " [-0.7071, -1.4142, -2.1213, -0.7071, 0.0000, 0.0000, 0.0000]])\n" ] } ], "source": [ "test([1,1],a)" ] }, { "cell_type": "markdown", "id": "349f8837-f468-4e46-a9ea-3e4309ed185b", "metadata": {}, "source": [ "## 2.2 Derive a finite difference operator for the second derivative. What is the minimum size of the convolutional kernel associated with it? Which structures in images respond most strongly to it?" ] }, { "cell_type": "markdown", "id": "0c0f9b4f-be20-4b19-b42a-23c91afb8894", "metadata": {}, "source": [ "The second derivative of a continuous function can be approximated using a finite difference operator. One common way to do this is to use the central difference formula, which is given by:\n", "\n", "$$ \\frac{\\partial^2 f}{\\partial x^2} \\approx \\frac{f(x+h) - 2f(x) + f(x-h)}{h^2} $$\n", "\n", "Where $h$ is a small step size.\n", "\n", "To create a convolutional kernel associated with the second derivative, we can discretize the above formula and put it into a kernel format. The kernel would look like:\n", "$$\\text{kernel} = \\begin{bmatrix} 1 & -2 & 1 \\end{bmatrix}$$\n", "This kernel captures the second derivative along the horizontal direction. It's worth noting that the central difference formula can be applied in both horizontal and vertical directions separately to capture the second derivative along each direction.\n", "\n", "The minimum size of the convolutional kernel associated with the second derivative is $3 \\times 1$ or $1 \\times 3$. 
This size captures the essence of the central difference formula for the second derivative.\n", "\n", "Structures in images that have rapid intensity changes or sharp transitions will respond most strongly to this second derivative kernel. These structures include edges, corners, and other high-frequency features. The second derivative kernel enhances areas in the image where the intensity changes abruptly, making it a useful tool for edge detection and feature extraction." ] }, { "cell_type": "code", "execution_count": 280, "id": "5174669d-5409-4bb6-83b6-5598362e8500", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.]])\n" ] }, { "data": { "text/plain": [ "tensor([[-1., 1., 0., 0., 1., -1.],\n", " [-1., 1., 0., 0., 1., -1.],\n", " [-1., 1., 0., 0., 1., -1.],\n", " [-1., 1., 0., 0., 1., -1.],\n", " [-1., 1., 0., 0., 1., -1.],\n", " [-1., 1., 0., 0., 1., -1.]])" ] }, "execution_count": 280, "metadata": {}, "output_type": "execute_result" } ], "source": [ "K = torch.tensor([[1,-2,1]])\n", "print(a)\n", "corr2d(a,K)" ] }, { "cell_type": "markdown", "id": "81484b65-ff3a-43e2-aa70-e5dbe11607eb", "metadata": {}, "source": [ "## 2.3 How would you design a blur kernel? Why might you want to use such a kernel?" ] }, { "cell_type": "markdown", "id": "eddff753-df4d-420d-a145-f76f16dab949", "metadata": {}, "source": [ "Designing a blur kernel involves creating a convolutional kernel that, when applied to an image, reduces the high-frequency components in the image, resulting in a smoother and more blurred appearance. A commonly used blur kernel is the Gaussian kernel, which is derived from the Gaussian distribution. The Gaussian kernel has the property of spreading out the pixel values around the central pixel, creating a gradual transition between neighboring pixels.\n", "\n", "To design a Gaussian blur kernel, you typically follow these steps:\n", "\n", "1. Choose the size of the kernel: The size of the kernel determines the extent of blurring. A larger kernel size will result in more pronounced blurring.\n", "\n", "2. Determine the standard deviation (\\(\\sigma\\)): The standard deviation controls the spread of the Gaussian distribution. A larger \\(\\sigma\\) will result in a wider spread and more smoothing.\n", "\n", "3. Compute the Gaussian values: For each pixel in the kernel, compute the Gaussian value based on its distance from the center. 
The Gaussian values are then normalized to ensure that they sum up to 1.\n", "\n", "Here's an example of how you can create a 2D Gaussian blur kernel using Python and NumPy:\n" ] }, { "cell_type": "code", "execution_count": 287, "id": "be44e440-43ff-46ac-9220-668e35b92f34", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Blur kernel sum:1.0\n", "tensor([[0.0751, 0.1238, 0.0751],\n", " [0.1238, 0.2042, 0.1238],\n", " [0.0751, 0.1238, 0.0751]], dtype=torch.float64)\n", "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.],\n", " [1., 1., 0., 0., 0., 0., 1., 1.]])\n" ] }, { "data": { "text/plain": [ "tensor([[0.7259, 0.2741, 0.0000, 0.0000, 0.2741, 0.7259],\n", " [0.7259, 0.2741, 0.0000, 0.0000, 0.2741, 0.7259],\n", " [0.7259, 0.2741, 0.0000, 0.0000, 0.2741, 0.7259],\n", " [0.7259, 0.2741, 0.0000, 0.0000, 0.2741, 0.7259]])" ] }, "execution_count": 287, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "import numpy as np\n", "def gaussian_kernel(size, sigma):\n", " kernel = np.fromfunction(\n", " lambda x, y: (1/(2*np.pi*sigma**2)) * np.exp(-((x-size//2)**2 + (y-size//2)**2) / (2*sigma**2)),\n", " (size, size)\n", " )\n", " kernel /= np.sum(kernel)\n", " return kernel\n", "\n", "kernel_size = 3\n", "sigma = 1.0\n", "blur_kernel = torch.tensor(gaussian_kernel(kernel_size, sigma))\n", "\n", "print(f\"Blur kernel sum:{blur_kernel.sum()}\")\n", "print(blur_kernel)\n", "print(a)\n", "corr2d(a,blur_kernel)" ] }, { "cell_type": "markdown", "id": "fd9eff06-e74c-4c62-b632-438f5d9e9c0e", "metadata": {}, "source": [ "Why might you want to use a blur kernel?\n", "\n", "1. **Noise Reduction**: Blurring can help reduce noise and unwanted artifacts in an image. High-frequency noise is often smoothed out, resulting in a cleaner appearance.\n", "\n", "2. **Image Smoothing**: Blurring is commonly used to smooth out textures and fine details in an image, creating a more cohesive and aesthetically pleasing result.\n", "\n", "3. **Edge Preservation**: While blurring reduces high-frequency details, certain blur techniques can preserve important edges while still providing a smoother overall appearance.\n", "\n", "4. **Preprocessing**: Blurring can be used as a preprocessing step for various computer vision tasks such as object detection and recognition, where the focus is on features rather than fine textures.\n", "\n", "5. **Privacy Protection**: Blurring or pixelating specific regions of an image can be used for privacy protection by making sensitive information less recognizable.\n", "\n", "6. **Artistic Effects**: Blurring can also be used creatively to achieve artistic effects or simulate depth of field in photography.\n", "\n", "Overall, blur kernels serve as a versatile tool in image processing with applications ranging from noise reduction to artistic manipulation." ] }, { "cell_type": "markdown", "id": "2ebb9154-cd94-4554-b121-346ef78b60d5", "metadata": {}, "source": [ "## 2.4 What is the minimum size of a kernel to obtain a derivative of order $d$?" 
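] }, { "cell_type": "markdown", "id": "0a1b2c3d-1111-4111-8111-000000000002", "metadata": {}, "source": [ "A quick empirical check before the written answer (a sketch; it relies only on NumPy's `np.convolve`): composing the two-tap difference $[1,-1]$ with itself $d$ times yields a kernel with $d+1$ taps whose entries are binomial coefficients with alternating signs." ] }, { "cell_type": "code", "execution_count": null, "id": "0a1b2c3d-1111-4111-8111-000000000003", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "\n", "# Iterate the two-tap difference [1, -1]: after d convolutions the kernel\n", "# has d + 1 taps with alternating binomial coefficients\n", "k = np.array([1.0])\n", "for d in range(1, 5):\n", "    k = np.convolve(k, [1.0, -1.0])\n", "    print(f'order {d}: size {k.size}, kernel {k}')"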
] }, { "cell_type": "markdown", "id": "3680d583-01dd-457d-89e9-05d8ec468ae9", "metadata": {}, "source": [ "need a kernel of size $2d+1$ along the direction in which we're calculating the derivative.\n", "\n", "One possible way to get the kernel size of k-order derivative in 1D is to use the finite difference approximation, which estimates the derivative of a function at a point by using the values of the function at nearby points. ยน For example, if we use the central difference formula to approximate the derivative, then we need a kernel of size 2k+1 to obtain a derivative of order k. This is because the central difference formula uses k points on each side of the center point to estimate the derivative. For instance, the first-order derivative can be approximated by using a kernel of size 3: $$\\frac{\\partial f}{\\partial x}(x)\\approx \\frac{f(x+1)-f(x-1)}{2}$$ The second-order derivative can be approximated by using a kernel of size 5: $$\\frac{\\partial^2 f}{\\partial x^2}(x)\\approx \\frac{f(x+2)-2f(x)+f(x-2)}{4}$$ And so on. However, if we use other types of kernels, such as Sobel or Laplace kernels, then we may need different sizes to obtain a derivative of order k. For example, the Sobel kernel can approximate the first-order derivative by using a kernel of size 3, but it cannot approximate the second-order derivative by using a single kernel. Instead, we need to apply the Sobel kernel twice or use another kernel, such as the Laplace kernel, which can approximate the second-order derivative by using a kernel of size 3. " ] }, { "cell_type": "markdown", "id": "5352cde1-11e5-4a5b-8ad2-066fcb425c95", "metadata": {}, "source": [ "# 3. When you try to automatically find the gradient for the Conv2D class we created, what kind of error message do you see?" 
] }, { "cell_type": "code", "execution_count": 401, "id": "826bcaa7-f8fb-44e4-9319-fe8b415161c8", "metadata": { "tags": [] }, "outputs": [], "source": [ "class Conv2D(nn.Module):\n", " def __init__(self, kernel_size):\n", " super().__init__()\n", " self.weight = nn.Parameter(torch.rand(kernel_size))\n", " self.bias = nn.Parameter(torch.zeros(1))\n", "\n", " def forward(self, x):\n", " return corr2d(x, self.weight) + self.bias" ] }, { "cell_type": "code", "execution_count": 404, "id": "f57ffb72-2e5c-4456-8a09-7881dc0604f1", "metadata": { "tags": [] }, "outputs": [ { "ename": "RuntimeError", "evalue": "grad can be implicitly created only for scalar outputs", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[404], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m output \u001b[38;5;241m=\u001b[39m custom_conv2d(x)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Compute gradients\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m \u001b[43moutput\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# Access gradients\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGradient of weight:\u001b[39m\u001b[38;5;124m\"\u001b[39m, custom_conv2d\u001b[38;5;241m.\u001b[39mweight\u001b[38;5;241m.\u001b[39mgrad)\n", "File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/autograd/__init__.py:193\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 189\u001b[0m inputs \u001b[38;5;241m=\u001b[39m (inputs,) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(inputs, torch\u001b[38;5;241m.\u001b[39mTensor) \u001b[38;5;28;01melse\u001b[39;00m \\\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28mtuple\u001b[39m(inputs) \u001b[38;5;28;01mif\u001b[39;00m inputs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m()\n\u001b[1;32m 192\u001b[0m grad_tensors_ \u001b[38;5;241m=\u001b[39m _tensor_or_tensors_to_tuple(grad_tensors, \u001b[38;5;28mlen\u001b[39m(tensors))\n\u001b[0;32m--> 193\u001b[0m grad_tensors_ \u001b[38;5;241m=\u001b[39m \u001b[43m_make_grads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_grads_batched\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retain_graph \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 195\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n", "File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/autograd/__init__.py:88\u001b[0m, in \u001b[0;36m_make_grads\u001b[0;34m(outputs, grads, is_grads_batched)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m out\u001b[38;5;241m.\u001b[39mrequires_grad:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m out\u001b[38;5;241m.\u001b[39mnumel() \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m---> 88\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgrad can be implicitly created only for scalar outputs\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 89\u001b[0m new_grads\u001b[38;5;241m.\u001b[39mappend(torch\u001b[38;5;241m.\u001b[39mones_like(out, memory_format\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mpreserve_format))\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "\u001b[0;31mRuntimeError\u001b[0m: grad can be implicitly created only for scalar outputs" ] } ], "source": [ "x = pic\n", "# Create an instance of your custom Conv2D layer\n", "custom_conv2d = Conv2D(kernel_size=(3, 3))\n", "\n", "# Set requires_grad to True\n", "x.requires_grad = True\n", "\n", "# Perform forward pass\n", "output = custom_conv2d(x)\n", "\n", "# Compute gradients\n", "output.backward()\n", "\n", "# Access gradients\n", "print(\"Gradient of weight:\", custom_conv2d.weight.grad)\n", "print(\"Gradient of bias:\", custom_conv2d.bias.grad)" ] }, { "cell_type": "markdown", "id": "39f41747-dbbf-4273-9d18-c2b37020afb4", "metadata": {}, "source": [ "# 4. How do you represent a cross-correlation operation as a matrix multiplication by changing the input and kernel tensors?" 
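] }, { "cell_type": "markdown", "id": "0a1b2c3d-1111-4111-8111-000000000006", "metadata": {}, "source": [ "There are two standard constructions. The `corr2d_matmul` cell below takes the kernel-matrix view: every row of the matrix is the zero-padded, flattened kernel shifted to one output position, and multiplying this matrix by the flattened input performs the cross-correlation. The dual im2col view instead unfolds each kernel-sized patch of the input into a column and multiplies by the flattened kernel; a sketch using PyTorch's `torch.nn.functional.unfold`:" ] }, { "cell_type": "code", "execution_count": null, "id": "0a1b2c3d-1111-4111-8111-000000000007", "metadata": { "tags": [] }, "outputs": [], "source": [ "# im2col sketch: unfold extracts every kernel-sized patch of X as a column,\n", "# so the cross-correlation becomes a single matrix product\n", "def corr2d_im2col(X, K):\n", "    h, w = K.shape\n", "    # cols has shape (1, h*w, L), with L = (H-h+1)*(W-w+1) output positions\n", "    cols = F.unfold(X.reshape(1, 1, *X.shape), kernel_size=(h, w))\n", "    out = K.reshape(1, 1, -1).float() @ cols  # (1, 1, L)\n", "    return out.reshape(X.shape[0] - h + 1, X.shape[1] - w + 1)\n", "\n", "Xt = torch.ones(3, 3)\n", "Kt = torch.tensor([[1, 1], [1, 1]])\n", "(corr2d_im2col(Xt, Kt) == corr2d(Xt, Kt)).all()"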
] }, { "cell_type": "code", "execution_count": 462, "id": "d1218ca5-0b09-4f68-838c-2771b26a7c33", "metadata": { "tags": [] }, "outputs": [], "source": [ "def corr2d_matmul(pic, K):\n", " pad_K = F.pad(K,(0,pic.shape[1]-K.shape[1],0,pic.shape[0]-K.shape[0])).type(torch.float32)\n", " l = []\n", " for i in range(pic.shape[0]-K.shape[0]+1):\n", " for j in range(pic.shape[1]-K.shape[1]+1):\n", " l.append(torch.roll(pad_K,(i,j),(0,1)).reshape(1,-1))\n", " print(torch.cat(l,dim=0))\n", " return (torch.cat(l,dim=0)@pic.reshape(-1,1)).reshape(pic.shape[0]-K.shape[0]+1,pic.shape[1]-K.shape[1]+1)" ] }, { "cell_type": "code", "execution_count": 463, "id": "959c2a94-abb9-40dc-858a-cc6eb17106c9", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[1., 1., 0., 1., 1., 0., 0., 0., 0.],\n", " [0., 1., 1., 0., 1., 1., 0., 0., 0.],\n", " [0., 0., 0., 1., 1., 0., 1., 1., 0.],\n", " [0., 0., 0., 0., 1., 1., 0., 1., 1.]])\n" ] }, { "data": { "text/plain": [ "tensor(True)" ] }, "execution_count": 463, "metadata": {}, "output_type": "execute_result" } ], "source": [ "K = torch.tensor([[1,1],[1,1]])\n", "x = torch.ones(3,3)\n", "(corr2d_matmul(x,K) == corr2d(x,K)).all()" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:d2l]", "language": "python", "name": "conda-env-d2l-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }