{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YJU3pSHEVEca" }, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "from torchvision import datasets, transforms\n", "from torch.optim.lr_scheduler import ExponentialLR\n", "\n", "# Get CPU or GPU device for training\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "device = torch.device(device)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dvcDj6biVw3N" }, "outputs": [], "source": [ "# Random seed for reproducibility\n", "seed = 42\n", "torch.manual_seed(seed)\n", "\n", "# Save the model at the end?\n", "save_model = False\n", "\n", "# Batch sizes for training and testing\n", "batch_size = 64\n", "test_batch_size = 14\n", "\n", "# Training epochs\n", "n_epochs = 10\n", "\n", "# Learning rate\n", "learning_rate = 1.0\n", "\n", "# Decay rate for adjusting the learning rate\n", "gamma = 0.7\n", "\n", "# How many batches before logging training status\n", "log_interval = 10\n", "\n", "# Number of target classes in the MNIST data\n", "num_classes = 10\n", "\n", "train_kwargs = {'batch_size': batch_size}\n", "test_kwargs = {'batch_size': test_batch_size}\n", "\n", "# CUDA settings\n", "if torch.cuda.is_available():\n", " cuda_kwargs = {'num_workers': 1,\n", " 'pin_memory': True,\n", " 'shuffle': True}\n", " train_kwargs.update(cuda_kwargs)\n", " test_kwargs.update(cuda_kwargs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QIUfqzekLRSd" }, "outputs": [], "source": [ "# The scaled mean and standard deviation of the MNIST dataset (precalculated)\n", "data_mean = 0.1307\n", "data_std = 0.3081\n", "\n", "# Convert input images to tensors and normalize\n", "transform=transforms.Compose([\n", " transforms.ToTensor(),\n", " transforms.Normalize((data_mean,), (data_std,))\n", " ])\n", "\n", "# Get the MNIST data from torchvision\n", "dataset1 = datasets.MNIST('../data', train=True, download=True,\n", " transform=transform)\n", "dataset2 = datasets.MNIST('../data', train=False,\n", " transform=transform)\n", "\n", "# Define the data loaders that will handle fetching of data\n", "train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n", "test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "firvobxRVIgm" }, "outputs": [], "source": [ "# Define the architecture of the neural network\n", "class Net(nn.Module):\n", " def __init__(self):\n", " super(Net, self).__init__()\n", " self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding='valid')\n", " self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='valid')\n", " self.dropout1 = nn.Dropout(0.25)\n", " self.dropout2 = nn.Dropout(0.5)\n", " self.fc1 = nn.Linear(9216, 128)\n", " self.fc2 = nn.Linear(128, num_classes)\n", "\n", " def forward(self, x):\n", " x = self.conv1(x)\n", " x = F.relu(x)\n", " x = self.conv2(x)\n", " x = F.relu(x)\n", " x = F.max_pool2d(x, 2)\n", " x = self.dropout1(x)\n", " x = torch.flatten(x, 1)\n", " x = self.fc1(x)\n", " x = F.relu(x)\n", " x = self.dropout2(x)\n", " x = self.fc2(x)\n", " output = F.softmax(x, dim=1)\n", " return output" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pxG7nB1xVnYA" }, "outputs": [], "source": [ "def train(model, device, train_loader, optimizer, epoch, log_interval):\n", " model.train()\n", " for batch_idx, (data, target) in enumerate(train_loader):\n", " data, target = data.to(device), target.to(device)\n", " optimizer.zero_grad()\n", " output = model(data)\n", " loss = F.cross_entropy(output, target)\n", " loss.backward()\n", " optimizer.step()\n", " if batch_idx % log_interval == 0:\n", " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", " epoch, batch_idx * len(data), len(train_loader.dataset),\n", " 100. * batch_idx / len(train_loader), loss.item()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jrTKi_uBVpLH" }, "outputs": [], "source": [ "def test(model, device, test_loader):\n", " model.eval()\n", " test_loss = 0\n", " correct = 0\n", " with torch.no_grad():\n", " for data, target in test_loader:\n", " data, target = data.to(device), target.to(device)\n", " output = model(data)\n", " # sum up batch loss\n", " test_loss += F.nll_loss(output, target, reduction='sum').item()\n", " # get the index of the max log-probability\n", " pred = output.argmax(dim=1, keepdim=True)\n", " correct += pred.eq(target.view_as(pred)).sum().item()\n", "\n", " test_loss /= len(test_loader.dataset)\n", "\n", " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", " test_loss, correct, len(test_loader.dataset),\n", " 100. * correct / len(test_loader.dataset)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "M00667mnZhJ2", "outputId": "06217f4b-f947-4091-8be1-130d2afb2301" }, "outputs": [], "source": [ "# Send the model to the device (CPU or GPU)\n", "model = Net().to(device)\n", "\n", "# Define the optimizer to user for gradient descent\n", "optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)\n", "\n", "# Shrinks the learning rate by gamma every step_size\n", "scheduler = ExponentialLR(optimizer, gamma=gamma)\n", "\n", "# Train the model\n", "for epoch in range(1, n_epochs + 1):\n", " train(model, device, train_loader, optimizer, epoch, log_interval)\n", " test(model, device, test_loader)\n", " scheduler.step()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "CsuYPI3hYUiJ" }, "outputs": [], "source": [ "if save_model:\n", " torch.save(model.state_dict(), \"mnist_cnn_pytorch.ckpt\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def visualize_and_predict(model, device, data_loader):\n", " model.eval()\n", " with torch.no_grad():\n", " # Extract the first batch of images and labels\n", " data, target = next(iter(data_loader))\n", " # Select the first image and label\n", " img, label = data[0], target[0]\n", " \n", " # Visualize the image\n", " plt.imshow(img.squeeze(), cmap='gray')\n", " plt.title(f'Actual Label: {label.item()}')\n", " plt.show()\n", "\n", " # Run inference\n", " img = img.to(device)\n", " output = model(img.unsqueeze(0)) # Add batch dimension\n", " pred = output.argmax(dim=1, keepdim=True)\n", "\n", " print(f'Predicted Label: {pred.item()}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "visualize_and_predict(model, device, test_loader)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "authorship_tag": "ABX9TyM0OdaiF8Pr7l3VAayhM54C", "include_colab_link": true, "name": "MNIST - PyTorch.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 1 }