{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/mikhailklassen/CNN_MNIST/blob/main/MNIST_PyTorch.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YJU3pSHEVEca"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "from torchvision import datasets, transforms\n",
    "from torch.optim.lr_scheduler import ExponentialLR\n",
    "\n",
    "# Get CPU or GPU device for training\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "device = torch.device(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dvcDj6biVw3N"
   },
   "outputs": [],
   "source": [
    "# Random seed for reproducibility\n",
    "seed = 42\n",
    "torch.manual_seed(seed)\n",
    "\n",
    "# Save the model at the end?\n",
    "save_model = False\n",
    "\n",
    "# Batch sizes for training and testing\n",
    "batch_size = 64\n",
    "test_batch_size = 14\n",
    "\n",
    "# Training epochs\n",
    "n_epochs = 10\n",
    "\n",
    "# Learning rate\n",
    "learning_rate = 1.0\n",
    "\n",
    "# Decay rate for adjusting the learning rate\n",
    "gamma = 0.7\n",
    "\n",
    "# How many batches before logging training status\n",
    "log_interval = 10\n",
    "\n",
    "# Number of target classes in the MNIST data\n",
    "num_classes = 10\n",
    "\n",
    "train_kwargs = {'batch_size': batch_size}\n",
    "test_kwargs = {'batch_size': test_batch_size}\n",
    "\n",
    "# CUDA settings\n",
    "if torch.cuda.is_available():\n",
    "    cuda_kwargs = {'num_workers': 1,\n",
    "                   'pin_memory': True,\n",
    "                   'shuffle': True}\n",
    "    train_kwargs.update(cuda_kwargs)\n",
    "    test_kwargs.update(cuda_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QIUfqzekLRSd"
   },
   "outputs": [],
   "source": [
    "# The scaled mean and standard deviation of the MNIST dataset (precalculated)\n",
    "data_mean = 0.1307\n",
    "data_std = 0.3081\n",
    "\n",
    "# Convert input images to tensors and normalize\n",
    "transform=transforms.Compose([\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize((data_mean,), (data_std,))\n",
    "    ])\n",
    "\n",
    "# Get the MNIST data from torchvision\n",
    "dataset1 = datasets.MNIST('../data', train=True, download=True,\n",
    "                    transform=transform)\n",
    "dataset2 = datasets.MNIST('../data', train=False,\n",
    "                    transform=transform)\n",
    "\n",
    "# Define the data loaders that will handle fetching of data\n",
    "train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)\n",
    "test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "firvobxRVIgm"
   },
   "outputs": [],
   "source": [
    "# Define the architecture of the neural network\n",
    "class Net(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding='valid')\n",
    "        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='valid')\n",
    "        self.dropout1 = nn.Dropout(0.25)\n",
    "        self.dropout2 = nn.Dropout(0.5)\n",
    "        self.fc1 = nn.Linear(9216, 128)\n",
    "        self.fc2 = nn.Linear(128, num_classes)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.conv1(x)\n",
    "        x = F.relu(x)\n",
    "        x = self.conv2(x)\n",
    "        x = F.relu(x)\n",
    "        x = F.max_pool2d(x, 2)\n",
    "        x = self.dropout1(x)\n",
    "        x = torch.flatten(x, 1)\n",
    "        x = self.fc1(x)\n",
    "        x = F.relu(x)\n",
    "        x = self.dropout2(x)\n",
    "        x = self.fc2(x)\n",
    "        output = F.softmax(x, dim=1)\n",
    "        return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "pxG7nB1xVnYA"
   },
   "outputs": [],
   "source": [
    "def train(model, device, train_loader, optimizer, epoch, log_interval):\n",
    "    model.train()\n",
    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
    "        data, target = data.to(device), target.to(device)\n",
    "        optimizer.zero_grad()\n",
    "        output = model(data)\n",
    "        loss = F.cross_entropy(output, target)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        if batch_idx % log_interval == 0:\n",
    "            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
    "                epoch, batch_idx * len(data), len(train_loader.dataset),\n",
    "                100. * batch_idx / len(train_loader), loss.item()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jrTKi_uBVpLH"
   },
   "outputs": [],
   "source": [
    "def test(model, device, test_loader):\n",
    "    model.eval()\n",
    "    test_loss = 0\n",
    "    correct = 0\n",
    "    with torch.no_grad():\n",
    "        for data, target in test_loader:\n",
    "            data, target = data.to(device), target.to(device)\n",
    "            output = model(data)\n",
    "            # sum up batch loss\n",
    "            test_loss += F.nll_loss(output, target, reduction='sum').item()\n",
    "            # get the index of the max log-probability\n",
    "            pred = output.argmax(dim=1, keepdim=True)\n",
    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
    "\n",
    "    test_loss /= len(test_loader.dataset)\n",
    "\n",
    "    print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
    "        test_loss, correct, len(test_loader.dataset),\n",
    "        100. * correct / len(test_loader.dataset)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "M00667mnZhJ2",
    "outputId": "06217f4b-f947-4091-8be1-130d2afb2301"
   },
   "outputs": [],
   "source": [
    "# Send the model to the device (CPU or GPU)\n",
    "model = Net().to(device)\n",
    "\n",
    "# Define the optimizer to user for gradient descent\n",
    "optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)\n",
    "\n",
    "# Shrinks the learning rate by gamma every step_size\n",
    "scheduler = ExponentialLR(optimizer, gamma=gamma)\n",
    "\n",
    "# Train the model\n",
    "for epoch in range(1, n_epochs + 1):\n",
    "    train(model, device, train_loader, optimizer, epoch, log_interval)\n",
    "    test(model, device, test_loader)\n",
    "    scheduler.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CsuYPI3hYUiJ"
   },
   "outputs": [],
   "source": [
    "if save_model:\n",
    "    torch.save(model.state_dict(), \"mnist_cnn_pytorch.ckpt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def visualize_and_predict(model, device, data_loader):\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        # Extract the first batch of images and labels\n",
    "        data, target = next(iter(data_loader))\n",
    "        # Select the first image and label\n",
    "        img, label = data[0], target[0]\n",
    "        \n",
    "        # Visualize the image\n",
    "        plt.imshow(img.squeeze(), cmap='gray')\n",
    "        plt.title(f'Actual Label: {label.item()}')\n",
    "        plt.show()\n",
    "\n",
    "        # Run inference\n",
    "        img = img.to(device)\n",
    "        output = model(img.unsqueeze(0))  # Add batch dimension\n",
    "        pred = output.argmax(dim=1, keepdim=True)\n",
    "\n",
    "        print(f'Predicted Label: {pred.item()}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "visualize_and_predict(model, device, test_loader)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "authorship_tag": "ABX9TyM0OdaiF8Pr7l3VAayhM54C",
   "include_colab_link": true,
   "name": "MNIST - PyTorch.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}