{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", "- Author: Sebastian Raschka\n", "- GitHub Repository: https://github.com/rasbt/deeplearning-models" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sebastian Raschka \n", "\n", "CPython 3.6.8\n", "IPython 7.2.0\n", "\n", "torch 1.0.0\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -a 'Sebastian Raschka' -v -p torch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Runs on CPU or GPU (if available)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Model Zoo -- Autoencoder" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A simple, single-layer autoencoder that compresses 784-pixel (28x28) MNIST images into 32-dimensional vectors (24.5-times smaller representations)."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Device: cuda:0\n",
    "Image batch dimensions: torch.Size([256, 1, 28, 28])\n",
    "Image label dimensions: torch.Size([256])\n" ] } ],
  "source": [
    "import time\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn.functional as F\n",
    "from torchvision import datasets\n",
    "from torchvision import transforms\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "# Ask cuDNN for deterministic algorithms when a GPU is present\n",
    "if torch.cuda.is_available():\n",
    "    torch.backends.cudnn.deterministic = True\n",
    "\n",
    "\n",
    "##########################\n",
    "### SETTINGS\n",
    "##########################\n",
    "\n",
    "# Device: first CUDA GPU when available, CPU otherwise\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "print('Device:', device)\n",
    "\n",
    "# Hyperparameters\n",
    "random_seed = 123\n",
    "learning_rate = 0.005\n",
    "num_epochs = 5\n",
    "batch_size = 256\n",
    "\n",
    "# Architecture\n",
    "num_features = 784  # 28*28 pixels per flattened image\n",
    "num_hidden_1 = 32   # width of the hidden (encoded) layer\n",
    "\n",
    "\n",
    "##########################\n",
    "### MNIST DATASET\n",
    "##########################\n",
    "\n",
    "# transforms.ToTensor() rescales the input images to the 0-1 range\n",
    "train_dataset = datasets.MNIST(\n",
    "    root='data',\n",
    "    train=True,\n",
    "    transform=transforms.ToTensor(),\n",
    "    download=True,\n",
    ")\n",
    "\n",
    "test_dataset = datasets.MNIST(\n",
    "    root='data',\n",
    "    train=False,\n",
    "    transform=transforms.ToTensor(),\n",
    ")\n",
    "\n",
    "# Only the training batches are shuffled\n",
    "train_loader = DataLoader(\n",
    "    dataset=train_dataset,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=True,\n",
    ")\n",
    "\n",
    "test_loader = DataLoader(\n",
    "    dataset=test_dataset,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=False,\n",
    ")\n",
    "\n",
    "# Checking the dataset: look at the first training batch only\n",
    "images, labels = next(iter(train_loader))\n",
    "print('Image batch dimensions:', images.shape)\n",
    "print('Image label dimensions:', labels.shape)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Model" ] },
{
"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "##########################\n", "### MODEL\n", "##########################\n", "\n", "class Autoencoder(torch.nn.Module):\n", "\n", " def __init__(self, num_features):\n", " super(Autoencoder, self).__init__()\n", " \n", " ### ENCODER\n", " self.linear_1 = torch.nn.Linear(num_features, num_hidden_1)\n", " # The following to lones are not necessary, \n", " # but used here to demonstrate how to access the weights\n", " # and use a different weight initialization.\n", " # By default, PyTorch uses Xavier/Glorot initialization, which\n", " # should usually be preferred.\n", " self.linear_1.weight.detach().normal_(0.0, 0.1)\n", " self.linear_1.bias.detach().zero_()\n", " \n", " ### DECODER\n", " self.linear_2 = torch.nn.Linear(num_hidden_1, num_features)\n", " self.linear_1.weight.detach().normal_(0.0, 0.1)\n", " self.linear_1.bias.detach().zero_()\n", " \n", "\n", " def forward(self, x):\n", " \n", " ### ENCODER\n", " encoded = self.linear_1(x)\n", " encoded = F.leaky_relu(encoded)\n", " \n", " ### DECODER\n", " logits = self.linear_2(encoded)\n", " decoded = torch.sigmoid(logits)\n", " \n", " return decoded\n", "\n", " \n", "torch.manual_seed(random_seed)\n", "model = Autoencoder(num_features=num_features)\n", "model = model.to(device)\n", "\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "## Training" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch: 001/005 | Batch 000/235 | Cost: 0.7100\n", "Epoch: 001/005 | Batch 050/235 | Cost: 0.2028\n", "Epoch: 001/005 | Batch 100/235 | Cost: 0.1636\n", "Epoch: 001/005 | Batch 150/235 | Cost: 0.1349\n", "Epoch: 001/005 | Batch 200/235 | Cost: 0.1302\n", "Time elapsed: 0.10 min\n", "Epoch: 002/005 | Batch 000/235 | Cost: 0.1239\n", 
"Epoch: 002/005 | Batch 050/235 | Cost: 0.1130\n", "Epoch: 002/005 | Batch 100/235 | Cost: 0.1097\n", "Epoch: 002/005 | Batch 150/235 | Cost: 0.1061\n", "Epoch: 002/005 | Batch 200/235 | Cost: 0.1035\n", "Time elapsed: 0.21 min\n", "Epoch: 003/005 | Batch 000/235 | Cost: 0.1010\n", "Epoch: 003/005 | Batch 050/235 | Cost: 0.0975\n", "Epoch: 003/005 | Batch 100/235 | Cost: 0.0983\n", "Epoch: 003/005 | Batch 150/235 | Cost: 0.0975\n", "Epoch: 003/005 | Batch 200/235 | Cost: 0.0937\n", "Time elapsed: 0.31 min\n", "Epoch: 004/005 | Batch 000/235 | Cost: 0.0946\n", "Epoch: 004/005 | Batch 050/235 | Cost: 0.0961\n", "Epoch: 004/005 | Batch 100/235 | Cost: 0.0960\n", "Epoch: 004/005 | Batch 150/235 | Cost: 0.0971\n", "Epoch: 004/005 | Batch 200/235 | Cost: 0.0899\n", "Time elapsed: 0.42 min\n", "Epoch: 005/005 | Batch 000/235 | Cost: 0.0948\n", "Epoch: 005/005 | Batch 050/235 | Cost: 0.0927\n", "Epoch: 005/005 | Batch 100/235 | Cost: 0.0932\n", "Epoch: 005/005 | Batch 150/235 | Cost: 0.0938\n", "Epoch: 005/005 | Batch 200/235 | Cost: 0.0935\n", "Time elapsed: 0.52 min\n", "Total Training Time: 0.52 min\n" ] } ], "source": [ "start_time = time.time()\n", "for epoch in range(num_epochs):\n", " for batch_idx, (features, targets) in enumerate(train_loader):\n", " \n", " # don't need labels, only the images (features)\n", " features = features.view(-1, 28*28).to(device)\n", " \n", " ### FORWARD AND BACK PROP\n", " decoded = model(features)\n", " cost = F.binary_cross_entropy(decoded, features)\n", " optimizer.zero_grad()\n", " \n", " cost.backward()\n", " \n", " ### UPDATE MODEL PARAMETERS\n", " optimizer.step()\n", " \n", " ### LOGGING\n", " if not batch_idx % 50:\n", " print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' \n", " %(epoch+1, num_epochs, batch_idx, \n", " len(train_loader), cost))\n", " \n", " print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))\n", " \n", "print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "## Evaluation" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "##########################\n", "### VISUALIZATION\n", "##########################\n", "\n", "n_images = 15\n", "image_width = 28\n", "\n", "fig, axes = plt.subplots(nrows=2, ncols=n_images, \n", " sharex=True, sharey=True, figsize=(20, 2.5))\n", "orig_images = features[:n_images]\n", "decoded_images = decoded[:n_images]\n", "\n", "for i in range(n_images):\n", " for ax, img in zip(axes, [orig_images, decoded_images]):\n", " curr_img = img[i].detach().to(torch.device('cpu'))\n", " ax[i].imshow(curr_img.view((image_width, image_width)), cmap='binary')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "numpy 1.15.4\n", "torch 1.0.0\n", "\n" ] } ], "source": [ "%watermark -iv" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" }, "toc": { "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }