{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "512b9d458893e474da69aa7e23a01e24", "grade": false, "grade_id": "cell-9e42398fb4955fba", "locked": true, "schema_version": 1, "solution": false } }, "outputs": [], "source": [ "# Execute this code block to install dependencies when running on colab\n", "try:\n", " import torch\n", "except:\n", " from os.path import exists\n", " from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag\n", " platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())\n", " cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\\.\\([0-9]*\\)\\.\\([0-9]*\\)$/cu\\1\\2/'\n", " accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'\n", "\n", " !pip install -q http://download.pytorch.org/whl/{accelerator}/torch-1.0.0-{platform}-linux_x86_64.whl torchvision" ] }, { "cell_type": "markdown", "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "a83d3d20445bce6b3565c220a1d1b5b5", "grade": false, "grade_id": "cell-9daf9d2f6ba17cf3", "locked": true, "schema_version": 1, "solution": false } }, "source": [ "# Part 3: Variational Autoencoders (VAE)\n", "\n", "For this part of the lab, you will implement a VAE. There is some code below which will help you run the learning procedure, however, you will need to complete the definition of the loss function. Start by implementing the encoder and decoder for the VAE. There is a diagram below illustrating what these architectures should look like.\n", "\n", "Let's start by loading the Fashion-MNIST dataset again and transforming the data to a flattened tensor." ] }, { "cell_type": "markdown", "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "0f9c6bc81b6a042933f8c866f2536bf4", "grade": false, "grade_id": "cell-2122f281579eb211", "locked": true, "schema_version": 1, "solution": false } }, "source": [ "### Loading the Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "0773f20f2005e856ca3a56cfcd912f7f", "grade": false, "grade_id": "cell-fb05179f34afa2fb", "locked": true, "schema_version": 1, "solution": false } }, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import torchvision\n", "import torchvision.transforms as transforms\n", "\n", "batch_size = 256\n", "image_dim = 784 #flattened\n", "\n", "# dataset construction\n", "transform = transforms.Compose([\n", " transforms.ToTensor(), # convert to tensor\n", " transforms.Lambda(lambda x: x.view(image_dim)) # flatten into vector\n", " ])\n", "\n", "train_set = torchvision.datasets.FashionMNIST(\n", " root='./data/FashionMNIST'\n", " ,train=True\n", " ,download=True\n", " ,transform=transform\n", ")\n", "\n", "train_loader = torch.utils.data.DataLoader(\n", " train_set, batch_size=batch_size\n", ")" ] }, { "cell_type": "markdown", "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "55716d8caf68b36778b5ab313e00b514", "grade": false, "grade_id": "cell-c4a66cd9fec76585", "locked": true, "schema_version": 1, "solution": false } }, "source": [ "## Build a Simple Variational Autoencoder\n", "\n", "" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "nbgrader": { "checksum": "36de01cb9c9a800ede758c8c39856959", "grade": true, "grade_id": "cell-cce2169671951c0d", "locked": false, "points": 6, "schema_version": 1, "solution": true } }, "outputs": [], 
"source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "\n", "from tqdm.autonotebook import tqdm\n", "from itertools import chain\n", "\n", "import numpy as np\n", "\n", "class Encoder(nn.Module):\n", " '''\n", " simple encoder with a single hidden dense layer (ReLU activation)\n", " and linear projections to the diag-Gauss parameters\n", " '''\n", " # YOUR CODE HERE\n", " raise NotImplementedError()\n", "\n", "class Decoder(nn.Module):\n", " '''\n", " simple decoder: single dense hidden layer (ReLU activation) followed by \n", " output layer with a sigmoid to squish values\n", " '''\n", " # YOUR CODE HERE\n", " raise NotImplementedError()" ] }, { "cell_type": "markdown", "metadata": { "deletable": false, "editable": false, "nbgrader": { "checksum": "215aceedbc5f7dbc7aeb23f959a99b88", "grade": false, "grade_id": "cell-b4513299e5279b61", "locked": true, "schema_version": 1, "solution": false } }, "source": [ "## You may complete the code below to test your implementation, or alternately, rewrite your own.\n", "\n", "#### Once you've trained the network plot some reconstructions side-by-side with the original images and reflect on how good the reconstructions are (or aren't!). Also try generating some random images by sampling the prior and feeding the vectors to the decoder." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "nbgrader": { "checksum": "62b7f846bcfd0fbb26177b35925d7af0", "grade": true, "grade_id": "cell-85a3a1c4ee56b86b", "locked": false, "points": 0, "schema_version": 1, "solution": true } }, "outputs": [], "source": [ "import matplotlib.gridspec as gridspec \n", "import os\n", "\n", "# Sampling function (using the reparameterisation trick)\n", "def sample(mu, log_sigma2):\n", " eps = torch.randn(mu.shape[0], mu.shape[1])\n", " return mu + torch.exp(log_sigma2 / 2) * eps\n", "\n", "\n", "#parameters\n", "batch_size = 256\n", "embedding_dim = 2\n", "enc_hidden_units = 512\n", "dec_hidden_units = 512\n", "nEpoch = 10\n", "\n", "# construct the encoder, decoder and optimiser\n", "enc = Encoder(image_dim, enc_hidden_units, embedding_dim)\n", "dec = Decoder(embedding_dim, dec_hidden_units, image_dim)\n", "optimizer = optim.Adam(chain(enc.parameters(), dec.parameters()), lr=1e-3)\n", "\n", "# training loop\n", "for epoch in range(nEpoch):\n", " losses = []\n", " trainloader = tqdm(train_loader)\n", "\n", " for i, data in enumerate(trainloader, 0):\n", " inputs, _ = data\n", "\n", " optimizer.zero_grad()\n", "\n", " mu, log_sigma2 = enc(inputs)\n", " z = sample(mu, log_sigma2)\n", " outputs = dec(z)\n", "\n", " # E[log P(X|z)] - as images are binary it makes most sense to use binary cross entropy\n", " # we need to be a little careful - by default torch averages over every observation \n", " # (e.g. 
{ "cell_type": "code", "execution_count": null, "metadata": { "deletable": false, "nbgrader": { "checksum": "62b7f846bcfd0fbb26177b35925d7af0", "grade": true, "grade_id": "cell-85a3a1c4ee56b86b", "locked": false, "points": 0, "schema_version": 1, "solution": true } }, "outputs": [], "source": [ "import matplotlib.gridspec as gridspec \n", "import os\n", "\n", "# Sampling function (using the reparameterisation trick)\n", "def sample(mu, log_sigma2):\n", " eps = torch.randn(mu.shape[0], mu.shape[1])\n", " return mu + torch.exp(log_sigma2 / 2) * eps\n", "\n", "\n", "#parameters\n", "batch_size = 256\n", "embedding_dim = 2\n", "enc_hidden_units = 512\n", "dec_hidden_units = 512\n", "nEpoch = 10\n", "\n", "# construct the encoder, decoder and optimiser\n", "enc = Encoder(image_dim, enc_hidden_units, embedding_dim)\n", "dec = Decoder(embedding_dim, dec_hidden_units, image_dim)\n", "optimizer = optim.Adam(chain(enc.parameters(), dec.parameters()), lr=1e-3)\n", "\n", "# training loop\n", "for epoch in range(nEpoch):\n", " losses = []\n", " trainloader = tqdm(train_loader)\n", "\n", " for i, data in enumerate(trainloader, 0):\n", " inputs, _ = data\n", "\n", " optimizer.zero_grad()\n", "\n", " mu, log_sigma2 = enc(inputs)\n", " z = sample(mu, log_sigma2)\n", " outputs = dec(z)\n", "\n", " # E[log P(X|z)] - as images are binary it makes most sense to use binary cross entropy\n", " # we need to be a little careful - by default torch averages over every observation \n", " # (e.g. each pixel in each image of each batch), whereas we want the average over entire\n", " # images instead\n", " recon = F.binary_cross_entropy(outputs, inputs, reduction='sum') / inputs.shape[0]\n", " \n", " kl = 0 \n", " # kl = D_KL(Q(z|X) || P(z)) - calculate in closed form (P(z) is the N(0, I) prior)\n", " # Compute the term kl which is then added to the total loss\n", " # YOUR CODE HERE\n", " raise NotImplementedError()\n", " \n", " loss = recon + kl\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # keep track of the loss and update the stats\n", " losses.append(loss.item())\n", " trainloader.set_postfix(loss=np.mean(losses), epoch=epoch)\n", "\n", " \n", " ## Please display some of the generated images in the submitted Notebook\n", " # YOUR CODE HERE\n", " raise NotImplementedError()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 }