{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lesson 1 Experiments\n", "This section just reproduces lesson 1 logic using my own code and with 30 tennis and 30 basketball player images. I chose all male players for simplicity. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Put these at the top of every notebook, to get automatic reloading and inline plotting\n", "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This file contains all the main external libs we'll use\n", "from fastai.imports import *\n", "from fastai.transforms import *\n", "from fastai.conv_learner import *\n", "from fastai.model import *\n", "from fastai.dataset import *\n", "from fastai.sgdr import *\n", "from fastai.plots import *\n", "from typing import List, Union\n", "from pathlib import Path\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download the Sample Data\n", "Only execute the cell below once! If the commands below don't work, try the direct link [here](https://1drv.ms/u/s!AkhwiUY5vHPCs03Q26908HIwKFkG)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!wget 'https://onedrive.live.com/download?cid=C273BC3946897048&resid=C273BC3946897048%216605&authkey=AIVFQLj7IoJYiz4' -O foo.zip\n", "!unzip -d data foo.zip \n", "!rm foo.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the Sample Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sz=224\n", "path = Path('data/tennisbball')\n", "path.absolute(), list(path.glob('*'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sample = plt.imread(next(iter((path / 'valid' / 'tennis').iterdir())))\n", "plt.imshow(sample)\n", "plt.figure()\n", "sample = plt.imread(next(iter((path / 'valid' / 'bball').iterdir())))\n", "plt.imshow(sample)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sample.shape, sample[:4,:4]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.cuda.is_available(),torch.backends.cudnn.enabled" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Construct the Model\n", "Define the model architecture" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#tfms_from_model -- model based image transforms (preprocessing stats)\n", "arch=resnet50\n", "data = ImageClassifierData.from_paths(path, test_name='test', test_with_labels=True, tfms=tfms_from_model(arch, sz))\n", "\n", "#precompute=True to save conv layer activations! pass False if you want to run the data viz below\n", "learner = ConvLearner.pretrained(f=arch, data=data, precompute=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train a Model\n", "This section trains a model using transfer learning." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "learner.fit(0.01, 15)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#uncomment line below to save the model\n", "\n", "#learner.save('tennis_v_bball.lrnr')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load/Visualize an Existing Model\n", "Or if you've already trained a model, skip the above section and start from here." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.load('tennis_v_bball.lrnr')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "probs = np.exp(learner.predict())\n", "probs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#TODO: improve\n", "def display_images(images:List[Union[Path, np.ndarray]], columns:int, titles:List[str]=None, figsize=None) -> None:\n", " if not titles:\n", " titles = [f'Image {i+1}' for i in range(len(images))]\n", " rows = len(images) // columns + int(len(images) % columns > 0)\n", " if figsize is None:\n", " figsize = (60,60)\n", " plt.figure(figsize=figsize)\n", " for i, (image, title) in enumerate(zip(images, titles)):\n", " if isinstance(image, Path):\n", " image = np.array(PIL.Image.open(image))\n", " plt.subplot(rows, columns, i+1)\n", " plt.imshow(image)\n", " plt.title(title, fontsize=10*columns)\n", " plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#val images\n", "predictions = probs.argmax(axis=1)\n", "images, titles = [], []\n", "for prob, pclass, fname in zip(probs, predictions, data.val_ds.fnames):\n", " images.append(path / fname)\n", " titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')\n", " \n", "display_images(images, 4, titles)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_probs = np.exp(learner.predict(is_test=True))\n", "test_predictions = test_probs.argmax(axis=1)\n", "\n", "#test images\n", "images, titles = [],[]\n", "for prob, pclass, fname in zip(test_probs, test_predictions, data.test_ds.fnames):\n", " images.append(path / fname)\n", " titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')\n", " \n", "display_images(images, 4, titles)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataviz -- Activations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#check out the model structure\n", "model = learner.model\n", "model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# utilize torch hooks to capture the activations for any conv layer. 
, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataviz -- Activations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#check out the model structure\n", "model = learner.model\n", "model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# use torch forward hooks to capture the activations of any conv layer; for\n", "# simplicity we use a batch size of 1.\n", "#\n", "class ActivationHook:\n", "    def __init__(self):\n", "        self.output = []\n", "\n", "    def __call__(self, module, input, output):\n", "        self.output = output.data\n", "\n", "def find_layers(module, ltype):\n", "    rv = []\n", "    if isinstance(module, ltype):\n", "        rv.append(module)\n", "    else:\n", "        for c in module.children():\n", "            rv.extend(find_layers(c, ltype))\n", "\n", "    return rv\n", "\n", "def capture_activations(model, x):\n", "    layers = find_layers(model, nn.Conv2d)\n", "    hooks = [ActivationHook() for _ in layers]\n", "    handles = [conv.register_forward_hook(hook) for conv, hook in zip(layers, hooks)]\n", "    model(x)\n", "    for h in handles:\n", "        h.remove()\n", "\n", "    return [h.output for h in hooks]\n", "\n", "bs = data.bs\n", "data.bs = 1\n", "dl = data.get_dl(data.test_ds, False)\n", "i = iter(dl)\n", "ball_x = next(i)[0]\n", "noball_x = next(i)[0]\n", "data.bs = bs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ball_activations = capture_activations(model, Variable(ball_x))\n", "noball_activations = capture_activations(model, Variable(noball_x))\n", "for i, layer_output in enumerate(ball_activations):\n", "    print(f'Layer {i}: {layer_output.squeeze().shape}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "#at layer 5, filters 18 and 36 seem to respond to circular things\n", "layer_idx = 0\n", "num_filters = ball_activations[layer_idx].shape[1]\n", "asize = ball_activations[layer_idx].shape[2]\n", "\n", "def filter_activations_to_image(activations, lidx, fidx):\n", "    a = activations[lidx].squeeze() #choose conv layer & discard batch dimension\n", "    a = a[fidx] #choose conv filter\n", "    a = (a - a.mean())/(3*a.std()) + 0.5 #center and scale down\n", "    a = a.clamp(0, 1).numpy() # and finally clamp\n", "    return a\n", "\n", "buff_size = 10\n", "for filter_idx in range(num_filters):\n", "    a0 = filter_activations_to_image(ball_activations, layer_idx, filter_idx)\n", "    a1 = filter_activations_to_image(noball_activations, layer_idx, filter_idx)\n", "    z = np.hstack([a0, np.ones((asize, buff_size)), a1]) #ball activations left, no-ball right\n", "    plt.imshow(z, cmap='gray')\n", "    plt.axis('off')\n", "    plt.title(f'Filter {filter_idx}')\n", "    plt.show()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataViz -- Filters\n", "\n", "We can also look at the filters themselves. This is easiest at the first layer, where each filter has depth 3 and can be rendered as an RGB image." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.colors as mc\n", "import math\n", "conv = find_layers(learner.model, nn.Conv2d)[0]\n", "weight = conv.weight.data.numpy()\n", "\n", "num_filters, depth, h, w = weight.shape\n", "\n", "rows = int(num_filters**0.5)\n", "cols = int(math.ceil(num_filters/rows))\n", "border = 1\n", "img = np.zeros((depth, rows*h + (1+rows)*border, cols*w + (1+cols)*border))\n", "for f in range(num_filters):\n", "    r = f // cols #row-major placement in the grid\n", "    c = f % cols\n", "    x = border + r * (h+border)\n", "    y = border + c * (w+border)\n", "    norm = mc.Normalize()\n", "    img[:, x:x+h, y:y+w] = norm(weight[f, :, :, :])\n", "\n", "plt.figure(figsize=(12,12))\n", "plt.imshow(img.transpose(1,2,0))\n", "_ = plt.axis('off')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can also visualize later layers, though the result is less pretty: each depth slice of each filter is mapped to grayscale." ] }
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# for i, conv in enumerate(find_layers(learner.model, nn.Conv2d)):\n", "# print(conv, conv.weight.shape)\n", "weight = find_layers(learner.model, nn.Conv2d)[2].weight.data.numpy()\n", "num_filters, depth, w, h = weight.shape\n", "rows = num_filters\n", "cols = depth\n", "border = 1\n", "img = np.zeros((rows*h + (1+rows)*border, cols*w + (1+cols)*border))\n", "for f in range(num_filters):\n", " norm = mc.Normalize()\n", " normed = norm(weight[f, :, :, :]) #normalize over all the weights in a filter\n", " for d in range(depth):\n", " r = f\n", " c = d\n", " x = border + r * (w+border)\n", " y = border + c * (w+border)\n", " img[x:x+w, y:y+h] = normed[d]\n", "\n", "plt.figure(figsize=(18,18))\n", "plt.imshow(img, cmap='gray')\n", "_ = plt.axis('off')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Occlusion\n", "We can also mask out portions of the image by sliding a gray block over the image repeatedly and record how the predictions change." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / data.test_ds.fnames[0]\n", "image = open_image(image_path)\n", "image[50:250, 50:250] = np.full((200,200,3), 0.75)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)\n", "plt.imshow(image)\n", "_ = plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / data.test_ds.fnames[0]\n", "orig_image = open_image(image_path)\n", "# image[0:200, 0:200] = np.full((200,200,3), 0.75)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)\n", "# plt.imshow(image)\n", "plt.axis('off')\n", "\n", "#the prediction for the smaller image should be essentially unchanged\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "w,h,_ = scaled_image.shape\n", "learner.model.eval()\n", "t0 = time.time()\n", "prob_map = np.zeros((2, w, h))\n", "\n", "z = 0\n", "\n", "#TODO: add stride for efficiency.\n", "for x in tqdm(range(1 - block_size, w)):\n", " for y in range(1 - block_size, h):\n", " image = np.array(scaled_image)\n", " x0, x1 = max(0, x), min(w, x + block_size)\n", " y0, y1 = max(0, y), min(h, y + block_size)\n", " image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)\n", " image = tfms_from_model(arch, sz)[1](image)\n", " predictions = learner.model(VV(image).unsqueeze(0)) \n", " prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]\n", " prob_map[1,x0:x1,y0:y1] += 1\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.save('probs-heatmap.npy', prob_map)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "heatmap = prob_map[0]/prob_map[1]\n", "plt.subplot(1,2,1)\n", "plt.imshow(1 - heatmap, cmap='jet')\n", "plt.axis('off')\n", "plt.subplot(1,2,2)\n", "plt.imshow(orig_image)\n", "_ = plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / 'valid/bball/29.jpg'\n", "orig_image = open_image(image_path)\n", "# image[0:200, 0:200] = np.full((200,200,3), 0.75)\n", "scaled_image = 
, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / 'valid/bball/29.jpg'\n", "orig_image = open_image(image_path)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "\n", "#sanity check: the prediction for the rescaled image should be essentially unchanged\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "h,w,_ = scaled_image.shape\n", "learner.model.eval()\n", "prob_map = np.zeros((2, h, w))\n", "\n", "#TODO: add stride for efficiency.\n", "for x in tqdm(range(1 - block_size, h)):\n", "    for y in range(1 - block_size, w):\n", "        image = np.array(scaled_image)\n", "        x0, x1 = max(0, x), min(h, x + block_size)\n", "        y0, y1 = max(0, y), min(w, y + block_size)\n", "        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)\n", "        image = tfms_from_model(arch, sz)[1](image)\n", "        predictions = learner.model(VV(image).unsqueeze(0))\n", "        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]\n", "        prob_map[1,x0:x1,y0:y1] += 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.save('probs-giannis-heatmap.npy', prob_map)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "heatmap = prob_map[0]/prob_map[1]\n", "plt.subplot(1,2,1)\n", "plt.imshow(1 - heatmap, cmap='jet')\n", "plt.axis('off')\n", "plt.subplot(1,2,2)\n", "plt.imshow(orig_image)\n", "_ = plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / 'valid/tennis/23.jpg'\n", "orig_image = open_image(image_path)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "plt.imshow(scaled_image)\n", "\n", "#sanity check: the prediction for the rescaled image should be essentially unchanged\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "h,w,_ = scaled_image.shape\n", "learner.model.eval()\n", "prob_map = np.zeros((2, h, w))\n", "\n", "#TODO: add stride for efficiency.\n", "for x in tqdm(range(1 - block_size, h)):\n", "    for y in range(1 - block_size, w):\n", "        image = np.array(scaled_image)\n", "        x0, x1 = max(0, x), min(h, x + block_size)\n", "        y0, y1 = max(0, y), min(w, y + block_size)\n", "        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)\n", "        image = tfms_from_model(arch, sz)[1](image)\n", "        predictions = learner.model(VV(image).unsqueeze(0))\n", "        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]\n", "        prob_map[1,x0:x1,y0:y1] += 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.save('probs-tennis-heatmap.npy', prob_map)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "heatmap = prob_map[0]/prob_map[1]\n", "plt.subplot(1,2,1)\n", "#prob_map[0] tracks P(class 0); for this tennis image we plot heatmap directly rather than 1 - heatmap\n", "plt.imshow(heatmap, cmap='jet')\n", "plt.axis('off')\n", "plt.subplot(1,2,2)\n", "plt.imshow(orig_image)\n", "_ = plt.axis('off')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } },
"nbformat": 4, "nbformat_minor": 2 }