{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lesson 1 Experiments\n", "This section just reproduces lesson 1 logic using my own code and with 30 tennis and 30 basketball player images. I chose all male players for simplicity. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Put these at the top of every notebook, to get automatic reloading and inline plotting\n", "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This file contains all the main external libs we'll use\n", "from fastai.imports import *\n", "from fastai.transforms import *\n", "from fastai.conv_learner import *\n", "from fastai.model import *\n", "from fastai.dataset import *\n", "from fastai.sgdr import *\n", "from fastai.plots import *\n", "from typing import List, Union\n", "from pathlib import Path\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download the Sample Data\n", "Only execute the cell below once! If the commands below don't work, try the direct link [here](https://1drv.ms/u/s!AkhwiUY5vHPCs03Q26908HIwKFkG)." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Fetch and unpack the sample images only when they are not already on disk, so that\n", "# re-running this cell (e.g. Restart & Run All) does not download the archive again.\n", "if not Path('data/tennisbball').exists():\n", "    !wget 'https://onedrive.live.com/download?cid=C273BC3946897048&resid=C273BC3946897048%216605&authkey=AIVFQLj7IoJYiz4' -O foo.zip\n", "    !unzip -d data foo.zip\n", "    !rm foo.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the Sample Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sz = 224  # image size handed to tfms_from_model below\n", "path = Path('data/tennisbball')\n", "path.absolute(), list(path.glob('*'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Show one validation image per class. iterdir() yields entries in arbitrary order,\n", "# so sort first to make the pick reproducible between runs.\n", "sample = plt.imread(sorted((path / 'valid' / 'tennis').iterdir())[0])\n", "plt.imshow(sample)\n", "plt.figure()\n", "sample = plt.imread(sorted((path / 'valid' / 'bball').iterdir())[0])\n", "plt.imshow(sample)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# shape and top-left 4x4 pixels of the last image shown above\n", "sample.shape, sample[:4,:4]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.cuda.is_available(), torch.backends.cudnn.enabled" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Construct the Model\n", "Define the model architecture" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#tfms_from_model -- model based image transforms (preprocessing stats)\n", "arch = resnet50\n", "data = ImageClassifierData.from_paths(path, test_name='test', test_with_labels=True, tfms=tfms_from_model(arch, sz))\n", "\n", "#precompute=True to save conv layer activations! pass False if you want to run the data viz below\n", "learner = ConvLearner.pretrained(f=arch, data=data, precompute=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train a Model\n", "This section trains a model using transfer learning." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "learner.fit(0.01, 15)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#uncomment line below to save the model\n", "\n", "#learner.save('tennis_v_bball.lrnr')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load/Visualize an Existing Model\n", "Or if you've already trained a model, skip the above section and start from here." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.load('tennis_v_bball.lrnr')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# exp() of the model output gives the per-class values shown in the titles below\n", "probs = np.exp(learner.predict())\n", "probs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#TODO: improve\n", "def display_images(images:List[Union[Path, np.ndarray]], columns:int, titles:List[str]=None, figsize=None) -> None:\n", "    \"\"\"Show `images` in a grid that is `columns` wide, one titled subplot per image.\n", "\n", "    images  -- image file paths and/or already-loaded arrays\n", "    columns -- number of subplots per row\n", "    titles  -- one title per image; defaults to 'Image 1', 'Image 2', ...\n", "    figsize -- matplotlib figure size; defaults to (60, 60)\n", "    \"\"\"\n", "    if not titles:\n", "        titles = [f'Image {i+1}' for i in range(len(images))]\n", "    rows = len(images) // columns + int(len(images) % columns > 0)  # ceil(len(images) / columns)\n", "    if figsize is None:\n", "        figsize = (60,60)\n", "    plt.figure(figsize=figsize)\n", "    for i, (image, title) in enumerate(zip(images, titles)):\n", "        if isinstance(image, Path):\n", "            # read inside a context manager so the file handle is closed right away\n", "            with PIL.Image.open(image) as image_file:\n", "                image = np.array(image_file)\n", "        plt.subplot(rows, columns, i+1)\n", "        plt.imshow(image)\n", "        plt.title(title, fontsize=10*columns)\n", "        plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#val images\n", "predictions = probs.argmax(axis=1)  # index of the highest-scoring class per image\n", "images, titles = [], []\n", "for prob, pclass, fname in zip(probs, predictions, data.val_ds.fnames):\n", "    images.append(path / fname)\n", "    titles.append(f'{fname} -- {prob[pclass]:.3f} ({data.classes[pclass]})')\n", "\n", "display_images(images, 4, titles)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 
"outputs": [], "source": [ "test_probs = np.exp(learner.predict(is_test=True))\n", "test_predictions = test_probs.argmax(axis=1)  # index of the highest-scoring class per image\n", "\n", "#test images\n", "images, titles = [], []\n", "for prob, pclass, fname in zip(test_probs, test_predictions, data.test_ds.fnames):\n", "    images.append(path / fname)\n", "    titles.append(f'{fname} -- {prob[pclass]:.3f} ({data.classes[pclass]})')\n", "\n", "display_images(images, 4, titles)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataviz -- Activations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#check out the model structure\n", "model = learner.model\n", "model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# utilize torch hooks to capture the activations for any conv layer. for simplicity we use a\n", "# batch size of 1.\n", "#\n", "class ActivationHook:\n", "    \"\"\"Forward hook that remembers the most recent output of the module it is registered on.\"\"\"\n", "    def __init__(self):\n", "        self.output = None  # replaced by the module output once the hook has fired\n", "\n", "    def __call__(self, module, input, output):\n", "        self.output = output.data\n", "\n", "def find_layers(module, ltype):\n", "    \"\"\"Return every sub-module of `module` (itself included) that is an instance of `ltype`.\n", "\n", "    The search is depth-first over module.children(); a module that matches is not searched further.\n", "    \"\"\"\n", "    rv = []\n", "    if isinstance(module, ltype):\n", "        rv.append(module)\n", "    else:\n", "        for c in module.children():\n", "            rv.extend(find_layers(c, ltype))\n", "\n", "    return rv\n", "\n", "def capture_activations(model, x):\n", "    \"\"\"Run `model` on `x` once and return the output of each nn.Conv2d layer, in find_layers order.\"\"\"\n", "    layers = find_layers(model, nn.Conv2d)\n", "    hooks = [ActivationHook() for _ in layers]\n", "    handles = [conv.register_forward_hook(hook) for conv, hook in zip(layers, hooks)]\n", "    try:\n", "        model(x)\n", "    finally:\n", "        # always detach the hooks, even when the forward pass raises\n", "        for h in handles:\n", "            h.remove()\n", "\n", "    return [h.output for h in hooks]\n", "\n", "# temporarily switch to batch size 1 to pull two single-image batches from the test set\n", "bs = data.bs\n", "data.bs = 1\n", "try:\n", "    dl = data.get_dl(data.test_ds, False)\n", "    i = iter(dl)\n", "    ball_x = next(i)[0]\n", "    noball_x = next(i)[0]\n", "finally:\n", "    data.bs = bs  # restore the original batch size even if loading fails" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ball_activations = capture_activations(model, Variable(ball_x))\n", "noball_activations = 
capture_activations(model, Variable(noball_x))\n", "for i, layer_output in enumerate(ball_activations):\n", "    print(f'Layer {i}: {layer_output.squeeze().shape}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "#layer 5, filter 18, 36 seems to like circular type things\n", "layer_idx = 0\n", "images = []\n", "titles = []\n", "num_filters = ball_activations[layer_idx].shape[1]\n", "asize = ball_activations[layer_idx].shape[2]\n", "\n", "def filter_activations_to_image(activations, lidx, fidx):\n", "    \"\"\"Turn the activation map of filter `fidx` in conv layer `lidx` into a 2-D numpy image in [0, 1].\n", "\n", "    `activations` is the list returned by capture_activations for a batch of one image.\n", "    \"\"\"\n", "    a = activations[lidx].squeeze() #choose conv layer & discard batch dimension\n", "    a = a[fidx] #choose conv filter\n", "    a = (a - a.mean())/(3*a.std()) + 0.5 #center and scale down\n", "    a = a.clamp(0, 1).cpu().numpy() #clamp to [0, 1]; .cpu() so this also works for tensors on the GPU\n", "    return a\n", "\n", "buff_size = 10 #width in pixels of the white gap between the two activation maps\n", "for filter_idx in range(num_filters):\n", "    a0 = filter_activations_to_image(ball_activations, layer_idx, filter_idx)\n", "    a1 = filter_activations_to_image(noball_activations, layer_idx, filter_idx)\n", "    z = np.hstack([a0, np.ones((asize, buff_size)), a1]) #first image | gap | second image\n", "    plt.imshow(z, cmap='gray')\n", "    plt.axis('off')\n", "    plt.title(f'Filter {filter_idx}')\n", "    plt.show()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataViz -- Filters\n", "\n", "We can also look at filters. This is easiest at the first layer where each filter is 3 dimensional." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.colors as mc\n", "import math\n", "conv = find_layers(learner.model, nn.Conv2d)[0]\n", "# .cpu() so the conversion also works when the model weights live on the GPU\n", "weight = conv.weight.data.cpu().numpy()\n", "\n", "# Conv2d weights are laid out as (filters, input channels, kernel height, kernel width)\n", "num_filters, depth, h, w = weight.shape\n", "\n", "# tile the filters into a roughly square grid with a 1-pixel border around every tile\n", "rows = int(num_filters**0.5)\n", "cols = int(math.ceil(num_filters/rows))\n", "border = 1\n", "img = np.zeros((depth, rows*h + (1+rows)*border, cols*w + (1+cols)*border))\n", "for f in range(num_filters):\n", "    r, c = divmod(f, cols)  # row-major position of filter f in the grid\n", "    x = border + r * (h+border)  # top edge of the tile\n", "    y = border + c * (w+border)  # left edge of the tile\n", "    norm = mc.Normalize()  # new instance per filter, so each filter is scaled by its own min/max\n", "    img[:, x:x+h, y:y+w] = norm(weight[f, :, :, :])\n", "\n", "plt.figure(figsize=(12,12))\n", "plt.imshow(img.transpose(1,2,0))  # channels last, as imshow expects for colour images\n", "_ = plt.axis('off')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can also visualize subsequent layers, though it's not so pretty. We can map each dimension of each filter back into grayscale." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# for i, conv in enumerate(find_layers(learner.model, nn.Conv2d)):\n", "#     print(conv, conv.weight.shape)\n", "# .cpu() so the conversion also works when the model weights live on the GPU\n", "weight = find_layers(learner.model, nn.Conv2d)[2].weight.data.cpu().numpy()\n", "num_filters, depth, h, w = weight.shape  # (filters, input channels, kernel height, kernel width)\n", "# one grid row per filter, one grid column per input channel\n", "rows = num_filters\n", "cols = depth\n", "border = 1\n", "img = np.zeros((rows*h + (1+rows)*border, cols*w + (1+cols)*border))\n", "for f in range(num_filters):\n", "    norm = mc.Normalize()\n", "    normed = norm(weight[f, :, :, :]) #normalize over all the weights in a filter\n", "    for d in range(depth):\n", "        x = border + f * (h+border)  # top edge of the tile\n", "        y = border + d * (w+border)  # left edge of the tile\n", "        img[x:x+h, y:y+w] = normed[d]\n", "\n", "plt.figure(figsize=(18,18))\n", "plt.imshow(img, cmap='gray')\n", "_ = plt.axis('off')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Occlusion\n", "We can also mask out portions of the image by sliding a gray block over 
the image repeatedly and record how the predictions change." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / data.test_ds.fnames[0]\n", "orig_image = open_image(image_path)\n", "# Illustration only: gray out one patch on a copy so that orig_image stays untouched.\n", "# NOTE(review): assumes open_image returns a numpy array -- confirm.\n", "image = orig_image.copy()\n", "image[50:250, 50:250] = 0.75  # scalar assignment also copes with images smaller than 250 px\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "plt.imshow(image)\n", "_ = plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def occlusion_prob_map(scaled_image, block_size=50, fill=0.75, stride=1):\n", "    \"\"\"Slide a `block_size` x `block_size` patch of value `fill` over `scaled_image` and record the model output.\n", "\n", "    Returns an array of shape (2, rows, cols). Entry [0] holds, per pixel, the sum of\n", "    exp(model output)[0][0] over every patch position that covered the pixel; entry [1] holds the\n", "    number of such positions, so [0] / [1] is the per-pixel average.\n", "    `stride` is the step between patch positions; it must lie between 1 and `block_size`,\n", "    otherwise some pixels would never be covered.\n", "    Uses the notebook globals `learner`, `arch` and `sz`.\n", "    \"\"\"\n", "    if not 1 <= stride <= block_size:\n", "        raise ValueError('stride must be between 1 and block_size')\n", "    n_rows, n_cols, _ = scaled_image.shape\n", "    val_tfms = tfms_from_model(arch, sz)[1]  # built once; the same transform is applied at every position\n", "    learner.model.eval()\n", "    prob_map = np.zeros((2, n_rows, n_cols))\n", "    # start at 1 - block_size so that patches partially overlapping the top/left edges are included\n", "    for r in tqdm(range(1 - block_size, n_rows, stride)):\n", "        for c in range(1 - block_size, n_cols, stride):\n", "            occluded = np.array(scaled_image)  # fresh copy for every patch position\n", "            r0, r1 = max(0, r), min(n_rows, r + block_size)\n", "            c0, c1 = max(0, c), min(n_cols, c + block_size)\n", "            occluded[r0:r1, c0:c1] = fill\n", "            predictions = learner.model(VV(val_tfms(occluded)).unsqueeze(0))\n", "            prob_map[0, r0:r1, c0:c1] += predictions.exp().data[0][0]\n", "            prob_map[1, r0:r1, c0:c1] += 1\n", "    return prob_map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / data.test_ds.fnames[0]\n", "orig_image = open_image(image_path)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "\n", "#the prediction for the smaller image should be essentially unchanged\n", "learner.model.eval()  # put the model in eval mode before the reference prediction\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "\n", "prob_map = occlusion_prob_map(scaled_image, block_size)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.save('probs-heatmap.npy', prob_map)" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ "# average recorded value per pixel: accumulated sum divided by the number of covering patches\n", "heatmap = prob_map[0]/prob_map[1]\n", "plt.subplot(1,2,1)\n", "plt.imshow(1 - heatmap, cmap='jet')\n", "plt.axis('off')\n", "plt.subplot(1,2,2)\n", "plt.imshow(orig_image)\n", "_ = plt.axis('off')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / 'valid/bball/29.jpg'\n", "orig_image = open_image(image_path)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "\n", "#the prediction for the smaller image should be essentially unchanged\n", "learner.model.eval()  # put the model in eval mode before the reference prediction\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "\n", "prob_map = occlusion_prob_map(scaled_image, block_size)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.save('probs-giannis-heatmap.npy', prob_map)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# average recorded value per pixel: accumulated sum divided by the number of covering patches\n", "heatmap = prob_map[0]/prob_map[1]\n", "plt.subplot(1,2,1)\n", "plt.imshow(1 - heatmap, cmap='jet')\n", "plt.axis('off')\n", "plt.subplot(1,2,2)\n", "plt.imshow(orig_image)\n", "_ = plt.axis('off')" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "block_size = 50\n", "image_path = path / 'valid/tennis/23.jpg'\n", "orig_image = open_image(image_path)\n", "# image[0:200, 0:200] = np.full((200,200,3), 0.75)\n", "scaled_image = Scale(sz=224).do_transform(orig_image, False)\n", "# orig_image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)\n", "plt.imshow(scaled_image)\n", "# plt.axis('off')\n", "\n", "#the prediction for the smaller image should be essentially unchanged\n", "print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())\n", "w,h,_ = scaled_image.shape\n", "learner.model.eval()\n", "t0 = time.time()\n", "prob_map = np.zeros((2, w, h))\n", "\n", "z = 0\n", "\n", "#TODO: add stride for efficiency.\n", "for x in tqdm(range(1 - 