{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from nb_002 import *\n", "\n", "import typing, os\n", "from typing import Dict, Any, AnyStr, List, Sequence, TypeVar, Tuple, Optional, Union" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "DATA_PATH = Path('data')\n", "# PATH = DATA_PATH/'cifar10_dog_air'\n", "PATH = DATA_PATH/'cifar10'\n", "\n", "train_ds = ImageDataset.from_folder(PATH/'train')\n", "valid_ds = ImageDataset.from_folder(PATH/'test', train_ds.classes)\n", "\n", "x = train_ds[1][0]\n", "bs=256\n", "c = len(train_ds.classes)\n", "len(train_ds)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# CIFAR augmentation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfms = [flip_lr(p=0.5),\n", " pad(padding=4),\n", " crop(size=32, row_pct=(0,1.), col_pct=(0,1.))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_tds = DatasetTfm(train_ds, tfms)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axes = plt.subplots(1,4, figsize=(12,9))\n", "for ax in axes.flat: train_tds[1][0].show(ax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Normalization and training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To train our network, the first step is to normalize our pixels. This makes our cost function faster and easier to optimize [(see Yann LeCun's paper, section 4.3)](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf).\n", "\n", "Normalization is a pixel transform since it directly modifies the pixels of our input image."
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "#export\n",
 "def normalize(x:TensorImage, mean:Tensor, std:Tensor)->TensorImage:\n",
 "    \"Subtract `mean` from `x` and divide by `std`; both stats get two trailing unit axes so they broadcast\"\n",
 "    return (x-mean[...,None,None]) / std[...,None,None]\n",
 "\n",
 "def denormalize(x:TensorImage, mean:Tensor, std:Tensor)->TensorImage:\n",
 "    \"Undo `normalize`: multiply `x` by `std` and add `mean` (same trailing-axis broadcasting)\"\n",
 "    return x*std[...,None,None] + mean[...,None,None]\n",
 "\n",
 "def normalize_batch(b:Tuple[Tensor,Tensor], mean:Tensor, std:Tensor, do_y:bool=False)->Tuple[Tensor,Tensor]:\n",
 "    \"`b` = `x`,`y` - normalize `x` array of imgs and `do_y` optionally `y`\"\n",
 "    x,y = b\n",
 "    x = normalize(x,mean,std)\n",
 "    if do_y: y = normalize(y,mean,std)\n",
 "    return x,y\n",
 "\n",
 "def normalize_funcs(mean:Tensor, std:Tensor, do_y:bool=False, device:Optional[torch.device]=None)->Tuple[Callable,Callable]:\n",
 "    \"Create normalize/denormalize func using `mean` and `std`, can specify `do_y` and `device`\"\n",
 "    if device is None: device=default_device\n",
 "    # `do_y` was accepted but never forwarded, so `y` could not be normalized through this factory.\n",
 "    # The normalize partial gets `mean`/`std` moved to `device`; the denormalize partial keeps them as passed.\n",
 "    return (partial(normalize_batch, mean=mean.to(device), std=std.to(device), do_y=do_y),\n",
 "            partial(denormalize,     mean=mean,            std=std))\n",
 "\n",
 "def transform_datasets(train_ds:Dataset, valid_ds:Dataset, test_ds:Optional[Dataset]=None,\n",
 "                       tfms:Optional[Tuple[TfmList,TfmList]]=None, **kwargs:Any):\n",
 "    \"Create train, valid and maybe test `DatasetTfm` using `tfms` = (train_tfms,valid_tfms)\"\n",
 "    # NOTE: `tfms` is indexed unconditionally, so it must be supplied even though it defaults to None.\n",
 "    # The test set, when present, reuses the validation transforms `tfms[1]`.\n",
 "    res = [DatasetTfm(train_ds, tfms[0], **kwargs),\n",
 "           DatasetTfm(valid_ds, tfms[1], **kwargs)]\n",
 "    if test_ds is not None: res.append(DatasetTfm(test_ds, tfms[1], **kwargs))\n",
 "    return res"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "#export\n",
 "# CIFAR 10 stats looked up on google \n",
 "cifar_stats = (tensor([0.491, 0.482, 0.447]), tensor([0.247, 0.243, 0.261]))\n",
 "cifar_norm,cifar_denorm = normalize_funcs(*cifar_stats)\n",
 "\n",
 "def num_cpus()->int:\n",
 "    \"Get number of cpus: size of the scheduler affinity set if available, else `os.cpu_count()` (1 if unknown)\"\n",
 "    try: return len(os.sched_getaffinity(0))\n",
 "    except AttributeError:\n",
 "        # `os.cpu_count()` may return None; fall back to 1 so `min(16, num_cpus())` below cannot fail\n",
 "        return os.cpu_count() or 1\n",
 "\n",
 "default_cpus = min(16, num_cpus())"
] },
{
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "#export\n",
 "@dataclass\n",
 "class DeviceDataLoader():\n",
 "    \"Binds a `DataLoader` to a `torch.device`\"\n",
 "    dl: DataLoader\n",
 "    device: torch.device\n",
 "    tfms: List[Callable]=None           # callables applied to every batch, in order, by `proc_batch`\n",
 "    collate_fn: Callable=data_collate   # installed on `dl` in `__post_init__`\n",
 "    def __post_init__(self):\n",
 "        \"Install `collate_fn` on the wrapped `dl` and pass `tfms` through `listify`\"\n",
 "        self.dl.collate_fn=self.collate_fn\n",
 "        self.tfms = listify(self.tfms)\n",
 "\n",
 "    def __len__(self)->int: return len(self.dl)\n",
 "    def __getattr__(self,k:str)->Any:\n",
 "        \"Delegate attributes not found on `self` to the wrapped `dl`\"\n",
 "        # Only reached when normal lookup fails. If `dl` itself is missing (bare instance, e.g. while\n",
 "        # being copied or unpickled), `self.dl` would re-enter this method without end.\n",
 "        if k == 'dl': raise AttributeError(k)\n",
 "        return getattr(self.dl, k)\n",
 "\n",
 "    def add_tfm(self,tfm:Callable)->None:    self.tfms.append(tfm)\n",
 "    def remove_tfm(self,tfm:Callable)->None: self.tfms.remove(tfm)\n",
 "\n",
 "    def proc_batch(self,b:Tensor)->Tensor:\n",
 "        \"Process batch `b`: pass it through `to_device` for `self.device`, then apply each of `self.tfms` in order\"\n",
 "        b = to_device(b, self.device)\n",
 "        for f in listify(self.tfms): b = f(b)\n",
 "        return b\n",
 "\n",
 "    def __iter__(self):\n",
 "        \"Process and return items from `DataLoader`\"\n",
 "        # the lazy `map` is kept on `self.gen`, as before\n",
 "        self.gen = map(self.proc_batch, self.dl)\n",
 "        return iter(self.gen)\n",
 "\n",
 "    @classmethod\n",
 "    def create(cls, dataset:Dataset, bs:int=1, shuffle:bool=False, device:torch.device=default_device,\n",
 "               tfms:TfmList=None, num_workers:int=default_cpus, collate_fn:Callable=data_collate, **kwargs:Any):\n",
 "        \"Create DeviceDataLoader from `dataset` with batch size `bs` and `shuffle`: process using `num_workers`\"\n",
 "        # `tfms` now defaults to None. The previous default was the notebook-global `tfms` list, which is\n",
 "        # defined in a cell without `#export` and holds the per-image augmentations, not batch transforms.\n",
 "        return cls(DataLoader(dataset, batch_size=bs, shuffle=shuffle, num_workers=num_workers, **kwargs),\n",
 "                   device=device, tfms=tfms, collate_fn=collate_fn)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "#export\n",
 "class DataBunch():\n",
 "    \"Bind `train_dl`,`valid_dl` and `test_dl` to `device`. tfms are DL tfms (normalize). `path` is for models.\"\n",
 "    def __init__(self, train_dl:DataLoader, valid_dl:DataLoader, test_dl:Optional[DataLoader]=None,\n",
 "                 device:torch.device=None, tfms:Optional[Collection[Callable]]=None, path:PathOrStr='.'):\n",
 "        \"Wrap each `DataLoader` in a `DeviceDataLoader` on `device` (`default_device` if None), passing the same `tfms` to each\"\n",
 "        self.device = default_device if device is None else device\n",
 "        self.train_dl = DeviceDataLoader(train_dl, self.device, tfms=tfms)\n",
 "        self.valid_dl = DeviceDataLoader(valid_dl, self.device, tfms=tfms)\n",
 "        # compare with None explicitly: truthiness of a `DataLoader` goes through `len()`\n",
 "        self.test_dl  = DeviceDataLoader(test_dl,  self.device, tfms=tfms) if test_dl is not None else None\n",
 "        self.path = Path(path)\n",
 "\n",
 "    @classmethod\n",
 "    def create(cls, train_ds, valid_ds, test_ds=None,\n",
 "               path='.', bs=64, ds_tfms=None, num_workers=default_cpus,\n",
 "               tfms=None, device=None, size=None, **kwargs)->'DataBunch':\n",
 "        \"`DataBunch` factory. `bs` batch size, `ds_tfms` for `Dataset`, `tfms` for `DataLoader`\"\n",
 "        datasets = [train_ds,valid_ds]\n",
 "        if test_ds is not None: datasets.append(test_ds)\n",
 "        # `size` and `**kwargs` are only consumed here, i.e. only when `ds_tfms` is given\n",
 "        if ds_tfms: datasets = transform_datasets(*datasets, tfms=ds_tfms, size=size, **kwargs)\n",
 "        # train: batch size `bs`, shuffled; valid/test: batch size `bs*2`, not shuffled\n",
 "        dls = [DataLoader(*o, num_workers=num_workers) for o in\n",
 "               zip(datasets, (bs,bs*2,bs*2), (True,False,False))]\n",
 "        return cls(*dls, path=path, device=device, tfms=tfms)\n",
 "\n",
 "    def __getattr__(self,k)->Any:\n",
 "        \"Delegate attributes not found on `self` to the training dataset\"\n",
 "        # `train_ds` reads `self.train_dl`; when that is missing (bare instance) both lookups land back\n",
 "        # here, so refuse them explicitly instead of recursing without end.\n",
 "        if k in ('train_dl','train_ds'): raise AttributeError(k)\n",
 "        return getattr(self.train_ds, k)\n",
 "\n",
 "    def holdout(self, is_test:bool=False)->DeviceDataLoader:\n",
 "        \"Returns the holdout `DeviceDataLoader`: `test_dl` if `is_test` else `valid_dl`\"\n",
 "        return self.test_dl if is_test else self.valid_dl\n",
 "\n",
 "    @property\n",
 "    def train_ds(self)->Dataset: return self.train_dl.dl.dataset\n",
 "    @property\n",
 "    def valid_ds(self)->Dataset: return self.valid_dl.dl.dataset\n",
 "\n",
 "def data_from_imagefolder(path:PathOrStr, train:PathOrStr='train', valid:PathOrStr='valid',\n",
 "                          test:Optional[PathOrStr]=None, **kwargs:Any)->DataBunch:\n",
 "    \"Create `DataBunch` from imagenet style dataset in `path` with `train`,`valid`,`test` subfolders\"\n",
 "    path=Path(path)\n",
 "    train_ds = ImageDataset.from_folder(path/train)\n",
 "    # validation and test reuse the classes found in the training folder\n",
 "    datasets = [train_ds, ImageDataset.from_folder(path/valid, classes=train_ds.classes)]\n",
 "    if test: datasets.append(ImageDataset.from_single_folder(\n",
 "        path/test,classes=train_ds.classes))\n",
 "    # remaining keyword arguments (`bs`, `ds_tfms`, `tfms`, `num_workers`, ...) go to `DataBunch.create`\n",
 "    return DataBunch.create(*datasets, path=path, **kwargs)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Augmentations must be passed as `ds_tfms` = (train_tfms, valid_tfms): `DataBunch.create` has no\n",
 "# `train_tfm` parameter, so that keyword fell into `**kwargs` and the transforms were never applied.\n",
 "data = data_from_imagefolder(PATH, valid='test', bs=bs, ds_tfms=(tfms, []), tfms=cifar_norm, num_workers=12)\n",
 "data.device"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "x,y = next(iter(data.train_dl))\n",
 "x = x.cpu()\n",
 "x = cifar_denorm(x)\n",
 "show_images(x,y,6,train_ds.classes, figsize=(9,10))\n",
 "print(x.min(),x.max(),x.mean(),x.std())"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "learn = Learner(data, simple_cnn([3,16,16,c], [3,3,3], [2,2,2]))\n",
 "opt_fn = partial(optim.SGD, momentum=0.9)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "learn.fit(1, 0.1, opt_fn=opt_fn)"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [
 "# Darknet"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [
 "Now we are going to try our transforms on an architecture similar to the [darknet-53](https://pjreddie.com/media/files/papers/yolo.pdf) architecture. Note that it is not the whole architecture, just the part of it that the authors pre-trained on Imagenet (see paper, section 2.2). 
This is the basis of any modern ResNet based architecture and it is good for experimenting.\n",
 "\n",
 "If you are interested in a full, step-by-step description of this architecture please refer to a [video explanation](https://youtu.be/ondivPiwQho?t=0h11m07s) in Lesson 12 of Part 2 of the course or a [written transcript](https://medium.com/@hiromi_suenaga/deep-learning-2-part-2-lesson-12-215dfbf04a94), courtesy of @hiromi."
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "#export\n",
 "def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1)->nn.Sequential:\n",
 "    \"Create Conv2d->BatchNorm2d->LeakyReLu layer: `ni` input, `nf` out filters, `ks` kernel, `stride`:stride\"\n",
 "    return nn.Sequential(\n",
 "        # bias-free conv; padding of `ks//2` keeps the spatial size for odd `ks` at stride 1\n",
 "        nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2),\n",
 "        nn.BatchNorm2d(nf),\n",
 "        nn.LeakyReLU(negative_slope=0.1, inplace=True))\n",
 "\n",
 "class ResLayer(nn.Module):\n",
 "    \"Resnet style `ResLayer`\"\n",
 "    def __init__(self, ni:int):\n",
 "        \"create ResLayer with `ni` inputs\"\n",
 "        super().__init__()\n",
 "        self.conv1=conv_layer(ni, ni//2, ks=1)   # 1x1 conv down to `ni//2` channels\n",
 "        self.conv2=conv_layer(ni//2, ni, ks=3)   # 3x3 conv back to `ni` channels so the skip-add shapes match\n",
 "\n",
 "    def forward(self, x:Tensor)->Tensor: return x + self.conv2(self.conv1(x))\n",
 "\n",
 "class Darknet(nn.Module):\n",
 "    \"https://github.com/pjreddie/darknet\"\n",
 "    def make_group_layer(self, ch_in:int, num_blocks:int, stride:int=1)->List[nn.Module]:\n",
 "        \"starts with conv layer - `ch_in` channels in - then has `num_blocks` `ResLayer`\"\n",
 "        group = [conv_layer(ch_in, ch_in*2, stride=stride)]\n",
 "        group += [ResLayer(ch_in*2) for _ in range(num_blocks)]\n",
 "        return group\n",
 "\n",
 "    def __init__(self, num_blocks:Sequence[int], num_classes:int, nf:int=32):\n",
 "        \"create darknet with `nf` initial filters; `num_blocks[i]` is the number of `ResLayer` in group `i`\"\n",
 "        super().__init__()\n",
 "        layers = [conv_layer(3, nf, ks=3, stride=1)]\n",
 "        for i,nb in enumerate(num_blocks):\n",
 "            # every group uses stride 2 except the second one (i==1), which uses stride 1\n",
 "            layers += self.make_group_layer(nf, nb, stride=2-(i==1))\n",
 "            nf *= 2   # each group doubles the channel count\n",
 "        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]\n",
 "        self.layers = nn.Sequential(*layers)\n",
 "\n",
 "    def forward(self, x:Tensor)->Tensor: return self.layers(x)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "model = Darknet([1, 2, 4, 4, 2], num_classes=c, nf=16)\n",
 "learner = Learner(data, model)\n",
 "opt_fn = partial(optim.SGD, momentum=0.9)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "learner.fit(1, 0.1, opt_fn=opt_fn)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# for lr in (0.1,0.2,0.4,0.8,0.1,0.01):\n",
 "#     momentum = 0.95 if lr<0.1 else 0.85 if lr>0.5 else 0.9\n",
 "#     learner.fit(2, lr, opt_fn=partial(optim.SGD, momentum=momentum))"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [
 "# Fin"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } },
"nbformat": 4,
"nbformat_minor": 2 }