{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from nb_005b import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Carvana" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "(See final section of notebook for one-time data processing steps.)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PATH = Path('../../data/carvana')\n", "PATH_PNG = PATH/'train_masks_png'\n", "PATH_X_FULL = PATH/'train'\n", "PATH_X_128 = PATH/'train-128'\n", "PATH_Y_FULL = PATH_PNG\n", "PATH_Y_128 = PATH/'train_masks-128'\n", "\n", "# start with the 128x128 images\n", "PATH_X = PATH_X_128\n", "PATH_Y = PATH_Y_128" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "img_f = next(PATH_X.iterdir())\n", "open_image(img_f).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class ImageMask(Image):\n", " \"Class for image segmentation target\"\n", " def clone(self)->'ImageBase':\n", " \"Clones this item\"\n", " return self.__class__(self.px.clone())\n", " \n", " def lighting(self, func:LightingFunc, *args:Any, **kwargs:Any)->'Image': return self\n", " \n", " def refresh(self):\n", " self.sample_kwargs['mode'] = 'nearest'\n", " return super().refresh()\n", " \n", " @property\n", " def data(self)->TensorImage:\n", " \"Returns this images pixels as a tensor\"\n", " return self.px.long()\n", " \n", "def open_mask(fn:PathOrStr) -> ImageMask:\n", " \"Return `ImageMask` object create from mask in file `fn`\"\n", " return ImageMask(pil2tensor(PIL.Image.open(fn)).float())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_y_fn(x_fn): return PATH_Y/f'{x_fn.name[:-4]}_mask.png'\n", "\n", "img_y_f = get_y_fn(img_f)\n", "y = open_mask(img_y_f)\n", "y.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "# Same as `show_image`, but renamed with _ prefix\n", "def _show_image(img:Image, ax:plt.Axes=None, figsize:tuple=(3,3), hide_axis:bool=True, cmap:str='binary', \n", " alpha:float=None) -> plt.Axes:\n", " if ax is None: fig,ax = plt.subplots(figsize=figsize)\n", " ax.imshow(image2np(img), cmap=cmap, alpha=alpha)\n", " if hide_axis: ax.axis('off')\n", " return ax\n", "\n", "def show_image(x:Image, y:Image=None, ax:plt.Axes=None, figsize:tuple=(3,3), alpha:float=0.5, \n", " hide_axis:bool=True, cmap:str='viridis'):\n", " ax1 = _show_image(x, ax=ax, hide_axis=hide_axis, cmap=cmap)\n", " if y is not None: _show_image(y, ax=ax1, alpha=alpha, hide_axis=hide_axis, cmap=cmap)\n", " if hide_axis: ax1.axis('off')\n", " \n", "def _show(self:Image, ax:plt.Axes=None, y:Image=None, **kwargs):\n", " if y is not None: y=y.data\n", " return show_image(self.data, ax=ax, y=y, **kwargs)\n", "\n", "Image.show = _show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = open_image(img_f)\n", "x.show(y=y)\n", "x.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- data types: regr, class, seg, bbox, polygon, generative (s/res, color), custom" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class DatasetTfm(Dataset):\n", " \"`Dataset` that applies a list of transforms to every item drawn\"\n", " def __init__(self, ds:Dataset, tfms:TfmList=None, tfm_y:bool=False, **kwargs:Any):\n", " \"this dataset will apply `tfms` to `ds`\"\n", " self.ds,self.tfms,self.kwargs,self.tfm_y = ds,tfms,kwargs,tfm_y\n", " self.y_kwargs = {**self.kwargs, 'do_resolve':False}\n", " \n", " def __len__(self)->int: return len(self.ds)\n", " \n", " def __getitem__(self,idx:int)->Tuple[Image,Any]:\n", " \"returns tfms(x),y\"\n", " x,y = self.ds[idx]\n", " x = apply_tfms(self.tfms, x, **self.kwargs)\n", " if self.tfm_y: y = apply_tfms(self.tfms, y, **self.y_kwargs)\n", " return x, y\n", " \n", " def __getattr__(self,k): \n", " \"passthrough access to wrapped dataset attributes\"\n", " return getattr(self.ds, k)\n", " \n", "import nb_002b\n", "nb_002b.DatasetTfm = DatasetTfm " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class SegmentationDataset(DatasetBase):\n", " \"A dataset for segmentation task\"\n", " def __init__(self, x:Collection[PathOrStr], y:Collection[PathOrStr]):\n", " assert len(x)==len(y)\n", " self.x,self.y = np.array(x),np.array(y)\n", "\n", " def __getitem__(self, i:int) -> Tuple[Image,ImageMask]: \n", " return open_image(self.x[i]), open_mask(self.y[i])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_datasets(path):\n", " x_fns = [o for o in path.iterdir() if o.is_file()]\n", " y_fns = [get_y_fn(o) for o in x_fns]\n", " mask = [o>=1008 for o in range(len(x_fns))]\n", " arrs = arrays_split(mask, x_fns, y_fns)\n", " return [SegmentationDataset(*o) for o in arrs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_ds,valid_ds = get_datasets(PATH_X_128)\n", "train_ds,valid_ds" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,y = next(iter(train_ds))\n", "x.shape, y.shape, type(x), type(y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "size=128" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_tfm_datasets(size):\n", " datasets = get_datasets(PATH_X_128 if size<=128 else PATH_X_FULL)\n", " tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)\n", " return transform_datasets(train_ds, valid_ds, tfms=tfms, tfm_y=True, size=size, padding_mode='border')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "transform_datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_tds,*_ = get_tfm_datasets(size)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axes = plt.subplots(1,4, figsize=(12,6))\n", "for i, ax in enumerate(axes.flat):\n", " imgx,imgy = train_tds[i]\n", " imgx.show(ax, y=imgy)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "default_norm,default_denorm = normalize_funcs(*imagenet_stats)\n", "bs = 64" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_data(size, bs):\n", " return DataBunch.create(*get_tfm_datasets(size), bs=bs, tfms=default_norm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = get_data(size, bs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def show_xy_images(x:Tensor,y:Tensor,rows:int,figsize:tuple=(9,9)):\n", " \"Shows a selection of images and targets from a given batch.\"\n", " fig, axs = plt.subplots(rows,rows,figsize=figsize)\n", " for i, ax in enumerate(axs.flatten()): show_image(x[i], y=y[i], ax=ax)\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,y = next(iter(data.train_dl))\n", "x,y = x.cpu(),y.cpu()\n", "x = default_denorm(x)\n", "show_xy_images(x,y,4, figsize=(9,9))\n", "x.shape, y.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class Debugger(nn.Module):\n", " \"A module to debug inside a model\"\n", " def forward(self,x:Tensor) -> Tensor: \n", " set_trace()\n", " return x\n", "\n", "class StdUpsample(nn.Module):\n", " \"Standard upsample module\"\n", " def __init__(self, n_in:int, n_out:int):\n", " super().__init__()\n", " self.conv = conv2d_trans(n_in, n_out)\n", " self.bn = nn.BatchNorm2d(n_out)\n", " \n", " def forward(self, x:Tensor) -> Tensor: \n", " return self.bn(F.relu(self.conv(x)))\n", "\n", "def std_upsample_head(c, *nfs:Collection[int]) -> Model:\n", " \"Creates a sequence of upsample layers\"\n", " return nn.Sequential(\n", " nn.ReLU(),\n", " *(StdUpsample(nfs[i],nfs[i+1]) for i in range(4)),\n", " conv2d_trans(nfs[-1], c)\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "head = std_upsample_head(2, 512,256,256,256,256)\n", "head" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def dice(input:Tensor, targs:Tensor) -> Rank0Tensor:\n", " \"Dice coefficient metric for binary target\"\n", " n = targs.shape[0]\n", " input = input.argmax(dim=1).view(n,-1)\n", " targs = targs.view(n,-1)\n", " intersect = (input*targs).sum().float()\n", " union = (input+targs).sum().float()\n", " return 2. * intersect / union\n", "\n", "def accuracy(input:Tensor, targs:Tensor) -> Rank0Tensor:\n", " \"Accuracy\"\n", " n = targs.shape[0]\n", " input = input.argmax(dim=1).view(n,-1)\n", " targs = targs.view(n,-1)\n", " return (input==targs).float().mean()\n", "\n", "class CrossEntropyFlat(nn.CrossEntropyLoss):\n", " \"Same as `nn.CrossEntropyLoss`, but flattens input and target\"\n", " def forward(self, input:Tensor, target:Tensor) -> Rank0Tensor:\n", " n,c,*_ = input.shape\n", " return super().forward(input.view(n, c, -1), target.view(n, -1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics=[accuracy, dice]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn = ConvLearner(data, tvm.resnet34, 2, custom_head=head,\n", " metrics=metrics, loss_fn=CrossEntropyFlat())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr_find(learn)\n", "learn.recorder.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr = 1e-1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(10, slice(lr))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('0')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('0')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr = 2e-2\n", "learn.fit_one_cycle(10, slice(lr/100,lr))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,y,py = learn.pred_batch()\n", "py = py.argmax(dim=1).unsqueeze(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i, ax in enumerate(plt.subplots(4,4,figsize=(10,10))[1].flat):\n", " show_image(default_denorm(x[i].cpu()), py[i], ax=ax)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "size=512\n", "bs = 8\n", "data = get_data(size, bs)\n", "learn.data = data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr = 2e-2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(5, slice(lr))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('2')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('2')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr = 2e-2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(8, slice(lr/100,lr))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('3')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,py = learn.pred_batch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i, ax in enumerate(plt.subplots(4,4,figsize=(10,10))[1].flat):\n", " show_image(default_denorm(x[i].cpu()), py[i]>0, ax=ax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing steps" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def convert_img(fn): Image.open(fn).save(PATH_PNG/f'{fn.name[:-4]}.png')\n", "\n", "def resize_img(fn, dirname):\n", " Image.open(fn).resize((128,128)).save((fn.parent.parent)/dirname/fn.name)\n", "\n", "def do_conversion():\n", " PATH_PNG.mkdir(exist_ok=True)\n", " PATH_X.mkdir(exist_ok=True)\n", " PATH_Y.mkdir(exist_ok=True)\n", "\n", " files = list((PATH/'train_masks').iterdir())\n", " with ThreadPoolExecutor(8) as e: e.map(convert_img, files)\n", "\n", " files = list((PATH_PNG).iterdir())\n", " with ThreadPoolExecutor(8) as e: e.map(partial(resize_img, dirname='train_masks-128'), files)\n", "\n", " files = list((PATH/'train').iterdir())\n", " with ThreadPoolExecutor(8) as e: e.map(partial(resize_img, dirname='train-128'), files)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }