{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data augmentation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from nb_200 import *\n", "import random\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "device = torch.device('cuda', 0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class PetsData(DataBlock):\n", " types = Image,Category\n", " get_items = lambda source, self: [get_image_files(source)[0]]*100\n", " split = random_splitter()\n", " label_func = re_labeller(pat = r'/([^/]+)_\\d+.jpg$')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class CamvidData(DataBlock):\n", " types = Image,SegmentMask\n", " get_items = lambda source,self: [get_image_files(source/'images')[0]] * 100\n", " split = random_splitter()\n", " label_func = lambda o,self: self.source/'labels'/f'{o.stem}_P{o.suffix}'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class BiwiData(DataBlock):\n", " types = Image,Points\n", " def __init__(self, source, *args, **kwargs):\n", " super().__init__(source, *args, **kwargs)\n", " self.fn2ctr = pickle.load(open(source/'centers.pkl', 'rb'))\n", " \n", " get_items = lambda source, self: [get_image_files(source/'images')[0]] * 100\n", " split = random_splitter()\n", " label_func = lambda o,self: [[0, 0], [120, 0], [0, 160], [120,160]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class CocoData(DataBlock):\n", " types = Image,BBox\n", " def __init__(self, source, *args, **kwargs):\n", " super().__init__(source, *args, **kwargs)\n", " images, lbl_bbox = get_annotations(source/'train.json')\n", " self.img2bbox = dict(zip(images, lbl_bbox))\n", " \n", " get_items = lambda source, self: [get_image_files(source/'train')[18]] * 100\n", " split = random_splitter()\n", " label_func = lambda o,self: self.img2bbox[o.name]\n", " \n", " def databunch(self, ds_tfms=None, dl_tfms=None, bs=64, tfm_kwargs=None, **kwargs):\n", " return super().databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=bs, tfm_kwargs=tfm_kwargs,\n", " collate_fn=bb_pad_collate, **kwargs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]\n", "dl_tfms = [Cuda(device), ToFloatTensor()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets_src = untar_data(URLs.PETS)\n", "camvid_src = untar_data(URLs.CAMVID_TINY)\n", "biwi_src = untar_data(URLs.BIWI_SAMPLE)\n", "coco_src = untar_data(URLs.COCO_TINY)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Flip and dihedral with PIL" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Flip(ImageTransform):\n", " _data_aug=True\n", " def __init__(self, p=0.5): self.p = p\n", " def randomize(self): self.do = random.random() < self.p\n", " \n", " def apply(self, x):\n", " return x.transpose(PIL.Image.FLIP_LEFT_RIGHT) if self.do else x\n", " def apply_point(self, x):\n", " if self.do: x[...,0] = -x[...,0]\n", " return x\n", " def apply_bbox(self, x): return (self.apply_point(x[0].view(-1,2)).view(-1,4), x[1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), Flip(), ResizeFixed(128), ToByteTensor()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Dihedral(ImageTransform):\n", " _data_aug=True\n", " def __init__(self, p=0.5): self.p = p\n", " def randomize(self): \n", " self.idx = random.randint(0,7) if random.random() < self.p else 0\n", " \n", " def apply(self, x): return x if self.idx==0 else x.transpose(self.idx-1)\n", " def apply_point(self, x):\n", " if self.idx in [1, 3, 4, 7]: x[...,0] = -x[...,0]\n", " if self.idx in [2, 4, 5, 7]: x[...,1] = -x[...,1]\n", " if self.idx in [3, 5, 6, 7]: x = x.flip(1)\n", " return x\n", " \n", " def apply_bbox(self, x): \n", " pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)\n", " tl,dr = pnts.min(dim=1)[0],pnts.max(dim=1)[0]\n", " return [torch.cat([tl, dr], dim=1), x[1]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), Dihedral(), ResizeFixed(128), ToByteTensor()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Affine and coords on the GPU" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is the main transform, that will apply affine and coordinates transform and do only one interpolation. Implementation differs for each type of target." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def clip_remove_empty(bbox, label):\n", " bbox = torch.clamp(bbox, -1, 1)\n", " empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) < 0.)\n", " if isinstance(label, torch.Tensor): label[empty] = 0\n", " else:\n", " for i,m in enumerate(empty):\n", " if m: label[i] = 0\n", " return [bbox, label]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class AffineAndCoordTfm(ImageTransform):\n", " _data_aug=True\n", " def __init__(self, aff_tfms, coord_tfms, size=None, mode='bilinear', padding_mode='reflection'):\n", " self.aff_tfms,self.coord_tfms,self.mode,self.padding_mode = aff_tfms,coord_tfms,mode,padding_mode\n", " self.size = None if size is None else (size,size) if isinstance(size, int) else tuple(size)\n", " \n", " def randomize(self):\n", " for t in self.aff_tfms+self.coord_tfms: t.randomize(self.x)\n", " \n", " def _get_affine_mat(self):\n", " aff_m = torch.eye(3, dtype=self.x.dtype, device=self.x.device)\n", " aff_m = aff_m.unsqueeze(0).expand(self.x.size(0), 3, 3)\n", " ms = [tfm() for tfm in self.aff_tfms]\n", " ms = [m for m in ms if m is not None]\n", " for m in ms: aff_m = aff_m @ m\n", " return aff_m\n", " \n", " def apply(self, x):\n", " bs = x.size(0)\n", " size = tuple(x.shape[-2:]) if self.size is None else self.size\n", " size = (bs,x.size(1)) + size\n", " coords = F.affine_grid(self._get_affine_mat()[:,:2], size)\n", " coords = apply_all(coords, self.coord_tfms)\n", " return F.grid_sample(x, coords, mode=self.mode, padding_mode=self.padding_mode)\n", " \n", " def apply_mask(self, y):\n", " self.old_mode,self.mode = self.mode,'nearest'\n", " res = self.apply(y.float())\n", " self.mode = self.old_mode\n", " return res.long()\n", " \n", " def apply_point(self, y):\n", " m = self._get_affine_mat()[:,:2]\n", " y = (y - m[:,:,2].unsqueeze(1)) @ torch.inverse(m[:,:2,:2].transpose(1,2))\n", " return apply_all(y, self.coord_tfms, filter_kwargs=True, invert=True)\n", " \n", " def apply_bbox(self, y):\n", " bbox,label = y\n", " bs,n = bbox.shape[:2]\n", " pnts = stack([bbox[...,:2], stack([bbox[...,0],bbox[...,3]],dim=2), \n", " stack([bbox[...,2],bbox[...,1]],dim=2), bbox[...,2:]], dim=2)\n", " pnts = self.apply_point(pnts.view(bs, 4*n, 2))\n", " pnts = pnts.view(bs, n, 4, 2)\n", " tl,dr = pnts.min(dim=2)[0],pnts.max(dim=2)[0]\n", " return clip_remove_empty(torch.cat([tl, dr], dim=2), label)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Affine" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "import math\n", "from torch import stack, zeros_like as t0, ones_like as t1\n", "from torch.distributions.bernoulli import Bernoulli" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### rotate" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def mask_tensor(x, p=0.5, neutral=0.):\n", " if p==1.: return x\n", " if neutral != 0: x.add_(-neutral)\n", " mask = x.new_empty(*x.size()).bernoulli_(p)\n", " x.mul_(mask)\n", " return x.add_(neutral) if neutral != 0 else x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def masked_uniform(x, a, b, *sz, p=0.5, neutral=0.):\n", " return mask_tensor(x.new_empty(*sz).uniform_(a,b), p=p, neutral=neutral)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Rotation():\n", " def __init__(self, degrees=10., p=0.5):\n", " self.range,self.p = (-degrees,degrees),p\n", " \n", " def randomize(self, x):\n", " thetas = masked_uniform(x, *self.range, x.size(0), p=self.p) * math.pi/180\n", " self.mat = stack([stack([thetas.cos(), thetas.sin(), t0(thetas)], dim=1),\n", " stack([-thetas.sin(), thetas.cos(), t0(thetas)], dim=1),\n", " stack([t0(thetas), t0(thetas), t1(thetas)], dim=1)], dim=1)\n", " \n", " def __call__(self): return self.mat" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]\n", "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation(30.)], [])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### flip and dihedral affine" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class FlipAffine():\n", " def __init__(self, p=0.5):\n", " self.p=p\n", " \n", " def randomize(self, x):\n", " mask = -2*x.new_empty(x.size(0)).bernoulli_(self.p)+1\n", " self.mat = stack([stack([mask, t0(mask), t0(mask)], dim=1),\n", " stack([t0(mask), t1(mask), t0(mask)], dim=1),\n", " stack([t0(mask), t0(mask), t1(mask)], dim=1)], dim=1)\n", " \n", " def __call__(self): return self.mat" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class DihedralAffine():\n", " def __init__(self, p=0.5):\n", " self.p=p\n", " \n", " def randomize(self, x):\n", " idx = mask_tensor(torch.randint(0, 8, (x.size(0),), device=x.device), p=self.p)\n", " xs = 1 - 2*(idx & 1)\n", " ys = 1 - (idx & 2)\n", " m0,m1 = (idx<4).long(),(idx>3).long()\n", " self.mat = stack([stack([xs*m0, xs*m1, t0(xs)], dim=1),\n", " stack([ys*m1, ys*m0, t0(xs)], dim=1),\n", " stack([t0(xs), t0(xs), t1(xs)], dim=1)], dim=1).float()\n", " \n", " def __call__(self): return self.mat" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([DihedralAffine()], [])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### zoom" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Zoom():\n", " def __init__(self, max_zoom=1.1, p=0.5):\n", " self.range,self.p = (1,max_zoom),p\n", " \n", " def randomize(self, x):\n", " s = 1/masked_uniform(x, *self.range, x.size(0), p=self.p, neutral=1.)\n", " col_pct = x.new_empty(x.size(0)).uniform_(0.,1.)\n", " row_pct = x.new_empty(x.size(0)).uniform_(0.,1.)\n", " col_c = (1-s) * (2*col_pct - 1)\n", " row_c = (1-s) * (2*row_pct - 1)\n", " self.mat = stack([stack([s, t0(s), col_c], dim=1),\n", " stack([t0(s), s, row_c], dim=1),\n", " stack([t0(s), t0(s), t1(s)], dim=1)], dim=1)\n", " \n", " def __call__(self): return self.mat" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Zoom()], [])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Coordinates" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### warping" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def find_coeffs(p1, p2):\n", " matrix = []\n", " p = p1[:,0,0]\n", " #The equations we'll need to solve.\n", " for i in range(p1.shape[1]):\n", " matrix.append(stack([p2[:,i,0], p2[:,i,1], t1(p), t0(p), t0(p), t0(p), -p1[:,i,0]*p2[:,i,0], -p1[:,i,0]*p2[:,i,1]]))\n", " matrix.append(stack([t0(p), t0(p), t0(p), p2[:,i,0], p2[:,i,1], t1(p), -p1[:,i,1]*p2[:,i,0], -p1[:,i,1]*p2[:,i,1]]))\n", " #The 8 scalars we seek are solution of AX = B\n", " A = stack(matrix).permute(2, 0, 1)\n", " B = p1.view(p1.shape[0], 8, 1)\n", " return torch.solve(B,A)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def apply_perspective(coords, coeffs):\n", " sz = coords.shape\n", " coords = coords.view(sz[0], -1, 2)\n", " coeffs = torch.cat([coeffs, t1(coeffs[:,:1])], dim=1).view(coeffs.shape[0], 3,3)\n", " coords = coords @ coeffs[...,:2].transpose(1,2) + coeffs[...,2].unsqueeze(1)\n", " coords.div_(coords[...,2].unsqueeze(-1))\n", " return coords[...,:2].view(*sz)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Warp():\n", " def __init__(self, magnitude=0.2, p=0.5):\n", " self.coeffs,self.magnitude,self.p = None,magnitude,p\n", " \n", " def randomize(self, x):\n", " up_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)\n", " lr_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)\n", " orig_pts = torch.tensor([[-1,-1], [-1,1], [1,-1], [1,1]], dtype=x.dtype, device=x.device)\n", " self.orig_pts = orig_pts.unsqueeze(0).expand(x.size(0),4,2)\n", " targ_pts = stack([stack([-1-up_t, -1-lr_t]), stack([-1+up_t, 1+lr_t]), \n", " stack([ 1+up_t, -1+lr_t]), stack([ 1-up_t, 1-lr_t])])\n", " self.targ_pts = targ_pts.permute(2,0,1)\n", " \n", " def __call__(self, x, invert=False): \n", " coeffs = find_coeffs(self.targ_pts, self.orig_pts) if invert else find_coeffs(self.orig_pts, self.targ_pts)\n", " return apply_perspective(x, coeffs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation()], [Warp()])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ligthing transforms" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "def logit(x):\n", " \"Logit of `x`, clamped to avoid inf.\"\n", " x = x.clamp(1e-7, 1-1e-7)\n", " return -(1/x-1).log()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class LightingTransform(ImageTransform):\n", " _order = 15\n", " _data_aug=True\n", " def __init__(self, tfms): self.tfms=listify(tfms)\n", " def randomize(self): \n", " for t in self.tfms: t.randomize(self.x)\n", " \n", " def apply(self,x): return torch.sigmoid(apply_all(logit(x), self.tfms))\n", " def apply_mask(self, x): return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "from math import log\n", "def masked_log_uniform(x, a, b, *sz, p=0.5, neutral=0.):\n", " return torch.exp(masked_uniform(x, log(a), log(b), *sz, p=p, neutral=neutral))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# export\n", "class Brightness():\n", " \"Apply `change` in brightness of image `x`.\"\n", " def __init__(self, max_lighting=0.2, p=0.75): \n", " self.p = p\n", " self.range = (0.5*(1-max_lighting), 0.5*(1+max_lighting))\n", " def randomize(self, x): \n", " self.change = masked_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p, neutral=0.5)\n", " def __call__(self, x): return x.add_(self.change)\n", " \n", "class Contrast():\n", " \"Apply `change` in brightness of image `x`.\"\n", " def __init__(self, max_lighting=0.2, p=0.75): \n", " self.p = p\n", " self.range = (1-max_lighting, 1/(1-max_lighting))\n", " def randomize(self, x): \n", " self.change = masked_log_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p)\n", " def __call__(self, x): return x.mul_(self.change)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(1), Contrast(0.5)])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## All at once" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), ResizeFixed(224), ToByteTensor(), Flip()]\n", "dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(), Contrast()]), \n", " AffineAndCoordTfm([Rotation(), Zoom()], [Warp()])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%timeit -n 10 _ = coco.one_batch(0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Crops and pads" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### On the CPU" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### crop" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class CenterCrop(ImageTransform):\n", " _order = 12\n", " def __init__(self, size):\n", " if isinstance(size,int): size=(size,size)\n", " self.size = (size[1],size[0])\n", " \n", " def randomize(self):\n", " w,h = self.x.size\n", " self.tl = (w-self.size[0]//2, h-self.size[1]//2)\n", " \n", " def apply(self, x):\n", " return x.crop((self.tl[0],self.tl[1],self.tl[0]+self.size[0],self.tl[1]+self.size[1]))\n", " \n", " def apply_point(self, y):\n", " old_sz,new_sz,tl = map(lambda o: tensor(o).float(), (self.x.size,self.size,self.tl))\n", " return (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1\n", " \n", " def apply_bbox(self, y):\n", " bbox,label = y\n", " bbox = self.apply_point(bbox.view(-1,2)).view(-1,4)\n", " return clip_remove_empty(bbox, label)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class RandomCrop(CenterCrop):\n", " def randomize(self):\n", " w,h = self.x.size\n", " if self.filt != 0: self.tl = (w-self.size[0]//2, h-self.size[1]//2)\n", " self.tl = (random.randint(0,w-self.size[0]), random.randint(0,h-self.size[1]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), RandomCrop(100), ToByteTensor()]\n", "dl_tfms = [Cuda(device), ToFloatTensor()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### pad" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torchvision.transforms.functional as tvfunc" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Pad(CenterCrop):\n", " _order = 15\n", " _pad_modes = {'zeros': 'constant', 'border': 'replicate', 'reflection': 'reflect'}\n", " def __init__(self, size, mode='zeros'):\n", " if isinstance(size,int): size=(size,size)\n", " self.size = (size[1],size[0])\n", " self.size,self.mode = size,self._pad_modes[mode]\n", " \n", " def randomize(self):\n", " ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]\n", " self.tl = (-ph//2,-pw//2)\n", " self.pad = (pw//2,ph//2,pw-pw//2,ph-ph//2)\n", " \n", " def apply(self, x): return tvfunc.pad(x, self.pad, padding_mode=self.mode)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class RandomPad(Pad):\n", " def randomize(self):\n", " ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]\n", " c,r = random.randint(0,ph),random.randint(0,pw)\n", " self.tl = (-r,-c)\n", " self.pad = (r,c,pw-r,ph-c)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_tfms = [DecodeImg(), RandomPad(150, mode='reflection'), ToByteTensor()]\n", "dl_tfms = [Cuda(device), ToFloatTensor()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n", "coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "coco.show_batch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }