{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nb_200 import *\n",
    "import random\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device('cuda', 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PetsData(DataBlock):\n",
    "    types = Image,Category\n",
    "    get_items = lambda source, self: [get_image_files(source)[0]]*100\n",
    "    split = random_splitter()\n",
    "    label_func = re_labeller(pat = r'/([^/]+)_\\d+.jpg$')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CamvidData(DataBlock):\n",
    "    types = Image,SegmentMask\n",
    "    get_items = lambda source,self: [get_image_files(source/'images')[0]] * 100\n",
    "    split = random_splitter()\n",
    "    label_func = lambda o,self: self.source/'labels'/f'{o.stem}_P{o.suffix}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BiwiData(DataBlock):\n",
    "    types = Image,Points\n",
    "    def __init__(self, source, *args, **kwargs):\n",
    "        super().__init__(source, *args, **kwargs)\n",
    "        self.fn2ctr = pickle.load(open(source/'centers.pkl', 'rb'))\n",
    "        \n",
    "    get_items = lambda source, self: [get_image_files(source/'images')[0]] * 100\n",
    "    split = random_splitter()\n",
    "    label_func = lambda o,self: [[0, 0], [120, 0], [0, 160], [120,160]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CocoData(DataBlock):\n",
    "    types = Image,BBox\n",
    "    def __init__(self, source, *args, **kwargs):\n",
    "        super().__init__(source, *args, **kwargs)\n",
    "        images, lbl_bbox = get_annotations(source/'train.json')\n",
    "        self.img2bbox = dict(zip(images, lbl_bbox))\n",
    "        \n",
    "    get_items = lambda source, self: [get_image_files(source/'train')[18]] * 100\n",
    "    split = random_splitter()\n",
    "    label_func = lambda o,self: self.img2bbox[o.name]\n",
    "    \n",
    "    def databunch(self, ds_tfms=None, dl_tfms=None, bs=64, tfm_kwargs=None, **kwargs):\n",
    "        return super().databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=bs, tfm_kwargs=tfm_kwargs,\n",
    "                                 collate_fn=bb_pad_collate, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]\n",
    "dl_tfms = [Cuda(device), ToFloatTensor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets_src   = untar_data(URLs.PETS)\n",
    "camvid_src = untar_data(URLs.CAMVID_TINY)\n",
    "biwi_src   = untar_data(URLs.BIWI_SAMPLE)\n",
    "coco_src   = untar_data(URLs.COCO_TINY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Flip and dihedral with PIL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class Flip(ImageTransform):\n",
    "    _data_aug=True\n",
    "    def __init__(self, p=0.5): self.p = p\n",
    "    def randomize(self): self.do = random.random() < self.p\n",
    "        \n",
    "    def apply(self, x):\n",
    "        return x.transpose(PIL.Image.FLIP_LEFT_RIGHT) if self.do else x\n",
    "    def apply_point(self, x):\n",
    "        if self.do: x[...,0] = -x[...,0]\n",
    "        return x\n",
    "    def apply_bbox(self, x): return (self.apply_point(x[0].view(-1,2)).view(-1,4), x[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), Flip(), ResizeFixed(128), ToByteTensor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class Dihedral(ImageTransform):\n",
    "    _data_aug=True\n",
    "    def __init__(self, p=0.5): self.p = p\n",
    "    def randomize(self): \n",
    "        self.idx = random.randint(0,7) if random.random() < self.p else 0\n",
    "        \n",
    "    def apply(self, x): return x if self.idx==0 else x.transpose(self.idx-1)\n",
    "    def apply_point(self, x):\n",
    "        if self.idx in [1, 3, 4, 7]: x[...,0] = -x[...,0]\n",
    "        if self.idx in [2, 4, 5, 7]: x[...,1] = -x[...,1]\n",
    "        if self.idx in [3, 5, 6, 7]: x = x.flip(1)\n",
    "        return x\n",
    "    \n",
    "    def apply_bbox(self, x): \n",
    "        pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)\n",
    "        tl,dr = pnts.min(dim=1)[0],pnts.max(dim=1)[0]\n",
    "        return [torch.cat([tl, dr], dim=1), x[1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), Dihedral(), ResizeFixed(128), ToByteTensor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Affine and coords on the GPU"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is the main transform, that will apply affine and coordinates transform and do only one interpolation. Implementation differs for each type of target."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def clip_remove_empty(bbox, label):\n",
    "    bbox = torch.clamp(bbox, -1, 1)\n",
    "    empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) < 0.)\n",
    "    if isinstance(label, torch.Tensor): label[empty] = 0\n",
    "    else:\n",
    "        for i,m in enumerate(empty):\n",
    "            if m: label[i] = 0\n",
    "    return [bbox, label]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class AffineAndCoordTfm(ImageTransform):\n",
    "    _data_aug=True\n",
    "    def __init__(self, aff_tfms, coord_tfms, size=None, mode='bilinear', padding_mode='reflection'):\n",
    "        self.aff_tfms,self.coord_tfms,self.mode,self.padding_mode = aff_tfms,coord_tfms,mode,padding_mode\n",
    "        self.size = None if size is None else (size,size) if isinstance(size, int) else tuple(size)\n",
    "    \n",
    "    def randomize(self):\n",
    "        for t in self.aff_tfms+self.coord_tfms: t.randomize(self.x)\n",
    "    \n",
    "    def _get_affine_mat(self):\n",
    "        aff_m = torch.eye(3, dtype=self.x.dtype, device=self.x.device)\n",
    "        aff_m = aff_m.unsqueeze(0).expand(self.x.size(0), 3, 3)\n",
    "        ms = [tfm() for tfm in self.aff_tfms]\n",
    "        ms = [m for m in ms if m is not None]\n",
    "        for m in ms: aff_m = aff_m @ m\n",
    "        return aff_m\n",
    "    \n",
    "    def apply(self, x):\n",
    "        bs = x.size(0)\n",
    "        size = tuple(x.shape[-2:]) if self.size is None else self.size\n",
    "        size = (bs,x.size(1)) + size\n",
    "        coords = F.affine_grid(self._get_affine_mat()[:,:2], size)\n",
    "        coords = apply_all(coords, self.coord_tfms)\n",
    "        return F.grid_sample(x, coords, mode=self.mode, padding_mode=self.padding_mode)\n",
    "    \n",
    "    def apply_mask(self, y):\n",
    "        self.old_mode,self.mode = self.mode,'nearest'\n",
    "        res = self.apply(y.float())\n",
    "        self.mode = self.old_mode\n",
    "        return res.long()\n",
    "    \n",
    "    def apply_point(self, y):\n",
    "        m = self._get_affine_mat()[:,:2]\n",
    "        y = (y - m[:,:,2].unsqueeze(1)) @ torch.inverse(m[:,:2,:2].transpose(1,2))\n",
    "        return apply_all(y, self.coord_tfms, filter_kwargs=True, invert=True)\n",
    "    \n",
    "    def apply_bbox(self, y):\n",
    "        bbox,label = y\n",
    "        bs,n = bbox.shape[:2]\n",
    "        pnts = stack([bbox[...,:2], stack([bbox[...,0],bbox[...,3]],dim=2), \n",
    "                      stack([bbox[...,2],bbox[...,1]],dim=2), bbox[...,2:]], dim=2)\n",
    "        pnts = self.apply_point(pnts.view(bs, 4*n, 2))\n",
    "        pnts = pnts.view(bs, n, 4, 2)\n",
    "        tl,dr = pnts.min(dim=2)[0],pnts.max(dim=2)[0]\n",
    "        return clip_remove_empty(torch.cat([tl, dr], dim=2), label)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Affine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "import math\n",
    "from torch import stack, zeros_like as t0, ones_like as t1\n",
    "from torch.distributions.bernoulli import Bernoulli"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### rotate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def mask_tensor(x, p=0.5, neutral=0.):\n",
    "    if p==1.: return x\n",
    "    if neutral != 0: x.add_(-neutral)\n",
    "    mask = x.new_empty(*x.size()).bernoulli_(p)\n",
    "    x.mul_(mask)\n",
    "    return x.add_(neutral) if neutral != 0 else x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def masked_uniform(x, a, b, *sz, p=0.5, neutral=0.):\n",
    "    return mask_tensor(x.new_empty(*sz).uniform_(a,b), p=p, neutral=neutral)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class Rotation():\n",
    "    def __init__(self, degrees=10., p=0.5):\n",
    "        self.range,self.p = (-degrees,degrees),p\n",
    "    \n",
    "    def randomize(self, x):\n",
    "        thetas = masked_uniform(x, *self.range, x.size(0), p=self.p) * math.pi/180\n",
    "        self.mat = stack([stack([thetas.cos(),  thetas.sin(), t0(thetas)], dim=1),\n",
    "                          stack([-thetas.sin(), thetas.cos(), t0(thetas)], dim=1),\n",
    "                          stack([t0(thetas),    t0(thetas),   t1(thetas)], dim=1)], dim=1)\n",
    "    \n",
    "    def __call__(self): return self.mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]\n",
    "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation(30.)], [])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### flip and dihedral affine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class FlipAffine():\n",
    "    def __init__(self, p=0.5):\n",
    "        self.p=p\n",
    "    \n",
    "    def randomize(self, x):\n",
    "        mask = -2*x.new_empty(x.size(0)).bernoulli_(self.p)+1\n",
    "        self.mat = stack([stack([mask,     t0(mask), t0(mask)], dim=1),\n",
    "                          stack([t0(mask), t1(mask), t0(mask)], dim=1),\n",
    "                          stack([t0(mask), t0(mask), t1(mask)], dim=1)], dim=1)\n",
    "    \n",
    "    def __call__(self): return self.mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class DihedralAffine():\n",
    "    def __init__(self, p=0.5):\n",
    "        self.p=p\n",
    "    \n",
    "    def randomize(self, x):\n",
    "        idx = mask_tensor(torch.randint(0, 8, (x.size(0),), device=x.device), p=self.p)\n",
    "        xs = 1 - 2*(idx & 1)\n",
    "        ys = 1 - (idx & 2)\n",
    "        m0,m1 = (idx<4).long(),(idx>3).long()\n",
    "        self.mat = stack([stack([xs*m0,  xs*m1,  t0(xs)], dim=1),\n",
    "                          stack([ys*m1,  ys*m0,  t0(xs)], dim=1),\n",
    "                          stack([t0(xs), t0(xs), t1(xs)], dim=1)], dim=1).float()\n",
    "    \n",
    "    def __call__(self): return self.mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([DihedralAffine()], [])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### zoom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Zoom():\n",
    "    def __init__(self, max_zoom=1.1, p=0.5):\n",
    "        self.range,self.p = (1,max_zoom),p\n",
    "    \n",
    "    def randomize(self, x):\n",
    "        s = 1/masked_uniform(x, *self.range, x.size(0), p=self.p, neutral=1.)\n",
    "        col_pct = x.new_empty(x.size(0)).uniform_(0.,1.)\n",
    "        row_pct = x.new_empty(x.size(0)).uniform_(0.,1.)\n",
    "        col_c = (1-s) * (2*col_pct - 1)\n",
    "        row_c = (1-s) * (2*row_pct - 1)\n",
    "        self.mat = stack([stack([s,     t0(s), col_c], dim=1),\n",
    "                          stack([t0(s), s, row_c], dim=1),\n",
    "                          stack([t0(s), t0(s), t1(s)], dim=1)], dim=1)\n",
    "    \n",
    "    def __call__(self): return self.mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Zoom()], [])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Coordinates"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### warping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def find_coeffs(p1, p2):\n",
    "    matrix = []\n",
    "    p = p1[:,0,0]\n",
    "    #The equations we'll need to solve.\n",
    "    for i in range(p1.shape[1]):\n",
    "        matrix.append(stack([p2[:,i,0], p2[:,i,1], t1(p), t0(p), t0(p), t0(p), -p1[:,i,0]*p2[:,i,0], -p1[:,i,0]*p2[:,i,1]]))\n",
    "        matrix.append(stack([t0(p), t0(p), t0(p), p2[:,i,0], p2[:,i,1], t1(p), -p1[:,i,1]*p2[:,i,0], -p1[:,i,1]*p2[:,i,1]]))\n",
    "    #The 8 scalars we seek are solution of AX = B\n",
    "    A = stack(matrix).permute(2, 0, 1)\n",
    "    B = p1.view(p1.shape[0], 8, 1)\n",
    "    return torch.solve(B,A)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def apply_perspective(coords, coeffs):\n",
    "    sz = coords.shape\n",
    "    coords = coords.view(sz[0], -1, 2)\n",
    "    coeffs = torch.cat([coeffs, t1(coeffs[:,:1])], dim=1).view(coeffs.shape[0], 3,3)\n",
    "    coords = coords @ coeffs[...,:2].transpose(1,2) + coeffs[...,2].unsqueeze(1)\n",
    "    coords.div_(coords[...,2].unsqueeze(-1))\n",
    "    return coords[...,:2].view(*sz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class Warp():\n",
    "    def __init__(self, magnitude=0.2, p=0.5):\n",
    "        self.coeffs,self.magnitude,self.p = None,magnitude,p\n",
    "    \n",
    "    def randomize(self, x):\n",
    "        up_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)\n",
    "        lr_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)\n",
    "        orig_pts = torch.tensor([[-1,-1], [-1,1], [1,-1], [1,1]], dtype=x.dtype, device=x.device)\n",
    "        self.orig_pts = orig_pts.unsqueeze(0).expand(x.size(0),4,2)\n",
    "        targ_pts = stack([stack([-1-up_t, -1-lr_t]), stack([-1+up_t, 1+lr_t]), \n",
    "                          stack([ 1+up_t, -1+lr_t]), stack([ 1-up_t, 1-lr_t])])\n",
    "        self.targ_pts = targ_pts.permute(2,0,1)\n",
    "    \n",
    "    def __call__(self, x, invert=False): \n",
    "        coeffs = find_coeffs(self.targ_pts, self.orig_pts) if invert else find_coeffs(self.orig_pts, self.targ_pts)\n",
    "        return apply_perspective(x, coeffs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation()], [Warp()])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ligthing transforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "def logit(x):\n",
    "    \"Logit of `x`, clamped to avoid inf.\"\n",
    "    x = x.clamp(1e-7, 1-1e-7)\n",
    "    return -(1/x-1).log()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class LightingTransform(ImageTransform):\n",
    "    _order = 15\n",
    "    _data_aug=True\n",
    "    def __init__(self, tfms): self.tfms=listify(tfms)\n",
    "    def randomize(self): \n",
    "        for t in self.tfms: t.randomize(self.x)\n",
    "    \n",
    "    def apply(self,x):       return torch.sigmoid(apply_all(logit(x), self.tfms))\n",
    "    def apply_mask(self, x): return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "from math import log\n",
    "def masked_log_uniform(x, a, b, *sz, p=0.5, neutral=0.):\n",
    "    return torch.exp(masked_uniform(x, log(a), log(b), *sz, p=p, neutral=neutral))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "class Brightness():\n",
    "    \"Apply `change` in brightness of image `x`.\"\n",
    "    def __init__(self, max_lighting=0.2, p=0.75): \n",
    "        self.p = p\n",
    "        self.range = (0.5*(1-max_lighting), 0.5*(1+max_lighting))\n",
    "    def randomize(self, x): \n",
    "        self.change = masked_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p, neutral=0.5)\n",
    "    def __call__(self, x): return x.add_(self.change)\n",
    "    \n",
    "class Contrast():\n",
    "    \"Apply `change` in brightness of image `x`.\"\n",
    "    def __init__(self, max_lighting=0.2, p=0.75): \n",
    "        self.p = p\n",
    "        self.range = (1-max_lighting, 1/(1-max_lighting))\n",
    "    def randomize(self, x): \n",
    "        self.change = masked_log_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p)\n",
    "    def __call__(self, x): return x.mul_(self.change)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(1), Contrast(0.5)])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## All at once"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), ResizeFixed(224), ToByteTensor(), Flip()]\n",
    "dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(), Contrast()]), \n",
    "           AffineAndCoordTfm([Rotation(), Zoom()], [Warp()])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%timeit -n 10 _ = coco.one_batch(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Crops and pads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### On the CPU"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### crop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CenterCrop(ImageTransform):\n",
    "    _order = 12\n",
    "    def __init__(self, size):\n",
    "        if isinstance(size,int): size=(size,size)\n",
    "        self.size = (size[1],size[0])\n",
    "    \n",
    "    def randomize(self):\n",
    "        w,h = self.x.size\n",
    "        self.tl = (w-self.size[0]//2, h-self.size[1]//2)\n",
    "    \n",
    "    def apply(self, x):\n",
    "        return x.crop((self.tl[0],self.tl[1],self.tl[0]+self.size[0],self.tl[1]+self.size[1]))\n",
    "    \n",
    "    def apply_point(self, y):\n",
    "        old_sz,new_sz,tl = map(lambda o: tensor(o).float(), (self.x.size,self.size,self.tl))\n",
    "        return (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1\n",
    "    \n",
    "    def apply_bbox(self, y):\n",
    "        bbox,label = y\n",
    "        bbox = self.apply_point(bbox.view(-1,2)).view(-1,4)\n",
    "        return clip_remove_empty(bbox, label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RandomCrop(CenterCrop):\n",
    "    def randomize(self):\n",
    "        w,h = self.x.size\n",
    "        if self.filt != 0: self.tl = (w-self.size[0]//2, h-self.size[1]//2)\n",
    "        self.tl = (random.randint(0,w-self.size[0]), random.randint(0,h-self.size[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), RandomCrop(100), ToByteTensor()]\n",
    "dl_tfms = [Cuda(device), ToFloatTensor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### pad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torchvision.transforms.functional as tvfunc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Pad(CenterCrop):\n",
    "    _order = 15\n",
    "    _pad_modes = {'zeros': 'constant', 'border': 'replicate', 'reflection': 'reflect'}\n",
    "    def __init__(self, size, mode='zeros'):\n",
    "        if isinstance(size,int): size=(size,size)\n",
    "        self.size = (size[1],size[0])\n",
    "        self.size,self.mode = size,self._pad_modes[mode]\n",
    "    \n",
    "    def randomize(self):\n",
    "        ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]\n",
    "        self.tl = (-ph//2,-pw//2)\n",
    "        self.pad = (pw//2,ph//2,pw-pw//2,ph-ph//2)\n",
    "    \n",
    "    def apply(self, x): return tvfunc.pad(x, self.pad, padding_mode=self.mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RandomPad(Pad):\n",
    "    def randomize(self):\n",
    "        ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]\n",
    "        c,r = random.randint(0,ph),random.randint(0,pw)\n",
    "        self.tl = (-r,-c)\n",
    "        self.pad = (r,c,pw-r,ph-c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_tfms = [DecodeImg(), RandomPad(150, mode='reflection'), ToByteTensor()]\n",
    "dl_tfms = [Cuda(device), ToFloatTensor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets   = PetsData  (pets_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "biwi   = BiwiData  (biwi_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)\n",
    "coco   = CocoData  (coco_src)  .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco.show_batch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}