{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from nb_005 import *\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Faces" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Download the dataset [here](https://download.pytorch.org/tutorial/faces.zip) from the PyTorch tutorial on transforms. Unzip it in the data directory, so that data/faces/ contains the images and the CSV file." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PATH = Path('data/faces/')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "get_image_files" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "img_fns = get_image_files(PATH)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(img_fns)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "poses = pd.read_csv(PATH/'face_landmarks.csv')\n", "poses.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pose_dict = {o[0]:o[1:].astype(np.float32) for o in poses.values}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Reading the coordinates. We adopt the PyTorch convention used in grid_sampler, where the coordinates are normalized between -1 and 1. (-1,-1) is the top left corner, (1,1) the bottom right. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def scale_coords(x, size, to_unit=True):\n", " s = tensor([size[1]/2,size[0]/2])[None]\n", " return (x/s-1) if to_unit else ((x+1)*s)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pose_pnts = []\n", "for i, fname in enumerate(img_fns):\n", " size = open_image(fname).size\n", " coords = tensor(pose_dict[fname.name]).view(-1,2)\n", " pose_pnts.append(scale_coords(coords, size))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(pose_pnts)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pose_pnts[0].shape,pose_pnts[0][0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's have a look at the data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def show_pose(img, pnts, ax=None):\n", " if ax is None: _,ax = plt.subplots()\n", " img.show(ax, hide_axis=False)\n", " pnts = scale_coords(pnts, img.size, to_unit=False)\n", " ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "img = open_image(img_fns[0])\n", "show_pose(img, pose_pnts[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So when we change the picture, the points must be changed accordingly!" 
def affine_inv_mult(c, m, shape):
    """Apply the inverse of the affine matrix `m` to the points `c`.

    Args:
        c: tensor of points; it is viewed as (-1, 2) rows and the result is
            returned in the original shape of `c`.
        m: matrix indexed as 3x3; `m[:2,:2]` is the linear part and `m[:2,2]`
            the translation. It is NOT modified.
        shape: unpacked as `(_, h, w)`; the off-diagonal terms of the linear
            part are rescaled by `h/w` and `w/h` before inverting.

    Returns:
        `(c - t) @ inverse(A.t())`, which is the exact inverse of the map
        `c @ A.t() + t`.
    """
    size = c.size()
    _, h, w = shape
    # Work on a copy: the aspect-ratio correction must not leak into the caller's matrix.
    m = m.clone()
    m[0, 1] *= h / w
    m[1, 0] *= w / h
    a = torch.inverse(m[:2, :2].t())
    # Subtract the translation *before* multiplying by the inverse; folding it in as
    # `a @ t` is only right when `a` is symmetric (no rotation/shear).
    return torch.mm(c.view(-1, 2) - m[:2, 2], a).view(size)


def apply_pad_coords(c, padding_mode='reflect', **kwargs):
    """Apply the padding mode to the coords.

    Args:
        c: (N, 2) tensor of normalized points.
        padding_mode: 'zeros' / 'border' drop every point that has a
            coordinate outside [-1, 1]; 'reflect' mirrors out-of-range
            coordinates once around -1 / 1. Any other value returns `c` as is.
        **kwargs: accepted and ignored, so a whole sampling-kwargs dict can be
            forwarded unchanged.

    Returns:
        A new tensor; `c` itself is never modified.
    """
    if padding_mode in ('zeros', 'border'):
        inside = ((c >= -1) & (c <= 1)).all(dim=1)
        return c[inside]
    if padding_mode == 'reflect':
        # Same two sequential steps as an in-place masked assignment, but out of place.
        c = torch.where(c < -1, -2 - c, c)
        c = torch.where(c > 1, 2 - c, c)
    return c


#export
class ImagePoints():
    """A set of points attached to an image of a given `shape`.

    Exposes `lighting` / `pixel` / `coord` / `affine` / `set_sample` / `resize`
    so that transforms can be dispatched on it (presumably mirroring the
    `Image` wrapper used for pixel data - confirm against `Image`).
    Affine matrices are accumulated in `affine_mat` and only applied, inverted,
    the next time `flow` (or `px`) is read.
    """
    def __init__(self, pts, shape):
        self._flow = pts            # the points themselves
        self._affine_mat = None     # pending affine matrix, None when nothing is pending
        self.sample_kwargs = {}     # forwarded to apply_pad_coords (e.g. padding_mode)
        self._shape = shape

    @property
    def shape(self): return self._shape

    def __repr__(self): return f'{self.__class__.__name__} ({self.shape})'

    @property
    def px(self): return self.flow
    @px.setter
    def px(self, v):
        # `px` reads from the points, so it has to write to them as well; storing the
        # value in a separate attribute would silently drop the result of `pixel()`.
        self._flow = v

    @property
    def flow(self):
        "The points, after applying (and clearing) any pending affine transform."
        if self._flow is None:
            self._flow = affine_grid(self.shape)
        if self._affine_mat is not None:
            self._flow = affine_inv_mult(self._flow, self._affine_mat, self._shape)
            self._flow = apply_pad_coords(self._flow, **self.sample_kwargs)
            self._affine_mat = None
        return self._flow
    @flow.setter
    def flow(self, v): self._flow = v

    def lighting(self, func, *args, **kwargs):
        "Lighting transforms do not move points: no-op."
        return self

    #TODO: special func for points
    def pixel(self, func, *args, **kwargs):
        "Replace the points with `func(points, *args, **kwargs)`."
        self.px = func(self.px, *args, **kwargs)
        return self

    def coord(self, func, *args, **kwargs):
        "Replace the points with `func(points, shape, *args, invert=True, **kwargs)`."
        self.flow = func(self.flow, self.shape, *args, invert=True, **kwargs)
        return self

    def affine(self, func, *args, **kwargs):
        "Right-multiply the pending affine matrix by `func(*args, **kwargs)`."
        m = func(*args, **kwargs)
        self.affine_mat = self.affine_mat @ self._like_points(m)
        return self

    def set_sample(self, **kwargs):
        "Store the kwargs later forwarded to `apply_pad_coords`."
        self.sample_kwargs = kwargs
        return self

    def resize(self, size):
        # NOTE(review): `__init__` always stores `pts` in `_flow`, so this assert fails
        # whenever points were supplied, and the grid assigned below would replace the
        # points. Looks carried over from the pixel wrapper - confirm intended behavior.
        assert self._flow is None
        if isinstance(size, int): size = (self.shape[0], size, size)
        self.flow = affine_grid(size)
        return self

    @property
    def affine_mat(self):
        "Pending affine matrix; starts as a 3x3 identity matching the points' dtype/device."
        if self._affine_mat is None:
            self._affine_mat = torch.eye(3, dtype=self._flow.dtype, device=self._flow.device)
        return self._affine_mat
    @affine_mat.setter
    def affine_mat(self, v): self._affine_mat = v

    def clone(self):
        "New instance holding a copy of the (resolved) points; `sample_kwargs` are not copied."
        return self.__class__(self.flow.clone(), self.shape)

    def _like_points(self, m):
        "Convert `m` (nested list or tensor) to a tensor with the points' dtype and device."
        return torch.as_tensor(m, dtype=self._flow.dtype, device=self._flow.device)
TfmY = IntEnum('TfmY', 'No Pixel Mask Coord BBox')

class DatasetTfm(Dataset):
    """Dataset wrapper that applies `tfms` to each item of `ds`.

    Args:
        ds: wrapped dataset; `ds[i]` must unpack as `(x, y)`.
        tfms: transforms handed to `apply_tfms`.
        tfm_y: how the target is treated; only `TfmY.Coord` changes behavior
            here (the target is then transformed as points).
        **kwargs: extra keyword arguments forwarded to every `apply_tfms` call.

    Unknown attributes are looked up on the wrapped dataset.
    """
    def __init__(self, ds:Dataset, tfms:Collection[Callable]=None, tfm_y:TfmY=TfmY.No, **kwargs):
        self.ds,self.tfms,self.tfm_y,self.kwargs = ds,tfms,tfm_y,kwargs

    def __len__(self): return len(self.ds)

    def __getattr__(self, k):
        # Only reached when normal lookup fails. On a half-built instance (copy / unpickle
        # create the object before `ds` is set) delegating would look up `self.ds`, fail,
        # and re-enter here forever. Special-method probes such as `__setstate__` must not
        # be answered by the wrapped dataset either.
        if k == 'ds' or (k.startswith('__') and k.endswith('__')): raise AttributeError(k)
        return getattr(self.ds, k)

    def __getitem__(self,idx):
        # `idx` may be `(index, xtra)` where `xtra` holds per-call overrides of `kwargs`.
        if isinstance(idx, tuple): idx,xtra = idx
        else: xtra={}
        x,y = self.ds[idx]
        # Record the input size before x is transformed; the points need it.
        if self.tfm_y == TfmY.Coord: x_sz = x.size()
        x = apply_tfms(self.tfms, x, **{**self.kwargs, **xtra})
        # do_resolve=False skips the resolve step, so y goes through the transforms
        # exactly as they were resolved for x.
        if self.tfm_y == TfmY.Coord: y = apply_tfms(self.tfms, y, do_resolve=False, is_coord=True, x_sz=x_sz,
                                                     **{**self.kwargs, **xtra})
        return x, y
"outputs": [], "source": [ "tfms = [zoom(scale=(1,2),row_pct=(0,1),col_pct=(0,1))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfm_ds = DatasetTfm(img_ds, tfms, TfmY.Coord, padding_mode='reflect')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axs = plt.subplots(1,4, figsize=(12,6))\n", "for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Affine, coords, crop transform first." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def apply_pad_coords(c, padding_mode):\n", " \"Apply the padding mode to the coords\"\n", " if padding_mode=='zeros' or padding_mode=='border':\n", " mask = (c[:,0] >=-1) * (c[:,0] <=1) * (c[:,1] >=-1) * (c[:,1] <=1)\n", " return c[mask]\n", " elif padding_mode=='reflect':\n", " c[c < -1] = -2 - c[c < -1]\n", " c[c > 1] = 2 - c[c > 1]\n", " return c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def affine_inv_mult(c,m):\n", " \"Applies the inverse affine transform described in m\"\n", " size = c.size()\n", " c = c.view(-1,2)\n", " a = torch.inverse(m[:2,:2].t()) \n", " c = torch.addmm(-torch.mv(a,m[:2,2]), c, a) \n", " return c.view(size)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Problem in affine: we must apply the inverse tranformation (affine_inv_mult function)\n", "- Problem in coord: we must also apply the inverse transformation (flag invert)\n", "- Probem in crop (and other transforms and the beginning): we need the size of the image where as if the object pass is a list of points, it doesn't have it. 
TfmY = IntEnum('TfmY', 'No Pixel Mask Coord BBox')

class TfmDataset(Dataset):
    """Dataset wrapper that applies the same resolved transforms to input and target.

    Args:
        ds: wrapped dataset; `ds[i]` must unpack as `(x, y)`.
        tfms: transforms handed to `apply_tfms`; `None` disables transforming.
        tfm_y: `TfmY.No` leaves the target alone, `TfmY.Coord` treats it as
            points, `TfmY.Mask` passes `segmentation=True` to the transform.
        **kwargs: forwarded to the transform for both x and y.
    """
    def __init__(self, ds:Dataset, tfms:Collection[Callable]=None, tfm_y:TfmY=TfmY.No, **kwargs):
        self.ds,self.tfms,self.tfm_y,self.kwargs = ds,tfms,tfm_y,kwargs

    def __len__(self): return len(self.ds)

    def __getitem__(self,idx):
        x,y = self.ds[idx]
        if self.tfms is None: return x,y
        # Build the transform once and use that single callable for both x and y, so
        # the target goes through exactly the transform that was applied to the input.
        tfm = apply_tfms(self.tfms)
        coord,seg = (self.tfm_y == TfmY.Coord),(self.tfm_y == TfmY.Mask)
        # Points carry the size of the *untransformed* image, so wrap before x changes.
        if coord and y is not None: y = TargetCoords(y, x.size())
        x = tfm(x, **self.kwargs)
        if self.tfm_y != TfmY.No and y is not None:
            y = tfm(y, segmentation=seg, is_coord=coord, **self.kwargs)
            # Only the point wrapper has `.coords`; other targets come back as-is.
            if coord: y = y.coords
        return x,y
def show_pose(img, pnts, ax=None):
    """Draw `img` on `ax` and overlay `pnts` as small red dots.

    `img` is read as (channels, height, width): it is transposed to
    (height, width, channels) for `imshow`, `size(2)` is used as the width and
    `size(1)` as the height. `pnts` holds normalized [-1, 1] points and is
    rescaled to pixels before plotting; nothing is scattered when it is empty.
    A new figure is created when `ax` is not given.
    """
    if ax is None:
        ax = plt.subplots()[1]
    pixels = img.numpy().transpose(1, 2, 0)
    ax.imshow(pixels)
    if len(pnts) == 0:
        return
    half_wh = np.array([img.size(2) / 2, img.size(1) / 2])
    xy = (pnts.numpy() + 1) * half_wh[None]
    ax.scatter(xy[:, 0], xy[:, 1], s=10, marker='.', c='r')
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axs = plt.subplots(1,4, figsize=(12,6))\n", "for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Whole pipeline: we pass is_coord as an argument to every transform." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def _apply_tfm_func(pixel_func,lighting_func,affine_func,start_func, x, segmentation=False, is_coord=False, **kwargs):\n", " if not np.any([pixel_func,lighting_func,affine_func,start_func]): return x\n", " x = x.clone()\n", " if start_func is not None: x = start_func(x, is_coord=is_coord)\n", " if affine_func is not None: x = affine_func(x, is_coord=is_coord, **kwargs)\n", " if lighting_func is not None and not segmentation and not is_coord: x = lighting_func(x)\n", " if pixel_func is not None: x = pixel_func(x, is_coord=is_coord)\n", " return x\n", "\n", "def apply_tfms(tfms):\n", " resolve_tfms(tfms)\n", " grouped_tfms = dict_groupby(listify(tfms), lambda o: o.tfm_type)\n", " start_tfms,affine_tfms,coord_tfms,pixel_tfms,lighting_tfms,crop_tfms = [\n", " (grouped_tfms.get(o)) for o in TfmType]\n", " lighting_func = apply_lighting(compose(lighting_tfms))\n", " mats = [o() for o in listify(affine_tfms)]\n", " affine_func = apply_affine(mats, func=compose(coord_tfms), crop_func=compose(crop_tfms))\n", " return partial(_apply_tfm_func,\n", " compose(pixel_tfms),lighting_func,affine_func,compose(start_tfms))\n", "\n", "nb_003a.apply_tfms = apply_tfms" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Simple padding" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def pad_coord(x:TargetCoords, row_pad:int, col_pad:int):\n", " c,h,w = x.size()\n", " pad = torch.Tensor([w/(w + 2*col_pad), h/(h + 2*row_pad)])\n", " new_sz = torch.Size([c, h+2*row_pad, w+2*col_pad]) \n", " return TargetCoords(x.coords.mul_(pad[None]), 
def crop_coords(x:'TargetCoords', row:int, col:int, rows:int, cols:int):
    """Re-express normalized points in the frame of a crop of the image.

    Args:
        x: point wrapper; `x.size()` is unpacked as `(ch, h, w)` and `x.coords`
            holds the normalized points. It is NOT modified.
        row, col: top-left corner of the crop, in pixels.
        rows, cols: height and width of the crop, in pixels.

    Returns:
        A new `TargetCoords` of size `(ch, rows, cols)` holding the points
        rescaled to the crop, with 'zeros' padding applied by `apply_pad_coords`.
    """
    ch, h, w = x.size()
    pts = x.coords
    # new = old * (w/cols, h/rows) + (w/cols - 2*col/cols - 1, h/rows - 2*row/rows - 1)
    # Computed out of place and in the points' own dtype/device, so the caller's
    # tensor is left untouched.
    scale = pts.new_tensor([w / cols, h / rows])
    shift = pts.new_tensor([w / cols - 2 * col / cols - 1, h / rows - 2 * row / rows - 1])
    c = pts * scale[None] + shift[None]
    c = apply_pad_coords(c, padding_mode='zeros')
    new_sz = torch.Size([ch, rows, cols])
    return TargetCoords(c, new_sz)
"metadata": {}, "outputs": [], "source": [ "tfms = [crop_tfm(size=200,row_pct=(0,1),col_pct=(0,1))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axs = plt.subplots(1,4, figsize=(12,6))\n", "for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Crop pad transform" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@reg_transform\n", "def crop_pad(x, size, padding_mode='reflect', is_coord=False,\n", " row_pct:uniform = 0.5, col_pct:uniform = 0.5) -> TfmType.Crop:\n", " size = listify(size,2)\n", " rows,cols = size\n", " if x.size(1) TfmType.Coord:\n", " orig_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]\n", " if direction == 0: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]\n", " elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]\n", " elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]\n", " elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]] \n", " coeffs = find_coeffs(targ_pts, orig_pts) if invert else find_coeffs(orig_pts, targ_pts)\n", " return apply_perspective(c, coeffs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfms = [tilt_tfm(direction=(0,3), magnitude=(-0.4,0.4))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros', do_crop=True, size=100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axs = plt.subplots(1,4, figsize=(12,6))\n", "for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "Flip" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def flip_lr_coord(x):\n", " x.coords[:,0] = -x.coords[:,0]\n", " return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@reg_transform\n", "def flip_lr(x, is_coord=False) -> TfmType.Pixel: \n", " if is_coord: return flip_lr_coord(x)\n", " else: return x.flip(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfms = [flip_lr_tfm(p=0.5)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros', do_crop=True, size=100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_,axs = plt.subplots(1,4, figsize=(12,6))\n", "for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }