{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#default_exp vision.data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from fastai2.torch_basics import *\n", "from fastai2.data.all import *\n", "from fastai2.vision.core import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide\n", "from nbdev.showdoc import *\n", "# from fastai2.vision.augment import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Vision data\n", "\n", "> Helper functions to get data in a `DataLoaders` in the vision application and higher class `ImageDataLoaders`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The main classes defined in this module are `ImageDataLoaders` and `SegmentationDataLoaders`, so you probably want to jump to their definitions. They provide factory methods that are a great way to quickly get your data ready for training, see the [vision tutorial](http://dev.fast.ai/tutorial.vision) for examples." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Helper functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "@delegates(subplots)\n", "def get_grid(n, nrows=None, ncols=None, add_vert=0, figsize=None, double=False, title=None, return_fig=False, **kwargs):\n", " \"Return a grid of `n` axes, `rows` by `cols`\"\n", " nrows = nrows or int(math.sqrt(n))\n", " ncols = ncols or int(np.ceil(n/nrows))\n", " if double: ncols*=2 ; n*=2\n", " fig,axs = subplots(nrows, ncols, figsize=figsize, **kwargs)\n", " axs = [ax if iPointBlock[source]\n", "\n", "A [`TransformBlock`](/data.block#TransformBlock) for points in an image" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(PointBlock, name='PointBlock')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

BBoxBlock[source]

\n", "\n", "A [`TransformBlock`](/data.block#TransformBlock) for bounding boxes in an image" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(BBoxBlock, name='BBoxBlock')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"#export\n",
"def BBoxLblBlock(vocab=None, add_na=True):\n",
"    \"A `TransformBlock` for labeled bounding boxes, potentially with `vocab`\"\n",
"    return TransformBlock(type_tfms=MultiCategorize(vocab=vocab, add_na=add_na), item_tfms=BBoxLabeler)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "If `add_na` is `True`, a new category is added for NaN (that will represent the background class)." ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## ImageDataLoaders -" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"#export\n",
"class ImageDataLoaders(DataLoaders):\n",
"    \"Basic wrapper around several `DataLoader`s with factory methods for computer vision problems\"\n",
"    @classmethod\n",
"    @delegates(DataLoaders.from_dblock)\n",
"    def from_folder(cls, path, train='train', valid='valid', valid_pct=None, seed=None, vocab=None, item_tfms=None,\n",
"                    batch_tfms=None, **kwargs):\n",
"        \"Create from imagenet style dataset in `path` with `train` and `valid` subfolders (or provide `valid_pct`)\"\n",
"        # No `valid_pct`: split on the `train`/`valid` grandparent folder names; otherwise split at random\n",
"        splitter = GrandparentSplitter(train_name=train, valid_name=valid) if valid_pct is None else RandomSplitter(valid_pct, seed=seed)\n",
"        # With `valid_pct` every image file found under `path` is used; otherwise only `train` and `valid` are searched\n",
"        get_items = get_image_files if valid_pct else partial(get_image_files, folders=[train, valid])\n",
"        dblock = DataBlock(blocks=(ImageBlock, CategoryBlock(vocab=vocab)),\n",
"                           get_items=get_items,\n",
"                           splitter=splitter,\n",
"                           get_y=parent_label,\n",
"                           item_tfms=item_tfms,\n",
"                           batch_tfms=batch_tfms)\n",
"        return cls.from_dblock(dblock, path, path=path, **kwargs)\n",
"\n",
"    @classmethod\n",
"    @delegates(DataLoaders.from_dblock)\n",
"    def from_path_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None, item_tfms=None, batch_tfms=None, **kwargs):\n",
"        \"Create from list of `fnames` in `path`s with `label_func`\"\n",
"        # `fnames` is passed as the source of items and `label_func` is used as `get_y`\n",
"        dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),\n",
"                           splitter=RandomSplitter(valid_pct, seed=seed),\n",
"                           get_y=label_func,\n",
"                           item_tfms=item_tfms,\n",
"                           batch_tfms=batch_tfms)\n",
"        return cls.from_dblock(dblock, fnames, path=path, **kwargs)\n",
"\n",
"    @classmethod\n",
"    def from_name_func(cls, path, fnames, label_func, **kwargs):\n",
"        \"Create from the name attrs of `fnames` in `path`s with `label_func`\"\n",
"        # Wrap `label_func` so that it is applied to the `name` attribute of each item\n",
"        f = using_attr(label_func, 'name')\n",
"        return cls.from_path_func(path, fnames, f, **kwargs)\n",
"\n",
"    @classmethod\n",
"    def from_path_re(cls, path, fnames, pat, **kwargs):\n",
"        \"Create from list of `fnames` in `path`s with re expression `pat`\"\n",
"        return cls.from_path_func(path, fnames, RegexLabeller(pat), **kwargs)\n",
"\n",
"    # No `@delegates` here: like `from_path_re`, this method must keep its bare `**kwargs` so that the\n",
"    # module-level delegation to `from_name_func` below can expose `valid_pct`, `seed`, `item_tfms`, `batch_tfms`\n",
"    @classmethod\n",
"    def from_name_re(cls, path, fnames, pat, **kwargs):\n",
"        \"Create from the name attrs of `fnames` in `path`s with re expression `pat`\"\n",
"        return cls.from_name_func(path, fnames, RegexLabeller(pat), **kwargs)\n",
"\n",
"    @classmethod\n",
"    @delegates(DataLoaders.from_dblock)\n",
"    def from_df(cls, df, path='.', valid_pct=0.2, seed=None, fn_col=0, folder=None, suff='', label_col=1, label_delim=None,\n",
"                y_block=None, valid_col=None, item_tfms=None, batch_tfms=None, **kwargs):\n",
"        \"Create from `df` using `fn_col` and `label_col`\"\n",
"        # Prefix handed to the filename reader: `path` (plus `folder` when given) followed by the path separator\n",
"        pref = f'{Path(path) if folder is None else Path(path)/folder}{os.path.sep}'\n",
"        if y_block is None:\n",
"            # Several label columns, or a `label_delim`, select the multi-category block\n",
"            is_multi = (is_listy(label_col) and len(label_col) > 1) or label_delim is not None\n",
"            y_block = MultiCategoryBlock if is_multi else CategoryBlock\n",
"        # A `valid_col` takes precedence over the random `valid_pct` split\n",
"        splitter = RandomSplitter(valid_pct, seed=seed) if valid_col is None else ColSplitter(valid_col)\n",
"        dblock = DataBlock(blocks=(ImageBlock, y_block),\n",
"                           get_x=ColReader(fn_col, pref=pref, suff=suff),\n",
"                           get_y=ColReader(label_col, label_delim=label_delim),\n",
"                           splitter=splitter,\n",
"                           item_tfms=item_tfms,\n",
"                           batch_tfms=batch_tfms)\n",
"        return cls.from_dblock(dblock, df, path=path, **kwargs)\n",
"\n",
"    @classmethod\n",
"    def from_csv(cls, path, csv_fname='labels.csv', header='infer', delimiter=None, **kwargs):\n",
"        \"Create from `path/csv_fname` using `fn_col` and `label_col`\"\n",
"        df = pd.read_csv(Path(path)/csv_fname, header=header, delimiter=delimiter)\n",
"        return cls.from_df(df, path=path, **kwargs)\n",
"\n",
"    @classmethod\n",
"    @delegates(DataLoaders.from_dblock)\n",
"    def from_lists(cls, path, fnames, labels, valid_pct=0.2, seed:int=None, y_block=None, item_tfms=None, batch_tfms=None,\n",
"                   **kwargs):\n",
"        \"Create from list of `fnames` and `labels` in `path`\"\n",
"        if y_block is None:\n",
"            # Pick the target block from the first label: a list of several items -> multi-category,\n",
"            # a float -> regression, anything else -> single category\n",
"            y_block = MultiCategoryBlock if is_listy(labels[0]) and len(labels[0]) > 1 else (\n",
"                RegressionBlock if isinstance(labels[0], float) else CategoryBlock)\n",
"        dblock = DataBlock.from_columns(blocks=(ImageBlock, y_block),\n",
"                                        splitter=RandomSplitter(valid_pct, seed=seed),\n",
"                                        item_tfms=item_tfms,\n",
"                                        batch_tfms=batch_tfms)\n",
"        return cls.from_dblock(dblock, (fnames, labels), path=path, **kwargs)\n",
"\n",
"# The methods that only forward `**kwargs` get their full signature from the factory they call.\n",
"# Order matters: `from_name_func` must be delegated before `from_name_re`, which delegates to it.\n",
"ImageDataLoaders.from_csv = delegates(to=ImageDataLoaders.from_df)(ImageDataLoaders.from_csv)\n",
"ImageDataLoaders.from_name_func = delegates(to=ImageDataLoaders.from_path_func)(ImageDataLoaders.from_name_func)\n",
"ImageDataLoaders.from_path_re = delegates(to=ImageDataLoaders.from_path_func)(ImageDataLoaders.from_path_re)\n",
"ImageDataLoaders.from_name_re = delegates(to=ImageDataLoaders.from_name_func)(ImageDataLoaders.from_name_re)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "This class should not be used directly, one of the factory methods should be preferred instead. 
All those factory methods accept as arguments:\n", "\n", "- `item_tfms`: one or several transforms applied to the items before batching them\n", "- `batch_tfms`: one or several transforms applied to the batches once they are formed\n", "- `bs`: the batch size\n", "- `val_bs`: the batch size for the validation `DataLoader` (defaults to `bs`)\n", "- `shuffle_train`: if we shuffle the training `DataLoader` or not\n", "- `device`: the PyTorch device to use (defaults to `default_device()`)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_folder[source]

\n", "\n", "> ImageDataLoaders.from_folder(**`path`**, **`train`**=*`'train'`*, **`valid`**=*`'valid'`*, **`valid_pct`**=*`None`*, **`seed`**=*`None`*, **`vocab`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from imagenet style dataset in `path` with `train` and `valid` subfolders (or provide `valid_pct`)" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_folder)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If `valid_pct` is provided, a random split is performed (with an optional `seed`) by setting aside that percentage of the data for the validation set (instead of looking at the grandparents folder). If a `vocab` is passed, only the folders with names in `vocab` are kept.\n", "\n", "Here is an example loading a subsample of MNIST:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.MNIST_TINY)\n", "dls = ImageDataLoaders.from_folder(path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Passing `valid_pct` will ignore the valid/train folders and do a new random split:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Path('/home/jhoward/.fastai/data/mnist_tiny/test/5071.png'),\n", " Path('/home/jhoward/.fastai/data/mnist_tiny/train/3/8684.png'),\n", " Path('/home/jhoward/.fastai/data/mnist_tiny/train/3/8188.png')]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dls = ImageDataLoaders.from_folder(path, valid_pct=0.2)\n", "dls.valid_ds.items[:3]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_path_func[source]

\n", "\n", "> ImageDataLoaders.from_path_func(**`path`**, **`fnames`**, **`label_func`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from list of `fnames` in `path`s with `label_func`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_path_func)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random `subset` of `valid_pct`, optionally created with `seed` for reproducibility.\n", "\n", "Here is how to create the same `DataLoaders` on the MNIST dataset as the previous example with a `label_func`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fnames = get_image_files(path)\n", "def label_func(x): return x.parent.name\n", "dls = ImageDataLoaders.from_path_func(path, fnames, label_func)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here is another example on the pets dataset. Here filenames are all in an \"images\" folder and their names have the form `class_name_123.jpg`. One way to properly label them is thus to throw away everything after the last `_`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_path_re[source]

\n", "\n", "> ImageDataLoaders.from_path_re(**`path`**, **`fnames`**, **`pat`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from list of `fnames` in `path`s with re expression `pat`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_path_re)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility.\n", "\n", "Here is how to create the same `DataLoaders` on the MNIST dataset as the previous example (you will need to change the initial two / by a \\ on Windows):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pat = r'/([^/]*)/\\d+.png$'\n", "dls = ImageDataLoaders.from_path_re(path, fnames, pat)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_name_func[source]

\n", "\n", "> ImageDataLoaders.from_name_func(**`path`**, **`fnames`**, **`label_func`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from the name attrs of `fnames` in `path`s with `label_func`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_name_func)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility. This method does the same as `ImageDataLoaders.from_path_func` except `label_func` is applied to the name of each file, not to its full path." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_name_re[source]

\n", "\n", "> ImageDataLoaders.from_name_re(**`path`**, **`fnames`**, **`pat`**, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from the name attrs of `fnames` in `path`s with re expression `pat`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_name_re)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility. This method does the same as `ImageDataLoaders.from_path_re` except `pat` is applied to the name of each file, not to its full path." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_df[source]

\n", "\n", "> ImageDataLoaders.from_df(**`df`**, **`path`**=*`'.'`*, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`fn_col`**=*`0`*, **`folder`**=*`None`*, **`suff`**=*`''`*, **`label_col`**=*`1`*, **`label_delim`**=*`None`*, **`y_block`**=*`None`*, **`valid_col`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from `df` using `fn_col` and `label_col`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility. Alternatively, if your `df` contains a `valid_col`, give its name or its index to that argument (the column should have `True` for the elements going to the validation set). \n", "\n", "You can add an additional `folder` to the filenames in `df` if they should not be concatenated directly to `path`. If they do not contain the proper extensions, you can add `suff`. If your label column contains multiple labels on each row, you can use `label_delim` to tell the library that you have a multi-label problem. \n", "\n", "`y_block` should be passed when the task automatically picked by the library is wrong; in that case, pass `CategoryBlock`, `MultiCategoryBlock` or `RegressionBlock`. For more advanced uses, you should use the data block API.\n", "\n", "The tiny mnist example from before also contains a version in a dataframe:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namelabel
0train/3/7463.png3
1train/3/9829.png3
2train/3/7881.png3
3train/3/8065.png3
4train/3/7046.png3
\n", "
" ], "text/plain": [ " name label\n", "0 train/3/7463.png 3\n", "1 train/3/9829.png 3\n", "2 train/3/7881.png 3\n", "3 train/3/8065.png 3\n", "4 train/3/7046.png 3" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = untar_data(URLs.MNIST_TINY)\n", "df = pd.read_csv(path/'labels.csv')\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here is how to load it using `ImageDataLoaders.from_df`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls = ImageDataLoaders.from_df(df, path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here is another example with a multi-label problem:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fnamelabelsis_valid
0000005.jpgchairTrue
1000007.jpgcarTrue
2000009.jpghorse personTrue
3000012.jpgcarFalse
4000016.jpgbicycleTrue
\n", "
" ], "text/plain": [ " fname labels is_valid\n", "0 000005.jpg chair True\n", "1 000007.jpg car True\n", "2 000009.jpg horse person True\n", "3 000012.jpg car False\n", "4 000016.jpg bicycle True" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = untar_data(URLs.PASCAL_2007)\n", "df = pd.read_csv(path/'train.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# `label_delim=' '` splits the space-separated `labels` column, so a row such as 'horse person' yields two labels\n", "dls = ImageDataLoaders.from_df(df, path, folder='train', valid_col='is_valid', label_delim=' ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that you can also pass `2` to `valid_col` (the index, starting with 0)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_csv[source]

\n", "\n", "> ImageDataLoaders.from_csv(**`path`**, **`csv_fname`**=*`'labels.csv'`*, **`header`**=*`'infer'`*, **`delimiter`**=*`None`*, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`fn_col`**=*`0`*, **`folder`**=*`None`*, **`suff`**=*`''`*, **`label_col`**=*`1`*, **`label_delim`**=*`None`*, **`y_block`**=*`None`*, **`valid_col`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from `path/csv_fname` using `fn_col` and `label_col`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_csv)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Same as `ImageDataLoaders.from_df` after loading the file with `header` and `delimiter`.\n", "\n", "Here is how to load the same dataset as before with this method:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The `labels` column of 'train.csv' is space-separated, so `label_delim=' '` is needed to get one label per word\n", "dls = ImageDataLoaders.from_csv(path, 'train.csv', folder='train', valid_col='is_valid', label_delim=' ')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

ImageDataLoaders.from_lists[source]

\n", "\n", "> ImageDataLoaders.from_lists(**`path`**, **`fnames`**, **`labels`**, **`valid_pct`**=*`0.2`*, **`seed`**:`int`=*`None`*, **`y_block`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from list of `fnames` and `labels` in `path`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(ImageDataLoaders.from_lists)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility. `y_block` can be passed to specify the type of the targets." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.PETS)\n", "fnames = get_image_files(path/\"images\")\n", "labels = ['_'.join(x.name.split('_')[:-1]) for x in fnames]\n", "dls = ImageDataLoaders.from_lists(path, fnames, labels)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"#export\n",
"class SegmentationDataLoaders(DataLoaders):\n",
"    \"Basic wrapper around several `DataLoader`s with factory methods for segmentation problems\"\n",
"    @classmethod\n",
"    @delegates(DataLoaders.from_dblock)\n",
"    def from_label_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None, codes=None, item_tfms=None, batch_tfms=None, **kwargs):\n",
"        \"Create from list of `fnames` in `path`s with `label_func`.\"\n",
"        # Inputs use `ImageBlock`; targets use a `MaskBlock` built from `codes`\n",
"        blocks = (ImageBlock, MaskBlock(codes=codes))\n",
"        # Random train/validation split driven by `valid_pct` and `seed`\n",
"        splitter = RandomSplitter(valid_pct, seed=seed)\n",
"        dblock = DataBlock(blocks=blocks, splitter=splitter, get_y=label_func, item_tfms=item_tfms, batch_tfms=batch_tfms)\n",
"        return cls.from_dblock(dblock, fnames, path=path, **kwargs)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "

SegmentationDataLoaders.from_label_func[source]

\n", "\n", "> SegmentationDataLoaders.from_label_func(**`path`**, **`fnames`**, **`label_func`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`codes`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`None`*)\n", "\n", "Create from list of `fnames` in `path`s with `label_func`." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_doc(SegmentationDataLoaders.from_label_func)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The validation set is a random subset of `valid_pct`, optionally created with `seed` for reproducibility. `codes` contains the mapping from index to label." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.CAMVID_TINY)\n", "fnames = get_image_files(path/'images')\n", "def label_func(x): return path/'labels'/f'{x.stem}_P{x.suffix}'\n", "codes = np.loadtxt(path/'codes.txt', dtype=str)\n", " \n", "dls = SegmentationDataLoaders.from_label_func(path, fnames, label_func, codes=codes)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Export -" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Converted 00_torch_core.ipynb.\n", "Converted 01_layers.ipynb.\n", "Converted 02_data.load.ipynb.\n", "Converted 03_data.core.ipynb.\n", "Converted 04_data.external.ipynb.\n", "Converted 05_data.transforms.ipynb.\n", "Converted 06_data.block.ipynb.\n", "Converted 07_vision.core.ipynb.\n", "Converted 08_vision.data.ipynb.\n", "Converted 09_vision.augment.ipynb.\n", "Converted 09b_vision.utils.ipynb.\n", "Converted 09c_vision.widgets.ipynb.\n", "Converted 10_tutorial.pets.ipynb.\n", "Converted 11_vision.models.xresnet.ipynb.\n", "Converted 12_optimizer.ipynb.\n", "Converted 13_callback.core.ipynb.\n", "Converted 13a_learner.ipynb.\n", "Converted 
13b_metrics.ipynb.\n", "Converted 14_callback.schedule.ipynb.\n", "Converted 14a_callback.data.ipynb.\n", "Converted 15_callback.hook.ipynb.\n", "Converted 15a_vision.models.unet.ipynb.\n", "Converted 16_callback.progress.ipynb.\n", "Converted 17_callback.tracker.ipynb.\n", "Converted 18_callback.fp16.ipynb.\n", "Converted 18a_callback.training.ipynb.\n", "Converted 19_callback.mixup.ipynb.\n", "Converted 20_interpret.ipynb.\n", "Converted 20a_distributed.ipynb.\n", "Converted 21_vision.learner.ipynb.\n", "Converted 22_tutorial.imagenette.ipynb.\n", "Converted 23_tutorial.vision.ipynb.\n", "Converted 24_tutorial.siamese.ipynb.\n", "Converted 24_vision.gan.ipynb.\n", "Converted 30_text.core.ipynb.\n", "Converted 31_text.data.ipynb.\n", "Converted 32_text.models.awdlstm.ipynb.\n", "Converted 33_text.models.core.ipynb.\n", "Converted 34_callback.rnn.ipynb.\n", "Converted 35_tutorial.wikitext.ipynb.\n", "Converted 36_text.models.qrnn.ipynb.\n", "Converted 37_text.learner.ipynb.\n", "Converted 38_tutorial.text.ipynb.\n", "Converted 39_tutorial.transformers.ipynb.\n", "Converted 40_tabular.core.ipynb.\n", "Converted 41_tabular.data.ipynb.\n", "Converted 42_tabular.model.ipynb.\n", "Converted 43_tabular.learner.ipynb.\n", "Converted 44_tutorial.tabular.ipynb.\n", "Converted 45_collab.ipynb.\n", "Converted 46_tutorial.collab.ipynb.\n", "Converted 50_tutorial.datablock.ipynb.\n", "Converted 60_medical.imaging.ipynb.\n", "Converted 61_tutorial.medical_imaging.ipynb.\n", "Converted 65_medical.text.ipynb.\n", "Converted 70_callback.wandb.ipynb.\n", "Converted 71_callback.tensorboard.ipynb.\n", "Converted 72_callback.neptune.ipynb.\n", "Converted 73_callback.captum.ipynb.\n", "Converted 74_callback.cutmix.ipynb.\n", "Converted 97_test_utils.ipynb.\n", "Converted 99_pytorch_doc.ipynb.\n", "Converted index.ipynb.\n", "Converted tutorial.ipynb.\n" ] } ], "source": [ "#hide\n", "from nbdev.export import notebook2script\n", "notebook2script()" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "jupytext": { "split_at_heading": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 4 }