#export
class ImageDataBunch(DataBunch):
    "`DataBunch` with factory methods to build vision data from folders, file names, dataframes, csv files or lists."

    @classmethod
    @delegates(DataBunch.from_dblock)
    def from_folder(cls, path, train='train', valid='valid', valid_pct=None, seed=None, vocab=None, **kwargs):
        "Create from imagenet style dataset in `path` with `train`,`valid`,`test` subfolders (or provide `valid_pct`)."
        # Without `valid_pct` the split follows the `train`/`valid` folder names; with it, the split is random.
        splitter = GrandparentSplitter(train_name=train, valid_name=valid) if valid_pct is None else RandomSplitter(valid_pct, seed=seed)
        dblock = DataBlock(blocks=(ImageBlock, CategoryBlock(vocab=vocab)),
                           get_items=get_image_files,
                           splitter=splitter,
                           get_y=parent_label)
        return cls.from_dblock(dblock, path, path=path, **kwargs)

    @classmethod
    @delegates(DataBunch.from_dblock)
    def from_name_func(cls, path, fnames, label_func, valid_pct=0.2, seed=None, **kwargs):
        "Create from list of `fnames` in `path`s with `label_func`."
        dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                           splitter=RandomSplitter(valid_pct, seed=seed),
                           get_y=label_func)
        return cls.from_dblock(dblock, fnames, path=path, **kwargs)

    @classmethod
    @delegates(DataBunch.from_dblock)
    def from_name_re(cls, path, fnames, pat, **kwargs):
        "Create from list of `fnames` in `path`s with re expression `pat`."
        return cls.from_name_func(path, fnames, RegexLabeller(pat), **kwargs)

    @classmethod
    @delegates(DataBunch.from_dblock)
    def from_df(cls, df, path='.', valid_pct=0.2, seed=None, fn_col=0, folder=None, suff='', label_col=1, label_delim=None, y_block=None, **kwargs):
        "Create from `df`: file names come from `fn_col` (prefixed with `path`/`folder`, suffixed with `suff`), labels from `label_col`."
        # Prefix ends with a path separator so it can be glued directly in front of each file name.
        pref = f'{Path(path) if folder is None else Path(path)/folder}{os.path.sep}'
        if y_block is None:
            # The target is multi-label when several label columns are given, or when `label_delim`
            # is set (one column then holds several delimited labels per row).
            is_multi = (is_listy(label_col) and len(label_col) > 1) or label_delim is not None
            y_block = MultiCategoryBlock if is_multi else CategoryBlock
        dblock = DataBlock(blocks=(ImageBlock, y_block),
                           get_x=ColReader(fn_col, pref=pref, suff=suff),
                           get_y=ColReader(label_col, label_delim=label_delim),
                           splitter=RandomSplitter(valid_pct, seed=seed))
        return cls.from_dblock(dblock, df, path=path, **kwargs)

    @classmethod
    def from_csv(cls, path, csv_fname='labels.csv', header='infer', delimiter=None, **kwargs):
        "Create from the csv file `path`/`csv_fname` (read with `pd.read_csv`); remaining `kwargs` go to `from_df`."
        df = pd.read_csv(Path(path)/csv_fname, header=header, delimiter=delimiter)
        return cls.from_df(df, path=path, **kwargs)

    @classmethod
    @delegates(DataBunch.from_dblock)
    def from_lists(cls, path, fnames, labels, valid_pct=0.2, seed:int=None, y_block=None, **kwargs):
        "Create from list of `fnames` and matching `labels` in `path`."
        if y_block is None:
            # The target block is inferred from the first label only: a list of several labels means
            # multi-category, a float gets a plain `TransformBlock`, anything else is a single category.
            y_block = MultiCategoryBlock if is_listy(labels[0]) and len(labels[0]) > 1 else (TransformBlock if isinstance(labels[0], float) else CategoryBlock)
        dblock = DataBlock(blocks=(ImageBlock, y_block),
                           splitter=RandomSplitter(valid_pct, seed=seed))
        return cls.from_dblock(dblock, (fnames, labels), path=path, **kwargs)

# Re-delegate so the displayed signatures of the thin wrappers list the keyword arguments
# of the factory they forward `**kwargs` to.
ImageDataBunch.from_csv = delegates(to=ImageDataBunch.from_df)(ImageDataBunch.from_csv)
ImageDataBunch.from_name_re = delegates(to=ImageDataBunch.from_name_func)(ImageDataBunch.from_name_re)
ImageDataBunch.from_folder
[source]ImageDataBunch.from_folder
(**`path`**, **`train`**=*`'train'`*, **`valid`**=*`'valid'`*, **`valid_pct`**=*`None`*, **`seed`**=*`None`*, **`vocab`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n",
"Create from imagenet style dataset in `path` with `train`,`valid`,[`test`](/test.html) subfolders (or provide `valid_pct`)."
],
"text/plain": [
"ImageDataBunch.from_name_func
[source]ImageDataBunch.from_name_func
(**`path`**, **`fnames`**, **`label_func`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n",
"Create from list of `fnames` in `path`s with `label_func`."
],
"text/plain": [
"ImageDataBunch.from_name_re
[source]ImageDataBunch.from_name_re
(**`path`**, **`fnames`**, **`pat`**, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n",
"Create from list of `fnames` in `path`s with re expression `pat`."
],
"text/plain": [
"ImageDataBunch.from_df
[source]ImageDataBunch.from_df
(**`df`**, **`path`**=*`'.'`*, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`fn_col`**=*`0`*, **`pref`**=*`''`*, **`suff`**=*`''`*, **`label_col`**=*`1`*, **`label_delim`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n"
],
"text/plain": [
"ImageDataBunch.from_csv
[source]ImageDataBunch.from_csv
(**`path`**, **`csv_fname`**=*`'labels.csv'`*, **`header`**=*`'infer'`*, **`delimiter`**=*`None`*, **`valid_pct`**=*`0.2`*, **`seed`**=*`None`*, **`fn_col`**=*`0`*, **`pref`**=*`''`*, **`suff`**=*`''`*, **`label_col`**=*`1`*, **`label_delim`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n"
],
"text/plain": [
"ImageDataBunch.from_lists
[source]ImageDataBunch.from_lists
(**`path`**, **`fnames`**, **`labels`**, **`valid_pct`**=*`0.2`*, **`seed`**:`int`=*`None`*, **`y_block`**=*`None`*, **`type_tfms`**=*`None`*, **`item_tfms`**=*`None`*, **`batch_tfms`**=*`None`*, **`bs`**=*`16`*, **`shuffle`**=*`False`*, **`num_workers`**=*`None`*, **`pin_memory`**=*`False`*, **`timeout`**=*`0`*, **`drop_last`**=*`False`*, **`indexed`**=*`None`*, **`n`**=*`None`*, **`wif`**=*`None`*, **`before_iter`**=*`None`*, **`create_batches`**=*`None`*, **`create_item`**=*`None`*, **`after_item`**=*`None`*, **`before_batch`**=*`None`*, **`create_batch`**=*`None`*, **`retain`**=*`None`*, **`after_batch`**=*`None`*, **`after_iter`**=*`None`*)\n",
"\n",
"Create from list of `fnames` in `path`."
],
"text/plain": [
"