{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installing `fastai` and version info:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Name: fastai\n",
"Version: 0.0.17\n",
"Summary: Version 2 of the fastai library\n",
"Home-page: https://github.com/fastai/fastai\n",
"Author: Jeremy Howard, Sylvain Gugger, and contributors\n",
"Author-email: info@fast.ai\n",
"License: Apache Software License 2.0\n",
"Location: /usr/local/lib/python3.6/dist-packages\n",
"Requires: requests, pillow, torchvision, pandas, spacy, pyyaml, fastprogress, scipy, scikit-learn, fastcore, torch, matplotlib\n",
"Required-by: \n",
"---\n",
"Name: fastcore\n",
"Version: 0.1.17\n",
"Summary: Python supercharged for fastai development\n",
"Home-page: https://github.com/fastai/fastcore\n",
"Author: Jeremy Howard and Sylvain Gugger\n",
"Author-email: infos@fast.ai\n",
"License: Apache Software License 2.0\n",
"Location: /usr/local/lib/python3.6/dist-packages\n",
"Requires: numpy, dataclasses\n",
"Required-by: fastai\n"
]
}
],
"source": [
"pip install fastai --quiet\n",
"pip show fastai fastcore"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Library Imports & Pre-Process"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fastai.tabular.all import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.ADULT_SAMPLE)\n",
"df = pd.read_csv(path/'adult.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n",
"cont_names = ['age', 'fnlwgt', 'education-num']\n",
"procs = [Categorify, FillMissing, Normalize]\n",
"y_names = 'salary'\n",
"splits = RandomSplitter()(range_of(df))"
]
},
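{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (a small addition, not part of the original run): `RandomSplitter` returns a tuple of train and validation index lists, by default an 80/20 split of the rows of `adult.csv`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sizes of the train/valid index lists from RandomSplitter (default valid_pct=0.2)\n",
"len(splits[0]), len(splits[1])"
]
},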
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `TabularPandas`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,\n",
" y_names=y_names, splits=splits)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Raw x's and y's:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" workclass | \n",
" education | \n",
" marital-status | \n",
" occupation | \n",
" relationship | \n",
" race | \n",
" education-num_na | \n",
" age | \n",
" fnlwgt | \n",
" education-num | \n",
"
\n",
" \n",
" \n",
" \n",
" | 27147 | \n",
" 5 | \n",
" 8 | \n",
" 5 | \n",
" 14 | \n",
" 2 | \n",
" 5 | \n",
" 1 | \n",
" -0.631822 | \n",
" -0.972539 | \n",
" 0.751850 | \n",
"
\n",
" \n",
" | 20557 | \n",
" 5 | \n",
" 12 | \n",
" 1 | \n",
" 11 | \n",
" 2 | \n",
" 5 | \n",
" 1 | \n",
" -0.705017 | \n",
" -1.500515 | \n",
" -0.424423 | \n",
"
\n",
" \n",
" | 5537 | \n",
" 5 | \n",
" 10 | \n",
" 3 | \n",
" 13 | \n",
" 1 | \n",
" 5 | \n",
" 1 | \n",
" 0.026942 | \n",
" -0.122164 | \n",
" 1.143940 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" workclass education marital-status ... age fnlwgt education-num\n",
"27147 5 8 5 ... -0.631822 -0.972539 0.751850\n",
"20557 5 12 1 ... -0.705017 -1.500515 -0.424423\n",
"5537 5 10 3 ... 0.026942 -0.122164 1.143940\n",
"\n",
"[3 rows x 10 columns]"
]
},
"execution_count": null,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"to.train.xs.iloc[:3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" | 23736 | \n",
" 0 | \n",
"
\n",
" \n",
" | 24771 | \n",
" 0 | \n",
"
\n",
" \n",
" | 6144 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" salary\n",
"23736 0\n",
"24771 0\n",
"6144 0"
]
},
"execution_count": null,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"to.train.ys.iloc[:3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Utility Functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.utils import io as io_p"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_b_w(t):\n",
" best = round(t.best*1000, 2)\n",
" worst = round(t.worst*1000, 2)\n",
" return best, worst"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_avg(a, b, dl):\n",
" best = round(a/len(dl), 2)\n",
" worst = round(b/len(dl), 2)\n",
" return best, worst"
]
},
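{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal usage sketch of the two helpers above (added for illustration): `%timeit -o` returns a `TimeitResult` whose `best` and `worst` attributes are in seconds, so `get_b_w` converts them to milliseconds and `get_avg` then divides by the length of whatever loader-like object is passed in."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Time a trivial statement, convert the best/worst runs to ms,\n",
"# then average over a dummy loader of 10 batches (anything with a len works)\n",
"t = %timeit -o sum(range(1000))\n",
"best, worst = get_b_w(t)\n",
"get_avg(best, worst, range(10))"
]
},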
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Standard `fastai` DataLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dls = to.dataloaders(bs=128, device='cpu')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CPU"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = NumPy\n",
"Device = cpu\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 19.43ms, Worst: 20.96ms\n",
"\t`valid`: Best: 3.59ms, Worst: 3.69ms\n",
"All Batches:\n",
"\t`train`: Best: 703.5ms, Worst: 726.88ms\n",
"\t`valid`: Best: 170.05ms, Worst: 176.59ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 3.47ms/batch, Worst: 3.58ms/batch\n",
"\t`valid`: Best: 3.33ms/batch, Worst: 3.46ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CUDA"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dls.device = 'cuda'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = fastai\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = fastai\n",
"Device = cuda\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 20.0ms, Worst: 2852.74ms\n",
"\t`valid`: Best: 3.6ms, Worst: 4.01ms\n",
"All Batches:\n",
"\t`train`: Best: 711.13ms, Worst: 723.7ms\n",
"\t`valid`: Best: 174.96ms, Worst: 180.12ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 3.5ms/batch, Worst: 3.57ms/batch\n",
"\t`valid`: Best: 3.43ms/batch, Worst: 3.53ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Building a Custom `DL`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TabDataset():\n",
" \"A `NumPy` dataset from a `TabularPandas` object\"\n",
" def __init__(self, to):\n",
" self.cats = to.cats.to_numpy().astype(np.long)\n",
" self.conts = to.conts.to_numpy().astype(np.float32)\n",
" self.ys = to.ys.to_numpy()\n",
"\n",
" def __getitem__(self, idx):\n",
" idx = idx[0]\n",
" return self.cats[idx:idx+self.bs], self.conts[idx:idx+self.bs], self.ys[idx:idx+self.bs]\n",
"\n",
" def __len__(self): return len(self.cats)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_ds = TabDataset(to.train)\n",
"valid_ds = TabDataset(to.valid)"
]
},
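{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick look at what the datasets hold (an added check): the encoded categoricals, the normalized continuous columns, and the targets, each stored as a plain NumPy array."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Row count plus the shapes/dtypes of the arrays held by the custom dataset\n",
"len(train_ds), train_ds.cats.shape, train_ds.conts.dtype, train_ds.ys.shape"
]
},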
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TabDataLoader(DataLoader):\n",
" def __init__(self, dataset, bs=1, num_workers=0, device='cuda', shuffle=False, **kwargs):\n",
" \"A `DataLoader` based on a `TabDataset`\"\n",
" super().__init__(dataset, bs=bs, num_workers=num_workers, shuffle=shuffle, \n",
" device=device, drop_last=shuffle, **kwargs)\n",
" self.dataset.bs=bs\n",
" \n",
" def create_item(self, s): return s\n",
"\n",
" def create_batch(self, b):\n",
" cat, cont, y = self.dataset[b]\n",
" return tensor(cat).to(self.device), tensor(cont).to(self.device), tensor(y).to(self.device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_dl = TabDataLoader(train_ds, bs=128, shuffle=False)\n",
"valid_dl = TabDataLoader(train_ds, bs=128, shuffle=False)"
]
},
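{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fetching a single batch as an illustrative check (added here): `create_batch` slices all three arrays at once and returns them as tensors, so every batch comes back as `(cats, conts, ys)`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pull one batch and inspect the three tensors it contains\n",
"cat, cont, y = next(iter(train_dl))\n",
"cat.shape, cont.shape, y.shape"
]
},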
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CPU"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dls = DataLoaders(train_dl, valid_dl, device='cpu')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = NumPy\n",
"Device = cpu\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 0.89ms, Worst: 1.34ms\n",
"\t`valid`: Best: 0.9ms, Worst: 1.03ms\n",
"All Batches:\n",
"\t`train`: Best: 31.86ms, Worst: 34.0ms\n",
"\t`valid`: Best: 32.57ms, Worst: 40.9ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 0.16ms/batch, Worst: 0.17ms/batch\n",
"\t`valid`: Best: 0.16ms/batch, Worst: 0.2ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CUDA"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dls.device = 'cuda'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = NumPy\n",
"Device = cuda\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 1.03ms, Worst: 1.7ms\n",
"\t`valid`: Best: 1.03ms, Worst: 1.13ms\n",
"All Batches:\n",
"\t`train`: Best: 52.53ms, Worst: 54.39ms\n",
"\t`valid`: Best: 53.38ms, Worst: 60.92ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 0.26ms/batch, Worst: 0.27ms/batch\n",
"\t`valid`: Best: 0.26ms/batch, Worst: 0.3ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# The `shuffle_fn`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Don't run\n",
"def shuffle_fn(self, idxs): return self.rng.sample(idxs, len(idxs))\n",
"def randomize(self): self.rng = random.Random(self.rng.randint(0,2**32-1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@patch\n",
"def shuffle_fn(x:TabDataLoader):\n",
" \"Shuffle the interior dataset\"\n",
" rng = np.random.permutation(len(x.dataset))\n",
" x.dataset.cats = x.dataset.cats[rng]\n",
" x.dataset.conts = x.dataset.conts[rng]\n",
" x.dataset.ys = x.dataset.ys[rng]"
]
},
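{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small check of the patched `shuffle_fn` (added for illustration): it permutes `cats`, `conts`, and `ys` with the same permutation, so the rows are reordered in place while their contents stay the same."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The label totals are unchanged by a shuffle; only the row order moves\n",
"ys_sum_before = train_ds.ys.sum()\n",
"train_dl.shuffle_fn()\n",
"int(ys_sum_before), int(train_ds.ys.sum()), train_ds.ys.shape"
]
},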
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# `get_idxs`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Don't run\n",
"def get_idxs(self):\n",
" idxs = Inf.count if self.indexed else Inf.nones\n",
" if self.n is not None: idxs = list(itertools.islice(idxs, self.n))\n",
" if self.shuffle: idxs = self.shuffle_fn(idxs)\n",
" return idxs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@patch\n",
"def get_idxs(x:TabDataLoader):\n",
" \"Get index's to select\"\n",
" idxs = Inf.count if x.indexed else Inf.nones\n",
" if x.n is not None: idxs = list(range(len(x.dataset)))\n",
" if x.shuffle: x.shuffle_fn()\n",
" return idxs"
]
},
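{
"cell_type": "markdown",
"metadata": {},
"source": [
"To see the two patches working together (an added sketch mirroring the setup below): `get_idxs` hands back plain sequential indices, and the actual shuffling happens inside the dataset arrays, which is what lets `__getitem__` keep slicing contiguous blocks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Indices stay sequential even with shuffle=True; the arrays themselves get permuted\n",
"tmp_dl = TabDataLoader(train_ds, shuffle=True, bs=128)\n",
"idxs = tmp_dl.get_idxs()\n",
"idxs[:5], len(idxs) == len(train_ds)"
]
},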
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Final Timings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_dl = TabDataLoader(train_ds, shuffle=True, bs=128)\n",
"valid_dl = TabDataLoader(valid_ds, bs=128)\n",
"dls = DataLoaders(train_dl, valid_dl, device='cpu')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CPU"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"dls_f = to.dataloaders(bs=128, device='cpu')\n",
"print(f'Type = fastai\\nDevice = {dls_f.device}\\nBatch Size = {dls_f.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls_f.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls_f.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls_f.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls_f.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls_f.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls_f.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out2 = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = NumPy\n",
"Device = cpu\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 2.4ms, Worst: 4.49ms\n",
"\t`valid`: Best: 0.3ms, Worst: 0.44ms\n",
"All Batches:\n",
"\t`train`: Best: 33.32ms, Worst: 39.55ms\n",
"\t`valid`: Best: 7.84ms, Worst: 8.0ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 0.16ms/batch, Worst: 0.19ms/batch\n",
"\t`valid`: Best: 0.15ms/batch, Worst: 0.16ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = fastai\n",
"Device = cpu\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 18.73ms, Worst: 22.48ms\n",
"\t`valid`: Best: 3.51ms, Worst: 3.67ms\n",
"All Batches:\n",
"\t`train`: Best: 683.69ms, Worst: 693.98ms\n",
"\t`valid`: Best: 163.9ms, Worst: 178.87ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 3.37ms/batch, Worst: 3.42ms/batch\n",
"\t`valid`: Best: 3.21ms/batch, Worst: 3.51ms/batch\n",
"\n"
]
}
],
"source": [
"print(out2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hidden"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dls.device = 'cuda'\n",
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We're going to redirect where print goes to\n",
"old_stdout = sys.stdout\n",
"new_stdout = io.StringIO()\n",
"sys.stdout = new_stdout\n",
"dls_f.device = 'cuda'\n",
"print(f'Type = fastai\\nDevice = {dls_f.device}\\nBatch Size = {dls_f.bs}') # Print device and batch size\n",
"with io_p.capture_output() as captured: # Hide %timeit output\n",
" t = %timeit -o next(iter(dls_f.train)) # Time getting first batch\n",
" best, worst = get_b_w(t) # Round\n",
"print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o next(iter(dls_f.valid))\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls_f.train: pass # Time going over all batches\n",
" best, worst = get_b_w(t)\n",
"print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n",
"b_t,w_t = get_avg(best, worst, dls_f.train)\n",
"with io_p.capture_output() as captured:\n",
" t = %timeit -o for _ in dls_f.valid: pass\n",
" best, worst = get_b_w(t)\n",
"print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n",
"b_v,w_v = get_avg(best, worst, dls_f.valid)\n",
"print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n",
"print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n",
"out2 = new_stdout.getvalue()\n",
"sys.stdout = old_stdout"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = NumPy\n",
"Device = cuda\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 2.5ms, Worst: 9.24ms\n",
"\t`valid`: Best: 0.44ms, Worst: 0.62ms\n",
"All Batches:\n",
"\t`train`: Best: 48.83ms, Worst: 49.51ms\n",
"\t`valid`: Best: 13.04ms, Worst: 13.57ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 0.24ms/batch, Worst: 0.24ms/batch\n",
"\t`valid`: Best: 0.26ms/batch, Worst: 0.27ms/batch\n",
"\n"
]
}
],
"source": [
"print(out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type = fastai\n",
"Device = cuda\n",
"Batch Size = 128\n",
"First Batch:\n",
"\t`train`: Best: 19.31ms, Worst: 22.86ms\n",
"\t`valid`: Best: 3.57ms, Worst: 3.75ms\n",
"All Batches:\n",
"\t`train`: Best: 713.25ms, Worst: 745.08ms\n",
"\t`valid`: Best: 170.11ms, Worst: 179.02ms\n",
"Average Per Batch:\n",
"\t`train`: Best: 3.51ms/batch, Worst: 3.67ms/batch\n",
"\t`valid`: Best: 3.34ms/batch, Worst: 3.51ms/batch\n",
"\n"
]
}
],
"source": [
"print(out2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}