{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Installing `fastai` and version info:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Name: fastai\n", "Version: 0.0.17\n", "Summary: Version 2 of the fastai library\n", "Home-page: https://github.com/fastai/fastai\n", "Author: Jeremy Howard, Sylvain Gugger, and contributors\n", "Author-email: info@fast.ai\n", "License: Apache Software License 2.0\n", "Location: /usr/local/lib/python3.6/dist-packages\n", "Requires: requests, pillow, torchvision, pandas, spacy, pyyaml, fastprogress, scipy, scikit-learn, fastcore, torch, matplotlib\n", "Required-by: \n", "---\n", "Name: fastcore\n", "Version: 0.1.17\n", "Summary: Python supercharged for fastai development\n", "Home-page: https://github.com/fastai/fastcore\n", "Author: Jeremy Howard and Sylvain Gugger\n", "Author-email: infos@fast.ai\n", "License: Apache Software License 2.0\n", "Location: /usr/local/lib/python3.6/dist-packages\n", "Requires: numpy, dataclasses\n", "Required-by: fastai\n" ] } ], "source": [ "pip install fastai --quiet\n", "pip show fastai fastcore" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Library Imports & Pre-Process" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.tabular.all import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [Categorify, FillMissing, Normalize]\n", "y_names = 'salary'\n", "splits = RandomSplitter()(range_of(df))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## `TabularPandas`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,\n", " y_names=y_names, splits=splits)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Raw x's and y's:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
workclasseducationmarital-statusoccupationrelationshipraceeducation-num_naagefnlwgteducation-num
2714758514251-0.631822-0.9725390.751850
20557512111251-0.705017-1.500515-0.424423
55375103131510.026942-0.1221641.143940
\n", "
" ], "text/plain": [ " workclass education marital-status ... age fnlwgt education-num\n", "27147 5 8 5 ... -0.631822 -0.972539 0.751850\n", "20557 5 12 1 ... -0.705017 -1.500515 -0.424423\n", "5537 5 10 3 ... 0.026942 -0.122164 1.143940\n", "\n", "[3 rows x 10 columns]" ] }, "execution_count": null, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "to.train.xs.iloc[:3]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
salary
237360
247710
61440
\n", "
" ], "text/plain": [ " salary\n", "23736 0\n", "24771 0\n", "6144 0" ] }, "execution_count": null, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "to.train.ys.iloc[:3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Utility Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.utils import io as io_p" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_b_w(t):\n", " best = round(t.best*1000, 2)\n", " worst = round(t.worst*1000, 2)\n", " return best, worst" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_avg(a, b, dl):\n", " best = round(a/len(dl), 2)\n", " worst = round(b/len(dl), 2)\n", " return best, worst" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Standard `fastai` DataLoader" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls = to.dataloaders(bs=128, device='cpu')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CPU" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = NumPy\n", "Device = cpu\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 19.43ms, Worst: 20.96ms\n", "\t`valid`: Best: 3.59ms, Worst: 3.69ms\n", "All Batches:\n", "\t`train`: Best: 703.5ms, Worst: 726.88ms\n", "\t`valid`: Best: 170.05ms, Worst: 176.59ms\n", "Average Per Batch:\n", "\t`train`: Best: 3.47ms/batch, Worst: 3.58ms/batch\n", "\t`valid`: Best: 3.33ms/batch, Worst: 3.46ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CUDA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls.device = 
'cuda'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = fastai\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = fastai\n", "Device = cuda\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 20.0ms, Worst: 2852.74ms\n", "\t`valid`: Best: 3.6ms, Worst: 4.01ms\n", "All Batches:\n", "\t`train`: Best: 711.13ms, Worst: 723.7ms\n", "\t`valid`: Best: 174.96ms, Worst: 180.12ms\n", "Average Per Batch:\n", "\t`train`: Best: 3.5ms/batch, Worst: 3.57ms/batch\n", "\t`valid`: Best: 3.43ms/batch, Worst: 3.53ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Building a Custom `DL`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class TabDataset():\n", " \"A `NumPy` dataset from a `TabularPandas` object\"\n", " def __init__(self, to):\n", " self.cats = to.cats.to_numpy().astype(np.long)\n", " self.conts = to.conts.to_numpy().astype(np.float32)\n", " self.ys = to.ys.to_numpy()\n", "\n", " def __getitem__(self, idx):\n", " idx = idx[0]\n", " return self.cats[idx:idx+self.bs], self.conts[idx:idx+self.bs], self.ys[idx:idx+self.bs]\n", "\n", " def __len__(self): return len(self.cats)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_ds = TabDataset(to.train)\n", "valid_ds = TabDataset(to.valid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class TabDataLoader(DataLoader):\n", " def __init__(self, dataset, bs=1, num_workers=0, device='cuda', shuffle=False, **kwargs):\n", " \"A `DataLoader` based on a `TabDataset`\"\n", " super().__init__(dataset, bs=bs, num_workers=num_workers, shuffle=shuffle, \n", " device=device, drop_last=shuffle, **kwargs)\n", " self.dataset.bs=bs\n", " \n", " def create_item(self, s): return s\n", "\n", " def 
create_batch(self, b):\n", " cat, cont, y = self.dataset[b]\n", " return tensor(cat).to(self.device), tensor(cont).to(self.device), tensor(y).to(self.device)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_dl = TabDataLoader(train_ds, bs=128, shuffle=False)\n", "valid_dl = TabDataLoader(train_ds, bs=128, shuffle=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CPU" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls = DataLoaders(train_dl, valid_dl, device='cpu')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = NumPy\n", "Device = cpu\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 0.89ms, Worst: 1.34ms\n", "\t`valid`: Best: 0.9ms, Worst: 1.03ms\n", "All Batches:\n", "\t`train`: Best: 31.86ms, Worst: 34.0ms\n", "\t`valid`: Best: 32.57ms, Worst: 40.9ms\n", "Average Per Batch:\n", "\t`train`: Best: 0.16ms/batch, Worst: 0.17ms/batch\n", "\t`valid`: Best: 0.16ms/batch, Worst: 0.2ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CUDA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls.device = 'cuda'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first 
batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = NumPy\n", "Device = cuda\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 1.03ms, Worst: 1.7ms\n", "\t`valid`: Best: 1.03ms, Worst: 1.13ms\n", "All Batches:\n", "\t`train`: Best: 52.53ms, Worst: 54.39ms\n", "\t`valid`: Best: 53.38ms, Worst: 60.92ms\n", "Average Per Batch:\n", "\t`train`: Best: 0.26ms/batch, Worst: 0.27ms/batch\n", "\t`valid`: Best: 0.26ms/batch, Worst: 0.3ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The `shuffle_fn`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Don't run\n", "def shuffle_fn(self, idxs): return self.rng.sample(idxs, len(idxs))\n", "def randomize(self): self.rng = random.Random(self.rng.randint(0,2**32-1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@patch\n", "def shuffle_fn(x:TabDataLoader):\n", " \"Shuffle the interior dataset\"\n", " rng = np.random.permutation(len(x.dataset))\n", " x.dataset.cats = x.dataset.cats[rng]\n", " x.dataset.conts = x.dataset.conts[rng]\n", " x.dataset.ys = x.dataset.ys[rng]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# `get_idxs`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Don't run\n", "def get_idxs(self):\n", " idxs = Inf.count if self.indexed else Inf.nones\n", " if self.n is not None: idxs = list(itertools.islice(idxs, self.n))\n", " if self.shuffle: idxs = self.shuffle_fn(idxs)\n", " return idxs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@patch\n", "def get_idxs(x:TabDataLoader):\n", " \"Get index's to select\"\n", " idxs = Inf.count if x.indexed else Inf.nones\n", " if x.n is not None: idxs = list(range(len(x.dataset)))\n", " if x.shuffle: x.shuffle_fn()\n", " return idxs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Final Timings" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_dl = TabDataLoader(train_ds, shuffle=True, bs=128)\n", "valid_dl = TabDataLoader(valid_ds, bs=128)\n", "dls = DataLoaders(train_dl, valid_dl, device='cpu')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CPU" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "dls_f = to.dataloaders(bs=128, device='cpu')\n", "print(f'Type = fastai\\nDevice = {dls_f.device}\\nBatch Size = {dls_f.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls_f.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls_f.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls_f.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls_f.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls_f.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls_f.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out2 = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = NumPy\n", "Device = cpu\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 2.4ms, Worst: 4.49ms\n", "\t`valid`: Best: 0.3ms, Worst: 0.44ms\n", "All Batches:\n", "\t`train`: Best: 33.32ms, Worst: 39.55ms\n", "\t`valid`: 
Best: 7.84ms, Worst: 8.0ms\n", "Average Per Batch:\n", "\t`train`: Best: 0.16ms/batch, Worst: 0.19ms/batch\n", "\t`valid`: Best: 0.15ms/batch, Worst: 0.16ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = fastai\n", "Device = cpu\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 18.73ms, Worst: 22.48ms\n", "\t`valid`: Best: 3.51ms, Worst: 3.67ms\n", "All Batches:\n", "\t`train`: Best: 683.69ms, Worst: 693.98ms\n", "\t`valid`: Best: 163.9ms, Worst: 178.87ms\n", "Average Per Batch:\n", "\t`train`: Best: 3.37ms/batch, Worst: 3.42ms/batch\n", "\t`valid`: Best: 3.21ms/batch, Worst: 3.51ms/batch\n", "\n" ] } ], "source": [ "print(out2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hidden" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dls.device = 'cuda'\n", "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "print(f'Type = NumPy\\nDevice = {dls.device}\\nBatch Size = {dls.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We're going to redirect where print goes to\n", "old_stdout = sys.stdout\n", "new_stdout = io.StringIO()\n", "sys.stdout = new_stdout\n", "dls_f.device = 'cuda'\n", "print(f'Type = fastai\\nDevice = {dls_f.device}\\nBatch Size = {dls_f.bs}') # Print device and batch size\n", "with io_p.capture_output() as captured: # Hide %timeit output\n", " t = %timeit -o next(iter(dls_f.train)) # Time getting first batch\n", " best, worst = get_b_w(t) # Round\n", "print(f'First Batch:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o next(iter(dls_f.valid))\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in dls_f.train: pass # Time going over all batches\n", " best, worst = get_b_w(t)\n", "print(f'All Batches:\\n\\t`train`: Best: {best}ms, Worst: {worst}ms')\n", "b_t,w_t = get_avg(best, worst, dls_f.train)\n", "with io_p.capture_output() as captured:\n", " t = %timeit -o for _ in 
dls_f.valid: pass\n", " best, worst = get_b_w(t)\n", "print(f'\\t`valid`: Best: {best}ms, Worst: {worst}ms')\n", "b_v,w_v = get_avg(best, worst, dls_f.valid)\n", "print(f'Average Per Batch:\\n\\t`train`: Best: {b_t}ms/batch, Worst: {w_t}ms/batch')\n", "print(f'\\t`valid`: Best: {b_v}ms/batch, Worst: {w_v}ms/batch')\n", "out2 = new_stdout.getvalue()\n", "sys.stdout = old_stdout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = NumPy\n", "Device = cuda\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 2.5ms, Worst: 9.24ms\n", "\t`valid`: Best: 0.44ms, Worst: 0.62ms\n", "All Batches:\n", "\t`train`: Best: 48.83ms, Worst: 49.51ms\n", "\t`valid`: Best: 13.04ms, Worst: 13.57ms\n", "Average Per Batch:\n", "\t`train`: Best: 0.24ms/batch, Worst: 0.24ms/batch\n", "\t`valid`: Best: 0.26ms/batch, Worst: 0.27ms/batch\n", "\n" ] } ], "source": [ "print(out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Type = fastai\n", "Device = cuda\n", "Batch Size = 128\n", "First Batch:\n", "\t`train`: Best: 19.31ms, Worst: 22.86ms\n", "\t`valid`: Best: 3.57ms, Worst: 3.75ms\n", "All Batches:\n", "\t`train`: Best: 713.25ms, Worst: 745.08ms\n", "\t`valid`: Best: 170.11ms, Worst: 179.02ms\n", "Average Per Batch:\n", "\t`train`: Best: 3.51ms/batch, Worst: 3.67ms/batch\n", "\t`valid`: Best: 3.34ms/batch, Worst: 3.51ms/batch\n", "\n" ] } ], "source": [ "print(out2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 }