{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tabular models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastai import *\n",
    "from fastai.tabular import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tabular data should be in a Pandas `DataFrame`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = untar_data(URLs.ADULT_SAMPLE)\n",
    "df = pd.read_csv(path/'adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dep_var = '>=50k'\n",
    "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n",
    "cont_names = ['age', 'fnlwgt', 'education-num']\n",
    "procs = [FillMissing, Categorify, Normalize]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n",
    "                           .split_by_idx(list(range(800,1000)))\n",
    "                           .label_from_df(cols=dep_var)\n",
    "                           .add_test(test, label=0)\n",
    "                           .databunch())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <col width='10px'>  <tr>\n",
       "    <th>workclass</th>\n",
       "    <th>education</th>\n",
       "    <th>marital-status</th>\n",
       "    <th>occupation</th>\n",
       "    <th>relationship</th>\n",
       "    <th>race</th>\n",
       "    <th>education-num_na</th>\n",
       "    <th>age</th>\n",
       "    <th>fnlwgt</th>\n",
       "    <th>education-num</th>\n",
       "    <th>target</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> Prof-school</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Prof-specialty</th>\n",
       "    <th> Husband</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>0.1036</th>\n",
       "    <th>0.9224</th>\n",
       "    <th>1.9245</th>\n",
       "    <th>1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Self-emp-inc</th>\n",
       "    <th> Bachelors</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Farming-fishing</th>\n",
       "    <th> Husband</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>1.7161</th>\n",
       "    <th>-1.2654</th>\n",
       "    <th>1.1422</th>\n",
       "    <th>1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> HS-grad</th>\n",
       "    <th> Never-married</th>\n",
       "    <th> Adm-clerical</th>\n",
       "    <th> Other-relative</th>\n",
       "    <th> Black</th>\n",
       "    <th>False</th>\n",
       "    <th>-0.7760</th>\n",
       "    <th>1.1905</th>\n",
       "    <th>-0.4224</th>\n",
       "    <th>0</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> 10th</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Sales</th>\n",
       "    <th> Own-child</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>-1.5823</th>\n",
       "    <th>-0.0268</th>\n",
       "    <th>-1.5958</th>\n",
       "    <th>0</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> Some-college</th>\n",
       "    <th> Never-married</th>\n",
       "    <th> Handlers-cleaners</th>\n",
       "    <th> Own-child</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>-1.3624</th>\n",
       "    <th>0.0284</th>\n",
       "    <th>-0.0312</th>\n",
       "    <th>0</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> Some-college</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Prof-specialty</th>\n",
       "    <th> Husband</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>0.3968</th>\n",
       "    <th>0.4367</th>\n",
       "    <th>-0.0312</th>\n",
       "    <th>1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> ?</th>\n",
       "    <th> Some-college</th>\n",
       "    <th> Never-married</th>\n",
       "    <th> ?</th>\n",
       "    <th> Own-child</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>-1.4357</th>\n",
       "    <th>-0.7295</th>\n",
       "    <th>-0.0312</th>\n",
       "    <th>0</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Self-emp-not-inc</th>\n",
       "    <th> 5th-6th</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Sales</th>\n",
       "    <th> Husband</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>0.6166</th>\n",
       "    <th>-0.6503</th>\n",
       "    <th>-2.7692</th>\n",
       "    <th>1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Private</th>\n",
       "    <th> Some-college</th>\n",
       "    <th> Married-civ-spouse</th>\n",
       "    <th> Sales</th>\n",
       "    <th> Husband</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>1.5695</th>\n",
       "    <th>-0.8876</th>\n",
       "    <th>-0.0312</th>\n",
       "    <th>1</th>\n",
       "  </tr>\n",
       "  <tr>\n",
       "    <th> Local-gov</th>\n",
       "    <th> Some-college</th>\n",
       "    <th> Never-married</th>\n",
       "    <th> Handlers-cleaners</th>\n",
       "    <th> Own-child</th>\n",
       "    <th> White</th>\n",
       "    <th>False</th>\n",
       "    <th>-0.6294</th>\n",
       "    <th>-1.5422</th>\n",
       "    <th>-0.0312</th>\n",
       "    <th>0</th>\n",
       "  </tr>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "data.show_batch(rows=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learn = tabular_learner(data, layers=[200,100], metrics=accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TabularModel(\n",
       "  (embeds): ModuleList(\n",
       "    (0): Embedding(10, 6)\n",
       "    (1): Embedding(17, 9)\n",
       "    (2): Embedding(8, 5)\n",
       "    (3): Embedding(16, 9)\n",
       "    (4): Embedding(7, 4)\n",
       "    (5): Embedding(6, 4)\n",
       "    (6): Embedding(3, 2)\n",
       "  )\n",
       "  (emb_drop): Dropout(p=0.0)\n",
       "  (bn_cont): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "  (layers): Sequential(\n",
       "    (0): Linear(in_features=42, out_features=200, bias=True)\n",
       "    (1): ReLU(inplace)\n",
       "    (2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (3): Linear(in_features=200, out_features=100, bias=True)\n",
       "    (4): ReLU(inplace)\n",
       "    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (6): Linear(in_features=100, out_features=2, bias=True)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learn.model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total time: 00:03\n",
      "epoch  train_loss  valid_loss  accuracy\n",
      "1      0.362837    0.413169    0.785000  (00:03)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "learn.fit(1, 1e-2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "row = df.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, tensor(0), tensor([0.6365, 0.3635]))"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "learn.predict(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}