{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Lesson 4 - Tabular models"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from fastai.tabular import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tabular data should be in a Pandas `DataFrame`."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Fetch the Adult sample dataset and read its CSV file into a DataFrame.\n",
"path = untar_data(URLs.ADULT_SAMPLE)\n",
"csv_file = path / 'adult.csv'\n",
"df = pd.read_csv(csv_file)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Column to predict.\n",
"dep_var = '>=50k'\n",
"\n",
"# Feature columns, grouped by how they are passed to TabularList\n",
"# (`cat_names=` vs. `cont_names=`).\n",
"cat_names = [\n",
"    'workclass',\n",
"    'education',\n",
"    'marital-status',\n",
"    'occupation',\n",
"    'relationship',\n",
"    'race',\n",
"]\n",
"cont_names = [\n",
"    'age',\n",
"    'fnlwgt',\n",
"    'education-num',\n",
"]\n",
"\n",
"# Preprocessing steps, passed to TabularList as `procs=`.\n",
"procs = [FillMissing, Categorify, Normalize]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Copy rows 800-999 of `df` and wrap them in a TabularList (no `procs` given here).\n",
"test_df = df.iloc[800:1000].copy()\n",
"test = TabularList.from_df(\n",
"    test_df,\n",
"    path=path,\n",
"    cat_names=cat_names,\n",
"    cont_names=cont_names,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Build the DataBunch step by step: split off rows 800-999 by index,\n",
"# label from the `dep_var` column, attach `test`, then finalize.\n",
"valid_idx = list(range(800, 1000))\n",
"data = (\n",
"    TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n",
"    .split_by_idx(valid_idx)\n",
"    .label_from_df(cols=dep_var)\n",
"    .add_test(test, label=0)\n",
"    .databunch()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"  <tr>\n",
"    <th>workclass</th>\n",
"    <th>education</th>\n",
"    <th>marital-status</th>\n",
"    <th>occupation</th>\n",
"    <th>relationship</th>\n",
"    <th>race</th>\n",
"    <th>education-num_na</th>\n",
"    <th>age</th>\n",
"    <th>fnlwgt</th>\n",
"    <th>education-num</th>\n",
"    <th>target</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>7th-8th</th>\n",
"    <th>Married-civ-spouse</th>\n",
"    <th>Machine-op-inspct</th>\n",
"    <th>Husband</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>-0.2629</th>\n",
"    <th>-0.9428</th>\n",
"    <th>-2.3781</th>\n",
"    <th>1</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Self-emp-inc</th>\n",
"    <th>HS-grad</th>\n",
"    <th>Married-civ-spouse</th>\n",
"    <th>Transport-moving</th>\n",
"    <th>Husband</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>2.0093</th>\n",
"    <th>-1.0762</th>\n",
"    <th>-0.4224</th>\n",
"    <th>1</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Self-emp-not-inc</th>\n",
"    <th>Some-college</th>\n",
"    <th>Never-married</th>\n",
"    <th>Craft-repair</th>\n",
"    <th>Not-in-family</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>-0.3362</th>\n",
"    <th>-0.3120</th>\n",
"    <th>-0.0312</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Local-gov</th>\n",
"    <th>HS-grad</th>\n",
"    <th>Never-married</th>\n",
"    <th>Craft-repair</th>\n",
"    <th>Own-child</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>0.5434</th>\n",
"    <th>-0.8287</th>\n",
"    <th>-0.4224</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>Masters</th>\n",
"    <th>Never-married</th>\n",
"    <th>Tech-support</th>\n",
"    <th>Other-relative</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>-0.9226</th>\n",
"    <th>-1.5147</th>\n",
"    <th>1.5334</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>10th</th>\n",
"    <th>Widowed</th>\n",
"    <th>Transport-moving</th>\n",
"    <th>Not-in-family</th>\n",
"    <th>Black</th>\n",
"    <th>False</th>\n",
"    <th>1.2030</th>\n",
"    <th>-0.7890</th>\n",
"    <th>-1.5958</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>State-gov</th>\n",
"    <th>Bachelors</th>\n",
"    <th>Never-married</th>\n",
"    <th>Prof-specialty</th>\n",
"    <th>Not-in-family</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>-1.1425</th>\n",
"    <th>2.9637</th>\n",
"    <th>1.1422</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>Assoc-acdm</th>\n",
"    <th>Divorced</th>\n",
"    <th>Craft-repair</th>\n",
"    <th>Not-in-family</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>0.8365</th>\n",
"    <th>0.1033</th>\n",
"    <th>0.7511</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>Some-college</th>\n",
"    <th>Separated</th>\n",
"    <th>Sales</th>\n",
"    <th>Unmarried</th>\n",
"    <th>Black</th>\n",
"    <th>False</th>\n",
"    <th>-0.6294</th>\n",
"    <th>0.2097</th>\n",
"    <th>-0.0312</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>Private</th>\n",
"    <th>HS-grad</th>\n",
"    <th>Married-civ-spouse</th>\n",
"    <th>Machine-op-inspct</th>\n",
"    <th>Husband</th>\n",
"    <th>White</th>\n",
"    <th>False</th>\n",
"    <th>-0.7760</th>\n",
"    <th>0.0061</th>\n",
"    <th>-0.4224</th>\n",
"    <th>0</th>\n",
"  </tr>\n",
"</table>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Display ten rows of one batch as they look after the `procs` preprocessing.\n",
"data.show_batch(rows=10)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Create the tabular learner; `layers` gives the layer sizes and accuracy is reported as the metric.\n",
"learn = tabular_learner(\n",
"    data,\n",
"    layers=[200, 100],\n",
"    metrics=accuracy,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
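"text/html": [
"<p>Total time: 00:03</p>\n",
"<table>\n",
"  <tr>\n",
"    <th>epoch</th>\n",
"    <th>train_loss</th>\n",
"    <th>valid_loss</th>\n",
"    <th>accuracy</th>\n",
"  </tr>\n",
"  <tr>\n",
"    <th>1</th>\n",
"    <th>0.361543</th>\n",
"    <th>0.376106</th>\n",
"    <th>0.815000</th>\n",
"  </tr>\n",
"</table>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]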
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Train for a single epoch with a learning rate of 1e-2.\n",
"learn.fit(epochs=1, lr=1e-2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inference"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Use the first record of the raw DataFrame as a single example for inference.\n",
"row = df.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Category 1, tensor(1), tensor([0.2809, 0.7191]))"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Returns a triple: the predicted category, its class index as a tensor,\n",
"# and a tensor of per-class probabilities.\n",
"learn.predict(row)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "fastai-v1",
"language": "python",
"name": "fastai-v1"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}