{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tabular example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fastai.tabular import * # Quick access to tabular functionality"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tabular data should be in a Pandas `DataFrame`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.ADULT_SAMPLE)\n",
"df = pd.read_csv(path/'adult.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['>=50k', '<50k'], dtype=object)"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['salary'].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import fastai's memory utilities\n",
"from fastai.utils.mem import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 8109)"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Test another function\n",
"gpu_with_max_free_mem()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Memory usage of dataframe is 3.73 MB\n",
"Memory usage after optimization is: 0.78 MB\n",
"Decreased by 79.0%\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" age | \n",
" workclass | \n",
" fnlwgt | \n",
" education | \n",
" education-num | \n",
" marital-status | \n",
" occupation | \n",
" relationship | \n",
" race | \n",
" sex | \n",
" capital-gain | \n",
" capital-loss | \n",
" hours-per-week | \n",
" native-country | \n",
" salary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 49 | \n",
" Private | \n",
" 101320 | \n",
" Assoc-acdm | \n",
" 12.0 | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Wife | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 1902 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 1 | \n",
" 44 | \n",
" Private | \n",
" 236746 | \n",
" Masters | \n",
" 14.0 | \n",
" Divorced | \n",
" Exec-managerial | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 10520 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 2 | \n",
" 38 | \n",
" Private | \n",
" 96185 | \n",
" HS-grad | \n",
" NaN | \n",
" Divorced | \n",
" NaN | \n",
" Unmarried | \n",
" Black | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 32 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 3 | \n",
" 38 | \n",
" Self-emp-inc | \n",
" 112847 | \n",
" Prof-school | \n",
" 15.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" Asian-Pac-Islander | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 4 | \n",
" 42 | \n",
" Self-emp-not-inc | \n",
" 82297 | \n",
" 7th-8th | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" Other-service | \n",
" Wife | \n",
" Black | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 50 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 5 | \n",
" 20 | \n",
" Private | \n",
" 63210 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Never-married | \n",
" Handlers-cleaners | \n",
" Own-child | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 15 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 6 | \n",
" 49 | \n",
" Private | \n",
" 44434 | \n",
" Some-college | \n",
" 10.0 | \n",
" Divorced | \n",
" NaN | \n",
" Other-relative | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 35 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 7 | \n",
" 37 | \n",
" Private | \n",
" 138940 | \n",
" 11th | \n",
" 7.0 | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 8 | \n",
" 46 | \n",
" Private | \n",
" 328216 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Married-civ-spouse | \n",
" Craft-repair | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 9 | \n",
" 36 | \n",
" Self-emp-inc | \n",
" 216711 | \n",
" HS-grad | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 99999 | \n",
" 0 | \n",
" 50 | \n",
" ? | \n",
" >=50k | \n",
"
\n",
" \n",
" 10 | \n",
" 23 | \n",
" Private | \n",
" 529223 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Never-married | \n",
" NaN | \n",
" Own-child | \n",
" Black | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 10 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 11 | \n",
" 18 | \n",
" Private | \n",
" 216284 | \n",
" 11th | \n",
" NaN | \n",
" Never-married | \n",
" Adm-clerical | \n",
" Own-child | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 20 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 12 | \n",
" 30 | \n",
" Private | \n",
" 151989 | \n",
" Assoc-voc | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Wife | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 13 | \n",
" 30 | \n",
" Private | \n",
" 55291 | \n",
" Bachelors | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 14 | \n",
" 43 | \n",
" Private | \n",
" 84661 | \n",
" Assoc-voc | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" Sales | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 15 | \n",
" 51 | \n",
" Private | \n",
" 284329 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Widowed | \n",
" NaN | \n",
" Unmarried | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 16 | \n",
" 38 | \n",
" Private | \n",
" 170174 | \n",
" 10th | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" Machine-op-inspct | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 17 | \n",
" 35 | \n",
" Private | \n",
" 261293 | \n",
" Masters | \n",
" 14.0 | \n",
" Never-married | \n",
" NaN | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 60 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 18 | \n",
" 56 | \n",
" State-gov | \n",
" 274111 | \n",
" Masters | \n",
" 14.0 | \n",
" Divorced | \n",
" NaN | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 1669 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 19 | \n",
" 45 | \n",
" Private | \n",
" 267967 | \n",
" Bachelors | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 20 | \n",
" 40 | \n",
" Private | \n",
" 188942 | \n",
" Some-college | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Wife | \n",
" Black | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" Puerto-Rico | \n",
" <50k | \n",
"
\n",
" \n",
" 21 | \n",
" 26 | \n",
" Private | \n",
" 746432 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Never-married | \n",
" Handlers-cleaners | \n",
" Own-child | \n",
" Black | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 48 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 22 | \n",
" 46 | \n",
" Private | \n",
" 117605 | \n",
" 9th | \n",
" NaN | \n",
" Divorced | \n",
" Sales | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 35 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 23 | \n",
" 29 | \n",
" Private | \n",
" 1268339 | \n",
" HS-grad | \n",
" NaN | \n",
" Married-spouse-absent | \n",
" NaN | \n",
" Own-child | \n",
" Black | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 24 | \n",
" 49 | \n",
" Private | \n",
" 247294 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Married-civ-spouse | \n",
" Craft-repair | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 25 | \n",
" 55 | \n",
" Self-emp-inc | \n",
" 222615 | \n",
" Masters | \n",
" 14.0 | \n",
" Married-civ-spouse | \n",
" Exec-managerial | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 60 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 26 | \n",
" 47 | \n",
" Self-emp-not-inc | \n",
" 213745 | \n",
" Some-college | \n",
" NaN | \n",
" Divorced | \n",
" NaN | \n",
" Unmarried | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 27 | \n",
" 41 | \n",
" Self-emp-inc | \n",
" 151089 | \n",
" Some-college | \n",
" NaN | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 50 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 28 | \n",
" 27 | \n",
" Private | \n",
" 153078 | \n",
" Prof-school | \n",
" NaN | \n",
" Never-married | \n",
" Prof-specialty | \n",
" Own-child | \n",
" Asian-Pac-Islander | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 29 | \n",
" 42 | \n",
" Private | \n",
" 70055 | \n",
" 11th | \n",
" 7.0 | \n",
" Married-civ-spouse | \n",
" NaN | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 32531 | \n",
" 25 | \n",
" Private | \n",
" 203871 | \n",
" Assoc-voc | \n",
" 11.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Wife | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 1887 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 32532 | \n",
" 52 | \n",
" State-gov | \n",
" 71344 | \n",
" Masters | \n",
" 14.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32533 | \n",
" 19 | \n",
" Private | \n",
" 445728 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Never-married | \n",
" Craft-repair | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32534 | \n",
" 21 | \n",
" Private | \n",
" 222490 | \n",
" Some-college | \n",
" 10.0 | \n",
" Never-married | \n",
" Handlers-cleaners | \n",
" Own-child | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32535 | \n",
" 49 | \n",
" Private | \n",
" 213431 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Separated | \n",
" Prof-specialty | \n",
" Unmarried | \n",
" Black | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32536 | \n",
" 52 | \n",
" Private | \n",
" 163998 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Married-civ-spouse | \n",
" Sales | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 99999 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 32537 | \n",
" 34 | \n",
" Local-gov | \n",
" 90934 | \n",
" Assoc-voc | \n",
" 11.0 | \n",
" Divorced | \n",
" Protective-serv | \n",
" Own-child | \n",
" Asian-Pac-Islander | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32538 | \n",
" 26 | \n",
" Local-gov | \n",
" 202286 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Never-married | \n",
" Tech-support | \n",
" Own-child | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32539 | \n",
" 44 | \n",
" Private | \n",
" 219441 | \n",
" 10th | \n",
" 6.0 | \n",
" Never-married | \n",
" Sales | \n",
" Unmarried | \n",
" Other | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 35 | \n",
" Dominican-Republic | \n",
" <50k | \n",
"
\n",
" \n",
" 32540 | \n",
" 47 | \n",
" Self-emp-not-inc | \n",
" 162236 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Never-married | \n",
" Craft-repair | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32541 | \n",
" 24 | \n",
" Private | \n",
" 241857 | \n",
" Some-college | \n",
" 10.0 | \n",
" Never-married | \n",
" Adm-clerical | \n",
" Not-in-family | \n",
" Black | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 35 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32542 | \n",
" 34 | \n",
" Private | \n",
" 98283 | \n",
" Prof-school | \n",
" 15.0 | \n",
" Never-married | \n",
" Tech-support | \n",
" Not-in-family | \n",
" Asian-Pac-Islander | \n",
" Male | \n",
" 0 | \n",
" 1564 | \n",
" 40 | \n",
" India | \n",
" >=50k | \n",
"
\n",
" \n",
" 32543 | \n",
" 38 | \n",
" Private | \n",
" 29874 | \n",
" Assoc-voc | \n",
" 11.0 | \n",
" Married-civ-spouse | \n",
" Craft-repair | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32544 | \n",
" 33 | \n",
" Private | \n",
" 124052 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Married-civ-spouse | \n",
" Craft-repair | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32545 | \n",
" 33 | \n",
" Private | \n",
" 206609 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Married-civ-spouse | \n",
" Exec-managerial | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32546 | \n",
" 31 | \n",
" Private | \n",
" 188246 | \n",
" Masters | \n",
" 14.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 50 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32547 | \n",
" 23 | \n",
" Private | \n",
" 267955 | \n",
" Some-college | \n",
" 10.0 | \n",
" Never-married | \n",
" Sales | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32548 | \n",
" 28 | \n",
" Private | \n",
" 187479 | \n",
" Some-college | \n",
" 10.0 | \n",
" Married-civ-spouse | \n",
" Exec-managerial | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 55 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32549 | \n",
" 27 | \n",
" Private | \n",
" 171655 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Never-married | \n",
" Adm-clerical | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 42 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32550 | \n",
" 27 | \n",
" Private | \n",
" 116358 | \n",
" Some-college | \n",
" 10.0 | \n",
" Never-married | \n",
" Craft-repair | \n",
" Own-child | \n",
" Asian-Pac-Islander | \n",
" Male | \n",
" 0 | \n",
" 1980 | \n",
" 40 | \n",
" Philippines | \n",
" <50k | \n",
"
\n",
" \n",
" 32551 | \n",
" 60 | \n",
" Private | \n",
" 230545 | \n",
" 7th-8th | \n",
" 4.0 | \n",
" Divorced | \n",
" Adm-clerical | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 35 | \n",
" Cuba | \n",
" <50k | \n",
"
\n",
" \n",
" 32552 | \n",
" 39 | \n",
" Private | \n",
" 139743 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Separated | \n",
" Adm-clerical | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 20 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32553 | \n",
" 35 | \n",
" Self-emp-inc | \n",
" 135436 | \n",
" Prof-school | \n",
" 15.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 50 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 32554 | \n",
" 53 | \n",
" Private | \n",
" 35102 | \n",
" Some-college | \n",
" 10.0 | \n",
" Divorced | \n",
" Adm-clerical | \n",
" Not-in-family | \n",
" White | \n",
" Female | \n",
" 0 | \n",
" 0 | \n",
" 34 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32555 | \n",
" 48 | \n",
" Private | \n",
" 355320 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 45 | \n",
" Canada | \n",
" >=50k | \n",
"
\n",
" \n",
" 32556 | \n",
" 36 | \n",
" Private | \n",
" 297449 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Divorced | \n",
" Prof-specialty | \n",
" Not-in-family | \n",
" White | \n",
" Male | \n",
" 14084 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 32557 | \n",
" 23 | \n",
" ? | \n",
" 123983 | \n",
" Bachelors | \n",
" 13.0 | \n",
" Never-married | \n",
" ? | \n",
" Own-child | \n",
" Other | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32558 | \n",
" 53 | \n",
" Private | \n",
" 157069 | \n",
" Assoc-acdm | \n",
" 12.0 | \n",
" Married-civ-spouse | \n",
" Machine-op-inspct | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" >=50k | \n",
"
\n",
" \n",
" 32559 | \n",
" 32 | \n",
" Local-gov | \n",
" 217296 | \n",
" HS-grad | \n",
" 9.0 | \n",
" Married-civ-spouse | \n",
" Transport-moving | \n",
" Wife | \n",
" White | \n",
" Female | \n",
" 4064 | \n",
" 0 | \n",
" 22 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
" 32560 | \n",
" 26 | \n",
" Private | \n",
" 182308 | \n",
" Some-college | \n",
" 10.0 | \n",
" Married-civ-spouse | \n",
" Prof-specialty | \n",
" Husband | \n",
" White | \n",
" Male | \n",
" 0 | \n",
" 0 | \n",
" 40 | \n",
" United-States | \n",
" <50k | \n",
"
\n",
" \n",
"
\n",
"
32561 rows × 15 columns
\n",
"
"
],
"text/plain": [
" age workclass fnlwgt education education-num \\\n",
"0 49 Private 101320 Assoc-acdm 12.0 \n",
"1 44 Private 236746 Masters 14.0 \n",
"2 38 Private 96185 HS-grad NaN \n",
"3 38 Self-emp-inc 112847 Prof-school 15.0 \n",
"4 42 Self-emp-not-inc 82297 7th-8th NaN \n",
"5 20 Private 63210 HS-grad 9.0 \n",
"6 49 Private 44434 Some-college 10.0 \n",
"7 37 Private 138940 11th 7.0 \n",
"8 46 Private 328216 HS-grad 9.0 \n",
"9 36 Self-emp-inc 216711 HS-grad NaN \n",
"10 23 Private 529223 Bachelors 13.0 \n",
"11 18 Private 216284 11th NaN \n",
"12 30 Private 151989 Assoc-voc NaN \n",
"13 30 Private 55291 Bachelors NaN \n",
"14 43 Private 84661 Assoc-voc NaN \n",
"15 51 Private 284329 HS-grad 9.0 \n",
"16 38 Private 170174 10th NaN \n",
"17 35 Private 261293 Masters 14.0 \n",
"18 56 State-gov 274111 Masters 14.0 \n",
"19 45 Private 267967 Bachelors NaN \n",
"20 40 Private 188942 Some-college NaN \n",
"21 26 Private 746432 HS-grad 9.0 \n",
"22 46 Private 117605 9th NaN \n",
"23 29 Private 1268339 HS-grad NaN \n",
"24 49 Private 247294 HS-grad 9.0 \n",
"25 55 Self-emp-inc 222615 Masters 14.0 \n",
"26 47 Self-emp-not-inc 213745 Some-college NaN \n",
"27 41 Self-emp-inc 151089 Some-college NaN \n",
"28 27 Private 153078 Prof-school NaN \n",
"29 42 Private 70055 11th 7.0 \n",
"... ... ... ... ... ... \n",
"32531 25 Private 203871 Assoc-voc 11.0 \n",
"32532 52 State-gov 71344 Masters 14.0 \n",
"32533 19 Private 445728 HS-grad 9.0 \n",
"32534 21 Private 222490 Some-college 10.0 \n",
"32535 49 Private 213431 HS-grad 9.0 \n",
"32536 52 Private 163998 HS-grad 9.0 \n",
"32537 34 Local-gov 90934 Assoc-voc 11.0 \n",
"32538 26 Local-gov 202286 Bachelors 13.0 \n",
"32539 44 Private 219441 10th 6.0 \n",
"32540 47 Self-emp-not-inc 162236 Bachelors 13.0 \n",
"32541 24 Private 241857 Some-college 10.0 \n",
"32542 34 Private 98283 Prof-school 15.0 \n",
"32543 38 Private 29874 Assoc-voc 11.0 \n",
"32544 33 Private 124052 HS-grad 9.0 \n",
"32545 33 Private 206609 Bachelors 13.0 \n",
"32546 31 Private 188246 Masters 14.0 \n",
"32547 23 Private 267955 Some-college 10.0 \n",
"32548 28 Private 187479 Some-college 10.0 \n",
"32549 27 Private 171655 HS-grad 9.0 \n",
"32550 27 Private 116358 Some-college 10.0 \n",
"32551 60 Private 230545 7th-8th 4.0 \n",
"32552 39 Private 139743 HS-grad 9.0 \n",
"32553 35 Self-emp-inc 135436 Prof-school 15.0 \n",
"32554 53 Private 35102 Some-college 10.0 \n",
"32555 48 Private 355320 Bachelors 13.0 \n",
"32556 36 Private 297449 Bachelors 13.0 \n",
"32557 23 ? 123983 Bachelors 13.0 \n",
"32558 53 Private 157069 Assoc-acdm 12.0 \n",
"32559 32 Local-gov 217296 HS-grad 9.0 \n",
"32560 26 Private 182308 Some-college 10.0 \n",
"\n",
" marital-status occupation relationship \\\n",
"0 Married-civ-spouse NaN Wife \n",
"1 Divorced Exec-managerial Not-in-family \n",
"2 Divorced NaN Unmarried \n",
"3 Married-civ-spouse Prof-specialty Husband \n",
"4 Married-civ-spouse Other-service Wife \n",
"5 Never-married Handlers-cleaners Own-child \n",
"6 Divorced NaN Other-relative \n",
"7 Married-civ-spouse NaN Husband \n",
"8 Married-civ-spouse Craft-repair Husband \n",
"9 Married-civ-spouse NaN Husband \n",
"10 Never-married NaN Own-child \n",
"11 Never-married Adm-clerical Own-child \n",
"12 Married-civ-spouse NaN Wife \n",
"13 Married-civ-spouse NaN Husband \n",
"14 Married-civ-spouse Sales Husband \n",
"15 Widowed NaN Unmarried \n",
"16 Married-civ-spouse Machine-op-inspct Husband \n",
"17 Never-married NaN Not-in-family \n",
"18 Divorced NaN Not-in-family \n",
"19 Married-civ-spouse Prof-specialty Husband \n",
"20 Married-civ-spouse NaN Wife \n",
"21 Never-married Handlers-cleaners Own-child \n",
"22 Divorced Sales Not-in-family \n",
"23 Married-spouse-absent NaN Own-child \n",
"24 Married-civ-spouse Craft-repair Husband \n",
"25 Married-civ-spouse Exec-managerial Husband \n",
"26 Divorced NaN Unmarried \n",
"27 Married-civ-spouse NaN Husband \n",
"28 Never-married Prof-specialty Own-child \n",
"29 Married-civ-spouse NaN Husband \n",
"... ... ... ... \n",
"32531 Married-civ-spouse Prof-specialty Wife \n",
"32532 Married-civ-spouse Prof-specialty Husband \n",
"32533 Never-married Craft-repair Not-in-family \n",
"32534 Never-married Handlers-cleaners Own-child \n",
"32535 Separated Prof-specialty Unmarried \n",
"32536 Married-civ-spouse Sales Husband \n",
"32537 Divorced Protective-serv Own-child \n",
"32538 Never-married Tech-support Own-child \n",
"32539 Never-married Sales Unmarried \n",
"32540 Never-married Craft-repair Not-in-family \n",
"32541 Never-married Adm-clerical Not-in-family \n",
"32542 Never-married Tech-support Not-in-family \n",
"32543 Married-civ-spouse Craft-repair Husband \n",
"32544 Married-civ-spouse Craft-repair Husband \n",
"32545 Married-civ-spouse Exec-managerial Husband \n",
"32546 Married-civ-spouse Prof-specialty Husband \n",
"32547 Never-married Sales Not-in-family \n",
"32548 Married-civ-spouse Exec-managerial Husband \n",
"32549 Never-married Adm-clerical Not-in-family \n",
"32550 Never-married Craft-repair Own-child \n",
"32551 Divorced Adm-clerical Not-in-family \n",
"32552 Separated Adm-clerical Not-in-family \n",
"32553 Married-civ-spouse Prof-specialty Husband \n",
"32554 Divorced Adm-clerical Not-in-family \n",
"32555 Married-civ-spouse Prof-specialty Husband \n",
"32556 Divorced Prof-specialty Not-in-family \n",
"32557 Never-married ? Own-child \n",
"32558 Married-civ-spouse Machine-op-inspct Husband \n",
"32559 Married-civ-spouse Transport-moving Wife \n",
"32560 Married-civ-spouse Prof-specialty Husband \n",
"\n",
" race sex capital-gain capital-loss \\\n",
"0 White Female 0 1902 \n",
"1 White Male 10520 0 \n",
"2 Black Female 0 0 \n",
"3 Asian-Pac-Islander Male 0 0 \n",
"4 Black Female 0 0 \n",
"5 White Male 0 0 \n",
"6 White Male 0 0 \n",
"7 White Male 0 0 \n",
"8 White Male 0 0 \n",
"9 White Male 99999 0 \n",
"10 Black Male 0 0 \n",
"11 White Female 0 0 \n",
"12 White Female 0 0 \n",
"13 White Male 0 0 \n",
"14 White Male 0 0 \n",
"15 White Male 0 0 \n",
"16 White Male 0 0 \n",
"17 White Male 0 0 \n",
"18 White Male 0 1669 \n",
"19 White Male 0 0 \n",
"20 Black Female 0 0 \n",
"21 Black Male 0 0 \n",
"22 White Male 0 0 \n",
"23 Black Male 0 0 \n",
"24 White Male 0 0 \n",
"25 White Male 0 0 \n",
"26 White Female 0 0 \n",
"27 White Male 0 0 \n",
"28 Asian-Pac-Islander Male 0 0 \n",
"29 White Male 0 0 \n",
"... ... ... ... ... \n",
"32531 White Female 0 1887 \n",
"32532 White Male 0 0 \n",
"32533 White Male 0 0 \n",
"32534 White Female 0 0 \n",
"32535 Black Female 0 0 \n",
"32536 White Male 99999 0 \n",
"32537 Asian-Pac-Islander Male 0 0 \n",
"32538 White Male 0 0 \n",
"32539 Other Female 0 0 \n",
"32540 White Female 0 0 \n",
"32541 Black Female 0 0 \n",
"32542 Asian-Pac-Islander Male 0 1564 \n",
"32543 White Male 0 0 \n",
"32544 White Male 0 0 \n",
"32545 White Male 0 0 \n",
"32546 White Male 0 0 \n",
"32547 White Female 0 0 \n",
"32548 White Male 0 0 \n",
"32549 White Female 0 0 \n",
"32550 Asian-Pac-Islander Male 0 1980 \n",
"32551 White Female 0 0 \n",
"32552 White Female 0 0 \n",
"32553 White Male 0 0 \n",
"32554 White Female 0 0 \n",
"32555 White Male 0 0 \n",
"32556 White Male 14084 0 \n",
"32557 Other Male 0 0 \n",
"32558 White Male 0 0 \n",
"32559 White Female 4064 0 \n",
"32560 White Male 0 0 \n",
"\n",
" hours-per-week native-country salary \n",
"0 40 United-States >=50k \n",
"1 45 United-States >=50k \n",
"2 32 United-States <50k \n",
"3 40 United-States >=50k \n",
"4 50 United-States <50k \n",
"5 15 United-States <50k \n",
"6 35 United-States <50k \n",
"7 40 United-States <50k \n",
"8 40 United-States >=50k \n",
"9 50 ? >=50k \n",
"10 10 United-States <50k \n",
"11 20 United-States <50k \n",
"12 40 United-States <50k \n",
"13 40 United-States >=50k \n",
"14 45 United-States <50k \n",
"15 40 United-States <50k \n",
"16 40 United-States >=50k \n",
"17 60 United-States <50k \n",
"18 40 United-States <50k \n",
"19 45 United-States >=50k \n",
"20 40 Puerto-Rico <50k \n",
"21 48 United-States <50k \n",
"22 35 United-States <50k \n",
"23 40 United-States <50k \n",
"24 45 United-States >=50k \n",
"25 60 United-States <50k \n",
"26 45 United-States <50k \n",
"27 50 United-States <50k \n",
"28 40 United-States <50k \n",
"29 45 United-States <50k \n",
"... ... ... ... \n",
"32531 40 United-States >=50k \n",
"32532 40 United-States <50k \n",
"32533 40 United-States <50k \n",
"32534 40 United-States <50k \n",
"32535 40 United-States <50k \n",
"32536 45 United-States >=50k \n",
"32537 40 United-States <50k \n",
"32538 40 United-States <50k \n",
"32539 35 Dominican-Republic <50k \n",
"32540 40 United-States <50k \n",
"32541 35 United-States <50k \n",
"32542 40 India >=50k \n",
"32543 40 United-States <50k \n",
"32544 40 United-States <50k \n",
"32545 45 United-States <50k \n",
"32546 50 United-States <50k \n",
"32547 40 United-States <50k \n",
"32548 55 United-States <50k \n",
"32549 42 United-States <50k \n",
"32550 40 Philippines <50k \n",
"32551 35 Cuba <50k \n",
"32552 20 United-States <50k \n",
"32553 50 United-States >=50k \n",
"32554 34 United-States <50k \n",
"32555 45 Canada >=50k \n",
"32556 40 United-States >=50k \n",
"32557 40 United-States <50k \n",
"32558 40 United-States >=50k \n",
"32559 22 United-States <50k \n",
"32560 40 United-States <50k \n",
"\n",
"[32561 rows x 15 columns]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
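],
"source": [
"# Test: reduce_mem_usage(df) -- the call is commented out; this cell's saved output appears to come from an earlier run (TODO: confirm and re-run, or clear the output)"
]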
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
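{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>age</th>\n",
"      <th>workclass</th>\n",
"      <th>fnlwgt</th>\n",
"      <th>education</th>\n",
"      <th>education-num</th>\n",
"      <th>marital-status</th>\n",
"      <th>occupation</th>\n",
"      <th>relationship</th>\n",
"      <th>race</th>\n",
"      <th>sex</th>\n",
"      <th>capital-gain</th>\n",
"      <th>capital-loss</th>\n",
"      <th>hours-per-week</th>\n",
"      <th>native-country</th>\n",
"      <th>salary</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>49</td>\n",
"      <td>Private</td>\n",
"      <td>101320</td>\n",
"      <td>Assoc-acdm</td>\n",
"      <td>12.0</td>\n",
"      <td>Married-civ-spouse</td>\n",
"      <td>NaN</td>\n",
"      <td>Wife</td>\n",
"      <td>White</td>\n",
"      <td>Female</td>\n",
"      <td>0</td>\n",
"      <td>1902</td>\n",
"      <td>40</td>\n",
"      <td>United-States</td>\n",
"      <td>&gt;=50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>44</td>\n",
"      <td>Private</td>\n",
"      <td>236746</td>\n",
"      <td>Masters</td>\n",
"      <td>14.0</td>\n",
"      <td>Divorced</td>\n",
"      <td>Exec-managerial</td>\n",
"      <td>Not-in-family</td>\n",
"      <td>White</td>\n",
"      <td>Male</td>\n",
"      <td>10520</td>\n",
"      <td>0</td>\n",
"      <td>45</td>\n",
"      <td>United-States</td>\n",
"      <td>&gt;=50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>38</td>\n",
"      <td>Private</td>\n",
"      <td>96185</td>\n",
"      <td>HS-grad</td>\n",
"      <td>NaN</td>\n",
"      <td>Divorced</td>\n",
"      <td>NaN</td>\n",
"      <td>Unmarried</td>\n",
"      <td>Black</td>\n",
"      <td>Female</td>\n",
"      <td>0</td>\n",
"      <td>0</td>\n",
"      <td>32</td>\n",
"      <td>United-States</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>38</td>\n",
"      <td>Self-emp-inc</td>\n",
"      <td>112847</td>\n",
"      <td>Prof-school</td>\n",
"      <td>15.0</td>\n",
"      <td>Married-civ-spouse</td>\n",
"      <td>Prof-specialty</td>\n",
"      <td>Husband</td>\n",
"      <td>Asian-Pac-Islander</td>\n",
"      <td>Male</td>\n",
"      <td>0</td>\n",
"      <td>0</td>\n",
"      <td>40</td>\n",
"      <td>United-States</td>\n",
"      <td>&gt;=50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>42</td>\n",
"      <td>Self-emp-not-inc</td>\n",
"      <td>82297</td>\n",
"      <td>7th-8th</td>\n",
"      <td>NaN</td>\n",
"      <td>Married-civ-spouse</td>\n",
"      <td>Other-service</td>\n",
"      <td>Wife</td>\n",
"      <td>Black</td>\n",
"      <td>Female</td>\n",
"      <td>0</td>\n",
"      <td>0</td>\n",
"      <td>50</td>\n",
"      <td>United-States</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],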
"text/plain": [
" age workclass fnlwgt education education-num \\\n",
"0 49 Private 101320 Assoc-acdm 12.0 \n",
"1 44 Private 236746 Masters 14.0 \n",
"2 38 Private 96185 HS-grad NaN \n",
"3 38 Self-emp-inc 112847 Prof-school 15.0 \n",
"4 42 Self-emp-not-inc 82297 7th-8th NaN \n",
"\n",
" marital-status occupation relationship race \\\n",
"0 Married-civ-spouse NaN Wife White \n",
"1 Divorced Exec-managerial Not-in-family White \n",
"2 Divorced NaN Unmarried Black \n",
"3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n",
"4 Married-civ-spouse Other-service Wife Black \n",
"\n",
" sex capital-gain capital-loss hours-per-week native-country salary \n",
"0 Female 0 1902 40 United-States >=50k \n",
"1 Male 10520 0 45 United-States >=50k \n",
"2 Female 0 0 32 United-States <50k \n",
"3 Male 0 0 40 United-States >=50k \n",
"4 Female 0 0 50 United-States <50k "
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dep_var = 'salary'\n",
"cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n",
"cont_names = ['age', 'fnlwgt', 'education-num']\n",
"procs = [FillMissing, Categorify, Normalize]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n",
" .split_by_idx(list(range(800,1000)))\n",
" .label_from_df(cols=dep_var)\n",
" .add_test(test)\n",
" .databunch())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th>workclass</th>\n",
"      <th>education</th>\n",
"      <th>marital-status</th>\n",
"      <th>occupation</th>\n",
"      <th>relationship</th>\n",
"      <th>race</th>\n",
"      <th>education-num_na</th>\n",
"      <th>age</th>\n",
"      <th>fnlwgt</th>\n",
"      <th>education-num</th>\n",
"      <th>target</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Bachelors</td>\n",
"      <td>Married-civ-spouse</td>\n",
"      <td>Exec-managerial</td>\n",
"      <td>Husband</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>1.5695</td>\n",
"      <td>-0.9075</td>\n",
"      <td>1.1422</td>\n",
"      <td>&gt;=50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Assoc-acdm</td>\n",
"      <td>Never-married</td>\n",
"      <td>Craft-repair</td>\n",
"      <td>Not-in-family</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>0.0303</td>\n",
"      <td>-0.7171</td>\n",
"      <td>0.7511</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>7th-8th</td>\n",
"      <td>Never-married</td>\n",
"      <td>Farming-fishing</td>\n",
"      <td>Unmarried</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>-1.2891</td>\n",
"      <td>1.4882</td>\n",
"      <td>-2.3780</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>HS-grad</td>\n",
"      <td>Widowed</td>\n",
"      <td>Sales</td>\n",
"      <td>Unmarried</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>3.4020</td>\n",
"      <td>-0.7050</td>\n",
"      <td>-0.4223</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Some-college</td>\n",
"      <td>Never-married</td>\n",
"      <td>Other-service</td>\n",
"      <td>Unmarried</td>\n",
"      <td>Black</td>\n",
"      <td>False</td>\n",
"      <td>0.0303</td>\n",
"      <td>-0.2917</td>\n",
"      <td>-0.0312</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Bachelors</td>\n",
"      <td>Divorced</td>\n",
"      <td>Adm-clerical</td>\n",
"      <td>Unmarried</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>1.0564</td>\n",
"      <td>-0.4456</td>\n",
"      <td>1.1422</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>HS-grad</td>\n",
"      <td>Widowed</td>\n",
"      <td>Sales</td>\n",
"      <td>Not-in-family</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>2.0093</td>\n",
"      <td>0.8941</td>\n",
"      <td>-0.4223</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Some-college</td>\n",
"      <td>Widowed</td>\n",
"      <td>Adm-clerical</td>\n",
"      <td>Unmarried</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>-0.4095</td>\n",
"      <td>-0.6688</td>\n",
"      <td>-0.0312</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>Some-college</td>\n",
"      <td>Married-civ-spouse</td>\n",
"      <td>Exec-managerial</td>\n",
"      <td>Own-child</td>\n",
"      <td>Black</td>\n",
"      <td>False</td>\n",
"      <td>-0.7027</td>\n",
"      <td>-0.4258</td>\n",
"      <td>-0.0312</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>Private</td>\n",
"      <td>HS-grad</td>\n",
"      <td>Never-married</td>\n",
"      <td>Handlers-cleaners</td>\n",
"      <td>Not-in-family</td>\n",
"      <td>White</td>\n",
"      <td>False</td>\n",
"      <td>-0.9226</td>\n",
"      <td>-0.7288</td>\n",
"      <td>-0.4223</td>\n",
"      <td>&lt;50k</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data.show_batch(rows=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
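{
"data": {
"text/html": [
"Total time: 00:03 <p><table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: left;\">\n",
"      <th>epoch</th>\n",
"      <th>train_loss</th>\n",
"      <th>valid_loss</th>\n",
"      <th>accuracy</th>\n",
"      <th>time</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <td>0</td>\n",
"      <td>0.371782</td>\n",
"      <td>0.411682</td>\n",
"      <td>0.830000</td>\n",
"      <td>00:03</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}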
],
"source": [
"learn = tabular_learner(data, layers=[200,100], metrics=accuracy)\n",
"learn.fit(1, 1e-2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inference"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"row = df.iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Category <50k, tensor(0), tensor([0.5185, 0.4815]))"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learn.predict(row)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}