{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.tabular import * # Quick access to tabular functionality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabular data should be in a Pandas `DataFrame`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['>=50k', '<50k'], dtype=object)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['salary'].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
049Private101320Assoc-acdm12.0Married-civ-spouseNaNWifeWhiteFemale0190240United-States>=50k
144Private236746Masters14.0DivorcedExec-managerialNot-in-familyWhiteMale10520045United-States>=50k
238Private96185HS-gradNaNDivorcedNaNUnmarriedBlackFemale0032United-States<50k
338Self-emp-inc112847Prof-school15.0Married-civ-spouseProf-specialtyHusbandAsian-Pac-IslanderMale0040United-States>=50k
442Self-emp-not-inc822977th-8thNaNMarried-civ-spouseOther-serviceWifeBlackFemale0050United-States<50k
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 49 Private 101320 Assoc-acdm 12.0 \n", "1 44 Private 236746 Masters 14.0 \n", "2 38 Private 96185 HS-grad NaN \n", "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", "\n", " marital-status occupation relationship race \\\n", "0 Married-civ-spouse NaN Wife White \n", "1 Divorced Exec-managerial Not-in-family White \n", "2 Divorced NaN Unmarried Black \n", "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", "4 Married-civ-spouse Other-service Wife Black \n", "\n", " sex capital-gain capital-loss hours-per-week native-country salary \n", "0 Female 0 1902 40 United-States >=50k \n", "1 Male 10520 0 45 United-States >=50k \n", "2 Female 0 0 32 United-States <50k \n", "3 Male 0 0 40 United-States >=50k \n", "4 Female 0 0 50 United-States <50k " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dep_var = 'salary'\n", "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [FillMissing, Categorify, Normalize]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", " .split_by_idx(list(range(800,1000)))\n", " .label_from_df(cols=dep_var)\n", " .add_test(test)\n", " .databunch())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
workclasseducationmarital-statusoccupationrelationshipraceeducation-num_naagefnlwgteducation-numtarget
Private Some-college Never-married Handlers-cleaners Own-child WhiteFalse-1.50900.2360-0.0312<50k
Private Some-college Divorced Exec-managerial Not-in-family WhiteFalse-0.48280.0802-0.0312<50k
Local-gov HS-grad Separated Other-service Not-in-family BlackFalse0.25020.3442-0.4224<50k
Private Assoc-voc Married-civ-spouse Craft-repair Husband WhiteFalse0.9098-0.85950.3599<50k
Private HS-grad Never-married Sales Not-in-family WhiteFalse-1.0692-0.6139-0.4224<50k
Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial Husband WhiteFalse-0.33620.22291.1422<50k
Private Bachelors Never-married Farming-fishing Own-child WhiteFalse-0.9959-0.82711.1422>=50k
State-gov Bachelors Divorced Adm-clerical Unmarried WhiteFalse0.6899-1.31761.1422<50k
Private HS-grad Married-civ-spouse Adm-clerical Wife BlackFalse-1.0692-1.3076-0.4224<50k
Private Masters Married-civ-spouse Exec-managerial Husband Asian-Pac-IslanderFalse-0.3362-0.85181.5334>=50k
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data.show_batch(rows=10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:04

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_lossaccuracy
10.3580960.3700090.830000
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn = tabular_learner(data, layers=[200,100], metrics=accuracy)\n", "learn.fit(1, 1e-2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "row = df.iloc[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Category <50k, tensor(0), tensor([0.5142, 0.4858]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(row)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }