{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.tabular import * # Quick accesss to tabular functionality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabular data should be in a Pandas `DataFrame`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['>=50k', '<50k'], dtype=object)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['salary'].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# function import\n", "from fastai.utils.mem import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0, 7812)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# other function teset\n", "gpu_with_max_free_mem()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# test reduce_mem_usage(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
049Private101320Assoc-acdm12.0Married-civ-spouseNaNWifeWhiteFemale0190240United-States>=50k
144Private236746Masters14.0DivorcedExec-managerialNot-in-familyWhiteMale10520045United-States>=50k
238Private96185HS-gradNaNDivorcedNaNUnmarriedBlackFemale0032United-States<50k
338Self-emp-inc112847Prof-school15.0Married-civ-spouseProf-specialtyHusbandAsian-Pac-IslanderMale0040United-States>=50k
442Self-emp-not-inc822977th-8thNaNMarried-civ-spouseOther-serviceWifeBlackFemale0050United-States<50k
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 49 Private 101320 Assoc-acdm 12.0 \n", "1 44 Private 236746 Masters 14.0 \n", "2 38 Private 96185 HS-grad NaN \n", "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", "\n", " marital-status occupation relationship race \\\n", "0 Married-civ-spouse NaN Wife White \n", "1 Divorced Exec-managerial Not-in-family White \n", "2 Divorced NaN Unmarried Black \n", "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", "4 Married-civ-spouse Other-service Wife Black \n", "\n", " sex capital-gain capital-loss hours-per-week native-country salary \n", "0 Female 0 1902 40 United-States >=50k \n", "1 Male 10520 0 45 United-States >=50k \n", "2 Female 0 0 32 United-States <50k \n", "3 Male 0 0 40 United-States >=50k \n", "4 Female 0 0 50 United-States <50k " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dep_var = 'salary'\n", "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [FillMissing, Categorify, Normalize]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", " .split_by_idx(list(range(800,1000)))\n", " .label_from_df(cols=dep_var)\n", " .add_test(test)\n", " .databunch())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
workclasseducationmarital-statusoccupationrelationshipraceeducation-num_naagefnlwgteducation-numtarget
PrivateBachelorsNever-marriedExec-managerialNot-in-familyWhiteFalse-1.1425-0.92801.1422<50k
PrivateHS-gradNever-marriedOther-serviceNot-in-familyWhiteFalse-0.55610.7244-0.4224<50k
PrivateSome-collegeNever-marriedOther-serviceOwn-childWhiteFalse-1.5090-0.1673-0.0312<50k
PrivateHS-gradDivorcedAdm-clericalOther-relativeAmer-Indian-EskimoFalse1.2763-0.8370-0.4224<50k
Local-govBachelorsDivorcedTransport-movingNot-in-familyWhiteFalse0.2502-1.36171.1422<50k
PrivateSome-collegeMarried-civ-spouseProf-specialtyHusbandWhiteFalse-0.8493-0.3286-0.0312>=50k
Private11thNever-marriedProf-specialtyOwn-childWhiteFalse-1.50900.8521-1.2046<50k
Private7th-8thMarried-civ-spouseTech-supportHusbandWhiteFalse-0.26290.0550-2.3781<50k
PrivateHS-gradNever-marriedTransport-movingNot-in-familyWhiteFalse-0.8493-0.3286-0.4224<50k
PrivateBachelorsNever-marriedAdm-clericalNot-in-familyWhiteFalse-0.77601.31591.1422<50k
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data.show_batch(rows=10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:02

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_lossaccuracytime
00.3571220.3816490.79000000:02
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn = tabular_learner(data, layers=[200,100], metrics=accuracy)\n", "learn.fit(1, 1e-2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "row = df.iloc[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Category >=50k, tensor(1), tensor([0.3581, 0.6419]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }