{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.tabular import * # Quick access to tabular functionality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabular data should be in a Pandas `DataFrame`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['>=50k', '<50k'], dtype=object)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['salary'].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# function import\n", "from fastai.utils.mem import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1, 8109)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# other function test\n", "gpu_with_max_free_mem()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Memory usage of dataframe is 3.73 MB\n", "Memory usage after optimization is: 0.78 MB\n", "Decreased by 79.0%\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
049Private101320Assoc-acdm12.0Married-civ-spouseNaNWifeWhiteFemale0190240United-States>=50k
144Private236746Masters14.0DivorcedExec-managerialNot-in-familyWhiteMale10520045United-States>=50k
238Private96185HS-gradNaNDivorcedNaNUnmarriedBlackFemale0032United-States<50k
338Self-emp-inc112847Prof-school15.0Married-civ-spouseProf-specialtyHusbandAsian-Pac-IslanderMale0040United-States>=50k
442Self-emp-not-inc822977th-8thNaNMarried-civ-spouseOther-serviceWifeBlackFemale0050United-States<50k
520Private63210HS-grad9.0Never-marriedHandlers-cleanersOwn-childWhiteMale0015United-States<50k
649Private44434Some-college10.0DivorcedNaNOther-relativeWhiteMale0035United-States<50k
737Private13894011th7.0Married-civ-spouseNaNHusbandWhiteMale0040United-States<50k
846Private328216HS-grad9.0Married-civ-spouseCraft-repairHusbandWhiteMale0040United-States>=50k
936Self-emp-inc216711HS-gradNaNMarried-civ-spouseNaNHusbandWhiteMale99999050?>=50k
1023Private529223Bachelors13.0Never-marriedNaNOwn-childBlackMale0010United-States<50k
1118Private21628411thNaNNever-marriedAdm-clericalOwn-childWhiteFemale0020United-States<50k
1230Private151989Assoc-vocNaNMarried-civ-spouseNaNWifeWhiteFemale0040United-States<50k
1330Private55291BachelorsNaNMarried-civ-spouseNaNHusbandWhiteMale0040United-States>=50k
1443Private84661Assoc-vocNaNMarried-civ-spouseSalesHusbandWhiteMale0045United-States<50k
1551Private284329HS-grad9.0WidowedNaNUnmarriedWhiteMale0040United-States<50k
1638Private17017410thNaNMarried-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>=50k
1735Private261293Masters14.0Never-marriedNaNNot-in-familyWhiteMale0060United-States<50k
1856State-gov274111Masters14.0DivorcedNaNNot-in-familyWhiteMale0166940United-States<50k
1945Private267967BachelorsNaNMarried-civ-spouseProf-specialtyHusbandWhiteMale0045United-States>=50k
2040Private188942Some-collegeNaNMarried-civ-spouseNaNWifeBlackFemale0040Puerto-Rico<50k
2126Private746432HS-grad9.0Never-marriedHandlers-cleanersOwn-childBlackMale0048United-States<50k
2246Private1176059thNaNDivorcedSalesNot-in-familyWhiteMale0035United-States<50k
2329Private1268339HS-gradNaNMarried-spouse-absentNaNOwn-childBlackMale0040United-States<50k
2449Private247294HS-grad9.0Married-civ-spouseCraft-repairHusbandWhiteMale0045United-States>=50k
2555Self-emp-inc222615Masters14.0Married-civ-spouseExec-managerialHusbandWhiteMale0060United-States<50k
2647Self-emp-not-inc213745Some-collegeNaNDivorcedNaNUnmarriedWhiteFemale0045United-States<50k
2741Self-emp-inc151089Some-collegeNaNMarried-civ-spouseNaNHusbandWhiteMale0050United-States<50k
2827Private153078Prof-schoolNaNNever-marriedProf-specialtyOwn-childAsian-Pac-IslanderMale0040United-States<50k
2942Private7005511th7.0Married-civ-spouseNaNHusbandWhiteMale0045United-States<50k
................................................
3253125Private203871Assoc-voc11.0Married-civ-spouseProf-specialtyWifeWhiteFemale0188740United-States>=50k
3253252State-gov71344Masters14.0Married-civ-spouseProf-specialtyHusbandWhiteMale0040United-States<50k
3253319Private445728HS-grad9.0Never-marriedCraft-repairNot-in-familyWhiteMale0040United-States<50k
3253421Private222490Some-college10.0Never-marriedHandlers-cleanersOwn-childWhiteFemale0040United-States<50k
3253549Private213431HS-grad9.0SeparatedProf-specialtyUnmarriedBlackFemale0040United-States<50k
3253652Private163998HS-grad9.0Married-civ-spouseSalesHusbandWhiteMale99999045United-States>=50k
3253734Local-gov90934Assoc-voc11.0DivorcedProtective-servOwn-childAsian-Pac-IslanderMale0040United-States<50k
3253826Local-gov202286Bachelors13.0Never-marriedTech-supportOwn-childWhiteMale0040United-States<50k
3253944Private21944110th6.0Never-marriedSalesUnmarriedOtherFemale0035Dominican-Republic<50k
3254047Self-emp-not-inc162236Bachelors13.0Never-marriedCraft-repairNot-in-familyWhiteFemale0040United-States<50k
3254124Private241857Some-college10.0Never-marriedAdm-clericalNot-in-familyBlackFemale0035United-States<50k
3254234Private98283Prof-school15.0Never-marriedTech-supportNot-in-familyAsian-Pac-IslanderMale0156440India>=50k
3254338Private29874Assoc-voc11.0Married-civ-spouseCraft-repairHusbandWhiteMale0040United-States<50k
3254433Private124052HS-grad9.0Married-civ-spouseCraft-repairHusbandWhiteMale0040United-States<50k
3254533Private206609Bachelors13.0Married-civ-spouseExec-managerialHusbandWhiteMale0045United-States<50k
3254631Private188246Masters14.0Married-civ-spouseProf-specialtyHusbandWhiteMale0050United-States<50k
3254723Private267955Some-college10.0Never-marriedSalesNot-in-familyWhiteFemale0040United-States<50k
3254828Private187479Some-college10.0Married-civ-spouseExec-managerialHusbandWhiteMale0055United-States<50k
3254927Private171655HS-grad9.0Never-marriedAdm-clericalNot-in-familyWhiteFemale0042United-States<50k
3255027Private116358Some-college10.0Never-marriedCraft-repairOwn-childAsian-Pac-IslanderMale0198040Philippines<50k
3255160Private2305457th-8th4.0DivorcedAdm-clericalNot-in-familyWhiteFemale0035Cuba<50k
3255239Private139743HS-grad9.0SeparatedAdm-clericalNot-in-familyWhiteFemale0020United-States<50k
3255335Self-emp-inc135436Prof-school15.0Married-civ-spouseProf-specialtyHusbandWhiteMale0050United-States>=50k
3255453Private35102Some-college10.0DivorcedAdm-clericalNot-in-familyWhiteFemale0034United-States<50k
3255548Private355320Bachelors13.0Married-civ-spouseProf-specialtyHusbandWhiteMale0045Canada>=50k
3255636Private297449Bachelors13.0DivorcedProf-specialtyNot-in-familyWhiteMale14084040United-States>=50k
3255723?123983Bachelors13.0Never-married?Own-childOtherMale0040United-States<50k
3255853Private157069Assoc-acdm12.0Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>=50k
3255932Local-gov217296HS-grad9.0Married-civ-spouseTransport-movingWifeWhiteFemale4064022United-States<50k
3256026Private182308Some-college10.0Married-civ-spouseProf-specialtyHusbandWhiteMale0040United-States<50k
\n", "

32561 rows × 15 columns

\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 49 Private 101320 Assoc-acdm 12.0 \n", "1 44 Private 236746 Masters 14.0 \n", "2 38 Private 96185 HS-grad NaN \n", "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", "5 20 Private 63210 HS-grad 9.0 \n", "6 49 Private 44434 Some-college 10.0 \n", "7 37 Private 138940 11th 7.0 \n", "8 46 Private 328216 HS-grad 9.0 \n", "9 36 Self-emp-inc 216711 HS-grad NaN \n", "10 23 Private 529223 Bachelors 13.0 \n", "11 18 Private 216284 11th NaN \n", "12 30 Private 151989 Assoc-voc NaN \n", "13 30 Private 55291 Bachelors NaN \n", "14 43 Private 84661 Assoc-voc NaN \n", "15 51 Private 284329 HS-grad 9.0 \n", "16 38 Private 170174 10th NaN \n", "17 35 Private 261293 Masters 14.0 \n", "18 56 State-gov 274111 Masters 14.0 \n", "19 45 Private 267967 Bachelors NaN \n", "20 40 Private 188942 Some-college NaN \n", "21 26 Private 746432 HS-grad 9.0 \n", "22 46 Private 117605 9th NaN \n", "23 29 Private 1268339 HS-grad NaN \n", "24 49 Private 247294 HS-grad 9.0 \n", "25 55 Self-emp-inc 222615 Masters 14.0 \n", "26 47 Self-emp-not-inc 213745 Some-college NaN \n", "27 41 Self-emp-inc 151089 Some-college NaN \n", "28 27 Private 153078 Prof-school NaN \n", "29 42 Private 70055 11th 7.0 \n", "... ... ... ... ... ... 
\n", "32531 25 Private 203871 Assoc-voc 11.0 \n", "32532 52 State-gov 71344 Masters 14.0 \n", "32533 19 Private 445728 HS-grad 9.0 \n", "32534 21 Private 222490 Some-college 10.0 \n", "32535 49 Private 213431 HS-grad 9.0 \n", "32536 52 Private 163998 HS-grad 9.0 \n", "32537 34 Local-gov 90934 Assoc-voc 11.0 \n", "32538 26 Local-gov 202286 Bachelors 13.0 \n", "32539 44 Private 219441 10th 6.0 \n", "32540 47 Self-emp-not-inc 162236 Bachelors 13.0 \n", "32541 24 Private 241857 Some-college 10.0 \n", "32542 34 Private 98283 Prof-school 15.0 \n", "32543 38 Private 29874 Assoc-voc 11.0 \n", "32544 33 Private 124052 HS-grad 9.0 \n", "32545 33 Private 206609 Bachelors 13.0 \n", "32546 31 Private 188246 Masters 14.0 \n", "32547 23 Private 267955 Some-college 10.0 \n", "32548 28 Private 187479 Some-college 10.0 \n", "32549 27 Private 171655 HS-grad 9.0 \n", "32550 27 Private 116358 Some-college 10.0 \n", "32551 60 Private 230545 7th-8th 4.0 \n", "32552 39 Private 139743 HS-grad 9.0 \n", "32553 35 Self-emp-inc 135436 Prof-school 15.0 \n", "32554 53 Private 35102 Some-college 10.0 \n", "32555 48 Private 355320 Bachelors 13.0 \n", "32556 36 Private 297449 Bachelors 13.0 \n", "32557 23 ? 
123983 Bachelors 13.0 \n", "32558 53 Private 157069 Assoc-acdm 12.0 \n", "32559 32 Local-gov 217296 HS-grad 9.0 \n", "32560 26 Private 182308 Some-college 10.0 \n", "\n", " marital-status occupation relationship \\\n", "0 Married-civ-spouse NaN Wife \n", "1 Divorced Exec-managerial Not-in-family \n", "2 Divorced NaN Unmarried \n", "3 Married-civ-spouse Prof-specialty Husband \n", "4 Married-civ-spouse Other-service Wife \n", "5 Never-married Handlers-cleaners Own-child \n", "6 Divorced NaN Other-relative \n", "7 Married-civ-spouse NaN Husband \n", "8 Married-civ-spouse Craft-repair Husband \n", "9 Married-civ-spouse NaN Husband \n", "10 Never-married NaN Own-child \n", "11 Never-married Adm-clerical Own-child \n", "12 Married-civ-spouse NaN Wife \n", "13 Married-civ-spouse NaN Husband \n", "14 Married-civ-spouse Sales Husband \n", "15 Widowed NaN Unmarried \n", "16 Married-civ-spouse Machine-op-inspct Husband \n", "17 Never-married NaN Not-in-family \n", "18 Divorced NaN Not-in-family \n", "19 Married-civ-spouse Prof-specialty Husband \n", "20 Married-civ-spouse NaN Wife \n", "21 Never-married Handlers-cleaners Own-child \n", "22 Divorced Sales Not-in-family \n", "23 Married-spouse-absent NaN Own-child \n", "24 Married-civ-spouse Craft-repair Husband \n", "25 Married-civ-spouse Exec-managerial Husband \n", "26 Divorced NaN Unmarried \n", "27 Married-civ-spouse NaN Husband \n", "28 Never-married Prof-specialty Own-child \n", "29 Married-civ-spouse NaN Husband \n", "... ... ... ... 
\n", "32531 Married-civ-spouse Prof-specialty Wife \n", "32532 Married-civ-spouse Prof-specialty Husband \n", "32533 Never-married Craft-repair Not-in-family \n", "32534 Never-married Handlers-cleaners Own-child \n", "32535 Separated Prof-specialty Unmarried \n", "32536 Married-civ-spouse Sales Husband \n", "32537 Divorced Protective-serv Own-child \n", "32538 Never-married Tech-support Own-child \n", "32539 Never-married Sales Unmarried \n", "32540 Never-married Craft-repair Not-in-family \n", "32541 Never-married Adm-clerical Not-in-family \n", "32542 Never-married Tech-support Not-in-family \n", "32543 Married-civ-spouse Craft-repair Husband \n", "32544 Married-civ-spouse Craft-repair Husband \n", "32545 Married-civ-spouse Exec-managerial Husband \n", "32546 Married-civ-spouse Prof-specialty Husband \n", "32547 Never-married Sales Not-in-family \n", "32548 Married-civ-spouse Exec-managerial Husband \n", "32549 Never-married Adm-clerical Not-in-family \n", "32550 Never-married Craft-repair Own-child \n", "32551 Divorced Adm-clerical Not-in-family \n", "32552 Separated Adm-clerical Not-in-family \n", "32553 Married-civ-spouse Prof-specialty Husband \n", "32554 Divorced Adm-clerical Not-in-family \n", "32555 Married-civ-spouse Prof-specialty Husband \n", "32556 Divorced Prof-specialty Not-in-family \n", "32557 Never-married ? 
Own-child \n", "32558 Married-civ-spouse Machine-op-inspct Husband \n", "32559 Married-civ-spouse Transport-moving Wife \n", "32560 Married-civ-spouse Prof-specialty Husband \n", "\n", " race sex capital-gain capital-loss \\\n", "0 White Female 0 1902 \n", "1 White Male 10520 0 \n", "2 Black Female 0 0 \n", "3 Asian-Pac-Islander Male 0 0 \n", "4 Black Female 0 0 \n", "5 White Male 0 0 \n", "6 White Male 0 0 \n", "7 White Male 0 0 \n", "8 White Male 0 0 \n", "9 White Male 99999 0 \n", "10 Black Male 0 0 \n", "11 White Female 0 0 \n", "12 White Female 0 0 \n", "13 White Male 0 0 \n", "14 White Male 0 0 \n", "15 White Male 0 0 \n", "16 White Male 0 0 \n", "17 White Male 0 0 \n", "18 White Male 0 1669 \n", "19 White Male 0 0 \n", "20 Black Female 0 0 \n", "21 Black Male 0 0 \n", "22 White Male 0 0 \n", "23 Black Male 0 0 \n", "24 White Male 0 0 \n", "25 White Male 0 0 \n", "26 White Female 0 0 \n", "27 White Male 0 0 \n", "28 Asian-Pac-Islander Male 0 0 \n", "29 White Male 0 0 \n", "... ... ... ... ... 
\n", "32531 White Female 0 1887 \n", "32532 White Male 0 0 \n", "32533 White Male 0 0 \n", "32534 White Female 0 0 \n", "32535 Black Female 0 0 \n", "32536 White Male 99999 0 \n", "32537 Asian-Pac-Islander Male 0 0 \n", "32538 White Male 0 0 \n", "32539 Other Female 0 0 \n", "32540 White Female 0 0 \n", "32541 Black Female 0 0 \n", "32542 Asian-Pac-Islander Male 0 1564 \n", "32543 White Male 0 0 \n", "32544 White Male 0 0 \n", "32545 White Male 0 0 \n", "32546 White Male 0 0 \n", "32547 White Female 0 0 \n", "32548 White Male 0 0 \n", "32549 White Female 0 0 \n", "32550 Asian-Pac-Islander Male 0 1980 \n", "32551 White Female 0 0 \n", "32552 White Female 0 0 \n", "32553 White Male 0 0 \n", "32554 White Female 0 0 \n", "32555 White Male 0 0 \n", "32556 White Male 14084 0 \n", "32557 Other Male 0 0 \n", "32558 White Male 0 0 \n", "32559 White Female 4064 0 \n", "32560 White Male 0 0 \n", "\n", " hours-per-week native-country salary \n", "0 40 United-States >=50k \n", "1 45 United-States >=50k \n", "2 32 United-States <50k \n", "3 40 United-States >=50k \n", "4 50 United-States <50k \n", "5 15 United-States <50k \n", "6 35 United-States <50k \n", "7 40 United-States <50k \n", "8 40 United-States >=50k \n", "9 50 ? >=50k \n", "10 10 United-States <50k \n", "11 20 United-States <50k \n", "12 40 United-States <50k \n", "13 40 United-States >=50k \n", "14 45 United-States <50k \n", "15 40 United-States <50k \n", "16 40 United-States >=50k \n", "17 60 United-States <50k \n", "18 40 United-States <50k \n", "19 45 United-States >=50k \n", "20 40 Puerto-Rico <50k \n", "21 48 United-States <50k \n", "22 35 United-States <50k \n", "23 40 United-States <50k \n", "24 45 United-States >=50k \n", "25 60 United-States <50k \n", "26 45 United-States <50k \n", "27 50 United-States <50k \n", "28 40 United-States <50k \n", "29 45 United-States <50k \n", "... ... ... ... 
\n", "32531 40 United-States >=50k \n", "32532 40 United-States <50k \n", "32533 40 United-States <50k \n", "32534 40 United-States <50k \n", "32535 40 United-States <50k \n", "32536 45 United-States >=50k \n", "32537 40 United-States <50k \n", "32538 40 United-States <50k \n", "32539 35 Dominican-Republic <50k \n", "32540 40 United-States <50k \n", "32541 35 United-States <50k \n", "32542 40 India >=50k \n", "32543 40 United-States <50k \n", "32544 40 United-States <50k \n", "32545 45 United-States <50k \n", "32546 50 United-States <50k \n", "32547 40 United-States <50k \n", "32548 55 United-States <50k \n", "32549 42 United-States <50k \n", "32550 40 Philippines <50k \n", "32551 35 Cuba <50k \n", "32552 20 United-States <50k \n", "32553 50 United-States >=50k \n", "32554 34 United-States <50k \n", "32555 45 Canada >=50k \n", "32556 40 United-States >=50k \n", "32557 40 United-States <50k \n", "32558 40 United-States >=50k \n", "32559 22 United-States <50k \n", "32560 40 United-States <50k \n", "\n", "[32561 rows x 15 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test reduce_mem_usage(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
049Private101320Assoc-acdm12.0Married-civ-spouseNaNWifeWhiteFemale0190240United-States>=50k
144Private236746Masters14.0DivorcedExec-managerialNot-in-familyWhiteMale10520045United-States>=50k
238Private96185HS-gradNaNDivorcedNaNUnmarriedBlackFemale0032United-States<50k
338Self-emp-inc112847Prof-school15.0Married-civ-spouseProf-specialtyHusbandAsian-Pac-IslanderMale0040United-States>=50k
442Self-emp-not-inc822977th-8thNaNMarried-civ-spouseOther-serviceWifeBlackFemale0050United-States<50k
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 49 Private 101320 Assoc-acdm 12.0 \n", "1 44 Private 236746 Masters 14.0 \n", "2 38 Private 96185 HS-grad NaN \n", "3 38 Self-emp-inc 112847 Prof-school 15.0 \n", "4 42 Self-emp-not-inc 82297 7th-8th NaN \n", "\n", " marital-status occupation relationship race \\\n", "0 Married-civ-spouse NaN Wife White \n", "1 Divorced Exec-managerial Not-in-family White \n", "2 Divorced NaN Unmarried Black \n", "3 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander \n", "4 Married-civ-spouse Other-service Wife Black \n", "\n", " sex capital-gain capital-loss hours-per-week native-country salary \n", "0 Female 0 1902 40 United-States >=50k \n", "1 Male 10520 0 45 United-States >=50k \n", "2 Female 0 0 32 United-States <50k \n", "3 Male 0 0 40 United-States >=50k \n", "4 Female 0 0 50 United-States <50k " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dep_var = 'salary'\n", "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [FillMissing, Categorify, Normalize]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", " .split_by_idx(list(range(800,1000)))\n", " .label_from_df(cols=dep_var)\n", " .add_test(test)\n", " .databunch())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
workclasseducationmarital-statusoccupationrelationshipraceeducation-num_naagefnlwgteducation-numtarget
PrivateBachelorsMarried-civ-spouseExec-managerialHusbandWhiteFalse1.5695-0.90751.1422>=50k
PrivateAssoc-acdmNever-marriedCraft-repairNot-in-familyWhiteFalse0.0303-0.71710.7511<50k
Private7th-8thNever-marriedFarming-fishingUnmarriedWhiteFalse-1.28911.4882-2.3780<50k
PrivateHS-gradWidowedSalesUnmarriedWhiteFalse3.4020-0.7050-0.4223<50k
PrivateSome-collegeNever-marriedOther-serviceUnmarriedBlackFalse0.0303-0.2917-0.0312<50k
PrivateBachelorsDivorcedAdm-clericalUnmarriedWhiteFalse1.0564-0.44561.1422<50k
PrivateHS-gradWidowedSalesNot-in-familyWhiteFalse2.00930.8941-0.4223<50k
PrivateSome-collegeWidowedAdm-clericalUnmarriedWhiteFalse-0.4095-0.6688-0.0312<50k
PrivateSome-collegeMarried-civ-spouseExec-managerialOwn-childBlackFalse-0.7027-0.4258-0.0312<50k
PrivateHS-gradNever-marriedHandlers-cleanersNot-in-familyWhiteFalse-0.9226-0.7288-0.4223<50k
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data.show_batch(rows=10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:03

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_lossaccuracytime
00.3717820.4116820.83000000:03
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn = tabular_learner(data, layers=[200,100], metrics=accuracy)\n", "learn.fit(1, 1e-2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "row = df.iloc[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Category <50k, tensor(0), tensor([0.5185, 0.4815]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(row)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }