{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular models" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai import *\n", "from fastai.tabular import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabular data should be in a pandas `DataFrame`; here we load the Adult sample dataset from its CSV file." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dep_var = '>=50k'\n", "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [FillMissing, Categorify, Normalize]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", " .split_by_idx(list(range(800,1000)))\n", " .label_from_df(cols=dep_var)\n", " .add_test(test, label=0)\n", " .databunch())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
workclass | \n", "education | \n", "marital-status | \n", "occupation | \n", "relationship | \n", "race | \n", "education-num_na | \n", "age | \n", "fnlwgt | \n", "education-num | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|
Private | \n", "Prof-school | \n", "Married-civ-spouse | \n", "Prof-specialty | \n", "Husband | \n", "White | \n", "False | \n", "0.1036 | \n", "0.9224 | \n", "1.9245 | \n", "1 | \n", "
Self-emp-inc | \n", "Bachelors | \n", "Married-civ-spouse | \n", "Farming-fishing | \n", "Husband | \n", "White | \n", "False | \n", "1.7161 | \n", "-1.2654 | \n", "1.1422 | \n", "1 | \n", "
Private | \n", "HS-grad | \n", "Never-married | \n", "Adm-clerical | \n", "Other-relative | \n", "Black | \n", "False | \n", "-0.7760 | \n", "1.1905 | \n", "-0.4224 | \n", "0 | \n", "
Private | \n", "10th | \n", "Married-civ-spouse | \n", "Sales | \n", "Own-child | \n", "White | \n", "False | \n", "-1.5823 | \n", "-0.0268 | \n", "-1.5958 | \n", "0 | \n", "
Private | \n", "Some-college | \n", "Never-married | \n", "Handlers-cleaners | \n", "Own-child | \n", "White | \n", "False | \n", "-1.3624 | \n", "0.0284 | \n", "-0.0312 | \n", "0 | \n", "
Private | \n", "Some-college | \n", "Married-civ-spouse | \n", "Prof-specialty | \n", "Husband | \n", "White | \n", "False | \n", "0.3968 | \n", "0.4367 | \n", "-0.0312 | \n", "1 | \n", "
? | \n", "Some-college | \n", "Never-married | \n", "? | \n", "Own-child | \n", "White | \n", "False | \n", "-1.4357 | \n", "-0.7295 | \n", "-0.0312 | \n", "0 | \n", "
Self-emp-not-inc | \n", "5th-6th | \n", "Married-civ-spouse | \n", "Sales | \n", "Husband | \n", "White | \n", "False | \n", "0.6166 | \n", "-0.6503 | \n", "-2.7692 | \n", "1 | \n", "
Private | \n", "Some-college | \n", "Married-civ-spouse | \n", "Sales | \n", "Husband | \n", "White | \n", "False | \n", "1.5695 | \n", "-0.8876 | \n", "-0.0312 | \n", "1 | \n", "
Local-gov | \n", "Some-college | \n", "Never-married | \n", "Handlers-cleaners | \n", "Own-child | \n", "White | \n", "False | \n", "-0.6294 | \n", "-1.5422 | \n", "-0.0312 | \n", "0 | \n", "