{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular example" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai import * # Quick access to most common functionality\n", "from fastai.tabular import * # Quick access to tabular functionality # Access to example data provided with fastai" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabular data should be in a Pandas `DataFrame`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.ADULT_SAMPLE)\n", "df = pd.read_csv(path/'adult.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dep_var = '>=50k'\n", "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n", "cont_names = ['age', 'fnlwgt', 'education-num']\n", "procs = [FillMissing, Categorify, Normalize]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)\n", " .split_by_idx(list(range(800,1000)))\n", " .label_from_df(cols=dep_var)\n", " .add_test(test, label=0)\n", " .databunch())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
workclass | \n", "education | \n", "marital-status | \n", "occupation | \n", "relationship | \n", "race | \n", "education-num_na | \n", "age | \n", "fnlwgt | \n", "education-num | \n", "
---|---|---|---|---|---|---|---|---|---|
Private | \n", "HS-grad | \n", "Married-civ-spouse | \n", "Craft-repair | \n", "Husband | \n", "Asian-Pac-Islander | \n", "False | \n", "0.1769 | \n", "-0.1070 | \n", "-0.4224 | \n", "
Local-gov | \n", "Some-college | \n", "Never-married | \n", "Prof-specialty | \n", "Own-child | \n", "White | \n", "False | \n", "-0.0430 | \n", "0.4453 | \n", "-0.0312 | \n", "
Private | \n", "Some-college | \n", "Married-civ-spouse | \n", "Transport-moving | \n", "Husband | \n", "White | \n", "False | \n", "0.9098 | \n", "1.1638 | \n", "-0.0312 | \n", "
Private | \n", "Some-college | \n", "Married-civ-spouse | \n", "Adm-clerical | \n", "Wife | \n", "White | \n", "False | \n", "-0.9959 | \n", "-1.0879 | \n", "-0.0312 | \n", "
? | \n", "HS-grad | \n", "Widowed | \n", "? | \n", "Unmarried | \n", "White | \n", "False | \n", "2.0093 | \n", "-0.9140 | \n", "-0.4224 | \n", "
Private | \n", "HS-grad | \n", "Divorced | \n", "Adm-clerical | \n", "Not-in-family | \n", "White | \n", "False | \n", "0.9831 | \n", "-0.6828 | \n", "-0.4224 | \n", "
Private | \n", "HS-grad | \n", "Married-civ-spouse | \n", "Craft-repair | \n", "Husband | \n", "White | \n", "False | \n", "-0.4828 | \n", "-1.3949 | \n", "-0.4224 | \n", "
Private | \n", "Bachelors | \n", "Never-married | \n", "Prof-specialty | \n", "Not-in-family | \n", "White | \n", "False | \n", "0.4701 | \n", "-0.2632 | \n", "1.1422 | \n", "
Private | \n", "Assoc-acdm | \n", "Married-civ-spouse | \n", "Adm-clerical | \n", "Husband | \n", "Asian-Pac-Islander | \n", "False | \n", "0.5434 | \n", "-0.1371 | \n", "0.7511 | \n", "
Private | \n", "Bachelors | \n", "Married-civ-spouse | \n", "Exec-managerial | \n", "Husband | \n", "White | \n", "False | \n", "-0.0430 | \n", "0.0111 | \n", "1.1422 | \n", "