{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predictive Models"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## \"Housekeeping\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.linear_model import Lasso, LinearRegression, Ridge\n",
    "from sklearn.metrics import (\n",
    "    make_scorer,\n",
    "    mean_absolute_error,\n",
    "    mean_squared_error,\n",
    "    r2_score,\n",
    ")\n",
    "from sklearn.model_selection import GridSearchCV, KFold\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.utils import shuffle\n",
    "from tqdm import tqdm as progress_bar\n",
    "\n",
    "from utils import (\n",
    "    CONTINUOUS_VARIABLES,\n",
    "    DISCRETE_VARIABLES,\n",
    "    NOMINAL_VARIABLES,\n",
    "    ORDINAL_VARIABLES,\n",
    "    TARGET_VARIABLES,\n",
    "    bias_score,\n",
    "    encode_ordinals,\n",
    "    load_clean_data,\n",
    "    max_deviation,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "random_state = np.random.RandomState(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option(\"display.max_columns\", 250)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Original Data\n",
    "\n",
    "The DataFrame `df1` holds the cleaned data from notebook 1 with the all the nominal and ordinal features automatically translated to factor variables and ordered integer values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = load_clean_data(\"data/data_clean.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This cell basically **replaces** all the manual work that went into generating new and identifying \"interesting\" features in notebooks 2 and 3."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.concat([\n",
    "    df1[CONTINUOUS_VARIABLES + DISCRETE_VARIABLES + ORDINAL_VARIABLES + TARGET_VARIABLES],\n",
    "    pd.get_dummies(df1[NOMINAL_VARIABLES], dtype=int),\n",
    "], axis=1)\n",
    "# Re-order the columns for convenience.\n",
    "df1 = df1[sorted(set(df1.columns) - set(TARGET_VARIABLES)) + TARGET_VARIABLES]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = encode_ordinals(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = shuffle(df1, random_state=random_state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "MultiIndex: 2898 entries, (np.int64(144), np.int64(535153070)) to (np.int64(867), np.int64(907253130))\n",
      "Columns: 248 entries, 1st Flr SF to SalePrice\n",
      "dtypes: float64(19), int64(229)\n",
      "memory usage: 5.7 MB\n"
     ]
    }
   ],
   "source": [
    "df1.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>1st Flr SF</th>\n",
       "      <th>2nd Flr SF</th>\n",
       "      <th>3Ssn Porch</th>\n",
       "      <th>Alley_Grvl</th>\n",
       "      <th>Alley_NA</th>\n",
       "      <th>Alley_Pave</th>\n",
       "      <th>Bedroom AbvGr</th>\n",
       "      <th>Bldg Type_1Fam</th>\n",
       "      <th>Bldg Type_2FmCon</th>\n",
       "      <th>Bldg Type_Duplx</th>\n",
       "      <th>Bldg Type_TwnhsE</th>\n",
       "      <th>Bldg Type_TwnhsI</th>\n",
       "      <th>Bsmt Cond</th>\n",
       "      <th>Bsmt Exposure</th>\n",
       "      <th>Bsmt Full Bath</th>\n",
       "      <th>Bsmt Half Bath</th>\n",
       "      <th>Bsmt Qual</th>\n",
       "      <th>Bsmt Unf SF</th>\n",
       "      <th>BsmtFin SF 1</th>\n",
       "      <th>BsmtFin SF 2</th>\n",
       "      <th>BsmtFin Type 1</th>\n",
       "      <th>BsmtFin Type 2</th>\n",
       "      <th>Central Air_N</th>\n",
       "      <th>Central Air_Y</th>\n",
       "      <th>Condition 1_Artery</th>\n",
       "      <th>Condition 1_Feedr</th>\n",
       "      <th>Condition 1_Norm</th>\n",
       "      <th>Condition 1_PosA</th>\n",
       "      <th>Condition 1_PosN</th>\n",
       "      <th>Condition 1_RRAe</th>\n",
       "      <th>Condition 1_RRAn</th>\n",
       "      <th>Condition 1_RRNe</th>\n",
       "      <th>Condition 1_RRNn</th>\n",
       "      <th>Condition 2_Artery</th>\n",
       "      <th>Condition 2_Feedr</th>\n",
       "      <th>Condition 2_Norm</th>\n",
       "      <th>Condition 2_PosA</th>\n",
       "      <th>Condition 2_PosN</th>\n",
       "      <th>Condition 2_RRAe</th>\n",
       "      <th>Condition 2_RRAn</th>\n",
       "      <th>Condition 2_RRNe</th>\n",
       "      <th>Condition 2_RRNn</th>\n",
       "      <th>Electrical</th>\n",
       "      <th>Enclosed Porch</th>\n",
       "      <th>Exter Cond</th>\n",
       "      <th>Exter Qual</th>\n",
       "      <th>Exterior 1st_AsbShng</th>\n",
       "      <th>Exterior 1st_AsphShn</th>\n",
       "      <th>Exterior 1st_BrkComm</th>\n",
       "      <th>Exterior 1st_BrkFace</th>\n",
       "      <th>Exterior 1st_CBlock</th>\n",
       "      <th>Exterior 1st_CemntBd</th>\n",
       "      <th>Exterior 1st_HdBoard</th>\n",
       "      <th>Exterior 1st_ImStucc</th>\n",
       "      <th>Exterior 1st_MetalSd</th>\n",
       "      <th>Exterior 1st_Other</th>\n",
       "      <th>Exterior 1st_Plywood</th>\n",
       "      <th>Exterior 1st_PreCast</th>\n",
       "      <th>Exterior 1st_Stone</th>\n",
       "      <th>Exterior 1st_Stucco</th>\n",
       "      <th>Exterior 1st_VinylSd</th>\n",
       "      <th>Exterior 1st_Wd Sdng</th>\n",
       "      <th>Exterior 1st_WdShing</th>\n",
       "      <th>Exterior 2nd_AsbShng</th>\n",
       "      <th>Exterior 2nd_AsphShn</th>\n",
       "      <th>Exterior 2nd_BrkComm</th>\n",
       "      <th>Exterior 2nd_BrkFace</th>\n",
       "      <th>Exterior 2nd_CBlock</th>\n",
       "      <th>Exterior 2nd_CemntBd</th>\n",
       "      <th>Exterior 2nd_HdBoard</th>\n",
       "      <th>Exterior 2nd_ImStucc</th>\n",
       "      <th>Exterior 2nd_MetalSd</th>\n",
       "      <th>Exterior 2nd_Other</th>\n",
       "      <th>Exterior 2nd_Plywood</th>\n",
       "      <th>Exterior 2nd_PreCast</th>\n",
       "      <th>Exterior 2nd_Stone</th>\n",
       "      <th>Exterior 2nd_Stucco</th>\n",
       "      <th>Exterior 2nd_VinylSd</th>\n",
       "      <th>Exterior 2nd_Wd Sdng</th>\n",
       "      <th>Exterior 2nd_WdShing</th>\n",
       "      <th>Fence</th>\n",
       "      <th>Fireplace Qu</th>\n",
       "      <th>Fireplaces</th>\n",
       "      <th>Foundation_BrkTil</th>\n",
       "      <th>Foundation_CBlock</th>\n",
       "      <th>Foundation_PConc</th>\n",
       "      <th>Foundation_Slab</th>\n",
       "      <th>Foundation_Stone</th>\n",
       "      <th>Foundation_Wood</th>\n",
       "      <th>Full Bath</th>\n",
       "      <th>Functional</th>\n",
       "      <th>Garage Area</th>\n",
       "      <th>Garage Cars</th>\n",
       "      <th>Garage Cond</th>\n",
       "      <th>Garage Finish</th>\n",
       "      <th>Garage Qual</th>\n",
       "      <th>Garage Type_2Types</th>\n",
       "      <th>Garage Type_Attchd</th>\n",
       "      <th>Garage Type_Basment</th>\n",
       "      <th>Garage Type_BuiltIn</th>\n",
       "      <th>Garage Type_CarPort</th>\n",
       "      <th>Garage Type_Detchd</th>\n",
       "      <th>Garage Type_NA</th>\n",
       "      <th>Gr Liv Area</th>\n",
       "      <th>Half Bath</th>\n",
       "      <th>Heating QC</th>\n",
       "      <th>Heating_Floor</th>\n",
       "      <th>Heating_GasA</th>\n",
       "      <th>Heating_GasW</th>\n",
       "      <th>Heating_Grav</th>\n",
       "      <th>Heating_OthW</th>\n",
       "      <th>Heating_Wall</th>\n",
       "      <th>House Style_1.5Fin</th>\n",
       "      <th>House Style_1.5Unf</th>\n",
       "      <th>House Style_1Story</th>\n",
       "      <th>House Style_2.5Fin</th>\n",
       "      <th>House Style_2.5Unf</th>\n",
       "      <th>House Style_2Story</th>\n",
       "      <th>House Style_SFoyer</th>\n",
       "      <th>House Style_SLvl</th>\n",
       "      <th>Kitchen AbvGr</th>\n",
       "      <th>Kitchen Qual</th>\n",
       "      <th>Land Contour_Bnk</th>\n",
       "      <th>Land Contour_HLS</th>\n",
       "      <th>Land Contour_Low</th>\n",
       "      <th>Land Contour_Lvl</th>\n",
       "      <th>Land Slope</th>\n",
       "      <th>Lot Area</th>\n",
       "      <th>Lot Config_Corner</th>\n",
       "      <th>Lot Config_CulDSac</th>\n",
       "      <th>Lot Config_FR2</th>\n",
       "      <th>Lot Config_FR3</th>\n",
       "      <th>Lot Config_Inside</th>\n",
       "      <th>Lot Shape</th>\n",
       "      <th>Low Qual Fin SF</th>\n",
       "      <th>MS SubClass_020</th>\n",
       "      <th>MS SubClass_030</th>\n",
       "      <th>MS SubClass_040</th>\n",
       "      <th>MS SubClass_045</th>\n",
       "      <th>MS SubClass_050</th>\n",
       "      <th>MS SubClass_060</th>\n",
       "      <th>MS SubClass_070</th>\n",
       "      <th>MS SubClass_075</th>\n",
       "      <th>MS SubClass_080</th>\n",
       "      <th>MS SubClass_085</th>\n",
       "      <th>MS SubClass_090</th>\n",
       "      <th>MS SubClass_120</th>\n",
       "      <th>MS SubClass_150</th>\n",
       "      <th>MS SubClass_160</th>\n",
       "      <th>MS SubClass_180</th>\n",
       "      <th>MS SubClass_190</th>\n",
       "      <th>MS Zoning_A</th>\n",
       "      <th>MS Zoning_C</th>\n",
       "      <th>MS Zoning_FV</th>\n",
       "      <th>MS Zoning_I</th>\n",
       "      <th>MS Zoning_RH</th>\n",
       "      <th>MS Zoning_RL</th>\n",
       "      <th>MS Zoning_RM</th>\n",
       "      <th>MS Zoning_RP</th>\n",
       "      <th>Mas Vnr Area</th>\n",
       "      <th>Mas Vnr Type_BrkCmn</th>\n",
       "      <th>Mas Vnr Type_BrkFace</th>\n",
       "      <th>Mas Vnr Type_CBlock</th>\n",
       "      <th>Mas Vnr Type_None</th>\n",
       "      <th>Mas Vnr Type_Stone</th>\n",
       "      <th>Misc Feature_Elev</th>\n",
       "      <th>Misc Feature_Gar2</th>\n",
       "      <th>Misc Feature_NA</th>\n",
       "      <th>Misc Feature_Othr</th>\n",
       "      <th>Misc Feature_Shed</th>\n",
       "      <th>Misc Feature_TenC</th>\n",
       "      <th>Misc Val</th>\n",
       "      <th>Mo Sold</th>\n",
       "      <th>Neighborhood_Blmngtn</th>\n",
       "      <th>Neighborhood_Blueste</th>\n",
       "      <th>Neighborhood_BrDale</th>\n",
       "      <th>Neighborhood_BrkSide</th>\n",
       "      <th>Neighborhood_ClearCr</th>\n",
       "      <th>Neighborhood_CollgCr</th>\n",
       "      <th>Neighborhood_Crawfor</th>\n",
       "      <th>Neighborhood_Edwards</th>\n",
       "      <th>Neighborhood_Gilbert</th>\n",
       "      <th>Neighborhood_Greens</th>\n",
       "      <th>Neighborhood_GrnHill</th>\n",
       "      <th>Neighborhood_IDOTRR</th>\n",
       "      <th>Neighborhood_Landmrk</th>\n",
       "      <th>Neighborhood_MeadowV</th>\n",
       "      <th>Neighborhood_Mitchel</th>\n",
       "      <th>Neighborhood_NPkVill</th>\n",
       "      <th>Neighborhood_NWAmes</th>\n",
       "      <th>Neighborhood_Names</th>\n",
       "      <th>Neighborhood_NoRidge</th>\n",
       "      <th>Neighborhood_NridgHt</th>\n",
       "      <th>Neighborhood_OldTown</th>\n",
       "      <th>Neighborhood_SWISU</th>\n",
       "      <th>Neighborhood_Sawyer</th>\n",
       "      <th>Neighborhood_SawyerW</th>\n",
       "      <th>Neighborhood_Somerst</th>\n",
       "      <th>Neighborhood_StoneBr</th>\n",
       "      <th>Neighborhood_Timber</th>\n",
       "      <th>Neighborhood_Veenker</th>\n",
       "      <th>Open Porch SF</th>\n",
       "      <th>Overall Cond</th>\n",
       "      <th>Overall Qual</th>\n",
       "      <th>Paved Drive</th>\n",
       "      <th>Pool Area</th>\n",
       "      <th>Pool QC</th>\n",
       "      <th>Roof Matl_ClyTile</th>\n",
       "      <th>Roof Matl_CompShg</th>\n",
       "      <th>Roof Matl_Membran</th>\n",
       "      <th>Roof Matl_Metal</th>\n",
       "      <th>Roof Matl_Roll</th>\n",
       "      <th>Roof Matl_Tar&amp;Grv</th>\n",
       "      <th>Roof Matl_WdShake</th>\n",
       "      <th>Roof Matl_WdShngl</th>\n",
       "      <th>Roof Style_Flat</th>\n",
       "      <th>Roof Style_Gable</th>\n",
       "      <th>Roof Style_Gambrel</th>\n",
       "      <th>Roof Style_Hip</th>\n",
       "      <th>Roof Style_Mansard</th>\n",
       "      <th>Roof Style_Shed</th>\n",
       "      <th>Sale Condition_Abnorml</th>\n",
       "      <th>Sale Condition_AdjLand</th>\n",
       "      <th>Sale Condition_Alloca</th>\n",
       "      <th>Sale Condition_Family</th>\n",
       "      <th>Sale Condition_Normal</th>\n",
       "      <th>Sale Condition_Partial</th>\n",
       "      <th>Sale Type_COD</th>\n",
       "      <th>Sale Type_CWD</th>\n",
       "      <th>Sale Type_Con</th>\n",
       "      <th>Sale Type_ConLD</th>\n",
       "      <th>Sale Type_ConLI</th>\n",
       "      <th>Sale Type_ConLw</th>\n",
       "      <th>Sale Type_New</th>\n",
       "      <th>Sale Type_Oth</th>\n",
       "      <th>Sale Type_VWD</th>\n",
       "      <th>Sale Type_WD</th>\n",
       "      <th>Screen Porch</th>\n",
       "      <th>Street_Grvl</th>\n",
       "      <th>Street_Pave</th>\n",
       "      <th>TotRms AbvGrd</th>\n",
       "      <th>Total Bsmt SF</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>Wood Deck SF</th>\n",
       "      <th>Year Built</th>\n",
       "      <th>Year Remod/Add</th>\n",
       "      <th>Yr Sold</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Order</th>\n",
       "      <th>PID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <th>535153070</th>\n",
       "      <td>1194.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1194.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>120.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>312.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1194.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>8760.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>220.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>1194.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1959</td>\n",
       "      <td>1959</td>\n",
       "      <td>2010</td>\n",
       "      <td>148000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1574</th>\n",
       "      <th>916380060</th>\n",
       "      <td>1537.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>482.0</td>\n",
       "      <td>1036.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>788.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1537.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>11563.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>258.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1518.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2006</td>\n",
       "      <td>2007</td>\n",
       "      <td>2008</td>\n",
       "      <td>294000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>490</th>\n",
       "      <th>528290190</th>\n",
       "      <td>774.0</td>\n",
       "      <td>656.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>384.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>400.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1430.0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>7750.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>384.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1999</td>\n",
       "      <td>2000</td>\n",
       "      <td>2009</td>\n",
       "      <td>156000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1730</th>\n",
       "      <th>528218050</th>\n",
       "      <td>783.0</td>\n",
       "      <td>701.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>783.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>393.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1484.0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>10237.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>783.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2005</td>\n",
       "      <td>2007</td>\n",
       "      <td>2007</td>\n",
       "      <td>178900.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2276</th>\n",
       "      <th>921128030</th>\n",
       "      <td>1824.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1824.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>932.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1824.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>12633.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1824.0</td>\n",
       "      <td>3</td>\n",
       "      <td>160.0</td>\n",
       "      <td>2006</td>\n",
       "      <td>2007</td>\n",
       "      <td>2007</td>\n",
       "      <td>392000.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 1st Flr SF  2nd Flr SF  3Ssn Porch  Alley_Grvl  Alley_NA  \\\n",
       "Order PID                                                                   \n",
       "144   535153070      1194.0         0.0         0.0           0         1   \n",
       "1574  916380060      1537.0         0.0         0.0           0         1   \n",
       "490   528290190       774.0       656.0         0.0           0         1   \n",
       "1730  528218050       783.0       701.0         0.0           0         1   \n",
       "2276  921128030      1824.0         0.0         0.0           0         1   \n",
       "\n",
       "                 Alley_Pave  Bedroom AbvGr  Bldg Type_1Fam  Bldg Type_2FmCon  \\\n",
       "Order PID                                                                      \n",
       "144   535153070           0              3               1                 0   \n",
       "1574  916380060           0              3               1                 0   \n",
       "490   528290190           0              3               1                 0   \n",
       "1730  528218050           0              3               1                 0   \n",
       "2276  921128030           0              3               1                 0   \n",
       "\n",
       "                 Bldg Type_Duplx  Bldg Type_TwnhsE  Bldg Type_TwnhsI  \\\n",
       "Order PID                                                              \n",
       "144   535153070                0                 0                 0   \n",
       "1574  916380060                0                 0                 0   \n",
       "490   528290190                0                 0                 0   \n",
       "1730  528218050                0                 0                 0   \n",
       "2276  921128030                0                 0                 0   \n",
       "\n",
       "                 Bsmt Cond  Bsmt Exposure  Bsmt Full Bath  Bsmt Half Bath  \\\n",
       "Order PID                                                                   \n",
       "144   535153070          3              1               1               0   \n",
       "1574  916380060          3              4               1               0   \n",
       "490   528290190          3              1               0               0   \n",
       "1730  528218050          3              1               0               0   \n",
       "2276  921128030          3              4               0               0   \n",
       "\n",
       "                 Bsmt Qual  Bsmt Unf SF  BsmtFin SF 1  BsmtFin SF 2  \\\n",
       "Order PID                                                             \n",
       "144   535153070          3       1194.0           0.0           0.0   \n",
       "1574  916380060          5        482.0        1036.0           0.0   \n",
       "490   528290190          4        384.0           0.0           0.0   \n",
       "1730  528218050          4        783.0           0.0           0.0   \n",
       "2276  921128030          5       1824.0           0.0           0.0   \n",
       "\n",
       "                 BsmtFin Type 1  BsmtFin Type 2  Central Air_N  Central Air_Y  \\\n",
       "Order PID                                                                       \n",
       "144   535153070               1               1              0              1   \n",
       "1574  916380060               6               1              0              1   \n",
       "490   528290190               1               1              0              1   \n",
       "1730  528218050               1               1              0              1   \n",
       "2276  921128030               1               1              0              1   \n",
       "\n",
       "                 Condition 1_Artery  Condition 1_Feedr  Condition 1_Norm  \\\n",
       "Order PID                                                                  \n",
       "144   535153070                   0                  0                 1   \n",
       "1574  916380060                   0                  0                 1   \n",
       "490   528290190                   0                  0                 1   \n",
       "1730  528218050                   0                  0                 0   \n",
       "2276  921128030                   0                  0                 0   \n",
       "\n",
       "                 Condition 1_PosA  Condition 1_PosN  Condition 1_RRAe  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                 0                 0   \n",
       "1574  916380060                 0                 0                 0   \n",
       "490   528290190                 0                 0                 0   \n",
       "1730  528218050                 0                 0                 0   \n",
       "2276  921128030                 1                 0                 0   \n",
       "\n",
       "                 Condition 1_RRAn  Condition 1_RRNe  Condition 1_RRNn  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                 0                 0   \n",
       "1574  916380060                 0                 0                 0   \n",
       "490   528290190                 0                 0                 0   \n",
       "1730  528218050                 1                 0                 0   \n",
       "2276  921128030                 0                 0                 0   \n",
       "\n",
       "                 Condition 2_Artery  Condition 2_Feedr  Condition 2_Norm  \\\n",
       "Order PID                                                                  \n",
       "144   535153070                   0                  0                 1   \n",
       "1574  916380060                   0                  0                 1   \n",
       "490   528290190                   0                  0                 1   \n",
       "1730  528218050                   0                  0                 1   \n",
       "2276  921128030                   0                  0                 0   \n",
       "\n",
       "                 Condition 2_PosA  Condition 2_PosN  Condition 2_RRAe  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                 0                 0   \n",
       "1574  916380060                 0                 0                 0   \n",
       "490   528290190                 0                 0                 0   \n",
       "1730  528218050                 0                 0                 0   \n",
       "2276  921128030                 1                 0                 0   \n",
       "\n",
       "                 Condition 2_RRAn  Condition 2_RRNe  Condition 2_RRNn  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                 0                 0   \n",
       "1574  916380060                 0                 0                 0   \n",
       "490   528290190                 0                 0                 0   \n",
       "1730  528218050                 0                 0                 0   \n",
       "2276  921128030                 0                 0                 0   \n",
       "\n",
       "                 Electrical  Enclosed Porch  Exter Cond  Exter Qual  \\\n",
       "Order PID                                                             \n",
       "144   535153070           4           120.0           2           2   \n",
       "1574  916380060           4             0.0           2           3   \n",
       "490   528290190           4             0.0           2           2   \n",
       "1730  528218050           4             0.0           2           3   \n",
       "2276  921128030           4             0.0           2           4   \n",
       "\n",
       "                 Exterior 1st_AsbShng  Exterior 1st_AsphShn  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 1st_BrkComm  Exterior 1st_BrkFace  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 1st_CBlock  Exterior 1st_CemntBd  \\\n",
       "Order PID                                                    \n",
       "144   535153070                    0                     0   \n",
       "1574  916380060                    0                     0   \n",
       "490   528290190                    0                     0   \n",
       "1730  528218050                    0                     0   \n",
       "2276  921128030                    0                     0   \n",
       "\n",
       "                 Exterior 1st_HdBoard  Exterior 1st_ImStucc  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 1st_MetalSd  Exterior 1st_Other  \\\n",
       "Order PID                                                   \n",
       "144   535153070                     1                   0   \n",
       "1574  916380060                     0                   0   \n",
       "490   528290190                     0                   0   \n",
       "1730  528218050                     0                   0   \n",
       "2276  921128030                     1                   0   \n",
       "\n",
       "                 Exterior 1st_Plywood  Exterior 1st_PreCast  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 1st_Stone  Exterior 1st_Stucco  \\\n",
       "Order PID                                                  \n",
       "144   535153070                   0                    0   \n",
       "1574  916380060                   0                    0   \n",
       "490   528290190                   0                    0   \n",
       "1730  528218050                   0                    0   \n",
       "2276  921128030                   0                    0   \n",
       "\n",
       "                 Exterior 1st_VinylSd  Exterior 1st_Wd Sdng  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     1                     0   \n",
       "490   528290190                     1                     0   \n",
       "1730  528218050                     1                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 1st_WdShing  Exterior 2nd_AsbShng  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 2nd_AsphShn  Exterior 2nd_BrkComm  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 2nd_BrkFace  Exterior 2nd_CBlock  \\\n",
       "Order PID                                                    \n",
       "144   535153070                     0                    0   \n",
       "1574  916380060                     0                    0   \n",
       "490   528290190                     0                    0   \n",
       "1730  528218050                     0                    0   \n",
       "2276  921128030                     0                    0   \n",
       "\n",
       "                 Exterior 2nd_CemntBd  Exterior 2nd_HdBoard  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Exterior 2nd_ImStucc  Exterior 2nd_MetalSd  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     1   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     1   \n",
       "\n",
       "                 Exterior 2nd_Other  Exterior 2nd_Plywood  \\\n",
       "Order PID                                                   \n",
       "144   535153070                   0                     0   \n",
       "1574  916380060                   0                     0   \n",
       "490   528290190                   0                     0   \n",
       "1730  528218050                   0                     0   \n",
       "2276  921128030                   0                     0   \n",
       "\n",
       "                 Exterior 2nd_PreCast  Exterior 2nd_Stone  \\\n",
       "Order PID                                                   \n",
       "144   535153070                     0                   0   \n",
       "1574  916380060                     0                   0   \n",
       "490   528290190                     0                   0   \n",
       "1730  528218050                     0                   0   \n",
       "2276  921128030                     0                   0   \n",
       "\n",
       "                 Exterior 2nd_Stucco  Exterior 2nd_VinylSd  \\\n",
       "Order PID                                                    \n",
       "144   535153070                    0                     0   \n",
       "1574  916380060                    0                     1   \n",
       "490   528290190                    0                     1   \n",
       "1730  528218050                    0                     1   \n",
       "2276  921128030                    0                     0   \n",
       "\n",
       "                 Exterior 2nd_Wd Sdng  Exterior 2nd_WdShing  Fence  \\\n",
       "Order PID                                                            \n",
       "144   535153070                     0                     0      0   \n",
       "1574  916380060                     0                     0      0   \n",
       "490   528290190                     0                     0      0   \n",
       "1730  528218050                     0                     0      0   \n",
       "2276  921128030                     0                     0      0   \n",
       "\n",
       "                 Fireplace Qu  Fireplaces  Foundation_BrkTil  \\\n",
       "Order PID                                                      \n",
       "144   535153070             0           0                  0   \n",
       "1574  916380060             0           0                  0   \n",
       "490   528290190             3           1                  0   \n",
       "1730  528218050             4           1                  0   \n",
       "2276  921128030             4           1                  0   \n",
       "\n",
       "                 Foundation_CBlock  Foundation_PConc  Foundation_Slab  \\\n",
       "Order PID                                                               \n",
       "144   535153070                  1                 0                0   \n",
       "1574  916380060                  0                 1                0   \n",
       "490   528290190                  0                 1                0   \n",
       "1730  528218050                  0                 1                0   \n",
       "2276  921128030                  0                 1                0   \n",
       "\n",
       "                 Foundation_Stone  Foundation_Wood  Full Bath  Functional  \\\n",
       "Order PID                                                                   \n",
       "144   535153070                 0                0          1           7   \n",
       "1574  916380060                 0                0          2           7   \n",
       "490   528290190                 0                0          2           7   \n",
       "1730  528218050                 0                0          2           7   \n",
       "2276  921128030                 0                0          2           7   \n",
       "\n",
       "                 Garage Area  Garage Cars  Garage Cond  Garage Finish  \\\n",
       "Order PID                                                               \n",
       "144   535153070        312.0            1            3              2   \n",
       "1574  916380060        788.0            3            3              3   \n",
       "490   528290190        400.0            2            3              2   \n",
       "1730  528218050        393.0            2            3              3   \n",
       "2276  921128030        932.0            3            3              3   \n",
       "\n",
       "                 Garage Qual  Garage Type_2Types  Garage Type_Attchd  \\\n",
       "Order PID                                                              \n",
       "144   535153070            3                   0                   1   \n",
       "1574  916380060            3                   0                   1   \n",
       "490   528290190            3                   0                   0   \n",
       "1730  528218050            3                   0                   1   \n",
       "2276  921128030            3                   0                   1   \n",
       "\n",
       "                 Garage Type_Basment  Garage Type_BuiltIn  \\\n",
       "Order PID                                                   \n",
       "144   535153070                    0                    0   \n",
       "1574  916380060                    0                    0   \n",
       "490   528290190                    0                    1   \n",
       "1730  528218050                    0                    0   \n",
       "2276  921128030                    0                    0   \n",
       "\n",
       "                 Garage Type_CarPort  Garage Type_Detchd  Garage Type_NA  \\\n",
       "Order PID                                                                  \n",
       "144   535153070                    0                   0               0   \n",
       "1574  916380060                    0                   0               0   \n",
       "490   528290190                    0                   0               0   \n",
       "1730  528218050                    0                   0               0   \n",
       "2276  921128030                    0                   0               0   \n",
       "\n",
       "                 Gr Liv Area  Half Bath  Heating QC  Heating_Floor  \\\n",
       "Order PID                                                            \n",
       "144   535153070       1194.0          0           2              0   \n",
       "1574  916380060       1537.0          0           4              0   \n",
       "490   528290190       1430.0          1           4              0   \n",
       "1730  528218050       1484.0          1           4              0   \n",
       "2276  921128030       1824.0          0           4              0   \n",
       "\n",
       "                 Heating_GasA  Heating_GasW  Heating_Grav  Heating_OthW  \\\n",
       "Order PID                                                                 \n",
       "144   535153070             1             0             0             0   \n",
       "1574  916380060             1             0             0             0   \n",
       "490   528290190             1             0             0             0   \n",
       "1730  528218050             1             0             0             0   \n",
       "2276  921128030             1             0             0             0   \n",
       "\n",
       "                 Heating_Wall  House Style_1.5Fin  House Style_1.5Unf  \\\n",
       "Order PID                                                               \n",
       "144   535153070             0                   0                   0   \n",
       "1574  916380060             0                   0                   0   \n",
       "490   528290190             0                   0                   0   \n",
       "1730  528218050             0                   0                   0   \n",
       "2276  921128030             0                   0                   0   \n",
       "\n",
       "                 House Style_1Story  House Style_2.5Fin  House Style_2.5Unf  \\\n",
       "Order PID                                                                     \n",
       "144   535153070                   1                   0                   0   \n",
       "1574  916380060                   1                   0                   0   \n",
       "490   528290190                   0                   0                   0   \n",
       "1730  528218050                   0                   0                   0   \n",
       "2276  921128030                   1                   0                   0   \n",
       "\n",
       "                 House Style_2Story  House Style_SFoyer  House Style_SLvl  \\\n",
       "Order PID                                                                   \n",
       "144   535153070                   0                   0                 0   \n",
       "1574  916380060                   0                   0                 0   \n",
       "490   528290190                   0                   0                 1   \n",
       "1730  528218050                   1                   0                 0   \n",
       "2276  921128030                   0                   0                 0   \n",
       "\n",
       "                 Kitchen AbvGr  Kitchen Qual  Land Contour_Bnk  \\\n",
       "Order PID                                                        \n",
       "144   535153070              1             2                 0   \n",
       "1574  916380060              1             3                 0   \n",
       "490   528290190              1             2                 0   \n",
       "1730  528218050              1             3                 0   \n",
       "2276  921128030              1             4                 0   \n",
       "\n",
       "                 Land Contour_HLS  Land Contour_Low  Land Contour_Lvl  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                 0                 1   \n",
       "1574  916380060                 1                 0                 0   \n",
       "490   528290190                 0                 0                 1   \n",
       "1730  528218050                 0                 0                 1   \n",
       "2276  921128030                 1                 0                 0   \n",
       "\n",
       "                 Land Slope  Lot Area  Lot Config_Corner  Lot Config_CulDSac  \\\n",
       "Order PID                                                                      \n",
       "144   535153070           2    8760.0                  0                   0   \n",
       "1574  916380060           2   11563.0                  0                   0   \n",
       "490   528290190           2    7750.0                  0                   0   \n",
       "1730  528218050           2   10237.0                  0                   0   \n",
       "2276  921128030           2   12633.0                  0                   0   \n",
       "\n",
       "                 Lot Config_FR2  Lot Config_FR3  Lot Config_Inside  Lot Shape  \\\n",
       "Order PID                                                                       \n",
       "144   535153070               0               0                  1          3   \n",
       "1574  916380060               0               0                  1          2   \n",
       "490   528290190               0               0                  1          3   \n",
       "1730  528218050               0               0                  1          3   \n",
       "2276  921128030               0               0                  1          2   \n",
       "\n",
       "                 Low Qual Fin SF  MS SubClass_020  MS SubClass_030  \\\n",
       "Order PID                                                            \n",
       "144   535153070              0.0                1                0   \n",
       "1574  916380060              0.0                1                0   \n",
       "490   528290190              0.0                0                0   \n",
       "1730  528218050              0.0                0                0   \n",
       "2276  921128030              0.0                1                0   \n",
       "\n",
       "                 MS SubClass_040  MS SubClass_045  MS SubClass_050  \\\n",
       "Order PID                                                            \n",
       "144   535153070                0                0                0   \n",
       "1574  916380060                0                0                0   \n",
       "490   528290190                0                0                0   \n",
       "1730  528218050                0                0                0   \n",
       "2276  921128030                0                0                0   \n",
       "\n",
       "                 MS SubClass_060  MS SubClass_070  MS SubClass_075  \\\n",
       "Order PID                                                            \n",
       "144   535153070                0                0                0   \n",
       "1574  916380060                0                0                0   \n",
       "490   528290190                0                0                0   \n",
       "1730  528218050                1                0                0   \n",
       "2276  921128030                0                0                0   \n",
       "\n",
       "                 MS SubClass_080  MS SubClass_085  MS SubClass_090  \\\n",
       "Order PID                                                            \n",
       "144   535153070                0                0                0   \n",
       "1574  916380060                0                0                0   \n",
       "490   528290190                1                0                0   \n",
       "1730  528218050                0                0                0   \n",
       "2276  921128030                0                0                0   \n",
       "\n",
       "                 MS SubClass_120  MS SubClass_150  MS SubClass_160  \\\n",
       "Order PID                                                            \n",
       "144   535153070                0                0                0   \n",
       "1574  916380060                0                0                0   \n",
       "490   528290190                0                0                0   \n",
       "1730  528218050                0                0                0   \n",
       "2276  921128030                0                0                0   \n",
       "\n",
       "                 MS SubClass_180  MS SubClass_190  MS Zoning_A  MS Zoning_C  \\\n",
       "Order PID                                                                     \n",
       "144   535153070                0                0            0            0   \n",
       "1574  916380060                0                0            0            0   \n",
       "490   528290190                0                0            0            0   \n",
       "1730  528218050                0                0            0            0   \n",
       "2276  921128030                0                0            0            0   \n",
       "\n",
       "                 MS Zoning_FV  MS Zoning_I  MS Zoning_RH  MS Zoning_RL  \\\n",
       "Order PID                                                                \n",
       "144   535153070             0            0             0             1   \n",
       "1574  916380060             0            0             0             1   \n",
       "490   528290190             0            0             0             1   \n",
       "1730  528218050             0            0             0             1   \n",
       "2276  921128030             0            0             0             1   \n",
       "\n",
       "                 MS Zoning_RM  MS Zoning_RP  Mas Vnr Area  \\\n",
       "Order PID                                                   \n",
       "144   535153070             0             0         220.0   \n",
       "1574  916380060             0             0         258.0   \n",
       "490   528290190             0             0           0.0   \n",
       "1730  528218050             0             0           0.0   \n",
       "2276  921128030             0             0         242.0   \n",
       "\n",
       "                 Mas Vnr Type_BrkCmn  Mas Vnr Type_BrkFace  \\\n",
       "Order PID                                                    \n",
       "144   535153070                    0                     1   \n",
       "1574  916380060                    0                     0   \n",
       "490   528290190                    0                     0   \n",
       "1730  528218050                    0                     0   \n",
       "2276  921128030                    0                     1   \n",
       "\n",
       "                 Mas Vnr Type_CBlock  Mas Vnr Type_None  Mas Vnr Type_Stone  \\\n",
       "Order PID                                                                     \n",
       "144   535153070                    0                  0                   0   \n",
       "1574  916380060                    0                  0                   1   \n",
       "490   528290190                    0                  1                   0   \n",
       "1730  528218050                    0                  1                   0   \n",
       "2276  921128030                    0                  0                   0   \n",
       "\n",
       "                 Misc Feature_Elev  Misc Feature_Gar2  Misc Feature_NA  \\\n",
       "Order PID                                                                \n",
       "144   535153070                  0                  0                1   \n",
       "1574  916380060                  0                  0                1   \n",
       "490   528290190                  0                  0                1   \n",
       "1730  528218050                  0                  0                1   \n",
       "2276  921128030                  0                  0                1   \n",
       "\n",
       "                 Misc Feature_Othr  Misc Feature_Shed  Misc Feature_TenC  \\\n",
       "Order PID                                                                  \n",
       "144   535153070                  0                  0                  0   \n",
       "1574  916380060                  0                  0                  0   \n",
       "490   528290190                  0                  0                  0   \n",
       "1730  528218050                  0                  0                  0   \n",
       "2276  921128030                  0                  0                  0   \n",
       "\n",
       "                 Misc Val  Mo Sold  Neighborhood_Blmngtn  \\\n",
       "Order PID                                                  \n",
       "144   535153070       0.0        4                     0   \n",
       "1574  916380060       0.0        4                     0   \n",
       "490   528290190       0.0        3                     0   \n",
       "1730  528218050       0.0        7                     0   \n",
       "2276  921128030       0.0        9                     0   \n",
       "\n",
       "                 Neighborhood_Blueste  Neighborhood_BrDale  \\\n",
       "Order PID                                                    \n",
       "144   535153070                     0                    0   \n",
       "1574  916380060                     0                    0   \n",
       "490   528290190                     0                    0   \n",
       "1730  528218050                     0                    0   \n",
       "2276  921128030                     0                    0   \n",
       "\n",
       "                 Neighborhood_BrkSide  Neighborhood_ClearCr  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_CollgCr  Neighborhood_Crawfor  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_Edwards  Neighborhood_Gilbert  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     1   \n",
       "1730  528218050                     0                     1   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_Greens  Neighborhood_GrnHill  \\\n",
       "Order PID                                                    \n",
       "144   535153070                    0                     0   \n",
       "1574  916380060                    0                     0   \n",
       "490   528290190                    0                     0   \n",
       "1730  528218050                    0                     0   \n",
       "2276  921128030                    0                     0   \n",
       "\n",
       "                 Neighborhood_IDOTRR  Neighborhood_Landmrk  \\\n",
       "Order PID                                                    \n",
       "144   535153070                    0                     0   \n",
       "1574  916380060                    0                     0   \n",
       "490   528290190                    0                     0   \n",
       "1730  528218050                    0                     0   \n",
       "2276  921128030                    0                     0   \n",
       "\n",
       "                 Neighborhood_MeadowV  Neighborhood_Mitchel  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_NPkVill  Neighborhood_NWAmes  \\\n",
       "Order PID                                                    \n",
       "144   535153070                     0                    0   \n",
       "1574  916380060                     0                    0   \n",
       "490   528290190                     0                    0   \n",
       "1730  528218050                     0                    0   \n",
       "2276  921128030                     0                    0   \n",
       "\n",
       "                 Neighborhood_Names  Neighborhood_NoRidge  \\\n",
       "Order PID                                                   \n",
       "144   535153070                   1                     0   \n",
       "1574  916380060                   0                     0   \n",
       "490   528290190                   0                     0   \n",
       "1730  528218050                   0                     0   \n",
       "2276  921128030                   0                     0   \n",
       "\n",
       "                 Neighborhood_NridgHt  Neighborhood_OldTown  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_SWISU  Neighborhood_Sawyer  \\\n",
       "Order PID                                                  \n",
       "144   535153070                   0                    0   \n",
       "1574  916380060                   0                    0   \n",
       "490   528290190                   0                    0   \n",
       "1730  528218050                   0                    0   \n",
       "2276  921128030                   0                    0   \n",
       "\n",
       "                 Neighborhood_SawyerW  Neighborhood_Somerst  \\\n",
       "Order PID                                                     \n",
       "144   535153070                     0                     0   \n",
       "1574  916380060                     0                     0   \n",
       "490   528290190                     0                     0   \n",
       "1730  528218050                     0                     0   \n",
       "2276  921128030                     0                     0   \n",
       "\n",
       "                 Neighborhood_StoneBr  Neighborhood_Timber  \\\n",
       "Order PID                                                    \n",
       "144   535153070                     0                    0   \n",
       "1574  916380060                     0                    1   \n",
       "490   528290190                     0                    0   \n",
       "1730  528218050                     0                    0   \n",
       "2276  921128030                     0                    1   \n",
       "\n",
       "                 Neighborhood_Veenker  Open Porch SF  Overall Cond  \\\n",
       "Order PID                                                            \n",
       "144   535153070                     0            0.0             5   \n",
       "1574  916380060                     0           26.0             4   \n",
       "490   528290190                     0            0.0             4   \n",
       "1730  528218050                     0           72.0             4   \n",
       "2276  921128030                     0           36.0             4   \n",
       "\n",
       "                 Overall Qual  Paved Drive  Pool Area  Pool QC  \\\n",
       "Order PID                                                        \n",
       "144   535153070             5            2        0.0        0   \n",
       "1574  916380060             7            2        0.0        0   \n",
       "490   528290190             6            2        0.0        0   \n",
       "1730  528218050             5            2        0.0        0   \n",
       "2276  921128030             9            2        0.0        0   \n",
       "\n",
       "                 Roof Matl_ClyTile  Roof Matl_CompShg  Roof Matl_Membran  \\\n",
       "Order PID                                                                  \n",
       "144   535153070                  0                  1                  0   \n",
       "1574  916380060                  0                  1                  0   \n",
       "490   528290190                  0                  1                  0   \n",
       "1730  528218050                  0                  1                  0   \n",
       "2276  921128030                  0                  1                  0   \n",
       "\n",
       "                 Roof Matl_Metal  Roof Matl_Roll  Roof Matl_Tar&Grv  \\\n",
       "Order PID                                                             \n",
       "144   535153070                0               0                  0   \n",
       "1574  916380060                0               0                  0   \n",
       "490   528290190                0               0                  0   \n",
       "1730  528218050                0               0                  0   \n",
       "2276  921128030                0               0                  0   \n",
       "\n",
       "                 Roof Matl_WdShake  Roof Matl_WdShngl  Roof Style_Flat  \\\n",
       "Order PID                                                                \n",
       "144   535153070                  0                  0                0   \n",
       "1574  916380060                  0                  0                0   \n",
       "490   528290190                  0                  0                0   \n",
       "1730  528218050                  0                  0                0   \n",
       "2276  921128030                  0                  0                0   \n",
       "\n",
       "                 Roof Style_Gable  Roof Style_Gambrel  Roof Style_Hip  \\\n",
       "Order PID                                                               \n",
       "144   535153070                 0                   0               1   \n",
       "1574  916380060                 0                   0               1   \n",
       "490   528290190                 1                   0               0   \n",
       "1730  528218050                 1                   0               0   \n",
       "2276  921128030                 0                   0               1   \n",
       "\n",
       "                 Roof Style_Mansard  Roof Style_Shed  Sale Condition_Abnorml  \\\n",
       "Order PID                                                                      \n",
       "144   535153070                   0                0                       0   \n",
       "1574  916380060                   0                0                       0   \n",
       "490   528290190                   0                0                       0   \n",
       "1730  528218050                   0                0                       0   \n",
       "2276  921128030                   0                0                       0   \n",
       "\n",
       "                 Sale Condition_AdjLand  Sale Condition_Alloca  \\\n",
       "Order PID                                                        \n",
       "144   535153070                       0                      0   \n",
       "1574  916380060                       0                      0   \n",
       "490   528290190                       0                      0   \n",
       "1730  528218050                       0                      0   \n",
       "2276  921128030                       0                      0   \n",
       "\n",
       "                 Sale Condition_Family  Sale Condition_Normal  \\\n",
       "Order PID                                                       \n",
       "144   535153070                      0                      1   \n",
       "1574  916380060                      0                      1   \n",
       "490   528290190                      0                      1   \n",
       "1730  528218050                      0                      0   \n",
       "2276  921128030                      0                      0   \n",
       "\n",
       "                 Sale Condition_Partial  Sale Type_COD  Sale Type_CWD  \\\n",
       "Order PID                                                               \n",
       "144   535153070                       0              0              0   \n",
       "1574  916380060                       0              0              0   \n",
       "490   528290190                       0              0              0   \n",
       "1730  528218050                       1              0              0   \n",
       "2276  921128030                       1              0              0   \n",
       "\n",
       "                 Sale Type_Con  Sale Type_ConLD  Sale Type_ConLI  \\\n",
       "Order PID                                                          \n",
       "144   535153070              0                0                0   \n",
       "1574  916380060              0                0                0   \n",
       "490   528290190              0                0                0   \n",
       "1730  528218050              0                0                0   \n",
       "2276  921128030              0                0                0   \n",
       "\n",
       "                 Sale Type_ConLw  Sale Type_New  Sale Type_Oth  Sale Type_VWD  \\\n",
       "Order PID                                                                       \n",
       "144   535153070                0              0              0              0   \n",
       "1574  916380060                0              0              0              0   \n",
       "490   528290190                0              0              0              0   \n",
       "1730  528218050                0              1              0              0   \n",
       "2276  921128030                0              1              0              0   \n",
       "\n",
       "                 Sale Type_WD  Screen Porch  Street_Grvl  Street_Pave  \\\n",
       "Order PID                                                               \n",
       "144   535153070             1           0.0            0            1   \n",
       "1574  916380060             1           0.0            0            1   \n",
       "490   528290190             1           0.0            0            1   \n",
       "1730  528218050             0           0.0            0            1   \n",
       "2276  921128030             0         108.0            0            1   \n",
       "\n",
       "                 TotRms AbvGrd  Total Bsmt SF  Utilities  Wood Deck SF  \\\n",
       "Order PID                                                                \n",
       "144   535153070              6         1194.0          3           0.0   \n",
       "1574  916380060              8         1518.0          3           0.0   \n",
       "490   528290190              7          384.0          3           0.0   \n",
       "1730  528218050              8          783.0          3           0.0   \n",
       "2276  921128030              8         1824.0          3         160.0   \n",
       "\n",
       "                 Year Built  Year Remod/Add  Yr Sold  SalePrice  \n",
       "Order PID                                                        \n",
       "144   535153070        1959            1959     2010   148000.0  \n",
       "1574  916380060        2006            2007     2008   294000.0  \n",
       "490   528290190        1999            2000     2009   156000.0  \n",
       "1730  528218050        2005            2007     2007   178900.0  \n",
       "2276  921128030        2006            2007     2007   392000.0  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtain the raw numpy arrays:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X1 = df1.drop(columns=\"SalePrice\").values\n",
    "y1 = df1[\"SalePrice\"].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Improved Data\n",
    "\n",
    "The DataFrame `df2` holds the data as manually processed in notebooks 2 and 3.\n",
    "\n",
    "New features were either generated, like the *years_since_\\** columns, or derived from other variables, like *has 2nd Flr* (from the continuous *2nd Flr SF*). Further, factor variables were created taking into account patterns in the visualizations. For example, the five categories of *Bldg Type* (from `df1`) were condensed into just three. In summary, `df2` has fewer than half as many dimensions as `df1` to mitigate a potential curse of dimensionality."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = load_clean_data(\"data/data_clean_with_transformations_and_factors.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = encode_ordinals(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = shuffle(df2, random_state=random_state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "MultiIndex: 2893 entries, (np.int64(976), np.int64(923226270)) to (np.int64(2650), np.int64(902128100))\n",
      "Columns: 109 entries, 1st Flr SF to SalePrice (box-cox-0)\n",
      "dtypes: float64(27), int64(82)\n",
      "memory usage: 2.6 MB\n"
     ]
    }
   ],
   "source": [
    "df2.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>1st Flr SF</th>\n",
       "      <th>1st Flr SF (box-cox-0)</th>\n",
       "      <th>2nd Flr SF</th>\n",
       "      <th>3Ssn Porch</th>\n",
       "      <th>Bedroom AbvGr</th>\n",
       "      <th>Bsmt Cond</th>\n",
       "      <th>Bsmt Exposure</th>\n",
       "      <th>Bsmt Full Bath</th>\n",
       "      <th>Bsmt Half Bath</th>\n",
       "      <th>Bsmt Qual</th>\n",
       "      <th>Bsmt Unf SF</th>\n",
       "      <th>BsmtFin SF 1</th>\n",
       "      <th>BsmtFin SF 2</th>\n",
       "      <th>BsmtFin Type 1</th>\n",
       "      <th>BsmtFin Type 2</th>\n",
       "      <th>Electrical</th>\n",
       "      <th>Enclosed Porch</th>\n",
       "      <th>Fence</th>\n",
       "      <th>Fireplace Qu</th>\n",
       "      <th>Fireplaces</th>\n",
       "      <th>Full Bath</th>\n",
       "      <th>Functional</th>\n",
       "      <th>Garage Area</th>\n",
       "      <th>Garage Cars</th>\n",
       "      <th>Garage Cond</th>\n",
       "      <th>Garage Finish</th>\n",
       "      <th>Garage Qual</th>\n",
       "      <th>Gr Liv Area</th>\n",
       "      <th>Gr Liv Area (box-cox-0)</th>\n",
       "      <th>Half Bath</th>\n",
       "      <th>Kitchen AbvGr</th>\n",
       "      <th>Kitchen Qual</th>\n",
       "      <th>Land Slope</th>\n",
       "      <th>Lot Area</th>\n",
       "      <th>Lot Area (box-cox-0.1)</th>\n",
       "      <th>Lot Shape</th>\n",
       "      <th>Low Qual Fin SF</th>\n",
       "      <th>Mas Vnr Area</th>\n",
       "      <th>Misc Val</th>\n",
       "      <th>Mo Sold</th>\n",
       "      <th>Open Porch SF</th>\n",
       "      <th>Overall Cond</th>\n",
       "      <th>Overall Qual</th>\n",
       "      <th>Paved Drive</th>\n",
       "      <th>Pool Area</th>\n",
       "      <th>Pool QC</th>\n",
       "      <th>Screen Porch</th>\n",
       "      <th>TotRms AbvGrd</th>\n",
       "      <th>Total Bath</th>\n",
       "      <th>Total Bsmt SF</th>\n",
       "      <th>Total Porch SF</th>\n",
       "      <th>Total SF</th>\n",
       "      <th>Total SF (box-cox-0.2)</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>Wood Deck SF</th>\n",
       "      <th>abnormal_sale</th>\n",
       "      <th>air_cond</th>\n",
       "      <th>build_type_1Fam</th>\n",
       "      <th>build_type_2Fam</th>\n",
       "      <th>build_type_Twnhs</th>\n",
       "      <th>found_BrkTil</th>\n",
       "      <th>found_CBlock</th>\n",
       "      <th>found_PConc</th>\n",
       "      <th>has 2nd Flr</th>\n",
       "      <th>has Bsmt</th>\n",
       "      <th>has Fireplace</th>\n",
       "      <th>has Garage</th>\n",
       "      <th>has Pool</th>\n",
       "      <th>has Porch</th>\n",
       "      <th>major_street</th>\n",
       "      <th>new_home</th>\n",
       "      <th>nhood_Blmngtn</th>\n",
       "      <th>nhood_Blueste</th>\n",
       "      <th>nhood_BrDale</th>\n",
       "      <th>nhood_BrkSide</th>\n",
       "      <th>nhood_ClearCr</th>\n",
       "      <th>nhood_CollgCr</th>\n",
       "      <th>nhood_Crawfor</th>\n",
       "      <th>nhood_Edwards</th>\n",
       "      <th>nhood_Gilbert</th>\n",
       "      <th>nhood_Greens</th>\n",
       "      <th>nhood_GrnHill</th>\n",
       "      <th>nhood_IDOTRR</th>\n",
       "      <th>nhood_Landmrk</th>\n",
       "      <th>nhood_MeadowV</th>\n",
       "      <th>nhood_Mitchel</th>\n",
       "      <th>nhood_NPkVill</th>\n",
       "      <th>nhood_NWAmes</th>\n",
       "      <th>nhood_Names</th>\n",
       "      <th>nhood_NoRidge</th>\n",
       "      <th>nhood_NridgHt</th>\n",
       "      <th>nhood_OldTown</th>\n",
       "      <th>nhood_SWISU</th>\n",
       "      <th>nhood_Sawyer</th>\n",
       "      <th>nhood_SawyerW</th>\n",
       "      <th>nhood_Somerst</th>\n",
       "      <th>nhood_StoneBr</th>\n",
       "      <th>nhood_Timber</th>\n",
       "      <th>nhood_Veenker</th>\n",
       "      <th>park</th>\n",
       "      <th>partial_sale</th>\n",
       "      <th>railway</th>\n",
       "      <th>recently_built</th>\n",
       "      <th>recently_remodeled</th>\n",
       "      <th>remodeled</th>\n",
       "      <th>years_since_built</th>\n",
       "      <th>years_since_remodeled</th>\n",
       "      <th>SalePrice</th>\n",
       "      <th>SalePrice (box-cox-0)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Order</th>\n",
       "      <th>PID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <th>923226270</th>\n",
       "      <td>630.0</td>\n",
       "      <td>6.445720</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>115.0</td>\n",
       "      <td>515.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>286.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>630.0</td>\n",
       "      <td>6.445720</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1526.0</td>\n",
       "      <td>10.813995</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>2.0</td>\n",
       "      <td>630.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1260.0</td>\n",
       "      <td>15.847026</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>39</td>\n",
       "      <td>7</td>\n",
       "      <td>86000.0</td>\n",
       "      <td>11.362103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1112</th>\n",
       "      <th>528427070</th>\n",
       "      <td>894.0</td>\n",
       "      <td>6.795706</td>\n",
       "      <td>1039.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>894.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>668.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1933.0</td>\n",
       "      <td>7.566828</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>14598.0</td>\n",
       "      <td>16.087312</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>18.0</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "      <td>2.5</td>\n",
       "      <td>894.0</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2827.0</td>\n",
       "      <td>19.503897</td>\n",
       "      <td>3</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>214000.0</td>\n",
       "      <td>12.273731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>819</th>\n",
       "      <th>906340100</th>\n",
       "      <td>1680.0</td>\n",
       "      <td>7.426549</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>534.0</td>\n",
       "      <td>1021.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1138.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1680.0</td>\n",
       "      <td>7.426549</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>10933.0</td>\n",
       "      <td>15.343929</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>242.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7</td>\n",
       "      <td>24.0</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1555.0</td>\n",
       "      <td>209.0</td>\n",
       "      <td>3235.0</td>\n",
       "      <td>20.173573</td>\n",
       "      <td>3</td>\n",
       "      <td>185.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>323262.0</td>\n",
       "      <td>12.686218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1651</th>\n",
       "      <th>527327050</th>\n",
       "      <td>1118.0</td>\n",
       "      <td>7.019297</td>\n",
       "      <td>912.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>692.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>551.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2030.0</td>\n",
       "      <td>7.615791</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>12046.0</td>\n",
       "      <td>15.590825</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>298.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>224.0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8</td>\n",
       "      <td>2.5</td>\n",
       "      <td>848.0</td>\n",
       "      <td>224.0</td>\n",
       "      <td>2878.0</td>\n",
       "      <td>19.591678</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>31</td>\n",
       "      <td>31</td>\n",
       "      <td>195000.0</td>\n",
       "      <td>12.180755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1140</th>\n",
       "      <th>531382090</th>\n",
       "      <td>754.0</td>\n",
       "      <td>6.625392</td>\n",
       "      <td>855.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>392.0</td>\n",
       "      <td>362.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>525.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1609.0</td>\n",
       "      <td>7.383368</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>8453.0</td>\n",
       "      <td>14.700235</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>70.0</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "      <td>2.5</td>\n",
       "      <td>754.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>2363.0</td>\n",
       "      <td>18.640832</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>182000.0</td>\n",
       "      <td>12.111762</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 1st Flr SF  1st Flr SF (box-cox-0)  2nd Flr SF  3Ssn Porch  \\\n",
       "Order PID                                                                     \n",
       "976   923226270       630.0                6.445720         0.0         0.0   \n",
       "1112  528427070       894.0                6.795706      1039.0         0.0   \n",
       "819   906340100      1680.0                7.426549         0.0         0.0   \n",
       "1651  527327050      1118.0                7.019297       912.0         0.0   \n",
       "1140  531382090       754.0                6.625392       855.0         0.0   \n",
       "\n",
       "                 Bedroom AbvGr  Bsmt Cond  Bsmt Exposure  Bsmt Full Bath  \\\n",
       "Order PID                                                                  \n",
       "976   923226270              1          3              3               1   \n",
       "1112  528427070              4          3              2               0   \n",
       "819   906340100              1          3              4               1   \n",
       "1651  527327050              4          3              1               0   \n",
       "1140  531382090              3          3              1               0   \n",
       "\n",
       "                 Bsmt Half Bath  Bsmt Qual  Bsmt Unf SF  BsmtFin SF 1  \\\n",
       "Order PID                                                               \n",
       "976   923226270               0          4        115.0         515.0   \n",
       "1112  528427070               0          4        894.0           0.0   \n",
       "819   906340100               0          5        534.0        1021.0   \n",
       "1651  527327050               0          3        692.0         156.0   \n",
       "1140  531382090               0          4        392.0         362.0   \n",
       "\n",
       "                 BsmtFin SF 2  BsmtFin Type 1  BsmtFin Type 2  Electrical  \\\n",
       "Order PID                                                                   \n",
       "976   923226270           0.0               6               1           4   \n",
       "1112  528427070           0.0               1               1           4   \n",
       "819   906340100           0.0               6               1           4   \n",
       "1651  527327050           0.0               2               1           4   \n",
       "1140  531382090           0.0               6               1           4   \n",
       "\n",
       "                 Enclosed Porch  Fence  Fireplace Qu  Fireplaces  Full Bath  \\\n",
       "Order PID                                                                     \n",
       "976   923226270             0.0      0             0           0          1   \n",
       "1112  528427070             0.0      0             4           1          2   \n",
       "819   906340100             0.0      0             4           1          1   \n",
       "1651  527327050             0.0      0             3           1          2   \n",
       "1140  531382090             0.0      0             0           0          2   \n",
       "\n",
       "                 Functional  Garage Area  Garage Cars  Garage Cond  \\\n",
       "Order PID                                                            \n",
       "976   923226270           7        286.0            1            3   \n",
       "1112  528427070           7        668.0            3            3   \n",
       "819   906340100           7       1138.0            3            3   \n",
       "1651  527327050           7        551.0            2            3   \n",
       "1140  531382090           7        525.0            2            3   \n",
       "\n",
       "                 Garage Finish  Garage Qual  Gr Liv Area  \\\n",
       "Order PID                                                  \n",
       "976   923226270              1            3        630.0   \n",
       "1112  528427070              3            3       1933.0   \n",
       "819   906340100              3            3       1680.0   \n",
       "1651  527327050              3            3       2030.0   \n",
       "1140  531382090              2            3       1609.0   \n",
       "\n",
       "                 Gr Liv Area (box-cox-0)  Half Bath  Kitchen AbvGr  \\\n",
       "Order PID                                                            \n",
       "976   923226270                 6.445720          0              1   \n",
       "1112  528427070                 7.566828          1              1   \n",
       "819   906340100                 7.426549          1              1   \n",
       "1651  527327050                 7.615791          1              1   \n",
       "1140  531382090                 7.383368          1              1   \n",
       "\n",
       "                 Kitchen Qual  Land Slope  Lot Area  Lot Area (box-cox-0.1)  \\\n",
       "Order PID                                                                     \n",
       "976   923226270             3           2    1526.0               10.813995   \n",
       "1112  528427070             3           2   14598.0               16.087312   \n",
       "819   906340100             4           2   10933.0               15.343929   \n",
       "1651  527327050             3           2   12046.0               15.590825   \n",
       "1140  531382090             3           2    8453.0               14.700235   \n",
       "\n",
       "                 Lot Shape  Low Qual Fin SF  Mas Vnr Area  Misc Val  Mo Sold  \\\n",
       "Order PID                                                                      \n",
       "976   923226270          3              0.0           0.0       0.0        5   \n",
       "1112  528427070          2              0.0          74.0       0.0        1   \n",
       "819   906340100          3              0.0         242.0       0.0        7   \n",
       "1651  527327050          2              0.0         298.0       0.0        6   \n",
       "1140  531382090          2              0.0          38.0       0.0        4   \n",
       "\n",
       "                 Open Porch SF  Overall Cond  Overall Qual  Paved Drive  \\\n",
       "Order PID                                                                 \n",
       "976   923226270            0.0             7             3            2   \n",
       "1112  528427070           18.0             4             5            2   \n",
       "819   906340100           24.0             4             8            2   \n",
       "1651  527327050          224.0             5             5            2   \n",
       "1140  531382090           70.0             4             5            2   \n",
       "\n",
       "                 Pool Area  Pool QC  Screen Porch  TotRms AbvGrd  Total Bath  \\\n",
       "Order PID                                                                      \n",
       "976   923226270        0.0        0           0.0              3         2.0   \n",
       "1112  528427070        0.0        0           0.0              9         2.5   \n",
       "819   906340100        0.0        0           0.0              8         2.5   \n",
       "1651  527327050        0.0        0           0.0              8         2.5   \n",
       "1140  531382090        0.0        0           0.0              6         2.5   \n",
       "\n",
       "                 Total Bsmt SF  Total Porch SF  Total SF  \\\n",
       "Order PID                                                  \n",
       "976   923226270          630.0             0.0    1260.0   \n",
       "1112  528427070          894.0           118.0    2827.0   \n",
       "819   906340100         1555.0           209.0    3235.0   \n",
       "1651  527327050          848.0           224.0    2878.0   \n",
       "1140  531382090          754.0            70.0    2363.0   \n",
       "\n",
       "                 Total SF (box-cox-0.2)  Utilities  Wood Deck SF  \\\n",
       "Order PID                                                          \n",
       "976   923226270               15.847026          3           0.0   \n",
       "1112  528427070               19.503897          3         100.0   \n",
       "819   906340100               20.173573          3         185.0   \n",
       "1651  527327050               19.591678          3           0.0   \n",
       "1140  531382090               18.640832          3           0.0   \n",
       "\n",
       "                 abnormal_sale  air_cond  build_type_1Fam  build_type_2Fam  \\\n",
       "Order PID                                                                    \n",
       "976   923226270              0         1                0                0   \n",
       "1112  528427070              0         1                1                0   \n",
       "819   906340100              0         1                1                0   \n",
       "1651  527327050              0         1                1                0   \n",
       "1140  531382090              0         1                1                0   \n",
       "\n",
       "                 build_type_Twnhs  found_BrkTil  found_CBlock  found_PConc  \\\n",
       "Order PID                                                                    \n",
       "976   923226270                 1             0             1            0   \n",
       "1112  528427070                 0             0             0            1   \n",
       "819   906340100                 0             0             0            1   \n",
       "1651  527327050                 0             0             1            0   \n",
       "1140  531382090                 0             0             0            1   \n",
       "\n",
       "                 has 2nd Flr  has Bsmt  has Fireplace  has Garage  has Pool  \\\n",
       "Order PID                                                                     \n",
       "976   923226270            0         1              0           1         0   \n",
       "1112  528427070            1         1              1           1         0   \n",
       "819   906340100            0         1              1           1         0   \n",
       "1651  527327050            1         1              1           1         0   \n",
       "1140  531382090            1         1              0           1         0   \n",
       "\n",
       "                 has Porch  major_street  new_home  nhood_Blmngtn  \\\n",
       "Order PID                                                           \n",
       "976   923226270          0             0         0              0   \n",
       "1112  528427070          1             1         0              0   \n",
       "819   906340100          1             0         1              0   \n",
       "1651  527327050          1             0         0              0   \n",
       "1140  531382090          1             0         0              0   \n",
       "\n",
       "                 nhood_Blueste  nhood_BrDale  nhood_BrkSide  nhood_ClearCr  \\\n",
       "Order PID                                                                    \n",
       "976   923226270              0             0              0              0   \n",
       "1112  528427070              0             0              0              0   \n",
       "819   906340100              0             0              0              0   \n",
       "1651  527327050              0             0              0              0   \n",
       "1140  531382090              0             0              0              0   \n",
       "\n",
       "                 nhood_CollgCr  nhood_Crawfor  nhood_Edwards  nhood_Gilbert  \\\n",
       "Order PID                                                                     \n",
       "976   923226270              0              0              0              0   \n",
       "1112  528427070              0              0              0              0   \n",
       "819   906340100              1              0              0              0   \n",
       "1651  527327050              0              0              0              0   \n",
       "1140  531382090              0              0              0              0   \n",
       "\n",
       "                 nhood_Greens  nhood_GrnHill  nhood_IDOTRR  nhood_Landmrk  \\\n",
       "Order PID                                                                   \n",
       "976   923226270             0              0             0              0   \n",
       "1112  528427070             0              0             0              0   \n",
       "819   906340100             0              0             0              0   \n",
       "1651  527327050             0              0             0              0   \n",
       "1140  531382090             0              0             0              0   \n",
       "\n",
       "                 nhood_MeadowV  nhood_Mitchel  nhood_NPkVill  nhood_NWAmes  \\\n",
       "Order PID                                                                    \n",
       "976   923226270              1              0              0             0   \n",
       "1112  528427070              0              0              0             0   \n",
       "819   906340100              0              0              0             0   \n",
       "1651  527327050              0              0              0             1   \n",
       "1140  531382090              0              0              0             0   \n",
       "\n",
       "                 nhood_Names  nhood_NoRidge  nhood_NridgHt  nhood_OldTown  \\\n",
       "Order PID                                                                   \n",
       "976   923226270            0              0              0              0   \n",
       "1112  528427070            0              0              0              0   \n",
       "819   906340100            0              0              0              0   \n",
       "1651  527327050            0              0              0              0   \n",
       "1140  531382090            0              0              0              0   \n",
       "\n",
       "                 nhood_SWISU  nhood_Sawyer  nhood_SawyerW  nhood_Somerst  \\\n",
       "Order PID                                                                  \n",
       "976   923226270            0             0              0              0   \n",
       "1112  528427070            0             0              0              1   \n",
       "819   906340100            0             0              0              0   \n",
       "1651  527327050            0             0              0              0   \n",
       "1140  531382090            0             0              1              0   \n",
       "\n",
       "                 nhood_StoneBr  nhood_Timber  nhood_Veenker  park  \\\n",
       "Order PID                                                           \n",
       "976   923226270              0             0              0     0   \n",
       "1112  528427070              0             0              0     0   \n",
       "819   906340100              0             0              0     0   \n",
       "1651  527327050              0             0              0     0   \n",
       "1140  531382090              0             0              0     0   \n",
       "\n",
       "                 partial_sale  railway  recently_built  recently_remodeled  \\\n",
       "Order PID                                                                    \n",
       "976   923226270             0        0               0                   1   \n",
       "1112  528427070             0        0               1                   1   \n",
       "819   906340100             1        0               1                   1   \n",
       "1651  527327050             0        0               0                   0   \n",
       "1140  531382090             0        0               0                   0   \n",
       "\n",
       "                 remodeled  years_since_built  years_since_remodeled  \\\n",
       "Order PID                                                              \n",
       "976   923226270          1                 39                      7   \n",
       "1112  528427070          0                  1                      1   \n",
       "819   906340100          0                  0                      0   \n",
       "1651  527327050          0                 31                     31   \n",
       "1140  531382090          0                 13                     13   \n",
       "\n",
       "                 SalePrice  SalePrice (box-cox-0)  \n",
       "Order PID                                          \n",
       "976   923226270    86000.0              11.362103  \n",
       "1112  528427070   214000.0              12.273731  \n",
       "819   906340100   323262.0              12.686218  \n",
       "1651  527327050   195000.0              12.180755  \n",
       "1140  531382090   182000.0              12.111762  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtain the raw numpy arrays:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature matrix: every column of df2 except the two target columns.\n",
    "X2 = df2.drop(columns=[\"SalePrice\", \"SalePrice (box-cox-0)\"]).to_numpy()\n",
    "\n",
    "# Target vectors: the raw sale price and its box-cox-0 transform.\n",
    "y2 = df2[\"SalePrice\"].to_numpy()\n",
    "y2l = df2[\"SalePrice (box-cox-0)\"].to_numpy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Manual Feature Pre-Selection\n",
    "\n",
    "Also, notebook 2 collects variables that correlate either weakly ($0.33 < \\vert\\rho\\vert < 0.66$) or strongly ($\\vert\\rho\\vert > 0.66$) with the *SalePrice* (or the logarithm thereof). These variables serve as a \"naive\" feature pre-selection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the lists of correlated variables collected in notebook 2.\n",
    "with open(\"data/correlated_variables.json\", \"r\", encoding=\"utf-8\") as file:\n",
    "    correlated_variables = json.load(file)\n",
    "\n",
    "# A descriptive name is used instead of `_` because IPython reserves `_`\n",
    "# for the most recent cell output and stops updating it once it is assigned.\n",
    "weakly_correlated = correlated_variables[\"weakly_correlated\"]\n",
    "strongly_correlated = correlated_variables[\"strongly_correlated\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the correlated variables that exist as columns of df2,\n",
    "# in alphabetical order.\n",
    "pre_selection = sorted(\n",
    "    set(weakly_correlated).union(strongly_correlated).intersection(df2.columns)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `df3` DataFrame is just a column subset of `df2` (32 columns: the 30 pre-selected features plus the 2 target variables)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pre-selected features first, followed by the target columns.\n",
    "df3 = df2[[*pre_selection, *TARGET_VARIABLES]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "MultiIndex: 2893 entries, (np.int64(976), np.int64(923226270)) to (np.int64(2650), np.int64(902128100))\n",
      "Columns: 32 entries, 1st Flr SF to SalePrice (box-cox-0)\n",
      "dtypes: float64(16), int64(16)\n",
      "memory usage: 908.9 KB\n"
     ]
    }
   ],
   "source": [
    "df3.info(verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>1st Flr SF</th>\n",
       "      <th>1st Flr SF (box-cox-0)</th>\n",
       "      <th>Bsmt Exposure</th>\n",
       "      <th>Bsmt Qual</th>\n",
       "      <th>BsmtFin SF 1</th>\n",
       "      <th>BsmtFin Type 1</th>\n",
       "      <th>Fireplace Qu</th>\n",
       "      <th>Fireplaces</th>\n",
       "      <th>Full Bath</th>\n",
       "      <th>Garage Area</th>\n",
       "      <th>Garage Cars</th>\n",
       "      <th>Garage Cond</th>\n",
       "      <th>Garage Finish</th>\n",
       "      <th>Garage Qual</th>\n",
       "      <th>Gr Liv Area</th>\n",
       "      <th>Gr Liv Area (box-cox-0)</th>\n",
       "      <th>Half Bath</th>\n",
       "      <th>Kitchen Qual</th>\n",
       "      <th>Lot Area (box-cox-0.1)</th>\n",
       "      <th>Lot Shape</th>\n",
       "      <th>Mas Vnr Area</th>\n",
       "      <th>Overall Qual</th>\n",
       "      <th>Paved Drive</th>\n",
       "      <th>TotRms AbvGrd</th>\n",
       "      <th>Total Bath</th>\n",
       "      <th>Total Bsmt SF</th>\n",
       "      <th>Total Porch SF</th>\n",
       "      <th>Total SF</th>\n",
       "      <th>Total SF (box-cox-0.2)</th>\n",
       "      <th>Wood Deck SF</th>\n",
       "      <th>SalePrice</th>\n",
       "      <th>SalePrice (box-cox-0)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Order</th>\n",
       "      <th>PID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <th>923226270</th>\n",
       "      <td>630.0</td>\n",
       "      <td>6.445720</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>515.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>286.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>630.0</td>\n",
       "      <td>6.445720</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>10.813995</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2.0</td>\n",
       "      <td>630.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1260.0</td>\n",
       "      <td>15.847026</td>\n",
       "      <td>0.0</td>\n",
       "      <td>86000.0</td>\n",
       "      <td>11.362103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1112</th>\n",
       "      <th>528427070</th>\n",
       "      <td>894.0</td>\n",
       "      <td>6.795706</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>668.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1933.0</td>\n",
       "      <td>7.566828</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>16.087312</td>\n",
       "      <td>2</td>\n",
       "      <td>74.0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>2.5</td>\n",
       "      <td>894.0</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2827.0</td>\n",
       "      <td>19.503897</td>\n",
       "      <td>100.0</td>\n",
       "      <td>214000.0</td>\n",
       "      <td>12.273731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>819</th>\n",
       "      <th>906340100</th>\n",
       "      <td>1680.0</td>\n",
       "      <td>7.426549</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>1021.0</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1138.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1680.0</td>\n",
       "      <td>7.426549</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>15.343929</td>\n",
       "      <td>3</td>\n",
       "      <td>242.0</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1555.0</td>\n",
       "      <td>209.0</td>\n",
       "      <td>3235.0</td>\n",
       "      <td>20.173573</td>\n",
       "      <td>185.0</td>\n",
       "      <td>323262.0</td>\n",
       "      <td>12.686218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1651</th>\n",
       "      <th>527327050</th>\n",
       "      <td>1118.0</td>\n",
       "      <td>7.019297</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>156.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>551.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2030.0</td>\n",
       "      <td>7.615791</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>15.590825</td>\n",
       "      <td>2</td>\n",
       "      <td>298.0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>2.5</td>\n",
       "      <td>848.0</td>\n",
       "      <td>224.0</td>\n",
       "      <td>2878.0</td>\n",
       "      <td>19.591678</td>\n",
       "      <td>0.0</td>\n",
       "      <td>195000.0</td>\n",
       "      <td>12.180755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1140</th>\n",
       "      <th>531382090</th>\n",
       "      <td>754.0</td>\n",
       "      <td>6.625392</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>362.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>525.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1609.0</td>\n",
       "      <td>7.383368</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>14.700235</td>\n",
       "      <td>2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>2.5</td>\n",
       "      <td>754.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>2363.0</td>\n",
       "      <td>18.640832</td>\n",
       "      <td>0.0</td>\n",
       "      <td>182000.0</td>\n",
       "      <td>12.111762</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 1st Flr SF  1st Flr SF (box-cox-0)  Bsmt Exposure  Bsmt Qual  \\\n",
       "Order PID                                                                       \n",
       "976   923226270       630.0                6.445720              3          4   \n",
       "1112  528427070       894.0                6.795706              2          4   \n",
       "819   906340100      1680.0                7.426549              4          5   \n",
       "1651  527327050      1118.0                7.019297              1          3   \n",
       "1140  531382090       754.0                6.625392              1          4   \n",
       "\n",
       "                 BsmtFin SF 1  BsmtFin Type 1  Fireplace Qu  Fireplaces  \\\n",
       "Order PID                                                                 \n",
       "976   923226270         515.0               6             0           0   \n",
       "1112  528427070           0.0               1             4           1   \n",
       "819   906340100        1021.0               6             4           1   \n",
       "1651  527327050         156.0               2             3           1   \n",
       "1140  531382090         362.0               6             0           0   \n",
       "\n",
       "                 Full Bath  Garage Area  Garage Cars  Garage Cond  \\\n",
       "Order PID                                                           \n",
       "976   923226270          1        286.0            1            3   \n",
       "1112  528427070          2        668.0            3            3   \n",
       "819   906340100          1       1138.0            3            3   \n",
       "1651  527327050          2        551.0            2            3   \n",
       "1140  531382090          2        525.0            2            3   \n",
       "\n",
       "                 Garage Finish  Garage Qual  Gr Liv Area  \\\n",
       "Order PID                                                  \n",
       "976   923226270              1            3        630.0   \n",
       "1112  528427070              3            3       1933.0   \n",
       "819   906340100              3            3       1680.0   \n",
       "1651  527327050              3            3       2030.0   \n",
       "1140  531382090              2            3       1609.0   \n",
       "\n",
       "                 Gr Liv Area (box-cox-0)  Half Bath  Kitchen Qual  \\\n",
       "Order PID                                                           \n",
       "976   923226270                 6.445720          0             3   \n",
       "1112  528427070                 7.566828          1             3   \n",
       "819   906340100                 7.426549          1             4   \n",
       "1651  527327050                 7.615791          1             3   \n",
       "1140  531382090                 7.383368          1             3   \n",
       "\n",
       "                 Lot Area (box-cox-0.1)  Lot Shape  Mas Vnr Area  \\\n",
       "Order PID                                                          \n",
       "976   923226270               10.813995          3           0.0   \n",
       "1112  528427070               16.087312          2          74.0   \n",
       "819   906340100               15.343929          3         242.0   \n",
       "1651  527327050               15.590825          2         298.0   \n",
       "1140  531382090               14.700235          2          38.0   \n",
       "\n",
       "                 Overall Qual  Paved Drive  TotRms AbvGrd  Total Bath  \\\n",
       "Order PID                                                               \n",
       "976   923226270             3            2              3         2.0   \n",
       "1112  528427070             5            2              9         2.5   \n",
       "819   906340100             8            2              8         2.5   \n",
       "1651  527327050             5            2              8         2.5   \n",
       "1140  531382090             5            2              6         2.5   \n",
       "\n",
       "                 Total Bsmt SF  Total Porch SF  Total SF  \\\n",
       "Order PID                                                  \n",
       "976   923226270          630.0             0.0    1260.0   \n",
       "1112  528427070          894.0           118.0    2827.0   \n",
       "819   906340100         1555.0           209.0    3235.0   \n",
       "1651  527327050          848.0           224.0    2878.0   \n",
       "1140  531382090          754.0            70.0    2363.0   \n",
       "\n",
       "                 Total SF (box-cox-0.2)  Wood Deck SF  SalePrice  \\\n",
       "Order PID                                                          \n",
       "976   923226270               15.847026           0.0    86000.0   \n",
       "1112  528427070               19.503897         100.0   214000.0   \n",
       "819   906340100               20.173573         185.0   323262.0   \n",
       "1651  527327050               19.591678           0.0   195000.0   \n",
       "1140  531382090               18.640832           0.0   182000.0   \n",
       "\n",
       "                 SalePrice (box-cox-0)  \n",
       "Order PID                               \n",
       "976   923226270              11.362103  \n",
       "1112  528427070              12.273731  \n",
       "819   906340100              12.686218  \n",
       "1651  527327050              12.180755  \n",
       "1140  531382090              12.111762  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtain the raw numpy arrays:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Features: every column except for the two sales price columns.\n",
    "X3 = df3.drop([\"SalePrice\", \"SalePrice (box-cox-0)\"], axis=\"columns\").values\n",
    "# Targets: the sales price as is and its Box-Cox transformed variant.\n",
    "y3, y3l = df3[\"SalePrice\"].values, df3[\"SalePrice (box-cox-0)\"].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Re-usable Components\n",
    "\n",
    "Define a function to run k-fold cross validation to obtain unbiased estimators for the following scores / errors:\n",
    "- Bias\n",
    "- Mean Absolute Error\n",
    "- Maximum Deviation (just to see the worst case prediction of a model)\n",
    "- R2 (coefficient of determination)\n",
    "- Root Mean Squared Error (default for comparison)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cross_validation(X, y, *, model, k=10, log=False, desc=None):\n",
    "    \"\"\"Estimate prediction scores and errors via k-fold cross validation.\n",
    "\n",
    "    The model is fitted anew on the training part of each fold and\n",
    "    evaluated against the part that was held out.\n",
    "\n",
    "    Args:\n",
    "        X: features; must support indexing with the index arrays of KFold\n",
    "        y: targets aligned with X\n",
    "        model: object with fit() and predict() methods; fitted in place\n",
    "        k: number of folds\n",
    "        log: set to True if y is on a log scale; then both y and the\n",
    "            predictions are exponentiated before they are compared\n",
    "        desc: description passed on to the progress bar\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys \"bias\", \"mae\", \"max_dev\", \"r2\", and \"rmse\";\n",
    "        the first four are means of the per-fold values and \"rmse\" is\n",
    "        the square root of the mean of the per-fold mean squared errors;\n",
    "        \"r2\" is rounded to three decimals, the others to whole numbers\n",
    "    \"\"\"\n",
    "    per_fold = {\"bias\": [], \"mae\": [], \"max_dev\": [], \"r2\": [], \"mse\": []}\n",
    "    folds = KFold(n_splits=k).split(X)\n",
    "    for train_idx, test_idx in progress_bar(folds, desc=desc, total=k):\n",
    "        model.fit(X[train_idx], y[train_idx])\n",
    "        actual, predicted = y[test_idx], model.predict(X[test_idx])\n",
    "        # Undo the log transformation so that the scores and errors are\n",
    "        # on the same scale as the ones of the non-logged models.\n",
    "        if log:\n",
    "            actual, predicted = np.exp(actual), np.exp(predicted)\n",
    "        # Record this fold's scores/errors.\n",
    "        per_fold[\"bias\"].append(bias_score(actual, predicted))\n",
    "        per_fold[\"mae\"].append(mean_absolute_error(actual, predicted))\n",
    "        per_fold[\"max_dev\"].append(max_deviation(actual, predicted))\n",
    "        per_fold[\"r2\"].append(r2_score(actual, predicted))\n",
    "        per_fold[\"mse\"].append(mean_squared_error(actual, predicted))\n",
    "    averages = {name: np.mean(values) for name, values in per_fold.items()}\n",
    "    # Round for convenience.\n",
    "    return {\n",
    "        \"bias\": np.round(averages[\"bias\"]),\n",
    "        \"mae\": np.round(averages[\"mae\"]),\n",
    "        \"max_dev\": np.round(averages[\"max_dev\"]),\n",
    "        \"r2\": np.round(averages[\"r2\"], 3),\n",
    "        \"rmse\": np.round(np.sqrt(averages[\"mse\"])),\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use one dictionary to store all the results in a systematic way."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = {}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Regression\n",
    "\n",
    "A plain OLS regression model serves as the base case for benchmarking."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "lm = LinearRegression()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Original Data\n",
    "\n",
    "Given the unprocessed data, the linear model is not able to make a good fit at all."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:01<00:00,  9.86it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(25204734.0),\n",
       " 'mae': np.float64(32984598.0),\n",
       " 'max_dev': np.float64(9091844797.0),\n",
       " 'r2': np.float64(-374439996.215),\n",
       " 'rmse': np.float64(1541948537.0)}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lm\", \"o\")] = cross_validation(X1, y1, model=lm)\n",
    "results[(\"lm\", \"o\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Improved Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 22.99it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-40.0),\n",
       " 'mae': np.float64(15377.0),\n",
       " 'max_dev': np.float64(121895.0),\n",
       " 'r2': np.float64(0.92),\n",
       " 'rmse': np.float64(22178.0)}"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lm\", \"i\")] = cross_validation(X2, y2, model=lm)\n",
    "results[(\"lm\", \"i\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 19.21it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-888.0),\n",
       " 'mae': np.float64(12851.0),\n",
       " 'max_dev': np.float64(108012.0),\n",
       " 'r2': np.float64(0.94),\n",
       " 'rmse': np.float64(19210.0)}"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lm\", \"il\")] = cross_validation(X2, y2l, model=lm, log=True)\n",
    "results[(\"lm\", \"il\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Improved Data with pre-selected Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 278.33it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(25.0),\n",
       " 'mae': np.float64(18570.0),\n",
       " 'max_dev': np.float64(136253.0),\n",
       " 'r2': np.float64(0.89),\n",
       " 'rmse': np.float64(25994.0)}"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lm\", \"p\")] = cross_validation(X3, y3, model=lm)\n",
    "results[(\"lm\", \"p\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 271.17it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-1430.0),\n",
       " 'mae': np.float64(16155.0),\n",
       " 'max_dev': np.float64(127999.0),\n",
       " 'r2': np.float64(0.911),\n",
       " 'rmse': np.float64(23391.0)}"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lm\", \"pl\")] = cross_validation(X3, y3l, model=lm, log=True)\n",
    "results[(\"lm\", \"pl\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LASSO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tolerance for the optimization; also passed to the Lasso models below.\n",
    "tol = 0.1\n",
    "# Candidate penalties: 2^-8, ..., 2^3 followed by 12, 16, ..., 64.\n",
    "# NOTE(review): the cross validations below re-create Lasso without the\n",
    "# selection and random_state arguments used here - confirm this is intended.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=Lasso(tol=tol, selection=\"random\", random_state=random_state),\n",
    "    param_grid=dict(\n",
    "        alpha=[2 ** exponent for exponent in range(-8, 4)] + [*range(12, 65, 4)],\n",
    "    ),\n",
    "    cv=KFold(n_splits=4),\n",
    "    n_jobs=-1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Original Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X1, y1)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 38.22it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(185.0),\n",
       " 'mae': np.float64(20586.0),\n",
       " 'max_dev': np.float64(268155.0),\n",
       " 'r2': np.float64(0.822),\n",
       " 'rmse': np.float64(33116.0)}"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lasso\", \"o\")] = cross_validation(X1, y1, model=Lasso(alpha=alpha, tol=tol))\n",
    "results[(\"lasso\", \"o\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Improved Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "28"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X2, y2)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 54.59it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-68.0),\n",
       " 'mae': np.float64(17523.0),\n",
       " 'max_dev': np.float64(129093.0),\n",
       " 'r2': np.float64(0.9),\n",
       " 'rmse': np.float64(24731.0)}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lasso\", \"i\")] = cross_validation(X2, y2, model=Lasso(alpha=alpha, tol=tol))\n",
    "results[(\"lasso\", \"i\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.00390625"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X2, y2l)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 46.34it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-843.0),\n",
       " 'mae': np.float64(14414.0),\n",
       " 'max_dev': np.float64(118524.0),\n",
       " 'r2': np.float64(0.927),\n",
       " 'rmse': np.float64(21134.0)}"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lasso\", \"il\")] = cross_validation(X2, y2l, model=Lasso(alpha=alpha, tol=tol), log=True)\n",
    "results[(\"lasso\", \"il\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Improved Data with pre-selected Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.00390625"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X3, y3)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 328.75it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(30.0),\n",
       " 'mae': np.float64(22904.0),\n",
       " 'max_dev': np.float64(158375.0),\n",
       " 'r2': np.float64(0.84),\n",
       " 'rmse': np.float64(31248.0)}"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lasso\", \"p\")] = cross_validation(X3, y3, model=Lasso(alpha=alpha, tol=tol))\n",
    "results[(\"lasso\", \"p\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.00390625"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X3, y3l)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 299.57it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-875.0),\n",
       " 'mae': np.float64(16644.0),\n",
       " 'max_dev': np.float64(135627.0),\n",
       " 'r2': np.float64(0.904),\n",
       " 'rmse': np.float64(24239.0)}"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"lasso\", \"pl\")] = cross_validation(X3, y3l, model=Lasso(alpha=alpha, tol=tol), log=True)\n",
    "results[(\"lasso\", \"pl\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ridge Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate penalties: 2^-8, ..., 2^3 followed by 12, 16, ..., 64.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=Ridge(),\n",
    "    param_grid=dict(\n",
    "        alpha=[2 ** exponent for exponent in range(-8, 4)] + [*range(12, 65, 4)],\n",
    "    ),\n",
    "    cv=KFold(n_splits=4),\n",
    "    n_jobs=-1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Original Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.125"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X1, y1)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 41.08it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(152.0),\n",
       " 'mae': np.float64(17064.0),\n",
       " 'max_dev': np.float64(263561.0),\n",
       " 'r2': np.float64(0.853),\n",
       " 'rmse': np.float64(29970.0)}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"ridge\", \"o\")] = cross_validation(X1, y1, model=Ridge(alpha=alpha))\n",
    "results[(\"ridge\", \"o\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Improved Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X2, y2)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 69.82it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-52.0),\n",
       " 'mae': np.float64(15351.0),\n",
       " 'max_dev': np.float64(122508.0),\n",
       " 'r2': np.float64(0.92),\n",
       " 'rmse': np.float64(22106.0)}"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"ridge\", \"i\")] = cross_validation(X2, y2, model=Ridge(alpha=alpha))\n",
    "results[(\"ridge\", \"i\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X2, y2l)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 74.39it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-916.0),\n",
       " 'mae': np.float64(12836.0),\n",
       " 'max_dev': np.float64(107968.0),\n",
       " 'r2': np.float64(0.94),\n",
       " 'rmse': np.float64(19152.0)}"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"ridge\", \"il\")] = cross_validation(X2, y2l, model=Ridge(alpha=alpha), log=True)\n",
    "results[(\"ridge\", \"il\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Improved Data with pre-selected Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X3, y3)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 64.44it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(33.0),\n",
       " 'mae': np.float64(18534.0),\n",
       " 'max_dev': np.float64(136836.0),\n",
       " 'r2': np.float64(0.89),\n",
       " 'rmse': np.float64(25965.0)}"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"ridge\", \"p\")] = cross_validation(X3, y3, model=Ridge(alpha=alpha))\n",
    "results[(\"ridge\", \"p\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X3, y3l)\n",
    "alpha = grid_search.best_params_[\"alpha\"]\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:00<00:00, 60.47it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-1389.0),\n",
       " 'mae': np.float64(16141.0),\n",
       " 'max_dev': np.float64(127870.0),\n",
       " 'r2': np.float64(0.911),\n",
       " 'rmse': np.float64(23366.0)}"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"ridge\", \"pl\")] = cross_validation(X3, y3l, model=Ridge(alpha=alpha), log=True)\n",
    "results[(\"ridge\", \"pl\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Forest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A single forest configuration, reused by every random-forest run below.\n",
    "# `random_state` is the notebook-wide RandomState created in the housekeeping section.\n",
    "# NOTE(review): a RandomState instance (not an int) is passed, so each fit presumably\n",
    "# advances the shared generator and results depend on cell execution order - confirm intended.\n",
    "rf = RandomForestRegressor(\n",
    "    n_estimators=500,\n",
    "    n_jobs=-1,\n",
    "    random_state=random_state,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Original Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:19<00:00,  1.96s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-27.0),\n",
       " 'mae': np.float64(15331.0),\n",
       " 'max_dev': np.float64(164293.0),\n",
       " 'r2': np.float64(0.898),\n",
       " 'rmse': np.float64(25371.0)}"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"rf\", \"o\")] = cross_validation(X1, y1, model=rf)\n",
    "results[(\"rf\", \"o\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Improved Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:16<00:00,  1.69s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-53.0),\n",
       " 'mae': np.float64(15018.0),\n",
       " 'max_dev': np.float64(124828.0),\n",
       " 'r2': np.float64(0.912),\n",
       " 'rmse': np.float64(23190.0)}"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"rf\", \"i\")] = cross_validation(X2, y2, model=rf)\n",
    "results[(\"rf\", \"i\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:19<00:00,  1.97s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-2089.0),\n",
       " 'mae': np.float64(15068.0),\n",
       " 'max_dev': np.float64(136284.0),\n",
       " 'r2': np.float64(0.911),\n",
       " 'rmse': np.float64(23306.0)}"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"rf\", \"il\")] = cross_validation(X2, y2l, model=rf, log=True)\n",
    "results[(\"rf\", \"il\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Improved Data with pre-selected Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### a) Normal Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:12<00:00,  1.29s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-232.0),\n",
       " 'mae': np.float64(16274.0),\n",
       " 'max_dev': np.float64(130943.0),\n",
       " 'r2': np.float64(0.9),\n",
       " 'rmse': np.float64(24685.0)}"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"rf\", \"p\")] = cross_validation(X3, y3, model=rf)\n",
    "results[(\"rf\", \"p\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### b) Log Scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [00:11<00:00,  1.13s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'bias': np.float64(-2390.0),\n",
       " 'mae': np.float64(16388.0),\n",
       " 'max_dev': np.float64(141335.0),\n",
       " 'r2': np.float64(0.898),\n",
       " 'rmse': np.float64(24924.0)}"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[(\"rf\", \"pl\")] = cross_validation(X3, y3l, model=rf, log=True)\n",
    "results[(\"rf\", \"pl\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analysis of Results\n",
    "\n",
    "This notebook did not focus on hyper-parameter optimization. Therefore, the predictions by Lasso, Ridge, and the Random Forest can potentially be improved by refining the grid search further.\n",
    "\n",
    "In general, the manually \"improved\" data clearly outperform the data that were only cleaned with minimal effort. Also, the results suggest letting the model select its own features: the manually pre-selected features perform well, but not as well as the full feature set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scores_by_source(source, score=\"rmse\", *, ascending=True):\n",
    "    \"\"\"Rank the models evaluated on one data source by a single metric.\n",
    "\n",
    "    Reads the notebook-level ``results`` dict, whose keys are\n",
    "    ``(model, data_source)`` tuples and whose values map metric names to scores.\n",
    "\n",
    "    Args:\n",
    "        source: data-source label to filter on (e.g. ``\"o\"`` or ``\"il\"``).\n",
    "        score: name of the metric to rank by (e.g. ``\"rmse\"`` or ``\"r2\"``).\n",
    "        ascending: if True, the smallest score comes first; pass False for\n",
    "            metrics where a larger value is better.\n",
    "\n",
    "    Returns:\n",
    "        A list of ``(model, score_value)`` tuples sorted by the score value.\n",
    "    \"\"\"\n",
    "    ranking = []\n",
    "    for (model, data_source), scores in results.items():\n",
    "        if data_source != source:\n",
    "            continue\n",
    "        ranking.append((model, scores[score]))\n",
    "    # The sort is stable, so tied models keep the insertion order of `results`.\n",
    "    ranking.sort(key=lambda pair: pair[1], reverse=not ascending)\n",
    "    return ranking"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Root Mean Squared Error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('rf', np.float64(25371.0)),\n",
       " ('ridge', np.float64(29970.0)),\n",
       " ('lasso', np.float64(33116.0)),\n",
       " ('lm', np.float64(1541948537.0))]"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"o\", \"rmse\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ridge', np.float64(22106.0)),\n",
       " ('lm', np.float64(22178.0)),\n",
       " ('rf', np.float64(23190.0)),\n",
       " ('lasso', np.float64(24731.0))]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"i\", \"rmse\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ridge', np.float64(19152.0)),\n",
       " ('lm', np.float64(19210.0)),\n",
       " ('lasso', np.float64(21134.0)),\n",
       " ('rf', np.float64(23306.0))]"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"il\", \"rmse\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('rf', np.float64(24685.0)),\n",
       " ('ridge', np.float64(25965.0)),\n",
       " ('lm', np.float64(25994.0)),\n",
       " ('lasso', np.float64(31248.0))]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"p\", \"rmse\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ridge', np.float64(23366.0)),\n",
       " ('lm', np.float64(23391.0)),\n",
       " ('lasso', np.float64(24239.0)),\n",
       " ('rf', np.float64(24924.0))]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"pl\", \"rmse\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### R2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('rf', np.float64(0.898)),\n",
       " ('ridge', np.float64(0.853)),\n",
       " ('lasso', np.float64(0.822)),\n",
       " ('lm', np.float64(-374439996.215))]"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"o\", \"r2\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('lm', np.float64(0.92)),\n",
       " ('ridge', np.float64(0.92)),\n",
       " ('rf', np.float64(0.912)),\n",
       " ('lasso', np.float64(0.9))]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"i\", \"r2\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('lm', np.float64(0.94)),\n",
       " ('ridge', np.float64(0.94)),\n",
       " ('lasso', np.float64(0.927)),\n",
       " ('rf', np.float64(0.911))]"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"il\", \"r2\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('rf', np.float64(0.9)),\n",
       " ('lm', np.float64(0.89)),\n",
       " ('ridge', np.float64(0.89)),\n",
       " ('lasso', np.float64(0.84))]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"p\", \"r2\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('lm', np.float64(0.911)),\n",
       " ('ridge', np.float64(0.911)),\n",
       " ('lasso', np.float64(0.904)),\n",
       " ('rf', np.float64(0.898))]"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores_by_source(\"pl\", \"r2\", ascending=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ames-housing",
   "language": "python",
   "name": "ames-housing"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}