{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ArbitraryDiscretiser\n", "\n", "The ArbitraryDiscretiser() divides continuous numerical variables into contiguous intervals whose limits are arbitrarily defined by the user.\n", "\n", "The user needs to enter a dictionary with variable names as keys, and a list of the limits of the intervals as values. For example {'var1': [0, 10, 100, 1000], 'var2': [5, 10, 15, 20]}.\n", "\n", "**Note**\n", "\n", "For this demonstration, we use the Ames House Prices dataset produced by Professor Dean De Cock:\n", "\n", "Dean De Cock (2011) Ames, Iowa: Alternative to the Boston Housing\n", "Data as an End of Semester Regression Project, Journal of Statistics Education, Vol.19, No. 3\n", "\n", "http://jse.amstat.org/v19n3/decock.pdf\n", "\n", "https://www.tandfonline.com/doi/abs/10.1080/10691898.2011.11889627\n", "\n", "The version of the dataset used in this notebook can be obtained from [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from feature_engine.discretisation import ArbitraryDiscretiser\n", "plt.rcParams[\"figure.figsize\"] = [15,5]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
\n", "

5 rows × 81 columns

\n", "
" ], "text/plain": [ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", "0 1 60 RL 65.0 8450 Pave NaN Reg \n", "1 2 20 RL 80.0 9600 Pave NaN Reg \n", "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", "\n", " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold \\\n", "0 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", "1 Lvl AllPub ... 0 NaN NaN NaN 0 5 \n", "2 Lvl AllPub ... 0 NaN NaN NaN 0 9 \n", "3 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", "4 Lvl AllPub ... 0 NaN NaN NaN 0 12 \n", "\n", " YrSold SaleType SaleCondition SalePrice \n", "0 2008 WD Normal 208500 \n", "1 2007 WD Normal 181500 \n", "2 2008 WD Normal 223500 \n", "3 2006 WD Abnorml 140000 \n", "4 2008 WD Normal 250000 \n", "\n", "[5 rows x 81 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv('housing.csv')\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X_train : (1022, 79)\n", "X_test : (438, 79)\n" ] } ], "source": [ "# let's separate into training and testing set\n", "X = data.drop([\"Id\", \"SalePrice\"], axis=1)\n", "y = data.SalePrice\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.3, random_state=0)\n", "\n", "print(\"X_train :\", X_train.shape)\n", "print(\"X_test :\", X_test.shape)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA2cAAAE/CAYAAADCCbvWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAepElEQVR4nO3df7Cld10f8PfHBBIkKIkh65KkbGhTK5gS6E6M0qG3BE0gjomOOLEBNpbOtiMi1nRgI46/pumktjrQUbQpoEv5EcMPJzsEq2nwjmPFRH6EH0mICbCQJUsWUIRlbOzCp3+cZ/WyuXfvPXfvj+fe+3rNnDnnfM/znPM9nz33Pve93+/zPdXdAQAAYH1903p3AAAAAOEMAABgFIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEM1hEVR2uqqeudz8AANjchDM2paq6qqruqKqvVtWh4fZPVFUtsP3vVNV/nO+x7j6tuz85xWtfU1VdVT+63P4DwEZSVfur6nlTbD9TVQcWeMxxlC1LOGPTqaprk7w2yX9J8u1JtiX5d0meneSx82x/0gp3YVeSvxyuj9fPk1f4dQFgM3AcZcsSzthUqupbk/xykp/o7nd091d64kPdfXV3PzKMkv1mVb2nqr6a5F8u8pxdVf+oqi6uqs/NDXNV9UNV9ZE595+S5F8k2Z3k0qraNuexmao6UFWvqqrPJfntqvqmqtpTVZ+oqi9W1c1Vdcacfd4+vOZfV9UfV9XTV6xYALCKquqUqnpNVT00XF4ztD0+ye8nefJw6sDhqnrysI/jKFuacMZm8z1JTklyyyLb/ask1yd5QpI/WcoTd/efJflqkuce8zxvnXP/JUne393vTHJvkquPeZpvT3JGkqdkcuD5qSRXZnIgenKSv0ryG3O2//0k5yc5K8kHk7xlKX0FgBF4dZKLk1yY5BlJLkryc9391STPT/LQcOrAad390LCP4yhbmnDGZnNmki9095GjDVX1p1X1par6m6p6ztB8S3f/n+7+enf/3yme/21Jfmx43ickecHQdtRL8vdh7a159JSMryf5he5+pLv/Jsm/TfLq7j7Q3Y8k+cUkP3J0qkZ3v3EY/Tv62DOG0UEAGLurk/xydx/q7s8n+aUkL15kH8dRtjThjM3mi0nOnDsPvbu/t7ufODx29DP/4DKf/61JfriqTknyw0k+2N2fTpKqenaS85LcNGfbC6rqwjn7f/6YMPiUJL83hMcvZfK/hF9Lsq2qTqqqG4apGl9Osn/Y58xl9h0A1tKTk3x6zv1PD23zchwF4YzN531JHklyxSLb9XKevLvvyeTg8vw8ekrjriSV5K5hLvwdQ/tLjvO6DyZ5fnc/cc7l1O7+7PD8VyR5XpJvTbJj2GfeFScBYGQeyiQ8HfUPhrZk/uOw4yhbnnDGptLdX8pk2sTrqupHquq04WThC5M8fpHdT6qqU+dcHrWy4+Ctmcxxf06StydJVZ2a5Eczmf9+4ZzLy5NcfZwVpX4ryfXDCdCpqidV1dFg+YRMguYXk3xzkv+02PsHgHX0mLnH0Uym/f/ccGw7M8nPJ3nzsO3DSb7t6BRDx1GYEM7YdLr7V5L8TJJXJjmUyQHgvyd5VZI/Pc6ue5L8zZzLexfY7m1JZpK8t7u/MLRdOezzpu7+3NFLkjckOSnJZQs812uT7Evyh1X1lSR/luS7h8felMko3WeT3DM8BgBj9Z5843H01CTvT/KRJB/NZEGO/5gk3f3xTI6nnxymI/5wHEch1b2s2V0AAACsICNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACOw0HdGrIozzzyzd+zYsZYvOVpf/epX8/jHL/a1WxylXtNTs+mo1zf6wAc+8IXuftJ694OlO5FjrM//9NRsemq2POo2vbHX7HjH2DUNZzt27Mj73//+tXzJ0Zqdnc3MzMx6d2PDUK/pqdl01OsbVdWn17sPTOdEjrE
+/9NTs+mp2fKo2/TGXrPjHWNNawQAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBE4eb07wMa1Y8+t87bvv+HyNe4JAKtpod/3id/5ACvJyBkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAILCmcVdW/r6q7q+pjVfW2qjq1qs6oqtuq6v7h+vTV7iwAAMBmtWg4q6qzk/xUkp3d/V1JTkpyVZI9SW7v7vOT3D7cBwAAYBmWOq3x5CSPq6qTk3xzkoeSXJFk7/D43iRXrnz3AAAAtoZFw1l3fzbJf03ymSQHk/x1d/9hkm3dfXDY5mCSs1azowAAAJvZyYttMJxLdkWS85J8Kcnbq+pFS32BqtqdZHeSbNu2LbOzs8vr6SZz+PDhDV+Lay84Mm/7aryvzVCvtaZm01EvAGC9LRrOkjwvyae6+/NJUlXvSvK9SR6uqu3dfbCqtic5NN/O3X1jkhuTZOfOnT0zM7MiHd/oZmdns9Frcc2eW+dt33/1zIq/1mao11pTs+moFwCw3pZyztlnklxcVd9cVZXkkiT3JtmXZNewza4kt6xOFwEAADa/RUfOuvuOqnpHkg8mOZLkQ5mMhJ2W5OaqemkmAe6Fq9lRAACAzWwp0xrT3b+Q5BeOaX4kk1E0AAAATtBSl9IHAABgFQlnAAAAIyCcAQAAjIBwBgBroKpOqqoPVdW7h/tnVNVtVXX/cH36nG2vq6oHquq+qrp0/XoNwFoSzgBgbbwik6+iOWpPktu7+/wktw/3U1VPS3JVkqcnuSzJ66rqpDXuKwDrQDgDgFVWVeckuTzJ6+c0X5Fk73B7b5Ir57Tf1N2PdPenkjyQ5KK16isA60c4A4DV95okr0zy9Tlt27r7YJIM12cN7WcneXDOdgeGNgA2uSV9zxkAsDxV9QNJDnX3B6pqZim7zNPWCzz37iS7k2Tbtm2ZnZ1dVh8PHz583H2vveDIgo8t9zU3usVqxqOp2fKo2/Q2cs2EMwBYXc9O8oNV9YIkpyb5lqp6c5KHq2p7dx+squ1JDg3bH0hy7pz9z0ny0HxP3N03JrkxSXbu3NkzMzPL6uDs7GyOt+81e25d8LH9Vy/vNTe6xWrGo6nZ8qjb9DZyzUxrBIBV1N3Xdfc53b0jk4U+3tvdL0qyL8muYbNdSW4Zbu9LclVVnVJV5yU5P8mda9xtANaBkTOSJDsW+F/R/TdcvsY9Adgybkhyc1W9NMlnkrwwSbr77qq6Ock9SY4keVl3f239ugnAWhHOAGCNdPdsktnh9heTXLLAdtcnuX7NOgbAKJjWCAAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIzAyevdAcZtx55b17sLAACwJRg5AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgBqzWO2HJWStx/w+Wr0BMAAGC1GTkDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAGrNa6z5azICAAAbD5GzgAAAEbAyBkAsGwLzQDxvZsA0xPONhnTJAEAYGMyrREAAGAEhDMAAIARMK1xBR1vSqG59wAAwPEYOQMAABgB4QwAAGAEhDMAAIAREM4AAABGYEnhrKqeWFXvqKqPV9W9VfU9VXVGVd1WVfcP16evdmcBAAA2q6WOnL02yf/q7n+S5BlJ7k2yJ8nt3X1+ktuH+wAAACzDouGsqr4lyXOSvCFJuvtvu/tLSa5IsnfYbG+SK1erkwAAAJvdUkbOnprk80l+u6o+VFWvr6rHJ9nW3QeTZLg+axX7CQAAsKkt5Uu
oT07yrCQv7+47quq1mWIKY1XtTrI7SbZt25bZ2dnl9HNDuPaCIws+duz7Pnz4cGZnZ4+7z0a1Gv/GR+vF0qnZdNQLAFhvSwlnB5Ic6O47hvvvyCScPVxV27v7YFVtT3Jovp27+8YkNybJzp07e2Zm5sR7PVLX7Ll1wcf2Xz3zDfdnZ2czMzNz3H02qmPf60o4Wi+WTs2mo14AwHpbdFpjd38uyYNV9R1D0yVJ7kmyL8muoW1XkltWpYcAAABbwFJGzpLk5UneUlWPTfLJJD+eSbC7uapemuQzSV64Ol0EAADY/JYUzrr7riQ753nokpXtDgAAwNa01O85AwCWqapOrao7q+rDVXV3Vf3S0H5GVd1WVfcP16fP2ee6qnqgqu6rqkvXr/cArBXhDABW3yNJntvdz0hyYZLLquriTBbYur27z09y+3A/VfW0JFcleXqSy5K8rqpOWpeeA7BmhDMAWGU9cXi4+5jh0kmuSLJ3aN+b5Mrh9hVJburuR7r7U0keSHLRGnYZgHUgnAHAGqiqk6rqrky+eua24StqtnX3wSQZrs8aNj87yYNzdj8wtAGwiS11tUYA4AR099eSXFhVT0zye1X1XcfZvOZ7ikdtVLU7ye4k2bZt27K/SH2xL2G/9oIjUz/nZv9Sd19cPz01Wx51m95GrplwBgBrqLu/VFWzmZxL9nBVbe/ug1W1PZNRtWQyUnbunN3OSfLQPM91Y5Ibk2Tnzp293C9SX+xL2K/Zc+vUz7n/6uX1ZaPwxfXTU7PlUbfpbeSamdYIAKusqp40jJilqh6X5HlJPp5kX5Jdw2a7ktwy3N6X5KqqOqWqzktyfpI717bXAKw1I2cAsPq2J9k7rLj4TUlu7u53V9X7ktxcVS9N8pkkL0yS7r67qm5Ock+SI0leNkyLBGATE84AYJV190eSPHOe9i8muWSBfa5Pcv0qdw2AETGtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGwPecseJ27Ll1wcf233D5GvYEAAA2DiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAI+BLqNXLsFzNfe8GRXHOcL2sGAAC2FiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAievdwfgqB17bp23ff8Nl69xTwAAYO0ZOQMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAElhzOquqkqvpQVb17uH9GVd1WVfcP16evXjcBAAA2t2lGzl6R5N459/ckub27z09y+3AfAACAZVhSOKuqc5JcnuT1c5qvSLJ3uL03yZUr2zUAAICt4+QlbveaJK9M8oQ5bdu6+2CSdPfBqjprvh2raneS3Umybdu2zM7OLr+3I3ftBUeWvO22x023/WZxvH//heoxOzubw4cPb+rPzmpQs+moFwCw3hYNZ1X1A0kOdfcHqmpm2hfo7huT3JgkO3fu7JmZqZ9iw7hmz61L3vbaC47kVz+61Gy8eey/embBxxaq3/6rZzI7O5vN/NlZDWo2HfUCANbbUqY1PjvJD1bV/iQ3JXluVb05ycNVtT1JhutDq9ZLANjAqurcqvqjqrq3qu6uqlcM7QsurlVV11XVA1V1X1Vdun69B2CtLBrOuvu67j6nu3ckuSrJe7v7RUn2Jdk1bLYryS2r1ksA2NiOJLm2u78zycVJXlZVT8sCi2sNj12V5OlJLkvyuqo6aV16DsCaOZHvObshyfdV1f1Jvm+4DwAco7sPdvcHh9tfyWT147Oz8OJaVyS5qbsf6e5PJXkgyUVr22sA1tpUJz1192yS2eH2F5NcsvJdAoDNq6p2JHl
mkjuy8OJaZyf5szm7HRjaANjEtt6KFACwTqrqtCTvTPLT3f3lqlpw03naep7nW5EVkRdbrXQ5qwtv9tVPrfA6PTVbHnWb3kaumXAGAGugqh6TSTB7S3e/a2h+uKq2D6NmcxfXOpDk3Dm7n5PkoWOfc6VWRF5stdJpViM+6nir824GVnidnpotj7pNbyPX7ETOOQMAlqAmQ2RvSHJvd//anIcWWlxrX5KrquqUqjovyflJ7lyr/gKwPoycsaZ2LON/XwE2gWcneXGSj1bVXUPbz2aymNbNVfXSJJ9J8sIk6e67q+rmJPdkstLjy7r7a2vfbQDWknAGAKusu/8k859HliywuFZ3X5/k+lXrFACjY1ojAADACBg5WwZT8wAAgJVm5AwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABiBk9e7A7CYHXtuzbUXHMk1e2591GP7b7h8HXoEAAArz8gZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAhYrZENbcc8KzgmVnEEAGDjMXIGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693BwCAcdix59b17gLAlmbkDAAAYASEMwAAgBEQzgAAAEZg0XBWVedW1R9V1b1VdXdVvWJoP6Oqbquq+4fr01e/uwAAAJvTUkbOjiS5tru/M8nFSV5WVU9LsifJ7d19fpLbh/sAAAAsw6LhrLsPdvcHh9tfSXJvkrOTXJFk77DZ3iRXrlYnAQAANrupzjmrqh1JnpnkjiTbuvtgMglwSc5a6c4BwGZQVW+sqkNV9bE5bQueHlBV11XVA1V1X1Vduj69BmCtLfl7zqrqtCTvTPLT3f3lqlrqfruT7E6Sbdu2ZXZ2dhndHJdrLzhyws+x7XEr8zxbxbT12gyfsxN1+PBhdZiCerHKfifJryd505y2o6cH3FBVe4b7rxpOHbgqydOTPDnJ/66qf9zdX1vjPgOwxpYUzqrqMZkEs7d097uG5oerant3H6yq7UkOzbdvd9+Y5MYk2blzZ8/MzJx4r9fZNSvwJZ3XXnAkv/pR3wG+VNPWa//VM6vXmQ1idnY2m+Hnba2oF6upu/94mH0y1xVJZobbe5PMJnnV0H5Tdz+S5FNV9UCSi5K8by36CsD6WcpqjZXkDUnu7e5fm/PQviS7htu7ktyy8t0DgE1rodMDzk7y4JztDgxtAGxySxmKeHaSFyf5aFXdNbT9bJIbktxcVS9N8pkkL1ydLsLK2nGckc/9N1y+hj0BmNd85w30vBuu0KkDR6f1ruR0+80+TdhU6Omp2fKo2/Q2cs0WDWfd/SeZ/0CRJJesbHcAYMtY6PSAA0nOnbPdOUkemu8JVurUgaPTeldi2v5Rm316uanQ01Oz5VG36W3kmk21WiMAsGIWOj1gX5KrquqUqjovyflJ7lyH/gGwxqxIAQCrrKrelsniH2dW1YEkv5AFTg/o7rur6uYk9yQ5kuRlVmoE2BqEMwBYZd39Yws8NO/pAd19fZLrV69HAIyRaY0AAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMAK+5wzm2LHn1nnb999w+Rr3BACArcbIGQAAwAgIZwAAACNgWuMCFprexsbg3w8AgI1GOAMAVtzx/pPMebwA8zOtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAETh5vTsAG8GOPbcua7/9N1y+wj0BAGCzMnIGAAAwAsIZAADACJjWCKtooem
QpjsCAHAsI2cAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693B9bTjj23rncX2KKO99nbf8Pla9gTAADGwsgZAADACAhnAAAAI7ClpzXCGC1nuq2pkAAAG5+RMwAAgBEQzgAAAEbAtEbgURaaWmn6JLASrFgLMD8jZwAAACMgnAEAAIzAppnWaIoETGelv4R9OT+Dfm6BY5lWDWxlRs4AAABG4ITCWVVdVlX3VdUDVbVnpToFAFudYyzA1rPsaY1VdVKS30jyfUkOJPnzqtrX3fesVOeOtdLTsGCzmO9n49oLjuSakfzMbKWf3bWcqmn61+a1HsdYANbfiZxzdlGSB7r7k0lSVTcluSKJAwcAnBjH2Ck4fxXYLE4knJ2d5ME59w8k+e4T6w4AEMfYR1nuCPxKjtwfL+gt53WWExwF0b9n9sDy+Rwtbr0+X9Xdy9ux6oVJLu3ufzPcf3GSi7r75cdstzvJ7uHudyS5b/nd3VTOTPKF9e7EBqJe01Oz6ajXN3pKdz9pvTuxVa3DMdbnf3pqNj01Wx51m97Ya7bgMfZERs4OJDl3zv1zkjx07EbdfWOSG0/gdTalqnp/d+9c735sFOo1PTWbjnoxMmt6jPX5n56aTU/NlkfdpreRa3YiqzX+eZLzq+q8qnpskquS7FuZbgHAluYYC7AFLXvkrLuPVNVPJvmDJCcleWN3371iPQOALcoxFmBrOpFpjenu9yR5zwr1Zasx1XM66jU9NZuOejEqa3yM9fmfnppNT82WR92mt2FrtuwFQQAAAFg5J3LOGQAAACtEOFtBVfXGqjpUVR+b03ZGVd1WVfcP16fPeey6qnqgqu6rqkvntP+zqvro8Nh/q6pa6/ey2qrq3Kr6o6q6t6rurqpXDO3qtYCqOrWq7qyqDw81+6WhXc2Oo6pOqqoPVdW7h/vqBYOqumz4vD9QVXvWuz/roar2Dz/fd1XV+4e2Ffs9UVWnVNXvDu13VNWOtX6PJ2q1/745Xo2qatfwGvdX1a61eccrY4G6/WJVfXb4vN1VVS+Y89iWrlutwd+GG6Jm3e2yQpckz0nyrCQfm9P2K0n2DLf3JPnPw+2nJflwklOSnJfkE0lOGh67M8n3JKkkv5/k+ev93lahVtuTPGu4/YQkfzHURL0WrlklOW24/ZgkdyS5WM0WrdvPJHlrkncP99XLxaU7mSw08okkT03y2OHz/7T17tc61GF/kjOPaVux3xNJfiLJbw23r0ryu+v9npdRo1X9+2ahGiU5I8knh+vTh9unr3c9TrBuv5jkP8yz7ZavW9bgb8ONUDMjZyuou/84yV8e03xFkr3D7b1JrpzTflN3P9Ldn0ryQJKLqmp7km/p7vf15NPypjn7bBrdfbC7Pzjc/kqSe5OcHfVaUE8cHu4+Zrh01GxBVXVOksuTvH5Os3rBxEVJHujuT3b33ya5KZOfA1b298Tc53pHkks22uj7Gvx9s1CNLk1yW3f/ZXf/VZLbkly28u9wdSxQt4Vs+bqt0d+Go6+ZcLb6tnX3wWTyoUty1tB+dpIH52x3YGg7e7h9bPumNQwpPzOTkSD1Oo6aTNG7K8mhTH6JqNnxvSbJK5N8fU6besHEQp/5raaT/GFVfaCqdg9tK/l74u/26e4jSf46ybetwvtYa2tRo836Gf3JqvrIMO3x6BQ9dZtjFf82HH3NhLP1M9//mvVx2jelqjotyTuT/HR3f/l4m87TtuXq1d1f6+4Lk5yTyf8OfddxNt/SNauqH0hyqLs/sNRd5mnbMvViS/LZnnh2dz8ryfOTvKyqnnOcbZfze2Kr1Xkla7QZa/ebSf5hkguTHEzyq0O7ug1W+W/D0ddMOFt9Dw/DqxmuDw3tB5KcO2e7c5I8NLSfM0/7plNVj8nkh+8t3f2uoVm9lqC7v5RkNpMhdzWb37OT/GBV7c9kutZzq+rNUS8
4aqHP/JbS3Q8N14eS/F4m0z1X8vfE3+1TVScn+dYsfarbmK1FjTbdZ7S7Hx7+o/XrSf5HJp+3RN2SrMnfhqOvmXC2+vYlObriy64kt8xpv2pYNea8JOcnuXMYrv1KVV08zIF9yZx9No3hvb0hyb3d/WtzHlKvBVTVk6rqicPtxyV5XpKPR83m1d3Xdfc53b0jk5N+39vdL4p6wVF/nuT8qjqvqh6byc/JvnXu05qqqsdX1ROO3k7y/Uk+lpX9PTH3uX4kk99FG2YU4zjWokZ/kOT7q+r0Yfrf9w9tG9bRkDH4oUw+b4m6rdXfhuOv2TSrh7gsusrM2zIZov5/mSTwl2Yyj/X2JPcP12fM2f7Vmawsc1/mrP6WZGcmP6yfSPLrGb4sfDNdkvzzTIaLP5LkruHyAvU6bs3+aZIPDTX7WJKfH9rVbPHazeTvV2tULxeX4TL83v2L4bP96vXuzzq8/6dmstrbh5PcfbQGK/l7IsmpSd6eyWIFdyZ56nq/72XUaVX/vjlejZL866H9gSQ/vt61WIG6/c8kHx2O5fuSbFe3v+vzqv9tuBFqdrSjAAAArCPTGgEAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAEfj/zbRLNhWNFzsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# we will discretise two continuous variables\n", "\n", "X_train[[\"LotArea\", 'GrLivArea']].hist(bins=50)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The ArbitraryDiscretiser() works only with numerical variables. The discretiser will\n", "check if the dictionary entered by the user contains variables present in the\n", "training set, and if these variables are cast as numerical, before doing any\n", "transformation.\n", "\n", "Then it transforms the variables, that is, it sorts the values into the intervals\n", "when transform() is called." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ArbitraryDiscretiser(binning_dict={'GrLivArea': [-inf, 500, 1000, 1500, 2000,\n", " 2500, inf],\n", " 'LotArea': [-inf, 4000, 8000, 12000, 16000,\n", " 20000, inf]},\n", " return_boundaries=False, return_object=False)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "Parameters\n", "----------\n", "\n", "binning_dict : dict\n", " The dictionary with the variable : interval limits pairs, provided by the user.\n", " A valid dictionary looks like this:\n", "\n", " binning_dict = {'var1':[0, 10, 100, 1000], 'var2':[5, 10, 15, 20]}.\n", "\n", "return_object : bool, default=False\n", " Whether the numbers in the discrete variable should be returned as\n", " numeric or as object. The decision is made by the user based on\n", " whether they would like to proceed the engineering of the variable as\n", " if it was numerical or categorical.\n", "\n", "return_boundaries: bool, default=False\n", " whether the output should be the interval boundaries. If True, it returns\n", " the interval boundaries. 
If False, it returns integers.\n", "'''\n", "\n", "atd = ArbitraryDiscretiser(binning_dict={\"LotArea\":[-np.inf,4000,8000,12000,16000,20000,np.inf],\n", " \"GrLivArea\":[-np.inf,500,1000,1500,2000,2500,np.inf]})\n", "\n", "atd.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'LotArea': [-inf, 4000, 8000, 12000, 16000, 20000, inf],\n", " 'GrLivArea': [-inf, 500, 1000, 1500, 2000, 2500, inf]}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# binner_dict contains the boundaries of the different bins\n", "atd.binner_dict_" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "train_t = atd.transform(X_train)\n", "test_t = atd.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[4 2 1 0 3 5]\n", "[2 0 1 3 5 4]\n" ] } ], "source": [ "# the below are the bins into which the observations were sorted\n", "print(train_t['GrLivArea'].unique())\n", "print(train_t['LotArea'].unique())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LotAreaGrLivAreaLotArea_binnedGrLivArea_binned
649375203424
6822887129102
960720785811
13849060125822
1100840043820
\n", "
" ], "text/plain": [ " LotArea GrLivArea LotArea_binned GrLivArea_binned\n", "64 9375 2034 2 4\n", "682 2887 1291 0 2\n", "960 7207 858 1 1\n", "1384 9060 1258 2 2\n", "1100 8400 438 2 0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here I put side by side the original variable and the transformed variable\n", "tmp = pd.concat([X_train[[\"LotArea\", 'GrLivArea']], train_t[[\"LotArea\", 'GrLivArea']]], axis=1)\n", "tmp.columns = [\"LotArea\", 'GrLivArea',\"LotArea_binned\", 'GrLivArea_binned']\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAFKCAYAAACkdEbCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de7hdZXnv/e+PQ0EOW0ECBhIIKuoGtVFT1OK2eISKitpiQ5WiUrF9acVuawHrLlKbt3TXQ7t3q5VWBA+IqYpEoQpSwBerQEAEwkFSiJAGIYoKeIgm3u8fY0Qmy7VWZlYy15xrrO/nuuY1x3zG6R5zraw79xjPM0aqCkmSJElSt2wz7AAkSZIkSVufxZ4kSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkdZDFniRJkiR1kMWeZowkZyX5qyHtO0k+nOR7Sa7azHUXJKkk2w0qvkFI8pokFw07jq2p/Tk8foJ5nTteSbOD+XF6dTFfmB+7y2JPU5ZkVZJ7kuzc0/b7SS4bYliD8hzgRcC8qjp42MFsbeMl3Kr6eFW9eJhxTafZdrySBsf82B3mx9l3vF1jsacttR1w4rCD2FxJtt3MVfYDVlXVDwcRz5Zqz6z677k1084SS+ok8+MIMD8+nPlx9vGXX1vqb4E/TfKosTPGOxuW5LIkv99Ovy7JV5K8L8n3k9ye5Nfb9ruS3Jvk2DGb3SPJxUkeSHJ5kv16tv2kdt59SW5N8uqeeWcl+UCSC5P8EHjeOPHunWRZu/7KJG9s248D/gV4dpIHk5w2zrrbJHlHkm+1cX8kySPHLPaGJGuS3J3krT3rHpxkeZL72zPB7+2Z96wk/9F+P99IcuiY73JJkq8APwLenmT5mLj+JMmydvqIJF9v93NXknf2LPrl9v377TE+u/05XNGzrV9PcnWSH7Tvvz4mlne1P88HklyUZI923o5JPpbku+1xXJ1kr7HfYbvsqiSnJLkpTZegDyfZsWf+S5Nc127nP5I8dcy6JyW5HvjhJAntJe3v2neS/O3G/wSMc7yV5A+S3NbG8o9JMsE2JWks8yPmR/Ojhq6qfPma0gtYBbwQ+AzwV23b7wOXtdMLgAK261nnMuD32+nXAeuB1wPbAn8F3An8I7AD8GLgAWCXdvmz2s/Pbef/PXBFO29n4K52W9sBTwe+AxzUs+4PgENoTnLsOM7xXA68H9gRWAisBV7QE+sVk3wXbwBWAo8Fdmm/k4+O+R4+0cb5lHbbL2znfxU4pp3eBXhWO70P8F3gJW3ML2o/z+n5Lu8EDmqP+ZHt93NAT1xXA
4vb6UPbfW8DPBW4B3jFJD+rXxwzsDvwPeCYdl9Ht58f3RPLfwJPAB7Rfj69nfcm4HPATu3P+RnAf5vkd+pGYH67z6/w0O/W04F7gWe22zm2XX6HnnWva9d9xATbL+DSdtv7At/k4b+PV4xZ9vPAo9pl1wKHD/vfnS9fvkb/hfmxd13zo/nR1xBfXtnT1vAXwB8nmTOFde+oqg9X1QbgkzR/iP6yqtZV1UXAT4HeAcMXVNWXq2od8Oc0ZxPnAy+l6Uby4apaX1XXAp8Gfrtn3fOr6itV9fOq+klvEO02ngOcVFU/qarraM5WHtPncbwGeG9V3V5VDwKnAIvHnD07rap+WFU3AB+mSQgAPwMen2SPqnqwqr7Wtr8WuLCqLmxjvhhYTpPcNjqrqla0x/wD4PyN201yAPAkYBlAVV1WVTe027qeJrn+Rp/HdwRwW1V9tN3XJ4BbgJf1LPPhqvpmVf0YWErzH4KNx/do4PFVtaGqrqmq+yfZ1z9U1V1VdR+wpOd7eiPwwaq6st3O2cA64Fk96/6fdt0fT7L9v6mq+6rqTuDverY/ntOr6vvtspf2HJMk9cP8aH4E86OGyGJPW6yqbqQ5w3PyFFa/p2f6x+32xrbt0vP5rp79PgjcB+xNM2bgmW33he8n+T5NgnnMeOuOY2/gvqp6oKftWzRnD/uxd7t877rbAb3dMe4aM3/vdvo4mjN+t7RdOF7atu8HHDXmmJ4DzJ3kmM7hoT/Ovwt8tqp+BJDkmUkuTbI2yQ+APwD2mOLxbTyG3u/n2z3TP+Khn9tHgS8C57bddP53ku0n2ddE39N+wFvHfB/ze+aPXXdztz+eiY5JkjbJ/PiL9c2PDzE/alpZ7GlrOZXmzFLvH7eNg7V36mnrTS5TMX/jRJJdaLobrKH5A3V5VT2q57VLVf1hz7o1yXbXALsn2bWnbV/gv/qMaw3NH9veddfz8GQ9f8z8NQBVdVtVHQ3sCfwN8Kk0d3C7i6arS+8x7VxVp09yTBfRjNtYSJPUzumZdw7NWcz5VfVI4J+AjX3sJ/tuxju+jcewye+nqn5WVadV1YHAr9OcZf69SVYZ93ui+T6WjPk+dmrPov5id5uKZ5LtS9IgmB/Nj+MyP2o6WOxpq6iqlTTdTN7c07aW5o/da5Nsm+QNwOO2cFcvSfKcJL8CvAu4sqruojlz+oQkxyTZvn39WpL/3mf8dwH/Afx1O2D6qTRnFD/eZ1yfAP4kyf5tkv1/gU9W1fqeZf5Xkp2SHEQzduKTAElem2ROVf0c+H677AbgY8DLkhzWfn87Jjk0ybxJjmM98CmaGwPsDlzcM3tXmrOzP0lyMM2ZzY3WAj+nGVMxngtpvt/fTbJdkt8BDqT53ieV5HlJnpLmDm/303Rb2TDJKickmZdkd+DttN8T8M/AH7RnYJNk5zSD6nedeFPjeluS3dquSSf2bF+Stjrzo/lxIuZHTQeLPW1Nf0kzwLrXG4G30QycPogmYWyJc2jOkt5HM5D5NQBt95IXA4tpzkR9m+Ys4A6bse2jaQZirwHOA05txwH040ya7hhfBu4AfgL88ZhlLqcZpH4J8O52zAXA4cCKJA/SDKpf3I6LuAs4kuYP+lqaM3dvY9P/bs+huTHAv45Jpv8P8JdJHqAZR7J044y2K8sS4CttF5Defv5U1Xdpzji+leZn+WfAS6vqO5uIBZqz1Z+iSWQ3t9/DxzYR/0XA7e3rr9oYltP8Pv0DzeD3lTSDxjfX+cA1NIPVLwA+NIVtSNLmMD+aH8djftTApaqfq7qSNHhJVtHc/etLw45FkqRRYX7UVHllT5IkSZI6yGJPkiRJkjrIbpySJEmS1EFe2ZMkSZKkDrLYkyRJkqQO2m7YAWyJPfbYoxYsWDDsMCRJ0+Caa675TlXNGXYcM4U5UpJmh8ny44wu9hYsWMDy5cuHHYYkaRok+dawY5hJzJGSNDtMlh/txilJkiRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJk
iRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdtN2wA5CGYcHJFwx1/6tOP2Ko+5ckaSLDzJHmR2nr8sqeJEmSJHWQxZ4kSUOQZFWSG5Jcl2R527Z7kouT3Na+79az/ClJVia5Nclhw4tckjRTWOxJkjQ8z6uqhVW1qP18MnBJVR0AXNJ+JsmBwGLgIOBw4P1Jth1GwJKkmcNiT5Kk0XEkcHY7fTbwip72c6tqXVXdAawEDh5CfJKkGcRiT5Kk4SjgoiTXJDm+bdurqu4GaN/3bNv3Ae7qWXd12/YwSY5PsjzJ8rVr1w4wdEnSTODdOCVJGo5DqmpNkj2Bi5PcMsmyGaetfqmh6gzgDIBFixb90nxJ0uzilT1Jkoagqta07/cC59F0y7wnyVyA9v3edvHVwPye1ecBa6YvWknSTGSxJ0nSNEuyc5JdN04DLwZuBJYBx7aLHQuc304vAxYn2SHJ/sABwFXTG7UkaaaxG6ckSdNvL+C8JNDk4nOq6gtJrgaWJjkOuBM4CqCqViRZCtwErAdOqKoNwwldkjRTWOxJkjTNqup24FfHaf8u8IIJ1lkCLBlwaJKkDrEbpyRJkiR1kMWeJEmSJHXQwIq9JDsmuSrJN5KsSHJa2/7OJP+V5Lr29ZKedU5JsjLJrUkOG1RskiRJktR1gxyztw54flU9mGR74Iok/9bOe19Vvbt34SQHAouBg4C9gS8leYID0CVJkiRp8w3syl41Hmw/bt++JnvA65HAuVW1rqruAFbSPHNIkiRJkrSZBjpmL8m2Sa6jeSjsxVV1ZTvrj5Jcn+TMJLu1bfsAd/WsvrptG7vN45MsT7J87dq1gwxfkiRJkmasgRZ7VbWhqhYC84CDkzwZ+ADwOGAhcDfwnnbxjLeJcbZ5RlUtqqpFc+bMGVDkkiRJkjSzTcvdOKvq+8BlwOFVdU9bBP4c+Gce6qq5Gpjfs9o8YM10xCdJkiRJXTPIu3HOSfKodvoRwAuBW5LM7VnslcCN7fQyYHGSHZLsDxwAXDWo+CRJkiSpywZ5N865wNlJtqUpKpdW1eeTfDTJQpoumquANwFU1YokS4GbgPXACd6JU5IkSZKmZmDFXlVdDzxtnPZjJllnCbBkUDFJkiRJ0mwxLWP2JEmSJEnTy2JPkiRJkjrIYk+SJEmSOshiT5IkSZI6aJB345Q0ghacfMFQ97/q9COGun9JkqTZwit7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEDK/aS7JjkqiTfSLIiyWlt++5JLk5yW/u+W886pyRZmeTWJIcNKjZJkiRJ6rpBXtlbBzy/qn4VWAgcnuRZwMnAJVV1AHBJ+5kkBwKLgYOAw4H3J9l2gPFJkiRJUmcNrNirxoPtx+3bVwFHAme37WcDr2injwTOrap1VXUHsBI4eFDxSZIkSVKXDXTMXpJtk1wH3AtcXFVXAntV1d0A7fue7eL7AHf1rL66bZMkSZIkbaaBFntVtaGqFgLzgIOTPHmSxTPeJn5poeT4JMuTLF+7du3WClWSJEmSOmVa7sZZVd8HLqMZi3dPkrkA7fu97WKrgfk9q80D1oyzrTOqalFVLZozZ85A45YkaVDa3i9fT/L59rM3MJMkbVWDvBvnnCSPaqcfAbwQuAVYBhzbLnYscH47vQxYnGSHJPsDBwBXDSo+SZKG7ETg5p7P3sBMkrRVDfLK3lzg0iTXA1fTjNn7PHA68KIktwEvaj9TVSuApcBNwBeAE6pqwwDjkyRpKJLMA44A/qWn2RuYSZK2qu0Gt
eGquh542jjt3wVeMME6S4Alg4pJkqQR8XfAnwG79rQ97AZmSXpvYPa1nuW8gZkkqS/TMmZPkiQ1krwUuLeqrul3lXHafukGZu22vYmZJOkXLPYkSZpehwAvT7IKOBd4fpKPsYU3MANvYiZJejiLPUmSplFVnVJV86pqAc2NV/69ql6LNzCTJG1lAxuzJ0mSNsvpwNIkxwF3AkdBcwOzJBtvYLYeb2AmSeqTxZ4kSUNSVZfRPIfWG5hJkrY6u3FKkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHbbLYS3JUkl3b6Xck+UySpw8+NEmSRps5UpI0yvq5sve/quqBJM8BDgPOBj4w2LAkSZoRzJGSpJHVT7G3oX0/AvhAVZ0P/MrgQpIkacYwR0qSRlY/xd5/Jfkg8GrgwiQ79LNekvlJLk1yc5IVSU5s29+Z5L+SXNe+XtKzzilJVia5NclhUz0oSZKmyZRypCRJ02G7PpZ5NXA48O6q+n6SucDb+lhvPfDWqrq2Hc9wTZKL23nvq6p39y6c5EBgMXAQsDfwpSRPqKoNSJI0mqaaIyVJGrhNnn2sqh8B9wLPaZvWA7f1sd7dVXVtO/0AcDOwzySrHAmcW1XrquoOYCVw8Kb2I0nSsEw1R0qSNB366Y55KnAScErbtD3wsc3ZSZIFwNOAK9umP0pyfZIzk+zWtu0D3NWz2mrGKQ6THJ9keZLla9eu3ZwwJEnaqrZGjpQkaVD6GVfwSuDlwA8BqmoNsGu/O0iyC/Bp4C1VdT/NXcoeBywE7gbes3HRcVavX2qoOqOqFlXVojlz5vQbhiRJg7BFOVKSpEHqp9j7aVUVbeGVZOd+N55ke5pC7+NV9RmAqrqnqjZU1c+Bf+ahrpqrgfk9q88D1vS7L0mShmDKOVKSpEHrp9hb2t5p7FFJ3gh8iaZIm1SSAB8Cbq6q9/a0z+1Z7JXAje30MmBxkh2S7A8cAFzV32FIkjQUU8qRkiRNh03ejbOq3p3kRcD9wBOBv6iqizexGsAhwDHADUmua9veDhydZCHNWdBVwJva/axIshS4iWaA+wneiVOSNMq2IEdKkjRwmyz22i4p/15VFyd5IvDEJNtX1c8mW6+qrmD8cXgXTrLOEmDJpmKSJGkUTDVHSpI0HfrpxvllYIck+9B0T3k9cNYgg5IkaYYwR0qSRlY/xV7a5wi9Cvi/VfVK4MDBhiVJ0oxgjpQkjay+ir0kzwZeA1zQtm2y+6ckSbOAOVKSNLL6KfbeQvOw2PPam6g8Frh0sGFJkjQjmCMlSSOrn7txXg5c3vP5duDNgwxKkqSZwBwpSRpl/dyN81Lah8X2qqrnDyQiSZJmCHOkJGmU9TOu4E97pncEfovmOXiSJM125khJ0sjqpxvnNWOavpLk8nEXliRpFjFHSpJGWT/dOHfv+bgN8AzgMQOLSJKkGcIcKUkaZf1047yGZjxCaLqm3AEcN8igJEmaIcyRkqSR1U83zv2nIxBJkmYac6QkaZT1041ze+APgee2TZcBH6yqnw0wLkmSRp45UpI0yvp5qPoHaMYgvL99PaNtkyRpttvsHJlkxyRXJflGkhVJTmvbd09ycZLb2vfdetY5JcnKJLcmOWyAxyNJ6pB+xuz9WlX9as/nf0/yjUEFJEnSDDKVHLkOeH5VPdheGbwiyb8BrwIuqarTk5wMnAyclORAYDFwELA38KUkT6iqDVv/cCRJXdLPlb0NSR638UOSxwImGEmSppAjq/Fg+3H79lXAkcDZbfvZwCva6SOBc6tqXVXdAawEDt56hyBJ6qp+ruy9Dbg0ye00dxvbD3j9QKOSJGlmmFKOTLItzZ08Hw/8Y
1VdmWSvqroboKruTrJnu/g+wNd6Vl/dtkmSNKl+7sZ5SZIDgCfSJLJbqmrdwCOTJGnETTVHtl0wFyZ5FHBekidPsnjG28S4CybHA8cD7LvvvpsKQ5LUcf1c2YNmwPmCdvlfTUJVfWRgUUmSNHNMOUdW1feTXAYcDtyTZG57VW8ucG+72Gpgfs9q84A1E2zvDOAMgEWLFo1bEEqSZo9+Hr3wUeBxwHU8NA6hAIs9SdKsNpUcmWQO8LO20HsE8ELgb4BlwLHA6e37+e0qy4BzkryX5gYtBwBXbf2jkSR1TT9X9hYBB1aVZwglSXq4qeTIucDZ7bi9bYClVfX5JF8FliY5DrgTOAqgqlYkWQrcBKwHTvBOnJKkfvRT7N0IPAa4e8CxSJI002x2jqyq64GnjdP+XeAFE6yzBFgyxRglSbPUhMVeks/RdEXZFbgpyVU0zwYCoKpePvjwJEkaPeZISdJMMNmVvXdPWxSSJM0s5khJ0sibsNirqsunMxBJkmYKc6QkaSbYZtgBSJIkSZK2Pos9SZIkSeqgCYu9JJe0738zlQ0nmZ/k0iQ3J1mR5MS2ffckFye5rX3frWedU5KsTHJrksOmsl9JkgZtS3OkJEnTYbIbtMxN8hvAy5OcC6R3ZlVdu4ltrwfeWlXXJtkVuCbJxcDrgEuq6vQkJwMnAyclORBYDBxE89DYLyV5gs8SkiSNoC3NkZIkDdxkxd5f0BRi84D3jplXwPMn23BV3U373KGqeiDJzcA+wJHAoe1iZwOXASe17edW1TrgjiQrgYOBr/Z/OJIkTYstypGSJE2Hye7G+SngU0n+V1W9a0t2kmQBzQNkrwT2agtBquruJHu2i+0DfK1ntdVt29htHQ8cD7DvvvtuSViSJE3J1syRkiQNymRX9gCoqncleTnw3Lbpsqr6fL87SLIL8GngLVV1f5IJFx1v9+PEcwZwBsCiRYt+ab4kSdNlS3OkJEmDtMm7cSb5a+BE4Kb2dWLbtklJtqcp9D5eVZ9pm+9JMredPxe4t21fDczvWX0esKaf/UiSNAxbkiMlSRq0fh69cATwoqo6s6rOBA5v2yaV5hLeh4Cbq6p3PMMy4Nh2+ljg/J72xUl2SLI/cABwVX+HIUnSUEwpR0qSNB022Y2z9Sjgvnb6kX2ucwhwDHBDkuvatrcDpwNLkxwH3AkcBVBVK5IspTkzuh44wTtxSpJmgKnkSEmSBq6fYu+vga8nuZRmXN1zgVM2tVJVXcH44/AAXjDBOkuAJX3EJEnSKJhSjpQkaTr0c4OWTyS5DPg1mkR2UlV9e9CBSZI06syRkqRR1lc3zvZRCcsGHIskSTOOOVKSNKr6uUGLJEmSJGmGsdiTJEmSpA6atNhLsk2SG6crGEmSZgpzpCRp1E1a7FXVz4FvJNl3muKRJGlGMEdKkkZdPzdomQusSHIV8MONjVX18oFFJUnSzGCOlCSNrH6KvdMGHoUkSTOTOVKSNLL6ec7e5Un2Aw6oqi8l2QnYdvChSZI02syRkqRRtsm7cSZ5I/Ap4INt0z7AZwcZlCRJM4E5UpI0yvp59MIJwCHA/QBVdRuw5yCDkiRphjBHSpJGVj/F3rqq+unGD0m2A2pwIUmSNGOYIyVJI6ufYu/yJG8HHpHkRcC/Ap8bbFiSJM0I5khJ0sjqp9g7GVgL3AC8CbgQeMcgg5IkaYYwR0qSRlY/d+P8eZKzgStpuqbcWlV2UZEkzXrmSEnSKNtksZfkCOCfgP8EAuyf5E1V9W+DDk6SpFFmjpQkjbJ+Hqr+HuB5VbUSIMnjgAsAE5kkabYzR0qSRlY/Y/bu3ZjEWrcD9w4oHkmSZhJzpCRpZE14ZS/Jq9rJFUkuBJbSjEc4Crh6GmKTJGkkmSO7bcHJFwx1/6tOP2Ko+5fUHZN143xZz/Q9wG+002uB3QYWkSRJo88cKUkaeRMWe1X1+ukMRJKkmcIcKUmaCfq5G+f+wB8DC3qXr6qXDy4sSZJGnzlSkjTK+
rkb52eBDwGfA34+2HAkSZpRzJGSpJHVT7H3k6r6PwOPRJKkmcccKUkaWf0Ue3+f5FTgImDdxsaqunZgUWlaDPNuY95pTFJHmCMlSSOrn2LvKcAxwPN5qItKtZ8nlORM4KU0zyB6ctv2TuCNNHcrA3h7VV3YzjsFOA7YALy5qr64WUciSdL0m1KOlCRpOvRT7L0SeGxV/XQzt30W8A/AR8a0v6+q3t3bkORAYDFwELA38KUkT6iqDZu5T0mSptNUc6QkSQO3TR/LfAN41OZuuKq+DNzX5+JHAudW1bqqugNYCRy8ufuUJGmaTSlHSpI0Hfop9vYCbknyxSTLNr62YJ9/lOT6JGcm2fjg2X2Au3qWWd22SZI0yjY7RyaZn+TSJDcnWZHkxLZ99yQXJ7mtfd+tZ51TkqxMcmuSwwZ8TJKkjuinG+epW3F/HwDeRTOe4V3Ae4A3ABln2RpvA0mOB44H2HfffbdiaJIkbbap5Mj1wFur6tokuwLXJLkYeB1wSVWdnuRk4GTgJIc6SJKmapPFXlVdvrV2VlX3bJxO8s/A59uPq4H5PYvOA9ZMsI0zgDMAFi1aNG5BKEnSdJhKjqyqu4G72+kHktxM05vlSODQdrGzgcuAk+gZ6gDckWTjUIevbmn8kqRu22Q3ziQPJLm/ff0kyYYk909lZ0nm9nx8JXBjO70MWJxkhyT7AwcAV01lH5IkTZctzZFJFgBPA64E9moLwY0F4Z7tYn0PdUhyfJLlSZavXbt2vEUkSbNIP1f2du39nOQV9HHzlCSfoDlDuUeS1TRdXQ5NspCmi+Yq4E3tPlYkWQrcRNO95QS7p0iSRt1Uc2S77C7Ap4G3VNX9yXgjGppFx9v1BPHY+0WS9Av9jNl7mKr6bDuWYFPLHT1O84cmWX4JsGRz45EkaVT0myOTbE9T6H28qj7TNt+TZG5V3d32hLm3be97qIMkSb02WewleVXPx22ARUxwRlGSpNlkKjkyzSW8DwE3V9V7e2YtA44FTm/fz+9pPyfJe2lu0OJQB0lSX/q5sveynun1NN0vjxxINJIkzSxTyZGHAMcANyS5rm17O02RtzTJccCdwFHgUAdJ0tT1M2bv9dMRiCRJM81UcmRVXcH44/AAXjDBOg51kCRttgmLvSR/Mcl6VVXvGkA8kiSNPHOkJGkmmOzK3g/HadsZOA54NM1D0SVJmo3MkZKkkTdhsVdV79k4nWRX4ETg9cC5wHsmWk+SpK4zR0qSZoJJx+wl2R34n8BrgLOBp1fV96YjMEmSRpk5UpI06iYbs/e3wKtoHs76lKp6cNqikiRphJkjJUkzwTaTzHsrzfN83gGsSXJ/+3ogyf3TE54kSSPJHClJGnmTjdmbrBCUJGnWMkdKkmYCk5UkSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkddB2ww5AkiRJGgULTr5gqPtfdfoRQ92/uscre5IkSZLUQRZ7kiRJktRBA+vGmeRM4KXAvVX15LZtd+CTwAJgFfDqqvpeO+8U4DhgA/DmqvrioGKTNHvZRUeSJM0Wg7yydxZw+Ji2k4FLquoA4JL2M0kOBBYDB7XrvD/JtgOMTZIkSZI6bWDFXlV9GbhvTPORwNnt9NnAK3raz62qdVV1B7ASOHhQsUmSJElS1033mL29qupugPZ9z7Z9H+CunuVWt22/JMnxSTdSn18AAA+lSURBVJYnWb527dqBBitJkiRJM9Wo3KAl47TVeAtW1RlVtaiqFs2ZM2fAYUmSJEnSzDTdxd49SeYCtO/3tu2rgfk9y80D1kxzbJIkSZLUGdNd7C0Djm2njwXO72lfnGSHJPsDBwBXTXNskiRJktQZg3z0wieAQ4E9kqwGTgVOB5YmOQ64EzgKoKpWJFkK3ASsB06oqg2Di
k2SJEmSum5gxV5VHT3BrBdMsPwSYMmg4pEkSZKk2WRUbtAiSZIkSdqKLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmaZknOTHJvkht72nZPcnGS29r33XrmnZJkZZJbkxw2nKglSTONxZ4kSdPvLODwMW0nA5dU1QHAJe1nkhwILAYOatd5f5Jtpy9USdJMZbEnSdI0q6ovA/eNaT4SOLudPht4RU/7uVW1rqruAFYCB09LoJKkGc1iT5Kk0bBXVd0N0L7v2bbvA9zVs9zqtk2SpElZ7EmSNNoyTluNu2ByfJLlSZavXbt2wGFJkkadxZ4kSaPhniRzAdr3e9v21cD8nuXmAWvG20BVnVFVi6pq0Zw5cwYarCRp9FnsSZI0GpYBx7bTxwLn97QvTrJDkv2BA4CrhhCfJGmG2W7YAUiSNNsk+QRwKLBHktXAqcDpwNIkxwF3AkcBVNWKJEuBm4D1wAlVtWEogUuSZhSLPUmSpllVHT3BrBdMsPwSYMngIpIkdZHdOCVJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpg4bynL0kq4AHgA3A+qpalGR34JPAAmAV8Oqq+t4w4pMkSZKkmW6YV/aeV1ULq2pR+/lk4JKqOgC4pP0sSZIkSZqCUerGeSRwdjt9NvCKIcYiSZIkSTPasIq9Ai5Kck2S49u2varqboD2fc8hxSZJkiRJM95QxuwBh1TVmiR7AhcnuaXfFdvi8HiAfffdd1DxSZIkSdKMNpQre1W1pn2/FzgPOBi4J8lcgPb93gnWPaOqFlXVojlz5kxXyJIkSZI0o0z7lb0kOwPbVNUD7fSLgb8ElgHHAqe37+dPd2yS1GULTr5gqPtfdfoRQ92/Ns8wf1/8XZGkrWMY3Tj3As5LsnH/51TVF5JcDSxNchxwJ3DUEGKTJEmSpE6Y9mKvqm4HfnWc9u8CL5jueCRJkiSpi0bp0QuSJEmSpK3EYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjpou2EHIEmSJEnDtODkC4a271WnHzGwbXtlT5IkSZI6yCt7kiRJ0iw3zCtbMNirW7OZV/YkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYNGrthLcniSW5OsTHLysOORJGkUmB8lSZtru2EH0CvJtsA/Ai8CVgNXJ1lWVTcNYn8LTr5gEJvt26rTjxjq/iVJM8N050dJUjeM2pW9g4GVVXV7Vf0UOBc4csgxSZI0bOZHSdJmG7Vibx/grp7Pq9s2SZJmM/OjJGmzpaqGHcMvJDkKOKyqfr/9fAxwcFX9cc8yxwPHtx+fCNw67YE+ZA/gO0Pc/zDN5mOH2X38s/nYYXYf/7CPfb+qmjPE/Q9NP/mxbTdHjgaPffaazcc/m48dhnv8E+bHkRqzR3Omcn7P53nAmt4FquoM4IzpDGoiSZZX1aJhxzEMs/nYYXYf/2w+dpjdxz+bj30EbDI/gjlyVHjss/PYYXYf/2w+dhjd4x+1bpxXAwck2T/JrwCLgWVDjkmSpGEzP0qSNttIXdmrqvVJ/gj4IrAtcGZVrRhyWJIkDZX5UZI0FSNV7AFU1YXAhcOOo08j0VVmSGbzscPsPv7ZfOwwu49/Nh/70M2w/Aiz+/fFY5+9ZvPxz+ZjhxE9/pG6QYskSZIkaesYtTF7kiRJkqStwGJPkiRJkjpo5MbsjbIkTwKOpHmQbdHc9npZVd081MA0cO3Pfh/gyqp6sKf98Kr6wvAiG7wkB
wNVVVcnORA4HLilHT80qyT5SFX93rDjGIYkzwEOBm6sqouGHY9Gi/lx9prN+RHMkb1ma44c9fzomL0+JTkJOBo4l+Z5R9A852gxcG5VnT6s2IYtyeur6sPDjmNQkrwZOAG4GVgInFhV57fzrq2qpw8zvkFKcirwmzQnhi4GnglcBrwQ+GJVLRledIOVZOxt7QM8D/h3gKp6+bQHNY2SXFVVB7fTb6T5N3Ae8GLgc7P5b54ezvw4MfNjd/MjmCPHNjFLcuRMy48We31K8k3goKr62Zj2XwFWVNUBw4ls+JLcWVX7DjuOQUlyA/DsqnowyQLgU8BHq+rvk3y9qp421AAHqD32hcAOwLeBeVV1f5JH0JzFfepQAxygJNcCNwH/QnOlIsAnaP4DS1VdPrzoBq/3dzvJ1cBLqmptkp2Br1XVU4YboUaF+XFi5sfu5kcwRzJLc+RMy4924+zfz4G9gW+NaZ/bzuu0JNdPNAvYazpjGYJtN3ZNqapVSQ4FPpVkP5rj77L1VbUB+FGS/6yq+wGq6sdJuv57vwg4Efhz4G1VdV2SH3c5gY2xTZLdaMZ2p6rWAlTVD5OsH25oGjHmxwlmYX7sOnPk7MyRMyo/Wuz17y3AJUluA+5q2/YFHg/80dCimj57AYcB3xvTHuA/pj+cafXtJAur6jqA9gzmS4EzgZE6ezMAP02yU1X9CHjGxsYkj6Tj/4mrqp8D70vyr+37Pcyuv5mPBK6h+TdeSR5TVd9Osguz4z9x6p/50fw4G/MjmCNna46cUfnRbpybIck2NAMw96H5Ya4Grm7P6nRakg8BH66qK8aZd05V/e4QwpoWSebRnL379jjzDqmqrwwhrGmRZIeqWjdO+x7A3Kq6YQhhDUWSI4BDqurtw45lmJLsBOxVVXcMOxaNDvOj+XGceZ3Oj2CO7GWOHN38aLEnSZIkSR3kc/YkSZIkqYMs9iRJkiSpgyz2JEmSJKmDLPakVpK9kpyT5PYk1yT5apJXjrPcgiQ3jtP+l0le2Md+npakkhy2tWKfZF8PTtDeV6xbuO/XJfmHQe5DkjQcE+WXCZZ9XZK9x7TNSfKzJG/a+tH90v5XtTdNGdv+B0l+b8D7PjTJ5we5D2kyFnsSkCTAZ4EvV9Vjq+oZNA8GnTdmuQlvK1xVf1FVX+pjd0cDV7Tv48bS3tluYDYjVkmSttTraJ7F2Oso4GtMkAsBkmw7wJioqn+qqo8Mch/SsFnsSY3nAz+tqn/a2FBV36qq/9uekfzXJJ8DLppoA0nOSvLbSX4zydKe9kPbdTcWlb9Nk/henGTHtn1BkpuTvB+4Fpif5G1Jrk5yfZLTerb32fbK44okx2/qwJK8J8m1SS5JMqc31nZ6VZLT2mVuSPKktv2dSc5Mcll7tfPNPdt8bZKrklyX5IMbE3KS1yf5ZpLLgUM2+a1LkjojycIkX2vz1nlJdmtzzSLg423OeES7+NHAW4F5Sfbp2caDbe+TK4FnT5JvPpBkeZsLTxsbyzje1m7nqiSPb7fxziR/2k5fluRv2vnfTPI/2vbXJflMki8kuS3J/+6J9cVtL6Br2/8n7NK2H57kliRXAK/a0u9V2hIWe1LjIJoiayLPBo6tquf3sa2LgWcl2bn9/DvAJ9vpQ4A7quo/gcuAl/Ss90TgI1X1tHb6AJrnVi0EnpHkue1yb2ivPC4C3pzk0ZPEsjNwbVU9HbgcOHWC5b7TLvMB4E972p9E87Dgg4FTk2yf5L+3x3RIVS0ENgCvSTIXOK09xhcBB04SlySpez4CnFRVTwVuAE6tqk8By4HXVNXCqvpxkvnAY6rqKmApTU7ZaGfgxqp6JvBdxsk37XJ/XlWLgKcCv5HkqZuI7f6qOhj4B+DvJlhmu3aZt/DwfLmwjeMpwO8kmd92C30H8MI2fy4H/md7EvefgZcB/wN4zCbikgbKYk8aR5J/TPKNJFe3TRdX1X39rFtV64EvAC9ru30eAZzfzj4aOLedPpeHd1/5V
lV9rZ1+cfv6Ok0R+iSa4g+aAu8bNN1f5ve0j+fnPFRofgx4zgTLfaZ9vwZY0NN+QVWtq6rvAPcCewEvAJ4BXJ3kuvbzY4FnApdV1dqq+mnPfiVJHZfkkcCjqurytuls4LkTLL6YpsiDX86FG4BPt9MT5RuAVye5liZPHsSmTzB+ouf92RMsM1EuvKSqflBVPwFuAvYDntXu8yttbMe27U+iOal7WzUPs/7YJuKSBmrC8UfSLLMC+K2NH6rqhPas3fK26Yebub1PAicA9wFXV9UDbdeT3wJenuTPgQCPTrLrOPsI8NdV9cHejSY5FHgh8Oyq+lGSy4AdNyOumqB9Xfu+gYf/XVjXM71xXoCzq+qUMbG9YpLtS5K00dHAXkk2XqXbO8kBVXUb8JOq2tC2T5Rv9qfphfJrVfW9JGex6VxYE0z32txceHFVPWzMYZKFk2xfmnZe2ZMa/w7smOQPe9p22oLtXQY8HXgjD13heiHwjaqaX1ULqmo/mrOXrxhn/S8Cb+jp/79Pkj2BRwLfawu9J9GcWZzMNjRjBAF+l+bGMFvqEuC323hIsnuS/YArgUOTPDrJ9jSD7yVJs0BV/QD43saxbsAxNMMHAB4AdgVI8kRg56rap82FC4C/prnaN9ZE+ea/0Zwg/UGSvYDf7CPE3+l5/+rmHt84vgYc0jP+b6ckTwBuAfZP8rh2uQlvQCNNB6/sSUBVVXtl6n1J/gxYS5NITgIeMc4qT0yyuufzn4zZ3oY0t1p+HU3XDmj+4J83ZjufBv4Q+P/GrH9ROzbuq0kAHgReS9M99A+SXA/cSpNsJvND4KAk1wA/4OHjIqakqm5K8g7gojR3Df0ZcEJVfS3JO2mS6N003U8Heic1SdLQ7DQmD76XJt/9U5KdgNuB17fzzmrbf0xzMnO8XHgu8K7exk3km6/T9Mq5HfhKH/Hu0N70ZRu2QgFWVWuTvA74RJId2uZ3VNU309w87YIk36E5yfrkLd2fNFVpuhNLkiRJkrrEbpySJEmS1EF245Q6oO2assOY5mOq6oZhxCNJ0nRLch6w/5jmk6rqi8OIRxoFduOUJEmSpA6yG6ckSZIkdZDFniRJkiR1kMWeJEmSJHWQxZ4kSZIkdZDFniRJkiR10P8PtBgaI8lDPEMAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.subplot(1,2,1)\n", "tmp.groupby('GrLivArea_binned')['GrLivArea'].count().plot.bar()\n", "plt.ylabel('Number of houses')\n", "plt.title('Number of observations per bin')\n", "plt.subplot(1,2,2)\n", "tmp.groupby('LotArea_binned')['LotArea'].count().plot.bar()\n", "plt.ylabel('Number of houses')\n", "plt.title('Number of observations per bin')\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Now return interval boundaries instead" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ArbitraryDiscretiser(binning_dict={'GrLivArea': [-inf, 500, 1000, 1500, 2000,\n", " 2500, inf],\n", " 'LotArea': [-inf, 4000, 8000, 12000, 16000,\n", " 20000, inf]},\n", " return_boundaries=True, return_object=False)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atd = ArbitraryDiscretiser(binning_dict={\"LotArea\": [-np.inf, 4000, 8000, 12000, 16000, 20000, np.inf],\n", " \"GrLivArea\": [-np.inf, 500, 1000, 1500, 2000, 2500, np.inf]},\n", " # to return the boundary limits\n", " return_boundaries=True)\n", "\n", "atd.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "train_t = atd.transform(X_train)\n", "test_t = atd.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([Interval(-inf, 500.0, closed='right'),\n", " Interval(500.0, 1000.0, closed='right'),\n", " Interval(1000.0, 1500.0, closed='right'),\n", " Interval(1500.0, 2000.0, closed='right'),\n", " Interval(2000.0, 2500.0, closed='right'),\n", " Interval(2500.0, inf, closed='right')], dtype=object)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the numbers are the different bins into which the 
observations\n", "# were sorted\n", "np.sort(np.ravel(train_t['GrLivArea'].unique()))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([Interval(500.0, 1000.0, closed='right'),\n", " Interval(1000.0, 1500.0, closed='right'),\n", " Interval(1500.0, 2000.0, closed='right'),\n", " Interval(2000.0, 2500.0, closed='right'),\n", " Interval(2500.0, inf, closed='right')], dtype=object)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sort(np.ravel(test_t['GrLivArea'].unique()))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAFfCAYAAAChhtABAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de9ylc73/8dc7x0IhQxhjEHaUbE1UVKJCitgRu2yE0WnTrl2msjvsaFM67iIqh93BIYcoKpLDrv1zGiFMchoMg8khohTevz+u616z3O5132vmXmtda13r/Xw81mPW+l7rWtfne1+sz/p+r+/1/co2ERERAM+pOoCIiOgfSQoREdGQpBAREQ1JChER0ZCkEBERDUkKERHRkKQQXSXpREmHVXRsSTpB0kOSrhhj+z6Sfl1FbP1O0p8lrVt1HBORtLWkeVXHUSdJCkNG0lxJ90larqlsf0kXVxhWt2wFvAmYanvzqoMZJLaXt31bO++VZEkv7nZM0RtJCsNpSeDgqoNYVJKWWMRd1gbm2n6sG/HUgaQlh/n48WxJCsPpi8C/S1px9AZJ08tffks2lV0saf/y+T6SfiPpK5IelnSbpNeU5XdJul/S3qM+dhVJF0h6VNIlktZu+ux/KLc9KOkmSbs3bTtR0jGSzpP0GPCGMeJdQ9I55f63SDqgLN8P+A7w6rIr5LOt/hiSjiq7mG6XtMNEn90U22FNr5/RjSHpEEl3l3W+SdK2ZflzJM2SdKukBySdJmnlctuykr5flj8s6UpJq7WIea6kj0u6sYz9BEnLNm1/q6Rrys/5P0mbjNr3EEnXAY+N9cXc/Ou/rOs3JZ1b1udySeuV2y4td7m2/Du/czGOf6ik00cd/2uSvl4+31fSnPLYt0k6sNW5jA6wnccQPYC5wBuBM4HDyrL9gYvL59MBA0s27XMxsH/5fB/gSWBfYAngMOBO4JvAMsCbgUeB5cv3n1i+fl25/WvAr8ttywF3lZ+1JLAZ8Edg46Z9/wRsSfEDZtkx6nMJcDSwLLApsADYtinWX4/zt9gH+DtwQFmX9wH3AGrjs08c+fuVr7cG5pXPNyzrtUbT33S98vmHgMuAqeXf41jg5HLbgcBPgOeV8bwCeP445/F6YC1gZeA3TedzM+B+YIvyc/Yu379M077XlPs+t8XnG3hxU10fBDYvz9MPgFPGeu/iHJ+iRff4SF3LfeYDrypf7wisBwh4ffnezUb/3fPozCMtheH1KeBfJU1ZjH1vt32C7aeAUyn+5/5P20/YPh/4G9
Dcx3yu7UttPwF8kuLX+1rAWym6d06w/aTtq4EzgHc07Xu27d/Yftr2X5uDKD9jK+AQ23+1fQ1F62CvRajLHba/XdblJGB1YLVJfvZTFF/4G0layvZc27eW2w4EPml7Xvn3+AzwjvLX+t+BF1J8wT5le7btR8Y5zjds32X7QeBwYM+y/ADgWNuXl59zEvAE8Kqmfb9e7vuXNuoDcKbtK2w/SZEUNh3nvYt0fNt3AFcDby+3bQM8bvsyANvn2r7VhUuA84HXthl3LKIkhSFl+3rgp8Csxdj9vqbnfyk/b3TZ8k2v72o67p8pfnWuQfELcYuyi+FhSQ8D7wJeNNa+Y1gDeND2o01ldwBrLkJd7m2K7fHy6fKT+Wzbt1C0CD4D3C/pFElrlJvXBs5qqu8ciiSyGvA94BfAKZLukfQFSUuNc6jmv80dZcwjx/jIqL/rWk3bR+/bjnubnj/OM8/vaItz/B+yMKn9c/kaAEk7SLqs7MZ7GHgLsMoixh9tSlIYbp+m+FXX/EU3clH2eU1lzV/Si2OtkSeSlqfo7riH4ovhEtsrNj2Wt/2+pn3Hm8b3HmBlSSs0lU0D7p5kvO189mOM8zey/UPbW1F8QRo4stx0F7DDqDova/tu23+3/VnbGwGvoWhJ/cs4Ma7V9HxaGfPIMQ4fdYzn2T65OcSJ/gCTsDjH/xGwtaSpwC6USUHSMhStx6OA1WyvCJxH0ZUUXZCkMMTKX7SnAgc1lS2g+OJ7t6QlJL2Hoj93Mt4iaStJSwOfAy63fRdFS2UDSXtJWqp8vFLSS9qM/y7g/4D/Ki/SbgLsR9G9MSltfPY1Zb1WlvQiipYBAJI2lLRN+YX2V4qW01Pl5m8Bh6u82C5piqSdy+dvkPQyFaOsHqHoThrZbywfkDS1vFD9CYpzCfBt4L2StlBhOUk7jkpwnXQf0HxPwyIfv/zv7mLgBIruyTnlpqUpuuIWAE+qGAjw5m5UIgpJCvGfFBd8mx0AfBR4ANiY4stxMn5I0Sp5kOLi6bsAyq6ZNwN7UPzKvZfiF/Uyi/DZe1JcyL0HOAv4tO0LJhlvO5/9PeBaioum57PwCxmK+I+guGh+L7AqxZc2FBfazwHOl/QoxUXnLcptLwJOp0gIcygudH9/nPh+WB77tvJxGIDtqyjO4TeAh4BbKC6qd8tngJPKrqLdJ3H8H1IMgmh0HZX/jRwEnFZ+1j9T/P2iS0ZGWUTEAJE0l2JE2C+rjiXqJS2FiIhoSFKIiIiGdB9FRERDWgoREdEw0JNRrbLKKp4+fXrVYUREDJTZs2f/0faYsxkMdFKYPn06V111VdVhREQMFEl3tNqW7qOIiGhIUoiIiIYkhYiIaEhSiIiIhiSFiIhoSFKIiIiGriUFSWtJuqhcW/UGSQeX5SurWJP35vLflZr2+biKtXBvkrRdt2KLiIixdbOl8CTwEdsvoViG7wOSNqJY6etC2+sDF5avKbftQTFV8/bA0eW88hER0SNdSwq255dr7o7MiT6HYoWvnSnWwqX8d2Rd1p0pFgN/wvbtFHOwb96t+CIi4tl6ckezpOnAPwKXUyypNx+KxCFp1fJta1IsODJiHmOshytpJjATYNq0ad0LOvrW9Fnn9vR4c4/YsafHi6hS1y80l2vyngF8yPYj4711jLJnTeFq+zjbM2zPmDJlzKk7IiJiMXU1KUhaiiIh/MD2mWXxfZJWL7evDtxfls/jmQuRT2XhQuQREdED3Rx9JOC7wBzbX27adA6wd/l8b+DspvI9JC0jaR1gfeCKbsUXERHP1s1rClsCewG/k3RNWfYJigXNT5O0H3AnsBuA7RsknQbcSDFy6QO2n+pifBERMUrXkoLtXzP2dQKAbVvsczhweLdiioiI8eWO5oiIaEhSiIiIhiSFiIhoSFKIiIiGJIWIiGhIUoiIiIYkhYiIaEhSiIiIhiSFiIhoSFKIiIiGJIWIiGhIUoiIiIYkhYiIaEhSiIiIhiSFiIhoSFKIiIiGbi
7Hebyk+yVd31R2qqRrysfckRXZJE2X9Jembd/qVlwREdFaN5fjPBH4BvA/IwW23znyXNKXgD81vf9W25t2MZ6IiJhAN5fjvFTS9LG2SRKwO7BNt44fERGLrqprCq8F7rN9c1PZOpJ+K+kSSa+tKK6IiKHWze6j8ewJnNz0ej4wzfYDkl4B/FjSxrYfGb2jpJnATIBp06b1JNiIiGHR85aCpCWBXYFTR8psP2H7gfL5bOBWYIOx9rd9nO0ZtmdMmTKlFyFHRAyNKrqP3gj83va8kQJJUyQtUT5fF1gfuK2C2CIihlo3h6SeDPw/YENJ8yTtV27ag2d2HQG8DrhO0rXA6cB7bT/YrdgiImJs3Rx9tGeL8n3GKDsDOKNbsURERHtyR3NERDQkKUREREOSQkRENCQpREREQ5JCREQ0JClERERDkkJERDQkKUREREOSQkRENCQpREREQ5JCREQ0JClERERDkkJERDQkKUREREOSQkRENCQpREREQ5JCREQ0dHM5zuMl3S/p+qayz0i6W9I15eMtTds+LukWSTdJ2q5bcUVERGvdbCmcCGw/RvlXbG9aPs4DkLQRxdrNG5f7HC1piS7GFhERY+haUrB9KfBgm2/fGTjF9hO2bwduATbvVmwRETG2Kq4pfFDSdWX30kpl2ZrAXU3vmVeWPYukmZKuknTVggULuh1rRMRQ6XVSOAZYD9gUmA98qSzXGO/1WB9g+zjbM2zPmDJlSneijIgYUj1NCrbvs/2U7aeBb7Owi2gesFbTW6cC9/QytoiI6HFSkLR608tdgJGRSecAe0haRtI6wPrAFb2MLSIiYMmJ3iBpN+Dnth+VdCiwGXCY7asn2O9kYGtgFUnzgE8DW0valKJraC5wIIDtGySdBtwIPAl8wPZTi12riIhYLBMmBeA/bP9I0lbAdsBRFNcGthhvJ9t7jlH83XHefzhweBvxREREl7TTfTTyi31H4BjbZwNLdy+kiIioSjtJ4W5JxwK7A+dJWqbN/SIiYsC08+W+O/ALYHvbDwMrAx/talQREVGJCZOC7ceB+4GtyqIngZu7GVRERFRjwqQg6dPAIcDHy6KlgO93M6iIiKhGO91HuwA7AY8B2L4HWKGbQUVERDXaSQp/s23KaSckLdfdkCIioirtJIXTytFHK0o6APglxRQVERFRMxPevGb7KElvAh4BNgQ+ZfuCrkcWERE91840F8sBv7J9gaQNgQ0lLWX7790PLyIieqmd7qNLgWUkrUnRdbQvxapqERFRM+0kBZX3KuwK/LftXYCNuhtWRERUoa2kIOnVwLuAc8uydibSi4iIAdNOUvgQxY1rZ5VTXK8LXNTdsCIiogrtjD66BLik6fVtwEHdDCoiIqrRzuijixhjvWTb23QlooiIqEw71wb+ven5ssA/UUyKFxERNdNO99HsUUW/kXTJmG9uIul44K3A/bZfWpZ9EXgb8DfgVmBf2w9Lmg7MAW4qd7/M9nvbrURERHRGO7Okrtz0WEXSdsCL2vjsE4HtR5VdALzU9ibAH1g48yrArbY3LR9JCBERFWin+2g2xTUFUXQb3Q7sN9FOti8tWwDNZec3vbwMeEe7gUZERPe10320TpeO/R7g1KbX60j6LcUcS4fa/t+xdpI0E5gJMG3atC6FFhExnNoZfbQU8D7gdWXRxcCxk5n7SNInKVodPyiL5gPTbD8g6RXAjyVtbPuR0fvaPg44DmDGjBnPGhUVERGLr52b144BXgEcXT5eUZYtFkl7U1yAfle5TgO2n7D9QPl8NsVF6A0W9xgREbF42rmm8ErbL296/StJ1y7OwSRtT7G05+vL+ZRGyqcAD9p+qrxjen3gtsU5RkRELL52WgpPSVpv5EX5pf3URDtJOhn4fxRTbc+TtB/wDYqlPC+QdI2kb5Vvfx1wXZlsTgfea/vBRaxLRERMUjsthY8CF0m6jWIE0toU02ePy/aeYxR/t8V7zwDOaCOWiIjoonZGH10oaX2KVd
cE/N72E12PLCIieq7dKbBfAUwv3/9ySdj+n65FFRERlWhnSOr3gPWAa1h4LcFAkkJERM2001KYAWw0Mnw0IiLqq53RR9fT3lxHEREx4Fq2FCT9hKKbaAXgRklXAI0LzLZ36n54ERHRS+N1Hx3VsygiIqIvtEwK5TKcERExRNq5phAREUMiSSEiIhrGu9B8oe1tJR1p+5BeBhWTM33WuT093twjduzp8SKie8a70Ly6pNcDO0k6hWKKiwbbV3c1soiI6LnxksKngFnAVODLo7YZ2KZbQUVERDXGG310OnC6pP+w/bkexhQRERVpZ5bUz0naiablOG3/tLthRUREFSYcfSTpv4CDgRvLx8FlWURE1Ew7E+LtCGxq+2kASScBvwU+3s3AIiKi99q9T2HFpucvaGcHScdLul/S9U1lK0u6QNLN5b8rNW37uKRbJN0kabs244qIiA5qJyn8F/BbSSeWrYTZwOfb2O9EYPtRZbOAC22vD1xYvkbSRsAewMblPkdLWqKtGkRERMdMmBRsnwy8CjizfLza9ilt7Hcp8OCo4p2Bk8rnJwFvbyo/xfYTtm8HbgE2b6sGERHRMW0tx2l7PnBOB463WvlZ2J4vadWyfE3gsqb3zSvLnkXSTGAmwLRp0zoQUkREjOiXuY80RtmYK73ZPs72DNszpkyZ0uWwIiKGS6+Twn2SVgco/72/LJ8HrNX0vqnAPT2OLSJi6I2bFCQ9p3n0UAecA+xdPt8bOLupfA9Jy0haB1gfuKKDx42IiDaMe03B9tOSrpU0zfadi/LBkk4GtgZWkTQP+DRwBHCapP2AO4HdyuPcIOk0ipvjngQ+YPupRa5NRERMSjsXmlcHbijXaH5spHCiNZpt79li07Yt3n84cHgb8URERJe0kxQ+2/UoIiKiL7QzId4lktYG1rf9S0nPA3JjWUREDbUzId4BwOnAsWXRmsCPuxlURERUo50hqR8AtgQeAbB9M7DquHtERMRAaicpPGH7byMvJC1JixvLIiJisLWTFC6R9AnguZLeBPwI+El3w4qIiCq0kxRmAQuA3wEHAucBh3YzqIiIqEY7o4+eLqfMvpyi2+gm2+k+ioiooQmTgqQdgW8Bt1JMXLeOpANt/6zbwUVERG+1c/Pal4A32L4FQNJ6wLlAkkJERM20c03h/pGEULqNhbObRkREjbRsKUjatXx6g6TzgNMorinsBlzZg9giIqLHxus+elvT8/uA15fPFwArdS2iiIioTMukYHvfXgYSERHVa2f00TrAvwLTm98/0dTZERExeNoZffRj4LsUdzE/3d1wIiKiSu0khb/a/nrXI4mIiMq1kxS+JunTwPnAEyOFtq9enANK2hA4taloXeBTwIrAARQXsgE+Yfu8xTlGREQsnnaSwsuAvYBtWNh95PL1IrN9E7ApgKQlgLuBs4B9ga/YPmpxPjciIiavnaSwC7Bu8/TZHbQtcKvtOyR14eMjImJRtHNH87UUXTvdsAdwctPrD0q6TtLxksa8F0LSTElXSbpqwYIFY70lIiIWUztJYTXg95J+IemckcdkDyxpaWAnivUZAI4B1qPoWppPMefSs9g+zvYM2zOmTJky2TAiIqJJO91Hn+7SsXcArrZ9H8DIvwCSvg38tEvHjYiIFtpZT+GSLh17T5q6jiStbnt++XIX4PouHTciIlpo547mR1m4JvPSwFLAY7afv7gHlfQ84E0UK7mN+IKkTctjzR21LSIieqCdlsIKza8lvR3YfDIHtf048MJRZXtN5jMjImLy2rnQ/Ay2f8xi3qMQERH9rZ3uo12bXj4HmMHC7qSIiKiRdkYfNa+r8CRFf//OXYkmIpg+69yeHm/uETv29HjR39q5ppB1FSIihsR4y3F+apz9bPtzXYgnIiIqNF5L4bExypYD9qMYOZSkEBFRM+Mtx9mYZkLSCsDBFDOZnkKLKSgiImKwjXtNQdLKwIeBdwEnAZvZfqgXgUVERO+Nd03hi8CuwHHAy2
z/uWdRRUREJca7ee0jwBrAocA9kh4pH49KeqQ34UVERC+Nd01hke92joiIwZYv/oiIaEhSiIiIhiSFiIhoSFKIiIiGJIWIiGhIUoiIiIZ2ps7uOElzgUeBp4Anbc8o754+FZhOMT337rl7OiKit6psKbzB9qa2Z5SvZwEX2l4fuLB8HRERPdRP3Uc7U8yvRPnv2yuMJSJiKFWVFAycL2m2pJll2Wq25wOU/6461o6SZkq6StJVCxYs6FG4ERHDoZJrCsCWtu+RtCpwgaTft7uj7eMoJuljxowZWSs6IqKDKmkp2L6n/Pd+4Cxgc+A+SasDlP/eX0VsERHDrOdJQdJy5aI9SFoOeDNwPXAOsHf5tr2Bs3sdW0TEsKui+2g14CxJI8f/oe2fS7oSOE3SfsCdwG4VxBYRMdR6nhRs3wa8fIzyB4Btex1PREQs1E9DUiMiomJJChER0ZCkEBERDUkKERHRkKQQERENSQoREdGQpBAREQ1JChER0ZCkEBERDUkKERHRkKQQERENSQoREdGQpBAREQ1JChER0VDVcpyVmj7r3J4eb+4RO/b0eBERiysthYiIaEhSiIiIhirWaF5L0kWS5ki6QdLBZflnJN0t6Zry8ZZexxYRMeyquKbwJPAR21dLWgGYLemCcttXbB9VQUwREUE1azTPB+aXzx+VNAdYs9dxRETEs1V6TUHSdOAfgcvLog9Kuk7S8ZJWarHPTElXSbpqwYIFPYo0ImI4VJYUJC0PnAF8yPYjwDHAesCmFC2JL421n+3jbM+wPWPKlCk9izciYhhUkhQkLUWREH5g+0wA2/fZfsr208C3gc2riC0iYphVMfpIwHeBOba/3FS+etPbdgGu73VsERHDrorRR1sCewG/k3RNWfYJYE9JmwIG5gIHVhBbRMRQq2L00a8BjbHpvF7HEhERz5Q7miMioiFJISIiGpIUIiKiIUkhIiIahnI9hYioRt3XMqlD/dJSiIiIhiSFiIhoSFKIiIiGJIWIiGhIUoiIiIYkhYiIaEhSiIiIhiSFiIhoSFKIiIiGJIWIiGhIUoiIiIYkhYiIaOi7pCBpe0k3SbpF0qyq44mIGCZ9lRQkLQF8E9gB2Ihi3eaNqo0qImJ49FVSADYHbrF9m+2/AacAO1ccU0TE0JDtqmNokPQOYHvb+5ev9wK2sP3BpvfMBGaWLzcEbuphiKsAf+zh8Xot9Rtsda5fnesGva/f2ranjLWh3xbZ0Rhlz8hato8DjutNOM8k6SrbM6o4di+kfoOtzvWrc92gv+rXb91H84C1ml5PBe6pKJaIiKHTb0nhSmB9SetIWhrYAzin4pgiIoZGX3Uf2X5S0geBXwBLAMfbvqHisJpV0m3VQ6nfYKtz/epcN+ij+vXVheaIiKhWv3UfRUREhZIUIiKiIUkhIiIa+upCcz+RdF0bb1tge9uuB9MFkj7cxtses31s14PpgiE4f5u18ba/2/5d14PpsDrXDUDS19t42yO2D+16MGPIheYWJN0AvGW8twDn2N6kRyF1lKT5wDGMfcPgiHfZ3qBHIXXUEJy/RymGcI93/taxPb03EXVOnesGIOkO4FMTvG2W7Zf0Ip7R0lJo7UDbd4z3Bknv71UwXfA92/853hskLderYLqg7ufvStvbjPcGSb/qVTAdVue6AXzF9knjvUHSSr0K5lnHTkshIqJ3JB1p+xBJu9n+UdXxjJak0IKkFwAfB94OjEwcdT9wNnCE7Yeriq1TJP0DxSy0a1LMMXUPRZfKnEoD64AhOX+imFm4+fxd4Rr8T13zuv0O2Ay43HY71096KqOPWjsNeAjY2vYLbb8QeENZ1nfZfVFJOoRianIBV7CwD/fkmixuVPfz92bgZuAzFNdOdgQ+C9xcbhtYda5b6ecUM6JuIumRpsejkh6pOri0FFqQdJPtDRd126CQ9AdgY9t/H1W+NHCD7fWriawzhuD8zQF2sD13VPk6wHlVXaTshDrXrZmks2333XoxaS
m0doekj0labaRA0mrlL+y7KoyrU54G1hijfPVy26Cr+/lbkmJW4dHuBpbqcSydVue6NfRjQoCMPhrPO4FZwCWSVi3L7qOYtXX3yqLqnA8BF0q6mYVfktOAFwMfbLnX4Kj7+TseuFLSKSw8f2tRzCz83cqi6ow6161B0q7AkcCqFF23Amz7+ZXGle6j4SXpOSy8mCeKX2dX2n6q0sCiLZJewsKBAiPn7xzbN1YaWAfUuW4jJN0CvK3fBnYkKSwGSZvZvrrqOGLx5PxFP5D0G9tbVh3HaLmmsHjeV3UA3STpp1XH0GV1P3+fqTqGbqlZ3a6SdKqkPSXtOvKoOqi0FOJZJK1ue37VccTikfQ22z+pOo5uqFPdJJ0wRrFtv6fnwTRJUhhHnW+gGQY5fxGLLkmhhfImmaMpbqK5uyyeSjE65/22z68qtk6o+x2/dT9/AJK2ozh/zUnvbNs/rzSwDqh53T5m+wuS/puibs9g+6AKwmrIkNTWvga8sdUNNMCg30BzGvArijt+7wWQ9CJgb4o7ft9UYWydUOvzJ+mrwAbA/7BwTP9U4CBJO9g+uLLgJqnOdSuNjDa6qtIoWkhLoYVy/P5LbD85qnxp4EbbL64mss4Ygjt+637+/jDWtOZll9kfBvmO9DrXbRCkpdBa3W+guUPSx4CTbN8HxR2/wD7U447fup+/v0ra3PYVo8pfCfy1ioA6qM5163tpKYxD0kbATtTwBppyvvZZFDcIjUwFcS/FHb9H2n6wqtg6pebnbzOKRZJWYGEXy1rAIxTXTGZXFdtk1blugyBJIWKAldeBGklv5PpQHdS5bv0s3Uct1H10DmQ9hapi65Syj31tFp6/JSTdV4cht3WuWyvlSoAPAGeMvhbWS7mjubVW8/E/TD3m4x/W9RTqcv5qu+ZAnes2AQFbAWdWGkSNE++kDMHonKynMMDqvOZAnes2CNJ91FrdR+eMrKcwenH7Wq2nQH3PX53XHKhz3YD+7rpNUmiteT7+0aNz6jAf/zCtp1DH81fnIbd1rttI1+2eFN23I8Nup1J03Z5i+4jKgiPdR0Mt6ykMtpoPua1z3fq66zYthXH0cxOvE2w/Lel24G+U9atTQhiC83cjMPBfkmOpc93o867bJIUW+r2JN1mSNgW+BbyA4leYgKmSHqa4QWigF6EZgvNX2yG3da5bqa+7btN91EK/N/EmS9I1wIG2Lx9V/irgWNsvryayzhiC8/cLigkNTxo1oeE+wLa2B3ZCwzrXbUQ/d90mKbQg6ffAdrbvGFW+NnB+DYY03tzqi1HSLTWYMK7u56+2Q27rXLdBkO6j1vq6idcBP5N0LsX0xM0jPP4FGPg566n/+avzkNs6121ckn5q+62VxpCWQmv93MTrBElvYewRHudVGliH1Pn8jZrQcFWK+tViQsNhmKyxFfXBUrhJCotA0k62z6k6joiIbkn3UQuSdh2j+GhJSwLYrnR+ksmStC5wKMUwzSOArwCvplgV6qOjpxgYNJI2AY6jaCX8DDjE9kPltitsb15lfJMlaQtgju1HJD2X4pf1ZhTDOD9v+0+VBjhJdR5O3O+jqzIhXmunAe8B3gq8rXwsV/5baZ9fh5xIMQnen4HLgJuAHSiuJxxfXVgdczTFhGovA/4A/FrSeuW2OkyVcDzwePn8axRDi48sy06oKqhOGOLJGh+iDyZrTPdRC5JeSfEL+nTgW7Yt6Xbb61QcWkdI+q3tfyyf32l72ljbBpWka2xv2vT6DRQth72Ao21vVllwHSBpzsjEcJKubq7P6LoPmiEYTtzXo6vSUmjB9pUUi9cvDYa67qMAAApbSURBVPxK0uYUzdi6eFrSBmXye56kGQCSXgwsUW1oHaGymQ6A7YuAfwK+RzFP/6C7XtK+5fNrm87fBsDfW+82EEbu+B2tL+747YA7JH2saU4uJK1WtpAqH12VlkIbJK0BfBWYYXvdquPpBEnbUnSxPA0cAPwb8HLg+cABts+uMLxJk/
TPwG22LxtVPg34D9sHVBNZZ5QJ72vAa4E/UlxPuKt8HGT72grDmxRJ2wPfoFhT4VnDiW0P9JDpfh9dlaQQDZJWAR6qw5DNYSFpBWBdyummR8b1D7o6Dyfud0kKMXQkHWd7ZtVxdIuk5W3/ueo4orV+Hl2VawpRS5JWbvF4IcUSj3U20LOLStpE0mWS7pJ0XNndMrLtivH2HQT9ProqLYWoJUlPUUxNrKZil6/XtL10JYF1iKQPt9oEfNL2yr2Mp5Mk/Ro4jGKo9P7AvsBOtm+tyci4vh5dlZvXFpGk9wMPAGfYfrLqeDpN0urAg7afqDqWSbqNYkbNO0dvkFT5CI8O+DzwRWCs/wYHvQdg+aaLyUdJmg38XNJe1GMEYNZTqBkBWwHvopg3qG6+B6wn6Qzb/151MJPwVWAl4FlJAfhCj2PphquBH9uePXqDpP0riKeTJOkFI3dl275I0j8BZwAD2wJq0teTNab7qAVJB9v+mqQtbf+m6nh6SZKAjWzfUHUsMTZJGwIP2P7jGNtWG+RRSHUfTgz9PboqSaGFkbtCR98tWjflDTSNERCD/GUSMUj69f+9dB+1NkfSXGCKpOuaygXY9ibVhNUZeuZynHeXxbVZjjOiX6nPl8JNS2EcKpYA/AVjXDsYvaLXoFHNl+OM6Ff9/v9eksKQUs2X42ylRqOrYkD1+/976T6agKQtKaZgXpvi7zXSfTTocyDVfTnOVuoyumpMdR4yXaO69fX/e2kpTEDFAvD/BswGGiMDbD9QWVAdImkHFt5qX7vlOFup8+gqSR8A/gFY23athkzXqW79/P9eksIEJF1ue4uq44jF068jPCL6VZLCBCQdQbG+wJlAox+66hEC3SRppu3jqo5jMlqNrgL6YoRHJ/TzpGqTVfO6LQnsR7EcZ3P9zga+O3r6i15LUpiApIvGKLbtbXoeTI9IOtD2sVXHMRn9PsJjsspJ1fakmFhtXlk8FdgDOMX2EVXFNll1rhuApJMpfpycxDPrtzewsu13VhUbJClETfX7CI/J6vdJ1SajznWDCZfj/IPtDXodU7OMPmpB0rttf7/VbJS2v9zrmDqtzk10+nyERwf09aRqk1TnugE8JGk3ilFUT0Nj2ovdgIcqjYwkhfEsV/67QqVRdMmoJvrIHPVTKeZ0H/gmuu2DWozw+GY/jPDogL6eVG2S6lw3KLrBjgSOljSSBFYELiq3VSrdR0Oq7k30YdDPk6pNVp3r1qxc9EljTWxYlbQUFkHNJserexO9pTqMrgIoux4um/CNA6jOdQOQ9Hxgiu1bR5VvYvu6Frv1RJLCotHEbxkYdW+ij6dO5/FZJP3U9lurjqMb6lA3SbtTrPdxv6SlgH1sX1luPhGo9Idnuo8WgaTDbB9adRydMixN9GEjaXXb86uOoxvqULdyuPQOtudL2pxiMMQnbJ/ZD8uNpqXQgiR5VMYcnRDGes8gsf20pNuBv7Hwjt/aJISaj65qadC/NMdTk7otMVIP21dIegPwU0lT6YPlRgd9LdduukjSv5arPTVIWlrSNpJOorjZZCBJ2lTSZcDFFCMhvghcIukySQN/3aQcXXUKRQvoCuDK8vnJkmZVGVsnSHqBpCMk/V7SA+VjTlm2YtXxTUad61Z6VNJ6Iy/KBLE1xQ+YjasKakS6j1qQtCzwHoq1mNehuAPxuRSJ9HyKoY3XVBfh5AzBHb+1Hl0l6RfAr4CTbN9blr2I4ofKG22/qcr4JqPOdQOQ9HLgcds3jypfCtjd9g+qiayMI0lhYuXJWgX4i+2Hq46nE4bgjt/fA9uNXgxJ0trA+a3uKB0UE9wV23LbIKhz3aC9bucqu6ZzTaEN5a/NOvRlNqv7Hb91H111h6SPUfyavg8aM8Luw8L6Dqo61w2KrukzgLNt3zlSWLZit6JoEV1EMRKp59JSGGL9PKd7J9R5dJWklYBZFOdvtbL4XuAc4EjbD1YV22TVuW7Qsmt6WYrZmCvvmk5SiFpT1lOIPt
aPXdNJCkOq3+d0nyw9cz2FeRQthaynMCDqXLd+l6QwpPp9TvfJGoLRVbVdc6DOdRsESQpDqt/ndJ+sIRhdVdsht3Wu2yDIzWvD6yFJu5UXY4Hiwqykd9IHc7p3wM8knSvpnZJeUz7eWY64qsPoqpEJDUerw4SGda5b30tLYUhJmk5xJ/M2LEwCK1HcNDTL9u3VRNY5dR5dJWl74BvAmENubQ9s4qtz3QZBkkL05ZzuMbGaD7mtbd36XW5eC2w/ACDp87Y/UXU8nVD30VVQ7wkN61y3fpeWwpCS9PXRRcBeFHc4Y/ugngfVQUMwuqq2Q27rXLdBkJbC8NqVYobU81m46MwewOyqAuqwzcYYXTUPuKwc3TLoTqT1kNsTgEEecnsi9a1b38voo+H1EuCPwPbAL22fBDxq+6Ty+aCr++iq5UZ/aQLYvgxYroJ4OqnOdet7aSkMKduPAh+S9Arg++VQzTr9SNiDYnTV0ZIeomgNrUgxumqPKgPrkDpPaFjnuvW9XFMIJAl4P/Bq2++uOp5Oq+voqpoPua1t3fpdksKQ6vc53TtB0uuA+2zfJGkr4FXAHNvnVhxaRN+qU3dBLJq6Lzf6VeAI4HuSPgd8gWLlvH+T9MVKg+sASUtIOlDS5yS9ZtS2Q1vtNwgkfVDSKuXz9SRdKukhSZdLelnV8dVdWgpDagiWG70BeClFne4G1rT9eDlV8W9tv7TSACdJ0neA51GsP70XcIntD5fbrrY9sOtsS7rB9sbl83OB79g+S9LWwOG2t6w0wJrLheYhZfuvwNEUF2L7bk73DrBtSxqZK2fk18/T1KOFvLntTQAkfYPiPJ5JMbuoxt2z/zV/L61q+ywA2xdLWqGimIZGHf7niEmy/Xfb82uUEADOlfS/wP8C3wFOk/RJ4GfApZVG1hlLjzyx/aTtmcA1FKOrlq8sqs44XdKJktYFzpL0IUnTJO0L3DnRzjE56T6K2pL0aooWw2WS1gN2ofhSOd32QM+2Ken7wPdHTw4naX/gGNtLVRNZZ0jaB3gfsB6wDMXQ1B9TLMf5pwpDq70khailYRhdFdEN6T6Kuqr16KrxSHpT1TF0S53r1i/SUohaqvvoqvFIutP2tInfOXjqXLd+kaQQtVfH0VWSzmm1CdjG9sDOEVTnug2CDEmN2ivXTphfdRwd9lrg3cCfR5WLYnGaQVbnuvW9JIWIwXQZ8LjtS0ZvkHRTBfF0Up3r1vfSfRQREQ0ZfRQxgMqZbSf9nn5U57oNgiSFiMFU5yG3da5b30v3UcQAajHkdllgCQZ8yG2d6zYIkhQiBlwdh9yOqHPd+lWSQkRENOSaQkRENCQpREREQ5JCREQ0JClERETD/wc56EnJiHnsKAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# bar plot to show the intervals returned by the transformer\n", "test_t.LotArea.value_counts(sort=False).plot.bar(figsize=(6,4))\n", "plt.ylabel('Number of houses')\n", "plt.title('Number of houses per interval')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "fengine", "language": "python", "name": "fengine" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }