{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# EqualFrequencyDiscretiser\n", "\n", "The EqualFrequencyDiscretiser() divides continuous numerical variables\n", "into contiguous equal frequency intervals, that is, intervals that contain\n", "approximately the same proportion of observations.\n", "\n", "The interval limits are determined by the quantiles. The number of intervals,\n", "i.e., the number of quantiles into which the variable should be divided, is\n", "determined by the user.\n", "\n", "**Note**\n", "\n", "For this demonstration, we use the Ames House Prices dataset produced by Professor Dean De Cock:\n", "\n", "Dean De Cock (2011) Ames, Iowa: Alternative to the Boston Housing\n", "Data as an End of Semester Regression Project, Journal of Statistics Education, Vol.19, No. 3\n", "\n", "http://jse.amstat.org/v19n3/decock.pdf\n", "\n", "https://www.tandfonline.com/doi/abs/10.1080/10691898.2011.11889627\n", "\n", "The version of the dataset used in this notebook can be obtained from [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from feature_engine.discretisation import EqualFrequencyDiscretiser\n", "\n", "plt.rcParams[\"figure.figsize\"] = [15,5]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
\n", "

5 rows × 81 columns

\n", "
" ], "text/plain": [ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", "0 1 60 RL 65.0 8450 Pave NaN Reg \n", "1 2 20 RL 80.0 9600 Pave NaN Reg \n", "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", "\n", " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold \\\n", "0 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", "1 Lvl AllPub ... 0 NaN NaN NaN 0 5 \n", "2 Lvl AllPub ... 0 NaN NaN NaN 0 9 \n", "3 Lvl AllPub ... 0 NaN NaN NaN 0 2 \n", "4 Lvl AllPub ... 0 NaN NaN NaN 0 12 \n", "\n", " YrSold SaleType SaleCondition SalePrice \n", "0 2008 WD Normal 208500 \n", "1 2007 WD Normal 181500 \n", "2 2008 WD Normal 223500 \n", "3 2006 WD Abnorml 140000 \n", "4 2008 WD Normal 250000 \n", "\n", "[5 rows x 81 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv('housing.csv')\n", "\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X_train : (1022, 79)\n", "X_test : (438, 79)\n" ] } ], "source": [ "# let's separate into training and testing set\n", "X = data.drop([\"Id\", \"SalePrice\"], axis=1)\n", "y = data.SalePrice\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.3, random_state=0)\n", "\n", "print(\"X_train :\", X_train.shape)\n", "print(\"X_test :\", X_test.shape)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA2cAAAE/CAYAAADCCbvWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAepElEQVR4nO3df7Cld10f8PfHBBIkKIkh65KkbGhTK5gS6E6M0qG3BE0gjomOOLEBNpbOtiMi1nRgI46/pumktjrQUbQpoEv5EcMPJzsEq2nwjmPFRH6EH0mICbCQJUsWUIRlbOzCp3+cZ/WyuXfvPXfvj+fe+3rNnDnnfM/znPM9nz33Pve93+/zPdXdAQAAYH1903p3AAAAAOEMAABgFIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEM1hEVR2uqqeudz8AANjchDM2paq6qqruqKqvVtWh4fZPVFUtsP3vVNV/nO+x7j6tuz85xWtfU1VdVT+63P4DwEZSVfur6nlTbD9TVQcWeMxxlC1LOGPTqaprk7w2yX9J8u1JtiX5d0meneSx82x/0gp3YVeSvxyuj9fPk1f4dQFgM3AcZcsSzthUqupbk/xykp/o7nd091d64kPdfXV3PzKMkv1mVb2nqr6a5F8u8pxdVf+oqi6uqs/NDXNV9UNV9ZE595+S5F8k2Z3k0qraNuexmao6UFWvqqrPJfntqvqmqtpTVZ+oqi9W1c1Vdcacfd4+vOZfV9UfV9XTV6xYALCKquqUqnpNVT00XF4ztD0+ye8nefJw6sDhqnrysI/jKFuacMZm8z1JTklyyyLb/ask1yd5QpI/WcoTd/efJflqkuce8zxvnXP/JUne393vTHJvkquPeZpvT3JGkqdkcuD5qSRXZnIgenKSv0ryG3O2//0k5yc5K8kHk7xlKX0FgBF4dZKLk1yY5BlJLkryc9391STPT/LQcOrAad390LCP4yhbmnDGZnNmki9095GjDVX1p1X1par6m6p6ztB8S3f/n+7+enf/3yme/21Jfmx43ickecHQdtRL8vdh7a159JSMryf5he5+pLv/Jsm/TfLq7j7Q3Y8k+cUkP3J0qkZ3v3EY/Tv62DOG0UEAGLurk/xydx/q7s8n+aUkL15kH8dRtjThjM3mi0nOnDsPvbu/t7ufODx29DP/4DKf/61JfriqTknyw0k+2N2fTpKqenaS85LcNGfbC6rqwjn7f/6YMPiUJL83hMcvZfK/hF9Lsq2qTqqqG4apGl9Osn/Y58xl9h0A1tKTk3x6zv1PD23zchwF4YzN531JHklyxSLb9XKevLvvyeTg8vw8ekrjriSV5K5hLvwdQ/tLjvO6DyZ5fnc/cc7l1O7+7PD8VyR5XpJvTbJj2GfeFScBYGQeyiQ8HfUPhrZk/uOw4yhbnnDGptLdX8pk2sTrqupHquq04WThC5M8fpHdT6qqU+dcHrWy4+Ctmcxxf06StydJVZ2a5Eczmf9+4ZzLy5NcfZwVpX4ryfXDCdCpqidV1dFg+YRMguYXk3xzkv+02PsHgHX0mLnH0Uym/f/ccGw7M8nPJ3nzsO3DSb7t6BRDx1GYEM7YdLr7V5L8TJJXJjmUyQHgvyd5VZI/Pc6ue5L8zZzLexfY7m1JZpK8t7u/MLRdOezzpu7+3NFLkjckOSnJZQs812uT7Evyh1X1lSR/luS7h8felMko3WeT3DM8BgBj9Z5843H01CTvT/KRJB/NZEGO/5gk3f3xTI6nnxymI/5wHEch1b2s2V0AAACsICNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACOw0HdGrIozzzyzd+zYsZYvOVpf/epX8/jHL/a1WxylXtNTs+mo1zf6wAc+8IXuftJ694OlO5FjrM//9NRsemq2POo2vbHX7HjH2DUNZzt27Mj73//+tXzJ0Zqdnc3MzMx6d2PDUK/pqdl01OsbVdWn17sPTOdEjrE
+/9NTs+mp2fKo2/TGXrPjHWNNawQAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBE4eb07wMa1Y8+t87bvv+HyNe4JAKtpod/3id/5ACvJyBkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAILCmcVdW/r6q7q+pjVfW2qjq1qs6oqtuq6v7h+vTV7iwAAMBmtWg4q6qzk/xUkp3d/V1JTkpyVZI9SW7v7vOT3D7cBwAAYBmWOq3x5CSPq6qTk3xzkoeSXJFk7/D43iRXrnz3AAAAtoZFw1l3fzbJf03ymSQHk/x1d/9hkm3dfXDY5mCSs1azowAAAJvZyYttMJxLdkWS85J8Kcnbq+pFS32BqtqdZHeSbNu2LbOzs8vr6SZz+PDhDV+Lay84Mm/7aryvzVCvtaZm01EvAGC9LRrOkjwvyae6+/NJUlXvSvK9SR6uqu3dfbCqtic5NN/O3X1jkhuTZOfOnT0zM7MiHd/oZmdns9Frcc2eW+dt33/1zIq/1mao11pTs+moFwCw3pZyztlnklxcVd9cVZXkkiT3JtmXZNewza4kt6xOFwEAADa/RUfOuvuOqnpHkg8mOZLkQ5mMhJ2W5OaqemkmAe6Fq9lRAACAzWwp0xrT3b+Q5BeOaX4kk1E0AAAATtBSl9IHAABgFQlnAAAAIyCcAQAAjIBwBgBroKpOqqoPVdW7h/tnVNVtVXX/cH36nG2vq6oHquq+qrp0/XoNwFoSzgBgbbwik6+iOWpPktu7+/wktw/3U1VPS3JVkqcnuSzJ66rqpDXuKwDrQDgDgFVWVeckuTzJ6+c0X5Fk73B7b5Ir57Tf1N2PdPenkjyQ5KK16isA60c4A4DV95okr0zy9Tlt27r7YJIM12cN7WcneXDOdgeGNgA2uSV9zxkAsDxV9QNJDnX3B6pqZim7zNPWCzz37iS7k2Tbtm2ZnZ1dVh8PHz583H2vveDIgo8t9zU3usVqxqOp2fKo2/Q2cs2EMwBYXc9O8oNV9YIkpyb5lqp6c5KHq2p7dx+squ1JDg3bH0hy7pz9z0ny0HxP3N03JrkxSXbu3NkzMzPL6uDs7GyOt+81e25d8LH9Vy/vNTe6xWrGo6nZ8qjb9DZyzUxrBIBV1N3Xdfc53b0jk4U+3tvdL0qyL8muYbNdSW4Zbu9LclVVnVJV5yU5P8mda9xtANaBkTOSJDsW+F/R/TdcvsY9Adgybkhyc1W9NMlnkrwwSbr77qq6Ock9SY4keVl3f239ugnAWhHOAGCNdPdsktnh9heTXLLAdtcnuX7NOgbAKJjWCAAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIzAyevdAcZtx55b17sLAACwJRg5AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgBqzWO2HJWStx/w+Wr0BMAAGC1GTkDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAGrNa6z5azICAAAbD5GzgAAAEbAyBkAsGwLzQDxvZsA0xPONhnTJAEAYGMyrREAAGAEhDMAAIARMK1xBR1vSqG59wAAwPEYOQMAABgB4QwAAGAEhDMAAIAREM4AAABGYEnhrKqeWFXvqKqPV9W9VfU9VXVGVd1WVfcP16evdmcBAAA2q6WOnL02yf/q7n+S5BlJ7k2yJ8nt3X1+ktuH+wAAACzDouGsqr4lyXOSvCFJuvtvu/tLSa5IsnfYbG+SK1erkwAAAJvdUkbOnprk80l+u6o+VFWvr6rHJ9nW3QeTZLg+axX7CQAAsKkt5Uu
oT07yrCQv7+47quq1mWIKY1XtTrI7SbZt25bZ2dnl9HNDuPaCIws+duz7Pnz4cGZnZ4+7z0a1Gv/GR+vF0qnZdNQLAFhvSwlnB5Ic6O47hvvvyCScPVxV27v7YFVtT3Jovp27+8YkNybJzp07e2Zm5sR7PVLX7Ll1wcf2Xz3zDfdnZ2czMzNz3H02qmPf60o4Wi+WTs2mo14AwHpbdFpjd38uyYNV9R1D0yVJ7kmyL8muoW1XkltWpYcAAABbwFJGzpLk5UneUlWPTfLJJD+eSbC7uapemuQzSV64Ol0EAADY/JYUzrr7riQ753nokpXtDgAAwNa01O85AwCWqapOrao7q+rDVXV3Vf3S0H5GVd1WVfcP16fP2ee6qnqgqu6rqkvXr/cArBXhDABW3yNJntvdz0hyYZLLquriTBbYur27z09y+3A/VfW0JFcleXqSy5K8rqpOWpeeA7BmhDMAWGU9cXi4+5jh0kmuSLJ3aN+b5Mrh9hVJburuR7r7U0keSHLRGnYZgHUgnAHAGqiqk6rqrky+eua24StqtnX3wSQZrs8aNj87yYNzdj8wtAGwiS11tUYA4AR099eSXFhVT0zye1X1XcfZvOZ7ikdtVLU7ye4k2bZt27K/SH2xL2G/9oIjUz/nZv9Sd19cPz01Wx51m95GrplwBgBrqLu/VFWzmZxL9nBVbe/ug1W1PZNRtWQyUnbunN3OSfLQPM91Y5Ibk2Tnzp293C9SX+xL2K/Zc+vUz7n/6uX1ZaPwxfXTU7PlUbfpbeSamdYIAKusqp40jJilqh6X5HlJPp5kX5Jdw2a7ktwy3N6X5KqqOqWqzktyfpI717bXAKw1I2cAsPq2J9k7rLj4TUlu7u53V9X7ktxcVS9N8pkkL0yS7r67qm5Ock+SI0leNkyLBGATE84AYJV190eSPHOe9i8muWSBfa5Pcv0qdw2AETGtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGwPecseJ27Ll1wcf233D5GvYEAAA2DiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAI+BLqNXLsFzNfe8GRXHOcL2sGAAC2FiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAievdwfgqB17bp23ff8Nl69xTwAAYO0ZOQMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAElhzOquqkqvpQVb17uH9GVd1WVfcP16evXjcBAAA2t2lGzl6R5N459/ckub27z09y+3AfAACAZVhSOKuqc5JcnuT1c5qvSLJ3uL03yZUr2zUAAICt4+QlbveaJK9M8oQ5bdu6+2CSdPfBqjprvh2raneS3Umybdu2zM7OLr+3I3ftBUeWvO22x023/WZxvH//heoxOzubw4cPb+rPzmpQs+moFwCw3hYNZ1X1A0kOdfcHqmpm2hfo7huT3JgkO3fu7JmZqZ9iw7hmz61L3vbaC47kVz+61Gy8eey/embBxxaq3/6rZzI7O5vN/NlZDWo2HfUCANbbUqY1PjvJD1bV/iQ3JXluVb05ycNVtT1JhutDq9ZLANjAqurcqvqjqrq3qu6uqlcM7QsurlVV11XVA1V1X1Vdun69B2CtLBrOuvu67j6nu3ckuSrJe7v7RUn2Jdk1bLYryS2r1ksA2NiOJLm2u78zycVJXlZVT8sCi2sNj12V5OlJLkvyuqo6aV16DsCaOZHvObshyfdV1f1Jvm+4DwAco7sPdvcHh9tfyWT147Oz8OJaVyS5qbsf6e5PJXkgyUVr22sA1tpUJz1192yS2eH2F5NcsvJdAoDNq6p2JHl
mkjuy8OJaZyf5szm7HRjaANjEtt6KFACwTqrqtCTvTPLT3f3lqlpw03naep7nW5EVkRdbrXQ5qwtv9tVPrfA6PTVbHnWb3kaumXAGAGugqh6TSTB7S3e/a2h+uKq2D6NmcxfXOpDk3Dm7n5PkoWOfc6VWRF5stdJpViM+6nir824GVnidnpotj7pNbyPX7ETOOQMAlqAmQ2RvSHJvd//anIcWWlxrX5KrquqUqjovyflJ7lyr/gKwPoycsaZ2LON/XwE2gWcneXGSj1bVXUPbz2aymNbNVfXSJJ9J8sIk6e67q+rmJPdkstLjy7r7a2vfbQDWknAGAKusu/8k859HliywuFZ3X5/k+lXrFACjY1ojAADACBg5WwZT8wAAgJVm5AwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABiBk9e7A7CYHXtuzbUXHMk1e2591GP7b7h8HXoEAAArz8gZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAhYrZENbcc8KzgmVnEEAGDjMXIGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693BwCAcdix59b17gLAlmbkDAAAYASEMwAAgBEQzgAAAEZg0XBWVedW1R9V1b1VdXdVvWJoP6Oqbquq+4fr01e/uwAAAJvTUkbOjiS5tru/M8nFSV5WVU9LsifJ7d19fpLbh/sAAAAsw6LhrLsPdvcHh9tfSXJvkrOTXJFk77DZ3iRXrlYnAQAANrupzjmrqh1JnpnkjiTbuvtgMglwSc5a6c4BwGZQVW+sqkNV9bE5bQueHlBV11XVA1V1X1Vduj69BmCtLfl7zqrqtCTvTPLT3f3lqlrqfruT7E6Sbdu2ZXZ2dhndHJdrLzhyws+x7XEr8zxbxbT12gyfsxN1+PBhdZiCerHKfifJryd505y2o6cH3FBVe4b7rxpOHbgqydOTPDnJ/66qf9zdX1vjPgOwxpYUzqrqMZkEs7d097uG5oerant3H6yq7UkOzbdvd9+Y5MYk2blzZ8/MzJx4r9fZNSvwJZ3XXnAkv/pR3wG+VNPWa//VM6vXmQ1idnY2m+Hnba2oF6upu/94mH0y1xVJZobbe5PMJnnV0H5Tdz+S5FNV9UCSi5K8by36CsD6WcpqjZXkDUnu7e5fm/PQviS7htu7ktyy8t0DgE1rodMDzk7y4JztDgxtAGxySxmKeHaSFyf5aFXdNbT9bJIbktxcVS9N8pkkL1ydLsLK2nGckc/9N1y+hj0BmNd85w30vBuu0KkDR6f1ruR0+80+TdhU6Omp2fKo2/Q2cs0WDWfd/SeZ/0CRJJesbHcAYMtY6PSAA0nOnbPdOUkemu8JVurUgaPTeldi2v5Rm316uanQ01Oz5VG36W3kmk21WiMAsGIWOj1gX5KrquqUqjovyflJ7lyH/gGwxqxIAQCrrKrelsniH2dW1YEkv5AFTg/o7rur6uYk9yQ5kuRlVmoE2BqEMwBYZd39Yws8NO/pAd19fZLrV69HAIyRaY0AAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMAK+5wzm2LHn1nnb999w+Rr3BACArcbIGQAAwAgIZwAAACNgWuMCFprexsbg3w8AgI1GOAMAVtzx/pPMebwA8zOtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAETh5vTsAG8GOPbcua7/9N1y+wj0BAGCzMnIGAAAwAsIZAADACJjWCKtooem
QpjsCAHAsI2cAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693B9bTjj23rncX2KKO99nbf8Pla9gTAADGwsgZAADACAhnAAAAI7ClpzXCGC1nuq2pkAAAG5+RMwAAgBEQzgAAAEbAtEbgURaaWmn6JLASrFgLMD8jZwAAACMgnAEAAIzAppnWaIoETGelv4R9OT+Dfm6BY5lWDWxlRs4AAABG4ITCWVVdVlX3VdUDVbVnpToFAFudYyzA1rPsaY1VdVKS30jyfUkOJPnzqtrX3fesVOeOtdLTsGCzmO9n49oLjuSakfzMbKWf3bWcqmn61+a1HsdYANbfiZxzdlGSB7r7k0lSVTcluSKJAwcAnBjH2Ck4fxXYLE4knJ2d5ME59w8k+e4T6w4AEMfYR1nuCPxKjtwfL+gt53WWExwF0b9n9sDy+Rwtbr0+X9Xdy9ux6oVJLu3ufzPcf3GSi7r75cdstzvJ7uHudyS5b/nd3VTOTPKF9e7EBqJe01Oz6ajXN3pKdz9pvTuxVa3DMdbnf3pqNj01Wx51m97Ya7bgMfZERs4OJDl3zv1zkjx07EbdfWOSG0/gdTalqnp/d+9c735sFOo1PTWbjnoxMmt6jPX5n56aTU/NlkfdpreRa3YiqzX+eZLzq+q8qnpskquS7FuZbgHAluYYC7AFLXvkrLuPVNVPJvmDJCcleWN3371iPQOALcoxFmBrOpFpjenu9yR5zwr1Zasx1XM66jU9NZuOejEqa3yM9fmfnppNT82WR92mt2FrtuwFQQAAAFg5J3LOGQAAACtEOFtBVfXGqjpUVR+b03ZGVd1WVfcP16fPeey6qnqgqu6rqkvntP+zqvro8Nh/q6pa6/ey2qrq3Kr6o6q6t6rurqpXDO3qtYCqOrWq7qyqDw81+6WhXc2Oo6pOqqoPVdW7h/vqBYOqumz4vD9QVXvWuz/roar2Dz/fd1XV+4e2Ffs9UVWnVNXvDu13VNWOtX6PJ2q1/745Xo2qatfwGvdX1a61eccrY4G6/WJVfXb4vN1VVS+Y89iWrlutwd+GG6Jm3e2yQpckz0nyrCQfm9P2K0n2DLf3JPnPw+2nJflwklOSnJfkE0lOGh67M8n3JKkkv5/k+ev93lahVtuTPGu4/YQkfzHURL0WrlklOW24/ZgkdyS5WM0WrdvPJHlrkncP99XLxaU7mSw08okkT03y2OHz/7T17tc61GF/kjOPaVux3xNJfiLJbw23r0ryu+v9npdRo1X9+2ahGiU5I8knh+vTh9unr3c9TrBuv5jkP8yz7ZavW9bgb8ONUDMjZyuou/84yV8e03xFkr3D7b1JrpzTflN3P9Ldn0ryQJKLqmp7km/p7vf15NPypjn7bBrdfbC7Pzjc/kqSe5OcHfVaUE8cHu4+Zrh01GxBVXVOksuTvH5Os3rBxEVJHujuT3b33ya5KZOfA1b298Tc53pHkks22uj7Gvx9s1CNLk1yW3f/ZXf/VZLbkly28u9wdSxQt4Vs+bqt0d+Go6+ZcLb6tnX3wWTyoUty1tB+dpIH52x3YGg7e7h9bPumNQwpPzOTkSD1Oo6aTNG7K8mhTH6JqNnxvSbJK5N8fU6besHEQp/5raaT/GFVfaCqdg9tK/l74u/26e4jSf46ybetwvtYa2tRo836Gf3JqvrIMO3x6BQ9dZtjFf82HH3NhLP1M9//mvVx2jelqjotyTuT/HR3f/l4m87TtuXq1d1f6+4Lk5yTyf8OfddxNt/SNauqH0hyqLs/sNRd5mnbMvViS/LZnnh2dz8ryfOTvKyqnnOcbZfze2Kr1Xkla7QZa/ebSf5hkguTHEzyq0O7ug1W+W/D0ddMOFt9Dw/DqxmuDw3tB5KcO2e7c5I8NLSfM0/7plNVj8nkh+8t3f2uoVm9lqC7v5RkNpMhdzWb37OT/GBV7c9kutZzq+rNUS8
4aqHP/JbS3Q8N14eS/F4m0z1X8vfE3+1TVScn+dYsfarbmK1FjTbdZ7S7Hx7+o/XrSf5HJp+3RN2SrMnfhqOvmXC2+vYlObriy64kt8xpv2pYNea8JOcnuXMYrv1KVV08zIF9yZx9No3hvb0hyb3d/WtzHlKvBVTVk6rqicPtxyV5XpKPR83m1d3Xdfc53b0jk5N+39vdL4p6wVF/nuT8qjqvqh6byc/JvnXu05qqqsdX1ROO3k7y/Uk+lpX9PTH3uX4kk99FG2YU4zjWokZ/kOT7q+r0Yfrf9w9tG9bRkDH4oUw+b4m6rdXfhuOv2TSrh7gsusrM2zIZov5/mSTwl2Yyj/X2JPcP12fM2f7Vmawsc1/mrP6WZGcmP6yfSPLrGb4sfDNdkvzzTIaLP5LkruHyAvU6bs3+aZIPDTX7WJKfH9rVbPHazeTvV2tULxeX4TL83v2L4bP96vXuzzq8/6dmstrbh5PcfbQGK/l7IsmpSd6eyWIFdyZ56nq/72XUaVX/vjlejZL866H9gSQ/vt61WIG6/c8kHx2O5fuSbFe3v+vzqv9tuBFqdrSjAAAArCPTGgEAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAEfj/zbRLNhWNFzsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# we will use two continuous variables for transformation\n", "\n", "X_train[[\"LotArea\", 'GrLivArea']].hist(bins=50)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The EqualFrequencyDiscretiser() works only with numerical variables.\n", "A list of variables can be passed as an argument. Alternatively, the discretiser\n", "will automatically select and transform all numerical variables.\n", "\n", "During `fit()`, the EqualFrequencyDiscretiser() finds the interval boundaries,\n", "i.e., the quantiles, for each variable.\n", "\n", "During `transform()`, it sorts the values of each variable into those intervals.\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EqualFrequencyDiscretiser(q=10, return_boundaries=False, return_object=False,\n", " variables=['LotArea', 'GrLivArea'])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''\n", "Parameters\n", "----------\n", "\n", "q : int, default=10\n", " Desired number of equal frequency intervals / bins. In other words the\n", " number of quantiles in which the variables should be divided.\n", "\n", "variables : list\n", " The list of numerical variables that will be discretised. If None, the\n", " EqualFrequencyDiscretiser() will select all numerical variables.\n", "\n", "return_object : bool, default=False\n", " Whether the numbers in the discrete variable should be returned as\n", " numeric or as object. The decision is made by the user based on\n", " whether they would like to proceed the engineering of the variable as\n", " if it was numerical or categorical.\n", "\n", "return_boundaries: bool, default=False\n", " whether the output should be the interval boundaries. If True, it returns\n", " the interval boundaries. 
If False, it returns integers.\n", "'''\n", "\n", "efd = EqualFrequencyDiscretiser(q=10, variables=['LotArea', 'GrLivArea'])\n", "\n", "efd.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'LotArea': [-inf,\n", " 5007.1,\n", " 7164.6,\n", " 8165.700000000001,\n", " 8882.0,\n", " 9536.0,\n", " 10200.0,\n", " 11046.300000000001,\n", " 12166.400000000001,\n", " 14373.9,\n", " inf],\n", " 'GrLivArea': [-inf,\n", " 912.0,\n", " 1069.6000000000001,\n", " 1211.3000000000002,\n", " 1344.0,\n", " 1479.0,\n", " 1603.2000000000003,\n", " 1716.0,\n", " 1893.0000000000005,\n", " 2166.3999999999996,\n", " inf]}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# binner_dict contains the boundaries of the different bins\n", "efd.binner_dict_" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "train_t = efd.transform(X_train)\n", "test_t = efd.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([8, 3, 0, 4, 1, 2, 6, 9, 7, 5], dtype=int64)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the numbers are the different bins into which the observations\n", "# were sorted\n", "train_t['GrLivArea'].unique()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([4, 0, 2, 3, 1, 7, 5, 8, 6, 9], dtype=int64)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the numbers are the different bins into which the observations\n", "# were sorted\n", "train_t['LotArea'].unique()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LotAreaGrLivAreaLotArea_binnedGrLivArea_binned
649375203448
6822887129103
960720785820
13849060125843
1100840043830
\n", "
" ], "text/plain": [ " LotArea GrLivArea LotArea_binned GrLivArea_binned\n", "64 9375 2034 4 8\n", "682 2887 1291 0 3\n", "960 7207 858 2 0\n", "1384 9060 1258 4 3\n", "1100 8400 438 3 0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here I put side by side the original variable and the transformed variable\n", "tmp = pd.concat([X_train[[\"LotArea\", 'GrLivArea']], train_t[[\"LotArea\", 'GrLivArea']]], axis=1)\n", "tmp.columns = [\"LotArea\", 'GrLivArea',\"LotArea_binned\", 'GrLivArea_binned']\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAFKCAYAAACkdEbCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de5hkZXXv8e8PBkEuoggi9/GCGIhKcIIXjBIVb6ioEQMqQUQxOR4xiRrwEtEYIzkx5phjopKoYBQQiQpGjoLo4NEIMiCKiAoiAoIwinJVEFznj71bira7p3q6q2vXnu/nefqpXfv2rqrq6TVr7/d9K1WFJEmSJKlf1ht3AJIkSZKkxWexJ0mSJEk9ZLEnSZIkST1ksSdJkiRJPWSxJ0mSJEk9ZLEnSZIkST1ksacll+TYJH87praT5ENJfpbka/M8dnmSSrJsVPGNQpIXJTl93HEspiQ3J3nguONYkyR7J7lq3HFImhzmyKVljhwfc+TSsNgTSS5Pcm2STQbWvSzJyjGGNSqPA/YBtq+qPccdzGKbKdlW1Uer6injjGuxVdWmVXXZMPu278eDRx2TpH4yR/aHOfK3mSP7z2JPU5YBrx53EPOVZP15HrITcHlV3TKKeBaqvarqv8vWuK8Qj7t9SZ1hjuwAc+TdjTtHjbt9Dcd/MJryD8Brk9x7+oaZroQlWZnkZe3yS5J8Jck/Jfl5ksuSPLZdf2WS65IcPO20WyY5I8lNSc5KstPAuR/abrs+yXeTvGBg27FJ3pvktCS3AH84Q7zbJjm1Pf7SJC9v1x8K/DvwmLaLw1tnOHa9JG9K8sM27g8n2Xzabi9NcnWSa5K8ZuDYPZOsSnJjexX4XQPbHp3kv9v35xtJ9p72Xr49yVeAW4E3JFk1La6/SHJqu7xvkq+37VyZ5C0Du36pffx5+xof034OXx4412OTnJvkhvbxsdNieVv7ed6U5PQkW7bbNkrykSQ/bV/HuUm2nv4etvtenuT1Sb6dpjvQh5JsNLD9mUkuaM/z30kePu3YI5J8E7hlpmQyeCWy/Z34lySfaWM+J8mD2m1T78c32vfjj9ei/TclOXla++9O8s/t8iFJLm7bvizJK2Z6TyRNNHMk5siYI82Rk6iq/FnHf4DLgScDnwD+tl33MmBlu7wcKGDZwDErgZe1yy8B7gAOAdYH/ha4AvgXYEPgKcBNwKbt/se2zx/fbn838OV22ybAle25lgF7AD8Bdhs49gZgL5qLFRvN8HrOAv4V2AjYHVgNPGkg1i/P8V68FLgUeCCwafue/Me09+GENs6Hted+
crv9q8BB7fKmwKPb5e2AnwLPaGPep32+1cB7eQWwW/uaN2/fn50H4joXOKBd3rttez3g4cC1wHPm+Kx+85qBLYCfAQe1bR3YPr/vQCzfBx4C3LN9fnS77RXAp4GN28/5kcC95vid+hawQ9vmV7jrd2sP4DrgUe15Dm7333Dg2AvaY+85y/kLePDA78T1wJ7ta/oocOJM+65N+zRXum+deq3tMdcMfL77Ag8CAjyh3XePgc/qqnH/G/fHH3/W/gdz5OCx5khz5N3axxzZ+R/v7GnQm4FXJdlqLY79QVV9qKruBD5G80fgb6rqtqo6HbgdGOwT/pmq+lJV3Qa8keZK4g7AM2m6kHyoqu6oqvOB/wSeP3DsKVX1lar6dVX9cjCI9hyPA46oql9W1QU0VyoPGvJ1vAh4V1VdVlU3A68HDph25eytVXVLVV0IfIgmGQD8Cnhwki2r6uaqOrtd/2LgtKo6rY35DGAVTWKbcmxVXdS+5huAU6bOm2Rn4KHAqQBVtbKqLmzP9U2axPqEIV/fvsAlVfUfbVsnAN8BnjWwz4eq6ntV9QvgJJr/DEy9vvvSJIU7q+q8qrpxjrbeU1VXVtX1wNsH3qeXA++vqnPa8xwH3AY8euDYf26P/cWQr+sTVfW1qrqDJpHtPse+82q/qn4InA88p932RODWqc+3qj5TVd+vxlnA6cAfDBm3pMlhjjRHgjnybu2bI7vPYk+/UVXfAv4LOHItDr92YPkX7fmmr9t04PmVA+3eTHPVaVuaK0SParsO/DzJz2mSy/1nOnYG2wLXV9VNA+t+SHPlcBjbtvsPHrsMGOyKceW07du2y4fSXO37Ttt945nt+p2A/ae9pscB28zxmo7nrj/8LwQ+VVW3AiR5VJIvJlmd5AbgT4Et1/L1Tb2GwffnxwPLt3LX5/YfwOeAE9suOv8ryQZztDXb+7QT8Jpp78cOA9unHzuM2WKeydq0P/3zOH5qQ5KnJzk7TZeon9P8B2XYz0PShDBH/uZ4c+RdzJENc2SHWexpuqNoruoM/mGbGqi98cC6wcSyNnaYWkiyKU03hqtp/oCcVVX3HvjZtKr+bODYmuO8VwNbJNlsYN2OwI+GjOtqmj90g8fewd0T9Q7Ttl8NUFWXVNWBwP2AvwdOTjN725U03VwGX9MmVXX0HK/pdJoxG7vT/AE9fmDb8TRXMHeoqs2B99F0j5jpPGt6fVOvYY3vT1X9qqreWlW7Ao+lucL8J3McMuP7RPN+vH3a+7FxewX1N82tKZ4FWJv2Pw7snWR74Lm0n0eSDWmuqr8T2Lqq7g2cxl2fh6R+MUeaI2dkjjRHdpXFnu6mqi6l6WJy+MC61TR/6F6cZP0kL6Xpf70Qz0jyuCT3AN4GnFNVV9JcNX1IkoOSbND+/H6S3xky/iuB/wbekWaw9MNpriZ+dMi4TgD+IskD2gT7d8DH2q4PU/46ycZJdqMZN/ExgCQvTrJVVf0a+Hm7753AR4BnJXlq+/5tlOa7Zbaf43XcAZxMMynAFsAZA5s3o7ky+8ske9JcRZuyGvg1zXiKmZxG8/6+MMmydjD2rjTv+5yS/GGSh6WZ3e1Gmi4rd85xyCuTbJ9kC+ANtO8T8G/An7ZXX5NkkzQD6jeb/VQLci13fz/m3X77b2AlTZekH1TVxe2me9CMqVkN3JHk6TTjbyT1kDnSHDkbc6Q5sqss9jSTv6EZXD3o5cDraAZN70aTLBbieJorpNfTDGJ+EUDbteQpwAE0V7l+THMFcMN5nPtAmkHYVwOfBI5qxwAM44M0XTG+BPwA+CXwqmn7nEUzQP1M4J3VjLcAeBpwUZKbaQbUH1DNmIgrgf1o/pivprlq9jrW/O/veJpJAT4+LZH+D+BvktxEM4bkpKkNbTeWtwNfabtfDPaxp6p+SnO18TU0n+VfAc+sqp+sIRZorlSfTJPELm7fh4+sIf7Tgcvan79tY1hF8/v0HpqB75fSDJAflbcAx7XvxwsW0P7U5/Gb
K8jt7+vhNJ/Bz2j+U3HqYgYvqXPMkebImZgjzZGdlKpR3gmWtC5KcjnNTHSfH3cskiR1iTlSS8k7e5IkSZLUQxZ7kiRJktRDduOUJEmSpB7yzp4kSZIk9ZDFniRJkiT10LJxB7AQW265ZS1fvnzcYUiSlsB55533k6raatxxTApzpCStG+bKjxNd7C1fvpxVq1aNOwxJ0hJI8sNxxzBJzJGStG6YKz/ajVOSJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6aNm4A1gKy4/8zKKd6/Kj9120c0mSJGnp+X9DrSu8sydJkiRJPbRO3NlTP3gVTpIWl39XJWnxdelvq3f2JEmSJKmHLPYkSZIkqYfsxilJkjqlS12gBnU1LuhubF2NS/PT1c+xq3F1iXf2JElaYkk+mOS6JN8aWLdFkjOSXNI+3mdg2+uTXJrku0meOp6oJUmTxjt7kpaUV+EkAI4F3gN8eGDdkcCZVXV0kiPb50ck2RU4ANgN2Bb4fJKHVNWdSxyzJGnCeGdPkqQlVlVfAq6ftno/4Lh2+TjgOQPrT6yq26rqB8ClwJ5LEqgkaaJ5Z0+SNG/eoR2JravqGoCquibJ/dr12wFnD+x3VbtOkqQ5WexJktRtmWFdzbhjchhwGMCOO+44ypgkjYAX0rTY7MYpSVI3XJtkG4D28bp2/VXADgP7bQ9cPdMJquqYqlpRVSu22mqrkQYrSeo+7+zpt3hVSeoO/z2uU04FDgaObh9PGVh/fJJ30UzQsjPwtbFEKEmaKCO7s+e00pIkzSzJCcBXgV2SXJXkUJoib58klwD7tM+pqouAk4BvA58FXulMnJKkYYzyzt6xOK201gHeeZE0X1V14CybnjTL/m8H3j66iCRJfTSyO3tOKy1JkiRJ47PUE7TcbVppYHBa6SsH9nNaaUmSJElagK5M0OK00pLGyu64kiSpb5b6zp7TSkuSJEnSEljqO3tOKy0tEe9USZIkrdtGVuy100rvDWyZ5CrgKJoi76R2iukrgP2hmVY6ydS00nfgtNKSJEmStCAjK/acVnrNvPMiSZIkaVSWesyeJEmSJGkJWOxJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJktQhSf4iyUVJvpXkhCQbJdkiyRlJLmkf7zPuOCVJ3WexJ0lSRyTZDjgcWFFVvwusDxwAHAmcWVU7A2e2zyVJmpPFniRJ3bIMuGeSZcDGwNXAfsBx7fbjgOeMKTZJ0gSx2JMkqSOq6kfAO4ErgGuAG6rqdGDrqrqm3eca4H4zHZ/ksCSrkqxavXr1UoUtSeooiz1JkjqiHYu3H/AAYFtgkyQvHvb4qjqmqlZU1YqtttpqVGFKkibEWIo9B59LkjSjJwM/qKrVVfUr4BPAY4Frk2wD0D5eN8YYJUkTYsmLPQefS5I0qyuARyfZOEmAJwEXA6cCB7f7HAycMqb4JEkTZFzdOB18LknSNFV1DnAycD5wIU2ePgY4GtgnySXAPu1zSZLmtGypG6yqHyWZGnz+C+D0qjo9yd0GnyeZcfC5JEl9VlVHAUdNW30bzV0+SZKGNo5unAsafO5MY5IkSZK0ZuPoxrmgwefONCZJkiRJ
azaOYs/B55IkSZI0YuMYs3dOkqnB53cAX6cZfL4pcFKSQ2kKwv2XOjZJkiRJ6oslL/bAweeSJEmSNGrj+uoFSZIkSdIIWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD1nsSZIkSVIPWexJkiRJUg9Z7EmSJElSD62x2Euyf5LN2uU3JflEkj1GH5okSd1mjpQkddkwd/b+uqpuSvI44KnAccB7RxuWJEkTwRwpSeqsYYq9O9vHfYH3VtUpwD1GF5IkSRPDHClJ6qxhir0fJXk/8ALgtCQbDnmcJEl9Z46UJHXWMAnpBcDngKdV1c+BLYDXjTQqSZImgzlSktRZayz2qupW4Drgce2qO4BLRhmUJEmTwBwpSeqyYWbjPAo4Anh9u2oD4COjDEqSpElgjpQkddkw3TifCzwbuAWgqq4GNhtlUJIkTQhzpCSps4Yp9m6vqgIKIMkmow1JkqSJYY6UJHXWMMXeSe1MY/dO8nLg88C/jTYsSZImgjlSktRZy9a0Q1W9M8k+wI3ALsCbq+qMkUcmSVLHmSMlSV22xmKv7ZLyhao6I8kuwC5JNqiqX40+PEmSusscKUnqsmG6cX4J2DDJdjTdUw4Bjh1lUJIkTQhzpCSps4Yp9tJ+j9DzgP9TVc8Fdh1tWJIkTQRzpCSps4Yq9pI8BngR8Jl23Rq7f0qStA4wR0qSOmuYYu/Pab4s9pNVdVGSBwJfHG1YkiRNBHOkJKmzhpmN8yzgrIHnlwGHjzIoSZImgTlSktRlw8zG+UXaL4sdVFVPHElEkiRNCHOkJKnLhhlX8NqB5Y2APwLuGE04kiRNFHOkJKmzhunGed60VV9JctaMO0uStA4xR0qSumyYbpxbDDxdD3gkcP+RRSRJ0oQwR0qSumyYbpzn0YxHCE3XlB8Ah44yKEmSJoQ5UpLUWcN043zAUgQiSdKkMUdKkrpsmG6cGwB/Bjy+XbUSeH9V/WqEcUmS1HnmSElSlw3TjfO9wAbAv7bPD2rXvWxUQUmSNCHMkZKkzhqm2Pv9qnrEwPMvJPnGqAKSJGmCmCMlSZ213hD73JnkQVNPkjwQuHN0IUmSNDHMkZKkzhrmzt7rgC8muYxmtrGdgENGGpUkSZNh0XNkknsD/w78Ls1Mny8Fvgt8DFgOXA68oKp+tpB2JEn9N8xsnGcm2RnYhSaRfaeqbht5ZJIkddyIcuS7gc9W1fOT3APYGHgDcGZVHZ3kSOBI4IgFtiNJ6rlh7uxB8yWxy9v9H5GEqvrwyKKSJGlyLFqOTHIvmpk9XwJQVbcDtyfZD9i73e04mlk/LfYkSXMa5qsX/gN4EHABd41DKMBiT5K0ThtBjnwgsBr4UJJH0Hxp+6uBravqGoCquibJ/WaJ5zDgMIAdd9xxLUOQJPXFMHf2VgC7VlUtVqOOR5Ak9cRi58hlwB7Aq6rqnCTvpumyOZSqOgY4BmDFihWLlrclSZNpmNk4vwXcf5HbnRqP8FDgEcDFNMnszKraGTiTeSQ3SZLGZLFz5FXAVVV1Tvv8ZJri79ok2wC0j9ctYpuSpJ6a9c5ekk/T3HXbDPh2kq8Bvxl0XlXPXpsGHY8gSZp0o8qRVfXjJFcm2aWqvgs8Cfh2+3MwcHT7eMoCX4IkaR0wVzfOd46oTccjSJIm3ahyJMCrgI+2M3FeRvNVDusBJyU5FLgC2H+E7UuSemLWYq+qzhphm45HkCRNrBHmSKrqApqxgNM9aVRtSpL6aZgxe4vN8QiSJEmSNGJLXuxV1Y+BK5Ps0q6aGo9wKs04BHA8giRJkiQtyKzFXpIz28e/H0G7U+MRvgnsDvwdzaDzfZJcAuzTPpckqXNGnCMlSVoUc03Qsk2SJwDPTnIikMGNVXX+2jbqeARJ0oQbWY6UJGmxzFXsvZlm4pTtgXdN21bAE0cVlCRJHWeOlCR13lyzcZ4MnJzkr6vqbUsYkyRJnWaOlCRNgrnu7AFQVW9L8myaL0IHWFlV
/zXasCRJ6j5zpCSpy9Y4G2eSd9B86fm3259Xt+skSVqnmSMlSV22xjt7wL7A7lX1a4AkxwFfB14/ysAkSZoA5khJUmcN+z179x5Y3nwUgUiSNKHMkZKkThrmzt47gK8n+SLN1NKPxyuWkiSBOVKS1GHDTNByQpKVwO/TJLIjqurHow5MkqSuM0dKkrpsmDt7VNU1wKkjjkWSpIljjpQkddWwY/YkSZIkSRPEYk+SJEmSemjOYi/Jekm+tVTBSJI0KcyRkqSum7PYa7836BtJdlyieCRJmgjmSElS1w0zQcs2wEVJvgbcMrWyqp49sqgkSZoM5khJUmcNU+y9deRRSJI0mcyRkqTOGuZ79s5KshOwc1V9PsnGwPqjD02SpG4zR0qSumyNs3EmeTlwMvD+dtV2wKdGGZQkSZPAHClJ6rJhvnrhlcBewI0AVXUJcL9RBiVJ0oQwR0qSOmuYYu+2qrp96kmSZUCNLiRJkiaGOVKS1FnDFHtnJXkDcM8k+wAfBz492rAkSZoI5khJUmcNU+wdCawGLgReAZwGvGmUQUmSNCHMkZKkzhpmNs5fJzkOOIema8p3q8ouKpKkdZ45UpLUZWss9pLsC7wP+D4Q4AFJXlFV/3fUwUmS1GXmSElSlw3zper/CPxhVV0KkORBwGcAE5kkaV1njpQkddYwY/aum0pircuA60YUjyRJk8QcKUnqrFnv7CV5Xrt4UZLTgJNoxiPsD5y7BLFJktRJ5khJ0iSYqxvnswaWrwWe0C6vBu4zsogkSeo+c6QkqfNmLfaq6pClDESSpElhjpQkTYJhZuN8APAqYPng/lX17NGFJUlS95kjJUldNsxsnJ8CPgB8Gvj1aMORJGmimCMlSZ01TLH3y6r655FHIknS5DFHSpI6a5hi791JjgJOB26bWllV548sKkmSJoM5UpLUWcMUew8DDgKeyF1dVKp9LknSuswcKUnqrGGKvecCD6yq20cdjCRJE8YcKUnqrPWG2OcbwL1HHYgkSRPIHClJ6qxh7uxtDXwnybncfTyC00pLktZ15khJUmcNU+wdNfIoJEmaTCPJkUnWB1YBP6qqZybZAvgYzff5XQ68oKp+Noq2JUn9scZir6rOWopAJEmaNCPMka8GLgbu1T4/Ejizqo5OcmT7/IgRtS1J6ok1jtlLclOSG9ufXya5M8mNSxGcJEldNoocmWR7YF/g3wdW7wcc1y4fBzxnIW1IktYNw9zZ22zweZLnAHsutGG7qEiSJt2IcuT/Bv4KGDz31lV1TdvmNUnut8A2JEnrgGFm47ybqvoUi/P9QVNdVKZMdVHZGTizfS5J0sRYaI5M8kzguqo6by2PPyzJqiSrVq9evbZhSJJ6Yo139pI8b+DpesAKmi+MXWsDXVTeDvxlu3o/YO92+ThgJY5HkCR12Ahy5F7As5M8A9gIuFeSjwDXJtmmvau3DXDdTAdX1THAMQArVqxYUK6WJE2+YWbjfNbA8h00XSz3W2C7dlGRJPXBoubIqno98HqAJHsDr62qFyf5B+Bg4Oj28ZS1bUOStO4YZszeIYvZ4GAXlTaRzff4w4DDAHbcccfFDE2SpHlZ7Bw5h6OBk5IcClwB7L9E7UqSJtisxV6SN89xXFXV29ayTbuoSJIm2ghz5OBJVtIMaaCqfgo8aaHnlCStW+aaoOWWGX4ADmUBY+mq6vVVtX1VLQcOAL5QVS8GTqXpmgJ2UZEkddtIcqQkSYtp1jt7VfWPU8tJNqOZPfMQ4ETgH2c7bgHsoiJJmghjyJGSJM3bnGP22u+++0vgRTQzZO6xmN99ZxcVSdKkGnWOlCRpoeYas/cPwPNoxsc9rKpuXrKoJEnqMHOkJGkSzDVm7zXAtsCbgKuT3Nj+3JTkxqUJT5KkTjJHSpI6b64xe3MVgpIkrbPMkZKkSWCykiRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiT
JEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkSZKkHrLYkyRJkqQestiTJEmSpB6y2JMkqSOS7JDki0kuTnJRkle367dIckaSS9rH+4w7VklS91nsSZLUHXcAr6mq3wEeDbwyya7AkcCZVbUzcGb7XJKkOS15sedVS0mSZlZV11TV+e3yTcDFwHbAfsBx7W7HAc8ZT4SSpEkyjjt7XrWUJGkNkiwHfg84B9i6qq6BpiAE7je+yCRJk2LJiz2vWkqSNLckmwL/Cfx5Vd04j+MOS7IqyarVq1ePLkBJ0kQY65i9tblqaSKTJPVZkg1oCr2PVtUn2tXXJtmm3b4NcN1Mx1bVMVW1oqpWbLXVVksTsCSps8ZW7K3tVUsTmSSpr5IE+ABwcVW9a2DTqcDB7fLBwClLHZskafKMpdhbyFVLSZJ6bC/gIOCJSS5of54BHA3sk+QSYJ/2uSRJc1q21A0OcdXyaLxqKUlaB1XVl4HMsvlJSxmLJGnyLXmxx11XLS9MckG77g00Rd5JSQ4FrgD2H0NskiRJktQLS17sedVSkiRJkkZvrLNxSpIkSZJGw2JPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeshiT5IkSZJ6yGJPkiRJknrIYk+SJEmSeqhzxV6SpyX5bpJLkxw57ngkSeoC86Mkab46VewlWR/4F+DpwK7AgUl2HW9UkiSNl/lRkrQ2OlXsAXsCl1bVZVV1O3AisN+YY5IkadzMj5KkeetasbcdcOXA86vadZIkrcvMj5KkeUtVjTuG30iyP/DUqnpZ+/wgYM+qetXAPocBh7VPdwG+u0jNbwn8ZJHOtZiMa366Ghd0Nzbjmh/jmp/FjGunqtpqkc41UYbJj+16c2Q3GNf8GNf8GNf8rAtxzZofly1SA4vlKmCHgefbA1cP7lBVxwDHLHbDSVZV1YrFPu9CGdf8dDUu6G5sxjU/xjU/XY1rAq0xP4I5siuMa36Ma36Ma37W9bi61o3zXGDnJA9Icg/gAODUMcckSdK4mR8lSfPWqTt7VXVHkv8JfA5YH/hgVV005rAkSRor86MkaW10qtgDqKrTgNPG0PSid3tZJMY1P12NC7obm3HNj3HNT1fjmjhjzI/Q3c/RuObHuObHuObHuOZnSeLq1AQtkiRJkqTF0bUxe5IkSZKkRWCxJ0mSJEk91Lkxe0slyUOB/Wi+lLZoprA+taouHmtgHdW+X9Wmd4wAAArESURBVNsB51TVzQPrn1ZVnx1jXHsCVVXnJtkVeBrwnXZsS2ck+XBV/cm44xiU5HHAnsC3qur0McbxKODiqroxyT2BI4E9gG8Df1dVN4wprsOBT1bVlWvceQkNzMR4dVV9PskLgccCFwPHVNWvxhjbg4Dn0nxFwB3AJcAJ4/oMtfbMkfNjjlwYc+SccZgj58EcOUO76+KY
vSRHAAcCJ9J8dxE031l0AHBiVR09rtjmkuSQqvrQGNo9HHglzT+U3YFXV9Up7bbzq2qPpY6pbfso4Ok0Fy3OAB4FrASeDHyuqt4+primT4ce4A+BLwBU1bOXPCggydeqas92+eU0n+kngacAnx7X732Si4BHtLMNHgPcCpwMPKld/7wxxXUDcAvwfeAE4ONVtXocsQxK8lGa3/mNgZ8DmwKfoHm/UlUHjymuw4FnAWcBzwAuAH5Gk9j+R1WtHEdcmr9JzJHjyo9t2+bI+cVljpxfXObIeTBHzqCq1rkf4HvABjOsvwdwybjjmyPuK8bU7oXApu3ycmAVTTID+PoY348LaaYg3xi4EbhXu/6ewDfHGNf5wEeAvYEntI/XtMtPGGNcXx9YPhfYql3eBLhwjHFdPPjeTdt2wTjfL5qu7k8BPgCsBj4LHAxsNsa4vtk+LgOuBdZvn2fMv/cXDsSyMbCyXd5xnH8n/Fmrz3LicuS48mPbtjlyfnGZI+cXlzlyfnGZI6f9rKvdOH8NbAv8cNr6bdptY5Pkm7NtArZeylgGrF9tt5SqujzJ3sDJSXZq4xqXO6rqTuDWJN+vqhvbGH+RZJyf4wrg1cAbgddV1QVJflFVZ40xJoD1ktyH5o9zqr0CV1W3JLljjHF9a+Cq/DeSrKiqVUkeAoytuwVN16dfA6cDpyfZgOYq+YHAO4GtxhTXem03lU1oEsbmwPXAhsAGY4ppyjLgzjaWzQCq6or2vdPk6GSO7Gh+BHPkfJkj58ccOT/myBkaXRf9OXBmkkuAqb7GOwIPBv7n2KJqbA08lebW7qAA/7304QDw4yS7V9UFAFV1c5JnAh8EHjammABuT7JxVd0KPHJqZZLNGeN/SNo/fv+U5OPt47V049/a5sB5NL9LleT+VfXjJJsy3v+QvAx4d5I3AT8BvprkSpp/my8bY1x3e0+q6ed/KnBqO25iXD4AfIfmiv0bgY8nuQx4NE23u3H5d+DcJGcDjwf+HiDJVjSJVpOjqzmyi/kRzJHzYo6cN3Pk/Jgjp1knx+wBJFmPZuDtdjS/sFcB57ZXwcYZ1weAD1XVl2fYdnxVvXAMMW1Pc4XwxzNs26uqvrLUMbVtb1hVt82wfktgm6q6cAxh/ZYk+wJ7VdUbxh3LTJJsDGxdVT8YcxybAQ+kSfpXVdW1Y47nIVX1vXHGMJsk2wJU1dVJ7k0zBueKqvramOPaDfgdmgkNvjPOWLQwXcyRXcyPbdvmyAUwRw4dhzlySObIae2uq8WeJEmSJPWZ37MnSZIkST1ksSdJkiRJPWSxJ0mSJEk9ZLEntZJsneT4JJclOS/JV5M8d4b9lif51gzr/ybJk4do5/eSVJKnLlbsc7R18yzrh4p1gW2/JMl7RtmGJGk8Zssvs+z7kqlJMwbWbZXkV0lesfjR/Vb7l7cT00xf/6dJ/mTEbe+d5L9G2YY0F4s9CUgS4FPAl6rqgVX1SOAAYPtp+806PXRVvbmqPj9EcwcCX24fZ4ylnQlvZOYRqyRJC/USmu9uHLQ/cDaz5EKAJOuPMCaq6n1V9eFRtiGNm8We1HgicHtVvW9qRVX9sKr+T3tF8uNJPk3z5aEzSnJskucneXqSkwbW790eO1VUPp8m8T0lyUbt+uVJLk7yr8D5wA5JXpfk3CTfTPLWgfN9qr3zeFGSw9b0wpL8Y5Lzk5zZfp/Lb2Jtly9P8tZ2nwuTPLRd/5YkH0yysr3befjAOV+c5GtJLkjy/qmEnOSQJN9Lchaw1xrfdUlSbyTZPcnZbd76ZJL7tLlmBfDRNmdMfQfbgcBrgO2TbDdwjpvb3ifnAI+ZI9+8N8mqNhe+dXosM3hde56vJXlwe463JHltu7wyyd+327+X5A/a9S9J8okkn01ySZL/NRDrU9peQOe3/0/YtF3/tCTfSfJl4HkLfV+lhbDYkxq70RRZs3kMcHBVPXGIc50BPDrJJu3zPwY+1i7vBfygqr4PrASeMXDcLsCHq+r3
2uWdab7nanfgkUke3+730vbO4wrg8CT3nSOWTYDzq2oP4CzgqFn2+0m7z3uB1w6sfyjNlxjvCRyVZIMkv9O+pr2qanfgTuBFSbYB3tq+xn2AXeeIS5LUPx8GjqiqhwMXAkdV1cnAKuBFVbV7Vf0iyQ7A/dvvPTuJJqdM2YTme8geBfyUGfJNu98bq2oF8HDgCUkevobYbqyqPYH3AP97ln2Wtfv8OXfPl7u3cTwM+OMkO7TdQt8EPLnNn6uAv2wv4v4b8CzgD4D7ryEuaaQs9qQZJPmXJN9Icm676oyqun6YY6vqDuCzwLPabp/7Aqe0mw8ETmyXT+Tu3Vd+WFVnt8tPaX++TlOEPpSm+IOmwPsGTfeXHQbWz+TX3FVofgR43Cz7faJ9PA9YPrD+M1V1W1X9BLgO2Bp4EvBI4NwkF7TPHwg8ClhZVaur6vaBdiVJPZdkc+DeVXVWu+o44PGz7H4ATZEHv50L7wT+s12eLd8AvCDJ+TR5cjfWfIHxhIHHx8yyz2y58MyquqGqfgl8G9gJeHTb5lfa2A5u1z+U5qLuJdV8mfVH1hCXNFKzjj+S1jEXAX809aSqXtletVvVrrplnuf7GPBK4Hrg3Kq6qe168kfAs5O8EQhw3ySbzdBGgHdU1fsHT5pkb+DJwGOq6tYkK4GN5hFXzbL+tvbxTu7+d+G2geWpbQGOq6rXT4vtOXOcX5KkKQcCWyeZuku3bZKdq+oS4JdVdWe7frZ88wCaXii/X1U/S3Isa86FNcvyoPnmwjOq6m5jDpPsPsf5pSXnnT2p8QVgoyR/NrBu4wWcbyWwB/By7rrD9WTgG1W1Q1Utr6qdaK5ePmeG4z8HvHSg//92Se4HbA78rC30HkpzZXEu69GMEQR4Ic3EMAt1JvD8Nh6SbJFkJ+AcYO8k902yAc3ge0nSOqCqbgB+NjXWDTiIZvgAwE3AZgBJdgE2qart2ly4HHgHzd2+6WbLN/eiuUB6Q5KtgacPEeIfDzx+db6vbwZnA3sNjP/bOMlDgO8AD0jyoHa/WSegkZaCd/YkoKqqvTP1T0n+ClhNk0iOAO45wyG7JLlq4PlfTDvfnWmmWn4JTdcOaP7gf3Laef4T+DPg/007/vR2bNxXkwDcDLyYpnvonyb5JvBdmmQzl1uA3ZKcB9zA3cdFrJWq+naSNwGnp5k19FfAK6vq7CRvoUmi19B0Px3pTGqSpLHZeFoefBdNvntfko2By4BD2m3Htut/QXMxc6ZceCLwtsGVa8g3X6fplXMZ8JUh4t2wnfRlPRahAKuq1UleApyQZMN29Zuq6ntpJk/7TJKf0Fxk/d2FtietrTTdiSVJkiRJfWI3TkmSJEnqIbtxSj3Qdk3ZcNrqg6rqwnHEI0nSUkvySeAB01YfUVWfG0c8UhfYjVOSJEmSeshunJIkSZLUQxZ7kiRJktRDFnuSJEmS1EMWe5IkSZLUQxZ7kiRJktRD/x+hrq6GvM/0uQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# in equal frequency discretisation, we obtain the same amount of observations\n", "# in each one of the bins.\n", "plt.subplot(1,2,1)\n", "tmp.groupby('GrLivArea_binned')['GrLivArea'].count().plot.bar()\n", "plt.ylabel('Number of houses')\n", "plt.title('Number of observations per interval')\n", "\n", "plt.subplot(1,2,2)\n", "tmp.groupby('LotArea_binned')['LotArea'].count().plot.bar()\n", "plt.ylabel('Number of houses')\n", "plt.title('Number of observations per interval')\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Return interval limits instead" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EqualFrequencyDiscretiser(q=10, return_boundaries=True, return_object=False,\n", " variables=['LotArea', 'GrLivArea'])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now, let's return bin boundaries instead\n", "\n", "efd = EqualFrequencyDiscretiser(\n", " q=10, variables=['LotArea', 'GrLivArea'], return_boundaries=True)\n", "\n", "efd.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "train_t = efd.transform(X_train)\n", "test_t = efd.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([Interval(-inf, 912.0, closed='right'),\n", " Interval(912.0, 1069.6, closed='right'),\n", " Interval(1069.6, 1211.3, closed='right'),\n", " Interval(1211.3, 1344.0, closed='right'),\n", " Interval(1344.0, 1479.0, closed='right'),\n", " Interval(1479.0, 1603.2, closed='right'),\n", " Interval(1603.2, 1716.0, closed='right'),\n", " Interval(1716.0, 1893.0, closed='right'),\n", " Interval(1893.0, 2166.4, closed='right'),\n", " Interval(2166.4, inf, closed='right')], dtype=object)" ] }, "execution_count": 
14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the intervals are the different bins into which the observations\n", "# were sorted\n", "np.sort(np.ravel(train_t['GrLivArea'].unique()))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([Interval(-inf, 912.0, closed='right'),\n", " Interval(912.0, 1069.6, closed='right'),\n", " Interval(1069.6, 1211.3, closed='right'),\n", " Interval(1211.3, 1344.0, closed='right'),\n", " Interval(1344.0, 1479.0, closed='right'),\n", " Interval(1479.0, 1603.2, closed='right'),\n", " Interval(1603.2, 1716.0, closed='right'),\n", " Interval(1716.0, 1893.0, closed='right'),\n", " Interval(1893.0, 2166.4, closed='right'),\n", " Interval(2166.4, inf, closed='right')], dtype=object)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sort(np.ravel(test_t['GrLivArea'].unique()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "fengine", "language": "python", "name": "fengine" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }