{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Winsorizer\n", "The Winsorizer finds the maximum and minimum capping values assuming either a Gaussian or a skewed distribution, as indicated by the user. It can cap the right tail, the left tail or both tails of the distribution.\n", "\n", "The Winsorizer() caps the maximum and/or minimum values of a variable.\n", "\n", "The Winsorizer() works only with numerical variables. A list of variables can\n", "be indicated. Alternatively, the Winsorizer() will select all numerical\n", "variables in the train set.\n", "\n", "The Winsorizer() first calculates the capping values at the ends of the\n", "distribution. The values are determined using one of:\n", "\n", "- a Gaussian approximation,\n", "- the inter-quartile range (IQR) proximity rule,\n", "- percentiles.\n", "\n", "\n", "### Example" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# importing libraries\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from feature_engine.outliers import Winsorizer" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load titanic dataset from OpenML\n", "\n", "def load_titanic():\n", " data = pd.read_csv(\n", " 'https://www.openml.org/data/get_csv/16826755/phpMYEkMl')\n", " data = data.replace('?', np.nan)\n", " data['cabin'] = data['cabin'].astype(str).str[0]\n", " data['pclass'] = data['pclass'].astype('O')\n", " data['embarked'].fillna('C', inplace=True)\n", " data['fare'] = data['fare'].astype('float')\n", " data['fare'].fillna(data['fare'].median(), inplace=True)\n", " data['age'] = data['age'].astype('float')\n", " data['age'].fillna(data['age'].median(), inplace=True)\n", " data.drop(['name', 'ticket'], axis=1, inplace=True)\n", " return data\n", "\n", "# To plot histogram of given numerical feature\n", "\n", "\n", "def plot_hist(data, col):\n", " 
plt.figure(figsize=(8, 5))\n", " plt.hist(data[col], bins=30)\n", " plt.title(\"Distribution of \"+col)\n", " return plt.show()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssurvivedsexagesibspparchfarecabinembarkedboatbodyhome.dest
15710male28.00051.8625ESNaNNaNBrighton, MA
40021female34.01132.5000nS10NaNGreenport, NY
54621female28.00013.0000nS9NaNSpain
61830male35.0008.0500nSNaNNaNLower Clapton, Middlesex or Erdington, Birmingham
120830female9.03227.9000nSNaNNaNNaN
\n", "
" ], "text/plain": [ " pclass survived sex age sibsp parch fare cabin embarked \\\n", "157 1 0 male 28.0 0 0 51.8625 E S \n", "400 2 1 female 34.0 1 1 32.5000 n S \n", "546 2 1 female 28.0 0 0 13.0000 n S \n", "618 3 0 male 35.0 0 0 8.0500 n S \n", "1208 3 0 female 9.0 3 2 27.9000 n S \n", "\n", " boat body home.dest \n", "157 NaN NaN Brighton, MA \n", "400 10 NaN Greenport, NY \n", "546 9 NaN Spain \n", "618 NaN NaN Lower Clapton, Middlesex or Erdington, Birmingham \n", "1208 NaN NaN NaN " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Loading titanic dataset\n", "data = load_titanic()\n", "data.sample(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train data: (916, 11)\n", "test data: (393, 11)\n" ] } ], "source": [ "# let's separate into training and testing set\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(data.drop('survived', axis=1),\n", " data['survived'],\n", " test_size=0.3,\n", " random_state=0)\n", "\n", "print(\"train data:\", X_train.shape)\n", "print(\"test data:\", X_test.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max age: 80.0\n", "Max fare: 512.3292\n" ] } ], "source": [ "# let's find out the maximum Age and maximum Fare in the titanic\n", "\n", "print(\"Max age:\", data.age.max())\n", "print(\"Max fare:\", data.fare.max())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of age feature before capping outliers\n", "plot_hist(data, 'age')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of fare feature before capping outliers\n", "plot_hist(data, 'fare')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Capping: Gaussian\n", "\n", "Gaussian limits:\n", "+ right tail: mean + 3 * std\n", "+ left tail: mean - 3 * std\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Winsorizer(variables=['age', 'fare'])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''Parameters\n", "----------\n", "capping_method : str, default=gaussian\n", "\n", " Desired capping method. Can take 'gaussian', 'iqr' or 'quantiles'.\n", "\n", "tail : str, default=right\n", "\n", " Whether to cap outliers on the right, left or both tails of the distribution.\n", " Can take 'left', 'right' or 'both'.\n", "\n", "fold: int or float, default=3\n", "\n", " How far out to place the capping values. The number that will multiply\n", " the std or IQR to calculate the capping values. 
Recommended values, 2\n", " or 3 for the gaussian approximation, or 1.5 or 3 for the IQR proximity\n", " rule.\n", "\n", "variables: list, default=None\n", "\n", " The list of variables whose values will be capped. If None, the\n", " Winsorizer will select all numerical variables in the train set.\n", "\n", "missing_values: string, default='raise'\n", "\n", " Indicates whether missing values should be ignored or should raise an error.\n", "'''\n", "# capping at right tail using gaussian capping method\n", "capper = Winsorizer(\n", " capping_method='gaussian', tail='right', fold=3, variables=['age', 'fare'])\n", "\n", "# fitting winsorizer object to training data\n", "capper.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': 67.49048447470315, 'fare': 174.78162171790441}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here we can find the maximum caps allowed\n", "capper.right_tail_caps_" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this dictionary is empty, because we selected only right tail\n", "capper.left_tail_caps_" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAecAAAE/CAYAAAB8YAsWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAU40lEQVR4nO3dffRlVV3H8fcHRjPFBJxpGoFhUGdZ1Eq00XBlhWKJ+ICtVYTLh4loTauotLACsyCXFNZSy0qTFMEnhHwIUjJxsoVWSoOKIkhOOMiMMDOAPAimDn774+6fXMYZfs/89u/83q+17rrn7PO097135nP3PueeX6oKSZLUj30WugKSJOm+DGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrM0B5L8fZI/nqN9rU7y9ST7tvl/T/Jrc7Hvtr9/SbJ+rvY3jeO+OsnNSW56oI8tLTbxd87S/UuyBVgJ7ALuAa4G3g6cXVXfmcG+fq2qPjqNbf4deGdVvWU6x2rbngE8tqpeNN1t51KS1cC1wKFVtWMh6yItBvacpal5blU9HDgUOAv4Q+Ctc32QJMvmep+dWA3cYjBLU2M4S9NQVbdX1cXALwPrk/wYQJJzk7y6TS9P8sEktyW5NcnHk+yT5B2MQuqf27D1HyRZk6SSnJTkK8C/jZWNB/Vjklye5I4kFyU5sB3rqCRbx+uYZEuSZyQ5BngF8MvteFe25d8dJm/1emWS65PsSPL2JI9oyybqsT7JV9qQ9B/t7bVJ8oi2/c62v1e2/T8DuBR4VKvHuXvY9oD2mu1M8rU2ffDY8sOSXJbkziQfTfJ3Sd45tvzIJP/ZXvMrkxw1xbdU6pLhLM1AVV0ObAV+eg+LT2nLVjAaDn/FaJN6MfAVRr3w/arqL8a2+VngR4Bn7uWQLwF+FVjFaHj9DVOo44eBPwMuaMd7/B5W+5X2eBrwaGA/4G93W+epwOOAo4E/SfIjeznk3wCPaPv52VbnE9sQ/rOAr7Z6/Moett0HeBujkYnVwDd2q8e7gcuBRwJnAC+eWJDkIOBDwKuBA4GXA+9LsmIv9ZS6ZzhLM/dVRmGwu28zCtFDq+rbVfXxmvzijjOq6q6q+sZelr+jqq6qqruAPwaOn7hgbJZeCLyuqq6rqq8DpwEn7NZr/9Oq+kZVXQlcCXxPyLe6nACcVlV3VtUW4LWMhej9qapbqup9VXV3Vd0JnMko4CfOVz8J+JOq+lZVfQK4eGzzFwGXVNUlVfWdqroU2AQcO50XQuqJ4SzN3EHArXso/0tgM/CRJNclOXUK+7phGsuvBx4ELJ9SLe/fo9r+xve9jFGPf8L41dV3M+pd7255q9Pu+zpoKpVI8tAkb27D4XcAlwH7t9B/FHBrVd09tsn463Eo8EttSPu2JLcx6u2vmsqxpR4ZztIMJHkSo+D5xO7LWs/xlKp6NPA84PeSHD2xeC+7nKxnfcjY9GpGvfObgbuAh47Va19Gw+lT3e9XGYXb+L53Adsn2W53N7c67b6vbVPc/hRGQ+c/WVU/APxMKw9wI3BgkoeOrT/+etzAaGRh/7HHw6rqrGm2QeqG4SxNQ5IfSPIc4D2Mft70+T2s85wkj00S4HZGP7+a+MnVdkbnZKfrRUkObwH1KuC9VXUP8D/AQ5I8O8mDgFcC3ze23XZgTZK9/Vs/H/jddsHVftx7jnrXdCrX6nIhcGaShyc5FPg94J33v+V3PZzReebb2sVup4/t+3pGw9RnJHlwkqcAzx3b9p3Ac5M8M8m+SR7SLpQ7GGmRMpylqfnnJHcy6qX9EfA64MS9rLsW+CjwdeC/gDdW1cfasj8HXtmGX18+jeO/AziX0RDzQ4DfgdHV48BvAm9h1Eu9i9HFaBP+sT3fkuTTe9jvOW3flwFfBv4P+O1p1Gvcb7fjX8doROHdbf9T8VfA9zPqgX8S+PBuy18IPAW4hdGFXxcA3wSoqhuA4xhdeLeT0Xv0+/j/mxYxb0IiadFJcgHwxao
6fdKVpUXIb5aSupfkSUke0343fQyjnvI/LXC1pHkz1LsRSRqWHwLez+h3zluB36iqzyxslaT547C2JEmdcVhbkqTOGM6SJHWmi3POy5cvrzVr1ix0NSRJesBcccUVN1fVHu8B30U4r1mzhk2bNi10NSRJesAkuX5vyxzWliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqTBf31paGas2pH5r2NlvOevY81ETSYmLPWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjozaTgnOSTJx5JcneQLSV7ayg9McmmSL7XnA1p5krwhyeYkn0vyxPluhCRJQzKVnvMu4JSqOhw4Ejg5yeHAqcDGqloLbGzzAM8C1rbHBuBNc15rSZIGbNJwrqobq+rTbfpO4BrgIOA44Ly22nnA89v0ccDba+STwP5JVs11xSVJGqppnXNOsgZ4AvApYGVV3dgW3QSsbNMHATeMbba1lUmSpCmYcjgn2Q94H/CyqrpjfFlVFVDTOXCSDUk2Jdm0c+fO6WwqSdKgTSmckzyIUTC/q6re34q3TwxXt+cdrXwbcMjY5ge3svuoqrOral1VrVuxYsVM6y9J0uBM5WrtAG8Frqmq140tuhhY36bXAxeNlb+kXbV9JHD72PC3JEmaxLIprPNTwIuBzyf5bCt7BXAWcGGSk4DrgePbskuAY4HNwN3AiXNZYUmShm7ScK6qTwDZy+Kj97B+ASfPsl6SJC1Z3iFMkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOTBrOSc5JsiPJVWNlZyTZluSz7XHs2LLTkmxOcm2SZ85XxSVJGqqp9JzPBY7ZQ/nrq+qI9rgEIMnhwAnAj7Zt3phk37mqrCRJS8Gk4VxVlwG3TnF/xwHvqapvVtWXgc3Ak2dRP0mSlpzZnHP+rSSfa8PeB7Syg4AbxtbZ2sq+R5INSTYl2bRz585ZVEOSpGGZaTi/CXgMcARwI/Da6e6gqs6uqnVVtW7FihUzrIYkScMzo3Cuqu1VdU9VfQf4B+4dut4GHDK26sGtTJIkTdGMwjnJqrHZXwAmruS+GDghyfclOQxYC1w+uypKkrS0LJtshSTnA0cBy5NsBU4HjkpyBFDAFuDXAarqC0kuBK4GdgEnV9U981JzSZIGatJwrqoX7KH4rfez/pnAmbOplCRJS5l3CJMkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqT
OGM6SJHVm0nBOck6SHUmuGis7MMmlSb7Ung9o5UnyhiSbk3wuyRPns/KSJA3RVHrO5wLH7FZ2KrCxqtYCG9s8wLOAte2xAXjT3FRTkqSlY9JwrqrLgFt3Kz4OOK9Nnwc8f6z87TXySWD/JKvmqK6SJC0JMz3nvLKqbmzTNwEr2/RBwA1j621tZZIkaYpmfUFYVRVQ090uyYYkm5Js2rlz52yrIUnSYMw0nLdPDFe35x2tfBtwyNh6B7ey71FVZ1fVuqpat2LFihlWQ5Kk4ZlpOF8MrG/T64GLxspf0q7aPhK4fWz4W5IkTcGyyVZIcj5wFLA8yVbgdOAs4MIkJwHXA8e31S8BjgU2A3cDJ85DnSVJGrRJw7mqXrCXRUfvYd0CTp5tpSRJWsq8Q5gkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzyxa6AtJisObUDy10FSQtIfacJUnqjOEsSVJnDGdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktQZw1mSpM4YzpIkdcZwliSpM/49Zy1aM/0by1vOevYc10SS5pY9Z0mSOmM4S5LUGcNZkqTOGM6SJHXGcJYkqTOGsyRJnTGcJUnqjOEsSVJnZnUTkiRbgDuBe4BdVbUuyYHABcAaYAtwfFV9bXbVlCRp6ZiLnvPTquqIqlrX5k8FNlbVWmBjm5ckSVM0H8PaxwHntenzgOfPwzEkSRqs2d5bu4CPJCngzVV1NrCyqm5sy28CVs7yGFpEFsP9rmdaR0l6oMw2nJ9aVduS/CBwaZIvji+sqmrB/T2SbAA2AKxevXqW1ZCGYzF8wZE0v2Y1rF1V29rzDuADwJOB7UlWAbTnHXvZ9uyqWldV61asWDGbakiSNCgz7jkneRiwT1Xd2aZ/HngVcDGwHjirPV80FxWVNPfspUt9ms2w9krgA0km9vPuqvpwkv8GLkxyEnA9cPzsqylJ0tIx43CuquuAx++h/Bbg6NlUSpKkpcw7hEmS1BnDWZKkzhjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZ2Z7b21JnfAPekjDYc9ZkqTOGM6SJHXGYW11wSFZSbqXPWdJkjpjOEuS1BnDWZKkzhjOkiR1xnCWJKkzXq0t6QEx0yvyt5z17DmuidQ/e86SJHXGnrOkQZpJT91eunphz1mSpM4YzpIkdcZwliSpM4azJEmdMZwlSeqM4SxJUmcMZ0mSOmM4S5LUGcNZkqTOGM6SJHVmkLfv9Ab7kqTFbJDhLGl+zfQLsKSpMZwldc0vAlqKDGdJWgCeftP98YIwSZI6Y89ZkmbJoXfNNcNZkhaRmXwRcCh88XFYW5KkzhjOkiR1xmFtSWo8d6xe2HOWJKkz9pwXiL9xlKR7eaHbfdlzliSpM/acx3i+6b58PSRNl/9vzA17zpIkdcaesyQNnNe4LD6GsyRpURrylw7DeZHxfI4kDZ/hLEnao6F2BhbDz7a8IEySpM7MWzgnOSbJtUk2Jzl1vo4jSdLQzEs4J9kX+DvgWcDhwAuSHD4fx5IkaWjmq+f8ZGBzVV1XVd8C3gMcN0/HkiRpUOYrnA8Cbhib39rKJEnSJBbsau0kG4ANbfbrSa6do10vB26eo30tFkutzbZ3+JZam21v5/KaWW2+t/YeurcN5iuctwGHjM0f3Mq+q6rOBs6e6wMn2VRV6+Z6vz1bam22vcO31Npse4dtJu2dr2Ht/wbWJjksyYOBE4CL5+lYkiQNyrz0nKtqV5LfAv4V2Bc4p6q+MB/HkiRpaObtnHNVXQJcMl/7vx9zPlS+CCy1Ntve4Vtqbba9wzbt9qaq5qMikiRphrx9pyR
JnRlUOC+FW4YmOSfJjiRXjZUdmOTSJF9qzwcsZB3nSpJDknwsydVJvpDkpa18kO0FSPKQJJcnubK1+U9b+WFJPtU+2xe0Cy0HI8m+ST6T5INtfrDtTbIlyeeTfDbJplY22M80QJL9k7w3yReTXJPkKUNtc5LHtfd24nFHkpdNt72DCecldMvQc4Fjdis7FdhYVWuBjW1+CHYBp1TV4cCRwMntPR1qewG+CTy9qh4PHAEck+RI4DXA66vqscDXgJMWrorz4qXANWPzQ2/v06rqiLGf1wz5Mw3w18CHq+qHgcczeq8H2eaqura9t0cAPwHcDXyA6ba3qgbxAJ4C/OvY/GnAaQtdr3lq6xrgqrH5a4FVbXoVcO1C13Ge2n0R8HNLqL0PBT4N/CSjGxgsa+X3+awv9gej+yBsBJ4OfBDIwNu7BVi+W9lgP9PAI4Av065xWgptHmvjzwP/MZP2DqbnzNK+ZejKqrqxTd8ErFzIysyHJGuAJwCfYuDtbUO8nwV2AJcC/wvcVlW72ipD+2z/FfAHwHfa/CMZdnsL+EiSK9qdEmHYn+nDgJ3A29qpi7ckeRjDbvOEE4Dz2/S02jukcBZQo69lg7oEP8l+wPuAl1XVHePLhtjeqrqnRkNiBzP6IzI/vLA1mj9JngPsqKorFrouD6CnVtUTGZ2COznJz4wvHOBnehnwROBNVfUE4C52G9IdYJtp10k8D/jH3ZdNpb1DCudJbxk6YNuTrAJozzsWuD5zJsmDGAXzu6rq/a14sO0dV1W3AR9jNKy7f5KJ+xIM6bP9U8Dzkmxh9Nfrns7o/ORQ20tVbWvPOxidi3wyw/5MbwW2VtWn2vx7GYX1kNsMoy9fn66q7W1+Wu0dUjgv5VuGXgysb9PrGZ2bXfSSBHgrcE1VvW5s0SDbC5BkRZL92/T3MzrHfg2jkP7Fttpg2lxVp1XVwVW1htG/2X+rqhcy0PYmeViSh09MMzoneRUD/kxX1U3ADUke14qOBq5mwG1uXsC9Q9owzfYO6iYkSY5ldP5q4pahZy5sjeZekvOBoxj9lZPtwOnAPwEXAquB64Hjq+rWBarinEnyVODjwOe593zkKxiddx5cewGS/DhwHqPP8D7AhVX1qiSPZtSzPBD4DPCiqvrmwtV07iU5Cnh5VT1nqO1t7fpAm10GvLuqzkzySAb6mQZIcgTwFuDBwHXAibTPNwNsc/vi9RXg0VV1eyub1ns8qHCWJGkIhjSsLUnSIBjOkiR1xnCWJKkzhrMkSZ0xnCVJ6ozhLElSZwxnSZI6YzhLktSZ/wcOz6e6KfYedQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# # Histogram of age feature after capping outliers\n", "plot_hist(capper.transform(X_train), 'age')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(67.49048447470315, 174.78162171790441)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# transforming the training and testing data\n", "train_t = capper.transform(X_train)\n", "test_t = capper.transform(X_test)\n", "\n", "# let's check the new maximum Age and maximum Fare in the titanic\n", "train_t.age.max(), train_t.fare.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gaussian approximation capping, both tails" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Winsorizer(fold=2, tail='both', variables=['fare'])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Capping the outliers at both tails using gaussian capping method\n", "\n", "winsor = Winsorizer(capping_method='gaussian',\n", " tail='both', fold=2, variables='fare')\n", "winsor.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Minimum caps : {'fare': -62.30099726608475}\n", "Maximum caps : {'fare': 127.36509792110658}\n" ] } ], "source": [ "print(\"Minimum caps :\", winsor.left_tail_caps_)\n", "\n", "print(\"Maximum caps :\", winsor.right_tail_caps_)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of fare feature after capping outliers\n", "plot_hist(winsor.transform(X_train), 'fare')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max fare: 127.36509792110658\n", "Min fare: 0.0\n" ] } ], "source": [ "# transforming the training and testing data\n", "train_t = winsor.transform(X_train)\n", "test_t = winsor.transform(X_test)\n", "\n", "print(\"Max fare:\", train_t.fare.max())\n", "print(\"Min fare:\", train_t.fare.min())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inter-quartile range (IQR), both tails\n", "**IQR limits:**\n", "\n", "- right tail: 75th percentile + 3 * IQR\n", "- left tail: 25th percentile - 3 * IQR\n", "\n", "where IQR is the inter-quartile range: 75th percentile - 25th percentile.\n", "The multiplier 3 is the default value of `fold`, which is used in the cell below.\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Winsorizer(capping_method='iqr', tail='both', variables=['age', 'fare'])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# capping at both tails using iqr capping method\n", "winsor = Winsorizer(capping_method='iqr', tail='both',\n", " variables=['age', 'fare'])\n", "\n", "winsor.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': -13.0, 'fare': -62.24179999999999}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "winsor.left_tail_caps_" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': 71.0, 'fare': 101.4126}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "winsor.right_tail_caps_" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "Max fare: 101.4126\n", "Min fare 0.0\n" ] } ], "source": [ "# transforming the training and testing data\n", "\n", "train_t = winsor.transform(X_train)\n", "test_t = winsor.transform(X_test)\n", "\n", "print(\"Max fare:\", train_t.fare.max())\n", "print(\"Min fare\", train_t.fare.min())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### percentiles or quantiles:\n", "\n", "- right tail: 98th percentile\n", "- left tail: 2nd percentile" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Winsorizer(capping_method='quantiles', fold=0.02, tail='both',\n", " variables=['age', 'fare'])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# capping at both tails using quantiles capping method\n", "winsor = Winsorizer(capping_method='quantiles', tail='both',\n", " fold=0.02, variables=['age', 'fare'])\n", "\n", "winsor.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Minimum caps : {'age': 2.0, 'fare': 6.44125}\n", "Maximum caps : {'age': 61.69999999999993, 'fare': 211.5}\n" ] } ], "source": [ "print(\"Minimum caps :\", winsor.left_tail_caps_)\n", "\n", "print(\"Maximum caps :\", winsor.right_tail_caps_)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max age: 61.69999999999993\n", "Min age 2.0\n" ] } ], "source": [ "# transforming the training and testing data\n", "train_t = winsor.transform(X_train)\n", "test_t = winsor.transform(X_test)\n", "\n", "print(\"Max age:\", train_t.age.max())\n", "print(\"Min age\", train_t.age.min())" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeYAAAE/CAYAAACTomAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUoUlEQVR4nO3dfdCddX3n8fdHwFqFCphsFgMhqJm2bKeiE12cWovSVkQt7kyX4viQsnSys6Wt3eK2aG2hHdmlu1Pb2gdbFiioVaE+FFpZV6R00NlVGlQUQdcsDZIYkgCiPLja4Hf/ONctJzHhfs79zX3er5l7znV+19M3P3L43L/fdZ0rqSokSVIPT1jqAiRJ0mMMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYpQWQ5M+T/NYCHWtNkoeSHDK8/4ckv7AQxx6O9z+SbFio483ivG9Ncm+Sew70uaWDSfwes/T4kmwBVgG7gUeB24F3ApdU1XfmcKxfqKqPzWKffwDeXVWXzuZcw74XAs+qqtfOdt+FlGQN8CXg+KrauZS1SN05YpZm5pVVdQRwPHAx8BvAZQt9kiSHLvQxm1gD3GcoS9MzmKVZqKqvV9W1wM8BG5L8CECSK5K8dVhekeTvkjyQ5P4kH0/yhCTvYhRQfztMVf96krVJKsk5Sb4C/P1Y23hIPzPJzUm+keSaJEcP5zolydbxGpNsSfKTSU4D3gz83HC+W4f1350aH+p6S5K7kuxM8s4kTx3WTdWxIclXhmno39xf3yR56rD/ruF4bxmO/5PA9cDThzqu2Me+Rw19tivJ14blY8fWn5DkpiQPJvlYkj9N8u6x9Scn+V9Dn9+a5JQZ/ieV2jGYpTmoqpuBrcCP72P1ecO6lYymwN882qVeB3yF0ej78Kr6r2P7/ATww8BL93PK1wP/DjiG0ZT622dQ40eA/wxcNZzv2fvY7OeHnxcDzwAOB/5kr21eCPwgcCrw20l+eD+n/GPgqcNxfmKo+exh2v5lwFeHOn5+H/s+AfhLRjMSa4Bv7lXHe4CbgacBFwKvm1qRZDXwYeCtwNHAG4EPJFm5nzql1gxmae6+yigI9vbPjAL0+Kr656r6eE1/M8eFVfVwVX1zP+vfVVW3VdXDwG8BZ07dHDZPrwHeVlV3VtVDwJuAs/Yarf9OVX2zqm4FbgW+J+CHWs4C3lRVD1bVFuD3GQvQx1NV91XVB6rqkap6ELiIUbhPXZ9+HvDbVfXtqvoEcO3Y7q8Frquq66rqO1V1PbAJOH02HSF1YTBLc7cauH8f7f8N2Ax8NMmdSc6fwbHunsX6u4DDgBUzqvLxPX043vixD2U00p8yfhf1I4xG1XtbMdS097FWz6SIJE9O8hfDFPg3gJuAI4fAfzpwf1U9MrbLeH8cD/zbYRr7gSQPMBrlHzOTc0vdGMzSHCR5HqPQ+cTe64YR43lV9QzgZ4BfS3Lq1Or9HHK6EfVxY8trGI3K7wUeBp48VtchjKbQZ3rcrzIKtvFj7wZ2TLPf3u4datr7WNtmuP95jKbL/3VV/QDwoqE9wHbg6CRPHtt+vD/uZjSjcOTYz1Oq6uJZ/hmkFgxmaRaS/ECSVwDvY/QVps/vY5tXJHlWkgBfZ/QVq6mvVe1gdA12tl6b5MQhnH4XeH9VPQr8H+BJSV6e5DDgLcD3je23A1ibZH+f9fcC/3G4uepwHrsmvXs2xQ21XA1clOSIJMcDvwa8+/H3/K4jGF1XfmC4se2CsWPfxWhq+sIkT0zyAuCVY/u+G3hlkpcmOSTJk4ab4o5FOggZzNLM/G2SBxmNzn4TeBtw9n62XQd8DHgI+N/An1XVjcO6/wK8ZZhyfeMszv8u4ApG08pPAn4FRneJA78IXMpodPowoxvPpvz18Hpfkk/v47iXD8e+Cfgn4P8BvzyLusb98nD+OxnNJLxnOP5M/CHw/YxG3p8EPrLX+tcALwDuY3ST11XAtwCq6m7gDEY32e1i9N/oP+H/33S
Q8gEjkg46Sa4CvlhVF0y7sXSQ8TdKSe0leV6SZw7fiz6N0Qj5b5a4LGlRLNenDElaXv4l8EFG32PeCvyHqvrM0pYkLQ6nsiVJasSpbEmSGjGYJUlqpMU15hUrVtTatWuXugxJkg6YW2655d6q+p5nurcI5rVr17Jp06alLkOSpAMmyV37ancqW5KkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWqkxbOyJT1m7fkfntN+Wy5++QJXImkpOGKWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGpk2mJMcl+TGJLcn+UKSNwztRye5PsmXh9ejhvYkeXuSzUk+l+S5i/2HkCRpuZjJiHk3cF5VnQicDJyb5ETgfOCGqloH3DC8B3gZsG742Qi8Y8GrliRpmZo2mKtqe1V9elh+ELgDWA2cAVw5bHYl8Kph+QzgnTXySeDIJMcsdOGSJC1Hs7rGnGQt8BzgU8Cqqto+rLoHWDUsrwbuHttt69AmSZKmMeNgTnI48AHgV6vqG+PrqqqAms2Jk2xMsinJpl27ds1mV0mSlq0ZBXOSwxiF8l9V1QeH5h1TU9TD686hfRtw3Njuxw5te6iqS6pqfVWtX7ly5VzrlyRpWZnJXdkBLgPuqKq3ja26FtgwLG8Arhlrf/1wd/bJwNfHprwlSdLjOHQG2/wY8Drg80k+O7S9GbgYuDrJOcBdwJnDuuuA04HNwCPA2QtZsCRJy9m0wVxVnwCyn9Wn7mP7As6dZ12SJE0kn/wlSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIj0wZzksuT7Exy21jbhUm2Jfns8HP62Lo3Jdmc5EtJXrpYhUuStBzNZMR8BXDaPtr/oKpOGn6uA0hyInAW8K+Gff4sySELVawkScvdtMFcVTcB98/weGcA76uqb1XVPwGbgefPoz5JkibKfK4x/1KSzw1T3UcNbauBu8e22Tq0fY8kG5NsSrJp165d8yhDkqTlY67B/A7gmcBJwHbg92d7gKq6pKrWV9X6lStXzrEMSZKWlzkFc1XtqKpHq+o7wH/nsenqbcBxY5seO7RJkqQZmFMwJzlm7O2/Aabu2L4WOCvJ9yU5AVgH3Dy/EiVJmhyHTrdBkvcCpwArkmwFLgBOSXISUMAW4N8DVNUXklwN3A7sBs6tqkcXpXJJkpahaYO5ql69j+bLHmf7i4CL5lOUJEmTyid/SZLUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiMEsSVIjBrM
kSY0YzJIkNTJtMCe5PMnOJLeNtR2d5PokXx5ejxrak+TtSTYn+VyS5y5m8ZIkLTczGTFfAZy2V9v5wA1VtQ64YXgP8DJg3fCzEXjHwpQpSdJkmDaYq+om4P69ms8ArhyWrwReNdb+zhr5JHBkkmMWqFZJkpa9uV5jXlVV24fle4BVw/Jq4O6x7bYObZIkaQbmffNXVRVQs90vycYkm5Js2rVr13zLkCRpWZhrMO+YmqIeXncO7duA48a2O3Zo+x5VdUlVra+q9StXrpxjGZIkLS9zDeZrgQ3D8gbgmrH21w93Z58MfH1syluSJE3j0Ok2SPJe4BRgRZKtwAXAxcDVSc4B7gLOHDa/Djgd2Aw8Apy9CDVLkrRsTRvMVfXq/aw6dR/bFnDufIuSJGlS+eQvSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWrEYJYkqRGDWZKkRg5d6gKkg8Ha8z88p/22XPzyBa5E0nLniFmSpEYMZkmSGjGYJUlqxGCWJKmRed38lWQL8CDwKLC7qtYnORq4ClgLbAHOrKqvza9MSZImw0KMmF9cVSdV1frh/fnADVW1DrhheC9JkmZgMaayzwCuHJavBF61COeQJGlZmm8wF/DRJLck2Ti0raqq7cPyPcCqeZ5DkqSJMd8HjLywqrYl+RfA9Um+OL6yqipJ7WvHIcg3AqxZs2aeZUiay0NQfACK1M+8grmqtg2vO5N8CHg+sCPJMVW1PckxwM797HsJcAnA+vXr9xnemhwHMlTm+hSv7ueStDzMeSo7yVOSHDG1DPw0cBtwLbBh2GwDcM18i5QkaVLMZ8S8CvhQkqnjvKeqPpLkH4Grk5wD3AWcOf8yJUmaDHMO5qq6E3j2PtrvA06dT1GSJE0q/3UpHbS8fitpOfKRnJIkNWIwS5LUiMEsSVIjBrMkSY0YzJIkNWIwS5LUiF+XkibYXL9y5jO2pcXjiFmSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGjGYJUlqxGCWJKkRg1mSpEYMZkmSGvGRnJIOCB//Kc2MwSxJmihz+SXxQP6CaDBLmrW5jn4lTc9rzJIkNWIwS5LUiFPZklrzpjFNGkfMkiQ1sixHzP6GvXS8KUhdHMg7b7vf5auDiyNmSZIaMZglSWrEYJYkqRGDWZKkRgxmSZIaMZglSWpkWX5dSpLmYrl+3c+vkB5cHDFLktSII2ZJOogs11G9HuOIWZKkRgxmSZIacSpbkrRgvNFs/hwxS5LUiCNmSVoC3sSl/XHELElSI46YxxzIayNeh5Ek7YvBrP1yqk2SDjynsiVJasRgliSpEaeyDzJzmV72urSkueh+Oat7fXNlMC+A5fqXQ5J04BnME8BfHCTp4OE1ZkmSGlm0EXOS04A/Ag4BLq2qixfrXJKkg5sze49ZlBFzkkOAPwVeBpwIvDrJiYtxLkmSlpPFmsp+PrC5qu6sqm8D7wPOWKRzSZK0bCxWMK8G7h57v3VokyRJj2PJ7spOshHYOLx9KMmX9rPpCuDeA1PVQcH+2JP9sSf7Y0/2x57sjz3NuD/ye4ty/uP31bhYwbwNOG7s/bFD23dV1SXAJdMdKMmmqlq/sOUdvOyPPdkfe7I/9mR/7Mn+2FPX/lisqex/BNYlOSHJE4GzgGsX6VySJC0bizJirqrdSX4J+J+Mvi51eVV9YTHOJUnScrJo15ir6jrgugU41LTT3RPG/tiT/bEn+2NP9see7I89tey
PVNVS1yBJkgY+klOSpEbaBnOS05J8KcnmJOcvdT1LIcnlSXYmuW2s7egk1yf58vB61FLWeKAkOS7JjUluT/KFJG8Y2ie1P56U5OYktw798TtD+wlJPjV8bq4abr6cGEkOSfKZJH83vJ/Y/kiyJcnnk3w2yaahbSI/LwBJjkzy/iRfTHJHkhd07Y+WwewjPb/rCuC0vdrOB26oqnXADcP7SbAbOK+qTgROBs4d/k5Man98C3hJVT0bOAk4LcnJwO8Bf1BVzwK+BpyzdCUuiTcAd4y9n/T+eHFVnTT2laBJ/bzA6N9u+EhV/RDwbEZ/T1r2R8tgxkd6AlBVNwH379V8BnDlsHwl8KoDWdNSqartVfXpYflBRh+q1Uxuf1RVPTS8PWz4KeAlwPuH9onpD4AkxwIvBy4d3ocJ7o/9mMjPS5KnAi8CLgOoqm9X1QM07Y+uwewjPfdvVVVtH5bvAVYtZTFLIcla4DnAp5jg/himbT8L7ASuB/4v8EBV7R42mbTPzR8Cvw58Z3j/NCa7Pwr4aJJbhictwuR+Xk4AdgF/OVzquDTJU2jaH12DWTNQo1vqJ+q2+iSHAx8AfrWqvjG+btL6o6oeraqTGD1Z7/nADy1tRUsnySuAnVV1y1LX0sgLq+q5jC4JnpvkReMrJ+zzcijwXOAdVfUc4GH2mrbu1B9dg3naR3pOsB1JjgEYXncucT0HTJLDGIXyX1XVB4fmie2PKcOU3I3AC4Ajk0w9n2CSPjc/BvxMki2MLn29hNE1xUntD6pq2/C6E/gQo1/eJvXzshXYWlWfGt6/n1FQt+yPrsHsIz3371pgw7C8AbhmCWs5YIbrhZcBd1TV28ZWTWp/rExy5LD8/cBPMbrufiPws8NmE9MfVfWmqjq2qtYy+v/F31fVa5jQ/kjylCRHTC0DPw3cxoR+XqrqHuDuJD84NJ0K3E7T/mj7gJEkpzO6ZjT1SM+LlraiAy/Je4FTGP0LKDuAC4C/Aa4G1gB3AWdW1d43iC07SV4IfBz4PI9dQ3wzo+vMk9gfP8roZpVDGP2CfXVV/W6SZzAaMR4NfAZ4bVV9a+kqPfCSnAK8sapeMan9Mfy5PzS8PRR4T1VdlORpTODnBSDJSYxuDHwicCdwNsNnh2b90TaYJUmaRF2nsiVJmkgGsyRJjRjMkiQ1YjBLktSIwSxJUiMGsyRJjRjMkiQ1YjBLktTI/we5s3I9U0QETQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of age feature after capping outliers\n", "plot_hist(train_t, 'age')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }