{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ArbitraryOutlierCapper\n", "The ArbitraryOutlierCapper() caps the maximum or minimum values of a variable\n", "at an arbitrary value indicated by the user.\n", "\n", "The user must provide the maximum or minimum values that will be used
\n", "to cap each variable in a dictionary {feature : capping_value}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# importing libraries\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from feature_engine.outliers import ArbitraryOutlierCapper" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load titanic dataset from OpenML\n", "\n", "def load_titanic():\n", "    '''Download the Titanic data from OpenML and apply basic cleaning.\n", "\n", "    Marks `?` entries as NaN, keeps the first character of `cabin`,\n", "    casts `pclass` to object and `fare`/`age` to float, fills missing\n", "    `embarked` with C and missing `fare`/`age` with their medians, and\n", "    drops the `name` and `ticket` columns.\n", "\n", "    Returns the cleaned DataFrame.\n", "    '''\n", "    data = pd.read_csv(\n", "        'https://www.openml.org/data/get_csv/16826755/phpMYEkMl')\n", "    data = data.replace('?', np.nan)\n", "    data['cabin'] = data['cabin'].astype(str).str[0]\n", "    data['pclass'] = data['pclass'].astype('O')\n", "    # Assign the filled column back: fillna(inplace=True) on a column\n", "    # selection is chained assignment and may leave `data` unchanged.\n", "    data['embarked'] = data['embarked'].fillna('C')\n", "    data['fare'] = data['fare'].astype('float')\n", "    data['fare'] = data['fare'].fillna(data['fare'].median())\n", "    data['age'] = data['age'].astype('float')\n", "    data['age'] = data['age'].fillna(data['age'].median())\n", "    data = data.drop(['name', 'ticket'], axis=1)\n", "    return data\n", "\n", "# To plot histogram of given numerical feature\n", "\n", "\n", "def plot_hist(data, col):\n", "    '''Show a 30-bin histogram of column `col` of `data`.'''\n", "    plt.figure(figsize=(8, 5))\n", "    plt.hist(data[col], bins=30)\n", "    plt.title(\"Distribution of \" + col)\n", "    plt.show()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssurvivedsexagesibspparchfarecabinembarkedboatbodyhome.dest
21411female23.010113.275DC6NaNLexington, MA
65130male30.0007.225nCNaNNaNOttawa, ON
93030male28.0107.750nQNaNNaNNaN
99230female30.5007.750nQNaN61NaN
71830male20.0007.050nSNaNNaNPortugal
\n", "
" ], "text/plain": [ " pclass survived sex age sibsp parch fare cabin embarked boat \\\n", "214 1 1 female 23.0 1 0 113.275 D C 6 \n", "651 3 0 male 30.0 0 0 7.225 n C NaN \n", "930 3 0 male 28.0 1 0 7.750 n Q NaN \n", "992 3 0 female 30.5 0 0 7.750 n Q NaN \n", "718 3 0 male 20.0 0 0 7.050 n S NaN \n", "\n", " body home.dest \n", "214 NaN Lexington, MA \n", "651 NaN Ottawa, ON \n", "930 NaN NaN \n", "992 61 NaN \n", "718 NaN Portugal " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = load_titanic()\n", "data.sample(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train data: (916, 11)\n", "test data: (393, 11)\n" ] } ], "source": [ "# let's separate into training and testing set\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(data.drop('survived', axis=1),\n", " data['survived'],\n", " test_size=0.3,\n", " random_state=0)\n", "\n", "print(\"train data:\", X_train.shape)\n", "print(\"test data:\", X_test.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeYAAAE/CAYAAACTomAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZM0lEQVR4nO3de7SldX3f8fdHhoAC4SKndJwBBpVqiKsOdkRc2gRBI+AFs1ZicHmZWLImbTHRhCSCGsUuabGNktomtEQQvCHESyBIjIhkGdsKDgjIReIEBplxYAaQmxgi+O0f+zeyHWY49zm/vef9Wmuv8+zfc/v+ztn7fM7ze579nFQVkiSpD09Z6AIkSdLjDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrM0B5L8ryR/PEfbOiDJQ0l2as//LslvzcW22/b+JsnKudreNPb7gSR3J7lze+9bGiXxc8zSk0uyFtgPeBR4DLgJ+DhwVlX9ZAbb+q2q+so01vk74JNV9dHp7Kuteyrw7Kp603TXnUtJDgBuAQ6sqo0LWYvUO4+Ypal5TVXtARwInA68Ezh7rneSZNFcb7MTBwD3GMrS5AxmaRqq6v6quhj4DWBlkucBJDk3yQfa9L5JLklyX5J7k/x9kqck+QSDgPrrNlT9R0mWJakkJyT5HvDVobbhkH5WkquSPJDkoiT7tH0dkWTdcI1J1iZ5eZKjgXcBv9H2d12b/9Oh8VbXe5LcnmRjko8n2bPN21zHyiTfa8PQ797W9ybJnm39TW1772nbfzlwGfCMVse5W1l37/Y925TkB2166dD8g5J8LcmDSb6S5M+SfHJo/uFJ/m/7nl+X5Igp/kil7hjM0gxU1VXAOuDfbmX2SW3eBIMh8HcNVqk3A99jcPS9e1X916F1fhn4BeCV29jlW4B/ByxmMKT+kSnU+CXgPwMXtP09fyuL/WZ7vAx4JrA78D+3WOalwHOAo4D3JvmFbezyfwB7tu38cqv5rW3Y/hjg+62O39zKuk8BPsZgROIA4Edb1PFp4Crg6cCpwJs3z0iyBPgi8AFgH+APgM8lmdhGnVLXDGZp5r7PIAi29GMGAXpgVf24qv6+Jr+Y49Sq+mFV/Wgb8z9RVTdU1Q+BPwZev/nisFl6I/Dhqrq1qh4CTgGO3+Jo/f1V9aOqug64DnhCwLdajgdOqaoHq2ot8CGGAvTJVNU9VfW5qnq4qh4ETmMQ7pvPT78QeG9V/XNVfR24eGj1NwGXVtWlVfWTqroMWA0cO51vhNQLg1mauSXAvVtp/2/AGuDLSW5NcvIUtnXHNObfDuwM7DulKp/cM9r2hre9iMGR/mbDV1E/zOCoekv7tpq23NaSqRSR5GlJ/ncbAn8A+BqwVwv8ZwD3VtXDQ6sMfz8OBH69DWPfl+Q+Bkf5i6eyb6k3BrM0A0leyCB0vr7lvHbEeFJVPRN4LfD7SY7aPHsbm5zsiHr/oekDGByV3w38EHjaUF07MRhCn+p2v88g2Ia3/Shw1yTrbenuVtOW21o/xfVPYjBc/qKq+nngl1p7gA3APkmeNrT88PfjDgYjCnsNPXarqtOn2QepCwazNA1Jfj7Jq4HPMPgI07e3ssyrkzw7SYD7GXzEavPHqu5icA52ut6U5JAWTv8J+GxVPQb8A7Brklcl2Rl4D7DL0Hp3AcuSbOu9fj7we+3iqt15/Jz0o9MprtVyIXBakj2SHAj8PvDJJ1/zp/ZgcF75vnZh2/uGtn07g6HpU5P8XJIXA68ZWveTwGuSvDLJTkl2bRfFLUUaQQazNDV/neRBBkdn7wY+DLx1G8seDHwFeAj4f8CfV9UVbd5/Ad7Thlz/YBr7/wRwLoNh5V2B34XBVeLAfwQ+yuDo9IcMLjzb7C/b13uSXLOV7Z7Ttv014Dbgn4DfmUZdw36n7f9WBiMJn27bn4o/BZ7K4Mj7G8CXtpj/RuDFwD0MLvK6AHgEoKruAI5jcJHdJgY/oz/E328aUd5gRNL
ISXIB8J2qet+kC0sjxr8oJXUvyQuTPKt9LvpoBkfIf7XAZUnzYlzvMiRpvPxL4PMMPse8DvgPVfWthS1Jmh8OZUuS1BGHsiVJ6ojBLElSR7o4x7zvvvvWsmXLFroMSZK2m6uvvvruqnrCPd27COZly5axevXqhS5DkqTtJsntW2t3KFuSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSNd3Ctb2p6WnfzFaa+z9vRXzUMlkvREHjFLktSRSYM5ya5JrkpyXZIbk7y/tZ+b5LYk17bH8taeJB9JsibJ9UleMM99kCRpbExlKPsR4MiqeijJzsDXk/xNm/eHVfXZLZY/Bji4PV4EnNm+SpKkSUx6xFwDD7WnO7dHPckqxwEfb+t9A9gryeLZlypJ0vib0jnmJDsluRbYCFxWVVe2Wae14eozkuzS2pYAdwytvq61SZKkSUwpmKvqsapaDiwFDkvyPOAU4LnAC4F9gHdOZ8dJViVZnWT1pk2bple1JEljalpXZVfVfcAVwNFVtaENVz8CfAw4rC22Hth/aLWlrW3LbZ1VVSuqasXExMSMipckadxM5arsiSR7temnAq8AvrP5vHGSAK8DbmirXAy8pV2dfThwf1VtmIfaJUkaO1O5KnsxcF6SnRgE+YVVdUmSryaZAAJcC/z7tvylwLHAGuBh4K1zXrUkSWNq0mCuquuBQ7fSfuQ2li/gxNmXJknSjsc7f0mS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1ZNJgTrJrkquSXJfkxiTvb+0HJbkyyZokFyT5uda+S3u+ps1fNs99kCRpbEzliPkR4Miqej6wHDg6yeHAB4EzqurZwA+AE9ryJwA/aO1ntOUkSdIUTBrMNfBQe7pzexRwJPDZ1n4e8Lo2fVx7Tpt/VJLMVcGSJI2zKZ1jTrJTkmuBjcBlwD8C91XVo22RdcCSNr0EuAOgzb8fePoc1ixJ0tiaUjBX1WNVtRxYChwGPHe2O06yKsnqJKs3bdo0281JkjQWpnVVdlXdB1wBvBjYK8miNmspsL5Nrwf2B2jz9wTu2cq2zqqqFVW1YmJiYmbVS5I0ZqZyVfZEkr3a9FOBVwA3MwjoX2uLrQQuatMXt+e0+V+tqprDmiVJGluLJl+ExcB5SXZiEOQXVtUlSW4CPpPkA8C3gLPb8mcDn0iyBrgXOH4e6pYkaSxNGsxVdT1w6Fbab2VwvnnL9n8Cfn1OqpMkaQfjnb8kSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOjJpMCfZP8kVSW5KcmOSt7f2U5OsT3Jtexw7tM4pSdYkuSXJK+ezA5IkjZNFU1jmUeCkqromyR7A1Ukua/POqKo/GV44ySHA8cAvAs8AvpLkX1XVY3NZuCRJ42jSI+aq2lBV17TpB4GbgSVPsspxwGeq6pGqug1YAxw2F8VKkjTupnWOOcky4FDgytb0tiTXJzknyd6tbQlwx9Bq63jyIJckSc2UgznJ7sDngHdU1QPAmcCzgOXABuBD09lxklVJVidZvWnTpumsKknS2JpSMCfZmUEof6qqPg9QVXdV1WNV9RPgL3h8uHo9sP/Q6ktb28+oqrOqakVVrZiYmJhNHyRJGhtTuSo7wNnAzVX14aH2xUOL/SpwQ5u
+GDg+yS5JDgIOBq6au5IlSRpfU7kq+yXAm4FvJ7m2tb0LeEOS5UABa4HfBqiqG5NcCNzE4IruE70iW5KkqZk0mKvq60C2MuvSJ1nnNOC0WdQlSdIOyTt/SZLUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHVk0mBOsn+SK5LclOTGJG9v7fskuSzJd9vXvVt7knwkyZok1yd5wXx3QpKkcTGVI+ZHgZOq6hDgcODEJIcAJwOXV9XBwOXtOcAxwMHtsQo4c86rliRpTE0azFW1oaquadMPAjcDS4DjgPPaYucBr2vTxwEfr4FvAHslWTzXhUuSNI6mdY45yTLgUOBKYL+q2tBm3Qns16aXAHcMrbautUmSpElMOZiT7A58DnhHVT0wPK+qCqjp7DjJqiSrk6zetGnTdFaVJGlsTSmYk+zMIJQ/VVWfb813bR6ibl83tvb1wP5Dqy9tbT+jqs6qqhVVtWJiYmKm9UuSNFamclV2gLOBm6vqw0OzLgZWtumVwEVD7W9pV2cfDtw/NOQtSZKexKIpLPMS4M3At5Nc29reBZwOXJjkBOB24PVt3qXAscAa4GHgrXNZsCRJ42zSYK6qrwPZxuyjtrJ8ASfOsi5JknZI3vlLkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHZk0mJOck2RjkhuG2k5Nsj7Jte1x7NC8U5KsSXJLklfOV+GSJI2jqRwxnwscvZX2M6pqeXtcCpDkEOB44BfbOn+eZKe5KlaSpHE3aTBX1deAe6e4veOAz1TVI1V1G7AGOGwW9UmStEOZzTnmtyW5vg11793algB3DC2zrrU9QZJVSVYnWb1p06ZZlCFJ0viYaTCfCTwLWA5sAD403Q1U1VlVtaKqVkxMTMywDEmSxsuMgrmq7qqqx6rqJ8Bf8Phw9Xpg/6FFl7Y2SZI0BTMK5iSLh57+KrD5iu2LgeOT7JLkIOBg4KrZlShJ0o5j0WQLJDkfOALYN8k64H3AEUmWAwWsBX4boKpuTHIhcBPwKHBiVT02L5VLkjSGJg3mqnrDVprPfpLlTwNOm01RkiTtqLzzlyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdWbTQBUgAy07+4rTXWXv6q+ahEklaWB4xS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1JFJgznJOUk2JrlhqG2fJJcl+W77undrT5KPJFmT5PokL5jP4iVJGjdTOWI+Fzh6i7aTgcur6mDg8vYc4Bjg4PZYBZw5N2VKkrRjmDSYq+prwL1bNB8HnNemzwNeN9T+8Rr4BrBXksVzVKskSWNvpueY96uqDW36TmC/Nr0EuGNouXWt7QmSrEqyOsnqTZs2zbAMSZLGy6wv/qqqAmoG651VVSuqasXExMRsy5AkaSzMNJjv2jxE3b5ubO3rgf2Hllva2iRJ0hTMNJgvBla26ZXARUPtb2lXZx8O3D805C1JkiaxaLIFkpwPHAHsm2Qd8D7gdODCJCcAtwOvb4tfChwLrAEeBt46DzVLkjS2Jg3mqnrDNmYdtZVlCzhxtkVpdC07+YsLXYIkjTTv/CV
JUkcMZkmSOjLpULakmQ/Rrz39VXNciaRx5xGzJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjixa6AGmmlp38xYUuQZLmnMEsdWamf3CsPf1Vc1yJpIXgULYkSR3xiFnb5FCxJG1/HjFLktQRg1mSpI4YzJIkdcRgliSpI7O6+CvJWuBB4DHg0apakWQf4AJgGbAWeH1V/WB2ZUqStGOYi6uyX1ZVdw89Pxm4vKpOT3Jye/7OOdiPpCcxk6vo/eyz1J/5GMo+DjivTZ8HvG4e9iFJ0liabTAX8OUkVydZ1dr2q6oNbfpOYL9Z7kOSpB3GbIeyX1pV65P8C+CyJN8ZnllVlaS2tmIL8lUABxxwwCzLkCRpPMzqiLmq1revG4EvAIcBdyVZDNC+btzGumdV1YqqWjExMTGbMiRJGhszDuYkuyXZY/M08CvADcDFwMq22ErgotkWKUnSjmI2Q9n7AV9Isnk7n66qLyX5JnBhkhOA24HXz75MSZJ2DDMO5qq6FXj+VtrvAY6aTVGSJO2o/O9S0jzyP3RJmi5vySlJUkc8YpY0bd5lTJo/HjFLktQRg1mSpI44lL0D8AIk9WCmr0OHwLWjGctg9heAJGlUOZQtSVJHxvKIWdLUeJpD6o9HzJIkdcRgliSpIwazJEkdMZglSeqIF39J6poff9SOxmCWpMY/AtQDg3mIb0pJ0kLzHLMkSR0xmCVJ6ohD2ZLGknc106gymOeA/zRe0nR5TYu2xaFsSZI64hHzAnGYTdJMOEI3/jxiliSpIx4xS9IsOQKmueQRsyRJHTGYJUnqiMEsSVJH5u0cc5Kjgf8O7AR8tKpOn699SZL64OezZ29egjnJTsCfAa8A1gHfTHJxVd00H/uTJG2bYTla5mso+zBgTVXdWlX/DHwGOG6e9iVJ0tiYr6HsJcAdQ8/XAS+ap31JkubB9vwYWO8fOdueowcL9jnmJKuAVe3pQ0lumcPN7wvcPYfbWyjj0g+wL72yL30al76MSz/IB+elLwdurXG+gnk9sP/Q86Wt7aeq6izgrPnYeZLVVbViPra9PY1LP8C+9Mq+9Glc+jIu/YDt25f5Osf8TeDgJAcl+TngeODiedqXJEljY16OmKvq0SRvA/6WwcelzqmqG+djX5IkjZN5O8dcVZcCl87X9icxL0PkC2Bc+gH2pVf2pU/j0pdx6Qdsx76kqrbXviRJ0iS8JackSR0Zq2BOcnSSW5KsSXLyQtczHUnOSbIxyQ1DbfskuSzJd9vXvReyxqlKsn+SK5LclOTGJG9v7SPXnyS7JrkqyXWtL+9v7QclubK91i5oFzl2L8lOSb6V5JL2fFT7sTbJt5Ncm2R1axu51xdAkr2SfDbJd5LcnOTFo9iXJM9pP4/NjweSvGMU+wKQ5Pfae/6GJOe33wXb5f0yNsE8dBvQY4BDgDckOWRhq5qWc4Gjt2g7Gbi8qg4GLm/PR8GjwElVdQhwOHBi+1mMYn8eAY6squcDy4GjkxwOfBA4o6qeDfwAOGHhSpyWtwM3Dz0f1X4AvKyqlg99hGUUX18w+J8CX6qq5wLPZ/DzGbm+VNUt7eexHPg3wMPAFxjBviRZAvwusKKqnsfgIubj2V7vl6oaiwfwYuBvh56fApyy0HVNsw/LgBuGnt8CLG7Ti4FbFrrGGfbrIgb3TR/p/gBPA65hcBe7u4FFrf1nXnu9PhjcT+By4EjgEiCj2I9W61pg3y3aRu71BewJ3Ea73meU+7JF/b8C/J9R7QuP371yHwYXSV8CvHJ7vV/G5oiZrd8GdMkC1TJX9quqDW36TmC/hSxmJpIsAw4FrmRE+9OGf68FNgKXAf8I3FdVj7ZFRuW19qfAHwE/ac+fzmj2A6CALye5ut1FEEbz9XUQsAn4WDvF8NEkuzGafRl2PHB+mx65vlTVeuB
PgO8BG4D7gavZTu+XcQrmsVaDP9FG6hL6JLsDnwPeUVUPDM8bpf5U1WM1GJ5byuAftDx3YSuaviSvBjZW1dULXcsceWlVvYDBqasTk/zS8MwRen0tAl4AnFlVhwI/ZIuh3hHqCwDtvOtrgb/cct6o9KWdBz+OwR9OzwB244mnGufNOAXzpLcBHUF3JVkM0L5uXOB6pizJzgxC+VNV9fnWPLL9Aaiq+4ArGAxh7ZVk830ARuG19hLgtUnWMvhvb0cyOLc5av0AfnpEQ1VtZHAe8zBG8/W1DlhXVVe2559lENSj2JfNjgGuqaq72vNR7MvLgduqalNV/Rj4PIP30HZ5v4xTMI/jbUAvBla26ZUMztV2L0mAs4Gbq+rDQ7NGrj9JJpLs1aafyuBc+c0MAvrX2mLd96WqTqmqpVW1jMF746tV9UZGrB8ASXZLssfmaQbnM29gBF9fVXUncEeS57Smo4CbGMG+DHkDjw9jw2j25XvA4Ume1n6fbf65bJ/3y0KfZJ/jE/bHAv/A4Bzguxe6nmnWfj6Dcxk/ZvBX9AkMzgFeDnwX+Aqwz0LXOcW+vJTBcNX1wLXtcewo9gf418C3Wl9uAN7b2p8JXAWsYTBkt8tC1zqNPh0BXDKq/Wg1X9ceN25+r4/i66vVvRxY3V5jfwXsPcJ92Q24B9hzqG1U+/J+4Dvtff8JYJft9X7xzl+SJHVknIayJUkaeQazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXk/wOn1so1cVz37wAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of age feature before capping outliers\n", "\n", "plot_hist(data, 'age')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAE/CAYAAACTomAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAYCElEQVR4nO3dfbTdVZ3f8fdHAjLjA+HhmsUkgeCQ6jBdFWlq49KpSkbLgzX84TC4HIk0s9LVUqvVWU58GKtdtsV2KSN9oGWJNfiMzFgySh1jwOXMWgMaFJAHlSsFkxhIRAgCoyP67R9nXz3EhHvuzb3J5uT9Wuuss39779/vt88ml09++/e7J6kqJElSH55ysAcgSZJ+yWCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLcyDJ/0zyJ3N0rBOSPJzksLb95SR/OBfHbsf7v0nWzNXxZnDe9yb5QZJ799H+L5Pc1z77sQd6fFIv4u8xS08syd3AIuAx4GfA7cAVwGVV9fNZHOsPq+pLM9jny8DHqupDMzlX2/fdwMlV9Qcz3XcuJTkB+DZwYlXt3Ev74cBDwMqquvlAj0/qiVfM0mj+WVU9AzgRuAj4Y+DyuT5JkgVzfcxOnADcv7dQbhYBRwK3zebgYzxvOgQZzNIMVNXuqtoI/D6wJsnfB0jykSTvbeXjknwuyYNJfpjkr5I8JclHGQTUX7Tl2rcmWZakkqxN8j3g2qG64bD5zSRfTfJQkquTHNPO9dIk24bHmOTuJL+b5Azg7cDvt/Pd3Np/sTTexvXOJPck2ZnkiiRHtbapcaxJ8r22DP2Ofc1NkqPa/rva8d7Zjv+7wCbgN9o4PrLHfn+PwdU0wINJrm31H0yytX3mG5P8ztA+705yVZKPJXkIeH07/+VJdiTZ3pbODxv9v67UB4NZmoWq+iqwDfidvTS/pbVNMLgSfPtgl3od8D0GV99Pr6r/PLTPS4DfAv7pPk55PvDPgeMZLKlfMsIYvwD8R+DT7XzP20u317fXy4BnA08H/tsefV4MPAdYBbwryW/t45T/FTiqHeclbcwXtGX7M4Hvt3G8fo9xfgf47ba5sKpOb+WvAacCxwCfAD6T5MihXVcDVwELgY8DH2EwNycDzwdeAczZvXnpQDGYpdn7PoPQ2NNPGQToiVX106r6q5r+YY53V9UjVfW3+2j/aFXdWlWPAH8CnDtHV4OvBT5QVXdV1cPA24Dz9rhaf09V/W2793sz8CsB38ZyHvC2qvpRVd0NvB943WwHVlUfq6r7q+qxqno/8FQGf0GY8jdV9X/aff5nAmcBb2rzuBO4uI1JelIxmKXZWwz8cC/1/wWYBL6Y5K4k60c41tYZtN8DHA4cN9Ion9hvtOMNH3sBgyv9KcNPUT/K4Kp6T8e1Me15rMWzHViSP0pyR5LdSR5kcDU+/JmH5+TEdv4d7RbCg8D/Ap412/NLB4vBLM1Ckn/EIHT+es+2dsX4lqp6NvAq4M1JVk017+OQ011RLx0qn8DgqvwHwCPArw+N6zAGS+ijHvf7DEJt+NiPAfdNs9+eftDGtOexts/wOAC0+8lvBc4Fjq6qhcBuIEPdhj/bVuAnwHFVtbC9nllVv430JGMwSzOQ5JlJXgl8isGvMH1zL31emeTkJGEQJj8Dpn6t6j4G92Bn6g+SnJLk14F/D1xVVT8DvgMcmeTs9itH72Sw5DvlP
mBZkn39rH8S+LdJTkrydH55T/qxmQyujeVK4D8keUaSE4E3Ax+byXGGPIPBXxB2AQuSvIvBcvW+zr8D+CLw/vbf6ClJfjPJS2Z5fumgMZil0fxFkh8xuDJ7B/AB4IJ99F0OfAl4GPgb4H9U1XWt7T8B72zLrX80g/N/lMHDTfcy+LWifwODp8SBfwV8iMHV6SMMHjyb8pn2fn+Sr+/luB9ux/4K8P+AHwNvmMG4hr2hnf8uBisJn2jHn42/BL7A4C8e97RxTbfcfz5wBIPfM3+AwYNhx8/y/NJB4xeMSJLUEa+YJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjnTxL7Icd9xxtWzZsoM9DEmSDogbb7zxB1U1sbe2LoJ52bJlbNmy5WAPQ5KkAyLJPftqcylbkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjXXxX9lxbtv7zs9rv7ovOnuORSJI0M14xS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqyLTBnOQ5SW4aej2U5E1JjkmyKcmd7f3o1j9JLkkymeSWJKfN/8eQJGk8TBvMVfXtqjq1qk4F/iHwKPBZYD2wuaqWA5vbNsCZwPL2WgdcOg/jliRpLM10KXsV8N2qugdYDWxo9RuAc1p5NXBFDVwPLExy/FwMVpKkcTfTYD4P+GQrL6qqHa18L7ColRcDW4f22dbqJEnSNEYO5iRHAK8CPrNnW1UVUDM5cZJ1SbYk2bJr166Z7CpJ0tiayRXzmcDXq+q+tn3f1BJ1e9/Z6rcDS4f2W9LqHqeqLquqFVW1YmJiYuYjlyRpDM0kmF/DL5exATYCa1p5DXD1UP357enslcDuoSVvSZL0BEb616WSPA14OfAvhqovAq5Msha4Bzi31V8DnAVMMniC+4I5G60kSWNupGCuqkeAY/eou5/BU9p79i3gwjkZnSRJhxi/+UuSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIyMFc5KFSa5K8q0kdyR5YZJjkmxKcmd7P7r1TZJLkkwmuSXJafP7ESRJGh+jXjF/EPhCVT0XeB5wB7Ae2FxVy4HNbRvgTGB5e60DLp3TEUuSNMamDeYkRwH/BLgcoKr+rqoeBFYDG1q3DcA5rbwauKIGrgcWJjl+jsctSdJYGuWK+SRgF/C/k3wjyYeSPA1YVFU7Wp97gUWtvBjYOrT/tlYnSZKmMUowLwBOAy6tqucDj/DLZWsAqqqAmsmJk6xLsiXJll27ds1kV0mSxtYowbwN2FZVN7TtqxgE9X1TS9TtfWdr3w4sHdp/Sat7nKq6rKpWVNWKiYmJ2Y5fkqSxMm0wV9W9wNYkz2lVq4DbgY3Amla3Bri6lTcC57ens1cCu4eWvCVJ0hNYMGK/NwAfT3IEcBdwAYNQvzLJWuAe4NzW9xrgLGASeLT1lSRJIxgpmKvqJmDFXppW7aVvARfu37AkSTo0+c1fkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjIwVzkruTfDPJTUm2tLpjkmxKcmd7P7rVJ8klSSaT3JLktPn8AJIkjZOZXDG/rKpOraoVbXs9sLmqlgOb2zbAmcDy9loHXDpXg5Ukadztz1L2amBDK28Azhmqv6IGrgcWJjl+P84jSdIhY9RgL
uCLSW5Msq7VLaqqHa18L7ColRcDW4f23dbqJEnSNBaM2O/FVbU9ybOATUm+NdxYVZWkZnLiFvDrAE444YSZ7CpJ0tga6Yq5qra3953AZ4EXAPdNLVG3952t+3Zg6dDuS1rdnse8rKpWVNWKiYmJ2X8CSZLGyLTBnORpSZ4xVQZeAdwKbATWtG5rgKtbeSNwfns6eyWwe2jJW5IkPYFRlrIXAZ9NMtX/E1X1hSRfA65Msha4Bzi39b8GOAuYBB4FLpjzUUuSNKamDeaqugt43l7q7wdW7aW+gAvnZHSSJB1i/OYvSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI6MHMxJDkvyjSSfa9snJbkhyWSSTyc5otU/tW1PtvZl8zR2SZLGzkyumN8I3DG0/T7g4qo6GXgAWNvq1wIPtPqLWz9JkjSCkYI5yRLgbOBDbTvA6cBVrcsG4JxWXt22ae2rWn9JkjSNUa+Y/xR4K/Dztn0s8GBVPda2twGLW3kxsBWgte9u/R8nybokW5Js2bVr1+xGL0nSmJk2mJO8EthZVTfO5Ymr6rKqWlFVKyYmJuby0JIkPWktGKHPi4BXJTkLOBJ4JvBBYGGSBe2qeAmwvfXfDiwFtiVZABwF3D/nI5ckaQxNe8VcVW+rqiVVtQw4D7i2ql4LXAe8unVbA1zdyhvbNq392qqqOR21JEljan9+j/mPgTcnmWRwD/nyVn85cGyrfzOwfv+GKEnSoWOUpexfqKovA19u5buAF+ylz4+B35uDsUmSdMjxm78kSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkemDeYkRyb5apKbk9yW5D2t/qQkNySZTPLpJEe0+qe27cnWvmyeP4MkSWNjlCvmnwCnV9XzgFOBM5KsBN4HXFxVJwMPAGtb/7XAA63+4tZPkiSNYNpgroGH2+bh7VXA6cBVrX4DcE4rr27btPZVSTJXA5YkaZyNdI85yWFJbgJ2ApuA7wIPVtVjrcs2YHErLwa2ArT23cCxczhmSZLG1kjBXFU/q6pTgSXAC4Dn7u+Jk6xLsiXJll27du3v4SRJGgszeiq7qh4ErgNeCCxMsqA1LQG2t/J2YClAaz8KuH8vx7qsqlZU1YqJiYnZjV6SpDEzylPZE0kWtvKvAS8H7mAQ0K9u3dYAV7fyxrZNa7+2qmoOxyxJ0thaMH0Xjgc2JDmMQZBfWVWfS3I78Kkk7wW+AVze+l8OfDTJJPBD4Lx5GLckSWNp2mCuqluA5++l/i4G95v3rP8x8HtzMjpJkg4xfvOXJEkdMZglSeqIwSxJUkcMZkmSOjLKU9mHjGXrPz/jfe6+6Ox5GIkk6VDlFbMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1JFpgznJ0iTXJbk9yW1J3tjqj0myKcmd7f3oVp8klySZTHJLktPm+0NIkjQuRrlifgx4S1WdAqwELkxyCrAe2FxVy4HNbRvgTGB5e60DLp3zUUuSNKamDeaq2lFVX2/lHwF3AIuB1cCG1m0DcE4rrwauqIHrgYVJjp/rgUuSNI5mdI85yTLg+cANwKKq2tGa7gUWtfJiY
OvQbttanSRJmsbIwZzk6cCfAW+qqoeG26qqgJrJiZOsS7IlyZZdu3bNZFdJksbWSMGc5HAGofzxqvrzVn3f1BJ1e9/Z6rcDS4d2X9LqHqeqLquqFVW1YmJiYrbjlyRprIzyVHaAy4E7quoDQ00bgTWtvAa4eqj+/PZ09kpg99CStyRJegILRujzIuB1wDeT3NTq3g5cBFyZZC1wD3Bua7sGOAuYBB4FLpjLAUuSNM6mDeaq+msg+2hetZf+BVy4n+OSJOmQ5Dd/SZLUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHVk2mBO8uEkO5PcOlR3TJJNSe5s70e3+iS5JMlkkluSnDafg5ckadyMcsX8EeCMPerWA5urajmwuW0DnAksb691wKVzM0xJkg4N0wZzVX0F+OEe1auBDa28AThnqP6KGrgeWJjk+DkaqyRJY2+295gXVdWOVr4XWNTKi4GtQ/22tbpfkWRdki1JtuzatWuWw5Akabzs98NfVVVAzWK/y6pqRVWtmJiY2N9hSJI0FmYbzPdNLVG3952tfjuwdKjfklYnSZJGMNtg3gisaeU1wNVD9ee3p7NXAruHlrwlSdI0FkzXIckngZcCxyXZBvw74CLgyiRrgXuAc1v3a4CzgEngUeCCeRizJElja9pgrqrX7KNp1V76FnDh/g5KkqRDld/8JUlSRwxmSZI6YjBLktQRg1mSpI4YzJIkdWTap7I1P5at//wBO9fdF519wM4lSdo/XjFLktQRr5j304G88pUkjT+vmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHVlwsAeg+bds/edntd/dF53d9bkkaRwZzNqn2YbsgTrXkyHMe/9c/kVK6s+8LGUnOSPJt5NMJlk/H+eQJGkczXkwJzkM+O/AmcApwGuSnDLX55EkaRzNx1L2C4DJqroLIMmngNXA7fNwLh3CxnUZdlw/l9SL3m8xzUcwLwa2Dm1vA/7xPJxH0pAD+UyApPlz0B7+SrIOWNc2H07y7Tk8/HHAD+bweIeasZ6/vG9eDz/WczdlnubwkJi7eeLczd5IczcPf+ZP3FfDfATzdmDp0PaSVvc4VXUZcNk8nJ8kW6pqxXwc+1Dg/M2eczd7zt3sOXez1+PczcdT2V8Dlic5KckRwHnAxnk4jyRJY2fOr5ir6rEk/xr4S+Aw4MNVddtcn0eSpHE0L/eYq+oa4Jr5OPaI5mWJ/BDi/M2eczd7zt3sOXez193cpaoO9hgkSVLjP2IhSVJHxi6Y/TrQJ5bkw0l2Jrl1qO6YJJuS3Nnej271SXJJm8tbkpx28EZ+8CVZmuS6JLcnuS3JG1u98zeNJEcm+WqSm9vcvafVn5TkhjZHn24PjJLkqW17srUvO6gfoANJDkvyjSSfa9vO3YiS3J3km0luSrKl1XX7cztWwezXgY7kI8AZe9StBzZX1XJgc9uGwTwub691wKUHaIy9egx4S1WdAqwELmx/vpy/6f0EOL2qngecCpyRZCXwPuDiqjoZeABY2/qvBR5o9Re3foe6NwJ3DG07dzPzsqo6dehXo7r9uR2rYGbo60Cr6u+Aqa8DVVNVXwF+uEf1amBDK28Azhmqv6IGrgcWJjn+gAy0Q1W1o6q+3so/YvA/ycU4f9Nqc/Bw2zy8vQo4Hbiq1e85d1NzehWwKkkOzGj7k2QJcDbwobYdnLv91e3P7bgF896+DnTxQRrLk8miqtrRyvcCi1rZ+dyHtjz4fOAGnL+RtKXYm4CdwCbgu8CDV
fVY6zI8P7+Yu9a+Gzj2gA64L38KvBX4eds+FuduJgr4YpIb27dOQsc/t/57zHqcqqokPqr/BJI8Hfgz4E1V9dDwxYjzt29V9TPg1CQLgc8Czz24I3pySPJKYGdV3ZjkpQd5OE9WL66q7UmeBWxK8q3hxt5+bsftinmkrwPVr7hvaqmmve9s9c7nHpIcziCUP15Vf96qnb8ZqKoHgeuAFzJYJpy6QBien1/MXWs/Crj/wI60Gy8CXpXkbga3504HPohzN7Kq2t7edzL4S+EL6PjndtyC2a8DnZ2NwJpWXgNcPVR/fntKcSWwe2jp55DT7tNdDtxRVR8YanL+ppFkol0pk+TXgJczuEd/HfDq1m3PuZua01cD19Yh+qULVfW2qlpSVcsY/D/t2qp6Lc7dSJI8LckzpsrAK4Bb6fnntqrG6gWcBXyHwf2rdxzs8fT2Aj4J7AB+yuDeyVoG9582A3cCXwKOaX3D4Cn37wLfBFYc7PEf5Ll7MYN7VbcAN7XXWc7fSHP3D4BvtLm7FXhXq3828FVgEvgM8NRWf2Tbnmztzz7Yn6GHF/BS4HPO3Yzm7NnAze1121Qu9Pxz6zd/SZLUkXFbypYk6UnNYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjvx/csKUYAcL4/kAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of fare feature before capping outliers\n", "\n", "plot_hist(data, 'fare')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max age: 80.0\n", "Max fare: 512.3292\n", "Min age: 0.1667\n", "Min fare: 0.0\n" ] } ], "source": [ "# let's find the maximum and minimum age and fare in the Titanic dataset\n", "print(\"Max age:\", data.age.max())\n", "print(\"Max fare:\", data.fare.max())\n", "\n", "print(\"Min age:\", data.age.min())\n", "print(\"Min fare:\", data.fare.min())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Maximum capping" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ArbitraryOutlierCapper(max_capping_dict={'age': 50, 'fare': 150})" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'''Parameters\n", "----------\n", "max_capping_dict : dictionary, default=None\n", " Dictionary containing the user specified capping values for the right tail of\n", " the distribution of each variable (maximum values).\n", "\n", "min_capping_dict : dictionary, default=None\n", " Dictionary containing user specified capping values for the left tail of the\n", " distribution of each variable (minimum values).\n", "\n", "missing_values : string, default='raise'\n", " Indicates if missing values should be ignored or raised. 
If\n", " `missing_values='raise'` the transformer will return an error if the\n", " training or the datasets to transform contain missing values.\n", "'''\n", "\n", "# capping of age and fare features at right tail\n", "capper = ArbitraryOutlierCapper(\n", " max_capping_dict={'age': 50, 'fare': 150}, min_capping_dict=None)\n", "\n", "capper.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maximum caps: {'age': 50, 'fare': 150}\n" ] } ], "source": [ "# here we can find the maximum caps allowed\n", "print(\"Maximum caps:\", capper.right_tail_caps_)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this dictionary is empty, because we selected only right tail\n", "capper.left_tail_caps_" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max age after capping: 50.0\n", "Max fare after capping: 150.0\n" ] } ], "source": [ "# transforming train and test data\n", "train_t = capper.transform(X_train)\n", "test_t = capper.transform(X_test)\n", "\n", "#check max age and max fare after capping\n", "print(\"Max age after capping:\", train_t.age.max())\n", "print(\"Max fare after capping:\", train_t.fare.max())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Minimum capping" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ArbitraryOutlierCapper(min_capping_dict={'age': 10, 'fare': 100})" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# capping outliers at left tail\n", "capper = ArbitraryOutlierCapper(\n", " max_capping_dict=None, min_capping_dict={'age': 10, 'fare': 100})\n", "\n", "capper.fit(X_train)" ] }, { 
"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this dictionary is empty, because we capped only the left tail\n", "capper.right_tail_caps_" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': 10, 'fare': 100}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here we can find the minimum caps allowed\n", "capper.left_tail_caps_" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Min age: 10.0\n", "Min fare: 100.0\n" ] } ], "source": [ "# transforming train and test set\n", "train_t = capper.transform(X_train)\n", "test_t = capper.transform(X_test)\n", "\n", "# After capping\n", "print(\"Min age:\", train_t.age.min())\n", "print(\"Min fare:\", train_t.fare.min())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Both ends capping" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ArbitraryOutlierCapper(max_capping_dict={'age': 60, 'fare': 150},\n", " min_capping_dict={'age': 5, 'fare': 5})" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# capping outliers at both tails\n", "capper = ArbitraryOutlierCapper(\n", " min_capping_dict={'age': 5, 'fare': 5},\n", " max_capping_dict={'age': 60, 'fare': 150})\n", "capper.fit(X_train)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': 60, 'fare': 150}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here we can find the maximum caps allowed\n", "capper.right_tail_caps_" ] }, { "cell_type": "code", "execution_count": 18, "metadata": 
{}, "outputs": [ { "data": { "text/plain": [ "{'age': 5, 'fare': 5}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here we can find the minimum caps allowed\n", "capper.left_tail_caps_" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Max age: 60.0\n", "Max fare: 150.0\n", "Min age: 5.0\n", "Min fare: 5.0\n" ] } ], "source": [ "# transforming train and test data\n", "train_t = capper.transform(X_train)\n", "test_t = capper.transform(X_test)\n", "\n", "# After capping outliers\n", "print(\"Max age:\", train_t.age.max())\n", "print(\"Max fare:\", train_t.fare.max())\n", "\n", "print(\"Min age:\", train_t.age.min())\n", "print(\"Min fare:\", train_t.fare.min())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAE/CAYAAACTomAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUWklEQVR4nO3dfbDmZV3H8fdHwEwhAXfbcGFZ1J2KmkRnNZzMUCrxKWymCMeHlXC2KSpLrPApqJGimqwstUgI1FTIh6AkExEHnVJaVBRBc8NFdl12FxDlwbTFb3/c18rN4SznPg/3Odfe5/2aOXN+9/V7+u61e+/nXNfvd/9OqgpJktSHhyx1AZIk6T4GsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWVoASf42yesW6FhrktyVZL/2+qNJXrYQx27H+7ckGxbqeLM47+uT3JrklsU+t7QviZ9jlh5cki3AKmA3cC9wPfA24Nyq+s4cjvWyqvrwLPb5KPCOqnrrbM7V9j0LeFxVvWi2+y6kJGuALwJHVtXOpaxF6p0jZmk0z6uqg4AjgXOA3wPOW+iTJNl/oY/ZiTXAbYayNDODWZqFqvp6VV0K/BKwIcmPAiS5IMnr2/KKJP+a5I4ktyf5WJKHJHk7g4D6lzZV/btJ1iapJKcm+QrwkaG24ZB+bJKrk3wjySVJDm3nOi7J1uEak2xJ8tNJTgBeDfxSO9+1bf13p8ZbXa9NclOSnUneluSRbd2eOjYk+Uqbhn7N3vomySPb/rva8V7bjv/TwOXAo1sdF0yz7yGtz3Yl+VpbPnxo/VFJrkpyZ5IPJ3lTkncMrT82yX+0Pr82yXEj/pVK3TGYpTmoqquBrcBPTrP69LZuJYMp8FcPdqkXA19hMPo+sKr+dGifnwJ+GHjmXk75EuCXgcMYTKm/cYQaPwj8EXBRO9/jp9nspe3r6cBjgAOBv5myzVOBHwSOB34/yQ/v5ZR/DTyyHeenWs2ntGn7ZwFfbXW8dJp9HwL8A4MZiTXAN6fU8U7gauBRwFnAi/esSLIa+ADweuBQ4JXAe5Os3EudUtcMZmnuvsogCKb6PwYBemRV/
V9VfaxmvpnjrKq6u6q+uZf1b6+q66rqbuB1wEl7bg6bpxcCb6iqG6vqLuBVwMlTRut/UFXfrKprgWuBBwR8q+Vk4FVVdWdVbQH+nKEAfTBVdVtVvbeq7qmqO4GzGYT7nuvTTwJ+v6q+XVUfBy4d2v1FwGVVdVlVfaeqLgc2Ac+eTUdIvTCYpblbDdw+TfufAZuBDyW5MckZIxzr5lmsvwk4AFgxUpUP7tHteMPH3p/BSH+P4buo72Ewqp5qRatp6rFWj1JEkocn+bs2Bf4N4Crg4Bb4jwZur6p7hnYZ7o8jgV9s09h3JLmDwSj/sFHOLfXGYJbmIMmTGITOx6euayPG06vqMcDPAa9Icvye1Xs55Ewj6iOGltcwGJXfCtwNPHyorv0YTKGPetyvMgi24WPvBnbMsN9Ut7aaph5r24j7n85guvzHq+r7gKe19gDbgUOTPHxo++H+uJnBjMLBQ1+PqKpzZvlnkLpgMEuzkOT7kjwXeDeDjzB9bpptnpvkcUkCfJ3BR6z2fKxqB4NrsLP1oiRHt3D6Q+A9VXUv8N/Aw5I8J8kBwGuB7xnabwewNsne3uvvAn673Vx1IPddk949m+JaLRcDZyc5KMmRwCuAdzz4nt91EIPryne0G9vOHDr2TQymps9K8tAkTwGeN7TvO4DnJXlmkv2SPKzdFHc40j7IYJZG8y9J7mQwOnsN8AbglL1suw74MHAX8J/Am6vqyrbuj4HXtinXV87i/G8HLmAwrfww4DdhcJc48GvAWxmMTu9mcOPZHv/Uvt+W5FPTHPf8duyrgC8D/wv8xizqGvYb7fw3MphJeGc7/ij+EvheBiPvTwAfnLL+hcBTgNsY3OR1EfAtgKq6GTiRwU12uxj8Hf0O/v+mfZQPGJG0z0lyEfCFqjpzxo2lfYw/UUrqXpInJXls+1z0CQxGyP+8xGVJYzGpTxmSNFl+AHgfg88xbwV+tao+vbQlSePhVLYkSR1xKluSpI4YzJIkdaSLa8wrVqyotWvXLnUZkiQtmmuuuebWqnrAM927COa1a9eyadOmpS5DkqRFk+Sm6dqdypYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6kgXz8qWJtXaMz4w6322nPOcMVQiaV/hiFmSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdmTGYkxyR5Mok1yf5fJKXt/ZDk1ye5Evt+yGtPUnemGRzks8meeK4/xCSJE2KUUbMu4HTq+po4FjgtCRHA2cAV1TVOuCK9hrgWcC69rUReMuCVy1J0oSaMZirantVfaot3wncAKwGTgQubJtdCDy/LZ8IvK0GPgEcnOSwhS5ckqRJNKtrzEnWAk8APgmsqqrtbdUtwKq2vBq4eWi3ra1NkiTNYORgTnIg8F7gt6rqG8PrqqqAms2Jk2xMsinJpl27ds1mV0mSJtZIwZzkAAah/I9V9b7WvGPPFHX7vrO1bwOOGNr98NZ2P1V1blWtr6r1K1eunGv9kiRNlFHuyg5wHnBDVb1haNWlwIa2vAG4ZKj9Je3u7GOBrw9NeUuSpAex/wjb/ATwYuBzST7T2l4NnANcnORU4CbgpLbuMuDZwGbgHuCUhSxYkqRJNmMwV9XHgexl9fHTbF/AafOsS5KkZcknf0mS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJ
UnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHZgzmJOcn2ZnkuqG2s5JsS/KZ9vXsoXWvSrI5yReTPHNchUuSNIlGGTFfAJwwTftfVNUx7esygCRHAycDP9L2eXOS/RaqWEmSJt2MwVxVVwG3j3i8E4F3V9W3qurLwGbgyfOoT5KkZWU+15h/Pcln21T3Ia1tNXDz0DZbW5skSRrBXIP5LcBjgWOA7cCfz/YASTYm2ZRk065du+ZYhiRJk2VOwVxVO6rq3qr6DvD33DddvQ04YmjTw1vbdMc4t6rWV9X6lStXzqUMSZImzpyCOclhQy9/Hthzx/alwMlJvifJUcA64Or5lShJ0vKx/0wbJHkXcBywIslW4EzguCTHAAVsAX4FoKo+n+Ri4HpgN3BaVd07lsolSZpAMwZzVb1gmubzHmT7s4Gz51OUJEnLlU/+kiSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdWTGYE5yfpKdSa4bajs0yeVJvtS+H9Lak+SNSTYn+WySJ46zeEmSJs0oI+YLgBOmtJ0BXFFV64Ar2muAZwHr2tdG4C0LU6YkScvDjMFcVVcBt09pPhG4sC1fCDx/qP1tNfAJ4OAkhy1QrZIkTby5XmNeVVXb2/ItwKq2vBq4eWi7ra1NkiSNYN43f1VVATXb/ZJsTLIpyaZdu3bNtwxJkibCXIN5x54p6vZ9Z2vfBhwxtN3hre0BqurcqlpfVetXrlw5xzIkSZoscw3mS4ENbXkDcMlQ+0va3dnHAl8fmvKWJEkz2H+mDZK8CzgOWJFkK3AmcA5wcZJTgZuAk9rmlwHPBjYD9wCnjKFmSZIm1ozBXFUv2Muq46fZtoDT5luUJEnLlU/+kiSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkj+y91AdJiW3vGB2a9z5ZznjOGSiTpgRwxS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHZnXXdlJtgB3AvcCu6tqfZJDgYuAtcAW4KSq+tr8ypQkaXlYiBHz06vqmKpa316fAVxRVeuAK9prSZI0gnFMZZ8IXNiWLwSeP4ZzSJI0keYbzAV8KMk1STa2tlVVtb0t3wKsmuc5JElaNub75K+nVtW2JN8PXJ7kC8Mrq6qS1HQ7tiDfCLBmzZp5liFNjrk8mQx8Opk0KeYVzFW1rX3fmeT9wJOBHUkOq6rtSQ4Ddu5l33OBcwHWr18/bXhr32OoSNL8zHkqO8kjkhy0Zxn4WeA64FJgQ9tsA3DJfIuUJGm5mM+IeRXw/iR7jvPOqvpgk
v8CLk5yKnATcNL8y5QeaK6jc0nq2ZyDuapuBB4/TfttwPHzKUqSpOXKX/uoLjj6laQBH8kpSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjflxKmhBz+ciZj0KV+jORwezzmiVJ+yqnsiVJ6ojBLElSRwxmSZI6MpHXmCWNlzeaSePjiFmSpI4YzJIkdcRgliSpI15jlpYxfw+2lqPe75FwxCxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEu7Ildc3fFqflxhGzJEkdccQsjcDP+0paLAaz9sow0r6s94dISHtjMEvSPPlDgBaS15glSeqII2ZJmnDe2b5vccQsSVJHDGZJkjriVLYkNX4SQT1wxCxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEu7IlaQn40A/tjSNmSZI64ohZkrRgnAmYP4NZ0qLw4R3SaJzKliSpI46YF4C/i1XSJHKWY2kYzEvE6zCS5mJSw9IBzn2cypYkqSOOmJeBSf0JW5ImkcG8jzFkJWlgUv8/NJiHTOpfsiRp3zG2a8xJTkjyxSSbk5wxrvNIkjRJxhLMSfYD3gQ8CzgaeEGSo8dxLkmSJsm4RsxPBjZX1Y1V9W3g3cCJYzqXJEkTY1zBvBq4eej11tYmSZIexJLd/JVkI7CxvbwryReXqpZFsgK4damL2AfYT6Oxn0ZjP43GfppB/mQsfXTkdI3jCuZtwBFDrw9vbd9VVecC547p/N1Jsqmq1i91Hb2zn0ZjP43GfhqN/TSzxeyjcU1l/xewLslRSR4KnAxcOqZzSZI0McYyYq6q3Ul+Hfh3YD/g/Kr6/DjOJUnSJBnbNeaqugy4bFzH3wctm2n7ebKfRmM/jcZ+Go39NLNF66NU1WKdS5IkzcDfLiVJUkcM5jFIcn6SnUmuG2o7NMnlSb7Uvh+ylDX2IMkRSa5Mcn2Szyd5eWu3r5okD0tydZJrWx/9QWs/Kskn2yNvL2o3WS57SfZL8ukk/9pe209TJNmS5HNJPpNkU2vzPTdFkoOTvCfJF5LckOQpi9VPBvN4XACcMKXtDOCKqloHXNFeL3e7gdOr6mjgWOC09uhW++o+3wKeUVWPB44BTkhyLPAnwF9U1eOArwGnLl2JXXk5cMPQa/tpek+vqmOGPv7je+6B/gr4YFX9EPB4Bv+uFqWfDOYxqKqrgNunNJ8IXNiWLwSev5g19aiqtlfVp9rynQz+4a/GvvquGrirvTygfRXwDOA9rX1Z99EeSQ4HngO8tb0O9tOofM8NSfJI4GnAeQBV9e2quoNF6ieDefGsqqrtbfkWYNVSFtObJGuBJwCfxL66nzY9+xlgJ3A58D/AHVW1u23iI28H/hL4XeA77fWjsJ+mU8CHklzTnsAIvuemOgrYBfxDuzTy1iSPYJH6yWBeAjW4Fd7b4ZskBwLvBX6rqr4xvM6+gqq6t6qOYfAEvScDP7S0FfUnyXOBnVV1zVLXsg94alU9kcFv/zstydOGV/qeAwYfJX4i8JaqegJwN1OmrcfZTwbz4tmR5DCA9n3nEtfThSQHMAjlf6yq97Vm+2oabSrtSuApwMFJ9jyH4AGPvF2GfgL4uSRbGPw2u2cwuEZoP01RVdva953A+xn8sOd77v62Alur6pPt9XsYBPWi9JPBvHguBTa05Q3AJUtYSxfaNcDzgBuq6g1Dq+yrJsnKJAe35e8FfobBtfgrgV9omy3rPgKoqldV1eFVtZbBI4A/UlUvxH66nySPSHLQnmXgZ4Hr8D13P1V1C3Bzkh9sTccD17NI/eQDRsYgybuA4xj8xpYdwJnAPwMXA2uAm4CTqmrqDWLLSpKnAh8DPsd91wVfzeA6s30FJPkxBjeZ7MfgB+mLq+oPkzyGwcjwUODTwIuq6ltLV2k/khwHvLKqnms/3V/rj/e3l/sD76yqs5M8Ct9z95PkGAY3Ej4UuBE4hfYeZMz9ZDBLktQRp7IlSeqIwSxJUkcMZkmSOmIwS5LUEYNZk
qSOGMySJHXEYJYkqSMGsyRJHfl/2rdSxJSzL0kAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of age feature after capping outliers\n", "plot_hist(train_t, 'age')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAE/CAYAAACTomAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZJElEQVR4nO3de7BlZX3m8e8jjaCiNthHBptLo5IYtEagOgwpzWjACxdHSFWGYBlpDanOzGCiCRkDaoxOyQQnUUZnRhIiBPCGiBo6ikZEUsaqCDYEkItoC41029CtyE0TIvibP/bbsm3O6XPv857d30/VrrPWu26/96zu/Zx12WunqpAkSX14wkIXIEmSHmMwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYpTmQ5C+T/MkcrWv/JA8l2aWN/0OS35mLdbf1fT7Jqrla3zS2++4k309y9wTT/2uSe1rfn7Gj65N6ET/HLG1fkvXA3sAjwKPALcBFwLlV9dMZrOt3qupL01jmH4CPVNWHprOttuw7gedW1W9Nd9m5lGR/4DbggKraPM70XYEHgCOq6oYdXZ/UE4+Ypan5T1X1VOAA4Czgj4Hz5nojSZbM9To7sT/wg/FCudkb2B24eSYrH+Hfm3ZCBrM0DVV1f1WtAX4TWJXkBQBJLkjy7ja8LMlnk9yX5N4k/5jkCUk+zCCg/q6drn1LkhVJKskpSb4LfHmobThsnpPkmiQPJLksyV5tWy9NsmG4xiTrk7wsydHAW4HfbNu7oU3/2anxVtfbk9yZZHOSi5I8vU3bWseqJN9tp6HfNtHvJsnT2/Jb2vre3tb/MuAK4Fmtjgu2We4XGBxNA9yX5Mut/f1J7mp9vjbJrw4t884klyb5SJIHgNe37Z+XZFOSje3U+S5T37tSHwxmaQaq6hpgA/Cr40w+rU0bY3Ak+NbBIvU64LsMjr73qKr/NbTMS4BfAl45wSZPBn4b2IfBKfUPTKHGLwD/E/hE294Lx5nt9e31a8CzgT2A/7vNPC8GfhE4CnhHkl+aYJP/B3h6W89LWs1vaKftjwG+1+p4/TZ1fgt4fhtdWlVHtuGvA4cAewEfAz6ZZPehRY8HLgWWAh8FLmDwu3kucCjwCmDOrs1LO4rBLM3c9xiExrZ+wiBAD6iqn1TVP9bkN3O8s6p+VFX/MsH0D1fVTVX1I+BPgBPn6GjwtcD7qur2qnoIOAM4aZuj9XdV1b+0a783AI8L+FbLScAZVfVgVa0H3gu8bqaFVdVHquoHVfVIVb0X2I3BHwhb/VNV/W27zv804Fjgze33uBk4u9UkLSoGszRzy4F7x2n/c2Ad8MUktyc5fQrrumsa0+8EdgWWTanK7XtWW9/wupcwONLfavgu6h8zOKre1rJW07brWj7TwpL8UZJbk9yf5D4GR+PDfR7+nRzQtr+pXUK4D/gr4Jkz3b60UAxmaQaS/DKD0PnqttPaEeNpVfVs4NXAHyY5auvkCVY52RH1fkPD+zM4Kv8+8CPgyUN17cLgFPpU1/s9BqE2vO5HgHsmWW5b3281bbuujdNcDwDtevJbgBOBPatqKXA/kKHZhvt2F/AwsKyqlrbX06rq+UiLjMEsTUOSpyV5FXAxg48wfWOceV6V5LlJwiBMHgW2fqzqHgbXYKfrt5IcnOTJwP8ALq2qR4FvAbsnOa595OjtDE75bnUPsCLJRP/XPw78QZID
k+zBY9ekH5lOca2WS4Azkzw1yQHAHwIfmc56hjyVwR8IW4AlSd7B4HT1RNvfBHwReG/bR09I8pwkL5nh9qUFYzBLU/N3SR5kcGT2NuB9wBsmmPcg4EvAQ8A/AR+sqqvatD8D3t5Ot/7RNLb/YQY3N93N4GNFvw+Du8SB/wZ8iMHR6Y8Y3Hi21Sfbzx8kuW6c9Z7f1v0V4A7gX4Hfm0Zdw36vbf92BmcSPtbWPxN/D3yBwR8ed7a6JjvdfzLwRAafM/8hgxvD9pnh9qUF4wNGJEnqiEfMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSR7r4RpZly5bVihUrFroMSZJ2mGuvvfb7VTW2bXsXwbxixQrWrl270GVIkrTDJLlzvPZJT2Un2b193dwNSW5O8q7WfkGSO5Jc316HtPYk+UCSdUluTHLYnPZEkqQRNpUj5oeBI6vqofbIv68m+Xyb9t+r6tJt5j+GwZOPDgL+A3BO+ylJkiYx6RFzDTzURndtr+09Lux44KK23NeApUl8LJ4kSVMwpbuyk+yS5HpgM3BFVV3dJp3ZTlefnWTrg/OX8/PPtN3AOF/9lmR1krVJ1m7ZsmXmPZAkaYRMKZir6tGqOgTYFzg8yQsYfKH684BfZvBl8X88nQ1X1blVtbKqVo6NPe6mNEmSdkrT+hxzVd0HXAUcXVWb2unqh4G/AQ5vs23k5787dl9m+J2skiTtbKZyV/ZYkqVt+EnAy4Fvbr1u3L5z9gTgprbIGuDkdnf2EcD97btSJUnSJKZyV/Y+wIVJdmEQ5JdU1WeTfDnJGBDgeuC/tPkvB44F1gE/ZuLvrJUkSduYNJir6kbg0HHaj5xg/gJOnX1pkiTtfHxWtiRJHTGYJUnqSBfPyp5rK07/3IyWW3/WcXNciSRJ0+MRsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqyKTBnGT3JNckuSHJzUne1doPTHJ1knVJPpHkia19tza+rk1fMc99kCRpZEzliPlh4MiqeiFwCHB0kiOA9wBnV9VzgR8Cp7T5TwF+2NrPbvNJkqQpmDSYa+ChNrprexVwJHBpa78QOKENH9/GadOPSpK5KliSpFE2pWvMSXZJcj2wGbgC+A5wX1U90mbZACxvw8uBuwDa9PuBZ4yzztVJ1iZZu2XLlll1QpKkUTGlYK6qR6vqEGBf4HDgebPdcFWdW1Urq2rl2NjYbFcnSdJImNZd2VV1H3AV8CvA0iRL2qR9gY1teCOwH0Cb/nTgB3NRrCRJo24qd2WPJVnahp8EvBy4lUFA/0abbRVwWRte08Zp079cVTWHNUuSNLKWTD4L+wAXJtmFQZBfUlWfTXILcHGSdwP/DJzX5j8P+HCSdcC9wEnzULckSSNp0mCuqhuBQ8dpv53B9eZt2/8V+M9zUp0kSTsZn/wlSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1ZNJgTrJfkquS3JLk5iRvau3vTLIxyfXtdezQMmckWZfktiSvnM8OSJI0SpZMYZ5HgNOq6rokTwWuTXJFm3Z2Vf3F8MxJDgZOAp4PPAv4UpJfqKpH57JwSZJG0aRHzFW1qaqua8MPArcCy7ezyPHAxVX1cFXdAawDDp+LYiVJGnXTusacZAVwKHB1a3pjkhuTnJ9kz9a2HLhraLENjBPkSVYnWZtk
7ZYtW6ZfuSRJI2jKwZxkD+BTwJur6gHgHOA5wCHAJuC909lwVZ1bVSurauXY2Nh0FpUkaWRNKZiT7MoglD9aVZ8GqKp7qurRqvop8Nc8drp6I7Df0OL7tjZJkjSJqdyVHeA84Naqet9Q+z5Ds/06cFMbXgOclGS3JAcCBwHXzF3JkiSNrqnclf0i4HXAN5Jc39reCrwmySFAAeuB3wWoqpuTXALcwuCO7lO9I1uSpKmZNJir6qtAxpl0+XaWORM4cxZ1SZK0U/LJX5IkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcmDeYk+yW5KsktSW5O8qbWvleSK5J8u/3cs7UnyQeSrEtyY5LD5rsTkiSNiqkcMT8CnFZVBwNHAKcmORg4Hbiyqg4CrmzjAMcAB7XXauCcOa9akqQRNWkwV9WmqrquDT8I3AosB44HLmyzXQic0IaPBy6qga8BS5PsM9eFS5I0iqZ1jTnJCuBQ4Gpg76ra1CbdDezdhpcDdw0ttqG1SZKkSUw5mJPsAXwKeHNVPTA8raoKqOlsOMnqJGuTrN2yZct0FpUkaWRNKZiT7MoglD9aVZ9uzfdsPUXdfm5u7RuB/YYW37e1/ZyqOreqVlbVyrGxsZnWL0nSSJnKXdkBzgNurar3DU1aA6xqw6uAy4baT253Zx8B3D90yluSJG3HkinM8yLgdcA3klzf2t4KnAVckuQU4E7gxDbtcuBYYB3wY+ANc1mwJEmjbNJgrqqvAplg8lHjzF/AqbOsS5KknZJP/pIkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjoyaTAnOT/J5iQ3DbW9M8nGJNe317FD085Isi7JbUleOV+FS5I0iqZyxHwBcPQ47WdX1SHtdTlAkoOBk4Dnt2U+mGSXuSpWkqRRN2kwV9VXgHunuL7jgYur6uGqugNYBxw+i/okSdqpzOYa8xuT3NhOde/Z2pYDdw3Ns6G1SZKkKZhpMJ8DPAc4BNgEvHe6K0iyOsnaJGu3bNkywzIkSRotMwrmqrqnqh6tqp8Cf81jp6s3AvsNzbpvaxtvHedW1cqqWjk2NjaTMiRJGjkzCuYk+wyN/jqw9Y7tNcBJSXZLciBwEHDN7EqUJGnnsWSyGZJ8HHgpsCzJBuBPgZcmOQQoYD3wuwBVdXOSS4BbgEeAU6vq0XmpXJKkETRpMFfVa8ZpPm87858JnDmboiRJ2ln55C9JkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjk96VvTNZcfrnZrTc+rOOm+NKJEk7K4+YJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSOTBnOS85NsTnLTUNteSa5I8u32c8/WniQfSLIuyY1JDpvP4iVJGjVTOWK+ADh6m7bTgSur6iDgyjYOcAxwUHutBs6ZmzIlSdo5TBrMVfUV4N5tmo8HLmzDFwInDLVfVANfA5Ym2WeOapUkaeTN9Brz3lW1qQ3fDezdhpcDdw3Nt6G1SZKkKZj1zV9VVUBNd7kkq5OsTbJ2y5Ytsy1DkqSR
MNNgvmfrKer2c3Nr3wjsNzTfvq3tcarq3KpaWVUrx8bGZliGJEmjZabBvAZY1YZXAZcNtZ/c7s4+Arh/6JS3JEmaxJLJZkjyceClwLIkG4A/Bc4CLklyCnAncGKb/XLgWGAd8GPgDfNQsyRJI2vSYK6q10ww6ahx5i3g1NkWJUnSzsonf0mS1BGDWZKkjhjMkiR1xGCWJKkjk978pb6sOP1z015m/VnHzUMlkqT54BGzJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqiMEsSVJHDGZJkjpiMEuS1BGDWZKkjhjMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktQRg1mSpI4YzJIkdcRgliSpI0tms3CS9cCDwKPAI1W1MslewCeAFcB64MSq+uHsypQkaecwF0fMv1ZVh1TVyjZ+OnBlVR0EXNnGJUnSFMzHqezjgQvb8IXACfOwDUmSRtKsTmUDBXwxSQF/VVXnAntX1aY2/W5g71luo3srTv/cQpcgSRoRsw3mF1fVxiTPBK5I8s3hiVVVLbQfJ8lqYDXA/vvvP8syJEkaDbM6lV1VG9vPzcBngMOBe5LsA9B+bp5g2XOramVVrRwbG5tNGZIkjYwZB3OSpyR56tZh4BXATcAaYFWbbRVw2WyLlCRpZzGbU9l7A59JsnU9H6uqLyT5OnBJklOAO4ETZ1+mJEk7hxkHc1XdDrxwnPYfAEfNpihJknZWPvlLkqSOGMySJHXEYJYkqSMGsyRJHTGYJUnqyGyf/CVJ0qIyk8corz/ruHmoZHwG805gps/y3pH/ECVJA57KliSpIwazJEkdMZglSeqIwSxJUkcMZkmSOmIwS5LUEYNZkqSOGMySJHXEB4xo0fLBKZJGkUfMkiR1xGCWJKkjnsqWdmJeDpD6YzBrTi2GN/rev1lG0s7NYNaEZhqykqSZ8xqzJEkdMZglSeqIp7KlziyG6/SS5o/BLI0I7wmQRoOnsiVJ6ojBLElSRzyVrS70fhrW674/b0fur1H9HUoT8YhZkqSOzNsRc5KjgfcDuwAfqqqz5mtbkrSQfJrcYzy7NHvzEsxJdgH+H/ByYAPw9SRrquqW+diepNHlG/3OwT9uHjNfR8yHA+uq6naAJBcDxwMGs3YqvV8718/bkfvL6/SayHxdY14O3DU0vqG1SZKk7Viwu7KTrAZWt9GHktw2zmzLgO/vuKp2OPu3uNm/juU9k86yqPs3BT/r3xR+F4vRsrxnx+2/efodHjBe43wF80Zgv6HxfVvbz1TVucC521tJkrVVtXLuy+uD/Vvc7N/iZv8Wt1Hu33ydyv46cFCSA5M8ETgJWDNP25IkaWTMyxFzVT2S5I3A3zP4uNT5VXXzfGxLkqRRMm/XmKvqcuDyWa5mu6e6R4D9W9zs3+Jm/xa3ke1fqmqha5AkSY2P5JQkqSPdBnOSo5PclmRdktMXup7ZSrJfkquS3JLk5iRvau17Jbkiybfbzz0XutbZSLJLkn9O8tk2fmCSq9t+/ES7GXBRSrI0yaVJvpnk1iS/Mkr7L8kftH+bNyX5eJLdF/P+S3J+ks1JbhpqG3d/ZeADrZ83Jjls4Sqfmgn69+ft3+eNST6TZOnQtDNa/25L8soFKXoaxuvf0LTTklSSZW180e2/7ekymIce6XkMcDDwmiQHL2xVs/YIcFpVHQwcAZza+nQ6cGVVHQRc2cYXszcBtw6Nvwc4u6qeC/wQOGVBqpob7we+UFXPA17IoJ8jsf+SLAd+H1hZVS9gcNPmSSzu/XcBcPQ2bRPtr2OAg9prNXDODqpxNi7g8f27AnhBVf174FvAGQDtveYk4PltmQ+299meXcDj+0eS/YBXAN8dal6M+29CXQYzQ4/0rKp/A7Y+0nPRqqpNVXVdG36QwZv6cgb9urDNdiFwwoIUOAeS7AscB3yojQc4Eri0zbJo+5fk6cB/BM4DqKp/
q6r7GKH9x+Bm0CclWQI8GdjEIt5/VfUV4N5tmifaX8cDF9XA14ClSfbZIYXO0Hj9q6ovVtUjbfRrDJ4hAYP+XVxVD1fVHcA6Bu+z3Zpg/wGcDbwFGL5BatHtv+3pNZhH+pGeSVYAhwJXA3tX1aY26W5g74Wqaw78bwb/YX7axp8B3Df0RrGY9+OBwBbgb9qp+g8leQojsv+qaiPwFwyOQjYB9wPXMjr7b6uJ9tcovuf8NvD5NjwS/UtyPLCxqm7YZtJI9G+rXoN5ZCXZA/gU8OaqemB4Wg1ukV+Ut8kneRWwuaquXeha5skS4DDgnKo6FPgR25y2XuT7b08GRx0HAs8CnsI4pxFHyWLeX5NJ8jYGl88+utC1zJUkTwbeCrxjoWuZb70G86SP9FyMkuzKIJQ/WlWfbs33bD3l0n5uXqj6ZulFwKuTrGdw6eFIBtdkl7ZTo7C49+MGYENVXd3GL2UQ1KOy/14G3FFVW6rqJ8CnGezTUdl/W020v0bmPSfJ64FXAa+txz4POwr9ew6DPxxvaO8z+wLXJfl3jEb/fqbXYB65R3q2663nAbdW1fuGJq0BVrXhVcBlO7q2uVBVZ1TVvlW1gsH++nJVvRa4CviNNtti7t/dwF1JfrE1HcXga0xHYv8xOIV9RJInt3+rW/s3EvtvyET7aw1wcru79wjg/qFT3otGkqMZXE56dVX9eGjSGuCkJLslOZDBTVLXLESNM1VV36iqZ1bVivY+swE4rP3fHIn99zNV1eULOJbBXYXfAd620PXMQX9ezOC02Y3A9e11LIPrsFcC3wa+BOy10LXOQV9fCny2DT+bwRvAOuCTwG4LXd8s+nUIsLbtw78F9hyl/Qe8C/gmcBPwYWC3xbz/gI8zuF7+EwZv4qdMtL+AMPgkyHeAbzC4O33B+zCD/q1jcK1163vMXw7N/7bWv9uAYxa6/pn0b5vp64Fli3X/be/lk78kSepIr6eyJUnaKRnMkiR1xGCWJKkjBrMkSR0xmCVJ6ojBLElSRwxmSZI6YjBLktSR/w+joNFyZ/LwGAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of fare feature after capping outliers\n", "plot_hist(train_t, 'fare')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }