{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## WoEEncoder (weight of evidence)\n",
"\n",
"This encoder replaces the labels by the weight of evidence \n",
"#### It only works for binary classification.\n",
"\n",
"The weight of evidence is given by: log( p(1) / p(0) )"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from feature_engine.encoding import WoEEncoder\n",
"\n",
"from feature_engine.encoding import RareLabelEncoder #to reduce cardinality"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load titanic dataset from OpenML\n",
"\n",
"def load_titanic():\n",
" data = pd.read_csv('https://www.openml.org/data/get_csv/16826755/phpMYEkMl')\n",
" data = data.replace('?', np.nan)\n",
" data['cabin'] = data['cabin'].astype(str).str[0]\n",
" data['pclass'] = data['pclass'].astype('O')\n",
" data['age'] = data['age'].astype('float')\n",
" data['fare'] = data['fare'].astype('float')\n",
" data['embarked'].fillna('C', inplace=True)\n",
" data.drop(labels=['boat', 'body', 'home.dest'], axis=1, inplace=True)\n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" survived | \n",
" name | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" ticket | \n",
" fare | \n",
" cabin | \n",
" embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" female | \n",
" 29.0000 | \n",
" 0 | \n",
" 0 | \n",
" 24160 | \n",
" 211.3375 | \n",
" B | \n",
" S | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" male | \n",
" 0.9167 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C | \n",
" S | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Miss. Helen Loraine | \n",
" female | \n",
" 2.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C | \n",
" S | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mr. Hudson Joshua Creighton | \n",
" male | \n",
" 30.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C | \n",
" S | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | \n",
" female | \n",
" 25.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C | \n",
" S | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass survived name sex \\\n",
"0 1 1 Allen, Miss. Elisabeth Walton female \n",
"1 1 1 Allison, Master. Hudson Trevor male \n",
"2 1 0 Allison, Miss. Helen Loraine female \n",
"3 1 0 Allison, Mr. Hudson Joshua Creighton male \n",
"4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female \n",
"\n",
" age sibsp parch ticket fare cabin embarked \n",
"0 29.0000 0 0 24160 211.3375 B S \n",
"1 0.9167 1 2 113781 151.5500 C S \n",
"2 2.0000 1 2 113781 151.5500 C S \n",
"3 30.0000 1 2 113781 151.5500 C S \n",
"4 25.0000 1 2 113781 151.5500 C S "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = load_titanic()\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"X = data.drop(['survived', 'name', 'ticket'], axis=1)\n",
"y = data.survived"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cabin 0\n",
"pclass 0\n",
"embarked 0\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we will encode the below variables, they have no missing values\n",
"X[['cabin', 'pclass', 'embarked']].isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cabin object\n",
"pclass object\n",
"embarked object\n",
"dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' Make sure that the variables are type (object).\n",
"if not, cast it as object , otherwise the transformer will either send an error (if we pass it as argument) \n",
"or not pick it up (if we leave variables=None). '''\n",
"\n",
"X[['cabin', 'pclass', 'embarked']].dtypes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((916, 8), (393, 8))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# let's separate into training and testing set\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
"\n",
"X_train.shape, X_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"## Rare value encoder first to reduce the cardinality\n",
"# see RareLabelEncoder jupyter notebook for more details on this encoder\n",
"rare_encoder = RareLabelEncoder(tol=0.03,\n",
" n_categories=2, \n",
" variables=['cabin', 'pclass', 'embarked'])\n",
"\n",
"rare_encoder.fit(X_train)\n",
"\n",
"# transform\n",
"train_t = rare_encoder.transform(X_train)\n",
"test_t = rare_encoder.transform(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The WoERatioEncoder() replaces categories by the weight of evidence\n",
"or by the ratio between the probability of the target = 1 and the probability\n",
"of the target = 0.\n",
"\n",
"The weight of evidence is given by: log(P(X=xj|Y = 1)/P(X=xj|Y=0))\n",
"\n",
"\n",
"Note: This categorical encoding is exclusive for binary classification.\n",
"\n",
"For example in the variable colour, if the mean of the target = 1 for blue\n",
"is 0.8 and the mean of the target = 0 is 0.2, blue will be replaced by:\n",
"np.log(0.8/0.2) = 1.386\n",
"#### Note: \n",
"The division by 0 is not defined and the log(0) is not defined.\n",
"Thus, if p(0) = 0 or p(1) = 0 for\n",
"woe , in any of the variables, the encoder will return an error.\n",
" \n",
"The encoder will encode only categorical variables (type 'object'). A list\n",
"of variables can be passed as an argument. If no variables are passed as \n",
"argument, the encoder will find and encode all categorical variables\n",
"(object type).
\n",
"\n",
"For details on the calculation of the weight of evidence visit:
\n",
"https://www.listendata.com/2015/03/weight-of-evidence-woe-and-information.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Weight of evidence"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"WoEEncoder(variables=['cabin', 'pclass', 'embarked'])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"woe_enc = WoEEncoder(variables=['cabin', 'pclass', 'embarked'])\n",
"\n",
"# to fit you need to pass the target y\n",
"woe_enc.fit(train_t, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'cabin': {'B': 1.6299623810120747,\n",
" 'C': 0.7217038208351837,\n",
" 'D': 1.405081209799324,\n",
" 'E': 1.405081209799324,\n",
" 'Rare': 0.7387452866900354,\n",
" 'n': -0.35752781962490193},\n",
" 'pclass': {1: 0.9453018143294478,\n",
" 2: 0.21009172435857942,\n",
" 3: -0.5841726684724614},\n",
" 'embarked': {'C': 0.6999054533737715,\n",
" 'Q': -0.05044494288988759,\n",
" 'S': -0.20113381737960143}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"woe_enc.encoder_dict_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" cabin | \n",
" embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" 310 | \n",
" 0.945302 | \n",
" male | \n",
" 57.0 | \n",
" 1 | \n",
" 1 | \n",
" 164.8667 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 853 | \n",
" -0.584173 | \n",
" male | \n",
" 25.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.2500 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 1090 | \n",
" -0.584173 | \n",
" female | \n",
" 23.0 | \n",
" 0 | \n",
" 0 | \n",
" 8.6625 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 988 | \n",
" -0.584173 | \n",
" male | \n",
" NaN | \n",
" 0 | \n",
" 0 | \n",
" 7.7500 | \n",
" -0.357528 | \n",
" -0.050445 | \n",
"
\n",
" \n",
" 875 | \n",
" -0.584173 | \n",
" male | \n",
" 30.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.2292 | \n",
" -0.357528 | \n",
" 0.699905 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass sex age sibsp parch fare cabin embarked\n",
"310 0.945302 male 57.0 1 1 164.8667 -0.357528 -0.201134\n",
"853 -0.584173 male 25.0 0 0 7.2500 -0.357528 -0.201134\n",
"1090 -0.584173 female 23.0 0 0 8.6625 -0.357528 -0.201134\n",
"988 -0.584173 male NaN 0 0 7.7500 -0.357528 -0.050445\n",
"875 -0.584173 male 30.0 0 0 7.2292 -0.357528 0.699905"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# transform and visualise the data\n",
"\n",
"train_t = woe_enc.transform(train_t)\n",
"test_t = woe_enc.transform(test_t)\n",
"\n",
"test_t.sample(5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbwAAAFNCAYAAAB7ftpjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZgddZ32//edzgJk30ia7IGEEJIQoAlBkR1MGhUXRkHFDY0ZxWUcHVFnQZ1nXMZnHlGZyS+DDKIzMjMMjhkJBEUWHQik0ZBOAsGQCAl0drKv3fn8/qhqOJz0chK6+nR33a/r6qtPVX2r6nOqz+n7fGs7igjMzMy6um7lLsDMzKw9OPDMzCwXHHhmZpYLDjwzM8sFB56ZmeWCA8/MzHLBgWctkvSQpI8e47yjJe2WVNHWdRWs4yZJP2lh+gpJFx3jskPSKcdcXBfVWbeLpLFp7d3LXYuVhwMvByT9UdK+NHw2SLpdUp+M1nNZ43BEvBARfSKioa3XVaqIOD0iHmrv9XbWUMiz4tdv3tafBw68/HhrRPQBpgNnAl8qcz1mXUqWezKsbTjwciYiNgCLSIIPAEkzJT0qabukp5rbBSjpZEm/lrRV0hZJ/yppQDrtx8Bo4H/SnuRfFO9CknSSpAWStklaLeljBcu+SdJ/SLpD0q50V2RVwfQvSnoxnbZK0qUFpfVsYb5XPjWn67hL0r+nbX8n6YxWNlm1pDXp8/17Sa+8ZyR9RNLTkl6WtEjSmHT8I2mTp9Jt8R5JD0t6Vzr9/HS7VKfDl0la2tpy02mTJP0y3YarJL27YNrtkm6RdE/6/B6XdHIzf8vGv80cSS9JqpP05wXTKyR9WdJz6bKelDSqieVcKen3knZKWifppoJpx0n6Sfp62S5piaRh6bQPpdt1l6S1kt7XTJ0zJD2Wzl8n6QeSehZMD0lzJf0h3V63SFLBc/hO+rdbA1zZ5F+Ypl+/6fj/VLJXZIekRySdXrS9/0nSQkl7gIslnZVuj13pvP8u6W8L5nmLpKXp83lU0rSW1m9tLCL808V/gD8Cl6WPRwK1wM3p8AhgK1BN8gHo8nR4aDr9IeCj6eNT0um9gKHAI8B3m1pPOjwWCKB7Ovww8I/AcSSBuxm4NJ12E7A/raMC+AawOJ12KrAOOKlguSe3Nl8Tz/0m4BBwNdAD+DywFujRzHYL4EFgEMk/o2cLtsXbgdXAaUB34C+BR4vmPaVg+GvA99PHXwaeA75VMO3m1pYL9E63w4fTaWcBW4DT0+m3A9uAGen0fwXubOa5Nf5tfpoud2r692jcVl8geZ2cCgg4Axhc/NyAi9J5uwHTgI3A29NpHwf+Bzgh/ducDfRL17cTODVtV9n4HJqo82xgZvp8xgJPA58t2s6/AAakf6PNwKx02lzgGWBU+jd8kILXY0vvk4JxHwH6krzmvwssLZh2O7ADeGP6/PsBzwOfIXl9vRM4CPxt2v4sYBNwbro9Ppius1dz6/dPG/8vLHcB/mmHP3LyRtoN7Erf8A8AA9JpXwR+XNR+EfDB9PFDpP/km1ju24HfF62nycBL/+k0AH0Lpn8DuD19fBPwq4Jpk4F96eNT0n8Ul1EUTi3NV1xT2rYwDLsBdcCbmnl+0fjPMx3+BPBA+vhe4PqiZe0FxhTMWxh4lwLL0sf3AR/l1UB/GHhna8sF3gP8pqjG/w/4m/Tx7cCtBdOqgWeaeW6Nf5tJBeO+DfwwfbwKuKqF7XJKM9O+C/y/9PFHgEeBaUVtegPbgXcBxx/la/mzwM+Kajm/YPg/gBvTx78G5hZMu4KjDLyi6QPS+fsXbO87CqZfALwIqGDcb3k18P4J+HrRMlcBF5ayfv+8/h/v0syPt0dEX5JP5JOAIen4McCfpLtYtkvaDpxP8qn7NSSdKOlOJbsWdwI/KVhOa04CtkXEroJxz5P0MBttKHi8FzhOUveIWE3yj+4mYFNaw0mtzddMHesaH0TEYWB9Wltz1hU8fr6g7Rjg5oJtto2kJzSCpj0GTEx36U0H7gBGSRpC0iNr3A3a0nLHAOcW/a3eBwwvWE/xtmjt5KTmnt8okl5oiySdK+lBSZsl7SDpVTW+Jn5M8uHpznS36bcl9YiIPSThPReoS3fBTmpm+RMl/SLdrbgT+DuOfM0195xPauL5lSzdJfrNdLfuTpJAomj9hcs/CXgx0vRqYvoY4M+L/n6jaPn1Z23IgZczEfEwySfT76Sj1pH08AYU/PSOiG82Mfs3SD7hTouIfsD7Sf4Zv7L4Flb9EjBIUt+CcaNJPhGXUve/RcT5JP80AvhWKfM14ZXjUEqOx41Ma2u1PUm9jW3XAR8v2m7HR8SjzdS/F3iSZHfX8og4SNL7+RzwXERsKWG564CHi6b1iYg/PeqtUNrza/L4X5F/AxYAoyKiPzCP9DUREYci4qsRMRl4A/AW4APptEURcTnJB6tngH9uZvn/lE6fkL7mvsxrX3MtqWvi+bWk+PX7XuAqkj0L/Ul6xdD8a74OGNF4DDFVuP51wP8p+vudEBE/bWb91sYcePn0XeBySdNJemlvlfTm9BPtcZIukjSyifn6kuwa3S5pBMlxnkIbgfFNrTAi1pH8g/9Guo5pwPUkx5laJOlUSZdI6kVyvG4fye7RY3G2pHemPcDPAgeAxS20/4KkgekJG58B/j0dPw/4UuNJDJL6S/qTgvma2hYPAzekvyHZXVw43Npyf0HSS7xOUo/05xxJp5X87I/0V5JOSNf34YLndyvwdUkTlJgmaXAT8/cl6bnvlzSDJCRIa79Y0lQlZy/uJDl+2iBpmKS3SepNsv130/zfs2867+60F3g04f4fwKcljZQ0ELixlfbFf7O+aX1bSY5D/l0r8z9G8jxukNRd0lUkvfdG/wzMTXvFktRbyUk/jR8Cm33/WNtw4OVQRGwm2aX2V2kQXUXyyXkzyafQL9D0a+OrJAfedwD3AHcXTf8G8Jfp7prPNzH/tSSfkl8CfkZy7OmXJZTcC/gmyQkaG4AT03qPxc9Jdqe9DFxHcuzsUCvtnwSWkjznHwJExM9Iepl3pru7lgOzC+a7CfhRui0az6R8mOSf6CPNDLe43HR38BXANSTbcEPattfRboQCD5OcJPMA8J2IuD8d/w8kgXE/SeD8EDi+ifk/AXxN0i7gr9N5Gg0H7krnfzpd109IXlt/nj6HbcCF6XKa8nmSEN1FEhj/3ky7pvwzyS7Vp4DfceTrtVjx6/cOkt2gLwIrafmDEWmv/Z0kH+S2k+wB+QVJaBIRNcDHgB+QvP5WAx9qYf3WxvTa3c1mXZeSU+ZPiYj3l7uWcpM0llfPUK0vbzVdl6THgXkR8S/lrsXcwzMzazOSLpQ0PN2l+UGSSzXuK3ddlsgs8CTdJmmTpOXNTJek7ym5AHmZpLOyqsXMrJ2cSrILdQfJbturI6KuvCVZo8x2aUq6gORg9B0RMaWJ6dXAp0iuFTqX5MLbczMpxszMci+zHl5EPEJyQLo5V5GEYUTEYmCApCOu/TIzM2sL5TyGN4LXXpS5nuYv2jUzM3tdyvm9UE1dPNrk/lVJc4A5AL179z570qQmb8pgZmY59eSTT26JiKEttSln4K3ntXchaPaOFxExH5gPUFVVFTU1NdlXZ2ZmnYakVm8dV85dmguAD6Rna84EdvhsJjMzy0pmPTxJPyW5UfEQSeuBvyH5ygwiYh6wkOQMzdUkN3z9cFa1mJmZZRZ4EXFtK9MD+GRW6zczMyvkO62YmVkuOPDMzCwXHHhmZpYLDjwzM8sFB56ZmeWCA8/MzHLBgWdmZrngwDMzs1xw4JmZWS448MzMLBcceGZmlgsOPDMzywUHnpmZ5YIDz8zMcsGBZ2ZmueDAMzOzXHDgmZlZLjjwzMwsFzINPEmzJK2StFrSjU1MHyjpZ5KWSXpC0pQs6zEzs/zKLPAkVQC3ALOBycC1kiYXNfsysDQipgEfAG7Oqh4zM8u3LHt4M4DVEbEmIg4CdwJXFbWZDDwAEBHPAGMlDcuwJjMzy6ksA28EsK5geH06rtBTwDsBJM0AxgAjM6zJzMxyKsvAUxPjomj4m8BASUuBTwG/B+qPWJA0R1KNpJrNmze3faVmZtbldc9w2euBUQXDI4GXChtExE7gwwCSBKxNfyhqNx+YD1BVVVUcmmZmZq3Ksoe3BJggaZyknsA1wILCBpIGpNMAPgo8koagmZlZm8qshxcR9ZJuABYBFcBtEbFC0tx0+jzgNOAOSQ3ASuD6rOoxM7N8y3KXJhGxEFhYNG5ewePHgAlZ1mBmZga+04qZmeWEA8/MzHLBgWdmZrngwDMzs1xw4JmZWS448MzMLBcceGZmlgsOPDMzywUHnpmZ5YIDz8zMcsGBZ2ZmueDAMzOzXHDgmZlZLjjwzMwsFxx4ZmaWCw48MzPLBQeemZnlggPPzMxywYFnZma5kGngSZolaZWk1ZJubGJ6f0n/I+kpSSskfTjLeszMLL8yCzxJFcAtwGxgMnCtpMlFzT4JrIyIM4CLgP8rqWdWNZmZWX5l2cObAayOiDURcRC4E7iqqE0AfSUJ6ANsA+ozrMnMzHIqy8AbAawrGF6fjiv0A+A04CWgFvhMRBwuXpCkOZJqJNVs3rw5q3rNzKwLyzLw1MS4KBp+M7AUOAmYDvxAUr8jZoqYHxFVEVE1dOjQtq/UzMy6vCwDbz0wqmB4JElPrtCHgbsjsRpYC0zKsCYzM8upLANvCTBB0rj0RJRrgAVFbV4ALgWQNAw4FViTYU1mZpZT3bNacETUS7oBWARUALdFxApJc9Pp84CvA7dLqiXZBfrFiNiSVU1mZpZfmQUeQEQsBBYWjZtX8Pgl4IosazAzMwPfacXMzHLCgWdmZrngwDMzs1xw4JmZWS448MzMLBcceGZmlgsOPDMzywUHnpmZ5YIDz8zMcsGBZ2ZmueDAMzOzXHDgmZlZLjjwzMwsFxx4ZmaWCw48MzPLBQeemZnlggPPzMxywYFnZma5kGngSZolaZWk1ZJubGL6FyQtTX+WS2qQNCjLmszMLJ8yCzxJFcAtwGxgMnCtpMmFbSLi7yNiekRMB74EPBwR27KqyczM8ivLHt4MYHVErImIg8CdwFUttL8W+GmG9ZiZWY5lGXgjgHUFw+vTcUeQdAIwC/ivDOsxM7McyzLw1MS4aKbtW4H/bW53pqQ5kmok1WzevLnNCjQzs/zIMvDWA6MKhkcCLzXT9hpa2J0ZEfMjoioiqoYOHdqGJZqZWV5kGXhLgAmSxknqSRJqC4obSeoPXAj8PMNazMws57pnteCIqJd0A7AIqABui4gVkuam0+elTd8B3B8Re7KqxczMTBHNHVbrmKqqqqKmpqbcZZiZWQci6cmIqGqpje+0YmZmueDAMzOzXHDgmZlZLjjwzMwsFxx4ZmaWCw48MzPLBQeemZnlggPPzMxywYFnZma54MAzM7NccOCZmVkuOPDMzCwXHHhmZpYLrQaepDeWMs7MzKwjK6WH9/0Sx5mZmZVk38EG7ltex11Prm+3dTb7BbCSzgPeAAyV9LmCSf1IvtDVzMysZPsONvDgqk3cU1vHg89sYu/BBk6r7MfVZ49sl/W39I3nPYE+aZu+BeN3AldnWZSZmXUNew/W8+tnNnFv7QZ+/cwm9h1qYEifnrzjzBFcObWSGeMGtVstzQZeRDwMPCzp9oh4XlLviNjTbpWZmVmntOdAEnILa+t4cNUm9h86zJA+vbj67JFUpyFX0U3tXldLPbxGJ0m6l6S3N1rSGcDHI+ITrc0oaRZwM8ku0Fsj4ptNtLkI+C7QA9gSERceRf1mZtYB7D5QzwNPb2RhbR0PrdrMgfrDDO3bi3dXjaJ6aiXnjC1PyBUqJfC+C7wZWAAQEU9JuqC1mSRVALcAlwPrgSWSFkTEyoI2A4B/BGZFxAuSTjyG52BmZmWwa/8hHng6OSb38LObOVh/mGH9enHtjNFUT63k7DEDyx5yhUoJPCJinfSaohtKmG0GsDoi1gBIuhO4ClhZ0Oa9wN0R8UK6nk2l1GNmZuWxc/8hfrVyIwtrN/DIH5KQG97vON537miunFrJWaMH0q0DhVyhUgJvnaQ3ACGpJ/Bp4OkS5hsBrCsYXg+cW9RmItBD0kMkJ8bcHBF3lLBsMzNrJzv2NYZcHb/5wxYONhzmpP7Hcd3MMVRPHc6ZozpuyBUqJfDmkhyHG0ESWvcDnyxhvqaefTSx/rOBS4HjgcckLY6IZ1+zIGkOMAdg9OjRJazazMxejx17D3H/yg0srK3jt6u3cKghGDHgeD5w3hiqp1UyfeSAThFyhVoNvIjYArzvGJa9HhhVMDwSeKmJNlvSsz/3SHoEOAN4TeBFxHxgPkBVVVVxaJqZWRvYvvcg96/YyD21dfzv6i3UH05C7sNvHEf11ErOGNmfosNbnUqrgSfpe02M3gHURMTPW5h1CTBB0jjgReAakmN2hX4O/EBSd5Lr/s4F/l8phZuZ2ev38p6DLFqxgYXLN/BoGnKjBh3P9W8ax5VTK5k6onOHXKFSdmkeB0wC/jMdfhewArhe0sUR8dmmZoqIekk3AItILku4LSJWSJqbTp8XEU9Lug9YBhwmuXRh+et7SmZm1pKtuw9wf3pM7tHnttJwOBgz+AQ+dsF4qqdUMmVEvy4TcoUU0fIeQkm/Bq6IiPp0uDvJcbzLgdqImJx5lQWqqqqipqamPVdpZtbpbdl9IOnJ1daxeM02Gg4HYwefQPXUSqqnVnL6SZ075CQ9GRFVLbUppYc3AuhNshuT9PFJEdEg6cDrrNHMzDKyedcB7luxgYXL6nh87VYOB4wf0ps/vfBkqqdWclpl304dckerlMD7NrA0vXRAwAXA30nqDfwqw9rMzOwobdq1n/uWJz25J9Zu43DAyUN7c8PFp1A9rZJTh+Ur5Aq1GHiSupFcc/cGkgvJBXw5IhrPtvxCtuWZmVlrNu5MQu6e2jqW/HEbETDhxD586pIJVE+tZOKwPrkNuUItBl5EHJb0fyPiPJIzKs3MrAPYsGM/9y6vY2FtHTXPv0wETBzWh89cOoErp1YyYVjf1heSM6Xs0rxf0rtIbgHma+DMzMqkbsc+FtYmuyuffP5lACYN78ufXTaR6qnDOeVEh1xLSgm8z5GcqFIvaT/Jbs2IiH6ZVmZmZry4fR/31iY9ud+9sB2A0yr78fkrJjJ7aiUnD+1T5go7j1LutOKPDGZm7Wjdtr2vHJNbui4JudNP6scX3nwqs6cMZ7xD7piU9G0JkgYCE0guQgcgIh7Jqigzs7xZt20vC9Oe3FPrk6vApozox1/MOpXqKZWMHdK7zBV2fqXcWuyjwGdI7oW5FJgJPAZckm1pZmZd2wtb93JPGnK1LyYhN21kf26cPYnqKZWMHnxCmSvsWkrp4X0GOAdYHBEXS5oEfDXbsszMuqY/btnDwvTsyuUv7gTgjFED+HL1JGZPqWTUIIdcVkoJvP0RsV8SknpFxDOSTs28MjOzLmLN5t3cu3wD9yyrY2VdEnLTRw3gK9WnMXvqcEYOdMi1h1ICb72kAcB/A7+U9DJHfs2PmZkVeG7zbhYuq+Oe2jqe2bALgLNGD+AvrzyN2VMrGTHg+DJXmD+lnKX5jvThTZIeBPoD92ZalZlZJ7R60y7uWZZcJ7dqYxJyVWMG8tdvmczsqcOp7O+QK6dSTlr5cURcBxARDzeOA67LuDYzsw7v2Y27uGdZHfcur+PZjbuR4Jwxg/ibt05m9pRKhvc/rvWFWLsoZZfm6YUDkiqAs7Mpx8ysY4sIVm3c9codT1ZvSkNu7CC++rbTmTVlOMP6OeQ6omYDT9KXgC8Dx0va2TgaOAjMb4fazMw6hIjgmQ27WFibHJNbs3kP3QQzxg3ig+edzpunDOfEvg65jq7ZwIuIbwDfkPSNiPhSO9ZkZlZ2EcHKup0srK3j3toNrNmShNzM8YP5yBvH8ebThzO0b69yl2lHoZSTVhx2ZpYLEcGKl3a+cseTP27dS0U3cd74wVz/piTkhvRxyHVWJd1a7FhJmgXcDFQAt0bEN4umX0TytUNr01F3R8TXsqzJzKxQRFD74o5Xjsm9sC0JuTecPJiPX3gyV0wexmCHXJfQ0jG8cRGxtrnprUlPbrkFuBxYDyyRtCAiVhY1/U1EvOVY12NmdrQigmXrdyQ9ueV1rNu2j+7dxBtOGcInLz6ZKyYPZ2DvnuUu09pYSz28u4CzJT0QEZcew7JnAKsjYg2ApDuBq4DiwDMzy1xEsHTd9nR35QZe3J6E3PkThvCpSyZwxeRhDDjBIdeVtRR43ST9DTBR0ueKJ0bEP7Sy7BHAuoLh9cC5TbQ7T9JTJHdv+XxErGhluWZmJTl8OPj9uu3cW1vHvcuTkOtRId40YSifvWwCV0weTv8TepS7TGsnLQXeNcDb0zbH8p14amJc8Tem/w4YExG7JVWT3L5swhELkuYAcwBGjx59DKWYWV4cPhz87oWXWVi7gXuX11G3Yz89K7rxpglD+NzlE7ls8jD6H++Qy6OWLktYBXxL0rKIOJZbia0HRhUMj6ToHpwRsbPg8UJJ/yhpSERsKWo3n/Tav6qqquLQNLOcO3w4ePKFl7lnWR33Ld/Ahp1JyF0wcSh/MetULj1tGP2Oc8jlXSlnaT4q6R+AC9Lhh4GvRcSOVuZbAkyQNA54kaTH+N7CBpKGAxsjIiTNALoBW4/mCZhZPjUcDmr+uC25Tm75BjbtOkDP7t24aOJQbpw6iUtPO5G+DjkrUErg3QYsB96dDl8H/AvwzpZmioh6STcAi0guS7gtIlZImptOnwdcDfyppHpgH3BNRLgHZ2ZNajgcPLF2G/cuT0Ju864D9OrejYtPPZHZU4dz6WnD6NMr06utrBNTa/kiaWlETG9tXHupqqqKmpqacqzazMqgvuEwT6Q9ufuWb2TL7gMc1yMJueqplVwy6UR6O+RyT9KTEVHVUptSXiX7JJ0fEb9NF/pGkt6YmVkm6hsO8/jabdxTW8ei5RvYuucgx/eo4JJJSchdPGkoJ/R0yNnRKeUVMxe4Q1L/dPhl4IPZlWRmeVTfcJjH1mxlYW0di1ZsZNueg5zQ89WQu+hUh5y9PqXcS/Mp4AxJ/dLhna3MYmZWkkMNh3n0ua3cW1vHohUbeHnvIXr3rOCS04Zx5dThXDjxRI7vWVHuMq2LKPnjkoPOzNrCwfrDPPrcFhbW1nH/yo1s33uIPr26c+lpSU/uwolDOa6HQ87anvcPmFnmDtYf5n9Xb+Ge2jp+uXIjO/Ydom+v7lw2eRjVUyt504QhDjnLnAPPzDJxoL6B3/7h1ZDbtb+evr26c3ljyE0cQq/uDjlrPyUFnqQ3AGML20fEHRnVZGad1P5DDfzmD1u4t7aOXz6dhFy/47pzxeThXDltOG88xSFn5dNq4En6MXAysBRoSEcH4MAzM/YfauCRZzezsLaOXz29id0H6ul/fA9mnT6c6mmVvPHkIfTs3q3cZZqV1MOrAib7Dihm1mj/oQYeWpWE3ANPb2TPwQYGnNCDK6dWUj2tkjecPJgeFQ4561hKCbzlwHCgLuNazKwD23ewgYdWbWLh8g38Og25gSf04K1nnET11ErOc8hZB1dK4A0BVkp6AjjQODIi3pZZVWbWIew9WM+Dz2xm4fI6HnxmE3sPNjCod0/eNn0EV06tZOb4QXR3yFknUUrg3ZR1EWbWcew5UM+DqzaxsLaOB5/ZzL5DDQzp05N3nJmE3IxxDjnrnEq508rD7VGImZXPngP1PPDMJhYuq+OhZzex/9BhhvTpxdVnj2T21OGcO24wFd2a+k5ns86jlLM0ZwLfB04DepJ81c+eiOiXcW1mlqHdB+p54OmNLKyt46FVmzlQf5ihfXvx7qpRVE+t5Jyxgxxy1qWUskvzByRf3vqfJGdsfgCYkGVRZpaNXfsP8cDTm7into6Hn93MwfrDDOvXi2tnjKZ6aiVnjxnokLMuq6QLzyNitaSKiGgA/kXSoxnXZWZtZMe+Q6/05B55dgsHGw4zvN9xvO/cNORGD6SbQ85yoJTA2yupJ7BU0rdJLk/onW1ZZvZ67Nh3iF+uTELuN3/YzKGG4KT+x3HdeWOonjqcM0c55Cx/Sgm864BuwA3AnwGjgHdlWZSZHZ19Bxv43Qsvs3jNVh5fs43fr3uZQw3BiAHH88HzxlI9rZLpIwc45CzXSjlL83lJxwOVEfHVo1m4pFnAzSQnutwaEd9spt05wGLgPRFx19GswyyPCgNu8ZqtLF23nUMNQTfB1JEDuP788cyaMpwzRvZHcsiZQWlnab4V+A7JGZrjJE0HvtbaheeSKoBbgMuB9cASSQsiYmUT7b4FLDq2p2DW9e072MCTz6c9uLWvBlxFNzFlRH8+cv44Zo4fTNWYgfQ9rke5yzXrkEq98HwG8BBARCyVNLaE+WYAqyNiDYCkO4GrgJVF7T4F/BdwTikFm+XB3oP1/O757a/04J5a/2rATXXAmR2TUgKvPiJ2HMNukRHAuoLh9cC5hQ0kjQDeAVyCA89ybO/B+ld7cGu2HRFw158/npnjB1E1dhB9evlrLM2ORUk3j5b0XqBC0gTg00AplyU0lZDF37jwXeCLEdHQUqBKmgPMARg9enQJqzbr2AoDbvGabTy1bjv1h5OAmzbSAWeWhVLeSZ8CvkJy4+ifkhxr+3oJ860nOaOz0UjgpaI2VcCdadgNAaol1UfEfxc2ioj5wHyAqqoqf02RdTp7DhT04NYeGXAfu2A8M8cP5uwxAx1wZhkp5SzNvSSB95WjXPYSYIKkccCLJHdreW/Rssc1PpZ0O/CL4rAz64wKA27xmq0sW7+D+sNB96KAqxozkN4OOLN20ew7TdKClmZs7SzNiKiXdANJj7ACuC0iVkiam06fdwz1mnVIew7UU/PKMbgjA25OQQ/OAWdWHi29884jOenkp8DjNH1MrkURsRBYWDSuyaCLiA8d7fLNyqUw4Bav2UqtA86sw2vpnTic5Bq6a0l2Rd4D/DQiVrRHYWYdye4D9dT8cRuL12zj8bVJD64hDbgzRg3g4xe+GnAn9HTAmXVEzb4z0xtF3wfcJ6kXSfA9JOlrEfH99irQrBwKA27xmq3Uvhd5HKoAAA/FSURBVPjagJvrgDPrdFp8p6ZBdyVJ2I0FvgfcnX1ZZu2ruYDrUSHOGDmAP73wZGaOH8xZYwY44Mw6qZZOWvkRMAW4F/hqRCxvt6rMMrb7QD1L/rjtlevgljvgzLq8lt7J1wF7gInApwsuDBcQ/sZz60x27T9UcJLJawNu+qgBfOKiNOBGD+T4nhXlLtfMMtDSMbxu7VmIWVvatf8QNX8sOIvyxR0cDhxwZjnmfTXWJbQUcGeOGsgNF5/CzPGDOdMBZ5ZbDjzrlHbuP/Sak0yWpwHXs6Ib00cNcMCZ2REceNYptBhwowdwwyUTmDl+EGeNHshxPRxwZnYkB551SDv3H2LJ2lfPolzxkgPOzF4fB551CDv2Nfbgjgy4Mx1wZtYGHHhWFjv2FfTg1m5lxUs7iYKA+9QlE9JjcAMccGbWJhx41i6aDbju3Thr9AA+7YAzs4w58CwTO/Ye4olXdlFuZWXdawPuM5cmATd9lAPOzNqHA8/aREsBd/bogQ44Mys7B54dkx17D/H42q2vXCbw9IbXBtxnL53IzPGDOMMBZ2YdhAPPSrJ970GeWLvtiIDr1b0bZzngzKwTcOBZk7bvPcjj6Ukmj6/Z9pqAO3vMQP7ssonMHD+YM0b1p1d3B5yZdXyZBp6kWcDNQAVwa0R8s2j6VcDXgcNAPfDZiPhtljVZ0woDbvGabTzjgDOzLiazwJNUAdwCXA6sB5ZIWhARKwuaPQAsiIiQNA34D2BSVjXZq17eU9CDW/vagKsaO5DPXTaRmScPZtpIB5yZdQ1Z9vBmAKsjYg2ApDuBq4BXAi8idhe07w1EhvXkWmHALV6zlWc27ALguB5JD84BZ2ZdXZaBNwJYVzC8Hji3uJGkdwDfAE4ErsywnlzZtucgTxScRVkYcFVjBvH5KyqZOX4w00YOoGd3f/WhmXV9WQaemhh3RA8uIn4G/EzSBSTH8y47YkHSHGAOwOjRo9u4zK7BAWdm1rIsA289MKpgeCTwUnONI+IRSSdLGhIRW4qmzQfmA1RVVXm3J7B194H0MoHGY3BJwB3fo4KqsQN56xknMXP8IKaOcMCZmUG2gbcEmCBpHPAicA3w3sIGkk4BnktPWjkL6AlszbCmTqsw4Bav2caqjQ44M7OjkVngRUS9pBuARSSXJdwWESskzU2nzwPeBXxA0iFgH/CeiHAPjiTgCq+DKw64t00/iZnjBzN1RH8HnJlZCdTZ8qWqqipqamrKXUab2/KaHtxWnt2YnMDaGHAzxw92wJmZNUPSkxFR1VIb32mlTLbsPsDjaxqPwb0acCf0rKBq7CCumj4iPcmkPz0qHHBmZq+XA6+dFAbc4jVb+cOm1wbc288c8UoPzgFnZtb2HHgZaSngzhk7iHec5YAzM2tPDrw2snnXgfTrcpKzKFenAdc77cG986yRzBw/iCkOODOzsnDgHaOWAu6ccYN4lwPOzKxDceCVaNOu/a/ZRfnc5j3AqwF39dkjmTl+MFNO6kd3B5yZWYfjwGtGawH3J1WjHHBmZp2IAy+1aed+FhdcB7cmDbg+vbpzztiBvDsNuNMdcGZmnVJuA6+1gHuPA87MrEvJZeBdM/8xFq/ZBiQBN2PcIK45Jwm4yZUOODOzriiXgXfJpBO5ZNKJDjgzsxzJZeDNueDkcpdgZmbtzF0bMzPLBQeemZnlggPPzMxywYFnZma54MAzM7NccOCZmVkuOPDMzCwXMg08SbMkrZK0WtKNTUx/n6Rl6c+jks7Ish4zM8uvzAJPUgVwCzAbmAxcK2lyUbO1wIURMQ34OjA/q3rMzCzfsuzhzQBWR8SaiDgI3AlcVdggIh6NiJfTwcXAyAzrMTOzHMsy8EYA6wqG16fjmnM9cG9TEyTNkVQjqWbz5s1tWKKZmeVFloGnJsZFkw2li0kC74tNTY+I+RFRFRFVQ4cObcMSzcwsL7K8efR6YFTB8EjgpeJGkqYBtwKzI2JrhvWYmVmOZdnDWwJMkDROUk/gGmBBYQNJo4G7gesi4tkMazEzs5zLrIcXEfWSbgAWARXAbRGxQtLcdPo84K+BwcA/SgKoj4iqrGoyM7P8UkSTh9U6rKqqqqipqSl3GWZm1oFIerK1DpPvtGJmZrngwDMzs1xw4JmZWS448MzMLBcceGZmlgsOPDMzywUHnpmZ5YIDz8zMcsGBZ2ZmueDAMzOzXHDgmZlZLjjwzMwsFxx4ZmaWCw48MzPLBQeemZnlggPPzMxywYFnZma54MAzM7NcyDTwJM2StErSakk3NjF9kqTHJB2Q9PksazEzs3zrntWCJVUAtwCXA+uBJZIWRMTKgmbbgE8Db8+qDjMzM8i2hzcDWB0RayLiIHAncFVhg4jYFBFLgEMZ1mFmZpZp4I0A1hUMr0/HmZmZtbssA09NjItjWpA0R1KNpJrNmze/zrLMzCyPsgy89cCoguGRwEvHsqCImB8RVRFRNXTo0DYpzszM8iXLwFsCTJA0TlJP4BpgQYbrMzMza1ZmZ2lGRL2kG4BFQAVwW0SskDQ3nT5P0nCgBugHHJb0WWByROzMqi4zM8unzAIPICIWAguLxs0reLyBZFenmZlZpnynFTMzywUHnpmZ5YIDz8zMcsGBZ2ZmueDAMzOzXHDgmZlZLjjwzMwsFxx4ZmaWCw48MzPLBQeemZnlggPPzMxywYFnZma54MAzM7NccOCZmVkuOPDMzCwXHHhmZpYLDjwzM8sFB56ZmeVCpoEnaZakVZJWS7qxiemS9L10+jJJZ2VZj5mZ5VdmgSepArgFmA1MBq6VNLmo2WxgQvozB/inrOoxM7N8y7KHNwNYHRFrIuIgcCdwVVGbq4A7IrEYGCCpMsOazMwsp7IMvBHAuoLh9em4o21jZmb2unXPcNlqYlwcQxskzSHZ5QmwW9KqY6hnCLDlGObrCDpr7Z21bui8tbvu9tdZa++sdUPTtY9pbaYsA289MKpgeCTw0jG0ISLmA/NfTzGSaiKi6vUso1w6a+2dtW7ovLW77vbXWWvvrHXDsdee5S7NJcAESeMk9QSuARYUtVkAfCA9W3MmsCMi6jKsyczMciqzHl5E1Eu6AVgEVAC3RcQKSXPT6fOAhUA1sBrYC3w4q3rMzCzfstylSUQsJAm1wnHzCh4H8MksayjwunaJlllnrb2z1g2dt3bX3f46a+2dtW44xtqVZI6ZmVnX5luLmZlZLnTZwJM0SNIvJf0h/T2wmXYDJN0l6RlJT0s6r71rbaKmkmpP21ZI+r2kX7Rnjc3U0mrdkkZJejDd1iskfaYctaa1dNpb35VQ+/vSmpdJelTSGeWos1hrdRe0O0dSg6Sr27O+lpRSu6SLJC1NX9sPt3eNTSnhtdJf0v9Ieiqtu0OcSyHpNkmbJC1vZvrRvz8jokv+AN8Gbkwf3wh8q5l2PwI+mj7uCQzoLLWn0z8H/Bvwi85QN1AJnJU+7gs8C0wuQ60VwHPA+PTv/lRxHSQnVN1Lcr3oTODxcm/jo6j9DcDA9PHsjlB7KXUXtPs1yfH/q8td91Fs8wHASmB0OnxiJ6n7y43vVWAosA3o2QFqvwA4C1jezPSjfn922R4eyW3LfpQ+/hHw9uIGkvqRbNQfAkTEwYjY3m4VNq/V2gEkjQSuBG5tp7pa02rdEVEXEb9LH+8CnqY8d9fpzLe+a7X2iHg0Il5OBxeTXONabqVsc4BPAf8FbGrP4lpRSu3vBe6OiBcAIqIj1F9K3QH0lSSgD0ng1bdvmUeKiEfSWppz1O/Prhx4wyK9pi/9fWITbcYDm4F/SXcL3iqpd3sW2YxSagf4LvAXwOH2KqwVpdYNgKSxwJnA45lXdqTOfOu7o63repJPwuXWat2SRgDvAObRsZSyzScCAyU9JOlJSR9ot+qaV0rdPwBOI7npRy3wmYjoKP9TWnLU789ML0vImqRfAcObmPSVEhfRnaTL/KmIeFzSzSS74v6qjUps1uutXdJbgE0R8aSki9qytlbW+3q3eeNy+pB8iv9sROxsi9qOUpvd+q4MSq5L0sUkgXd+phWVppS6vwt8MSIakg5Hh1FK7d2Bs4FLgeOBxyQtjohnsy6uBaXU/WZgKXAJcDLwS0m/KdP78mgc9fuzUwdeRFzW3DRJGyVVRkRd2s1tavfCemB9RDT2MO4iCbzMtUHtbwTeJqkaOA7oJ+knEfH+jEoG2qRuJPUgCbt/jYi7Myq1NW1267syKKkuSdNIdnfPjoit7VRbS0qpuwq4Mw27IUC1pPqI+O/2KbFZpb5etkTEHmCPpEeAM0iOU5dLKXV/GPhmJAfGVktaC0wCnmifEo/ZUb8/u/IuzQXAB9PHHwR+XtwgIjYA6ySdmo66lOSgc7mVUvuXImJkRIwluW3br7MOuxK0Wnd6nOCHwNMR8Q/tWFuxznzru1ZrlzQauBu4rsw9jEKt1h0R4yJibPq6vgv4RAcIOyjt9fJz4E2Suks6ATiX5Bh1OZVS9wsk//uQNAw4FVjTrlUem6N/f5b7TJysfoDBwAPAH9Lfg9LxJwELC9pNB2qAZcB/k57Z1hlqL2h/ER3jLM1W6ybZtRbp9l6a/lSXqd5qkk/fzwFfScfNBeamj0XyJcbPkRzbqCr3Nj6K2m8FXi7YxjXlrrmUuova3k4HOUuz1NqBL5B8aF5Osru+w9edvj/vT1/jy4H3l7vmtK6fAnXAIZLe3PWv9/3pO62YmVkudOVdmmZmZq9w4JmZWS448MzMLBcceGZmlgsOPDMzywUHnlmZpd8KsFTSckn/mV7D1VzbmyR9vj3rM+sqHHhm5bcvIqZHxBTgIMm1RmbWxhx4Zh3Lb4BTACR9IP2er6ck/bi4oaSPSVqSTv+vxp6hpD9Je4tPpbe3QtLpkp5Ie5LLJE1o12dl1gH4wnOzMpO0OyL6SOpOco/R+4BHSG4N9saI2CJpUERsk3QTsDsiviNpcKT3yJT0t8DGiPi+pFpgVkS8KGlARGyX9H1gcUT8a3qLqYqI2FeWJ2xWJu7hmZXf8ZKWktzi7gWSe41eAtwVEVsAIqKp7wWbIuk3acC9Dzg9Hf+/wO2SPkbyBaAAjwFflvRFYIzDzvKoU39bglkXsS8ipheOSG+y3drul9uBt0fEU5I+RHJPVSJirqRzSb4ceKmk6RHxb5IeT8ctkvTRiPh1Gz8Psw7NPTyzjukB4N2SBgNIGtREm75AXfp1S+9rHCnp5Ih4PCL+GtgCjJI0HlgTEd8jucv8tMyfgVkH4x6eWQcUESsk/R/gYUkNwO+BDxU1+yuSb4t/nuRu8X3T8X+fnpQikuB8iuR7Ht8v6RCwAfha5k/CrIPxSStmZpYL3qVpZma54MAzM7NccOCZmVkuOPDMzCwXHHhmZpYLDjwzM8sFB56ZmeWCA8/MzHLh/wfx/I3K0jNOBgAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"''' The WoEEncoder has the characteristic that return monotonic\n",
" variables, that is, encoded variables which values increase as the target increases'''\n",
"\n",
"# let's explore the monotonic relationship\n",
"plt.figure(figsize=(7,5))\n",
"pd.concat([test_t,y_test], axis=1).groupby(\"pclass\")[\"survived\"].mean().plot()\n",
"#plt.xticks([0,1,2])\n",
"plt.yticks(np.arange(0,1.1,0.1))\n",
"plt.title(\"Relationship between pclass and target\")\n",
"plt.xlabel(\"Pclass\")\n",
"plt.ylabel(\"Mean of target\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Automatically select the variables\n",
"\n",
"This encoder will select all categorical variables to encode, when no variables are specified when calling the encoder."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"WoEEncoder(variables=['sex'])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ratio_enc = WoEEncoder()\n",
"\n",
"# to fit we need to pass the target y\n",
"ratio_enc.fit(train_t, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" cabin | \n",
" embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" 1139 | \n",
" -0.584173 | \n",
" -0.99882 | \n",
" 38.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.8958 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 533 | \n",
" 0.210092 | \n",
" 1.45312 | \n",
" 21.0 | \n",
" 0 | \n",
" 1 | \n",
" 21.0000 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 459 | \n",
" 0.210092 | \n",
" -0.99882 | \n",
" 42.0 | \n",
" 1 | \n",
" 0 | \n",
" 27.0000 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 1150 | \n",
" -0.584173 | \n",
" -0.99882 | \n",
" NaN | \n",
" 0 | \n",
" 0 | \n",
" 14.5000 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
" 393 | \n",
" 0.210092 | \n",
" -0.99882 | \n",
" 25.0 | \n",
" 0 | \n",
" 0 | \n",
" 31.5000 | \n",
" -0.357528 | \n",
" -0.201134 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass sex age sibsp parch fare cabin embarked\n",
"1139 -0.584173 -0.99882 38.0 0 0 7.8958 -0.357528 -0.201134\n",
"533 0.210092 1.45312 21.0 0 1 21.0000 -0.357528 -0.201134\n",
"459 0.210092 -0.99882 42.0 1 0 27.0000 -0.357528 -0.201134\n",
"1150 -0.584173 -0.99882 NaN 0 0 14.5000 -0.357528 -0.201134\n",
"393 0.210092 -0.99882 25.0 0 0 31.5000 -0.357528 -0.201134"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# transform and visualise the data\n",
"\n",
"train_t = ratio_enc.transform(train_t)\n",
"test_t = ratio_enc.transform(test_t)\n",
"\n",
"test_t.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}