{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Table of Contents\n", "\n", "1. [Datasets](#dataset)
\n", "2. [Total Imprisonment Rates](#2)
\n", " 2a. [Visualization Pitfall](#3)
\n", " 2b. [Population Dataset](#4)
\n", " 2c. [Visualization Breakthrough, Observations, and interactive maps](#5)
\n", "3. [State Imprisonment Distributions by Gender](#6)
\n", " 3a. [Static Scatter Plots with Distributions](#7)
\n", " 3b. [Interactive Scatter Plots](#8)
\n", "4. [Summary](#9)
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset: Taiwanese Credit Card Default Prediction \n", "\n", "https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients\n", "\n", "this is where I expect the solutions to my issues will be: https://github.com/amueller/ml-training-advanced/blob/48162f50954a524ecb69b9d85de5233d886f8576/notebooks/extra%20-%20imbalanced%20datasets.ipynb\n", "\n", "https://scipy2017.scipy.org/ehome/220975/493423/" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# General Utility\n", "import os\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "# import pickle\n", "from scipy import interp\n", "import matplotlib.pyplot as plt\n", "# import matplotlib.gridspec as gridspec\n", "from matplotlib.collections import LineCollection\n", "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", "from IPython.core.display import display, HTML\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [], "source": [ "# Preprocessing\n", "import missingno as msno\n", "from sklearn.model_selection import train_test_split \n", "from sklearn.preprocessing import RobustScaler\n", "from imblearn.over_sampling import SMOTE \n", "from imblearn.over_sampling import ADASYN\n", "from imblearn.pipeline import Pipeline as imbPipeline\n", "from imblearn.pipeline import make_pipeline as make_imb_pipeline\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.base import clone" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Model Building\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.ensemble import 
RandomForestClassifier\n", "from sklearn.ensemble import BaggingClassifier\n", "from sklearn.ensemble import AdaBoostClassifier\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [ "# Model Validation\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import precision_recall_curve\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import log_loss\n", "from sklearn.metrics import roc_auc_score\n", "from sklearn.metrics import roc_curve\n", "from sklearn.metrics import auc\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import classification_report\n", "from sklearn.model_selection import cross_val_score\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.model_selection import RepeatedStratifiedKFold\n", "from sklearn.metrics import precision_recall_curve" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['bmh', 'classic', 'dark_background', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark-palette', 'seaborn-dark', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid']\n" ] } ], "source": [ "print(plt.style.available)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "# Global\n", "nb_seed = 1234\n", "# plt.style.use('seaborn-darkgrid')\n", "sns.set()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pd.options.display.max_columns = None\n", "display(HTML(\"\"))\n", 
"pd.set_option('display.float_format',lambda x: '%.4f' % x)\n", "plt.rcParams['figure.figsize'] = 10,10" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [], "source": [ "def roc_plotter(fitted_clf, data_, labels_, ax=None):\n", " \"\"\"\n", " Formats and plots the ROC curve and colors the curve by its \n", " threshold value at each point.\n", " Args:\n", " fitted_clf: A classifier that has already been fit to training data and labels\n", " test_data: An array containing the corresponding testing data \"\"\"\n", " if ax == None:\n", " fig, ax = plt.subplots(figsize=(8, 8))\n", " ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Random', alpha=.8)\n", " y_pred_prob = fitted_clf.predict_proba(data_)[:,1]\n", " fpr, tpr, thresh = roc_curve(labels_, y_pred_prob) \n", " mean_auc = auc(fpr, tpr)\n", " lc = colorline(ax, fpr, tpr, thresh)\n", " ax.plot(fpr, tpr, color='k',\n", " label='Mean ROC (AUC = {:0.2f})'\n", " .format(mean_auc), lw=1, alpha=0.5)\n", " ax.legend(loc='best', fancybox=True, framealpha=0.4)\n", " ax.set_title('ROC Curve', fontsize=14)\n", " ax.set_ylabel('True Positive Rate', fontsize=14)\n", " ax.set_xlabel('False Positive Rate', fontsize=14)\n", " ax.set_xlim([0.0,1.0])\n", " ax.set_ylim([0.0,1.0])\n", " divider = make_axes_locatable(ax)\n", " fig = ax.get_figure()\n", " cax = divider.append_axes('right', size='5%', pad=0.05)\n", " fig.colorbar(lc, cax=cax, orientation='vertical')\n", " return ax" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Used the example at this address to encode threshold to color on the ROC curve # Used t \n", "# https://nbviewer.jupyter.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb\n", "def make_segments(x, y):\n", " '''\n", " Create list of line segments from x and y coordinates, in the correct format for LineCollection:\n", " an array of the form numlines x (points per line) x 2 (x and y) array\n", " '''\n", 
" points = np.array([x, y]).T.reshape(-1, 1, 2)\n", " segments = np.concatenate([points[:-1], points[1:]], axis=1)\n", " return segments\n", "\n", "\n", "def colorline(ax, x, y, z=None, cmap='coolwarm', \n", " norm=plt.Normalize(0.0, 1.0), linewidth=10, alpha=0.9):\n", " '''\n", " Plot a colored line with coordinates x and y\n", " Optionally specify colors in the array z\n", " Optionally specify a colormap, a norm function and a line width\n", " ''' \n", " # Default colors equally spaced on [0,1]:\n", " if z is None:\n", " z = np.linspace(0.0, 1.0, len(x))\n", " # Special case if a single number:\n", " if not hasattr(z, \"__iter__\"): # to check for numerical input -- this is a hack\n", " z = np.array([z])\n", " z = np.asarray(z)\n", " segments = make_segments(x, y)\n", " lc = LineCollection(segments, array=z, cmap=cmap, norm=norm, linewidth=linewidth, alpha=alpha)\n", " ax.add_collection(lc)\n", " return lc" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "g_cmap = sns.light_palette('seagreen', n_colors=10, as_cmap=True)\n", "def confusion_mat_plotter(fitted_clf, X_train_, X_test_, y_train_, y_test_,\n", " class_labels, cmap=g_cmap, ax=None):\n", " \"\"\"\n", " Generates plots of the confusion matrices for a classifier's performance\n", " with both the training data (to check for overfitting) and the testing data.\n", " Args:\n", " fitted_clf: A classifier that has already been fit to training data and labels\n", " X_train, ... 
# Default colormap for the confusion-matrix heatmaps (defined once; it was
# previously assigned identically in two cells).
g_cmap = sns.light_palette('seagreen', n_colors=10, as_cmap=True)


def _draw_confusion_heatmap(cfn_matrix, class_labels, cmap, ax, title):
    """Draw one annotated confusion-matrix heatmap on `ax` (true class on x, predicted on y)."""
    # Transposed so that columns are true classes and rows are predicted classes.
    sns.heatmap(cfn_matrix.T, square=True, annot=True, fmt='d', cbar=True,
                xticklabels=class_labels, yticklabels=class_labels, ax=ax, cmap=cmap,
                annot_kws={"size": 16})
    ax.set_xlabel('True Class', fontsize=14)
    ax.set_ylabel('Predicted Class', fontsize=14)
    ax.set_title(title, fontsize=14)


def confusion_mat_plotter(fitted_clf, X_train_, X_test_, y_train_, y_test_,
                          class_labels, cmap=g_cmap, ax=None):
    """
    Generates plots of the confusion matrices for a classifier's performance
    with both the training data (to check for overfitting) and the testing data.

    Args:
        fitted_clf: A classifier that has already been fit to training data and labels.
        X_train_, X_test_: Training/testing feature data passed to `fitted_clf.predict`.
        y_train_, y_test_: True labels for the training/testing data.
        class_labels: Tick labels for the classes, used on both heatmap axes.
        cmap: Colormap for the heatmaps (defaults to `g_cmap`).
        ax: Optional indexable pair of Axes (ax[0] training, ax[1] testing).
            When omitted, a new 1x2 figure is created.
    """
    # `is None` rather than `== None`: `ax` is normally an ndarray of Axes, and
    # an elementwise `==` comparison cannot be used as an `if` condition.
    if ax is None:
        _, ax = plt.subplots(ncols=2, figsize=(13, 6.5))

    cfn_train = confusion_matrix(y_train_, fitted_clf.predict(X_train_))
    cfn_test = confusion_matrix(y_test_, fitted_clf.predict(X_test_))

    _draw_confusion_heatmap(cfn_train, class_labels, cmap, ax[0],
                            'Confusion Matrix (training data)')
    _draw_confusion_heatmap(cfn_test, class_labels, cmap, ax[1],
                            'Confusion Matrix (testing data)')
    # Lay out the figure that owns these axes, which need not be the current figure.
    ax[0].get_figure().tight_layout()


# Example input: avg_cfn = np.mean(cfns, axis=0).astype(int)
def confusion_mat_plotter_single(cfn_matrix, class_labels, cmap=g_cmap, ax=None):
    """
    Plot a single confusion matrix as an annotated heatmap.

    Args:
        cfn_matrix: Confusion matrix to plot; it should hold integer counts
            because the cell annotations use the 'd' format.
        class_labels: Tick labels for the classes, used on both axes.
        cmap: Colormap for the heatmap (defaults to `g_cmap`).
        ax: Optional Axes to draw on. When omitted, a new 6x6 figure is created.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(6, 6))
    _draw_confusion_heatmap(cfn_matrix, class_labels, cmap, ax, 'Confusion Matrix')
default_payment_next_month, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LIMIT_BALSEXEDUCATIONMARRIAGEAGEPAY_0PAY_2PAY_3PAY_4PAY_5PAY_6BILL_AMT1BILL_AMT2BILL_AMT3BILL_AMT4BILL_AMT5BILL_AMT6PAY_AMT1PAY_AMT2PAY_AMT3PAY_AMT4PAY_AMT5PAY_AMT6
ID
1200002212422-1-1-2-23913310268900006890000
212000022226-120002268217252682327234553261010001000100002000
39000022234000000292391402713559143311494815549151815001000100010005000
45000022137000000469904823349291283142895929547200020191200110010691000
55000012157-10-10008617567035835209401914619131200036681100009000689679
\n", "
" ], "text/plain": [ " LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_0 PAY_2 PAY_3 PAY_4 \\\n", "ID \n", "1 20000 2 2 1 24 2 2 -1 -1 \n", "2 120000 2 2 2 26 -1 2 0 0 \n", "3 90000 2 2 2 34 0 0 0 0 \n", "4 50000 2 2 1 37 0 0 0 0 \n", "5 50000 1 2 1 57 -1 0 -1 0 \n", "\n", " PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 \\\n", "ID \n", "1 -2 -2 3913 3102 689 0 0 \n", "2 0 2 2682 1725 2682 3272 3455 \n", "3 0 0 29239 14027 13559 14331 14948 \n", "4 0 0 46990 48233 49291 28314 28959 \n", "5 0 0 8617 5670 35835 20940 19146 \n", "\n", " BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 \n", "ID \n", "1 0 0 689 0 0 0 0 \n", "2 3261 0 1000 1000 1000 0 2000 \n", "3 15549 1518 1500 1000 1000 1000 5000 \n", "4 29547 2000 2019 1200 1100 1069 1000 \n", "5 19131 2000 36681 10000 9000 689 679 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "CSV_PATH = os.path.join('data', 'taiwan_cc', 'default_of_credit_card_clients.csv')\n", "cc_raw = pd.read_csv(CSV_PATH, encoding='latin1', index_col='ID')\n", "cc_target = cc_raw['default_payment_next_month'] \n", "display(cc_target.head())\n", "cc_data = cc_raw.drop('default_payment_next_month', axis=1)\n", "display(cc_data.head())" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 30000 entries, 1 to 30000\n", "Data columns (total 23 columns):\n", "LIMIT_BAL 30000 non-null int64\n", "SEX 30000 non-null int64\n", "EDUCATION 30000 non-null int64\n", "MARRIAGE 30000 non-null int64\n", "AGE 30000 non-null int64\n", "PAY_0 30000 non-null int64\n", "PAY_2 30000 non-null int64\n", "PAY_3 30000 non-null int64\n", "PAY_4 30000 non-null int64\n", "PAY_5 30000 non-null int64\n", "PAY_6 30000 non-null int64\n", "BILL_AMT1 30000 non-null int64\n", "BILL_AMT2 30000 non-null int64\n", "BILL_AMT3 30000 non-null int64\n", "BILL_AMT4 30000 non-null int64\n", "BILL_AMT5 30000 non-null int64\n", "BILL_AMT6 30000 
non-null int64\n", "PAY_AMT1 30000 non-null int64\n", "PAY_AMT2 30000 non-null int64\n", "PAY_AMT3 30000 non-null int64\n", "PAY_AMT4 30000 non-null int64\n", "PAY_AMT5 30000 non-null int64\n", "PAY_AMT6 30000 non-null int64\n", "dtypes: int64(23)\n", "memory usage: 5.5 MB\n" ] } ], "source": [ "cc_data.info()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',\n", " 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',\n", " 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',\n", " 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',\n", " 'default_payment_next_month'],\n", " dtype='object')" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cc_raw.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**From the plot below, we see that there is a significant class imbalance between the populations that did and did not default. To counteract this imbalance, I'll explore over-sampling and under-sampling strategies to balance the class counts.**\n", "\n", "**We also see an imbalance between the number of men and women in the study, but as gender isn't a target variable, I won't balance for gender. 
**" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tmp = cc_raw[['default_payment_next_month','SEX']]\n", "sex_dict = {1: 'Male', 2:'Female'}\n", "tmp = tmp.replace({'SEX': sex_dict})\n", "\n", "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(figsize=(6,6))\n", " sns.countplot(x='default_payment_next_month', data=tmp, ax=ax)\n", " ax.set_ylabel('Count', fontsize=14)\n", " ax.set_xticklabels(['Does not default (0)', 'Defaults (1)'])\n", " ax.set_xlabel('Default-on-next-payment-cycle status', fontsize=14)\n", " ax.set_title('Defaulted on CC Payment', fontsize=16)\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAGkCAYAAAB+TFE1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xl4TPf/Pv57kkgkM9ksVVWEkIVYsiCVJkijQlC7BAlt2tqiKpZIiRBqF1Tsu1hCqt61tVVLo8U3m5aSEk3UEopIyMwg25zfH37OpyOLkMicyP24Ltdlznmd13meM5O55+wyQRAEEBER6ZiergsgIiICGEhERCQRDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQKJy8/f3h52dnfivZcuWcHV1xWeffYYzZ868Up8bN27Ee++9B0dHRxw+fLhC6szIyICdnR2OHDkCAFAqlZg0aRJSUlLK3benpyfmzJlT7n5e1f3797Fo0SJ4e3ujbdu26Ny5MyZOnIi///672PYJCQkYN24c3N3d4ejoiJ49e2LNmjV4/PhxqfOJiorSeq/t7OzQqlUreHl5Yd68eS+cvio5duwYZsyYoesyqhUDXRdAbwZnZ2eEhIQAAPLz83Hv3j3ExMTgk08+wZIlS9CjR48y96VSqbB48WL07NkTfn5+aNq06Wup+a+//sLBgwfx8ccfv5b+K8ulS5fw2WefwcTEBCNGjEDTpk1x//59bN26FQMHDsSmTZvg6Ogotl+3bh2WLl0KLy8vhIWFwdzcHBcuXMC6desQFxeHzZs3o2bNmiXOz9jYGFu3bhVf5+XlITk5GcuXL8e///6L5cuXv9blrSxbtmyBXC7XdRnVCgOJKoSpqSlat26tNczb2xv+/v6YOXMm3N3dYWpqWqa+Hj58CEEQ4OXlBScnp9dRLgBAEATIZLLX1n9lyMvLw4QJE2BhYYFdu3ZBoVCI47y8vDB48GCEhobixx9/BPB0y2jp0qUYOXIkvvzyS7Fthw4d4OzsDD8/P2zevBmjR48ucZ4ymazIe+3i4oKbN29i7969uHfvHurWrVvBS0rVAXfZ0Ws1duxY5OTkiF+IAJCVlYUpU6agQ4cOcHR0xOjRo3Hz5k0AwL59+/DBBx9AJpNh/Pjx+OCDDwA83WqaM2cOPD094eDggPfeew9Tp06FSqUS+7Wzs8PmzZ
u15j9mzBgEBAQUqSshIQHDhw8HAAwYMAChoaHiuG3btqFbt25o1aoVevbsWWSXYWZmJr744gu4uLigU6dO+N///lemdfHzzz9jwIABcHR0ROfOnbF8+XIUFBSI4z09PbFhwwbMnDlTDIipU6fi0aNHJfZ54sQJXL16FV9++aVWGAGAoaEhpkyZAh8fH3E9bdq0CbVr18aYMWOK9NW2bVuMHz8ejRo1KtPyPK9FixYQBAG3b98G8OL3bMGCBejQoYPWOgCATz75BOPHjwfw9D3du3cvvvjiCzg6OsLd3R27du3CnTt3MHLkSLRt2xbdunXDyZMntfo4deoUBg0ahDZt2qBTp0745ptvoNFoxPEvWtf+/v5ITEzEL7/8Ant7e9y6deuV1gm9HAYSvVbt27eHvr4+zp49CwDIzc2Fv78/fv/9d8yYMQOLFi1CZmYmhg0bBqVSic6dOyMqKgqCIGDixIlYuXIlAGDixIk4ceIEJk2ahM2bNyMwMBAHDx4Ux5ekpC2gli1biscH5s2bJ35BR0VFYeHChejZsyfWrl0LNzc3TJw4ET/99BMAQKPR4JNPPkFKSgrmzJmDkJAQrFixAnfv3i21jt27d2PcuHFo06YNVq5cCX9/f2zatAlfffWVVru1a9dCqVRi6dKlmDBhAg4ePIjVq1eX2O+pU6dgYGAANze3Yse/9957GDdunBhWp0+fRocOHWBoaFhs+1GjRsHHx6fUZSnJP//8AwB49913Abz4PevTpw9ycnLw22+/iX1kZmYiPj4effr0EYfNnz8fVlZWWLNmDRwdHTF79mx8/PHHcHZ2xvLly2FqaorJkycjNzcXAHDmzBl8/vnnaNiwIVauXIlPP/0Umzdvxtdff61Vb2nreubMmWjRogWcnZ2xe/dubvFVEu6yo9dKT08PFhYWuH//PoCnW0DXrl3DwYMHYWVlBeDpl2aXLl0QHR2NMWPGwN7eHgDQuHFj2NnZIS8vDwUFBYiIiBC/eNu1a4ezZ88iMTHxleqSy+Vo1qwZAKB58+Zo2LAhlEol1q9fj88//xzjxo0DAHTs2BEqlQpLlixBt27dcOLECVy5cgW7d+8Wd1tZWVmhX79+Jc5LEAQsX74cPXv2RFhYmNivQqHAzJkz8emnn8LGxgYA8Pbbb2PJkiVim/j4eMTFxWHixInF9n3nzh1YWFiUesznmaysLOTl5aFBgwZlXEslKywsFP//8OFD/Prrr9izZw+8vLxQq1atMr1ntra2sLW1xYEDB9C5c2cAwMGDB2Fqagp3d3exfycnJwQHBwMA3nrrLRw5cgROTk74/PPPATzdEvzkk09w9epV2NnZYdmyZXB0dBTX4/vvvw9zc3OEhoYiMDAQ77zzDoDS17W1tTXkcjnkcnmR3ZP0+nALiSpVQkICGjdujIYNG6KwsBCFhYUwMjKCs7NziWfkGRoaYuPGjXBzc0NGRgZOnTqFLVu2IC0tDXl5eRVW2x9//IG8vDx06tRJrK2wsBDu7u64ceMGMjIy8Pvvv8PMzEzrS6pFixalfsmnpaUhKysL3t7eWsN9fHwgCIJWqD7/5VevXr1Sz1zT09NDWe+PrK+vDwBau65exaNHj9CyZUvxX8eOHfHVV1/hvffew6xZswCU/T3r06cPjh8/jidPngAADhw4AB8fHxgY/N9v5VatWon/r1OnDoCnW7jPWFpaQhAEKJVKPHnyBH/++WeR9/D9999HYWEh4uPjxeledl3T68ctJHqt8vLy8PDhQ7z99tsAgAcPHiAtLU3rCwV4umvt2RZTcY4dO4b58+fj5s2bsLS0hIODA2rWrFnuL9f/evDgAQRBgK+vb5EveT09Pdy7dw85OTmwtLQsMm1pu3QePnwImUyG2rVraw1XKBQwMjKCWq0WhxkbGxeZb2nL2KBBA5w8eRJPnjwpdispPz8fSqUStWrVgrm5OUxMTMRjPMXJysqCmZmZViA8z9jYGDt27BBPCjEyMkL9+vVhYm
Ki1a4s71mvXr2wePFiHD9+HC1atMDFixcRHh6u1U9xZ7o9v56eefjwITQaDSIjI8Wtn2dkMhnu3btXYh8vWtf0+jGQ6LVKTExEQUGBeLacQqGAvb09vv766yJf+iUd17h27Rq+/PJL9OvXD2PHjsVbb70FAPjyyy+Rlpam1fb5L5TSTgh43rOzAFeuXIl69eoVGW9lZQULCwtkZWUVGZednV1ivxYWFhAEQdxt+YxSqURubm6xAVdWHTt2xI4dO3Dq1CnxBJD/Onr0KIKDgxEdHQ0XFxe4ubkhPj4eBQUFxYbO1KlTce3aNfGYWXFkMhlatGhRal1lfc9q164NNzc3/Pjjj7hx4wYaN25crl1kz46VjR49utj18awOkibusqPXat26dbCwsEDXrl0BPL1e6ebNm3jnnXe0dvts2rQJJ06cKLaPixcvoqCgAJ999pn4hfLo0SMkJydrtVMoFFonFzx+/LjUi16f393VunVrGBgY4P79+1q1Xb58GVFRUQCenh6tVCq1dv1cvXoVN27cKHE+TZo0gaWlJX744Qet4YcOHYJMJivXqe0eHh6wsrLCsmXLtM44BJ4u/+rVq9GgQQM4OzsDAIYPH47MzEysWrWqSF/x8fE4deoUevXq9cr1PFPW9wwAPvroI/z666/46aef8NFHH5VrvnK5HHZ2drh+/brWe6ivr48lS5aUunX4vGe7OKnycAuJKoRSqcS5c+cAAAUFBbhz5w727NmD5ORkLFmyRNzt0r9/f0RHR+Pjjz/G559/DgsLC8TExODo0aMlfhm1aNECenp6WLRoEfz8/JCVlYXNmzfj/v37MDIyEtt5eHjgu+++g729PWrVqoWNGzdCT6/k31xmZmYAnp46bWxsjKZNm8Lf3x/z58/HgwcP0Lp1a/z1119YtmwZvLy8IJfL4ebmBhcXF0yaNAmTJk2CsbExli9fXuLWHfA0+IKCgjBnzhyYm5vjgw8+wKVLlxAVFYXu3bvD2tr6pdf3MwYGBpg3bx4+++wzDBgwAMOHD0fTpk1x8+ZNbNmyBTdv3sT27dvFsw1dXFwQGBiI1atXIy0tDb169YKJiQkSExOxZcsWODk5YeTIka9czzNlfc+Ap9dLzZw5E3/99Re++eabcs/7iy++QFBQEBQKBbp27YqsrCwsX74c+vr6sLW1LXM/ZmZmuHTpEhISEtCmTZsidVPFYyBRhTh79ix8fX0BPP2SrFu3Llq3bo1du3ZpHZRWKBTYsWMHFi5ciJkzZyIvLw82NjZYtWoVPDw8xHb/PV3bysoKCxcuRFRUFEaOHIk6deqgU6dOGDBgACIiIsQLMUNDQ5GXl4dZs2ZBLpdj6NCh4nGJ4vpt3rw5+vTpg/Xr1+PixYtYvXo1pkyZgjp16mDPnj1YsWIF6tatixEjRmDs2LHidKtXr8bcuXMxd+5cGBgY4JNPPsHPP/9c6voZOnQojI2NsWnTJnz77beoW7cuAgMDtS5ALekU9RddvNu2bVvs2bMHGzduxMaNG5GZmYlatWrBxcUFK1asKHJsbvLkyXBwcEBMTAzCw8Px6NEjNGzYEGPHjoW/vz9q1KhR6vzKcjFxWd8z4Omu2vbt2yM7O1s8Zfy/83p+fsXN/7/DPD09sWrVKqxcuRL79u2DQqEQT99/FiplWYYRI0YgODgYn332GbZu3Yq2bdu+cBoqHxkfYU5EupSbmwsPDw9MmTIF/fv313U5pEOVfgzp3Llz8Pf3B/D0jJ4xY8bA398fQ4YMEffD79mzB/3794evry9++eUXAE8PGgcGBmLYsGEIDg4WL4Irri0RSV9OTg6ioqLw6aefokaNGujZs6euSyIdq9Rddhs2bMD3338vHk9YtGgRevfuDW9vb8THxyM9PR3GxsaIjo7Gvn378OTJE/j5+cHNzQ0rV65Er1690KdPH6xbtw4xMTHw8fEptu2LdjkQke4ZGRlh586dqFmzJhYvXsxjNFS5W0iNGzfWutXL2bNn8e
+//+Ljjz/GwYMH0aFDB5w/fx7Ozs4wMDCAQqGAlZUVLl26hLNnz4pXb3t4eOD06dPFtr18+XJlLhIRvSIjIyOcPn0ax48fh6urq67LIQmo1EDq2rWr1qmUGRkZsLCwwObNm/H2229j3bp1UKlUWneFlsvlUKlUUKvV4nC5XA6lUqk1DABMTEygVCorb4GIiKjC6PQsOwsLC3Tp0gXA0zNjli5dilatWmldT6FSqWBmZiYGU61ataBWq2FmZgaFQqHV9tnwFynuWggiIqocz66Le55OA8nZ2RlxcXHo3bs3EhMT0bx5c7Rq1QpLly5FXl4ecnNzkZ6ejubNm8PJyQlxcXHo27cvTp48CRcXlxLblnXeRERUuUrbINBpIIWEhGD69OnYtWsXTE1NsWTJEpiamopn3QmCgODgYBgaGmL06NEICQlBbGwsLC0tsWTJEtSsWbPYtkREVPVUy+uQkpOTuYVERKQDpX3/8l52REQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwMdPEBFVIRqNBkuWLMGlS5cAAPXr18eMGTPQrl07tGnTRnzopJWVFWbOnAlfX19MnjwZ7du3R35+PoYPH47w8PCXejZUZWEgERFVIb/++iseP36MjRs3AgAWL16M7777Dg0aNMC2bduKtJ8/fz7Gjx8vPuPL29tbkmEEcJcdEVGVUq9ePSQnJ+P48eN49OgRJkyYgMGDB6OkS0qtra0xYMAABAUFIT09HQEBAZVccdkxkIiIqhA7OzuEhoZi79696NKlC8aOHYt79+7h9u3bCAgIgL+/PwICArBjxw5xmj59+iApKQm9evXSYeUvxjs1EBFVIampqahduzZq164NjUaDdevW4fr16/jjjz9w+PDhYqcZP348HB0dsWvXLuzZswfm5uaVXPX/4Z0aiIjeEKdPn0ZUVBQAQE9PDzY2NqU+lDQ2NhZGRkYYMWIEAgICMHPmzEqq9OXxpAYioipk6NCh+Prrr9GnTx8YGxujdu3aiIiIwAcffCAeHxIEAYaGhggPD8eWLVuwe/ducdqjR4/i8OHD6NGjhy4Xo1jcZUdERJWGu+yIiEjyGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBp30TEVWwwsJCpKWlVWif1tbW0NfXr9A+pYaBRERUwdLS0jBy2kbIzetWSH/qh/ew9utA2NjYlNouIyMDvXv3RsuWLSEIAmQyGVxdXTFmzJgKqQMA/P39ERERgSZNmlRYn88wkIiIXgO5eV2Y1apf6fNt3rx5sXf9rgoYSEREb5Di7nUQGRmJpKQkaDQafPzxx+jWrRv8/f1hZ2eHK1euwMTEBC4uLvjtt9+gVCqxadMmyGQyTJ8+HUqlEtnZ2Rg0aBB8fX3FPlUqFb766is8fPgQADBt2rQXbsG9CAOJiOgN8vfffyMgIEDcZTdw4EDcvHkTO3fuRF5eHgYNGoSOHTsCANq2bYtp06bh008/hbGxMTZt2oSpU6ciISEB9evXR8+ePeHl5YW7d+/C399fK5DWrFmDjh07wtfXF9euXUNoaCh27txZrtoZSEREb5Dnd9lt2LABFy9eFEOqsLAQGRkZAAB7e3sAgJmZGZo1ayb+Pzc3F3Xq1MHWrVtx5MgRyOVyFBQUaM0nNTUV8fHxOHz4MARBgFKpLHftDCQiojfI87vsmjZtig4dOiAiIgKCIGDVqlVo2LAhAEAmk5XYz6ZNm+Do6AhfX1/Ex8cjLi5Oa7y1tTUcHBzg4+ODrKwsfPvtt+WunYFERPQaqB/e00lfz4eMp6cnEhISMHToUDx+/BheXl6Qy+Va7Yr7v6enJ8LDw3HgwAGYm5ujRo0ayMvLE8ePHDkS06ZNQ0xMDNRqNcaNG1eeRXw6b97tm4ioYvE6pJKV9v3LLSQiogqmr69f7jPOqiPeOoiIiCSBgURERJLAQCIiIkngMaRX9DoOWr4p3pSDr0RUuRhIr6iib574pijrTSCJ3mQ8y+7VMJDKQV
c3TyQiaUtLS8PYtVOgqGNWIf2pMnOwcuTCF/7QS0hIQEBAAJYuXYru3buLw3v16gUHBwfMmzevyDT79u1Deno6Jk6cWCG1lgcDiYjoNVDUMYP525aVPt+mTZvi0KFDYiClpqbiyZMnpU5T2h0bKhMDiYjoDWJnZ4d//vkHKpUKCoUC+/fvR+/evXHr1i3s2LEDR44cQWFhIRQKBaKiorSm3b59Ow4ePAiZTAYfHx8MGzasUmuv9LPszp07B39/f61hBw4c0LqL7J49e9C/f3/4+vril19+AQBkZ2cjMDAQw4YNQ3BwMHJzc0tsS0RUnX344Yf4+eefAQDnz5+Ho6MjNBoNHjx4gK1bt2L79u3Iz8/Hn3/+KU6TlpaGw4cPY9euXdi5cyd+/vln/PPPP5Vad6VuIW3YsAHff/895HK5OOyvv/7C3r17xdeZmZmIjo7Gvn378OTJE/j5+cHNzQ0rV65Er1690KdPH6xbtw4xMTHw8fEptm2NGjUqc7GIiCRDJpOhZ8+eCA8Px7vvvot27dpBEATo6emhRo0aCA4OhrGxMe7evat1B+/U1FTcunULw4cPF+/efe3aNVhZWVVa7ZW6hdS4cWOsXLlSfJ2dnY3IyEhMmzZNHHb+/Hk4OzvDwMAACoUCVlZWuHTpEs6ePQt3d3cAgIeHB06fPl1s28uXL1fmIhERSc67776Lx48fIzo6Gr179wbw9IF6x44dQ2RkJMLCwlBYWKh1Z/AmTZqIj66Ijo5Gnz59YGtrW6l1V+oWUteuXcXncGg0GkyfPh2hoaEwNDQU26hUKpiamoqv5XI5VCoV1Gq1OFwul0OpVGoNAwATE5MKeSYHEVF5qTJzdNpXjx49sH//fjRu3BjXr1+HgYEBjI2N0b9/fxgaGuKtt97C3bt3xfZ2dnZwdXWFn58f8vLy0KZNG9SrV6/ClqEsdHZSw8WLF3H9+nXMnDkTubm5SEtLw7x589ChQweoVCqxnUqlgpmZmRhMtWrVglqthpmZGRQKhVbbZ8PLIjk5uVz1X7t2rVzTv8kuXLjAHwZUrRUWFmK0W0DFdWgLPHjw4IXfW/r6+hgyZAiSk5Nhb28Pe3t7JCcnw8TEpNQTFBo1aoTk5GS0bdsWbdu2FYefPXu2whahLHQSSIIgoFWrVjhw4AAAICMjAxMnTkRoaCgyMzOxbNky5OXlITc3F+np6WjevDmcnJwQFxeHvn374uTJk3BxcUGrVq2wdOnSIm3LoryPnzA1NQUOp5erjzeVg4MDL4ylaq99+/a6LkGSSgtVnQRSaee816lTB/7+/hgyZAgEQUBwcDAMDQ0xevRohISEIDY2FpaWlliyZAlq1qxZbFsiIqp6+IC+V5Samorghft5p4bn5GTdRuSU3txCIqJilfb9y7t9ExGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpKESg+kc+fOwd/fHwDw119/YejQoQgICMCnn36KrKwsAMCePXvQv39/+Pr64pdffgEAZGdnIzAwEMOGDUNwcDByc3NLbEtERFWPQWXObMOGDfj+++8hl8sBAHPnzsWMGTNga2uL3bt3Y/369QgMDER0dDT27duHJ0+ewM/PD25ubli5ciV69eqFPn36YN26dYiJiYGPj0+xbWvUqFGZi0VERBWgUreQGjdujJUrV4qvly5dCltbWwBAQUEBDA0Ncf78eTg7O8PAwAAKhQJWVla4dOkSzp49C3d3dwCAh4cHTp8+XWzby5cvV+YiER
FRBanUQOratSv09fXF13Xq1AEAnD17Fjt37sSIESOgUqlgamoqtpHL5VCpVFCr1eJwuVwOpVKpNQwATExMoFQqK2lpiIioIlXqLrviHD58GGvXrsW6detgaWkJhUIBlUoljlepVDAzMxODqVatWlCr1TAzMyvS9tnwskhOTi5X3deuXSvX9G+yCxcu8IcBEb00nQbS999/jz179iA6OloMktatW2PZsmXIy8tDbm4u0tPT0bx5czg5OSEuLg59+/bFyZMn4eLiglatWmHp0qVF2paFs7NzuWo3NTUFDqeXq483lYODA2xsbHRdBhFJUGkbAzoLJI1Gg7lz5+Kdd97B2LFjIZPJ0L59ewQFBcHf3x9DhgyBIAgIDg6GoaEhRo8ejZCQEMTGxsLS0hJLlixBzZo1i21LRERVj0wQBEHXRVS25OTkcm8hpaamInjhfpjVql9BVb0ZcrJuI3JKb24hEVGxSvv+5YWxREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJJQ6YF07tw5+Pv7AwCuX7+OIUOGYNiwYZg1a5bYJioqCgMHDoSfnx/Onz//0m2JiKjqqdRA2rBhA6ZPn478/HwAwLx58xAcHIzt27dDo9Hg6NGjSElJQVJSEmJjYxEZGYmIiIiXbktERFVPpQZS48aNsXLlSvH1xYsX4eLiAgDw8PDA6dOnkZycDDc3NwBA/fr1odFokJWVVea22dnZlblIRERUQSo1kLp27Qp9fX3xtSAI4v/lcjmUSiXUajVMTU21hqtUKq1+SmprYmJSpC0REVUNBrqcuZ7e/+WhWq2Gubk5FAqFVqg8C52XaVsWycnJ5ar92rVr5Zr+TXbhwgUolUpdl0FEVYxOA6lFixZITExEu3btcPLkSbi6uqJRo0ZYvHgxAgMDcfv2bWg0GlhaWsLe3v6FbQVBgIWFRZnm7ezsXK7aTU1NgcPp5erjTeXg4AAbGxtdl0FEElTaxoBOAykkJARhYWHIz8+HtbU1vL29IZPJ4OzsjMGDB0MQBISHh5e57YwZM3S5OEREVA4y4b8HcqqJ5OTkcm8hpaamInjhfpjVql9BVb0ZcrJuI3JKb24hEVGxSvv+5YWxREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBJ0+sRYevMIGg2uXr2q6zIkydraGvr6+roug0iyGEhUodTK+1h8ZDUUdcx0XYqkqDJzsHLkQj5Jl6gUDCSqcIo6ZjB/21LXZRBRFcNjSEREJAkMJCIikgQGEhERSQIDiYiIJKFCAikrK6siuiEiomqszIFkb29fbPDcvHkTH3zwQYUWRURE1U+pp33v27cP3377LQBAEASMHj0aBgbak9y7dw9vvfXW66uQiIiqhVIDqVu3bsjIyAAAJCcnw8nJCXK5XKuNXC7Hhx9++PoqJCKiaqHUQDIxMUFQUBAAoEGDBujRoweMjIwqpTAiIqpeynynhr59+yItLQ0XLlxAQUEBBEHQGj9gwIAKL46IiKqPMgfSunXrEBkZCXNz8yK77WQyGQOJiIjKpcyBFBMTgwkTJmDkyJGvsx4iIqqOeypYAAAgAElEQVSmynzad05ODrp16/Y6ayEiomqszIH00UcfISYmpsixIyIioopQ5l122dnZOHLkCA4cOIAGDRqgRo0aWu
N37NhR4cUREVH1UeZAatq0KUaNGvU6ayEiomqszIH07HokIiKi16HMgTRlypRSxy9cuPCVCigoKEBISAgyMjJgYGCA2bNnQ19fH1OnToWenh6aN2+O8PBwAEBUVBTi4uJgYGCA0NBQtG7dGtevXy+2LRERVS1lPqlBX19f658gCLh+/Tp++uknvP32269cQFxcHDQaDWJiYjBmzBgsXboU8+bNQ3BwMLZv3w6NRoOjR48iJSUFSUlJiI2NRWRkJCIiIgCg2LZERFT1lHkLad68ecUO37x5M1JSUl65ACsrKxQWFkIQBCiVShgYGODcuXNwcXEBAHh4eODUqVNo0qQJ3NzcAAD169eHRqNBVlYWLl68qNX29OnT8PLyeuV6iIhIN8ocSCXp2rUrvvnmm1eeXi6X4+bNm/D29saDBw+wZs0aJCUlaY1XKpVQq9WwsLDQGq5SqYr0pVQqX7kWIiLSnTIHkkajKTJMpVJh69atsLS0fOUCtmzZAnd3d0yYMAF37tyBv78/8vPzxfFqtRrm5uZQKBRaAaRWq2Fqago9PT2tYWZmZmWab3Jy8ivXDADXrl0r1/RU/Vy4cIE/mIhKUeZAatGiBWQyWZHhRkZGmDNnzisXYG5uLj5jydTUFAUFBWjRogUSEhLQvn17nDx5Eq6urmjUqBEWL16MwMBA3L59GxqNBpaWlrC3t0diYiLatWsnti0LZ2fnV675Wa04nF6uPqh6cXBwgI2Nja7LINKp0jYGyhxI27Zt03otk8lQo0YNNGvWDAqF4pWLGz58OL766isMHToUBQUFmDRpElq2bInp06cjPz8f1tbW8Pb2hkwmg7OzMwYPHgxBEMSz6UJCQhAWFqbVloiIqp4yB1L79u0BAGlpaUhLS0NhYSGaNGlSrjACnj5zadmyZUWGR0dHFxkWFBRU5HooKyurYtsSEVHVUuZAevjwIUJCQvDLL7/A3NwchYWFUKvVcHFxwapVq57uwiIiInpFZb4Oafbs2bh37x5++OEHxMfHIykpCQcOHMDjx49LPCWciIiorMocSCdOnMCsWbPQpEkTcVizZs0wY8YMHDt27LUUR0RE1UeZA6lmzZrFDpfJZCgsLKywgoiIqHoqcyB5enoiIiICV69eFYelp6dj9uzZ6NKly2spjoiIqo8yn9QwefJkjB07Ft27dxfPrFOr1ejUqRPCwsJeW4FERFQ9lCmQzp8/D1tbW0RHR+Py5ctIS0tDXl4e3n33XfE+ckREROVR6i67goICTJ48GYMHD8a5c+cAALa2tujRowfi4uLg7++P6dOn8xgSERGVW6mBtGnTJsTHx2Pbtm3ihbHPLF26FJs3b8axY8d4YSoREZVbqYG0b98+hIWFoV27dsWOd3V1xZQpU/Dtt9++luKIiKj6KDWQbt++jRYtWpTagYuLC27evFmhRRERUfVTaiDVqVPnhWFz69atcj1+goiICHhBIHXt2hUrVqzQej7Rf+Xn5yMqKgoeHh6vpTgiIqo+Sj3te8yYMRgwYAD69esHf39/ODg4wNTUFA8fPsT58+exY8cO5ObmIjIysrLqJSKiN1SpgWRqaoo9e/Zg0aJFmD9/Ph4/fgwAEAQB5ubm6NmzJ8aOHYtatWpVSrFERPTmeuGFsebm5pgzZw5mzJiBGzduICcnB5aWlmjUqJHW48OJiIjKo8y3DjI0NIS1tfXrrIWIiKoxbuIQEZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQa6LgAA1q1bh+PHjyM/Px9DhgxBu3btMHXqVOjp6aF58+YIDw8HAERFRSEuLg4GBgYIDQ1F69atcf369WLbEhFR1aLzLaSEhAT8/vvviImJQXR0NG7fvo158+YhODgY27dvh0ajwdGjR5
GSkoKkpCTExsYiMjISERERAFBsWyIiqnp0Hki//fYbbGxsMGbMGIwePRqdO3dGSkoKXFxcAAAeHh44ffo0kpOT4ebmBgCoX78+NBoNsrKycPHiRa22Z86c0dmyEBHRq9P5Lrvs7GzcunULa9euxY0bNzB69GhoNBpxvFwuh1KphFqthoWFhdZwlUql1deztkREVPXoPJAsLCxgbW0NAwMDNGnSBEZGRrhz5444Xq1Ww9zcHAqFQiuA1Go1TE1NoaenpzXMzMysTPNNTk4uV93Xrl0r1/RU/Vy4cIE/mIhKofNAcnZ2RnR0NEaMGIE7d+7g8ePHcHV1RUJCAtq3b4+TJ0/C1dUVjRo1wuLFixEYGIjbt29Do9HA0tIS9vb2SExMRLt27cS2ZZ1veZiamgKH08vVB1UvDg4OsLGx0XUZRDpV2saAzgOpc+fOSEpKwoABAyAIAmbOnIkGDRpg+vTpyM/Ph7W1Nby9vSGTyeDs7IzBgwdDEATxbLqQkBCEhYVptSUioqpH54EEAJMmTSoyLDo6usiwoKAgBAUFaQ2zsrIqti0REVUtOj/LjoiICGAgERGRRDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJkgmk+/fvo3Pnzrh69SquX7+OIUOGYNiwYZg1a5bYJioqCgMHDoSfnx/Onz8PACW2JSKiqkUSgVRQUIDw8HDUrFkTADBv3jwEBwdj+/bt0Gg0OHr0KFJSUpCUlITY2FhERkYiIiKixLZERFT1SCKQFixYAD8/P7z11lsQBAEpKSlwcXEBAHh4eOD06dNITk6Gm5sbAKB+/frQaDTIysrCxYsXtdqeOXNGZ8tBRESvTueB9N1336F27dpwc3ODIAgAAI1GI46Xy+VQKpVQq9UwNTXVGq5SqbT6etaWiIiqHgNdF/Ddd99BJpPh1KlTuHz5MkJCQpCdnS2OV6vVMDc3h0Kh0AqgZwGlp6enNczMzKxM801OTi5X3deuXSvX9FT9XLhwgT+YiEqh80Davn27+P+AgADMmjULCxcuRGJiItq1a4eTJ0/C1dUVjRo1wuLFixEYGIjbt29Do9HA0tIS9vb2RdqWhbOzc7nqNjU1BQ6nl6sPql4cHBxgY2Oj6zKIdKq0jQGdB1JxQkJCEBYWhvz8fFhbW8Pb2xsymQzOzs4YPHgwBEFAeHh4iW2JiKjqkVQgbdu2Tfx/dHR0kfFBQUEICgrSGmZlZVVsWyIiqlp0flIDERERILEtJCKqHIWFhUhLS9N1GZJkbW0NfX19XZdRLTGQiKqhtLQ0jJy2EXLzurouRVLUD+9h7deBPPlERxhIRNWU3LwuzGrV13UZRCIeQyIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwAf0ERH9/wSNBlevXtV1GZL1uh/vzkAiIvr/qZX3sfjIaijqmOm6FMlRZeZg5ciFr/Xx7gwkIqL/UNQxg/nblrouo1riMSQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJOj88RMFBQX46quvkJGRgfz8fIwaNQrNmjXD1KlToaenh+bNmyM8PBwAEBUVhbi4OBgYGCA0NBStW7fG9evXi21LRERVi863kP
bv3w9LS0vs2LED69evx+zZszFv3jwEBwdj+/bt0Gg0OHr0KFJSUpCUlITY2FhERkYiIiICAIptS0REVY/OA6l79+4YP348AECj0UBfXx8pKSlwcXEBAHh4eOD06dNITk6Gm5sbAKB+/frQaDTIysrCxYsXtdqeOXNGNwtCRETlovNAMjY2homJCVQqFcaPH48JEyZAEARxvFwuh1KphFqthqmpqdZwlUql1deztkREVPXo/BgSANy+fRtBQUEYNmwYfHx8sGjRInGcWq2Gubk5FAqFVgA9Cyg9PT2tYWZmZmWaZ3JycrlqvnbtWrmmp+rnwoULkvnBxM8vvYrX/RnWeSBlZmYiMDAQM2bMgKurKwDA3t4eiYmJaNeuHU6ePAlXV1c0atQIixcvRmBgIG7fvg2NRgNLS8ti25aFs7Nzueo2NTUFDqeXqw+qXhwcHGBjY6PrMgDw80uvpiI+w6VtDOg8kNauXYucnBysWrUKK1euhEwmw7Rp0zBnzhzk5+fD2toa3t7ekMlkcHZ2xuDBgyEIgng2XUhICMLCwrTaEhFR1aPzQJo2bRqmTZtWZHh0dHSRYUFBQQgKCtIaZmVlVWxbIiKqWnR+UgMRERHAQCIiIolgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEhhIREQkCQwkIiKSBAYSERFJAgOJiIgkgYFERESSwEAiIiJJYCAREZEkMJCIiEgSGEhERCQJDCQiIpIEBhIREUkCA4mIiCSBgURERJLAQCIiIklgIBERkSQwkIiISBIYSEREJAkMJCIikgQGEhERSQIDiYiIJIGBREREksBAIiIiSWAgERGRJDCQiIhIEgx0XUBFEAQBM2fOxOXLl2FoaIivv/4aDRs21HVZRET0Et6ILaSjR48iLy8PMTExmDhxIubNm6frkoiI6CW9EYGUnJwMd3d3AECbNm1w4cIFHVdEREQv643YZadSqWBqaiq+NjAwgEajgZ7e681b9cN7r7X/quixMgs1MnN0XYbkqCS4Tvj5LYqf35JVxmf4jQgkhUIBtVotvi5LGCUnJ5d7vounDip3H2+eLrouQLKUSmWFfO4qCj+/xeHntzSv+zP8RgSSk5MTTpw4AW9vb/zxxx+wsbEptb2zs3MlVUZERGUlEwRB0HUR5fXfs+wAYN68eWjSpImOqyIiopfxRgQSERFVfW/EWXZERFT1MZCIiEgSGEhERCQJDKQqICEhAR07dkRAQAD8/f3h5+eHH374Qddl4ejRo7h3r+RrWfLy8uDp6VlqH1OmTIGvry+uXr1a5vlmZGRg8ODBAICkpCSkpqYWO+8pU6YAAP744w8MGjQIQ4YMQVRUFAAgMzMTs2fPLvM8Sfde5e/g/Pnz6NmzJ5YuXfpS8woNDcVvv/2GvLw8xMbGlnm6pKQkREdHi6+vXbuGXr16ia/j4uKwd+/el6qlOmEgVRHvvfcetm3bhujoaGzcuBHr16/HpUuXdFrT1q1boVKpShwvCAJkMlmpfZw6dQoxMTEvfVbks3737t2LO3fuFFtbjx49AAAzZ85EZGQkdu7cifPnz+Ovv/5CnTp1oFAokJSU9FLzJd162b+DU6dOwc/PDxMmTHil+d27dw/ffvttmdtHRUXBz88PAPD9998jODgY2dnZ4vhOnTrhxx9/LPXvpjp7I65Dqm5MTEzg6+uLn376CXZ2dpg/fz7Onj0LmUwGHx8fBAQE4N9//0VYWBjy8vJgZGSE2bNnw9LSEuPHj4dKpUJubi4mT56Mdu3aif0mJCRg/fr1qFGjBjIyMtC9e3eMGjUKGRkZmDZtGgoKCiCTyTBt2jTcuXMHly5dQkhICHbu3AkDg6cfpUePHmHSpElQKpVaN7i9fPkyvv76awCAhYUF5s6diyVLluDhw4cYO3YsFixYgOnTp0OpVCI7OxuDBg2Cr68v/P39ERERgSZNmiAmJg
aZmZno27cvAODixYv49ddfkZKSgubNm+Ptt98W57d//37873//g0qlQn5+Pt59910AwPvvv48zZ87A3t4ePj4+WLFiBVxcXF77e0YV7/m/g8jISCQlJUGj0WDEiBF45513EBsbC0NDQ9SrVw+FhYXYsWOHOP0333yD1NRUxMTEIDIyEsDTz8dvv/0mtlmzZg3S0tKwatUquLq6YsGCBahRowbMzMywePFimJiYiG1PnTqFZs2aiX8LFhYW2LFjB7y8vLTq7tSpE/bt2wd/f//XuXqqJAZSFVW7dm2kpKTgl19+wa1bt7Bnzx4UFBRg6NChcHV1xerVqxEQEAB3d3ecOXMGixYtwqhRo3D//n1s3rwZ9+/fxz///FOk39u3b+PAgQN48uQJ3N3dMWrUKCxYsADDhw9Hly5dcOnSJUybNg179+6FnZ0dZs+eLf4BAsC+fftgY2ODL7/8EufPn0d8fDwAYMaMGZg7dy6sra3x7bffYsOGDQgPD8fPP/+MlStXIiUlBT179oSXlxfu3r0Lf39/+Pr6lroOWrZsCXd3d/j4+GiF0T///ANTU1Po6+tDrVZDoVCI4+RyOW7evAkAaNasGc6ePVuet4F07NnfwcmTJ3Hz5k3s3LkTeXl5GDRoELZv345+/fqhbt268PLywrp167B+/XoYGRlhxowZ+O233/DWW2+VuhU/atQoXLlyBWPGjMHChQvx4Ycf4pNPPsGxY8eQk5OjFUgJCQmwtbUVX3fq1KnYPm1tbREdHc1AKgYDqYq6desW3n77baSlpYl3njAwMEDr1q3x999/IzU1FWvXrsX69eshCAIMDQ3RrFkzDBkyBMHBwSgoKEBAQECRfm1sbCCTyWBsbIyaNWsCANLT08WtCDs7O61dZM9fxnblyhV4eHgAAFq3bi2GVVpaGmbNmgUAKCgogJWVldZ0derUwdatW3HkyBHI5XIUFBQUqa2sl8xlZ2ejTp06AJ4G0H93j6jVapiZmQEA9PT0tMKUqp5nfwepqam4ePEiAgICIAgCCgsLkZGRodXW0tISISEhMDY2xtWrV+Hk5PRS8xo1ahRWr16NESNGoF69emjbtq3W+Ozs7CLDilO3bl2t3Xj0f3gMqYr475exSqVCbGwsvL29YW1tLd5bKj8/H7///juaNGkCa2trTJo0Cdu2bcOsWbPQrVs3pKamQq1WY+3atZg/f36xB/WL+7VobW2NxMREABCPvwBPv9A1Go1W26ZNm+KPP/4AAKSkpIjB0rRpUyxcuBDbtm3DpEmT0LlzZ63pNm3aBEdHRyxcuBDe3t7i8hoZGYknTqSkpBRbb2Fhodaw2rVr4+HDhwCe3ufQ0NAQN27cgCAI+O2337RuHaWvr1+kT5Kukv4OmjZtig4dOmDbtm3Ytm0bvL29tXYZq1QqrFixAkuXLsXXX38NIyMjCIIAIyMj3L17F8DTk2UePHigNT89PT3x83XgwAH0798fW7duRbNmzbB7926ttrVq1UJOzotvQJqTk4PatWu/8jp4k/HnYRURHx+PgIAA8Q/kiy++gJWVFaysrBAfHw9fX1/k5+ejR48esLe3x+TJkzFz5kzk5eUhNzcX06ZNg5WVFaKiovC///0PhoaGGD9+fJnmPWXKFISFhWHTpk0oKCjA3LlzAQCOjo4ICQnBpk2bxK2OoUOHIjQ0FEOHDkWTJk1gaGgIAAgPD8fkyZOh0Wggk8nE40nPdOnSBTNnzsSBAwdgbm6OGjVqID8/H/7+/pg1axbq16+PevXqFamtTZs2iIyMRMOGDdG0aVMAQKNGjZCVlSXeZHfWrFmYNGkSNBoN3Nzc0Lp1awBPj2s5Ojq+2htCOlHa30FCQgKGDh2Kx48fw8vLS2t3mkKhgLOzM/r06QMTExOYm5vj7t276N27N0xNTTF48GA0bdq0yIM9a9eujYKCAixZsgRdu3bFlClTIJfLYWhoiIiICK22HTp0wM8//4yPPvqo1GU4d+4c3nvvvY
pbKW8Q3jqI3kjr1q1D06ZNixxQ/q9FixbB09OTN9ulCiEIAoYPH45NmzaVuiv4008/xfLlyyGXyyuxuqqBu+zojTR8+HD8+OOPJY7PzMyEWq1mGFGFkclkCAoKws6dO0tsExcXh27dujGMSsAtJCIikgRuIRERkSQwkIiISBIYSEREJAkMJCIikgQGUjXh6ekJOzs72NnZwd7eHo6OjvDz89O6b1dZ/Prrr3B3d4eTkxP+/vvvctXk7++P5cuXA3h6Ue/zFxpWZfHx8eVeP1XBpUuXKv0GtVFRURgyZEiF9/vo0SPs27evzO11sexvOgZSNRIaGopTp07h5MmTiI2NhZOTE0aOHIkzZ86UuY8lS5agU6dOOHTokHghakU4dOgQVq9eXWH96drw4cNLfTTHm2Ls2LHF3hPxdXvRXeRfxaZNm17qzt66WvY3Ge/UUI3I5XLxliV169bF5MmTce/ePcybNw/79+8vUx9KpRJt2rRB/fr1K7S2529BRFVDdb5qpDov++vCLaRqbtCgQbhy5Qpu3LgB4Ok9v0JCQuDi4oL3338fM2bMwKNHjwA8vbHqrVu3EBYWJt6Y9cSJE+jXrx9at24NFxcXTJgwAWq1GkDxu1Y8PT2L/ApNSEjAV199hX///Rf29va4detWsbXm5OQgLCwMbm5ucHZ2xqRJk8R71iUkJKBTp07Ys2cPOnXqhA4dOmDKlCnIy8srcdn9/f2xatUqfPrpp2jbti169eqFuLg4cXxp62L58uXo0KGDeO+z33//HS1btkRiYqL4UMJPPvlEfCDg80JDQxEREYExY8agTZs26Nu3r3hPQuDpc3i++OILtG/fHq1atULfvn3F3UPh4eH4/PPPtfpbvHgxxowZg4yMDNjZ2eH48ePw9PSEo6MjFixYgNTUVPTr1w+Ojo4YPXo0njx5Ik67e/dueHl5wdHREUOHDsWff/6p9X7t2LEDvr6+aNu2LQYOHIgLFy6I6+/Z5yE0NLTE9Xzo0CH07NkTbdu2xaBBg/DHH38gLy8P7dq107p4WRAEeHh44OjRoyVOV5ykpCQMHDgQbdq0Qa9evfD999+XWEtqaiqGDRsGR0dHvP/++1iwYAEKCwuxb98+REVFITk5Gfb29gCAu3fvlvgePL/sCQkJsLOz0/phFRoaKj4kUqVS4csvv0SHDh3g7OyMcePGITMzs8Q6qy2BqoUuXboIsbGxRYZnZ2cLtra2wvHjxwVBEISgoCBh5MiRQmpqqnDx4kVh2LBhwrhx4wRBEITMzEyhU6dOwpYtW4SHDx8KN27cEBwcHIQ9e/YIGRkZwqlTpwRXV1dhw4YNgiAIwooVK4QhQ4aUWMewYcOEZcuWCfn5+cLWrVsFd3d34f79+4JGoyl2GYYNGyYMHDhQ+PPPP4U///xT6NevnzBy5EhBEAQhPj5eaNmypTBixAghNTVVOHbsmNCmTRth165dJa6TYcOGCW3bthX27dsnpKWlCV988YXg7u4uzr+0dZGbmyt0795dCAsLE/Ly8gQfHx9h9uzZgiAIwv379wVbW1vhp59+Eh49elTsvKdOnSq0bNlSiIyMFNLT04W5c+cKTk5Owv379wVBEIQRI0YIY8eOFdLT04W///5bGD16tODj4yMIgiAkJiYKDg4OQk5Ojtifl5eXcOjQIeHmzZuCra2t4OvrK1y+fFnYv3+/YGtrK3h7ewv/7//9PyExMVFwdnYWtm/fLgiCIBw7dkxwc3MTjh8/Lly7dk1Ys2aN4OTkJNy7d098v9577z3h2LFjwuXLl4WhQ4cKgwYNEgRBEB48eCB+HpRKZbHLefr0aaFly5bCrl27hOvXrwuLFi0S2rdvL6jVaiE0NFT44osvxLYJCQmCi4uLkJubW+p0//1c3b17V3BychKio6OF69evC4cPHxbatWsnnDhxoth6evfuLYSFhQk3b94UEhMThY4dOwq7d+8WcnNzhfnz5wuDBg0q03vw/LLHx8cLdnZ2Qm
FhodZ7PHnyZEEQBGH27NnCoEGDhCtXrgh///23MGTIECE4OLjYGqsz7rKr5kxNTQE8fSzDjRs3cPToUcTHx4s3S50/fz4++OAD3LlzB/Xq1YOenh7kcjnMzMyQnZ2N6dOnY+DAgQCAd955Bx07dnzpg/kGBgYwNTWFnp4eatWqVWyby5cvIzExET/88IP4dNnFixeje/fuSEtLAwAUFhZi2rRpaNasGZo3bw53d3f8+eefpT5Xyd3dHX369AEAjBkzBn369MGdO3eQl5f3wnURERGB4cOHQ6VS4cmTJ5g4cSIAiMtgamoKY2PjEufdrFkz8UmmU6dOxbFjx3Dw4EEEBATA09MTH374oXhDWT8/P3GryMXFBXXq1MHRo0fRt29fnD9/HpmZmfD09MT9+/fFZbGxsYGNjQ3mzJkDHx8fdOjQAQDQvn17pKenAwA2btyIzz77DF26dAEAjBw5EqdOnUJsbCxGjx4NAOjTp4/WVt+4ceMAAObm5uLn4b/PnPqvmJgY9OjRQ3wPnq2jnJwc9OrVC2PGjMGTJ09Qs2ZN/PDDD/Dy8oKhoWGp0/3Xzp074erqimHDhgEAGjZsiLS0NGzZsqXIHeWBp3f07ty5M+rXr48GDRpg/fr1sLCwgKGhIeRyOQwMDMT3r7T3oCzL/l+3bt2CiYkJ3nnnHZiYmGDhwoVlujN4dcNAquaePStIoVAgLS0NgiAUebCYnp4erl69WuRu240bN4ahoSHWrFmDK1eu4MqVK0hLS4OPj0+5arp9+7b4+HGZTIbevXujffv2UCgUWo86b9KkCczNzZGWlgYLCwsA0Lpbs0KhEB9/0bNnT/H5OGsTrKcAAAgLSURBVO+++y4OHDgA4Omdwf/bHnh6xl96evoL14WLiwv69u2LvXv3Yu3atSWGz2effSbu6pHJZOJDAf97p3GZTIYWLVqI4err64tDhw7h999/R3p6Oi5evAgA4h3Me/TogR9++AF9+/bFDz/8AE9PT/H5Vc+W8ZmaNWvinXfe0Xr9bFdmWloali5dimXLlonj8/Pztdo/v041Gk2xj6cPDw8Xj0XKZDIcOnQIaWlp4g+WZ8MnTZoEAKhXrx7kcjlOnDiBbt264ciRI1i4cKFYV0nT/VdaWhri4uK01qVGoynx8Q4TJ07E7NmzsXv3bnh4eMDHxwctWrQotu2L3oOX8fnnn+Pz/6+9cw2JqusC8GOmwXQBRzIkTM0uRhZhRkqZJHRxximCIjWz1HoLNPxR4mRJoRh5C6yU1HLUfnUvkVRCTAuiTKSRGk1LJ00DS8i00hx9f/TNYUbHV9+P7yPF/fyaOefsvddae+ass9dZ56y//sLHx4f169ezZcuWcd8KPh0RDmma09DQgJWVFUuXLkWn0yGTySzG4OfPn2+xbXBwMP7+/nh5eREeHk5BQcE/jjeydpElHBwczJIsZs+eTV1d3Zj9mfZpY2Njtn/4Pzee8/LyJOdk+ibmkccbGRwcHNcWw8PDNDU1YW1tzfPnz6XChCNJTk6mv79/1PaRtZgMBgMzZsxgeHiY8PBwenp6UCqV+Pv78+vXL2llAqBSqdizZw89PT2Ul5eTkJAg7bOyshr1tumxTqAGgwG1Ws2GDRvMtpuWbrBkI0sOKSYmhkOHDknfHRwcxrSvUU6FQkF5eTlyuZzh4WG8vb3HHHMs+Y0rLVPG0jc4OJjNmzdTUVHB48ePiYqK4ujRo0RHR4/Sb7w5GKnLSAYHB6Xta9asobKyksrKSqqqqkhJSaGkpITCwsIJ6TldEEkN05w7d+6wcuVKFi5ciKurKz9+/MBgMODk5ISTkxNDQ0OcO3fOrOqqkQcPHrB27VoyMjIIDg7Gw8MDvV4vOQFbW1uzdt+/f5dCSiMx/UNbW1tL4zs5OSGXy1m8eDF9fX1SqAmgubmZvr4+s1XTWDg6Okr9TSRDcCK2uH79Oh8/fuTixYsUFhai0+ks9uXg4GCmj5GGhgbp89DQEDqdDnd3d5
qbm3n58iUajYYjR47g5+cnVek12tbd3R1nZ2fy8/Pp6+vD19d3XJ3G0rOzs9NMvqtXr0ql58fDdN7kcrlZP9bW1jg7O4+yi1KplJ5/CwwM5MmTJzx69Ijt27dLjmS8dqbyt7S0mI1bXV3NzZs3R8k6MDAg1eHat28feXl5REVFUVpaOurYicyBqe5GB2r6ezcmCgEUFhby6tUrAgMDSUtLIycnhxcvXtDd3W3RrtMV4ZCmEb29vXz+/Jmuri7evn1LcnIypaWlqNVq4Hdl2I0bNxIbG4tWq6WhoYG4uDi6u7ulKrGm2NnZ0dTUhFarpbW1lfPnz1NfXy+Fg1atWsXbt28pLi5Gr9dz5syZMevEyGQyvn37Rmtrq8VVlKurK35+fqjVaurr69FqtajVary8vHB3d/+f2ch4shnLFsby6J2dnWRmZhIbG4u/vz8BAQGcOnVKyrKSyWQ0NzdbdORGamtryc/Pp6WlheTkZH7+/IlCoWDevHlYW1tTUlJCR0cHZWVlUraeadagQqFAo9Gwbds2M7sO/4t05IMHD1JUVMT9+/dpa2vj8uXL3Lt3Dzc3twm1l8lkvH//Xsp2HElYWBgPHz7k1q1bfPjwgbS0NL5+/SqV+l69ejX29vbcvn3bLNQ7XjsjISEhNDQ0cOHCBfR6PWVlZaSnp5uFHI3Y2tpSW1tLUlIS7969o7Gxkerqajw8PIDfK/Guri7a29snNAemui9ZsoRZs2Zx6dIl2tvb0Wg0Zg7106dPJCUlUVdXR1tbG8XFxTg6OmJnZzchO08XhEOaRqSkpODr68umTZuIiIhAr9dTVFSEl5eXdExaWhouLi5ERkYSFhaGo6Mj2dnZ0n7Tq8L9+/fj6elJREQEISEhdHR0EB0dLV35+/j4EBkZSXJyMkFBQbi5ueHp6WmxL29vb1xdXdm5cyeNjY0W5U9NTcXZ2Znw8HAOHz7MsmXLzGT7t1gKs5hus2SLrKwsABITE1mxYoV0HyAuLo62tjY0Gg3w+0SfkZEhHW8JPz8/ampq2LVrFzqdjoKCAubOncuCBQs4e/YsBQUFKJVKcnNzSUhIYObMmWZl3JVKJf39/aPu2Y3U658eIlUoFJw4cYKsrCwCAwOpqKggOzub5cuXj9sWIDQ0lBs3bpiFDE3x9PQkMTGR3NxcduzYQV1dHXl5eWaJAAEBAcjlcrPfxkTawe9EmitXrvDs2TNUKhWpqanExMSwd+9ei/JkZmYyMDBAUFAQoaGhLFq0iNOnTwOwdetWrKysUKlU2NjYjDsHprrPmTOHpKQkKioqUKlUvHnzhgMHDkjjxsTEsG7dOqKjo1GpVLS0tJCTk/N/ecB3KiPqIQkEf4CTJ09iMBikm/j/DTU1NRw/fpyqqqopfWKLj4/H3t5eyqQTTF9EUoNAMMX48uULNTU1XLt2jd27d09ZZ6TVann9+jVlZWXcvXv3T4sjmASIkJ1AMMXo7e0lPj4emUxmltU21Xj69Cnp6ekcO3YMFxeXPy2OYBIgQnYCgUAgmBSIFZJAIBAIJgXCIQkEAoFgUiAckkAgEAgmBcIhCQQCgWBSIBySQCAQCCYFwiEJBAKBYFLwN7jWew7FTOecAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tmp = cc_raw[['default_payment_next_month','SEX']]\n", "sex_dict = {1: 'Male', 2:'Female'}\n", "tmp = tmp.replace({'SEX': sex_dict})\n", "\n", "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(figsize=(6,6))\n", " 
sns.countplot(x='default_payment_next_month', hue='SEX', data=tmp, ax=ax)\n", " ax.set_ylabel('Count', fontsize=14)\n", " ax.set_xticklabels(['Does not default (0)', 'Defaults (1)'])\n", " ax.set_xlabel('Default-on-next-payment-cycle status', fontsize=14)\n", " ax.set_title('Defaulted on CC Payment', fontsize=16)\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# Row counts for each (sex, default status) combination.\n", "# .shape[0] counts the filtered rows without indexing a label-indexed Series by position.\n", "male_no_def = tmp[(tmp['SEX'] == 'Male') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "female_no_def = tmp[(tmp['SEX'] == 'Female') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "male_def = tmp[(tmp['SEX'] == 'Male') & (tmp['default_payment_next_month'] == 1)].shape[0]\n", "female_def = tmp[(tmp['SEX'] == 'Female') & (tmp['default_payment_next_month'] == 1)].shape[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**We see that women were less likely to default on their credit.**" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Males who didn't default: 9015, Females who didn't default: 14349\n", "Males who did default: 2873, Females who did default: 3763\n", "Percent of males who defaulted: 24.17%\n", "Percent of females who defaulted: 20.78%\n" ] } ], "source": [ "print('Males who didn\\'t default: {:5d}, Females who didn\\'t default: {:5d}'\n", " .format(male_no_def, female_no_def))\n", "print('Males who did default: {:8d}, Females who did default: {:8d}'\n", " .format(male_def, female_def))\n", "# Default rate = defaulters / (defaulters + non-defaulters) within each group;\n", "# dividing by the non-defaulters alone would give the odds, not the percentage.\n", "print('Percent of males who defaulted: {:3.2f}%'.format(male_def*100/(male_def+male_no_def)))\n", "print('Percent of females who defaulted: {:3.2f}%'.format(female_def*100/(female_def+female_no_def)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**From the plot below, we see several things.**\n", "* **The third largest 
population completed high school,**\n", "* **The plurality of the sampled population went to university,**\n", "* **The second largest population completed grad school, and**\n", "* **The remainder are either 'others' or in a column of 'unknown' values (these had values that weren't defined in the data description)**\n" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tmp = cc_raw[['default_payment_next_month','EDUCATION']]\n", "edu_dict = {1:'Grad School',\n", " 2:'University',\n", " 3:'High School',\n", " 4:'Other',\n", " 5:'unknown',\n", " 6:'unknown',\n", " 0:'unknown'}\n", "tmp = tmp.replace({'EDUCATION': edu_dict})\n", "\n", "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(figsize=(8,6.5))\n", " sns.countplot(x='default_payment_next_month', hue='EDUCATION', data=tmp, ax=ax)\n", " ax.set_ylabel('Count', fontsize=14)\n", " ax.set_xticklabels(['Does not default (0)', 'Defaults (1)'])\n", " ax.set_xlabel('Default-on-next-payment-cycle status', fontsize=14)\n", " ax.set_title('Defaulted on CC Payment', fontsize=16)\n", " ax.legend(loc='upper right', fontsize=14)\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Row counts for each (education level, default status) combination.\n", "# .shape[0] counts the filtered rows without indexing a label-indexed Series by position.\n", "uni_no_def = tmp[(tmp['EDUCATION'] == 'University') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "grad_no_def = tmp[(tmp['EDUCATION'] == 'Grad School') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "hs_no_def = tmp[(tmp['EDUCATION'] == 'High School') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "other_no_def = tmp[(tmp['EDUCATION'] == 'Other') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "unk_no_def = tmp[(tmp['EDUCATION'] == 'unknown') & (tmp['default_payment_next_month'] == 0)].shape[0]\n", "uni_def = tmp[(tmp['EDUCATION'] == 'University') & 
(tmp['default_payment_next_month'] == 1)].shape[0]\n", "grad_def = tmp[(tmp['EDUCATION'] == 'Grad School') & (tmp['default_payment_next_month'] == 1)].shape[0]\n", "hs_def = tmp[(tmp['EDUCATION'] == 'High School') & (tmp['default_payment_next_month'] == 1)].shape[0]\n", "other_def = tmp[(tmp['EDUCATION'] == 'Other') & (tmp['default_payment_next_month'] == 1)].shape[0]\n", "unk_def = tmp[(tmp['EDUCATION'] == 'unknown') & (tmp['default_payment_next_month'] == 1)].shape[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**We see from the printouts below that 1.56% of the dataset is unknown or other. That's a small fraction of the dataset.**" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "University Grads who didn't default: 10700, University Grads who did default: 3330\n", "Grad School Grads who didn't default: 8549, Grad School Grads who did default: 2036\n", "High School Grads who didn't default: 3680, High School Grads who did default: 1237\n", "Others who didn't default: 116, others who did default: 7\n", "unknowns who didn't default: 319, unknowns who did default: 26\n", "Percent of University grads who defaulted: 23.73%\n", "Percent of Grad School grads who defaulted: 19.23%\n", "Percent of High School grads who defaulted: 25.16%\n", "Percent of Others who defaulted: 5.69%\n", "Percent of Unknowns who defaulted: 7.54%\n", "Percent of dataset that is unknown or other: 1.56%\n" ] } ], "source": [ "print('University Grads who didn\\'t default: {:5d}, University Grads who did default: {:5d}'\n", " .format(uni_no_def, uni_def))\n", "print('Grad School Grads who didn\\'t default: {:5d}, Grad School Grads who did default: {:5d}'\n", " .format(grad_no_def, grad_def))\n", "print('High School Grads who didn\\'t default: {:5d}, High School Grads who did default: {:5d}'\n", " .format(hs_no_def, hs_def))\n", "print('Others who didn\\'t default: {:5d}, others who did default: {:5d}'\n", " 
.format(other_no_def, other_def))\n", "print('unknowns who didn\\'t default: {:5d}, unknowns who did default: {:5d}'\n", " .format(unk_no_def, unk_def))\n", "# Default rate = defaulters / (defaulters + non-defaulters) within each group;\n", "# dividing by the non-defaulters alone would give the odds, not the percentage.\n", "print('Percent of University grads who defaulted: {:3.2f}%'.format(uni_def*100/(uni_def+uni_no_def)))\n", "print('Percent of Grad School grads who defaulted: {:3.2f}%'.format(grad_def*100/(grad_def+grad_no_def)))\n", "print('Percent of High School grads who defaulted: {:3.2f}%'.format(hs_def*100/(hs_def+hs_no_def)))\n", "print('Percent of Others who defaulted: {:3.2f}%'.format(other_def*100/(other_def+other_no_def)))\n", "print('Percent of Unknowns who defaulted: {:3.2f}%'.format(unk_def*100/(unk_def+unk_no_def)))\n", "print('Percent of dataset that is unknown or other: {:3.2f}%'\n", " .format(100*(unk_def+unk_no_def+other_def+other_no_def)/cc_raw.shape[0]))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import Imputer\n", "from sklearn.pipeline import Pipeline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**I want to try imputing new values for the undefined education values, so I'll mark them as missing data (by making them NaN), and as I'm experimenting, I'll perform this step on a copy of the data.**\n", "\n", "**note: I've included these print statements to confirm that I'm not changing the original dataframe (pandas' copy() makes a deep copy of the data and index by default, but Python objects stored inside the frame are not copied recursively, only their references are).**" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pre-NaN: cc_data[\"EDUCATION\"].count(): 30000\n", "Pre-NaN: cc_data_copy[\"EDUCATION\"].count(): 30000\n", "Post-NaN: cc_data[\"EDUCATION\"].count(): 30000\n", "Post-NaN: cc_data_copy[\"EDUCATION\"].count(): 29655\n" ] } ], "source": [ "cc_data_copy = cc_data.copy()\n", "print('Pre-NaN: 
cc_data[\"EDUCATION\"].count(): {}'\n", " .format(cc_data['EDUCATION'].count()))\n", "print('Pre-NaN: cc_data_copy[\"EDUCATION\"].count(): {}'\n", " .format(cc_data_copy['EDUCATION'].count()))\n", "\n", "cc_data_copy.loc[cc_data_copy['EDUCATION'].isin([0,5,6]),'EDUCATION'] = np.nan\n", "\n", "print('Post-NaN: cc_data[\"EDUCATION\"].count(): {}'\n", " .format(cc_data['EDUCATION'].count()))\n", "print('Post-NaN: cc_data_copy[\"EDUCATION\"].count(): {}'\n", " .format(cc_data_copy['EDUCATION'].count()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Baseline model\n", "\n", "**There are still a number of preprocessing steps that need to be performed (making dummy features for categorical features, implementing an under/over-sampling strategy to address the class imbalance), but let's first implement a baseline model to establish a reference point.**" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(cc_data_copy, \n", " cc_target,\n", " test_size=0.3,\n", " random_state=nb_seed,\n", " stratify=cc_target)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "# Baseline: fill missing values with each column's most frequent value, then fit a random forest.\n", "# The forest is seeded with the same nb_seed as the split so the baseline metrics are reproducible.\n", "# NOTE(review): Imputer was deprecated in scikit-learn 0.20 and removed in 0.22; when upgrading,\n", "# switch to sklearn.impute.SimpleImputer(strategy='most_frequent').\n", "steps = [('imputation', Imputer(missing_values='NaN', strategy='most_frequent', axis=0)),\n", " ('r_forest', RandomForestClassifier(random_state=nb_seed))]\n", "pipeline = Pipeline(steps)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.94 0.88 7009\n", " 1 0.61 0.31 0.41 1991\n", "\n", "avg / total 0.78 0.80 0.78 9000\n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pipeline.fit(X_train, y_train)\n", "y_pred = pipeline.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(pipeline, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] 
}, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "roc_plotter(pipeline, X_test, y_test)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAH8CAYAAABM96wGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XmcW3W9//HXSTLJTCbTjbaUHdm+7KCsF1lELyrIJiq4IVRRWURtKS2lZSlQSgsUFKjoVQHBqyKC+kOLXDeWimVzAdQvpYi0VdtCWzqZmSST5Pz+SKam0+k0Mz05Jyd5P+8jt5NzkjPvxqHnM9/VcV0XERERaV6RoAOIiIhIsFQMiIiINDkVAyIiIk1OxYCIiEiTUzEgIiLS5FQMiIiINLlY0AFEmokx5jVg54pDLrAOeAL4grV2ecVrU8AM4Exge+CfwEPA9dbaNf2uOxKYCZwBTABeB+4GbrbW5gfJM6z3iUhjUcuAiL9cYBKlG+8EYEdKN/v9Kd2EATDGtFMqEN4HXATsBZwHHA783hgzvuK1o4Gny+c+A+wLXA5cDNy1uSDDfZ+INB61DIj4r9Nau6ri+b+MMVcC9xpjOqy1ncB1QBvwdmttT/l1y4wxi4CngFuAT5SPzwOywH9ba3vLx/5hjHkT+I0x5qvW2mcGyDHc94lIg1ExIFIfcuU/C8aYCDARuKyiEADAWpszxswBvmuMuQjoBs4CLqm4ofe99nFjzLuBF/p/M2NMvNr3GWOKlAqGX5efnwNcZ63dyRhzHHAf8GPgbGBh+bp7WmuXll8/AVgOHGCt/asx5nPANGA88AdgsrX22WF8ZiLiEXUTiATMGLMnMAtYaK3tptQlMALY3G/lTwAtwCHA7kAKGPBmaq19zFqbGeDU7kD7MN7Xp3Id8x2ADuBg4DLgj5TGIPT5MPBSuRA4hdLf9Uvl1y8EfmWM2XaQ7yUiNaaWARH/3W6M+Ur56xilVoGHKI0lABhD6Wa7djPv7zs+llLLgAu8NcQMo8p/DvV9A3GBudba1wCMMd8HPgTcWD7/YeB75a8vBW6w1j5cfj7HGHMCpfEQsz3IIiLDoGJAxH+zgB9S+o3+KmAXYKa1tu8mvwZwKA0wfHWA929f/vNN4I3ya0cPMcNw37c5/6j4+vvAbGPMDkABOBo4t3xuH+B6Y8x1Fa+PA8s8yiEiw6BuAhH/rbbWvmqt/TPwUUpF+U+MMdHy+SWUbvSHbub9hwO9wHPAUkrFw+EDvdAY831jzKkDnBru+2DgXyI2dClYa18HFlPqKjgDeK6v1aD83snAQRWPfSi1GIhIQFQMiASoPHjvPEo3xcnlYwXgf4Cp5SmGGxhjWiitPfAja+1aa22RUhP8F8rnKl97PPARYPUA37ea9/XNeMhRGsPQZ/cq/mo/AE4FTqfUUrDhWwM7l4uhV621rwJTgHdVcU0RqRF1E4gEzFr7rDHmW8AVxpj7rLX/Aq6ldIN83BgzA3gJ2I1St0IS+HLFJWYBJwH/Z4y5mtLCQcdQ6rP/trX2qc186y297/fl1z0DTDbGvAjsDZxDqfl/MPcDN1DqiphYcXw+8C1jjAWepDQDYSJw5xauJyI1pJYBEX+5mzl+OaXfwG8CKE8pfDfwE+BW4G+UFgJ6Djiscp0Ca+0bwDuBvwL3UJoSeClwPXD+5oIM4X0XU1rz4AVKswVmbukvaa1dCSwCnrbWrqg4fn/5GlcCLwKnAa
eVu0xEJCCO627u3yYRERFpBr63DBhjjjDG/GaA46cYY542xiwyxpzndy4REZGw8eqe6msxYIy5lNLAqES/4zFKfYn/Tamf9HOVa6+LiIjIxry8p/rdMvAK8MEBju8DLLHWri+Prn6S0kAmERERGZhn91RfZxNYax8yxuwywKkRbLwSWicwcqBrdHVlXYCi6/Lqym6ijsv4kQmSiRj5osNv/tayyXtc1yWX7SGb6d7okev7OtsNuTSv/TOPiwuui+u6G3/tFktf41I67AKlc33H3WLxP++peO9Ar+s71v88joPjOEDpzw1fRyI4OBud7+rKozEfjWfdqnVBR6grDpsfdSlDp8/TGxG3yBdff5IJuTQnrv6r03f80R3f6fnH+97li5yBjntxT+1TL1ML17PxPOYOSnu8D6i7O0dnJg/FAtmiw+tvZIlHc6xf/S+efnIJ2ezGN/xcLkNLS4JEa5J4a5JE3yPRRqI1SceosbS3JSi0R3EcqLz5Ok75Bky/Px0Hx4lscqyam/gm13Qi5XNs+LMaq1f1UCxuaYbX4BzHUUHhsa39TJdZLcZXST+j3tLn6Y0x6VUcsuwpoi3tW36x/4Z0T4XgioH+d7y/AnsYY0ZRWmv9WP6zrvkm8kWXrlwBx4GWaOk360KhwMM//ynRUfuw8x4HkEgkaW1NkmhrJ55oIxIZvEfEKRZYX9B/ICIismVrUuO587graetNc3PF8d7Va4KIs1X3VAiuGHABjDEfA9qttd80xkwGHqX0l/pmeeGVAXVmNm0e//Mfn2XkqFHse8yJ9BZjOM5/msOq+V27UAxf41m40oqINJbVI7bf5JjTUn3rroe26p4KIVxnYPWaLvdfa7o3OtaVTnP//97FBz/yCTqdXejJ99U4DpEq/3fJ9bq8trxQVeFQL1at6sFVN0HdUTeBt/Qz6i19nt774dcO3nDr+NUex3j+4b7nlSdqfmuqlzEDVXuzM7vJsacW/ZZ99juQUaPH0L1+aP87RMpdDTGnyLgRxVLxUG5V6Csk+loZyl39mx4r/4kDEcfdUFD0vX7D+UGObfheG15Tvk6/7135vglHxBid6tvbZniSyTjd3bmtuoZsbOs/02qW/m8e+hn1lj7P4Ym89neKu75ti6/rfSOQboKtFrpioDdf3Oj5v/65nH+uWMbHPvkZAFqiBeLRKFGnSDLhkIiWbvYtUZdYxKWl73nEJRZ1iVbc4Nny/84iItJk4r/8Bcl519Pzmc+TPevjg792wlifUnkrdMVApWKxyBO//SX/9c7jaInHARjX3sO49h6S8Sgj2zadZigiIlKtlicfJ3njnKoHafW+qZYB3/3lxT8RjyfYY699NjoecRw6WkP9VxMRkYDFnnuG9tmzoOiS+fjZW2wVAIhvu40PybwXujtmLOrQm3fp6enm2cWLOOWDZ24yN7+jNUZkCPP1RUREKkVfepHUVTMgnyd72hlkzv1MVe/Lq2XAH+M6orzVBY//+kl233Nvthm78XLLLdEIbS3amVlERIbPTaUodnSQP+54ei68uGJw2eACmlq41UJXDBS71tKad3nN/oGPT7ygtIQv0Df0fkRrbEir+ImIiPRX3GVXOm/7Ou7o0bCFResqxcePqWGq2gldMVDIdpPL5oi5vUyIZ8i6BbpIUXAjtMZc4rHWoCOKiEgDcMcOfWZAfs3aGiSpvdAVAwDZbJbW1tJNP0EvcdbSQxvJlraAk4mISDNr2VYtA77JZLIkylMJodRDkKSHaKQuN4wQEZE65ry1jpanfkfu/Sdt9bXUMuCjTCazoWVgI44GDoqIyBCk06SmX0p0ycs42QzZ087YqsvFx6llwDeZbJbW1sQmxx0VAyIiUq1MhtQVlxFd8jLF7bYnd/RxW33J/Fq1DPgmm83SGt+0GBjKiE8REWliuRypq2cQe/EFimPHkp43H3ebrV8wyImGczZbKIuBUjfBAMWAWgZERKQKbV+7jdhzz+KOHEV63i0UJ2znyX
VbxqqbwDc9FbMJNnA2/D8REZFBZc/8GLG//ZXuKdMo7rSzZ9ctqJvAP5lMlpEdHRsfdCJabEhERKpS3G57Ohf8T9UrC1YrNkYtA77JZrMkEht3E2jwoIiIDEkNfoEsrFPLgG8y2QHGDKgYEBGRgMU8GIQYhHAWA5ksrYn+YwZUDIiIyKbiP32IyLp1ZM4+tyatAZXUMuCjTGbTdQbUTSAiIv3F/+8RkrfdCkDvIYdR2G//mn6/iKYW+ieb23TMgFoGRESkUssTj5G86QYAej53Qc0LAdAAQt8Ui0V6c720qhgQEZHNiD2zmPbrZ0HRJfOJT5H9yEd9+b6FdWt8+T5eC10xkMlmicfjm0wjVDeBiIgAUCzS9vUFkC+Q/eCHyZzzad++dYtaBvwx0HgBQC0DIiJSEomQnnMTiYUPk/nkOTUfNFip8JYGEPpioDUGAIhE/Q8jIiJ1yR03jsynJvr+fTVmwCeZbHbT8QKglgEREQlcYd26oCMMS/iKgUxm030J0JgBEZGm1dUFyaSv3QGb40SCzzAcISwGsiQS8U1PqBgQEWk6zlvr6Jj8RXoPO5yez18UeEEQVTeBP7K5AXYsBBUDIiLNJp0mddkUIq//g1gkAt3d0N4eaKSiViD0RyaTJZlMbnzQcbRjoYhIM+npITVzGtFXllDcfgfSN9wceCEAGkDom55Mhm36f9hqFRARaR65HKlZM4m99CLFsWNJz5uPWycbBBU1tdAfmeymYwY0eFBEpHk46U4iK1fijhpF+sZbKW47IehIG0RHq2XAF4WebtpbYkSLBXAcXFycoooBEZFm4Y7Zhs75X8VZt47ijjsFHWcjxbc0tdAXzpvLGPXmGFKRdVAeJlBsH0tm1I7BBhMREd+4o8fg1uFv4Zpa6BPXdXEcBwcX3PJBdROIiEgdUDeBT1zX3dAi8B/hrMRERGTLYn94jvzB7wh8DYFqaAChT1wXIv1aAtwQ/ICIiMjQJX7yIG23f4XsyafS86VLgo6zRbHRo335PsYYB1gAHARkgPOsta9WnJ8GfBR4C7jRWvuzwa4XvmIANwzFoYiIbKX4owtpu/0rABT22CvgNNUprvdtAOHpQMJae5Qx5ghgfvkYxpj9KRUChwMR4HfGmF9ZazObu1joOtuLRXfTBYZUHYiINJSWJx4jefNcAHo+fyG5D5wScKLqOBHH88dmHA08AmCtXQwcWnFuH+C31tpea20WWAIcOFju0LUM4A5QDGjMgIhIw4j94Tnar58FRZfM2eeS/fBZQUeqWnSUbwMIR1DqAuiTN8ZErLVF4AXgMmNMO9AKHAV8fbCLha4YcHF16xcRaWCFXd9GYZddyR/0DjJnnxt0nCHxsZtgPdBR8byvEMBa+zdjzB3AQuAV4PfAG4NdLHzFgAuRSL/eDXUTiIg0DHf0GDrn3w5tbaH79z0yyp8BhMAi4GTgAWPMkZRaAwAwxowFxlprjzXGjAB+Abw42MVCWAwMNIAwXD8sIiKyBf03pAsJ17+WgYeAE4wxi8rPJxpjJgFLrLUPG2N2M8Y8DWSBS6217mavRAiLgaLr4vS/+asWEBGROhAZ6U/LQPnmfkG/wy9XnD9/KNcLXTGglgERkcbhrF5N4mc/JfOpidC/CziE3E7tTeAL10WzCUREGoCzbi0d0yYTWfY6RKOhGyw4EO1N4BPXdYn0Kwa0AqGISLg46U5Sl00hsux1Cru+jezpZwQdyRORkdqbwBelFQjVMiAiElo9PbTPvIzo0lcobr8D6bk343aMCDqVJ9RN4JOBVyAMJouIiAxd27e+QeylFymOG0d63nzcMdsEHckzUf+mFnoqdMXAwHsTqBoQEQmLzDkTiaz8Nz2fv5DithOCjuMptQz4pLSDsfYmEBEJK7djBF3Xzgk6Rk1ERo4KOsKwhLAYcIlsMlpTxYCIiATP7Xxryy+qQ6EsBjYdQCgiInXJLS981yT/bjshXSshdMXAwCsQNscPmYhI2LR+7z4ir/
+D7kunQzQadJyai4xQN4EvtAKhiEg4JH7yIK13fRMcyJ10MvkDDw46Us25aXUT+GKgFQhd1QIiInUl/uhC2m7/CgDdX760KQoBUMuAbxxHyxGLiNSzliceI3nzXAB6zr+I3EknB5zIP2oZ8MmAgwc1ZkBEpD4UiyQe/CEUXTJnn0v2Q2cGnchXzggtOuQLzSQQEaljkQjp2fOI/+r/yJ18atBp/KeWAX8MWAqoQBARqR/JJLlTTgs6RSC0a6FPBm4ZCOeHLyIijcXp0ABCX2y6+iCoGBARCYazejXumDFNsYZAVbrWB51gWEJXDGyy4FDpoIiI+MxZvZqOSRdR2GMvui6/EuLxoCMFLzUy6ATDEr5iQN0EIiKBc9auoWPaZCIrV+KOHgP5vIoBUMuAXwaqBVwNIBQR8Y2T7iQ1fQqRZa9T2G030tfPg2Qy6Fh1welQy4AvBuwmUMuAiIg/urtpnzGN6NKlFHfYkfQNN+N2jAg6Vf1Qy4A/NIBQRCRgsRjFcePonDu/1EUgG2jXQp8MvAKh/zlERJpSMkn6+huJrF2Du+22QaepO+om8Im6CUREApZIUJywXdAp6pO6Cfwx8GQCFQMiIhI8J6VFh3yhvQlERHziusR/9Si5Y4/XtMFqdatlwBcR7VooIuKL1u/dR+td36Tlsd/Qdc0c/VtbBUeLDvlEiw6JiNRc4qEHaL3rm+BA7t0nqBColloG/DHgokP+xxARaVjxXyykbcFtAHRPmkrv8e8JOFF4aNdCnww8tTCcH76ISL2JPf8syflzAeg5/yJyJ34g4EQho24Cf2hqoYhI7eT33Z/eQw6jsM9+ZD90ZtBxwqe7M+gEwxK6YmDAFQjVMiAi4o3WVrquvQFCupJe0Jz2cC7NHLpiQFMLRURqLBoNOkF4qWXAHwOXAioQRESkDmjMgD80gFBExBuRFctpve8eur90CbS2Bh2nIThqGfDHwN0EKgZERIbCWbWK1LTJRFauxB09mp7PXRh0pMagqYVbZoxxgAXAQUAGOM9a+2rF+SnAR4ECMMda++P+1xh4BcIaBRYRaUDO2jV0TJ1EZOVKCvvsS88nzw06UuNo96ebwIv7aSW/WwZOBxLW2qOMMUcA88vHMMaMBC4GdgM6gD8Cm4QfeNEhVQMiIlVZv57U9ClEViynsNtupGfPhWQy6FQNw+nxrZtgq++nlfwuBo4GHgGw1i42xhxaca4LeI1S8BSlamYTGjMgIjJ8sfvuJbJ0KcUddiR9w824HeGcCle3/JtauNX300p+FwMjgLcqnueNMRFrbbH8fDnwFyACzBnwCokUhbYx5SflhYgdzYcVEalG/rzP4nZ2kTnzY7ijx2z5DTIkPrYMbP39tILfxcB6SpVKn8rgJwITgF0ojQJ41BizyFr7bOUFYm/+k8jfX9zooq07HwBJba85HC0tUZL67Dylz9Rb+jy91dISpXfaVNqCDtKokr61DGz1/bSS38XAIuBk4AFjzJHACxXn1gI91tpeAGPMOmDUJldwwXU33poo05PDjedqlbmhJZNxurv12XlJn6m39Hl6S5+n99rbE/950pP269tu/f20gt/FwEPACcaYReXnE40xk4Al1tqHjTHPGmN+T6l/40lr7S+ruqrGDIiIbMp1obcX4mpZ8YuPuxZ6ej/1tRiw1rrABf0Ov1xx/mrgah8jiYg0Jtel7RsLiL5sSV8zB9rbg07UHHzqJvD6fhq6RYc0i1BEZMtav/sdEg/cD9EIsVdeJn/Q24OO1BScjG/dBJ4KXzEgIiKDSjz4Q1rv+TZEHLqmX6lCwE/Jji2/pg6pGBARaSDxR35O29duB6B70lR6jzs+4ERNRi0DIiISKNel5ZnFAPScfxG5958UcKAm1BbORZxUDIiINArHoevyK2l5zwn0HnV00GmaksYMiIhI8KJRFQJB0q6F/tDeBCIiUrf8W4HQU6ErBkREpCSyfBnFMdto18E6om4CERHxTWTFcjomX0xx3H
jSN9yk3QfrRVsq6ATDomJARCRknFWrSE2bjLN2Le4ub8NNtAYdSfpkuoJOMCwqBkREQsRZu4aOqZOIrFxJYZ99Sc+arb0H6kmbFh0SEZFaSqdJTZ9CZMVyCrvvTnr2XI0XqDNOVi0Dvhh43oBmE4hIE2hro7DrbjjZHOk5GidQl0I6uy10xYCISNOKRumeejlO53rckYNuTy9BUTeBiIjUXCSiQqCeqZtARESkyWlqoYiIeMZ1Sdz/PXInnayxAWGiqYUiIuIJ16XtGwtIPHA/8Scfp/MrCyASCTqVVENjBkRExAut3/0OiQfuh1iUnrMnqhAIE21U5I8BNyoSEWkQiR/dT+s934aIQ9dlV5A//IigI8kQOOomEBGRrRH74/O03XkHAN2TptJ73PEBJ5Ih0wBCERHZGvkDDyZ78qkUd96F3PtPCjqODIOT7Q46wrA0RjGgrgMRaQSRCD1fnKx/00LMbW0POsKwNEYxICLSKFQIhJpaBkRERJpdSGd+hK4Y0GwCEWkEkVeXklxwG10zrsQdPSboOOIRdROIiEhVIsuX0TFtMs66dbR+9156vvCloCOJR9RNICIiW+SsXEmqXAjk3/4Oej53QdCRxEuaWhgkdR2ISP1z1q6hY9pkIqtWkd93P9JXz4Z4POhY4iG1DIiIyKDij/ycyIrlFHbfna7ZcyGZDDqSeMxtVcuAiIgMIvvRT0A0Su6E9+GmwrmhjQxOLQMiIjI4xyF75seCTiG1pI2K/DHgxxzOz15ERBpNQlMLRUSkT7GI05XG7RgRdBLxkZPrCTrCsKgYEBHxmuuSvPUmYn/+E53zbsEdPz7oROITtzWcg0IbpBhQP4GI1AnXpe3OO4gv/BnE40RW/puCioGmoZYBERGh9d67STz4Q4hFSV91HYUDDgw6kvjI1ZgBEZHmlnjgB7TeezdEHLqmX0n+8COCjiQ+86tlwBjjAAuAg4AMcJ619tXyuYOAWwGXUtP5kcBp1tpHN3e90BUD2qhIROpVZM2bAHRPnkbvse8KNowEw7971OlAwlp7lDHmCGB++RjW2j8BxwMYYz4MrBisEIAQFgMiIvWq57MXkDv2eAp77xN0FAmIj7sWHg08AmCtXWyMObT/C4wxSWAWcMyWLqZiQETEK46jQqDJ+TiAcATwVsXzvDEmYq0tVhz7DHC/tXbNli7WGMWAeg5ERKQOuAnfphauByrXtO5fCAB8AvhQNRdrjGJARMRnUfs3imO2wR03LugoUkecXMavb7UIOBl4wBhzJPBC5UljzAggbq1dUc3FVAyIiAxR9NVXSF12CW57O5233KGCQDZwE21+fauHgBOMMYvKzycaYyYBS6y1DwN7Aa9VezEVAyIiQxBZvozUtEtw0mnyBx6MO2ZM0JGkjji9/rQMWGtd4IJ+h1+uOP8scEa11wtdMTDwrA0NGhCR2nNWriQ1dRLOunXk3/4OumZcBdFo0LGknjiRoBMMS+iKARGRQHR10TFtMpHVq8nvux/pWddDPB50Kqkzbty3bgJPqRgQEalGMknu3f9Ny++eoGv2XGgL5z/6Ult+dRN4TcWAiEg1HIfMpyaSOevjkEgEnUbqlI9TCz2lYkBEZChUCMgg1DIgIiLS5DRmwCeOZg6ISK0Vi7R9YwHZUz9Icfsdgk4jIaKWARGRRuC6JG+9ifjCn9Hy9GLWf/MeiIRzupgEQFMLRURCznVpu/MO4gt/BvE43ZOmqBCQIfFxBUJPNUgxoK4DEdl6rffeTeLBH0IsSvqq68gfcFDQkSRk1E0gIhJi0ZdepPXeuyHi0HX5VeQPPyLoSBJCGkAoIhJihf32p+dzF+COHEnvMccFHUdCyunNBh1hWEJXDAy4N4F6CUTEA9mPfDToCBJybotaBkRERJqak9eYARGR8CgUtOOgeE9TC0VEwiH6l5don3c96auvo7jr24KOIw1EAwgDpUEDIlKd6NIlpGZMxUmnSfy/n9Bz8ZeDjiQNRN0EIiJ1Lr
J8GanLpuCk0/S+8xh6Lrw46EjSYIoaQCgiUr8iK/9NauoknHXryL/jELouv1JjBsRzahnwiTYqEpHhiD33LJHVq8nvtz/pq2dDPB50JGlAmlooIlLHciedjJtM0nvo4dAWzn+wpf45eS06JCJS13rf9e6gI0ij09TCAA24LKGIiIi/ii2tQUcYlsYoBkREKvX2EnnzDYoTtgs6iTQZdROIiNSDYpH2G64j9sc/kL7hJgp77hV0ImkiGkDoE/UIiMhmFYsk58+j5fHf4ra1gesGnUiajFoGRESC5Lq03XkH8V8shHicrtlzKexlgk4lTcbVmAERkeC0fucuEg89ALEo6auvI3/AQUFHkiakloFAqe9ApNkVx2wDsShdl19F/rAjgo4jzSrSBFMLjTHvBPYCHgB2AZZYa8NZBolIQ8mdchr5ww7XDAIJVDGWCDrCsFRVDBhjxgP/D9gfSACPAbOBA4wx77XWvlK7iCIi1VEhIEFzCrmgIwxLtS0DtwH/AN4FrCofOxv4DvBV4CTPk4mIiISMG2vsAYTvAY611vYYUxqda61db4y5DFhcq3ADcQaYW+hqyIBIU4n98XncVAeFPfYMOorIRhq9ZaAIJAc4vh0Qzv0aRSSUoi+9SOqK6bjRKJ23f53ijjsFHUlkg0ZvGfhf4DZjzPmAC4wwxpwA3AH8oFbhBuKOmkDvNrtCJArRFtyWltLXItLwokuXkJo5DTIZet9zAsXtdwg6ksjGGrxlYCowh1KXQBx4HsgD3yyf881bjy9i1atLAXCiDpGIw8hr/svPCCISgMiy10lNm4KTTtP7zmPovnR6aKdxSQML6TK5VRUD1toccIkxZiawW/l9S621aWPMWKrsKjDGOMAC4KDye86z1r5acf5E4EpKrQ/PW2u/MNj13IJLoeCqZUCk0WUypC67BOetdeQPOZSuy6+EqP67l/rjVzeB1/fTqspqY0zBGDPOWttjrX3JWvunciGwK/DaEPKfDiSstUcB04H5Fd8jBcwDPlA+/5oxZpshXFtEGlVrK5mJ55E/8GDSV10H8XjQiUQGVsh5/xiYp/fTzbYMGGPOAT5TfuoAPzXG9PZ72XbAPwf7Bv0cDTwCYK1dbIw5tOLcUcALwHxjzG7A/1hr3xzCtUWkgeX++33k3n2Cugakvvm36JCn99PBugn6Vhl0yt/0SSBdcd4tP//REMKPAN6qeJ43xkSstUVgLKV1DA4CuoEnjDFPaUEjEdlAhYDUO/8GEHp6P91sMWCt7QKuATDGvAZ834Olh9cDHRXP+4IDvAk8Y61dXf6ejwMHA5uE77/WQDKpJsPhammJ6vPzmD5Tb+nz9JY+zxrzr2XAk/tpn2oHEN5jjDmq5G/NAAAgAElEQVTUGLMf0Ddqx6G0NPHbrbWfrTL8IuBk4AFjzJGUmjH6PAfsb4wZQ+kveSTwjYEu4vbbo7y7O5xTOepBMhnX5+cxfaZbqbeX5I1zyJ52BoX99tfn6TF9nt5rb68oAAr9e9NrxpP7aZ9q9yaYBVwB/BvYFlhR/jPG0LoJHgJOMMYsKj+faIyZRGnDo4eNMdOBRyl1QfzAWvuXIVxbRMKuUKB9zrW0PPEYsb++xPpv30dpNrNIOLj+TS309H5a7ToDnwXOt9Z+o9xl8G5gDfB9hjCbwFrrAhf0O/xyxfn7gfurvZ6INJBikeT8ebQ88RhuMknXlddCS0vQqUSGJupPN4HX99NqR+NsQ3nUIvAH4Chr7TpgBnBmtd9MRGRArkvb124n/ugjEI+Tvm4uhT33CjqVyNAVe71/+KDaloHllBYbeh34K/AO4D6gk9KoRRGRYYsufYXETx+EWIz0rNkUDjgw6Egiw+JGwtmtVW0x8A3gB8aYicCPgV8ZY1ZS2s3wj7UKJyLNobDHnnTNnAVA/tDDA04jshV8+k3ea9XOJphrjFkOdFlrnzbGfBk4n9L0hc8M/m4RkS3rPea4oCOIbDXXCedaGNW2DGCt/W7F19
8CvgVgjNm3BrlERETCx6cBhF4btBgwxhxDaf3jPPAja+3TFedSwCzgC5TWGxARqU53NySTQacQ8V5Iuwk2255hjDkfeAw4BTgJ+J0x5ozyuQ9SmsJwITDXh5wi0iBizz/LyLPPIvaH54KOIuI5Nxr3/OGHwVoGJgE3WmunARhjLgVmGWN2Am4BHga+ZK39e+1jikgjiL70IqkrL4dslpbfP0X+7YcEHUnEWyFtGRisGNgZ+GbF81uBOcBM4GPW2h/UMpiINJbo0iWkZkyFbJbcCe+j5/MXBh1JxHN+/SbvtcGKgQQVOyJZa3uNMT3AxSoERGQoIq//g9S0KThdXfQefSzdl0zTDoTSmIr5oBMMS9WzCSo843kKEWlo0RXLcbo6yR96GF3Tr4BodMtvEgkj//Ym8NRgxYBbfvQ/VhzgtSIim9X7X++kc96tpSWG4+FsRhWpRiN2EzjAH4wxlTf/duBJY0yh8oXW2p1rEU5EGoeWGJam0IDdBBN9SyEiItIA3Eg4d9rcbDFgrb3HzyAi0iAyGaKv/Z3C3vsEnUTEfw3YMiAiMjS9vbRfcwUtf/wD6atnkz/8iKATifirAccMiIhUr1Cg/fpraHnmadwRIylOmBB0IhH/qWVARJpWsUhy/jxannwct72d9NybKe68S9CpRHznNuDUwgEZY2LW2nCWPiJSE21fX0D80UcgkSB93VwKe+wZdCSRYDTaAML+jDEXUdqvYGdjzN7AZcAbwExrrdYeEGlivYccRvzRhXTNvJrC/gcEHUckMG6xsOUX1aGqigFjzGTgS8AVwNfKhx8Fbi9/fbn30UQkLPKHH8H6e7+Pm+oIOopIsKLh7H2vdnHw84HPWWu/Q3kFQmvtA8DZwKdqlE1EQkSFgEipZcDrhx+qLWF2BJYMcHwZMNq7OCIiIiEW0jED1bYMPMXGKxK6xpgIMA1Y7HkqEalbLb97ktji3wcdQ6QuuW7B84cfqm0Z+CKw0BhzMtAKfB3Yi9I2x++vUTYRqTOx556h/dqrwC3SueB/KOy2R9CRROqKSwNPLbTWvmSM2Qv4BLB3+X0PAvdZa9M1zCcidSL60oukrpoB+TzZ086g8Lbdg44kUn9C2k1Q7WyCucD3rLXfqnEeEalD0VeWkJoxFbJZcu99Pz0XXhzafdtFasmvZn2vVdtNcCDwZWPMa8D3gO9ba/9Wq1AiUkdyOdqvuhynq4veo4+le/JUiFQ73EikyUQaeGqhtfZEYFvgBuAw4E/GmD8ZYy4zxrytlgFFJGDxON3TZtB77Lvomn4FRKNBJxKpW40+gBBr7TrgLuAuY8xI4PPATGA2oH8dRBpY/sCDyR94cNAxROqfE86WgSGlLi9DfAbwIWB/4P8odRuIiIg0vYYeM2CMuYZSAbAX8ARwJ/CAtXZtDbOJSBBcV4MDRYbJdcI5nqbaloETgG8AP7DW/ruGeUQkSJkMqSunk/3AqfQed3zQaUTCxwlnr3m16wz8V62DiEjAcjlSs2YS+8PzRP65gt4jj4JEIuhUIuHihnMT380WA8aY14G3W2vfNMYsA9zNvdZau3MtwomITwoF2udcS+zZZ3BHjiJ9/Y0qBESGI6RTCwdLfQXQt7rgTB+yiEgQikWSN8+l5cnHcdvbSc+9ieLOuwSdSiSU3EZrGbDW3lPxdBfgJmttd+VrjDEjgCtrlE1EfBBZsZyWJx+HRIL07HkUdt8z6Egi4dVoYwaMMfsCE8pPrwJeNMas6/ey/YALgCm1iScitVbcaWfS827B6e6isN/+QccRCTW/WgaMMQ6wADgIyADnWWtfrTj/FeAooLN86DRrbecmFyobrJtgPPDLiucPDPCaNHBTddFFpF4V9t4n6AgiDcHHqYWnAwlr7VHGmCOA+eVjfd4BvM9au6aaiw3WTfBbyssVG2P+DhxmrX1juKlFREQann/dBEcDjwBYaxcbYw7tO1FuNdgT+IYxZgLwLWvtXYNdrNq9Cd6mQkCkMTirVw
cdQaRhuW7R88dmjADeqnieN8b03dPbga8CnwTeD1xojBm0D1BTC0WaSMuTj9M+exbdk6aQe++JQccRaTwR31oG1gMdld/ZWttXOXQDX7XWZgCMMb+mNLbgxc1dTFMLRZpE7LlnaJ89C/J5IitWBB1HpCH5OLVwEXAy8IAx5kjghYpzewHfN8a8ndJ9/mjg7sEuVtXUwn7TDDHGJIADgZettW/1f6+I1Jfoiy+QumoG5PNkT/8QmXM/E3QkkQblW8vAQ8AJxphF5ecTjTGTgCXW2oeNMfcBi4EccI+19q+DXazajYr2orR98aWUmhl+B+wLdBpjPmCtfXJ4fxcRqbXokpdJzZgK2Sy5951IzwVf0EZEIjXiV8uAtdalNLW/0ssV529iCLP9ql038XbgX8ASYCIwDtgJ+DSl6QyHV/sNRcRn+TxEIvQecxzdk6dCJJy7qomEgVvduPy6U20x8E7gAGvtamPMacBPrLUrjDHfAabXLp6IbK3CPvvSedudFLedoEJApNYafAvjNDDKGDOW0kCEr5SP7wW8WYtgIuKd4o47BR1BpCm47mYn3tW1aouBB4EfAD2UugsWGmPOolQUfLtG2URERMIlpC0D1aa+mNIayL8G3mOtzQFx4DpgRo2yichQpdO0/E7jeUWC4rqu5w8/VNUyYK3NA7cYYzqAPY0xhwEPW2vX1jSdiFQvkyF1xWXEXnyB7qnTyZ3w/qATiTSfkLYMVDu1MEFpisLnK96TN8Z8D/hsuaVARIKSy5G6egaxF1+gOHYs+QMOCjqRSFMK65iBakuYm4ATgVOAkcAYSrsjHQVcX5toIlKVQoH2OdcSe+5Z3JGjSM+dT3HCdkGnEmlKLo7nDz9UO4Dwo8CHrbWPVRz7uTGmG/g+MMXzZCJSlbYFt9Hy5OO47e2k595Eceddgo4k0rScBl9nIAIMtGvhm0DKuzgiMlS5kz5Ay9NP0XXZFRR23zPoOCJNrdjg3QS/AuYaY0b2HTDGjALmUJphICIBKey+J+vv+i6F/QbdoVREfOA4Ec8ffqi2ZWAS8BtghTHmlfKxPSitg3x6LYKJyBDEqv1PWURqKawtA9VOLVxhjNmP0iDCvYEM8Ffgl+XNEkRERJqeX7/Je23QYqC8rsDxQBZ4ylr7U+CnfgQTkU3Ff/kL6O4md+oHg44iIgNouJYBY8yRwMOUphECrDbGnGWt/a0fwURkYy1PPEbyxjlQdCnstTeFvfcJOpKI9OPXVECvDdaecSPwS2A7YDzwc+DrfoQSkY3FnllM+/WzoOiS+cSnVAiI1CmnBv/nh8GKgYOBmdbaldbaN4BLKC1FPNqXZCICQPSFP5O6eibkC2Q/+GEy53w66EgishlF1/uHHwYrBtqB9X1PrLVrKO1aOHKz7xARbxUKtN90A+Ry5N53Ij3nXwROOJshRZqC43j/8MFQ5yMVqX5tAhHZWtEo6WvnkPjpj+m58GKI6D8/kXrm12/yXhusGHCBiDGm718fZ4BjAFhrizXKJ9L0ijvvQs8XvhR0DBGpghPSlrvBigEHWDHAMTvAa6OeJRIREQmpRmwZON63FCJSkstBPB50ChEZJtdtsJaBfjsUikitpdN0XPplcu96N9mzPh50GhEZhkgDdhOIiF96ekjNnEb0lSUkurvJnnI6JJNBpxKRISo0YDeBiPghlyM1ayaxl16kOHYs6XnzVQiIhFQkpCsQqhgQCVKhQPv11xB77lncUaNI33grxW0nBJ1KRIapKVoGylMKdwGWARFrba4mqUSahPPmm0TtX3FTKdI33ERxx52CjiQiWyGsexNUVQwYY2LAHODi8nv2Am4wxuSBz1pru2oXUaRxuePHk77ldpx16yjsvmfQcURkK4W1m6Da5cyuBd4HvBfIlI99FXg7cHMNcok0jeKE7bTxkEiDKLjeP/xQbTHwMeACa+3jlFYhxFr7JPBp4IwaZRMREQmVkG5NUPWYgbHAqgGOdwFt3sURaWzRV5ZQ2E
PdASKNKqwDCKttGfglMM0Y01ejuMaYkZTGEfy6JslEGkz80YV0XHAerXd/K+goIlIjEcfx/OGHalsGLgIeotQ60Ab8DNgJeBU4tTbRRBpHyxOPkbx5LgBue3vAaUSkVgoh3bavqmLAWrsCONwY8x5g7/L7LPCodiwUGVzsmcW0Xz8Lii6ZT55D9iMfDTqSiNRIo08t7OtO+E35sdE5FQQiA4u++AKpq2dCvkD2jI+Q+dTEoCOJSA1FwlkLVN1NkKc8i2AztIWxyADcbbahOGYM+YPfQc/5F/k3NFhEAlEohvO/8WqLgf7bGceA3YFJwAxPE4k0kOJ229P51a/hjhylQkCkCUSqHZZfZ6odMzDQdsa/MsZY4CvAg9VcpzwbYQFwEKXFi86z1r46wGt+BvzYWvuNaq4rUs/c0WOCjiAiPvFrAKHX99OtrWHeAMwQXn86kLDWHgVMB+YP8JrrgNFbmUtERMR3Ecf7x2Z4ej+tdgDhpwc4PAI4H3iqmmuUHQ08AmCtXWyMObTf9/kQUAAWDuGaInXBWbeW+G9/DR/TbAGRZpX3b8yAp/fTascMXNHvuQvkgGeAmVVeA0oFxFsVz/N9sxGMMfsBHwc+DFw52EWcfn2vyWR8CBGkUktLVJ+fFzo7ScycirNkCW6LA5o+6Bn9jHpLn2dt+Ti10JP7aZ9qi4HzgUXW2vSQom5qPdBR8bxyWuKngO0prWi4K5A1xrxmrX20/0Vcd+OJDd3d2kl5uJLJuD6/rdXTQ+qyS3DtyxR22JH8u0/QZ+oh/Yx6S5+n99rbExu+jvo3TtiT+2mfaouB/wWOBV4actyNLQJOBh4wxhwJvNB3wlo7re9rY8xVwL8GCy5SF3I5UlddTuwvL1EcN47OufNp22Yb0D+2Ik0p79+qO57eT6stBv6EN8XAQ8AJxphF5ecTjTGTgCXW2oe38toivmv7+gJif3ged9Qo0vNuwd1226AjiUiAfGwZ8PR+Wm0x0AncboyZBbxGaRrDBtbaY6u5iLXWBS7od/jlAV43q8pcIoHKfPQTRF95mZ4vTaa4405BxxGRgPk1gNDr+2m1xcDz5YeIVHDHjSN96x1aUEhEgAZcdMgY8yngB9barH5TFxmECgERKfNxaqGnBqth7gJG+hVEREQk7FzX+4cfBusmCGd5I1IjiZ88SORf/6Tn89pwSEQG5uMAQk9tacyATzWJSH2LP7qQttu/AkDvUUeTP/DggBOJSD0KazfBloqBfxuz5a0HrLXawlgaVssTj5G8eS4APedfpEJARDYr2mgDCMvOAtb6EUSkHsWeXkz79bOg6JI5+1yyHzoz6EgiUsd8XHTIU4MVAy7wuLV2lV9hqtGb6SWbzgYdQ5pBsUjbXf8D+QLZMz5C5uxzg04kInWuEccMhPSvJOKRSIT09fNILPwZmY99UoMGRWSLehtwzMA9QI9fQUTqkTt6DJmPnx10DBEJCb+mAnpts8WAtXain0FERETCLhbOhoGqlyMWaXhO53rcVIe6A0Rk2MLaTRDSSRAi3nLWrqHjSxeRvPUmKIZ0OLCIBC4a8f7hB7UMSNNz0p2kpk8hsux1oi0x6O6GVCroWCISQr0h/V1CxYA0t+5u2mdMI7p0KcUddiR9w80qBERk2GIhbW9XMSDNK5cjdfUMYn95ieK4cXTOnY87ekzQqUQkxHoLQScYHhUD0rScnm6c9W/hjhpFet4tuNtuG3QkEQk51w3nAEIVA9K03JGjSN94K86aNRR33CnoOCLSAKKRcC40oGJAmprbMQK3Y0TQMUSkQfQW1DIgIiLS1FrUMiBS32JPLyZ/6GEQCelwXxGpezm1DIjUr8RDD9C24DZy7zmB7mkztMqgiNRESzToBMOjYkAaXvyRn9O24DYA8ge/Q4WAiNSMWgZE6lDLY78hecs8AHrOv4jc+08KOJGINLJiOIcMqBiQxhX74/O033AtFF0yn5pI9kNnBh1JRBqcBhCK1JnC7ntQ2NOQ33
c/Mp88J+g4ItIE1E0gUmfcjhF03ngrxOMaJyAivlDLgEg9SiSCTiAiTUQtAyIiIk1OuxaKBMhZvZrWB++n5zOfh5h+rEUkGGoZEAmIs3YNHVMnEVm+DLclTubTnw06kog0KTecQwZUDEi4OZ3rSU2fQmT5Mgq77Ub2I2cFHUlEmpgGEIr4rbub9pmXEV26lOIOO5K+4WbtQCgigVI3gYjP2u75NrG/vERx/Hg6592CO3pM0JFEpMm1RNUyIOKrnnM+TWTNm/Sc82nc8eODjiMiQjavlgERfyWTdM24KugUIiIbxLVroYiISHPzq2XAGOMAC4CDgAxwnrX21YrzFwHnAEXgWmvtzwa7nooBCQfXLT0iIV3RQ0Sago9TC08HEtbao4wxRwDzy8cwxmwDnE+pUEgCfwF2Huxi+pdVQqH1f++l/ZorIZcLOoqIyGa1RF3PH5txNPAIgLV2MXBo3wlr7ZvAQdbaIrAdsHZLudUyIHUv8eAPab37WxBxiL30Avm3HxJ0JBGRAfk4gHAE8FbF87wxJlIuALDWFstdBVcDX93SxdQyIHUt/sjPafva7QB0T5qqQkBE6lo86nr+2Iz1QEfF8w2FQB9r7R2UWgaOM8YcN1hutQxI3Wp57Dckb5kHQM/5F5F7/0kBJxIRGZyPLQOLgJOBB4wxRwIv9J0wxuwFzLHWfggoAFlKAwk3S8WA1CfXJfHz/wdFl8w5nyb7oTODTiQiskWD/Ca/FQYsMB4CTjDGLCo/n2iMmQQssdY+bIz5kzHmKUpFwEJr7RODfQcVA1KfHIf0NXOI/+ZX5N53YtBpRESq4lfLgLXWBS7od/jlivPXANdUez0VA1K/Egl1DYhIqBTDuRqxigERERGvJPzrJvCUigGpC87KlbijR0M8HnQUEZFhy2hvApHhcVatomPyFyjuvAvpq66D1tagI4mIDEs8opYBkSFz1q6hY+okIqtW4W4zFoqDzn4REalrahkQGSKncz2p6VOIrFhOYffdSc+eC8lk0LFERIbNx70JPKViQILR3U1qxjSiS5dS3HEn0nNuwu0YEXQqEZGtkoipm0CkepEIxVQKZ9tt6Zw7H3f0mKATiYhstUyvuglEqtfaStes63HWrsUdPz7oNCIinoirZUBkiFpaVAiISENRy4CIiEiTS8RqMSMqWoNrbkzFgNSe6xJ/5Ofkjn+P1hAQkYYW1paBSNABpPG1fvc7JOfPI3Xl9PDOuxERqYLrev/wg1oGpKYSP7qf1nu+DRGH7AdOBSecVbOISDVqszdB7akYkJqJL/wZbXfeAUD3pKn0Hnd8wIlERGqrJ6TdBCoGpCZif3ye5C3zAOi58GJtRSwiTaG1JlMLa0/FgNREfr8D6H3nsRT23IvsBz8cdBwREV+oZUCkUksLXVfM0hgBEWkqtVmOuPZUDEjtRDRZRUSai1oGREREmlxYZ0+rGJCtFlm+jLa7vknX5KnQ3h50HBGRwLTWZAXC2reyqhiQreKsXElq2mQiq1bRNnYcPRd8IehIIiKB6cmpm0CajLN2DR3lQiC/7370nPuZoCOJiAQq0RLOfgIVAzIsTuf6UovAiuUUdt+drtlzoa0t6FgiIoFSy4A0lcSPfkj073+nuONOpOfchJvqCDqSiEjgWtUyIM0kc/a5OL05Mqd/GHf0mKDjiIjUBbUMSHOJRun57AVBpxARqSvFoloGREREmlqbugmkYbkuZLPQ2hp0EhGRutatbgJpSK5L29fvIPbCn0nPuRF3xMigE4mI1C21DEhDar3vHhI/+iHEokRfXUr+4HcEHUlEpG6pZUAaTuJH99P6nbsg4tA1/UoVAiIiW6CphdJQ4j9/mLY77wCge/I0eo99V7CBRERCoDsXdILhUTEgm3JdYi+9AEDPhReTe9+JAQcSEQkHtxb7FPlAxYBsynHovmQauePeTf7wI4JOIyISGm1xdRNII4lEVAiIiAxRdzboBMOjYkBERMQjrS1BJxgeX4sBY4
wDLAAOAjLAedbaVyvOTwLOAlzg59baa/3M16wir/0dd5ttcDtGBB1FRCTUenxqGfD6fhqpYdaBnA4krLVHAdOB+X0njDFvAz5mrT0SOAp4nzFmf5/zNR1n2et0XPplUpd8EWfd2qDjiIiEWlvc9fyxGZ7eT/0uBo4GHgGw1i4GDq049zrw/vI5F2ihVO1IjTgrVxL/8hdx1q3DHT0GN9kedCQRkVDrynr/2AxP76d+jxkYAbxV8TxvjIlYa4vW2gKwBsAYcyPwvLX2FZ/zNQ1nzZt0TJ2Es2oVvfvuR/rq2RCPBx1LRCTUXP8mE3h6P/W7GFgPdFQ8j1hrN8zKNMYkgG9T+gteuLmLOM7Gyz0mk7qJDUlXF4kZU3H+/U/Yay+K828h2dGx5fdJVVpaovqZ9JA+T2/p86wtH/cm8OR+2sfvYmARcDLwgDHmSOCFfud/CvzSWnvjYBdx+5Ve3WFd8ikobhR3731pyebI33QL3dFEeJfNqkPJZFw/kx7S5+ktfZ7ea29PbPjax6mFntxP+/hdDDwEnGCMWVR+PrE84nFJOcsxQIsx5iRKIyCnl/tCxEuRCD1fnEwm3Unb6NEqBEREPNLmX6OLp/dTX4uB8kCGC/odfrni66SPcZqb42gqoYiIxwYZ8Ocpr++nWnRIRETEI1qOWOqT69L6v/eSPelk3NFjgk4jItLQukM6Id7vdQbET65L25130Hr3t0hNnwLFkG6nJSISEq7rev7wg1oGGljrvXeTePCHEIvS8+nPQ0S1n4hILfk4gNBTKgYaVOKBH9B6790Qcei6/CrtQCgi4oOwdhOoGGhA0Rf+TNvXFwDQfck0eo85LuBEIiLNoS2hAYRSJwr7H0D2zI9SHDee3HtPDDqOiEjT6FLLgNQNx6Hns/2nn4qISK0l1TIgIiLS3NQyIMFxXei3eZOIiPivWFTLgAQg+uorJG+9mfTMWbjjxwcdR0SkqSUTW35NPVIxEGKR5ctITbsEZ906Wr//XXq+OCnoSCIiTU3dBOKryMp/k5o6CWfdOvLvOISe8y8KOpKISNNTy4D4xlnzJqmpk4msXk1+v/1JXz0b4iFd9kpEpIF09WjMgPgk/ptfEfnnCgq770H6urnQ1hZ0JBERAdpag04wPCoGQih7xkdwW1roPfZdkEoFHUdERMrUMiD+cRxyp34w6BQiItKPq6mFIiIizU0DCKU2ikWct9bhjh4TdBIREdmCrp6gEwxP6IqB99/9Azo6Rgcdwx/FIsn584j98XnSc+dT3GHHoBOJiMggkhpA6I9tt92W7u5c0DFqz3Vpu/MO4r9YCPE4ztq1oGJARKSupTWAULzU+p27SDz0AMRipGfNprD/AUFHEhGRLWjXmAHxSuL+79F63z0Qcei6/Eryhx4edCQREamCWgbEM042C0D3lMvoPea4gNOIiEi1XFfFgHgkc/a59B71Tgq77xl0FBERGQINIBRPqRAQEQmfdLdaBkRERJpae0i3ilExELDo3/6KO2IExe13CDqKiIhsJbUMyJBFly4hNX0KxON0fvVrFLedEHQkERHZCu0aMyBDEVn2OqnLpuCk0/S+8xiKY8cFHUlERLaSNiqSqkVW/pvU1Ek469aRP+RQui6/EqLRoGOJiMhWSncHnWB4VAz4rbub1NTJRN54g/z+B5C+6jqIx4NOJSIiHtAAQqlOMkn2lNOI//qXpK+9AdpC+pMjIiKb6NQAQqlW9sNnkT3tDGhpCTqKiIh4KBXS3+9UDARFhYCISMNRy4CIiEiT02wC2VSxSNsdXyV7ymkUd31b0GlERKTG/BpAaIxxgAXAQUAGOM9a+2q/14wDFgH7W2tzg11PxUCtFIsk588j/ouFtDyzmPV33afpgyIiDa6zy7eWgdOBhLX2KGPMEcD88jEAjDHvBW4AxldzMRUDteC6tH3tduK/WAiJBF1TL1chICLSBHwcQHg08AiAtXaxMebQfucLwHuA56q5mIqBGmi959skfv
wjiMVIz5pNYf8Dgo4kIiI+8HEA4QjgrYrneWNMxFpbBLDW/go2dCdskYoBj0Xt32j97ncg4tA14yryhxwWdCQREfGJjy0D64GOiucbCoF+qqpOVAx4rGD2pvtLl0Brgt6jjw06joiI+Kiza6D7cU0sAk4GHjDGHAm8sJnXqWUgKLmTTw06goiIBMB1fesmeAg4wRizqPx8ojFmErDEWvtwZaRqLqZiQERExCN+TS201rrABf0OvzzA63ar5noqBrZWb69WExQREcDXqYWeUjGwFaIvvUj7DdfSdeW1FPbcK+g4IiISsIHwb5sAABsASURBVI5k0AmGR8XAMEWXLiE1YypOVxfxhT+jR8WAiEjTW6+WgeYRef0fpKZNwenqovfoY+m56ItBRxIRkTrQ0VbV4P26o2JgiCL//hepaZNx3lr3/9u78/CarvWB49+TRKIyiEpIVU0XixovRU11W5rU8EOrhlJKS+mgpotqteaaa1ZFlZaaW1pqKHVbVTS4t5e2d5VrvtVEUhGJITk5+f2xdk5PIokgTnLk/TxPHs7e+6zz7uXIfvdaa6+F/aF6JI54S2YXFMKD/f77OTp2bEvt2nWYM+f9dPsmTBjN1q2b2bx5B0FBRe9oHO+8M4bIyP0UK1YMgJQUB1evXqFdu6fo2rWH87ivv97BqlXLSUxMwMenEKVLl+aFF/pSoUJF5zEnT55g8eL3OHv2LDYbBAQE0qfPS9SsWTvTz46JOc+0aROZNOld57Z161Yxa9Z03n//Qx58sLpz+5IlC4mPv8jAgUPTldGjR2cGDx5O7dp1ANizZ7czTrvdTvnyf+GVVwZQokTJ266rqKjf6dfveZYtW5npv4vD4WDu3Bns37+XlBQHXbp0o337DgCcPXuGSZPGERcXR5EiRRg5cjRlypQjOjqKWbOmMWHC1NuKLd59jxbmKkkGbpLPT4fxio3BXqMmCaPGg69vXockhLhNvr6+nD59iqio3ylZMgyAq1evcvjwv7HZ3Hen17lzV7p0edb5Oirqd559tiNNmjSjTJmyrF+/mi+/3MSoUeMoU6YcAHv37mHQoFeYPn0uFStW4vTpkwwc+DJvvjmaevUaAHDwYCTDhw/mvfc+4MEH1XWfO3nyeHr3Tj8wfePGTwkPb8nq1Z8wZsw7N3Ue27dv5eOPlzB58gxKlbofgI8/Xsprr73E8uVr8PG59UvPli2bWLJkEbGxMVkes3Hjp5w9e4bly9eSkJBAv369qFKlKlWqPMiYMSPp0qUbzZuHs2/f94wcOZyPPlpNiRIlqVRJsX79Gjp06HTL8cmqhQVEUvNwUov4k1yjFhQunNfhCCFygZeXN82bP8727Vvo3r0XAN988zVNmzZj9eoVzuP27NnNsmUfYLfbKVy4MC+/PIDq1Wtw4cIfTJnyDnFxfxAbG0tY2H2MHTuJ4OBgOnZsS8uWbTh4MJLo6CgiIlrRu3e/HMUVHR0FQJEi/iQnJ7No0XssXLiMMmXKOo9p2LAx3bo9x8KF85kyZQbLly+jdeu2zkQAoG7deowePQE/P7/rPuPnn48QF3cBpao4tx06dIBLly7x8suv0alTO86fjyY0NEfr3QCwaNF7vP76SGciANC9e0/CwsJISkpKlwwkJCTQv/+L1yVdjz7awvlvkSYmJoY9e75lxoy5dO3aIcvP//bbXbRr9xQ2m43AwECaNw9n27YthISEcubMKZo3Dwfg4YcbMX36JI4e1VSqpGjTpj19+vSgXbunbjlhCZABhAVHcsPGeR2CEHeFJUsWERNzPttjChXyJjk55ZbKDwkJ5fnn+9zwOJvNxhNPtGbs2LecF6AtWzYxYMDfncnA2bNnWLhwHnPmLCQoKIgTJ44zcODLrFmzgR07tlOjRk1nc/7QoQPYtm0znTt3A+Dq1SvMm2fOtXPn9rRp046wsPuui2P16k/Yvn0LCQkJXL6cSM2af2Xq1FmEhISg9X8oVMg3XSKQ5qGH6rNkySIAtP
6Fxx57/LpjGjRomOm579q1k0aNmqbbtmHDesLDW1K8eAh169Zn/fo19Ov36g3rESA+/iJRUeeoXr3mdfsef/yJ67YFBATw4Yef5KjskJAQxo+fAmQ/uU90dFS67ogSJUpw/PgxoqKiCAkJTXdsaGgJoqOjqVRJERISQlBQUQ4f/pG//rVujmLKSLoJhBDiJuXkQl2kiC+XL2e7FHuuqFy5Cl5eXvz6638IDi7GlStXKF++gvOiExm5n9jYWAYOfMm5zdvbm7Nnz9KxYxd+/PFfrF69gjNnznDixHGqVftzgbImTZoBJjkpVuxe4uMvZpoMpHUTXLt2lbffHkGhQoWcffAAdrs909iTkpKdd9Y2mxepqTm/IJ06dZIWLSKcr//4I5bdu//BBx8sByAiohXTp0+iV6/e+PkVxsvLK9NyHI5UvLy8sNnM/pzOxOfaMpD2HpvNlmnLQE45HI50LQ2pqab1x9RL+haI1NTUdOdUqlQpTp8+dcvJgAwgvBslJ+N17jccmWTiQoi7T0REK7Zt+5Lg4GJERLQCcF5UHI4U6tatl67/PDra3GnOnz8brX+hdeu21KlTj5QUe7qLoZ/fn12K5qKXfRx+foUZOXIszz77NGvWfEKnTl0pX95MJHfs2FEqVqyU7vhDhyKpUcPciVerVp0jRw7TsGGTdMcsXbqY++8vTbt26adLt9lIlzx8/vln2GxeDB8+GDD7Ll9OZMuWTbRv/zRFiwZz6tTJ62K+cCGWokWDCQwM5IEHyvDTT4epm2GhtrffHsFzz73AX/7y52DHm2kZyKmSJcPStTjFxJwnNLTEddvNvph0rQje3j54e2ee8OSEp7YM3PoZ3+1SUvCfOI7A117C+6cjeR2NEOIOSrtwR0S0YteunXz99VfOJu20fXXr1icycj+nT58EYO/e7+jZsytJSUlERu6jY8dnCA9vSXBwMJGR+3E4bu+iEBgYyCuvDGLx4veJiYnB19eXfv1eZfz4Uc4YAL7//jtWrVpBnz5mAGDXrj3YtGkDkZH7ncfs2/c969atomLF6+dDKVOmHP/731nA3FF/8cUGhg4dwdq1G1m7diPr1n1B9+69WLNmJWC6JA4ejOT06VPOMrZs2URAQCBly5YDoGfP3syaNS1duUuXLubYsV+dx9xJTZs2Y/Pmz0lJSeHSpUvs3LmdRx55lNDQEjzwwAPs3PkVAPv378Xb2ytdcnLu3G/OwZm3IrCILdd/3EFaBjLjcFDk3SkU2v0Nqf7+kMmgGyHE3SPt7j8kJJRy5coTEBBIYGBgun3lypVn2LA3GDXqDcDcQU6e/C6FCxemZ88+zJ07k8WLF+Dj40OtWrU5e/ZMWukZPy3HcYWHP8GmTRuYN28mo0aNp127pyhevDiTJo0nIeESdrudsmXLMWPGPOcF7f77SzN58kwWLpzHvHmzcDhSKFbsXiZPnulsXXD1t781Z/bs6Tz//It8//1uUlNTr+vb79SpK2vXrmLv3j00bNiYYcPeYNy4t0hJSSEpKZlSpUoxbdps5/Fp7x89+g3rmCQqV67C7NkLbutJAlcZBxx+8IF5LPSFF/rSvv3T/Pbb/+jZ8xnsdjvt23egVq3aVkzvMGnSOJYtW4yfnx/jxk12lnHhwh/ExV3I8hHMnIhP8MyWAZsbV1jKFYmJ11LvaP9hair3zJ+D34b14OfHpUnTSale48bv81Du6o8tSKROc5fUZ+7KrD6HDHmN3r37UrVqtTyKKn9YsmQhxYrdy5NPPn1T7wsNDXRmJu1f1rl+Ud0wX93x5gHpJsig8LIlJhHw8SFhzIS7OhEQQgiAoUNHsHTp4rwOI09FR0dx9Kh2Tk50qwKK5P6PO0g3QQaO0qWhUCES3xyFPcPgFyGEuBuFhd3H5Mkz8jqMPFWiREkmTpx+2+V4ajeBJAMZJLWIILl2XVJDQvI6FCGEEB4myE0D/nKbJAOZkERACCHErbgoLQNCCCFEwRbkLy0DHsfn0AFS/Q
qTUq36jQ8WQgghbuBiwq1NnZ3XCmwy4H3kMAFvvwE2G/Fz38fhhokwhBBC3N1k1UIP4n3sKAEjh8O1aySFP4HjgTJ5HZIQQoi7QJC/Zz6xX+CSAa9TJwkYPgRbYiLJTR7h8uBhkMXCG0KIgqFp03pUqFARLy8bNpuNq1ev4u8fwJAhr1OlStVc+Yx//GMn69evYc6c93OlPJE/Xbwk3QT5X1ISAW8OwxZ/keR69Ukc8RZ4e+d1VEKIPGaz2Zgz532CgoKc21auXM7MmVNZsGBJrn6OuLsFBnjmv3HBSgZ8fbnS71X8vthA4tvjwNc3ryMSokALfrxZptvjvvomV47PKTMt+599vSkpKURF/U5QUFHAzFk/Zco7xMX9QWxsLGFh9zF27CSCg4Pp2LEtLVu24eDBSKKjo4iIaEXv3v0AWLx4AV99tZWiRYMpXfoBZ/mJiQm8++5kjh79FZvNiwYNGtKv36t4eXnx2GON6dy5KwcO/MDVq1fo1etFdu3awfHjxwgJKcGUKe+mWwVR5C/SMuAhkps8QnLjpmbdTiGEsPTvby7gFy/G4evrS+PGj/DGG6MA2LFjOzVq1KRr1x4ADB06gG3bNtO5czcArl69wrx5i4iJOU/nzu1p06YdR49qvv12F8uWrcLX15fXXx/i/KwZM6ZStGgwH320GrvdzrBhg1i58mO6dXuO5OQkQkJCWbRoGStWLGPKlPGsWLGO4sVDeOGF7uze/Q0tWkS4uXZETsmYAU8iiYAQ+cLN3tHfbgtAdtK6CX799T8MHTqQOnXqEhwcDEDHjl348cd/sXr1Cs6cOcOJE8epVu3PdUuaNDEtFiEhodx7b3Hi4y9y8GAkzZo9RuHC5i6+deu2rF+/GjBL56Z1P/j4+NC+fQfWrl1Jt27PAdCs2WOAWYGwQoWKFC9uJkK7775SxMfH37E6ELdPWgaEEMKjmW6CypWr0L//ICZMGM3SpasICwtj/vzZaP0LrVu3pU6deqSk2HFd8TVjs33aLtdjvF3GJ6WmOjIc78Butztf+/oWcv49t5b8Fe6R6vDMGQg9sz0jJ5KS8B/7Nj6HDuR1JEIID9OiRQRVq1ZjzhyzcE1k5D46dnyG8PCWBAcHExm5H8cNfuk3aNCIXbt2kJCQgMPhYNu2L5376tdvyKefrgEgKSmJjRs/o379h+/cCQm3CfL3yvUfd7g7U86UFPwnjqPQd9/i/Z+fiV/6iQwWFEJkKbNR/oMGDaNnz65ERu6jV68XmTt3JosXL8DHx4datWpz9uyZtHdnLA2Ahg0bc+LEf+nduzuBgUFUrFiZixfjABg48O/MmDGVHj06Y7fbadCgEd2798oyluziFPmLp3YT2FybsTxBYuK11MuXk7I+wOGgyPTJ+G7fSqq/PwnTZpFSsZL7AvQwRYr4km19ipsmdZq7pD5zl9Rn7gsNDXRmaU+/eCjXL6rrFta541ng3dUykJrKPfPn4Lt9K/j5kTBhiiQCQggh3Cbukv3GB+VDd1Uy4HXqJH6bN4KPDwlj35EFiIQQQrhVUIBnTmR3VyUDjnLlSRgzEVtyEvY6D+V1OEIIIQqYuHhpGcgX7PUb5HUIQgghCihPfbTwrksGhBBCiLxSVLoJ3M+WcInUgMC8DkMIIYQApJvA7XwO/ID/uFFcHvEWyQ83yutwhBBCCIoGeuZcfm5NBpRSNmA+UAu4CvTWWh932d8HeBFIBiZorTdnVo734X8TMOpNSErC55+HJBkQQgiRL7irZSC3rqdp3N0y0B7w01o3Uko1AN61tqGUKgn0B+oARYDvlFLbtdbJrgXYtCZg5HBISiIpoiVX+r7s5lMQQgghMufGMQO3fT115e5koAmwFUBrvV8p5fr8X33gO621HYhXSh0FagIHXQvwGzIQx+XLJD/yNy4PHgZentkkI4QQ4u7jxjEDt309deXuZCAIuOjy2q6U8tJaOzLZlwAUva6EK1dIrlefxNdHSiIghBAiX3Fjy8DtX0
9duDsZiAdch/+nBZ62L8hlXyAQl7EA7317bUUw7R4id/j7++V1CHcdqdPcJfWZu6Q+75wvljdy12pSt309deXuW+s9QCsApdTDwGGXfT8ATZRSvkqpokAV4Iib4xNCCCE8Qa5eT926aqHL6Mea1qZeQGvgqNZ6k1LqBaAvZg3QCVrrDW4LTgghhPAQuX099bgljIUQQgiRu2QEnhBCCFHA5dsZCHN7QoWCLgf1OQjoDKQCX2qtx+VJoB7iRvXpcsxmYIPWeqH7o/QsOfiOtgTexnxHD2mtX82TQD1EDurz70AXIAWYKN2yOWM90z9Ja/1ohu3/B7yFuSZ9qLVenBfx3ar83DLgnFABGIGZUAFIN6FCQ+AJYKJSqlCeROk5sqvP8sAzWuuHgUZAhFKqet6E6TGyrE8X44Fibo3Ks2X3HQ0ApgCtrf0nlVLF8yZMj5FdfRbF/A5tAEQAM/MkQg+jlBoKLAL8Mmz3wdRvC+BvwItKqRJuD/A25OdkIN2ECkCmEyporeOBtAkVRNayq8/TmKQKrXUqUAhzJyGyll19opTqgLnj2uL+0DxWdnXaCDNa+l2l1LdAlNY61v0hepTs6jMROIl55CwA810VN3YMeDKT7VUxA/firVn+vgOaujWy25Sfk4FMJ1TIYt8NJ1QQWden1jpFa/0HgFJqKqYJ9lgexOhJsqxPpVQ1oCswCjOSV+RMdv/nQzB3XEOBlsAgpVRF94bncbKrT4CzwM/AAWC2OwPzVFrrz4DMphjMWNeX8LBrUn5OBnJ1QgWRbX2ilPJTSq0A/AFZ8OHGsqvPHkAp4GugJzBYKRXu3vA8UnZ1GgtEaq3Pa60TgW+B2u4O0MNkV58tgTCgLFAGeDLDdLbi5nj8NSnfDiDETKjQBliXxYQK45VSvsA9yARFOZFdfQJ8DuzQWk91e2SeKcv61FoPT/u7UmoUcE5rvd39IXqc7L6jB4HqSql7Mb94HwZkUGb2sqvPC8CVtIVrlFJxQLD7Q/RYGVv8fgEqKqWCgcvAI4BH/S7Nz8nAZ8DjSqk91ute1oj3tAkVZmP6ZWzAG1rrpLwK1ENkWZ+Y70FToJBSqhVmtPYIq59RZC7b72cexuXJbvR/fgSwHfP9XK21/jmvAvUQN6rPA0qpfZjxAt9prXfkWaSeJxVAKfUM4K+1XqyUGoz5ftqAxVrrc3kZ4M2SSYeEEEKIAi4/jxkQQgghhBtIMiCEEEIUcJIMCCGEEAWcJANCCCFEASfJgBBCCFHASTIghBBCFHD5eZ4BIdxOKXUSMyNbRke01jdc/0IpdQIYp7VecofjsgNngIVa68m5UL4DaKG1/lopFQo8qrVek3Hf7X5OJp/pyoGZaXAb8Kq17khOyukA7NZaR+dmfEIUJNIyIER6qcAgzFStrj/N8jIoro+rPDAGMxNnt1woPwwzxS/AZMzMdZnty21P8+c5lcUsS96SzFeBvI5SqgywFrPYjhDiFknLgBDXu5RP7zIzxvWRNQNaB2DF7RScoVxbNvtyW1yG8v9nLfQ0BOidg/d7Yc0GJ4S4dZIMCHETrHXLJwJdgJLAb8AkrfWCTI6tDswD6mLm0/8EGK61TrH2jwT6Ye5qvwdeu4XVIu3ANas8G/B3oC9moaQfrDL/be1/GtOaUAGzbPVErfVSa58DsxZ7U+A5a1sTrXUFl32VgDe11s7uCqVUZ2Cm9XmFgCmYFRu9gJ1A/1tIJpJwWRlOKdUQ01pRF3Ph3w08b033etzadlQp1Utr/ZFSqj0wHtN68osV87abjEGIAkW6CYS4OcMxTehPAZWBD4E5SqmwTI5dgbkYVQM6Ac8CzwMopfpbr7sB9THrpO9QShXOSRBKKR+l1FNAOLDR2jwKGAwMAP4KnAC2KqX8rXEAK4DpVtzvAIuUUpUzFD0VWAOsAzKuYrcOKKmUquey7WnMOgGpmCSpAdAKs1CLDfgiJ+fjcl71MN0ha63XAcAmzJzvVYHHMRf5N6231Lf+fB
hYrZSqBXxknV91zGJGnyqlbjjeQ4iCTFoGhLjeXKXULJfXqUAFrXUMZuW33lrrSACl1CTMRVgBv2copyzmYnhGa31KKdUSM0AOYCjmrvkbq5wBQGuyb/J3jeseIBGYrrVeZW17FbNo12arzD7AfzFLKu/F/H//TWt9BlimlDoFRLl+gNb6slLqCuCttf4jw75YpdROK8ZIpVQRTP9+C6XUPcArQAOt9Y/W5z8HxCilGmut95C5L1wGEvpiWlBWYJIuMEtqT9Bap40hOK2U+hRoaL0+j0k6YrXW15RSQ4APtNafWPsXWiv29Qf6ZBGDEAWeJANCXG8M1p2pi1gArfXnSqkWSqlpmKWz62CSBe9MynkdmAv0VUptAVZprf+plPIHSgMrlFKu/d1+mLv2nMR1FbM0ctrqaSWAezFdA1ix2pVSB4CqWuv3lFKfAF8qpf6LSVKWaq0v3qgyMlgFjLTOrQ0Qo7XeZ/Xz+wK7re6KjOeUVTLwIiZRCcG0SlwD3tZaX7POIUoptcxaba828CBQC9iXRXlVMUsdu4438AFkBU4hsiHJgBDXO6+1Pp7ZDqXUeMwd5hJMc/RLwKnMjtVaL1BKbQLaYe76N1jvT7u774zpRnAVdytxYZKDzHhbP2ituyulpgNtrZ+XlFL/d5NL134GvKeUqoHVRWBtT/td8gjm7j5d3NmUd846p+NWX/9hYLkVH0qpUsAB4BDmkcOFmCSkcRbl+QDTMN03rq5lf1pCFGwyZkCIm9MXMyhvhPUcfqC1Pd0IfKWUn1JqJoDWep7WuhXmzr6zdTceDZTSWh+3LoYnMX3utW4lKOuZ/HOYPvu0GHwwg+60MqZrrf+ltR6rtX4I+A54MpPishydr7W+BGwBOgIRmJYCMN0RdiDU5ZxigBmY7pKcnMMF4DWgjVKqo7X5SeCi1rqN1nqO1d3wF/6s74yxakyXznGXOJ7L4jyFEBZpGRDi5sRiLlY/YEbQz8JckPxcD7L6r5sAZZVSIzAj7Vti7nLBPEc/XikVDRwBhmFG7A+4jdimA6OVUr8Bv2Ka8gsDKzGJf1+lVDywDDMIrybmCYeMEoBaSqlSWuvfMtm/CtMyck5r/U/rfBOUUouBeUqpvpjEZBJQAzia0xPQWn+qlPoKmGa1qsQC9yulWmASjk6YwZuHXGLFijcKk3zsVkpFYrpCHgdGYFpnhBBZkJYBIdK70TPrvTAXuCPAUszI+32Y0fsZ398JkyTsxUza81/MnS+Ypuz3MGMKfsT0hYdrrTMOQsxpXGAuhAusn4OYcQnNtNbntdZRmLvjtsBPmC6OeVrrtOZ01/I/AioC/8riszdZ21Zm2D4Y2IpJFvZjxhCEp/X/38Q5vYZ5bHMkphtiufXnAeBRYCCglFJ+1iDHpZik5nmt9X7MExp9MP9GA4CeWustWXyWEAKwpabKfB1CCCFEQSYtA0IIIUQBJ8mAEEIIUcBJMiCEEEIUcJIMCCGEEAWcJANCCCFEASfJgBBCCFHASTIghBBCFHCSDAghhBAF3P8D+Y5yBn8IbdkAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# This shows that the random forest perfectly fits the training data\n", "# as one should expect.\n", "roc_plotter(pipeline, X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 30000 entries, 1 to 
30000\n", "Data columns (total 23 columns):\n", "LIMIT_BAL 30000 non-null int64\n", "SEX 30000 non-null int64\n", "EDUCATION 29655 non-null float64\n", "MARRIAGE 30000 non-null int64\n", "AGE 30000 non-null int64\n", "PAY_0 30000 non-null int64\n", "PAY_2 30000 non-null int64\n", "PAY_3 30000 non-null int64\n", "PAY_4 30000 non-null int64\n", "PAY_5 30000 non-null int64\n", "PAY_6 30000 non-null int64\n", "BILL_AMT1 30000 non-null int64\n", "BILL_AMT2 30000 non-null int64\n", "BILL_AMT3 30000 non-null int64\n", "BILL_AMT4 30000 non-null int64\n", "BILL_AMT5 30000 non-null int64\n", "BILL_AMT6 30000 non-null int64\n", "PAY_AMT1 30000 non-null int64\n", "PAY_AMT2 30000 non-null int64\n", "PAY_AMT3 30000 non-null int64\n", "PAY_AMT4 30000 non-null int64\n", "PAY_AMT5 30000 non-null int64\n", "PAY_AMT6 30000 non-null int64\n", "dtypes: float64(1), int64(22)\n", "memory usage: 5.5 MB\n" ] } ], "source": [ "cc_data_copy.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The features 'SEX', 'EDUCATION', and 'MARRIAGE' are definitely categorical, and the features 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', and 'PAY_6' are somewhat categorical. 
We should modify the type of those features to reflect that.**" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "cats = ['SEX','EDUCATION','MARRIAGE','PAY_0',\n", " 'PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']\n", "cc_tmp = cc_data_copy.copy() # for use a little later" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "for cat in cats:\n", " cc_data_copy[cat] = cc_data_copy[cat].astype('category')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 30000 entries, 1 to 30000\n", "Data columns (total 23 columns):\n", "LIMIT_BAL 30000 non-null int64\n", "SEX 30000 non-null category\n", "EDUCATION 29655 non-null category\n", "MARRIAGE 30000 non-null category\n", "AGE 30000 non-null int64\n", "PAY_0 30000 non-null category\n", "PAY_2 30000 non-null category\n", "PAY_3 30000 non-null category\n", "PAY_4 30000 non-null category\n", "PAY_5 30000 non-null category\n", "PAY_6 30000 non-null category\n", "BILL_AMT1 30000 non-null int64\n", "BILL_AMT2 30000 non-null int64\n", "BILL_AMT3 30000 non-null int64\n", "BILL_AMT4 30000 non-null int64\n", "BILL_AMT5 30000 non-null int64\n", "BILL_AMT6 30000 non-null int64\n", "PAY_AMT1 30000 non-null int64\n", "PAY_AMT2 30000 non-null int64\n", "PAY_AMT3 30000 non-null int64\n", "PAY_AMT4 30000 non-null int64\n", "PAY_AMT5 30000 non-null int64\n", "PAY_AMT6 30000 non-null int64\n", "dtypes: category(9), int64(14)\n", "memory usage: 3.7 MB\n" ] } ], "source": [ "cc_data_copy.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Changing the type of those 9 features reduced the memory usage from 5.5 MB to 3.7 MB, which is about a 33% reduction in size. In general, the length of time needed for a calculation depend on the memory footprint, so it's important to be mindful of memory usage. 
Granted this is a fairly small data set and the performance gain will be small, but in data sets that are 10, 100, or 1000+ times as large, it can save hours of computation time.**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dummy Variables\n", "We have to make dummy variables for categorical features so that scikit-learn classifiers can correctly interpret categorical features. The process of making dummy variables takes a categorical feature (eg MARRIAGE, which has 3 categories: [married, single, other]) and makes that one categorical feature column into multiple columns (1 for each unique category, eg [MARRIAGE_married, MARRIAGE_single, MARRIAGE_other]) that contain a 1 if the row is in that new column's category or a 0 if the row is not. \n", "\n", "We see from the printout below that making dummy variables for each unique value in the 9 categorical columns increased the number of columns from 23 to 79. While this will help scikit-learn correctly process these features, adding feature columns adds complexity and noise, which can promote overfitting and hurt a model's predictive power. Let's look at the same model again with this dummified data set." 
] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "dummy_df = pd.get_dummies(cc_data_copy, columns=cats, drop_first=True)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of feature columns before making dummy variables for categoricals: 23\n", "Number of feature columns after making dummy variables for categoricals: 79\n" ] } ], "source": [ "print('Number of feature columns before making dummy variables for categoricals: {:3}'\n", " .format(cc_data_copy.shape[1])) \n", "print('Number of feature columns after making dummy variables for categoricals: {:3}'\n", " .format(dummy_df.shape[1]))" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(dummy_df, \n", " cc_target,\n", " test_size=0.3,\n", " random_state=nb_seed,\n", " stratify=cc_target)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "steps = [('imputation', Imputer(missing_values='NaN', strategy='most_frequent', axis=0)),\n", " ('r_forest', RandomForestClassifier())]\n", "pipeline = Pipeline(steps)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.94 0.88 7009\n", " 1 0.61 0.32 0.42 1991\n", "\n", "avg / total 0.78 0.80 0.78 9000\n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pipeline.fit(X_train, y_train)\n", "y_pred = pipeline.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(pipeline, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { 
"image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "roc_plotter(pipeline, X_test, y_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Observations after generating dummy variables\n", "\n", "The precision in identifying cases of default is 0.61 rather than 0.60, which is a good sign, but it's too small to consider it statistically significant, especially considering we're just looking at 1 model. \n", "\n", "We did increase the number of features fairly significantly, and as there are only 8 columns that would come from previously discussed features (MARRIAGE: [married, single, other], EDUCATION: [grad_school, university, high_school, other], and SEX: [male, female]) we know that the remaining ~50 columns are from the PAY features. Let's look at the distributions of values for these features." ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pay_cols = ['PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']\n", "\n", "with plt.style.context('seaborn-darkgrid'):\n", " fig, ax = plt.subplots(2,3)\n", " fig.set_size_inches(15,7)\n", " fig.suptitle('Distribution of repayment delays over the past 6 months', fontsize=16)\n", " \n", " for i in range(len(pay_cols)):\n", " row,col = (i//3, i%3)\n", " pay_data = cc_tmp[pay_cols[i]].value_counts()\n", " ax[row,col].bar(pay_data.index, pay_data, align='center', alpha=0.7)\n", " ax[row,col].set_title(pay_cols[i], fontsize=12)\n", " plt.tight_layout(pad=3.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "From the data set's documentation, we would not expect to see the values -2 or 0, but per [the Taiwanese professor that collected this data set](https://www.kaggle.com/uciml/default-of-credit-card-clients-dataset/discussion/34608), -2 indicates no credit usage, and 0 indicates the person uses 
revolving credit (ie the credit user doesn't fully pay off their debt, but they do make at least the minimum payment). -1 indicates payment in full, and values above 0 indicate the number of months past due.\n", "\n", "We also see that only a vanishingly small part of the population is more than 2 months behind on their bills, so I'll try lumping them into the 2 month bin. As -2 (no credit usage), -1 (pays in full every month), and 0 (revolving credit) seem like distinct types of behavior, I'll leave them as distinct categories." ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "rolldown_vals = [3,4,5,6,7,8,9,10]\n", "# using another copy for convenience while developing\n", "# cc_tmp = cc_data_copy.copy()\n", "for feat in pay_cols:\n", " cc_tmp.loc[cc_data_copy[feat].isin([3,4,5,6,7,8,9]), feat] = 2" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "with plt.style.context('seaborn-darkgrid'):\n", " fig, ax = plt.subplots(2,3)\n", " fig.set_size_inches(15,6)\n", " fig.suptitle('Distribution of repayment delays over the past 6 months', fontsize=16)\n", " \n", " for i in range(len(pay_cols)):\n", " row,col = (i//3, i%3)\n", " pay_data = cc_tmp[pay_cols[i]].value_counts()\n", " ax[row,col].bar(pay_data.index, pay_data, align='center', alpha=0.7)\n", " ax[row,col].set_title(pay_cols[i], fontsize=12)\n", " plt.tight_layout(pad=3.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We see that we've successfully lumped the longer payment delays into the '2' category, which now means 'at least 2 months past due on payment'. I'll try making dummy variables from this smaller pool of categories." 
] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "cats = ['SEX','EDUCATION','MARRIAGE','PAY_0',\n", " 'PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']\n", "dummy_df = pd.get_dummies(cc_tmp, columns=cats, drop_first=True)" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['LIMIT_BAL', 'AGE', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3',\n", " 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2',\n", " 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'SEX_2',\n", " 'EDUCATION_2.0', 'EDUCATION_3.0', 'EDUCATION_4.0', 'MARRIAGE_1',\n", " 'MARRIAGE_2', 'MARRIAGE_3', 'PAY_0_-1', 'PAY_0_0', 'PAY_0_1',\n", " 'PAY_0_2', 'PAY_2_-1', 'PAY_2_0', 'PAY_2_1', 'PAY_2_2', 'PAY_3_-1',\n", " 'PAY_3_0', 'PAY_3_1', 'PAY_3_2', 'PAY_4_-1', 'PAY_4_0', 'PAY_4_1',\n", " 'PAY_4_2', 'PAY_5_-1', 'PAY_5_0', 'PAY_5_2', 'PAY_6_-1', 'PAY_6_0',\n", " 'PAY_6_2'], dtype=object)" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_df.columns.values" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [], "source": [ "new_cats = []\n", "for cat in cats:\n", " for col in dummy_df.columns.values:\n", " if cat in col:\n", " new_cats.append(col)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "for cat in new_cats:\n", " dummy_df[cat] = dummy_df[cat].astype('category')" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of feature columns before making dummy variables for categoricals: 23\n", "Number of feature columns after making dummy variables for categoricals: 43\n" ] } ], "source": [ "print('Number of feature columns before making dummy variables for categoricals: {:3}'\n", " .format(cc_tmp.shape[1])) \n", "print('Number of feature columns after making dummy variables for categoricals: 
{:3}'\n", " .format(dummy_df.shape[1]))" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(dummy_df, \n", " cc_target,\n", " test_size=0.3,\n", " random_state=nb_seed,\n", " stratify=cc_target)" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "steps = [('imputation', Imputer(missing_values='NaN', strategy='most_frequent', axis=0)),\n", " ('r_forest', RandomForestClassifier())]\n", "pipeline = Pipeline(steps)" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.94 0.88 7009\n", " 1 0.61 0.33 0.43 1991\n", "\n", "avg / total 0.78 0.80 0.78 9000\n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pipeline.fit(X_train, y_train)\n", "y_pred = pipeline.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(pipeline, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "roc_plotter(pipeline, X_test, y_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Again, we don't see a statistically significant improvement, but the data is in a better form for further analysis.**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Handling Imbalanced Classes (SMOTE)\n", "\n", "As we noted earlier, the number of observations where the credit-user defaulted is much, much smaller than the number where the credit-user did not 
default. This imbalance makes it more difficult for most machine learning algorithms to correctly classify cases. To address this, we can use an **oversampling** or **undersampling** algorithm to either (respectively) add more examples of the scarce class (credit default), or remove examples of the abundant class (no credit default). There are a number of over/under-sampling algorithms, and the more effective algorithms tend to be somewhat complex, but their impact can be incredible. \n", "\n", "**Synthetic Minority Oversampling TEchnique** (**SMOTE**) is one of the most popular oversampling algorithms. Without going into too much detail, the SMOTE algorithm creates synthetic examples to add to the data set using a process that doesn't bias the data set. You can read more about the [SMOTE algorithm in its whitepaper](https://arxiv.org/pdf/1106.1813.pdf), or see below for more detail." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### General SMOTE algorithm (semi-technical)\n", "Synthetic examples for the minority class are generated by taking each minority example (ie the row of values for each credit-default example), calculating the difference between that example and its nearest neighbor (ie the row of values most similar to that minority example), multiplying this difference by a random number between 0 and 1, and adding the result to the original example. If you need to generate more samples, you can repeat this process with the 2nd nearest neighbor, 3rd, 4th, etc.\n", "\n", "This creates reasonable examples that are within the domain of the existing minority class data." 
] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "cc_data_copy = dummy_df" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(dummy_df, \n", " cc_target,\n", " test_size=0.3,\n", " random_state=nb_seed,\n", " stratify=cc_target)" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.93 0.88 7009\n", " 1 0.59 0.34 0.43 1991\n", "\n", "avg / total 0.78 0.80 0.78 9000\n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sm = SMOTE(random_state=nb_seed)\n", "X_test_sm, y_test_sm = sm.fit_sample(X_test, y_test)\n", "X_train_sm, y_train_sm = sm.fit_sample(X_train, y_train)\n", "\n", "clf_rf = RandomForestClassifier()\n", "clf_rf.fit(X_train_sm, y_train_sm)\n", "y_pred = clf_rf.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(clf_rf, X_train_sm, X_test_sm, y_train_sm, y_test_sm, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "roc_plotter(clf_rf, X_test_sm, y_test_sm)" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [], "source": [ "# y_train" ] }, { "cell_type": "code", "execution_count": 181, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\mattt\\Anaconda3\\envs\\py36\\lib\\site-packages\\ipykernel\\__main__.py:11: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label 
will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "X_train_fold.shape: (13999, 43)\n", "y_train_fold.shape: (13999,)\n", "[ 0.5 0.4 0.2 ..., 0.1 0.1 0.2]\n", "0.1065562062562491\n", "X_train_fold.shape: (14000, 43)\n", "y_train_fold.shape: (14000,)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\mattt\\Anaconda3\\envs\\py36\\lib\\site-packages\\ipykernel\\__main__.py:11: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[ 0.6 0.2 0.2 ..., 0.1 0.8 0.1]\n", "0.10557142857142857\n", "X_train_fold.shape: (14001, 43)\n", "y_train_fold.shape: (14001,)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\mattt\\Anaconda3\\envs\\py36\\lib\\site-packages\\ipykernel\\__main__.py:11: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[ 0.6 0.2 0.1 ..., 0.2 0.6 0. 
]\n", "0.10230032861837406\n" ] } ], "source": [ "clf_ = clf_rf\n", "n=3\n", "reps=1\n", "thresh=0.5\n", "skfolds = RepeatedStratifiedKFold(n_splits=n, n_repeats=reps, random_state=nb_seed)\n", "for train_index, test_index in skfolds.split(X_train, y_train):\n", " clone_clf = clone(clf_)\n", " X_train_fold = X_train.iloc[train_index.tolist()]\n", " y_train_fold = (y_train.iloc[train_index])\n", " X_test_fold = X_train.iloc[test_index]\n", " y_test_fold = (y_train.iloc[test_index])\n", " print('X_train_fold.shape: {}'.format(X_train_fold.shape))\n", " print('y_train_fold.shape: {}'.format(y_train_fold.shape))\n", " sm = SMOTE(random_state=nb_seed)\n", " X_train_fold_sm, y_train_fold_sm = sm.fit_sample(X_train_fold, y_train_fold)\n", " clone_clf.fit(X_train_fold_sm, y_train_fold_sm)\n", " y_proba = clone_clf.predict_proba(X_test_fold)[:,1]\n", " # threshold the positive-class probability so predictions are comparable to the 0/1 labels\n", " y_pred = (y_proba >= thresh).astype(int)\n", " print(y_proba)\n", " n_correct = sum(y_pred == y_test_fold)\n", " print(n_correct / len(y_pred))" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0.7615356 , 0.76936637, 0.80273182, 0.99119745, 0.99172295,\n", " 0.99360492, 0.99187517, 0.99144685, 0.9899365 , 0.99219856])" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Resample inside each CV fold via an imblearn pipeline; applying SMOTE before the split leaks synthetic samples into the validation folds\n", "smote_pipe = make_imb_pipeline(SMOTE(random_state=nb_seed), RandomForestClassifier())\n", "scores = cross_val_score(smote_pipe, X_train, y_train, cv=10, scoring='roc_auc')\n", "scores" ] }, { "cell_type": "code", "execution_count": 152, "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "'[ 1 2 3 ..., 20997 20998 20999] not in index'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m 
\u001b[1;32mfor\u001b[0m \u001b[0mtrain_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_index\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mskfolds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mclone_clf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclone\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclf_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mX_train_fold\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtrain_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0my_train_fold\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtrain_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mX_test_fold\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtest_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2131\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2132\u001b[0m \u001b[1;31m# either boolean or fancy integer 
index\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2133\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2134\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2135\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_getitem_array\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2175\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2176\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2177\u001b[1;33m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2178\u001b[0m \u001b[1;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2179\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[1;34m(self, obj, axis, is_setter)\u001b[0m\n\u001b[0;32m 1267\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1268\u001b[0m raise KeyError('{mask} not in index'\n\u001b[1;32m-> 1269\u001b[1;33m .format(mask=objarr[mask]))\n\u001b[0m\u001b[0;32m 1270\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1271\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_values_from_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyError\u001b[0m: '[ 1 2 3 ..., 20997 20998 20999] not in index'" ] } ], "source": [ "clf_ = clf_rf\n", "n=5\n", "reps=2\n", "skfolds = RepeatedStratifiedKFold(n_splits=n, n_repeats=reps, random_state=nb_seed)\n", "for train_index, test_index in skfolds.split(X_train, y_train):\n", " clone_clf = clone(clf_)\n", " X_train_fold = X_train[train_index]\n", " y_train_fold = (y_train[train_index])\n", " X_test_fold = X_train[test_index]\n", " y_test_fold = (y_train[test_index])\n", " \n", " sm = SMOTE(random_state=nb_seed)\n", " X_train_fold_sm, y_train_fold_sm = sm.fit_sample(X_train_fold, y_train_fold)\n", " clone_clf.fit(X_train_fold_sm, y_train_fold_sm)\n", " y_pred = clone_clf.predict(X_test_fold)\n", " n_correct = sum(y_pred == y_test_fold)\n", " print(n_correct / len(y_pred))" 
] }, { "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [], "source": [ "# X_train" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "roc_plotter(clf_rf, X_test_sm, y_test_sm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "parameters = {\n", " 'criterion': ['entropy','gini'],\n", " 'max_depth': [15,25,35],\n", " 'min_samples_leaf': [1,4,6],\n", " 'min_samples_split': [2,4],\n", " 'random_state': [123]\n", "}\n", "\n", "dt_clf = tree.DecisionTreeClassifier()\n", "\n", "dt_clf_gs = GridSearchCV(dt_clf, parameters, verbose=1, cv=5, n_jobs=-1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train_index.shape: (23999,)\n", "test_index.shape: (6001,)\n", "train_index.shape: (24000,)\n", "test_index.shape: (6000,)\n", "train_index.shape: (24000,)\n", "test_index.shape: (6000,)\n", "train_index.shape: (24000,)\n", "test_index.shape: (6000,)\n", "train_index.shape: (24001,)\n", "test_index.shape: (5999,)\n" ] } ], "source": [ "skfolds = StratifiedKFold(n_splits=5, random_state=nb_seed)\n", "\n", "for train_index, test_index in skfolds.split(cc_data_copy, cc_target):\n", " print('train_index.shape: {}'.format(train_index.shape))\n", " print('test_index.shape: {}'.format(test_index.shape))" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 30000 entries, 1 to 30000\n", 
"Data columns (total 43 columns):\n", "LIMIT_BAL 30000 non-null int64\n", "AGE 30000 non-null int64\n", "BILL_AMT1 30000 non-null int64\n", "BILL_AMT2 30000 non-null int64\n", "BILL_AMT3 30000 non-null int64\n", "BILL_AMT4 30000 non-null int64\n", "BILL_AMT5 30000 non-null int64\n", "BILL_AMT6 30000 non-null int64\n", "PAY_AMT1 30000 non-null int64\n", "PAY_AMT2 30000 non-null int64\n", "PAY_AMT3 30000 non-null int64\n", "PAY_AMT4 30000 non-null int64\n", "PAY_AMT5 30000 non-null int64\n", "PAY_AMT6 30000 non-null int64\n", "SEX_2 30000 non-null category\n", "EDUCATION_2.0 30000 non-null category\n", "EDUCATION_3.0 30000 non-null category\n", "EDUCATION_4.0 30000 non-null category\n", "MARRIAGE_1 30000 non-null category\n", "MARRIAGE_2 30000 non-null category\n", "MARRIAGE_3 30000 non-null category\n", "PAY_0_-1 30000 non-null category\n", "PAY_0_0 30000 non-null category\n", "PAY_0_1 30000 non-null category\n", "PAY_0_2 30000 non-null category\n", "PAY_2_-1 30000 non-null category\n", "PAY_2_0 30000 non-null category\n", "PAY_2_1 30000 non-null category\n", "PAY_2_2 30000 non-null category\n", "PAY_3_-1 30000 non-null category\n", "PAY_3_0 30000 non-null category\n", "PAY_3_1 30000 non-null category\n", "PAY_3_2 30000 non-null category\n", "PAY_4_-1 30000 non-null category\n", "PAY_4_0 30000 non-null category\n", "PAY_4_1 30000 non-null category\n", "PAY_4_2 30000 non-null category\n", "PAY_5_-1 30000 non-null category\n", "PAY_5_0 30000 non-null category\n", "PAY_5_2 30000 non-null category\n", "PAY_6_-1 30000 non-null category\n", "PAY_6_0 30000 non-null category\n", "PAY_6_2 30000 non-null category\n", "dtypes: category(29), int64(14)\n", "memory usage: 4.3 MB\n" ] } ], "source": [ "cc_data_copy.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "'EDUCATION'", "output_type": "error", "traceback": [ 
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2524\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2525\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2526\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", "\u001b[1;31mKeyError\u001b[0m: 'EDUCATION'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtmp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcc_data_copy\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'EDUCATION'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m edu_dict = {1:'Grad School',\n\u001b[0;32m 3\u001b[0m 
\u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'University'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'High School'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;36m4\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Other'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2137\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2138\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2139\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2140\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2141\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_getitem_column\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2144\u001b[0m \u001b[1;31m# get column\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2145\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 
2146\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2147\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2148\u001b[0m \u001b[1;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_get_item_cache\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 1840\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1841\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1842\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1843\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1844\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, item, fastpath)\u001b[0m\n\u001b[0;32m 3841\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3842\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3843\u001b[1;33m \u001b[0mloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3844\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3845\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0misna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2525\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2526\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2527\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2528\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2529\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", "\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", "\u001b[1;31mKeyError\u001b[0m: 'EDUCATION'" ] } ], "source": [ "tmp = cc_data_copy['EDUCATION']\n", "edu_dict = {1:'Grad School',\n", " 2:'University',\n", " 3:'High School',\n", " 4:'Other',\n", " 5:'unknown',\n", " 6:'unknown',\n", " 0:'unknown'}\n", "tmp = tmp.replace({'EDUCATION': edu_dict})\n", "\n", "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(figsize=(8,6.5))\n", " sns.countplot(x='EDUCATION', data=tmp, ax=ax)\n", " ax.set_ylabel('Count', fontsize=14)\n", " ax.set_xticklabels(['Does not default (0)', 
'Defaults (1)'])\n", " ax.set_xlabel('Default-on-next-payment-cycle status', fontsize=14)\n", " ax.set_title('Defaulted on CC Payment', fontsize=16)\n", " ax.legend(loc='upper right', fontsize=14)\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pipeline = make_pipeline(Imputer(missing_values='NaN', strategy='most_frequent', axis=0),\n", " RandomForestClassifier(random_state=nb_seed))\n", "model = pipeline.fit(X_train, y_train)\n", "prediction = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(model, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "roc_plotter(model, X_test, y_test)" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "smote_pipeline = make_imb_pipeline(Imputer(missing_values='NaN', strategy='most_frequent', axis=0),\n", " SMOTE(random_state=nb_seed),\n", " RandomForestClassifier(random_state=nb_seed))\n", "smote_model = smote_pipeline.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred = smote_model.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(smote_model, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sm = SMOTE(random_state=nb_seed)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "steps = [('imputation', Imputer(missing_values='NaN', strategy='most_frequent', axis=0)),\n", " ('smote', SMOTE(random_state=nb_seed)),\n", " ('r_forest', RandomForestClassifier())]\n", "smote_pipeline = imbPipeline(steps)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "confusion_matrix(y_test, y_pred)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "smote_pipeline.fit(X_train, y_train)\n", "y_pred = smote_pipeline.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(smote_pipeline, X_train, X_test, y_train, y_test, ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "roc_plotter(smote_pipeline, X_test, y_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "confusion_matrix(y_test, y_pred_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from imblearn.metrics import classification_report_imbalanced" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(classification_report(y_test, prediction))\n", "print(classification_report_imbalanced(y_test, smote_prediction))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold\n", "\n", "# Cross-validation done right: SMOTE is fit inside each fold, so validation rows are never resampled\n", "kf = KFold(n_splits=5, shuffle=True, random_state=nb_seed)\n", "accuracy = []\n", "precision = []\n", "recall = []\n", "f1 = []\n", "auc_scores = []\n", "for train, test in kf.split(X_train, y_train):\n", " pipeline = make_imb_pipeline(SMOTE(), classifier(random_state=nb_seed))\n", " model = pipeline.fit(X_train.iloc[train], y_train.iloc[train])\n", " prediction = model.predict(X_train.iloc[test])\n", "\n", " accuracy.append(pipeline.score(X_train.iloc[test], y_train.iloc[test]))\n", " precision.append(precision_score(y_train.iloc[test], prediction))\n", " recall.append(recall_score(y_train.iloc[test], prediction))\n", " f1.append(f1_score(y_train.iloc[test], prediction))\n", " auc_scores.append(roc_auc_score(y_train.iloc[test], prediction))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "pipeline.fit(X_train, y_train)\n", "y_pred = pipeline.predict(X_test)\n", "print(classification_report(y_test, y_pred))\n", "confusion_mat_plotter(confusion_matrix(y_test, y_pred), ['No Default', 'Default'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# y_pred_prob = pipeline.predict_proba(X_test)[:,1]\n", "# fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob) \n", "# roc_plotter(fpr, tpr, thresholds)\n", "roc_plotter(pipeline, X_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# y_pred_prob = pipeline.predict_proba(X_test)[:,1]\n", "# fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob) \n", "# roc_plotter(fpr, tpr, thresholds)\n", "roc_plotter(pipeline, X_train)" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# tmp.loc[(tmp['EDUCATION'] == 'Other'),'EDUCATION'] = np.nan\n", "type(fpr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Draw each fold's ROC curve via colorline(), then overlay the mean ROC curve and a +/- 1 std. dev. band\n", "def roc_plotterz(ax_, fprs_, tprs_, threshs_, clf_type, pp_steps, DEBUG=False):\n", "    tpr_set = []\n", "    aucs = []\n", "    mean_fpr = np.linspace(0, 1, 100)\n", "    if DEBUG:\n", "        print('fprs_.shape: {}'.format(fprs_.shape))\n", "        print('tprs_.shape: {}'.format(tprs_.shape))\n", "        print('threshs_.shape: {}'.format(threshs_.shape))\n", "    for i in range(len(fprs_)):\n", "# ax_.plot(fprs_[i], tprs_[i], lw=1, alpha=0.3)\n", "        tpr_set.append(np.interp(mean_fpr, fprs_[i], tprs_[i]))\n", "        tpr_set[-1][0] = 0.0\n", "        roc_auc = auc(fprs_[i], tprs_[i])\n", "        aucs.append(roc_auc)\n", "        lc = colorline(ax_, fprs_[i], tprs_[i], threshs_[i])\n", "        if DEBUG:\n", "            print('[ROC plotter debug]: \\ni: {}\\nfprs_[i]: {}\\ntprs_[i]: {}\\nthreshs_[i]: {}'\n", "                  .format(i, fprs_[i], tprs_[i], threshs_[i]))\n", "    mean_tpr = np.mean(tpr_set, axis=0)\n", "    mean_tpr[-1] = 1.0\n", "    mean_auc = auc(mean_fpr, mean_tpr)\n", "    std_auc = np.std(aucs)\n", "    std_tpr = np.std(tpr_set, axis=0)\n", "    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)\n", "    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)\n", "    ax_.plot(mean_fpr, mean_tpr, color='k',\n", "             label='Mean ROC (AUC = {:0.2f} $\\pm$ {:0.2f})'\n", "             .format(mean_auc, std_auc), lw=1, alpha=0.8)\n", "    ax_.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', \n", "                     alpha=.2, label='$\\pm$ 1 std. dev.')\n", 
"    ax_.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', \n", "             label='Random', alpha=.8)\n", "    ax_.legend(loc='best', fancybox=True, framealpha=0.4)\n", "    ax_.set_title('ROC Curve: Base Clf: [{}] \\n Steps: [{}]'\n", "                  .format(clf_type, pp_steps), fontsize=14)\n", "    ax_.set_ylabel('True Positive Rate', fontsize=14)\n", "    ax_.set_xlabel('False Positive Rate', fontsize=14)\n", "    ax_.set_xlim([0.0,1.0])\n", "    ax_.set_ylim([0.0,1.0])\n", "    divider = make_axes_locatable(ax_)\n", "    fig = ax_.get_figure()\n", "    cax = divider.append_axes('right', size='5%', pad=0.05)\n", "    fig.colorbar(lc, cax=cax, orientation='vertical')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(figsize=(8, 6))\n", " sns.countplot(x='SEX', data=cc_raw, ax=ax)\n", " ax.set_ylabel('Count', fontsize=12)\n", "# ax.set_xticklabels(['Male', 'Female'])\n", " ax.set_xlabel('Gender')\n", " ax.set_title('Gender')\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with plt.style.context('seaborn-whitegrid'):\n", " fig, ax = plt.subplots(nrows=1, ncols=2,figsize=(12,5))\n", " sns.countplot(x='default_payment_next_month', data=cc_raw, ax=ax[0])\n", " ax[0].set_ylabel('Count', fontsize=12)\n", " ax[0].set_xticklabels(['Does not default (0)', 'Defaults (1)'])\n", " ax[0].set_xlabel('Default-on-next-payment-cycle status')\n", " ax[0].set_title('Defaulted on CC 
# EDA overview on a 4x2 grid: target class balance, then age, marital status,
# education and gender counts split by default status, then credit-limit
# histograms (all observations, non-defaulters, defaulters).
target_col = 'default_payment_next_month'
credit_limit_label = 'Credit Limit (consumer and family credit) [NT dollars]'

with plt.style.context('seaborn-whitegrid'):
    fig, ax = plt.subplots(nrows=4, ncols=2, figsize=(15, 25))

    # Row 0, left: overall class balance of the target.
    sns.countplot(x=target_col, data=cc_raw, ax=ax[0, 0])
    ax[0, 0].set_ylabel('Count', fontsize=12)
    ax[0, 0].set_xticklabels(['Does not default (0)', 'Defaults (1)'])
    ax[0, 0].set_xlabel('Default-on-next-payment-cycle status')
    ax[0, 0].set_title('Defaulted on CC Payment')

    # Row 0, right: age counts split by default status.
    sns.countplot(x='AGE', data=cc_raw, hue=target_col, ax=ax[0, 1])
    ax[0, 1].set_xlabel('Age [years]', fontsize=12)
    ax[0, 1].set_ylabel('Count', fontsize=12)
    ax[0, 1].set_xticklabels(ax[0, 1].get_xticklabels(), rotation=90, ha='right', fontsize=7.5)
    ax[0, 1].legend(loc='upper right', title='Defaulted on CC Payment')
    ax[0, 1].set_title('Age Distribution')

    # NOTE(review): the tick labels in the MARRIAGE, EDUCATION and SEX panels
    # are assigned by position, so they assume every code (MARRIAGE 0-3,
    # EDUCATION 0-6, SEX 1-2) is present in cc_raw and plotted in ascending
    # order -- verify against the data.
    sns.countplot(x='MARRIAGE', data=cc_raw, hue=target_col, ax=ax[1, 0])
    ax[1, 0].set_xlabel('Married status', fontsize=12)
    ax[1, 0].set_xticklabels(['Missing', 'Married', 'Single', 'Other'])
    ax[1, 0].set_ylabel('Count', fontsize=12)
    ax[1, 0].set_title('Marriage Distribution (1=Married, 2=Single, 3=Other)')

    sns.countplot(x='EDUCATION', data=cc_raw, hue=target_col, ax=ax[1, 1])
    ax[1, 1].set_xlabel('', fontsize=12)
    ax[1, 1].set_xticklabels(['Missing', 'Grad School', 'University', 'High School',
                              'Other', 'Missing', 'Missing'],
                             rotation=30)
    ax[1, 1].set_ylabel('Count', fontsize=12)
    ax[1, 1].set_title('Highest Completed Education Level Distribution')

    sns.countplot(x='SEX', data=cc_raw, hue=target_col, ax=ax[2, 0])
    ax[2, 0].set_xlabel('Gender', fontsize=12)
    ax[2, 0].set_xticklabels(['Male', 'Female'])
    ax[2, 0].set_ylabel('Count', fontsize=12)
    ax[2, 0].set_title('Gender')

    # Credit-limit histograms: all observations, then each outcome class.
    # Not including a number of bins, distplot uses the Freedman-Diaconis rule by default
    credit_limit_panels = [
        (ax[2, 1],
         cc_raw['LIMIT_BAL'],
         'Histogram of Credit Limits'),
        (ax[3, 0],
         cc_raw.loc[cc_raw[target_col] == 0, 'LIMIT_BAL'],
         'Histogram of Credit Limits For Observations that Did Not Default'),
        (ax[3, 1],
         cc_raw.loc[cc_raw[target_col] == 1, 'LIMIT_BAL'],
         'Histogram of Credit Limits For Observations that Defaulted'),
    ]
    for hist_ax, limit_values, hist_title in credit_limit_panels:
        sns.distplot(limit_values, norm_hist=False, kde=False, ax=hist_ax)
        hist_ax.set_xlabel(credit_limit_label, fontsize=12)
        hist_ax.set_ylabel('Count', fontsize=12)
        hist_ax.set_title(hist_title)

    # Actually call tight_layout (the bare attribute reference did nothing),
    # and do it inside the style context like the other plotting cells.
    plt.tight_layout()
"outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from collections import Counter\n", "from imblearn.datasets import fetch_datasets\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "from imblearn.pipeline import make_pipeline as make_pipeline_imb\n", "from imblearn.over_sampling import SMOTE\n", "from imblearn.under_sampling import NearMiss\n", "from imblearn.metrics import classification_report_imbalanced\n", "from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report\n", "from sklearn.ensemble import RandomForestClassifier\n", "import numpy as np\n", "\n", "\n", "def print_results(headline, true_value, pred):\n", " print(headline)\n", " print(\"accuracy: {}\".format(accuracy_score(true_value, pred)))\n", " print(\"precision: {}\".format(precision_score(true_value, pred)))\n", " print(\"recall: {}\".format(recall_score(true_value, pred)))\n", " print(\"f1: {}\".format(f1_score(true_value, pred)))\n", "\n", "\n", "# our classifier to use\n", "classifier = RandomForestClassifier\n", "\n", "data = fetch_datasets()['wine_quality']\n", "\n", "# splitting data into training and test set\n", "X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'], random_state=2)\n", "\n", "\n", "# build normal model\n", "pipeline = make_pipeline(classifier(random_state=42))\n", "model = pipeline.fit(X_train, y_train)\n", "prediction = model.predict(X_test)\n", "\n", "# build model with SMOTE imblearn\n", "smote_pipeline = make_pipeline_imb(SMOTE(random_state=4), classifier(random_state=42))\n", "smote_model = smote_pipeline.fit(X_train, y_train)\n", "smote_prediction = smote_model.predict(X_test)\n", "\n", "# build model with undersampling\n", "nearmiss_pipeline = 
make_pipeline_imb(NearMiss(random_state=42), classifier(random_state=42))\n", "nearmiss_model = nearmiss_pipeline.fit(X_train, y_train)\n", "nearmiss_prediction = nearmiss_model.predict(X_test)\n", "\n", "\n", "\n", "# print information about both models\n", "print()\n", "print(\"normal data distribution: {}\".format(Counter(data['target'])))\n", "X_smote, y_smote = SMOTE().fit_sample(data['data'], data['target'])\n", "print(\"SMOTE data distribution: {}\".format(Counter(y_smote)))\n", "X_nearmiss, y_nearmiss = NearMiss().fit_sample(data['data'], data['target'])\n", "print(\"NearMiss data distribution: {}\".format(Counter(y_nearmiss)))\n", "\n", "# classification report\n", "print(classification_report(y_test, prediction))\n", "print(classification_report_imbalanced(y_test, smote_prediction))\n", "\n", "print()\n", "print('normal Pipeline Score {}'.format(pipeline.score(X_test, y_test)))\n", "print('SMOTE Pipeline Score {}'.format(smote_pipeline.score(X_test, y_test)))\n", "print('NearMiss Pipeline Score {}'.format(nearmiss_pipeline.score(X_test, y_test)))\n", "\n", "\n", "print()\n", "print_results(\"normal classification\", y_test, prediction)\n", "print()\n", "print_results(\"SMOTE classification\", y_test, smote_prediction)\n", "print()\n", "print_results(\"NearMiss classification\", y_test, nearmiss_prediction)\n", "\n", "\n", "from sklearn.model_selection import KFold\n", "\n", "# cross validation done right\n", "kf = KFold(n_splits=5, random_state=42)\n", "accuracy = []\n", "precision = []\n", "recall = []\n", "f1 = []\n", "auc = []\n", "for train, test in kf.split(X_train, y_train):\n", " pipeline = make_pipeline_imb(SMOTE(), classifier(random_state=42))\n", " model = pipeline.fit(X_train[train], y_train[train])\n", " prediction = model.predict(X_train[test])\n", "\n", " accuracy.append(pipeline.score(X_train[test], y_train[test]))\n", " precision.append(precision_score(y_train[test], prediction))\n", " recall.append(recall_score(y_train[test], 
prediction))\n", " f1.append(f1_score(y_train[test], prediction))\n", " auc.append(roc_auc_score(y_train[test], prediction))\n", "\n", "print()\n", "print(\"done right mean of scores 5-fold:\")\n", "print(\"accuracy: {}\".format(np.mean(accuracy)))\n", "print(\"precision: {}\".format(np.mean(precision)))\n", "print(\"recall: {}\".format(np.mean(recall)))\n", "print(\"f1: {}\".format(np.mean(f1)))\n", "print()\n", "\n", "# cross validation done wrong\n", "kf = KFold(n_splits=5, random_state=42)\n", "accuracy = []\n", "precision = []\n", "recall = []\n", "f1 = []\n", "auc = []\n", "X, y = SMOTE().fit_sample(X_train, y_train)\n", "for train, test in kf.split(X, y):\n", " pipeline = make_pipeline(classifier(random_state=42))\n", " model = pipeline.fit(X[train], y[train])\n", " prediction = model.predict(X[test])\n", "\n", " accuracy.append(pipeline.score(X[test], y[test]))\n", " precision.append(precision_score(y[test], prediction))\n", " recall.append(recall_score(y[test], prediction))\n", " f1.append(f1_score(y[test], prediction))\n", "\n", "print(\"done wrong mean of scores 5-fold:\")\n", "print(\"accuracy: {}\".format(np.mean(accuracy)))\n", "print(\"precision: {}\".format(np.mean(precision)))\n", "print(\"recall: {}\".format(np.mean(recall)))\n", "print(\"f1: {}\".format(np.mean(f1)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:py36]", "language": "python", "name": "conda-env-py36-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }