{ "cells": [ { "cell_type": "markdown", "id": "minute-lender", "metadata": {}, "source": [ "
\n", " MSDS 7333 Spring 2021: Case Study 07 \n", "
\n", "
\n", "
\n", " Dollar Cost Minimization \n", "
\n", "\n", "
\n", "
\n", " Sachin Chavan,Tazeb Abera, Gautam Kapila, Sandesh Ojha \n", "
" ] }, { "cell_type": "markdown", "id": "reflected-outdoors", "metadata": { "toc": true }, "source": [ "

Table of Contents

\n", "
" ] }, { "cell_type": "markdown", "id": "actual-desktop", "metadata": {}, "source": [] }, { "cell_type": "markdown", "id": "basic-abuse", "metadata": {}, "source": [ "[click here](https://nbviewer.jupyter.org/github/sachinac/MSDS7333/blob/main/case_study07/msds_qtw_case_study07.ipynb) to view notebook on nbviewer." ] }, { "cell_type": "markdown", "id": "underlying-logging", "metadata": {}, "source": [ "# Import modules\n", "\n", "Import all modules required for this notebook. This is the only place where all modules are imported.\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "lovely-england", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import missingno as msno \n", "import plotly.express as px\n", "import pickle\n", "from os import path\n", "from matplotlib import pyplot\n", "\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import LogisticRegression\n", "from matplotlib.patches import Rectangle\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import RepeatedStratifiedKFold\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.feature_selection import RFE\n", "from sklearn.metrics import precision_recall_curve\n", "from sklearn.metrics import f1_score\n", "from sklearn.metrics import auc\n", "from sklearn.metrics import det_curve\n", "\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.decomposition import PCA\n", "\n", "from sklearn.inspection import permutation_importance\n", "\n", "from sklearn.metrics import make_scorer\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "from sklearn.metrics import plot_confusion_matrix\n", "from mlxtend.evaluate import confusion_matrix" ] }, { "cell_type": "markdown", "id": "handed-mobile", "metadata": {}, "source": [ "# Function definitions" ] }, { "cell_type": "markdown", "id": "stock-smart", "metadata": {}, "source": [ "This code segment aligns all plots at the center." ] }, { "cell_type": "code", "execution_count": 2, "id": "annoying-slave", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.core.display import HTML\n", "HTML(\"\"\"\n", "\n", "\"\"\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "coordinated-supply", "metadata": {}, "outputs": [], "source": [ "# Function to visualize accuracy, f1, precision, recall obtained from grid search.\n", "\n", "def viz_hyperparameter(res,X_axis,xlabel,scoring,xlim,ylim,plot_title):\n", " plt.figure(figsize=(10, 8))\n", " plt.title(\"GridSearchCV evaluating using multiple scorers simultaneously\"+plot_title, fontsize=12)\n", " \n", " plt.xlabel(xlabel) \n", " plt.ylabel(\"Score\")\n", "\n", " ax = plt.gca()\n", " ax.set_xlim(xlim[0], xlim[1])\n", " ax.set_ylim(ylim[0], ylim[1])\n", "\n", "\n", " for scorer, color in zip(sorted(scoring), ['g', 'k','b','r','c']):\n", " \n", " sample = 'test'\n", " style = '-'\n", " \n", " sample_score_mean = res['mean_%s_%s' % (sample, scorer)]\n", " sample_score_std = res['std_%s_%s' % (sample, scorer)]\n", "\n", " \n", " ax.fill_between(X_axis, sample_score_mean - sample_score_std,\n", " sample_score_mean + sample_score_std,\n", " alpha=0.1, color=color)\n", "\n", " ax.plot(X_axis, sample_score_mean, style, color=color,\n", " alpha=0.5,label=\"%s (%s)\" % (scorer, sample))\n", "\n", " best_index = np.nonzero(res['rank_test_%s' % scorer] == min(res['rank_test_%s' % scorer]))[0][0]\n", " best_score = res['mean_test_%s' % scorer][best_index]\n", "\n", " #Plot a dotted vertical line at the best score for that scorer marked by x\n", " \n", " ax.plot([X_axis[best_index], ] * 2, [0, best_score],\n", " linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)\n", "\n", " # Annotate the best score for that scorer\n", " ax.annotate(\"%0.2f\" % best_score, (X_axis[best_index], best_score + 0.005))\n", "\n", " plt.legend(loc=\"best\")\n", " plt.grid(False)\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 4, "id": "digital-publicity", "metadata": {}, "outputs": [], "source": [ "# Function definition to convert probability of high occurance to binary class labels '0' or '1', based on an input thrshold\n", "\n", "def to_labels(pos_probs, threshold):\n", " return (pos_probs >= threshold).astype('int')" ] }, { "cell_type": "code", "execution_count": 5, "id": "framed-favor", "metadata": {}, "outputs": [], "source": [ "# Plots Classification Metrics (Accuracy, False Negative Rate, False Positive Rate) and Classification $ Cost impact \n", "# vs classification threshold\n", "# Inputs are y_test, y_probability_score_for_high,threshold_interest\n", "# Threshold_interest is probability threshold of interest for most optimal model performance, given trade-offs between \n", "# model $cost and classification erros. It is manual entry for visual display in the plot.\n", "\n", "def plot_metrics_cost_vs_threshold(y_test_ref,y_prob_high_score,threshold_interest,cost_matrix):\n", " knn_fpr, knn_fnr, knn_thresholds = det_curve(y_test_ref, y_prob_high_score)\n", " score = [confusion_matrix(y_target=y_test, y_predicted=to_labels(y_prob_high_score, t)) for t in knn_thresholds]\n", " cost_list =[]\n", " acc_list = []\n", "\n", " for nscore in score:\n", " fp = nscore[0][1]\n", " fn = nscore[1][0]\n", " tp = nscore[1][1]\n", " tn = nscore[0][0]\n", " total = nscore.sum()\n", " acc = (tp+tn)/total\n", "\n", " cost_score = np.sum(np.multiply(nscore, cost_matrix))\n", "\n", " cost_list.append(cost_score)\n", " acc_list.append(acc)\n", " \n", " xloc, yloc = threshold_interest, -0.1\n", " fig, ax1 = plt.subplots()\n", "\n", " color = 'k'\n", " ax1.set_xlabel('Threshold')\n", " ax1.set_ylabel('Metrics', color=color)\n", " ax1.plot(knn_thresholds, knn_fpr, '-.',color= color, label='fpr',linewidth=3)\n", " ax1.plot(knn_thresholds, knn_fnr, '--o',color= color, label='fnr',linewidth=3)\n", " ax1.plot(knn_thresholds, acc_list,'--o',color= 'g', label='acc',linewidth=3)\n", " ax1.add_patch(Rectangle((xloc, yloc),0.1, 1.1, facecolor=\"yellow\", alpha = 0.1))\n", " ax1.tick_params(axis='y', labelcolor=color)\n", " ax1.legend(loc=0)\n", "\n", " ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis\n", "\n", " color = 'tab:red'\n", " ax2.set_ylabel('$ cost', color=color) # we already handled the x-label with ax1\n", " ax2.plot(knn_thresholds, cost_list, 's:',color= color, label='cost')\n", " ax2.tick_params(axis='y', labelcolor=color)\n", "\n", " ax2.legend(loc=0)\n", " fig.tight_layout() # otherwise the right y-label is slightly clipped\n", " plt.show()\n", " \n", " print('$ Cost and Classification Error Rates for different classification thresholds:')\n", " dfs = pd.DataFrame(list(zip(knn_thresholds, cost_list,knn_fnr,knn_fpr,acc_list)),columns =['Threshold', '$Cost', 'FNr','FPr','Acc'])\n", " print(dfs)" ] }, { "cell_type": "markdown", "id": "structured-portsmouth", "metadata": {}, "source": [ "# Inroduction\n", "\n", "This case study is about demonstrating data science skills to build and evaluate mathematical models for a real-world dataset while minimizing dollar amount cost. Models shall be developed for the given dataset and compared with respect to dollar amount based on appropriate evaluation metric. Evaluation metric shall be determined after exploring features provided in the data and distribution of class labels shall be studied to decide on metric to be used for model evaluation.\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "stretch-privilege", "metadata": {}, "source": [ "# Business Understanding\n", "\n", "\n", "A Business client has reached out to us with an unknown dataset with many features and would like us to build a model that balances minimizing cost to the business, with respect to the model being accurate enough to apply to their day-to-day business.\n", "\n", "Here is how the decision cost breaks down:\n", "\n", " \n", " - True Positive - $0 \n", " \n", " - True Negative - $0\n", " \n", " - False Positive - $10\n", " \n", " - False Negative - $500\n", "\n", "Incorrectly classifying the data can get costly as you can see above. Correctly classified data does not cost the client any money. False Positive or classifying the data as true when its false costs the client \\\\$10. False Negative or classifying the data as false when it is true costs the client $500. We will need to factor in the cost of each of the features and their associated values during the model building to minimize the cost.\n", "For this analysis, we will compare recall or the true positive rate for each model analyzed. Recall or the true positive rate is the number of positive samples that are correctly classified as ‘positive’. If all of them are identified correctly, then recall will be 1. If all of them were classified incorrectly, then recall will be 0. With some positive samples classified as negative, recall with be in between 0 and 1.\n", "The client requires us to provide a detailed cost benefit analysis of the models we are proposing and provide comparative analysis of models if more than one models are proposed.\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "binary-seafood", "metadata": {}, "source": [ "# Data Evaluation / Engineering\n", "\n", "This section provides insights about datasets as follows\n", "\n", "- Indentifies categorical and numerical features\n", "- Data type conversion as required\n", "- Missing value analysis\n", "- Makes assumptions and limitations if any\n" ] }, { "cell_type": "markdown", "id": "electrical-horse", "metadata": {}, "source": [ "## Load Data\n", "\n", "Dataset was downloaded from the [SMU cloud](https://smu.box.com/s/k9x192jxm39enjw2wx8ouw2kopx33l32). The dataset contains 160K observations, 50 independent variables and 1 target variable. Target variable contains value 0s and 1s only this is going to be binary classification problem.\n", "\n", "Summary is as follows:\n", "\n", "- 50 indepedent variables\n", "- 1 target variables\n", "- 160K observations\n", "- 3 categorical variables 47 continuous variables\n", "- It contains few missing values that needs to be investigated for elimination or imputation\n", "\n", "Further analysis is conducted in next sections." ] }, { "cell_type": "code", "execution_count": 6, "id": "precise-growth", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x0x1x2x3x4x5x6x7x8x9...x41x42x43x44x45x46x47x48x49y
0-0.166563-3.9615884.6211132.481908-1.8001350.8046846.718751-14.789997-1.040673-4.204950...-1.4971175.414063-2.3256551.674827-0.26433260.781427-7.6896960.151589-8.0401660
1-0.149894-0.58567627.8398564.1523336.426802-2.42694340.477058-6.7257090.8964210.330165...36.2927904.4909150.7625616.5266621.00792715.805696-4.896678-0.32028316.7199740
2-0.321707-1.42981912.2515616.586874-5.304647-11.31109017.81285011.0605725.325880-2.632984...-0.3684919.088864-0.689886-2.7311180.75420030.856417-7.428573-2.090804-7.8694210
3-0.2455945.076677-24.1496323.6373076.5058112.290224-35.111751-18.913592-0.337041-5.568076...15.691546-7.4677752.940789-6.4241120.419776-72.4245695.3613751.806070-7.6708470
4-0.2733660.306326-11.3525931.6767582.928441-0.616824-16.50581727.5322811.199715-4.309105...-13.911297-5.2299371.7839283.957801-0.096988-14.085435-0.208351-0.89494215.7247421
..................................................................
159995-0.487024-4.2702690.417395-1.9924231.757552-1.1678190.60686041.084463-1.923188-2.374213...-9.3904518.096802-0.875131-1.413787-0.36396815.3393924.364205-3.83148928.3898581
1599960.8254774.80436822.16153511.3713031.7159016.99075932.221207-12.278038-3.8610866.715126...12.8031890.841446-0.682177-5.047677-0.0178980.7801306.387266-1.374742-1.6239520
159997-0.8024895.3626967.243419-7.4960742.295250-2.75606710.53138842.5158211.4209846.788916...-0.346570-0.1440980.7382987.2410410.215347-12.1552493.2652631.2309633.3354711
1599980.3392377.6098955.368414-2.8254814.04610215.3226037.805271-10.2330542.6099864.251127...-0.307656-0.601145-3.4431120.5499310.2067285.0819801.701462-0.279619-1.9864240
159999-0.296748-0.412773-10.911407-5.633629-4.02815415.939428-15.864365-46.38819218.339472-4.575499...27.8374731.3923950.893555-1.848590-0.423982-17.3793805.916490-2.76744415.5475571
\n", "

160000 rows × 51 columns

\n", "
" ], "text/plain": [ " x0 x1 x2 x3 x4 x5 \\\n", "0 -0.166563 -3.961588 4.621113 2.481908 -1.800135 0.804684 \n", "1 -0.149894 -0.585676 27.839856 4.152333 6.426802 -2.426943 \n", "2 -0.321707 -1.429819 12.251561 6.586874 -5.304647 -11.311090 \n", "3 -0.245594 5.076677 -24.149632 3.637307 6.505811 2.290224 \n", "4 -0.273366 0.306326 -11.352593 1.676758 2.928441 -0.616824 \n", "... ... ... ... ... ... ... \n", "159995 -0.487024 -4.270269 0.417395 -1.992423 1.757552 -1.167819 \n", "159996 0.825477 4.804368 22.161535 11.371303 1.715901 6.990759 \n", "159997 -0.802489 5.362696 7.243419 -7.496074 2.295250 -2.756067 \n", "159998 0.339237 7.609895 5.368414 -2.825481 4.046102 15.322603 \n", "159999 -0.296748 -0.412773 -10.911407 -5.633629 -4.028154 15.939428 \n", "\n", " x6 x7 x8 x9 ... x41 x42 \\\n", "0 6.718751 -14.789997 -1.040673 -4.204950 ... -1.497117 5.414063 \n", "1 40.477058 -6.725709 0.896421 0.330165 ... 36.292790 4.490915 \n", "2 17.812850 11.060572 5.325880 -2.632984 ... -0.368491 9.088864 \n", "3 -35.111751 -18.913592 -0.337041 -5.568076 ... 15.691546 -7.467775 \n", "4 -16.505817 27.532281 1.199715 -4.309105 ... -13.911297 -5.229937 \n", "... ... ... ... ... ... ... ... \n", "159995 0.606860 41.084463 -1.923188 -2.374213 ... -9.390451 8.096802 \n", "159996 32.221207 -12.278038 -3.861086 6.715126 ... 12.803189 0.841446 \n", "159997 10.531388 42.515821 1.420984 6.788916 ... -0.346570 -0.144098 \n", "159998 7.805271 -10.233054 2.609986 4.251127 ... -0.307656 -0.601145 \n", "159999 -15.864365 -46.388192 18.339472 -4.575499 ... 27.837473 1.392395 \n", "\n", " x43 x44 x45 x46 x47 x48 \\\n", "0 -2.325655 1.674827 -0.264332 60.781427 -7.689696 0.151589 \n", "1 0.762561 6.526662 1.007927 15.805696 -4.896678 -0.320283 \n", "2 -0.689886 -2.731118 0.754200 30.856417 -7.428573 -2.090804 \n", "3 2.940789 -6.424112 0.419776 -72.424569 5.361375 1.806070 \n", "4 1.783928 3.957801 -0.096988 -14.085435 -0.208351 -0.894942 \n", "... ... ... ... ... ... ... \n", "159995 -0.875131 -1.413787 -0.363968 15.339392 4.364205 -3.831489 \n", "159996 -0.682177 -5.047677 -0.017898 0.780130 6.387266 -1.374742 \n", "159997 0.738298 7.241041 0.215347 -12.155249 3.265263 1.230963 \n", "159998 -3.443112 0.549931 0.206728 5.081980 1.701462 -0.279619 \n", "159999 0.893555 -1.848590 -0.423982 -17.379380 5.916490 -2.767444 \n", "\n", " x49 y \n", "0 -8.040166 0 \n", "1 16.719974 0 \n", "2 -7.869421 0 \n", "3 -7.670847 0 \n", "4 15.724742 1 \n", "... ... .. \n", "159995 28.389858 1 \n", "159996 -1.623952 0 \n", "159997 3.335471 1 \n", "159998 -1.986424 0 \n", "159999 15.547557 1 \n", "\n", "[160000 rows x 51 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('data/final_project.csv')\n", "df" ] }, { "cell_type": "markdown", "id": "cloudy-clerk", "metadata": {}, "source": [ "## Dataframe structure\n", "\n", "Information about fields is not available. df.info() provides us following information.\n", "\n", "- x0 to x23, x25 to x28, x31, x33 to x36 and x38 to x49 are numeric features.\n", "- x24, x29-x30, x32 and x37 are categorical or non-numeric features\n", "- y is target variable which is binary." ] }, { "cell_type": "code", "execution_count": 7, "id": "passive-clause", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 160000 entries, 0 to 159999\n", "Data columns (total 51 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 x0 159974 non-null float64\n", " 1 x1 159975 non-null float64\n", " 2 x2 159962 non-null float64\n", " 3 x3 159963 non-null float64\n", " 4 x4 159974 non-null float64\n", " 5 x5 159963 non-null float64\n", " 6 x6 159974 non-null float64\n", " 7 x7 159973 non-null float64\n", " 8 x8 159979 non-null float64\n", " 9 x9 159970 non-null float64\n", " 10 x10 159957 non-null float64\n", " 11 x11 159970 non-null float64\n", " 12 x12 159964 non-null float64\n", " 13 x13 159969 non-null float64\n", " 14 x14 159966 non-null float64\n", " 15 x15 159965 non-null float64\n", " 16 x16 159974 non-null float64\n", " 17 x17 159973 non-null float64\n", " 18 x18 159960 non-null float64\n", " 19 x19 159965 non-null float64\n", " 20 x20 159962 non-null float64\n", " 21 x21 159971 non-null float64\n", " 22 x22 159973 non-null float64\n", " 23 x23 159953 non-null float64\n", " 24 x24 159972 non-null object \n", " 25 x25 159978 non-null float64\n", " 26 x26 159964 non-null float64\n", " 27 x27 159970 non-null float64\n", " 28 x28 159965 non-null float64\n", " 29 x29 159970 non-null object \n", " 30 x30 159970 non-null object \n", " 31 x31 159961 non-null float64\n", " 32 x32 159969 non-null object \n", " 33 x33 159959 non-null float64\n", " 34 x34 159959 non-null float64\n", " 35 x35 159970 non-null float64\n", " 36 x36 159973 non-null float64\n", " 37 x37 159977 non-null object \n", " 38 x38 159969 non-null float64\n", " 39 x39 159977 non-null float64\n", " 40 x40 159964 non-null float64\n", " 41 x41 159960 non-null float64\n", " 42 x42 159974 non-null float64\n", " 43 x43 159963 non-null float64\n", " 44 x44 159960 non-null float64\n", " 45 x45 159971 non-null float64\n", " 46 x46 159969 non-null float64\n", " 47 x47 159963 non-null float64\n", " 48 x48 159968 non-null float64\n", " 49 x49 159968 non-null float64\n", " 50 y 160000 non-null int64 \n", "dtypes: float64(45), int64(1), object(5)\n", "memory usage: 62.3+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "id": "opened-warrior", "metadata": {}, "source": [ "## Data Summary\n", "\n", "Below is descriptive statisics of all features. It indicates that features most features are on different scale and required normalization before they can be used for the modeling." ] }, { "cell_type": "code", "execution_count": 8, "id": "embedded-increase", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x0x1x2x3x4x5x6x7x8x9...x41x42x43x44x45x46x47x48x49y
count159974.000000159975.000000159962.000000159963.000000159974.000000159963.000000159974.000000159973.000000159979.000000159970.000000...159960.000000159974.000000159963.000000159960.000000159971.000000159969.000000159963.000000159968.000000159968.000000160000.000000
mean-0.0010280.001358-1.150145-0.024637-0.0005490.013582-1.670670-7.692795-0.0305400.005462...6.701076-1.833820-0.002091-0.0062500.000885-12.7553950.028622-0.000224-0.6742240.401231
std0.3711376.34063213.2734808.0650326.3822937.67007619.29866530.5422648.9011856.355040...18.6801965.1107051.5349524.1645950.39662136.6086414.7881571.93550115.0367380.490149
min-1.592635-26.278302-59.394048-35.476594-28.467536-33.822988-86.354483-181.506976-37.691045-27.980659...-82.167224-27.933750-6.876234-17.983487-1.753221-201.826828-21.086333-8.490155-65.7911910.000000
25%-0.251641-4.260973-10.166536-5.454438-4.313118-5.148130-14.780146-27.324771-6.031058-4.260619...-5.804080-5.162869-1.039677-2.812055-0.266518-36.428329-3.216016-1.320800-10.9317530.000000
50%-0.0020470.004813-1.340932-0.0314080.0008570.014118-1.948594-6.956789-0.0168400.006045...6.840110-1.923754-0.004385-0.0104840.001645-12.9824970.035865-0.011993-0.5744100.000000
75%0.2485324.2842207.8716765.4451794.3066605.19074911.44693112.2170715.9723494.305734...19.2663671.4535071.0332752.7832740.26904911.4454433.2680281.3177039.6510721.000000
max1.60084927.98817863.54565338.90602526.24781235.55011092.390605149.15063439.04983127.377842...100.05043222.6680416.68092219.0697591.669205150.85941520.8368548.22655266.8776041.000000
\n", "

8 rows × 46 columns

\n", "
" ], "text/plain": [ " x0 x1 x2 x3 \\\n", "count 159974.000000 159975.000000 159962.000000 159963.000000 \n", "mean -0.001028 0.001358 -1.150145 -0.024637 \n", "std 0.371137 6.340632 13.273480 8.065032 \n", "min -1.592635 -26.278302 -59.394048 -35.476594 \n", "25% -0.251641 -4.260973 -10.166536 -5.454438 \n", "50% -0.002047 0.004813 -1.340932 -0.031408 \n", "75% 0.248532 4.284220 7.871676 5.445179 \n", "max 1.600849 27.988178 63.545653 38.906025 \n", "\n", " x4 x5 x6 x7 \\\n", "count 159974.000000 159963.000000 159974.000000 159973.000000 \n", "mean -0.000549 0.013582 -1.670670 -7.692795 \n", "std 6.382293 7.670076 19.298665 30.542264 \n", "min -28.467536 -33.822988 -86.354483 -181.506976 \n", "25% -4.313118 -5.148130 -14.780146 -27.324771 \n", "50% 0.000857 0.014118 -1.948594 -6.956789 \n", "75% 4.306660 5.190749 11.446931 12.217071 \n", "max 26.247812 35.550110 92.390605 149.150634 \n", "\n", " x8 x9 ... x41 x42 \\\n", "count 159979.000000 159970.000000 ... 159960.000000 159974.000000 \n", "mean -0.030540 0.005462 ... 6.701076 -1.833820 \n", "std 8.901185 6.355040 ... 18.680196 5.110705 \n", "min -37.691045 -27.980659 ... -82.167224 -27.933750 \n", "25% -6.031058 -4.260619 ... -5.804080 -5.162869 \n", "50% -0.016840 0.006045 ... 6.840110 -1.923754 \n", "75% 5.972349 4.305734 ... 19.266367 1.453507 \n", "max 39.049831 27.377842 ... 100.050432 22.668041 \n", "\n", " x43 x44 x45 x46 \\\n", "count 159963.000000 159960.000000 159971.000000 159969.000000 \n", "mean -0.002091 -0.006250 0.000885 -12.755395 \n", "std 1.534952 4.164595 0.396621 36.608641 \n", "min -6.876234 -17.983487 -1.753221 -201.826828 \n", "25% -1.039677 -2.812055 -0.266518 -36.428329 \n", "50% -0.004385 -0.010484 0.001645 -12.982497 \n", "75% 1.033275 2.783274 0.269049 11.445443 \n", "max 6.680922 19.069759 1.669205 150.859415 \n", "\n", " x47 x48 x49 y \n", "count 159963.000000 159968.000000 159968.000000 160000.000000 \n", "mean 0.028622 -0.000224 -0.674224 0.401231 \n", "std 4.788157 1.935501 15.036738 0.490149 \n", "min -21.086333 -8.490155 -65.791191 0.000000 \n", "25% -3.216016 -1.320800 -10.931753 0.000000 \n", "50% 0.035865 -0.011993 -0.574410 0.000000 \n", "75% 3.268028 1.317703 9.651072 1.000000 \n", "max 20.836854 8.226552 66.877604 1.000000 \n", "\n", "[8 rows x 46 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "markdown", "id": "combined-opera", "metadata": {}, "source": [ "## Non numeric features\n", "\n", "Here are non-numeric features which has content other than numbers. following are the fields.\n", "\n", "- x24 - Region names\n", "- x29 - Month\n", "- x30 - Day of the week\n", "- x32 - Looks like rate of something.so can be converted to float.\n", "- x37 - has $ sign assigned to numbers. Should be numeric field. can be converted to float.\n", "\n", "Basically x24, x29 and x30 are categorical variables in the dataset.All other fields are numeric fields.\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "occupational-paraguay", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x24x29x30x32x37
0euorpeJulytuesday0.0%$1313.96
1asiaAugwednesday-0.02%$1962.78
2asiaJulywednesday-0.01%$430.47
3asiaJulywednesday0.01%$-2366.29
4asiaJulytuesday0.01%$-620.66
..................
159995asiaAugwednesday0.0%$-891.96
159996asiaMaywednesday-0.01%$1588.65
159997asiaJunwednesday-0.0%$687.46
159998asiaMaywednesday-0.02%$439.21
159999asiaAugtuesday0.02%$-1229.34
\n", "

160000 rows × 5 columns

\n", "
" ], "text/plain": [ " x24 x29 x30 x32 x37\n", "0 euorpe July tuesday 0.0% $1313.96\n", "1 asia Aug wednesday -0.02% $1962.78\n", "2 asia July wednesday -0.01% $430.47\n", "3 asia July wednesday 0.01% $-2366.29\n", "4 asia July tuesday 0.01% $-620.66\n", "... ... ... ... ... ...\n", "159995 asia Aug wednesday 0.0% $-891.96\n", "159996 asia May wednesday -0.01% $1588.65\n", "159997 asia Jun wednesday -0.0% $687.46\n", "159998 asia May wednesday -0.02% $439.21\n", "159999 asia Aug tuesday 0.02% $-1229.34\n", "\n", "[160000 rows x 5 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[:,['x24','x29','x30','x32','x37']]" ] }, { "cell_type": "markdown", "id": "forty-habitat", "metadata": {}, "source": [ "## Change Data Types\n", "\n", "- x32 remove % sign and convert to float\n", "- x37 remove $ sign and convert to float \n", "- Indices of non-numeric features are stored in indices_obj_features\n", "- Indices of numeric features are stored in indices_num_features" ] }, { "cell_type": "code", "execution_count": 10, "id": "comfortable-death", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x24x29x30x32x37
0euorpeJulytuesday0.001313.96
1asiaAugwednesday-0.021962.78
2asiaJulywednesday-0.01430.47
3asiaJulywednesday0.01-2366.29
4asiaJulytuesday0.01-620.66
..................
159995asiaAugwednesday0.00-891.96
159996asiaMaywednesday-0.011588.65
159997asiaJunwednesday-0.00687.46
159998asiaMaywednesday-0.02439.21
159999asiaAugtuesday0.02-1229.34
\n", "

160000 rows × 5 columns

\n", "
" ], "text/plain": [ " x24 x29 x30 x32 x37\n", "0 euorpe July tuesday 0.00 1313.96\n", "1 asia Aug wednesday -0.02 1962.78\n", "2 asia July wednesday -0.01 430.47\n", "3 asia July wednesday 0.01 -2366.29\n", "4 asia July tuesday 0.01 -620.66\n", "... ... ... ... ... ...\n", "159995 asia Aug wednesday 0.00 -891.96\n", "159996 asia May wednesday -0.01 1588.65\n", "159997 asia Jun wednesday -0.00 687.46\n", "159998 asia May wednesday -0.02 439.21\n", "159999 asia Aug tuesday 0.02 -1229.34\n", "\n", "[160000 rows x 5 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Indices of non-numeric features\n", "indices_obj_features = [24,29,30]\n", "# random state 1999\n", "# Indices of numeric features\n", "listoflist = [list(range(24)),list(range(25,29,1)),[31,32],list(range(33,38,1)),list(range(38,50,1))]\n", "\n", "indices_num_features = [item for list_id in listoflist for item in list_id]\n", "\n", "df['x32'] = df['x32'].astype(str)\n", "df['x32'] = df['x32'].str.replace('%', '')\n", "df['x32'] = df['x32'].astype(float)\n", "\n", "df['x37'] = df['x37'].astype(str)\n", "df['x37'] = df['x37'].str.replace('$', '')\n", "\n", "df['x37'] = df['x37'].astype(float)\n", "\n", "df['y'] = df['y'].astype(object)\n", "\n", "df.loc[:,['x24','x29','x30','x32','x37']]" ] }, { "cell_type": "markdown", "id": "miniature-therapist", "metadata": {}, "source": [] }, { "cell_type": "markdown", "id": "frozen-summit", "metadata": {}, "source": [ "## Missing Values\n", "\n", "Inspection of missing values begins with plotting missing data. missingno is the python module that enables us to visually inspect missing values as shown below. This is quick check to see if there are any missing values or NAs int the dataset. If dataset contains missing values they need to be analyzed further. \n", "\n", "The dataset that we have got has less than 1% of missing values and they don't cause any major change in distribution of data. So we have removed missing values in later section of this notebook shown here step by step.\n", "\n", "### Missingno Plot" ] }, { "cell_type": "code", "execution_count": 11, "id": "fabulous-exclusive", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABNAAAAFQCAYAAAB+uoOaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdd5wlVZnw8d+ZDjM9PQxJkqCAIkkRDJgDYQ1sUHRV1NVXVzeoqy4mXte8pjWwOK7omsVdXMU1wGvExChGFGV1RUQlKJhAJE7ome56/3jOpWvu3L59w5mp7p7f9/O5n+6ue+/TT+Wqp06dSlVVIUmSJEmSJKmzZU0nIEmSJEmSJC1kFtAkSZIkSZKkLiygSZIkSZIkSV1YQJMkSZIkSZK6sIAmSZIkSZIkdWEBTZIkSZIkSerCApokSZIkSZLUhQU0SdKCk1Jy/yR1kVJKCzmetBS5nmipcV8i9ccTFEk7tNaOfjEUbLbBQc6CG+eU0hhAVVUzpeKllCZLxMrxJlJKT0opLS8Vsy1+sXm8LebvQlxm6naEE4GU0ghAVVVVoXijheNNpJSOy78vuOnXrvQyvRjGuaTC26wisbbRtu/W9a5EnimlFSmlQ/LvC3oZ3BGOPeoW+vQrxX1JOQt9mVZZzmxJO6SU0mhKaXfgAJgt2BQ6MB4dNkYrl5znnjD8gXuOtXdK6V453lBFqpTSZErpmSmlfYaJU4u3GvhCSukvCsVbBZwNvCaltEeBeDsBPwDeCIznYQPvR/MJ1HEppaellO6RUlo9zIFnnr+rU0r7QpllOqU0klJaldeVkoXNkRJxavEm4dZ1ZJh5MpJS2rl1YltgnVuVUvrLQb/fId5q4EMppfsXircK+GhK6bUF4/0P8G9QptiQUlqeUrpnSukvU0r7Dlu8Tikty+ve6pzjsNvBZSml8VZeBU8eSxdVlnX6fZA4efrtA0XWuVUppQfVYg27vKwG1qSUDhsmTlvMVcB7U0ovguHncY53MfDuHG/YZXA8pXRISun4lNJEgfxaxx5FCiLuR4os1+5LhtsXL+j9iBYXC2iSdji5EPJJ4BvAt1NKa/NOdbdBD5zywc0bU0pjVVVtHvbALh88vBX4CvD1lNJ/pJRWDpMf8CHgPGKcz08pHZ7fGyTeKPBd4O3As1oHxoPKByE/AmaAbw8TK8dbBXwPWAFcAtxQIL8fAJPAbYDWidRAB015GfwC8H7gvcR8fklKaeUQ8T4MfJOYv+emlP48pbTrEMvMTsQy83Xg/JTSx/IB6GR+v6+D2RQF1xeklEarqpousI5MppRel1L6NPCZlNIbIebJEOvI+4GvAd9NKZ2TUhof9MA9RWvKi4H/Tik9q9/vd4i3E/C/wJ7ATwrF+y6wC/DHlNL4kPFWAxcBewCHpZSeAcOdfOccPwN8DPhvYtvwlHyCOsg8WQW8j5jHa1NKZ6aUjmitdwMs06uAdxHb1XNTSqenuEgxOmC8lSlauC4bdDnuEO+fUkofBs5MKT0Hbl1HBp1+bwfWEvul9wwZbxz4OTEvHlUbPtCJcl5eLgaOAH4zSIw5Yn4X2B+YKbSe/AC4PXCvlNIjC+T3SeBzwBeBb6WU/iS/N+h28K3A54H/l1I6NaV0m9b2eoBl2v3IEPuRHNN9CYPvSxb6fkSLjwU0STuUlNIEUWRYAbwNeDnQOuB5U0rpdv0eOOWd5leIosqHUxTRBj6wyzvnbwP3JA5yvgM8mmhN1XfRplZM2pMo1jwXuB1w2iDx8nc2A5cBvwReDLwyDVhEqx0s/Rz4P1VVXTNInFq8EeCdwNXA04D3VlU11eFzPc3jWn6XAX8G/BB4REpp7wHzW0kceN0E/C0xXz4FPB3YaYB4k8QyshfwEWLcDwQ+CrwupXTbAZfpb+eYHydO0O5OnKSdnFK6TT8nBDne+cCbifVsZMh1ZCfgW8DDgHVAAl6UUvovGGgdaZ0A3AH4f0RR5CHEQfJAB+5VVW0iTuKvBN6WcuuVQeRl8IfApcCTq6q6rsNnej5ozwWeDxLryF8Da+ZYR3qdv/V1+GHEeP9FPvEbSF6uvwlsAp4H3I042XsxMNLvPMnxvgccRlw8+W7O9XPAc1JKu/e5TLfiHZ5/XgWcRCznJ6WUVg0Q71tEgeq5wxbR8jL9deDxwN7AvYjtwZuh/2U6x/sOcBRwYX49vVZwGOTkdgK4GfgV8PGU0hNacQYorLTWkUuAJ1VVdX2Hz/Qbc4zYjl5FbJ9LrSeXEtuXzcAJ/eTUFm+S2E6PAa8CTiQuQr0ehjpWeACxDi8Dnk0UiB87wDLtfmTI/Uj+nvuSAfclC30/okWqqipfvnz52mFewKOAXwBHtA0/nThIPhu4XR6Weog3SlytvRr4NPD7HGMsvz/SZ37LiQOvLwIH1v7HM4iD7Sf2GW8F8KX8OqA2/IV5Ooy1fb6XcV6Wf76PaPX09zm3twC79xMPWJnz+CzRsqs13fYnWhGcQBQ4R/rIbzXwfeBZrc8D9wVOBtYAjwP2rY9Ll1g7EweaX65951HEScqTB1wGn00cwN6lNr4HAn8EDuo0reeJ93+J1nuH1obtS5xozBAngK3c551++XNPA34MHFwbNgGcA1yf5/UePc7jUeAdxIHwt/K6sqY2TwdZR84lWvDdMQ9bRdxaeyPw4D7jTQBfzfP4DrXh/wZcMOA8bi13n83ryGvzvHhB+zTrcR35WYd1ZJ+83NxrgPz2ysvgk2vz4Z7A/wFeBtyPvC73sI6s7rCO/EMe34cPMv1yjJcSJyeHMLvNuS9wHbBnP+sJcWL8L8SJz51qw++X152biJPy2/SR3wuJ7Ux9mTmYOKm6kbhQsarHWGPExY3f53l9JbG9WtbL+HWIt4K4qHNua3zzPD+LuOhxpz7jTRDbky+Tt1E557OBT3da9nuMuyzP4/8iWinNACe1j3MP68gk8FOi1dQewGgevlse74N6zakt7oHERawTazHvCvwFsX+5AzDZY46r87z9MrBfHvYGYD1wnwFyS/n73yRvB/PwxxPb2lXtn+8h3tuJbXRru7oceAJR4LyCWK936iNH9yND7Efq0wX3JQPtS1jg+xFfi/PVeAK+fPnytT1fxBWo62t/L6/9/mqiiHZGrztA4FBmTwD2BP6ZIYpowCOJq2N/1nZgdHvgGuCf+xzfk4irxw/Nf7cOIJ6aD/ZeCLyJuOI82WfsE4gD2dvmA6Vp4oB2p/z+bXuI8Yx8cPTR2rBH5QOyG/N7FwN/Q48H7sRJ7O+Bu+W/H09cXb6UKFKtI66KH5Lf73jQCYwQJw3fBvauDd83z/MLWwd5fU6304GftQ27J3A58IE8X05j9qB+voO69wE/qOedf96XuHX1OuLEaOc+cnwtcGXt77Ha7x/I0/GV9FAgYLYl5TvztDsT+DUDnvzk+flj4Pi24YcBG4Bn9Dk/nkWchD6wbfo9jzjReyrwfKK/xNEeY7bWs6cTLS8OJPo7mgFeVPvcnj3EenX+3um1YY/My+V1xAn4N4DjqW3P5ol5r7xstE7kT8p//5q4Un8zcbJ2+3nWkdG8jnwT2Kc2/O7ESfc5wOp+15Ec40PAt9uGHUtsH08DPkEUqW7bLcfadz8BfLF9PgF/RWy7riK2h71Ow3e2rXf17fUXiZOpp9B2kWKOWMcSFxLeRFw8+CZR6BqoiEa0BPkh8IC24ffJy9Kj+pwXLye2d0e3rSOvIQp/f0Gslyv6iNmK8QrgX4mWfJ/L+T2h9rnd5omTcg4zwMtqw/+cuHD0G+B3eVnsq5AGPDTPx9X578cR++Fr8//7PdHaa4954ozn9eQbbevJw4n90Rpif9NvofTTbF3APJE4wX8F0QLqRGDXHteRrwD/1TZsMs//GWK7+9heYuXPuB8ZYj+Sv+++ZIh9CQt8P+Jrcb4aT8CXL1+9vXo5WPHV03T8k7xzf0Rt2Gjt9zXEldH/08t0J652n8zsFbZdiAPq9iLavCdR+XMPJgpGu7X+P7NXIL8K/Hd7zvPEuyPwHLYsFE7m/3FV/vnLPM7PAsb7mJZH5/E8IOf5knwAcSpxZfciYGKeGPsSB9DTRGHpWGAj8B7i6vWj88HdTcQVzVunR5eYK/O4vYYo7l2Wc2u1LHwRcRX2a8Be88S6R/1grjb8xTnnh+W/5z1wr83HlwO/JU5slhG3V/0PccvCe/Ny8xvgglbOc8XL338H0frijm3vH5zH/QLihO9+vSzT+TN/nXM8eo715BPEAfeD8t9znvgRBYE3kYvSxDrzYbY++el1mX4MUcBsFWpTfk3k5fktfca7S54n9XVkFVFwvTrn+QeiwPmnvU7D/Lk/J07edyWu8v87cQLzj8Tttp9inqIDUaR/P7HdejFwXF5HPkSckL0gz/8rmD15m28duQ1RCPi7vJz8Jq8jhxIn+/+W5/8HmafwShQZOq0jbyFOyA6bbxnpsFyPESfYPyW3igB2J67yX5bXkW8QJ+Bn0aWAkdeRcaIPnK8Rt0nXWzfdkzhBuyTP7/27TUNm1+NX5+/cvjZspPa58/Myc+B845+n+5nMtsa5LbHday+i9VQgAJ5J9FO2sm0d2TvP15P7jHdPomXFeG3Y6jx+VxEnyeuIFnmt+d3rOvI3eTxXELdXfYZYRx5NrC/vYP515J5Eq5UrgCflZXIqLyevIS4g/I7c91iv+QEHEevJCcSJ/PXEtuJoohuET+bxfmW3HImWzE+k83ryoZzbbn3klYj9+Fqi4HC72v/5cZ4O5xNFgluIlmpzXoAi1pGVRNH1I3nYaG25Po7Y11+e4/dUKMX9SJH9SP6s+5I+9iUs8P2Ir8X9ajwBXwVn5g60kpYe1142xgshtxKxiWbrPd+iuJTmc455UN65fZxacYItD+rOB77X7zwin1zkg6bXEQec57QOqPIOeGfySU2XeKvrcWvDzwE+X582vUyj2meX5deVxInOUcRJ0DjRSu0y8pXqPsb9a8Dj8u8riCtv00TB66QeY+xJnIDMEFcsX0atNRyzt/n8gHkKkfmgaZwoRF1A3N74P8Cd2PLk9sXEicWx/Sx/tWk5SVwNP7/f5RQ4kjigu4Y4Ybkq/zyw9pl/JA4Y/66HeH+Sp92b2bKl3H2IE6g7EQeMH+kzx3XE7cn1E4LWbUwjxInUuX2uI611YVei1eav8/9orTutE7k553Oev7epx629dz5wxiDrRtvydnleR+5NFB0OycvR9+ivlUOrT6HWwfu+RN+LM3n5O75bnrX87kCcCKzP8+Wl1K7GE0Xsy4Av9zIviEL/WuJk4B+JYs1+bevI6UTB4NBex7dtXt+ROHHqeblri/OAPL6X51x/SWwD6rernZo/87Ae4v1Vnu7PbZvfx+e4dyW2jW/tMb8H0uFWKmZP5Pcn1u3/nG/+tn2vtY7sSbTw/RXRiqW17rW2cd22T7cnbwvaxnWUWG9f32kdYev1KXV6j1j/f5HXkQfn5fPheR59oYfluR73QGL71Lpl6yiiG4OZPG/v2p7XHDkdSbSgupZovfxSYJfaeD+YKNacNc98re8v92X2wsYziRZte7R9/pw8n7u2Wqdtu8HsenI8sU6/udM8nSfmI/J0+j7RQfoVREvBg2vLy1lEweSIHuKdQocWivn/fC/nei21lk/zLNND70fyfNhiO80Q+5FajDEK7Efy/9qqlShD7Ec6Le8MsS9py3HofUktVqLAvqTT+Lb9n772JW3L4ND7Eba8kF10P+Jrcb4aT8DXEDNv9mpJz02Be4xbrJjUKedC47xbiZjESXBfV8fnibcTcbXzgELTa5K4yvTUQvFWEQeDHy4UbyzvKI8uEa/UctIhXvvB0Ul5B/gmagUjZg/KHkEccN6j15jtw9myiHY2cQC/J3H7wXM75NTtymsr5tnUmo7n5e2F5FsV+4h3EvkqfG1Y62TwT/sc389Tu+WDKEzeTBTRTmOO2286jP9eRCfIHyDfWtk2T56eY3bsK6ZDvAcQV1b/APy4NnxF6/NEC4UX9jO+tfdHiav664AT5ltua/OwdRB2OHF712OIFgR/m4fXb3G5CXhzj/Fat9h8griK/DfEAet/5Pf/ibitrJ9bR56fp/mz24a3TlKeS5zcH9hDrNEOuddPflq3MO2Tl5tHzhOv04nRCHE1+SO1YTsRfajs3kd+K4hbi9vXkScTBd779jC+9QPrC4mOlVt/fzovN7cWX3qcH3fM68cZ5Kvb9dyJfc919fVnnnh/nnP4LfCd2vCJ/HO3/P5f9zO+tWHLif73fkM+ee+2jswR9z5Ecf3viSJLq1jfGufxnOM/9RBrN+LWp2miRcNfAY/N68l78mfeTdstbK28aTv2yMPX5HiPrI9fbRl/C3HCvOt88dhy3W8V0+pFtJPzd/Yj9i136xBv907x6vOWKKq/tTZsJ+Dpfea3P7FNuV3b916el8FDe4hXP8G+lC1vR/sCcRvdDPD49mWny/w4irgY9EG2vA2s1Vr3P4nWx3vXvzdPzKfleXw1W+5/W+vJXXKej5wj3u6dxrk2rNXJ+Y+YLeh0LI7Okd+fEq2KXkIc3/1p/X8R29QZ2m5JnGOetG6jnSaKIccThdE/tpaZ/P5b5hiP1vH0rbejMuB+pD1eh//X136EDsf7c0znnvYjPeTX936kU45t7/e1L+kwT1rbp4H2Je351eINtC+Zb3xrn+tpX9JlGRxoP9Jp+hEXnQfaj/haOq/GE/A14IyLk/O3EgcZnySuIKyqbcz6PUidAP6i9vdQxSTiys+z80bkreQDoEFyq31vkugn4zzi1rBXDpnj8ryDmGH2is7A403sYH9N3Lo2UJ8vHeL9mLhi8mJ6vAWwS7zVRKuXGaI49OACy+AniYPRGeLkvNU/St/zOMd7PVEkOpMoAqyovV90mWa2z6430NaPVd4ZXk2tI9heYrYPz9P8tUQR7fN53GZqO+S+1rs8Xc7Pv+9M9G8yw2wn0QOvx8TTIH/Blq3y5oxX29Y8DfhM/v1jxNXpRzB7JftfatNjvnmyN3BUp3me14Gr2PKkqGO8Wm6PyznMkG9ZysNHiCvjl7PlAXav87cVf1/ioOn9/SyDbeM1SWw3nlkbNpbz+znwlB7nxwhRkLuc2K79gTiZaBUg/504EW+f5t221XvmZWyaOHlof+DE3xNXc/fuMd6yDtNid+L2kV8Tfbm11pGD54j3hE7xasM+D3wi/74LUbCeIa/LveY3x/x8IXGiv3uP49u6ffs1wAfy7/9NrCNPYrb1wHN6iZffPxC4/xzryBuJdXjXPuI9j9l15K/alqf75/ny4AHmb2sdOYIohPxLl+naNcf8mdvk6fbothzvS5x8P7JLvPoysx+x7b+eaLVxC3F7X2s9+Qjwubb/PeexB1E8+Syxjjyiwzx5CbGP3KXHeJ1apO3FbBHt1cR+d4bZzvzb472qU7zWPCJa876jwzpywCD5tcV/HXHcsqrH8W2tI2cCb8y/fzTP639gdnvw6F7i5fcPo9avVVvO7yVOoCfbvtMtx12IFiozebl5SNt3H04UDo7qMd5WrfqIVsSbgX+cYx2Zcx63bRv+QNv2gXgK5G/Ycj3utszcjehCYRPRKmcTsS611pFz2bqftE7H063ld3f634/0dHxO7/uRvo73mX8/MvD5Ax32I/PFrP3sZ1/ScZ4wu23ud1/SKd5oLbd+9yW9zuOe9iXz5Zf/7mc/0ileK5d96HM/4mtpvRpPwNcAMy12fJcQzWTPJJrNbsh//xmzByS99j8xSexAryb3+5SHD1RMIgo/3yNub7qIOLC4Hnj+EOO8U453AXGl6Yu0nRQPEHNvoj+B3xMHCQ8YdLyZ7Qvki8zRqXiv8yN/dpzoz+OLRHPrjq1G+pjHq4km2Z8hWrxsJh8wDTi+rT60vkCcqDyVOAD61IDzYlVepv8n5/jtPH/PI/oeKb5MM9sx7jTRj0ark/39iIPFC9n6amPP6wmzB4+TzB7cXEc+yB5kvSMOar+f5+d7iZZedxswv/q02CuvV59i9vbRnuIRLdf+QKybfwD+JA+fIK5gHzZgfvUWQXsTt/R8boD8WkW064kWE6PECcK7iAOm2w+YX+ug8Y15PhzX7zJYWzcvIa4k3zkPux3R78/PmO1HY6547bcF7UMsw3euDbsdsS3517bPzrWtfkHtM60Oi6eJFptH5uF7Elewv0bu16RLvI7bfmav/u7CbFGgvo70vS8hWuCdS1z9fx/Riu9uA+aX2qbrfxMn95P9xCM6af4VcTHkOmbXkf2JE/PDh5l+tfzOzTn2nB9RiH8+sw/reEoefgixjvyc2c6V+55+RDHrLKL4fdcOn+l1Gu5D7KvfwWw/YXsTheEfM3v731zxXtgW747ErTb3rA07gLh16xVt+XU69qhPw/sw22rnOczeOrk7sW/5LLN9kfV1LMPsvm8PovA1Q7QGOmqQePk7XyVaaI0RLZfa15Gu8WhrCVb7fT/idsYzmD2R7Ck/4kLMD4jCxXXkAhjR19hH2HIdmTMeWxcMR9ryO5/YbtVvJ5wr5vNqnzmYaNE0TaxnreOFA4n98o/J/WkOOE/2JYqk32frC3q9TsODiRP5F9eG7UFsv7/P7HI5V7wXtMW7K3AMuS+s2npzAW2FPuY+nq4Xa3raj8wTr1OBpet+pN94efic+5EB8uu6H+knJj3uS+aJd2sRrX0a1nLcYl8yX7z8c2dmi2hd9yWDTEPm35d0za82bvPuR3pZpmvrw7z7EV9L79V4Ar4GmGlxm9N3mb0SMk4cwP08b1ieTO8dlo8RB1J/ZLZD8YGLaMTO5mvEQVDrxPkwotByMYM9sW4l0bz9S8xeTdqVKK7M2bdID3FXESeo78k7i/XMdrJd35n08vj0nxGFn31qO5PVeYcyyDgfkTfmD6vFO5zow+NxxEHHaI/51R+d3joROoModvT9aHfiCvZbga+zZR8C/0Ds2Je3fb6X/iPen+O1WlLtnuNdT/QN9aSCy3R9Z5ry+vKH/PoVUcS7hi0PvgZaT3L82xLFnz8yexLQVzxmD0LfRxy8vo1otn+3YfLL77Wa8v9ukPyIK3qfz595aNt7qUB+d8rfvZ7ZAlO/0+844qR6I7Ge/yq/jiqQ3/2JA8bXMbuu9pvf43OM/yXW068SLQbuWiC/uxDF1mvY8tbY+bbV9ZaIexHFxymij50fEtvk65g9ERpo20+s/3sSBcTrmF0G+4pXW9bOIfYVp7LlOjLwvonoT+p9xBX0w3qMt18tp8OIVi+XsPU6MjrM+ObfDyTW4ev6ya9tO3UScaKwgdgW/pxYBo8qMP1at/e030LW7zi/IMc5l3yFn9hu3bXHeHP2/Un0jfleouDW2g/1fOxBtBhttVL5HrEen0uss0cMEG+LVkrELVCfZct1pO9joxzrG8TJ8RvZch3pKx5bLsf75un3K2afbtzP+B6fx+1nRIup+n56xSDTry2/2xHHGX+gdntpnzFvS7Qe2pjn6y+I9frW44VB53H++6l5+fmzQfLL760hWoy9h+h36myixc1dB4k3x3bwKrZund/teLp1sWkv4m6ZOfcjPcbb6vicOfYjA8Rr7cM77kcGjdc2/W7dj/QRc4xYpg8ntmVz7kuGzHGrfUmP8VrTbRnwBLrsSwadx/n3jvuSXuLVlpUX0WU/0s/86JDDVvsRX0vz1XgCvgaYaXFF88u1v1sbhV2JosvVxC1UvXSg/HBiR/Ya4gkilxD9YwxURCOe4nch0WFmfaN3Yt5gPbDXWK1xI24B+w7xBKR6R47vIO4/fzA9dCDcFre1Qz+NKEjejzjovfUqQ35/vifaLMvzY4Ytb8E6gSiaXE60/HoH8zztry3uX+YdT6uPiMcSO93r8//6OXEg1+pfYK6OQ5cTO66vseWtb08kDrJeQduBZo/5dToYfEr+P88lbjM5ltmr0PMV0b5DvqWkNmy3PH9niCuzD+0xt16W6fZWO4cThclTidsJ2p9mONB6QpzQvTWPw5EF4p2WY10L3L1AvFPztL1smPyI9f2uzN1SctD83kq0Ihk4P2a3F/vmPJ9HbB9vN2x+bdPx8GHiEQeG5xGdYJ9K7eBriOl3f6JYfiVbn6D0va3Onz2FOMB+CfkEbNB4+f1VzBYejiwQ7z+YbYFw9wLxXkUUNH85TH5EP0VHM8eFjyHyex2x/bxymPzye4cQLRxeQzzx9sBh88ufGSFay7f3i9XvNFxBPOntZ8T++t1sWRQedBo+jLj96ypmCyG9Hnuc0Bbr4cTyfA5xi9yhfcbb6liGOLb7ALV1ZMh4radc/pHcz+eQ8V5ItGL69QDT72G1OH9FtGjeah0ZMr+XEMcmt87fIefxPYnW1f9O7E8OGibH2mf2YsvC1iDT8PZEtxE3EF12fJjaU1GHmIaPYrYPqvZCSK/H063jwfswx36kz3j1Lj467keGiNdxPzJEvFfRYT/Sb8z8mQcC96LLRfQBc+y4L+kzXuuc5FDm2JcMkh+zy+lc+5J+4/09c+xHhph+W+1HfC3dV+MJ+OpjZs1uQE4nWqDtXt+o5J+7EPfWf48unZHWYt6XaKq8e/77yPz9rgWHOWItI3aG59U2UvWnvlzXikl/Rbk/zRuxege2uxInlL8krnJMEVdn9+s1bo7zcuIEPRFN1b9DXG06mmjlc2urknnyu4A4qT0+vzYRV8FPJ5ocryOugnfsUL1DzKPz9Lpn/v3mvBE/Js+jbxEHv8/plh9xRe5Z1JpN1977cp7PrWWnpyfW5Wn/A+JKX+ux6zsTV8V+ld/7HXFV9hRqj72fY5nZjSg0vi0Pq8/nxxFP07oWOK/HadfXMt3jeA+0nhA7+wey9c550HiPy8vFnQvFO4E4aG4vGPYar9dWgYPmdwzR9P+gEvG2QX5bPcFxmGUQ2GmO/zNofnsRB7H7d1jvet1Wz7ue9BmvfVpNEC1MDysU7znExYc7DxuPKNg8hTgxu9OA8eZdR4Yc35OI1omD5rdN5299Wg67DNa+uxuxLxovkSNRdPgbtj7J6/XY4/Ns2aJvZI7/M9CxTP7MS9l6uz9ovDcQ+9Sh4xEXJl5EFOrbCyG9xvsCsGeneV0gv92Ji3pn0eHhGn3E/BxtD0womGO9ULjTEMtg/Z0KdbwAACAASURBVPa4fYjt10ShZeZOxPo1510LdD+ePp3o47bnvnznibfF8Tkd9iNDxttqPzJoPObYjwwQ8x3AP9PDMeuA47zVvmSAeP+yreZx7TtzNmzoYRl8HbOFx632I0NOv477EV9L89V4Ar76mFmzJ1mt232emP9uHSS2VuK7E/frv6HHeK1WTq2rQ0fQ+eRs3id+EreD7VPPK/++E1GV7/jEsbl2CLUc6815R5l9VPdDiZPJJxGFlg/0OM6tn0fmjWPrxPVBRHFqM1G0OqJ94z1HvIfkfH5LXPl7KbMnuyuJq3dTtPVB1CXeYTnW64n+aT7Llp15jhJFux/RdoDUw3LUWk7+kuiPYs7HkXfJ7x/yMvgVosBxBVHUPZy4pbh1K82vaDt5nyPeGuLA7d5t759EFPpOJPr2eMo2WKa77uwHjDnnejJojrXv7VI4XnuHvv3GW7aNxndZofy2xfwoPr6dYgyTH23bug55DrqtnquV4UDx2j9fIL89yRePCsVbRttt6UPG67g/GXL6lcyv6PydZ10ZNGbHPjFLLoMMcexBh3VvmHidpuOQ+R1CrUV6gXidCjX9xjtjrnlTIL/R9vxKzJPCOXZqRbRgpmH+7FwXkPo5nu5l3zlQPOZeh/uORxRXblMgXv32yE7b6W11TlIkxwUyj0uPb+t22G7b6b7Gt/b5gfoO97X4Xo0n4KvHGbX1weCZxA7uHvX3ySezRPPjr9HWSeUc8Va3D2fLk7Mn52H7Ey2Ktro6wZY7sZ07vL8LcWvpy9rG41E9jnM9x8OITkj3r/9/4navq2h7ZPRc07CVK9Fa6im1984j+rjYSH5CTaeNYvsw4urEhcT973vWhrc6v/xcfn/X+fLLf7+EKLr9AvhobXirifSDiCLWg+eIV98p7tTh/b2JJv5fpfbErF7mR/77qUSHrW8kWpAd07YM3Tnn97ge5scxRF8YNxC3qx5N3EpwPXBq/sw3aXvE9LZcprfRemK83uMdvMDzW3Tx8vtFt9ULKN6jF3i8hT79tku8xZBjl/WuxLFHX/HocnvWAPEOKJxf6Xilp9/+neItsBwXfLwelsGix9MDxOv0NNhh4m3z8d0BpmHp/BZ6vJ7udvC1dF6NJ+CrzxkWfQY8nihOXEjcxtfqALZeRT+N6KBzvj683k9+ZDxtFXSiT6OfEidnzwc+Tu1R573Gy7+PE7c4vjr/vTPRMeMM0TFrt9tHbo1ZG7bVFQ7iEcI/YP4WRfUcR4gnDz4//916fPqLiI52Z2hrFdUh3geBJ+TfH0Str662DfLHiea/8+X3QaITzpXAf+YcfkntKS/5c48j+qO4Yx/j23515Uk5/mP6WAZvHd/actKpE9gTiVZ095gn3rvzMn134rbQGeLKznTOvdXa4NvAO3vIr+gyvb3WE+MZr+llOv8+0LbaeMbrJ95iyJFteOxhvObjLYYcF1M8yhxP71DxFkOOxht+Hvtaeq/GE/DVx8yKTjKvra3IJxB9nd0APJp85ZXo8+FjxFOWut0rvop4Usop+e96safVjPkuRAuHVoezc3aMOE+8ZURB77S8QXof0YLu7j2Mcz3mFk9QrP2+P9Ex7tvpssNvj5eHvZx45PDZRCHo2Dz8IUTHuIf2EO/FHd6r53oAUQA6vYf8riHfVgncg9nOTM8kF9GIJ0q9jzjA2ep2pV7mSf77YOLE+8vkW057mB/XsOVj0o8mbr98cm3YHkRLvO9Qa403R7w/kB8XTzz04Hii36Z6IfIwohn1U/tcXoZaphtYT4xnvKaX6b631cYzXj/xFkOOHeIVPfYwXrPxFkOOiy1eHlbieHqHiLcYcjTe8PPY19J8jaIFK6WUqryWppRGidv5fk905klVVZ9LKd1AVMI/Bnw5pXQdMEk8fe2BVVVtmCfetUQTbaqqmml9tqqqzSmlRPRhdTVwG+LJIxcPEo+4AjxGtKp6LdHC6v5VVf2gh3Gux5zu8Ll9gJcRrfKeV1XVpl7jZT8CXky0ljqJuPWVqqq+mFL6RlVV63qIt1/beCyr5bovsQG+I9FX0Xz5XUM8Spqqqi5MKb0yD3secFxK6dfEbaEHAMdVVXXtgPOEqqouTSmdSTwp545EMWu++XFNfXyrqvpuSukc4N9TSocCFdHHynHEbZ2/nyfe75hdpjcSxbx6DvsCLyA6ul3bQ34DL9PbIqbxjNdkvH5j0sO22njG6yfeYshxex97GG/7xlsMOS72eFmJ4+klGW8x5Gi84eexdhDVAqji+dr6xdz3zJ9FPG2nfuVoAng60bT0q8QtcYf3Ge/WjrHh1luEbgP8F9Gq4a7DxCOuAH89x7qBDrf19ROz9t7ziCtlv2WOxy53i1cb9nyiz62OT88aIr8XE1co5nz0d7d53JbjnwGvzu+/kjke/d3PPMm/H0TcDtz+JMZ+4h1BXNXfTPSH9skBl8EtOocmHnRwFlFk63f69bVML4T1xHjGa3qZZp5ttfGM10+8xZBjP/Fq7w117GG87RdvMeS4FOLVhhU5nl5K8RZDjsYbfh772nFetkBboKp89TSl9C7idr0LifutlxOtdVallG6swnrgfSml9+evj1ZtV956iLe6qqob8mer/LUE/JrYkf5oyHhVSumnwN2A+1VV9eMBxvnWmPlzuxGdA68nWjpd0m+8lNLNVVVNV1V1Wi1uqtpaag2Y3wriqvdGoslvv/mtqserquozwGfqV0wGza81T/JXfwkcX1XV9f3GI26DIS8fT08pvZ64hWyqqqqbh8wPohh3BdGa8tIC8eZcprdFTOMZr8l4A8bsuq02nvG25b59oY9z/tzQxx7G237xFkOOSyFe6ePppRRvMeRovOHnsXYcrSvYWoBSSgcST8KZIDq6bTUpnQR+DPyc6Dz6cuIWnkuqqrqsQLxfELcDfRe4rsrNvIeIdxnxNJ8J4JtVVf2mQI4/I1p1/Qr4WVW7VXWAeJfmn1cDF1dVdUWB/H5OnNReDlxRRZFzmHitefyTqqouL5Bfax6XiHcpMY9L5tdaZr4L/GGunVXpZXpbxDSe8ZqM12fMnrbVxjNeP/EWQ44NHXsYbzvFWww5LpF4pY+nl0y8xZCj8Yafx9oxWEBbJFK0ZrodcG/gjcD/Eh233424arSRuC3hl4XibSKe7lki3q7ALcC9eo3XY8wNlB3nktNwoee3LeJNEZ0yL8RlsO94iyFH4xmvn3g9xOx7W2084/UTbzHkuJ337cbbzvEWQ46LPN5iOF5tNN5iyNF4w89jLWHVAriP1NfcL2aLnK2fhxP9eNwn/z1KVMr3WgrxFkOOxlva8RZDjsYznsu08RZTvMWQo/GWdrzFkKPxlna8xZCj8Yafx76W/msZWtCqqqpaP1NKy4AriQ4+71H7yC1VVf1uKcRbDDkab2nHWww5Gs94LtPGW0zxFkOOxlva8RZDjsZb2vEWQ47GG34ea+mzgLaIVFU1U1XVLUR/HvfKw+bs92axx1sMORpvacdbDDkaz3hNxzSe8ZqOaTzjNR3TeMZrOqbxFlY8LV0W0BaRlFLKv14D7J1SGl/K8bZFTOMZr+mYxjNek/G2RUzjGa/pmMYzXtMxjWe8pmMab2HF09I12nQC6l1VRTNT4EzgR1VVTS3leNsipvGM13RM4xmvyXjbIqbxjNd0TOMZr+mYxjNe0zGNt7DiaenyKZySJEmSJElSF97CKUmSJEmSJHVhAU2SJEmSJEnqwgJaj1JKj0kpvS2ldH5K6caUUpVSOrPpvCRJkiRJkrRt+RCB3r0MOBK4mXi87aHNpiNJkiRJkqTtwRZovXsecDCwGnhmw7lIkiRJkiRpO7EFWo+qqjqv9XtKqclUJEmSJEmStB3ZAk2SJEmSJEnqwgKaJEmSJEmS1IW3cG5HxxxzTFUq1po1awA4+eSTF2S8bRHTeMZrOqbxjNdkvG0R03jGazqm8YzXdEzjGa/JeNsipvHKWLt27WLpt6lYjaGbY489duDvrlmzhiOPPLJgNnPa5vPMFmiSJEmSJEkqbuXKlU2nUIwFNEmSJEmSJBVXVdulkdx2YQFNkiRJkiRJxU1PTzedQjEW0CRJkiRJklTcyMhI0ykUYwFNkiRJkiRJxU1NTTWdQjE+hbNHKaUTgRPzn3vnn/dNKZ2Rf7+2qqoXbvfEJEmSJEmSFqCxsbGmUyjGAlrvjgKe0jbsDvkFcCVgAU2SJEmSJGmJ8RbOHlVV9aqqqlKX1wFN5yhJkiRJkrRQLKUWaBbQJEmSJEmSVNxNN93UdArFWECTJEmSJElScRMTE02nUIwFNEmSJEmSJBW3efPmplMoxgKaJEmSJEmS1IUFNEmSJEmSJBW3cuXKplMoxgKaJEmSJEmSivMWTkmSJEmSJKmL6enpplMoxgKaJEmSJEmSikspNZ1CMRbQJEmSJEmSVFxVVU2nUIwFNEmSJEmSJBW3adOmplMoxgKaJEmSJEmSipuammo6hWIsoEmSJEmSJKm4ycnJplMoxgKaJEmSJEmSirMFmiRJkiRJkrSDsIAmSZIkSZKk4nwKpyRJkiRJktTFTjvt1HQKxVhAkyRJkiRJUnE33HBD0ykUYwFNkiRJkiRJxY2PjzedQjEW0CRJkiRJklScBTRJkiRJkiSpi/Xr1zedQjEW0CRJkiRJklScLdAkSZIkSZKkLjZu3Nh0CsVYQJMkSZIkSVJxKaWmUyjGApokSZIkSZKKGxkZaTqFYiygSZIkSZIkqTgLaJIkSZIkSVIXFtAkSZIkSZKkLnwKpyRJkiRJktTFunXrmk6hGAtokiRJkiRJKu6WW25pOoViLKBJkiRJkiSpuJUrVzadQjGjTScgSTuyo446irVr1xaLd9FFFxWLJUmSJEnDmJ6ebjqFYiygSVKDLrroIk4++eQisdasWVMkjiRJkiSVsJRaoHkLpyRJkiRJkorbsGFD0ykUYwFNkiRJkiRJxaWUmk6hGAtokiRJkiRJUhcW0CRJkiRJklTc2NhY0ykUYwFNkiRJkiRJxfkUTkmSpEXoqKOOYu3atcXiXXTRRcViSZIkLTUzMzNNp1CMBTRJkrTDuOiiizj55JOLxFqzZk2ROJIkSUvVsmVL58bHpTMmkiRJkiRJWjCmpqaaTqEYC2iSJEmSJEkqbnx8vOkUirGAJkmSJEmSpOImJiaaTqEYC2iSJEmSJEkqbtOmTU2nUIwFNEmSJEmSJBW3efPmplMoxgKaJEmSJEmSipuenm46hWIsoEmSJEmSJKk4HyIgSZIkSZIkdTE1NdV0CsVYQJMkSZIkSVJxY2NjTadQjAU0SZIkSZIkFbdhw4amUyjGApokSZIkSZKKswWaJEmSJEmS1MXMzEzTKRRjAU2SJEmSJEnFjYyMNJ1CMRbQJEmSJEmSVNy6deuaTqEYC2iSJEmSJEkqbvny5U2nUIwFNEmSJEmSJBW3cePGplMoxgKaJEmSJEmSiluxYkXTKRRjAU2SJEmSJEnFTU9PN51CMRbQJEmSJEmSVNyyZUun7LR0xkSSJEmSJEkLxqZNm5pOoRgLaJIkSZIkSSpu8+bNTadQjAU0SZIkSZIkFTcxMdF0CsVYQJMkSZIkSVJxU1NTTadQjAU0SZIkSZIkFTc6Otp0CsVYQJMkSZIkSVJx4+PjTadQjAU0SZIkSZIkFXfjjTc2nUIxFtAkSZIkSZJU3J577tl0CsVYQJMkSZIkSVJx69evbzqFYiygSZIkSZIkqTgLaJIkSZIkSVIXu+yyS9MpFGMBTZIkSZIkSR3tv//+A393ZGSkYCbNsoAmSZIkSZKkjq688sqBvzszM1Mwk2ZZQJMkSZIkSVJx69atazqFYiygSZIkSZIkqbiqqppOoRgLaJIkSZIkSSpuenq66RSKsYAmSZIkSZKk4uwDTZIkSZIkSepiYmKi6RSKsYAmSZIkSZKk4jZs2NB0CsWMNp2AJEmSJGn7Wbt2bdMpSNpBLF++vOkUirGAJkmSJEk7kGOOOaZInDVr1hSJI2np8iECkiRJkiRJ0g7CApokSZIkSZKKm5ycbDqFYiygSZIkSZIkqbhrr7226RSKsYAmSZIkSZKk4sbHx5tOoRgLaJIkSZIkSSrOhwhIkiRJkiRJXey8885Np1CMBTRJkiRJkiQVNzU11XQKxVhAkyRJkiRJUnEjIyNNp1CMBTRJkiRJkiQVZws0SZIkSZIkqYuxsbGmUyjGApokSZIkSZKK27RpU9MpFGMBTZIkSZIkScV5C6ckSZIkSZLUxYoVK5pOoRgLaJIkSZIkSSpu8+bNTadQjAU0SZIkSZIkFTcyMtJ0CsVYQJMkSZIkSVJx09PTTadQjAU0SZIkSZIkFWcLNEmSJEmSJKkL+0CTJEmSJEmSupiYmGg6hWIsoEmSJEmSJKm49evXN51CMRbQJEmSJEmSVFxVVU2nUIwFNEmSJEmSJBW3fPnyplMoxgKaJEmSJEmSivMpnJIkSZIkSVIXPoVTkiRJkiRJ6mJ0dLTpFIqxgCZJkiRJkqTifAqnJEmSJEmS1MXKlSubTqEYC2iSJEmSJEkqbt26dU2nUIwFNEmSJEmSJBVnCzRJkiRJkiSpiw0bNjSdQjEW0CRJkiRJklTczMxM0ykUYwFNkiRJkiRJxW3evLnpFIqxgCZJkiRJkqTidtlll6ZTKMYCmiRJkiRJkoqbnp5uOoViLKBJkiRJkiSpuBtvvLHpFIqxgCZJkiRJkqTixsbGmk6hGAtokiRJkiRJKs6HCEiSJEmSJEldrF69uukUirGAJkmSJEmSpOKWLVs6ZaelMyaSJEmSJElaMHwKpyRJkiRJktSFBTRJkiRJkiSpi5mZmaZTKMYCmiRJkiRJkoqbnJxsOoViLKBJkiRJkiSpuGuuuabpFIqxgCZJkiRJkqTixsfHm06hGAtokiRJkiRJKm5qaqrpFIqxgCZJkiRJkqTiJiYmmk6hGAtokiRJkiRJKm5sbKzpFIqxgCZJkiRJkqTiNm3a1HQKxVhAkyRJkiRJUnEW0CRJkiRJkqQuUkpNp1CMBTRJkiRJkiQVt3z58qZTKMYCmiRJkiRJkopbtmzplJ2WzphIkiRJkiRpwZiammo6hWIsoEmSJEmSJKm4sbGxplMoxgKaJEmSJEmSivMhApIkSZIkSVIXVVU1nUIxFtAkSZIkSZJU3ObNm5tOoRgLaJIkSZIkSSrOWzglSZIkSZKkLjZt2tR0CsVYQJMkSZIkSVJxy5cvbzqFYiygSZIkSZIkqbibb7656RSKsYAmSZIkSZKk4iYmJppOoRgLaJIkSZIkSSpubGys6RSKsYAmSZIkSZKk4qampppOoRgLaJIkSZIkSSpudHS06RSKsYAmSZIkSZKk4mZmZppOoRgLaJIkSZIkSSpufHy86RSKsYAmSZIkSZKk4tatW9d0CsVYQJMkSZIkSVJxtkCTJEmSJEmSdhAW0CRJkiRJklTc1NRU0ykUYwFNkiRJkiRJxU1MTDSdQjEW0CRJkiRJklTc9PR00ykUYwFNkiRJkiRJxY2NjTWdQjEW0CRJkiRJktTRsmWDl44soEmSJEmSJGnJm5mZGfi769evL5hJsyygSZIkSZIkqTifwilJkiRJkiR14S2ckiRJkiRJUhcppaZTKMYCmiRJkiRJkoqrqqrpFIqxgCZJkiRJkqTiJicnm06hGAtokiRJkiRJKu7mm29uOoViLKBJkiRJkiSpOJ/CKUmSJEmSJHUxMTHRdArFWECTJEmSJElScTMzM02nUIwFNEmSJEmSJBW3bNnSKTstnTGRJEmSJEnSgrFp06amUyjGApokSZIkSZKK27x5c9MpFGMBTZIkSZIkScWNjY01nUIxFtAkSZIkSZJU3MjISNMpFGMBTZIkSZIkScV5C6ckSZIkSZLUxczMTNMpFGMBTZIkSZIkSerCApokSZIkSZKKm5ycbDqFYiygSZIkSZIkqbjp6emmUyjGApokSZIkSZKKGx0dbTqFYiygSZIkSZIkqbiNGzc2nUIxFtAkSZIkSZJUnC3QJEmSJEmSpC6qqmo6hWIsoEmSJEmSJKk4HyIgSZIkSZIkdbF58+amUyjGApokSZIkSZKKm5mZaTqFYiygSZIkSZIkqTgfIiBJkiRJkiR1YR9okiRJkiRJUhcrVqxoOoViLKBJkiRJkiSpuJGRkaZTKMYCmiRJkiRJkopbtmzplJ2WzphIkiRJkiRpwfjd737XdArFWECTJEmSJElScXvssUfTKRRjAU2SJEmSJEnFbd68uekUirGAJkmSJEmSpOLGxsaaTqEYC2iSJEmSJEkqzhZokiRJkiRJUhc33nhj0ykUYwFNkiRJkiRJxa1YsaLpFIqxgCZJkiRJkqTiRkdHm06hGAtokiRJkiRJKm7Tpk1Np1CMBTRJkiRJkiQVZws0SZIkSZIkqYuqqppOoRgLaJIkSZIkSSrOApokSZIkSZLUxcjISNMpFGMBTZIkSZIkScVt3ry56RSKsYAmSZIkSZKk4jZu3Nh0CsVYQJMkSZIkSVJxq1atajqFYiygSZIkSZIkqbipqammUyjGApokSZIkSZKKGxsbazqFYiygSZIkSZIkqbiqqppOoRgLaJIkSZIkSSpuxYoVTadQjAU0SZIkSZIkFXfTTTc1nUIxFtAkSZIkSZJUXEqp6RSKsYAmSZIkSZKk4qanp5tOoRgLaJIkSZIkSSpufHy86RSKsYAmSZIkSZKk4jZs2NB0CsVYQJMkSZIkSZK6sIAmSZIkSZKk4latWtV0CsVYQJMkSZIkSZK6sIAmSZIkSZKk4mZmZppOoRgLaJIkSZIkSSpu2bKlU3ZaOmMiSZIkSZKkBeOGG25oOoViLKBJkiRJkiSpuJ133rnpFIqxgCZJkiRJkqTi7ANNkiRJkiRJ6mLDhg1Np1CMBTRJkiRJkiQVt3z58qZTKMYCmiRJkiRJkoqbnp5uOoViLKBJkiRJkiRJXVhAkyRJkiRJUnGTk5NNp1CMBTRJkiRJkiQVd8sttzSdQjEW0CRJkiRJklTcsmVLp+y0dMZEkiRJkiRJC8bo6GjTKRRjAU2SJEmSJEnFrV+/vukUirGAJkmSJEmSpOJGRkaaTqEYC2iSJEmSJEkqzls4JUmSJEmSpC4soEmSJEmSJEld2AeaJEmSJEmS1EVKqekUirGAJkmSJEmSpOLGx8ebTqEYC2iSJEmSJEkqzqdwSpIkSZIkSV1MT083nUIxFtAkSZIkSZJU3NTUVNMpFGMBTZIkSZIkScVt2rSp6RSKsYAmSZIkSZKk4lasWNF0CsVYQJMkSZIkSVJxmzdvbjqFYiygSZIkSZIkqbiJiYmmUyjGApokSZIkSZKKm5mZaTqFYiygSZIkSZIkqbiUUtMpFGMBTZIkSZIkScUtW7Z0yk5LZ0wkSZIkSZK0YIyMjDSdQjEW0CRJkiRJklTc1NRU0ykUYwFNkiRJkiRJxd1www1Np1CMBTRJkiRJkiQVt+uuuzadQjGjTScgSZKkxeOoo45i7dq1xeJddNFFxWJJkqSFZd26dU2nUIwFNEmSJPXsoosu4uSTTy4Sa82aNUXiSJKkhcmncEqSJEmSJEldjI2NNZ1CMRbQJEmSJEmSVFxVVU2nUIwFNEmSJEmSJBXnLZySJEmSJElSFzfddFPTKRRjAU2SJEmSJEnFpZSaTqGYngpoKaXHpJTellI6P6V0Y0qpSimdOc93RlJKf5NS+lpK6Y8ppfUppctSSmellA6e4ztPSSldkFK6OaV0Q0ppbUrpz+f5H89LKf0wx78upfTZlNL9unxnIqX0zymln6aUNqSUfp9S+mhK6bAu39ktpbQmpXRFSmljSunXKaX3p5T26zYNJEmSJEmSdlQ74kMEXgY8GzgKuHq+D6eUVgFfAN4D7AR8EHgr8A3g3sBWBbSU0qnAGcA++XtnAkcAn0opPbvD5xPwEeA0YBw4Hfgk8CDgaymlR3b4znLgi8ArgBtzTl8CHgV8L6V07w7f2R34FvCPwC+AtwAXAH8NXJhSusN800OSJEmSJGkuKaV/yo2VTq8N2yuldEZuxLMupfT5lNKdmsyzX5s2bdpu/yul9Ko8Deuv39beT/kzv86NsNamlO7ca/zRHj/3POAq4OfAg4Hz5vn8u4DjgGdUVfWu9jdTSmNtf98PeAFRoDq6qqo/5uFvBi4ETk0pfbqqqitqX3s88Bjgm8DxVVVtyN95J/B14D0ppa9UVVW/4fb5wP2BjwEnVVU1k79zFnA28P6U0hGt4dnriYLfaVVVvaCW83OJAtw7gIfPMz0kSZIkSZK2klK6D/B3wA9rwxJRp5gBTgRuIGoaX0opHV5V1S1N5Nqv8fHx7f0vfwocU/t7uvb7KUTt6an5c68AvphSOqStdtRRTy3Qqqo6r6qqn1U9PH80pXR34InAWZ2KZzleewnyGfnn61rFs/y5K4C3A8uJFl91z8w/X9YqnuXvfBc4C9iDKLC18kq1/3NKvUhWVdU5wPnA4USBsPWdVcCTgVuAV7X9/9OBK4GH2QpNkiRJkiT1K6W0M/Ah4GnAH2tv3Qm4D/CsqqouqKrqp0QdZAJ4wvbKb3p6ev4PdfHDH/5w6Bh92lxV1W9rr2vg1prQycAbqqr6eFVV/ws8hbhr8om9BN4WDxFo/eMPp5R2Tik9KTdF/LuU0kFzfOe4/PPzHd77XNtnSCmtAO4HrCMKX/N+B7gjcHvg0qqqLu/xO/chFs5vtFcjcwHu3PznsR3iSZIkSZIkdfNu4GNVVbXf6bc8/6w3GJoBNgIP2B6JTU9Pc8oppwwV44wzzuCUU07ZnkW0O+RbNC9PKX2k1uDpQGBvorsxAKqqWg98jagvzWtbFNCOzj/3J27J/E/iNsh3AZemlN6eUhppfTilNAnsC9xcVdVvOsT7Wf5Z7zftjsAIcFlVVZt7/M4h+eelc+Rd6juSJEmSJEldpZT+FjiI6He+3SXAL4HX5wcbNrlLqAAABR5JREFUjqeU/i+wH9F3/DZ3wQUX8JOf/GSoGBs3buTiiy/mggsuKJRVV98hbs98OPC3RMHsm7lv+73zZ37X9p3f1d7rKvVwV+aWX0jpGKIPtA9VVfWkDu//BDiUuM/0bGJBuIp4eMA7iYXjn6uqelX+/G2JBxNcXVXVVk+1zP2lTQFTVVUtz8PuRzyQ4BtVVW1Vec2d6l1KtDY7JA97ItEscq68H0JUIr9QVdXD8rCXAK8jbi3daoHOC/u7gXdXVfX3c0wySZIkSZKkW6WUDiH6b39Avj2TlNJa4H+rqnp2/vsewPuAI4kay5eIPtFSVVUnbOscjz322JcT3VnVG1/NAK8877zzXlvqO9tK7pbrMuANwLeJOtL+VVX9svaZ9wP7tupA3fT6EIF+tCbSJURH/a12el9OKT0G+D7w/JTS66uqmtoG/1+SJEmSJGkhuy9wG+DH0T0XEHfaPSil9AxgsqqqC4Gjcj9p41VVXZNS+g7wve2R4Hnnnfca4DXb+jvbSlVVN6eUfkz0J3d2HrwX0bKP2t+/bf9uJ9viFs7r889P1YpnAFRV9T/A5UQnbYflwTfknzvPEa81/PrasIX8HUmSJEmSpG7OBo4Ajqq9vgd8JP9+a4OjqqpuyMWzOwH3BM7Z/ukuPrn//EOB3xC1qN8CD2l7/4HAN3uJty1aoP0UuBdzF5VaT5WYAKiq6paU0tXAvimlfTr0g3an/LPeD9kviOaLd0gpjXboB63Td36af87VX1mp70iSJEmSJM2pqqrraaubpJRuAa7LT4gkpfRY4FrgSqLY9lbg7KqqvoC2klI6FfgU0cJsT+DlwCTwwaqqqpTSGuAlKaVLiDrOy4Cbgf/qJf62aIH2pfzzLu1vpJSWM1t0uqL21lfyz4d3iHdC22eoqmoDUSFcSVQL5/0OUXT7JXBwSunAHr/zbWA9cP+U0k5t47IMeGj+s/1pGZIkSZIkScPYB/gPoousfyMe0viERjNa2PYDPkw0hvoE8cTS+1RVdWV+/03AW4C3E6399gEeWlXVTb0E3xYPEZgkKnl7EJ3hXVB777XAS4Hzqqo6rja89VCAXwBHV1X1xzz8AOBComJ4aFVVV9S+8wSiSvhN4PhcVCOldDTREd8NwEFVVd1Y+84/EU8E/RjRP9tMHv5IovnkxcARreH5vXcBfwecVlXVC2rDn0tUf8+tqqpT4U+SJEmSJElLQE8FtJTSicCJ+c+9gYcRTzI4Pw+7tqqqF9Y+/xDg0/nPTxBP2bw38ADg90Rh7Wdt/+NfgecTT+z8GDAOnATsDjynqqrT2z6fgI8CjyGqsZ/Knz0JWAH8ZVVV57R9ZznRwux+RLXxy8DtgccS9xcfV1XVd9q+sztRpDs4f/cCov+2R+ZxuV9VVb/oNv0kSZIkSZK0ePVaQHsV8MouH7myqqoD2r5zJHG/6YOJzvZ/C3wGeE1VVb+e4/88FfgH4HDiMaffB95cVdWn5/j8KPAc4GnAQcAG4FvAa6uq6tgJXEppJfBiotnj7YEbgbXAK6uquniO7+xGjP+JRBO/PwCfA15RVdVVnb4jSZIkSZKkpaHvWzglSZIkSZKkHcm2eIiAJEmSJEmStGRYQJMkSZIkSZK6sIAmSZIkSZIkdWEBTZIkSZIkSerCApokSZIkSZLUhQU0SZIkSZIkqQsLaJIkSZIkSVIXFtAkSZIkSZKkLiygSZIkSZKk/9+OHQgAAAAACPK33mCCwggYAg0AAAAARgW1RkrPpzv8AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "msno.matrix(df.iloc[:,:-1],figsize=(20, 5)) " ] }, { "cell_type": "markdown", "id": "domestic-interface", "metadata": {}, "source": [ "### Featurewise missing percentage" ] }, { "cell_type": "code", "execution_count": 12, "id": "japanese-fourth", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
%Missing
x00.016250
x10.015625
x20.023750
x30.023125
x40.016250
x50.023125
x60.016250
x70.016875
x80.013125
x90.018750
x100.026875
x110.018750
x120.022500
x130.019375
x140.021250
x150.021875
x160.016250
x170.016875
x180.025000
x190.021875
x200.023750
x210.018125
x220.016875
x230.029375
x240.017500
x250.013750
x260.022500
x270.018750
x280.021875
x290.018750
x300.018750
x310.024375
x320.019375
x330.025625
x340.025625
x350.018750
x360.016875
x370.014375
x380.019375
x390.014375
x400.022500
x410.025000
x420.016250
x430.023125
x440.025000
x450.018125
x460.019375
x470.023125
x480.020000
x490.020000
\n", "
" ], "text/plain": [ " %Missing\n", "x0 0.016250\n", "x1 0.015625\n", "x2 0.023750\n", "x3 0.023125\n", "x4 0.016250\n", "x5 0.023125\n", "x6 0.016250\n", "x7 0.016875\n", "x8 0.013125\n", "x9 0.018750\n", "x10 0.026875\n", "x11 0.018750\n", "x12 0.022500\n", "x13 0.019375\n", "x14 0.021250\n", "x15 0.021875\n", "x16 0.016250\n", "x17 0.016875\n", "x18 0.025000\n", "x19 0.021875\n", "x20 0.023750\n", "x21 0.018125\n", "x22 0.016875\n", "x23 0.029375\n", "x24 0.017500\n", "x25 0.013750\n", "x26 0.022500\n", "x27 0.018750\n", "x28 0.021875\n", "x29 0.018750\n", "x30 0.018750\n", "x31 0.024375\n", "x32 0.019375\n", "x33 0.025625\n", "x34 0.025625\n", "x35 0.018750\n", "x36 0.016875\n", "x37 0.014375\n", "x38 0.019375\n", "x39 0.014375\n", "x40 0.022500\n", "x41 0.025000\n", "x42 0.016250\n", "x43 0.023125\n", "x44 0.025000\n", "x45 0.018125\n", "x46 0.019375\n", "x47 0.023125\n", "x48 0.020000\n", "x49 0.020000" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame((df.iloc[:,:-1].isna().sum()/len(df))*100,columns=['%Missing'])" ] }, { "cell_type": "markdown", "id": "respected-volunteer", "metadata": {}, "source": [ "### Missing value treatment\n", "\n", "Since missing values are very nominal in size (less than 0.5%) compared to size of the dataset rows with missing values are removed and final dataset was obtained as below.\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "endless-adams", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n", "===========================================================\n", "Delete records with missing values\n", "===========================================================\n", "Original shape of dataframe : (160000, 51)\n", "After dropping missing values : (158392, 51)\n", "Total number of records dropped : 1608\n", "Total percentage of records dropped : 0.01%\n", " \n" ] } ], "source": [ "print(' ')\n", "print('===========================================================')\n", "print('Delete records with missing values')\n", "print('===========================================================')\n", "print('Original shape of dataframe : ',df.shape)\n", "df_final = df.dropna().copy()\n", "print('After dropping missing values : ',df_final.shape)\n", "print('Total number of records dropped : ',df.shape[0]-df_final.shape[0])\n", "print('Total percentage of records dropped : ',str(round( (df.shape[0]-df_final.shape[0])/df.shape[0],2))+'%')\n", "print(' ')" ] }, { "cell_type": "markdown", "id": "sunrise-action", "metadata": {}, "source": [ "Below plot shows there are no missing values left in the df_final Dataframe." ] }, { "cell_type": "code", "execution_count": 14, "id": "racial-saying", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "msno.matrix(df_final.iloc[:,:-1],figsize=(20, 5)) " ] }, { "cell_type": "markdown", "id": "occasional-trade", "metadata": {}, "source": [ "## Target distribution\n", "\n", "### Overall target distribution\n", "\n", "Below plot indicates that this is going to be imbalanced classification problem. \n", "\n", "Distribution of the target\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", "
Class Distribution
1 59%
0 41%
\n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "polyphonic-performance", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfEAAAE9CAYAAAAbGFuyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWUElEQVR4nO3df7CeZX3n8ffHBIwVbWsIu0tOMBEiISgFPIkVRpa1uvzQDTsru0J1lAXJ7tQoHV1ncV2oE8fVVqzrAjM1FKbqrkT8sRq3kWhV6gotJBhGDDQlIsIJs2Oa+qNq+RH47h/nCX16PMl5Dp77nFznvF8zmTz3fV/PfT75I/nkup/7ue5UFZIkqT3PmOkAkiTp6bHEJUlqlCUuSVKjLHFJkhpliUuS1ChLXJKkRs2f6QCTdeSRR9bSpUtnOoYkSdPizjvv/JuqWjTeseZKfOnSpWzbtm2mY0iSNC2SfP9Ax7ycLklSoyxxSZIaZYlLktSo5j4TlyRpsh5//HFGRkZ45JFHZjrKAS1YsIChoSEOO+ywgd9jiUuSZr2RkRGe85znsHTpUpLMdJxfUFXs3buXkZERli1bNvD7vJwuSZr1HnnkERYuXHhIFjhAEhYuXDjpKwWWuCRpTjhUC3y/p5Ov0xJPcnaSnUl2Jbn8AGP+XZJ7kuxI8sku80iSdCCnnXbapMbfcsstvOY1r+kozWA6+0w8yTzgWuBVwAiwNcmmqrqnb8xy4F3A6VX1wyRHdZVHkqSDue2222Y6wqR1ORNfDeyqqvur6jFgI3DemDGXAtdW1Q8BquoHHeaRJOmAjjjiCGB0hn3mmWdy/vnns2LFCl7/+tdTVQDcfPPNrFixglNPPZXPfe5zT733Zz/7GRdffDGrV6/mlFNO4Qtf+AIAl112GevXrwdgy5YtnHHGGTz55JNTlrnLu9MXAw/1bY8ALx0z5oUASW4F5gHvqaqbx54oyVpgLcAxxxzTSVhJkvbbvn07O3bs4Oijj+b000/n1ltvZXh4mEsvvZSvfe1rHHfccbzuda97avz73vc+XvGKV3DDDTfwox/9iNWrV/PKV76S97///axatYqXv/zlvO1tb2Pz5s084xlTN3+e6a+YzQeWA2cCQ8A3kry4qn7UP6iqNgAbAIaHh6uLIC9558e7OK3GuPODb5zpCJI0odWrVzM0NATAySefzAMPPMARRxzBsmXLWL58OQBveMMb2LBhAwBf/vKX2bRpE1dddRUwejf8gw8+yAknnMB1113HGWecwYc//GGOPfbYKc3ZZYnvBpb0bQ/19vUbAW6vqseB7yX5a0ZLfWuHuSRJOqhnPvOZT72eN28e+/btO+j4quKzn/0sxx9//C8cu/vuu1m4cCEPP/zwlOfs8jPxrcDyJMuSHA5cAGwaM+bzjM7CSXIko5fX7+8wkyRJT8uKFSt44IEH+O53vwvAjTfe+NSxs846i6uvvvqpz863b98OwPe//30+9KEPsX37dr70pS9x++23T2mmzkq8qvYB64AtwL3ATVW1I8n6JGt6w7YAe5PcA3wdeGdV7e0qkyRJT9eCBQvYsGEDr371qzn11FM56qh/+ELVFVdcweOPP85JJ53EiSeeyBVXXEFVcckll3DVVVdx9NFHc/311/PmN795Spd+zf7/NbRieHi4unieuJ+JTw8/E5c0E+69915OOOGEmY4xofFyJrmzqobHG++KbZIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5I0TW6++WaOP/54jjvuOD7wgQ/80ueb6bXTJUmadlO9Nsgga2A88cQTvOUtb+ErX/kKQ0NDrFq1ijVr1rBy5cqn/XOdiUuSNA3uuOMOjjvuOF7wghdw+OGHc8EFFzz1yNKnyxKXJGka7N69myVL/uG5YENDQ+zePfa5YJNjiUuS1ChLXJKkabB48WIeeuihp7ZHRkZYvHjxL3VOS1ySpGmwatUq7rvvPr73ve/x2GOPsXHjRtasWTPxGw/Cu9MlSZoG8+fP55prruGss87iiSee4OKLL+bEE0/85c45RdkkSWrGTD0W+dxzz+Xcc8+dsvN5OV2SpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSpsHFF1/MUUcdxYte9KIpO6ffE5ckzTkPrn/xlJ7vmCvvnnDMRRddxLp163jjG6fuO+rOxCVJmgZnnHEGz3ve86b0nJa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5I0DS688EJe9rKXsXPnToaGhrj++ut/6XP6FTNJ0pwzyFfCptqNN9445ed0Ji5JUqMscUmSGmWJS5LUKEtckjQnVNVMRziop5Ov0xJPcnaSnUl2Jbl8nOMXJdmT5K7erzd3mUeSNDctWLCAvXv3HrJFXlXs3buXBQsWTOp9nd2dnmQecC3wKmAE2JpkU1XdM2bop6pqXVc5JEkaGhpiZGSEPXv2zHSUA1qwYAFDQ0OTek+XXzFbDeyqqvsBkmwEzgPGlrgkSZ067LDDWLZs2UzHmHJdXk5fDDzUtz3S2zfWa5N8O8lnkizpMI8kSbPKTN/Y9kVgaVWdBHwF+Nh4g5KsTbItybZD+VKIJEnTqcsS3w30z6yHevueUlV7q+rR3uYfAy8Z70RVtaGqhqtqeNGiRZ2ElSSpNV2W+FZgeZJlSQ4HLgA29Q9I8s/6NtcA93aYR5KkWaWzG9uqal+SdcAWYB5wQ1XtSLIe2FZVm4C3JVkD7AP+FrioqzySJM02nT4Apao2A5vH7Luy7/W7gHd1mUGSpNlqpm9skyRJT5MlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVGWuCRJjbLEJUlqlCUuSVKjLHFJkhpliUuS1ChLXJKkRlnikiQ1yhKXJKlRlrgkSY2yxCVJapQlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqM6LfEkZyfZmWRXkssPMu61SSrJcJd5JEmaTTor8STzgGuBc4CVwIVJVo4z7jnAZcDtXWWRJGk26nImvhrYVVX3V9VjwEbgvHHGvRf4feCRDrNIkjTrzO/w3IuBh/q2R4CX9g9IciqwpKr+NMk7O8wiSXPag+tfPNMRZr1jrrx72n/mjN3YluQZwB8C7xhg7Nok25Js27NnT/fhJElqQJclvhtY0rc91Nu333OAFwG3JHkA+E1g03g3t1XVhqoarqrhRYsWdRhZkqR2dFniW4HlSZYlORy4ANi0/2BV/biqjqyqpVW1FPhLYE1VbeswkyRJs0ZnJV5V+4B1wBbgXuCmqtqRZH2SNV39XEmS5ooub2yjqjYDm8fsu/IAY8/sMoskSbONK7ZJktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNGrjEkzwryfFdhpEkSYMbqMST/CvgLuDm3vbJSTYd/F2SJKlLg87E38PoU8l+BFBVdwHLOsokSZIGMGiJP15VPx6zr6Y6jCRJGtygK7btSPLbwLwky4G3Abd1F0uSJE1k0Jn4W4ETgUeBG4GfAL/bVShJkjSxgWbiVfVz4N29X5Ik6RAwUIkn+SK/+Bn4j4FtwEer6pGpDiZJkg5u0Mvp9wM/Ba7r/foJ8HfAC3vbkiRpmg16Y9tpVbWqb/uLSbZW1aokO7oIJkmSDm7QmfgRSY7Zv9F7fURv87EpTyVJkiY06Ez8HcA3k3wXCKMLvfxOkmcDH+sqnCRJOrBB707f3Pt++Irerp19N7P9906SSZKkgxp0Jg6wHDgeWAD8RhKq6uPdxJIkSRMZ9CtmvwecCawENgPnAN8ELHFJkmbIoDe2nQ/8FvD/qurfA78B/GpnqSRJ0oQGLfG/r6ongX1Jngv8AFjSXSxJkjSRQT8T35bk1xhd2OVORhd++YvOUkmSpAkNenf67/Re/lGSm4HnVtW3u4slSZImMtDl9CRf3f+6qh6oqm/375MkSdPvoDPxJAuAXwGOTPLrjC70AvBcYHHH2SRJ0kFMdDn9PzD63PCjGf0sfH+J/wS4psNckiRpAgct8ar6CPCRJG+tqqunKZMkSRrAoDe2XZ3kNGBp/3tcsU2SpJkz6IptnwCOBe4CnujtLlyxTZKkGTPo98SHgZVVVV2GkSRJgxt0xbbvAP+0yyCSJGlyBp2JHwnck+QO4NH9O6tqTSepJEnShAYt8fd0GUKSJE3eQJfTq+rPgQeAw3qvtwLfmuh9Sc5OsjPJriSXj3P8Pya5O8ldSb6ZZOUk80uSNGcNuuzqpcBngI/2di0GPj/Be+YB1zL67PGVwIXjlPQnq+rFVXUy8AfAH04iuyRJc9qgN7a9BTid0ZXaqKr7gKMmeM9qYFdV3V9VjwEbgfP6B1TVT/o2n83o19YkSdIABv1M/NGqeiwZXXU1yXwmLtzFwEN92yPAS8cOSvIW4O3A4cArBswjSdKcN+hM/M+T/BfgWUleBXwa+OJUBKiqa6vqWOA/A/91vDFJ1ibZlmTbnj17puLHSpLUvEFL/HJgD3A3ow9F2cwBCrfPbmBJ3/ZQb9+BbAT+9XgHqmpDVQ1X1fCiRYsGjCxJ0uw26OX0ZwE3VNV18NRNa88Cfn6Q92wFlidZxmh5XwD8dv+AJMt7n68DvBq4D0mSNJBBZ+JfZbS093sW8GcHe0NV7QPWAVuAe4GbqmpHkvVJ9i8Ssy7JjiR3Mfq5+JsmlV6SpDls0Jn4gqr66f6Nqvppkl+Z6E1VtZnRS+/9+67se33ZoEElSdI/NuhM/GdJTt2/keQlwN93E0mSJA1i0Jn4ZcCnkzwMhNGHobyus1SSJGlCE5Z47ya2lwMrgON7u3dW1eNdBpMkSQc34eX0qnoCuLCqHq+q7/R+WeCSJM2wQS+n35rkGuBTwM/276yqCR+CIkmSujFoiZ/c+319377CZVIlSZoxA5V4Vf2LroNIkqTJGfRRpP8kyfVJvtTbXpnkkm6jSZKkgxn0e+J/wujKa0f3tv8a+N0uAkmSpMEMWuJHVtVNwJPw1JKqT3SWSpIkTWgyK7YtpPcM8SS/Cfy4s1SSJGlCg96d/nZgE/CCJLcCi4DzO0slSZImNGiJ3wP8b0YfPfp3wOcZ/VxckiTNkEEvp3+c0WVX/xtwNfBC4BNdhZIkSRMbdCb+oqpa2bf99ST3dBFIkiQNZtCZ+Ld6N7MBkOSlwLZuIkmSpEEMOhN/CXBbkgd728cAO5PcDVRVndRJOkmSdECDlvjZnaaQJEmTNuja6d/vOogkSZqcQT8TlyRJhxhLXJKkRlnikiQ1yhKXJKlRlrgkSY2yxCVJapQlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSGtVpiSc5O8nOJLuSXD7O8bcnuSfJt5N8Ncnzu8wjSdJs0lmJJ5kHXAucA6wELkyycsyw7cBwVZ0EfAb4g67ySJI023Q5E18N7Kqq+6vqMWAjcF7/gKr6elX9vLf5l8BQh3kkSZpVuizxxcBDfdsjvX0HcgnwpfEOJFmbZFuSbXv27JnCiJIkteuQuLEtyRuAYeCD4x2vqg1VNVxVw4sWLZrecJIkHaLmd3ju3cCSvu2h3r5/JMkrgXcD/7yqHu0wjyRJs0qXM/GtwPIky5IcDlwAbOofkOQU4KPAmqr6QYdZJEmadTor8araB6wDtgD3AjdV1Y4k65Os6Q37IHAE8OkkdyXZdIDTSZKkMbq8nE5VbQY2j9l3Zd/rV3b58yVJms0OiRvbJEnS5FnikiQ1yhKXJKlRlrgkSY2yxCVJapQlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVGWuCRJjbLEJUlqlCUuSVKjLHFJkhpliUuS1ChLXJKkRlnikiQ1yhKXJKlRlrgkSY2yxCVJapQlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNarTEk9ydpKdSXYluXyc42ck+VaSfUnO7zKLJEmzTWclnmQecC1wDrASuDDJyjHDHgQuAj7ZVQ5Jkmar+R2eezWwq6ruB0iyETgPuGf/gKp6oHfsyQ5zSJI0K3V5OX0x8FDf9khvnyRJmgJN3NiWZG2SbUm27dmzZ6bjSJJ0SOiyxHcDS/q2h3r7Jq2qNlTVcFUNL1q0aErCSZLUui5LfCuwPMmyJIcDFwCbOvx5kiTNKZ2VeFXtA9YBW4B7gZuqakeS9UnWACRZlWQE+LfAR5Ps6CqPJEmzTZd3p1NVm4HNY/Zd2fd6K6OX2SVJ0iQ1cWObJEn6RZa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVGWuCRJjbLEJUlqlCUuSVKjLHFJkhpliUuS1ChLXJKkRlnikiQ1yhKXJKlRlrgkSY2yxCVJapQlLklSoyxxSZIaZYlLktQoS1ySpEZZ4pIkNcoSlySpUZa4JEmNssQlSWqUJS5JUqMscUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVGWuCRJjeq0xJOcnWRnkl1JLh/n+DOTfKp3/PYkS7vMI0nSbNJZiSeZB1wLnAOsBC5MsnLMsEuAH1bVccCHgd/vKo8kSbNNlzPx1cCuqrq/qh4DNgLnjRlzHvCx3uvPAL+VJB1mkiRp1uiyxBcDD/Vtj/T2jTumqvYBPwYWdphJkqRZY/5MBxhEkrXA2t7mT5PsnMk8evpy1ZuOBP5mpnNIc5B/97r2e51dSH7+gQ50WeK7gSV920O9feONGUkyH/hVYO/YE1XVBmBDRzk1jZJsq6rhmc4hzTX+3ZudurycvhVYnmRZksOBC4BNY8ZsAt7Ue30+8LWqqg4zSZI0a3Q2E6+qfUnWAVuAecANVbUjyXpgW1VtAq4HPpFkF/C3jBa9JEkaQJz4ajolWdv7eETSNPLv3uxkiUuS1CiXXZUkqVGWuKbFREvwSupGkhuS/CDJd2Y6i6aeJa7ODbgEr6Ru/Alw9kyHUDcscU2HQZbgldSBqvoGo9/+0SxkiWs6DLIEryRpkixxSZIaZYlrOgyyBK8kaZIscU2HQZbglSRNkiWuzvUeM7t/Cd57gZuqasfMppLmhiQ3An8BHJ9kJMklM51JU8cV2yRJapQzcUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVGWuDRHJbltkuPPTPJ/usojafIscWmOqqrTZjqDpF+OJS7NUUl+2vv9zCS3JPlMkr9K8r+SpHfs7N6+bwH/pu+9z+49p/qOJNuTnNfb/5EkV/Zen5XkG0n8d0bqyPyZDiDpkHAKcCLwMHArcHqSbcB1wCuAXcCn+sa/G/haVV2c5NeAO5L8GfAuYGuS/wv8D+DcqnpyGv8c0pzi/5AlAdxRVSO9wr0LWAqsAL5XVffV6NKO/7Nv/L8ELk9yF3ALsAA4pqp+DlwKfAW4pqq+O41/BmnOcSYuCeDRvtdPMPG/DQFeW1U7xzn2YmAvcPQUZZN0AM7EJR3IXwFLkxzb276w79gW4K19n52f0vv9+cA7GL08f06Sl05jXmnOscQljauqHgHWAn/au7HtB32H3wscBnw7yQ7gvb1Cvx74T1X1MHAJ8MdJFkxzdGnO8ClmkiQ1ypm4JEmNssQlSWqUJS5JUqMscUmSGmWJS5LUKEtckqRGWeKSJDXKEpckqVH/HwyRRwFwKvrlAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "target = df_final['y'].value_counts(normalize=True).rename('percentage').reset_index()\n", "plt.figure(figsize=(8,5))\n", "sns.barplot(x=\"index\", y=\"percentage\", hue=\"index\", data=target)" ] }, { "cell_type": "markdown", "id": "polish-election", "metadata": {}, "source": [ "### Target - Region, Month and Daywise \n", "\n", "Here are observations from the plots\n", "\n", "- Asia is main contributor to postive class\n", "- Summer period is the main period for positive class\n", "- Middle of the week i.e. Wednesday class is seen most occurrences of postive class." ] }, { "cell_type": "code", "execution_count": 16, "id": "collect-armstrong", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)\n", "fig.suptitle('Region,Month,Daywise Plots')\n", "\n", "sns.countplot(ax=axes[0],x=\"x24\", data=df_final,hue=\"y\")\n", "axes[0].set_title(\"x24\")\n", "axes[0].tick_params(labelrotation=30)\n", "\n", "sns.countplot(ax=axes[1],x=\"x29\", data=df_final,hue=\"y\")\n", "axes[1].set_title(\"x29\")\n", "axes[1].tick_params(labelrotation=30)\n", "\n", "\n", "sns.countplot(ax=axes[2],x=\"x30\", data=df_final,hue=\"y\")\n", "axes[2].set_title(\"x30\")\n", "axes[2].tick_params(labelrotation=30)" ] }, { "cell_type": "markdown", "id": "least-employee", "metadata": {}, "source": [ "## Correlogram" ] }, { "cell_type": "markdown", "id": "objective-metabolism", "metadata": {}, "source": [ "Below correleogram shows there are not many correlated features and we will rely on automatic recursive feature elimination technique to reduce number of feature. This approach will keep optimal number of features.\n", "\n", "- x2 and x6 are highly correlated\n", "- x38 and x41 are highly correlated" ] }, { "cell_type": "code", "execution_count": 17, "id": "improved-payday", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(20,8))\n", "sns.heatmap(df_final.iloc[:,:-1].corr())" ] }, { "cell_type": "code", "execution_count": 18, "id": "confidential-contents", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 158392 entries, 0 to 159999\n", "Data columns (total 51 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 x0 158392 non-null float64\n", " 1 x1 158392 non-null float64\n", " 2 x2 158392 non-null float64\n", " 3 x3 158392 non-null float64\n", " 4 x4 158392 non-null float64\n", " 5 x5 158392 non-null float64\n", " 6 x6 158392 non-null float64\n", " 7 x7 158392 non-null float64\n", " 8 x8 158392 non-null float64\n", " 9 x9 158392 non-null float64\n", " 10 x10 158392 non-null float64\n", " 11 x11 158392 non-null float64\n", " 12 x12 158392 non-null float64\n", " 13 x13 158392 non-null float64\n", " 14 x14 158392 non-null float64\n", " 15 x15 158392 non-null float64\n", " 16 x16 158392 non-null float64\n", " 17 x17 158392 non-null float64\n", " 18 x18 158392 non-null float64\n", " 19 x19 158392 non-null float64\n", " 20 x20 158392 non-null float64\n", " 21 x21 158392 non-null float64\n", " 22 x22 158392 non-null float64\n", " 23 x23 158392 non-null float64\n", " 24 x24 158392 non-null object \n", " 25 x25 158392 non-null float64\n", " 26 x26 158392 non-null float64\n", " 27 x27 158392 non-null float64\n", " 28 x28 158392 non-null float64\n", " 29 x29 158392 non-null object \n", " 30 x30 158392 non-null object \n", " 31 x31 158392 non-null float64\n", " 32 x32 158392 non-null float64\n", " 33 x33 158392 non-null float64\n", " 34 x34 158392 non-null float64\n", " 35 x35 158392 non-null float64\n", " 36 x36 158392 non-null float64\n", " 37 x37 158392 non-null float64\n", " 38 x38 158392 non-null float64\n", " 39 x39 158392 non-null float64\n", " 40 x40 158392 non-null float64\n", " 41 x41 158392 non-null float64\n", " 42 x42 158392 non-null float64\n", " 43 x43 158392 non-null float64\n", " 44 x44 158392 non-null float64\n", " 45 x45 158392 non-null float64\n", " 46 x46 158392 non-null float64\n", " 47 x47 158392 non-null float64\n", " 48 x48 158392 non-null float64\n", " 49 x49 158392 non-null float64\n", " 50 y 158392 non-null object \n", "dtypes: float64(47), object(4)\n", "memory usage: 67.8+ MB\n" ] } ], "source": [ "df_final.info()" ] }, { "cell_type": "markdown", "id": "united-aging", "metadata": {}, "source": [ "## Assumptions\n", "\n", "- Details of the featurs are not available so analysis shall be done without taking specific domain into consideration.\n", "- Since false negative is costing higher focus shall be on minimizing false negative as much as possible.\n", "- Not knowing domain may not provide best model but it shall be competitve enough.\n", "\n", "\n", "## Limitations\n", "\n", "- The major limitation is we don't know what the dataset is about\n", "- cannot apply domain knowledge to the data\n", "\n", "\n", "\n", "\n", "## Sampling Techniques\n", "\n", "- From the plots that have examined above it is clear that class labels are not evenly distributed.\n", "- This is not highly imbalanced but moderately imbalanced data\n", "- Default stratified sampling shall be used for train/test split.\n", "- **Stratified sampling** will maintain the class distribution in train/test split as in original data\n", "- Data will be split into Train(80%) and Test(20%)\n" ] }, { "cell_type": "markdown", "id": "ultimate-ambassador", "metadata": {}, "source": [ "# Modeling Preparations" ] }, { "cell_type": "markdown", "id": "headed-lawrence", "metadata": {}, "source": [ "## Train/Test Split" ] }, { "cell_type": "code", "execution_count": 19, "id": "latest-lancaster", "metadata": {}, "outputs": [], "source": [ "target_df = df_final.iloc[:,50].astype(int)\n", "\n", "X_train, X_test, y_train, y_test = train_test_split( df_final.iloc[:,:-1], \n", " target_df, test_size=0.2, random_state=1999)" ] }, { "cell_type": "markdown", "id": "greenhouse-trance", "metadata": {}, "source": [ "### One Hot Encoding" ] }, { "cell_type": "code", "execution_count": 20, "id": "funny-malpractice", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 126713 entries, 42161 to 82580\n", "Data columns (total 67 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 x0 126713 non-null float64\n", " 1 x1 126713 non-null float64\n", " 2 x2 126713 non-null float64\n", " 3 x3 126713 non-null float64\n", " 4 x4 126713 non-null float64\n", " 5 x5 126713 non-null float64\n", " 6 x6 126713 non-null float64\n", " 7 x7 126713 non-null float64\n", " 8 x8 126713 non-null float64\n", " 9 x9 126713 non-null float64\n", " 10 x10 126713 non-null float64\n", " 11 x11 126713 non-null float64\n", " 12 x12 126713 non-null float64\n", " 13 x13 126713 non-null float64\n", " 14 x14 126713 non-null float64\n", " 15 x15 126713 non-null float64\n", " 16 x16 126713 non-null float64\n", " 17 x17 126713 non-null float64\n", " 18 x18 126713 non-null float64\n", " 19 x19 126713 non-null float64\n", " 20 x20 126713 non-null float64\n", " 21 x21 126713 non-null float64\n", " 22 x22 126713 non-null float64\n", " 23 x23 126713 non-null float64\n", " 24 x25 126713 non-null float64\n", " 25 x26 126713 non-null float64\n", " 26 x27 126713 non-null float64\n", " 27 x28 126713 non-null float64\n", " 28 x31 126713 non-null float64\n", " 29 x32 126713 non-null float64\n", " 30 x33 126713 non-null float64\n", " 31 x34 126713 non-null float64\n", " 32 x35 126713 non-null float64\n", " 33 x36 126713 non-null float64\n", " 34 x37 126713 non-null float64\n", " 35 x38 126713 non-null float64\n", " 36 x39 126713 non-null float64\n", " 37 x40 126713 non-null float64\n", " 38 x41 126713 non-null float64\n", " 39 x42 126713 non-null float64\n", " 40 x43 126713 non-null float64\n", " 41 x44 126713 non-null float64\n", " 42 x45 126713 non-null float64\n", " 43 x46 126713 non-null float64\n", " 44 x47 126713 non-null float64\n", " 45 x48 126713 non-null float64\n", " 46 x49 126713 non-null float64\n", " 47 x24_america 126713 non-null uint8 \n", " 48 x24_asia 126713 non-null uint8 \n", " 49 x24_euorpe 126713 non-null uint8 \n", " 50 x29_Apr 126713 non-null uint8 \n", " 51 x29_Aug 126713 non-null uint8 \n", " 52 x29_Dev 126713 non-null uint8 \n", " 53 x29_Feb 126713 non-null uint8 \n", " 54 x29_January 126713 non-null uint8 \n", " 55 x29_July 126713 non-null uint8 \n", " 56 x29_Jun 126713 non-null uint8 \n", " 57 x29_Mar 126713 non-null uint8 \n", " 58 x29_May 126713 non-null uint8 \n", " 59 x29_Nov 126713 non-null uint8 \n", " 60 x29_Oct 126713 non-null uint8 \n", " 61 x29_sept. 126713 non-null uint8 \n", " 62 x30_friday 126713 non-null uint8 \n", " 63 x30_monday 126713 non-null uint8 \n", " 64 x30_thurday 126713 non-null uint8 \n", " 65 x30_tuesday 126713 non-null uint8 \n", " 66 x30_wednesday 126713 non-null uint8 \n", "dtypes: float64(47), uint8(20)\n", "memory usage: 48.8 MB\n" ] } ], "source": [ "X_train_ohe = pd.get_dummies(X_train, prefix=['x24','x29','x30'])\n", "X_test_ohe = pd.get_dummies(X_test, prefix=['x24','x29','x30'])\n", "X_train_ohe.info()" ] }, { "cell_type": "markdown", "id": "divine-arrangement", "metadata": {}, "source": [ "### Normalize data" ] }, { "cell_type": "code", "execution_count": 21, "id": "endless-poultry", "metadata": {}, "outputs": [], "source": [ "sc = StandardScaler()\n", "XS_train_ohe = sc.fit_transform(X_train_ohe)\n", "XS_test_ohe = sc.transform(X_test_ohe)" ] }, { "cell_type": "markdown", "id": "eastern-combination", "metadata": {}, "source": [ "Store dataframes into csv. These spreadsheets shall be used to run different models on different machines." ] }, { "cell_type": "code", "execution_count": 22, "id": "nutritional-wheel", "metadata": {}, "outputs": [], "source": [ "#pd.DataFrame(XS_train_ohe,columns=list(X_train_ohe.columns)).to_csv(r'data/train_set.csv',index=False)\n", "#pd.DataFrame(XS_test_ohe ,columns=list(X_test_ohe.columns )).to_csv(r'data/test_set.csv',index=False)\n", "#pd.DataFrame(y_train,columns=['y']).to_csv(r'data/train_target.csv',index=False)\n", "#pd.DataFrame(y_test,columns=['y']).to_csv(r'data/test_target.csv',index=False)" ] }, { "cell_type": "markdown", "id": "banner-hamburg", "metadata": {}, "source": [ "## Principal Component Analysis\n", "\n", "Since we have 67 features in total after one hot encoding here we are checking if Principal component analysis is helpful in reducing features are not. Running PCA we found that at least 47 components which contributes to 90% variance in the data are required which is not significant reduction in the features. So we won't be using feature reduction using PCA. we will rather use recursive feature elimination techqniue for logistic regression. \n", "\n", "- 47 components which contributes 90% variance are required.\n", "- PCA is not much helpful in reducing features dramatically\n", "- We only explored option and found to be not useful for our modeling.\n" ] }, { "cell_type": "code", "execution_count": 23, "id": "involved-classroom", "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "# Components=%{x}
Explained Variance=%{y}", "legendgroup": "", "line": { "color": "#636efa" }, "mode": "lines", "name": "", "orientation": "v", "showlegend": false, "stackgroup": "1", "type": "scatter", "x": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67 ], "xaxis": "x", "y": [ 0.05937922135040791, 0.11076819741229241, 0.14849950507857584, 0.17673522604596298, 0.20318741784704636, 0.22888493988640335, 0.25188487286805217, 0.27241471095955555, 0.29142132979693003, 0.30963348477532665, 0.3273119212642539, 0.34426402532582207, 0.3605790260829346, 0.37637993670112885, 0.39180878604724506, 0.40715394407881994, 0.4224865667331311, 0.4377957970539887, 0.45309718595934095, 0.4683839858833212, 0.4836305338906555, 0.49884236844869223, 0.514036692597111, 0.529209925984737, 0.5443552153724495, 0.5594841811872586, 0.5745585831742074, 0.5896239349421917, 0.6046632702845375, 0.6196776663465754, 0.6346857136889241, 0.6496491600342689, 0.6646031246339077, 0.6795357022860018, 0.6944502607018019, 0.7093376628269965, 0.7242179999455528, 0.73906385387729, 0.753902779600599, 0.7687059364928298, 0.783470195187496, 0.798222220275049, 0.8129619206674362, 0.8276838988378822, 0.842392967384669, 0.8570609889040666, 0.8717164419184523, 0.8863523998932815, 0.9009730914555079, 0.9155634600049207, 0.930104889559395, 0.9446288326670036, 0.959067836242417, 0.9716199086412872, 0.9830010305615372, 0.9932760382967772, 0.9998176794884575, 0.9999999999999237, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997, 0.9999999999999997 ], "yaxis": "y" } ], "layout": { "height": 400, "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "width": 800, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "# Components" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Explained Variance" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pca = PCA()\n", "\n", "PCA_train = pca.fit_transform(XS_train_ohe)\n", "PCA_test = pca.transform(XS_test_ohe)\n", "exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)\n", "exp_var_cumul\n", "\n", "px.area(\n", " x=range(1, exp_var_cumul.shape[0] + 1),\n", " y=exp_var_cumul,\n", " width=800,\n", " height=400,\n", " labels={\"x\": \"# Components\", \"y\": \"Explained Variance\"})\n" ] }, { "cell_type": "markdown", "id": "prerequisite-utility", "metadata": {}, "source": [ "## Choice of Metric\n", "\n", "Choice of metric for model development\n", "\n", "The requirement is to minimize the cost function, which penalizes false negatives (FN) 50x more than false positives (FP).\n", "\n", "\t$ cost penalty for FN = $500\n", " \n", "\t$ cost penalty for FP = $10\n", " \n", "\t$ cost penalty for TP, TN = 0\n", " \n", "\n", "Effectively, above implies that for the same total cost, count of FN = 1/50th of FP. Or one needs to prioritize minimizing FN.\n", "\n", "Given above, Recall is the metric of choice to search for best model. Recall definition is given below:\n", "\n", "Recall = TP / TP +FN\n", "\n", "Ideal model should have very low or zero FN, hence requires high recall score.\n", "\n" ] }, { "cell_type": "markdown", "id": "assured-wesley", "metadata": {}, "source": [ "# Model Building & Evaluation\n", "\n", "\n", "**Logistic Regression**\n", "\n", "Sklearn comes with get_param function which returns list of hyperparameters for the given estimators. All it requires is to create object of the estimator.\n", "\n", "Following hyperparameter can be tuned for Logistic Regression with different solver (lbfgs,sag,saga,newton-cg,liblinear) and regularization (l1 an l2) :\n", "\n", "- C - also known as inverse regularization parameter. It controls penalty strength.\n", "\n", "**KNN - K Nearest Neighbors**\n", "\n", "For KNN n_neighbors is the hyperparameter that can be tuned. Different values of n_neighbor are tried and value that gives best model can be used for predictions. \n", "\n", "\n", "**Random Forest** \n", "\n", "Random forest is non-parametric classifier. There are many hyperparamters tha can be tuned to build random forest listed as below \n", "\n", "- n_estimators - The number of trees in the forest.\n", "- max_depth - maximum depth of the tree\n", "- min_samples_split - Minimum number of samples required to split internal nodes\n", "- min_samples_leafint - Minimum number of samples required to be at leaf nodes.\n", "- max_features - The number of features to be considered when looking at best split\n", "- min_weight_fraction_leaf - Minimum weighted fraction of the sum total weights required to be at leaf node.\n", "\n", "These are important hyperparamerters which can be tuned to build models. For our dataset are using only two hyperparameters:\n", "\n", "- n_estimators\n", "- max_features\n", "\n", "max features is number of features to consider when looking for the best split:\n", "\n", "If int, then consider max_features features at each split.\n", "\n", "If float, then max_features is a fraction and int(max_features * n_features) features are considered at each split.\n", "\n", "If “auto”, then max_features=sqrt(n_features).\n", "\n", "If “sqrt”, then max_features=sqrt(n_features) (same as “auto”).\n", "\n", "If “log2”, then max_features=log2(n_features).\n", "\n", "If None, then max_features=n_features. [scikitlearn-doc]\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "bridal-skiing", "metadata": {}, "source": [ "## Logistic Regression" ] }, { "cell_type": "markdown", "id": "bound-solution", "metadata": {}, "source": [] }, { "cell_type": "markdown", "id": "broke-desire", "metadata": {}, "source": [ "### Logistic Regression Hyperparameters\n", "\n", "Following are the hyperparameters returned by get_params function of sklearn library." ] }, { "cell_type": "code", "execution_count": 24, "id": "unavailable-concept", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 1.0,\n", " 'class_weight': None,\n", " 'dual': False,\n", " 'fit_intercept': True,\n", " 'intercept_scaling': 1,\n", " 'l1_ratio': None,\n", " 'max_iter': 100,\n", " 'multi_class': 'auto',\n", " 'n_jobs': None,\n", " 'penalty': 'l2',\n", " 'random_state': 1999,\n", " 'solver': 'lbfgs',\n", " 'tol': 0.0001,\n", " 'verbose': 0,\n", " 'warm_start': False}" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log_model = LogisticRegression(random_state=1999,solver='lbfgs')\n", "log_model.get_params()" ] }, { "cell_type": "markdown", "id": "competitive-wilderness", "metadata": {}, "source": [ "### Recursive Feature Elimination\n", "\n", "Our dataset has 67 features after one hot encoding and not all features might be important. Recursive feature elimination finds optimal number of features and identifies features that should be kept in the model. Below code runs Recursive feature elimination with cross validation and finds optimal number of features and same shall be used for building model. \n", "\n", "Plot below is self-explanatory. It shows how recall/accuracy score varies with number of features increases. After running full cross validation number of optimal features are found to be - 60 \n" ] }, { "cell_type": "code", "execution_count": 25, "id": "literary-kennedy", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimal number of features : 60\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn.feature_selection import RFECV\n", "from sklearn.model_selection import StratifiedKFold\n", "\n", "min_features_to_select= 60\n", "\n", "rfecv = RFECV(estimator=log_model, step=1, cv=StratifiedKFold(2),\n", " scoring='recall',\n", " min_features_to_select=min_features_to_select)\n", "rfecv.fit(XS_train_ohe, y_train)\n", "\n", "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n", "\n", "# Plot number of features VS. cross-validation scores\n", "plt.figure()\n", "plt.xlabel(\"Number of features selected\")\n", "plt.ylabel(\"Cross validation score (nb of correct classifications)\")\n", "plt.plot(range(min_features_to_select,\n", " len(rfecv.grid_scores_) + min_features_to_select),\n", " rfecv.grid_scores_)\n", "plt.show()\n", "\n" ] }, { "cell_type": "markdown", "id": "private-legend", "metadata": {}, "source": [ "### Hyperparameter Tuning\n", "\n", "Grid search has been executed for logistic regression with following:\n", "\n", "- C with different values - [0.1,0.01,0.001,0.0001,0.5,0.6,10]\n", "- Two solvers ['lbfgs','sag']\n", "- Both optimizers supports L2 regularization. So only L2 has been used in Grid Search.\n", "- Random seed - 1999 for reproducibility\n", "- max_iter set to 500 for sag solver. sag is stochastic gradient descent.\n", "\n", "With these variables set Grid search runs for all possible combinations of the grid parameters. Result of grid search is stored in the file as shown in below code.\n" ] }, { "cell_type": "code", "execution_count": 26, "id": "choice-insulin", "metadata": {}, "outputs": [], "source": [ "# Load Grid Search Result here\n", "\n", "if(path.exists(\"data/logistic_final_grid_result.dat\")):\n", " grid_result = pickle.load(open(\"data/logistic_final_grid_result.dat\", \"rb\")) \n", "else: \n", " log_model = LogisticRegression(random_state=1999)\n", " cs07_random_state_ = 1999\n", "\n", " #\n", " # Run Grid Search\n", " #\n", "\n", " log_param_grid = [\n", " {'penalty' : ['l2'],\n", " 'C' : [0.1,0.01,0.001,0.0001,0.5,0.6,10],\n", " 'solver' : ['sag','lbfgs'],\n", " 'max_iter' : [500],\n", " 'random_state' : [cs07_random_state_]} ]\n", "\n", " cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1999)\n", "\n", " print('Running RFE : ')\n", "\n", " rfe = RFE(log_model,n_features_to_select=61, step=1)\n", " rfe = rfe.fit(XS_train_ohe, y_train.flatten())\n", "\n", " print(rfe.support_)\n", " print(rfe.ranking_)\n", "\n", " print('Running grid search : ')\n", " grid_search = GridSearchCV(estimator=log_model, param_grid=log_param_grid, n_jobs=-1, cv=cv, scoring=['roc_auc','accuracy','f1','recall','precision'],refit='f1')\n", "\n", " print('Fit grid search : ')\n", " grid_result = grid_search.fit(XS_train_ohe[:,rfe.support_], y_train.flatten())\n", "\n", " print('store logistic grid results')\n", " pickle.dump( grid_result, open( \"data/logistic_final_grid_result.dat\", \"wb\" )) \n" ] }, { "cell_type": "markdown", "id": "discrete-sleep", "metadata": {}, "source": [ "### Best Parameters\n", "\n", "Grid search runs for all grid parameters and finds best from all combinations in runs in best_score_ and best_params_." ] }, { "cell_type": "code", "execution_count": 27, "id": "later-title", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best: 0.581015 using {'C': 0.6, 'max_iter': 500, 'penalty': 'l2', 'random_state': 1999, 'solver': 'lbfgs'}\n" ] } ], "source": [ "# summarize results\n", "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))" ] }, { "cell_type": "markdown", "id": "assigned-tactics", "metadata": {}, "source": [ "**Grid Search Result**" ] }, { "cell_type": "code", "execution_count": 28, "id": "sixth-catalyst", "metadata": {}, "outputs": [], "source": [ "log_score = pd.DataFrame.from_dict(grid_result.cv_results_)" ] }, { "cell_type": "code", "execution_count": 29, "id": "reliable-shanghai", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_Cparam_max_iterparam_penaltyparam_random_stateparam_solverparams...split23_test_precisionsplit24_test_precisionsplit25_test_precisionsplit26_test_precisionsplit27_test_precisionsplit28_test_precisionsplit29_test_precisionmean_test_precisionstd_test_precisionrank_test_precision
013.4328511.4741840.0405970.0056340.1500l21999sag{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '......0.6686730.6709410.6579280.6707380.6593650.6562820.6657280.6655840.0047268
10.7442600.0518030.0420620.0060930.1500l21999lbfgs{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '......0.6686730.6708570.6581000.6707380.6593650.6562820.6658990.6655990.0047167
210.5018191.2272080.0435120.0092690.01500l21999sag{'C': 0.01, 'max_iter': 500, 'penalty': 'l2', ......0.6690110.6710360.6577420.6704840.6600100.6563530.6656420.6657910.0048065
30.7895550.1144260.0449560.0099500.01500l21999lbfgs{'C': 0.01, 'max_iter': 500, 'penalty': 'l2', ......0.6690110.6709510.6577420.6703980.6599260.6563530.6658120.6657770.0048016
44.7740411.1203330.0413100.0102120.001500l21999sag{'C': 0.001, 'max_iter': 500, 'penalty': 'l2',......0.6714250.6722340.6597060.6732860.6631550.6590150.6683170.6675500.0050754
\n", "

5 rows × 175 columns

\n", "
" ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time param_C \\\n", "0 13.432851 1.474184 0.040597 0.005634 0.1 \n", "1 0.744260 0.051803 0.042062 0.006093 0.1 \n", "2 10.501819 1.227208 0.043512 0.009269 0.01 \n", "3 0.789555 0.114426 0.044956 0.009950 0.01 \n", "4 4.774041 1.120333 0.041310 0.010212 0.001 \n", "\n", " param_max_iter param_penalty param_random_state param_solver \\\n", "0 500 l2 1999 sag \n", "1 500 l2 1999 lbfgs \n", "2 500 l2 1999 sag \n", "3 500 l2 1999 lbfgs \n", "4 500 l2 1999 sag \n", "\n", " params ... \\\n", "0 {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '... ... \n", "1 {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '... ... \n", "2 {'C': 0.01, 'max_iter': 500, 'penalty': 'l2', ... ... \n", "3 {'C': 0.01, 'max_iter': 500, 'penalty': 'l2', ... ... \n", "4 {'C': 0.001, 'max_iter': 500, 'penalty': 'l2',... ... \n", "\n", " split23_test_precision split24_test_precision split25_test_precision \\\n", "0 0.668673 0.670941 0.657928 \n", "1 0.668673 0.670857 0.658100 \n", "2 0.669011 0.671036 0.657742 \n", "3 0.669011 0.670951 0.657742 \n", "4 0.671425 0.672234 0.659706 \n", "\n", " split26_test_precision split27_test_precision split28_test_precision \\\n", "0 0.670738 0.659365 0.656282 \n", "1 0.670738 0.659365 0.656282 \n", "2 0.670484 0.660010 0.656353 \n", "3 0.670398 0.659926 0.656353 \n", "4 0.673286 0.663155 0.659015 \n", "\n", " split29_test_precision mean_test_precision std_test_precision \\\n", "0 0.665728 0.665584 0.004726 \n", "1 0.665899 0.665599 0.004716 \n", "2 0.665642 0.665791 0.004806 \n", "3 0.665812 0.665777 0.004801 \n", "4 0.668317 0.667550 0.005075 \n", "\n", " rank_test_precision \n", "0 8 \n", "1 7 \n", "2 5 \n", "3 6 \n", "4 4 \n", "\n", "[5 rows x 175 columns]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log_score.head()" ] }, { "cell_type": "code", "execution_count": 30, "id": "rocky-ordinary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1999),\n", " estimator=LogisticRegression(random_state=1999), n_jobs=-1,\n", " param_grid=[{'C': [0.1, 0.01, 0.001, 0.0001, 0.5, 0.6, 10],\n", " 'max_iter': [500], 'penalty': ['l2'],\n", " 'random_state': [1999], 'solver': ['sag', 'lbfgs']}],\n", " refit='f1',\n", " scoring=['roc_auc', 'accuracy', 'f1', 'recall', 'precision'])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_result" ] }, { "cell_type": "code", "execution_count": 31, "id": "prompt-agent", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "log_sag_df = log_score[log_score['param_solver']=='lbfgs']\n", "log_sag = {}\n", "\n", "for column in log_sag_df.columns:\n", " log_sag[column] = log_sag_df[column].to_numpy()\n", "\n", "\n", "scoring = {'accuracy': make_scorer(accuracy_score),'recall': make_scorer(recall_score),\n", " 'precision': make_scorer(precision_score),'f1': make_scorer(f1_score), \n", " 'roc_auc' : 'roc_auc'\n", " }\n", "X_sag = np.array(log_sag['param_C'], dtype=float)\n", "\n", "viz_hyperparameter(log_sag,X_sag,xlabel=\"Parameter C\",scoring=scoring,\n", " xlim=[0,15],ylim=[0.35,.85],plot_title=\" : Logistic lbfgs solver\")" ] }, { "cell_type": "markdown", "id": "stainless-dubai", "metadata": {}, "source": [ "Grid search result obained above was built with all evaluation metric against hyperparameter C:\n", "\n", "- recall\n", "- precision\n", "- f1\n", "- roc_auc\n", "\n", "\n", "C=0.6 is the optimal value found for the logistic regression. It is quite evident from the plot that precision,recall, f1 score and accuracy remains constant if we increase value of C. It cannot be tuned beyond 0.6\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "abstract-gospel", "metadata": {}, "source": [ "### Build Logistic Regression " ] }, { "cell_type": "code", "execution_count": 32, "id": "engaged-document", "metadata": {}, "outputs": [], "source": [ "logit_model = LogisticRegression(C=0.6, max_iter = 500, \n", " penalty = 'l2', random_state=1999, \n", " solver='lbfgs')\n", "\n", "rfe = RFE(logit_model,n_features_to_select=60, step=1)\n", "rfe = rfe.fit(XS_train_ohe, y_train)\n", "\n", "logit_model = logit_model.fit(XS_train_ohe[:,rfe.support_],y_train)" ] }, { "cell_type": "markdown", "id": "straight-chorus", "metadata": {}, "source": [ "### Feature Importance" ] }, { "cell_type": "code", "execution_count": 33, "id": "enabling-geology", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "model_coef = np.argsort(logit_model.coef_[0])[::-1]\n", "importance = logit_model.coef_[0][model_coef]\n", "\n", "plt.figure(figsize=(10,10))\n", "x1=sns.barplot(x=importance, y=list(X_test_ohe.columns[model_coef]))" ] }, { "cell_type": "code", "execution_count": 34, "id": "invalid-running", "metadata": {}, "outputs": [], "source": [ "y_hat = np.zeros(y_train.shape)\n", "y_hat = logit_model.predict(XS_test_ohe[:,rfe.support_])\n", "cm = confusion_matrix(y_target=y_test, y_predicted=y_hat)\n", "acc = accuracy_score(y_test, y_hat)\n", "recall= recall_score(y_test, y_hat)\n", "precision= precision_score(y_test, y_hat)\n", "f1= f1_score(y_test, y_hat)" ] }, { "cell_type": "markdown", "id": "expanded-northwest", "metadata": {}, "source": [ "### Model Performance" ] }, { "cell_type": "code", "execution_count": 35, "id": "ahead-jesus", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 0.7055778275829414\n", "Precision : 0.6728313068355982\n", "Recall : 0.518095987411487\n", "F1-Score : 0.5854113881850913\n" ] } ], "source": [ "from mlxtend.plotting import plot_confusion_matrix\n", "fig, ax = plot_confusion_matrix(conf_mat=cm)\n", "plt.title('Confusion Matrix')\n", "plt.show()\n", "print(\"Accuracy : \", acc)\n", "print(\"Precision : \", precision)\n", "print(\"Recall : \", recall)\n", "print(\"F1-Score : \", f1)\n" ] }, { "cell_type": "code", "execution_count": 36, "id": "cheap-citation", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQMAAAEhCAYAAAB7tcX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAATAUlEQVR4nO3deZhcVZnH8e9LAiIkrFmQJBAgIUxwADVxGRHZNzFkFDFBWQyLo4g4IJpxAXFQyYAOKi7AqGyy6kAQCQFxREU0tGhYwhJMwKQDWUBCAIXQvPNHnYQihO4KpOp2h+/neepJ3XNv3fMWTf3q3FO36kZmIklrVV2ApO7BMJAEGAaSCsNAEmAYSCoMA0mAYaBuLiKmRMThVdfxWmAY9GARcUhEtEXEkxHxcHnh7Pwq9/lgROzZyfpdIyIj4qoV2ncs7b9qsJ8vRcTFXW2Xmftl5gWN7FOvjmHQQ0XECcBZwFeBgcAWwHeBA1vQ/ULgHRGxaV3b4cD9q6uDqPH/z1bKTG897AZsCDwJfKCTbV5HLSzmldtZwOvKun7AtcDjwGPAb6i9MVwEPA/8vez/MyvZ767AXOD7wLGlrRfQDpwM/Kpu228Cc4AngD8C7yrt+wLPAktLP9NL+6+ArwC3lBqGlbajyvrvAT+t2/8k4CYgqv6brAk3k7dnegewLnBVJ9t8Hng7sBOwI/BW4Atl3YnUXtD9qY0qPgdkZh4K/BV4b2b2ycz/6mT/FwKHlfv7AHdRC516t5X+NwEuAa6MiHUz83pqI5rLSz871j3mUOAYoC/w0Ar7OxH454g4IiLeBRwJHJ4lGfTqGAY906bAosx8rpNtPgR8OTMXZOZC4FRqLzSovSO/AdgyM5dm5m9W9QWVmb8DNomIEdRC4cKVbHNxZj6amc9l5tepjVZGdLHr8zPz7vKYpSvs7+nyHL4BXAwcl5lzV6VuvTzDoGd6FOgXEb072WZzXvzO+lBpAzgDeAC4ISJmRcTEV1jHRcAngN1YySglIj4dEfdExOKIeJza4U2/LvY5p7OVmfkHYBYQwBWvqGqtlGHQM90KPAOM7WSbecCWdctblDYyc0lmnpiZWwNjgBMiYo+y3aqMEC4CPg5cV961lyvD+M8ABwMbZ+ZGwGJqL+LO+um0/4g4ltoIY17Zv1aTzt5Z1E1l5uKIOBn4TkQ8B9xAbei/J7BbZn4GuBT4QkTcRu0FdjK1oTURcQBwL/AXai/QDmoThwDzga0brGN2RLyb2jv1ivoCz1H75KF3GX1sULd+PrBXRKyVmc+v5PEvERHbAqdRm8R8GpgWEVMy88+NPF6dc2TQQ5Vj8BOoTQoupDa8/gRwddnkNKANuAO4E7i9tAEMB35BbSb/VuC7mfl/Zd3XqIXI4xHx6Qbq+G1mrjhxCDAVuJ7ax40PAf/gxYcAV5Z/H42I27vqpxwSXQxMyszpmTmT2sTnRRHxuq4er66FE7GSwJGBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqehWv3TUr99GOXTo5l1vqG6koR8pUjfx4IOPsGjR47Gydd0qDIYO3Zy2touqLkOrouOZqivQKhj1tgkvu87DBEmAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMgya6/vrfMWLE+xg2bCynn35+1eVoJSYc9VUGvOE9vHHHDy9ve+yxJ9hrn+MZvt0H2Wuf4/nb356osMLWaWoYRMS+EXFfRDwQEROb2Vd309HRwbHHTmLKlG8xY8aVXHrpVGbMmFV1WVrBEYftz/U//8aL2k6fdBF77D6Kmfdezh67j+L0SRdXVF1rNS0MIqIX8B1gP2AkMD4iRjarv+5m2rS7GTZsCFtvPZh11lmbceP2ZvLkm6suSyvYZZed2GSTDV7UNvlnv+Hww/YD4PDD9uPqa35dRWkt18yRwVuBBzJzVmY+C1wGHNjE/rqV9vYFDBkycPny4MEDaG9fUGFFatT8+X/jDW/oB8Bmm23K/Pl/q7ii1mhmGAwC5tQtzy1tUo8REURE1WW0ROUTiBFxTES0RUTbwoVrTgIPGjSAOXPmL1+eO3cBgwYNqLAiNWrgwI15+OFFADz88CIGDNio4opao5lh0A4MqVseXNpeJDPPzcxRmTmqf/+Nm1hOa40ePZKZM+cwe3Y7zz67lMsuu4ExY3apuiw1YMwBO3PBhVMAuODCKRz43ndVXFFr9G7ivm8DhkfEVtRCYBxwSBP761Z69+7N2WefxD77HEdHRwcTJoxh++23qbosrWD8h07hVzf/iUWLHmfwlmM59ZQjmfjZQzl43Bf5wY+uZcstNuOKy/6z6jJbIjKzeTuP2B84C+gF/DAzv9LZ9qNGjcy2touaVo+aoOOZqivQKhj1tgm0td270kmQZo4MyMzrgOua2Yek1aPyCURJ3YNhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkoJMLr0bEEmDZJZqXXbU1y/3MzA2aXJukFnrZMMjMvq0sRFK1GjpMiIidI+Ij5X6/iNiquWVJarUuwyAiTgE+C/xHaVoHuLiZRUlqvUZGBv8KjAGeAsjMeYCHENIappEweDYzkzKZGBHrN7ckSVVoJAyuiIhzgI0i4mjgF8B5zS1LUqu97KcJy2TmmRGxF/AEsC1wcmbe2PTKJLVUl2FQ3Am8ntqhwp3NK0dSVRr5NOEoYBrwPuAg4PcRMaHZhUlqrUZGBicBb8rMRwEiYlPgd8APm1mYpNZqZALxUWBJ3fKS0iZpDdLZdxNOKHcfAP4QEZOpzRkcCNzRgtoktVBnhwnLTiz6S7ktM7l55UiqSmdfVDq1lYVIqlaXE4gR0R/4DLA9sO6y9szcvYl1SWqxRiYQfwzcC2wFnAo8CNzWxJokVaCRMNg0M38ALM3MmzNzAuCoQFrDNHKewdLy78MR8R5gHrBJ80qSVIVGwuC0iNgQOBH4NrAB8O9NrUpSyzXyRaVry93FwG7NLUdSVTo76ejbvPCDqC+RmZ9c3cXM++MsTo1DVvdu1USn5CVVl6BV8vLThJ2NDNpWfyGSuqvOTjq6oJWFSKqWF1GRBBgGkgrDQBLQ2C8dbRsRN0XEXWV5h4j4QvNLk9RKjYwMzqN2AZWlAJl5BzCumUVJar1GwmC9zJy2QttzzShGUnUaCYNFEbENL1xE5SDg4aZWJanlGvluwrHAucB2EdEOzAY+3NSqJLVcI99NmAXsWS6rtlZmLunqMZJ6nkZ+6ejkFZYByMwvN6kmSRVo5DDhqbr76wIHAPc0pxxJVWnkMOHr9csRcSYwtWkVSarEKzkDcT1g8OouRFK1GpkzuJMXftegF9AfcL5AWsM0MmdwQN3954D5melJR9IaptMwiIhewNTM3K5F9UiqSKdzBpnZAdwXEVu0qB5JFWnkMGFj4O6ImEbdx4yZOaZpVUlquUbC4ItNr0JS5RoJg/0z87P1DRExCbi5OSVJqkIj5xnstZK2/VZ3IZKq1dl1Ez4GfBzYOiLuqFvVF7il2YVJaq3ODhMuAaYAXwMm1rUvyczHmlqVpJbr7LoJi6ldUm1868qRVBV/HVkSYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQ0dq1FdeH42TfxzJKnyI7nef65Ds4b/X7W3XhDDrr8v9lo6CAef7Cdnxz8Kf7x+BMA7PvNzzN8/3ez9Ol/cPURE3nkTzMqfgZa5vrrf8fxx59JR8fzHHXUWCZOPKLqklqmaSODiPhhRCyIiLua1Ud3csFuh3POm8Zy3uj3A7DzxGOYfdOtnL3tPsy+6VZ2nngMAMP224VNhg/l28P35mfHfJH3fO9LFVateh0dHRx77CSmTPkWM2ZcyaWXTmXGjFlVl9UyzTxMOB/Yt4n779ZGHLgH0y+4GoDpF1zNiLF7ArDdgXtwx4W19vY/TGfdjTagz2b9K6tTL5g27W6GDRvC1lsPZp111mbcuL2ZPPm1c7HxpoVBZv4aeE1ckzETDr3hBxzd9lPefPTBAPQZuClPPrIQgCcfWUifgZsC0HfQQBbPeWT5Y5+Y+wh9Bw1sfdF6ifb2BQwZ8sLfYvDgAbS3L6iwotaqfM4gIo4BjgHYsPpyXpEf7TyeJfMWsF7/TTj0xh+x6N6XDi0zs4LKpMZV/mlCZp6bmaMyc9R69Kq6nFdkybzau8fTCx/j3qtuZNBbd+DJ+Y8uH/732aw/Ty2oDZKWtM9nwyGbLX/sBoM3Y0n7/NYXrZcYNGgAc+a88LeYO3cBgwYNqLCi1qo8DHq6tdd7Pev0WX/5/W32ficL7prJ/df8kh0PHwvAjoeP5b7JNwFw3zW/ZIfDau2D3rYjzyxesvxwQtUaPXokM2fOYfbsdp59dimXXXYDY8bsUnVZLdMzx+XdyPoDN+WDV30HgLV69+KuS67lL1N/w7zb7uSgK87iTUcexOKH5nHlwZ8CYOZ1NzN8/3dz3AM3svTpvzP5I5+rsnzV6d27N2effRL77HMcHR0dTJgwhu2336bqslommnUsGxGXArsC/YD5wCmZ+YPOHrN5rJsfZcum1KPmOCUvqboErYJRow6lrW1GrGxd00YGmTm+WfuWtPo5ZyAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJgMjMqmtYLiIWAg9VXUcT9AMWVV2EVsma+jfbMjP7r2xFtwqDNVVEtGXmqKrrUONei38zDxMkAYaBpMIwaI1zqy5Aq+w19zdzzkAS4MhAUmEYSAIMg6aKiH0j4r6IeCAiJlZdj7oWET+MiAURcVfVtbSaYdAkEdEL+A6wHzASGB8RI6utSg04H9i36iKqYBg0z1uBBzJzVmY+C1wGHFhxTepCZv4aeKzqOqpgGDTPIGBO3fLc0iZ1S4aBJMAwaKZ2YEjd8uDSJnVLhkHz3AYMj4itImIdYBxwTcU1SS/LMGiSzHwO+AQwFbgHuCIz7662KnUlIi4FbgVGRMTciDiy6ppaxdORJQGODCQVhoEkwDCQVBgGkgDDQFJhGLxGRcSuEXFtuT+ms29VRsRGEfHxV9DHlyLi0422r7DN+RFx0Cr0NfS1+E3D1ckwWMOUb0uuksy8JjNP72STjYBVDgP1LIZBD1He+e6NiB9HxD0R8ZOIWK+sezAiJkXE7cAHImLviLg1Im6PiCsjok/Zbt+yj9uB99Xt+4iIOLvcHxgRV0XE9HL7F+B0YJuI+HNEnFG2OykibouIOyLi1Lp9fT4i7o+I3wIjGnheR5f9TI+Iny57TsWeEdFW9ndA2b5XRJxR1/dHX+1/W9UYBj3LCOC7mflPwBO8+N360cx8M/AL4AvAnmW5DTghItYFzgPeC7wF2Oxl+vgWcHNm7gi8GbgbmAj8JTN3ysyTImJvYDi1r2nvBLwlInaJiLdQO+16J2B/YHQDz+l/M3N06e8eoP6Mv6Glj/cA3y/P4UhgcWaOLvs/OiK2aqAfdaF31QVolczJzFvK/YuBTwJnluXLy79vp/ZjKrdEBMA61E6v3Q6YnZkzASLiYuCYlfSxO3AYQGZ2AIsjYuMVttm73P5UlvtQC4e+wFWZ+XTpo5HvYrwxIk6jdijSh9rp28tckZnPAzMjYlZ5DnsDO9TNJ2xY+r6/gb7UCcOgZ1nx3PH65afKvwHcmJnj6zeMiJ1WYx0BfC0zz1mhj0+9gn2dD4zNzOkRcQSwa926lT3fAI7LzPrQICKGvoK+VcfDhJ5li4h4R7l/CPDblWzze+CdETEMICLWj4htgXuBoRGxTdlu/EoeC3AT8LHy2F4RsSGwhNq7/jJTgQl1cxGDImIA8GtgbES8PiL6Ujsk6Upf4OGIWBv40ArrPhARa5WatwbuK31/rGxPRGwbEes30I+6YBj0LPcBx0bEPcDGwPdW3CAzFwJHAJdGxB2UQ4TM/Ae1w4KflwnEBS/Tx/HAbhFxJ/BHYGRmPkrtsOOuiDgjM28ALgFuLdv9BOibmbdTO1yZDkyh9jXurnwR+ANwC7XAqvdXYFrZ17+V5/A/wAzg9vJR4jk4wl0t/NZiD1GGwddm5hsrLkVrKEcGkgBHBpIKRwaSAMNAUmEYSAIMA0mFYSAJgP8HEwxC7USz91YAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_matrix = np.array([[0, 10],[500, 0]])\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_matrix,cmap=\"YlOrRd\")\n", "plt.title('Cost Matrix')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "representative-bryan", "metadata": {}, "source": [ "### Build Cost Matrix" ] }, { "cell_type": "code", "execution_count": 37, "id": "institutional-volunteer", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_calc = np.multiply(cm, cost_matrix)\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_calc,cmap=\"OrRd\")\n", "plt.title('Cost Calculation')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "pretty-probability", "metadata": {}, "source": [ "### Precision Recall Curve" ] }, { "cell_type": "code", "execution_count": 38, "id": "graphic-convergence", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logistic Regression : f1=0.585 auc=0.676\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "y_score = logit_model.predict_proba(XS_test_ohe[:,rfe.support_])\n", "logit_probs = y_score[:, 1]\n", "# predict class values\n", "logit_precision, logit_recall, logit_thresholds = precision_recall_curve(y_test, logit_probs)\n", "logit_f1, logit_auc = f1_score(y_test, y_hat), auc(logit_recall, logit_precision)\n", "# summarize scores\n", "print('Logistic Regression : f1=%.3f auc=%.3f' % (logit_f1, logit_auc))\n", "pyplot.plot(logit_recall, logit_precision, marker='.', label='Logistic Regression')\n", "# axis labels\n", "pyplot.xlabel('Recall')\n", "pyplot.ylabel('Precision')\n", "# show the legend\n", "pyplot.legend()\n", "# show the plot\n", "pyplot.show()" ] }, { "cell_type": "markdown", "id": "moral-electric", "metadata": {}, "source": [ "### Cost Curve" ] }, { "cell_type": "code", "execution_count": 39, "id": "latter-trustee", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.006807 189690 0.000000 1.000000 0.401212\n", "1 0.008014 190190 0.000079 1.000000 0.401181\n", "2 0.012172 190690 0.000157 1.000000 0.401149\n", "3 0.014811 190680 0.000157 0.999947 0.401181\n", "4 0.016882 190670 0.000157 0.999895 0.401212\n", "... ... ... ... ... ...\n", "31559 0.949043 6295510 0.990637 0.000053 0.602513\n", "31560 0.949097 6296010 0.990716 0.000053 0.602481\n", "31561 0.949692 6296510 0.990795 0.000053 0.602450\n", "31562 0.950181 6297010 0.990873 0.000053 0.602418\n", "31563 0.950191 6297000 0.990873 0.000000 0.602450\n", "\n", "[31564 rows x 5 columns]\n" ] } ], "source": [ "plot_metrics_cost_vs_threshold(y_test, logit_probs,0.05,cost_matrix)" ] }, { "cell_type": "markdown", "id": "lesser-click", "metadata": {}, "source": [ "## K-Nearest Neighbor" ] }, { "cell_type": "markdown", "id": "electrical-uzbekistan", "metadata": {}, "source": [ "### KNN Classifier Hyperparameters" ] }, { "cell_type": "code", "execution_count": 40, "id": "minus-earthquake", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'algorithm': 'auto',\n", " 'leaf_size': 30,\n", " 'metric': 'minkowski',\n", " 'metric_params': None,\n", " 'n_jobs': None,\n", " 'n_neighbors': 5,\n", " 'p': 2,\n", " 'weights': 'uniform'}" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn_model = KNeighborsClassifier()\n", "knn_model.get_params()" ] }, { "cell_type": "markdown", "id": "going-beginning", "metadata": {}, "source": [ "### Hyperparameter Tuning" ] }, { "cell_type": "code", "execution_count": 41, "id": "steady-ceiling", "metadata": {}, "outputs": [], "source": [ "if(path.exists(\"data/knn_final_grid_result.dat\")):\n", " knn_grid_result = pickle.load(open(\"data/knn_final_grid_result.dat\", \"rb\")) \n", "else: \n", " knn_model = KNeighborsClassifier()\n", " cs07_random_state_ = 1999\n", " \n", " knn_param_grid = [\n", " {'n_neighbors' : np.arange(10,14,2)} ]\n", " \n", " cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1999)\n", " grid_search = GridSearchCV(estimator=knn_model, \n", " param_grid=knn_param_grid, n_jobs=-1, cv=cv, \n", " scoring='f1',error_score=0)\n", " \n", " knn_grid_result = grid_search.fit(XS_train_ohe, y_train.flatten()) \n", " pickle.dump( grid_result, open( \"data/knn_final_grid_result.dat\", \"wb\" )) \n" ] }, { "cell_type": "markdown", "id": "historic-buying", "metadata": {}, "source": [ "### Best Parameters" ] }, { "cell_type": "code", "execution_count": 42, "id": "measured-desktop", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best: 0.694441 using {'n_neighbors': 12}\n" ] } ], "source": [ "# summarize results\n", "print(\"Best: %f using %s\" % (knn_grid_result.best_score_, knn_grid_result.best_params_))" ] }, { "cell_type": "markdown", "id": "combined-footwear", "metadata": {}, "source": [ "**Grid Search Result**" ] }, { "cell_type": "code", "execution_count": 43, "id": "soviet-grass", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_n_neighborsparamssplit0_test_roc_aucsplit1_test_roc_aucsplit2_test_roc_aucsplit3_test_roc_auc...split23_test_precisionsplit24_test_precisionsplit25_test_precisionsplit26_test_precisionsplit27_test_precisionsplit28_test_precisionsplit29_test_precisionmean_test_precisionstd_test_precisionrank_test_precision
00.3051390.084233296.61283313.08461710{'n_neighbors': 10}0.8725730.8731580.8704470.865345...0.8342680.8287690.8219030.8521970.8344300.8341350.8446180.8355370.0059222
10.3102540.188162286.18508055.57597412{'n_neighbors': 12}0.8792900.8791200.8783070.870748...0.8497880.8371000.8233040.8546980.8373680.8426900.8518200.8419200.0066691
\n", "

2 rows × 171 columns

\n", "
" ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", "0 0.305139 0.084233 296.612833 13.084617 \n", "1 0.310254 0.188162 286.185080 55.575974 \n", "\n", " param_n_neighbors params split0_test_roc_auc \\\n", "0 10 {'n_neighbors': 10} 0.872573 \n", "1 12 {'n_neighbors': 12} 0.879290 \n", "\n", " split1_test_roc_auc split2_test_roc_auc split3_test_roc_auc ... \\\n", "0 0.873158 0.870447 0.865345 ... \n", "1 0.879120 0.878307 0.870748 ... \n", "\n", " split23_test_precision split24_test_precision split25_test_precision \\\n", "0 0.834268 0.828769 0.821903 \n", "1 0.849788 0.837100 0.823304 \n", "\n", " split26_test_precision split27_test_precision split28_test_precision \\\n", "0 0.852197 0.834430 0.834135 \n", "1 0.854698 0.837368 0.842690 \n", "\n", " split29_test_precision mean_test_precision std_test_precision \\\n", "0 0.844618 0.835537 0.005922 \n", "1 0.851820 0.841920 0.006669 \n", "\n", " rank_test_precision \n", "0 2 \n", "1 1 \n", "\n", "[2 rows x 171 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn_grid = pd.DataFrame.from_dict(knn_grid_result.cv_results_)\n", "knn_grid" ] }, { "cell_type": "code", "execution_count": 44, "id": "increasing-valuation", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "knn_neighbhor = {}\n", "\n", "for column in knn_grid.columns:\n", " knn_neighbhor[column] = knn_grid[column].to_numpy()\n", "\n", "\n", "scoring = {'accuracy': make_scorer(accuracy_score),'recall': make_scorer(recall_score),\n", " 'precision': make_scorer(precision_score),'f1': make_scorer(f1_score), \n", " 'roc_auc' : 'roc_auc'\n", " }\n", "n_neighbhor = np.array(knn_neighbhor['param_n_neighbors'], dtype=float)\n", "\n", "viz_hyperparameter(knn_neighbhor,n_neighbhor,xlabel=\"No. of Neighbhors\",scoring=scoring,\n", " xlim=[10,15],ylim=[0.5,.95],plot_title=\" : KNN \")" ] }, { "cell_type": "markdown", "id": "compatible-defeat", "metadata": {}, "source": [ "Grid search found n_neighbor=12 with better recall and selected for the predictions." ] }, { "cell_type": "markdown", "id": "precise-consequence", "metadata": {}, "source": [ "### Build KNN classifier\n" ] }, { "cell_type": "code", "execution_count": 45, "id": "deadly-space", "metadata": {}, "outputs": [], "source": [ "knn = KNeighborsClassifier(n_neighbors=12)\n", "model_knn = knn.fit(XS_train_ohe, y_train)\n", "y_hat = np.zeros(y_train.shape)\n", "y_hat = model_knn.predict(XS_test_ohe)\n", "cm = confusion_matrix(y_target=y_test, y_predicted=y_hat)\n", "acc= accuracy_score(y_test, y_hat)\n", "recall= recall_score(y_test, y_hat)\n", "precision= precision_score(y_test, y_hat)\n", "f1= f1_score(y_test, y_hat)" ] }, { "cell_type": "markdown", "id": "possible-major", "metadata": {}, "source": [ "### Model Performance" ] }, { "cell_type": "code", "execution_count": 46, "id": "eligible-halloween", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.7985100539789766\n", "Precision : 0.8527149069015498\n", "Recall : 0.6017309205350118\n", "F1-Score : 0.7055675999815488\n" ] } ], "source": [ "from mlxtend.plotting import plot_confusion_matrix\n", "fig, ax = plot_confusion_matrix(conf_mat=cm)\n", "plt.title('Confusion Matrix')\n", "plt.show()\n", "print(\"Accuracy: \", acc)\n", "print(\"Precision : \", precision)\n", "print(\"Recall : \", recall)\n", "print(\"F1-Score : \", f1)\n", "\n" ] }, { "cell_type": "markdown", "id": "rough-depression", "metadata": {}, "source": [ "### Build Cost Matrix" ] }, { "cell_type": "code", "execution_count": 47, "id": "sudden-warren", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQMAAAEhCAYAAAB7tcX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAATAUlEQVR4nO3deZhcVZnH8e9LAiIkrFmQJBAgIUxwADVxGRHZNzFkFDFBWQyLo4g4IJpxAXFQyYAOKi7AqGyy6kAQCQFxREU0tGhYwhJMwKQDWUBCAIXQvPNHnYQihO4KpOp2h+/neepJ3XNv3fMWTf3q3FO36kZmIklrVV2ApO7BMJAEGAaSCsNAEmAYSCoMA0mAYaBuLiKmRMThVdfxWmAY9GARcUhEtEXEkxHxcHnh7Pwq9/lgROzZyfpdIyIj4qoV2ncs7b9qsJ8vRcTFXW2Xmftl5gWN7FOvjmHQQ0XECcBZwFeBgcAWwHeBA1vQ/ULgHRGxaV3b4cD9q6uDqPH/z1bKTG897AZsCDwJfKCTbV5HLSzmldtZwOvKun7AtcDjwGPAb6i9MVwEPA/8vez/MyvZ767AXOD7wLGlrRfQDpwM/Kpu228Cc4AngD8C7yrt+wLPAktLP9NL+6+ArwC3lBqGlbajyvrvAT+t2/8k4CYgqv6brAk3k7dnegewLnBVJ9t8Hng7sBOwI/BW4Atl3YnUXtD9qY0qPgdkZh4K/BV4b2b2ycz/6mT/FwKHlfv7AHdRC516t5X+NwEuAa6MiHUz83pqI5rLSz871j3mUOAYoC/w0Ar7OxH454g4IiLeBRwJHJ4lGfTqGAY906bAosx8rpNtPgR8OTMXZOZC4FRqLzSovSO/AdgyM5dm5m9W9QWVmb8DNomIEdRC4cKVbHNxZj6amc9l5tepjVZGdLHr8zPz7vKYpSvs7+nyHL4BXAwcl5lzV6VuvTzDoGd6FOgXEb072WZzXvzO+lBpAzgDeAC4ISJmRcTEV1jHRcAngN1YySglIj4dEfdExOKIeJza4U2/LvY5p7OVmfkHYBYQwBWvqGqtlGHQM90KPAOM7WSbecCWdctblDYyc0lmnpiZWwNjgBMiYo+y3aqMEC4CPg5cV961lyvD+M8ABwMbZ+ZGwGJqL+LO+um0/4g4ltoIY17Zv1aTzt5Z1E1l5uKIOBn4TkQ8B9xAbei/J7BbZn4GuBT4QkTcRu0FdjK1oTURcQBwL/AXai/QDmoThwDzga0brGN2RLyb2jv1ivoCz1H75KF3GX1sULd+PrBXRKyVmc+v5PEvERHbAqdRm8R8GpgWEVMy88+NPF6dc2TQQ5Vj8BOoTQoupDa8/gRwddnkNKANuAO4E7i9tAEMB35BbSb/VuC7mfl/Zd3XqIXI4xHx6Qbq+G1mrjhxCDAVuJ7ax40PAf/gxYcAV5Z/H42I27vqpxwSXQxMyszpmTmT2sTnRRHxuq4er66FE7GSwJGBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqehWv3TUr99GOXTo5l1vqG6koR8pUjfx4IOPsGjR47Gydd0qDIYO3Zy2touqLkOrouOZqivQKhj1tgkvu87DBEmAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMgya6/vrfMWLE+xg2bCynn35+1eVoJSYc9VUGvOE9vHHHDy9ve+yxJ9hrn+MZvt0H2Wuf4/nb356osMLWaWoYRMS+EXFfRDwQEROb2Vd309HRwbHHTmLKlG8xY8aVXHrpVGbMmFV1WVrBEYftz/U//8aL2k6fdBF77D6Kmfdezh67j+L0SRdXVF1rNS0MIqIX8B1gP2AkMD4iRjarv+5m2rS7GTZsCFtvPZh11lmbceP2ZvLkm6suSyvYZZed2GSTDV7UNvlnv+Hww/YD4PDD9uPqa35dRWkt18yRwVuBBzJzVmY+C1wGHNjE/rqV9vYFDBkycPny4MEDaG9fUGFFatT8+X/jDW/oB8Bmm23K/Pl/q7ii1mhmGAwC5tQtzy1tUo8REURE1WW0ROUTiBFxTES0RUTbwoVrTgIPGjSAOXPmL1+eO3cBgwYNqLAiNWrgwI15+OFFADz88CIGDNio4opao5lh0A4MqVseXNpeJDPPzcxRmTmqf/+Nm1hOa40ePZKZM+cwe3Y7zz67lMsuu4ExY3apuiw1YMwBO3PBhVMAuODCKRz43ndVXFFr9G7ivm8DhkfEVtRCYBxwSBP761Z69+7N2WefxD77HEdHRwcTJoxh++23qbosrWD8h07hVzf/iUWLHmfwlmM59ZQjmfjZQzl43Bf5wY+uZcstNuOKy/6z6jJbIjKzeTuP2B84C+gF/DAzv9LZ9qNGjcy2touaVo+aoOOZqivQKhj1tgm0td270kmQZo4MyMzrgOua2Yek1aPyCURJ3YNhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkoJMLr0bEEmDZJZqXXbU1y/3MzA2aXJukFnrZMMjMvq0sRFK1GjpMiIidI+Ij5X6/iNiquWVJarUuwyAiTgE+C/xHaVoHuLiZRUlqvUZGBv8KjAGeAsjMeYCHENIappEweDYzkzKZGBHrN7ckSVVoJAyuiIhzgI0i4mjgF8B5zS1LUqu97KcJy2TmmRGxF/AEsC1wcmbe2PTKJLVUl2FQ3Am8ntqhwp3NK0dSVRr5NOEoYBrwPuAg4PcRMaHZhUlqrUZGBicBb8rMRwEiYlPgd8APm1mYpNZqZALxUWBJ3fKS0iZpDdLZdxNOKHcfAP4QEZOpzRkcCNzRgtoktVBnhwnLTiz6S7ktM7l55UiqSmdfVDq1lYVIqlaXE4gR0R/4DLA9sO6y9szcvYl1SWqxRiYQfwzcC2wFnAo8CNzWxJokVaCRMNg0M38ALM3MmzNzAuCoQFrDNHKewdLy78MR8R5gHrBJ80qSVIVGwuC0iNgQOBH4NrAB8O9NrUpSyzXyRaVry93FwG7NLUdSVTo76ejbvPCDqC+RmZ9c3cXM++MsTo1DVvdu1USn5CVVl6BV8vLThJ2NDNpWfyGSuqvOTjq6oJWFSKqWF1GRBBgGkgrDQBLQ2C8dbRsRN0XEXWV5h4j4QvNLk9RKjYwMzqN2AZWlAJl5BzCumUVJar1GwmC9zJy2QttzzShGUnUaCYNFEbENL1xE5SDg4aZWJanlGvluwrHAucB2EdEOzAY+3NSqJLVcI99NmAXsWS6rtlZmLunqMZJ6nkZ+6ejkFZYByMwvN6kmSRVo5DDhqbr76wIHAPc0pxxJVWnkMOHr9csRcSYwtWkVSarEKzkDcT1g8OouRFK1GpkzuJMXftegF9AfcL5AWsM0MmdwQN3954D5melJR9IaptMwiIhewNTM3K5F9UiqSKdzBpnZAdwXEVu0qB5JFWnkMGFj4O6ImEbdx4yZOaZpVUlquUbC4ItNr0JS5RoJg/0z87P1DRExCbi5OSVJqkIj5xnstZK2/VZ3IZKq1dl1Ez4GfBzYOiLuqFvVF7il2YVJaq3ODhMuAaYAXwMm1rUvyczHmlqVpJbr7LoJi6ldUm1868qRVBV/HVkSYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQ0dq1FdeH42TfxzJKnyI7nef65Ds4b/X7W3XhDDrr8v9lo6CAef7Cdnxz8Kf7x+BMA7PvNzzN8/3ez9Ol/cPURE3nkTzMqfgZa5vrrf8fxx59JR8fzHHXUWCZOPKLqklqmaSODiPhhRCyIiLua1Ud3csFuh3POm8Zy3uj3A7DzxGOYfdOtnL3tPsy+6VZ2nngMAMP224VNhg/l28P35mfHfJH3fO9LFVateh0dHRx77CSmTPkWM2ZcyaWXTmXGjFlVl9UyzTxMOB/Yt4n779ZGHLgH0y+4GoDpF1zNiLF7ArDdgXtwx4W19vY/TGfdjTagz2b9K6tTL5g27W6GDRvC1lsPZp111mbcuL2ZPPm1c7HxpoVBZv4aeE1ckzETDr3hBxzd9lPefPTBAPQZuClPPrIQgCcfWUifgZsC0HfQQBbPeWT5Y5+Y+wh9Bw1sfdF6ifb2BQwZ8sLfYvDgAbS3L6iwotaqfM4gIo4BjgHYsPpyXpEf7TyeJfMWsF7/TTj0xh+x6N6XDi0zs4LKpMZV/mlCZp6bmaMyc9R69Kq6nFdkybzau8fTCx/j3qtuZNBbd+DJ+Y8uH/732aw/Ty2oDZKWtM9nwyGbLX/sBoM3Y0n7/NYXrZcYNGgAc+a88LeYO3cBgwYNqLCi1qo8DHq6tdd7Pev0WX/5/W32ficL7prJ/df8kh0PHwvAjoeP5b7JNwFw3zW/ZIfDau2D3rYjzyxesvxwQtUaPXokM2fOYfbsdp59dimXXXYDY8bsUnVZLdMzx+XdyPoDN+WDV30HgLV69+KuS67lL1N/w7zb7uSgK87iTUcexOKH5nHlwZ8CYOZ1NzN8/3dz3AM3svTpvzP5I5+rsnzV6d27N2effRL77HMcHR0dTJgwhu2336bqslommnUsGxGXArsC/YD5wCmZ+YPOHrN5rJsfZcum1KPmOCUvqboErYJRow6lrW1GrGxd00YGmTm+WfuWtPo5ZyAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJgMjMqmtYLiIWAg9VXUcT9AMWVV2EVsma+jfbMjP7r2xFtwqDNVVEtGXmqKrrUONei38zDxMkAYaBpMIwaI1zqy5Aq+w19zdzzkAS4MhAUmEYSAIMg6aKiH0j4r6IeCAiJlZdj7oWET+MiAURcVfVtbSaYdAkEdEL+A6wHzASGB8RI6utSg04H9i36iKqYBg0z1uBBzJzVmY+C1wGHFhxTepCZv4aeKzqOqpgGDTPIGBO3fLc0iZ1S4aBJMAwaKZ2YEjd8uDSJnVLhkHz3AYMj4itImIdYBxwTcU1SS/LMGiSzHwO+AQwFbgHuCIz7662KnUlIi4FbgVGRMTciDiy6ppaxdORJQGODCQVhoEkwDCQVBgGkgDDQFJhGLxGRcSuEXFtuT+ms29VRsRGEfHxV9DHlyLi0422r7DN+RFx0Cr0NfS1+E3D1ckwWMOUb0uuksy8JjNP72STjYBVDgP1LIZBD1He+e6NiB9HxD0R8ZOIWK+sezAiJkXE7cAHImLviLg1Im6PiCsjok/Zbt+yj9uB99Xt+4iIOLvcHxgRV0XE9HL7F+B0YJuI+HNEnFG2OykibouIOyLi1Lp9fT4i7o+I3wIjGnheR5f9TI+Iny57TsWeEdFW9ndA2b5XRJxR1/dHX+1/W9UYBj3LCOC7mflPwBO8+N360cx8M/AL4AvAnmW5DTghItYFzgPeC7wF2Oxl+vgWcHNm7gi8GbgbmAj8JTN3ysyTImJvYDi1r2nvBLwlInaJiLdQO+16J2B/YHQDz+l/M3N06e8eoP6Mv6Glj/cA3y/P4UhgcWaOLvs/OiK2aqAfdaF31QVolczJzFvK/YuBTwJnluXLy79vp/ZjKrdEBMA61E6v3Q6YnZkzASLiYuCYlfSxO3AYQGZ2AIsjYuMVttm73P5UlvtQC4e+wFWZ+XTpo5HvYrwxIk6jdijSh9rp28tckZnPAzMjYlZ5DnsDO9TNJ2xY+r6/gb7UCcOgZ1nx3PH65afKvwHcmJnj6zeMiJ1WYx0BfC0zz1mhj0+9gn2dD4zNzOkRcQSwa926lT3fAI7LzPrQICKGvoK+VcfDhJ5li4h4R7l/CPDblWzze+CdETEMICLWj4htgXuBoRGxTdlu/EoeC3AT8LHy2F4RsSGwhNq7/jJTgQl1cxGDImIA8GtgbES8PiL6Ujsk6Upf4OGIWBv40ArrPhARa5WatwbuK31/rGxPRGwbEes30I+6YBj0LPcBx0bEPcDGwPdW3CAzFwJHAJdGxB2UQ4TM/Ae1w4KflwnEBS/Tx/HAbhFxJ/BHYGRmPkrtsOOuiDgjM28ALgFuLdv9BOibmbdTO1yZDkyh9jXurnwR+ANwC7XAqvdXYFrZ17+V5/A/wAzg9vJR4jk4wl0t/NZiD1GGwddm5hsrLkVrKEcGkgBHBpIKRwaSAMNAUmEYSAIMA0mFYSAJgP8HEwxC7USz91YAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_matrix = np.array([[0, 10],[500, 0]])\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_matrix,cmap=\"YlOrRd\")\n", "plt.title('Cost Matrix')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 48, "id": "serious-smile", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_calc = np.multiply(cm, cost_matrix)\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_calc,cmap=\"OrRd\")\n", "plt.title('Cost Calculation')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "complex-andrews", "metadata": {}, "source": [ "### Precision-Recall Curve" ] }, { "cell_type": "code", "execution_count": 49, "id": "refined-concord", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "KNN: f1=0.706 auc=0.849\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deXxU5b3H8c8vC1kgkIQkqGRjE0RFNBFZXEDbil1cqrWuuHtr8ap16bXe21672N5ardVKF6573bVq9UpdqqBWQQkCyiIQgUBQIULCFgJZfvePmdCIkQySMzPJfN+v17wyZ5mZ3yEh35zzPOd5zN0REZHElRTrAkREJLYUBCIiCU5BICKS4BQEIiIJTkEgIpLgUmJdwJ7Ky8vz0tLSWJchItKlzJkz51N3z29vW5cLgtLSUioqKmJdhohIl2JmVV+0TZeGREQSnIJARCTBKQhERBJcl2sjEBH5shobG6murqahoSHWpQQmPT2dwsJCUlNTI36NgkBEEkZ1dTVZWVmUlpZiZrEup9O5O+vXr6e6upoBAwZE/LrALg2Z2T1mts7MFnzBdjOzO8ys0szeM7PDgqpFRASgoaGBvn37dssQADAz+vbtu8dnPEG2EdwHTNzN9hOAIeHHpcAfA6yFOVW1TJleyZyq2iA/RkTiXHcNgVZf5vgCuzTk7q+bWeludjkJeMBD42DPMrNsM9vX3T/u7FrmVNVy5tRZNDa3kJxkXDZ+ECMKs+mTkUrvjBR6p6fSJyOVzB7JmBlzqmqZtXw9owf2pawkp7PLERGJK7FsI+gPrG6zXB1e97kgMLNLCZ01UFxcvMcfNGv5ehqbW3CgqcX5/auV7e6XnGRkpiaxZXszHl6+fMJgTj2skKLcjG7/l4SIBK9Xr15s2bIFgGnTpnHVVVfx8ssvc++993LzzTezcuVKCgoKPrevmXH11Vdz6623AnDLLbewZcsWbrzxxr2uqUs0Frv7VGAqQHl5+R7PpDN6YF/SUpNobGohJTmJ355+CMW5PdnU0MjGbY1s2hb+2tDIm5Xrmbe6DoDmFuf2V5Zx+yvL6NuzB4cWZ3NocQ6HFmdzSGE2PdO6xD+fiMShV155hSuuuIIXX3yRkpISAPLy8rj11lv59a9//bn909LSeOqpp/jRj35EXl5ep9YSy99ka4CiNsuF4XWdrqwkh4cuHh3R5Z5jh9Vy9l2zaGxqITUliZtOPpiGpmbmrqrj3VW1/GPxOgCSDIbu0zsUDkXZHFaSw4C+PUlK0lmDSHcSxKXi119/nUsuuYRp06YxaNCgnesvvPBC7rvvPv7jP/6D3Nzcz7wmJSWFSy+9lNtuu42bbrqpU+rY+d6d+m575lngcjN7FDgC2BhE+0CrspKciL6JXxQaZx8RSuy6+h3MW13Hu6vqmLuqlufmf8TDb68CoE9GKiOLsjms9ayhKJvKdVvU3iASh3763EIWfbRpt/tsbmjkg0820+KhP/6G7ZNFVvoX988fvl9v/vtbB+72Pbdv387JJ5/MjBkzGDZs2Ge29erViwsvvJDbb7+dn/70p5977eTJkxkxYgQ//OEPd/sZeyqwIDCzR4DxQJ6ZVQP/DaQCuPufgGnA14FKoB64IKha9tTuQiM7swfjhxYwfmjoGl5Li/NhzRbmrqpj7upa3q2q43evLKV1KmgDHEhJMq76yhDGDy2gpG/mbn+YRCQ+bGpooiX8f7nFQ8t7+383NTWVsWPHcvfdd3P77bd/bvsVV1zByJEjufbaaz+3rXfv3kyaNIk77riDjIyMvaqjrSB7DZ3ZwXYHJgf1+dGSlGQM6ZfFkH5ZnH546ErX5oZG3qveyJ9mfMgblZ8CoUbqW15ayi0vLQWgb88eFPfNpLRvT4pzMynNy6Q4tyelfTPJ7dlDDdMiAevoL3cIXRZqe6n49jMO3esz+6SkJB5//HGOO+44fvnLX3LDDTd8Znt2djZnnXUWU6ZMaff1V111FYcddhgXXNB5fzurtTMAWempjBucR3pqMrOrNoR+iJKT+MUpB9GzRwor19ezasNWqtbX886KDTwzb83OMwiAXmkpnwuH1tDYp3e62iFEomRP2hf3RGZmJs8//zxHHXUU/fr146KLLvrM9quvvprDDz+cpqamz702NzeX008/nbvvvpsLL7ywU+pREAQo0h+i7U3NrN6wjVUbtrLy03pWbainav1WPvh4My8vWktj879SokdKEsW5mZTk/iscWr/2z87g/TUb1SYh0okibV/cU7m5ubzwwgscffTR5Od/dr6YvLw8TjnlFG677bZ2X3vNNddw5513dlot5r7HvTFjqry83BNpYprmFuejum2s2lDPyvVbWbU+9LVqfSgw6nc079y39TyhtU3iv75xAN8pL1I3V5GwxYsXc8ABB8S6jMC1d5xmNsfdy9vbX78h4lxyklGUm0lRbibjBn+277C7U7Nlezgc6vnrnNXMXL4BCLVJ3PjcIn7+/GIO2DeL8pJcyktzKC/JZZ8+6bE4FBGJUwqCLszMKMhKpyArnfLSXAbk9dzZsJWSnMR1xw9l47ZGKlbW8tjs1dz31koA+mdncHhpDmWluZSX5LB/vyyS1e4gkrAUBN3I7tokGptbWPTRJiqqaplTtYG3PlzPM/M+AiArLYXDSnIoL8mhrDSHkUXZZPbQj4Z0T+7erXvlfZnL/WojSFDuzuoN26io2hAKh5W1LF23GfdQ+8KB+/WmbOflpBwKeutyknR9K1asICsrq9sORd06H8HmzZs/Nx/B7toIFASy08b6Rt5dVRsKh5W1zK+uo6GxBYDi3MydZwzlJbkMKeilbqzS5STyDGUKAvlSdjS1sPCjjcypqqViZS0VVbV8umU7AL3TUygryaG8NJeyktDlpPTU5BhXLCJfREEgncLdqVpfv7OdoWJlLcvWhYbITU02DtyvD+UlOZSX5lBWkkt+VlqMKxaRVgoCCUxd/Y7QGUO4nWFedR07mkKXk0r7Zu5sZ8hMTWZ1bT1jBuXpRjeRGFAQSNRsb2pmwZpNO88Y5lTVsn7rjp3bk834wVeHMGlsKb018J5I1CgIJGbcnZueX8zd/1xB25+0lCSjvDSHCUMLOHZYAYMLenXLXhwi8UJ3FkvMmBknHLwvD75dtXMEx//6xnA+qtvG9CU1/OrvH/Crv39A/+wMJgzLZ8LQAsYOyiOjhxqeRaJFZwQSFV80y9PHG7cxY0kN0z9Yxz8rP6V+RzM9UpIYM7AvE4bmM2FYASV9e8awcpHuQZeGpEvY3tRMxcpaXv1gHdOXrGN5zVYABub1ZMKwAiYMLeDwATmkpehsQWRPKQikS6pav5UZS2p49YN1zFy+nh1NLWT2SGbc4DwmDC1gwrB89u3TebM0iXRnCgLp8rbtaGbm8k+Z/kEoGNbUbQNCc8i2ni0cVpxNSnJSjCsViU8KAulW3J3KdVuYvmQd0z+oYfbKDTS1OL3TUzh6/1CD8zFD88nrpRvaRFopCKRb29zQyD+XfRoKhiU11GzejhmM6N9n59nCwf37aGwkSWgKAkkYLS3Ooo83MT3c4Dx3dR3u0LdnD44Zms+xwwo4akg+fTJ0M5skFgWBJKwNW3fwxrJQu8JrS2uoq28kOckoK84JnS0My2dovyzdzCbdnoJAhND8z/NW1zFjSehsYcGaTQDs2yed8UMLmDA0n3GD8/jgk83t3vMg0pUpCETasXZTA68tqWH6knW8sexTtmxvIiXJaHHHHdJSknjoktEKA+kWdhcEgfa1M7OJZrbEzCrN7Pp2tpeY2Stm9p6ZzTCzwiDrEWmrX+90Tj+8iD+eU8a7P/4qj1wymkOLs2lxcKChqYVfv/ABqzfUx7pUkUAFFgRmlgxMAU4AhgNnmtnwXXa7BXjA3UcAPwN+FVQ9IrvTIyWJMYP6cv0JB5CemkSSQZLB7BUbOOY307n4/gpeX1pDS0vXOoMWiUSQg86NAirdfTmAmT0KnAQsarPPcODq8PPpwDMB1iPSobKSHB66ePTONoJ9+6Tz8NureHT2Kv6xeC0D8npy7ugSTi0rVM8j6TYCayMws9OAie5+cXj5XOAId7+8zT4PA2+7++1m9m3gr0Ceu6/f5b0uBS4FKC4uLquqqgqkZpEvsr2pmb+//wkPzFzJu6vqyEhN5uRD+zNpTAkH7Ns71uWJdCieh6G+FrjTzM4HXgfWAM277uTuU4GpEGosjmaBIgBpKaFf/Ccf2p8FazbywMyVPPVuNY+8s4pRpblMGlvC8QfuQ6qGuJAuKMgzgjHAje5+fHj5RwDu3m47gJn1Aj5w9902GKvXkMSLuvodPF6xmgdnrWLVhnoKstI4c1QxZx1RTL/e6bEuT+QzYtJ91MxSgKXAcYT+0p8NnOXuC9vskwdscPcWM7sJaHb3n+zufRUEEm9aWpzXltbwwMyVzFhaQ7IZxx+0D5NGlzBqQK5uVpO4EJNLQ+7eZGaXAy8CycA97r7QzH4GVLj7s8B44Fdm5oQuDU0Oqh6RoCQlWfgu5QKq1m/lwVlVPF5RzfPvfcywfbI4d0wJJ4/sT8+0WF+JFWmfbigTCcC2Hc08O38ND8ysYuFHm8hKS+HUskLOHVPCoPxesS5PEpDuLBaJEXfn3VV1PDBzJdPe/5jGZueoIXlMGlPKscMKSNaIqBIlCgKROFCzeTuPzV7FQ2+v4uONDfTPzuDs0cV8t7yIvpo7QQKmIBCJI03NLfxj8VoemFnFWx+up0dKEt8csS+TxpQysig71uVJN6UgEIlTy9Zu5i+zqvjrnGq27mhmRGEfJo0p5Zsj9iU9NTnW5Uk3oiAQiXObGxp5em6ocbly3RZyMlM5/fAizjmihKLczFiXJ92AgkCki3B3Zi5fzwNvVfHy4rW0uHPcsAImjSnlyMF5mm5TvrR4HmJCRNowM8YOymPsoDw+3riNh99exSPvrOIfi99hQF5Pzhldwmka8E46mc4IROLc9qZmXljwCQ/MrGJOVa0GvJMvRZeGRLqJBWs28peZVfxt/hoaGls4vDSHo4bkAzBucJ5mU5MvpCAQ6Wbq6nfwREU1d72xnLWbtwOQmmw8csloyktzY1ydxKOYTVUpIsHIzuzBJUcP5NwxJbQ2Hzc2O5c//C7zVtfFtDbpehQEIl3YmEF5pKUmkWyhM4KGphZOnvIm1z0xn5rwmYJIR9RrSKQL23VqzaH7ZPH7V5Zxz5sreGHBJ1z11f2ZNKZEE+bIbqmNQKQb+rBmCz99bhGvL61hSEEvbjzxQMYNzot1WRJDaiMQSTCD8ntx/wWHM/XcMhqamjn7rre57ME5VNfWx7o0iUMKApFuysz42oH78PIPjuGar+7P9CXrOO7W1/jdP5bS0Pi5qcElgSkIRLq59NRk/v24IbxyzXi+Mrwfv/vHMr7y29d4YcEndLVLwxIMBYFIguifncGUsw7j4UuOoGePFL734Bwm3fMOles2x7o0iTEFgUiCGTsoj+evOJIbvzWc+avrmPi7N/jF/y1ic0NjrEuTGFEQiCSglOQkzh83gOnXjue0skLufnMFE255jSfnVNPSostFiUZBIJLA+vZK439OHcHfJo+jMCeDa5+Yz6l/eov3qnV3ciJREIgIIwqzeeqysdzynUNYvWEbJ015k+v/+h7rt+ju5ESgIBARAJKSjNPKCnn12mO4+MgBPDmnmvG3zODeN1fQ1NwS6/IkQAoCEfmM3ump/Oc3hvPCVUcxsiibnz63iG/c8U9mfrg+1qVJQAINAjObaGZLzKzSzK5vZ3uxmU03s7lm9p6ZfT3IekQkcoMLsnjgwlH8+dwytu5o4sz/ncXkh9/lo7ptsS5NOllgYw2ZWTKwFPgqUA3MBs5090Vt9pkKzHX3P5rZcGCau5fu7n011pBI9DU0NvPn15bzhxmVmMHk8YO55OiBpKcmx7o0iVCsxhoaBVS6+3J33wE8Cpy0yz4OtM611wf4KMB6RORLSk9N5sqvDOGVa45hwtACbn15KV+97TVeWqi7k7uDIIOgP7C6zXJ1eF1bNwLnmFk1MA349/beyMwuNbMKM6uoqakJolYRiUBhTiZ/PKeMhy4+gvSUZC79yxzOu3c2H9ZsiXVpshdi3Vh8JnCfuxcCXwf+Ymafq8ndp7p7ubuX5+fnR71IEfmscYPzmHblUfz4m8OZW1XLxN+9zq+mLWbL9qZYlyZfQpBBsAYoarNcGF7X1kXA4wDuPhNIBzRoukgXkJqcxEVHDmD6deM55dD+/Pn15Uy4ZQZPvVuty0VdTJBBMBsYYmYDzKwHcAbw7C77rAKOAzCzAwgFga79iHQheb3SuPm0Q3hm8jj2y87g6sfnc9qfZrJgzcZYlyYRCiwI3L0JuBx4EVgMPO7uC83sZ2Z2Yni3a4BLzGw+8AhwvutPCZEuaWRRNk9fNpabTxtB1fqtfOvOf3LD0++zYeuOWJcmHdBUlSLS6TY1NPK7l5dx/8yV9EpL4Zqv7c9Zo4pJ0dzJMbO77qMKAhEJzLK1m7nxuYW8WbmeYftkcfYRxWxqaGL0wL6UleTEuryEoiAQkZhxd15Y8Ak//tsCPt0SukyUnprEQxePVhhEkSavF5GYMTNOOHhfzh1dgoXXNTS28PpS9QuJFwoCEYmKI4fkk5aatDMM/jZvDWs3NcS0JglREIhIVJSV5PDQxaO59vih/PibB1CzeTunTHmTJZ9ozuRYUxCISNSUleQwecJgLjpyII9/bwxNLc5pf3qLtyo/jXVpCU1BICIxceB+fXh68jj27ZPOefe+w9Nzq2NdUsJSEIhIzPTPzuCJ742lvCSXHzw2nztfXabhKWIgoiAws3Fm9rKZLTWz5Wa2wsyWB12ciHR/fTJSuf/CUXz70P7c8tJSfvTU+zRqasyoSolwv7uBHwBzgObgyhGRRNQjJYlbTz+E/jkZ/P7VSj7e2MCUsw+jV1qkv6Jkb0R6aWiju//d3de5+/rWR6CViUhCMTOu+dpQ/ufbB/PPyk85/U8z1b00SiINgulm9hszG2Nmh7U+Aq1MRBLSGaOKufu8cqrWb+Xbf3iLpWvVvTRoEQ0xYWbT21nt7n5s55e0expiQiQxLFizkQvumx2aL/ncMsYO0lQle2Ovh5hw9wntPKIeAiKSOA7q34envz+WfXqnc9497/DM3F3ntZLOEmmvoT5m9tvWeYPN7FYz6xN0cSKS2ApzMnnysrGUleRw1WPzmDK9Ut1LAxBpG8E9wGbg9PBjE3BvUEWJiLRq7V568sj9+M2LS7jh6fdpUvfSThVp36xB7n5qm+Wfmtm8IAoSEdlVWkoyt313JIU5mdw5Pdy99KzD6KnupZ0i0jOCbWZ2ZOuCmY0DtgVTkojI55kZ1x4/lF+ecjBvLPuU706dyTp1L+0UkQbBZcAUM1tpZlXAncD3gitLRKR9Zx1RzF2Tylles5VT/vAWy9S9dK9F2mtonrsfAowADnb3Q919frCliYi0b8KwAh7/tzHsaG7h2398i5kf6v7WvbHbIDCzc8Jfrzazq4GLgYvbLIuIxERr99J+vdOZdM/b/G2eupd+WR2dEfQMf836goeISMwU5mTy1++N5bDiHK58VN1LvyxNXi8iXd72pmaue+I9np3/EWeOKubnJx1ISrJG2W9rr+8sNrObzay3maWa2StmVtN62UhEJNbSUpL53XdH8v3xg3jknVVc8kAFW7c3xbqsLiPSyPyau28CvgmsBAYD13X0IjObaGZLzKzSzK5vZ/ttZjYv/FhqZnV7UryISKukJOOHE4dx0ykH8drSGnUv3QORBkHrXRvfAJ5w940dvcDMkoEpwAnAcOBMMxvedh93/4G7j3T3kcDvgacirlxEpB1nH1HCXeeV8+E6dS+NVKRB8H9m9gFQBrxiZvlAR1E7Cqh09+XuvgN4FDhpN/ufCTwSYT0iIl/o2GH9eOzfRrO9qYVT//gWs5are+nuRHofwfXAWKDc3RuBrez+lzpAf2B1m+Xq8LrPMbMSYADw6hdsv7R1wLuamppIShaRBDeiMJunvz+W/Kw0Jt39jrqX7kZH9xEcG/76bWA8cFL4+URCwdBZzgCedPd2p8F096nuXu7u5fn5+Z34sSLSnRXlZvLUZeMYWZzNlY/O4w8z1L20PR2N2HQMob/Sv9XONmf31/TXAEVtlgvD69pzBjC5g1pERPZYn8xU/nLRKK594j1ufmEJc1fVMaKwD2MH5VFWkhPr8uJCYPcRmFkKsBQ4jlAAzAbOcveFu+w3DHgBGOARFKP7CETky2hpca5+fD7PhC8Rpacm8dDFoxMmDDrjPoJfmll2m+UcM/vF7l7j7k3A5cCLwGLgcXdfaGY/M7MT2+x6BvBoJCEgIvJlJSUZQ/r1wsLL2xtb1IgcFulg3ie4+w2tC+5ea2ZfB/5rdy9y92nAtF3W/WSX5RsjrEFEZK+MHtiXtNQktje24EC9bjoDIu8+mmxmaa0LZpYBpO1mfxGRuFNWksNDF4/m6q/tzyGFffjz68t5Y5l6IkYaBA8Run/gIjO7CHgZuD+4skREglFWksO/HzuEBy8+gsEFvbjswXdZ/PGmWJcVU5HeR/Br4BfAAeHHz9395iALExEJUlZ6KvdecDg905K58L7ZfLIxcYej2JPh+RYDL7j7tcAbZqZhqEWkS9u3Twb3nH84m7Y1csF9s9mSoG0GkfYaugR4EvhzeFV/4JmgihIRiZYD9+vDH84pY+nazUx+6F0am1tiXVLURXpGMBkYB2wCcPdlQEFQRYmIRNMx++dz08mhUUt/8rcFCXf3caTdR7e7+w6zUA/c8M1iifUvJSLd2hmjilldW8+U6R9SlJvJ98cPjnVJURNpELxmZjcAGWb2VeD7wHPBlSUiEn3XfHUoqzds4+YXltA/O4OTRrY7Tma3E+mlof8AaoD3gX8jdJPYbm8mExHpapKSjN98ZwSjBuRy3RPv8XaC3HncYRCEJ5hZ7O7/6+7fcffTws91aUhEup20lGSmnltGYW4Gl/5lDpXrtsS6pMB1GAThoaGXmFlxFOoREYm57Mwe3H/BKFKTjQvue4eazdtjXVKgIr00lAMsDE9c/2zrI8jCRERiqSg3k7vOO5yazdu5+IEKtu1od7qUbiHSxuIfB1qFiEgcGlmUzR1nHMq/PTiHKx+dyx/PKSM5yTp+YRfT0Qxl6WZ2FfAdYBjwpru/1vqISoUiIjH0tQP34SffHM5Li9byi+cXxbqcQHR0RnA/0Ai8AZwADAeuDLooEZF4csG4AazesI173lxBUU4mFx45INYldaqOgmC4ux8MYGZ3A+8EX5KISPz5z28cwJq6en7+/CL652Rw/IH7xLqkTtNRY3Fj65PwjGMiIgkpOcn43XcP5ZDCbK58dC7zVtfFuqRO01EQHGJmm8KPzcCI1udmltgDeItIwsnokcxd55VTkJXORffNZtX6+liX1Cl2GwTunuzuvcOPLHdPafO8d7SKFBGJF3m90rj3gsNpduf8+96hrn5HrEvaa3syH4GIiACD8nsx9dxyqjds49IH5tDQ2LXvMVAQiIh8CaMG5HLL6YfwzsoNXPfke7S0dN1RdyK9oUxERHZx4iH7UV1bz80vLKEoJ4MfThwW65K+FAWBiMheuOyYQazesI0/zPiQwpxMzjqi6w3LpiAQEdkLZsbPTzqQjzdu48d/W8DWHU3saGph9MC+lJXkxLq8iATaRmBmE81siZlVmtn1X7DP6Wa2yMwWmtnDQdYjIhKElOQk7jzrMIpyMrjp+cXc+tISzr5rFnOqamNdWkQCC4LwPAZT+NfQFGea2fBd9hkC/AgY5+4HAlcFVY+ISJB6paVwwkH7AtDi0NjUwqwuMrFNkGcEo4BKd1/u7juAR4GTdtnnEmCKu9cCuPu6AOsREQnUV4b3IyU8OmlyUhKjB/aNcUWRCTII+gOr2yxXh9e1tT+wv5m9aWazzGxie29kZpeaWYWZVdTU1ARUrojI3ikryeHBi0fRJyOVfr3TOKSwT6xLikis7yNIAYYA44Ezgf81s+xdd3L3qe5e7u7l+fn5US5RRCRyowfm8etTR7C6dhuPzF7d8QviQJBBsAYoarNcGF7XVjXwrLs3uvsKYCmhYBAR6bKOP7Afowfm8tuXlrBxW2PHL4ixIINgNjDEzAaYWQ/gDGDX6S2fIXQ2gJnlEbpUtDzAmkREAmdm/Pibw6nb1sgdryyLdTkdCiwIwsNWXw68CCwGHnf3hWb2MzM7Mbzbi8B6M1sETAeuc/eu0cwuIrIbB+7Xh++WF3H/WytZXrMl1uXslrl3rfExysvLvaKiItZliIh0qGbzdibcMoPRA3O567zDY1qLmc1x9/L2tsW6sVhEpNvKz0rj8mMH84/F63hjWfz2eFQQiIgE6IJxpRTnZvKL/1tMU3NLrMtpl4JARCRAaSnJ3PD1YSxZu5lH47Q7qYJARCRgxx+4D0cMyOW3Ly+Ny+6kCgIRkYC1dietrd/B7+OwO6mCQEQkCg7q34fTy4q4Lw67kyoIRESi5Nrjh5Kemswvp30Q61I+Q0EgIhIl+VlpTJ4wmH8sXss/l30a63J2UhCIiETRBeNKKcrN4Of/tyhuupMqCEREoig9NZkbTjiAJWs381hFfHQnVRCIiETZxIP2YdSAXG59aSmbGmLfnVRBICISZWbGT8LdSe98tTLW5SgIRERi4aD+ffhOWSH3vrmCFZ9ujWktCgIRkRi59vih9EhO4pfTFse0DgWBiEiMFGSlM/nYwby8aC0/fHI+c6pqY1KHgkBEJIYOLcrGgMcrqjn7rlkxCQMFgYhIDL27qm7n8x1NLcxaHv1JGhUEIiIxNHpgX3qkhH4VJ5kxemDfqNegIBARiaGykhwevmQ0+b16sH+/XpSV5ES9BgWBiEiMlZXkcFp5EUvWbonJDWYKAhGROHDssAKaW5w3lkZ/MDoFgYhIHDi0KJs+GalMX7Iu6p+tIBARiQMpyUkcvX8+M5bU0NLiUf1sBYGISJyYMDSfT7dsZ8FHG6P6uYEGgZlNNLMlZlZpZte3s/18M6sxs3nhx8VB1iMiEs+O2T8fM5j+QU1UPzewIDCzZGAKcAIwHDjTzIa3s+tj7j4y/LgrqHpEROJd315pHFKYzTjh34AAAAmXSURBVKtRbicI8oxgFFDp7svdfQfwKHBSgJ8nItLlTRhawHvVdXy6ZXvUPjPIIOgPtJ1+pzq8blenmtl7ZvakmRW190ZmdqmZVZhZRU1NdE+ZRESi6dhhBbjDa0ui97su1o3FzwGl7j4CeBm4v72d3H2qu5e7e3l+fn5UCxQRiaYD9+tNXq+0qHYjDTII1gBt/8IvDK/byd3Xu3vr+c9dQFmA9YiIxL2kJGPC0HxeX1oTtcntgwyC2cAQMxtgZj2AM4Bn2+5gZvu2WTwRiO3sDCIicWDCsAI2NTR9ZmTSIAUWBO7eBFwOvEjoF/zj7r7QzH5mZieGd7vCzBaa2XzgCuD8oOoREekqjhySR0qSRe3ykLlH9w62vVVeXu4VFRWxLkNEJFBnTJ1JXX0jL1x1dKe8n5nNcffy9rbFurFYRETaceywAj74ZDMf1W0L/LMUBCIicWjC0AIAbnx2YeDTVyoIRETi0KZtjRjw0qK1gc9lrCAQEYlDs1ZsoLUFtzHguYwVBCIicWj0wL6kJBkAqclJgc5lrCAQEYlDZSU5XHncEABuOuXgQOcyVhCIiMSpY4aGhtTpmZYS6OcoCERE4lRxbiYA1bX1gX6OgkBEJE71yUglKy2F1RsUBCIiCcnMKMzNZHVtsDeVKQhEROJYUU6GzghERBJZcW4mq2vrCXJcOAWBiEgcK8rNpKGxhZoAp65UEIiIxLGi3AwAVm8Irp1AQSAiEseKcoLvQqogEBGJY4XhIAiywVhBICISxzJ6JJOdmcqLC9cGNgKpgkBEJI7Nqapl47ZG3l+zMbDhqBUEIiJxbNby9bT2HA1qOGoFgYhIHBs9sC/JrcNRpwQzHLWCQEQkjpWV5PCtEfuRbMZDFx0RyHDUCgIRkTg3dJ8smt05YL/egby/gkBEJM5lZ6YCsHFbYyDvryAQEYlzfTK6cBCY2UQzW2JmlWZ2/W72O9XM3MzKg6xHRKQrag2CuvouFgRmlgxMAU4AhgNnmtnwdvbLAq4E3g6qFhGRrqw1CB59Z1WXu49gFFDp7svdfQfwKHBSO/v9HPg10BBgLSIiXVbr8BJ/m/dRIDeVBRkE/YHVbZarw+t2MrPDgCJ3f353b2Rml5pZhZlV1NTUdH6lIiJxbMknmwFwgrmpLGaNxWaWBPwWuKajfd19qruXu3t5fn5+8MWJiMSRo/bPJz01iWQL5qaylE59t89aAxS1WS4Mr2uVBRwEzDAzgH2AZ83sRHevCLAuEZEupawkh4cuHs2s5esZPbBvp99UFmQQzAaGmNkAQgFwBnBW60Z33wjktS6b2QzgWoWAiMjnlZXkBHJXMQR4acjdm4DLgReBxcDj7r7QzH5mZicG9bkiIrJngjwjwN2nAdN2WfeTL9h3fJC1iIhI+3RnsYhIglMQiIgkOAWBiEiCUxCIiCQ489Y50LoIM6sBqr7ky/OATzuxnK5Ax5wYdMyJYW+OucTd270jt8sFwd4wswp3T6gRTnXMiUHHnBiCOmZdGhIRSXAKAhGRBJdoQTA11gXEgI45MeiYE0Mgx5xQbQQiIvJ5iXZGICIiu1AQiIgkuG4ZBGY20cyWmFmlmV3fzvY0M3ssvP1tMyuNfpWdK4JjvtrMFpnZe2b2ipmVxKLOztTRMbfZ71QzczPr8l0NIzlmMzs9/L1eaGYPR7vGzhbBz3axmU03s7nhn++vx6LOzmJm95jZOjNb8AXbzczuCP97vBee6XHvuHu3egDJwIfAQKAHMB8Yvss+3wf+FH5+BvBYrOuOwjFPADLDzy9LhGMO75cFvA7MAspjXXcUvs9DgLlATni5INZ1R+GYpwKXhZ8PB1bGuu69POajgcOABV+w/evA3wEDRgNv7+1ndsczglFApbsvd/cdwKPASbvscxJwf/j5k8BxFp4mrYvq8Jjdfbq714cXZxGaMa4ri+T7DPBz4NdAQzSLC0gkx3wJMMXdawHcfV2Ua+xskRyzA73Dz/sAH0Wxvk7n7q8DG3azy0nAAx4yC8g2s3335jO7YxD0B1a3Wa4Or2t3Hw9NoLMR6NxJQKMrkmNu6yJCf1F0ZR0ec/iUucjdn49mYQGK5Pu8P7C/mb1pZrPMbGLUqgtGJMd8I3COmVUTmv/k36NTWszs6f/3DgU6MY3EHzM7BygHjol1LUEysyTgt8D5MS4l2lIIXR4aT+is73UzO9jd62JaVbDOBO5z91vNbAzwFzM7yN1bYl1YV9EdzwjWAEVtlgvD69rdx8xSCJ1Oro9KdcGI5Jgxs68A/wmc6O7bo1RbUDo65izgIGCGma0kdC312S7eYBzJ97kaeNbdG919BbCUUDB0VZEc80XA4wDuPhNIp8186N1QRP/f90R3DILZwBAzG2BmPQg1Bj+7yz7PAueFn58GvOrhVpguqsNjNrNDgT8TCoGuft0YOjhmd9/o7nnuXurupYTaRU5094rYlNspIvnZfobQ2QBmlkfoUtHyaBbZySI55lXAcQBmdgChIKiJapXR9SwwKdx7aDSw0d0/3ps37HaXhty9ycwuB14k1OPgHndfaGY/Ayrc/VngbkKnj5WEGmXOiF3Fey/CY/4N0At4ItwuvsrdT4xZ0XspwmPuViI85heBr5nZIqAZuM7du+zZboTHfA3wv2b2A0INx+d35T/szOwRQmGeF273+G8gFcDd/0SoHeTrQCVQD1yw15/Zhf+9RESkE3THS0MiIrIHFAQiIglOQSAikuAUBCIiCU5BICKS4BQEIu0ws2Yzm2dmC8zsOTPL7uT3Xxnu54+ZbenM9xbZUwoCkfZtc/eR7n4QoXtNJse6IJGgKAhEOjaT8KBeZjbIzF4wszlm9oaZDQuv72dmT5vZ/PBjbHj9M+F9F5rZpTE8BpEv1O3uLBbpTGaWTGj4grvDq6YC33P3ZWZ2BPAH4FjgDuA1dz8l/Jpe4f0vdPcNZpYBzDazv3blO32le1IQiLQvw8zmEToTWAy8bGa9gLH8a5gOgLTw12OBSQDu3kxoaHOAK8zslPDzIkIDwCkIJK4oCETat83dR5pZJqFxbiYD9wF17j4ykjcws/HAV4Ax7l5vZjMIDYgmElfURiCyG+FZ3a4gNLBZPbDCzL4DO+eOPSS86yuEpgDFzJLNrA+h4c1rwyEwjNBQ2CJxR0Eg0gF3nwu8R2gClLOBi8xsPrCQf02beCUwwczeB+YQmjv3BSDFzBYD/0NoKGyRuKPRR0VEEpzOCEREEpyCQEQkwSkIREQSnIJARCTBKQhERBKcgkBEJMEpCEREEtz/AwHKfi9JBf66AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "y_score = model_knn.predict_proba(XS_test_ohe)\n", "knn_probs = y_score[:, 1]\n", "# predict class values\n", "knn_precision, knn_recall, knn_thresholds = precision_recall_curve(y_test, knn_probs)\n", "knn_f1, knn_auc = f1_score(y_test, y_hat), auc(knn_recall, knn_precision)\n", "# summarize scores\n", "print('KNN: f1=%.3f auc=%.3f' % (knn_f1, knn_auc))\n", "pyplot.plot(knn_recall, knn_precision, marker='.', label='KNN')\n", "# axis labels\n", "pyplot.xlabel('Recall')\n", "pyplot.ylabel('Precision')\n", "# show the legend\n", "pyplot.legend()\n", "# show the plot\n", "pyplot.show()" ] }, { "cell_type": "markdown", "id": "tamil-viking", "metadata": {}, "source": [ "### Cost Curve" ] }, { "cell_type": "code", "execution_count": 50, "id": "fatty-cotton", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.000000 189690 0.000000 1.000000 0.401212\n", "1 0.083333 182010 0.001967 0.893616 0.464124\n", "2 0.166667 193070 0.008261 0.741051 0.552953\n", "3 0.250000 283680 0.027695 0.567663 0.648979\n", "4 0.333333 538740 0.072777 0.401919 0.730137\n", "5 0.416667 1006540 0.150747 0.255891 0.786294\n", "6 0.500000 1671050 0.258694 0.142601 0.810821\n", "7 0.583333 2544210 0.398269 0.069640 0.798510\n", "8 0.666667 3470750 0.545240 0.030313 0.763092\n", "9 0.750000 4360730 0.685838 0.011756 0.717794\n", "10 0.833333 5144660 0.809441 0.003479 0.673159\n", "11 0.916667 5725190 0.900865 0.001002 0.637962\n", "12 1.000000 6123010 0.963493 0.000053 0.613403\n" ] } ], "source": [ "plot_metrics_cost_vs_threshold(y_test, knn_probs,0.05,cost_matrix)" ] }, { "cell_type": "markdown", "id": "quantitative-poker", "metadata": {}, "source": [ "### Feature Importance" ] }, { "cell_type": "code", "execution_count": 51, "id": "found-instrument", "metadata": {}, "outputs": [], "source": [ "#results = permutation_importance(model_knn, XS_train_ohe, y_train, scoring='recall')\n", "# get importance\n", "#importance = results.importances_mean\n", "# summarize feature importance\n", "#for i,v in enumerate(importance):\n", "# print('Feature: %0d, Score: %.5f' % (i,v))\n", "# plot feature importance\n", "#pyplot.bar([x for x in range(len(importance))], importance)\n", "#pyplot.show()" ] }, { "cell_type": "markdown", "id": "lasting-galaxy", "metadata": {}, "source": [ "## Random Forest" ] }, { "cell_type": "markdown", "id": "typical-connection", "metadata": {}, "source": [ "### Random Forest Hyperparameters" ] }, { "cell_type": "code", "execution_count": 52, "id": "threaded-solution", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'bootstrap': True,\n", " 'ccp_alpha': 0.0,\n", " 'class_weight': None,\n", " 'criterion': 'gini',\n", " 'max_depth': None,\n", " 'max_features': 'auto',\n", " 'max_leaf_nodes': None,\n", " 'max_samples': None,\n", " 'min_impurity_decrease': 0.0,\n", " 'min_impurity_split': None,\n", " 'min_samples_leaf': 1,\n", " 'min_samples_split': 2,\n", " 'min_weight_fraction_leaf': 0.0,\n", " 'n_estimators': 100,\n", " 'n_jobs': None,\n", " 'oob_score': False,\n", " 'random_state': 1999,\n", " 'verbose': 0,\n", " 'warm_start': False}" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "rf_model = RandomForestClassifier(random_state=1999)\n", "rf_model.get_params()\n" ] }, { "cell_type": "markdown", "id": "excessive-butler", "metadata": {}, "source": [ "### Hyperparameter tuning" ] }, { "cell_type": "code", "execution_count": 53, "id": "elder-moment", "metadata": {}, "outputs": [], "source": [ "if(path.exists(\"data/rf_grid_result.dat\")):\n", " rf_grid_result = pickle.load(open(\"data/rf_grid_result.dat\", \"rb\")) \n", "else: \n", " \n", " cs07_random_state_ = 1999 \n", " rf_model = RandomForestClassifier(random_state=cs07_random_state_)\n", "\n", " #\n", " # Run Grid Search\n", " #\n", "\n", " rf_param_grid = [\n", " {'max_features' : np.arange(1,6,1),\n", " 'n_estimators' : np.arange(10,50,10),\n", " 'random_state' : [cs07_random_state_],\n", " 'n_jobs' : [-1]\n", " } ]\n", "\n", " rf_scoring = { 'ROC' : 'roc_auc', \n", " 'Accuracy' : make_scorer(accuracy_score),\n", " 'Recall' : make_scorer(recall_score),\n", " 'Precision' : make_scorer(precision_score),\n", " 'F1' : make_scorer(f1_score)\n", " }\n", "\n", " cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1999)\n", "\n", " print('Running grid search : ')\n", "\n", " grid_search = GridSearchCV(estimator=rf_model, \n", " param_grid=rf_param_grid, n_jobs=-1, cv=cv, \n", " scoring=['roc_auc','accuracy','f1','recall','precision'],refit='f1')\n", " print('Fit grid search : ')\n", " grid_result = grid_search.fit(XS_train_ohe, y_train.flatten())\n", "\n", " pickle.dump( grid_result, open( \"data/rf_grid_result.dat\", \"wb\" )) \n", "\n" ] }, { "cell_type": "markdown", "id": "military-catering", "metadata": {}, "source": [ "### Best Parameters \n", "\n", "This is the best model returned by Grid Search hyperparameter tuning." ] }, { "cell_type": "code", "execution_count": 54, "id": "standard-faith", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best: 0.870298 using {'max_features': 5, 'n_estimators': 40, 'n_jobs': -1, 'random_state': 1999}\n" ] } ], "source": [ "# summarize results\n", "print(\"Best: %f using %s\" % (rf_grid_result.best_score_, rf_grid_result.best_params_))" ] }, { "cell_type": "code", "execution_count": 55, "id": "representative-officer", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_max_featuresparam_n_estimatorsparam_n_jobsparam_random_stateparamssplit0_test_roc_auc...split23_test_precisionsplit24_test_precisionsplit25_test_precisionsplit26_test_precisionsplit27_test_precisionsplit28_test_precisionsplit29_test_precisionmean_test_precisionstd_test_precisionrank_test_precision
04.6784970.2692830.2470490.010967110-11999{'max_features': 1, 'n_estimators': 10, 'n_job...0.809679...0.7586320.7593480.7492560.7695340.7609960.7549950.7766300.7642070.01212920
19.5285330.4692640.2591230.021618120-11999{'max_features': 1, 'n_estimators': 20, 'n_job...0.850978...0.8182660.8075160.8011440.8304250.8092050.8089780.8333850.8173800.00959319
214.9780170.6586470.2826510.034041130-11999{'max_features': 1, 'n_estimators': 30, 'n_job...0.876086...0.8388970.8342490.8402440.8529680.8436930.8431200.8543780.8472700.00856517
319.5089080.7376290.3671020.081046140-11999{'max_features': 1, 'n_estimators': 40, 'n_job...0.886774...0.8579150.8574030.8573150.8667070.8536220.8485030.8751880.8623060.00794816
47.0735990.8400570.2527760.011082210-11999{'max_features': 2, 'n_estimators': 10, 'n_job...0.866640...0.8257720.8277790.8341150.8423500.8291140.8433280.8414700.8354550.00691718
\n", "

5 rows × 174 columns

\n", "
" ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", "0 4.678497 0.269283 0.247049 0.010967 \n", "1 9.528533 0.469264 0.259123 0.021618 \n", "2 14.978017 0.658647 0.282651 0.034041 \n", "3 19.508908 0.737629 0.367102 0.081046 \n", "4 7.073599 0.840057 0.252776 0.011082 \n", "\n", " param_max_features param_n_estimators param_n_jobs param_random_state \\\n", "0 1 10 -1 1999 \n", "1 1 20 -1 1999 \n", "2 1 30 -1 1999 \n", "3 1 40 -1 1999 \n", "4 2 10 -1 1999 \n", "\n", " params split0_test_roc_auc \\\n", "0 {'max_features': 1, 'n_estimators': 10, 'n_job... 0.809679 \n", "1 {'max_features': 1, 'n_estimators': 20, 'n_job... 0.850978 \n", "2 {'max_features': 1, 'n_estimators': 30, 'n_job... 0.876086 \n", "3 {'max_features': 1, 'n_estimators': 40, 'n_job... 0.886774 \n", "4 {'max_features': 2, 'n_estimators': 10, 'n_job... 0.866640 \n", "\n", " ... split23_test_precision split24_test_precision \\\n", "0 ... 0.758632 0.759348 \n", "1 ... 0.818266 0.807516 \n", "2 ... 0.838897 0.834249 \n", "3 ... 0.857915 0.857403 \n", "4 ... 0.825772 0.827779 \n", "\n", " split25_test_precision split26_test_precision split27_test_precision \\\n", "0 0.749256 0.769534 0.760996 \n", "1 0.801144 0.830425 0.809205 \n", "2 0.840244 0.852968 0.843693 \n", "3 0.857315 0.866707 0.853622 \n", "4 0.834115 0.842350 0.829114 \n", "\n", " split28_test_precision split29_test_precision mean_test_precision \\\n", "0 0.754995 0.776630 0.764207 \n", "1 0.808978 0.833385 0.817380 \n", "2 0.843120 0.854378 0.847270 \n", "3 0.848503 0.875188 0.862306 \n", "4 0.843328 0.841470 0.835455 \n", "\n", " std_test_precision rank_test_precision \n", "0 0.012129 20 \n", "1 0.009593 19 \n", "2 0.008565 17 \n", "3 0.007948 16 \n", "4 0.006917 18 \n", "\n", "[5 rows x 174 columns]" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rf_grid = pd.DataFrame.from_dict(rf_grid_result.cv_results_)\n", "rf_grid.head()" ] }, { "cell_type": "code", "execution_count": 56, "id": "adaptive-abuse", "metadata": {}, "outputs": [], "source": [ "grid_results = rf_grid.loc[:,['param_max_features','param_n_estimators','mean_test_recall']]\n", "grid_contour = grid_results.groupby(['param_max_features','param_n_estimators']).mean()\n", "grid_contour\n", "\n", "grid_reset = grid_contour.reset_index()\n", "grid_reset.columns = ['param_max_features', 'param_n_estimators', 'mean_test_recall']\n", "grid_pivot = grid_reset.pivot('param_max_features', 'param_n_estimators')\n", "grid_pivot\n", "x = grid_pivot.columns.levels[1].values\n", "y = grid_pivot.index.values\n", "z = grid_pivot.values" ] }, { "cell_type": "markdown", "id": "compliant-modeling", "metadata": {}, "source": [ "### 3D Plot (estimators X max_features X Recall Score)\n", "\n", "This is 3D plot after tunining following hyperparameters\n", "\n", "- Number of estimators\n", "- Max features\n", "- Mean Recall score\n" ] }, { "cell_type": "code", "execution_count": 57, "id": "sporting-aviation", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "type": "surface", "x": [ 10, 20, 30, 40 ], "y": [ 1, 2, 3, 4, 5 ], "z": [ [ 0.4664740859043833, 0.5225565203254097, 0.5507255044880733, 0.5693342597848184 ], [ 0.5951626163307765, 0.6625099005602203, 0.6926394845821621, 0.708041892726556 ], [ 0.6738008177807276, 0.7339419027309525, 0.7600127510908739, 0.771349741936619 ], [ 0.7208276468477975, 0.7755723737595052, 0.7944172320388966, 0.8045085710435206 ], [ 0.751547532827603, 0.7986400341274265, 0.8166456762665016, 0.8256878486930744 ] ] } ], "layout": { "autosize": false, "height": 500, "margin": { "b": 65, "l": 65, "r": 50, "t": 90 }, "scene": { "xaxis": { "title": { "text": "param_n_estimators" } }, "yaxis": { "title": { "text": "param_max_features" } }, "zaxis": { "title": { "text": "mean_test_recall" } } }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Hyperparameter tuning" }, "width": 600, "xaxis": { "title": { "text": "n_estimators" } }, "yaxis": { "title": { "text": "max_features" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import plotly.graph_objects as go\n", "# X and Y axes labels\n", "layout = go.Layout(\n", " xaxis=go.layout.XAxis(\n", " title=go.layout.xaxis.Title(\n", " text='n_estimators')\n", " ),\n", " yaxis=go.layout.YAxis(\n", " title=go.layout.yaxis.Title(\n", " text='max_features') \n", " ) )\n", "\n", "fig = go.Figure(data= [go.Surface(z=z, y=y, x=x)], layout=layout )\n", "fig.update_layout(title='Hyperparameter tuning',\n", " scene = dict(\n", " xaxis_title='param_n_estimators',\n", " yaxis_title='param_max_features',\n", " zaxis_title='mean_test_recall'),\n", " autosize=False,\n", " width=600, height=500,\n", " margin=dict(l=65, r=50, b=65, t=90))\n", "fig.show()" ] }, { "cell_type": "markdown", "id": "practical-mercy", "metadata": {}, "source": [ "### Build Random Forest Model" ] }, { "cell_type": "code", "execution_count": 58, "id": "through-surrey", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(max_features=5, n_estimators=40, n_jobs=-1,\n", " random_state=1999)" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rf_model = RandomForestClassifier(max_features=5, \n", " n_estimators=40,\n", " n_jobs=-1,\n", " random_state=1999)\n", "rf_model.fit(XS_train_ohe, y_train)" ] }, { "cell_type": "markdown", "id": "understood-stretch", "metadata": {}, "source": [ "### Feature Importance" ] }, { "cell_type": "code", "execution_count": 59, "id": "lonely-survivor", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABI0AAAF1CAYAAABlMRrnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde5xsV1kn/N9DDglyd8KRgVxIMCEab1GPQecVpyeRMYyDwTHIAUfAwWEYzTiOqBPecRAjjOKo4Cv4ajRgBDTBoMxRoniJ8YIYc6KgJBg4BDAJqCcXAgECBJ75Y+9DKl3dp6u7q/rcvt/Ppz9dtfeq/axVa1XVrqfXXl3dHQAAAACYdL8DXQEAAAAADj6SRgAAAABMkTQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAWENVvaiqXnug63EwqMGrq+qOqvrLA12f9aqqu6rqsQe6Hoeqqvq1qnrKga7HelXV/1NV7x77/5Crf5JU1TFV9XdVtf1A1wWAI4ekEQCHpKp6X1V9fPwS+A9V9ctV9eADXa/NqKqlqvrM2KZ9P7+1hfFPqqquqm37Kfa1SZ6Y5PjuPnOT8Z5dVX+2mWOsV3c/uLtv3MqYqxmf61MOdD1mVVVfmuTLkvyf8f6W9F9VnVpVdy9P3FbVM6rq/VX10ap6Y1X9s/0c5sIkrxj7/42brM/7qurrN3OMjejuTyR5VZILtjo2AEcuSSMADmVP7u4HJzkjyZcnecEBrs88fGD8Yrvv58nrPUBVHbWIio0ek+R93f3RBcaYyRrJrYPWoVrvJP8pyeu6u7c47iuTXDO5oaq+KMkvJPn2JI9M8rEkP7efYzwmyXWLquB6bLL/fzXJs6rqmHnVBwD2R9IIgENed/9DkjdnSB4lSarqgqp6T1V9pKqur6pvntj37Kr6s6r6yfEyq/dW1ZMm9p9cVX88Pvb3kzxiMl5VfVNVXVdVH6qqq6rqCyf2va+qfqCq/macAXFxVT2yqn5nPN4fVNXnrreNVfWFY6wPjbG/aWLfL1fV/19VV1TVR5P8q6p6dFW9oar2ju37nonyZ1bV7qr6cFX9Y1X99LjrT8bfHxpnOX3Nsjo8J8kvJfmacf+PjNv/bVW9bazbn48zUvbbD+Nz9vMTx/rQuP2qqvrO5X01cb+r6rur6t1J3r1W/BWex8/O7hmft58b++auqnpLVf3zqnr5OC7+rqq+fOKx76uqF4ztuKOGy/QeMLH/P1bVnqq6vap2VdWjV6t3Ve17rt8+xn5aVX1uVf322Gd3jLePnzjGVVX1o2M9P1JVv1dVj5jY/7Vj+z9UVTdV1bPH7ceMY/3vx/7++ar6nHHfI8Y4Hxrr/adVtdr54ZOS/PEa/fewqvqVsQ3vr6of2ne8sS/fUlWvqKo7x+f37NX6anzMziQfSvKHy3Z9W5Lf6u4/6e67kvzPJP+uqh6ywjHek+SxSX5rrOsxYz0vrqoPVtUtVfXiGpOtVfX5VXVlVd1WVbdW1euq6uHjvtckOXHiWD9YwwzBm5fF/OxspBoub728ql5bVR9O8uw14p9Sw/vPnWP8y/Ydt7tvTnJHkq/e3/MGAPMiaQTAIW/8Yv2kJHsmNr8nyROSPCzJjyR5bVU9amL/45PckCEh9BNJLq6qGvf9apJrx30/muRZE7Eel+TXknxvku1JrsjwBfLoiWN/S4ZLuB6X5MlJfifJ/zuWv1+S78k6VNX9k/xWkt9L8nlJ/kuS11XVaRPFnpHkJUkekuTPx/JvT3JckrOTfG9VfcNY9meS/Ex3PzTJ5yd5/bj968bfDx9nOb11sh7dfXGS5yV567j/h8ekyqsyzEI5NsPsj11170yIFfuhu9+57FgPX8dT8pQM/Xf6DPHX8q1JfihDX38iyVuT/NV4//IkP72s/Lcl+YYMz9vjxsemqs5K8mPj8R6V5P1JLl2t3t2977n+srH9l2UYG6/OMCvmxCQfT/KKZcd4RpLvyDAOjk7y/WP8x2QYZz+bYZydkeRt42N+fKzrGUlOyTAmXjjue36Sm8fHPDLDOJ2aSVRVD0pycobXTPbTfz+boa8fm+RfJnnmWN99Hp9hTDwiyQ8n+Y1a5bKyqnpohsvKvm+F3V+UYXxnrM97knxybOd9dPfnJ/n7jDMTx8u8fjnJPePz8eVJ/nWSfcnKytCXj07yhUlOSPKi8VjfvuxYP7FS3Vdwbobx9PAkr1sj/o9meK1/bpLjMzynk96Z4TJBAFg4SSMADmVvrKqPJLkpyT9l+BKaJOnuX+/uD3T3Z8Yv5O9OMrkGz/u7+xe7+9NJLsnwRf+RVXVikq9K8j+7+xPd/ScZEjD7PC3Jm7r797v7U0l+MsnnJPkXE2V+trv/sbtvSfKnSa7u7r/u7ruT/GaGL4mrefQ462Pfz7dmmFXw4CQ/3t2f7O4rk/x2kqdPPO7/dPdbuvszSb4kyfbuvnAsf2OSX0yycyz7qSSnVNUjuvuu7v6L/T7L+/fcJL/Q3Vd396e7+5IMyZevTmbqh434se6+vbs/vlb8Gfxmd1870Td3d/evjOPiskz31Su6+6buvj1Dkm5fH3xbkld191+NSYkXZJiFc9Iq9Z7S3bd19xu6+2Pd/ZHx+P9yWbFXd/e7xmO8PvfOrntGkj/o7l/r7k+Nx3rbmAh9bpL/Nsb+SJL/lfuOhUclecz4uD9d5fKzfUmhj6xU9+Szl0XuTPKC7v5Id78vyU9luIRsn39K8vIx1mUZklDfuMohfzTJxePsmuUenOTOZdvuzJA03a+qemSSf5Pke7v7o939T0leNtY93b1nfH1/orv3ZkgcLu+H9Xprd79xfH0+dH/xM/TJY5I8urvv7u7l60Z9JPf2BwAslKQRAIeyp3T3Q5IsJfmCTFxGVlXPrHsvWfpQki/OfS8z+4d9N7r7Y+PNB2eYXXDHsjV73j9x+9GT98cvgTdlmL2xzz9O3P74Cvf3t2D3B7r74RM/rx9j3jTGmqzTZMybJm4/JsuSTxlmkDxy3P+cDDMy/q6qrqmqf7uf+qzlMUmevyzWCWOdZ+mHjVje1lXjz2C9fTUZ+/0TcZaPi7uS3JbV+2hKVT2wqn5hvKzrwxkuF3x43XeNqn+YuP2xifqdkGEGz3LbkzwwybUTz8/vjtuT5H9nmKH3e1V1Y1Wttsjyh8bf+0vKPCLJ/XPf18vycXrLsqTU5HP4WVV1RpKvz5BMWcldGZIvkx6a/SS1JjxmrOcHJ56TX8gweys1XE566XjZ2IeTvDbzH7Orxk/ygxlmO/1lDZei/odlx3pI7u0PAFioQ3UhRgD4rO7+46r65Qyzfp4yXqrzixkuy3prd3+6qt6W4YvYWj6Y5HOr6kETiaMTc+8lOx/IMJMnyfAv6DN8Yb9lLo1Z2QeSnFBV95tIHJ2Y5F0TZSa/iN+U5L3dfepKB+vudyd5+rjWzL9LcnlVHZsVLkuawU1JXtLdL1m+Y4Z+WCneRzMkOfb55ys1YZb4C3LCxO0TM/RNxt+P2bdjvJzr2Nx3XKz1/D4/yWlJHt/d/zAmTv46s43bm7LyDK5bMyS/vmic+XYf48yj52dIvH1xkiur6pru/sNl5T46rg30uCR7V2nPrbl3lsz147YTc9/n4LiqqonE0YlJdq1Q76UkJyX5+/Gq0QcnOaqqTu/ur8iwqPVnL9GqqscmOSb3fU2s5qYMs9Ee0d33rLD/f41t+5Luvr2qnpL7Xia4vN33GbNjkm/7sjLLx+yq8XtYo+0/jsf62iR/UFV/0t37Lr/9wgwzuABg4cw0AuBw8fIkT6yqL0vyoAxf0vYmSVV9R4YZLmvq7vcn2Z3kR6rq6PFL2+R/MHt9km+sqrPHtYaen+EL4J/PrSXTrs4wq+QHq+r+VbU01mn5mjn7/GWSj1TVf6+qz6mqo6rqi6vqq5Kkqv59VW0fE1D7Zix8JsPz9ZkM69HM6heTPK+qHl+DB1XVN9awIPFa/fCPSY5fth7U2zIsaPzAGhasfs4m4i/Cd1fV8eM6PP8jwyVsybDO1XdU1Rnjekr/K8Nlie/bz7H+Mfd9rh+SIcHzofH4P7zio1b2uiRfX1XfWlXbqurYqjpj7ONfTPKyqto3k+a4Gte3qmER8VPG5OedST6dYQys5Irc9zKt+/TfeEnf65O8pKoeMiYNvy/DTJ19Pi/J94zj+KkZEiBXrBDrogzrRp0x/vx8kjdlWE9qX3ufXFVPGBN0Fyb5jTEJtl/d/cEMawb9VFU9tKruV8Pi1/va9pAMM5nurKrjkvzAskMs77d3JXnAOO7un2Gdq1XX1ForflU9te5dAP2ODK+hz4z7jkvyz5Js5pJSAJiZpBEAh4Vx7ZFfSfLC7r4+w1/i35rhC96XJHnLOg73jAwL9t6e4Yv7r0zEuSHJv8+wOO2tGZI3T+7uT86hGSsaj/3kDIt935rhX4s/s7v/bpXyn07ybzN82X7v+JhfyrBAcZKck+S6qrorw6LYO7v74+Nlei9J8pbxspk11wXq7t0ZZkW8IsMX3D1Jnj3uW6sfrswwY+QfqurWcdvLMixo/I8Z1pp63UbjL8ivZvjCf2OGy8FePNbjDzL8B683ZJit9vm5d42a1bwoySV179pVL8+wPtatGZICvztrpbr77zOsk/P8DOP2bbl3Js5/z/C8/MV4udUfZJjRlCSnjvfvytBPP9fdf7RKmIuSfNuYYEpW7r//kmHmzY1J/izD8/WqiWNcPca8NcNYO6+7b1uhPR/r7n/Y9zPW7+7xdZ7uvi7DQtyvy7BO0kOSfNdaz9OEZ2ZYSPz6DOPm8gxrOyXDgu1fkSGJ9qYkv7HssT+W5IfGfvv+7r5zjP1LGWZVfTTD4uIbjf9VSa4eX5+7kvzXcV2yZHhvumRcNwsAFq5WXusQAIBJVfW+JN85JoiOSFX1q0le391v3MBjn53h+fvauVfsCDDOYHt7kq8bF88GgIWzphEAADPp7mcc6DocqcbZRV9woOsBwJHF5WkAAAAATHF5GgAAAABTZpppVFXnVNUNVbWnqi5YYf8xVXXZuP/qqjpp3H7/qrqkqv62qt5ZVS+Yb/UBAAAAWIQ1k0ZVdVSSV2b4jy2nJ3l6VZ2+rNhzktzR3adk+K8nLx23PzXJMd39JUm+Msl/2pdQAgAAAODgNctC2Gcm2bPvX31W1aVJzs3wL0L3OTfDv41Nhn8Z+orx37F2kgdV1bYM/0L2k0k+vL9gj3jEI/qkk05aRxMAAAAA2J9rr7321u7evp7HzJI0Oi7JTRP3b07y+NXKdPc9VXVnkmMzJJDOTfLBJA9M8t+6+/blAarquUmemyQnnnhidu/evZ42AAAAALAfVfX+9T5m0f897cwkn07y6CQnJ3l+VT12eaHuvqi7d3T3ju3b15X0AgAAAGABZkka3ZLkhIn7x4/bViwzXor2sCS3JXlGkt/t7k919z8leUuSHZutNAAAAACLNUvS6Jokp1bVyVV1dJKdSXYtK7MrybPG2+clubK7O8nfJzkrSarqQUm+OsnfzaPiAAAAACzOmkmj7r4nyflJ3pzknUle393XVdWFVfVNY7GLkxxbVXuSfF+SC8btr0zy4Kq6LkPy6dXd/TfzbgQAAAAA81XDhKCDx44dO9pC2AAAAADzU1XXdve6lgxa9ELYAAAAAByCJI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGAAAAAEw5opNGS0tLWVpaOtDVAAAAADjoHNFJIwAAAABWJmkEAAAAwBRJIwAAAACmSBoBAAAAMEXSCAAAAIApkkYAAAAATJE0AgAAAGCKpBEAAAAAUySNAAAAAJgiaQQAAADAFEkjAAAAAKZIGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKTMljarqnKq6oar2VNUFK+w/pqouG/dfXVUnjdu/rareNvHzmao6Y75NAAAAAGDe1kwaVdVRSV6Z5ElJTk/y9Ko6fVmx5yS5o7tPSfKyJC9Nku5+XXef0d1nJPn2JO/t7rfNswEAAAAAzN8sM43OTLKnu2/s7k8muTTJucvKnJvkkvH25UnOrqpaVubp42OPSEtLS1laWjrQ1QAAAACYySxJo+OS3DRx/+Zx24pluvueJHcmOXZZmacl+bWNVRMAAACArbQlC2FX1eOTfKy737HK/udW1e6q2r13796tqBIAAAAA+zFL0uiWJCdM3D9+3LZimaraluRhSW6b2L8z+5ll1N0XdfeO7t6xffv2WeoNAAAAwALNkjS6JsmpVXVyVR2dIQG0a1mZXUmeNd4+L8mV3d1JUlX3S/KtOYLXMwIAAAA41Gxbq0B331NV5yd5c5Kjkryqu6+rqguT7O7uXUkuTvKaqtqT5PYMiaV9vi7JTd194/yrDwAAAMAirJk0SpLuviLJFcu2vXDi9t1JnrrKY69K8tUbryIAAAAAW21LFsIGAAAA4NAiaQQAAADAFEkjAAAAAKZIGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACmbDvQFViInTtmK3f9DbOXv3T3xusDAAAAcIgx0wgAAACAKZJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJo8PQ0tJSlpaWDnQ1AAAAgEOYpBEAAAAAUySN2BSzmgAAAODwJGkEAAAAwJSZkkZVdU5V3VBVe6rqghX2H1NVl437r66qkyb2fWlVvbWqrquqv62qB8yv+gAAAAAswppJo6o6KskrkzwpyelJnl5Vpy8r9pwkd3T3KUleluSl42O3JXltkud19xclWUryqbnVHgAAAICFmGWm0ZlJ9nT3jd39ySSXJjl3WZlzk1wy3r48ydlVVUn+dZK/6e63J0l339bdn55P1QEAAABYlFmSRscluWni/s3jthXLdPc9Se5McmySxyXpqnpzVf1VVf3gSgGq6rlVtbuqdu/du3e9bQAAAABgzha9EPa2JF+b5NvG399cVWcvL9TdF3X3ju7esX379gVXCQAAAIC1zJI0uiXJCRP3jx+3rVhmXMfoYUluyzAr6U+6+9bu/liSK5J8xWYrDQAAAMBizZI0uibJqVV1clUdnWRnkl3LyuxK8qzx9nlJruzuTvLmJF9SVQ8ck0n/Msn186k6AAAAAIuyba0C3X1PVZ2fIQF0VJJXdfd1VXVhkt3dvSvJxUleU1V7ktyeIbGU7r6jqn46Q+Kpk1zR3W9aUFsAAAAAmJM1k0ZJ0t1XZLi0bHLbCydu353kqas89rVJXruJOgIAAACwxRa9EDYAAAAAhyBJIwAAAACmSBoBAAAAMEXSCAAAAIApMy2EzX7s3DFbuetvWF/5S3dvrD4AAAAAc2CmEQAAAABTJI04ZCwtLWVpaelAVwMAAACOCJJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACmSBoBAAAAMEXSCAAAAIApkkYAAAAATJE0AgAAAGCKpBEAAAAAUySNAAAAAJgiaQRHmKWlpSwtLR3oagAAAHCQkzQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAAAAApmw70BVgHXbumK3c9Tesr/yluzdWHwAAAOCwJWnEyiSoAAAA4Ijm8jQAAAAApkgaAQAAADDliL487aqzTjvQVQAAAAA4KM0006iqzqmqG6pqT1VdsML+Y6rqsnH/1VV10rj9pKr6eFW9bfz5+flWHwAAAIBFWHOmUVUdleSVSZ6Y5OYk11TVru6+fqLYc5Lc0d2nVNXOJC9N8rRx33u6+4w51xsAAACABZplptGZSfZ0943d/ckklyY5d1mZc5NcMt6+PMnZVVXzqyZsvaWlpSwtLR3oagAAAMABMUvS6LgkN03cv3nctmKZ7r4nyZ1Jjh33nVxVf11Vf1xVT1gpQFU9t6p2V9XuvXv3rqsBAAAAAMzfov972geTnNjdX57k+5L8alU9dHmh7r6ou3d0947t27cvuEpw8DGrCQAAgIPNLEmjW5KcMHH/+HHbimWqaluShyW5rbs/0d23JUl3X5vkPUket9lKAwAAALBYsySNrklyalWdXFVHJ9mZZNeyMruSPGu8fV6SK7u7q2r7uJB2quqxSU5NcuN8qg4AAADAoqz539O6+56qOj/Jm5McleRV3X1dVV2YZHd370pycZLXVNWeJLdnSCwlydclubCqPpXkM0me1923L6IhAAAAAMzPmkmjJOnuK5JcsWzbCydu353kqSs87g1J3rDJOgIAAACwxRa9EDYAm2ShdAAA4ECQNAIAAABgiqQRcNgwIwcAAGB+JI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBl24GuACRJdu5Yu8z1N8xeNkku3b3x+gAAAMARzkwjYGH8NzMAAIBDl6QRAAAAAFMkjQA2wCwqAADgcCdpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGAAAAAEzZdqArcKS46qzTDnQVAAAAAGZmphEAAAAAUySNAAAAAJgiaQQAAADAFEkjAAAAAKZIGgEAAAAwxX9P48izc8ds5a6/YX3lL929sfoAAADAQchMIwAAAACmmGnEplx11mkHugoHN7OaAAAAOERJGh2GJHIAAACAzXJ5GgAAAABTzDSCw4VL4QAAAJgjM40AAAAAmCJpBMBnLS0tZWlp6UBXAwAAOAjMlDSqqnOq6oaq2lNVF6yw/5iqumzcf3VVnbRs/4lVdVdVff98qg0AAADAIq25plFVHZXklUmemOTmJNdU1a7uvn6i2HOS3NHdp1TVziQvTfK0if0/neR35ldt4ICyfhIAAMBhb5aFsM9Msqe7b0ySqro0yblJJpNG5yZ50Xj78iSvqKrq7q6qpyR5b5KPzq3WwJFlEUkqCSoAAID9muXytOOS3DRx/+Zx24pluvueJHcmObaqHpzkvyf5kf0FqKrnVtXuqtq9d+/eWesOAAAAwIIseiHsFyV5WXfftb9C3X1Rd+/o7h3bt29fcJUAAAAAWMssl6fdkuSEifvHj9tWKnNzVW1L8rAktyV5fJLzquonkjw8yWeq6u7ufsWmaw4AAADAwsySNLomyalVdXKG5NDOJM9YVmZXkmcleWuS85Jc2d2d5An7ClTVi5LcJWEEHNQs8g0AAJBkhqRRd99TVecneXOSo5K8qruvq6oLk+zu7l1JLk7ymqrak+T2DIklAAAAAA5Rs8w0SndfkeSKZdteOHH77iRPXeMYL9pA/QAAAAA4ABa9EDYAAAAAhyBJIwAAAACmSBoBcEAsLS1laWnpQFcDAABYhaQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwCOCNZQAgCA9ZE0AgAAAGDKtgNdAYAj1s4ds5W7/ob1lb9098bqAwAAMEHSiEPGVWeddqCrAAAAAEcMl6cBAAAAMMVMI1iFmU0AAAAcycw0AgAAAGCKpBEAAAAAU1yeBgeBw/VSuMO1XQAAAEcCM40AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGADBnS0tLWVpaOtDVAACATZE0AgAAAGDKtgNdAQC2wM4ds5W7/ob1lb9098bqAwAAHPQkjYDDxlVnnXagqwAAAHDYkDQC2AAJKgAA4HBnTSMAAAAApphpBMD8zbImkvWTAADgoGamEQAAAABTJI0AAAAAmDJT0qiqzqmqG6pqT1VdsML+Y6rqsnH/1VV10rj9zKp62/jz9qr65vlWHwAAAIBFWDNpVFVHJXllkiclOT3J06vq9GXFnpPkju4+JcnLkrx03P6OJDu6+4wk5yT5haqyjhIAAADAQW6WmUZnJtnT3Td29yeTXJrk3GVlzk1yyXj78iRnV1V198e6+55x+wOS9DwqDQAAAMBizZI0Oi7JTRP3bx63rVhmTBLdmeTYJKmqx1fVdUn+NsnzJpJIAAAAABykFr4Qdndf3d1flOSrkrygqh6wvExVPbeqdlfV7r179y66SgAAAACsYZak0S1JTpi4f/y4bcUy45pFD0ty22SB7n5nkruSfPHyAN19UXfv6O4d27dvn732wLpdddZpueqs0w50NQAAADjIzbIo9TVJTq2qkzMkh3YmecayMruSPCvJW5Ocl+TK7u7xMTd19z1V9ZgkX5DkffOqPABk547Zyl1/w/rKX7p7Y/UBAIDDxJpJozHhc36SNyc5Ksmruvu6qrowye7u3pXk4iSvqao9SW7PkFhKkq9NckFVfSrJZ5J8V3ffuoiGAAAAADA/s8w0SndfkeSKZdteOHH77iRPXeFxr0nymk3WEQAAAIAttvCFsAEAAAA49EgaAQAAADBF0ggAAACAKZJGAHAIW1paytLS0oGuBgAAhyFJIwAAAACmSBoBAAAAMGXbga4AABwydu6Yrdz1N6yv/KW7N1YfAABYIDONAAAAAJgiaQQAAADAFEkjAAAAAKZIGgEAAAAwxULYABwQV5112oGuAgAAsB9mGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYIqFsAEOclu5YLTFqQEAgH3MNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACm+O9pAHAw2rljtnLX37C+8pfu3lh9AAA44phpBAAAAMAUSSMAAAAAprg8DYAjwlVnnXagqwAAAIcUM40AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBlpqRRVZ1TVTdU1Z6qumCF/cdU1WXj/qur6qRx+xOr6tqq+tvx91nzrT4AHHyuOus0C28DAHDIWzNpVFVHJXllkiclOT3J06vq9GXFnpPkju4+JcnLkrx03H5rkid395ckeVaS18yr4gAAAAAsziwzjc5Msqe7b+zuTya5NMm5y8qcm+SS8fblSc6uquruv+7uD4zbr0vyOVV1zDwqDgAAAPTBY7kAABbKSURBVMDizJI0Oi7JTRP3bx63rVimu+9JcmeSY5eV+ZYkf9Xdn1geoKqeW1W7q2r33r17Z607AAAAAAuyJQthV9UXZbhk7T+ttL+7L+ruHd29Y/v27VtRJQAAAAD2Y5ak0S1JTpi4f/y4bcUyVbUtycOS3DbePz7JbyZ5Zne/Z7MVBgAAAGDxZkkaXZPk1Ko6uaqOTrIzya5lZXZlWOg6Sc5LcmV3d1U9PMmbklzQ3W+ZV6UBAAAAWKw1k0bjGkXnJ3lzkncmeX13X1dVF1bVN43FLk5ybFXtSfJ9SS4Yt5+f5JQkL6yqt40/nzf3VgAAAAAwV9tmKdTdVyS5Ytm2F07cvjvJU1d43IuTvHiTdQQAAABgi23JQtgAAAAAHFpmmmkEABycrjrrtANdBQAADlNmGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACmSBoBAAAAMEXSCAAAAIApkkYAAAAATJE0AgAAAGCKpBEAAAAAUySNAAAAAJgiaQQAAADAFEkjAAAAAKZIGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYMpMSaOqOqeqbqiqPVV1wQr7j6mqy8b9V1fVSeP2Y6vqj6rqrqp6xXyrDgAAAMCirJk0qqqjkrwyyZOSnJ7k6VV1+rJiz0lyR3efkuRlSV46br87yf9M8v1zqzEAcEAsLS1laWnpQFeDddBnHKmMfYD5mGWm0ZlJ9nT3jd39ySSXJjl3WZlzk1wy3r48ydlVVd390e7+swzJIwCAmWz1F76tjHe4fpnVZ2IdLLG22uH6PHpNH3oO5+fwcO2zQ8EsSaPjktw0cf/mcduKZbr7niR3Jjl21kpU1XOrandV7d67d++sDwMAAICDlmTHoedwTb5tNM62+VZjY7r7oiQXJcmOHTv6AFcHAI4sO3fMVu76G9ZX/tLdG6sPAAAHhVmSRrckOWHi/vHjtpXK3FxV25I8LMltc6khAHB4mSXpNK8E1VYmxMTafKxFxTM+Fhdr1scfzONjtXhibT7WouIdaeMDDqBZkkbXJDm1qk7OkBzameQZy8rsSvKsJG9Ncl6SK7vbjCEAAADYjIMx0XcwJxVXiyfWbOWWWTNp1N33VNX5Sd6c5Kgkr+ru66rqwiS7u3tXkouTvKaq9iS5PUNiKUlSVe9L8tAkR1fVU5L86+6+fkO1BQAOmKvOOu1AVwEAgC0005pG3X1FkiuWbXvhxO27kzx1lceetIn6AQBHoK1OUG1lvMM1+abPxDpYYh3ODufn8XBt2+HaLo4cB8VC2AAAzJ8vKxypJMQ273BtV3L4tu1wHveHa9u2OlZddu26H3e/BdQFAAAAgEOcpBEAAAAAUySNAAAAAJgiaQQAAADAFEkjAAAAAKZIGgEAAAAwRdIIAAAAgCmSRgAAAABMkTQCAAAAYIqkEQAAAABTJI0AAAAAmCJpBAAAAMAUSSMAAAAApkgaAQAAADBF0ggAAACAKZJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACmSBoBAAAAMEXSCAAAAIApkkYAAAAATJE0AgAAAGCKpBEAAAAAUySNAAAAAJgiaQQAAADAlJmSRlV1TlXdUFV7quqCFfYfU1WXjfuvrqqTJva9YNx+Q1V9w/yqDgAAAMCirJk0qqqjkrwyyZOSnJ7k6VV1+rJiz0lyR3efkuRlSV46Pvb0JDuTfFGSc5L83Hg8AAAAAA5is8w0OjPJnu6+sbs/meTSJOcuK3NukkvG25cnObuqatx+aXd/orvfm2TPeDwAAAAADmKzJI2OS3LTxP2bx20rlunue5LcmeTYGR8LAAAAwEGmunv/BarOS3JOd3/neP/bkzy+u8+fKPOOsczN4/33JHl8khcl+Yvufu24/eIkv9Pdly+L8dwkz02SE0888Svf//73z6d1AAAAAKSqru3uHet5zCwzjW5JcsLE/ePHbSuWqaptSR6W5LYZH5vuvqi7d3T3ju3bt89eewAAAAAWYpak0TVJTq2qk6vq6AwLW+9aVmZXkmeNt89LcmUPU5h2Jdk5/ne1k5OcmuQv51N1AAAAABZl21oFuvueqjo/yZuTHJXkVd19XVVdmGR3d+9KcnGS11TVniS3Z0gsZSz3+iTXJ7knyXd396cX1BYAAAAA5mTNNY222o4dO3r37t0HuhoAAAAAh41FrWkEAAAAwBFG0ggAAACAKZJGAAAAAEyRNAIAAABgiqQRAAAAAFMkjQAAAACYImkEAAAAwBRJIwAAAACmSBoBAAAAMEXSCAAAAIAp1d0Hug73UVV7k7x/C0M+IsmtYol1EMQT69CLJ9ahFWur44l1aMXa6nhiHXrxxDq0Ym11PLEOvXhiHVqxtjre4RrrtO5+yHoesG1RNdmo7t6+lfGqand37xBLrAMdT6xDL55Yh1asrY4n1qEVa6vjiXXoxRPr0Iq11fHEOvTiiXVoxdrqeIdzrPU+xuVpAAAAAEyRNAIAAABgiqRRcpFYYh0k8cQ69OKJdWjF2up4Yh1asbY6nliHXjyxDq1YWx1PrEMvnliHVqytjifW6KBbCBsAAACAA89MIwAAAACmHHFJo6o6o6reWlXXVdXfVNXTJvZdXFVvH7dfXlUPXmCsk6vq6qraU1WXVdXRm4k1cdyHVtXNVfWKiW1PG+NfV1UvnUec/cR6elX97Rjvd6vqEYuIVVUPrKo3VdXfje368TnF2V+fnV1Vf1VVb6uqP6uqU+YQ73er6kNV9dvLtr+uqm6oqndU1auq6v6bjTUed6U++8qxz/ZU1f9XVbXJGI+ZeJ6uq6rnjdsfMm7b93NrVb18Dm3aX5/96US8D1TVGzcbb+LY/2pZe+6uqqfM6/gTcVbqs5dU1U1VddecYqzYZ+O+LXlNj9uOrqqLqupd42v7W+YRa1ncE6vq96rqnVV1fVWdNO8YY5xVx+Wcjr/a62zu741rvMbm/l61xnj83Ro+p6+rqp+vqqM2GWt/bTt/fF/seYz7Ndq1lZ/TV419tu+96/PmFW8ixoqfbYs+fs353GqN8fHLVfXeiefxjM3EWiH2S8fX1Tu26v1j3LcV4+MnxrjvrDmcd6xw/NXGx1zPCdZ4Hhfyebb8NV0LOrdaFnPVds4xxmp9tpXn3lXD+dW7xrH5PZuNNR53pffhuX6WTRxzpXadNT6H76iqS6pqU/89fY1xP9fz04njrta2ub8Pr/X5Nb5nzev8e7V2zTUnsZ/4U2NzVd19RP0keVySU8fbj07ywSQPH+8/dKLcTye5YIGxXp9k53j755P85zm172eS/GqSV4z3j03y90m2j/cvSXL2gmJtS/JPSR4x3v+JJC9aUKwHJvlX4+2jk/xpkicteHy8K8kXjre/K8kvzyHe2UmenOS3l23/N0lq/Pm1RY2PcdtfJvnqMdbvbPZ5HPvjmPH2g5O8L8mjVyh3bZKvW2SfLSv3hiTPnMfzuMKx/1mS25M8cAHHXqnPvjrJo5LcNacYK/bZVr6mx20/kuTF4+377Ys75+fzqiRPnGjr3PtsPPZM43IBfTb398Y13hfn/l61v/eQjJ/TY7w3ZPwcXVDbvjzJSWP8TY/F/fTZln1Oj9uuSrJjXmNxlbgrfrYt+viZ87nVGuPjl5Oct6D2fWOS38/wHvygJNdk4hx1UWNxK8ZHkn+R5C1Jjhp/3ppkaavHX+ZwTrDG87iQz7OVXtPL9s/l3GrWdi66z7K1597fkeRXktxvvP95i+qzzPmzbLV2jWPvpiSPG+9fmOQ5ixoPmfP56Qx9Nvf34f29fyTZkeQ182rffto115zEesbmaj+H9UyjqvqqMUP3gKp6UFVdl+To7n53knT3BzJ8Ido+3v/w+LhK8jlJZl7waT2xxuOfleTy8eGXJJl5hsJKsarqi6vqK5M8MsnvTRR/bJJ3d/fe8f4fJJn5rx3rjLXvi8ODxjY+NMkHFhGruz/W3X803v5kkr9KcvyssVaLl/2Mjwzj4aHj7YfNo23d/YdJPrK8fHdf0aMMSZ2Z27ae57GqHpXhjekvxli/kk2OxQwfTJ8YixyTFWY0VtXjknxehi+0M9tAn+173EMzvOY29FfF1Z7TiSLnJfmd7v7YPI+/yussY399cF6xsnqfbdlrevQfkvzY2MbPdPetG2njfmJ/aZJt3f37Y4y7Ntpna8XKDONyk8dfsc82+964gc/NDb9XrbdtY7wPjze3ZThxXcjn9Hj/r7v7fetpzwbbtZWf03O13s+2RR5/fI+a67lV5vg6Xmfcr0jyJ919T3d/NMnfJDlnjsdf83N6HlaJfVSSB2T84pnk/kn+cV7Hn2X8beScYAPP44Y/zzb6mq4NnlutFTtzHC8b6LMtO/dO8p+TXNjdn0mS7v6nzcZarc/m/Vm2n3Ydm+ST3f2u8f7vZ5OfL9n/Z/SGz0830LZNWW+sGmaD/e8kP7joWJvJScwae73nCJuannaw6+5rqmpXkhdneMJf293v2Le/qs7M8EJ9z8S2V2f4y+n1SZ6/oFjHJvlQd98z7r45yXGbiTXW98ok/z7J108U35PktBouwbg5wwnUzNO11xOruz9VVf85yd8m+WiSdyf57gW167Oq6uEZsrQ/M2us1eKtMT6+M8kVVfXxJB/OkE2fS6zV1HCpx7cn+a+biZXVn8fjMoyLfTY9Frv7HVV1QpI3JTklyQ+MJ9uTdia5bPyiObONvKZHT0nyhxMf0usyQ//tzPCXgA3Z6NifV6z99dlWvabH13GS/GhVLWXow/O7e0NfJlaJ/dgkH6qq30hycoYv5xd096c3EmN/sWYcl5s6/lqvs428N260LRt5r9po26rqzUnOzDAz8vIVDjvXtm3Eeto1fqZsyef0hFdX1acz/IX7xet9L16rnRs51hyOP/dzqxnGx0uq6oVJ/jDDe8knsk6r9N+1SX64qn4q4+zBDH26bht8/1jk+PjTqvqjDLO2KsNfuN85z7bN8NB1nxOs8zW9qc+zTZwTbOjcaq3YM57Xber4+3nIVp57f36Sp1XVNyfZm+R79iWNNxIra/TZoj7Llrk1ybaq2tHduzP8kfOEzcaa13iYNd4aD9vQ+/AGYp2fZFd3f7DWeUXtRtq10ZzELLGzke8YvYCpTgfTT4YP+LcnuTrJURPbH5XkhiRfvcJjjkryc0m+YxGxkjwiyZ6J/SckecdmYmUYyD847nt27jsF8sljubcm+akkb1xErAx/LfrDDG+6leQVSX5oUe0at23L8Gb7vYseH0l+I8njx9s/kOSX5hRrKatMoU7yi0levtl27afPdiT5g4nHPWG1uqy3XeO+R2eYffDIZduvT/KVi+6ziX2/k+RbNhJvxrh7k9x/nsdfa+yP2zc0PXbWPtvK13SG98XOOM04yfclec2cn9PzktyZIXm0LcMXok1N097MuFxEn01s2/B74wZfYxt6r9pI28btDxj774lb0Lb3ZQOXlqynXdmiz+nx/nHj74dk+AvjPC7RWddn2yKOnwWcW+1vfIzbKsNf2i9J8sJ5tjHJ/0jytgwzA163kdfyBsfiQsdHhi+Zb8pwWcuDxzH/hK0cf9ngOcGsz2Pm8Hm2ntf0xGM2fG610fGy6D7LFp57J7kryfPH2/8uyZ9uQZ/N+7NspXZ9TYbZZ3+ZIXHwtkWPh2zi8q11tm1T78Ozxhrb+WcZZqtvqH3radfEvg3lJOYxNpf/HNaXp42OzfCh9JAML8x901LflOR/dPdfLH9AD391vjTrmL63zli3JXl43bsQ2fFJbtlkrK9Jcn5VvS/JTyZ5Zo0LoHb3b3X347v7azKc9Lxr5UNuOtYZY7z39DAKX5/h2vWFtGt0UYZp/Rtd9G+mPquq7Um+rLuvHh93WTbftv2qqh/OMA3++9YZZ6VYqz2Pt+S+l5PMYyx+Vg9/eXhHhmRUkqSqvizDm+6164yzarz9vaZrWMD2zHH/ZqzWzm9N8pvd/ak5H3+tsT/PWJ+1rM+28jV9W5KPZThBTJJfz3CJxmYsj31zhpOlG3uYjfDGOcRYLdaanzWbPf4+K73Osrn3xvW+xjbzXrVivH1WaVu6++4k/yfJuZuNNed+2m+sfZa3aws/p9Pdt4y/P5JhLYMzN9K4/cSet1mPv4hzq1XHR3d/sAefSPLqbO55nIrb3S/p7jO6+4kZvhStd0zs9/j7rDAWFz0+vjnJX/RwefBdGRI4XzPH4+/XJs8JZn0e5/F5tq5zgjmcW+0v9met9p48r+NPOgDn3jfn3j77zSRfuslYa57HzfOzbDXd/dbufkJ3n5nkT7L5z5fJY89jPMwcb7k5vA/PGuvLMyS894z9+cCq2rOgWJ+1iZzEWrHX/x1jM1mrQ+Enya4kz8jwF5tXZMi0/WGW/cUmwwfyKRO3fzLJTy4i1lj213PfxRq/azOxlu17du77V8XPG39/boa/Wj1uEbFy7yKR+xbz/NEkP7XAdr04Q3b+flswPrZlmOK5byG55yR5wzzalpUz59+Z5M+TfM482rXG87h8Iex/s8nn8Ph99R7H3LuSfMlE+R9P8iOL7rOJ8s9LcslG483Qf3+RceHhRRx/pT6b2L7RmUYz9dkBeE1fmuSsiX2/Ps/nNMNfV94+0Z5XJ/nuzfbdRsblovpsvL+p98b1tCWbfK9a53h8cJJHjdu3ZfgCcf6i+ykbn2m0nj7bqs/pbbl3Yfv7Z7gk4nnzHJsT25cyn5lG6/nsnOu51Rpjf99YrCQvT/Ljc3wNHJXk2HHfl2b4UrZtjsdf7TW28PGR5GkZLg3elntnsz55q8ZfNnFOsM7X9KY+z2Z9TU9s29S51Ubbucg+y9afe/94kv8wsf+aRfRZFvBZtka79n2+HDO+3s5a9HjI5mYaradtm3ofXk+szbZvHeN+0zmJjY7N/R5jMxU42H+SPHPfm0uGD+Crx22fynBStu/njAyLeL0lw9od78gwFXjm/1Sxnlhjmcdm+LK+J8NJzjGbjHXWxP77dH6G/2hz/fizrtX5NxDreUnemWHRxt/KeNIz71gZ3rB6jLXvuf3ORY2Pscw3j+Pj7Rn+u8hjN9u2DNNF9yb5eIa/cHzDWOaeDNfA76vDzNMtN9BnO8Yx/54MHwa1yVjfMPb/28ffz132mBuTfMGiX9MTj7kqyTkbiTdD/52U4S/ZG05cbrDPfmIcL58Zf79oUX2WLXpNj/cfk+GvYH+T4cTmxAX02RPH4/9thv+6cfRm+m6j43IOx1+xz7LJ98b1tiWbeK/aQNsemeG/SP1Nhvesn806vkhvoG3fk+H1dU+GxVdnvixiPe0ay2zJ53SG/8R17Rj/ugzrXR21nngzvs5W/Gxb9PEz/3Or/Y2PK3PvOeNrkzx4jm3ct47F9Rn+MLGh944NvMa2YnycneQXMrxHXZ/kp7dy/GWD5wTreR7HMhv+PFutXRP7n/1/27mDFAZhIAqg0xt4kx7U47nyNl2kuOiAMBPSje+BOyWERB2/xsgPfu3aaqafq8cs/lt7bzG+QDtiLJt8rxizWHMvu+vXHuN8O6P4Iqsx79v1abNv7etwta2fY0uhUaWtmMwkZubm3fb67gwAAAAAlyf80wgAAACAIqERAAAAAInQCAAAAIBEaAQAAABAIjQCAAAAIBEaAQAAAJAIjQAAAABIhEYAAAAAJB/bACCnndjGOQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "importances = rf_model.feature_importances_\n", "\n", "std = np.std([tree.feature_importances_ for tree in rf_model.estimators_], axis=0)\n", "indices = np.argsort(importances)[::-1]\n", "\n", "plt.figure(figsize=(20,6))\n", "\n", "plt.title(\"Random Forest feature importances (top 40 features) \")\n", "plt.bar(range(XS_train_ohe.shape[1]),importances[indices],\n", " color=\"#FF5733\", yerr=std[indices], align=\"center\")\n", "plt.xticks(range(XS_train_ohe.shape[1]), X_test_ohe.columns[indices])\n", "plt.xlim([-1, 40])\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 60, "id": "arabic-spokesman", "metadata": {}, "outputs": [], "source": [ "y_hat = np.zeros(y_train.shape)\n", "y_hat = rf_model.predict(XS_test_ohe)\n", "cm = confusion_matrix(y_target=y_test, y_predicted=y_hat)\n", "acc = accuracy_score(y_test, y_hat)\n", "recall= recall_score(y_test, y_hat)\n", "precision= precision_score(y_test, y_hat)\n", "f1= f1_score(y_test, y_hat)" ] }, { "cell_type": "markdown", "id": "electric-behalf", "metadata": {}, "source": [ "### Model Performance" ] }, { "cell_type": "code", "execution_count": 61, "id": "three-referral", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 0.904984374506771\n", "Precision : 0.9209338656483249\n", "Recall : 0.8348544453186467\n", "F1-Score : 0.8757840871574777\n" ] } ], "source": [ "from mlxtend.plotting import plot_confusion_matrix\n", "fig, ax = plot_confusion_matrix(conf_mat=cm)\n", "plt.title('Confusion Matrix')\n", "plt.show()\n", "print(\"Accuracy : \", acc)\n", "print(\"Precision : \", precision)\n", "print(\"Recall : \", recall)\n", "print(\"F1-Score : \", f1)" ] }, { "cell_type": "markdown", "id": "divine-burning", "metadata": {}, "source": [ "### Build Cost Matrix" ] }, { "cell_type": "code", "execution_count": 62, "id": "quick-clearance", "metadata": { "scrolled": true }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQMAAAEhCAYAAAB7tcX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAATAUlEQVR4nO3deZhcVZnH8e9LAiIkrFmQJBAgIUxwADVxGRHZNzFkFDFBWQyLo4g4IJpxAXFQyYAOKi7AqGyy6kAQCQFxREU0tGhYwhJMwKQDWUBCAIXQvPNHnYQihO4KpOp2h+/neepJ3XNv3fMWTf3q3FO36kZmIklrVV2ApO7BMJAEGAaSCsNAEmAYSCoMA0mAYaBuLiKmRMThVdfxWmAY9GARcUhEtEXEkxHxcHnh7Pwq9/lgROzZyfpdIyIj4qoV2ncs7b9qsJ8vRcTFXW2Xmftl5gWN7FOvjmHQQ0XECcBZwFeBgcAWwHeBA1vQ/ULgHRGxaV3b4cD9q6uDqPH/z1bKTG897AZsCDwJfKCTbV5HLSzmldtZwOvKun7AtcDjwGPAb6i9MVwEPA/8vez/MyvZ767AXOD7wLGlrRfQDpwM/Kpu228Cc4AngD8C7yrt+wLPAktLP9NL+6+ArwC3lBqGlbajyvrvAT+t2/8k4CYgqv6brAk3k7dnegewLnBVJ9t8Hng7sBOwI/BW4Atl3YnUXtD9qY0qPgdkZh4K/BV4b2b2ycz/6mT/FwKHlfv7AHdRC516t5X+NwEuAa6MiHUz83pqI5rLSz871j3mUOAYoC/w0Ar7OxH454g4IiLeBRwJHJ4lGfTqGAY906bAosx8rpNtPgR8OTMXZOZC4FRqLzSovSO/AdgyM5dm5m9W9QWVmb8DNomIEdRC4cKVbHNxZj6amc9l5tepjVZGdLHr8zPz7vKYpSvs7+nyHL4BXAwcl5lzV6VuvTzDoGd6FOgXEb072WZzXvzO+lBpAzgDeAC4ISJmRcTEV1jHRcAngN1YySglIj4dEfdExOKIeJza4U2/LvY5p7OVmfkHYBYQwBWvqGqtlGHQM90KPAOM7WSbecCWdctblDYyc0lmnpiZWwNjgBMiYo+y3aqMEC4CPg5cV961lyvD+M8ABwMbZ+ZGwGJqL+LO+um0/4g4ltoIY17Zv1aTzt5Z1E1l5uKIOBn4TkQ8B9xAbei/J7BbZn4GuBT4QkTcRu0FdjK1oTURcQBwL/AXai/QDmoThwDzga0brGN2RLyb2jv1ivoCz1H75KF3GX1sULd+PrBXRKyVmc+v5PEvERHbAqdRm8R8GpgWEVMy88+NPF6dc2TQQ5Vj8BOoTQoupDa8/gRwddnkNKANuAO4E7i9tAEMB35BbSb/VuC7mfl/Zd3XqIXI4xHx6Qbq+G1mrjhxCDAVuJ7ax40PAf/gxYcAV5Z/H42I27vqpxwSXQxMyszpmTmT2sTnRRHxuq4er66FE7GSwJGBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqehWv3TUr99GOXTo5l1vqG6koR8pUjfx4IOPsGjR47Gydd0qDIYO3Zy2touqLkOrouOZqivQKhj1tgkvu87DBEmAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQYBpIKw0ASYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMgya6/vrfMWLE+xg2bCynn35+1eVoJSYc9VUGvOE9vHHHDy9ve+yxJ9hrn+MZvt0H2Wuf4/nb356osMLWaWoYRMS+EXFfRDwQEROb2Vd309HRwbHHTmLKlG8xY8aVXHrpVGbMmFV1WVrBEYftz/U//8aL2k6fdBF77D6Kmfdezh67j+L0SRdXVF1rNS0MIqIX8B1gP2AkMD4iRjarv+5m2rS7GTZsCFtvPZh11lmbceP2ZvLkm6suSyvYZZed2GSTDV7UNvlnv+Hww/YD4PDD9uPqa35dRWkt18yRwVuBBzJzVmY+C1wGHNjE/rqV9vYFDBkycPny4MEDaG9fUGFFatT8+X/jDW/oB8Bmm23K/Pl/q7ii1mhmGAwC5tQtzy1tUo8REURE1WW0ROUTiBFxTES0RUTbwoVrTgIPGjSAOXPmL1+eO3cBgwYNqLAiNWrgwI15+OFFADz88CIGDNio4opao5lh0A4MqVseXNpeJDPPzcxRmTmqf/+Nm1hOa40ePZKZM+cwe3Y7zz67lMsuu4ExY3apuiw1YMwBO3PBhVMAuODCKRz43ndVXFFr9G7ivm8DhkfEVtRCYBxwSBP761Z69+7N2WefxD77HEdHRwcTJoxh++23qbosrWD8h07hVzf/iUWLHmfwlmM59ZQjmfjZQzl43Bf5wY+uZcstNuOKy/6z6jJbIjKzeTuP2B84C+gF/DAzv9LZ9qNGjcy2touaVo+aoOOZqivQKhj1tgm0td270kmQZo4MyMzrgOua2Yek1aPyCURJ3YNhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkoJMLr0bEEmDZJZqXXbU1y/3MzA2aXJukFnrZMMjMvq0sRFK1GjpMiIidI+Ij5X6/iNiquWVJarUuwyAiTgE+C/xHaVoHuLiZRUlqvUZGBv8KjAGeAsjMeYCHENIappEweDYzkzKZGBHrN7ckSVVoJAyuiIhzgI0i4mjgF8B5zS1LUqu97KcJy2TmmRGxF/AEsC1wcmbe2PTKJLVUl2FQ3Am8ntqhwp3NK0dSVRr5NOEoYBrwPuAg4PcRMaHZhUlqrUZGBicBb8rMRwEiYlPgd8APm1mYpNZqZALxUWBJ3fKS0iZpDdLZdxNOKHcfAP4QEZOpzRkcCNzRgtoktVBnhwnLTiz6S7ktM7l55UiqSmdfVDq1lYVIqlaXE4gR0R/4DLA9sO6y9szcvYl1SWqxRiYQfwzcC2wFnAo8CNzWxJokVaCRMNg0M38ALM3MmzNzAuCoQFrDNHKewdLy78MR8R5gHrBJ80qSVIVGwuC0iNgQOBH4NrAB8O9NrUpSyzXyRaVry93FwG7NLUdSVTo76ejbvPCDqC+RmZ9c3cXM++MsTo1DVvdu1USn5CVVl6BV8vLThJ2NDNpWfyGSuqvOTjq6oJWFSKqWF1GRBBgGkgrDQBLQ2C8dbRsRN0XEXWV5h4j4QvNLk9RKjYwMzqN2AZWlAJl5BzCumUVJar1GwmC9zJy2QttzzShGUnUaCYNFEbENL1xE5SDg4aZWJanlGvluwrHAucB2EdEOzAY+3NSqJLVcI99NmAXsWS6rtlZmLunqMZJ6nkZ+6ejkFZYByMwvN6kmSRVo5DDhqbr76wIHAPc0pxxJVWnkMOHr9csRcSYwtWkVSarEKzkDcT1g8OouRFK1GpkzuJMXftegF9AfcL5AWsM0MmdwQN3954D5melJR9IaptMwiIhewNTM3K5F9UiqSKdzBpnZAdwXEVu0qB5JFWnkMGFj4O6ImEbdx4yZOaZpVUlquUbC4ItNr0JS5RoJg/0z87P1DRExCbi5OSVJqkIj5xnstZK2/VZ3IZKq1dl1Ez4GfBzYOiLuqFvVF7il2YVJaq3ODhMuAaYAXwMm1rUvyczHmlqVpJbr7LoJi6ldUm1868qRVBV/HVkSYBhIKgwDSYBhIKkwDCQBhoGkwjCQBBgGkgrDQBJgGEgqDANJgGEgqTAMJAGGgaTCMJAEGAaSCsNAEmAYSCoMA0mAYSCpMAwkAYaBpMIwkAQ0dq1FdeH42TfxzJKnyI7nef65Ds4b/X7W3XhDDrr8v9lo6CAef7Cdnxz8Kf7x+BMA7PvNzzN8/3ez9Ol/cPURE3nkTzMqfgZa5vrrf8fxx59JR8fzHHXUWCZOPKLqklqmaSODiPhhRCyIiLua1Ud3csFuh3POm8Zy3uj3A7DzxGOYfdOtnL3tPsy+6VZ2nngMAMP224VNhg/l28P35mfHfJH3fO9LFVateh0dHRx77CSmTPkWM2ZcyaWXTmXGjFlVl9UyzTxMOB/Yt4n779ZGHLgH0y+4GoDpF1zNiLF7ArDdgXtwx4W19vY/TGfdjTagz2b9K6tTL5g27W6GDRvC1lsPZp111mbcuL2ZPPm1c7HxpoVBZv4aeE1ckzETDr3hBxzd9lPefPTBAPQZuClPPrIQgCcfWUifgZsC0HfQQBbPeWT5Y5+Y+wh9Bw1sfdF6ifb2BQwZ8sLfYvDgAbS3L6iwotaqfM4gIo4BjgHYsPpyXpEf7TyeJfMWsF7/TTj0xh+x6N6XDi0zs4LKpMZV/mlCZp6bmaMyc9R69Kq6nFdkybzau8fTCx/j3qtuZNBbd+DJ+Y8uH/732aw/Ty2oDZKWtM9nwyGbLX/sBoM3Y0n7/NYXrZcYNGgAc+a88LeYO3cBgwYNqLCi1qo8DHq6tdd7Pev0WX/5/W32ficL7prJ/df8kh0PHwvAjoeP5b7JNwFw3zW/ZIfDau2D3rYjzyxesvxwQtUaPXokM2fOYfbsdp59dimXXXYDY8bsUnVZLdMzx+XdyPoDN+WDV30HgLV69+KuS67lL1N/w7zb7uSgK87iTUcexOKH5nHlwZ8CYOZ1NzN8/3dz3AM3svTpvzP5I5+rsnzV6d27N2effRL77HMcHR0dTJgwhu2336bqslommnUsGxGXArsC/YD5wCmZ+YPOHrN5rJsfZcum1KPmOCUvqboErYJRow6lrW1GrGxd00YGmTm+WfuWtPo5ZyAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJMAwkFYaBJMAwkFQYBpIAw0BSYRhIAgwDSYVhIAkwDCQVhoEkwDCQVBgGkgDDQFJhGEgCDANJhWEgCTAMJBWGgSTAMJBUGAaSAMNAUmEYSAIMA0mFYSAJgMjMqmtYLiIWAg9VXUcT9AMWVV2EVsma+jfbMjP7r2xFtwqDNVVEtGXmqKrrUONei38zDxMkAYaBpMIwaI1zqy5Aq+w19zdzzkAS4MhAUmEYSAIMg6aKiH0j4r6IeCAiJlZdj7oWET+MiAURcVfVtbSaYdAkEdEL+A6wHzASGB8RI6utSg04H9i36iKqYBg0z1uBBzJzVmY+C1wGHFhxTepCZv4aeKzqOqpgGDTPIGBO3fLc0iZ1S4aBJMAwaKZ2YEjd8uDSJnVLhkHz3AYMj4itImIdYBxwTcU1SS/LMGiSzHwO+AQwFbgHuCIz7662KnUlIi4FbgVGRMTciDiy6ppaxdORJQGODCQVhoEkwDCQVBgGkgDDQFJhGLxGRcSuEXFtuT+ms29VRsRGEfHxV9DHlyLi0422r7DN+RFx0Cr0NfS1+E3D1ckwWMOUb0uuksy8JjNP72STjYBVDgP1LIZBD1He+e6NiB9HxD0R8ZOIWK+sezAiJkXE7cAHImLviLg1Im6PiCsjok/Zbt+yj9uB99Xt+4iIOLvcHxgRV0XE9HL7F+B0YJuI+HNEnFG2OykibouIOyLi1Lp9fT4i7o+I3wIjGnheR5f9TI+Iny57TsWeEdFW9ndA2b5XRJxR1/dHX+1/W9UYBj3LCOC7mflPwBO8+N360cx8M/AL4AvAnmW5DTghItYFzgPeC7wF2Oxl+vgWcHNm7gi8GbgbmAj8JTN3ysyTImJvYDi1r2nvBLwlInaJiLdQO+16J2B/YHQDz+l/M3N06e8eoP6Mv6Glj/cA3y/P4UhgcWaOLvs/OiK2aqAfdaF31QVolczJzFvK/YuBTwJnluXLy79vp/ZjKrdEBMA61E6v3Q6YnZkzASLiYuCYlfSxO3AYQGZ2AIsjYuMVttm73P5UlvtQC4e+wFWZ+XTpo5HvYrwxIk6jdijSh9rp28tckZnPAzMjYlZ5DnsDO9TNJ2xY+r6/gb7UCcOgZ1nx3PH65afKvwHcmJnj6zeMiJ1WYx0BfC0zz1mhj0+9gn2dD4zNzOkRcQSwa926lT3fAI7LzPrQICKGvoK+VcfDhJ5li4h4R7l/CPDblWzze+CdETEMICLWj4htgXuBoRGxTdlu/EoeC3AT8LHy2F4RsSGwhNq7/jJTgQl1cxGDImIA8GtgbES8PiL6Ujsk6Upf4OGIWBv40ArrPhARa5WatwbuK31/rGxPRGwbEes30I+6YBj0LPcBx0bEPcDGwPdW3CAzFwJHAJdGxB2UQ4TM/Ae1w4KflwnEBS/Tx/HAbhFxJ/BHYGRmPkrtsOOuiDgjM28ALgFuLdv9BOibmbdTO1yZDkyh9jXurnwR+ANwC7XAqvdXYFrZ17+V5/A/wAzg9vJR4jk4wl0t/NZiD1GGwddm5hsrLkVrKEcGkgBHBpIKRwaSAMNAUmEYSAIMA0mFYSAJgP8HEwxC7USz91YAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_matrix = np.array([[0, 10],[500, 0]])\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_matrix,cmap=\"YlOrRd\")\n", "plt.title('Cost Matrix')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 63, "id": "dramatic-innocent", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQMAAAEhCAYAAAB7tcX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWlElEQVR4nO3deZhU1Z3G8e8riMgmCozQDSqyigRBWWLiGIwGcUWjJC5xnzhK1BhX4mTUmA3HOMGJGpfEZaJxIeqgKAKSGRdUEBFEQXYRmh2VRUzQ9jd/1KEtsWkKpOqyvJ/nqYe6956653e76bfOubVcRQRmZjtlXYCZbR0cBmYGOAzMLHEYmBngMDCzxGFgZoDDwDaDpLMlvbQF9vOupCM287H/LGnaV63BPucwyJik0ySNl7Ra0kJJwyUd8hX3udE/MkmNJA2W9F7qe1ZabvpV+i4WSSGp7brliHgxIjpkWdP2xmGQIUmXAYOBXwN7AnsBtwP9itxvHWA0sD/QF2gEHAwsB3oWs2/bikWEbxncgN2A1UD/GtrsQi4sFqTbYGCXtK0pMAz4EHgfeJFcuP8Z+Az4OO3/qmr2+y/AYqBBDX0PBGYBq4ApwIl5284GXspb3h8YlepYDFyT1t8H/DKvXW9gft7yu8AR6X5P4JV0PAuBW4E6adsLQAAfpWP6fjX72g/4v/T4t4Hj87bdB9wGPJ2OZyzQJuv/A1vbzSOD7BwM1AWeqKHNvwFfB7oCB5D7g/lZ2nY5MB9oRm5UcQ0QEXEG8B5wXEQ0iIj/qGa/RwDPRsTqGvqeBfwzudD6OfCApBbrN5LUEHgOeBYoA9qSG3VsqkrgJ+RC7mDgcGAAuYM6NLU5IB3TI+vVsDPwFDAS+CfgYuBBSfnTiFPScewOzAR+tRk1btccBtlpAiyLiE9raHM6cENELImIpeT+M5+Rtn0CtAD2johPIjeHLvSDJk3IPftuUEQMiYgFEfFZ+uObQfVTiGOBRRFxc0T8PSJWRcTYAuvI7+/1iHg1Ij6NiHeBO4FvFfjwrwMNgEERsTYi/kZu1HRqXpsnImJc+nk/SC5gLY/DIDvLgaaSatfQpgyYm7c8N60DuIncM9xISbMlDdzEvr/0LJ9P0pmSJkr6UNKHQGdyz9rra0VuFPGVSGovaZikRZJWkjuPUujJzDJgXkR8lrduLlCet7wo7/4acuFheRwG2XkF+AdwQg1tFgB75y3vldaRnoEvj4h9geOByyQdntptbITwHHCkpPrVbZS0N3A3cBHQJCIaA28Bqqb5PGDfDfTzEVAvb7l5DTX9AXgHaBcRjchNe6rrrzoLgFaS8v8/7wVUFPh4w2GQmYhYAVwL3CbpBEn1JO0s6ShJ6+b5DwE/k9QsveR3LfAAgKRjJbWVJGAFuTn3umfGxWz4DxRyJxnnAY9J6ihpJ0lNJF0j6WigPrlAWZr6OofcyKA6w4AWki6VtIukhpJ6pW0TgaMl7SGpOXBpDTU1BFYCqyV1BC5cb3tNxzSW3LP9Veln2Bs4Dni4hv5sPQ6DDEXEzcBl5E4KLiX3B3oR8D+pyS+B8cCbwGRgQloH0I7cM/xqcqOM2yPif9O235ALkQ8lXVFNv/8gdxLxHXKvAqwExpEblo+NiCnAzWm/i4GvAWM2cAyrgO+Q++NbRO7cwmFp85+BSeReNRgJPFLNLta5AjiN3Nn+u6tpez1wfzqm761Xw9rU/1HAMnIvz54ZEe/U0J+tR4WfczKz7ZlHBmYGOAzMLHEYmBngMDCzxGFgZoDDwMwSh4GZAQ4DM0scBmYGOAzMLHEYmBngMDCzxGFgZoDDwMwSh4GZAVDT9++VXNOmTWKfvVtlXYZtCn8dxjbl3ffmsWzZ8mq/Tm6rCoN99m7F+DGb8y3blpWIyqxLsE3Q45t9NrjN0wQzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDoMienbkaDp06UXb/Xsw6KZbsi5nh3fLbXfxte6H0vmgQxl8650ADHn8STofdCi16jdn/OsTq9ouX/4+3+57Ig2bteain/z0C/t5fcIkuvT4Fu069+KSy68hIkp6HMVS1DCQ1FfSNEkzJQ0sZl9bm8rKSn506dUMH/oIU94Yw0NDHmfK1GlZl7XDeuvtqfzx3gcY+8KzTBz7N54ePoqZs+bQuVNHHnvoHg495OAvtK9bdxduuHYgN/36+i/ta8CPr+Ku225m+uRXmTlzDs+O/FuJjqK4ihYGkmoBtwFHAZ2AUyV1KlZ/W5txr02gbZvW7Nt6H+rUqcMp/U9k6LDhWZe1w5o6bQY9ux9IvXr1qF27Noce8g0eH/o0+3VsT4f2bb/Uvn79+hzyjV7UrbvLF9YvXLiYlatW8/We3ZHEGaf353+e2j5+r8UcGfQEZkbE7IhYCzwM9Ctif1uVigULadWyrGq5ZXkZFRULM6xox9a5U0deenksy5e/z5o1axg+4jnmza/Y5P1ULFhIy/IWVcsty8tYsGD7+L3WLuK+y4F5ecvzgV5F7M9sg/br2J6rLruII4/7PvXr1+OALp2pVatW1mVtVTI/gSjpfEnjJY1funR51uVsMeVlLZg3f0HV8vyKBZTnPaNY6Z139umMf3kUz48ayu6Nd6N92zabvI/yshbMzxvhza9YQFnZ9vF7LWYYVACt8pZbpnVfEBF3RUT3iOjerFmTIpZTWj26d2PGzNnMeXcua9eu5eEhT3D8MX2zLmuHtmTJUgDemzefJ558htO+/91N3keLFnvSqGEDXh03nojgzw8Ood+x28fvtZjThNeAdpJakwuBU4DTitjfVqV27drc+rtBHHlcfyorP+Pcs05j/04dsy5rh3byaeex/P0P2Hnn2tz6u9/QuPFuPDH0GS65/BqWLlvOsSedTtcunXn2yUcAaN2xOytXrWLt2rUMfWo4I556hE77deC2wTdyzr9ewscf/52+fQ7nqCMPz/jItgwV8zVSSUcDg4FawD0R8aua2nc/qGuMHzO6aPXYlhdRmXUJtgl6fLMP4ydMVHXbijkyICKeAZ4pZh9mtmVkfgLRzLYODgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMcBiYWeIwMDPAYWBmicPAzACHgZklDgMzAxwGZpY4DMwMqOHCq5JWAesu0bzuqq2R7kdENCpybWZWQhsMg4hoWMpCzCxbBU0TJB0i6Zx0v6mk1sUty8xKbaNhIOk64Grgp2lVHeCBYhZlZqVXyMjgROB44COAiFgAeAphtp0pJAzWRkSQTiZKql/ckswsC4WEwaOS7gQaS/oh8Bxwd3HLMrNS2+CrCetExG8lfQdYCbQHro2IUUWvzMxKaqNhkEwGdiU3VZhcvHLMLCuFvJrwL8A44LvAycCrks4tdmFmVlqFjAyuBLpFxHIASU2Al4F7ilmYmZVWIScQlwOr8pZXpXVmth2p6bMJl6W7M4GxkoaSO2fQD3izBLWZWQnVNE1Y98aiWem2ztDilWNmWanpg0o/L2UhZpatjZ5AlNQMuArYH6i7bn1EfLuIdZlZiRVyAvFB4B2gNfBz4F3gtSLWZGYZKCQMmkTEn4BPIuL5iDgX8KjAbDtTyPsMPkn/LpR0DLAA2KN4JZlZFgoJg19K2g24HPg90Aj4SVGrMrOSK+SDSsPS3RXAYcUtx8yyUtObjn7P51+I+iURccmWLmbBhElcv2vTLb1bK6LrP16WdQm2KbThTTWNDMZv8ULMbKtV05uO7i9lIWaWLV9ExcwAh4GZJQ4DMwMK+6aj9pJGS3orLXeR9LPil2ZmpVTIyOBuchdQ+QQgIt4ETilmUWZWeoWEQb2IGLfeuk+LUYyZZaeQMFgmqQ2fX0TlZGBhUasys5Ir5LMJPwLuAjpKqgDmAD8oalVmVnKFfDZhNnBEuqzaThGxamOPMbNtTyHfdHTtessARMQNRarJzDJQyDTho7z7dYFjganFKcfMslLINOHm/GVJvwVGFK0iM8vE5rwDsR7QcksXYmbZKuScwWQ+/16DWkAzwOcLzLYzhZwzODbv/qfA4ojwm47MtjM1hoGkWsCIiOhYonrMLCM1njOIiEpgmqS9SlSPmWWkkGnC7sDbksaR9zJjRBxftKrMrOQKCYN/L3oVZpa5QsLg6Ii4On+FpBuB54tTkplloZD3GXynmnVHbelCzCxbNV034UJgALCvpDfzNjUExhS7MDMrrZqmCX8BhgO/AQbmrV8VEe8XtSozK7marpuwgtwl1U4tXTlmlhV/O7KZAQ4DM0scBmYGOAzMLHEYmBngMDCzxGFgZoDDwMwSh4GZAQ4DM0scBmYGOAzMLHEYmBngMDCzxGFgZoDDwMwSh4GZAQ4DM0scBmYGOAzMLHEYmBngMDCzxGFgZoDD4Ev6/elPXLl4MQMmT65at+vuu3PGyJFcPH06Z4wcSd3Gjb/wmLLu3bn2k0/odNJJVeuOGDSIAZMnM2DyZPb/3veq1p9w7738ePZsLnjjDS544w2aH3BA1bajbrmFS2bM4MJJk2jRrVvV+gPOPJOLp0/n4unTOeDMM4tx2JY8O3I0Hbr0ou3+PRh00y1Zl1NSRQsDSfdIWiLprWL1UQwT77uPB/r2/cK6QwYOZM7o0fy+fXvmjB7NIQM/v8CUdtqJ79x4I7NGjqxa1+7oo2lx4IHc0bUrd/fqxTeuuIJdGjas2j7qyiu5o1s37ujWjUWTJuUec9RR7NGuHf/Vrh1PnX8+x/zhD0AuiHpfdx1/7NWLu3v2pPd1130pjGzLqKys5EeXXs3woY8w5Y0xPDTkcaZMnZZ1WSVTzJHBfUDfjTXa2sx98UU+fv+LV4/r0K8fE++/H4CJ999PxxNOqNrW6+KLmfLYY3y0ZEnVumadOjH3hRf4rLKST9asYfGbb9K2b80/ig79+jHpv/8bgPljx1K3cWMaNG9OmyOPZNaoUXz8wQf8/cMPmTVq1Eb3ZZtn3GsTaNumNfu23oc6depwSv8TGTpseNZllUzRwiAiXgC2i2syNthzT1YvWgTA6kWLaLDnngA0LCuj44knMj49i6+zeNIk2vbty8677kq9Jk1ofdhhNGrVqmr7t3/1Ky6cNIkj//M/qVWnDgCNystZOW9eVZuV8+fTqLx8g+tty6tYsJBWLcuqlluWl1FRsTDDikqrpguvloSk84HzAXbLuJZCRQQAfQcP5rmrr65aXmfWqFGU9ejBeS+/zEdLlzLvlVeIykoAnvvpT1m9aBG16tThuLvu4pCrr+b5X/yi5Mdgtr7MTyBGxF0R0T0iutfLupgNWL14MQ2aNwegQfPmVVOCsu7dOfnhh7l0zhw6nXwyx9x+Ox379QPgxV//mju6dePPffogieXTp+f2lUYYlWvXMvHeeynv2ROAlRUVXxg9NGrZkpUVFRtcb1teeVkL5s1fULU8v2IB5eUtMqyotDIPg23BtCefpOtZZwHQ9ayzmDZ0KAC37Lsvg1u3ZnDr1kz56195esAA3hk6FO20E7vusQcAe37ta+zZpUvVCcZ1oQLQ8YQTWPLWW1V9rHuloGWvXvxjxQpWL1rErBEjaNOnD3UbN6Zu48a06dOHWSNGlOzYdyQ9undjxszZzHl3LmvXruXhIU9w/DE7zvmZzKcJW5uT/vIX9undm3pNm3LZvHn873XX8dKgQfR/9FG6nXceK+bOZUjeS4XVqbXzzpz74osA/GPlSh7/wQ/4LE0TTnrwQeo1a4YkFk2cyLALLgBgxjPP0O7oo7lk5kw+WbOGoeecA8DHH3zAC7/4Bee/9hoAz99wAx9/8EGxDn+HVrt2bW793SCOPK4/lZWfce5Zp7F/p45Zl1UyWn++u8V2LD0E9AaaAouB6yLiTzU9pkyK84tSjRXL9R8vy7oE2wTdv3k441+fqOq2FW1kEBGnFmvfZrbl+ZyBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQEOAzNLHAZmBjgMzCxxGJgZ4DAws8RhYGaAw8DMEoeBmQGgiMi6hiqSlgJzs66jCJoCy7IuwjbJ9vo72zsimlW3YasKg+2VpPER0T3rOqxwO+LvzNMEMwMcBmaWOAxK466sC7BNtsP9znzOwMwAjwzMLHEYmBngMCgqSX0lTZM0U9LArOuxjZN0j6Qlkt7KupZScxgUiaRawG3AUUAn4FRJnbKtygpwH9A36yKy4DAonp7AzIiYHRFrgYeBfhnXZBsRES8A72ddRxYcBsVTDszLW56f1pltlRwGZgY4DIqpAmiVt9wyrTPbKjkMiuc1oJ2k1pLqAKcAT2Zck9kGOQyKJCI+BS4CRgBTgUcj4u1sq7KNkfQQ8ArQQdJ8SedlXVOp+O3IZgZ4ZGBmicPAzACHgZklDgMzAxwGZpY4DHZQknpLGpbuH1/TpyolNZY0YDP6uF7SFYWuX6/NfZJO3oS+9tkRP2m4JTkMtjPp05KbJCKejIhBNTRpDGxyGNi2xWGwjUjPfO9IelDSVEl/lVQvbXtX0o2SJgD9JfWR9IqkCZKGSGqQ2vVN+5gAfDdv32dLujXd31PSE5Impds3gEFAG0kTJd2U2l0p6TVJb0r6ed6+/k3SdEkvAR0KOK4fpv1MkvTYumNKjpA0Pu3v2NS+lqSb8vr+16/6s7Uch8G2pQNwe0TsB6zki8/WyyPiQOA54GfAEWl5PHCZpLrA3cBxwEFA8w308V/A8xFxAHAg8DYwEJgVEV0j4kpJfYB25D6m3RU4SNKhkg4i97brrsDRQI8CjunxiOiR+psK5L/jb5/UxzHAHekYzgNWRESPtP8fSmpdQD+2EbWzLsA2ybyIGJPuPwBcAvw2LT+S/v06uS9TGSMJoA65t9d2BOZExAwASQ8A51fTx7eBMwEiohJYIWn39dr0Sbc30nIDcuHQEHgiItakPgr5LEZnSb8kNxVpQO7t2+s8GhGfATMkzU7H0Afoknc+YbfU9/QC+rIaOAy2Leu/dzx/+aP0r4BREXFqfkNJXbdgHQJ+ExF3rtfHpZuxr/uAEyJikqSzgd5526o7XgEXR0R+aCBpn83o2/J4mrBt2UvSwen+acBL1bR5FfimpLYAkupLag+8A+wjqU1qd2o1jwUYDVyYHltL0m7AKnLP+uuMAM7NOxdRLumfgBeAEyTtKqkhuSnJxjQEFkraGTh9vW39Je2Uat4XmJb6vjC1R1J7SfUL6Mc2wmGwbZkG/EjSVGB34A/rN4iIpcDZwEOS3iRNESLi7+SmBU+nE4hLNtDHj4HDJE0GXgc6RcRyctOOtyTdFBEjgb8Ar6R2fwUaRsQEctOVScBwch/j3ph/B8YCY8gFVr73gHFpXxekY/gjMAWYkF5KvBOPcLcIf2pxG5GGwcMionPGpdh2yiMDMwM8MjCzxCMDMwMcBmaWOAzMDHAYmFniMDAzAP4fyGBIWKEPKQwAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "cost_calc = np.multiply(cm, cost_matrix)\n", "fig, ax = plot_confusion_matrix(conf_mat=cost_calc,cmap=\"OrRd\")\n", "plt.title('Cost Calculation')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "universal-attachment", "metadata": {}, "source": [ "### Precision Recall Curve" ] }, { "cell_type": "code", "execution_count": 64, "id": "suitable-vermont", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Random Forest : f1=0.876 auc=0.952\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "y_score = rf_model.predict_proba(XS_test_ohe)\n", "rf_probs = y_score[:, 1]\n", "# predict class values\n", "rf_precision, rf_recall, rf_thresholds = precision_recall_curve(y_test, rf_probs)\n", "rf_f1, rf_auc = f1_score(y_test, y_hat), auc(rf_recall, rf_precision)\n", "# summarize scores\n", "print('Random Forest : f1=%.3f auc=%.3f' % (rf_f1, rf_auc))\n", "pyplot.plot(rf_recall, rf_precision, marker='.', label='Random Forest')\n", "# axis labels\n", "pyplot.xlabel('Recall')\n", "pyplot.ylabel('Precision')\n", "# show the legend\n", "pyplot.legend()\n", "# show the plot\n", "pyplot.show()" ] }, { "cell_type": "markdown", "id": "lyric-phrase", "metadata": {}, "source": [ "### Cost Curve" ] }, { "cell_type": "code", "execution_count": 65, "id": "mature-viewer", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeVzU1frA8c8ZQBBFXFBLkEFNnSm1XMsWr2aLWantGVouVyqtzBZNqVDvpfplqVmWYeaSU1Y3TS3TzCXbNJdM1BnNlE0tFQUXQLbz+2NgGmAGZmCGWTjv12teOfPdDpU8c855znOElBJFURRF8TYaTzdAURRFUWxRAUpRFEXxSipAKYqiKF5JBShFURTFK6kApSiKonglFaAURVEUrxTo6QYoiqIormHU6T8E7gBO6E3GTg6cfz8wFZDA73qT8SH3ttA5qgelKIriPxYBAxw50ajTtwcmA9fpTcYrgKfd2K5qUT0oRVEUP6E3GbcYdfoY68+MOn07YC7QHMgBxuhNRhMwBpirNxnPlFx7opabWyXVg1IURfFvScCTepOxO/Ac8G7J5x2ADkad/iejTr/VqNM71POqTaoHpSiK4qeMOn1D4Frgc6NOX/pxcMk/A4H2QF8gCthi1Ok7603GrNpupz0qQCmKovgvDZClNxmvsnEsA9imNxkLgCNGnf4g5oC1vTYbWBk1xKcoiuKn9CbjWczB5z4Ao04vjDr9lSWHv8Tce8Ko00dgHvI77Il22iNUNXNFURT/YNTpP8EcdCKAv4EEYCPwHnApEAQs05uM0406vQDexJz1VwQk6k3GZZ5otz0qQCmKoiheSQ3xKYqiKF7J55IkNBqNrF+/vqeboSiK4rdycnKklNLjHRifC1D169fnwoULnm6GoiiK3xJC5Hq6DaCG+BRFURQvpQKUoiiK4pVUgFIURVG8ks/NQdlSUFBARkYGeXl5nm6KW4SEhBAVFUVQUJCnm6IoilJr/CJAZWRkEBYWRkxMDEIITzfHpaSUZGZmkpGRQZs2bTzdHEVRlFrjtiE+IcSHQogTQoi9do4LIcQcIcQhIcQeIUS36j4rLy+PZs2a+V1wAhBC0KxZM7/tHSqKotjjzh7UIuAdYImd47dhLkzYHrgacymOq6v7MH8MTqX8+WdTFMU1Dl5/A0WnTlX4PCAigg4//uCBFtWc23pQUsotwOlKThkMLJFmW4HGQohL3dWeUmfOnOHixYvufoyiKEqtshWcKvvcF3gyiy8SSLd6n1Hymdvk5uZy+PBh9u3bx8mTJ/GmOoS7d+9mzZo1nm6GoiiK1/CJJAkhRBwQB1CvXr1q3UNKSUpKCiFjx6HJzuYUYP29wvFucH61nl+V3bt3sGPHTgYOvMnOGUVuea6iKL6v0Id7SZXxZIA6CrS2eh9V8lkFUsokzNsW06BBg2p1e4QQtG7dmvTsbJvHXdENXrJkKW+8MQshBF26dOY//0lg1KhHOXXqFM2bR7Bw4Xyio6P5/PMvmDbtvwQEBBAeHs53333Dyy9PJzc3lx9//JnJkyfywAP31bg9iqL4F3vzTBLwx5lqTw7xrQIeLsnmuwbIllIed8WNU4c/TNbyFQDIggJShz9M9qpVNGzYsNLrCgoKKDxzhtThD3Nu4yYACk+edOiZ+/bt57//fZWNG9fx++87eOutN3nyyQk88sgw9uzZSWzsUJ566hkApk9PZN26r/j99x2sWvUF9erVY/r0l3nggXvZvXu7Ck6Kothk74u0PwYncG+a+SfAL0BHIUSGEGK0EOIxIcRjJaeswbx74yFgPjDWXW1x1L59+zhrp4dVlY0bN3HfffcQEREBQNOmTfnll2089NCDAAwfHsuPP/4MwHXX9WbEiH8zf/4CiorU0J2iKDV3srDQ5ucFDRrUcktcx21DfFLKoVUcl8A4dzxb+9E/me0iKKjM+8oUFhZy+NQpdO+9a+ltBTZv7vL2zZs3l23bfuXrr7+he/fe7Nz5i8ufoShK3fLH5Bc4EhhIQkICaWlpREdHk5iYSGxsrKebVm2qFp8NR44ccbpnc+ON/fj88y/IzMwE4PTp01x77TUsW/YZAAbDJ9xww3UA/Pnnn1x9dS+mT0+gefMI0tPNlTDOnTvv2h9EURS/sWXp0kqPjx49mkceeYSUlBSKi4tJSUnx6eAEPpLF50oBERG2F7M1a0ZAQABFRUVcvHiRjIwMtFqtw/e94orLiY9/gX/96yYCAgLo2vUq3n57FiNHxjFjxkxLkgTA889P5o8/DiGlpH//flx5ZReio1vz2mszuOqqnipJQlGUMubNm8eP8fHEN2/h6aZYGHX6xsAHQCfMeRqj9CajS4eDhDetBXJEgwYNZPkNC41GI3q9vsb3zszM5MiRI5b3l112GY0bNy53lnvSzKtiNB5Er+/kkWcriuIZhYWFPDt+PHPefReAH9pdRrPAiv2KggYN6LJzh8ueK4TIkVJWOnll1OkXAz/oTcYPjDp9PSBUbzJmuawR1MEeVGWaNm1KVlYWZ86cASA1NZUGDRqoKuKKotQKW2nkjwH3t7uM6/88xNONwxkxYgRvvPGGR+eZjDp9ONAHGAGgNxnzccO3dxWgrAgh0Gq1nD9/noKCAgoKCkhNTaVdu3aqHp6iKG5nL428aWAg9913H4sWLSI0NJQnnnjC3U0JFEJYd8mSStajlmoDnAQWGnX6K4GdwHi9yVh2eKuG/CZJwlVDlYGBgcTExFjeZ2VlWRIfPMXXhmEVRXG9Tz/9lNDQ0Np6XKGUsofVK6nc8UCgG/Ce3mTsClwAXnB1I/wiQIWEhJCZmemyX+Th4eE0t0ovT0tL81iBWfN+UGcICQnwyPMVRfEOXjaKkwFk6E3GbSXv/4c5YLmUXwzxRUVFkZGRwUkHqz44ori4mKysLAoLC6lfvz5//PEHAQEBeKImXkhIAFFRlVfBUBRFqS16k/Evo06fbtTpO+pNxgNAf2C/q5/jFwEqKCjILbvNXrhwgb179zJy5Eirby/HXP4cRVGUwtOV7U7klZ4EDCUZfIeBka5+gF8M8blLr169GDVqlLd1rRUXMiQbiJkdg2aahpjZMRiSDQ4dq43jSt0hi4vJGDuOYo3tX8neWK5IbzLu1puMPfQmYxe9yThEbzKecfUz/GIdVO3yZA+qlQef7X+W7lnKmNVjyCvMs3xWP7A+8weZF1THrY4jpyDHckwgaN6gOQ3rNSQ7L5vTuaeRyDLH5985n9HdRmNINlS4PkAE8C/tv+gV2YuUrBSWm5aTX5Rf4dmxnc0pw4ZkA/Eb4knLTiM6PJrE/omWY4p/SE1NZePGjYwcOdJcoFoWs+qvv4iPj/doGrkj66BqpR0qQDkuNzeXWbOm0bJlc0aPrrTUoJuoAOUs61/yLRu0ZGD7gQQHBvP737/zS/ovZQJMKW24uYJIanaq089r3ag1aRPSiJkdU63ro8OjSX061WaAUwHMvxw7dow+ffqQnZLCxFdf5fnnn/d0kyxUgKomTwWo/fv3c9ttt5GWlkazZk34448fadKkfJUJd1MBqrzKfknb+iXvCFGyeYGt4OXItcUJxWimaWp0vb0ApxEa+rfpT4AIYGPKxjI9sNCgUJLuTFJByssZDAZeeOEFMjIyuLlhGK9eeilxfx3n8927adu2raebB3hPgFJzUA5q27atZS4qM/MM8+Z95OEWKYZkA2NWjSE1OxWJJDU7ldErR1vmcuI3xDsdnMDci4kOj7Z5rFVYKw49eYhWYba/LLQOb225hy1NQprwn37/IaxemN1nA6Rlp9k8XiyLWX94PWv/XFsmOAHkFOQQvyHe8l7NcXmfPd170O0//+XbBg3Z31HHW5GRhGo0JMW08Zrg5E1UgHJQSEgIM2bMICKiKfPmvcbzzz/u6SbVCeV/yS7evZjvU77npY0vMWrlKHILc8ucf7HoouWXtL1f8gBrHlrD27e9TWhQ2YWPoUGhJPZPJLF/os1jr9/8Ou2atuP1m1+3efyV/q8A2L3+7YFv82KfF3nvjvfsPhvsB7iqlP7Mpb1H6+AdtzpOBSkPC7Iz+hOa75kan97OL9LMa8u9997Lrbd2oVEj299+FdcqP0SXmp3KiJUjqryu9Jd0dHi0zWEybbiW29rfBkCT+k0qncexd6z0n+46ntg/scLwZGhQKNP7TqdNkzaMWjmK7IsVN9csDWy2eo85BTnErY4jKzeLrLwsknYlkZ6druavFK+l5qCcprL4akt1Ew204VpSnk6xOQflS/M0zs6vhQSG8MGgD4jtHOv0HFhwQDDz75zP8CuHu/znUMy+/fZbWj813u5xvclYi62pnJqD8hNSSoqLiz3dDJ9lPYQXOTOSsV+PtRyrbIju0e6P8mSvJ6kfWL/M59bDZLGdY0m6MwltuBaBQBuu9ZngBOb2pzydQnFCMSlPp5Rpt62frTQ4gfNDhBeLLjJh3QTLezV/5Vrp6ek89NBDnm6Gz1E9KKf904PaunUnEyZMZcSI+3n00dr45ulfPajSJIfy80jzbp/Hoz0etduDKu0hld5DpVpXZLOHFRDCvZffy9Jk2zuzlmYQOpLirjiuuLiY6667jt+2beO3Dh3tnqd6UBWpHlQ1rVjxDb17D2Lr1l289NIMsrPPerpJPiXjbAaPf/V4heAEMGXjFMB+okFpDwkq72XUZTZ7WIM/4KO7P7Ks8yqvsvmr3MJc4lbH8b/9/yOnIEf1sJyg0Wh47rnnaBceTpGdDkFAREQtt8o3qB6U08w9qNzcXDp27EN6uvn9888/zuuvv+jmZ/t+D2rP33t44+c3+GTvJxQWF9o8p/SbPKgekjtUNTdX1fxVkCaIYllMkSyyeb1i2x9//MG2n35i2IgRnm5KlbylB6UClNP+GeL75JMveeihcYC5YK3RuJl27WLc+GzfC1DWASY4MLhMWSF7rIfwFPeoLPDXNDlFKav4wgWyVnxJk6EPIgJ8Y9scbwlQaoivBh58cDDXXGPeAqWgoICJExOruKJuKb8Wp3xw6tisI8EBwWU+Kz+Ep7hHZUOjtoZWQwJDGNJxCLoInd17Wie11OUhQIPBQHR0NBqNhpiYGL59+WX+Tkwkb7/3zDH5ChWgakAIwezZ0yzvly9fw+bNP3uwRd7FXiWH+oH12Tp6K6YnTCwYvMBns+z8lb0MwRUPrmD/2P1c2vBSm9eVzmHV5UXCBoOBuLg40tPTkVKSmprKPfPmYRw9mvqdO3m6eT5HDfE5reI6qGHDnsRgWA5At26d2b59DRo7ZfNrxvuH+I6fO87UzVMZ2XUk1y641uZchvUck+J7qprDsjdEWFpI15/91FFHUxvb85yWkusOmDzQoupRQ3x+5NVXX6B+/RAAdu1K5osvvvZwi2qP9VBO49cao52tJWlXEs+vf95Sl6686pbxUbxDVevL7K1fSz+bzsT1E0nNcn5+y1fYCk6Vfa5UTvWgnGa7ksSkSYm8/vq7AHTs2I69ezcSGOjqSlLe1YOqqlr4ize8yMytM322koNSPVUlWWiEhm6XdiM9O50TF074TXZmTk4Oqd262z3uTeucqqJ6UH5m0qSxlhp9Bw78yZIl//Nwi9zvuXXP2QxOQZog1g1bx39u/I9PV3JQqsdWkkXpFiZgrsi+49gO/r7wt1/NUT377LOeboLfcWuAEkIMEEIcEEIcEkK8YON4tBBikxDiNyHEHiHEQHe2x52aNm3C888/Znk/deqbXLx40YMtcp8TF04wauUo/rrwl83jhcWF3NLuFkAtpK2LbA0BLh6ymNVDV3NT25tsXlN+qxBf88UXXzBv3jxPN8PvuG2ITwgRABwEbgYygO3AUCnlfqtzkoDfpJTvCSEuB9ZIKWMqu6+3DvEBnDt3nnbtruXkyUwA3nprOk89NdqFz/b8EN8K4wpGrhxps5J2KbUeRqmMvYXApckzRcVFzN0+l5m/zPSJBdqpqalcddVVZGVlsb+j/TR8NcTnPHf2oHoBh6SUh6WU+cAyYHC5cyTQqOTP4Xi2VHiNhYU1JD7+Kcv7xMQ5nD/vyWBac+XXsxhPGTmXf85yPECUXXio1jEpVbGXJFP6+TPrnmH82vE+kaZeWFhIbGwsWVlZgDlbzxZVyqh63BmgIoF0q/cZJZ9ZmwoME0JkAGuAJ93Ynlrx6KPDaN3a3NM5ceIUb731gYdbVH221rMk/pBI/5j+tG/anm9iv2HxXYvVHJPilMpqLEopmbez4lCZtw4BLl26lJ9++gmAexs3ITDxv+hNxgqvDj/+4OGW+iZPJ0kMBRZJKaOAgcBHQogKbRJCxAkhdgghdhQW2q7f5i1CQkJISHjG8n7GjHmcPn3Ggy2qvue/fd7mpnemTBPJjycz4LIBao5JcVplaern8s9V2Mq+VGp2KmsPrcWbMo8ffvhhXn/9dcKCgpgSE0Pr3bs93SS/4s4AdRSwXggTVfKZtdHAZwBSyl+AEKBCX1hKmSSl7CGl7OH61G3Xe+SR++jQoS0A2dlnmTHDtyZPpZQs/G0hx88ft3k842wGwYHBNo8piiPsfbFpFNzIbrV1gNsMt9Fjfg/GfzMe7WytR0opGQwGYmJi0Gg0tG3bllatWvHzrl1cvm4tLZ9/vtbaURe4M0kiEHOSRH/MgWk78JCUcp/VOd8An0opFwkh9MAGIFJW0ihvTpKw9tlnq3jggccBCA2tz9GjO2ncOLyGz3Z/ksT5/POM/XosH+35yO45KglCcaeq1tfZUlvr60pLGeXk/NO2jo0a8dLcucQOG+bWZ9cmR5IkjDp9CnAOKAIK9SZjD5e3w53d5ZK08dlAAPChlDJRCDEd2CGlXFWSuTcfaIg5YWKilPLbyu7pKwGquLiY7t0HEBHRlMTESfTq1dUFz3ZPgLKubB2gCSizDYZAlMm4UgttldpQvtr6M72f4dDpQ8zfNd9uRfza+OIUExNDauo/i5BbBQayuk1blhUV8fqfh9z67NrkRIDqoTcZT7mtHd40nusIXwlQAFlZ2S7oNVlzfYCq7NvqiKtGcEP0DUz/frpPpPsq/u/v839zyZuX2DwmEPw65leubHklQQFBbnm+RqMpMwemAWKbNGH9+fMcz7c9d+aLVICqJl8KUK7n+gClnaUl7WzF2mnN6jfj1ES3/X+nKNVmr5RSVKMozuSeoUWDFtzY5kbWH15Pena6y75Y7d+/n8xBg4mwMQ/ua8Vgq+JggDoCnME8+vW+3mRMcnU7PJ3Fp3jQyQsnbQYngNO5p2u5NYriGHtp6t0u6caFggscyTrCgt8WkJad5rJ1VKdPn2bQoEE2gxP4ZTHYwNLM6ZJXnI1zrtebjN2A24BxRp2+j6sboQJULcnPz2fu3EU8/niFik8e8/Uf9quuq4rjireyl6bev21/mtVvZvOamqyjKiws5IEHHuDPP/+sSbN9TWFp5nTJq0LvSG8yHi355wlgBebiDC6lhvic5vwQ37lz57nyyps5csTcW/n116/p2fOqajzb9UN8tyy5hfVH1pf5TCVCKL7q3MVzNHqtkd3jfzz5B5c1vcype06YMIHZs2cD+E0po6pUNcRn1OkbABq9yXiu5M/rgel6k3GtK9uhelC1ICysIT16dLG8nz3be6pLrBm2hpdueElVg1D8QlhwWKXrqC6fezkDlw4kela0Q2uoFi5caAlOShktgR+NOv3vwK/A164OTqB6UNVQvSQJk+kQ/frdx8SJj/P44w8TEhJSjbtUrwdlSDYwZcMU0rLTaN2oNa/e9KoKQIrfqmodlaNLJ3755Rf69u1Lfkl23l133UXifvu9pLrUg6q1dqgA5azqZ/Hl5+dTr169Gjzb+QBlSDYwZtUYcgtzLZ+pITzF35VfRzW662jW/bmOn9J/snm+9Roqg8HApEmTOHr0n8I3nTt35ueff+bYgAEUncqscH1ARIRf1dtTAaqafDlA1ZzzAar1rNZknM2o8LmqBqHUNVJKAqYH2NzqA2Df2H389u1vFSpFAHw4aRLDJjxDUMsWtdFUj/OWAOX9he38nJQS4aYU1cycTJvBCSAt23Z6uaL4KyEE0eHRdrej7/JeF0L3hZIjywanAEC7fAXHjv+FdvEi9zdUsVBJEh7y2297GThwOJ9+usot9z+Vc4r+S/rbPa7SyJW6qLLt6ItkEecuPwcTgElAAvA0FHWGf6emcMnLL9V6e+s61YPygE8++ZKHHhoHwB9/HOaeewYSFOS60iwnL5yk/5L+JJ9ItnlcbSqo1FWl867W81Nx3eNYf3g9m1M2m0+ymia+JCiQv+4s5GyzMILbtav9Btdxag7KaTWfgzpzJou2ba8lK8u8bXpS0uuMGeNIwkLVc1B/n/+b/kv6s++kuWi8QBDXPY61h9aqenqKYoeUkjueu4MXNhwkIq/i9/bMkCKu2bWfQE3d+E7vLXNQKkA5zTVJEq+99g6TJ78KQGTkJRw69JMDqeeVB6i/zv/FjYtvxHjKnO6qERoWD1nMsC7+sw2AorjDypUrGTJkSKULce96QvL7Y7/Xib3QvCVAqTkoD3nyyVG0bNkcgKNH/+LddxdX+16GZAMxs2No9WYrDmYeBMzB6aO7PlLBSVGqcOLECcaMGVPleVdHXU1wYLDl75snNkusa1SA8pAGDUJ58cXxlvevvPI2Z8+ec/o+pYsSU7NTkUiKZBECwdgeY3mo80OubLKi+B0pJWPGjOHkyZNVnjvl+ikV/r65ohCtYp8KUB4UFxdLTExrADIzz1SrBFL8hvgKK+YlktUHV7ukjYrizxYuXMiqVY5l0naM6Gjz71tOQQ5Pr30aX5su8QUqQHlQvXr1mDr1Gcv7N96Yx6lTjm9zkZmTaXc9k1rnpCiVO3z4MOPH/zOK8cQTT1R5jb2/V6dyTnHth9fyQ6r/VJPwBipAediwYfeg17cHzFXPX3vtHYeuy8rL4palt9g9rtY5KYptBoMBrVZLu3btOH/+PAAdO3bklcmT0TRsaPOagIgIoPK/V1szttJnUR+6vd+NVm+2UnNULqCy+Jzm+lJHy5ev4Z57zJO0ISEhHDr0I5GRl9o405zFd+7iOW7+6Ga2Hd1m836q1p6i2GYwGGyWMpo+fTqPN2/Oybffod3XX1EvJsb29TYK0QZqApHSPP9riy/+fVRZfIrFXXfdRo8eVwKQl5fH9Omz7J57If8Ct398e5ng9O+u/1bbZSiKA+Lj4ysEJ4AFCxbQLC6OmI8NdoMT2N4scdGQRRx66pDdv3M5BTlM/m6yq36EOkX1oJzmnmKx69dv4ZZbhgIQEBCAyfQ9l13Wpsw5uQVNuOOTO9h4ZKPls7kD5zK251i3tElR/I1Go6mQzBAABGs0XCiy3QNy6v7TNDaL0QoExQnFNb5/bVE9KKWMm266gX79rgWgqKiIl19+w3LMkLwc7ayehL4SWiY4zbxlpgpOiuKE6OiKc0hDGzdhzWWXUXDiRM3vb2eOyvpztY7KcSpAeQkhBK+88oLl/SeffMnvv+/DkLycuNUTSTtbtud23+X3MaH3hNpupqL4tMTEREJDyxaLPYSEq68hsHnzmt/fRjFa69qXah2Vc9QQn9Pcux/U4MEjWbXqWwBuv70/yTeZSMs+WuE8tZ+Tojjv4PU3UHTqVIXPXbnhYPnNEq1rX8bMjrG53cclDS/h+LPHXfJ8V/CWIT4VoJzm3gC1d6+JLl1u+mecfKrt83xtTFtRPGn8+PH069ePji/YT1aojS3b7c1RAfSK7MXYHmORSKZunurR4s4qQFWTvwcogOHDn2Tp0uUABE+qx8X6+RXOUT0oRXHMp59+yoMPPghQaTHY2ghQ9npQlfFEmrq3BCg1B+WFpk17jsDAQHr37s5z3R4jNKh+meNqPydFcczRo0d5/PHHPd0MC1tzVAEigAARYPeanIIc4jfEu7tpXkkFKC/Utq2WXbvW8tNPK/nv/ZNIuvN1tOGRap2TojihuLiYkSNHcubMGQC0Wq2HW2R7HdXiuxZz/NnjvNb/NbvXWZdYmrV1Vp3JAlRDfE5z7xDfgVOHeGvbAmbdOtXGvjNVb1ioKIq5YsSTTz5pCU4A33//Pc3jHrV7TW0M8VVFO1trs95f6ZD+jJ9nMHH9xDLHQgJDSLojieFXDq80QcMZdWKITwgxQAhxQAhxSAjxgp1z7hdC7BdC7BNCfOzO9ni7v86fYIBhGO/tWMLAj4eTnXfW001SFJ9jMBj497//XSY4BQYGkp6ebqmpV569z2vbK/1fqTRNPXFLxaH9vMI8HvnyETq83YERX47wqxR2t/WghBABwEHgZiAD2A4MlVLutzqnPfAZcKOU8owQooWUstLVcv7YgzIkL2fyd6+SbrXWKTSoPj+MXEG3SztTWFjIRx99QcOGUdx3330uf76i+JPo6GjS09MrfK7VaklJSan9Bjmpsl6QmCacvl91Eqq8pQcV6MZ79wIOSSkPAwghlgGDgf1W54wB5kopzwBUFZz8UelC3JyC3DKfP97jYbpd2pn9+w9yzz1jMJkOERUVxZ133unA1vCKUjdJKW0GJ4D7c3I59uKLXDp9OkLjvdPvsZ1j7Q7LacO1TmcBunPrHaNOHwDsAI7qTcY7XH1/d/5XigSs/0/JKPnMWgeggxDiJyHEViHEAFs3EkLECSF2CCF2FBYWuqm5njFlw2sVghPA//Z/BUB0dCSnT2cBkJGRwYIFC2q1fYriS95//327x+o3CkMTHOLVwakq9ipVvHPbOzQPtV0Jw81b74wH3DZ55+n/UoFAe6AvMBSYL4RoXP4kKWWSlLKHlLJHYKA7O321z1aVCPPn5uG+hg0b8OKL42nQIJRXXnmF0aNH12bzFMWntG3blgYNKo5MhYaG0mHaNFq+6Nvp2rayAJPuTGJcr3HMGjCr0vkrVzPq9FHA7YDzW4E7yJ2/7Y8Cra3eR5V8Zi0D2CalLACOCCEOYg5Y293YLq8xb8cSu8eiw//J2Hv00WHce+/tXHpp19polqL4rFtuuYV9+/YxfPhwUlJSyMjI4L6YGB4YO467Y/1jaYa9IcDSz1yRxQcECiF2WL1PklImlTtnNjARCKvOAxxqhLtujDnItBdCtMEcmB4EHip3zpeYe04LhRARmIf8DruxTV5jpXZdi8IAACAASURBVGkd49bY/jYXGlSfxP7/JD3Wq1ePSy9tWVtNUxSfptVq2bJlCwCyuJjDgwYRmJzs4VbVjsrmr5xUKKXsYe+gUae/AzihNxl3GnX6vq54oC1uXQclhBiIOcoGAB9KKROFENOBHVLKVUIIAbwJDACKgEQp5bLK7ukPWXyFxYV0fq8/plOHAIgJb02RLCLj7HGiw1uR2P8FYjvfbeNKtQ5KUcq7cOFCmWE9dxaELSgoICMjg7y8vBrdx1uEhIQQFRVFUFBQmc+ryuIz6vSvAsOBQiAEaAQs15uMw1zZPrVQ12muSTM/du4vbjMM43x+Dj+PWknLho6U+jcHKKPRyLPPPsvjjz/OnXfe6ZL2KIovWrFiBU888QSfffYZ1113HQBGnd7u+TVdjHvkyBHCwsJo1qwZ5u/XvktKSWZmJufOnaNNm7KbozqTZl7Sg3rO17L4lEq0CruEH0au4LvhyxwMTmaff/45nTt35ptvvuG5554jP79iIVlF8XcGg4HIyEjuvvtujh07xg033MCqVavc/ty8vDy/CE5g3oOuWbNmXt0bVAGqFhiSlxMzuxeaaVHEzO6FIdlcqbxRcBhtmjiXAtqvXz8aNmwIwMGDB3n33Xdd3l5F8WYGg4ExY8Zw7FjZ0Yy//vqrVp7vD8GplCt+Fr3JuNkdvSdQAcrtShfipmYfLSk/cpS41RMtQcpZERERJCQkWN5PmzaNUzbG2xXFX02ZMoXc3LJrB6WUvPLKKx5qUe2aM2cOer2eWD/JSqyMClBuZmshbk5BLvEb7Fcursq4ceNo3749AFlZWUydOrUmTVQUn5KWZrsyQlpaGsW5FRe9+5t3332X9evXYzBUXWPP1wsbqADlZlUtxK2OevXq8eabb1rez5s3j3379lX7foriK7755hu7x6KjoynMzIQA23sreUtB2Jp47LHHOHz4MLfddhvh4eEMHz6c3r170759e+bPnw/A5s2bueGGGxg0aBCXX365h1tcM/5VlsHLfPib/Yx564W41XHHHXdw00038d1331FUVMQzzzzD2rVr/Wp8XFGs/fHHHwwdOtTmsdDQUBITE6kXFYVuz+8IO0HK1aZOncq0adNccq877riD1atXV3rOvHnzWLt2LZs2beKdd95hxYoVbN26lQsXLtC1a1duv/12AHbt2sXevXsrZOf5GtWDcpO1hzYRt3qizWPlF+JWhxCCmTNnoimpK/btt99W+u1SUXzZ2bNnGTx4MNnZ2QA0adKEqKgohBBotVoWzJnDrecvIPPzay04eYPBgwdTv359IiIi6NevH7/++isAvXr18vngBA4GKCHEeCFEI2G2QAixSwhxi7sb56t2HtvDvZ/FUSSLAIhuFEnrRq1KamdFknTn63YW4jqnc+fOxMXFWd4/88wzFBQU1Pi+iuJtPvnkE4xG8xqmkJAQ1q9fT3p6OsXFxaSkpDAgPJyTc+Zw8c8/PdzS2lV+xKT0va16hL7I0R7UKCnlWeAWoAnmFcTVn+X3Y0fOpHH7xw9zoSAHgOjwSLb+ezVpE7ZTnJBBytO/uiQ4lZo+fTqNGjUC4MCBA7zzzjsuu7eieIu4uDjee+89goKCmD9/Pt27dy9zvPG999L2q9WE6O0v0nWHqVOnIqV0yauq4T1bVq5cSV5eHpmZmWzevJmePXu64aesGaNOX35rcJuf2eJogCoN0wOBj6SU+6w+U0pk5pzmNsMw/r5wEoAmIY1ZG2vg0jD31dFr3rw5L7/8suX91KlT+fvvv932PEXxBCEEjz32GAcOHGDYsH+q6RSeOkV+hjkRKdgPhrSc1aVLF/r168c111zDSy+9RKtWXlkO7RcHP6vA0SSJnUKIb4E2wGQhRBhQ7OC1fs+QvJz4Da+RapWxFxwQzKqhC9E3b+/25z/55JPMnz+fAwcOcPbsWV544QUWLlzo9ucqirsYDAbi4+NJS0sjOjqaxMREYmNjadOmjVtr7fkC612Bu3TpwpIlZXdF6Nu3L3379q3dRtlg1OkvwbwHYH2jTt+Vfzo1jYBQuxdacTRAjQauAg5LKXOEEM2AkU621y/Z2xH30e6xXB/dq1baUK9ePebMmcOtt94KwKJFi3j00Ue55pprauX5iuJKpZUiShfjpqamWuZaY2NjbQYnwO7nisfcCozAvNXSm/wToM4BUxy5gUPFYoUQdwEbpZTZJe8bA32llF863+aa8bZisTGze5XpOZXShkeS8vSvLn525d33u+66iy+/NP8n6dGjB9u2bbNk+SmKr9BqtTYX42q1WlJSUtxaDLYqRqMRfS3Pc7mbrZ/JmWKxVd5fp79HbzJ+UZ1rHf3tlVAanACklFlAQiXn1wlSSrsLbmuyELe6Zs6cSUhICAA7duzgww8/rPU2KEpNVVYpQvFJUUadvpFRpxdGnf4Do06/y6jTO5QF7miAsnVenV7kK6Xk6bUJBAXY/tdQ04W41dGmTRsmTvxn7dXkyZM5c+ZMrbdDUarr/ffft3ssOtq5wsqK1xilNxlLs8Cb4UQWuKMBaocQYqYQol3Jayaws3pt9X1SSqZseI05vy4gv6gAUS6h0RULcatr0qRJlr/Ip06dYsWKFR5ph6I4a9OmTTzxxBM2j5VWilB8knUW+BK9yehwFrijAepJIB/4tOR1ERjnZCP9RuIPb/HaT/+sN+oVeRXR4ZEuX4hbHaGhocycOZMrrriCDRs2MGrUKI+0Q1GccejQIe655x5LcVOtVkvr1q0tlSKSkpJ4cPBg0h99DE3jxjbv4Q+19vzUTqNO/y3mALXOqNM7nAXu0DCdlPIC4JkugZd58+d5vLRphuX9oI638L/7kggKCKrkqtp19913M3jwYAID6/QorOIjsrKyuPPOOy3D0Zdccgk//vgjUVFRZc7LM5nIO3iQ1u++S2i3rp5oqleYM2cO7733Ht26dXOoorkXsGSB603GHKNO73AWeKU9KCHE7JJ/rhZCrCr/qnGzfYgh2UDT/2vKc+v/Y/nslnb/4tN73/Oq4ATmRY0qOCm+oLCwkAcffBCTyQSYyxitXLmyQnACCNHpuGzdWp8KTgaDgZiYGDQaDTExMS4JKM5styGlpLjYs0tW9SZjMeZU8xeNOv0bwLV6k3GPI9dWNcT3Uck/38Ccx17+VScYkg2MWjmKM3n/JBxohIYHrxhMSGCIB1vmuPPnz5OTk+PpZigK8M8v7qCgINatW2f5/MMPP6RXr7LrBwuOH+fMsmVIKRH16tV2U6vNYDAQFxdHamoqUkrLeq6aBKny222MGjWKvn370rZtW+bMmQOYF/J27NiRhx9+mE6dOpGenu6qH6lajDr9a8B4YH/J6ymjTu/Y7pJV1YcCAgCDq+pN1fQVGhoqa1NxcbEMfzVcMpUKL+2sSCnl0Vp8Va/9BoNBtmrVSk6ZMqVa91AUV1q6dKkMDQ2VQJnXkCFDbJ7/96xZ0tS1m8z/669abmlF+/fvL/M+ISGhws9R1Uur1VquHzNmTJljCQkJVbZBq9XKkydPyoSEBNm7d2+Zl5cnT548KZs2bSrz8/PlkSNHpBBC/vLLL9X6maSUErggXfQ7e39H3Z79HXUaq/cB+zvq9jhybZXjQFLKIiGEVghRT0qZ71jM9B9zt88l+2K2zWOeWOvkrNWrV1u2hn7jjTeIjY31+U3MFN8WHx9vsze/a9cuALuljI7cc69flDJy5Xqu22+/neDgYIKDg2nRooWlDqdWq/W2SjKNgdMlfw539CJHJyoOAz+VzDtZyjhIKWc63DwfNbrraJ799lnyiyrGZk+sdXLWHXfcwdVXX822bdto2bKlZT8dRfEUe7+gS4ei/L2UkSvXcwUH/1MUPCAgwJIF6WXbbbwK/GbU6TdhTi/vg4NJd46mmf8JfFVyfljJq6Hz7fQ99YPqM+vWWQRpyiZCeHKtkzM0Gg0ffPABo0ePJjk5md69e3u6SUoddvz4cZo1a2bzmC8uxK1su42lS5cSGlq2Jmr59VxJSUllrpk6dWot/wTupzcZPwGuAZYDXwC99Sbjp45c62iA2i+lnGb9Atxb9KqWGZINxMyOQTNNg3a2FkPyPxOZY3uOZeGQhWjDtV6x1slZnTp14oMPPiA83OGetaK43Pnz57njjjs4deoUQUHlvvD54ULc2NhYkpKS0Gq1ZdZzlQ651xVGnf4uIEdvMq7Sm4yrgDyjTj/EkWsdLRa7S0rZrarPaoM7isUakg3ErY4jp+CfcfHQoFCS7kwitnP5/5k8Oe/k/UOKimJLYWEhgwcPZs2aNYB5KcQll1zCX3/9VWY7DcCjxWCroorFVuP+Ov1uvcl4VbnPftObjFWuF6h0DkoIcRvm1b+RQog5VocaAYXVaay3KZbFPLP2mTLBCSCnIIf4DfE2ApR/WLVqFcnJycTHx3u6KYqfk1Ly5JNPWoITmGvujRkzpsK5+amptdk0pZqMOn0IsAUIxhxH/qc3Ge0VEK92LdeqTjoG7AAGUbb23jlggiMP8FZ/n/+bhbsXkrQziRM5J2yek5btf9WT8/LyGDt2LAsXLkQIwfXXX8+//vUvTzdL8WMzZsxg3rx5lveTJ0+2GZwAAppFIEJCkHl5FY+pUkbe5CJwo95kPG/U6YOAH406/Td6k3GrjXN3GHX6mcDckvfjcLCWa6UBSkr5O/C7EOLjknOjpZQHHP0JhBADgLcwr6X6QEpps4KtEOIe4H9ATynlDkfv7wxDsoEpG6aQnp1O/aD65BXkUVxFOajocN+btK1KcHAwx46ZhymllIwYMYI9e/YQFhbm4ZYp/sR6R1zraYShQ4fy3//+1+Y1UkoCGjZAt/u32mqmUk16k1EC50veBpW87M0XPQm8hLmOqwTW42AtV0fTzAdgriZRD2gjhLgKmC6lHGTvAiFEAOaIeTOQAWwXQqySUu4vd14Y5lXG2xxsi9PKzzGVH84LDQwlvzifwuJ/Ri1Dg0JJ7O9fk7ZgHvtfsGABnTp1Iisri5SUFJ599lmSkpI83TTFT5RWUCi/1qljx44sXLjQ5iaa5zZu4vSHHxI55y0CmzatraYq9gUKIaw7C0lSyjK/JIw6fQDmntBlwFy9yWjzd7jeZKx2LVdHs/imAr2ALAAp5W6gTRXX9AIOSSkPlyzwXQYMtnHef4D/Ayr26V0kfkN8haAEEBwQzOIhizk18RSLhiyyytLT2kmQ8A+RkZG8884/1djnz59fZn5AUWrC3kLcCxculFm3Y03m5SKRaBrWidUrvqBQStnD6lXhG6zeZCwqSX6IAnoZdfpOrm6EowGqQFrtqFuiqvS/SMC6CFRGyWcWQohuQGsp5deV3UgIESeE2CGE2FG6EM0Z9uaS8ovyefjKh6kfVJ/YzrGkPJ1CcUIxKU+n+G1wKvXQQw9xzz33WN7/+9//5vTp05VcoSiOsbcQ9+jRo3avaTRwINqPPkLjQ7X2FDO9yZgFbMI80uZSjgaofUKIh4AAIUR7IcTbwM81ebAQQgPMBJ6t6lwpZVJpJK9OlW57c0n+OMfkKCEE7733Hi1atADMCyjHjh2LI8sOFKUy9hbcln5+8PobMOr0FV5/3NCnNpup1IBRp29u1Okbl/y5PuapHJOrn+PMhoVXYM7c+AQ4CzxdxTVHgdZW76NKPisVBnQCNgshUjCvNF4lhOjhYJscltg/kdCgciu6/XSOyRnNmzdn/vz5lveffvopCxcu9GCLFH+QmJhYaQUFfy9lZM26AEDM7JgyBQB83KXAJqNOvwfYDqzXm4xflT/JqNO3MOr0DUr+XN+o08cbdfrXjDr9pY48xKEAJaXMkVLGSyl7lvRk4qWUVc0ZbQfaCyHaCCHqAQ8Clj2kpJTZUsoIKWWMlDIG2AoMckcWX2znWJLuTKozc0zOGDRoEKNHj7a8f+KJJ9i7d68HW6T4ojyrtHBVQcGsNDkrNTsViSQ1O5W41XEuCVJDhgyhe/fuXHHFFZYEp7Vr19KtWzeuvPJK+vfvD5ird4wcOZLOnTvTpUsXvvjiixo/G0BvMu7Rm4xd9SZjF73J2ElvMk63c+oyoLS21TTMCRVngI8deU6llSSq2pSwsiy+kusHArMxp5l/KKVMFEJMB3ZIKVeVO3cz8FxVAcodlSSc43+VJC5cuECvXr3Yv9+cYKnX69m+fbu3FZxUvNTHH39MfHw8GzdupE2bqnKnvLtSRFXKV12Yunkq076f5tQ9tOFaUp5OASBudRzzd/0zipHwrwSm9p1a5T1Onz5N06ZNyc3NpWfPnmzYsIEePXqwZcsW2rRpYzk+adIkLl68yOzZswE4c+YMTZo0qfRnAtdUkjDq9I8A0zGnmIuSP7+OOT09AXPy3e7KNi+sakKnN+ZEh08wp4ELZxoopVwDrCn32ct2zu3rzL0V12nQoAGfffYZPXv2JDc3F6PRyLhx41i0aJGnm6Z4qdJ1TqlWlR/69+/PDz/8QGRkZCVXKq4oADBnzhxWrFgBmKvAJyUl0adPH8sXhKYlqfrfffcdy5Yts1xXPji52WbMu1/swdyL+htYjTmOjCs5Xun2ClUN8V0CTME8V/QW5omwU1LK76WU39eg4YqXueKKK5g7d67l/eLFi1m8eLEHW6R4K+udYq3l5+fbXOOklFXT5KzNmzfz3Xff8csvv/D777/TtWtXrrrqqqovrGV6kzEVeBtYBywFputNxjTMGeCZepMxTW8yVj9ASSmLpJRrpZSPYE5iOIQ5qeEJl/wEilcZMWIEDz/8sOX92LFjLcN+ilJqypQpNtc5CSG49FL7c9/5GUe5eOiQ3ZJFvljKaGrfqcgEafO19O6lVSZnJd2ZVOYaR4b3srOzadKkCaGhoZhMJrZu3UpeXh5btmzhyJEjAJYlIzfffHOZL55nzpxxwU/tOL3J+B7QDmhnlUSRCQx15Poqq5kLIYKB20tuGIM50eFDKaX9RQ1upOag3Ov8+fP07NkTk8mcMXrFFVfw66+/VsjKUuomKaXdXpIQguJi++XD0h99jDyjkXbfrffZ9U7OVjM3JBuI3xBPWnYa0eHRJPZPrHFy1sWLFxkyZAgpKSl07NiRrKwspk6dSm5uLlOmTKG4uJgWLVqwfv16zp8/z7hx49i5cycBAQEkJCRw991ltwlydzXzmqgqSWIJ5uG9NcAyKaXH07tUgHK/5ORkevXqZcnMGjVqFAsWLKiVZyveq7CwkLi4OLtLEbRaLSkpKXavL/j7BPkpKTS4upebWuh+aruN2lXVgPEwoD3mWnk/CyHOlrzOCSHOur95iid07ty5TCmkDz/8kE2bNnmwRYqnXbx4kQcffNBucKpsw8H8lBSklAS1bOHTwUmpfVVVM1cznnXUqFGj2LRpE59//jmzZs2ib9++nm6S4kHvvvtumTU0ffr0ISUlhfT09AobDh68/gabi24DIiLo8OMPtdZmxfepAKTYJIRg3rx5/Prrr4wdOxYhnFphoPg4g8FATEwMGo2GmJgYIiIiGDLEvEv3hAkT2LRpE6mpqRQXF5OSklJmEW5dqhShuJfzhe2UOqNhw4ZceeWVnm6GUsvKb5eRmprKY489xrvvvsvdd9/NsGHD6vQXFiml3/z83l57U/WgFKdkZWURHx9PQUGBp5uiuImt7TJycnJISEhg+PDhfvPLuTpCQkLIzMz0+l/sjpBSkpmZSUhIiKebYpfqQSkOO378OAMGDGDPnj2kpaWxePFitTDTD9nbLsPe53VJVFQUGRkZnDx50tNNcYmQkBCioqI83Qy7VIBSHPbxxx+zZ4+5bNbSpUsZPXq0Sp7wI1JKZsyYYbd3YG8bDWsXDx9xdbO8SlBQkEP1BhXXUF9/FYc988wzxMXFERAQwOLFi1Vw8iO5ubkMHz6cSZMm2TxeWRp5qXPffcfhO+5A06iRzeO+WClC8awqK0l4G7VQ17OKiorYuXMnvXqp9Sz+4ujRo9x1111s377d8lmHDh3Iycnh6NGjFdLI7SnOzeX0okU0Gz0a4aOVIhQzb1moqwKU0+p2gLLHnzKb6oLSauRpaWkVShSNGTOGd955h3p2goxa5+T/vCVAqSE+pcaysrK49dZb2bJli6ebojjAuhq5lNISnIQQvPPOO7z//vt2gxOodU5K7VE9KKepHpS148ePc+utt5KcnEzDhg359ttv6d27t6ebpVQiJiamwlYZAC1atODvv/+u8npf3nBQcYzqQSl+ITc315Jye/78eW699Va2bdvm4VYplbGXLu4vqdOK/1ABSqmRtm3bsmHDBpo3bw7AuXPnuOWWW8pMuCvexV66uCNp5IpSm1SAUmrs8ssvZ+PGjUSUpBGfPXuWW265hV27dnm4ZQpAQUEBGzdutLxPTEyssL+XI2nkilLbVIBSXKJTp05s2LCBpk2bAubEiZtuuondu3d7uGV1l8FgICoqinr16tG/f39eeuklAGJjY0lKSkKr1SKEQKvVkpSUVGUaedG5c5z57DMCIprZPK7WOSmuppIknKaSJCqze/dubrzxRsvW0s2aNWPjxo106dLFwy2rWwwGA6NHj+bixYtlPp83bx6PPvpote55av58Tr41h7YrvyS4XTtXNFPxUt6SJKEClNNUgKrKrl276N+/P1lZWQBERESwadMmOnXq5OGW1Q2FhYU0a9aMs2cr7ikaHR1tM4PPEbK4mLx9+6nfWf139HcqQFWTClC+Yfv27dx8881kZ2cD0Lx5czZu3KiClJsdP36coUOH8v3339s8Xn5Rri1qIa5SVYAy6vStgSVAS0ACSXqT8S1Xt0PNQSlu0bNnT9atW0dYWBhgTmG+/vrry0zWK661ceNGrrrqKrvBCRzL1FMLcRUHFALP6k3Gy4FrgHFGnf5yVz9EBSjFba6++uoyQSo7O5sBAwawZMkSD7fMvxQVFTF9+nRuuukmTpw4Yfk8KCiozHkqU09xFb3JeFxvMu4q+fM5wAhEuvo5KkApbtW7d2+2bNlCq1bm4cmCggIeeeQRpk+f7hebvnlS6bbsQUFBJCQkWP59tmzZkg0bNrBw4UKnM/UUpUSgEGKH1SvO3olGnT4G6Aq4fIW+moNympqDqo709HRuv/12kpOTLZ+NGDGiyrpvim3lt2Uvpdfr2bhxI5dcckm1761KGSmOJkkYdfqGwPdAot5kXO7qdqgelFIrWrduzQ8//MDNN99s+eyzzz7jjz/+8GCrfFNxcbHNbdkBLly4UO3gVHT2LLKwsKbNU+oIo04fBHwBGNwRnMDNAUoIMUAIcUAIcUgI8YKN488IIfYLIfYIITYIIbTubI/iWeHh4Xz99deMHDkSjUbDsmXLuOKKKzzdLJ+ybds2rrrqKrv19NLT06t13+K8PFJjY/lr2jS7C27VQlyllFGnF8ACwKg3GWe66zluG+ITQgQAB4GbgQxgOzBUSrnf6px+wDYpZY4Q4nGgr5Tygcruq4b4fJ+Ukp07d9KjRw9PN8Vn5Ofn85///IdXXnmF4uJigoKCKCgoqHCeVqslJSWl0nvZSyMXoaG0fvddGlxztauarfgoB9LMrwd+AJKB0nULU/Qm4xpXtiPQlTcrpxdwSEp5GEAIsQwYDFgClJRyk9X5W4FhbmyP4iWEEDaD05YtW9iyZQuTJ08mICDAAy3zLtabCgYGBpYJSBqNhpCQEPLy8iyfOZqlZy9dXObkqOCkOERvMv4IuH2HUncO8UUC1uMNGVSehjga+MbWASFEXGk2SaEaI/dLp0+fJjY2lpdeeombb76ZY8c82VP1vPKbCloHpz59+mA0Gvnggw9Ulp7i19zZg3KYEGIY0AP4l63jUsokIAnMQ3y12DSllsyePZuMjAwAfv/99yqrHfgzKSUTJkywmQTRpEkTNm3ahEajoU2bNiogKX7NnT2oo0Brq/dRJZ+VIYS4CYgHBkkpL5Y/rtQNL7/8Mi+//DIajYYPP/yQqKgoTzfJIw4cOEC/fv3sbh6YlZWFRlP9v7a+tqxEqdvcGaC2A+2FEG2EEPWAB4FV1icIIboC72MOTids3EOpIwIDA5k2bRoHDx5k8ODBFY6X1vTzJ6ULbTUaDTExMRgMBkJCQti6davda2q8qaAKUIoPcVuAklIWAk8A6zCXwfhMSrlPCDFdCDGo5LQZQEPgcyHEbiHEKju3U+qIdja2cfjpp59o3bo1//d//0d+fr4HWuV65eeYUlNTiYuL48cff+Spp55CCEFgYNkR+JqUKirKyqIoKwuh0RDQTO3npPgGVUnCaSrNvDadOXOmzLofvV7P3Llz6devn4dbVjOtW7e2zLlZ02q17N69mxMnTrB9+3ZLFl90dDSJiYkOzznZr0jejA4//ljj9iv+zVu22/CKJAlFsefUqVOEh4db3huNRm688UZiY2N54403alTSxxOys7N56623bAYngLS0NBo3bkzjxo3p0KFDtZMg7Fckz6zW/RTFE1SpI8WrtW/fnl27djFz5kwaNmxo+dxgMNCxY0fefvttvHXpgfUcU3R0NPfccw8xMTEkJCTYvabGc0yK4kdUgFK8XmBgIBMmTMBkMvHAA/8UGjl79ixPPfUUPXv25JdffvFgCysqP8eUnp7O8uXLLbsMg3nBsjW1HYailKUClOIzIiMjWbZsGevXr6dDhw6Wz3fv3s21117L3Xffzf79+yu5Q+154YUXbK5jArjssstYsmQJixcvdvlCW5mfrwq+Kn5DJUk4TSVJeIOLFy/y5ptv8t///pfc3FzL5xqNhocffpipU6ei1Xqu9rBGo7G75qigoKBChp4ryPx80kb/m3qXtSPrk2V2z1NbZihV8ZYkCdWDUnxScHAwU6ZMYf/+/dx///2Wz4uLi1m0aBEdOnRg/PjxZXaYdTVb65hKtW7d2uY1Wq3WJcHp4PU3YNTpy7xMXa4kd+9eQrt1VxXJFb+gelBOUz0ob7Rr1y7i4+NZu3Ztmc8bNGjAhAkTeO6558pkA9aUMo5dxgAAFvZJREFUrQ0DQ0NDLcN0BoOB0aNHc/HiRZvHa0ptKqi4k7f0oFSAcpoKUN7s+++/Z/LkyRWSJsaOHcvcuXNd8gwpJZdcconN3pn1dhfW1cidXcdUFRWgFHdSAaqaVIBSqiKl5KuvvmLKlCns3buXoKAgDh48SExMTI3uW1BQwGeffcbMmTPZtWuXzXOEELVS6FYFKMWdvCVAqYW6it8RQnDnnXcycOBAPvnkE9LS0ioEp8zMTL766iuGDRtmd+8p6x5QeHg4QgjOnDlT6bNrYx1TkR/WJVQUW1SShOK3AgICGDZsGFOmTKlw7PXXX2fEiBF07tyZb7/9tsLx8uuYsrKyygSnwMBAl9bKc4YmLMztz1AUb6AClFLnHDt2jLfffhswl046e/ZshXPi4+NtrmPSaDRMnz6d48ePs2jRIrdtGGgrS8+o03Pw2uvMBV9Vlp5SB6g5KKepOShfd+7cOWbNmsUbb7xB06ZNLZUemjdvzsyZMxkyZAhhYWE21zGpOSalLlBzUIriIWFhYbz88su0aNGC8ePHW7bwOHHiBGPGjGHz5s12F9mqWnmKUntUgFLqlKKiIn7++WeWL1/O22+/TVFRUZnjubm5LFmyxOa19erVY+LEibXRTEVRUHNQip8zGAxotVo0Gg1hYWE0adKEPn36MHv27ArBqVR+fj6NGjWy+fkTTzzBDTfcwJtvvsnhw4fd0uZiOzX8FKWuUXNQTlNzUL7CVrUHR2i1Wvbt28fnn3/O+++/X+kW7J07d+auu+5i8ODBdO3atUKFcnvsbSgogoMJ6dyJ3B077V6r5qAUd/OWOSjVg1J8mnU9vKioKMaPH285Vlkm3qOPPsqkSZMIDQ0tc6w0VbxBgwaMGDGCX375hcOHDzNz5kz69OmDRlP2r0xycjLTp0+ne/fuREdHM27cONatW1emxJEt9jYUlBcv0rDPv9S27IqC6kFVg+pBeYuFCxfy+OOPVwgG77//PnFxcXYriltn4jlbjujkyZOsXr2aFStWsH79eruBqGHDhgwYMACDwUC9evUqHFdZeoo3q6oHZdTpPwTuAE7oTcZObmuHClDOUgGqNlkHkKioKIYPH05gYCCbN29my5YtNq+JiIjg5MmTxMTEkJqaWuG4db28mjh//jxr165l5cqVfP311xWqTHTt2rVCSaQjR45w4cIFAu69z+59VYBSPM2BANUHOA8sUQHKigpQdUd155BKe0hVVRx3pcLCQn766SdWrlzJypUrOXz4MAkJCUydOrXMec8//zzHPviAF1teYvdeKkApnubIHJRRp48BvlIByooKUP7FuocUGRnJfffdx6BBg+jbt6/dHlBVaquiuD1SSoy9r0VYbe9eKlsInk1L5YPW9tdT/S/2Ibp370737t1p2bKlO5uqKDYJIfKBZKuPkqSUSdbnqABlgwpQvsVWgHjwwQc5cuQI8+bNY86cORQUFJS5plu3buzcubPSXWmXLVtGZmYmzz//fK30kJxV2RxTv6wzfNYwjOY2Ni48VVhInz8PWd5HRkbStWtXOnfubHl17NiRoKAgt7RbUUD1oKpNBSjvUlkPxdYQmxACjUZjdw0SmLPsCgoKaNu2bZVzSJ7oITmisgClM+4nPT29QlWK7du306tXryrvHRQUhE6nswSsyy+/nA4dOtC2bVubCRmK4iwVoKpJBajaVVUAGjNmDLm5uZbzAwICuOyyy9i/f7/dAOOICxcusGLFilqbQ3K16mTpnTx5kq+//pqdO3eyc+dOdu/eXebfbVUCAgJo06YNHTp0oGPHjrz66qsEBwc73XZFUQGqmlSAcj17QchWDygoKIi+fftSXFzM5s2b7faE0tLS0Gq1dofoLr30Uk6fPm0zTdsXekj2FtoGNG1Kh59/ckkaeWFhISaTiT179pCcnGx5paWlVXlto0aNyMrKKrNwePfu3cTHx9O2bVuuvfZahg4d6lA7lLrHgSy+T4C+QATwN5CgNxkXuLodqhZfHeDMMFxqaiojR47k5MmTzJ49u0IGXUFBAevXr6/ymQcPHiQ6OtpmDyoqKor09HS7WXbWeyrFxsZ6RUAqz95C26LTp132jMDAQDp16kSnTmW/oGZnZ7N3716Sk5PZu3cvBw4c4ODBg2UCV4cOHSpUtdi7dy9r1qwB4Pjx4xUC1FtvvcWqVauIjIy0vFq1akVERATNmjWzvNT8l6I3GWvl240KUF7AYFhOfPxrpKUdIzq6FYmJLxAbe7ed4xV7EeUD0LRp07j//vupX7++zQA0YsQIZs6cSUhICL/++iuFhYVl2lNQUEBiYiKZmZnV+nlatGhBz549SUxMtBmAXnvtNQDLz+CNPaSaCoiIsN3DckEliPDwcK677jquu+66Mp/n5ORw6NAhDhw4YDOIWNcObNeuXYXju3btYuPGjVU+PywsrEzAatKkCY0bN6ZJkyaWP5dmIVrLy8sjKCjI7g7GilKBlNJtL2AAcAA4BLxg43gw8GnJ8W1ATFX3DA0NldWxdOlSqdVqpRBCarVauXTp0hocj5RLl74tpTwqpTwqly59W2q1kTaPlT8eHR0pFyx4U544sUempv4qZ8x4UYaEBEvA8qpXr5586aWnLdeGhtYvczwgIED26tVLDhw4UOp0OqnRaMocB+SgQYOklFJqtdoKxxx92bu2cePGcuXKlXLGjBkyNDS0zLHQ0NAy/+6q+vfqq/Z31Nl9easjR47IL774Qr7++utyy5YtFY7379+/2v+vlH9NnDixwv3vvfdeCcj69evLFStWVDj+xBP/3979R0dVngkc/z7JhBoCa0IiVEBAEEos9mjLInSVXYusHDxKFS1xj621WrStP7rb7iqnf+jpLhTdU02trS5tI21DC2yllCOW+KMiUISaKlUkgBAQobFAAhEIP0zy7B/3JjsJc+9M5uedmedzzj2ZmffeO8+bSeaZ977vvO89evvtt+u9996r8+bN0wULFujjjz+uTz31lC5evFiXLl2qK1eu1DVr1ujatWt106ZNumXLFm1oaNA9e/ZoU1OTHjlyRE+ePKmdnZ0p+R3mC+CEpjA3xLqlrA9KRAqBncB0YD/wOnCLqm4L2+frwKdU9W4RqQJuUNU5fueNpw/Kqy9l9uzZ3UOaV6xY0WO4c1FREc8884xnX0xhYSGTJl1Ge/tHvPHG23R0/P8idiLC5MmfYePG37FkyQrmzv0P2tpi7+wGKC4+h7a23YwaNYn33jvQp2PBmWrn2LFjvkO1/QwePJjHHnss6iCFoPYRJcpzMtf+/Rm1pJY9N9wY4ShHtn7RdufOnTQ2NnLgwIHurampiebm5u6tpaUlpgUbFyxYwLx583o8NmPGDOrq6gBYvXo1M2fO7FE+ZMgQDh48mJS6PPfcc1x77bU9HpswYQKtra2EQiHq6+spD5vvsKmpiRtvvJFQKERhYSGhUIhQKERBQUH31jUCNXzr2rewsLDH9uijjzJgwIDu8zc3N7Nw4UJEhPLych544IEesW3dupXly5cjImdtQMTHe1/C7bpfWlrK3LlzE/r9BWWy2FQmqCnAw6p6jXt/HoCqfi9snzp3n9dEJAR8AJynPkHFk6AS/cJnPMcXFRVx5szeuBMMgOoBCgqGx5VgnOPVM/aKigqeffZZNm/ezEMPPdRjtFh4EsrVBBSN3yCH8755P4eqf+BZnq0JKhadnZ20trb2SFpHjx7l6NGjHDlypPvnzTffzDXXXNPj2OnTp/Pyyy+jqqxbt44rr7yyR3lJSUmfZw3x8tJLLzFt2rQej5WWltLa2gpAS0sLZWVl3WW7d+/moosuSspzRzv/6NGj2b17d4/9ly1bRlVVVVKee8yYMezatSv6jj6CkqBS2Qc1DHg/7P5+4HKvfVS1XURagXKgx0dXEZkLzAXi+p5HLKOe/I6L5/iu1ti+fd6j/oYPP58PPjh0Vh8QQGmpsx7RiBFDIya4iooKampquPPOOyN+6hw5ciSAZz9QdXU1U6dOZerUqQwdOtQzCQV1kEImVdx9Ny21S1LWxxRkBQUF3X1NfX1Df/HFF+ns7KStrS3i8PeamhqOHz/OsWPHun+eOnXKczt58iSnT5+OWBbp/OH/Z7376CL9Dyaidz9beKsz0pIsqWooZLusGCShzhQbi8BpQfX1eK/RZAMHDuSuu+5i0aJFfPjhhxGP8zu+vLwMEeHw4bNHbg0dOsQ9NnKCGTlyGHv3/iniJcD+/Yt58klnJNv8+Q9GKHcSzHXXXed5Ga5rJFwsAxEsCfXUfuhQ1H3GbVifhkhyT0FBQY9LX+HmzPG9up+wd999l/b2dtrb289aZmXEiBFs3Lixu7xrU1U6Ozt7bF2PdXR0dG/t7e097hcXF/c4f0VFBY888giqSmlp6VmxTZgwoXvexlj7Z8KF3x80aFCSfmMBkKrOLWAKUBd2fx4wr9c+dcAU93YIp+UkfueNZ5BEbW2tb2d+fOXFWlv7w4iDGLrKvAY5hJf3dZBFPAM8TGQ7/uEK38EO2TYIwphkISCDJFKZoEJAI3Ah0A/4C/DJXvt8A3javV0FLI923mwfxRepPPbNJFM8yckSlMkHQUlQKZ1JQkRmAtVAIVCjqvNF5LtAvaquEpFzgF8ClwEtQJWqNnqf0WaSMLHznO2hooIxa37Pzol/73ms3/eY7PKeyXVBGSRhUx31mSWobOE3Eq+wrIyOXgsMhsvlkXjGRBOUBFWQ6QCMyYRBt92W6RCMMVFkxSg+YyLxvIRXXs5599/ne2zF3XdxqLo6VaEZY5LAWlAma3lO2NrczN/+a37EsnBe31nK9e8yGZMtrAVlAstvkMPYV/wnNR39/PPsvvpq331ssIMxwWYtKBNYni2kw4c58K1v+x7bb/gwayEZk+WsBWUyxreFtH6d77Flt1Rx7IUXfPexFpIx2c0SlEkZvwQ0bsN63xZS68rf+Z67ZMqUpMRojAkuS1AmbokkoI5jx3zPXTJlctTnT+WigMaYzMuLBBXtjTRaeb5KJAFpR4fvuTXK7NFFH/941Pjy+bUxJh/kRYLyeyONpdz7jbrM3e/sGQkKK8oYt+E37LzipoTKE5VIco72e/FV4D/+JhS2Vo4XayEZk9/yIkH5OfTjH/uWt9XX+7xRe0+V01XmtU+s5d4JLLbWX6LJOV6R1rzpK2shGZPf8n6Y+eEnfuhb/t6tX4z73Mc31PuWt735jm/56cb3fRLYYT7620HfBHMmykKLratX+5anmg0DN8b4yfsW1Pitb7N9wiWe5SNqfsa+r9wR17nVXVXXy6m3d/iWf7hmrW95yy9+7lveXFPjW35iXWpbKNEu0VkLyRjjJ+9bUBLyz9Eln/1s3OceeNVs3/JBX7rHt7zsC/6J8dxZs/zP/0X/1t/5C7/nWx5NtBbQuA3rqdzecNZmickYE4u8aEFF+yQf1M74UJTnP2fcON/yj40Z41serZ/IWkDGGC8N4ytnAD/AWe/vp5XbGxYm+znyIkFFeyONVh7tjTqR5Jfq5JjI81sCMsZE0jC+shD4ETAd2A+83jC+clXl9oZtyXyevEhQiUrkjTrVyTHRVo4lIWNMHCYBuyq3NzQCNIyvXArMAixB5RNLMMaYDAiJSPgw5EWquijs/jDg/bD7+4HLkx5Esk9ojDEm67Wr6sRMB5H3o/iMMcb02QHggrD7w93HkspaUMYYY/rqdWBsw/jKC3ESUxXwL8l+EmtBGWOM6ZPK7Q3twD1AHdAALK/c3uA/NU4cRFWTfc6UKikp0RMnTmQ6DGOMyVki0qaqJRmPI9sSlIh0AicTOEUI8F/rIXdZ3fOT1T3/JFrvYlXN+BW2rEtQiRKR+iCMTskEq7vVPd/ka91zpd4Zz5DGGGNMJJagjDHGBFI+JqhF0XfJWVb3/GR1zz85Ue+864MyxhiTHfKxBWWMMSYLWIIyxhgTSDmboERkhojsEJFdIvJghPKPicgyt3yziIxKf5TJF0O9/01EtonIWyLysoiMzEScqRCt7mH7zRYRFZGsH4bbJZa6i8gX3Nf+HRH5VbpjTJUY/uZHiMgrIvKm+3c/MxNxpoKI1IjIQRHZ6lEuIvKE+7t5S0Q+ne4YE6KqObfhrPC4GxgN9AP+Alzca5+vA0+7t6uAZZmOO031vgro797+Wi7UO9a6u/sNBNYBm4CJmY47ja/7WOBNoMy9PzjTcaex7ouAr7m3Lwb2ZjruJNZ/KvBpYKtH+Uzg94AAk4HNmY65L1uutqAmAbtUtVFVzwBdi2mFmwX83L39G2CaRFsDPfii1ltVX1HVNvfuJpxZiHNBLK85wH8CjwCn0hlcisVS968CP1LVIwCqejDNMaZKLHVX4O/c2+cCf01jfCmlquuAFp9dZgG/UMcmoFREzk9PdInL1QQVaTGtYV77qGo70AqUpyW61Iml3uHuwPl0lQui1t29vHGBqq5OZ2BpEMvrPg4YJyJ/FJFNIjIjbdGlVix1fxi4VUT2A88D96YntEDo63tCoNhyG3lKRG4FJgL/mOlY0kFECoDHgC9nOJRMCeFc5vsnnFbzOhG5RFWPZjSq9LgFWKyq3xeRKcAvRWSCqnZmOjDjL1dbULEsptW9j4iEcJr+zWmJLnViWkRMRK4GvgNcr6qn0xRbqkWr+0BgArBWRPbiXI9flSMDJWJ53fcDq1T1I1XdA+zESVjZLpa63wEsB1DV14BzgIq0RJd5aVlYMFVyNUG9DowVkQtFpB/OIIhVvfZZBdzm3r4J+IO6vYpZLGq9ReQy4H9wklOu9ENAlLqraquqVqjqKFUdhdP/dr2q1mcm3KSK5e99JU7rCRGpwLnk15jOIFMklrrvA6YBiEglToI6lNYoM2cV8CV3NN9koFVVmzIdVKxy8hKfqraLSNdiWoVAjaq+IyLfBepVdRXwM5ym/i6cTsaqzEWcHDHW+7+BAcD/umNC9qnq9RkLOklirHtOirHudcA/i8g2oAP4d1XN9isGsdb9W8BPRORfcQZMfDkHPowCICK/xvngUeH2sT0EFAGo6tM4fW4zgV1AG3B7ZiKNj011ZIwxJpBy9RKfMcaYLGcJyhhjTCBZgjLGGBNIlqCMMcYEkiUoY4wxgWQJyuQ1ESkXkS3u9oGIHHBvH3WHZCf7+R4WkW/38ZjjHo8vFpGbkhOZMcFjCcrkNVVtVtVLVfVS4Gngcff2pUDUqXDcWUiMMSlgCcoYb4Ui8hN3/aQXRKQYQETWiki1iNQD94vIZ0TkVRH5s4jUdc0WLSL3ha29tTTsvBe752gUkfu6HhRnra6t7vbN3sG4swE86a599BIwOMX1Nyaj7NOfMd7GAreo6ldFZDkwG6h1y/qp6kQRKQJeBWap6iERmQPMB74CPAhcqKqnRaQ07LzjcdblGgjsEJGngE/hfMv/cpy1ezaLyKuq+mbYcTcAn8BZ02gIsA2oSUnNjQkAS1DGeNujqlvc238GRoWVLXN/fgJnEtoX3amjCoGuuc7eApaIyEqcufC6rHYn6T0tIgdxks0VwG9V9QSAiKwArsRZZLDLVODXqtoB/FVE/pCUWhoTUJagjPEWPtN7B1Acdv+E+1OAd1R1SoTjr8VJKtcB3xGRSzzOa/+HxkRgfVDGJGYHcJ67zhAiUiQin3TXn7pAVV8BHsBZzmWAz3nWA58Xkf4iUoJzOW99r33WAXNEpNDt57oq2ZUxJkjsk5sxCVDVM+5Q7ydE5Fyc/6lqnPWWat3HBHhCVY+6lwEjnecNEVkM/Ml96Ke9+p8Afgt8DqfvaR/wWrLrY0yQ2GzmxhhjAsku8RljjAkkS1DGGGMCyRKUMcaYQLIEZYwxJpAsQRljjAkkS1DGGGMCyRKUMcaYQPo/KwDsew38tpcAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.000 189690 0.000000 1.000000 0.401212\n", "1 0.025 184740 0.000079 0.971269 0.418384\n", "2 0.050 175670 0.000472 0.910275 0.454749\n", "3 0.075 169030 0.001652 0.835732 0.498911\n", "4 0.100 160690 0.002754 0.754863 0.546892\n", "5 0.125 155350 0.004327 0.673994 0.594684\n", "6 0.150 149060 0.005744 0.593389 0.642381\n", "7 0.175 146450 0.007553 0.519005 0.686196\n", "8 0.200 150310 0.010307 0.447098 0.728148\n", "9 0.225 161550 0.013926 0.385102 0.763818\n", "10 0.250 187280 0.019670 0.328325 0.795511\n", "11 0.275 219960 0.026200 0.281828 0.820733\n", "12 0.300 250800 0.032337 0.238811 0.844029\n", "13 0.325 286300 0.039024 0.201908 0.863443\n", "14 0.350 344930 0.049174 0.170963 0.877900\n", "15 0.375 407540 0.059795 0.145184 0.889075\n", "16 0.400 485790 0.072777 0.122779 0.897282\n", "17 0.425 567590 0.086310 0.100638 0.905111\n", "18 0.450 679880 0.104485 0.083716 0.907952\n", "19 0.475 798350 0.123525 0.070378 0.908299\n", "20 0.500 925390 0.143902 0.057409 0.907889\n", "21 0.525 1058610 0.165146 0.048026 0.904984\n", "22 0.550 1205000 0.188434 0.039538 0.900723\n", "23 0.575 1388590 0.217545 0.032105 0.893494\n", "24 0.600 1590880 0.249567 0.025726 0.884466\n", "25 0.625 1795290 0.281904 0.019980 0.874933\n", "26 0.650 2015440 0.316680 0.015499 0.863664\n", "27 0.675 2267340 0.356412 0.012336 0.849616\n", "28 0.700 2523820 0.396853 0.009595 0.835033\n", "29 0.725 2832920 0.445555 0.007486 0.816756\n", "30 0.750 3156030 0.496459 0.005430 0.797563\n", "31 0.775 3490280 0.549095 0.004112 0.777234\n", "32 0.800 3835030 0.603383 0.002794 0.756242\n", "33 0.825 4221900 0.664280 0.002109 0.732220\n", "34 0.850 4610780 0.725492 0.001476 0.708040\n", "35 0.875 4940720 0.777419 0.001160 0.687395\n", "36 0.900 5310120 0.835563 0.000633 0.664383\n", "37 0.925 5660580 0.890716 0.000422 0.642381\n", "38 0.950 5941550 0.934933 0.000264 0.624736\n", "39 0.975 6166020 0.970260 0.000105 0.610657\n", "40 1.000 6298510 0.991109 0.000053 0.602323\n" ] } ], "source": [ "plot_metrics_cost_vs_threshold(y_test, rf_probs,0.05,cost_matrix)" ] }, { "cell_type": "markdown", "id": "arranged-stability", "metadata": {}, "source": [ "## Analysis of model performance\n", "

\n", " Table 7.4.1 - Performance Vs Dollar Cost \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy Precision Recall F1 Score Dollar Cost
Logistic Regression 0.70 0.67 0.51 0.58 3,062,500
KNN 0.79 0.85 0.60 0.70 2,531,000
Random Forest 0.90 0.92 0.83 0.87 1,049,500
\n", "

" ] }, { "cell_type": "markdown", "id": "needed-bubble", "metadata": {}, "source": [ "Random forest model provides the best performance by minimizing the false negatives, and provides best (highest) recall. That also is evident from lost $cost for Random Forest.\n", "\n", "**Discussion on model performance**\n", "\n", "**Random Forest** is an ensemble learning method that consists of multiple decision trees. An ensemble learning method ends up providing better metric, as it’s possible to train trees addressing a specific feature space in data set that other trees don’t.\n", "\n", "**Logistic regression** performs the poorest here. Logistic regression has assumption of linear dependence between dependent variable and independent features. It is possible the available data set is not linearly separable, providing poorest model metric.\n", "\n", "**KNN** performs between Random forest and logistic regression. Given the number of feature set and final 'y' variable, it is possible the data set lends itself well into determining output based on how close the features are.\n", "\n", "It is possible that this data could be for financial fraud detection, where y=1 implies fraud detected. If this is true, then it is likely that majority of frauds have a signature in feature set that is common. This common feature set would likely drive better performance for KNN as commonality in feature ties to nearest neighbor approach for determining a fraud. Hence it is better than logistic regression for this problem.\n", "\n", "**Discussion on Dollar cost vs. model metric**\n", " \n", "Dollar cost and model metric tradeoffs are further analyzed by varying the binary classification threshold. \n", "Table 7.4.2 below captures binary classification threshold that minimizes Dollar cost, for a model originally obtained through grid search and hyper parameter tuning, as summarized in Table 7.4.1.\n" ] }, { "cell_type": "markdown", "id": "dependent-legend", "metadata": {}, "source": [ "

\n", " Table 7.4.2 - Threshold selection \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Classification threshold FNR Accuracy Dollar Cost
Logistic Regression 0.006807 0.0 0.4012 189,690
KNN 0.083333 0.0020 0.4641 182,010
Random Forest 0.175 0.0076 0.6862 146,450
\n", "

\n", "





\n", "

\n", "\n" ] }, { "cell_type": "markdown", "id": "conservative-aberdeen", "metadata": {}, "source": [ "**Observations**\n", "\n", "For each of the models, the \\\\$ cost is significantly reduced by using a much lower classification threshold. Compare with default results in Table 7.4.1.\n", "\n", "Using a very low threshold is converting TN and FN to FP (false positives). Since the cost of FP is smaller by 50x than of FN, it is helping lower the actual \\\\$ cost to company.\n", "Above also implies, one need not build any sophisticated model, and classify all new records as TRUE, yet not be too worse off the minimum \\\\$ cost. This is happening as the $ cost of FN = 50x \\\\$ cost of FP, i.e. there is a very high skew.\n", "\n", "Experiments with setting \\\\$ cost of FN = 500, and $ cost of FP = 100, (i.e. much less skew),gave a much higher Accuracy, and higher threshold. In such a scenario, spending time and effort to build a model will likely provide higher return on investment of building a model\n", "\n", "

" ] }, { "cell_type": "markdown", "id": "choice-kinase", "metadata": {}, "source": [ "# Model Interpretability & Explainability" ] }, { "cell_type": "markdown", "id": "acquired-wrestling", "metadata": {}, "source": [ "## Logistic Regression" ] }, { "cell_type": "code", "execution_count": 66, "id": "agricultural-looking", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAoMAAAI/CAYAAAAMZtOAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzde5gldX3n8fcHYRAFxJCBSGAcIrokEBylQXFFDV6jeGWUYVEZYkIUWbKbFSVhd6MoWUGzEDRCxguCkkjkIiAKouiCijIDDjLewBuRQGTMxguwoMJ3/6g6cGhO93T3TJ/TXf1+PU8/fbrqd6q+NX/wfPlV1eeXqkKSJEkL02ajLkCSJEmjYzMoSZK0gNkMSpIkLWA2g5IkSQuYzaAkSdICZjMoSZK0gG0+6gLmq9/8zd+spUuXjroMSZKkDbr22mt/UlWLB+1bcM1gkm2BbwKfqKqj2m2LgPcCzwLuA46rqvMmO87SpUtZs2bNLFcrSRu2/rSPjroESRth8RtePevnSHLzRPsWXDMIvB24cty244Dbq+oJSTYDfmP4ZUmSJA1fJ58ZTLJPkq8neXiSRyb5RpI9k+wN7Ah8ZtxX/gj4XwBVdV9V/WTYNUuSJI1CJ2cGq2p1kouAdwBbAR+luTV8BfBq4Dm9sUm2az++PcmzgO8BR1XVj4datCRJ0gh0cmawdTzwXGAMOAk4EvhUVd0ybtzmwM7Al6vqycDVwLsHHTDJEUnWJFmzfv362atckiRpSDo5M9jaHtga2AJ4OLAfsH+SI9vti5LcAfwFcBdwfvu9jwOvG3TAqloFrAIYGxurWa1ekiRpCLrcDP498D+AXYETq+rQ3o4kK4Gxqjq2/ftimjeJrwCeTXNLWZIkqfM62QwmeS3wq6r6hyQPA76c5ICqumKCr7wF+EiSU4D1wOHDqlWSNtYwYikkdVeqvNs5E2NjY2XOoCRJmg+SXFtVY4P2dXJmcJAkjwUuoHlpZgvgPVV1ervvEOAvgQJuBV5tvIyk+WL96R8YdQmaJYtf/8ejLkELQJffJh7vNmC/qloGPAU4NslOSTYH/hb4g6raC/g6cNQI65QkSRqaTjaDg0KngSdU1T3tkC154NrT/jwySYBtaWYHJUmSOq+Tt4kHhU5X1bokuwCXALsBx1TVrQBJ3gDcANwJ3AS8cTSVS5IkDVcnZwZb40OnqaoftbeCdwMOS7Jjki2ANwBPAnaiuU38F4MOaOi0JEnqmi43g73Q6W1oQqfv184IrgP2B5a1275XzavV/wQ8bdABq2pVVY1V1djixYtns3ZJkqSh6HIz2AudPhs4McnOSbYCSPJo4OnAd4B/AX4vSa+7ey7wrRHUK0mSNHSdfGZwUOg0sAfwriRF88LIu6vqhnb824Ark/wKuBlYOZrKJWn6jB+RtDEMnZ4hQ6clSdJ8Yeg0Gwyd/gLwGOD/tcOfV1W3j6JOSZqu208/ddQlaJbs8PqjR12CFoAF0wzyQOj0PUm2BtYluagXLwMcWlVO9UmSpAWlky+QTDN0WpIkacHq5MzgdEOnW2ckuRc4D3hH+TClJElaALo8Ozal0Ol27KFV9fs0uYP7A68ZdEBDpyVJUtd0uRmcaug0VfUv7e9fAP8A7DvogIZOS5KkrulyMzil0Okkmyf5zXb7FsCBNI2iJElS53XymcHphE4neSRwWdsIPgz4LPD+UdUuSdNl/IikjdHJZrCqzgLOaj/fCzyl3XXZgLF3AnsPrzpJkqS5o5PN4CAThU4n2Qa4qm/ozjRvH/+XEZQpSdP2r6e9Y9QlaBp+6w3/fdQlSA+yYJpBJg+dXtYblORa4PxRFSlJkjRMnXyBZKah00meAOzAg2cKJUmSOquTM4MzDJ0GWAGcY+C0JElaKDo5M9iaTuh0zwrgHyc6oKHTkiSpa7rcDE45dBogyROBzavq2okOaOi0JEnqmi43g1MKne4bfwiTzApKkiR1USefGZxO6HTf114FvHD41UqSJI1OfFdiZsbGxmrNmjWjLkOSJGmDklxbVWOD9nVyZnCQJMuA04BtgXuBE6rqnHbfh4FnAj9rh6+sqrWjqFOSpuvWv/vzUZcwqZ3e+L9HXYKkSSyYZhC4C3htVd2UZCfg2iSXVdVP2/3HVNW5I6xPkiRp6Dr5AskEodOLquomuP9t4tsBXwmWJEkLWiebwapaDfRCp0+iDZ3u7U+yL7AI+F7f105oG8iTk2w51IIlSZJGpJPNYOshodMASR4DfAQ4vKruazf/BbA7sA/wG8BbBh3Q0GlJktQ1XW4GHxI6nWRbmuXojquqr/QGVtVt1bgHOAPYd9ABDZ2WJEld0+VmcHzo9CLgAuCs8S+KtLOFJAnwMprVSSRJkjqvk28TTxA6vQJ4BrB9kpXt0F6EzNlJFtOEUa8FXj+CsiVJkobO0OkZMnRakiTNF4ZO92mfG/wm8ImqOqrddinwGJp/j6uAN1bVvaOrUpKm7p9PXT4rx11ytNGr0kLQ5WcGJ/J24Mpx215VVU8E9qTJHnzl0KuSJEkagU42g4NCp5PsmWRvYEfgM/3jq+rn7cfNafIHvXcuSZIWhE7eJq6q1Ul6odNbAR+luTV8BfBq4Dnjv5PkMppImU8D3huRJEkLQidnBlvjQ6ePBD5VVbcMGlxVz6d5bnBL4IBBYwydliRJXdPlZnB86PR+wFFJfgi8G3htknf2f6Gq7gYuBF466ICGTkuSpK7p5G3iVi90elfgxKo6tLejzRkcq6pjk2wNbFNVtyXZHHgRzRvFkiRJndfJZnBQ6HSSA6rqigHDHwlclGRLmpnSzwOnD7FcSZKkkTF0eoYMnZYkSfOFodMbkGQJ8AFgF5pYmRdW1Q9HWpQkTdG3/27gY84PsvsbLxxCJZLmI5vBxlnACVV1efsM4X2jLkiSJGkYuvw28UNMEEa9F7B5VV0OUFV3VNVdIy5VkiRpKBbUzOAEYdS/A/w0yfk0bx5/FjjWtYklSdJCsKBmBlvjw6g3B/YH3gTsQ9Mcrhz0RUOnJUlS1yzEZnB8GPUtwNqq+n5V/Rr4BPDkQV80dFqSJHXNQmwGe2HUZwMnAquB7ZL0ursDaNYxliRJ6rwF9czgoDBq4Jk0t4g/lyTAtcD7R1imJEnS0Bg6PUOGTkuSpPlistDpBXObOMljk1yXZG0bKfP6vn0nJPlRkjtGWaMkSdKwLaTbxLcB+1XVPW2w9LokF1XVrcDFwHuBm0ZaoSTNwNdOf/EGxzzp9RcPoRJJ81EnZwYHhUsDT6iqe9ohW9J37VX1laq6bSTFSpIkjVAnZwYHhUtX1bokuwCXALsBx7SzgpIkSQtWJ2cGW+PDpamqH1XVXjTN4GFJdpzOAQ2dliRJXdPlZnB8uPT92hnBdTQrj0yZodOSJKlrutwMPihcOsnOSbYCSPJo4OnAd0ZYnyRJ0sh1shnsD5cG3kmz5vAewFeTXA/8H+DdVXVDO/6kJLcAj0hyS5K3jqh0SZKkoTJ0eoYMnZYkSfOFodOtJJcm+WmST06w/1SDpyVJ0kLSyWiZSbwLeATwp+N3JBkDHj30iiRpI1296sBJ9+93xMD//5UkoKMzg4NCp5PsWVWfA34xYPzDaBrFNw+9WEmSpBHq5MzgRKHTk3zlKOCiqrotyVBqlCRJmgs62Qy2jgdWA3cDR080KMlOwCuBZ23ogEmOAI4AWLJkySYpUpIkaZQ6eZu4NWHo9DhPolmR5LtJfkgTL/PdQQMNnZYkSV3T5ZnBXuj0rsCJNLeCH6KqLgF+q/d3kjuqarehVChJkjRinWwG+0On25dDvpzkAOBtwO7A1m3I9Ouq6rJR1ipJkjRKhk7PkKHTkiRpvjB0ujVR6HQaJyS5Mcm3kkz4wokkSVKXdPI28SQmCp1eCewC7F5V9yXZYdiFSdJMfeH9L5pw37P+5JIhViJpPurkzOB0Q6eBNwDHV9V9AFV1+1ALliRJGpFOzgzOIHT6ccDBSV4OrAeOrqqbhlCqJEnSSHWyGWxNKXS6tSVwd1WNJXkF8CFg//GDDJ2WJEld08nbxK2phk4D3AKc336+ANhr0CBDpyVJUtd0uRnshU6fTRM6PZlPAH/Qfn4mcOMs1iVJkjRndPI28QxCp98JnJ3kvwJ3AH88qtolSZKGydDpGTJ0WpIkzReGTvdJsm2SW5K8t2/bwW0UzTeSbOiWsiRJUmd08jbxBrwduLL3R5LtacKo966q9UnOTPLsNpNQkua8yz74woHbn/+6Tw25EknzUSdnBicKnU6yN7Aj8Jm+4b8D3FRV69u/PwscNOyaJUmSRqGTM4ODQqeBbwJXAK8GntM3/LvAf0iylCZi5mXAomHWK0mSNCqdbAZb40OnjwQ+VVW3JLl/UFX9e5I3AOcA9wFfplmR5CEMnZYkSV3T5WawFzq9BU3o9H7A/kmObLcvSnJHVR1bVRcDF8P9Dd+9gw5YVauAVdC8TTz7lyBJkjS7utwM9kKndwVOrKpDezuSrATGqurY9u8dqur2JI+mmUF81QjqlSRJGrpONoMThU5X1RUTfOVvkzyx/Xx8VbkCiSRJWhAMnZ4hQ6clSdJ8Yeh0K8mlSX6a5JPjth+Q5Lok69qcwU7OmEqSJI230JqedwGPAP60tyHJZsCZwLOr6sYkxwOHAR8cTYmSND0Xf+gPB25/8R99esiVSJqPOjkzOFHodLuqyC/GDd8e+GXfc4KXY+i0JElaIDo5MzgodLqq1k0w/CfA5knGqmoNsBzYZUilSpIkjVQnm8HW+NDpgaqqkqwATk6yJc1SdQNzBg2dliRJXdPJ28StXuj0NjSh0xOqqqurav+q2he4EhgYLVNVq6pqrKrGFi9evMkLliRJGrYuN4O90OmzgRMnG5hkh/b3lsBbgNNnvTpJkqQ5oJO3iScKnQbeBuwObJ3kFuB1VXUZcEySA2ma49MmCaeWJEnqFEOnZ8jQaUmSNF8YOt2aJHT6qiRr259bk3xiVDVKkiQNUydvE0/iIaHTAFW1f+9zkvOAC4dclyTN2LlnvGDg9uWHXzrkSiTNR52cGZxm6HT/97YFDgCcGZQkSQtCJ2cGpxk63e9lwOeq6uezWqAkSdIc0clmsDWl0OlxDgE+MNFOQ6clSVLXdPI2cWvKodMASX4T2Be4ZKIxhk5LkqSu6XIzOOXQ6dZy4JNVdfesViVJkjSHdPI28QxCpwFWAO8cTcWSJEmjYej0DBk6LUmS5gtDpyVJkjRQJ28TT1eSS4GnAl+sqgNHXY8kTcc/fPj5D/r7P628bIKRkvRQzgw23gW8ZtRFSJIkDduCagZnujKJJElSVy2o28QbsTKJJElSJy2oZrA1k5VJAFcgkSRJ3bOgbhO3prUyST9XIJEkSV2zEJvB6a5MIkmS1FkL6jbxDFcmkaQ5zSgZSRtjQTWDVXUWcFb7+V7gKe2uK0ZWlCRJ0ghtVDOYZBlwGrAtcC9wQlWd0+47AHg3sAi4lma27dcTHGclMFZVR21MPZK0EJ1x5vMe9Pfhh31mRJVImo829pnBu4DXVtUewAuAU5Jsl2Qz4ExgRVXtCdwMHLaR55oVSRbU7KgkSVK/KTeDgwKbgUVVdRNAVd0K3A4spnlj95dVdWP79cuBg6Z4nhcn+WqSryX5bJId2+1vTfKhJF9I8v0kR7fblyZZ1/f9NyV5a/v5T5KsTnJ9kvOSPKLd/uEkpyf5KnBSkpuSLG73bZbku72/JUmSumzKzWBVrQZ6gc0nMS6wOcm+NLeEvwf8BNg8yVi7ezmwyxRP9UXgqVX1JOBjwJv79u0OPB/YF/irJFts4FjnV9U+VfVE4FvA6/r27Qw8rar+HPgocGi7/TnA9VW1for1SpIkzVvTvUU6MLA5yWOAjwCHVdV97bYVwMlJtgQ+Q/NM4VTsDJzTHnMR8IO+fZdU1T3APUluB3bcwLH2TPIOYDuabMH+V+4+3r5EAvAh4ELgFOCPgDMGHczQaUmS1DXTfWbwIYHNSbYFLgGOq6qv9AZW1dVVtX9V7QtcCdw44HiDvAd4b1X9PvCnPDgY+p6+z/fSNLO/Hncd/eM/DBzVHutt4/bd2Vfrj4Afty+97At8elBhhk5LkqSumW4z+KDA5iSLgAuAs6rq3P6BSXZof28JvAU4fYrneBTwL+3nqbx08mNghyTbt+c6sG/fNsBt7e3kQwd++wEfoLld3D9jKEmS1GlTvk08KLAZWAE8A9i+jYcBWFlVa4FjkhxI03CeVlWTZfltzgOzfm8FPp7k32ny/3adrK6q+lWS44FraJrIb/ft/h/AV4H17e9tJjnURTS3hwfeIpakucooGUkbI1U16hpIcjJwU1W9b4Q1jAEnV9X+Uxk/NjZWa9asmeWqJEmSNl6Sa6tqbNC+kWfsJfk0zYsibx3S+bYFvgl8ohdyneQfaaJv/jnJpcCrq+onw6hHkjbW33/k+fd//tPXuDSdpOnZ2NDpaUlyeJK1/T/A96vq2VX1syGV8XaaF1p6NW0OPBvYqap2A74OuBKKJElaEIY6M1hVQ3kmL8k+wAdp3gx+GM3zhAcDW9LE0VwK9KZK0/48Msm/0Syt993ZrlGSJGkuGPlt4tlQVauT9AKyt6J5S/ibNC+kvJomWLo39ldJ3gDcQBM3cxPwxqEXLUmSNAJDvU08ZMcDz6WZATwJOBL4VFXd0j+ojZ15A/AkYCea28R/MeiASY5IsibJmvXrXaBEkiTNf52cGWz1ArK3oAmb3g/YP8mR7fZFSe4AzgOoqu8BJPkn4NhBB6yqVcAqaN4mnu0LkCRJmm1dbgZ7Adm7AidW1f2h020m4lhVHZtkJ+D3kixu1yN+Ls06xpIkSZ3XyWZwUEB2kgMGBV9X1a1J3gZcmeRXwM3AyuFWLEkzZ5yMpI0xJ0Kn5yNDpyVJ0nwxp0OnRy3JY2nWV96M5vnC91TVVNdRlqSRO/XsB0Knjz7UWUJJ07Pgm0HgNmC/qronydbAuiQXVdWtoy5MkiRptnU5WuYhkuyT5OtJHp7kkUm+ATyhqu5ph2zJAvs3kSRJC9uCmhkcFEZdVeuS7AJcAuwGHOOsoCRJWigW4izY+DBqqupHVbUXTTN4WJIdB33R0GlJktQ1C7EZ7IVRb0MTRn2/dkZwHbD/oC9W1aqqGquqscWLF896oZIkSbNtITaDvTDqs4ETk+ycZCuAJI8Gng58Z4T1SZIkDc2CemZwUBg1sAfwriQFBHh3Vd0wyjolaTqMk5G0MRZUM1hVZwFntZ/vBZ7S7vK/pJIkaUFaMM1gkmXAacC2wL3ACVV1TrvvbJoXSn4FXAP8aVX9alS1StJ0nPSPD4ROv/kQ/99W0vQspGcG7wJeW1V7AC8ATkmyXbvvbGB34PdpImf+eDQlSpIkDVcnm8EJwqUXVdVNcP9bw7cDi9u/P1UtmpnBnUdWvCRJ0hB18jbxROHSvf1J9gUWAd/r/16SLYDXAH82xHIlSZJGppPNYOt4YDVwN3B0b2OSxwAfAQ6rqvvGfed9wJVVddWgAyY5AjgCYMmSJbNRsyRJ0lB18jZx6yHh0km2pVl27riq+kr/4CR/RXPb+M8nOqCh05IkqWu63AyOD5deBFwAnFVV5/YPTPLHwPOBQwbMFkqSJHVWJ28TTxAuvQJ4BrB9kpXt0JVVtRY4HbgZuDoJwPlVdfzwK5ek6TNORtLG6GQzOEm49FkTjO/kv4MkSdKG2ARJ0jz31n96IHT6ra9yllDS9MyJZwaTLEtydZJvtPmAB/ftOyDJdUnWJTkzyYQNbJKVSe5LslfftnVJls7uFUiSJM1Pc6IZZILVQZJsBpwJrKiqPWme6ztsA8e6BThuVquVJEnqiKE3g9NcHWR74JdVdWP79cuBgzZwik8CeyT5DwPOfUiSG9rZwhPbba9P8q6+MSuTvHejL1SSJGkeGHozWFWrgd7qICcx+eogPwE2TzLW7l4O7LKBU9zXHvcv+zcm2Qk4ETgAWAbsk+RlwHnAy/uGHgx8bNCBkxyRZE2SNevXr5/C1UqSJM1to7pNfDzwXGCMpnEDHrQ6yOFVdV+7VvAK4OQk1wC/AO6dwvH/AXhqkl37tu0DfKGq1lfVr2nyB59RVeuB7yd5apLtgd2BLw06qKHTkiSpa0b1NnFvdZAtaFYHuXOi1UGq6mpgf4AkzwOesKGDV9Wvk/wN8JYp1vMx4FXAt4EL2iZUkiSp80bVDPZWB9mVZnWQP2fi1UF2qKrbk2xJ09ydMMVzfBh4M81ydADXAKcm+U3g34FDgPe0+y6geenkSUy9gZSkOcE4GUkbYxQvkNy/OgjwTprbt73VQVYmWdv+LGu/ckySbwFfBy6uqiumcp6q+iVwKrBD+/dtwLHA54HrgWur6sJ2378D3wIeW1XXbKJLlSRJmvPiHdGZGRsbqzVr1oy6DEniTee+4P7P715+6QgrkTRXJbm2qsYG7ZsrOYPAJg+fXt83yzhwGbp27NIk6ybaL0mS1GVzbTm6Xvj0TW0UzLVJLgN+ThM+/eyqujHJhTRvAP/fvu9+qare2Pf3OVV11PBKlyRJmn9GNjO4keHT7wbWVdWyvp83DjzRg895TJLV7Xnf1rdr8yRnJ/lWknOTPGLTXq0kSdLcNLJmcAjh0wf33SY+vI2leTywL03o9N5JntGO/Q/A+6rqd2lmIY8cdEBDpyVJUteM+pnB2QyfPqdv1vAM4Hntz9eA62jCpR/fjv1RVfWCpj8KPH3QAQ2dliRJXTPqZwZnNXx6nAD/q6r+/kEbk6XA+FeqfcVakiQtCKNuBocRPt1zGfD2JGdX1R1Jfhv4VbtvSZL92obzPwFf3IhrkqShMk5G0sYYWTPYHz6d5GHAl3kgfHr7JCvboSurai1N+PSBNLe2T5tq+HRPVX0mye8CVycBuAN4Nc3t5u8Ab0zyIeCbwGkbfYGSJEnzgKHTM2TotKS54vALHgidPuPlzhJKeqhNGjo9WTB035hTk9wxk2I3tSSvb2chJUmSNM5MbhMPDIauqp8CtPEvj96URU4kyeHAn43bfH/4dJLNq+r0YdQiSZI0H006MzjNYGjaZ//eBbx5QydO8uIkX03ytSSfTbJju/2t7XJzVyW5OckrkpyU5IYklybZoh23N7CS5pm/HwN/WFXLgD2SnJJkDfBn7fHe1H5nt/Zc17dL2z0uydZJPtf+fUOSl87kH1KSJGk+mrQZnGYwNMBRwEVVddsUzv1F4KlV9STgYzy4gXwccADwEprcv89X1e8D/w94UdsQvgdYXlV7Ax/iwW8XL2rzAP9m3DnPBv6uqp4IPA24DbgbeHlVPRn4A+Bv0r5hMp6h05IkqWumcpv4eGA1TdN0dG9jXzD0YVV1X3vL+JXAs6Z47p2Bc9rjLAJ+0Lfv01X1qyQ3AA8Dek9E3wAspVkxZE/g8rZvexhNY9dzzviTJdkG+O2qugCgqu5ut28B/HW7Gsl9wG8DOwL/Ov4YVbUKWAXNCyRTvE5JkqQ5ayrN4FSDoZ8E7AZ8t23QHpHku1W12wTHfQ/wv6vqoiTPAt7at+8egLbJ/FU98MrzfW3NAb5RVftNcOw7p3BdPYfS3Obeu21Af9hepyRJUudN5W3iXjD02TTB0IsYEAxdVZdU1W9V1dKqWgrcNUkjCPAo4F/az4dNs+7vAIuT7AfN7F6SPSb7QlX9Arglycva72yZ5BFtHbe3jeAfAI+dZi2SJEnz1qQzgzMIhp6OtwIfT/LvwBU0q5BMSVX9Msly4NQkj2qv4xTgGxv46muAv09yPM3qI6+kaXIvbm9JrwG+Pc3rkKSRMltQ0sZYMKHTSZbRrCyyLc0byCdU1Tntvl1pXmLZHrgWeE1V/XKy4xk6LWmu+MMLDwLg0y89b8SVSJqrNmno9DzWy0fcA3gBcEqS7dp9JwInt7e1/x143YhqlCRJGqpZbwaTHJdk7bif42b5nFPOR2xjZA4Aes8/ngm8bDbrkyRJmitmsgLJtFTVCTw4A3DWVdXqJL18xK2YPB9xe+CnVfXrdvctNPEykiRJnTfrzeAITTUfccoHTHIEcATAkiVLNmmxkiRJo9DlZwZ7+Yjb0OYGTpCP+G/Adkl6jfHOPBB58yBVtapd2WRs8eLFs1q8JEnSMHS5GZxqPmIBnweWt5sOAy4ccq2SJEkj0cnbxDPIR3wL8LEk7wC+BnxwBGVLkiQN3YLJGdzUzBmUJEnzhTmDfZJsm+SWJO/t27Z3khuSfDfJqZnOWyWSNGJ/eOEb+cML3zjqMiTNUwuuGQTeDlw5bttpwJ8Aj29/XjDsoiRJkkahk83goNDpJHsm2RvYEfhM39jHANtW1Vfal0nOwtBpSZK0QHTyBZJBodPAN4ErgFcDz+kb/ts0QdM9hk5LkqQFo5PNYGt86PSRwKeq6paZPhJo6LQkSeqaLjeDvdDpLWhCp/cD9k9yZLt9UZI7gL+lCZrumTR0GlgFzdvEs1e6JEnScHS5GeyFTu8KnFhVh/Z2tDmDY1V1bPv3z5M8Ffgq8FrgPcMvV5Ikafg62QwOCp1OckBVXTHBV44EPkzzfOGn2x9JkqTOM3R6hgydliRJ84Wh05LUYS/8xF/ywk/85ajLkDRPzYlmMMmyJFe3eYBfT3Jw374DklyXZF2SM5Ns8NZ2kk8k+crsVi1JkjT/zYlmELgLeG1V7UGz+scpSbZLshlwJrCiqvYEbgYOm+xASbYD9gYeleR3ZrluSZKkeW3ozeCg1UGARVV1E0BV3QrcDiymiYf5ZVXd2H79cuCgDZziFcDFwMeAFX3n/XCS5X1/39H+3izJ+5J8O8nlST7VP06SJKnLht4MVtVqoLc6yEnAR6tqXW9/kn2BRcD3gJ8AmyfpPfC4HNhlA6c4BPjH9ueQKZT0CmAp8HvAa2jyCAdKckSSNUnWrF+/fgqHliRJmttGdZv4eOC5wBhNQwjcv07wR4DDq+q+dq3gFcDJSa4BfgHcO9FBk+wIPB74Yjub+Kske26glqcDH2/P96/A5ycaWFWrqmqsqsYWL148pQuVJEmay0bVDPZWB9mGZnUQkmwLXAIcV1X3v/xRVVdX1f5VtS9wJXDjgOP1vAp4NPCDJD+kmfHrzQ7+mvZ622cRF23C65EkSZqXRt1rEUIAACAASURBVNUM9lYHORs4Mcki4ALgrKo6t39gkh3a31sCbwFOn+S4hwAvqKqlVbWU5kWS3nODP2z/BngJzTJ1AF8CDmqfHdwReNZGXZkkSdI8MvQVSAatDkLTsD0D2L5dKg5gZVWtBY5JciBN43raRKuIJFkKPBbon1X8QZKfJXkK8H7gwiTXA5cCd7bDzgOeDXwT+BFwHfCzTXfFkjS7PvWyvx51CZLmMVcgAZJsXVV3JNkeuAb4j+3zgxNyBRJJkjRfDGUFksmCo/vGnNqLdJljPplkLXAV8PYNNYKSNJe88IJ3jLoESfPYprxN3AuOvinJTsC1SS6rqp8CtPEwj94UJ0pyOPBn4zZ/qareOMH40MyC3jdof1U9a1PUJUmSNN/MaGZwmsHRtM8Gvgt48xSOvTjJeUlWtz//sd3+1iRvao9/Bk0j+7KqWgacBTyzXbLuv7Tjlyb5TpKzgHXALknuSHJyO3v5uSS9+h6X5NIk1ya5KsnuM/l3kSRJmm9m1AxOMzga4Cjgoqq6bQqH/1vg5Krah2a1kQ9MNjjJ3sDhwFOApwJ/kuRJ7e7HA++rqj2q6mbgkcCadtm7/wP8VTtuFfCfq2pv4E3A+yY4l6HTkiSpUzbmNvHxwGrgbuDo3sa+4OjDquq+9pbxK5l6ZMtzgN9r7uwCsG2SrScZ/3Tggqq6sz3/+cD+NM3qzf2ZhcB9wDnt548C57fHfhrw8b5zbjnoRFW1iqZxZGxszDdvJEnSvLcxzWAvOHoLmuDoOycIjn4SsBvw3bbZekSS71bVbhMcdzPgqVV1d//GJPeHRrcePoUa79zA/mqP+dP2drMkSdKCsjFvE08pOLqqLqmq3+oLgr5rkkYQ4DPAf+79kaTXpP0QeHK77cnAru32q4CXJXlEkkcCL2+3DbIZzfrGAP+JZtm6n9OsWPLK9thJ8sSp/ANIkiTNdzN9geT+4GjgncA+PBAcvTLJ2vZnJrNtRwNj7Qsq3wRe324/D/iN9mWVo2iXpauq64AP0+QDfhX4QFV9bYJj3wnsm2QdcADNrW6AQ4HXtYHU3wBeOoO6JWkkPvXy/z7qEiTNYwsqdLrNONyJZrWRT1TVUe32E4DXAo+uqsmeT7yfodOSJGm+GEro9DzyduDKcdsuBvYdQS2StNFedMG7Rl2CpHlsZM1gkuP6bif3fo7bRMd+SA5ikj2BZwI70jyXeL+q+soUY28kSZI6ZVOuQDItVXUCcMIsHXt1kl4O4lY0MTLfBK4AXk0TXyNJkrTgjawZHILxOYhHAp+qqlv68gSnJckRwBEAS5Ys2URlSpIkjU6Xm8HxOYj7AfsnObLdvijJHVV17FQPaOi0JEnqmi43g70cxF2BE6vq0N6OJCuBsek0gpIkSV3UybeJB+UgJjlgkvEnJbmFZnWUW5K8dUilSpIkjdSCyhnclMwZlCRJ84U5g5IkSRpoTjaDSZYlubrNB/x6koP79h2Q5Lok65KcmWTC5x6TrEzy3uFULUmj8aLzTx11CZLmsTnZDAJ3Aa+tqj2AFwCnJNkuyWbAmcCKqtoTuBk4bIR1SpIkzWsjbwYHrRYCLKqqmwCq6lbgdmAxTVzML6vqxvbrlwMHTfE8H06yvO/vO9rfz0ryhSTnJvl2krMz0yBCSZKkeWbk0TKDVgupqnW9/Un2BRYB3wMK2DzJWFWtAZYDu2yCMp4E7AHcCnwJ+I/AF8cPMnRakiR1zchnBlvHA88FxoCTehuTPAb4CHB4Vd1XzavPK4CTk1wD/AK4dxOc/5qquqWq7gPWAksHDaqqVVU1VlVjixcv3gSnlSRJGq2Rzwy2xq8WcmeSbYFLgOOq6iu9gVV1NbA/QJLnAU+Y4jl+Tdv8ts8eLurbd0/f53uZO/8ukiRJs2quzAz2Vgs5GzgxySLgAuCsqjq3f2CSHdrfWwJvAU6f4jl+COzdfn4JTeMpSZK0oI18Bqx/tZAkDwO+THMr+BnA9u3ScQArq2otcEySA2ka2dOq6opJDr85D8z6vR+4MMn1wKXAnZv+aiRp+C55xdGjLkHSPNbpFUiSnAzcVFXv28C4k4AX0TSYlwN/Vhv4h3EFEkmSNF8syBVIknwa2Ivm1vNk455G8/bwXsCewD7AM2e9QEnaBF50/mmjLkHSPNeJZjDJ4UnW9v8A36+qZ1fVz/rGDco0fBjNSyuLgC1pniX88UguRJIkachG/szgplBVZwBnTGHcoEzDq5J8HrgNCPDeqvrWrBYsSZI0R3SiGZym44HVwN3A0Ul2A34X2Lndf3mS/avqqvFfNHRakiR1TSduE09TL9NwG5rbwy8HvlJVd1TVHcCngf0GfdHQaUmS1DULsRl8UKYh8M/AM5NsnmQLmpdHvE0sSZIWhAV1m3iCTMMLaNY9voFm7eNLq+riEZYpSZI0NJ3OGZxN5gxKkqT5YkHmDA6S5NIkP03yyXHbP5jk+jZ25twkW4+qRkmSpGFaUM0g8C7gNQO2/9eqemJV7UXzDOFRwy1LkmbmRed9YNQlSJrnOtkMDgqXTrJnVX0O+MX48VX18/Z7ockf9N65JElaEDr5AskE4dLrJvtOkjOAFwLfBP7b7FcpSZI0ep2cGWwdDzwXGANO2tDgqjoc2IkmVubgQWOSHJFkTZI169ev35S1SpIkjUSXm8Hx4dIbVFX3Ah8DDppgv6HTkiSpU7rcDI4Plx4ojd16n4GXAN8eSoWSJEkj1slnBgeFSyc5AHgbsDuwdZJbgNcBlwNnJtkWCHA98IYRlS5JkjRUhk7PkKHTkiRpvjB0Gkjy2CTXJVnbRs28vm/foiSrktyY5NtJBj4zKEmS1DULphkEbgP2q6plwFOAY5Ps1O47Dri9qp4A/B7wf0ZUoyRNy4HnnTnqEiTNc51sBgeFTgNPqKp72iFb8uBr/yPgfwFU1X1V9ZMhlyxJkjQSnXyBZKLQ6SS7AJcAuwHHVNWtSbZrv/b2JM8CvgccVVU/HkXtkiRJw9TJmcHWQ0Knq+pH7frDuwGHJdmRpiHeGfhyVT0ZuBp496ADGjotSZK6psvN4ISh01V1K7AO2B/4N+Au4Px298eBJw86oKHTkiSpa7rcDD4odDrJzkm2AkjyaODpwHeqyda5GHhW+71n06xPLEmS1HmdfGZwUOg0sAfwriRFEy797qq6of3KW4CPJDkFWA8cPoq6JUmShs3Q6RkydFqSJM0Xhk5LkiRpoAXXDCbZNsktSd47YN9FSdaNoi5JmokDzz171CVImucWXDMIvB24cvzGJK8A7hh+OZIkSaPTyWZw0AokSfZMsjewI/CZceO3Bv6cJqRakiRpwejk28SDViChiYu5Ang18JxxX3k78Dc0eYOSJEkLRidnBlvjVyA5EvhUVd3SPyjJMuBxVXXBhg7oCiSSJKlrOjkz2OqtQLIFzQok+wH7Jzmy3b4oyR3AzcBYkh/S/HvskOQLVfWs8QesqlXAKmiiZYZxEZIkSbOpy81gbwWSXYETq+rQ3o4kK4Gxqjq23XRau30p8MlBjaAkSVIXdbIZHLQCSZIDquqKUdcmSZvSJ5cfuuFBkjQJVyCZIVcgkSRJ88WcX4EkybIkV7cRMF9PcnDfvgOSXJdkXZIzk0w4m5lkZZL1Sb6W5KYklyV52nCuQpKG68Bzz+HAc88ZdRmS5rk50QzSRLq8tqr2AF4AnJJkuySbAWcCK6pqT5qXPQ7bwLHOqaonVdXjgXcC5yf53dksXpIkab4aejM4KBAaWFRVNwFU1a3A7cBimjeCf1lVN7Zfvxw4aKrnqqrP07z9e0R77scluTTJtUmuSrJ7kkclubltPGlr+lGSLTbZRUuSJM1RQ28Gq2o10AuEPgn4aFXdvx5wkn2BRcD3gJ8Amyfp3eNeDuwyzVNeB+zefl4F/Oeq2ht4E/C+qvoZsBZ4ZjvmQOCyqvrVdK9NkiRpvhnV28THA6uBu4GjexuTPAb4CHBYVd3XblsBnJxkS5pl5O6d5rnSHmdr4GnAx5P09m3Z/j4HOBj4PLACeN/AAyVH0M4yLlmyZJplSJIkzT2jagbHB0LfmWRb4BLguKr6Sm9gVV0N7A+Q5HnAE6Z5ricB36KZBf1pVS0bMOYi4K+T/AawN82ydQ9h6LQkSeqaUb1A0guEPhs4Mcki4ALgrKo6t39gkh3a31sCbwFOn+pJkjyTZibv/VX1c+AHSV7Z7kuSJwJU1R00M5V/SxM6Pd3ZR0mSpHlp6DODgwKhaW7NPgPYvl0dBGBlVa0FjklyIE3jetoUgqMPTvJ04BHAD4CDqupb7b5DgdOS/HeaWcmPAde3+84BPg48axNcpiTNuk8uP3jDgyRpAwydniFDpyVJ0nwx50OnhyHJY9vw6rVtuPXr2+2PSHJJkm+329856lolaSpefO75vPjc80ddhqR5bl6uTZzkcODPxm3+UlW9cZKv3QbsV1X3tG8Wr0tyEfBT4N1V9fn22cXPJfnDqvr07FQvSZI0d8zLZrCqzgDOmGh/kn2ADwL7Ag8DrgEO7ssz3JJ2VrSq7qKJlKGqfpnkOmDn2atekiRp7piXzeCGVNXqdtbvHcBWtMHWSXahia/ZDTimXe3kfkm2A15M81axJElS53X5mcHjgecCYzQrnVBVP6qqvWiawcOS7NgbnGRz4B+BU6vq+4MOmOSIJGuSrFm/fv2sX4AkSdJs63Iz2Au23oYm2Pp+7YzgOtow69Yq4KaqOmWiA1bVqqoaq6qxxYsXz0LJkiRJw9XlZnB8sPXOSbYCSPJo4OnAd9q/3wE8CvgvI6pVkiRpJDr5zOAEwdZ7AO9KUjTrFb+7qm5IsjNwHPBt4Lp23eL3VtUHRlS+JE3JxctfMeoSJHVAJ5vBqjoLOKv9fC/wlHbXZQPG3kLTHEqSJC04Xb5NLEmd9pJzL+Yl51486jIkzXNzphlMsizJ1e0qIF9PcnDfvgPa1UPWJTmzffN3ouOsTFJJntO37WXttuWzfR2SJEnzyZxpBoG7gNdW1R7AC4BTkmyXZDPgTGBFVe0J3AwctoFj3QCs6Pv7EOD66RQzWcMpSZLUFSNpBpPs087+PTzJI5N8A1hUVTfB/dEvtwOLaSJifllVN7Zfvxw4aAOnuArYN8kW7dJzuwFr+87/P5OsbmcaV6V9ayTJF5KckmQND13uTpIkqXNGMvs10Qohvf1J9gUWAd8DCtg8yVhVrQGWA7ts6BTAZ4Hn00TGXATs2rf/vVV1fHuujwAHAr0HbxZV1diggyY5AjgCYMmSJVO/YEmSpDlqlLeJH7JCCECSxwAfAQ6vqvuqqmhu+Z6c5BrgF8C9Uzj+x9rvraBZWaTfHyT5apIbgANoYmd6zpnogIZOS5Kkrhnlc3G9FUK2oFkh5M4k29KsHXxcVX2lN7CqrqZdLSTJ84AnbOjgVXVNkt8H7qqqG9s7wSR5OPA+YKyqfpTkrTx4hZI7N8G1SZIkzQujbAZ7K4TsSrNCyJ8DFwBnVdW5/QOT7FBVtyfZEngLcMIUz3EscPe4bb3G7yft84TLgXORpHnmouUvHnUJkjpgJM3gBCuErACeAWyfZGU7dGVVrQWOSXIgzW3t06rqiqmcp6o+PWDbT5O8n2Zt4n8FVm/0BUmSJM1TaR7J03SNjY3VmjVrRl2GpAXspedeCsCFy18w4kokzXVJrp3oBdmR5wwaNi1JkjQ6I28GmWHYdJLDk6zt/dC8nfxvbGTYtCRJ0kIy1GZwU4ZNV9UZVbWs9wP8T5pYmGmFTSd5XJLr+sY8vv9vSZKkLhtqM1hVq2kCoN9Bky04Wdj0T2jDptvd0w2bfml7rn7vrap92pnGrYADq+p7wM+SLGvHHA6cMejgSY5IsibJmvXr10/pmiVJkuayUdwmnoth0x8ADm/fbD4Y+IdBBzZ0WpIkdc0oomXmYtj0ecBfAVcA11bVv22KC5UkSZrrRtEMzrmw6aq6O8llwGnA62ZwTZI0dEbKSNoUhtoMzvGw6bOBlwOfmf6VSZIkzU+GTreSHEcz+3hWVR21ofGGTksatZed+zkAPrH82SOuRNJcN1no9CjXJp4zklxA82zi5aOuRZIkaZjmQuj0tIwPm25//m6K331IzmGSPWmibj4LXDyrxUuSJM0x825msKrOYIIcwCl8d3WSXs7hVsBHgW/SvEX8auA5k3xdkiSpc+ZdM7gJHE/z8sjdwNHAkcCnquqWXgzNRJIcARwBsGTJklkuU5IkafYtxGZwfM7hfsD+SY5sty9KckdVHTv+i1W1ClgFzQskwytZkiRpdizEZvBBOYdVdWhvRxttMzaoEZQkSeqiBdUMDso5THLAVPMLJWkuMVJG0qZgzuAMmTMoSZLmi8lyBuddtIwkqfHy877Iy8/74qjLkDTPzblmMMmyJFe3GYBfT3Jw374DklyXZF2SM5Ns8DZ3kk8k+crsVi1JkjQ/zblmELgLeG1V7QG8ADglyXZJNgPOBFZU1Z7AzcBhkx0oyXbA3sCjkvzOdIqYSqMpSZI03420GRy0IgiwqKpuAqiqW4HbgcU0kTC/rKob269fDhy0gVO8gmZVkY8BK/rO++EkpydZk+TGJAe221cmuSjJFcDnNuW1SpIkzUUjnf0atCJIVa3r7U+yL7AI+B5QwOZJxqpqDbAc2GUDpziEJmT6x8B5wF/37VsK7As8Dvh8kt3a7U8G9qqq/zv+YIZOS5KkrpkLt4mPB54LjAEn9TYmeQzwEeDwqrqvmteeVwAnJ7kG+AVw70QHTbIj8Hjgi+1s4q/adYh7/qk97k3A94Hd2+2XD2oEoQmdrqqxqhpbvHjxTK9XkiRpzpgLzWBvRZBtaFYEIcm2wCXAcVV1/8sfVXV1Ve1fVfsCVwI3Djhez6uARwM/SPJDmpnAQ/r2j8/U6f1954yvRJIkaZ6ZC81gb0WQs4ETkywCLgDOqqpz+wcm2aH9vSXwFuD0SY57CPCCqlpaVUtpXiRZ0bf/lUk2S/I44HeA72yi65EkSZo3RvrM4KAVQWgatmcA27fLwwGsrKq1wDHtyx6bAadNtHJIkqXAY4H+WcUfJPlZkqe0m/4ZuAbYFnh9Vd2dZFNfoiTNmgsOevqoS5DUAQtyBZIkHwY+OX7mcTpcgUTSqB103jUAnHfQviOuRNJcN29WINlUgdNtRMx7N3C6xUnWbWCMJElSp82pZpAZBE4nOTzJ2v4f4DWTnaSqVgKfns0LkSRJmg9G1gxuqsDpqjqjqpb1/9BE0vTO8+Eky/v+vmNALVcmWdb39xeTPHHTX7UkSdLcMrJmsKpWA73A6ZOYPHD6J7SB0+3uqQROT8cHgZXteZ8APLyqrh8/KMkR7aola9avX78JTy9JkjQao75NPCuB0zPwceDAJFsAfwR8eNAgQ6clSVLXjDRahgcCp7egCZy+c7LAaWB/gCTPA54wxXP8mrbpbZ89XDR+QFXdleRy4KU0YdV7z/SCJEmS5pNRzwzOVuB0vx/yQHP3EprGc5APAKcCq6vq36dxDZIkSfPWyGYGZytwurU5cE/7+f3AhUmuBy5lguXmquraJD8HztjIS5OkoTBfUNKm0MnQ6SQnAzdV1fv6ti0DTqNZceRe4ISqOqfd90HgqcBuwMU0DehD3jruZ+i0pFF75XlfB+DjB+014kokzXXzJnR6U0jyaWAvmlvP/QZmGLb7vkrTJL6aZpm6o4ZUriRJ0kjN62ZwgsDp7wPHAldNMcOw95bwLsC5wFZA96ZLJUmSBhj128QbparOYIJn/JL0Mgy3YvIMw962M4AXAt8E/tssli1JkjRnzOuZwQ2YUoZhb3tVHQ7sBHwLOJgBDJ2WJEld0+VmsJdhuA1NhiETZRj2VNW9wMdol7obsN/QaUmS1CldbganlGGYxm69zzRZhN8eQb2SJElDN6+fGZzIdDIMga8DZ7azhgGuB94w9KIlSZJGoJM5g8NgzqAkSZovFlTO4ESSLEtydZJvJPl6koP79iXJCUluTPKtJEePslZJmoqDz/8uB5//3VGXIWme6+Rt4gn0QqdvSrITcG2Sy6rqpzS3i3cBdq+q+3rrIEuSJHVdJ2cGk+zTzv5NKXSa5hnB43tRM1V1+0gKlyRJGrJOzgxW1epphk4/Djg4ycuB9cDRvcZRkiSpyzrZDLaOB1YDdwP3PwPYFzp9WF/o9JbA3VU1luQVwIeA/ccfMMkRwBEAS5Ysmd3qJUmShqCTt4lb0wmdvgU4v/18AbDXoAMaOi1Jkrqmy83glEKnW58A/qD9/EzgxqFVKUmSNEKdvE08ndDpqloLvBM4O8l/Be4A/ngEZUuSJA2dodMzZOi0JEmaLwydliRJ0kBzohmcbHWQvjGnJrljE55zpyTjnx2UpHnh6At+dP+PJG2MufLM4GSrg5BkDHj0pjxhGzy9fFMeU5Ikab4Z+szgdFcHaV8AeRfw5ikc+8VJvprka0k+m2THdvszk6xtf76WZJskS5Osa/cvTXJVkuvan6fN0uVLkiTNKUOfGZzB6iBHARdV1W1JNnT4LwJPrapK8sc0DeR/A94EvLGqvpRka5og6n63A8+tqruTPB74R+AhD1kaOi1JkrpmVLeJp7Q6SHvL+JXA/2/v3oM1qes7j78/AYbCwkGE4aLDCEpYNiKOckRJxCgX7/G+isVlZiPBiBQqFVe3sMoNSBU38S6GJesKS1ZWEERAUEEWL2AYlOUaHHTJMoAwJiAYBCN894+njzlz5nnOHI4z3XP6eb+qTk0/3b9uvqe6pvjO7/f0p18+y+suBs5trrMA+L/N/u8DpyU5B/hqVa2a1lhuBnw2yVLgcWC3YRevqjOAM2DwNPEsa5IkSdpodfUAyWzfDvICYFfgjiR3Ak9JcscM1/0M8Nmqeh7w7slrV9WJDLIDtwC+n2T3aed9ALgPeD6DGcEFv+8vKEmSNB90NTM4+XaQXRi8HeQYhrwdpKouAXaY/JzkV1W16wzX3Qq4u9leNuW851TVTcBNSV4E7A7cMO28Vc1s5DJgk9/rt5MkSZonWm8G5/B2kCfjvwBfSfIAcCWDZhPg/UleATwB3AJ8A9hxynmfB85varsM+Jcn/YtJUos+/eadui5BUk/4BhIgyUnA65qPx1fVues6xzeQSJKk+WKmN5BsLDmDnUnyOuCFwFJgc+CqJN+oqoe6rUySRvvYBff+bvsjb95xhpGSNLON4g0kT1aSY6fkBk7+HDuL84ZlHL4QuLqqfltV/wLcCLx6Q/8OkiRJG4N5OTNYVScAJ8zhvLUyDoHrgY8m+TjwFOAVwK3rsVxJkqSN1rxsBn9Pa2QcVtXjzRPGPwBWA9cwyBpci6HTkiSpb+blMvHvaa2Mw6o6oaqWVtWBQICfDDuxqs6oqomqmli0aFFrBUuSJG0o49gMTmYcnsMg43CTJNsAJNkT2BP4Zof1SZIktWaslolHZBy+Cji1eT3dQ8AhVfXbDsuUJElqjTmDc2TOoCRJmi9myhkcq2XiJJcleTDJxdP2H5XkjiSVZNuu6pMkSWrbWC0TA6cwiI9597T93wcuBq5quyBJmovPXXDf77bf++btO6xE0nzXy5nBYeHSSfaoqiuAh6ePr6ofV9Wd7VcqSZLUrV7ODA4Ll66qmzsuS5IkaaPTy2awsUa49Pq4oKHTkiSpb3q5TNxYK1z692XotCRJ6ps+N4NrhEt3XIskSdJGqZfN4NRwaeBE4EVJ9kvyXeArwP5JViV5VTP+6CSrgMXAjUnO7Kx4SZKkFhk6PUeGTkuSpPnC0Ol1SLIsycrmZ1nX9UiSJLWlz08Tz0qSpwMfBSaAAq5PclFVPdBtZZI02tlfXf277UPf4gNtkuZurGYGh4VRA+8FvlVV/9w0gN8CXt1tpZIkSe0Yq5nBYWHUwL8Cd00Ztgp4ZgflSZIktW6sZgYbxwEHMlgWPvnJnJjkiCQrkqxYvXr1uk+QJEnayI1jMzg9jPpuYKcpxxc3+9Zi6LQkSeqbcWwGp4dRXw68MsnWSbYGXtnskyRJ6r2x+s7g1DDqJJsAPwCWAsczeI8xwHFV9c9d1ShJktQmQ6fnyNBpSZI0Xxg6DSRZmuSaJLc08TLvmHJs/yQ/SnJDku8l2bXLWiVJktoyTsvEjwCHVdXKJM9gEC59eVU9CJwOvLGqbktyJPARYHmHtUrSjL563i9+t/2Wt23bYSWS5rtezgyOCJdeUFUrAarqHuB+YPKR4AIWNttbAfe0XrQkSVIHejkzOCxcuqpunjyeZG9gAfDTZtfhwKVJfg08BLyk5ZIlSZI60cuZwcbQcOkkOwJnA/+xqp5odn8AeG1VLQa+CJw27IKGTkuSpL7pczM4PVyaJAuBS4Bjq+raZt8i4PlV9cPmvHOBPx52QUOnJUlS3/S5GVwjXDrJAuAC4KyqOm/KuAeArZLs1nw+ELit1UolSZI60svvDI4Ilz4IeBmwTZLlzdDlVXVDkr8Azk/yBIPm8M+7qFuSJKlthk7PkaHTkiRpvjB0GkjyrCnB0rck+cspxy5L8n+a/V9oZhMlSZJ6r5fLxCPcC+xTVY8l2RK4OclFTebg26vqoSQBzgP+A/DlLouVpFG+ce4v1vj8mncYOi1p7no5MzgidHq3qnqsGbI5U373qnqo2dyUQf6ga+eSJGks9LIZrKrrgMnQ6ZNpQqeT7JTkRuAu4KRmVhCAJJczeCvJwwxmByVJknqvl81gY63Q6aq6q6r2BHYFliXZfnJwVb0K2JHBrOF+wy5o6LQkSeqbPjeDa4VOT2pmBG8G9p22/1Hga8Abh13Q0GlJktQ3fW4Gp4dOL06yBUCSrYGXArcn2bJ5RR1JNgVeB/xDRzVLkiS1qpdPE48InX4ucEqSAgKcWlU3NUvFFyWZfKjkO8AXuqpdkiSpTYZOz5Gh05Ikab4wdLrRhEs/mOTiafvPSXJ7kpuT/Lckm3VVoyRJUpt6uUw8g1OApwDvnrb/HOCQZvvvFhp5MAAAEvlJREFUgMOB01usS5Jm7TvnrJlm8IqDfaBN0tz1cmZwWOh0kj2q6goGOYJrqKpLqwH8PbC49aIlSZI60MuZwaq6Lslk6PQWNKHT6zqvWR4+FHjfBi5RkiRpo9DLZrBxHHAd8Chw9CzP+TxwdVV9d9jBJEcARwAsWbJkfdQoSZLUqV4uEzdGhk4Pk+SjwCLgmFFjDJ2WJEl90+dmcI3Q6ZkGJjkceBXwzqp6ooXaJEmSNgq9XCYeFjqdZD/gr4HdgS2TrALeVVWXMwiZ/kfgmiQAX62q4zoqX5IkqTWGTs+RodOSJGm+MHRakiRJQ/VymXiUJJcBLwG+V1Wvn7L/vwN/Cvyy2bW8qm5ov0JJmtk1X1q91r59lvlAm6S5G6tmkNFvIAH4YFWd13I9kiRJnerlMvGTfQOJJEnSuOplM1hV1wGTbyA5mdm9geSEpoH8RJLNN3iRkiRJG4FeNoON44ADgQkGDeFM/jODyJkXAU8HPjRsUJIjkqxIsmL16rW/tyNJkjTf9LkZnPUbSKrq3hp4DPgisPeIcb6BRJIk9Uqfm8En8waSHZs/A7wJWNeSsiRJUi/08mniObyB5Jwki4AANwB/2VXtkjQTY2QkrW+9bAar6izgrGb7ceDFzaErR4zfr6XSJEmSNiq9bAZnkmQhcCtwYVUdleQpwFeA5wCPA1+vqg93WaMkjfLjM+9fa98LDt+ug0ok9UWfvzM4yvHA1dP2nVpVuwMvAP4kyWvaL0uSJKl9vWwGR4VOJ9kL2B745uTYqnqkqr7TbP8G+BGwuJvKJUmS2tXLZeKqui7JZOj0FsD/YLA0fCVwCHDAsPOSPA34M+BTLZUqSZLUqV42g43jgOuAR4GjgSOBS6tq1SBBZk1JNgX+J/DpqvrZsAsmOQI4AmDJkiUbqGxJkqT29LkZnAyd3oxB6PQ+wL5Jjmz2L0jyqykPi5wBrKyqT466YFWd0YxjYmKiNmTxkiRJbehzMzgZOr0LcFJVHTx5IMlyYGKyEUzyMWAr4PAO6pQkSepML5vBUaHTVbVWzmCSxcCxwD8AP2qWkD9bVWe2WrQkzYIxMpLWt1S52jkXExMTtWLFiq7LkCRJWqck11fVxLBjvZwZfDKSvAL4xJRduwMHVdWFHZUkSUPddvp9Q/f/+/ds33Ilkvpk7JvBJmNwKUCSpwN3MCWHUJIkqc96GTo9yqgw6ilD3gZ8o6oe6apGSZKkNo3VzOCwMOqqunnKkIOA0zopTpIkqQNj1Qw2podRA5BkR+B5wOWjTjR0WpIk9c1YLRM3JsOon8ogjHrS24ELqupfR51YVWdU1URVTSxatGgDlylJkrThjWMzOBlGfQ5w0pT972TwOjpJkqSxMVbLxKPCqIGfATsB/7vTAiVpBkbISNoQxqoZrKqzgLOa7ceBF085/MxOipIkSerQWDWDAEkWArcCF1bVUUmeCnx3ypDFDJ4yfn8nBUoaG3d+8ufr5To7v3+H9XIdSeNp7JpB4Hjg6skPVfUwTeg0DF7XAny1g7okSZJa18sHSEaFSyfZC9ieEW8YSbIbsB1rzhRKkiT1Vi9nBoeFSzNYGr4SOAQ4YMSpBwHnVlW1UqgkSVLHetkMNqaHSx8JXFpVq5KMOucg4NBRBw2dliRJfdPnZnAyXHozBuHS+wD7Jjmy2b8gya+q6sMASZ4PbFpV14+6YFWdAZwBMDEx4eyhJEma9/rcDE6GS+8CnFRVB08eSLIcmJhsBBuGTkuSpLHTy2ZwVLh0VV05w2lvB17bToWSZCSMpI1DfFZibiYmJmrFihVdlyFJkrROSa6vqolhx3o5MzhMkmcBFzCI09kM+ExVfWHamIuAZ1fVHh2UKKlH7j357tb+Wzv+J1+gJGnuxqYZBO4F9qmqx5JsCdyc5KKqugcgyVuAX3VaoSRJUsvGJnQa2K2qHmuGbM6U371pDo9hkEsoSZI0Nno5MzgsdLqqbk6yE3AJsCvwwclZQQavqPs48EgnBUuSJHWklzODjeOAA4EJ4GSAqrqrqvZk0AwuS7J9kqXAc6rqgnVdMMkRSVYkWbF69eoNWbskSVIr+twMToZOP5VB6PTvNDOCNwP7MgijnkhyJ/A9YLckVw27YFWdUVUTVTWxaNGiDVi6JElSO/rcDE6GTp8DnJRkcZItAJJsDbwUuL2qTq+qZ1TVzs2+n1TVyzuqWZIkqVW9/M7gsNBp4LnAKUkKCHBqVd3UZZ2S+su4F0nzRS+bwao6Czir2X4ceHFz6PJ1nHcnYMagJEkaG71sBodpHhQ5HVgIPA6cUFXnNse+y+C7hQDbAX9fVW/qpFBJ88bPT72j6xIA2OGvdu26BEnz2Ng0gwxiYw6rqpVJngFcn+TyqnqwqvadHJTkfOBrnVUpSZLUol4+QDIidHpBVa2E3z1NfD+waNp5C4H9gAtbL1qSJKkDvZwZHBU6PXk8yd7AAuCn0059E3BFVT3UWrGSJEkd6mUz2DgOuA54FDh6cmeSHYGzgWVV9cS0c94JnDnqgkmOAI4AWLJkyfquV5IkqXW9XCZurBU63SwDXwIcW1XXTh2cZFtg7+b4UIZOS5KkvulzMzg9dHoBcAFwVlWdN2T824CLq+rRFmuUJEnqVC+XiUeETh8EvAzYJsnyZujyqrqh2T4IOLH1YiXNW0a6SOqDXjaDM4ROnzXDOS/f8JVJkiRtXHrZDI6S5DLgJcD3qur1U/bvAnyZwfcMrwcOrarfdFOl1C8/P+2WrkvovR2OeW7XJUiax/r8ncFhTgEOHbL/JOATVbUr8ADwrlarkiRJ6kgvm8FhodNJ9qiqK4CHp40Ng6DpyYdKvsQgb1CSJKn3erlMvK7Q6Wm2AR6sqt82n1cBz2yhTEmSpM71shlsDA2d/n0YOi1Jkvqml8vEjbVCp0f4J+BpSSYb48XA3cMGGjotSZL6ps/N4Bqh06MGVVUB32EQOg2wDPjaBq9OkiRpI9DLZnBq6DSDIOkXJdkvyXeBrwD7J1mV5FXNKR8CjklyB4MZxb/tpHBJkqSWZTAxpidrYmKiVqxY0XUZkiRJ65Tk+qqaGHaszw+QzFqSx4Gbmo//r6re0GU90nx13yev77qEsbT9+/fqugRJ85jN4MCvq2pp10VIkiS1rZffGRxlVBh113VJkiR1ZaxmBkeFUTfN4Qrgt8CJVXVhp4VKkiS1ZKyawcawMOpnVdXdSZ4NXJnkpqr66fQTDZ2WJEl9M1bLxI21wqir6u7mz58BVwEvGHaiodOSJKlvxrEZXCOMOsnWSTYHSLIt8CfArR3WJ0mS1JqxWiaeGkadZBPgB8B7gHcmeYJBc3xiVdkMSpKksWDo9BwZOi1JkuYLQ6dnkGQpcDqwEHgcOKGqzu22Kml+uu9T13Rdwlja/n37dF2CpHls7JtB4BHgsKpameQZwPVJLq+qB7suTJIkaUMbqwdIhoVOAwuqaiVAVd0D3A/4qLAkSRoLYzUzOCp0evJ4kr2BBcBaGYOSJEl9NFbNYGNY6DRJdgTOBpZV1RPDTjR0WpIk9c1YLRM31gqdTrIQuAQ4tqquHXWiodOSJKlvxrEZnB46vQC4ADirqs7rtDJJkqSWjdUy8YjQ6YOAlwHbJFneDF1eVTd0VKYkSVJrDJ2eI0OnJUnSfGHoNJDkWQyWg/8A2Az4TFV9oTn2DuBYYBPg4qr6UGeFSh2579NXdV2C5mj7o1/edQmS5rGxaQaBe4F9quqxJFsCNzcxM48BpwB7VdXqJF9Ksn9VXdFptZIkSS3o5QMkI8Kld6uqx5ohm/Nvv/uzgZVVtbr5/G3grS2XLEmS1IlezgyOCpdOshODCJldgQ9W1T1Jfg38uyQ7A6uANzEInpYkSeq9XjaDjbXCpavqLmDP5h3EFyY5r6ruS/Ie4FzgCQZPGD9n2AUNnZYkSX3Ty2Xixlrh0pOadxDfDOzbfP56Vb24qvYBbgd+MuyChk5LkqS+6XMzOD1cenGSLQCSbA28lEHjR5Ltpuw/Ejizk4olSZJa1stl4hHh0s8FTklSQIBTq+qm5pRPJXl+s31cVQ2dGZQkSeobQ6fnyNBpSZI0X8wUOt3nZeKhkixMsirJZ6fsuyrJ7UluaH6267JGSZKktvRymXgdjgeuHrL/4Kpyqk8b3P2f/WbXJahntjvqlV2XIGke6+XM4LDQ6SR7JNkL2B7w/8aSJEn0dGZwWOg0cCtwJXAIcMCQ076Y5HHgfOBj5ZcpJUnSGOjlzGDjOOBAYAI4mUFkzKVVtWrI2IOr6nkMcgf3BQ4ddsEkRyRZkWTF6tWrhw2RJEmaV/rcDE4Pnd4HOCrJncCpwGFJTgSoqrubPx8G/g7Ye9gFDZ2WJEl908tl4sZk6PQuwElVdfDkgSTLgYmq+nCSTYGnVdUvkmwGvB74dhcFS5Ikta2XzeCw0Okk+1XVlUOGbw5c3jSCmzBoBP9ri+VKkiR1xtDpOTJ0WpIkzReGTjeSXJbkwSQXT9u/f5IfNYHT30uya1c1SpIktamXy8QzOAV4CvDuaftPB95YVbclORL4CLC85drUU/d/7utdl6Ce2+69f9Z1CZLmsV7ODI4Kna6qK4CHh5xSwMJmeyvgntaKlSRJ6lAvZwaHhU5X1c0znHI4cGmSXwMPAS9poUxJkqTO9XJmsDE9dHomHwBeW1WLgS8Cpw0bZOi0JEnqmz43g9NDp4dKsgh4flX9sNl1LvDHw8YaOi1Jkvqmz83gZOj0OcBJM4x7ANgqyW7N5wOB2zZwbZIkSRuFXn5ncFToNPDXwO7AlklWAe+qqsuT/AVwfpInGDSHf95Z8ZIkSS0ydHqODJ2WJEnzhaHTQJKlSa5pYmZuTPKOKceOSnJHkkqybZd1SpIktamXy8QjPAIcVlUrkzwDuD7J5VX1IPB94GLgqi4LHOb+z5/fdQmSNnLbHfnWrkuQNI/1cmZwWOg0sKCqVgJU1T3A/cCi5vOPq+rO7iqWJEnqRi9nBtcVOp1kb2AB8NOOSpQkSdoo9LIZbBwHXAc8Chw9uTPJjsDZwLKqeuLJXDDJEcARAEuWLFl/lUqSJHWkl8vEjbVCp5MsBC4Bjq2qa5/sBQ2dliRJfdPnZnCN0OkkC4ALgLOq6rxOK5MkSdpI9LIZnBo6DZwIvAg4CHgZsDzJDc3P0mb80U0I9WLgxiRndlW7JElSmwydniNDpyVJ0nwxU+i0zeAcJVkN/GPXdcxj2wK/6LoIrZP3aX7wPs0P3qf5oa/36VlVNfSBB5tBdSLJilH/QtHGw/s0P3if5gfv0/wwjvepl98ZlCRJ0uzYDEqSJI0xm0F15YyuC9CseJ/mB+/T/OB9mh/G7j75nUFJkqQx5sygJEnSGLMZVCuSPD3Jt5KsbP7ceoaxC5OsSvLZNmvU7O5TkqVJrklyS5Ibk7yji1rHUZJXJ7k9yR1JPjzk+OZJzm2O/zDJzu1XqVncp2OS3Nr8/bkiybO6qHPcres+TRn31iSVpLdPGNsMqi0fBq6oqj8Ermg+j3I8cHUrVWm62dynR4DDquq5wKuBTyZ5Wos1jqUkmwCfA14D/BHwziR/NG3Yu4AHqmpX4BPASe1WqVnepx8DE1W1J3AecHK7VWqW94kkTwXeB/yw3QrbZTOotrwR+FKz/SXgTcMGJdkL2B74Zkt1aU3rvE9V9ZOqWtls3wPcDwwNMtV6tTdwR1X9rKp+A3yZwf2aaur9Ow/YP0larFGzuE9V9Z2qeqT5eC2DV6GqXbP5+wSDyYmTgEfbLK5tNoNqy/ZVdW+z/XMGDd8akvwB8HHgr9osTGtY532aKsnewALgpxu6MPFM4K4pn1c1+4aOqarfAr8EtmmlOk2azX2a6l3ANzZoRRpmnfcpyQuBnarqkjYL68KmXReg/kjybWCHIYeOnfqhqirJsMfYjwQurapVTmZsOOvhPk1eZ0fgbGBZVT2xfquU+i/JIcAE8Kdd16I1NZMTpwHLOy6lFTaDWm+q6oBRx5Lcl2THqrq3aSLuHzJsH2DfJEcCWwILkvyqqmb6fqGepPVwn0iyELgEOLaqrt1ApWpNdwM7Tfm8uNk3bMyqJJsCWwH/1E55aszmPpHkAAb/APvTqnqspdr0b9Z1n54K7AFc1UxO7ABclOQNVbWitSpb4jKx2nIRsKzZXgZ8bfqAqjq4qpZU1c4MlorPshFs3TrvU5IFwAUM7s95LdY27q4D/jDJLs09OIjB/Zpq6v17G3BlGSbbtnXepyQvAP4GeENVDf0Hlza4Ge9TVf2yqratqp2b/yddy+B+9a4RBJtBtedE4MAkK4EDms8kmUhyZqeVaarZ3Ke3Ay8Dlie5oflZ2k2546P5DuBRwOXAbcD/qqpbkhyX5A3NsL8FtklyB3AMMz+1rw1glvfpFAarH19p/v5Mb+q1gc3yPo0N30AiSZI0xpwZlCRJGmM2g5IkSWPMZlCSJGmM2QxKkiSNMZtBSZKkMWYzKEmSNMZsBiVJksaYzaAkSdIY+/+itotAZNMNIQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "model_coef = np.argsort(logit_model.coef_[0])[::-1]\n", "importance = logit_model.coef_[0][model_coef]\n", "\n", "plt.figure(figsize=(10,10))\n", "x1=sns.barplot(x=importance, y=list(X_test_ohe.columns[model_coef]))" ] }, { "cell_type": "markdown", "id": "compressed-presence", "metadata": {}, "source": [ "## Random Forest" ] }, { "cell_type": "code", "execution_count": 67, "id": "representative-makeup", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "importances = rf_model.feature_importances_\n", "\n", "std = np.std([tree.feature_importances_ for tree in rf_model.estimators_], axis=0)\n", "indices = np.argsort(importances)[::-1]\n", "\n", "plt.figure(figsize=(20,6))\n", "\n", "plt.title(\"Random Forest feature importances (top 40 features) \")\n", "plt.bar(range(XS_train_ohe.shape[1]),importances[indices],\n", " color=\"#FF5733\", yerr=std[indices], align=\"center\")\n", "plt.xticks(range(XS_train_ohe.shape[1]), X_test_ohe.columns[indices])\n", "plt.xlim([-1, 40])\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "adjustable-humor", "metadata": {}, "source": [ "\n", "Feature importance refers to a class of techniques that measure how useful input features are for predicting a target variable using a given model. The score assigned alludes to the relative importance of a feature in a particular model. It can also used to better understanding a model’s logic or reduce the number of input variables. Feature importance is not defined for KNN algorithms, but it is available for tree models.\n", "\n", "The feature importance result for logistic regression model prioritizes x46, x38, x35, x37 variables whereas in the random forest listed the best/importance variables are x23, x20, x49, x48, x42, x28, x12, x40, x37, x27, x7, x41, x38, x46, x6, x2, x32. Even though there is difference list hierarchy and magnitude both models list x46, x38, x37 as the top important variables. That is both tree models identified similar features as most important for predicting product formulation. This indicates that model performance is consistent and that the appropriate variables are being included in our model for prediction.\n", "\n", "\n", "As depicted in table 7.4.1 random forest is the best model with cost of \\\\$1,049,500\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "lesbian-occupation", "metadata": {}, "source": [ "# Case Conclusions\n", "\n", "The analysis involved building a binary classification model, of unknown dataset, with primary ask to minimize the \\\\$ cost.\n", "\n", "The \\\\$ cost penalized False Negatives (FN) much higher than False Positives (FP), with numbers being \\\\$500 and $10 respectively.\n", "\n", "Logistic Regression, KNN and Random Forest models were tuned for best recall score, and compared for \\\\$ cost performance. It is clear that Random Forest does the best, by providing lowest $ cost, while providing a high Recall value. \n", "\n", "However, a careful analysis of choice of binary classification threshold, indicated that one can minimize the \\\\$ cost further, and achieve False Negative Rate to be ‘0’ for Logistic regression, and just 0.8% for Random Forest. \n", "Interestingly, Logistic regression result indicates that one doesn’t need a sophisticated model to minimize \\\\$ cost. The cost impact of FN vs FP is so skewed, that a very low threshold or effectively assuming all records are TRUE still gives minimal cost. Further, Random Forest model does better than logistic regression by minimizes cost further by ~ \\\\$40,000. \n", "\n", "Above brings to fore a tradeoff between optimizing for minimal \\\\$ cost, vs a good model with high accuracy. One can have a good model, but may not have lowest \\\\$ cost. On the other hand, if \\\\$ cost reduction is the ONLY criterion, we may not need a complicated model, or detailed modeling activity.\n", "The \\\\$ cost penalty for FN and FP have a significant bearing on development, and deployment of the right model. It is very important that we have right estimate of cost of FN and FP. It will be also important to ask if cost of FN and FP can vary, depending on some of the features in the data set. E.g. the cost may be different based on geographical location (note that data set does appear to have a variable encoding geographical area). All of these needs to be considered before considering to productize and use this methodology. \n" ] }, { "cell_type": "markdown", "id": "distinguished-chile", "metadata": {}, "source": [ "# Reference\n", "\n", "- Scikit learn documentation \n", "- StackOverflow\n" ] }, { "cell_type": "markdown", "id": "mobile-parks", "metadata": {}, "source": [ "# Appendix-I - Source code" ] }, { "cell_type": "markdown", "id": "lovely-playback", "metadata": {}, "source": [ "## Logistic Regression Grid Search\n", "\n", "This code was used to run grid search for logisitc regression after applying recursive feature elimination. Grid search takes time to run therefore it was executed separately as background process and results were stored in csv file. " ] }, { "cell_type": "markdown", "id": "interesting-reporter", "metadata": {}, "source": [ "\n", "print('Process Begins')\n", "\n", "X_train = pd.read_csv('data/train_set.csv',sep=',')\n", "X_test = pd.read_csv('data/test_set.csv',sep=',')\n", "y_train_target = pd.read_csv('data/train_target.csv',sep=',')\n", "y_test_target = pd.read_csv('data/test_target.csv',sep=',')\n", "\n", "XS_train_ohe = X_train.values\n", "XS_test_ohe = X_test.values \n", "y_train = y_train_target.values\n", "y_test = y_test_target.values\n", "\n", "\n", "log_model = LogisticRegression(random_state=1999)\n", "\n", "cs07_random_state_ = 1999\n", "\n", "\n", "log_param_grid = [\n", " {'penalty' : ['l2'],\n", " 'C' : [0.1,0.01,0.001,0.0001,0.5,0.6,10],\n", " 'solver' : ['sag','lbfgs'],\n", " 'max_iter' : [500],\n", " 'random_state' : [cs07_random_state_]} \n", " ]\n", "\n", "cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1999)\n", "\n", "print('Running RFE : ')\n", "\n", "rfe = RFE(log_model,n_features_to_select=61, step=1)\n", "rfe = rfe.fit(XS_train_ohe, y_train.flatten())\n", "\n", "print(rfe.support_)\n", "print(rfe.ranking_)\n", "\n", "print('Running grid search : ')\n", "grid_search = GridSearchCV(estimator=log_model, param_grid=log_param_grid, n_jobs=-1, cv=cv, scoring='recall')\n", "\n", "print('Fit grid search : ')\n", "grid_result = grid_search.fit(XS_train_ohe[:,rfe.support_], y_train.flatten())\n", "\n", "print('store logistic grid results')\n", "pickle.dump( grid_result, open( \"data/logistic_final_grid_result.dat\", \"wb\" )) \n", "\n", "print('End of Process')\n", "\n" ] }, { "cell_type": "markdown", "id": "distant-motorcycle", "metadata": {}, "source": [ "## Logistic regression cost matrix" ] }, { "cell_type": "markdown", "id": "organized-reminder", "metadata": {}, "source": [ "\n", "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.006807 189690 0.000000 1.000000 0.401212\n", "1 0.008014 190190 0.000079 1.000000 0.401181\n", "2 0.012172 190690 0.000157 1.000000 0.401149\n", "3 0.014811 190680 0.000157 0.999947 0.401181\n", "4 0.016882 190670 0.000157 0.999895 0.401212\n", "... ... ... ... ... ...\n", "31559 0.949043 6295510 0.990637 0.000053 0.602513\n", "31560 0.949097 6296010 0.990716 0.000053 0.602481\n", "31561 0.949692 6296510 0.990795 0.000053 0.602450\n", "31562 0.950181 6297010 0.990873 0.000053 0.602418\n", "31563 0.950191 6297000 0.990873 0.000000 0.602450\n", "\n", "[31564 rows x 5 columns]\n", "\n", "" ] }, { "cell_type": "markdown", "id": "joined-garlic", "metadata": {}, "source": [ "## KNN - Grid Search" ] }, { "cell_type": "markdown", "id": "stuck-biotechnology", "metadata": {}, "source": [ "\n", "print('Process Begins')\n", "\n", "X_train = pd.read_csv('data/train_set.csv',sep=',')\n", "X_test = pd.read_csv('data/test_set.csv',sep=',')\n", "y_train_target = pd.read_csv('data/train_target.csv',sep=',')\n", "y_test_target = pd.read_csv('data/test_target.csv',sep=',')\n", "\n", "XS_train_ohe = X_train.values\n", "XS_test_ohe = X_test.values \n", "y_train = y_train_target.values\n", "y_test = y_test_target.values\n", "\n", "knn_model = KNeighborsClassifier()\n", "\n", "cs07_random_state_ = 1999\n", "\n", "knn_param_grid = [\n", " {'n_neighbors' : np.arange(5,40,2)} ]\n", "\n", "cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=cs07_random_state_)\n", "\n", "print('Running grid search : ')\n", "grid_search = GridSearchCV(estimator=knn_model, param_grid=knn_param_grid, n_jobs=-1, cv=cv, scoring=['roc_auc','accuracy','f1','recall','precision'],refit='f1')\n", "\n", "print('Fit grid search : ')\n", "grid_result = grid_search.fit(XS_train_ohe, y_train.flatten())\n", "\n", "print('store knn grid results')\n", "pickle.dump( grid_result, open( \"data/knn_final_grid_result.dat\", \"wb\" )) \n", "\n", "print('End of Process')\n", "" ] }, { "cell_type": "markdown", "id": "realistic-conversion", "metadata": {}, "source": [ "## KNN cost matrix" ] }, { "cell_type": "markdown", "id": "swiss-thanksgiving", "metadata": {}, "source": [ "\n", "\n", "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.000000 1896900 0.000000 1.000000 0.401212\n", "1 0.083333 1707600 0.001967 0.893616 0.464124\n", "2 0.166667 1458200 0.008261 0.741051 0.552953\n", "3 0.250000 1252800 0.027695 0.567663 0.648979\n", "4 0.333333 1224900 0.072777 0.401919 0.730137\n", "5 0.416667 1443400 0.150747 0.255891 0.786294\n", "6 0.500000 1914500 0.258694 0.142601 0.810821\n", "7 0.583333 2663100 0.398269 0.069640 0.798510\n", "8 0.666667 3522500 0.545240 0.030313 0.763092\n", "9 0.750000 4380800 0.685838 0.011756 0.717794\n", "10 0.833333 5150600 0.809441 0.003479 0.673159\n", "11 0.916667 5726900 0.900865 0.001002 0.637962\n", "12 1.000000 6123100 0.963493 0.000053 0.613403\n", "\n", "" ] }, { "cell_type": "markdown", "id": "collected-heart", "metadata": {}, "source": [ "## Random Forest Cost matrix\n", "\n", "\n", "\n", "$ Cost and Classification Error Rates for different classification thresholds:\n", " Threshold $Cost FNr FPr Acc\n", "0 0.000 189690 0.000000 1.000000 0.401212\n", "1 0.025 184740 0.000079 0.971269 0.418384\n", "2 0.050 175670 0.000472 0.910275 0.454749\n", "3 0.075 169030 0.001652 0.835732 0.498911\n", "4 0.100 160690 0.002754 0.754863 0.546892\n", "5 0.125 155350 0.004327 0.673994 0.594684\n", "6 0.150 149060 0.005744 0.593389 0.642381\n", "7 0.175 146450 0.007553 0.519005 0.686196\n", "8 0.200 150310 0.010307 0.447098 0.728148\n", "9 0.225 161550 0.013926 0.385102 0.763818\n", "10 0.250 187280 0.019670 0.328325 0.795511\n", "11 0.275 219960 0.026200 0.281828 0.820733\n", "12 0.300 250800 0.032337 0.238811 0.844029\n", "13 0.325 286300 0.039024 0.201908 0.863443\n", "14 0.350 344930 0.049174 0.170963 0.877900\n", "15 0.375 407540 0.059795 0.145184 0.889075\n", "16 0.400 485790 0.072777 0.122779 0.897282\n", "17 0.425 567590 0.086310 0.100638 0.905111\n", "18 0.450 679880 0.104485 0.083716 0.907952\n", "19 0.475 798350 0.123525 0.070378 0.908299\n", "20 0.500 925390 0.143902 0.057409 0.907889\n", "21 0.525 1058610 0.165146 0.048026 0.904984\n", "22 0.550 1205000 0.188434 0.039538 0.900723\n", "23 0.575 1388590 0.217545 0.032105 0.893494\n", "24 0.600 1590880 0.249567 0.025726 0.884466\n", "25 0.625 1795290 0.281904 0.019980 0.874933\n", "26 0.650 2015440 0.316680 0.015499 0.863664\n", "27 0.675 2267340 0.356412 0.012336 0.849616\n", "28 0.700 2523820 0.396853 0.009595 0.835033\n", "29 0.725 2832920 0.445555 0.007486 0.816756\n", "30 0.750 3156030 0.496459 0.005430 0.797563\n", "31 0.775 3490280 0.549095 0.004112 0.777234\n", "32 0.800 3835030 0.603383 0.002794 0.756242\n", "33 0.825 4221900 0.664280 0.002109 0.732220\n", "34 0.850 4610780 0.725492 0.001476 0.708040\n", "35 0.875 4940720 0.777419 0.001160 0.687395\n", "36 0.900 5310120 0.835563 0.000633 0.664383\n", "37 0.925 5660580 0.890716 0.000422 0.642381\n", "38 0.950 5941550 0.934933 0.000264 0.624736\n", "39 0.975 6166020 0.970260 0.000105 0.610657\n", "40 1.000 6298510 0.991109 0.000053 0.602323\n", "\n", "\n", "" ] } ], "metadata": { "finalized": { "timestamp": 1618357973455, "trusted": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "288px" }, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }