{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Logistic Regression" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "pd.options.display.max_columns=None\n", "pd.options.display.max_rows=None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 알고 가야 할 것들\n", "\n", "### 1. Weight of Evidence (WOE)\n", "### 2. Information Value\n", "### 3. VIF\n", "### 4. C 통계량\n", "### 5. AIC\n", "### 6. ROC, AUC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 데이터 셋을 로딩합니다." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data'\n", "dataset = pd.read_csv(path, delimiter=' ', header=None)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1000, 21)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234567891011121314151617181920
0A116A34A431169A65A754A93A1014A12167A143A1522A1731A192A2011
1A1248A32A435951A61A732A92A1012A12122A143A1521A1731A191A2012
2A1412A34A462096A61A742A93A1013A12149A143A1521A1722A191A2011
3A1142A32A427882A61A742A93A1034A12245A143A1531A1732A191A2011
4A1124A33A404870A61A733A93A1014A12453A143A1532A1732A191A2012
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 \\\n", "0 A11 6 A34 A43 1169 A65 A75 4 A93 A101 4 A121 67 A143 A152 \n", "1 A12 48 A32 A43 5951 A61 A73 2 A92 A101 2 A121 22 A143 A152 \n", "2 A14 12 A34 A46 2096 A61 A74 2 A93 A101 3 A121 49 A143 A152 \n", "3 A11 42 A32 A42 7882 A61 A74 2 A93 A103 4 A122 45 A143 A153 \n", "4 A11 24 A33 A40 4870 A61 A73 3 A93 A101 4 A124 53 A143 A153 \n", "\n", " 15 16 17 18 19 20 \n", "0 2 A173 1 A192 A201 1 \n", "1 1 A173 1 A191 A201 2 \n", "2 1 A172 2 A191 A201 1 \n", "3 1 A173 2 A191 A201 1 \n", "4 2 A173 2 A191 A201 2 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "COL = [\n", " 'Status_of_existing_checking_account', \n", " 'Duration_in_month',\n", " 'Credit_history',\n", " 'Purpose', \n", " 'Credit_amount', \n", " 'Savings_account_bonds', \n", " 'Present_employment_since', \n", " 'Installment_rate_in_percentage_of_disposable_income',\n", " 'Personal_status_and_sex',\n", " 'Other_debtors_guarantors',\n", " 'Present_residence_since', \n", " 'Property', \n", " 'Age_in_years', \n", " 'Other_installment_plans', \n", " 'Housing',\n", " 'Number_of_existing_credits_at_this_bank',\n", " 'Job',\n", " 'Number_of_people_being_liable_to_provide_maintenance_for',\n", " 'Telephone',\n", " 'foreign_worker',\n", " 'Target'\n", "]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "dataset.columns = COL" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Status_of_existing_checking_accountDuration_in_monthCredit_historyPurposeCredit_amountSavings_account_bondsPresent_employment_sinceInstallment_rate_in_percentage_of_disposable_incomePersonal_status_and_sexOther_debtors_guarantorsPresent_residence_sincePropertyAge_in_yearsOther_installment_plansHousingNumber_of_existing_credits_at_this_bankJobNumber_of_people_being_liable_to_provide_maintenance_forTelephoneforeign_workerTarget
0A116A34A431169A65A754A93A1014A12167A143A1522A1731A192A2011
1A1248A32A435951A61A732A92A1012A12122A143A1521A1731A191A2012
2A1412A34A462096A61A742A93A1013A12149A143A1521A1722A191A2011
3A1142A32A427882A61A742A93A1034A12245A143A1531A1732A191A2011
4A1124A33A404870A61A733A93A1014A12453A143A1532A1732A191A2012
\n", "
" ], "text/plain": [ " Status_of_existing_checking_account Duration_in_month Credit_history \\\n", "0 A11 6 A34 \n", "1 A12 48 A32 \n", "2 A14 12 A34 \n", "3 A11 42 A32 \n", "4 A11 24 A33 \n", "\n", " Purpose Credit_amount Savings_account_bonds Present_employment_since \\\n", "0 A43 1169 A65 A75 \n", "1 A43 5951 A61 A73 \n", "2 A46 2096 A61 A74 \n", "3 A42 7882 A61 A74 \n", "4 A40 4870 A61 A73 \n", "\n", " Installment_rate_in_percentage_of_disposable_income \\\n", "0 4 \n", "1 2 \n", "2 2 \n", "3 2 \n", "4 3 \n", "\n", " Personal_status_and_sex Other_debtors_guarantors Present_residence_since \\\n", "0 A93 A101 4 \n", "1 A92 A101 2 \n", "2 A93 A101 3 \n", "3 A93 A103 4 \n", "4 A93 A101 4 \n", "\n", " Property Age_in_years Other_installment_plans Housing \\\n", "0 A121 67 A143 A152 \n", "1 A121 22 A143 A152 \n", "2 A121 49 A143 A152 \n", "3 A122 45 A143 A153 \n", "4 A124 53 A143 A153 \n", "\n", " Number_of_existing_credits_at_this_bank Job \\\n", "0 2 A173 \n", "1 1 A173 \n", "2 1 A172 \n", "3 1 A173 \n", "4 2 A173 \n", "\n", " Number_of_people_being_liable_to_provide_maintenance_for Telephone \\\n", "0 1 A192 \n", "1 1 A191 \n", "2 2 A191 \n", "3 2 A191 \n", "4 2 A191 \n", "\n", " foreign_worker Target \n", "0 A201 1 \n", "1 A201 2 \n", "2 A201 1 \n", "3 A201 1 \n", "4 A201 2 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 총 20개의 설명 변수와 1개의 종속 변수\n", "dataset.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Label\n", " - 1 : Good (갚을 수 있다 / 신용도가 좋다) $\\to$ 0으로 변경\n", " - 2 : Bad (갚을 수 없다 / 신용도가 나쁘다) $\\to$ 1로 변경" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Ham(0) / Spam(1)\n", "- 정상사람(0) / 암환자(1)\n", "- 이벤트가 발생하지 않음(0) / 이벤트가 발생함(1)\n", "- 귀무가설(0) / 대립가설(1)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Label을 1/2 -> 0/1로 변경\n", "dataset['Target'] = dataset['Target'] - 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": 
[ "## 1. Information Value (정보 가치)\n", "모델에서 변수의 사용유무를 판단하는 feature selection에서 유용한 방법입니다. 주로 모델을 학습하기 전 첫 단계에서 변수들을 제거하는 데 사용합니다. 최종 모델에서는 대략 10개 내외의 변수를 사용하도록 합니다. IV와 WOE는 신용채무능력이 가능한(good) 고객과 불가능한(bad) 고객을 예측하는 로지스틱 회귀 모델링과 밀접한 관계가 있습니다. 신용 정보 관련분야에서는 good customer는 부채를 갚을 수 있는 고객, bad customer는 부채를 갚을 수 없는 고객을 뜻합니다. 일반적으로 이야기할 때는 good customer는 non-events를 의미하고 bad customer는 events를 의미합니다.\n", "\n", "> 신용 관련 분야\n", "\n", "$\n", "\\begin{align}\n", "&\\text{WOE} = \\ln{\\frac{\\text{distribution of good}}{\\text{distribution of bad}}} \\\\\n", "&\\text{IV} = \\sum{(\\text{WOE} \\times (\\text{distribution of good} - \\text{distribution of bad}))}\\\\\n", "\\end{align}\n", "$\n", "\n", ">일반적\n", "\n", "$\n", "\\begin{align}\n", "&\\text{WOE} = \\ln{\\frac{\\text{distribution of non-events}}{\\text{distribution of events}}} \\\\\n", "&\\text{IV} = \\sum{(\\text{WOE} \\times (\\text{distribution of non-events} - \\text{distribution of events}))}\\\\\n", "\\end{align}\n", "$\n", "\n", "\n", "- 구간을 나눌 때, `pd.qcut`을 사용하였습니다. 값의 순위(rank)를 기준으로 bin의 갯수만큼 동일한 크기의 구간으로 나누고 값을 해당 구간에 mapping 시켜줍니다.\n", "- Information value를 측정할 때, WOE가 inf, -inf가 나올 수 있습니다. 분포값이 0이 될 수 있기 때문입니다. 이 때는 WOE를 0으로 변경합니다.\n", "\n", "\n", "|
Information Value
|
예측력
|\n", "|:-----------:|:------------------------------:|\n", "|
0 to 0.02
|
무의미
|\n", "|
0.02 to 0.1
|
낮은 예측
|\n", "|
0.1 to 0.3
|
중간 예측
|\n", "|
0.3 to 0.5
|
강한 예측
|\n", "|
0.5 to 1
|
너무 강한 예측(의심되는 수치)
|" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "예제)\n", "\n", "" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [ "# Number of quantile bins for numeric columns; tuned by trial so that binning does not hurt overall predictive power.\n", "max_bin = 10\n", "\n", "\n", "def calc_iv(df, col, label, max_bin=max_bin):\n", "    \"\"\"Return the per-bin WOE/IV table, the column name and the total Information Value.\n", "\n", "    Non-numeric and boolean columns are grouped by their distinct values.\n", "    Numeric columns are ranked (ties broken by row order) and the ranks are cut into\n", "    `max_bin` equal-frequency bins, so the bin labels are rank intervals, not value intervals.\n", "\n", "    Parameters\n", "    ----------\n", "    df : pandas.DataFrame containing `col` and `label`.\n", "    col : name of the feature column to evaluate.\n", "    label : name of the binary target column (1 = event / bad, 0 = non-event / good).\n", "    max_bin : number of quantile bins used for numeric columns.\n", "\n", "    Returns\n", "    -------\n", "    tuple of (bin_df, col, iv_val); bin_df has the columns total, abuse, normal,\n", "    normal_dist, abuse_dist, woe and iv, and iv_val is the sum of the iv column.\n", "    \"\"\"\n", "    bin_df = df[[col, label]].copy()\n", "    values = bin_df[col]\n", "    # Decide by dtype family instead of `== 'object'`, so category/string dtypes are never rank-binned.\n", "    is_numeric = pd.api.types.is_numeric_dtype(values) and not pd.api.types.is_bool_dtype(values)\n", "    if is_numeric:\n", "        bin_df['bins'] = pd.qcut(values.rank(method='first'), max_bin)\n", "        group_key = 'bins'\n", "    else:\n", "        group_key = col\n", "    bin_df = bin_df.groupby(group_key)[label].agg(['count', 'sum'])\n", "    bin_df.columns = ['total', 'abuse']\n", "    # Empty bins (e.g. unobserved categories) carry no information; drop them before computing distributions.\n", "    bin_df = bin_df[bin_df['total'] > 0].copy()\n", "    bin_df['normal'] = bin_df['total'] - bin_df['abuse']\n", "    bin_df['normal_dist'] = bin_df['normal'] / bin_df['normal'].sum()\n", "    bin_df['abuse_dist'] = bin_df['abuse'] / bin_df['abuse'].sum()\n", "    # A bin without events or without non-events gives log(inf) or log(0); silence the numpy warning.\n", "    with np.errstate(divide='ignore', invalid='ignore'):\n", "        bin_df['woe'] = np.log(bin_df['normal_dist'] / bin_df['abuse_dist'])\n", "    bin_df['iv'] = bin_df['woe'] * (bin_df['normal_dist'] - bin_df['abuse_dist'])\n", "    # Infinite WOE/IV values are set to 0 so that a single degenerate bin does not dominate the total.\n", "    bin_df = bin_df.replace([np.inf, -np.inf], 0)\n", "    iv_val = float(bin_df['iv'].sum())\n", "\n", "    return bin_df, col, iv_val" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
totalabusenormalnormal_distabuse_distwoeiv
Credit_history
A304025150.0214290.083333-1.3581230.084074
A314928210.0300000.093333-1.1349800.071882
A325301693610.5157140.563333-0.0883190.004206
A338828600.0857140.093333-0.0851580.000649
A34293502430.3471430.1666670.7337410.132423
\n", "
" ], "text/plain": [ " total abuse normal normal_dist abuse_dist woe \\\n", "Credit_history \n", "A30 40 25 15 0.021429 0.083333 -1.358123 \n", "A31 49 28 21 0.030000 0.093333 -1.134980 \n", "A32 530 169 361 0.515714 0.563333 -0.088319 \n", "A33 88 28 60 0.085714 0.093333 -0.085158 \n", "A34 293 50 243 0.347143 0.166667 0.733741 \n", "\n", " iv \n", "Credit_history \n", "A30 0.084074 \n", "A31 0.071882 \n", "A32 0.004206 \n", "A33 0.000649 \n", "A34 0.132423 " ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ch_df, ch, ch_i_val = calc_iv(dataset,'Credit_history', 'Target')\n", "ch_df" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "information value 0.2932335473908263\n" ] } ], "source": [ "# 중간 정도의 예측 능력\n", "print('information value', ch_i_val)" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
totalabusenormalnormal_distabuse_distwoeiv
bins
(0.999, 100.9]10011890.1271430.0366671.2434430.112502
(100.9, 200.8]10020800.1142860.0666670.5389970.025667
(200.8, 300.7]10027730.1042860.0900000.1473250.002105
(300.7, 400.6]10025750.1071430.0833330.2513140.005984
(400.6, 500.5]10026740.1057140.0866670.1986710.003784
(500.5, 600.4]10038620.0885710.126667-0.3577500.013629
(600.4, 700.3]10032680.0971430.106667-0.0935260.000891
(700.3, 800.2]10031690.0985710.103333-0.0471790.000225
(800.2, 900.1]10042580.0828570.140000-0.5245240.029973
(900.1, 1000.0]10048520.0742860.160000-0.7672550.065765
\n", "
" ], "text/plain": [ " total abuse normal normal_dist abuse_dist woe \\\n", "bins \n", "(0.999, 100.9] 100 11 89 0.127143 0.036667 1.243443 \n", "(100.9, 200.8] 100 20 80 0.114286 0.066667 0.538997 \n", "(200.8, 300.7] 100 27 73 0.104286 0.090000 0.147325 \n", "(300.7, 400.6] 100 25 75 0.107143 0.083333 0.251314 \n", "(400.6, 500.5] 100 26 74 0.105714 0.086667 0.198671 \n", "(500.5, 600.4] 100 38 62 0.088571 0.126667 -0.357750 \n", "(600.4, 700.3] 100 32 68 0.097143 0.106667 -0.093526 \n", "(700.3, 800.2] 100 31 69 0.098571 0.103333 -0.047179 \n", "(800.2, 900.1] 100 42 58 0.082857 0.140000 -0.524524 \n", "(900.1, 1000.0] 100 48 52 0.074286 0.160000 -0.767255 \n", "\n", " iv \n", "bins \n", "(0.999, 100.9] 0.112502 \n", "(100.9, 200.8] 0.025667 \n", "(200.8, 300.7] 0.002105 \n", "(300.7, 400.6] 0.005984 \n", "(400.6, 500.5] 0.003784 \n", "(500.5, 600.4] 0.013629 \n", "(600.4, 700.3] 0.000891 \n", "(700.3, 800.2] 0.000225 \n", "(800.2, 900.1] 0.029973 \n", "(900.1, 1000.0] 0.065765 " ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dim_df, dim, dim_i_val = calc_iv(dataset,'Duration_in_month', 'Target')\n", "dim_df" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.2605225223321392" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Medium predictive power: 0.26 lies in the 0.1 to 0.3 band of the IV table.\n", "dim_i_val" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "# Compute the information value of every explanatory variable (the target column is skipped).\n", "col_iv = {}\n", "for feature in dataset.columns:\n", "    if feature == 'Target':\n", "        continue\n", "    # calc_iv returns (bin table, column name, iv); only the IV is needed here.\n", "    _, _, iv = calc_iv(dataset, feature, 'Target')\n", "    col_iv[feature] = iv" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Status_of_existing_checking_account': 0.6660115033513336,\n", " 'Duration_in_month': 0.2605225223321392,\n", " 'Credit_history': 
0.2932335473908263,\n", " 'Purpose': 0.16919506567307832,\n", " 'Credit_amount': 0.11342803024552867,\n", " 'Savings_account_bonds': 0.19600955690422672,\n", " 'Present_employment_since': 0.086433631026641,\n", " 'Installment_rate_in_percentage_of_disposable_income': 0.061554683786294126,\n", " 'Personal_status_and_sex': 0.04467067763379073,\n", " 'Other_debtors_guarantors': 0.032019322019485055,\n", " 'Present_residence_since': 0.04874371881018562,\n", " 'Property': 0.11263826240979674,\n", " 'Age_in_years': 0.10267245670259074,\n", " 'Other_installment_plans': 0.057614541955647885,\n", " 'Housing': 0.08329343361549926,\n", " 'Number_of_existing_credits_at_this_bank': 0.09779114631307396,\n", " 'Job': 0.008762765707428294,\n", " 'Number_of_people_being_liable_to_provide_maintenance_for': 0.03408883520785682,\n", " 'Telephone': 0.0063776050286746735,\n", " 'foreign_worker': 0.04387741201028899,\n", " 'bin_var': 0.03408883520785682}" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "col_iv" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Status_of_existing_checking_account', 0.6660115033513336),\n", " ('Credit_history', 0.2932335473908263),\n", " ('Duration_in_month', 0.2605225223321392),\n", " ('Savings_account_bonds', 0.19600955690422672),\n", " ('Purpose', 0.16919506567307832),\n", " ('Credit_amount', 0.11342803024552867),\n", " ('Property', 0.11263826240979674),\n", " ('Age_in_years', 0.10267245670259074),\n", " ('Number_of_existing_credits_at_this_bank', 0.09779114631307396),\n", " ('Present_employment_since', 0.086433631026641),\n", " ('Housing', 0.08329343361549926),\n", " ('Installment_rate_in_percentage_of_disposable_income', 0.061554683786294126),\n", " ('Other_installment_plans', 0.057614541955647885),\n", " ('Present_residence_since', 0.04874371881018562),\n", " ('Personal_status_and_sex', 0.04467067763379073),\n", " ('foreign_worker', 
0.04387741201028899),\n", " ('Number_of_people_being_liable_to_provide_maintenance_for',\n", " 0.03408883520785682),\n", " ('bin_var', 0.03408883520785682),\n", " ('Other_debtors_guarantors', 0.032019322019485055),\n", " ('Job', 0.008762765707428294),\n", " ('Telephone', 0.0063776050286746735)]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import operator\n", "candidates = sorted(col_iv.items(), key=operator.itemgetter(1), reverse=True)\n", "display(candidates)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Status_of_existing_checking_account가 가장 강력한 예측 능력(의심 될 정도로)를 가졌고, 다음이 Credit_history이다. Telephone은 가장 낮은 예측력을 가지고 있다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "사용할 컬럼은 본인이 선택해야한다. 나은 예측을 위해, 모델을 생성하고, 테스트하고 다시 돌아와서 컬럼을 생성하고를 반복한다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "우선 상위 15개만을 선택하자" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "['Age_in_years',\n", " 'Credit_amount',\n", " 'Credit_history',\n", " 'Duration_in_month',\n", " 'Housing',\n", " 'Installment_rate_in_percentage_of_disposable_income',\n", " 'Number_of_existing_credits_at_this_bank',\n", " 'Other_installment_plans',\n", " 'Personal_status_and_sex',\n", " 'Present_employment_since',\n", " 'Present_residence_since',\n", " 'Property',\n", " 'Purpose',\n", " 'Savings_account_bonds',\n", " 'Status_of_existing_checking_account']" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# 분석하려는 feature의 갯수와 예측력의 trade-off를 조정하였습니다.\n", "iv_cols = [key for key, iv in candidates if iv >= 0.044]\n", "display(len(iv_cols))\n", "display(sorted(iv_cols))" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "# 상수로 설정합니다.\n", "IV_COL = iv_cols[:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Information Value에 의하여 상위 
15개의 feature를 선정하였습니다. 전체 feature를 사용하여 로지스틱 회귀를 구했을 때의 accuracy와 신용불량자에 대한 precision의 감소폭이 적으면서 (전체 데이터셋을 가장 잘 대변해 줄 수 있으면서) feature의 수를 줄이는 과정을 몇 번 테스트 하면서 얻은 값입니다.\n", "\n", "먼저, IV에서 선택된 컬럼들로 이루어진 모델에서부터 시작합니다." ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "# Convert categorical columns to one-hot (dummy) vectors; numeric columns are only recorded by name.\n", "# The dummies are cast to int because pandas >= 2.0 returns bool columns by default, and a frame\n", "# mixing bool and int columns becomes an object array that statsmodels refuses to fit.\n", "cate_features = {}\n", "num_fetures = []\n", "for col in IV_COL:\n", "    if pd.api.types.is_numeric_dtype(dataset[col]):\n", "        num_fetures.append(col)\n", "    else:\n", "        cate_features[col] = pd.get_dummies(dataset[col], prefix=col).astype(int)" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['Status_of_existing_checking_account', 'Credit_history', 'Savings_account_bonds', 'Purpose', 'Property', 'Present_employment_since', 'Housing', 'Other_installment_plans', 'Personal_status_and_sex'])" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_features.keys()" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Status_of_existing_checking_account_A11Status_of_existing_checking_account_A12Status_of_existing_checking_account_A13Status_of_existing_checking_account_A14
01000
10100
20001
31000
41000
\n", "
" ], "text/plain": [ " Status_of_existing_checking_account_A11 \\\n", "0 1 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 1 \n", "\n", " Status_of_existing_checking_account_A12 \\\n", "0 0 \n", "1 1 \n", "2 0 \n", "3 0 \n", "4 0 \n", "\n", " Status_of_existing_checking_account_A13 \\\n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "\n", " Status_of_existing_checking_account_A14 \n", "0 0 \n", "1 0 \n", "2 1 \n", "3 0 \n", "4 0 " ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_features['Status_of_existing_checking_account'].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "더미 변수를 생성하게 되면, 자유도는 더미 변수가 가질 수 있는 unique 값이 $n$ 이라고 했을때 $n-1$이 되므로, 각 변수에서 더미변수 축을 한개씩 제거해준다. 제거하지 않으면 `NaN`값이 발생합니다.\n", "\n", "후진 제거법을 통해 무의미한 변수를 하나씩 제거해 나가기 위해 별도의 리스트를 하나 생성합니다. 이 리스트는 실행이 반복될 때마다 가장 무의미한 변수(p-value가 큰 변수)와 다중공선성을 갖는 변수를 제거 한다." ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "removed_features = []" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "for col, dummies in cate_features.items():\n", " dropped_col = dummies.columns[-1]\n", " removed_features.append(dropped_col)\n", " cate_features[col] = dummies.drop(dropped_col, axis=1)" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Status_of_existing_checking_account_A14',\n", " 'Credit_history_A34',\n", " 'Savings_account_bonds_A65',\n", " 'Purpose_A49',\n", " 'Property_A124',\n", " 'Present_employment_since_A75',\n", " 'Housing_A153',\n", " 'Other_installment_plans_A143',\n", " 'Personal_status_and_sex_A94']" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "removed_features" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Status_of_existing_checking_account_A11Status_of_existing_checking_account_A12Status_of_existing_checking_account_A13
0100
1010
2000
3100
4100
\n", "
" ], "text/plain": [ " Status_of_existing_checking_account_A11 \\\n", "0 1 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 1 \n", "\n", " Status_of_existing_checking_account_A12 \\\n", "0 0 \n", "1 1 \n", "2 0 \n", "3 0 \n", "4 0 \n", "\n", " Status_of_existing_checking_account_A13 \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_features['Status_of_existing_checking_account'].head()" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [], "source": [ "final_dataset = dataset[num_fetures]" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "for col, df in cate_features.items():\n", " final_dataset = pd.concat([final_dataset, df], axis=1)" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1000, 40)" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_dataset.shape" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Duration_in_monthCredit_amountAge_in_yearsNumber_of_existing_credits_at_this_bankInstallment_rate_in_percentage_of_disposable_incomePresent_residence_sinceStatus_of_existing_checking_account_A11Status_of_existing_checking_account_A12Status_of_existing_checking_account_A13Credit_history_A30Credit_history_A31Credit_history_A32Credit_history_A33Savings_account_bonds_A61Savings_account_bonds_A62Savings_account_bonds_A63Savings_account_bonds_A64Purpose_A40Purpose_A41Purpose_A410Purpose_A42Purpose_A43Purpose_A44Purpose_A45Purpose_A46Purpose_A48Property_A121Property_A122Property_A123Present_employment_since_A71Present_employment_since_A72Present_employment_since_A73Present_employment_since_A74Housing_A151Housing_A152Other_installment_plans_A141Other_installment_plans_A142Personal_status_and_sex_A91Personal_status_and_sex_A92Personal_status_and_sex_A93
061169672441000000000000001000010000000100001
1485951221220100010100000001000010000100100010
2122096491230000000100000000001010000010100001
3427882451241000010100000010000001000010000001
4244870532341000001100010000000000000100000001
\n", "
" ], "text/plain": [ " Duration_in_month Credit_amount Age_in_years \\\n", "0 6 1169 67 \n", "1 48 5951 22 \n", "2 12 2096 49 \n", "3 42 7882 45 \n", "4 24 4870 53 \n", "\n", " Number_of_existing_credits_at_this_bank \\\n", "0 2 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 2 \n", "\n", " Installment_rate_in_percentage_of_disposable_income \\\n", "0 4 \n", "1 2 \n", "2 2 \n", "3 2 \n", "4 3 \n", "\n", " Present_residence_since Status_of_existing_checking_account_A11 \\\n", "0 4 1 \n", "1 2 0 \n", "2 3 0 \n", "3 4 1 \n", "4 4 1 \n", "\n", " Status_of_existing_checking_account_A12 \\\n", "0 0 \n", "1 1 \n", "2 0 \n", "3 0 \n", "4 0 \n", "\n", " Status_of_existing_checking_account_A13 Credit_history_A30 \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " Credit_history_A31 Credit_history_A32 Credit_history_A33 \\\n", "0 0 0 0 \n", "1 0 1 0 \n", "2 0 0 0 \n", "3 0 1 0 \n", "4 0 0 1 \n", "\n", " Savings_account_bonds_A61 Savings_account_bonds_A62 \\\n", "0 0 0 \n", "1 1 0 \n", "2 1 0 \n", "3 1 0 \n", "4 1 0 \n", "\n", " Savings_account_bonds_A63 Savings_account_bonds_A64 Purpose_A40 \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 1 \n", "\n", " Purpose_A41 Purpose_A410 Purpose_A42 Purpose_A43 Purpose_A44 \\\n", "0 0 0 0 1 0 \n", "1 0 0 0 1 0 \n", "2 0 0 0 0 0 \n", "3 0 0 1 0 0 \n", "4 0 0 0 0 0 \n", "\n", " Purpose_A45 Purpose_A46 Purpose_A48 Property_A121 Property_A122 \\\n", "0 0 0 0 1 0 \n", "1 0 0 0 1 0 \n", "2 0 1 0 1 0 \n", "3 0 0 0 0 1 \n", "4 0 0 0 0 0 \n", "\n", " Property_A123 Present_employment_since_A71 Present_employment_since_A72 \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " Present_employment_since_A73 Present_employment_since_A74 Housing_A151 \\\n", "0 0 0 0 \n", "1 1 0 0 \n", "2 0 1 0 \n", "3 0 1 0 \n", "4 1 0 0 \n", "\n", " Housing_A152 Other_installment_plans_A141 Other_installment_plans_A142 \\\n", "0 1 0 0 \n", "1 1 0 0 \n", "2 1 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " 
Personal_status_and_sex_A91 Personal_status_and_sex_A92 \\\n", "0 0 0 \n", "1 0 1 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " Personal_status_and_sex_A93 \n", "0 1 \n", "1 0 \n", "2 1 \n", "3 1 \n", "4 1 " ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_dataset.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Training set, test set을 나눕니다. " ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "train_x, test_x, train_y, test_y = \\\n", " train_test_split( \\\n", " final_dataset, dataset['Target'], test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train_set (800, 40)\n", "test_set (200, 40)\n" ] } ], "source": [ "print('train_set', train_x.shape)\n", "print('test_set', test_x.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Statsmodels를 이용하여 로지스틱 회귀를 돌립니다." ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/anaconda3/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. 
Please use the pandas.tseries module instead.\n", " from pandas.core import datetools\n" ] } ], "source": [ "import statsmodels.api as sm" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.460159\n", " Iterations 7\n" ] } ], "source": [ "logistic_model = sm.Logit(\n", " train_y,\n", " sm.add_constant(train_x)\n", ").fit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 성능 측정" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import auc\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [], "source": [ "train_pred = pd.DataFrame({\n", " 'probs': logistic_model.predict(sm.add_constant(train_x)),\n", " 'class': train_y\n", "})\n", "\n", "train_pred['y_pred'] = 0\n", "train_pred.loc[train_pred['probs'] > 0.5, 'y_pred'] = 1\n", "\n", "# Test prediction\n", "test_pred = pd.DataFrame({\n", " 'probs': logistic_model.predict(sm.add_constant(test_x)),\n", " 'class': test_y\n", "})\n", "\n", "test_pred['y_pred'] = 0\n", "test_pred.loc[test_pred['probs'] > 0.5, 'y_pred'] = 1" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Training Confusion matrix:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Actual01All
Predict
0503120623
156121177
All559241800
\n", "
" ], "text/plain": [ "Actual 0 1 All\n", "Predict \n", "0 503 120 623\n", "1 56 121 177\n", "All 559 241 800" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Training Accuracy: 0.78\n", "\n", "Training classification report:\n", " precision recall f1-score support\n", "\n", " 0 0.8074 0.8998 0.8511 559\n", " 1 0.6836 0.5021 0.5789 241\n", "\n", "avg / total 0.7701 0.7800 0.7691 800\n", "\n" ] } ], "source": [ "print('\\nTraining Confusion matrix:')\n", "display(pd.crosstab(train_pred['y_pred'], train_pred['class'], rownames=['Predict'], colnames=['Actual'], margins=True))\n", "print('\\nTraining Accuracy: ', round(accuracy_score(train_pred['class'], train_pred['y_pred']), 4))\n", "print('\\nTraining classification report:\\n', classification_report(train_pred['class'], train_pred['y_pred'], digits=4))" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test Confusion matrix:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Actual01All
Predict
012327150
1183250
All14159200
\n", "
" ], "text/plain": [ "Actual 0 1 All\n", "Predict \n", "0 123 27 150\n", "1 18 32 50\n", "All 141 59 200" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Test Accuracy: 0.775\n", "\n", "Test classification report:\n", " precision recall f1-score support\n", "\n", " 0 0.8200 0.8723 0.8454 141\n", " 1 0.6400 0.5424 0.5872 59\n", "\n", "avg / total 0.7669 0.7750 0.7692 200\n", "\n" ] } ], "source": [ "print('Test Confusion matrix:')\n", "display(pd.crosstab(test_pred['y_pred'], test_pred['class'], rownames=['Predict'], colnames=['Actual'], margins=True))\n", "print('\\nTest Accuracy: ', round(accuracy_score(test_pred['class'], test_pred['y_pred']), 4))\n", "print('\\nTest classification report:\\n', classification_report(test_pred['class'], test_pred['y_pred'], digits=4))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 후진제거법\n", " - 가장 무의미한 변수 (p-value)\n", " - 다중공선성을 갖는 변수 (Variance Inflation Factor)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### (A) P-value\n", "P-value가 가장 높은 변수를 제거합니다. 여기서 P-value는 각 독립변수의 유의성 검정(Wald z-test)에서 얻어진 값입니다." ] }, { "cell_type": "code", "execution_count": 179, "metadata": {}, "outputs": [], "source": [ "unnecesarries = []" ] }, { "cell_type": "code", "execution_count": 180, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: Logit No. Iterations: 7.0000
Dependent Variable: Target Pseudo R-squared: 0.248
Date: 2019-02-25 22:10 AIC: 818.2538
No. Observations: 800 BIC: 1010.3229
Df Model: 40 Log-Likelihood: -368.13
Df Residuals: 759 LL-Null: -489.54
Converged: 1.0000 Scale: 1.0000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
const -3.6675 1.0471 -3.5026 0.0005 -5.7198 -1.6153
Duration_in_month 0.0230 0.0101 2.2759 0.0228 0.0032 0.0429
Credit_amount 0.0001 0.0000 2.4648 0.0137 0.0000 0.0002
Age_in_years -0.0315 0.0104 -3.0316 0.0024 -0.0519 -0.0111
Number_of_existing_credits_at_this_bank 0.3461 0.2075 1.6682 0.0953 -0.0605 0.7528
Installment_rate_in_percentage_of_disposable_income 0.3172 0.0945 3.3582 0.0008 0.1321 0.5024
Present_residence_since 0.0320 0.0948 0.3373 0.7359 -0.1538 0.2178
Status_of_existing_checking_account_A11 1.5522 0.2525 6.1468 0.0000 1.0573 2.0472
Status_of_existing_checking_account_A12 1.1125 0.2474 4.4969 0.0000 0.6276 1.5974
Status_of_existing_checking_account_A13 0.5329 0.4364 1.2211 0.2220 -0.3224 1.3881
Credit_history_A30 1.5371 0.4723 3.2545 0.0011 0.6114 2.4628
Credit_history_A31 1.4798 0.4815 3.0734 0.0021 0.5361 2.4235
Credit_history_A32 0.8741 0.2821 3.0988 0.0019 0.3212 1.4269
Credit_history_A33 0.5364 0.3691 1.4532 0.1462 -0.1871 1.2599
Savings_account_bonds_A61 0.6851 0.2741 2.4997 0.0124 0.1479 1.2224
Savings_account_bonds_A62 0.5970 0.3698 1.6144 0.1064 -0.1278 1.3217
Savings_account_bonds_A63 0.2964 0.4700 0.6305 0.5284 -0.6249 1.2176
Savings_account_bonds_A64 -0.2078 0.5857 -0.3547 0.7228 -1.3558 0.9402
Purpose_A40 0.6410 0.3599 1.7810 0.0749 -0.0644 1.3464
Purpose_A41 -0.9585 0.4654 -2.0593 0.0395 -1.8707 -0.0462
Purpose_A410 -0.6272 0.8378 -0.7486 0.4541 -2.2693 1.0149
Purpose_A42 -0.2957 0.3846 -0.7686 0.4421 -1.0496 0.4582
Purpose_A43 -0.3352 0.3618 -0.9263 0.3543 -1.0443 0.3740
Purpose_A44 0.1854 0.8089 0.2292 0.8187 -1.4000 1.7709
Purpose_A45 0.2867 0.6258 0.4581 0.6469 -0.9398 1.5131
Purpose_A46 0.9372 0.5224 1.7941 0.0728 -0.0867 1.9611
Purpose_A48 -1.2412 1.2458 -0.9964 0.3191 -3.6829 1.2005
Property_A121 -0.6366 0.4530 -1.4053 0.1599 -1.5244 0.2513
Property_A122 -0.2833 0.4461 -0.6349 0.5255 -1.1576 0.5911
Property_A123 -0.2635 0.4302 -0.6126 0.5401 -1.1067 0.5796
Present_employment_since_A71 0.2897 0.4022 0.7203 0.4713 -0.4986 1.0781
Present_employment_since_A72 0.1534 0.3273 0.4686 0.6394 -0.4881 0.7948
Present_employment_since_A73 0.1896 0.2776 0.6830 0.4946 -0.3545 0.7337
Present_employment_since_A74 -0.3076 0.3278 -0.9385 0.3480 -0.9500 0.3348
Housing_A151 0.2009 0.5064 0.3967 0.6916 -0.7916 1.1934
Housing_A152 -0.3416 0.4803 -0.7112 0.4770 -1.2829 0.5997
Other_installment_plans_A141 0.7211 0.2690 2.6809 0.0073 0.1939 1.2483
Other_installment_plans_A142 0.5466 0.4384 1.2467 0.2125 -0.3127 1.4059
Personal_status_and_sex_A91 0.4998 0.5226 0.9563 0.3389 -0.5245 1.5241
Personal_status_and_sex_A92 0.0181 0.3437 0.0527 0.9580 -0.6555 0.6918
Personal_status_and_sex_A93 -0.4830 0.3432 -1.4074 0.1593 -1.1558 0.1897
" ], "text/plain": [ "\n", "\"\"\"\n", " Results: Logit\n", "===================================================================================================\n", "Model: Logit No. Iterations: 7.0000 \n", "Dependent Variable: Target Pseudo R-squared: 0.248 \n", "Date: 2019-02-25 22:10 AIC: 818.2538 \n", "No. Observations: 800 BIC: 1010.3229\n", "Df Model: 40 Log-Likelihood: -368.13 \n", "Df Residuals: 759 LL-Null: -489.54 \n", "Converged: 1.0000 Scale: 1.0000 \n", "---------------------------------------------------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------------------\n", "const -3.6675 1.0471 -3.5026 0.0005 -5.7198 -1.6153\n", "Duration_in_month 0.0230 0.0101 2.2759 0.0228 0.0032 0.0429\n", "Credit_amount 0.0001 0.0000 2.4648 0.0137 0.0000 0.0002\n", "Age_in_years -0.0315 0.0104 -3.0316 0.0024 -0.0519 -0.0111\n", "Number_of_existing_credits_at_this_bank 0.3461 0.2075 1.6682 0.0953 -0.0605 0.7528\n", "Installment_rate_in_percentage_of_disposable_income 0.3172 0.0945 3.3582 0.0008 0.1321 0.5024\n", "Present_residence_since 0.0320 0.0948 0.3373 0.7359 -0.1538 0.2178\n", "Status_of_existing_checking_account_A11 1.5522 0.2525 6.1468 0.0000 1.0573 2.0472\n", "Status_of_existing_checking_account_A12 1.1125 0.2474 4.4969 0.0000 0.6276 1.5974\n", "Status_of_existing_checking_account_A13 0.5329 0.4364 1.2211 0.2220 -0.3224 1.3881\n", "Credit_history_A30 1.5371 0.4723 3.2545 0.0011 0.6114 2.4628\n", "Credit_history_A31 1.4798 0.4815 3.0734 0.0021 0.5361 2.4235\n", "Credit_history_A32 0.8741 0.2821 3.0988 0.0019 0.3212 1.4269\n", "Credit_history_A33 0.5364 0.3691 1.4532 0.1462 -0.1871 1.2599\n", "Savings_account_bonds_A61 0.6851 0.2741 2.4997 0.0124 0.1479 1.2224\n", "Savings_account_bonds_A62 0.5970 0.3698 1.6144 0.1064 -0.1278 1.3217\n", "Savings_account_bonds_A63 0.2964 0.4700 0.6305 0.5284 -0.6249 1.2176\n", 
"Savings_account_bonds_A64 -0.2078 0.5857 -0.3547 0.7228 -1.3558 0.9402\n", "Purpose_A40 0.6410 0.3599 1.7810 0.0749 -0.0644 1.3464\n", "Purpose_A41 -0.9585 0.4654 -2.0593 0.0395 -1.8707 -0.0462\n", "Purpose_A410 -0.6272 0.8378 -0.7486 0.4541 -2.2693 1.0149\n", "Purpose_A42 -0.2957 0.3846 -0.7686 0.4421 -1.0496 0.4582\n", "Purpose_A43 -0.3352 0.3618 -0.9263 0.3543 -1.0443 0.3740\n", "Purpose_A44 0.1854 0.8089 0.2292 0.8187 -1.4000 1.7709\n", "Purpose_A45 0.2867 0.6258 0.4581 0.6469 -0.9398 1.5131\n", "Purpose_A46 0.9372 0.5224 1.7941 0.0728 -0.0867 1.9611\n", "Purpose_A48 -1.2412 1.2458 -0.9964 0.3191 -3.6829 1.2005\n", "Property_A121 -0.6366 0.4530 -1.4053 0.1599 -1.5244 0.2513\n", "Property_A122 -0.2833 0.4461 -0.6349 0.5255 -1.1576 0.5911\n", "Property_A123 -0.2635 0.4302 -0.6126 0.5401 -1.1067 0.5796\n", "Present_employment_since_A71 0.2897 0.4022 0.7203 0.4713 -0.4986 1.0781\n", "Present_employment_since_A72 0.1534 0.3273 0.4686 0.6394 -0.4881 0.7948\n", "Present_employment_since_A73 0.1896 0.2776 0.6830 0.4946 -0.3545 0.7337\n", "Present_employment_since_A74 -0.3076 0.3278 -0.9385 0.3480 -0.9500 0.3348\n", "Housing_A151 0.2009 0.5064 0.3967 0.6916 -0.7916 1.1934\n", "Housing_A152 -0.3416 0.4803 -0.7112 0.4770 -1.2829 0.5997\n", "Other_installment_plans_A141 0.7211 0.2690 2.6809 0.0073 0.1939 1.2483\n", "Other_installment_plans_A142 0.5466 0.4384 1.2467 0.2125 -0.3127 1.4059\n", "Personal_status_and_sex_A91 0.4998 0.5226 0.9563 0.3389 -0.5245 1.5241\n", "Personal_status_and_sex_A92 0.0181 0.3437 0.0527 0.9580 -0.6555 0.6918\n", "Personal_status_and_sex_A93 -0.4830 0.3432 -1.4074 0.1593 -1.1558 0.1897\n", "===================================================================================================\n", "\n", "\"\"\"" ] }, "execution_count": 180, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logistic_model.summary2()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "가장 무의미한 변수는 P값이 0.9580인 Personal_status_and_sex_A92이다." 
def get_unrelated_cols(model, pvalue, exclude=('const',)):
    """Return the names of model terms whose p-value is at or above a threshold.

    Parameters
    ----------
    model
        Fitted results object exposing ``pvalues`` as a label-indexed
        pandas Series (e.g. the result of ``sm.Logit(...).fit()``).
    pvalue : float
        Inclusive lower bound; a term is reported when ``p >= pvalue``.
    exclude : iterable of str, optional
        Labels that are never reported. Defaults to ``('const',)``: the
        intercept added by ``sm.add_constant`` is not a column of the design
        matrix, so returning it would make a later
        ``train_x.drop(cols, axis=1)`` raise ``KeyError``. Pass ``()`` to get
        every label, including the intercept.

    Returns
    -------
    list of str
        Matching labels, in the order they appear in ``model.pvalues``.

    Side effects
    ------------
    Prints the number of matching labels and then the list itself.
    """
    skipped = set(exclude)
    pvalues = model.pvalues
    weak_terms = pvalues[pvalues >= pvalue]
    cols = [name for name in weak_terms.index.tolist() if name not in skipped]

    print(len(cols))
    print(cols)

    return cols
def get_max_vif(df, removal_cols):
    """Compute the variance inflation factor (VIF) of every remaining column.

    Each column left after dropping ``removal_cols`` is regressed (OLS with an
    added constant) on all the other remaining columns, and its VIF is
    ``1 / (1 - R^2)`` of that regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Design matrix of explanatory variables (without the intercept).
    removal_cols : list of str
        Columns to leave out before computing VIFs. Every label must exist
        in ``df``; ``DataFrame.drop`` raises ``KeyError`` otherwise.

    Returns
    -------
    list of (str, float)
        ``(column, vif)`` pairs with the VIF rounded to 3 decimals, sorted
        from highest to lowest VIF.
    """
    # Drop the excluded columns once; the original rebuilt this frame twice
    # per loop iteration although it does not change inside the loop.
    candidates = df.drop(removal_cols, axis=1)
    cnames = candidates.columns.tolist()

    vifs = []
    for i, yvar in enumerate(cnames):
        xvar = cnames[:i] + cnames[i + 1:]
        res = sm.OLS(candidates[yvar], sm.add_constant(candidates[xvar])).fit()
        # NOTE(review): a perfectly collinear column would give rsquared == 1
        # and a division by zero here -- confirm whether that can occur.
        vif = 1 / (1 - res.rsquared)
        vifs.append((yvar, round(vif, 3)))

    # A lambda key keeps the function independent of an `operator` import.
    return sorted(vifs, key=lambda pair: pair[1], reverse=True)
('Present_employment_since_A71', 1.314),\n", " ('Installment_rate_in_percentage_of_disposable_income', 1.305),\n", " ('Savings_account_bonds_A64', 1.27),\n", " ('Purpose_A45', 1.26),\n", " ('Credit_history_A30', 1.259),\n", " ('Purpose_A410', 1.216),\n", " ('Purpose_A44', 1.191),\n", " ('Purpose_A48', 1.146),\n", " ('Status_of_existing_checking_account_A13', 1.14),\n", " ('Other_installment_plans_A141', 1.134),\n", " ('Other_installment_plans_A142', 1.107)]" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vifs" ] }, { "cell_type": "code", "execution_count": 177, "metadata": {}, "outputs": [], "source": [ "unnecesarries.append(vifs[0][0])" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Personal_status_and_sex_A92', 'Housing_A152']" ] }, "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], "source": [ "unnecesarries" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "무의미한 변수(가장 높은 p-value)는 Personal_status_and_sex_A92였고 pvalue가 0.9580, VIF가 3.302이다.\n", "\n", "다중공선성이 가장 높았던 변수는 Housing_A152였고 pvalue가 0.4770, VIF가 5.846이다.\n", "\n", "**무의미한 변수를 먼저 삭제해야하며, 지우기 전에, 두 변수의 pvalue와 VIF를 각각 비교해 보는 것이 좋다**.\n", "\n", "확실히 Personal_status_and_sex_A92는 삭제해야 한다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "변수를 삭제하기 앞서 모델 채택 기준을 살펴봐야한다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. 모델 채택 기준\n", "### (A) C통계량 (concordance statistics)\n", " - 로지스틱 회귀 모델에서 이항 결과의 적합도(goodness of fit)에 관한 품질을 측정하는 척도\n", " - ROC curve의 아래 면적(AUC)과 동일하다."
def get_c_stat(iv_pred):
    """Compute the C statistic (concordance index) of predicted probabilities.

    Every (class 0 row, class 1 row) pair is compared. A pair is concordant
    when the class-1 row has the higher ``probs`` value and discordant when it
    has the lower one; ties count as neither. The result is
    ``0.5 + (concordant_share - discordant_share) / 2``.

    Parameters
    ----------
    iv_pred : pandas.DataFrame
        Must contain a ``'class'`` column (0 / 1 labels) and a ``'probs'``
        column (predicted scores). The frame is not modified.

    Returns
    -------
    float
        The C statistic, or NaN when either class has no rows (no pairs
        exist, so the statistic is undefined).
    """
    # Pull the scores out as plain arrays. The original added a dummy `key`
    # column to filtered slices (SettingWithCopyWarning) and materialised the
    # full cross join as a DataFrame just to compare two columns.
    negative_probs = iv_pred.loc[iv_pred['class'] == 0, 'probs'].values
    positive_probs = iv_pred.loc[iv_pred['class'] == 1, 'probs'].values

    n_pairs = negative_probs.size * positive_probs.size
    if n_pairs == 0:
        return float('nan')

    # Broadcasting compares every negative score against every positive one.
    neg_column = negative_probs[:, None]
    pos_row = positive_probs[None, :]
    concordance = np.count_nonzero(neg_column < pos_row) / n_pairs
    in_concordance = np.count_nonzero(neg_column > pos_row) / n_pairs

    return 0.5 + (concordance - in_concordance) / 2
def get_aic_value(model):
    """Return the Akaike information criterion, ``-2 * ln(L) + 2 * k``.

    ``k`` is the number of estimated parameters, i.e. every entry of
    ``model.params`` including the intercept. The original used
    ``len(model.params) - 1``, which left the intercept out and gave a value
    2 lower than the AIC printed by ``summary2()`` for the same model
    (816.25 against 818.25 for the first fit in this notebook).

    Parameters
    ----------
    model
        Fitted results object exposing ``llf`` (log-likelihood) and
        ``params`` (sized collection of estimated coefficients).

    Returns
    -------
    float
        The AIC; smaller values indicate a better trade-off between fit and
        model size.
    """
    n_params = len(model.params)
    return -2 * model.llf + 2 * n_params
"train_pred['y_pred'] = 0\n", "train_pred.loc[train_pred['probs'] > 0.5, 'y_pred'] = 1\n", "\n", "# Test prediction\n", "test_pred = pd.DataFrame({\n", " 'probs': drop_logistic_model.predict(sm.add_constant(test_x.drop(unnecesarries, axis=1))),\n", " 'class': test_y\n", "})\n", "\n", "test_pred['y_pred'] = 0\n", "test_pred.loc[test_pred['probs'] > 0.5, 'y_pred'] = 1" ] }, { "cell_type": "code", "execution_count": 214, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c_stat: 0.8227198836095874\n", "aic: 812.7605967161102\n", "loglikehood: -368.3802983580551\n" ] } ], "source": [ "c_stat, aic = get_c_stat(train_pred), get_aic_value(drop_logistic_model)\n", "print('c_stat:', c_stat)\n", "print('aic:', aic)\n", "print('loglikehood:', drop_logistic_model.llf)" ] }, { "cell_type": "code", "execution_count": 215, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c_stat: 0.8101935328765477\n", "aic: 812.7605967161102\n", "loglikehood: -368.3802983580551\n" ] } ], "source": [ "c_stat, aic = get_c_stat(test_pred), get_aic_value(drop_logistic_model)\n", "print('c_stat:', c_stat)\n", "print('aic:', aic)\n", "print('loglikehood:', drop_logistic_model.llf)" ] }, { "cell_type": "code", "execution_count": 206, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: Logit No. Iterations: 7.0000
Dependent Variable: Target Pseudo R-squared: 0.247
Date: 2019-02-25 22:24 AIC: 814.7606
No. Observations: 800 BIC: 997.4605
Df Model: 38 Log-Likelihood: -368.38
Df Residuals: 761 LL-Null: -489.54
Converged: 1.0000 Scale: 1.0000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
const -3.8574 0.9727 -3.9656 0.0001 -5.7638 -1.9509
Duration_in_month 0.0231 0.0101 2.2787 0.0227 0.0032 0.0429
Credit_amount 0.0001 0.0000 2.4931 0.0127 0.0000 0.0002
Age_in_years -0.0306 0.0103 -2.9737 0.0029 -0.0508 -0.0104
Number_of_existing_credits_at_this_bank 0.3431 0.2072 1.6559 0.0977 -0.0630 0.7492
Installment_rate_in_percentage_of_disposable_income 0.3180 0.0944 3.3681 0.0008 0.1330 0.5031
Present_residence_since 0.0415 0.0935 0.4440 0.6570 -0.1417 0.2248
Status_of_existing_checking_account_A11 1.5561 0.2523 6.1684 0.0000 1.0617 2.0506
Status_of_existing_checking_account_A12 1.1076 0.2469 4.4869 0.0000 0.6238 1.5915
Status_of_existing_checking_account_A13 0.5488 0.4351 1.2612 0.2072 -0.3041 1.4016
Credit_history_A30 1.5543 0.4713 3.2982 0.0010 0.6307 2.4780
Credit_history_A31 1.4789 0.4809 3.0756 0.0021 0.5365 2.4214
Credit_history_A32 0.8853 0.2813 3.1471 0.0016 0.3340 1.4367
Credit_history_A33 0.5490 0.3686 1.4893 0.1364 -0.1735 1.2715
Savings_account_bonds_A61 0.6956 0.2737 2.5413 0.0110 0.1591 1.2321
Savings_account_bonds_A62 0.5991 0.3697 1.6203 0.1052 -0.1256 1.3237
Savings_account_bonds_A63 0.2976 0.4698 0.6335 0.5264 -0.6231 1.2183
Savings_account_bonds_A64 -0.2150 0.5863 -0.3668 0.7138 -1.3641 0.9340
Purpose_A40 0.6528 0.3596 1.8154 0.0695 -0.0520 1.3576
Purpose_A41 -0.9272 0.4621 -2.0067 0.0448 -1.8329 -0.0216
Purpose_A410 -0.6423 0.8345 -0.7698 0.4414 -2.2779 0.9932
Purpose_A42 -0.2835 0.3841 -0.7380 0.4605 -1.0364 0.4694
Purpose_A43 -0.3321 0.3613 -0.9192 0.3580 -1.0402 0.3760
Purpose_A44 0.1707 0.8124 0.2102 0.8335 -1.4216 1.7630
Purpose_A45 0.2766 0.6279 0.4406 0.6595 -0.9540 1.5073
Purpose_A46 0.9581 0.5227 1.8327 0.0668 -0.0665 1.9826
Purpose_A48 -1.2385 1.2279 -1.0086 0.3132 -3.6451 1.1682
Property_A121 -0.8475 0.3403 -2.4908 0.0127 -1.5144 -0.1806
Property_A122 -0.5012 0.3230 -1.5515 0.1208 -1.1342 0.1319
Property_A123 -0.4812 0.3012 -1.5973 0.1102 -1.0716 0.1092
Present_employment_since_A71 0.3068 0.4011 0.7649 0.4444 -0.4793 1.0929
Present_employment_since_A72 0.1538 0.3268 0.4706 0.6380 -0.4867 0.7943
Present_employment_since_A73 0.1967 0.2771 0.7097 0.4779 -0.3464 0.7398
Present_employment_since_A74 -0.3067 0.3273 -0.9372 0.3487 -0.9482 0.3348
Housing_A151 0.5122 0.2563 1.9984 0.0457 0.0099 1.0146
Other_installment_plans_A141 0.7157 0.2689 2.6619 0.0078 0.1887 1.2427
Other_installment_plans_A142 0.5353 0.4381 1.2218 0.2218 -0.3234 1.3939
Personal_status_and_sex_A91 0.4805 0.4459 1.0775 0.2813 -0.3935 1.3544
Personal_status_and_sex_A93 -0.4934 0.2122 -2.3255 0.0200 -0.9092 -0.0776
" ], "text/plain": [ "\n", "\"\"\"\n", " Results: Logit\n", "===================================================================================================\n", "Model: Logit No. Iterations: 7.0000 \n", "Dependent Variable: Target Pseudo R-squared: 0.247 \n", "Date: 2019-02-25 22:24 AIC: 814.7606\n", "No. Observations: 800 BIC: 997.4605\n", "Df Model: 38 Log-Likelihood: -368.38 \n", "Df Residuals: 761 LL-Null: -489.54 \n", "Converged: 1.0000 Scale: 1.0000 \n", "---------------------------------------------------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------------------\n", "const -3.8574 0.9727 -3.9656 0.0001 -5.7638 -1.9509\n", "Duration_in_month 0.0231 0.0101 2.2787 0.0227 0.0032 0.0429\n", "Credit_amount 0.0001 0.0000 2.4931 0.0127 0.0000 0.0002\n", "Age_in_years -0.0306 0.0103 -2.9737 0.0029 -0.0508 -0.0104\n", "Number_of_existing_credits_at_this_bank 0.3431 0.2072 1.6559 0.0977 -0.0630 0.7492\n", "Installment_rate_in_percentage_of_disposable_income 0.3180 0.0944 3.3681 0.0008 0.1330 0.5031\n", "Present_residence_since 0.0415 0.0935 0.4440 0.6570 -0.1417 0.2248\n", "Status_of_existing_checking_account_A11 1.5561 0.2523 6.1684 0.0000 1.0617 2.0506\n", "Status_of_existing_checking_account_A12 1.1076 0.2469 4.4869 0.0000 0.6238 1.5915\n", "Status_of_existing_checking_account_A13 0.5488 0.4351 1.2612 0.2072 -0.3041 1.4016\n", "Credit_history_A30 1.5543 0.4713 3.2982 0.0010 0.6307 2.4780\n", "Credit_history_A31 1.4789 0.4809 3.0756 0.0021 0.5365 2.4214\n", "Credit_history_A32 0.8853 0.2813 3.1471 0.0016 0.3340 1.4367\n", "Credit_history_A33 0.5490 0.3686 1.4893 0.1364 -0.1735 1.2715\n", "Savings_account_bonds_A61 0.6956 0.2737 2.5413 0.0110 0.1591 1.2321\n", "Savings_account_bonds_A62 0.5991 0.3697 1.6203 0.1052 -0.1256 1.3237\n", "Savings_account_bonds_A63 0.2976 0.4698 0.6335 0.5264 -0.6231 1.2183\n", 
"Savings_account_bonds_A64 -0.2150 0.5863 -0.3668 0.7138 -1.3641 0.9340\n", "Purpose_A40 0.6528 0.3596 1.8154 0.0695 -0.0520 1.3576\n", "Purpose_A41 -0.9272 0.4621 -2.0067 0.0448 -1.8329 -0.0216\n", "Purpose_A410 -0.6423 0.8345 -0.7698 0.4414 -2.2779 0.9932\n", "Purpose_A42 -0.2835 0.3841 -0.7380 0.4605 -1.0364 0.4694\n", "Purpose_A43 -0.3321 0.3613 -0.9192 0.3580 -1.0402 0.3760\n", "Purpose_A44 0.1707 0.8124 0.2102 0.8335 -1.4216 1.7630\n", "Purpose_A45 0.2766 0.6279 0.4406 0.6595 -0.9540 1.5073\n", "Purpose_A46 0.9581 0.5227 1.8327 0.0668 -0.0665 1.9826\n", "Purpose_A48 -1.2385 1.2279 -1.0086 0.3132 -3.6451 1.1682\n", "Property_A121 -0.8475 0.3403 -2.4908 0.0127 -1.5144 -0.1806\n", "Property_A122 -0.5012 0.3230 -1.5515 0.1208 -1.1342 0.1319\n", "Property_A123 -0.4812 0.3012 -1.5973 0.1102 -1.0716 0.1092\n", "Present_employment_since_A71 0.3068 0.4011 0.7649 0.4444 -0.4793 1.0929\n", "Present_employment_since_A72 0.1538 0.3268 0.4706 0.6380 -0.4867 0.7943\n", "Present_employment_since_A73 0.1967 0.2771 0.7097 0.4779 -0.3464 0.7398\n", "Present_employment_since_A74 -0.3067 0.3273 -0.9372 0.3487 -0.9482 0.3348\n", "Housing_A151 0.5122 0.2563 1.9984 0.0457 0.0099 1.0146\n", "Other_installment_plans_A141 0.7157 0.2689 2.6619 0.0078 0.1887 1.2427\n", "Other_installment_plans_A142 0.5353 0.4381 1.2218 0.2218 -0.3234 1.3939\n", "Personal_status_and_sex_A91 0.4805 0.4459 1.0775 0.2813 -0.3935 1.3544\n", "Personal_status_and_sex_A93 -0.4934 0.2122 -2.3255 0.0200 -0.9092 -0.0776\n", "===================================================================================================\n", "\n", "\"\"\"" ] }, "execution_count": 206, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_logistic_model.summary2()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. 무의미한 변수와 다중공선성이 있는 변수가 없을때까지 반복한다. (기준은 C통계량, AIC, Likelihood)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "원래는 이 과정을 각각 변수를 확인하면서, 변수를 하나씩 제거해야 한다. 하지만 시간 관계상.." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Step 1. Pvalue" ] }, { "cell_type": "code", "execution_count": 218, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6\n", "['Present_residence_since', 'Savings_account_bonds_A63', 'Savings_account_bonds_A64', 'Purpose_A44', 'Purpose_A45', 'Present_employment_since_A72']\n" ] } ], "source": [ "# Pvalue를 확인하고, 상한선을 구한다.\n", "unrelated_cols = get_unrelated_cols(drop_logistic_model, 0.5)" ] }, { "cell_type": "code", "execution_count": 219, "metadata": {}, "outputs": [], "source": [ "unnecesarries.extend(unrelated_cols)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Step 2. VIF" ] }, { "cell_type": "code", "execution_count": 221, "metadata": {}, "outputs": [], "source": [ "vifs = get_max_vif(train_x, unnecesarries)" ] }, { "cell_type": "code", "execution_count": 222, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Property_A121', 2.58),\n", " ('Property_A123', 2.487),\n", " ('Purpose_A43', 2.431),\n", " ('Credit_amount', 2.354),\n", " ('Purpose_A40', 2.32),\n", " ('Property_A122', 2.283),\n", " ('Purpose_A42', 2.111),\n", " ('Credit_history_A32', 1.977),\n", " ('Duration_in_month', 1.947),\n", " ('Purpose_A41', 1.79)]" ] }, "execution_count": 222, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vifs[:10]" ] }, { "cell_type": "code", "execution_count": 226, "metadata": {}, "outputs": [], "source": [ "unnecesarries.extend([elem[0] for elem in vifs[:10]])" ] }, { "cell_type": "code", "execution_count": 228, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.506885\n", " Iterations 6\n" ] } ], "source": [ "drop_logistic_model = sm.Logit(\n", " train_y,\n", " sm.add_constant(train_x.drop(unnecesarries, axis=1))\n", ").fit()" ] }, { "cell_type": "code", "execution_count": 229, "metadata": {}, "outputs": [], "source": [ 
"train_pred = pd.DataFrame({\n", " 'probs': drop_logistic_model.predict(sm.add_constant(train_x.drop(unnecesarries, axis=1))),\n", " 'class': train_y\n", "})\n", "\n", "train_pred['y_pred'] = 0\n", "train_pred.loc[train_pred['probs'] > 0.5, 'y_pred'] = 1\n", "\n", "# Test prediction\n", "test_pred = pd.DataFrame({\n", " 'probs': drop_logistic_model.predict(sm.add_constant(test_x.drop(unnecesarries, axis=1))),\n", " 'class': test_y\n", "})\n", "\n", "test_pred['y_pred'] = 0\n", "test_pred.loc[test_pred['probs'] > 0.5, 'y_pred'] = 1" ] }, { "cell_type": "code", "execution_count": 230, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c_stat: 0.7741112983320838\n", "aic: 855.0167324752877\n", "loglikehood: -405.5083662376438\n" ] } ], "source": [ "c_stat, aic = get_c_stat(train_pred), get_aic_value(drop_logistic_model)\n", "print('c_stat:', c_stat)\n", "print('aic:', aic)\n", "print('loglikehood:', drop_logistic_model.llf)" ] }, { "cell_type": "code", "execution_count": 231, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c_stat: 0.7756941940137035\n", "aic: 855.0167324752877\n", "loglikehood: -405.5083662376438\n" ] } ], "source": [ "c_stat, aic = get_c_stat(test_pred), get_aic_value(drop_logistic_model)\n", "print('c_stat:', c_stat)\n", "print('aic:', aic)\n", "print('loglikehood:', drop_logistic_model.llf)" ] }, { "cell_type": "code", "execution_count": 232, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: Logit No. Iterations: 6.0000
Dependent Variable: Target Pseudo R-squared: 0.172
Date: 2019-02-25 22:41 AIC: 857.0167
No. Observations: 800 BIC: 964.7628
Df Model: 22 Log-Likelihood: -405.51
Df Residuals: 777 LL-Null: -489.54
Converged: 1.0000 Scale: 1.0000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
const -2.1483 0.5231 -4.1067 0.0000 -3.1737 -1.1230
Age_in_years -0.0256 0.0087 -2.9609 0.0031 -0.0426 -0.0087
Number_of_existing_credits_at_this_bank -0.0049 0.1631 -0.0303 0.9758 -0.3245 0.3146
Installment_rate_in_percentage_of_disposable_income 0.1913 0.0806 2.3732 0.0176 0.0333 0.3494
Status_of_existing_checking_account_A11 1.6818 0.2321 7.2451 0.0000 1.2268 2.1367
Status_of_existing_checking_account_A12 1.2142 0.2300 5.2782 0.0000 0.7633 1.6650
Status_of_existing_checking_account_A13 0.4189 0.4152 1.0090 0.3130 -0.3948 1.2326
Credit_history_A30 1.4395 0.4109 3.5034 0.0005 0.6342 2.2449
Credit_history_A31 1.0135 0.3945 2.5689 0.0102 0.2402 1.7868
Credit_history_A33 0.3081 0.3093 0.9960 0.3193 -0.2982 0.9143
Savings_account_bonds_A61 0.5528 0.2132 2.5935 0.0095 0.1350 0.9706
Savings_account_bonds_A62 0.6598 0.3173 2.0793 0.0376 0.0379 1.2817
Purpose_A410 0.0894 0.6874 0.1301 0.8965 -1.2578 1.4367
Purpose_A46 1.1777 0.3936 2.9919 0.0028 0.4062 1.9492
Purpose_A48 -1.6748 1.2032 -1.3919 0.1639 -4.0330 0.6835
Present_employment_since_A71 0.3893 0.3416 1.1397 0.2544 -0.2802 1.0589
Present_employment_since_A73 0.0832 0.2046 0.4069 0.6841 -0.3177 0.4842
Present_employment_since_A74 -0.2333 0.2595 -0.8990 0.3686 -0.7420 0.2754
Housing_A151 0.4031 0.2296 1.7556 0.0791 -0.0469 0.8531
Other_installment_plans_A141 0.6645 0.2541 2.6149 0.0089 0.1664 1.1625
Other_installment_plans_A142 0.4337 0.4208 1.0306 0.3027 -0.3911 1.2585
Personal_status_and_sex_A91 0.4378 0.4224 1.0365 0.3000 -0.3900 1.2657
Personal_status_and_sex_A93 -0.2726 0.1908 -1.4281 0.1533 -0.6466 0.1015
" ], "text/plain": [ "\n", "\"\"\"\n", " Results: Logit\n", "===================================================================================================\n", "Model: Logit No. Iterations: 6.0000 \n", "Dependent Variable: Target Pseudo R-squared: 0.172 \n", "Date: 2019-02-25 22:41 AIC: 857.0167\n", "No. Observations: 800 BIC: 964.7628\n", "Df Model: 22 Log-Likelihood: -405.51 \n", "Df Residuals: 777 LL-Null: -489.54 \n", "Converged: 1.0000 Scale: 1.0000 \n", "---------------------------------------------------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------------------\n", "const -2.1483 0.5231 -4.1067 0.0000 -3.1737 -1.1230\n", "Age_in_years -0.0256 0.0087 -2.9609 0.0031 -0.0426 -0.0087\n", "Number_of_existing_credits_at_this_bank -0.0049 0.1631 -0.0303 0.9758 -0.3245 0.3146\n", "Installment_rate_in_percentage_of_disposable_income 0.1913 0.0806 2.3732 0.0176 0.0333 0.3494\n", "Status_of_existing_checking_account_A11 1.6818 0.2321 7.2451 0.0000 1.2268 2.1367\n", "Status_of_existing_checking_account_A12 1.2142 0.2300 5.2782 0.0000 0.7633 1.6650\n", "Status_of_existing_checking_account_A13 0.4189 0.4152 1.0090 0.3130 -0.3948 1.2326\n", "Credit_history_A30 1.4395 0.4109 3.5034 0.0005 0.6342 2.2449\n", "Credit_history_A31 1.0135 0.3945 2.5689 0.0102 0.2402 1.7868\n", "Credit_history_A33 0.3081 0.3093 0.9960 0.3193 -0.2982 0.9143\n", "Savings_account_bonds_A61 0.5528 0.2132 2.5935 0.0095 0.1350 0.9706\n", "Savings_account_bonds_A62 0.6598 0.3173 2.0793 0.0376 0.0379 1.2817\n", "Purpose_A410 0.0894 0.6874 0.1301 0.8965 -1.2578 1.4367\n", "Purpose_A46 1.1777 0.3936 2.9919 0.0028 0.4062 1.9492\n", "Purpose_A48 -1.6748 1.2032 -1.3919 0.1639 -4.0330 0.6835\n", "Present_employment_since_A71 0.3893 0.3416 1.1397 0.2544 -0.2802 1.0589\n", "Present_employment_since_A73 0.0832 0.2046 0.4069 0.6841 -0.3177 0.4842\n", 
"Present_employment_since_A74 -0.2333 0.2595 -0.8990 0.3686 -0.7420 0.2754\n", "Housing_A151 0.4031 0.2296 1.7556 0.0791 -0.0469 0.8531\n", "Other_installment_plans_A141 0.6645 0.2541 2.6149 0.0089 0.1664 1.1625\n", "Other_installment_plans_A142 0.4337 0.4208 1.0306 0.3027 -0.3911 1.2585\n", "Personal_status_and_sex_A91 0.4378 0.4224 1.0365 0.3000 -0.3900 1.2657\n", "Personal_status_and_sex_A93 -0.2726 0.1908 -1.4281 0.1533 -0.6466 0.1015\n", "===================================================================================================\n", "\n", "\"\"\"" ] }, "execution_count": 232, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_logistic_model.summary2()" ] }, { "cell_type": "code", "execution_count": 235, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Confusion matrix:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Actual01All
Predict
0504150654
15591146
All559241800
\n", "
" ], "text/plain": [ "Actual 0 1 All\n", "Predict \n", "0 504 150 654\n", "1 55 91 146\n", "All 559 241 800" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print('Train Confusion matrix:')\n", "display(pd.crosstab(train_pred['y_pred'], train_pred['class'], rownames=['Predict'], colnames=['Actual'], margins=True))" ] }, { "cell_type": "code", "execution_count": 233, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test Confusion matrix:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Actual01All
Predict
012234156
1192544
All14159200
\n", "
" ], "text/plain": [ "Actual 0 1 All\n", "Predict \n", "0 122 34 156\n", "1 19 25 44\n", "All 141 59 200" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print('Test Confusion matrix:')\n", "display(pd.crosstab(test_pred['y_pred'], test_pred['class'], rownames=['Predict'], colnames=['Actual'], margins=True))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. ROC curve, AUC\n", " - ROC 곡선 = 분류 임계값(threshold)을 바꿔 가며 FPR(x축)에 대한 TPR(Recall, y축)을 그린 곡선\n", " - AUC = ROC 곡선 아래의 면적 (1에 가까울수록 분류 성능이 좋고, 0.5는 무작위 분류 수준)" ] }, { "cell_type": "code", "execution_count": 237, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "from sklearn import metrics\n", "from sklearn.metrics import auc\n", "\n", "plt.rcParams[\"figure.figsize\"] = (7,7)" ] }, { "cell_type": "code", "execution_count": 238, "metadata": {}, "outputs": [], "source": [ "fpr, tpr, thresholds = metrics.roc_curve(train_pred['class'], train_pred['probs'], pos_label=1)" ] }, { "cell_type": "code", "execution_count": 240, "metadata": {}, "outputs": [], "source": [ "roc_auc = auc(fpr, tpr)" ] }, { "cell_type": "code", "execution_count": 241, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd4FGXXx/HvSSEFQu9GOtKbBBARRRBEqgUBsQOiIGJDRdQHFRUrKo9YUJBHfSk2pIkgiqII0qVKRwhFemgJpJz3j1lSIGWBbGaTnM915dqZ2ZnZX4aQk5m5575FVTHGGGMyEuB2AGOMMf7NCoUxxphMWaEwxhiTKSsUxhhjMmWFwhhjTKasUBhjjMmUFQpjjDGZskJh8hwR2S4isSJyXET2ish4ESl01jpXisjPInJMRGJEZLqI1D5rncIi8o6I7PDsa7NnvmTOfkfGuMsKhcmrOqtqIaAh0Ah4+swbItIcmANMBcoDlYG/gAUiUsWzTgHgJ6AO0B4oDFwJHASa+iq0iAT5at/GXCgrFCZPU9W9wGycgnHG68Bnqvquqh5T1UOq+iywCHjes85dQAXgJlVdp6pJqrpPVYer6vfpfZaI1BGRH0XkkIj8KyJDPcvHi8hLqdZrJSLRqea3i8hTIrIKOCEiz4rI12ft+10RGeWZLiIiY0Vkj4jsEpGXRCTwIg+VMRmyQmHyNBGJBG4ANnvmw3HODL5KZ/Uvgbae6euAH1T1uJefEwHMBX7AOUuphnNG4q3bgI5AUeBzoIOIFPbsOxDoDkzwrPs/IMHzGY2AdkDf8/gsY86LFQqTV30nIseAncA+YJhneXGcn/s96WyzBzhz/6FEButkpBOwV1XfUtU4z5nKn+ex/ShV3amqsar6D7AcuNHzXmvgpKouEpEyOIXvEVU9oar7gLeBnufxWcacFysUJq+6UVUjgFZATVIKwGEgCSiXzjblgAOe6YMZrJORS4EtF5TUsfOs+Qk4ZxkAvUg5m6gIBAN7ROSIiBwBPgJKX8RnG5MpKxQmT1PVX4HxwJue+RPAQuDWdFbvTsrlornA9SJS0MuP2glUzeC9E0B4qvmy6UU9a/4roJXn0tlNpBSKncApoKSqFvV8FVbVOl7mNOa8WaEw+cE7QFsROXNDewhwt4gMEpEIESnmudncHHjBs87nOL+UvxGRmiISICIlRGSoiHRI5zNmAGVF5BERCfHst5nnvZU49xyKi0hZ4JGsAqvqfuAX4FNgm6qu9yzfg9Ni6y1P890AEakqItdcwHExxitWKEye5/ml+xnwnGf+d+B64Gac+xD/4NwUvkpVN3nWOYVzQ/tv4EfgKLAY5xLWOfceVPUYzo3wzsBeYBNwreftz3Ga327H+SU/2cvoEzwZJpy1/C6gALAO51La15zfZTJjzovYwEXGGGMyY2cUxhhjMuWzQiEi40Rkn4isyeB9EZFRnm4RVonI5b7KYowx5sL58oxiPE7XBxm5Aaju+eoHfODDLMYYYy6QzwqFqs4HDmWySlecbhRUVRcBRUXEbsgZY4yfcbMDsktI+5BRtGfZOU/Dikg/nLMOChYs2LhmzZo5EtAYY3Kd2P1wdAcAMbEh/HOkCKpCQtK/B1S11IXs0s1CIeksS7cJlqqOAcYAREVF6dKlS32Zyxhj/JcqzLoL/l2W/vuHdrD/eDiPTG3PhBX1AWhSN4Qla57+50I/0s1CEY3T7cEZkcBul7IYY/KjhFNk8Pdp9lv3Bfw2BDTx4vZz6kiGb6nC5JV1eei7GzhwoiBhYUG89FJrHn64GUFBT2e4XVbcLBTTgIEiMgloBsR4njo1xhjf+/5OWP+F2ykuXIk60PnLNIu+nR7Nbf+3EIBrr63Exx93pmrV4hf9UT4rFCIyEadDtpKevveH4XRmhqp+CHwPdMDp/vkkcK+vshhj8iFNgn0rISHu3Peif0lbJAJDciZTgQjo9iMUrnjx+wopCpL2Cn7
XO2rSdsK/dO9ehz59GiGS3hX+85frnsy2exTGGK8sehkWPJv1eo+chsBg3+fxgS1bDjF48I+MHt2B8uUjAFDVdAuEiCxT1agL+RwbdtEYk3skxsPGryHuYNbrbp3pvBapAuFlzn0/OAxavpYri0RiYhLvvLOI556bR2xsAkWKhDB+vDN8SXadRaRmhcIY49/iY2HVhxB7ADZ9C4f+Pr/tr3wBat/hm2wuWLNmH717T2XJEqftz+231+PNN9v59DOtUBhj/MuRrbD0LUiIdeY3fgnxJ85dr+GDWe8rtARU65q9+Vxy6lQCI0b8ziuv/EZ8fBKRkYX58MOOdOx4mc8/2wqFMSbnbfsBVvzXueF8tu0/ZLxdi5cgKBRq3QEF07mclIetW7ef4cPnk5Sk9O8fxauvXkfhwjlzE94KhTEmZ/3xAix8Puv1qnSEajc700EhUKUzhBT2aTR/c/p0IgUKBALQqFE53nijLY0bl+OaayrlaA4rFMaYzKlC9Hz44R5IOHnx+zu5L2W67UcQcem564QUg3LNzmn+mZ/8/PM27rtvOqNGtU++vPTYY81dyWKFwhiTVuyhlPsDMdvgq9aQFJ+9nyEBcP8uKJje8OH525EjcTzxxBw++WQFAO+/vzRH7kNkxgqFMSbFXx/B3AfSfy8gCDpOhEtaXvznhJVw9mfSmDZtA/37z2T37mMUKBDIc89dzVNPtXA7lhUKY4zHyf1pi0Sh8s5rQLDzvEHNHu7kygcOHYplwICZTJ68FoArrohk7Ngu1K59QZ29ZjsrFMbkRarOA2cnzqOfzR/vT5m+Zz2UsO78c0pQUAALFuwkPDyYV15pzcCBTQkM9J+Rqq1QGJMX7fkTvut8YdvW62tFIgfs3BlDiRLhhIcHU7hwCJMnd6NcuUJUrlzM7WjnsEJhTF60YZLzWigSKt/g/XaFykOzZ3yTyQCQlKSMGbOMJ5/8kX79Gic/VX3llem0/vITViiMyUt2/QHLRsKmb5z50g2h3Rh3M5lkmzYdpG/f6cyf74wh9M8/MSQlKQEB/t0M2AqFMf7o8GaY2x9Ox5zfdnuXpJ2/9p3sy2QuWEJCEiNHLmTYsF+Ii0ugdOmCjB7dgVtuqeWTTvyymxUKY/yNKoyrfnH7uPIFqNETilbNnkzmgh09eorWrf/HsmXOuGx33dWAkSPbUaJEuMvJvGeFwpiLkRAHp495t+78J2DtZ87DZplJPVTm5Q9DzV7nl6lgmewZGMdki8KFQ6hQoQj795/ko4860b59NbcjnTcrFMZciNPHYc1Y+P1ZiD9+ftt6O2ZyhdZ26SiXWrQomkKFClC3bmkAxozpTEhIIBEROTSSXjazQmHM+YqPhf9GpMyHFs/6LOGMYpdBt7neDZZjTy7nOidOnOaZZ35m1Kg/ady4PAsX9iEoKICSJXPPZab02E+iMd5KSnAeYpv/VMqy9uOhzt2uRTL+Y+7crdx333S2bz9CYKDQtm0VEhOTCArynwfnLpQVCmO8ceJfmNrVeZDtjKtftyJhOHw4lsGD5zBu3EoAGjYsy9ixXbj88nIuJ8s+ViiM8caS19IWiVtmQyXfDj9p/F9CQhLNmn3Cpk2HCAkJZNiwaxg8+EqCgwPdjpatrFAYk5mTB+D3ofDPHGc+ogL0WgSF8s5fi+bCBQUF8PDDzZgwYQ1jx3ahZs2SbkfyCVFVtzOcl6ioKF26dKnbMUx+8UlViNmaMn/jdKjayb08xlWqyhdfrCIxUbnnnoaA0yUH4PdPV4vIMlWNupBt7YzCmLMd2QIzekDsATjqdLVAmcZw9Rtw6TXuZjOu+eefIzzwwEx++GEzhQoV4Prrq1KuXITfF4jsYIXCmFNHYdrNsOt3Z+jNhLi070sg3L7Y+yawJk9JSlI++GAJQ4b8xPHjpylWLJS3376esmULuR0tx1ihMPnbvEdg+bvpv3flC1DrDoiItCKRT23YcIC+fafz++87AOjWrTb//e8N+apIgBUKk9/
tWuC8BheEim2hwwRnPiAQAgu4l8v4hT59prFgwU7KlCnI++935Oaba7kdyRVWKEz+EX8Stv8ACbEpy+IOOq/d50HZJu7kMn5FVZN7dB09ugOjRv3Jm2+2o1ixMJeTuccKhckfjmyBzy+H00fTfz/Azh7yu7i4BIYP/5WtW48wceItADRoUJaxY7u6nMx9VihM/jB3QNoikbpH1qJVoVS9nM9k/MaCBTvo02caGzYcRASGDGlBgwZl3Y7lN6xQmLzpVAwseM5p4grwr+fZmyqdoeP/QYGIjLc1+caxY6cYOvQnRo9egirUrFmSsWO7WJE4ixUKk/fsWwmfN0r/vWvftiJhAJg9ezP9+s1gx44YgoICeOqpFjz77NWEhtqvxbPZETF5y5rxMPvelPkiVaDFcM90ZRvxzSSbPXsLO3bEcPnl5Rg7tgsNG9pZREasUJjcKz4WOKsLmlUfpUw3fx6ueNZp6moMsH//CUqVKgjA8OHXUqVKMR54ICpPdAXuS1YoTO604DlY9FLG73f+Ci7rlnN5jF/bs+cYAwfOYunS3axZ05+IiBAKFizAwIFN3Y6WK/i0jIpIexHZICKbRWRIOu9XEJF5IrJCRFaJSAdf5jF5yM5fnNfAAhAUlvareC2oaF2AG+eZiPHjV1K79vt8++16Dh2KZcWKvW7HynV8dkYhIoHAaKAtEA0sEZFpqrou1WrPAl+q6gciUhv4Hqjkq0wmD+o2FyJbup3C+KHt24/Qr990fvzR6f33hhuq8eGHnahQoYjLyXIfX156agpsVtWtACIyCegKpC4UChT2TBcBdvswjzEmn/jss78YMGAmJ07EU7x4GO++257bb6+X/MS1OT++LBSXADtTzUcDzc5a53lgjog8BBQErktvRyLSD+gHUKFChWwPanKJg+tg/QTQRIjZ5nYa48eKFQvlxIl4evSow6hRN1C6dEG3I+VqviwU6ZXus0dJug0Yr6pviUhz4HMRqauqSWk2Uh0DjAFn4CKfpDX+ZcmbcHBN2mVr/3fuegUKn7vM5Dvx8Yn89tsOWreuDEDnzjVYvLgvTZpc4nKyvMGXhSIauDTVfCTnXlrqA7QHUNWFIhIKlAT2+TCX8WdxR2BqV4ien/E6tW6HEnWgcEUoVT/nshm/tHz5Hnr3nsrq1fv488++REWVB7AikY2yLBQi0gS4A2gJlANigTXATGCCqh7LYNMlQHURqQzsAnoCvc5aZwfQBhgvIrWAUGD/BXwfJjc7uR9m9oKT/8KB1Wnfu/7TtPOFK0CF1jmXzfit2Nh4XnjhV9588w8SE5XKlYty+nSi27HypEwLhYjMAA4CU4G3cP7SDwUuA64FZorI66o64+xtVTVBRAYCs4FAYJyqrhWRF4GlqjoNeBz4WEQexbksdY/mtkG8zcXb+QvsmJt2Wbkr4JYfIMRaqJhz/fbbP/TtO52NG51O/B599AqGD7+WggWtF2BfyOqMoo+q/nvWsjhgsefrNREpndHGqvo9TpPX1Mv+k2p6HdDivBKbPMjzt0HFtnDNm06X38VrOMOSGnOWTz5Zzn33TQegdu1SjB3bhSuuiHQ5Vd6WaaFIp0ikt47dTzDZI6So3XMwWerQoTolS4YzYEAUQ4e2JCTEOpjwtawuPR3m3JZK4LRoUlUt7pNUxhjjcfDgSd57bzHPPns1gYEBlC8fwdatg4iICHE7Wr6RVSkumSMpTP4VPR+2z3E7hfFDqspXX61j4MDv2b//JBERITz2WHMAKxI5LKtCkdVTKhmMK2lMFhLjYfEI+GNYyrKgUPfyGL+ye/cxHnzwe7777m8ArrmmIl261HA5Vf6VVaFYi3PpKaOH5+wxaXP+VrwHi1+D49Epyxo9BA36u5fJ+AVVZdy4FTz++BxiYk4REVGAN99sR9++lxMQYI0b3JLVzexLM3vfmEwd3ux0Bx5/PGXZ8d2wb3na9e5YBmUuz9lsxi99/fU6+vZ1WjR17FidDz/sRGSkPX3vNq+
bC4hIEaAqznMUAKjqH74IZXK5uMPw/R2w7fvM17t+HFTtAmElciaX8Xs331yLLl1q0LNnHXr2rGud+PkJrwqFiPQBHsPp6G810ARYBLTyWTKTe02+Ju0T1jV7Qc2eadcpf6UVCMPatft49NHZjBvXlcjIwgQGBjB1as+sNzQ5ytszikeAKGChqrYUkTo4Y0kYc64Te5zXyKuh3SdQrLq7eYzfOX06kdde+53hw+cTH5/Ec8/N49NPu7ody2TA20IRp6qxIoKIFPB0xVHTp8lM7qNJcGAtJCU4852/gvAMH9w3+dSSJbvo02caq1c7z+ref39jXnst3REGjJ/wtlDsEZGiwHRgtogcArJ8atvkI6owuRXs+i3VQhuw3qQ4eTKeYcPmMXLkIpKSlKpVi/Hxx5259trKbkczWfCqUKhqF8/kcyLSBmc0upk+S2Vyj5MHYNtM2P1H2iJR/367B2HS2LjxIG+/vQiAwYOb88IL1xIeHuxyKuMNb29mNwHWq+pxVf1JRCKAesBSn6Yz/utUDKz5FH559Nz3Bp2E4LCcz2T8TmxsPGFhTjFo2LAs777bniZNLqFpUxsrIjfx9trAGOBkqvkTwEfZH8fkGqs+TlskilZ1ziLu+suKhAFg5syNVK/+X6ZO/Tt52YMPNrUikQt5e48iIPXwpKqaJCJ2zpifnfaMV3VJS6fpa/37ISDQ3UzGL+zff4JHHpnNhAlOE+nx4/+ia1dr+5KbeVsotolIf5wzCwX6A9t9Fcr4sRXvwfr/g2M7nfkKbaDhAHczGb+gqkyevJaHHprFgQMnCQsL4uWXWzNoUDO3o5mL5G2huB8YDQzHKRTzgPt8Fcr4scWvwvFdKfNFrMWKcc4i+vSZxvTpGwFo3boyH3/cmSpVirmczGQHb1s9/Qt083EW46/iT8Dyd2HhC5B42lnWdSoUrQIl6ribzfiFsLBgVq36lyJFQnjrrXb07t3Iut/IQ7xt9VQN54yirKo2EJH6QEdVHeHTdMZdqk6BOLtlU8l6UKUDBNjIYvnZ5s2HKFu2EIUKFaBQoQJ8/XV3ypePoHz5CLejmWzmbaunT4AXgDM3tFcDd/gkkfEfvw1JWySKVYf7tjstm6xI5FuJiUm8+eYf1Kv3Ac8881Py8qio8lYk8ihv/7cXVNU/zpxKqqqKSLzvYhm/cCilWSN3roDSDd3LYvzCmjX76N17KkuW7AbgyJFTJCWpjRWRx3lbKA6KSGU842eLyI3AXp+lMu46fRzWfQ6HNznzXb+zIpHPnT6dyCuv/MYrr/xGfHwSkZGF+eijTnToYB0+5gfeFoqBwFigpoj8A+wBbvNZKuOuNeNg3sMp88FZjYhr8rKYmDhatBjH2rX7AejfP4pXX72OwoVt3Or8wttWT5uB1p7Bi0RVj/g2lnHVqRjntdwVUKM7RF7jbh7jqiJFQqlTpzSnTyfyySdduPrqim5HMjksy0Ihzo2JIqp6RFVjRCRYRO4FHlfVur6PaHJM/AmYc5/TwR9AxeugcTp9OZk87+eft1G8eBgNG5YF4MMPOxIaGpTcb5PJXzJt9SQitwKHgY0i8rOIXA1sBm4GeudAPpOTdi2AvyfC0X+c+YgK7uYxOe7IkTjuu28abdp8xr33TiU+PhGAYsXCrEjkY1mdUQwDmqnqBk8Psr8Dd6jqV76PZnwm4RQkndVobfdC+OZ6Z7psU7jufSh9ec5nM66ZNm0D/fvPZPfuYxQoEEi3brXcjmT8RFaF4rSqbgBQ1SUist2KRC51Yq/Tkd+On2Bu/8zXrdYVyjTOmVzGdfv2nWDQoFlMnrwWgObNIxk7tgu1apVyOZnxF1kVitIiMijVfMHU86o6yjexTLbaMh2+63Lu8rNbMwUWgBs+hyodcyaXcV1CQhLNm49l69bDhIcHM2JEGx58sAmBgTY6oUmRVaH4FCiVybzJDQ6uc15DikJYSQguBG0/gnJN3c1lXBcUFMCTT17J11+vZ8yYTlSubJ3
4mXOJqrqd4bxERUXp0qU2sF66on+Hf9M5Njt+gq0zoMmTcPVrOZ/L+I2kJGXMmGUEBAj9+jmXF8/8DrBO/PI2EVmmqlEXsm2mZxQiMgR4X1WPZvD+1UAhVf3+Qj7cZKOEOPimrfOakaDwnMtj/M6mTQfp23c68+f/Q3h4MF261KBs2UJWIEyWsrr0tAmYIyJHgWXAfiAUqA40Bn4FXvJpQuOdxNNOkQgITn8goeCC0OD+nM9lXJeQkMTIkQsZNuwX4uISKFOmIKNHd6Bs2UJuRzO5RKaFQlW/Ab4RkVpAC6AcEAt8DQxU1RO+j2jOS1AoXPuO2ymMn/jrr7307j2N5cv3AHD33Q0YOfJ6ihe3cc2N97ztwmM9sF5Ewjzzsd5sJyLtgXeBQOATVX01nXW6A8/jdDj4l6r28i66MSYzqsqDD37P8uV7qFChCGPGdOL666u5HcvkQt4OXFQHGI9zRiEiEg3cq6rrMtkmEGewo7ZANLBERKal3kZEqgNPAy1U9bCIlL7g7yS/06Ss1zH5QmJiEoGBAYgIH37YiTFjlvHyy62JiLBO/MyF8bax9EfAUFWNVNVLgGeAMVls0xTYrKpbVfU0MAnoetY69wGjVfUwgKru8z66SXZyP4yJdDuFcdnx46d55JEfuPXWr5JbMtWtW5pRo26wImEuirfdjEeo6o9nZlR1roi8lcU2lwA7U81HA83OWucyABFZgHN56nlV/eHsHYlIP6AfQIUK1v/QOQ6udTr0A6h2o7tZjCt+/HEL/frNYPv2IwQGCmvW7KNevTJuxzJ5hLdnFNtF5GkRifR8DQH+yWKb9Nrcnf3QRhBOC6pWOONbfCIiRc/ZSHWMqkapalSpUva8X7L4WNg6E6J/c+Yjr4EbPnM3k8lRhw/H0rv3VNq1+4Lt24/QsGFZliy5z4qEyVbenlH0BoYD3+MUgPnAvVlsEw1cmmo+EtidzjqLVDUe2CYiG3AKxxIvc+VvC56BZW+nzAdY7575yXff/U3//jPZu/c4ISGBPP98Kx5/vDnBwYFuRzN5jLetng4C6TTOz9QSoLpnCNVdQE/g7BZN3+GcSYwXkZI4l6K2nufn5E//LkspEmUaQ+FK0PBBVyOZnPXHHzvZu/c4V11VgU8+6UyNGiXdjmTyqKyezH5LVR8XkSmce9kIVb05o21VNUFEBgKzce4/jFPVtSLyIrBUVad53msnIuuAROAJT1EymTl5AL5I9SR+8+ehaifX4picoars2nWMyMjCADz/fCtq1izJPfc0JCDAnq42vpNpX08i0lRVF4tIm/TeV9WffJYsA/m6r6fEeJg3CP76MGXZlS9C06ecnl9NnvXPP0e4//4ZrFmzj7VrB1CkSKjbkUwuczF9PWV6M1tVF3sma6nqT6m/ABvVJKftXZK2SFS7EZo/Z0UiD0tKUt57bzF16rzP7NlbOHkynrVr97sdy+Qz3rZ6Sm/Y0z7ZGcR4QROc1xJ1oPs86GxjSOVlGzYc4JprxvPQQ7M4cSKebt1qs379g1x55aVZb2xMNsrqHkUPnJvQlUXk21RvRQBHfBnMpOPMZcLQ4nBpK1ejGN8aM2YZgwbN4tSpRMqWLcTo0R24+WY7iTfuyKrV02LgIE7T1tGplh8DVvgqlMnAtzd4JnLXGCLm/FWoUIRTpxK5996GvPVWO4oVs078jHuy6j12G7ANmJszcUyGVoyGBE9fjGWbuJvFZLu4uAR+/nkbHTpUB6B9+2qsXt2funWt+zPjPq/uUYhIExFZJCIxIhInIqc8Y1QYX0o8DRu+gpUfwM8DU5Zf86Z7mUy2W7BgBw0bfkinThNYtCg6ebkVCeMvvH0y+33gDpyO/ZoC95D2qWvjCxu+hFl3pl12zzoQG/g+Lzh27BRDh/7E6NFLUIWaNUsSGGjPQxj/422hCFDVDSIS5Olu42MR+QP4jw+zmTjPs4clakP5K6HyDVDCbmjmBbNnb6Zfvxns2BFDUFAAQ4a04NlnryY
kxNv/ksbkHG9/Kk+ISAHgLxF5BdgD2DiKvrZ8lPNa4Tpo/a67WUy2+eCDJQwY4Awz37hxOcaO7UKDBmVdTmVMxry9hnGPZ92BOF1tVAe6+SiTSTgFU2+CGE+3VwXLuZvHZKubbqpF+fIRvPbadSxa1NeKhPF73nYKeKajvjjgOQAROXtsCZNddi+Azd+lzEc95l4Wc9H27DnGO+8s4uWX2xAUFEDZsoXYsmUQoaF2mcnkDlk9cBcA3IIzCNFsVV3vGQd7KFAMqOf7iPnQivec12KXwR1LrYuOXEpVGT9+JY89NocjR+IoWTKcJ55oAWBFwuQqWf20fgJUweky/AMR2YQzyNDTqvq1j7PlT9G/weYpzvT146BAhLt5zAXZtu0w998/gx9/dE7Gb7ihGj171nU5lTEXJqtC0Qyor6qJIhIGHACqqeoe30fLp/aleuDdHqzLdRITkxg9eglPP/0TJ0/GU6JEGO++255eveohYk1fTe6UVaE4paqJAKoaKyIbrEj42IE1zmujh+ySUy709dfrePhhZ9j3Hj3qMGrUDZQuXdDlVMZcnKwKRU0RWe6ZFqCGZ14AVdXLfZouv0lKhNUfO9OBIe5mMRfk1lvr8O23f9OrV126dq3pdhxjskVWhcJuVuekpISU6Qb93cthvLZs2W4efvgH/u//bqZixaIEBAiTJ1vLcZO3ZNUp4JacCpLvJSXC/Ced6cACULSKu3lMpmJj43n++V94882FJCUpL774K2PHdnU7ljE+YW30/MW+5bDC8yR2eBl3s5hMzZ//D337TmPTpkMEBAiPPXYFL754rduxjPEZKxT+IjE+ZbrHr+7lMBk6evQUQ4bM5YMPnDHb69QpxdixXWjWLNLlZMb4lteFwtPXUwVV3ezDPPnP6nEw72FI8hSKcs2hSGV3M5l0bd9+hI8/Xk5wcABDh7Zk6NCWFCgQ6HYsY3zOq0IhIh2BkUABnGFRGwLDVPUmX4bL8+Y+CH+9n3aZDXHqV44ePUXhwk4LtPr1y/Dhhx1p2vQS6tWzy4Mm//C2U8AXcR6+OwKgqiuBar4Klecd2Qo7f4Hts1KW3bEcBp2Alq+4FsukUFUmT15DtWqj+OabdcnL+/QLTwWJAAAgAElEQVS53IqEyXe8vfQUr6pHznqy1AZuPl9JibB2PMzpm3b53WugZB1XIplz7d59jP79ZzJt2gYAvvpqHbfcUtvlVMa4x9tCsV5EugMBIlIZeBhY5LtYedTm79IWichrnI7/bDAiv6CqjB27gsGD5xAT41xyeuONtvTta8+VmvzN20IxEGc0uyTgW2A2Tg+y5nysGZsyfevPUMGaVPqLvXuPc/vt3/Lzz9sA6NTpMj74oCORkYVdTmaM+7wtFFVU9SngKV+GydMOroNtnnsSTZ6yIuFnChcOYfv2I5QsGc6oUe3p2bOudeJnjIe3heJ9ESkOfAlMVtW/fZgpb1rx35TpJk+4l8MkW7t2H5deWoTChUMIDw/m22+7U758BKVKWSd+xqTmVasnVW0JtAOOAf8TkRUiMsSnyXK7xNMwpTN8Wtv5+nuiszzqCQgr4W62fO706URefPFXGjX6iCFD5iYvb9CgrBUJY9Lh9QN3qroLGCkis4CngeHAq74KlmttmQ4/9oMTe9N5U6BqpxyPZFIsWbKLPn2msXr1PsC5gZ2UpAQE2GUmYzLi7QN31YEeQDfgODAZu1+Rvr8npS0SFa6D1u860yHFoFA5d3LlcydPxjNs2DxGjlxEUpJStWoxPvmkC61aVXI7mjF+z9szignAJKCLqu7wYZ7cLfYg/D3BmW41Eur2hpAi7mYyHDkSR1TUGLZsOUxAgDB4cHNeeOFawsOD3Y5mTK7gVaFQVRuTMyuaBGs+TZmv2sWKhJ8oWjSUZs0iCQ8PZuzYLjRpconbkYzJVTItFCIyUVVvE5EVpH0S20a4S23bLFg6EnZ4boyWawZFq7qbKZ+bMWMj5coVonHj8gB88EFHQkO
DrBM/Yy5AVmcUZ9px2pBdZzu6E1Z/Aoc3woZJad+78gV3Mhn27z/Bww//wMSJa6hXrzRLl/ajQIHA5I79jDHnL9Pmsaoa7Znso6pbUn8BfbLauYi0F5ENIrI5s+a0ItJNRFREos4vvkuSEmHazbDoxbRFImow9N0Gla53L1s+papMmLCaWrVGM3HiGsLDg+nduxGBgdaayZiL5e3N7Pac22VHx3SWJRORQGA00BaIBpaIyDRVXXfWehHAIOBPb0O77tDf8K8zeA1XPAdB4VDtRihR091c+VR09FH695/JjBkbAWjTpjJjxnSmSpViLiczJm/I6h7F/cADwGUisjzVWxHAsiz23RTYrKpbPfuaBHQF1p213nDgdWDweeR2z7FomNzSmS7VAFq86G6efC4+PpEWLcaxY0cMRYqE8NZb7ejdu5F1v2FMNsrqjOJL4CdgBJD60tExVd2XxbaXADtTzUfjjGmRTEQaAZeq6gwRybBQiEg/oB9AhQoVsvhYHxtfG04fc6aLXeZuFkNwcCD/+c/VTJ++kfff70j58hFuRzImz8nqHsVhz9CnTwA7PfcmygHdRCSrbjXT+5MuueWUiAQAbwOPZxVSVceoapSqRpUqVSqr1X0r/oTzWr8fdJzgbpZ8KCEhiTff/IP33lucvKx370ZMmdLDioQxPuLtPYrvgCYiUhX4DJiJ8xBeZv1RRAOXppqPBHanmo8A6gK/eC4TlAWmiUgXVV3qZS4XeOpfm9EQ4HUPKCYbrFr1L336TGPp0t2EhQVx6621KVOmkF1mMsbHvB0KNUlV44GbgXdU9SGcS0uZWQJUF5HKIlIA6AlMO/OmqsaoaklVraSqlXAGQvLvInFgLWii2ynynVOnEhg2bB6NG49h6dLdXHppYb75pjtlyhRyO5ox+YK3fxIniMitwJ3AjZ5lmfZ/oKoJIjIQZ5CjQGCcqq4VkReBpao6LbPt/c7epfB/ngfUJYD0r6yZ7LZoUTR9+kxj3br9AAwYEMWIEdfZcxHG5CBvC0VvYADwuqpu9QyHOjGrjVT1e+D7s5b9J4N1W3mZxR0/3p8y3WokBNgTvr6mqjzxxI+sW7ef6tWLM3ZsF1q2rOh2LGPyHW/7elojIoOAaiJSE6fZ68u+jeYHVGHZ23B4AxzzNOC6+nW4/GF3c+Vx8fGJBAcHIiKMGdOJzz77i//85xrCwqwTP2Pc4G034y2Bz4FdONdcyorInaq6wJfhXHdkM/x6VqOsql3dyZIPHDkSx+DBc9i79zjTp9+GiFCrVilGjLjO7WjG5GveXnp6G+hw5qlqEamFUzhyR5cbFyohznktWA6aD4MilaG4PTvhC1On/k3//jPZs+c4BQoEsm7dfurUKe12LGMM3heKAqm73lDV9Z6WTPlDWAlocH/W65nz9u+/xxk06Ae+/HItAM2bRzJ2bBdq1XL5eRljTDJvC8VyEfkI5ywC4HZghW8i+YkDa+Gz+m6nyNMmTFjNQw/N4tChWAoWDGbEiDYMGNCEwEBvW20bY3KCt4XiAZyO+57EuUcxH/ivr0K5KikRDq5LWyQqtnMvTx62du0+Dh2KpW3bKowZ05lKlYq6HckYk44sC4WI1AOqAlNU9XXfR3KRKnzeCA6sTlnWcSLU7OlepjwkKUnZvv1Icq+uzz13DfXrl6F79zr2dLUxfizTc3wRGYrTfcftwI8i0jtHUrllx89pi0TjR61IZJONGw/SqtV4WrQYx+HDsQCEhgbRo0ddKxLG+LmszihuB+qr6gkRKYXz8Nw438dyyfFdKdOPxltfTtkgISGJkSMXMmzYL8TFJVCmTEE2bTpE06Y2brUxuUVWvwlPqeoJAFXd7+nxNQ/zdG5b+04rEtngr7/20rv3NJYv3wPAPfc05K232lG8eJjLyYwx5yOr34ZVRORbz7QAVVPNo6o3+yxZTju0EX73DNgXZk0zL9aoUX/y+ONzSEhIomLFIowZ05l27aq6HcsYcwGyKhS3nDX
/nq+CuG71x3B8N5RrBk2fcjtNrle7dikSE5N46KGmvPJKGwoVyj+P3RiT12RaKFT1p5wK4rrE085rzdsg3J4IPl/Hj59m9uzN3HJLbQCuu64KGzc+RLVqxV1OZoy5WHn8noPJCXPmbKFu3fe59dav+P33HcnLrUgYkzfYHVtzwQ4fjuWxx+YwfvxKABo1KmvjRBiTB51XoRCREFU95aswrjq63e0Eucq3367nwQe/Z+/e44SEBPL88614/PHmBAfbOB3G5DVeXXoSkaYishrY5JlvICJ5pwuPkwdgi2fAPbFfdFl5991F3HLLl+zde5yrrqrAX389wJAhV1mRMCaP8vYexSigE3AQQFX/Aq71Vagct/fPlOlqN2a8ngHgttvqUalSUUaP7sCvv95DjRol3Y5kjPEhbwtFgKr+c9ayxOwOk+OO74YFz8GUTs584YoQEeluJj+0ffsRHnroe+LjnX/y0qULsnHjQAYMaEJAgHW/YUxe5+09ip0i0hRQEQkEHgI2+i5WDkhKgI/O6kbiinSH8863kpKU0aMX8/TTP3HiRDyRkYV56qmrAOwykzH5iLeFoj/O5acKwL/AXM+y3Gv5qJTp0pdDs6FQPe88aH6x/v77AH37TmPBAmes8Ftvrc099zR0OZUxxg1eFQpV3Qfk/m5UkxJh+q3OeBOHNzjLGjwArf9rfTt5xMcn8sYbf/DCC79y+nQiZcsW4v33O3DTTbXcjmaMcYlXvx1F5GOSe8xLoar9sj2RLx3ZDJunpMwHhkCzZ6xIpPLNN+t55pmfAejTpxFvvNGWYsWsEz9j8jNvf0POTTUdCtwE7Mz+OD628EXntXAluHmW01VHmD09rKrJY0J0716HH37YzB131Oe666q4nMwY4w+8vfQ0OfW8iHwO/OiTRL50PNp5LVIJStR0NYq/+P33HQwaNIuvv+5OlSrFCAgQxo+3JsLGmBQX2tdTZaBidgbJUc2fdzuB644dO8XAgd/TsuWnrFixl1df/d3tSMYYP+XtPYrDpNyjCAAOAUN8Fcr41g8/bOb++2ewY0cMQUEBPP30VTzzTEu3Yxlj/FSWhUKci9cNgDPjhCap6jk3tv3aus/hwFo4stXtJK46dCiWRx+dzWef/QVA48blGDeuK/Xrl3E5mTHGn2VZKFRVRWSKqjbOiUDZ7lg0zLor7bKQIu5kcdmePceYOHE1oaFBvPhiKx59tDlBQdbTvDEmc962elosIper6nKfpvGFTZ6RW0NLQNRgKFwBSjVwN1MOOnjwJMWLhyEi1KlTmnHjutKs2SVUr17C7WjGmFwi0z8nReRMIbkKp1hsEJHlIrJCRPy/aJz4F+Y97EwHhUCzIVCrF0je759IVfn00xVUq/ZfJk9em7z8jjvqW5EwxpyXrM4oFgOXA7mzveTOeSnTN//gXo4ctm3bYfr1m8Hcuc49mVmzNtOzZ12XUxljcqusCoUAqOqWHMiS/c7cc6/QBkrVczdLDkhMTOK99xYzdOjPnDwZT4kSYbz7bnt69cr737sxxneyKhSlROSxjN5U1ZHZnMc3wkq5ncDndu06yq23fsXChc5DhT171uXdd9tTunRBl5MZY3K7rJq8BAKFgIgMvjIlIu099zU2i8g5z12IyGMisk5EVonITyKSex/ic1nx4mEcOHCS8uUjmDq1JxMn3mJFwhiTLbI6o9ijqi9eyI4941aMBtoC0cASEZmmqutSrbYCiFLVkyLSH3gd6HEhn5eug2uybVf+aNmy3VStWpyiRUMJCwvmu+96Ur58BEWLhrodzRiTh2R1RnExzYOaAptVdauqngYmAV1Tr6Cq81T1pGd2EZC9w8v9+YrzGpC3BtmJjY3nqad+pGnTT3jyyZQut2rXLmVFwhiT7bI6o2hzEfu+hLQ9zEYDzTJZvw8wK703RKQf0A+gQoUK3icoEAGnj8HlD3u/jZ/79dft9O07nc2bDxEQIEREFEjT+6sxxmS3TAuFqh66iH2n95s
r3a4/ROQOIAq4JoMcY4AxAFFRUefffUixGue9ib85evQUTz31Ix9+uAyAOnVKMXZsF5o1szG+jTG+5csRe6KBS1PNRwK7z15JRK4DngGuUdVTPsyTax0+HEuDBh+yc+dRgoMDGDq0JUOHtqRAgbx1Sc0Y4598WSiWANVFpDJOh4I9gV6pVxCRRsBHQHvPcKvZZ+evzmWnPKBYsTBat67MunX7GTu2C/XqWSd+xpic47NCoaoJIjIQmI3TzHacqq4VkReBpao6DXgDp/ntV55r7DtUtUu2BNj0Tcp0cO5qJqqqfPnlWipWLMoVVziXlkaP7kBoaBCBgdaJnzEmZ/l0sGhV/R74/qxl/0k1fZ1PPvjAGljxX2f6qldyVaunXbuOMmDA90ybtoFatUqyYsX9hIQEUbBgAbejGWPyKZ8WClds+ApmdE+Zr9zBvSznQVX55JPlDB78I0ePnqJw4RAeeeQKgoNzT5EzxuRNea9Q/OZ5ADysJFz5IpT2/y7Ft2w5xH33TWfevO0AdOp0GR980JHIyMLuBjPGGPJSoTiyBTZ8Ccedvo7ovQlCi7qbyQvx8Ym0avU/oqOPUrJkOP/97w306FHHnoswxviNvFEoEuJgbLWU+YBgCAxxL895CA4O5OWXWzNnzhbeeac9JUuGux3JGGPSyBuFYtadKdOVO0D9+yE4zL08mTh9OpERI34jIiKExx5rDsBddzXgrrv8/xKZMSZ/yhuFYst05zW8NNw0w29HsFu8eBd9+kxjzZp9hIYGceed9SlVKnc13TXG5D95o1F+kKcjvDtX+GWROHkynsGD59C8+VjWrNlHtWrFmTXrdisSxphcIW+cUZwR5H/X9+fN20bfvtPZuvUwAQHCE09cyfPPtyI8PNjtaMYY45W8VSj8jKrywgu/snXrYerVK824cV2JiirvdixjjDkvubtQJMTB7oWQGO92kjTi4hIIDQ1CRPj4485MnryWJ59sYZ34GWNypdx9j2LuA/BVa0jwjH3kclcd+/efoFevb+jSZSKqTm/o1auX4Nlnr7YiYYzJtXL3GcUxz8N1pRpC1S7OQEUuUFUmTlzDoEGzOHgwlvDwYP7++wC1apVyJY8xxmSn3Fsots+BHT8509e8ARV9079gVnbujKF//5nMnLkJgDZtKjNmTGeqVCnmSh5jjMluubNQ/PkK/P5MynyIO111jB27nEcfnc2xY6cpUiSEkSOv5957G1r3G8aYPCX3FYqk+LRF4obPoUxjV6Ls3HmUY8dO07VrDd5/vyPly7tz6csYY3wp9xUKTUqZvmctlKidYx+dkJDE5s2HqFmzJABDh7akceNydOp0mZ1F5HHx8fFER0cTFxfndhRjMhUaGkpkZCTBwdn3rFbuKxRnFK6Uo0Vi1ap/6dNnGjt2xLBu3QBKlAinQIFAOneukWMZjHuio6OJiIigUqVK9keB8VuqysGDB4mOjqZy5crZtt/c3Tw2B5w6lcB//jOPxo3HsHTpbkJCAvnnnxi3Y5kcFhcXR4kSJaxIGL8mIpQoUSLbz3xz7xlFDli0KJo+faaxbt1+AAYMiGLEiOsoXDh3dGFuspcVCZMb+OLn1ApFBt54YwFPPTUXVahevThjx3ahZcuKbscyxpgcZ5eeMtCkySUEBgYwZEgL/vrrASsSxnWBgYE0bNiQunXr0rlzZ44cOZL83tq1a2ndujWXXXYZ1atXZ/jw4cm9AwDMmjWLqKgoatWqRc2aNRk8eLAb30KmVqxYQd++fd2OkakRI0ZQrVo1atSowezZs9Ndp2XLljRs2JCGDRtSvnx5brzxRgDeeOON5OV169YlMDCQQ4cOJW+XmJhIo0aN6NSpU/Ky9957j2rVqiEiHDhwIHn5jBkzGDZsmI++y3Soaq76atywruqbqI6ppNnp8OFY/fzzv9Is27HjSLZ+hsm91q1b53YELViwYPL0XXfdpS+99JKqqp48eVKrVKmis2fPVlXVEydOaPv27fW9995
TVdXVq1drlSpVdP369aqqGh8fr6NHj87WbPHx8Re9j27duunKlStz9DPPx9q1a7V+/foaFxenW7du1SpVqmhCQkKm29x88836v//975zl06ZN02uvvTbNsrfeektvu+027dixY/Ky5cuX67Zt27RixYq6f//+5OVJSUnasGFDPXHiRLqfm97PK7BUL/D3rl16Ar777m8GDJjJnj3HufTSwlxzTSUALr20iLvBjH96y0f3Kh7XrNfxaN68OatWrQJgwoQJtGjRgnbt2gEQHh7Oe++9R6tWrXjwwQd5/fXXeeaZZ6hZsyYAQUFBDBgw4Jx9Hj9+nIceeoilS5ciIgwbNoxbbrmFQoUKcfz4cQC+/vprZsyYwfjx47nnnnsoXrw4K1asoGHDhkyZMoWVK1dStKjzAGy1atVYsGABAQEBPPDAA+zYsQOAd955hxYtWqT57GPHjrFq1SoaNHBGely8eDGPPPIIsbGxhIWF8emnn1KjRg3Gjx/PzJkziYuL48SJE/z888+88cYbfPnll5w6dYqbbrqJF154AYAbb7yRnTt3EhcXx8MPP0y/fv28Pr7pmTp1Kj179iQkJITKlStTrVo1Fi9eTPPmzdNd/9ixY/z88898+umn57w3ceJEbrvttuT56OhoZs6cyTPPPMPIkSOTlzdq1CjdfYsIrVq1YsaMGXTv3v2ivi9v5OtC8e+/x3nooVl89dU6AJo3j6RMmUIupzImc4mJifz000/06dMHcC47NW6c9qHTqlWrcvz4cY4ePcqaNWt4/PHHs9zv8OHDKVKkCKtXrwbg8OHDWW6zceNG5s6dS2BgIElJSUyZMoV7772XP//8k0qVKlGmTBl69erFo48+ylVXXcWOHTu4/vrrWb9+fZr9LF26lLp16ybP16xZk/nz5xMUFMTcuXMZOnQo33zzDQALFy5k1apVFC9enDlz5rBp0yYWL16MqtKlSxfmz5/P1Vdfzbhx4yhevDixsbE0adKEW265hRIlSqT53EcffZR58+ad83317NmTIUOGpFm2a9currjiiuT5yMhIdu3aleGxmTJlCm3atKFw4cJplp88eZIffviB9957L3nZI488wuuvv86xY8cy3N/ZoqKi+O2336xQ+Iqq8sUXq3jkkdkcOhRLwYLBjBjRhgEDmhAYaLdtTBbO4y//7BQbG0vDhg3Zvn07jRs3pm3btoDz85xRS5fzaQEzd+5cJk2alDxfrFjW/ZXdeuutBAY6PSP36NGDF198kXvvvZdJkybRo0eP5P2uW7cueZujR49y7NgxIiJSejLYs2cPpUqldKIZExPD3XffzaZNmxAR4uNThhJo27YtxYsXB2DOnDnMmTMn+S/v48ePs2nTJq6++mpGjRrFlClTANi5cyebNm06p1C8/fbb3h0cSHPP54zMju/EiRPTvecyffp0WrRokfw9zJgxg9KlS9O4cWN++eUXr/OULl2a3bt3e73+xch9hSL2QNbrZGHkyIUMHvwjAG3bVmHMmM5UquROf1HGeCssLIyVK1cSExNDp06dGD16NIMGDaJOnTrMnz8/zbpbt26lUKFCREREUKdOHZYtW5Z8WScjGRWc1MvObp9fsGDKcL7Nmzdn8+bN7N+/n++++45nn30WgKSkJBYuXEhYWFim31vqfT/33HNce+21TJkyhe3bt9OqVat0P1NVefrpp7n//vvT7O+XX35h7ty5LFy4kPDwcFq1apXuswXnc0YRGRnJzp07k+ejo6MpXz79gcgOHjzI4sWLkwtVapMmTUpz2WnBggVMmzaN77//nri4OI4ePcodd9zBF198ke6+z4iLi8v0mGan3Pfnc/wJ5/Uixp64++6G1KhRgvHjuzJ79h1WJEyuUqRIEUaNGsWbb75JfHw8t99+O7///jtz584FnDOPQYMG8eSTTwLwxBNP8Morr7Bx40bA+cWd+jr4Ge3atUtzOeTMpacyZcqwfv365EtLGRERbrrpJh577DFq1aqV/Nf
72ftduXLlOdvWqlWLzZs3J8/HxMRwySWXADB+/PgMP/P6669n3LhxyfdQdu3axb59+4iJiaFYsWKEh4fz999/s2jRonS3f/vtt1m5cuU5X2cXCYAuXbowadIkTp06xbZt29i0aRNNmzZNd79fffUVnTp1IjQ0NM3ymJgYfv31V7p27Zq8bMSIEURHR7N9+3YmTZpE69atsywS4Fz2S325zpdyb6FoM9rrTTZsOECfPlM5fToRgJIlw1m7dgB33209vZrcqVGjRjRo0IBJkyYRFhbG1KlTeemll6hRowb16tWjSZMmDBw4EID69evzzjvvcNttt1GrVi3q1q3Lnj17ztnns88+y+HDh6lbty4NGjRI/kv71VdfpVOnTrRu3Zpy5cplmqtHjx588cUXyZedAEaNGsXSpUupX78+tWvX5sMPPzxnu5o1axITE5N8jf7JJ5/k6aefpkWLFiQmJmb4ee3ataNXr140b96cevXq0a1bN44dO0b79u1JSEigfv36PPfcc2nuLVyoOnXq0L17d2rXrk379u0ZPXp08mW3Dh06pLkMdPZZwxlTpkyhXbt2ac6KMjNq1CgiIyOJjo6mfv36aS5lzZs3j44dO17kd+UdSe+6mz+LulR06SNAtx+zHIMiISGJN9/8g+ef/4VTpxIZMaINQ4ZclTNBTZ6yfv16atWq5XaMPO3tt98mIiLC75+l8Af//vsvvXr14qeffkr3/fR+XkVkmapGXcjn5b4zCoBLr4XyV2a6ysqVe2nW7BOefvonTp1K5J57GtKvnzvdkRtjsta/f39CQqx7HG/s2LGDt956K8c+L/fdzAbo/BUEh6f7VlxcAsOH/8prry0gMVGpWLEIY8Z0pl27qjkc0hhzPkJDQ7nzzjvdjpErNGnSJEc/L3cWikxMnfo3r7zyOyIwaFBTXn65DYUKFXA7lskDMmuGaoy/8MXthDxRKJKSlIAA5z9w9+51+OWX7dxxR31atKjgcjKTV4SGhnLw4EHratz4NfWMR3F2a6uLlTtvZm88AGFO07s5c7bwyCM/MHVqT6pXL5HF1sZcGBvhzuQWGY1wdzE3s3PtGcWhQ7E8/vgcxo932mS//fYi3n8/Z5qKmfwnODg4W0cMMyY38WmrJxFpLyIbRGSziJzzBIuIhIjIZM/7f4pIJW/2+813m6ldezTjx68kJCSQV19tw6hRN2R3fGOMMfjw0pOIBAIbgbZANLAEuE1V16VaZwBQX1UfEJGewE2q2iPdHXoUCy+uR2IfBuCqqyrwySedqVGjpE++B2OMySv89TmKpsBmVd2qqqeBSUDXs9bpCvzPM/010EayuFMYExtKoULBjB7dgV9/vceKhDHG+Jgvzyi6Ae1Vta9n/k6gmaoOTLXOGs860Z75LZ51Dpy1r37Amc7k6wJrfBI69ykJXHwviXmDHYsUdixS2LFIUUNVI7Je7Vy+vJmd3pnB2VXJm3VQ1THAGAARWXqhp095jR2LFHYsUtixSGHHIoWILL3QbX156SkauDTVfCRwdufpyeuISBBQBDiEMcYYv+HLQrEEqC4ilUWkANATmHbWOtOAuz3T3YCfNbc92GGMMXmczy49qWqCiAwEZgOBwDhVXSsiL+IM8j0NGAt8LiKbcc4kenqx6zG+ypwL2bFIYccihR2LFHYsUlzwsch1T2YbY4zJWbmzm3FjjDE5xgqFMcaYTPltofBV9x+5kRfH4jERWSciq0TkJxGp6EbOnJDVsUi1XjcRURHJs00jvTkWItLd87OxVkQm5HTGnOLF/5EKIjJPRFZ4/p90cCOnr4nIOBHZ53lGLb33RURGeY7TKhG53Ksdq6rffeHc/N4CVAEKAH8Btc9aZwDwoWe6JzDZ7dwuHotrgXDPdP/8fCw860UA84FFQJTbuV38uagOrACKeeZLu53bxWMxBujvma4NbHc7t4+OxdXA5cCaDN7vAMzCeYbtCuBPb/brr2cUPun+I5fK8lio6jxVPemZXYT
zzEpe5M3PBcBw4HUgL/cJ7s2xuA8YraqHAVR1Xw5nzCneHAsFCnumi3DuM3e42+kAAAYuSURBVF15gqrOJ/Nn0boCn6ljEVBURMpltV9/LRSXADtTzUd7lqW7jqomADFAXhyQwptjkVofnL8Y8qIsj4WINAIuVdUZORnMBd78XFwGXCYiC0RkkYi0z7F0OcubY/E8cIeIRAPfAw/lTDS/c76/TwD/HY8i27r/yAO8/j5F5A4gCrjGp4nck+mxEJEA4G3gnpwK5CJvfi6CcC4/tcI5y/xNROqq6hEfZ8tp3hyL24DxqvqWiDTHeX6rrqom+T6eX7mg35v+ekZh3X+k8OZYICLXAc8AXVT1VA5ly2lZHYsInE4jfxGR7f/f3v2FWFVFcRz//grLMUGQwYfoYUjL0jJJDcmHMsuyUgwGpmGyJrIoishiBCkswofMXhQRTR/0wcQ/mP+KVCItarQRUWeS/mpYIGUFImUQ0+phr8GT3Tn3aI7embs+MMzcO+fPPhvmrLvXnrM2KQe7pY9OaBf9G9lsZn+Z2VHgK1Lg6GuK9MUTwDoAM2sF+pMKBlabQveTs1VqoIjyH2eU7QtPtywjBYm+moeGMn1hZifNrNbM6sysjjRfM83MzrsYWgUr8jeyifSPDkiqJaWijlzUVl4cRfriGDAJQNKNpEBx4qK2sjJsAR71/34aD5w0s+PldqrI1JP1XPmPXqdgXywABgLrfT7/mJlNu2SN7iEF+6IqFOyL7cBkSYeBTqDFzH69dK3uGQX74iVguaRZpFRLc1/8YClpDSnVWOvzMa8C/QDMbClpfuZ+4FvgD+DxQsftg30VQgjhAqrU1FMIIYQKEYEihBBCrggUIYQQckWgCCGEkCsCRQghhFwRKELFkdQp6UDmqy5n27ruKmWe4zl3efXRg17yYvh5HGO6pBGZ16/7g5D/t20rJR31vjgoaVKBfZolXZ15vSLbthDORUU+RxGq3mkzG30JzttkZvskPUV6NuVcn0WZDmwDDgOY2dwL2LYWM9sgaSKpEmq5J6ybgQ78qVszm3kB2xKqTIwoQq/gI4dPJO33r9tLbDNS0uf+yfuQpOv8/Ucy7y+TdHmZ030MDPN9J/kaBu1e6/9Kf/8NnVkD5C1vzzRggZ9nqI8E6iVNkbQu0847JW31nydLavVrWi9pYJm2tZIp4iZprqQ2SR2S3vYnbutJNb9We1tqfMQ01vdp9OvpkDS/zPlCiEARKlJNJu30rr/3M3CPmd0KNACLSuz3NLDQRyNjgR+9XEMDMMHf7wSaypx/KtAuqT+wEmgws5tJI/BnJA0GHgJGmtkoYJ6ZfUYqj9BiZqPN7LvM8XYC4yVd5a8bgLVeVuMV4G6/rn3Ai2Xadh+pNEeXxWY2zsxuAmqAB81sgx+rydtyumtjT0fNB+4CRgPjJE0vc85Q5SL1FCpRqdRTP2CxpK6b/fUl9msFXpZ0DbDRzL7xfP4YoM3Lm9SQgk4pqyWdBr4nlaEeDhw1s6/996uAZ4HFpLUuVkh6j5Ru6paXmPgAmCppA/AAMJtU5XcE8Km37Qq/hlIWSHoTGEIqdthloqTZwABgMPAFsDWnOeOAXWZ2AkDSatJiN5ty9glVLgJF6C1mAT8Bt5BGwv9ZlMjM3pG0l3Qj3i5pJqms8iozm1PgHE3ZAoKSSq5v4jf+20hF5h4GniN9Qs+zlhRkfgPazOyUUnTYaWaNBdrWAmwEnicFrDE+4llCWsXvB0mvkYrd5emLi3uFHhapp9BbDAKO+/oBM0jF3/5F0rXAETNbREoDjQI+BOolDfFtBqv4muJfAnWShvnrGcBun0cYZGbvAy+QUjgAp0ilzkvZRVqi8klS0IBU3XZC1/ElDZBUaqQEgF/7QuAySfdyJij84m2qz2zeXVv2AndIqvW5mkZgd3fnDAEiUITeYwnwmKQ9pLTT7yW2aQA6JB0AbiAt+XiYNA+wQ9Ih0nxB2aUfAczsT1J1zfWS2oG/gaW
kG/A2P95u0mgH0hKcLT75PfSsY3WSUlRT/Due/mkG1vix9ni789pkwDxgti9AtBxoJ6WO2jKbrgSWdk1mZ/Y/DswBPiKtLb3fzDYX6Y9QvaJ6bAghhFwxogghhJArAkUIIYRcEShCCCHkikARQgghVwSKEEIIuSJQhBBCyBWBIoQQQq5/ACldIpmh6XpHAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure()\n", "lw = 2\n", "plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.4f)' % roc_auc)\n", "plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')\n", "plt.xlim(0, 1)\n", "plt.ylim(0, 1)\n", "plt.xlabel('False Postive Ratio')\n", "plt.ylabel('True Postive Ratio(Recall)')\n", "plt.title('ROC curve')\n", "plt.legend(loc='lower right')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6. 결정경계" ] }, { "cell_type": "code", "execution_count": 253, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Threshold 0.0 Train accuracy: 0.3012\n", "Threshold 0.1 Train accuracy: 0.4625\n", "Threshold 0.2 Train accuracy: 0.63\n", "Threshold 0.3 Train accuracy: 0.6975\n", "Threshold 0.4 Train accuracy: 0.735\n", "Threshold 0.5 Train accuracy: 0.7438\n", "Threshold 0.6 Train accuracy: 0.74\n", "Threshold 0.7 Train accuracy: 0.7275\n", "Threshold 0.8 Train accuracy: 0.715\n", "Threshold 0.9 Train accuracy: 0.7\n" ] } ], "source": [ "for i in np.arange(0, 1, 0.1):\n", " train_pred['y_pred'] = 0\n", " train_pred.loc[train_pred['probs'] > i, 'y_pred'] = 1\n", " acc = round(accuracy_score(train_pred['class'], train_pred['y_pred']), 4)\n", " print(\"Threshold\", round(i,1), \"Train accuracy:\",acc)" ] }, { "cell_type": "code", "execution_count": 254, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Threshold 0.0 Test accuracy: 0.295\n", "Threshold 0.1 Test accuracy: 0.455\n", "Threshold 0.2 Test accuracy: 0.625\n", "Threshold 0.3 Test accuracy: 0.675\n", "Threshold 0.4 Test accuracy: 0.725\n", "Threshold 0.5 Test accuracy: 
0.735\n", "Threshold 0.6 Test accuracy: 0.735\n", "Threshold 0.7 Test accuracy: 0.72\n", "Threshold 0.8 Test accuracy: 0.705\n", "Threshold 0.9 Test accuracy: 0.71\n" ] } ], "source": [ "for i in np.arange(0, 1, 0.1):\n", " test_pred['y_pred'] = 0\n", " test_pred.loc[test_pred['probs'] > i, 'y_pred'] = 1\n", " acc = round(accuracy_score(test_pred['class'], test_pred['y_pred']), 4)\n", " print(\"Threshold\", round(i,1), \"Test accuracy:\",acc)" ] }, { "cell_type": "code", "execution_count": 261, "metadata": {}, "outputs": [], "source": [ "# Test prediction\n", "test_pred = pd.DataFrame({\n", " 'probs': drop_logistic_model.predict(sm.add_constant(test_x.drop(unnecesarries, axis=1))),\n", " 'class': test_y\n", "})\n", "\n", "test_pred['y_pred'] = 0\n", "test_pred.loc[test_pred['probs'] > 0.4, 'y_pred'] = 1" ] }, { "cell_type": "code", "execution_count": 262, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test Confusion matrix:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Actual01All
Predict
010620126
1353974
All14159200
\n", "
" ], "text/plain": [ "Actual 0 1 All\n", "Predict \n", "0 106 20 126\n", "1 35 39 74\n", "All 141 59 200" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print('Test Confusion matrix:')\n", "display(pd.crosstab(test_pred['y_pred'], test_pred['class'], rownames=['Predict'], colnames=['Actual'], margins=True))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }