{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "from sklearn.datasets import load_iris\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.svm import LinearSVC\n", "from sklearn.svm import SVC\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.decomposition import PCA\n", "from sklearn.metrics import roc_curve,auc\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.pipeline import Pipeline\n", "from matplotlib import pyplot as plt\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import roc_auc_score\n", "from sklearn.metrics import f1_score\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomey
058managementmarriedtertiaryno2143yesnounknown5may2611-10unknownno
144techniciansinglesecondaryno29yesnounknown5may1511-10unknownno
233entrepreneurmarriedsecondaryno2yesyesunknown5may761-10unknownno
347blue-collarmarriedunknownno1506yesnounknown5may921-10unknownno
433unknownsingleunknownno1nonounknown5may1981-10unknownno
535managementmarriedtertiaryno231yesnounknown5may1391-10unknownno
628managementsingletertiaryno447yesyesunknown5may2171-10unknownno
742entrepreneurdivorcedtertiaryyes2yesnounknown5may3801-10unknownno
858retiredmarriedprimaryno121yesnounknown5may501-10unknownno
943techniciansinglesecondaryno593yesnounknown5may551-10unknownno
\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "0 58 management married tertiary no 2143 yes no \n", "1 44 technician single secondary no 29 yes no \n", "2 33 entrepreneur married secondary no 2 yes yes \n", "3 47 blue-collar married unknown no 1506 yes no \n", "4 33 unknown single unknown no 1 no no \n", "5 35 management married tertiary no 231 yes no \n", "6 28 management single tertiary no 447 yes yes \n", "7 42 entrepreneur divorced tertiary yes 2 yes no \n", "8 58 retired married primary no 121 yes no \n", "9 43 technician single secondary no 593 yes no \n", "\n", " contact day month duration campaign pdays previous poutcome y \n", "0 unknown 5 may 261 1 -1 0 unknown no \n", "1 unknown 5 may 151 1 -1 0 unknown no \n", "2 unknown 5 may 76 1 -1 0 unknown no \n", "3 unknown 5 may 92 1 -1 0 unknown no \n", "4 unknown 5 may 198 1 -1 0 unknown no \n", "5 unknown 5 may 139 1 -1 0 unknown no \n", "6 unknown 5 may 217 1 -1 0 unknown no \n", "7 unknown 5 may 380 1 -1 0 unknown no \n", "8 unknown 5 may 50 1 -1 0 unknown no \n", "9 unknown 5 may 55 1 -1 0 unknown no " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv('bank-full.csv',sep=';')\n", "df.head(10)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# df.columns: returns all the column names" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Data cleaning\n", "\n", "# s stores all value_counts\n", "# value_counts() counts unique objects\n", "s=[]\n", "for i in df.columns:\n", " s.append(df[str(i)].value_counts())\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Transform string to number \n", "\n", "# s[1].index is an object and should be transformed to list\n", "s_index1=list(s[1].index)\n", "# Find the object that equals to one in s[].index in every column, then replace the value with the index of it in s list.\n", "for i in s[1].index:\n", " df.loc[df[str(df.columns[1])]==str(i),str(df.columns[1])]=s_index1.index(str(i))\n", " \n", "s_index2=list(s[2].index)\n", "for i in s[2].index:\n", " df.loc[df[str(df.columns[2])]==str(i),str(df.columns[2])]=s_index2.index(str(i))\n", " \n", "s_index3=list(s[3].index)\n", "for i in s[3].index:\n", " df.loc[df[str(df.columns[3])]==str(i),str(df.columns[3])]=s_index3.index(str(i))\n", " \n", "s_index4=list(s[4].index)\n", "for i in s[4].index:\n", " df.loc[df[str(df.columns[4])]==str(i),str(df.columns[4])]=s_index4.index(str(i))\n", " \n", "s_index6=list(s[6].index)\n", "for i in s[6].index:\n", " df.loc[df[str(df.columns[6])]==str(i),str(df.columns[6])]=s_index6.index(str(i))\n", " \n", "s_index7=list(s[7].index)\n", "for i in s[7].index:\n", " df.loc[df[str(df.columns[7])]==str(i),str(df.columns[7])]=s_index7.index(str(i))\n", " \n", "s_index8=list(s[8].index)\n", "for i in s[8].index:\n", " df.loc[df[str(df.columns[8])]==str(i),str(df.columns[8])]=s_index8.index(str(i))\n", " \n", "s_index10=list(s[10].index)\n", "for i in s[10].index:\n", " df.loc[df[str(df.columns[10])]==str(i),str(df.columns[10])]=s_index10.index(str(i))\n", " \n", "s_index15=list(s[15].index)\n", "for i in s[15].index:\n", " df.loc[df[str(df.columns[15])]==str(i),str(df.columns[15])]=s_index15.index(str(i))\n", " \n", "s_index16=list(s[16].index)\n", "for i in s[16].index:\n", " df.loc[df[str(df.columns[16])]==str(i),str(df.columns[16])]=s_index16.index(str(i))\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomey
00.151331-0.238980-1.0688500.372229-1.0183350.000084-1.799045-1.1907711.570163-0.156019-0.3905170.000043-0.183777-0.004109-0.109374-0.594561-1.132458
10.027171-0.0984881.013713-0.959482-1.018335-0.000144-1.799045-1.1907711.570163-0.156019-0.390517-0.001616-0.183777-0.004109-0.109374-0.594561-1.132458
2-0.0703830.603972-1.068850-0.959482-1.018335-0.000147-1.7990456.2410271.570163-0.156019-0.390517-0.002747-0.183777-0.004109-0.109374-0.594561-1.132458
30.053777-0.379472-1.0688503.035650-1.0183350.000016-1.799045-1.1907711.570163-0.156019-0.390517-0.002505-0.183777-0.004109-0.109374-0.594561-1.132458
4-0.0703831.1659401.0137133.035650-1.018335-0.0001472.251382-1.1907711.570163-0.156019-0.390517-0.000907-0.183777-0.004109-0.109374-0.594561-1.132458
5-0.052646-0.238980-1.0688500.372229-1.018335-0.000122-1.799045-1.1907711.570163-0.156019-0.390517-0.001797-0.183777-0.004109-0.109374-0.594561-1.132458
6-0.114725-0.2389801.0137130.372229-1.018335-0.000099-1.7990456.2410271.570163-0.156019-0.390517-0.000621-0.183777-0.004109-0.109374-0.594561-1.132458
70.0094340.6039723.0962760.37222955.472393-0.000147-1.799045-1.1907711.570163-0.156019-0.3905170.001837-0.183777-0.004109-0.109374-0.594561-1.132458
80.1513310.322988-1.0688501.703939-1.018335-0.000134-1.799045-1.1907711.570163-0.156019-0.390517-0.003139-0.183777-0.004109-0.109374-0.594561-1.132458
90.018303-0.0984881.013713-0.959482-1.018335-0.000083-1.799045-1.1907711.570163-0.156019-0.390517-0.003063-0.183777-0.004109-0.109374-0.594561-1.132458
100.0005660.0420043.096276-0.959482-1.018335-0.000118-1.799045-1.1907711.570163-0.156019-0.390517-0.000545-0.183777-0.004109-0.109374-0.594561-1.132458
11-0.1058570.0420041.013713-0.959482-1.018335-0.000105-1.799045-1.1907711.570163-0.156019-0.390517-0.001827-0.183777-0.004109-0.109374-0.594561-1.132458
120.106988-0.098488-1.068850-0.959482-1.018335-0.000146-1.799045-1.1907711.570163-0.156019-0.3905170.003903-0.183777-0.004109-0.109374-0.594561-1.132458
130.151331-0.098488-1.0688503.035650-1.018335-0.000139-1.799045-1.1907711.570163-0.156019-0.390517-0.002822-0.183777-0.004109-0.109374-0.594561-1.132458
140.1424620.182496-1.068850-0.959482-1.018335-0.000129-1.799045-1.1907711.570163-0.156019-0.390517-0.001269-0.183777-0.004109-0.109374-0.594561-1.132458
150.0892510.322988-1.0688501.703939-1.018335-0.000122-1.799045-1.1907711.570163-0.156019-0.3905170.001430-0.183777-0.004109-0.109374-0.594561-1.132458
160.0360400.0420041.0137133.035650-1.018335-0.000146-1.799045-1.1907711.570163-0.156019-0.390517-0.002415-0.183777-0.004109-0.109374-0.594561-1.132458
170.142462-0.379472-1.0688501.703939-1.018335-0.000141-1.799045-1.1907711.570163-0.156019-0.390517-0.003320-0.183777-0.004109-0.109374-0.594561-1.132458
180.1690680.322988-1.0688501.703939-1.018335-0.000140-1.799045-1.1907711.570163-0.156019-0.390517-0.000591-0.183777-0.004109-0.109374-0.594561-1.132458
19-0.0703830.182496-1.068850-0.959482-1.018335-0.000147-1.799045-1.1907711.570163-0.156019-0.390517-0.003078-0.183777-0.004109-0.109374-0.594561-1.132458
20-0.114725-0.379472-1.068850-0.959482-1.018335-0.000069-1.7990456.2410271.570163-0.156019-0.3905170.000058-0.183777-0.004109-0.109374-0.594561-1.132458
210.133594-0.238980-1.0688500.372229-1.018335-0.000063-1.799045-1.1907711.570163-0.156019-0.390517-0.001420-0.183777-0.004109-0.109374-0.594561-1.132458
22-0.079251-0.3794721.0137131.703939-1.018335-0.000144-1.7990456.2410271.570163-0.156019-0.390517-0.001480-0.183777-0.004109-0.109374-0.594561-1.132458
23-0.1413310.182496-1.068850-0.959482-1.018335-0.000142-1.799045-1.1907711.570163-0.156019-0.3905170.001264-0.183777-0.004109-0.109374-0.594561-1.132458
24-0.0083030.322988-1.0688501.703939-1.018335-0.000147-1.7990456.2410271.570163-0.156019-0.390517-0.001163-0.183777-0.004109-0.109374-0.594561-1.132458
250.0271710.042004-1.068850-0.959482-1.018335-0.000187-1.799045-1.1907711.570163-0.156019-0.390517-0.001299-0.183777-0.004109-0.109374-0.594561-1.132458
26-0.017171-0.2389801.0137130.372229-1.018335-0.000119-1.799045-1.1907711.570163-0.156019-0.3905170.000571-0.183777-0.004109-0.109374-0.594561-1.132458
270.0981200.603972-1.068850-0.959482-1.018335-0.000135-1.7990456.2410271.570163-0.156019-0.390517-0.001978-0.183777-0.004109-0.109374-0.594561-1.132458
280.044908-0.2389801.013713-0.959482-1.018335-0.000173-1.799045-1.1907711.570163-0.156019-0.390517-0.000048-0.079586-0.004109-0.109374-0.594561-1.132458
29-0.043777-0.0984881.013713-0.959482-1.018335-0.000118-1.7990456.2410271.570163-0.156019-0.3905170.001355-0.183777-0.004109-0.109374-0.594561-1.132458
......................................................
451810.044908-0.379472-1.068850-0.959482-1.0183350.0005952.251382-1.190771-1.120941-0.0116430.252056-0.002777-0.0795860.0077600.4560401.455210-1.132458
45182-0.061514-0.098488-1.068850-0.959482-1.018335-0.0001332.251382-1.190771-1.120941-0.0116430.2520560.002154-0.0795860.0146420.8329835.5547518.547930
451830.2577530.322988-1.0688501.703939-1.018335-0.0001122.251382-1.190771-1.120941-0.0116430.252056-0.002717-0.1837770.0055661.2099265.554751-1.132458
451840.1956740.322988-1.068850-0.959482-1.0183350.0000142.251382-1.190771-1.1209410.0027950.252056-0.001812-0.183777-0.0018150.8329835.554751-1.132458
451850.1690680.182496-1.0688500.372229-1.0183350.000312-1.799045-1.190771-1.1209410.0027950.252056-0.000877-0.1837770.0051670.6445125.5547518.547930
451860.1601991.165940-1.0688503.035650-1.0183350.0000152.251382-1.190771-1.1209410.0027950.2520560.000329-0.1837770.0063640.2675691.455210-1.132458
45187-0.0792510.1824961.013713-0.959482-1.018335-0.000021-1.799045-1.190771-1.1209410.0027950.2520560.002305-0.183777-0.004109-0.109374-0.5945618.547930
45188-0.105857-0.2389801.013713-0.959482-1.018335-0.000071-1.799045-1.190771-1.1209410.0027950.252056-0.000334-0.1837770.0508490.2675695.5547518.547930
45189-0.1413310.1824961.013713-0.959482-1.018335-0.0001252.251382-1.190771-1.1209410.0027950.252056-0.001284-0.1837770.0051670.8329831.455210-1.132458
45190-0.079251-0.379472-1.068850-0.959482-1.018335-0.0001322.251382-1.190771-1.1209410.0027950.252056-0.000787-0.1837770.0147420.4560405.5547518.547930
451910.3020960.3229883.0962760.372229-1.0183350.000264-1.799045-1.190771-1.1209410.0027950.2520560.000058-0.1837770.0142440.0790971.4552108.547930
45192-0.105857-0.2389801.0137130.372229-1.018335-0.0000642.251382-1.190771-1.1209410.0027950.252056-0.000304-0.183777-0.004109-0.109374-0.5945618.547930
45193-0.1147250.4634801.0137130.372229-1.018335-0.0001302.251382-1.190771-1.1209410.0027950.2520560.002877-0.079586-0.0007180.6445125.5547518.547930
451940.160199-0.238980-1.0688500.372229-1.018335-0.000132-1.7990456.241027-1.1209410.0027950.252056-0.001450-0.0795860.0146420.8329831.455210-1.132458
451950.2400160.322988-1.068850-0.959482-1.018335-0.0000232.251382-1.190771-1.1209410.0027950.252056-0.000696-0.1837770.0146421.0214555.5547518.547930
45196-0.1413311.0254481.013713-0.959482-1.018335-0.0001082.251382-1.190771-1.1209410.0027950.2520560.001083-0.183777-0.004109-0.109374-0.5945618.547930
45197-0.043777-0.2389801.013713-0.959482-1.0183350.000016-1.799045-1.190771-1.1209410.0027950.2520560.000178-0.183777-0.004109-0.109374-0.5945618.547930
45198-0.034908-0.238980-1.0688500.372229-1.0183350.0000072.251382-1.190771-1.1209410.0027950.2520560.001128-0.079586-0.004109-0.109374-0.594561-1.132458
45199-0.061514-0.3794721.013713-0.959482-1.0183350.000012-1.799045-1.190771-1.1209410.0027950.2520560.0136890.0246060.0488542.1522843.504981-1.132458
45200-0.026040-0.098488-1.068850-0.959482-1.018335-0.000087-1.799045-1.190771-1.1209410.0027950.2520560.0195690.128797-0.004109-0.109374-0.5945618.547930
452010.106988-0.238980-1.0688500.372229-1.018335-0.0000842.251382-1.190771-1.1209410.0172320.252056-0.000485-0.1837770.0143430.6445125.5547518.547930
45202-0.0615140.0420041.013713-0.959482-1.018335-0.0000872.251382-1.190771-1.1209410.0172320.252056-0.000515-0.183777-0.004109-0.109374-0.5945618.547930
45203-0.1590681.0254481.0137130.372229-1.018335-0.0001352.251382-1.190771-1.1209410.0172320.2520560.000118-0.183777-0.004109-0.109374-0.5945618.547930
452040.2843590.322988-1.068850-0.959482-1.0183350.0001602.251382-1.190771-1.1209410.0172320.2520560.000631-0.183777-0.0000201.3983981.4552108.547930
45205-0.141331-0.0984881.013713-0.959482-1.018335-0.0000922.2513826.241027-1.1209410.0172320.2520560.001928-0.079586-0.004109-0.109374-0.5945618.547930
452060.089251-0.098488-1.0688500.372229-1.018335-0.0000582.251382-1.190771-1.1209410.0172320.2520560.0108390.024606-0.004109-0.109374-0.5945618.547930
452070.2666220.3229883.0962761.703939-1.0183350.0000402.251382-1.190771-1.1209410.0172320.2520560.002983-0.079586-0.004109-0.109374-0.5945618.547930
452080.2754910.322988-1.068850-0.959482-1.0183350.0004702.251382-1.190771-1.1209410.0172320.2520560.0131010.2329880.0143430.4560405.5547518.547930
452090.142462-0.379472-1.068850-0.959482-1.018335-0.0000752.251382-1.1907714.2612670.0172320.2520560.0037670.128797-0.004109-0.109374-0.594561-1.132458
45210-0.0349080.603972-1.068850-0.959482-1.0183350.0001742.251382-1.190771-1.1209410.0172320.2520560.001551-0.0795860.0147421.9638133.504981-1.132458
\n", "

45211 rows × 17 columns

\n", "
" ], "text/plain": [ " age job marital education default balance housing \\\n", "0 0.151331 -0.238980 -1.068850 0.372229 -1.018335 0.000084 -1.799045 \n", "1 0.027171 -0.098488 1.013713 -0.959482 -1.018335 -0.000144 -1.799045 \n", "2 -0.070383 0.603972 -1.068850 -0.959482 -1.018335 -0.000147 -1.799045 \n", "3 0.053777 -0.379472 -1.068850 3.035650 -1.018335 0.000016 -1.799045 \n", "4 -0.070383 1.165940 1.013713 3.035650 -1.018335 -0.000147 2.251382 \n", "5 -0.052646 -0.238980 -1.068850 0.372229 -1.018335 -0.000122 -1.799045 \n", "6 -0.114725 -0.238980 1.013713 0.372229 -1.018335 -0.000099 -1.799045 \n", "7 0.009434 0.603972 3.096276 0.372229 55.472393 -0.000147 -1.799045 \n", "8 0.151331 0.322988 -1.068850 1.703939 -1.018335 -0.000134 -1.799045 \n", "9 0.018303 -0.098488 1.013713 -0.959482 -1.018335 -0.000083 -1.799045 \n", "10 0.000566 0.042004 3.096276 -0.959482 -1.018335 -0.000118 -1.799045 \n", "11 -0.105857 0.042004 1.013713 -0.959482 -1.018335 -0.000105 -1.799045 \n", "12 0.106988 -0.098488 -1.068850 -0.959482 -1.018335 -0.000146 -1.799045 \n", "13 0.151331 -0.098488 -1.068850 3.035650 -1.018335 -0.000139 -1.799045 \n", "14 0.142462 0.182496 -1.068850 -0.959482 -1.018335 -0.000129 -1.799045 \n", "15 0.089251 0.322988 -1.068850 1.703939 -1.018335 -0.000122 -1.799045 \n", "16 0.036040 0.042004 1.013713 3.035650 -1.018335 -0.000146 -1.799045 \n", "17 0.142462 -0.379472 -1.068850 1.703939 -1.018335 -0.000141 -1.799045 \n", "18 0.169068 0.322988 -1.068850 1.703939 -1.018335 -0.000140 -1.799045 \n", "19 -0.070383 0.182496 -1.068850 -0.959482 -1.018335 -0.000147 -1.799045 \n", "20 -0.114725 -0.379472 -1.068850 -0.959482 -1.018335 -0.000069 -1.799045 \n", "21 0.133594 -0.238980 -1.068850 0.372229 -1.018335 -0.000063 -1.799045 \n", "22 -0.079251 -0.379472 1.013713 1.703939 -1.018335 -0.000144 -1.799045 \n", "23 -0.141331 0.182496 -1.068850 -0.959482 -1.018335 -0.000142 -1.799045 \n", "24 -0.008303 0.322988 -1.068850 1.703939 -1.018335 -0.000147 -1.799045 \n", "25 0.027171 0.042004 -1.068850 -0.959482 -1.018335 -0.000187 -1.799045 \n", "26 -0.017171 -0.238980 1.013713 0.372229 -1.018335 -0.000119 -1.799045 \n", "27 0.098120 0.603972 -1.068850 -0.959482 -1.018335 -0.000135 -1.799045 \n", "28 0.044908 -0.238980 1.013713 -0.959482 -1.018335 -0.000173 -1.799045 \n", "29 -0.043777 -0.098488 1.013713 -0.959482 -1.018335 -0.000118 -1.799045 \n", "... ... ... ... ... ... ... ... \n", "45181 0.044908 -0.379472 -1.068850 -0.959482 -1.018335 0.000595 2.251382 \n", "45182 -0.061514 -0.098488 -1.068850 -0.959482 -1.018335 -0.000133 2.251382 \n", "45183 0.257753 0.322988 -1.068850 1.703939 -1.018335 -0.000112 2.251382 \n", "45184 0.195674 0.322988 -1.068850 -0.959482 -1.018335 0.000014 2.251382 \n", "45185 0.169068 0.182496 -1.068850 0.372229 -1.018335 0.000312 -1.799045 \n", "45186 0.160199 1.165940 -1.068850 3.035650 -1.018335 0.000015 2.251382 \n", "45187 -0.079251 0.182496 1.013713 -0.959482 -1.018335 -0.000021 -1.799045 \n", "45188 -0.105857 -0.238980 1.013713 -0.959482 -1.018335 -0.000071 -1.799045 \n", "45189 -0.141331 0.182496 1.013713 -0.959482 -1.018335 -0.000125 2.251382 \n", "45190 -0.079251 -0.379472 -1.068850 -0.959482 -1.018335 -0.000132 2.251382 \n", "45191 0.302096 0.322988 3.096276 0.372229 -1.018335 0.000264 -1.799045 \n", "45192 -0.105857 -0.238980 1.013713 0.372229 -1.018335 -0.000064 2.251382 \n", "45193 -0.114725 0.463480 1.013713 0.372229 -1.018335 -0.000130 2.251382 \n", "45194 0.160199 -0.238980 -1.068850 0.372229 -1.018335 -0.000132 -1.799045 \n", "45195 0.240016 0.322988 -1.068850 -0.959482 -1.018335 -0.000023 2.251382 \n", "45196 -0.141331 1.025448 1.013713 -0.959482 -1.018335 -0.000108 2.251382 \n", "45197 -0.043777 -0.238980 1.013713 -0.959482 -1.018335 0.000016 -1.799045 \n", "45198 -0.034908 -0.238980 -1.068850 0.372229 -1.018335 0.000007 2.251382 \n", "45199 -0.061514 -0.379472 1.013713 -0.959482 -1.018335 0.000012 -1.799045 \n", "45200 -0.026040 -0.098488 -1.068850 -0.959482 -1.018335 -0.000087 -1.799045 \n", "45201 0.106988 -0.238980 -1.068850 0.372229 -1.018335 -0.000084 2.251382 \n", "45202 -0.061514 0.042004 1.013713 -0.959482 -1.018335 -0.000087 2.251382 \n", "45203 -0.159068 1.025448 1.013713 0.372229 -1.018335 -0.000135 2.251382 \n", "45204 0.284359 0.322988 -1.068850 -0.959482 -1.018335 0.000160 2.251382 \n", "45205 -0.141331 -0.098488 1.013713 -0.959482 -1.018335 -0.000092 2.251382 \n", "45206 0.089251 -0.098488 -1.068850 0.372229 -1.018335 -0.000058 2.251382 \n", "45207 0.266622 0.322988 3.096276 1.703939 -1.018335 0.000040 2.251382 \n", "45208 0.275491 0.322988 -1.068850 -0.959482 -1.018335 0.000470 2.251382 \n", "45209 0.142462 -0.379472 -1.068850 -0.959482 -1.018335 -0.000075 2.251382 \n", "45210 -0.034908 0.603972 -1.068850 -0.959482 -1.018335 0.000174 2.251382 \n", "\n", " loan contact day month duration campaign pdays \\\n", "0 -1.190771 1.570163 -0.156019 -0.390517 0.000043 -0.183777 -0.004109 \n", "1 -1.190771 1.570163 -0.156019 -0.390517 -0.001616 -0.183777 -0.004109 \n", "2 6.241027 1.570163 -0.156019 -0.390517 -0.002747 -0.183777 -0.004109 \n", "3 -1.190771 1.570163 -0.156019 -0.390517 -0.002505 -0.183777 -0.004109 \n", "4 -1.190771 1.570163 -0.156019 -0.390517 -0.000907 -0.183777 -0.004109 \n", "5 -1.190771 1.570163 -0.156019 -0.390517 -0.001797 -0.183777 -0.004109 \n", "6 6.241027 1.570163 -0.156019 -0.390517 -0.000621 -0.183777 -0.004109 \n", "7 -1.190771 1.570163 -0.156019 -0.390517 0.001837 -0.183777 -0.004109 \n", "8 -1.190771 1.570163 -0.156019 -0.390517 -0.003139 -0.183777 -0.004109 \n", "9 -1.190771 1.570163 -0.156019 -0.390517 -0.003063 -0.183777 -0.004109 \n", "10 -1.190771 1.570163 -0.156019 -0.390517 -0.000545 -0.183777 -0.004109 \n", "11 -1.190771 1.570163 -0.156019 -0.390517 -0.001827 -0.183777 -0.004109 \n", "12 -1.190771 1.570163 -0.156019 -0.390517 0.003903 -0.183777 -0.004109 \n", "13 -1.190771 1.570163 -0.156019 -0.390517 -0.002822 -0.183777 -0.004109 \n", "14 -1.190771 1.570163 -0.156019 -0.390517 -0.001269 -0.183777 -0.004109 \n", "15 -1.190771 1.570163 -0.156019 -0.390517 0.001430 -0.183777 -0.004109 \n", "16 -1.190771 1.570163 -0.156019 -0.390517 -0.002415 -0.183777 -0.004109 \n", "17 -1.190771 1.570163 -0.156019 -0.390517 -0.003320 -0.183777 -0.004109 \n", "18 -1.190771 1.570163 -0.156019 -0.390517 -0.000591 -0.183777 -0.004109 \n", "19 -1.190771 1.570163 -0.156019 -0.390517 -0.003078 -0.183777 -0.004109 \n", "20 6.241027 1.570163 -0.156019 -0.390517 0.000058 -0.183777 -0.004109 \n", "21 -1.190771 1.570163 -0.156019 -0.390517 -0.001420 -0.183777 -0.004109 \n", "22 6.241027 1.570163 -0.156019 -0.390517 -0.001480 -0.183777 -0.004109 \n", "23 -1.190771 1.570163 -0.156019 -0.390517 0.001264 -0.183777 -0.004109 \n", "24 6.241027 1.570163 -0.156019 -0.390517 -0.001163 -0.183777 -0.004109 \n", "25 -1.190771 1.570163 -0.156019 -0.390517 -0.001299 -0.183777 -0.004109 \n", "26 -1.190771 1.570163 -0.156019 -0.390517 0.000571 -0.183777 -0.004109 \n", "27 6.241027 1.570163 -0.156019 -0.390517 -0.001978 -0.183777 -0.004109 \n", "28 -1.190771 1.570163 -0.156019 -0.390517 -0.000048 -0.079586 -0.004109 \n", "29 6.241027 1.570163 -0.156019 -0.390517 0.001355 -0.183777 -0.004109 \n", "... ... ... ... ... ... ... ... \n", "45181 -1.190771 -1.120941 -0.011643 0.252056 -0.002777 -0.079586 0.007760 \n", "45182 -1.190771 -1.120941 -0.011643 0.252056 0.002154 -0.079586 0.014642 \n", "45183 -1.190771 -1.120941 -0.011643 0.252056 -0.002717 -0.183777 0.005566 \n", "45184 -1.190771 -1.120941 0.002795 0.252056 -0.001812 -0.183777 -0.001815 \n", "45185 -1.190771 -1.120941 0.002795 0.252056 -0.000877 -0.183777 0.005167 \n", "45186 -1.190771 -1.120941 0.002795 0.252056 0.000329 -0.183777 0.006364 \n", "45187 -1.190771 -1.120941 0.002795 0.252056 0.002305 -0.183777 -0.004109 \n", "45188 -1.190771 -1.120941 0.002795 0.252056 -0.000334 -0.183777 0.050849 \n", "45189 -1.190771 -1.120941 0.002795 0.252056 -0.001284 -0.183777 0.005167 \n", "45190 -1.190771 -1.120941 0.002795 0.252056 -0.000787 -0.183777 0.014742 \n", "45191 -1.190771 -1.120941 0.002795 0.252056 0.000058 -0.183777 0.014244 \n", "45192 -1.190771 -1.120941 0.002795 0.252056 -0.000304 -0.183777 -0.004109 \n", "45193 -1.190771 -1.120941 0.002795 0.252056 0.002877 -0.079586 -0.000718 \n", "45194 6.241027 -1.120941 0.002795 0.252056 -0.001450 -0.079586 0.014642 \n", "45195 -1.190771 -1.120941 0.002795 0.252056 -0.000696 -0.183777 0.014642 \n", "45196 -1.190771 -1.120941 0.002795 0.252056 0.001083 -0.183777 -0.004109 \n", "45197 -1.190771 -1.120941 0.002795 0.252056 0.000178 -0.183777 -0.004109 \n", "45198 -1.190771 -1.120941 0.002795 0.252056 0.001128 -0.079586 -0.004109 \n", "45199 -1.190771 -1.120941 0.002795 0.252056 0.013689 0.024606 0.048854 \n", "45200 -1.190771 -1.120941 0.002795 0.252056 0.019569 0.128797 -0.004109 \n", "45201 -1.190771 -1.120941 0.017232 0.252056 -0.000485 -0.183777 0.014343 \n", "45202 -1.190771 -1.120941 0.017232 0.252056 -0.000515 -0.183777 -0.004109 \n", "45203 -1.190771 -1.120941 0.017232 0.252056 0.000118 -0.183777 -0.004109 \n", "45204 -1.190771 -1.120941 0.017232 0.252056 0.000631 -0.183777 -0.000020 \n", "45205 6.241027 -1.120941 0.017232 0.252056 0.001928 -0.079586 -0.004109 \n", "45206 -1.190771 -1.120941 0.017232 0.252056 0.010839 0.024606 -0.004109 \n", "45207 -1.190771 -1.120941 0.017232 0.252056 0.002983 -0.079586 -0.004109 \n", "45208 -1.190771 -1.120941 0.017232 0.252056 0.013101 0.232988 0.014343 \n", "45209 -1.190771 4.261267 0.017232 0.252056 0.003767 0.128797 -0.004109 \n", "45210 -1.190771 -1.120941 0.017232 0.252056 0.001551 -0.079586 0.014742 \n", "\n", " previous poutcome y \n", "0 -0.109374 -0.594561 -1.132458 \n", "1 -0.109374 -0.594561 -1.132458 \n", "2 -0.109374 -0.594561 -1.132458 \n", "3 -0.109374 -0.594561 -1.132458 \n", "4 -0.109374 -0.594561 -1.132458 \n", "5 -0.109374 -0.594561 -1.132458 \n", "6 -0.109374 -0.594561 -1.132458 \n", "7 -0.109374 -0.594561 -1.132458 \n", "8 -0.109374 -0.594561 -1.132458 \n", "9 -0.109374 -0.594561 -1.132458 \n", "10 -0.109374 -0.594561 -1.132458 \n", "11 -0.109374 -0.594561 -1.132458 \n", "12 -0.109374 -0.594561 -1.132458 \n", "13 -0.109374 -0.594561 -1.132458 \n", "14 -0.109374 -0.594561 -1.132458 \n", "15 -0.109374 -0.594561 -1.132458 \n", "16 -0.109374 -0.594561 -1.132458 \n", "17 -0.109374 -0.594561 -1.132458 \n", "18 -0.109374 -0.594561 -1.132458 \n", "19 -0.109374 -0.594561 -1.132458 \n", "20 -0.109374 -0.594561 -1.132458 \n", "21 -0.109374 -0.594561 -1.132458 \n", "22 -0.109374 -0.594561 -1.132458 \n", "23 -0.109374 -0.594561 -1.132458 \n", "24 -0.109374 -0.594561 -1.132458 \n", "25 -0.109374 -0.594561 -1.132458 \n", "26 -0.109374 -0.594561 -1.132458 \n", "27 -0.109374 -0.594561 -1.132458 \n", "28 -0.109374 -0.594561 -1.132458 \n", "29 -0.109374 -0.594561 -1.132458 \n", "... ... ... ... \n", "45181 0.456040 1.455210 -1.132458 \n", "45182 0.832983 5.554751 8.547930 \n", "45183 1.209926 5.554751 -1.132458 \n", "45184 0.832983 5.554751 -1.132458 \n", "45185 0.644512 5.554751 8.547930 \n", "45186 0.267569 1.455210 -1.132458 \n", "45187 -0.109374 -0.594561 8.547930 \n", "45188 0.267569 5.554751 8.547930 \n", "45189 0.832983 1.455210 -1.132458 \n", "45190 0.456040 5.554751 8.547930 \n", "45191 0.079097 1.455210 8.547930 \n", "45192 -0.109374 -0.594561 8.547930 \n", "45193 0.644512 5.554751 8.547930 \n", "45194 0.832983 1.455210 -1.132458 \n", "45195 1.021455 5.554751 8.547930 \n", "45196 -0.109374 -0.594561 8.547930 \n", "45197 -0.109374 -0.594561 8.547930 \n", "45198 -0.109374 -0.594561 -1.132458 \n", "45199 2.152284 3.504981 -1.132458 \n", "45200 -0.109374 -0.594561 8.547930 \n", "45201 0.644512 5.554751 8.547930 \n", "45202 -0.109374 -0.594561 8.547930 \n", "45203 -0.109374 -0.594561 8.547930 \n", "45204 1.398398 1.455210 8.547930 \n", "45205 -0.109374 -0.594561 8.547930 \n", "45206 -0.109374 -0.594561 8.547930 \n", "45207 -0.109374 -0.594561 8.547930 \n", "45208 0.456040 5.554751 8.547930 \n", "45209 -0.109374 -0.594561 -1.132458 \n", "45210 1.963813 3.504981 -1.132458 \n", "\n", "[45211 rows x 17 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# drop NA, Keep rows with at least 17 Non-Null values\n", "df=df.dropna(thresh=17)\n", "# fill NA\n", "df=df.fillna(method='ffill',limit=3)\n", "df.apply(lambda x:((x-x.mean())/x.var()))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# select X and y from dataframe\n", "X=df.iloc[:,0:16]\n", "# if y=df.iloc[:,16:17], we get a dataframe,otherwise we get a series. Here is a series object\n", "y=df.iloc[:,16]\n", "# 25% as training data for default, use 'test_size' argument to give a percentage to split\n", "X_train,X_test,y_train,y_test=train_test_split(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#################################################### logistic regression ####################################################" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Logistic classification results:\n", "accuracy_score: 0.8971069627532513\n", "precision_score: 0.6228748068006182\n", "recall_score 0.3048411497730711\n", "auc: 0.8887919573665001\n", "f1_score(weighted): 0.8811531211378714\n", "f1_score(macro): 0.6764948773636862\n", "f1_score(micro): 0.8971069627532513\n", "f1_score(None): 0.409344845099035\n" ] } ], "source": [ "log_reg=LogisticRegression()\n", "log_reg.fit(X_train,y_train)\n", "pred_log=log_reg.predict(X_test)\n", "# Use 'predict_proba' to get AUC, return the probability in every classification (if it is dichotomy, there are two columns)\n", "pred_proba_log=log_reg.predict_proba(X_test)\n", "\n", "print(\"Logistic classification results:\")\n", "# accuracy_score reflects the ratio of correct positive to predicted positive\n", "print(\"accuracy_score:\",accuracy_score(y_test,pred_log))\n", "# precision_score reflects the prediction precision \n", "print(\"precision_score:\",precision_score(y_test,pred_log))\n", "# recall_score reflects the ratio of correct positive to true positive\n", "print(\"recall_score\",recall_score(y_test,pred_log))\n", "print(\"auc:\",roc_auc_score(y_test,pred_proba_log[:,1]))\n", "print(\"f1_score(weighted):\",f1_score(y_test,pred_log,average='weighted'))\n", "print(\"f1_score(macro):\",f1_score(y_test,pred_log,average='macro'))\n", "print(\"f1_score(micro):\",f1_score(y_test,pred_log,average='micro'))\n", "print(\"f1_score(None):\",f1_score(y_test,pred_log))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#################################################### svm ####################################################" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.partial_fit(X, y)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.fit(X, y, **fit_params).transform(X)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "svm classification result\n", "accuracy_score: 0.8945412722286119\n", "precision_score: 0.6805555555555556\n", "recall_score 0.18532526475037822\n", "f1_score(weighted): 0.8668076170408332\n", "f1_score(macro): 0.6171758911874263\n", "f1_score(micro): 0.8945412722286119\n", "f1_score(None): 0.291319857312723\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", " \"the number of iterations.\", ConvergenceWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\pipeline.py:331: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " Xt = transform.transform(Xt)\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.svm import LinearSVC\n", "from sklearn.svm import SVC\n", "\n", "# method without pipeline \n", "# scaler=StandardScaler()\n", "# scaler.fit(df)\n", "# svm_clf=SVC(C=1,probability=True,verbose=1)\n", "\n", "# standardize by column\n", "svm_clf=Pipeline((\n", " ('scaler',StandardScaler()),\n", " ('linear_svc',LinearSVC(C=1,loss='hinge'))\n", " ))\n", "svm_clf.fit(X_train,y_train)\n", "pred_svm=svm_clf.predict(X_test)\n", "\n", "print(\"svm classification result\")\n", "print(\"accuracy_score:\",accuracy_score(y_test,pred_svm))\n", "print(\"precision_score:\",precision_score(y_test,pred_svm))\n", "print(\"recall_score\",recall_score(y_test,pred_svm))\n", "#print(\"auc:\",roc_auc_score(y_test,pred_proba_svm[:,1]))#auc\n", "print(\"f1_score(weighted):\",f1_score(y_test,pred_svm,average='weighted'))\n", "print(\"f1_score(macro):\",f1_score(y_test,pred_svm,average='macro'))\n", "print(\"f1_score(micro):\",f1_score(y_test,pred_svm,average='micro'))\n", "print(\"f1_score(None):\",f1_score(y_test,pred_svm))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#################################################### random forest ####################################################" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "random forest classification result:\n", "accuracy_score: 0.8969300185791382\n", "precision_score: 0.755700325732899\n", "recall_score 0.17549167927382753\n", "auc: 0.9073402475293071\n", "f1_score(weighted): 0.8673131007408489\n", "f1_score(macro): 0.6146501533893725\n", "f1_score(micro): 0.8969300185791382\n", "f1_score(None): 0.2848373235113566\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import train_test_split\n", "\n", "print(\"random forest classification result:\")\n", "rnd_clf=RandomForestClassifier(n_estimators=500,max_leaf_nodes=16,n_jobs=-1)\n", "rnd_clf.fit(X_train,y_train)\n", "pred_rf=rnd_clf.predict(X_test)\n", "pred_proba_rf=rnd_clf.predict_proba(X_test)\n", "print(\"accuracy_score:\",accuracy_score(y_test,pred_rf))\n", "print(\"precision_score:\",precision_score(y_test,pred_rf))\n", "print(\"recall_score\",recall_score(y_test,pred_rf))\n", "print(\"auc:\",roc_auc_score(y_test,pred_proba_rf[:,1]))#auc\n", "print(\"f1_score(weighted):\",f1_score(y_test,pred_rf,average='weighted'))\n", "print(\"f1_score(macro):\",f1_score(y_test,pred_rf,average='macro'))\n", "print(\"f1_score(micro):\",f1_score(y_test,pred_rf,average='micro'))\n", "print(\"f1_score(None):\",f1_score(y_test,pred_rf))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#################################################### stacking classifier ####################################################" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn import model_selection\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.ensemble import RandomForestClassifier\n", "from mlxtend.classifier import StackingClassifier\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stacking:\n", "\n", "3-fold cross validation:\n", "\n", "Accuracy:0.81(+/- 0.07) [KNN]\n", "Auc:0.59(+/- 0.00) [KNN]\n", "f1:0.27(+/- 0.01) [KNN]\n", "f1_micro:0.81(+/- 0.07) [KNN]\n", "f1_macro:0.58(+/- 0.03) [KNN]\n", "f1_weighted:0.81(+/- 0.04) [KNN]\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy:0.65(+/- 0.26) [Random Forest]\n", "Auc:0.58(+/- 0.06) [Random Forest]\n", "f1:0.17(+/- 0.04) [Random Forest]\n", "f1_micro:0.65(+/- 0.26) [Random Forest]\n", "f1_macro:0.44(+/- 0.12) [Random Forest]\n", "f1_weighted:0.65(+/- 0.24) [Random Forest]\n", "\n", "Accuracy:0.79(+/- 0.16) [Naive Bayes]\n", "Auc:0.78(+/- 0.06) [Naive Bayes]\n", "f1:0.42(+/- 0.08) [Naive Bayes]\n", "f1_micro:0.79(+/- 0.16) [Naive Bayes]\n", "f1_macro:0.64(+/- 0.10) [Naive Bayes]\n", "f1_weighted:0.81(+/- 0.12) [Naive Bayes]\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy:0.81(+/- 0.07) [StackingClassifier]\n", "Auc:0.68(+/- 0.02) [StackingClassifier]\n", "f1:0.27(+/- 0.01) [StackingClassifier]\n", "f1_micro:0.81(+/- 0.07) [StackingClassifier]\n", "f1_macro:0.58(+/- 0.03) [StackingClassifier]\n", "f1_weighted:0.81(+/- 0.04) [StackingClassifier]\n", "\n" ] } ], "source": [ "print(\"Stacking:\\n\")\n", "clf1=KNeighborsClassifier(n_neighbors=1)\n", "clr2=RandomForestClassifier(random_state=1)\n", "clf3=GaussianNB()\n", "lr=LogisticRegression()#logistics\n", "sclf=StackingClassifier(classifiers=[clf1,clr2,clf3],meta_classifier=lr)\n", "\n", "print('3-fold cross validation:\\n')\n", "\n", "for clf,label in zip([clf1,clr2,clf3,sclf],\n", " ['KNN',\n", " 'Random Forest',\n", " 'Naive Bayes',\n", " 'StackingClassifier']):\n", " scores_acc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='accuracy')\n", " scores_auc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='roc_auc')\n", " scores_f1=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1')\n", " scores_f1_macro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_macro') \n", " scores_f1_micro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_micro')\n", " scores_f1_weighted=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_weighted')\n", " print(\"Accuracy:%0.2f(+/- %0.2f) [%s]\\nAuc:%0.2f(+/- %0.2f) [%s]\\nf1:%0.2f(+/- %0.2f) [%s]\\nf1_micro:%0.2f(+/- %0.2f) [%s]\\nf1_macro:%0.2f(+/- %0.2f) [%s]\\nf1_weighted:%0.2f(+/- %0.2f) [%s]\\n\"\n", " %(scores_acc.mean(),scores_acc.std(),label,\n", " scores_auc.mean(),scores_auc.std(),label,\n", " scores_f1.mean(),scores_f1.std(),label,\n", " scores_f1_micro.mean(),scores_f1_micro.std(),label,\n", " scores_f1_macro.mean(),scores_f1_macro.std(),label,\n", " scores_f1_weighted.mean(),scores_f1_weighted.std(),label\n", " ))\n", " \n", " \"\"\"\n", "print(\"Normal stacking:\\n\")\n", "import numpy as np\n", "\n", "from sklearn import model_selection\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.svm import SVC\n", "from sklearn.ensemble import RandomForestClassifier\n", "from mlxtend.classifier import EnsembleVoteClassifier\n", "#initalizing classifiers\n", "clf1=LogisticRegression(random_state=0)\n", "clf2=RandomForestClassifier(random_state=0)\n", "clf3=SVC(random_state=0,probability=True)\n", "eclf=EnsembleVoteClassifier(clfs=[clf1,clf2,clf3],weights=[2,1,1],voting='soft')\n", "\n", "#loading some example data\n", "for clf,lab in zip([clf1,clf2,clf3,eclf],\n", " ['Logistic Regression','Random Forest','Naive Bayes','Ensemble']):\n", " scores_acc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='accuracy')\n", " scores_auc=model_selection.cross_val_score(clf,X,y,cv=3,scoring='roc_auc')\n", " scores_f1=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1')\n", " scores_f1_macro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_macro') \n", " scores_f1_micro=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_micro')\n", " scores_f1_weighted=model_selection.cross_val_score(clf,X,y,cv=3,scoring='f1_weighted')\n", " print(\"Accuracy:%0.2f(+/- %0.2f) [%s]\\nAuc:%0.2f(+/- %0.2f) [%s]\\nf1:%0.2f(+/- %0.2f) [%s]\\nf1_micro:%0.2f(+/- %0.2f) [%s]\\nf1_macro:%0.2f(+/- %0.2f) [%s]\\nf1_weighted:%0.2f(+/- %0.2f) [%s]\\n\"\n", " %(scores_acc.mean(),scores_acc.std(),lab,\n", " scores_auc.mean(),scores_auc.std(),lab,\n", " scores_f1.mean(),scores_f1.std(),lab,\n", " scores_f1_micro.mean(),scores_f1_micro.std(),lab,\n", " scores_f1_macro.mean(),scores_f1_macro.std(),lab,\n", " scores_f1_weighted.mean(),scores_f1_weighted.std(),lab\n", " ))\"\"\"\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#################################################### ROC Curve ####################################################" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "from sklearn.decomposition import PCA\n", "from sklearn.metrics import roc_curve,auc\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.pipeline import Pipeline\n", "from matplotlib import pyplot as plt" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ROC curve\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.partial_fit(X, y)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.fit(X, y, **fit_params).transform(X)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\pipeline.py:381: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " Xt = transform.transform(Xt)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.partial_fit(X, y)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\base.py:465: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " return self.fit(X, y, **fit_params).transform(X)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "C:\\Users\\dizhe\\Anaconda3\\envs\\mcm\\lib\\site-packages\\sklearn\\pipeline.py:381: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n", " Xt = transform.transform(Xt)\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# roc curve\n", "print('ROC curve')\n", "kfold=StratifiedKFold(n_splits=2,random_state=1)\n", "'''pipeline,turtle or list is outermost,inside must be turtle'''\n", "pipe_lr=Pipeline([('scl',StandardScaler()),('pca',PCA(n_components=1)),('clf',LogisticRegression(random_state=1))])\n", "for i, (train,test) in enumerate(kfold.split(X_train,y_train)): \n", " prob=pipe_lr.fit(X_train.iloc[train],y_train.iloc[train]).predict_proba(X_train.iloc[test])\n", " fpr,tqr,thresholds=roc_curve(y_train.iloc[test],prob[:,1],pos_label=1)\n", " roc_auc=auc(fpr,tqr)\n", " plt.plot(fpr,tqr,label='ROC fold:{},auc:{}'.format(i,roc_auc))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 }