{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Numerical, tabular and plotting stack.\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import seaborn as sns\n", "\n", "# Install a blanket \"ignore\" warnings filter before the modelling imports below.\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "# Evaluation, model-selection and classifier imports.\n", "from sklearn.metrics import roc_auc_score, roc_curve\n", "from sklearn.model_selection import RandomizedSearchCV, cross_val_score\n", "from sklearn.linear_model import LogisticRegression\n", "from xgboost import XGBClassifier\n", "from lightgbm import LGBMClassifier\n", "from catboost import CatBoostClassifier\n", "from scipy.stats import randint as sp_randint\n", "from mlxtend.classifier import StackingClassifier\n", "\n", "# Oversampling import.\n", "from imblearn.over_sampling import SMOTE\n", "\n", "# Plot styling: matplotlib style sheet first, then seaborn's settings.\n", "plt.style.use(\"fivethirtyeight\")\n", "sns.set(color_codes=True)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read the train and test CSVs (paths are relative to the working directory).\n", "train = pd.read_csv(\"train.csv\")\n", "test = pd.read_csv(\"test.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ID | \n", "Gender | \n", "Age | \n", "Region_Code | \n", "Occupation | \n", "Channel_Code | \n", "Vintage | \n", "Credit_Product | \n", "Avg_Account_Balance | \n", "Is_Active | \n", "Is_Lead | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "NNVBBKZB | \n", "Female | \n", "73 | \n", "RG268 | \n", "Other | \n", "X3 | \n", "43 | \n", "No | \n", "1045696 | \n", "No | \n", "0 | \n", "
1 | \n", "IDD62UNG | \n", "Female | \n", "30 | \n", "RG277 | \n", "Salaried | \n", "X1 | \n", "32 | \n", "No | \n", "581988 | \n", "No | \n", "0 | \n", "
2 | \n", "HD3DSEMC | \n", "Female | \n", "56 | \n", "RG268 | \n", "Self_Employed | \n", "X3 | \n", "26 | \n", "No | \n", "1484315 | \n", "Yes | \n", "0 | \n", "
3 | \n", "BF3NC7KV | \n", "Male | \n", "34 | \n", "RG270 | \n", "Salaried | \n", "X1 | \n", "19 | \n", "No | \n", "470454 | \n", "No | \n", "0 | \n", "
4 | \n", "TEASRWXV | \n", "Female | \n", "30 | \n", "RG282 | \n", "Salaried | \n", "X1 | \n", "33 | \n", "No | \n", "886787 | \n", "No | \n", "0 | \n", "
\n", " | ID | \n", "Age | \n", "Vintage | \n", "Avg_Account_Balance | \n", "Is_Lead | \n", "Gender_Female | \n", "Gender_Male | \n", "Region_Code_RG250 | \n", "Region_Code_RG251 | \n", "Region_Code_RG252 | \n", "... | \n", "Occupation_Self_Employed | \n", "Channel_Code_X1 | \n", "Channel_Code_X2 | \n", "Channel_Code_X3 | \n", "Channel_Code_X4 | \n", "Credit_Product_No | \n", "Credit_Product_No_Info | \n", "Credit_Product_Yes | \n", "Is_Active_No | \n", "Is_Active_Yes | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "NNVBBKZB | \n", "73 | \n", "43 | \n", "1045696 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "IDD62UNG | \n", "30 | \n", "32 | \n", "581988 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
2 | \n", "HD3DSEMC | \n", "56 | \n", "26 | \n", "1484315 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
3 | \n", "BF3NC7KV | \n", "34 | \n", "19 | \n", "470454 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
4 | \n", "TEASRWXV | \n", "30 | \n", "33 | \n", "886787 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
5 rows × 55 columns
\n", "