{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os, math, subprocess\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from IPython.display import display\n", "# NOTE(review): wildcard import - helpers used by later cells come from this module implicitly;\n", "# replace with explicit names once the set actually used is confirmed.\n", "from lib_feature_engineering import *\n", "\n", "# some settings for displaying Pandas results\n", "pd.set_option('display.width', 2000)\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.precision', 4)\n", "# Show full cell text instead of truncating it. On pandas >= 1.0 the 'no limit' value is None\n", "# (-1 is deprecated there and rejected by later releases); pandas < 1.0 only accepts an int,\n", "# so fall back to the legacy -1 when None is refused.\n", "try:\n", "    pd.set_option('display.max_colwidth', None)\n", "except ValueError:\n", "    pd.set_option('display.max_colwidth', -1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load train and test data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('(rows, columns)', (307511, 122))\n", "First 5 rows\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_W
EEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5351000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0188-9461-637-3648.0-2120NaN110110Laborers1.022WEDNESDAY10000000Business Entity Type 30.08300.26290.13940.02470.03690.97220.61920.01430.000.06900.08330.12500.03690.02020.01900.00000.00000.02520.03830.97220.63410.01440.00000.06900.08330.12500.03770.0220.01980.00.00.02500.03690.97220.62430.01440.000.06900.08330.12500.03750.02050.01930.00000.00reg oper accountblock of flats0.0149Stone, brickNo2.02.02.02.0-1134.0010000000000000000000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.51129500.0FamilyState servantHigher educationMarriedHouse / apartment0.0035-16765-1188-1186.0-291NaN110110Core staff2.011MONDAY11000000School0.31130.6222NaN0.09590.05290.98510.79600.06050.080.03450.29170.33330.01300.07730.05490.00390.00980.09240.05380.98510.80400.04970.08060.03450.29170.33330.01280.0790.05540.00.00.09680.05290.98510.79870.06080.080.03450.29170.33330.01320.07870.05580.00390.01reg oper accountblock of flats0.0714BlockNo1.00.01.00.0-828.0010000000000000000000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0135000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0100-19046-225-4260.0-253126.0111110Laborers1.022MONDAY9000000GovernmentNaN0.55590.7296NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-815.0000000000000000000000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5297000.0UnaccompaniedWorkingSecondary / secondary specialCivil marriageHouse / apartment0.0080-19005-3039-9833.0-2437NaN110100Laborers2.022WEDNESDAY17000000Business Entity Type 3NaN0.6504NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.00.02.00.0-617.001000000000000000000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5513000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0287-19932-3038-4311.0-3458NaN110100Core staff1.022THURSDAY11000011ReligionNaN0.3227NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-1106.0000000100000000000000.00.00.00.00.00.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary 
/ secondary special Single / not married House / apartment 0.0188 -9461 -637 -3648.0 -2120 NaN 1 1 0 1 1 0 Laborers 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 0.0830 0.2629 0.1394 0.0247 0.0369 0.9722 0.6192 0.0143 0.00 0.0690 0.0833 0.1250 0.0369 0.0202 0.0190 0.0000 0.0000 0.0252 0.0383 0.9722 0.6341 0.0144 0.0000 0.0690 0.0833 0.1250 0.0377 0.022 0.0198 0.0 0.0 0.0250 0.0369 0.9722 0.6243 0.0144 0.00 0.0690 0.0833 0.1250 0.0375 0.0205 0.0193 0.0000 0.00 reg oper account block of flats 0.0149 Stone, brick No 2.0 2.0 2.0 2.0 -1134.0 0 1 0 0 0 0 0 0 0 0 \n", "1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.0035 -16765 -1188 -1186.0 -291 NaN 1 1 0 1 1 0 Core staff 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 0.3113 0.6222 NaN 0.0959 0.0529 0.9851 0.7960 0.0605 0.08 0.0345 0.2917 0.3333 0.0130 0.0773 0.0549 0.0039 0.0098 0.0924 0.0538 0.9851 0.8040 0.0497 0.0806 0.0345 0.2917 0.3333 0.0128 0.079 0.0554 0.0 0.0 0.0968 0.0529 0.9851 0.7987 0.0608 0.08 0.0345 0.2917 0.3333 0.0132 0.0787 0.0558 0.0039 0.01 reg oper account block of flats 0.0714 Block No 1.0 0.0 1.0 0.0 -828.0 0 1 0 0 0 0 0 0 0 0 \n", "2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0100 -19046 -225 -4260.0 -2531 26.0 1 1 1 1 1 0 Laborers 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government NaN 0.5559 0.7296 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0 0 0 0 0 0 0 0 0 0 \n", "3 100006 0 Cash loans F N Y 0 135000.0 312682.5 29686.5 297000.0 Unaccompanied Working Secondary / secondary special Civil marriage House / apartment 0.0080 -19005 -3039 -9833.0 -2437 NaN 1 1 0 1 0 0 Laborers 2.0 2 2 WEDNESDAY 17 0 0 0 0 0 0 Business Entity Type 3 NaN 0.6504 NaN NaN 
NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 2.0 0.0 -617.0 0 1 0 0 0 0 0 0 0 0 \n", "4 100007 0 Cash loans M N Y 0 121500.0 513000.0 21865.5 513000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0287 -19932 -3038 -4311.0 -3458 NaN 1 1 0 1 0 0 Core staff 1.0 2 2 THURSDAY 11 0 0 0 0 1 1 Religion NaN 0.3227 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -1106.0 0 0 0 0 0 0 1 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "1 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN \n", "4 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "('(rows, columns)', (48744, 121))\n", "First 5 rows\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT
_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
0100001Cash loansFNY0135000.0568800.020560.5450000.0UnaccompaniedWorkingHigher educationMarriedHouse / apartment0.0188-19241-2329-5170.0-812NaN110101NaN2.022TUESDAY18000000Kindergarten0.75260.78970.15950.06600.05900.9732NaNNaNNaN0.13790.125NaNNaNNaN0.0505NaNNaN0.06720.06120.9732NaNNaNNaN0.13790.125NaNNaNNaN0.0526NaNNaN0.06660.05900.9732NaNNaNNaN0.13790.125NaNNaNNaN0.0514NaNNaNNaNblock of flats0.0392Stone, brickNo0.00.00.00.0-1740.0010000000000000000000.00.00.00.00.00.0
1100005Cash loansMNY099000.0222768.017370.0180000.0UnaccompaniedWorkingSecondary / secondary specialMarriedHouse / apartment0.0358-18064-4469-9118.0-1623NaN110100Low-skill Laborers2.022FRIDAY9000000Self-employed0.56500.29170.4330NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.00.0010000000000000000000.00.00.00.00.03.0
2100013Cash loansMYY0202500.0663264.069777.0630000.0NaNWorkingHigher educationMarriedHouse / apartment0.0191-20038-4458-2175.0-35035.0110100Drivers2.022MONDAY14000000Transport: type 3NaN0.69980.6110NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-856.0000000100000000000000.00.00.00.01.04.0
3100028Cash loansFNY2315000.01575000.049018.51575000.0UnaccompaniedWorkingSecondary / secondary specialMarriedHouse / apartment0.0264-13976-1866-2000.0-4208NaN110110Sales staff4.022WEDNESDAY11000000Business Entity Type 30.52570.50970.61270.30520.19740.99700.95920.11650.320.27590.3750.04170.20420.24040.36730.03860.080.31090.20490.99700.96080.11760.32220.27590.3750.04170.20890.26260.38270.03890.08470.30810.19740.99700.95970.11730.320.27590.3750.04170.20780.24460.37390.03880.0817reg oper accountblock of flats0.3700PanelNo0.00.00.00.0-1805.0010000000000000000000.00.00.00.00.03.0
4100038Cash loansMYN1180000.0625500.032067.0625500.0UnaccompaniedWorkingSecondary / secondary specialMarriedHouse / apartment0.0100-13040-2191-4000.0-426216.0111100NaN3.022FRIDAY5000011Business Entity Type 30.20210.4257NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-821.001000000000000000000NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " SK_ID_CURR NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 \\\n", "0 100001 Cash loans F N Y 0 135000.0 568800.0 20560.5 450000.0 Unaccompanied Working 
Higher education Married House / apartment 0.0188 -19241 -2329 -5170.0 -812 NaN 1 1 0 1 0 1 NaN 2.0 2 2 TUESDAY 18 0 0 0 0 0 0 Kindergarten 0.7526 0.7897 0.1595 0.0660 0.0590 0.9732 NaN NaN NaN 0.1379 0.125 NaN NaN NaN 0.0505 NaN NaN 0.0672 0.0612 0.9732 NaN NaN NaN 0.1379 0.125 NaN NaN NaN 0.0526 NaN NaN 0.0666 0.0590 0.9732 NaN NaN NaN 0.1379 0.125 NaN NaN NaN 0.0514 NaN NaN NaN block of flats 0.0392 Stone, brick No 0.0 0.0 0.0 0.0 -1740.0 0 1 0 0 0 0 0 0 0 0 0 \n", "1 100005 Cash loans M N Y 0 99000.0 222768.0 17370.0 180000.0 Unaccompanied Working Secondary / secondary special Married House / apartment 0.0358 -18064 -4469 -9118.0 -1623 NaN 1 1 0 1 0 0 Low-skill Laborers 2.0 2 2 FRIDAY 9 0 0 0 0 0 0 Self-employed 0.5650 0.2917 0.4330 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 0.0 0 1 0 0 0 0 0 0 0 0 0 \n", "2 100013 Cash loans M Y Y 0 202500.0 663264.0 69777.0 630000.0 NaN Working Higher education Married House / apartment 0.0191 -20038 -4458 -2175.0 -3503 5.0 1 1 0 1 0 0 Drivers 2.0 2 2 MONDAY 14 0 0 0 0 0 0 Transport: type 3 NaN 0.6998 0.6110 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -856.0 0 0 0 0 0 0 1 0 0 0 0 \n", "3 100028 Cash loans F N Y 2 315000.0 1575000.0 49018.5 1575000.0 Unaccompanied Working Secondary / secondary special Married House / apartment 0.0264 -13976 -1866 -2000.0 -4208 NaN 1 1 0 1 1 0 Sales staff 4.0 2 2 WEDNESDAY 11 0 0 0 0 0 0 Business Entity Type 3 0.5257 0.5097 0.6127 0.3052 0.1974 0.9970 0.9592 0.1165 0.32 0.2759 0.375 0.0417 0.2042 0.2404 0.3673 0.0386 0.08 0.3109 0.2049 0.9970 0.9608 0.1176 0.3222 0.2759 0.375 0.0417 0.2089 0.2626 0.3827 0.0389 0.0847 0.3081 0.1974 0.9970 0.9597 0.1173 0.32 0.2759 0.375 0.0417 
0.2078 0.2446 0.3739 0.0388 0.0817 reg oper account block of flats 0.3700 Panel No 0.0 0.0 0.0 0.0 -1805.0 0 1 0 0 0 0 0 0 0 0 0 \n", "4 100038 Cash loans M Y N 1 180000.0 625500.0 32067.0 625500.0 Unaccompanied Working Secondary / secondary special Married House / apartment 0.0100 -13040 -2191 -4000.0 -4262 16.0 1 1 1 1 0 0 NaN 3.0 2 2 FRIDAY 5 0 0 0 0 1 1 Business Entity Type 3 0.2021 0.4257 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -821.0 0 1 0 0 0 0 0 0 0 0 0 \n", "\n", " FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 3.0 \n", "2 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 1.0 4.0 \n", "3 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 3.0 \n", "4 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesub_typen_distinctn_missn_negativen_zeros25%50%75%countmaxmeanminstdsample_0sample_1sample_2sample_3sample_4sample_5sample_6sample_7sample_8sample_9
0SK_ID_CURRint64307511 (100.00%)0 (0.00%)0 (0.00%)0 (0.00%)189145.5000278202.0000367142.5000307511.04.5626e+052.7818e+051.0000e+05102790.1753326682414578432657346257169928228494305986450918393627121604
1TARGETint642 (0.00%)0 (0.00%)0 (0.00%)282686 (91.93%)0.00000.00000.0000307511.01.0000e+008.0729e-020.0000e+000.27240100100100
2NAME_CONTRACT_TYPEobject2 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000Cash loansCash loansRevolving loansCash loansCash loansCash loansCash loansCash loansCash loansCash loans
3CODE_GENDERobject3 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000MMMMMFMFFF
4FLAG_OWN_CARobject2 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000YYNNNNNYNN
5FLAG_OWN_REALTYobject2 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000YNYYYYYNYY
6CNT_CHILDRENint6415 (0.00%)0 (0.00%)0 (0.00%)215371 (70.04%)0.00000.00001.0000307511.01.9000e+014.1705e-010.0000e+000.72210100000120
7AMT_INCOME_TOTALfloat642548 (0.83%)0 (0.00%)0 (0.00%)0 (0.00%)112500.0000147150.0000202500.0000307511.01.1700e+081.6880e+052.5650e+04237123.1463166500.0450000.0157500.0135000.0202500.0117000.090000.0180000.0157500.090000.0
8AMT_CREDITfloat645603 (1.82%)0 (0.00%)0 (0.00%)0 (0.00%)270000.0000513531.0000808650.0000307511.04.0500e+065.9903e+054.5000e+04402490.7770254700.01381113.0450000.01764000.0203760.0254700.0538704.0630000.0679500.0755190.0
9AMT_ANNUITYfloat6413672 (4.45%)12 (0.00%)0 (0.00%)0 (0.00%)16524.000024903.000034596.0000307499.02.5803e+052.7109e+041.6155e+0314493.737325191.039712.522500.048510.016227.025191.026046.023274.036202.536459.0
10AMT_GOODS_PRICEfloat641002 (0.33%)278 (0.09%)0 (0.00%)0 (0.00%)238500.0000450000.0000679500.0000307233.04.0500e+065.3840e+054.0500e+04369446.4605225000.01206000.0450000.01764000.0180000.0225000.0481500.0630000.0679500.0675000.0
11NAME_TYPE_SUITEobject7 (0.00%)1292 (0.42%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000UnaccompaniedUnaccompaniedUnaccompaniedUnaccompaniedUnaccompaniedUnaccompaniedUnaccompaniedUnaccompaniedFamilyFamily
12NAME_INCOME_TYPEobject8 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000State servantWorkingWorkingCommercial associateWorkingPensionerWorkingWorkingWorkingWorking
13NAME_EDUCATION_TYPEobject5 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000Higher educationIncomplete higherSecondary / secondary specialSecondary / secondary specialSecondary / secondary specialSecondary / secondary specialSecondary / secondary specialSecondary / secondary specialSecondary / secondary specialSecondary / secondary special
14NAME_FAMILY_STATUSobject6 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000MarriedMarriedMarriedMarriedSeparatedMarriedSingle / not marriedMarriedMarriedMarried
15NAME_HOUSING_TYPEobject6 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000House / apartmentHouse / apartmentHouse / apartmentHouse / apartmentHouse / apartmentHouse / apartmentHouse / apartmentHouse / apartmentHouse / apartmentWith parents
16REGION_POPULATION_RELATIVEfloat6481 (0.03%)0 (0.00%)0 (0.00%)0 (0.00%)0.01000.01880.0287307511.07.2508e-022.0868e-022.9000e-040.01380.024610.0186340.0152210.0080190.01050.0088659999999999990.0186340.01050.018850.030755
17DAYS_BIRTHint6417460 (5.68%)0 (0.00%)307511 (100.00%)0 (0.00%)-19682.0000-15750.0000-12413.0000307511.0-7.4890e+03-1.6037e+04-2.5229e+044363.9886-21882-14026-9905-11946-10493-24264-15251-14263-11179-10477
18DAYS_EMPLOYEDint6412574 (4.09%)0 (0.00%)252135 (81.99%)2 (0.00%)-2760.0000-1213.0000-289.0000307511.03.6524e+056.3815e+04-1.7912e+04141275.7665-2987-270-2691-1526-656365243-1984-481-687-1400
19DAYS_REGISTRATIONfloat6415688 (5.10%)0 (0.00%)307431 (99.97%)80 (0.03%)-7479.5000-4504.0000-2010.0000307511.00.0000e+00-4.9861e+03-2.4672e+043522.8863-11125.0-1625.0-4725.0-1513.0-2389.0-87.0-6933.0-1315.0-1491.0-5034.0
20DAYS_ID_PUBLISHint646168 (2.01%)0 (0.00%)307495 (99.99%)16 (0.01%)-4299.0000-3254.0000-1720.0000307511.00.0000e+00-2.9942e+03-7.1970e+031509.4504-3984-4768-2549-4392-2526-4388-4396-4830-2742-1625
21OWN_CAR_AGEfloat6462 (0.02%)202929 (65.99%)0 (0.00%)2134 (0.69%)5.00009.000015.0000104582.09.1000e+011.2061e+010.0000e+0011.94484.01.0NaNNaNNaNNaNNaN18.0NaNNaN
22FLAG_MOBILint642 (0.00%)0 (0.00%)0 (0.00%)1 (0.00%)1.00001.00001.0000307511.01.0000e+001.0000e+000.0000e+000.00181111111111
23FLAG_EMP_PHONEint642 (0.00%)0 (0.00%)0 (0.00%)55386 (18.01%)1.00001.00001.0000307511.01.0000e+008.1989e-010.0000e+000.38431111101111
24FLAG_WORK_PHONEint642 (0.00%)0 (0.00%)0 (0.00%)246203 (80.06%)0.00000.00000.0000307511.01.0000e+001.9937e-010.0000e+000.39951000100011
25FLAG_CONT_MOBILEint642 (0.00%)0 (0.00%)0 (0.00%)574 (0.19%)1.00001.00001.0000307511.01.0000e+009.9813e-010.0000e+000.04321111111111
26FLAG_PHONEint642 (0.00%)0 (0.00%)0 (0.00%)221080 (71.89%)0.00000.00001.0000307511.01.0000e+002.8107e-010.0000e+000.44951000100010
27FLAG_EMAILint642 (0.00%)0 (0.00%)0 (0.00%)290069 (94.33%)0.00000.00000.0000307511.01.0000e+005.6720e-020.0000e+000.23130000000000
28OCCUPATION_TYPEobject18 (0.01%)96391 (31.35%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000NaNDriversLaborersNaNLaborersNaNLaborersNaNManagersLaborers
29CNT_FAM_MEMBERSfloat6417 (0.01%)2 (0.00%)0 (0.00%)0 (0.00%)2.00002.00003.0000307509.02.0000e+012.1527e+001.0000e+000.91072.03.02.02.01.02.01.03.04.02.0
30REGION_RATING_CLIENTint643 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)2.00002.00002.0000307511.03.0000e+002.0525e+001.0000e+000.50902222322322
31REGION_RATING_CLIENT_W_CITYint643 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)2.00002.00002.0000307511.03.0000e+002.0315e+001.0000e+000.50272222322322
32WEEKDAY_APPR_PROCESS_STARTobject7 (0.00%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000THURSDAYMONDAYWEDNESDAYFRIDAYWEDNESDAYMONDAYFRIDAYMONDAYTUESDAYFRIDAY
33HOUR_APPR_PROCESS_STARTint6424 (0.01%)0 (0.00%)0 (0.00%)40 (0.01%)10.000012.000014.0000307511.02.3000e+011.2063e+010.0000e+003.2658171091015179131213
34REG_REGION_NOT_LIVE_REGIONint642 (0.00%)0 (0.00%)0 (0.00%)302854 (98.49%)0.00000.00000.0000307511.01.0000e+001.5144e-020.0000e+000.12210000000000
35REG_REGION_NOT_WORK_REGIONint642 (0.00%)0 (0.00%)0 (0.00%)291899 (94.92%)0.00000.00000.0000307511.01.0000e+005.0769e-020.0000e+000.21950001100000
36LIVE_REGION_NOT_WORK_REGIONint642 (0.00%)0 (0.00%)0 (0.00%)295008 (95.93%)0.00000.00000.0000307511.01.0000e+004.0659e-020.0000e+000.19750001100000
37REG_CITY_NOT_LIVE_CITYint642 (0.00%)0 (0.00%)0 (0.00%)283472 (92.18%)0.00000.00000.0000307511.01.0000e+007.8173e-020.0000e+000.26840010000000
38REG_CITY_NOT_WORK_CITYint642 (0.00%)0 (0.00%)0 (0.00%)236644 (76.95%)0.00000.00000.0000307511.01.0000e+002.3045e-010.0000e+000.42110011100000
39LIVE_CITY_NOT_WORK_CITYint642 (0.00%)0 (0.00%)0 (0.00%)252296 (82.04%)0.00000.00000.0000307511.01.0000e+001.7955e-010.0000e+000.38380001100000
40ORGANIZATION_TYPEobject58 (0.02%)0 (0.00%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000Business Entity Type 2Business Entity Type 3Business Entity Type 2Business Entity Type 3Self-employedXNASelf-employedBusiness Entity Type 3Trade: type 2Medicine
41EXT_SOURCE_1float64114584 (37.26%)173378 (56.38%)0 (0.00%)0 (0.00%)0.33400.50600.6751134133.09.6269e-015.0213e-011.4568e-020.2111NaN0.3569777836552319NaN0.4949180654446097NaNNaNNaN0.53925767633438390.6575317720639661NaN
42EXT_SOURCE_2float64119831 (38.97%)660 (0.21%)0 (0.00%)0 (0.00%)0.39250.56600.6636306851.08.5500e-015.1439e-018.1736e-080.19110.57847565315913290.51527710002925510.42481610584636410.66703524209102670.09624612181012040.394738255618091340.78811380832801890.55798413544152060.59165616911752960.6675741251599911
43EXT_SOURCE_3float64814 (0.26%)60965 (19.83%)0 (0.00%)0 (0.00%)0.37060.53530.6691246546.08.9601e-015.1085e-015.2727e-040.19480.59719242683371280.52269731728211120.31547215492577346NaN0.41709966825220970.6058362647264226NaN0.74902170484633910.49566582913972970.7394117535524816
44APARTMENTS_AVGfloat642339 (0.76%)156061 (50.75%)0 (0.00%)751 (0.24%)0.05770.08760.1485151450.01.0000e+001.1744e-010.0000e+000.1082NaN0.1485NaNNaNNaN0.22160.06290.08250.10310.1825
45BASEMENTAREA_AVGfloat643780 (1.23%)179943 (58.52%)0 (0.00%)14745 (4.79%)0.04420.07630.1122127568.01.0000e+008.8442e-020.0000e+000.0824NaN0.0991NaNNaNNaN0.07760.07560.0788NaN0.1322
46YEARS_BEGINEXPLUATATION_AVGfloat64285 (0.09%)150007 (48.78%)0 (0.00%)514 (0.17%)0.97670.98160.9866157504.01.0000e+009.7773e-010.0000e+000.05920.98560.9871NaNNaNNaN0.98260.98310.97860.97710.9861
47YEARS_BUILD_AVGfloat64149 (0.05%)204488 (66.50%)0 (0.00%)102 (0.03%)0.68720.75520.8232103023.01.0000e+007.5247e-010.0000e+000.1133NaN0.8232NaNNaNNaNNaN0.76880.70760.68720.8096
48COMMONAREA_AVGfloat643181 (1.03%)214865 (69.87%)0 (0.00%)8442 (2.75%)0.00780.02110.051592646.01.0000e+004.4621e-020.0000e+000.0760NaN0.0889NaNNaNNaNNaNNaN0.0079NaN0.0378
49ELEVATORS_AVGfloat64257 (0.08%)163891 (53.30%)0 (0.00%)85718 (27.87%)0.00000.00000.1200143620.01.0000e+007.8942e-020.0000e+000.13460.080.16NaNNaNNaN0.080.00.0NaN0.2
50ENTRANCES_AVGfloat64285 (0.09%)154828 (50.35%)0 (0.00%)323 (0.11%)0.06900.13790.2069152683.01.0000e+001.4972e-010.0000e+000.10000.0690.1379NaNNaNNaN0.03450.13790.13790.20690.1724
51FLOORSMAX_AVGfloat64403 (0.13%)153020 (49.76%)0 (0.00%)2938 (0.96%)0.16670.16670.3333154491.01.0000e+002.2628e-010.0000e+000.14460.33330.3333NaNNaNNaN0.33330.16670.16670.16670.3333
52FLOORSMIN_AVGfloat64305 (0.10%)208642 (67.85%)0 (0.00%)2320 (0.75%)0.08330.20830.375098869.01.0000e+002.3189e-010.0000e+000.1614NaN0.375NaNNaNNaNNaNNaN0.2083NaN0.375
53LANDAREA_AVGfloat643527 (1.15%)182590 (59.38%)0 (0.00%)15600 (5.07%)0.01870.04810.0856124921.01.0000e+006.6333e-020.0000e+000.0812NaN0.1127NaNNaNNaN0.09110.01510.0203NaN0.1238
54LIVINGAPARTMENTS_AVGfloat641868 (0.61%)210199 (68.35%)0 (0.00%)418 (0.14%)0.05040.07560.121097312.01.0000e+001.0077e-010.0000e+000.0926NaN0.121NaNNaNNaNNaN0.05040.06720.08070.1488
55LIVINGAREA_AVGfloat645199 (1.69%)154350 (50.19%)0 (0.00%)284 (0.09%)0.04530.07450.1299153161.01.0000e+001.0740e-010.0000e+000.11060.07390.0915NaNNaNNaN0.05820.05560.07030.08410.1824
56NONLIVINGAPARTMENTS_AVGfloat64386 (0.13%)213514 (69.43%)0 (0.00%)54549 (17.74%)0.00000.00000.003993997.01.0000e+008.8087e-030.0000e+000.0477NaN0.0NaNNaNNaNNaN0.00390.00.01540.0
57NONLIVINGAREA_AVGfloat643290 (1.07%)169682 (55.18%)0 (0.00%)58735 (19.10%)0.00000.00360.0277137829.01.0000e+002.8358e-020.0000e+000.0695NaN0.0NaNNaNNaN0.22420.01880.00.01280.0022
58APARTMENTS_MODEfloat64760 (0.25%)156061 (50.75%)0 (0.00%)976 (0.32%)0.05250.08400.1439151450.01.0000e+001.1423e-010.0000e+000.1079NaN0.1513NaNNaNNaN0.22580.06410.0840.1050.1859
59BASEMENTAREA_MODEfloat643841 (1.25%)179943 (58.52%)0 (0.00%)16598 (5.40%)0.04070.07460.1124127568.01.0000e+008.7543e-020.0000e+000.0843NaN0.1028NaNNaNNaN0.08060.07850.0818NaN0.1372
60YEARS_BEGINEXPLUATATION_MODEfloat64221 (0.07%)150007 (48.78%)0 (0.00%)142 (0.05%)0.97670.98160.9866157504.01.0000e+009.7707e-010.0000e+000.06460.98560.9871NaNNaNNaN0.98260.98310.97860.97720.9861
61YEARS_BUILD_MODEfloat64154 (0.05%)204488 (66.50%)0 (0.00%)103 (0.03%)0.69940.76480.8236103023.01.0000e+007.5964e-010.0000e+000.1101NaN0.8301NaNNaNNaNNaN0.77790.71900000000000010.69940.8171
62COMMONAREA_MODEfloat643128 (1.02%)214865 (69.87%)0 (0.00%)9690 (3.15%)0.00720.01900.049092646.01.0000e+004.2553e-020.0000e+000.0744NaN0.0897NaNNaNNaNNaNNaN0.008NaN0.0382
63ELEVATORS_MODEfloat6426 (0.01%)163891 (53.30%)0 (0.00%)89498 (29.10%)0.00000.00000.1208143620.01.0000e+007.4490e-020.0000e+000.13230.08060.1611NaNNaNNaN0.08060.00.0NaN0.2014
64ENTRANCES_MODEfloat6430 (0.01%)154828 (50.35%)0 (0.00%)387 (0.13%)0.06900.13790.2069152683.01.0000e+001.4519e-010.0000e+000.10100.0690.1379NaNNaNNaN0.03450.13790.13790.20690.1724
65FLOORSMAX_MODEfloat6425 (0.01%)153020 (49.76%)0 (0.00%)3415 (1.11%)0.16670.16670.3333154491.01.0000e+002.2232e-010.0000e+000.14370.33330.3333NaNNaNNaN0.33330.16670.16670.16670.3333
66FLOORSMIN_MODEfloat6425 (0.01%)208642 (67.85%)0 (0.00%)2517 (0.82%)0.08330.20830.375098869.01.0000e+002.2806e-010.0000e+000.1612NaN0.375NaNNaNNaNNaNNaN0.2083NaN0.375
67LANDAREA_MODEfloat643563 (1.16%)182590 (59.38%)0 (0.00%)17453 (5.68%)0.01660.04580.0841124921.01.0000e+006.4958e-020.0000e+000.0818NaN0.1153NaNNaNNaN0.09320.01550.0207NaN0.1266
68LIVINGAPARTMENTS_MODEfloat64736 (0.24%)210199 (68.35%)0 (0.00%)519 (0.17%)0.05420.07710.131397312.01.0000e+001.0564e-010.0000e+000.0979NaN0.1322NaNNaNNaNNaN0.05510.07350.08820.1625
69LIVINGAREA_MODEfloat645301 (1.72%)154350 (50.19%)0 (0.00%)444 (0.14%)0.04270.07310.1252153161.01.0000e+001.0598e-010.0000e+000.11180.0770.0953NaNNaNNaN0.06060.05790.07330.08760.1901
70NONLIVINGAPARTMENTS_MODEfloat64167 (0.05%)213514 (69.43%)0 (0.00%)59255 (19.27%)0.00000.00000.003993997.01.0000e+008.0764e-030.0000e+000.0463NaN0.0NaNNaNNaNNaN0.00390.00.01560.0
71NONLIVINGAREA_MODEfloat643327 (1.08%)169682 (55.18%)0 (0.00%)67126 (21.83%)0.00000.00110.0231137829.01.0000e+002.7022e-020.0000e+000.0703NaN0.0NaNNaNNaN0.23730.01990.00.01360.0023
72APARTMENTS_MEDIfloat641148 (0.37%)156061 (50.75%)0 (0.00%)771 (0.25%)0.05830.08640.1489151450.01.0000e+001.1785e-010.0000e+000.1091NaN0.1499NaNNaNNaN0.22380.06350.08330.10410.1842
73BASEMENTAREA_MEDIfloat643772 (1.23%)179943 (58.52%)0 (0.00%)14991 (4.87%)0.04370.07580.1116127568.01.0000e+008.7955e-020.0000e+000.0822NaN0.0991NaNNaNNaN0.07760.07560.0788NaN0.1322
74YEARS_BEGINEXPLUATATION_MEDIfloat64245 (0.08%)150007 (48.78%)0 (0.00%)548 (0.18%)0.97670.98160.9866157504.01.0000e+009.7775e-010.0000e+000.05990.98560.9871NaNNaNNaN0.98260.98310.97860.97710.9861
75YEARS_BUILD_MEDIfloat64151 (0.05%)204488 (66.50%)0 (0.00%)101 (0.03%)0.69140.75850.8256103023.01.0000e+007.5575e-010.0000e+000.1121NaN0.8256NaNNaNNaNNaN0.77190.71150.69140.8121
76COMMONAREA_MEDIfloat643202 (1.04%)214865 (69.87%)0 (0.00%)8691 (2.83%)0.00790.02080.051392646.01.0000e+004.4595e-020.0000e+000.0761NaN0.0895NaNNaNNaNNaNNaN0.008NaN0.0381
77ELEVATORS_MEDIfloat6446 (0.01%)163891 (53.30%)0 (0.00%)87026 (28.30%)0.00000.00000.1200143620.01.0000e+007.8078e-020.0000e+000.13450.080.16NaNNaNNaN0.080.00.0NaN0.2
78ENTRANCES_MEDIfloat6446 (0.01%)154828 (50.35%)0 (0.00%)329 (0.11%)0.06900.13790.2069152683.01.0000e+001.4921e-010.0000e+000.10040.0690.1379NaNNaNNaN0.03450.13790.13790.20690.1724
79FLOORSMAX_MEDIfloat6449 (0.02%)153020 (49.76%)0 (0.00%)2995 (0.97%)0.16670.16670.3333154491.01.0000e+002.2590e-010.0000e+000.14510.33330.3333NaNNaNNaN0.33330.16670.16670.16670.3333
80FLOORSMIN_MEDIfloat6447 (0.02%)208642 (67.85%)0 (0.00%)2351 (0.76%)0.08330.20830.375098869.01.0000e+002.3162e-010.0000e+000.1619NaN0.375NaNNaNNaNNaNNaN0.2083NaN0.375
81LANDAREA_MEDIfloat643560 (1.16%)182590 (59.38%)0 (0.00%)15919 (5.18%)0.01870.04870.0868124921.01.0000e+006.7169e-020.0000e+000.0822NaN0.1147NaNNaNNaN0.09270.01540.0206NaN0.126
82LIVINGAPARTMENTS_MEDIfloat641097 (0.36%)210199 (68.35%)0 (0.00%)433 (0.14%)0.05130.07610.123197312.01.0000e+001.0195e-010.0000e+000.0936NaN0.1231NaNNaNNaNNaN0.05130.06840.08210.1513
83LIVINGAREA_MEDIfloat645281 (1.72%)154350 (50.19%)0 (0.00%)299 (0.10%)0.04570.07490.1303153161.01.0000e+001.0861e-010.0000e+000.11230.07520.0931NaNNaNNaN0.05920.05660.07160.08560.1857
84NONLIVINGAPARTMENTS_MEDIfloat64214 (0.07%)213514 (69.43%)0 (0.00%)56097 (18.24%)0.00000.00000.003993997.01.0000e+008.6510e-030.0000e+000.0474NaN0.0NaNNaNNaNNaN0.00390.00.01550.0
85NONLIVINGAREA_MEDIfloat643323 (1.08%)169682 (55.18%)0 (0.00%)60954 (19.82%)0.00000.00310.0266137829.01.0000e+002.8236e-020.0000e+000.0702NaN0.0NaNNaNNaN0.22890.01920.00.01310.0022
86FONDKAPREMONT_MODEobject4 (0.00%)210295 (68.39%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000NaNreg oper accountNaNNaNNaNNaNreg oper spec accountreg oper accountreg oper accountreg oper account
87HOUSETYPE_MODEobject3 (0.00%)154297 (50.18%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000NaNblock of flatsNaNNaNNaNblock of flatsblock of flatsblock of flatsblock of flatsblock of flats
88TOTALAREA_MODEfloat645116 (1.66%)148431 (48.27%)0 (0.00%)582 (0.19%)0.04120.06880.1276159080.01.0000e+001.0255e-010.0000e+000.10750.05810.1206NaNNaNNaN0.09450.04750.05740.06890.1644
89WALLSMATERIAL_MODEobject7 (0.00%)156341 (50.84%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000NaNPanelNaNNaNNaNStone, brickStone, brickPanelStone, brickPanel
90EMERGENCYSTATE_MODEobject2 (0.00%)145755 (47.40%)0 (0.00%)0 (0.00%)0.00000.00000.00000.00.0000e+000.0000e+000.0000e+000.0000NoNoNaNNaNNaNNoNoNoNoNo
91OBS_30_CNT_SOCIAL_CIRCLEfloat6433 (0.01%)1021 (0.33%)0 (0.00%)163910 (53.30%)0.00000.00002.0000306490.03.4800e+021.4222e+000.0000e+002.40100.02.01.00.00.01.02.03.09.00.0
92DEF_30_CNT_SOCIAL_CIRCLEfloat6410 (0.00%)1021 (0.33%)0 (0.00%)271324 (88.23%)0.00000.00000.0000306490.03.4000e+011.4342e-010.0000e+000.44670.00.00.00.00.01.00.00.01.00.0
93OBS_60_CNT_SOCIAL_CIRCLEfloat6433 (0.01%)1021 (0.33%)0 (0.00%)164666 (53.55%)0.00000.00002.0000306490.03.4400e+021.4053e+000.0000e+002.37980.02.01.00.00.01.02.03.09.00.0
94DEF_60_CNT_SOCIAL_CIRCLEfloat649 (0.00%)1021 (0.33%)0 (0.00%)280721 (91.29%)0.00000.00000.0000306490.02.4000e+011.0005e-010.0000e+000.36230.00.00.00.00.00.00.00.01.00.0
95DAYS_LAST_PHONE_CHANGEfloat643773 (1.23%)1 (0.00%)269838 (87.75%)37672 (12.25%)-1570.0000-757.0000-274.0000307510.00.0000e+00-9.6286e+02-4.2920e+03826.8085-3143.0-2.0-1523.0-1224.00.0-201.0-1128.0-2959.0-1634.0-1258.0
96FLAG_DOCUMENT_2int642 (0.00%)0 (0.00%)0 (0.00%)307498 (100.00%)0.00000.00000.0000307511.01.0000e+004.2275e-050.0000e+000.00650000000000
97FLAG_DOCUMENT_3int642 (0.00%)0 (0.00%)0 (0.00%)89171 (29.00%)0.00001.00001.0000307511.01.0000e+007.1002e-010.0000e+000.45381111101111
98FLAG_DOCUMENT_4int642 (0.00%)0 (0.00%)0 (0.00%)307486 (99.99%)0.00000.00000.0000307511.01.0000e+008.1298e-050.0000e+000.00900000000000
99FLAG_DOCUMENT_5int642 (0.00%)0 (0.00%)0 (0.00%)302863 (98.49%)0.00000.00000.0000307511.01.0000e+001.5115e-020.0000e+000.12200000000000
100FLAG_DOCUMENT_6int642 (0.00%)0 (0.00%)0 (0.00%)280433 (91.19%)0.00000.00000.0000307511.01.0000e+008.8055e-020.0000e+000.28340000010000
101FLAG_DOCUMENT_7int642 (0.00%)0 (0.00%)0 (0.00%)307452 (99.98%)0.00000.00000.0000307511.01.0000e+001.9186e-040.0000e+000.01390000000000
102FLAG_DOCUMENT_8int642 (0.00%)0 (0.00%)0 (0.00%)282487 (91.86%)0.00000.00000.0000307511.01.0000e+008.1376e-020.0000e+000.27340000000000
103FLAG_DOCUMENT_9int642 (0.00%)0 (0.00%)0 (0.00%)306313 (99.61%)0.00000.00000.0000307511.01.0000e+003.8958e-030.0000e+000.06230000000000
104FLAG_DOCUMENT_10int642 (0.00%)0 (0.00%)0 (0.00%)307504 (100.00%)0.00000.00000.0000307511.01.0000e+002.2763e-050.0000e+000.00480000000000
105FLAG_DOCUMENT_11int642 (0.00%)0 (0.00%)0 (0.00%)306308 (99.61%)0.00000.00000.0000307511.01.0000e+003.9121e-030.0000e+000.06240000000000
106FLAG_DOCUMENT_12int642 (0.00%)0 (0.00%)0 (0.00%)307509 (100.00%)0.00000.00000.0000307511.01.0000e+006.5038e-060.0000e+000.00260000000000
107FLAG_DOCUMENT_13int642 (0.00%)0 (0.00%)0 (0.00%)306427 (99.65%)0.00000.00000.0000307511.01.0000e+003.5251e-030.0000e+000.05930000000000
108FLAG_DOCUMENT_14int642 (0.00%)0 (0.00%)0 (0.00%)306608 (99.71%)0.00000.00000.0000307511.01.0000e+002.9365e-030.0000e+000.05410000000000
109FLAG_DOCUMENT_15int642 (0.00%)0 (0.00%)0 (0.00%)307139 (99.88%)0.00000.00000.0000307511.01.0000e+001.2097e-030.0000e+000.03480000000000
110FLAG_DOCUMENT_16int642 (0.00%)0 (0.00%)0 (0.00%)304458 (99.01%)0.00000.00000.0000307511.01.0000e+009.9281e-030.0000e+000.09910000000000
111FLAG_DOCUMENT_17int642 (0.00%)0 (0.00%)0 (0.00%)307429 (99.97%)0.00000.00000.0000307511.01.0000e+002.6666e-040.0000e+000.01630000000000
112FLAG_DOCUMENT_18int642 (0.00%)0 (0.00%)0 (0.00%)305011 (99.19%)0.00000.00000.0000307511.01.0000e+008.1298e-030.0000e+000.08980000000000
113FLAG_DOCUMENT_19int642 (0.00%)0 (0.00%)0 (0.00%)307328 (99.94%)0.00000.00000.0000307511.01.0000e+005.9510e-040.0000e+000.02440000000000
114FLAG_DOCUMENT_20int642 (0.00%)0 (0.00%)0 (0.00%)307355 (99.95%)0.00000.00000.0000307511.01.0000e+005.0730e-040.0000e+000.02250000000000
115FLAG_DOCUMENT_21int642 (0.00%)0 (0.00%)0 (0.00%)307408 (99.97%)0.00000.00000.0000307511.01.0000e+003.3495e-040.0000e+000.01830000000000
116AMT_REQ_CREDIT_BUREAU_HOURfloat645 (0.00%)41519 (13.50%)0 (0.00%)264366 (85.97%)0.00000.00000.0000265992.04.0000e+006.4024e-030.0000e+000.08380.00.00.0NaN0.00.0NaN0.00.00.0
117AMT_REQ_CREDIT_BUREAU_DAYfloat649 (0.00%)41519 (13.50%)0 (0.00%)264503 (86.01%)0.00000.00000.0000265992.09.0000e+007.0002e-030.0000e+000.11080.00.00.0NaN0.00.0NaN0.00.00.0
118AMT_REQ_CREDIT_BUREAU_WEEKfloat649 (0.00%)41519 (13.50%)0 (0.00%)257456 (83.72%)0.00000.00000.0000265992.08.0000e+003.4362e-020.0000e+000.20470.00.00.0NaN0.00.0NaN0.00.00.0
119AMT_REQ_CREDIT_BUREAU_MONfloat6424 (0.01%)41519 (13.50%)0 (0.00%)222233 (72.27%)0.00000.00000.0000265992.02.7000e+012.6740e-010.0000e+000.91600.00.00.0NaN0.00.0NaN0.00.00.0
120AMT_REQ_CREDIT_BUREAU_QRTfloat6411 (0.00%)41519 (13.50%)0 (0.00%)215417 (70.05%)0.00000.00000.0000265992.02.6100e+022.6547e-010.0000e+000.79410.00.00.0NaN1.00.0NaN0.01.00.0
121AMT_REQ_CREDIT_BUREAU_YEARfloat6425 (0.01%)41519 (13.50%)0 (0.00%)71801 (23.35%)0.00001.00003.0000265992.02.5000e+011.9000e+000.0000e+001.86931.00.05.0NaN1.03.0NaN1.01.01.0
\n", "
" ], "text/plain": [ " name sub_type n_distinct n_miss n_negative n_zeros 25% 50% 75% count max mean min std sample_0 sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9\n", "0 SK_ID_CURR int64 307511 (100.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 189145.5000 278202.0000 367142.5000 307511.0 4.5626e+05 2.7818e+05 1.0000e+05 102790.1753 326682 414578 432657 346257 169928 228494 305986 450918 393627 121604 \n", "1 TARGET int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 282686 (91.93%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 8.0729e-02 0.0000e+00 0.2724 0 1 0 0 1 0 0 1 0 0 \n", "2 NAME_CONTRACT_TYPE object 2 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Cash loans Cash loans Revolving loans Cash loans Cash loans Cash loans Cash loans Cash loans Cash loans Cash loans \n", "3 CODE_GENDER object 3 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 M M M M M F M F F F \n", "4 FLAG_OWN_CAR object 2 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Y Y N N N N N Y N N \n", "5 FLAG_OWN_REALTY object 2 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Y N Y Y Y Y Y N Y Y \n", "6 CNT_CHILDREN int64 15 (0.00%) 0 (0.00%) 0 (0.00%) 215371 (70.04%) 0.0000 0.0000 1.0000 307511.0 1.9000e+01 4.1705e-01 0.0000e+00 0.7221 0 1 0 0 0 0 0 1 2 0 \n", "7 AMT_INCOME_TOTAL float64 2548 (0.83%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 112500.0000 147150.0000 202500.0000 307511.0 1.1700e+08 1.6880e+05 2.5650e+04 237123.1463 166500.0 450000.0 157500.0 135000.0 202500.0 117000.0 90000.0 180000.0 157500.0 90000.0 \n", "8 AMT_CREDIT float64 5603 (1.82%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 270000.0000 513531.0000 808650.0000 307511.0 4.0500e+06 5.9903e+05 4.5000e+04 402490.7770 254700.0 1381113.0 450000.0 1764000.0 203760.0 254700.0 538704.0 630000.0 679500.0 755190.0 \n", "9 AMT_ANNUITY float64 
13672 (4.45%) 12 (0.00%) 0 (0.00%) 0 (0.00%) 16524.0000 24903.0000 34596.0000 307499.0 2.5803e+05 2.7109e+04 1.6155e+03 14493.7373 25191.0 39712.5 22500.0 48510.0 16227.0 25191.0 26046.0 23274.0 36202.5 36459.0 \n", "10 AMT_GOODS_PRICE float64 1002 (0.33%) 278 (0.09%) 0 (0.00%) 0 (0.00%) 238500.0000 450000.0000 679500.0000 307233.0 4.0500e+06 5.3840e+05 4.0500e+04 369446.4605 225000.0 1206000.0 450000.0 1764000.0 180000.0 225000.0 481500.0 630000.0 679500.0 675000.0 \n", "11 NAME_TYPE_SUITE object 7 (0.00%) 1292 (0.42%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Unaccompanied Unaccompanied Unaccompanied Unaccompanied Unaccompanied Unaccompanied Unaccompanied Unaccompanied Family Family \n", "12 NAME_INCOME_TYPE object 8 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 State servant Working Working Commercial associate Working Pensioner Working Working Working Working \n", "13 NAME_EDUCATION_TYPE object 5 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Higher education Incomplete higher Secondary / secondary special Secondary / secondary special Secondary / secondary special Secondary / secondary special Secondary / secondary special Secondary / secondary special Secondary / secondary special Secondary / secondary special\n", "14 NAME_FAMILY_STATUS object 6 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Married Married Married Married Separated Married Single / not married Married Married Married \n", "15 NAME_HOUSING_TYPE object 6 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 House / apartment House / apartment House / apartment House / apartment House / apartment House / apartment House / apartment House / apartment House / apartment With parents \n", "16 REGION_POPULATION_RELATIVE float64 81 (0.03%) 0 
(0.00%) 0 (0.00%) 0 (0.00%) 0.0100 0.0188 0.0287 307511.0 7.2508e-02 2.0868e-02 2.9000e-04 0.0138 0.02461 0.018634 0.015221 0.008019 0.0105 0.008865999999999999 0.018634 0.0105 0.01885 0.030755 \n", "17 DAYS_BIRTH int64 17460 (5.68%) 0 (0.00%) 307511 (100.00%) 0 (0.00%) -19682.0000 -15750.0000 -12413.0000 307511.0 -7.4890e+03 -1.6037e+04 -2.5229e+04 4363.9886 -21882 -14026 -9905 -11946 -10493 -24264 -15251 -14263 -11179 -10477 \n", "18 DAYS_EMPLOYED int64 12574 (4.09%) 0 (0.00%) 252135 (81.99%) 2 (0.00%) -2760.0000 -1213.0000 -289.0000 307511.0 3.6524e+05 6.3815e+04 -1.7912e+04 141275.7665 -2987 -270 -2691 -1526 -656 365243 -1984 -481 -687 -1400 \n", "19 DAYS_REGISTRATION float64 15688 (5.10%) 0 (0.00%) 307431 (99.97%) 80 (0.03%) -7479.5000 -4504.0000 -2010.0000 307511.0 0.0000e+00 -4.9861e+03 -2.4672e+04 3522.8863 -11125.0 -1625.0 -4725.0 -1513.0 -2389.0 -87.0 -6933.0 -1315.0 -1491.0 -5034.0 \n", "20 DAYS_ID_PUBLISH int64 6168 (2.01%) 0 (0.00%) 307495 (99.99%) 16 (0.01%) -4299.0000 -3254.0000 -1720.0000 307511.0 0.0000e+00 -2.9942e+03 -7.1970e+03 1509.4504 -3984 -4768 -2549 -4392 -2526 -4388 -4396 -4830 -2742 -1625 \n", "21 OWN_CAR_AGE float64 62 (0.02%) 202929 (65.99%) 0 (0.00%) 2134 (0.69%) 5.0000 9.0000 15.0000 104582.0 9.1000e+01 1.2061e+01 0.0000e+00 11.9448 4.0 1.0 NaN NaN NaN NaN NaN 18.0 NaN NaN \n", "22 FLAG_MOBIL int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 1 (0.00%) 1.0000 1.0000 1.0000 307511.0 1.0000e+00 1.0000e+00 0.0000e+00 0.0018 1 1 1 1 1 1 1 1 1 1 \n", "23 FLAG_EMP_PHONE int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 55386 (18.01%) 1.0000 1.0000 1.0000 307511.0 1.0000e+00 8.1989e-01 0.0000e+00 0.3843 1 1 1 1 1 0 1 1 1 1 \n", "24 FLAG_WORK_PHONE int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 246203 (80.06%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.9937e-01 0.0000e+00 0.3995 1 0 0 0 1 0 0 0 1 1 \n", "25 FLAG_CONT_MOBILE int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 574 (0.19%) 1.0000 1.0000 1.0000 307511.0 1.0000e+00 9.9813e-01 0.0000e+00 0.0432 1 1 1 1 1 1 1 1 1 1 \n", "26 FLAG_PHONE 
int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 221080 (71.89%) 0.0000 0.0000 1.0000 307511.0 1.0000e+00 2.8107e-01 0.0000e+00 0.4495 1 0 0 0 1 0 0 0 1 0 \n", "27 FLAG_EMAIL int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 290069 (94.33%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 5.6720e-02 0.0000e+00 0.2313 0 0 0 0 0 0 0 0 0 0 \n", "28 OCCUPATION_TYPE object 18 (0.01%) 96391 (31.35%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 NaN Drivers Laborers NaN Laborers NaN Laborers NaN Managers Laborers \n", "29 CNT_FAM_MEMBERS float64 17 (0.01%) 2 (0.00%) 0 (0.00%) 0 (0.00%) 2.0000 2.0000 3.0000 307509.0 2.0000e+01 2.1527e+00 1.0000e+00 0.9107 2.0 3.0 2.0 2.0 1.0 2.0 1.0 3.0 4.0 2.0 \n", "30 REGION_RATING_CLIENT int64 3 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 2.0000 2.0000 2.0000 307511.0 3.0000e+00 2.0525e+00 1.0000e+00 0.5090 2 2 2 2 3 2 2 3 2 2 \n", "31 REGION_RATING_CLIENT_W_CITY int64 3 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 2.0000 2.0000 2.0000 307511.0 3.0000e+00 2.0315e+00 1.0000e+00 0.5027 2 2 2 2 3 2 2 3 2 2 \n", "32 WEEKDAY_APPR_PROCESS_START object 7 (0.00%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 THURSDAY MONDAY WEDNESDAY FRIDAY WEDNESDAY MONDAY FRIDAY MONDAY TUESDAY FRIDAY \n", "33 HOUR_APPR_PROCESS_START int64 24 (0.01%) 0 (0.00%) 0 (0.00%) 40 (0.01%) 10.0000 12.0000 14.0000 307511.0 2.3000e+01 1.2063e+01 0.0000e+00 3.2658 17 10 9 10 15 17 9 13 12 13 \n", "34 REG_REGION_NOT_LIVE_REGION int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 302854 (98.49%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.5144e-02 0.0000e+00 0.1221 0 0 0 0 0 0 0 0 0 0 \n", "35 REG_REGION_NOT_WORK_REGION int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 291899 (94.92%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 5.0769e-02 0.0000e+00 0.2195 0 0 0 1 1 0 0 0 0 0 \n", "36 LIVE_REGION_NOT_WORK_REGION int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 295008 (95.93%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 4.0659e-02 0.0000e+00 0.1975 0 0 0 1 1 0 0 0 0 0 \n", "37 
REG_CITY_NOT_LIVE_CITY int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 283472 (92.18%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 7.8173e-02 0.0000e+00 0.2684 0 0 1 0 0 0 0 0 0 0 \n", "38 REG_CITY_NOT_WORK_CITY int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 236644 (76.95%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 2.3045e-01 0.0000e+00 0.4211 0 0 1 1 1 0 0 0 0 0 \n", "39 LIVE_CITY_NOT_WORK_CITY int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 252296 (82.04%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.7955e-01 0.0000e+00 0.3838 0 0 0 1 1 0 0 0 0 0 \n", "40 ORGANIZATION_TYPE object 58 (0.02%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 Business Entity Type 2 Business Entity Type 3 Business Entity Type 2 Business Entity Type 3 Self-employed XNA Self-employed Business Entity Type 3 Trade: type 2 Medicine \n", "41 EXT_SOURCE_1 float64 114584 (37.26%) 173378 (56.38%) 0 (0.00%) 0 (0.00%) 0.3340 0.5060 0.6751 134133.0 9.6269e-01 5.0213e-01 1.4568e-02 0.2111 NaN 0.3569777836552319 NaN 0.4949180654446097 NaN NaN NaN 0.5392576763343839 0.6575317720639661 NaN \n", "42 EXT_SOURCE_2 float64 119831 (38.97%) 660 (0.21%) 0 (0.00%) 0 (0.00%) 0.3925 0.5660 0.6636 306851.0 8.5500e-01 5.1439e-01 8.1736e-08 0.1911 0.5784756531591329 0.5152771000292551 0.4248161058463641 0.6670352420910267 0.0962461218101204 0.39473825561809134 0.7881138083280189 0.5579841354415206 0.5916561691175296 0.6675741251599911 \n", "43 EXT_SOURCE_3 float64 814 (0.26%) 60965 (19.83%) 0 (0.00%) 0 (0.00%) 0.3706 0.5353 0.6691 246546.0 8.9601e-01 5.1085e-01 5.2727e-04 0.1948 0.5971924268337128 0.5226973172821112 0.31547215492577346 NaN 0.4170996682522097 0.6058362647264226 NaN 0.7490217048463391 0.4956658291397297 0.7394117535524816 \n", "44 APARTMENTS_AVG float64 2339 (0.76%) 156061 (50.75%) 0 (0.00%) 751 (0.24%) 0.0577 0.0876 0.1485 151450.0 1.0000e+00 1.1744e-01 0.0000e+00 0.1082 NaN 0.1485 NaN NaN NaN 0.2216 0.0629 0.0825 0.1031 0.1825 \n", "45 BASEMENTAREA_AVG float64 3780 (1.23%) 179943 (58.52%) 0 
(0.00%) 14745 (4.79%) 0.0442 0.0763 0.1122 127568.0 1.0000e+00 8.8442e-02 0.0000e+00 0.0824 NaN 0.0991 NaN NaN NaN 0.0776 0.0756 0.0788 NaN 0.1322 \n", "46 YEARS_BEGINEXPLUATATION_AVG float64 285 (0.09%) 150007 (48.78%) 0 (0.00%) 514 (0.17%) 0.9767 0.9816 0.9866 157504.0 1.0000e+00 9.7773e-01 0.0000e+00 0.0592 0.9856 0.9871 NaN NaN NaN 0.9826 0.9831 0.9786 0.9771 0.9861 \n", "47 YEARS_BUILD_AVG float64 149 (0.05%) 204488 (66.50%) 0 (0.00%) 102 (0.03%) 0.6872 0.7552 0.8232 103023.0 1.0000e+00 7.5247e-01 0.0000e+00 0.1133 NaN 0.8232 NaN NaN NaN NaN 0.7688 0.7076 0.6872 0.8096 \n", "48 COMMONAREA_AVG float64 3181 (1.03%) 214865 (69.87%) 0 (0.00%) 8442 (2.75%) 0.0078 0.0211 0.0515 92646.0 1.0000e+00 4.4621e-02 0.0000e+00 0.0760 NaN 0.0889 NaN NaN NaN NaN NaN 0.0079 NaN 0.0378 \n", "49 ELEVATORS_AVG float64 257 (0.08%) 163891 (53.30%) 0 (0.00%) 85718 (27.87%) 0.0000 0.0000 0.1200 143620.0 1.0000e+00 7.8942e-02 0.0000e+00 0.1346 0.08 0.16 NaN NaN NaN 0.08 0.0 0.0 NaN 0.2 \n", "50 ENTRANCES_AVG float64 285 (0.09%) 154828 (50.35%) 0 (0.00%) 323 (0.11%) 0.0690 0.1379 0.2069 152683.0 1.0000e+00 1.4972e-01 0.0000e+00 0.1000 0.069 0.1379 NaN NaN NaN 0.0345 0.1379 0.1379 0.2069 0.1724 \n", "51 FLOORSMAX_AVG float64 403 (0.13%) 153020 (49.76%) 0 (0.00%) 2938 (0.96%) 0.1667 0.1667 0.3333 154491.0 1.0000e+00 2.2628e-01 0.0000e+00 0.1446 0.3333 0.3333 NaN NaN NaN 0.3333 0.1667 0.1667 0.1667 0.3333 \n", "52 FLOORSMIN_AVG float64 305 (0.10%) 208642 (67.85%) 0 (0.00%) 2320 (0.75%) 0.0833 0.2083 0.3750 98869.0 1.0000e+00 2.3189e-01 0.0000e+00 0.1614 NaN 0.375 NaN NaN NaN NaN NaN 0.2083 NaN 0.375 \n", "53 LANDAREA_AVG float64 3527 (1.15%) 182590 (59.38%) 0 (0.00%) 15600 (5.07%) 0.0187 0.0481 0.0856 124921.0 1.0000e+00 6.6333e-02 0.0000e+00 0.0812 NaN 0.1127 NaN NaN NaN 0.0911 0.0151 0.0203 NaN 0.1238 \n", "54 LIVINGAPARTMENTS_AVG float64 1868 (0.61%) 210199 (68.35%) 0 (0.00%) 418 (0.14%) 0.0504 0.0756 0.1210 97312.0 1.0000e+00 1.0077e-01 0.0000e+00 0.0926 NaN 0.121 NaN NaN NaN NaN 
0.0504 0.0672 0.0807 0.1488 \n", "55 LIVINGAREA_AVG float64 5199 (1.69%) 154350 (50.19%) 0 (0.00%) 284 (0.09%) 0.0453 0.0745 0.1299 153161.0 1.0000e+00 1.0740e-01 0.0000e+00 0.1106 0.0739 0.0915 NaN NaN NaN 0.0582 0.0556 0.0703 0.0841 0.1824 \n", "56 NONLIVINGAPARTMENTS_AVG float64 386 (0.13%) 213514 (69.43%) 0 (0.00%) 54549 (17.74%) 0.0000 0.0000 0.0039 93997.0 1.0000e+00 8.8087e-03 0.0000e+00 0.0477 NaN 0.0 NaN NaN NaN NaN 0.0039 0.0 0.0154 0.0 \n", "57 NONLIVINGAREA_AVG float64 3290 (1.07%) 169682 (55.18%) 0 (0.00%) 58735 (19.10%) 0.0000 0.0036 0.0277 137829.0 1.0000e+00 2.8358e-02 0.0000e+00 0.0695 NaN 0.0 NaN NaN NaN 0.2242 0.0188 0.0 0.0128 0.0022 \n", "58 APARTMENTS_MODE float64 760 (0.25%) 156061 (50.75%) 0 (0.00%) 976 (0.32%) 0.0525 0.0840 0.1439 151450.0 1.0000e+00 1.1423e-01 0.0000e+00 0.1079 NaN 0.1513 NaN NaN NaN 0.2258 0.0641 0.084 0.105 0.1859 \n", "59 BASEMENTAREA_MODE float64 3841 (1.25%) 179943 (58.52%) 0 (0.00%) 16598 (5.40%) 0.0407 0.0746 0.1124 127568.0 1.0000e+00 8.7543e-02 0.0000e+00 0.0843 NaN 0.1028 NaN NaN NaN 0.0806 0.0785 0.0818 NaN 0.1372 \n", "60 YEARS_BEGINEXPLUATATION_MODE float64 221 (0.07%) 150007 (48.78%) 0 (0.00%) 142 (0.05%) 0.9767 0.9816 0.9866 157504.0 1.0000e+00 9.7707e-01 0.0000e+00 0.0646 0.9856 0.9871 NaN NaN NaN 0.9826 0.9831 0.9786 0.9772 0.9861 \n", "61 YEARS_BUILD_MODE float64 154 (0.05%) 204488 (66.50%) 0 (0.00%) 103 (0.03%) 0.6994 0.7648 0.8236 103023.0 1.0000e+00 7.5964e-01 0.0000e+00 0.1101 NaN 0.8301 NaN NaN NaN NaN 0.7779 0.7190000000000001 0.6994 0.8171 \n", "62 COMMONAREA_MODE float64 3128 (1.02%) 214865 (69.87%) 0 (0.00%) 9690 (3.15%) 0.0072 0.0190 0.0490 92646.0 1.0000e+00 4.2553e-02 0.0000e+00 0.0744 NaN 0.0897 NaN NaN NaN NaN NaN 0.008 NaN 0.0382 \n", "63 ELEVATORS_MODE float64 26 (0.01%) 163891 (53.30%) 0 (0.00%) 89498 (29.10%) 0.0000 0.0000 0.1208 143620.0 1.0000e+00 7.4490e-02 0.0000e+00 0.1323 0.0806 0.1611 NaN NaN NaN 0.0806 0.0 0.0 NaN 0.2014 \n", "64 ENTRANCES_MODE float64 30 (0.01%) 154828 (50.35%) 
0 (0.00%) 387 (0.13%) 0.0690 0.1379 0.2069 152683.0 1.0000e+00 1.4519e-01 0.0000e+00 0.1010 0.069 0.1379 NaN NaN NaN 0.0345 0.1379 0.1379 0.2069 0.1724 \n", "65 FLOORSMAX_MODE float64 25 (0.01%) 153020 (49.76%) 0 (0.00%) 3415 (1.11%) 0.1667 0.1667 0.3333 154491.0 1.0000e+00 2.2232e-01 0.0000e+00 0.1437 0.3333 0.3333 NaN NaN NaN 0.3333 0.1667 0.1667 0.1667 0.3333 \n", "66 FLOORSMIN_MODE float64 25 (0.01%) 208642 (67.85%) 0 (0.00%) 2517 (0.82%) 0.0833 0.2083 0.3750 98869.0 1.0000e+00 2.2806e-01 0.0000e+00 0.1612 NaN 0.375 NaN NaN NaN NaN NaN 0.2083 NaN 0.375 \n", "67 LANDAREA_MODE float64 3563 (1.16%) 182590 (59.38%) 0 (0.00%) 17453 (5.68%) 0.0166 0.0458 0.0841 124921.0 1.0000e+00 6.4958e-02 0.0000e+00 0.0818 NaN 0.1153 NaN NaN NaN 0.0932 0.0155 0.0207 NaN 0.1266 \n", "68 LIVINGAPARTMENTS_MODE float64 736 (0.24%) 210199 (68.35%) 0 (0.00%) 519 (0.17%) 0.0542 0.0771 0.1313 97312.0 1.0000e+00 1.0564e-01 0.0000e+00 0.0979 NaN 0.1322 NaN NaN NaN NaN 0.0551 0.0735 0.0882 0.1625 \n", "69 LIVINGAREA_MODE float64 5301 (1.72%) 154350 (50.19%) 0 (0.00%) 444 (0.14%) 0.0427 0.0731 0.1252 153161.0 1.0000e+00 1.0598e-01 0.0000e+00 0.1118 0.077 0.0953 NaN NaN NaN 0.0606 0.0579 0.0733 0.0876 0.1901 \n", "70 NONLIVINGAPARTMENTS_MODE float64 167 (0.05%) 213514 (69.43%) 0 (0.00%) 59255 (19.27%) 0.0000 0.0000 0.0039 93997.0 1.0000e+00 8.0764e-03 0.0000e+00 0.0463 NaN 0.0 NaN NaN NaN NaN 0.0039 0.0 0.0156 0.0 \n", "71 NONLIVINGAREA_MODE float64 3327 (1.08%) 169682 (55.18%) 0 (0.00%) 67126 (21.83%) 0.0000 0.0011 0.0231 137829.0 1.0000e+00 2.7022e-02 0.0000e+00 0.0703 NaN 0.0 NaN NaN NaN 0.2373 0.0199 0.0 0.0136 0.0023 \n", "72 APARTMENTS_MEDI float64 1148 (0.37%) 156061 (50.75%) 0 (0.00%) 771 (0.25%) 0.0583 0.0864 0.1489 151450.0 1.0000e+00 1.1785e-01 0.0000e+00 0.1091 NaN 0.1499 NaN NaN NaN 0.2238 0.0635 0.0833 0.1041 0.1842 \n", "73 BASEMENTAREA_MEDI float64 3772 (1.23%) 179943 (58.52%) 0 (0.00%) 14991 (4.87%) 0.0437 0.0758 0.1116 127568.0 1.0000e+00 8.7955e-02 0.0000e+00 0.0822 NaN 
0.0991 NaN NaN NaN 0.0776 0.0756 0.0788 NaN 0.1322 \n", "74 YEARS_BEGINEXPLUATATION_MEDI float64 245 (0.08%) 150007 (48.78%) 0 (0.00%) 548 (0.18%) 0.9767 0.9816 0.9866 157504.0 1.0000e+00 9.7775e-01 0.0000e+00 0.0599 0.9856 0.9871 NaN NaN NaN 0.9826 0.9831 0.9786 0.9771 0.9861 \n", "75 YEARS_BUILD_MEDI float64 151 (0.05%) 204488 (66.50%) 0 (0.00%) 101 (0.03%) 0.6914 0.7585 0.8256 103023.0 1.0000e+00 7.5575e-01 0.0000e+00 0.1121 NaN 0.8256 NaN NaN NaN NaN 0.7719 0.7115 0.6914 0.8121 \n", "76 COMMONAREA_MEDI float64 3202 (1.04%) 214865 (69.87%) 0 (0.00%) 8691 (2.83%) 0.0079 0.0208 0.0513 92646.0 1.0000e+00 4.4595e-02 0.0000e+00 0.0761 NaN 0.0895 NaN NaN NaN NaN NaN 0.008 NaN 0.0381 \n", "77 ELEVATORS_MEDI float64 46 (0.01%) 163891 (53.30%) 0 (0.00%) 87026 (28.30%) 0.0000 0.0000 0.1200 143620.0 1.0000e+00 7.8078e-02 0.0000e+00 0.1345 0.08 0.16 NaN NaN NaN 0.08 0.0 0.0 NaN 0.2 \n", "78 ENTRANCES_MEDI float64 46 (0.01%) 154828 (50.35%) 0 (0.00%) 329 (0.11%) 0.0690 0.1379 0.2069 152683.0 1.0000e+00 1.4921e-01 0.0000e+00 0.1004 0.069 0.1379 NaN NaN NaN 0.0345 0.1379 0.1379 0.2069 0.1724 \n", "79 FLOORSMAX_MEDI float64 49 (0.02%) 153020 (49.76%) 0 (0.00%) 2995 (0.97%) 0.1667 0.1667 0.3333 154491.0 1.0000e+00 2.2590e-01 0.0000e+00 0.1451 0.3333 0.3333 NaN NaN NaN 0.3333 0.1667 0.1667 0.1667 0.3333 \n", "80 FLOORSMIN_MEDI float64 47 (0.02%) 208642 (67.85%) 0 (0.00%) 2351 (0.76%) 0.0833 0.2083 0.3750 98869.0 1.0000e+00 2.3162e-01 0.0000e+00 0.1619 NaN 0.375 NaN NaN NaN NaN NaN 0.2083 NaN 0.375 \n", "81 LANDAREA_MEDI float64 3560 (1.16%) 182590 (59.38%) 0 (0.00%) 15919 (5.18%) 0.0187 0.0487 0.0868 124921.0 1.0000e+00 6.7169e-02 0.0000e+00 0.0822 NaN 0.1147 NaN NaN NaN 0.0927 0.0154 0.0206 NaN 0.126 \n", "82 LIVINGAPARTMENTS_MEDI float64 1097 (0.36%) 210199 (68.35%) 0 (0.00%) 433 (0.14%) 0.0513 0.0761 0.1231 97312.0 1.0000e+00 1.0195e-01 0.0000e+00 0.0936 NaN 0.1231 NaN NaN NaN NaN 0.0513 0.0684 0.0821 0.1513 \n", "83 LIVINGAREA_MEDI float64 5281 (1.72%) 154350 (50.19%) 0 
(0.00%) 299 (0.10%) 0.0457 0.0749 0.1303 153161.0 1.0000e+00 1.0861e-01 0.0000e+00 0.1123 0.0752 0.0931 NaN NaN NaN 0.0592 0.0566 0.0716 0.0856 0.1857 \n", "84 NONLIVINGAPARTMENTS_MEDI float64 214 (0.07%) 213514 (69.43%) 0 (0.00%) 56097 (18.24%) 0.0000 0.0000 0.0039 93997.0 1.0000e+00 8.6510e-03 0.0000e+00 0.0474 NaN 0.0 NaN NaN NaN NaN 0.0039 0.0 0.0155 0.0 \n", "85 NONLIVINGAREA_MEDI float64 3323 (1.08%) 169682 (55.18%) 0 (0.00%) 60954 (19.82%) 0.0000 0.0031 0.0266 137829.0 1.0000e+00 2.8236e-02 0.0000e+00 0.0702 NaN 0.0 NaN NaN NaN 0.2289 0.0192 0.0 0.0131 0.0022 \n", "86 FONDKAPREMONT_MODE object 4 (0.00%) 210295 (68.39%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 NaN reg oper account NaN NaN NaN NaN reg oper spec account reg oper account reg oper account reg oper account \n", "87 HOUSETYPE_MODE object 3 (0.00%) 154297 (50.18%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 NaN block of flats NaN NaN NaN block of flats block of flats block of flats block of flats block of flats \n", "88 TOTALAREA_MODE float64 5116 (1.66%) 148431 (48.27%) 0 (0.00%) 582 (0.19%) 0.0412 0.0688 0.1276 159080.0 1.0000e+00 1.0255e-01 0.0000e+00 0.1075 0.0581 0.1206 NaN NaN NaN 0.0945 0.0475 0.0574 0.0689 0.1644 \n", "89 WALLSMATERIAL_MODE object 7 (0.00%) 156341 (50.84%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 NaN Panel NaN NaN NaN Stone, brick Stone, brick Panel Stone, brick Panel \n", "90 EMERGENCYSTATE_MODE object 2 (0.00%) 145755 (47.40%) 0 (0.00%) 0 (0.00%) 0.0000 0.0000 0.0000 0.0 0.0000e+00 0.0000e+00 0.0000e+00 0.0000 No No NaN NaN NaN No No No No No \n", "91 OBS_30_CNT_SOCIAL_CIRCLE float64 33 (0.01%) 1021 (0.33%) 0 (0.00%) 163910 (53.30%) 0.0000 0.0000 2.0000 306490.0 3.4800e+02 1.4222e+00 0.0000e+00 2.4010 0.0 2.0 1.0 0.0 0.0 1.0 2.0 3.0 9.0 0.0 \n", "92 DEF_30_CNT_SOCIAL_CIRCLE float64 10 (0.00%) 1021 (0.33%) 0 (0.00%) 271324 (88.23%) 0.0000 0.0000 
0.0000 306490.0 3.4000e+01 1.4342e-01 0.0000e+00 0.4467 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 \n", "93 OBS_60_CNT_SOCIAL_CIRCLE float64 33 (0.01%) 1021 (0.33%) 0 (0.00%) 164666 (53.55%) 0.0000 0.0000 2.0000 306490.0 3.4400e+02 1.4053e+00 0.0000e+00 2.3798 0.0 2.0 1.0 0.0 0.0 1.0 2.0 3.0 9.0 0.0 \n", "94 DEF_60_CNT_SOCIAL_CIRCLE float64 9 (0.00%) 1021 (0.33%) 0 (0.00%) 280721 (91.29%) 0.0000 0.0000 0.0000 306490.0 2.4000e+01 1.0005e-01 0.0000e+00 0.3623 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", "95 DAYS_LAST_PHONE_CHANGE float64 3773 (1.23%) 1 (0.00%) 269838 (87.75%) 37672 (12.25%) -1570.0000 -757.0000 -274.0000 307510.0 0.0000e+00 -9.6286e+02 -4.2920e+03 826.8085 -3143.0 -2.0 -1523.0 -1224.0 0.0 -201.0 -1128.0 -2959.0 -1634.0 -1258.0 \n", "96 FLAG_DOCUMENT_2 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307498 (100.00%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 4.2275e-05 0.0000e+00 0.0065 0 0 0 0 0 0 0 0 0 0 \n", "97 FLAG_DOCUMENT_3 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 89171 (29.00%) 0.0000 1.0000 1.0000 307511.0 1.0000e+00 7.1002e-01 0.0000e+00 0.4538 1 1 1 1 1 0 1 1 1 1 \n", "98 FLAG_DOCUMENT_4 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307486 (99.99%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 8.1298e-05 0.0000e+00 0.0090 0 0 0 0 0 0 0 0 0 0 \n", "99 FLAG_DOCUMENT_5 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 302863 (98.49%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.5115e-02 0.0000e+00 0.1220 0 0 0 0 0 0 0 0 0 0 \n", "100 FLAG_DOCUMENT_6 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 280433 (91.19%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 8.8055e-02 0.0000e+00 0.2834 0 0 0 0 0 1 0 0 0 0 \n", "101 FLAG_DOCUMENT_7 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307452 (99.98%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.9186e-04 0.0000e+00 0.0139 0 0 0 0 0 0 0 0 0 0 \n", "102 FLAG_DOCUMENT_8 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 282487 (91.86%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 8.1376e-02 0.0000e+00 0.2734 0 0 0 0 0 0 0 0 0 0 \n", "103 FLAG_DOCUMENT_9 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 306313 (99.61%) 
0.0000 0.0000 0.0000 307511.0 1.0000e+00 3.8958e-03 0.0000e+00 0.0623 0 0 0 0 0 0 0 0 0 0 \n", "104 FLAG_DOCUMENT_10 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307504 (100.00%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 2.2763e-05 0.0000e+00 0.0048 0 0 0 0 0 0 0 0 0 0 \n", "105 FLAG_DOCUMENT_11 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 306308 (99.61%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 3.9121e-03 0.0000e+00 0.0624 0 0 0 0 0 0 0 0 0 0 \n", "106 FLAG_DOCUMENT_12 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307509 (100.00%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 6.5038e-06 0.0000e+00 0.0026 0 0 0 0 0 0 0 0 0 0 \n", "107 FLAG_DOCUMENT_13 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 306427 (99.65%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 3.5251e-03 0.0000e+00 0.0593 0 0 0 0 0 0 0 0 0 0 \n", "108 FLAG_DOCUMENT_14 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 306608 (99.71%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 2.9365e-03 0.0000e+00 0.0541 0 0 0 0 0 0 0 0 0 0 \n", "109 FLAG_DOCUMENT_15 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307139 (99.88%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 1.2097e-03 0.0000e+00 0.0348 0 0 0 0 0 0 0 0 0 0 \n", "110 FLAG_DOCUMENT_16 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 304458 (99.01%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 9.9281e-03 0.0000e+00 0.0991 0 0 0 0 0 0 0 0 0 0 \n", "111 FLAG_DOCUMENT_17 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307429 (99.97%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 2.6666e-04 0.0000e+00 0.0163 0 0 0 0 0 0 0 0 0 0 \n", "112 FLAG_DOCUMENT_18 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 305011 (99.19%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 8.1298e-03 0.0000e+00 0.0898 0 0 0 0 0 0 0 0 0 0 \n", "113 FLAG_DOCUMENT_19 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307328 (99.94%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 5.9510e-04 0.0000e+00 0.0244 0 0 0 0 0 0 0 0 0 0 \n", "114 FLAG_DOCUMENT_20 int64 2 (0.00%) 0 (0.00%) 0 (0.00%) 307355 (99.95%) 0.0000 0.0000 0.0000 307511.0 1.0000e+00 5.0730e-04 0.0000e+00 0.0225 0 0 0 0 0 0 0 0 0 0 \n", "115 FLAG_DOCUMENT_21 int64 2 (0.00%) 0 
def load_and_preview(csv_path, n_preview=5):
    """Read a CSV file, report its shape and show its first rows.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read with ``pd.read_csv``.
    n_preview : int, default 5
        Number of leading rows rendered with ``display``.

    Returns
    -------
    pandas.DataFrame
        The full frame read from ``csv_path``.
    """
    pdf = pd.read_csv(csv_path)
    # Format explicitly: `print("label", value)` prints a tuple repr on a
    # Python 2 kernel, which is what the recorded output of this cell shows.
    print("(rows, columns) {}".format(pdf.shape))
    print("First {} rows".format(n_preview))
    display(pdf.head(n_preview))
    return pdf


# train
train_path = "home-credit-default-risk/application_train.csv"
pdf_train = load_and_preview(train_path)

# test
test_path = "home-credit-default-risk/application_test.csv"
pdf_test = load_and_preview(test_path)

# load meta data (a report about application_train produced by an earlier notebook)
meta_path = "../02_pandas/reports/report_application_train.csv"
pdf_meta = pd.read_csv(meta_path)
display(pdf_meta)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGET
01000021
11000030
21000040
31000060
41000070
# %%
# Filter by tvt code: keep the rows tagged 'train' whose SK_ID_CURR also
# appears in pdf_train (merge defaults to an inner join), then drop the tag.
# NOTE(review): read_pickle executes arbitrary code from the file -- only load
# pickles produced by a trusted step of this project.
pdf_tvt_extend = pd.read_pickle("pdf_tvt_extend.pkl", compression="bz2")
pdf_train_filtered = (
    pdf_tvt_extend.query("tvt_code == 'train'")
    .merge(pdf_train[["SK_ID_CURR"]], on="SK_ID_CURR")
    .drop(columns=["tvt_code"])
)
pdf_train_filtered.head()

# %% [markdown]
# # Preprocessing

# %% [markdown]
# ## NAME_TYPE_SUITE: who accompanied the client when applying for the loan

# %%
pdf_train['NAME_TYPE_SUITE'].isna().sum()

# %%
pdf_train['NAME_TYPE_SUITE'].value_counts()

# %%
# There are 1292 null values; since the vast majority of rows are
# 'Unaccompanied', nulls are filled with 'Unaccompanied'.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the in-place form acts on a temporary Series and does not update
# the parent frame when pandas Copy-on-Write is active.
for pdf_part in (pdf_train, pdf_test):
    pdf_part['NAME_TYPE_SUITE'] = pdf_part['NAME_TYPE_SUITE'].fillna('Unaccompanied')

# %% [markdown]
# ## OWN_CAR_AGE: age of the client's car

# %%
pdf_train['OWN_CAR_AGE'].isna().mean()

# %%
pdf_train[(pdf_train["OWN_CAR_AGE"].isna()) & (pdf_train["FLAG_OWN_CAR"] == "Y")].shape

# %%
pdf_train[(pdf_train["OWN_CAR_AGE"].isna()) & (pdf_train["FLAG_OWN_CAR"] == "N")].shape

# %%
# About 65% of rows have no value; clients without a car leave the field
# empty, so nulls are filled with 0.
for pdf_part in (pdf_train, pdf_test):
    pdf_part['OWN_CAR_AGE'] = pdf_part['OWN_CAR_AGE'].fillna(0)

# %% [markdown]
# ## Percent credit and income

# %%
# AMT_CREDIT: loan amount
# AMT_INCOME_TOTAL: client's income
# AMT_ANNUITY: amount to be repaid each year (as described by the original note)
for pdf_part in (pdf_train, pdf_test):
    pdf_part['CREDIT_INCOME_PERCENT'] = pdf_part['AMT_CREDIT'] / pdf_part['AMT_INCOME_TOTAL']
    pdf_part['ANNUITY_INCOME_PERCENT'] = pdf_part['AMT_ANNUITY'] / pdf_part['AMT_INCOME_TOTAL']
    pdf_part['CREDIT_TERM'] = pdf_part['AMT_ANNUITY'] / pdf_part['AMT_CREDIT']

# %% [markdown]
# # Binary vs one-hot encoding features
"code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "((307511, 144), (48744, 144))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
SK_ID_CURR100002100003100004100006100007
is_FLAG_EMP_PHONE11111
is_FLAG_WORK_PHONE00100
is_FLAG_PHONE11100
is_FLAG_EMAIL00000
is_REG_REGION_NOT_LIVE_REGION00000
is_REG_REGION_NOT_WORK_REGION00000
is_LIVE_REGION_NOT_WORK_REGION00000
is_REG_CITY_NOT_WORK_CITY00001
is_LIVE_CITY_NOT_WORK_CITY00001
is_FLAG_DOCUMENT_311010
is_FLAG_DOCUMENT_500000
is_FLAG_DOCUMENT_600000
is_FLAG_DOCUMENT_800001
is_FLAG_DOCUMENT_900000
is_REGION_RATING_CLIENT21222
is_REGION_RATING_CLIENT_W_CITY21222
NAME_INCOME_TYPE_Working10111
NAME_INCOME_TYPE_State_servant01000
NAME_INCOME_TYPE_Commercial_associate00000
NAME_INCOME_TYPE_Pensioner00000
NAME_INCOME_TYPE_Unemployed00000
NAME_INCOME_TYPE_Student00000
NAME_INCOME_TYPE_Businessman00000
NAME_INCOME_TYPE_Maternity_leave00000
FONDKAPREMONT_MODE_reg_oper_account11000
FONDKAPREMONT_MODE_org_spec_account00000
FONDKAPREMONT_MODE_reg_oper_spec_account00000
FONDKAPREMONT_MODE_not_specified00000
NAME_HOUSING_TYPE_House___apartment11111
NAME_HOUSING_TYPE_Rented_apartment00000
NAME_HOUSING_TYPE_With_parents00000
NAME_HOUSING_TYPE_Municipal_apartment00000
NAME_HOUSING_TYPE_Office_apartment00000
NAME_HOUSING_TYPE_Co_op_apartment00000
NAME_EDUCATION_TYPE_Secondary___secondary_special10111
NAME_EDUCATION_TYPE_Higher_education01000
NAME_EDUCATION_TYPE_Incomplete_higher00000
NAME_EDUCATION_TYPE_Lower_secondary00000
NAME_EDUCATION_TYPE_Academic_degree00000
OCCUPATION_TYPE_Laborers10110
OCCUPATION_TYPE_Core_staff01001
OCCUPATION_TYPE_Accountants00000
OCCUPATION_TYPE_Managers00000
OCCUPATION_TYPE_Drivers00000
OCCUPATION_TYPE_Sales_staff00000
OCCUPATION_TYPE_Cleaning_staff00000
OCCUPATION_TYPE_Cooking_staff00000
OCCUPATION_TYPE_Private_service_staff00000
OCCUPATION_TYPE_Medicine_staff00000
OCCUPATION_TYPE_Security_staff00000
OCCUPATION_TYPE_High_skill_tech_staff00000
OCCUPATION_TYPE_Waiters_barmen_staff00000
OCCUPATION_TYPE_Low_skill_Laborers00000
OCCUPATION_TYPE_Realty_agents00000
OCCUPATION_TYPE_Secretaries00000
OCCUPATION_TYPE_IT_staff00000
OCCUPATION_TYPE_HR_staff00000
ORGANIZATION_TYPE_Business_Entity_Type_310010
ORGANIZATION_TYPE_School01000
ORGANIZATION_TYPE_Government00100
ORGANIZATION_TYPE_Religion00001
ORGANIZATION_TYPE_Other00000
ORGANIZATION_TYPE_XNA00000
ORGANIZATION_TYPE_Electricity00000
ORGANIZATION_TYPE_Medicine00000
ORGANIZATION_TYPE_Business_Entity_Type_200000
ORGANIZATION_TYPE_Self_employed00000
ORGANIZATION_TYPE_Transport__type_200000
ORGANIZATION_TYPE_Construction00000
ORGANIZATION_TYPE_Housing00000
ORGANIZATION_TYPE_Kindergarten00000
ORGANIZATION_TYPE_Trade__type_700000
ORGANIZATION_TYPE_Industry__type_1100000
ORGANIZATION_TYPE_Military00000
ORGANIZATION_TYPE_Services00000
ORGANIZATION_TYPE_Security_Ministries00000
ORGANIZATION_TYPE_Transport__type_400000
ORGANIZATION_TYPE_Industry__type_100000
ORGANIZATION_TYPE_Emergency00000
ORGANIZATION_TYPE_Security00000
ORGANIZATION_TYPE_Trade__type_200000
ORGANIZATION_TYPE_University00000
ORGANIZATION_TYPE_Transport__type_300000
ORGANIZATION_TYPE_Police00000
ORGANIZATION_TYPE_Business_Entity_Type_100000
ORGANIZATION_TYPE_Postal00000
ORGANIZATION_TYPE_Industry__type_400000
ORGANIZATION_TYPE_Agriculture00000
ORGANIZATION_TYPE_Restaurant00000
ORGANIZATION_TYPE_Culture00000
ORGANIZATION_TYPE_Hotel00000
ORGANIZATION_TYPE_Industry__type_700000
ORGANIZATION_TYPE_Trade__type_300000
ORGANIZATION_TYPE_Industry__type_300000
ORGANIZATION_TYPE_Bank00000
ORGANIZATION_TYPE_Industry__type_900000
ORGANIZATION_TYPE_Insurance00000
ORGANIZATION_TYPE_Trade__type_600000
ORGANIZATION_TYPE_Industry__type_200000
ORGANIZATION_TYPE_Transport__type_100000
ORGANIZATION_TYPE_Industry__type_1200000
ORGANIZATION_TYPE_Mobile00000
ORGANIZATION_TYPE_Trade__type_100000
ORGANIZATION_TYPE_Industry__type_500000
ORGANIZATION_TYPE_Industry__type_1000000
ORGANIZATION_TYPE_Legal_Services00000
ORGANIZATION_TYPE_Advertising00000
ORGANIZATION_TYPE_Trade__type_500000
ORGANIZATION_TYPE_Cleaning00000
ORGANIZATION_TYPE_Industry__type_1300000
ORGANIZATION_TYPE_Trade__type_400000
ORGANIZATION_TYPE_Telecom00000
ORGANIZATION_TYPE_Industry__type_800000
ORGANIZATION_TYPE_Realtor00000
ORGANIZATION_TYPE_Industry__type_600000
WALLSMATERIAL_MODE_Stone,_brick10000
WALLSMATERIAL_MODE_Block01000
WALLSMATERIAL_MODE_Panel00000
WALLSMATERIAL_MODE_Mixed00000
WALLSMATERIAL_MODE_Wooden00000
WALLSMATERIAL_MODE_Others00000
WALLSMATERIAL_MODE_Monolithic00000
NAME_FAMILY_STATUS_Single___not_married10101
NAME_FAMILY_STATUS_Married01000
NAME_FAMILY_STATUS_Civil_marriage00010
NAME_FAMILY_STATUS_Widow00000
NAME_FAMILY_STATUS_Separated00000
NAME_FAMILY_STATUS_Unknown00000
HOUSETYPE_MODE_block_of_flats11000
HOUSETYPE_MODE_terraced_house00000
HOUSETYPE_MODE_specific_housing00000
NAME_TYPE_SUITE_Unaccompanied10111
NAME_TYPE_SUITE_Family01000
NAME_TYPE_SUITE_Spouse,_partner00000
NAME_TYPE_SUITE_Children00000
NAME_TYPE_SUITE_Other_A00000
NAME_TYPE_SUITE_Other_B00000
NAME_TYPE_SUITE_Group_of_people00000
is_FLAG_OWN_CAR00100
is_NAME_CONTRACT_TYPE11011
is_FLAG_OWN_REALTY10111
is_CODE_GENDER10101
is_EMERGENCYSTATE_MODE00000
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "SK_ID_CURR 100002 100003 100004 100006 100007\n", "is_FLAG_EMP_PHONE 1 1 1 1 1 \n", "is_FLAG_WORK_PHONE 0 0 1 0 0 \n", "is_FLAG_PHONE 1 1 1 0 0 \n", "is_FLAG_EMAIL 0 0 0 0 0 \n", "is_REG_REGION_NOT_LIVE_REGION 0 0 0 0 0 \n", "is_REG_REGION_NOT_WORK_REGION 0 0 0 0 0 \n", "is_LIVE_REGION_NOT_WORK_REGION 0 0 0 0 0 \n", "is_REG_CITY_NOT_WORK_CITY 0 0 0 0 1 \n", "is_LIVE_CITY_NOT_WORK_CITY 0 0 0 0 1 \n", "is_FLAG_DOCUMENT_3 1 1 0 1 0 \n", "is_FLAG_DOCUMENT_5 0 0 0 0 0 \n", "is_FLAG_DOCUMENT_6 0 0 0 0 0 \n", "is_FLAG_DOCUMENT_8 0 0 0 0 1 \n", "is_FLAG_DOCUMENT_9 0 0 0 0 0 \n", "is_REGION_RATING_CLIENT 2 1 2 2 2 \n", "is_REGION_RATING_CLIENT_W_CITY 2 1 2 2 2 \n", "NAME_INCOME_TYPE_Working 1 0 1 1 1 \n", "NAME_INCOME_TYPE_State_servant 0 1 0 0 0 \n", "NAME_INCOME_TYPE_Commercial_associate 0 0 0 0 0 \n", "NAME_INCOME_TYPE_Pensioner 0 0 0 0 0 \n", "NAME_INCOME_TYPE_Unemployed 0 0 0 0 0 \n", "NAME_INCOME_TYPE_Student 0 0 0 0 0 \n", "NAME_INCOME_TYPE_Businessman 0 0 0 0 0 \n", "NAME_INCOME_TYPE_Maternity_leave 0 0 0 0 0 \n", "FONDKAPREMONT_MODE_reg_oper_account 1 1 0 0 0 \n", "FONDKAPREMONT_MODE_org_spec_account 0 0 0 0 0 \n", "FONDKAPREMONT_MODE_reg_oper_spec_account 0 0 0 0 0 \n", "FONDKAPREMONT_MODE_not_specified 0 0 0 0 0 \n", "NAME_HOUSING_TYPE_House___apartment 1 1 1 1 1 \n", "NAME_HOUSING_TYPE_Rented_apartment 0 0 0 0 0 \n", "NAME_HOUSING_TYPE_With_parents 0 0 0 0 0 \n", "NAME_HOUSING_TYPE_Municipal_apartment 0 0 0 0 0 \n", "NAME_HOUSING_TYPE_Office_apartment 0 0 0 0 0 \n", "NAME_HOUSING_TYPE_Co_op_apartment 0 0 0 0 0 \n", "NAME_EDUCATION_TYPE_Secondary___secondary_special 1 0 1 1 1 \n", "NAME_EDUCATION_TYPE_Higher_education 0 1 0 0 0 \n", "NAME_EDUCATION_TYPE_Incomplete_higher 0 0 0 0 0 \n", "NAME_EDUCATION_TYPE_Lower_secondary 0 0 0 0 0 \n", "NAME_EDUCATION_TYPE_Academic_degree 0 0 0 0 0 \n", "OCCUPATION_TYPE_Laborers 1 0 1 1 0 \n", "OCCUPATION_TYPE_Core_staff 0 1 0 0 1 \n", "OCCUPATION_TYPE_Accountants 0 0 0 0 0 \n", 
"OCCUPATION_TYPE_Managers 0 0 0 0 0 \n", "OCCUPATION_TYPE_Drivers 0 0 0 0 0 \n", "OCCUPATION_TYPE_Sales_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Cleaning_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Cooking_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Private_service_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Medicine_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Security_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_High_skill_tech_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Waiters_barmen_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_Low_skill_Laborers 0 0 0 0 0 \n", "OCCUPATION_TYPE_Realty_agents 0 0 0 0 0 \n", "OCCUPATION_TYPE_Secretaries 0 0 0 0 0 \n", "OCCUPATION_TYPE_IT_staff 0 0 0 0 0 \n", "OCCUPATION_TYPE_HR_staff 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Business_Entity_Type_3 1 0 0 1 0 \n", "ORGANIZATION_TYPE_School 0 1 0 0 0 \n", "ORGANIZATION_TYPE_Government 0 0 1 0 0 \n", "ORGANIZATION_TYPE_Religion 0 0 0 0 1 \n", "ORGANIZATION_TYPE_Other 0 0 0 0 0 \n", "ORGANIZATION_TYPE_XNA 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Electricity 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Medicine 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Business_Entity_Type_2 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Self_employed 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Transport__type_2 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Construction 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Housing 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Kindergarten 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_7 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_11 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Military 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Services 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Security_Ministries 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Transport__type_4 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_1 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Emergency 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Security 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_2 0 0 0 0 0 \n", "ORGANIZATION_TYPE_University 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Transport__type_3 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Police 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Business_Entity_Type_1 
0 0 0 0 0 \n", "ORGANIZATION_TYPE_Postal 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_4 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Agriculture 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Restaurant 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Culture 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Hotel 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_7 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_3 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_3 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Bank 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_9 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Insurance 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_6 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_2 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Transport__type_1 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_12 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Mobile 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_1 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_5 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_10 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Legal_Services 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Advertising 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_5 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Cleaning 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_13 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Trade__type_4 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Telecom 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_8 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Realtor 0 0 0 0 0 \n", "ORGANIZATION_TYPE_Industry__type_6 0 0 0 0 0 \n", "WALLSMATERIAL_MODE_Stone,_brick 1 0 0 0 0 \n", "WALLSMATERIAL_MODE_Block 0 1 0 0 0 \n", "WALLSMATERIAL_MODE_Panel 0 0 0 0 0 \n", "WALLSMATERIAL_MODE_Mixed 0 0 0 0 0 \n", "WALLSMATERIAL_MODE_Wooden 0 0 0 0 0 \n", "WALLSMATERIAL_MODE_Others 0 0 0 0 0 \n", "WALLSMATERIAL_MODE_Monolithic 0 0 0 0 0 \n", "NAME_FAMILY_STATUS_Single___not_married 1 0 1 0 1 \n", "NAME_FAMILY_STATUS_Married 0 1 0 0 0 \n", "NAME_FAMILY_STATUS_Civil_marriage 0 0 0 1 0 \n", "NAME_FAMILY_STATUS_Widow 0 0 0 0 0 \n", "NAME_FAMILY_STATUS_Separated 0 0 0 0 0 \n", "NAME_FAMILY_STATUS_Unknown 0 0 
# Categorical columns collapsed to one 0/1 flag: the flag is 1 when the value
# equals the FIRST listed category and 0 otherwise (other categories and
# missing values alike). The remaining entries only document the known values.
_BINARY_DEFAULT_FEATURES = (
    ("NAME_CONTRACT_TYPE", ("Cash loans", "Revolving loans")),
    ("CODE_GENDER", ("M", "F", "XNA")),
    ("FLAG_OWN_CAR", ("Y", "N")),
    ("FLAG_OWN_REALTY", ("Y", "N")),
    ("EMERGENCYSTATE_MODE", ("Yes", "No")),
)

# Numeric columns copied unchanged under an "is_" prefix.
# NOTE(review): the two REGION_RATING_* columns are not 0/1 flags (the recorded
# output of this cell shows values such as 2); the names are kept so that
# downstream consumers of these columns keep working.
_BINARY_FEATURES = (
    "FLAG_EMP_PHONE",
    "FLAG_WORK_PHONE",
    "FLAG_PHONE",
    "FLAG_EMAIL",
    "REG_REGION_NOT_LIVE_REGION",
    "REG_REGION_NOT_WORK_REGION",
    "LIVE_REGION_NOT_WORK_REGION",
    "REG_CITY_NOT_WORK_CITY",
    "LIVE_CITY_NOT_WORK_CITY",
    "FLAG_DOCUMENT_3",
    "FLAG_DOCUMENT_5",
    "FLAG_DOCUMENT_6",
    "FLAG_DOCUMENT_8",
    "FLAG_DOCUMENT_9",
    "REGION_RATING_CLIENT",
    "REGION_RATING_CLIENT_W_CITY",
)

# Categorical columns expanded into one 0/1 column per listed category.
_ONEHOT_FEATURES = (
    ("NAME_TYPE_SUITE", (
        "Unaccompanied", "Family", "Spouse, partner", "Children",
        "Other_A", "Other_B", "Group of people",
    )),
    ("NAME_INCOME_TYPE", (
        "Working", "State servant", "Commercial associate", "Pensioner",
        "Unemployed", "Student", "Businessman", "Maternity leave",
    )),
    ("NAME_EDUCATION_TYPE", (
        "Secondary / secondary special", "Higher education",
        "Incomplete higher", "Lower secondary", "Academic degree",
    )),
    ("NAME_FAMILY_STATUS", (
        "Single / not married", "Married", "Civil marriage", "Widow",
        "Separated", "Unknown",
    )),
    ("NAME_HOUSING_TYPE", (
        "House / apartment", "Rented apartment", "With parents",
        "Municipal apartment", "Office apartment", "Co-op apartment",
    )),
    ("OCCUPATION_TYPE", (
        "Laborers", "Core staff", "Accountants", "Managers", "Drivers",
        "Sales staff", "Cleaning staff", "Cooking staff",
        "Private service staff", "Medicine staff", "Security staff",
        "High skill tech staff", "Waiters/barmen staff", "Low-skill Laborers",
        "Realty agents", "Secretaries", "IT staff", "HR staff",
    )),
    ("ORGANIZATION_TYPE", (
        "Business Entity Type 3", "School", "Government", "Religion", "Other",
        "XNA", "Electricity", "Medicine", "Business Entity Type 2",
        "Self-employed", "Transport: type 2", "Construction", "Housing",
        "Kindergarten", "Trade: type 7", "Industry: type 11", "Military",
        "Services", "Security Ministries", "Transport: type 4",
        "Industry: type 1", "Emergency", "Security", "Trade: type 2",
        "University", "Transport: type 3", "Police", "Business Entity Type 1",
        "Postal", "Industry: type 4", "Agriculture", "Restaurant", "Culture",
        "Hotel", "Industry: type 7", "Trade: type 3", "Industry: type 3",
        "Bank", "Industry: type 9", "Insurance", "Trade: type 6",
        "Industry: type 2", "Transport: type 1", "Industry: type 12", "Mobile",
        "Trade: type 1", "Industry: type 5", "Industry: type 10",
        "Legal Services", "Advertising", "Trade: type 5", "Cleaning",
        "Industry: type 13", "Trade: type 4", "Telecom", "Industry: type 8",
        "Realtor", "Industry: type 6",
    )),
    ("FONDKAPREMONT_MODE", (
        "reg oper account", "org spec account", "reg oper spec account",
        "not specified",
    )),
    ("HOUSETYPE_MODE", ("block of flats", "terraced house", "specific housing")),
    ("WALLSMATERIAL_MODE", (
        "Stone, brick", "Block", "Panel", "Mixed", "Wooden", "Others",
        "Monolithic",
    )),
)


def _onehot_column_name(cname, val):
    """Build the output column name for category `val` of column `cname`."""
    suffix = val
    # Only these four characters are rewritten; commas are deliberately left
    # alone (e.g. "Stone,_brick") so existing column names stay unchanged.
    for char in (" ", ":", "/", "-"):
        suffix = suffix.replace(char, "_")
    return "{}_{}".format(cname, suffix)


def gen_binary_one_hot_feat(pdf_input):
    """Encode the application table's flag and categorical columns as integers.

    Three groups of features are produced, in this fixed order:

    1. ``is_<col>`` for each column in ``_BINARY_DEFAULT_FEATURES``: 1 when the
       value equals the first listed category, else 0 (so e.g. ``CODE_GENDER``
       values "F", "XNA" and missing all map to 0).
    2. ``is_<col>`` for each column in ``_BINARY_FEATURES``: the source column
       copied as-is.
    3. ``<col>_<category>`` for each listed category of each column in
       ``_ONEHOT_FEATURES``: 1 when the value equals that category, else 0.
       Missing or unlisted values yield 0 in every column of the group.

    Parameters
    ----------
    pdf_input : pandas.DataFrame
        Must contain ``SK_ID_CURR`` and every source column named in the three
        tables above. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``SK_ID_CURR`` followed by the generated features, on the same index
        as ``pdf_input``.

    Raises
    ------
    KeyError
        If any required source column is missing. The check happens up front
        so the error names the missing inputs, rather than surfacing later as
        a failed selection of half-built feature columns.
    """
    required = ["SK_ID_CURR"]
    required.extend(cname for cname, _ in _BINARY_DEFAULT_FEATURES)
    required.extend(_BINARY_FEATURES)
    required.extend(cname for cname, _ in _ONEHOT_FEATURES)
    missing = [cname for cname in required if cname not in pdf_input.columns]
    if missing:
        raise KeyError(
            "Input frame is missing required column(s): {}".format(missing)
        )

    # Collect (name, Series) pairs and build the result once at the end; this
    # avoids copying the whole input frame and inserting columns one by one.
    features = [("SK_ID_CURR", pdf_input["SK_ID_CURR"])]

    for cname, categories in _BINARY_DEFAULT_FEATURES:
        default_val = categories[0]
        # Vectorized comparison: NaN == value is False, hence 0.
        features.append(
            ("is_" + cname, (pdf_input[cname] == default_val).astype("int64"))
        )

    for cname in _BINARY_FEATURES:
        # rename only
        features.append(("is_" + cname, pdf_input[cname]))

    for cname, categories in _ONEHOT_FEATURES:
        source = pdf_input[cname]
        for val in categories:
            features.append(
                (_onehot_column_name(cname, val), (source == val).astype("int64"))
            )

    # `columns=` pins the column order explicitly instead of relying on dict
    # ordering, which is arbitrary on old interpreters.
    ordered_names = [name for name, _ in features]
    return pd.DataFrame(dict(features), columns=ordered_names)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameauccorrcoverage
16NAME_INCOME_TYPE_Working0.55410.05911.0
15is_REGION_RATING_CLIENT_W_CITY0.54950.06151.0
14is_REGION_RATING_CLIENT0.54810.05921.0
141is_CODE_GENDER0.54660.05361.0
35NAME_EDUCATION_TYPE_Higher_education0.5440-0.05601.0
34NAME_EDUCATION_TYPE_Secondary___secondary_special0.54060.04881.0
7is_REG_CITY_NOT_WORK_CITY0.53950.05121.0
9is_FLAG_DOCUMENT_30.53790.04561.0
128HOUSETYPE_MODE_block_of_flats0.5379-0.04141.0
19NAME_INCOME_TYPE_Pensioner0.5324-0.04611.0
62ORGANIZATION_TYPE_XNA0.5322-0.04581.0
0is_FLAG_EMP_PHONE0.53220.04581.0
39OCCUPATION_TYPE_Laborers0.52950.04201.0
117WALLSMATERIAL_MODE_Panel0.5246-0.03271.0
8is_LIVE_CITY_NOT_WORK_CITY0.52390.03401.0
123NAME_FAMILY_STATUS_Married0.5220-0.02511.0
1is_FLAG_WORK_PHONE0.52080.02841.0
2is_FLAG_PHONE0.5192-0.02341.0
138is_FLAG_OWN_CAR0.5192-0.02211.0
66ORGANIZATION_TYPE_Self_employed0.51810.03001.0
57ORGANIZATION_TYPE_Business_Entity_Type_30.51800.02361.0
122NAME_FAMILY_STATUS_Single___not_married0.51770.02731.0
24FONDKAPREMONT_MODE_reg_oper_account0.5177-0.02261.0
139is_NAME_CONTRACT_TYPE0.51640.03041.0
28NAME_HOUSING_TYPE_House___apartment0.5162-0.02811.0
11is_FLAG_DOCUMENT_60.5154-0.02961.0
43OCCUPATION_TYPE_Drivers0.51290.02961.0
124NAME_FAMILY_STATUS_Civil_marriage0.51280.02381.0
30NAME_HOUSING_TYPE_With_parents0.51200.03061.0
17NAME_INCOME_TYPE_State_servant0.5115-0.02451.0
40OCCUPATION_TYPE_Core_staff0.5112-0.02141.0
44OCCUPATION_TYPE_Sales_staff0.51100.01971.0
18NAME_INCOME_TYPE_Commercial_associate0.5103-0.01331.0
115WALLSMATERIAL_MODE_Stone,_brick0.5089-0.01191.0
125NAME_FAMILY_STATUS_Widow0.5082-0.02001.0
42OCCUPATION_TYPE_Managers0.5081-0.01731.0
41OCCUPATION_TYPE_Accountants0.5068-0.02111.0
140is_FLAG_OWN_REALTY0.5062-0.00741.0
68ORGANIZATION_TYPE_Construction0.50560.02071.0
131NAME_TYPE_SUITE_Unaccompanied0.50540.00761.0
12is_FLAG_DOCUMENT_80.5051-0.01011.0
132NAME_TYPE_SUITE_Family0.5047-0.00771.0
50OCCUPATION_TYPE_High_skill_tech_staff0.5045-0.01301.0
58ORGANIZATION_TYPE_School0.5045-0.01461.0
29NAME_HOUSING_TYPE_Rented_apartment0.50450.01961.0
52OCCUPATION_TYPE_Low_skill_Laborers0.50440.02971.0
49OCCUPATION_TYPE_Security_staff0.50390.01461.0
26FONDKAPREMONT_MODE_reg_oper_spec_account0.5038-0.01071.0
64ORGANIZATION_TYPE_Medicine0.5032-0.00921.0
25FONDKAPREMONT_MODE_org_spec_account0.5031-0.01261.0
116WALLSMATERIAL_MODE_Block0.5029-0.00941.0
46OCCUPATION_TYPE_Cooking_staff0.50290.01151.0
5is_REG_REGION_NOT_WORK_REGION0.50250.00631.0
37NAME_EDUCATION_TYPE_Lower_secondary0.50250.01231.0
59ORGANIZATION_TYPE_Government0.5024-0.00731.0
71ORGANIZATION_TYPE_Trade__type_70.50220.00771.0
48OCCUPATION_TYPE_Medicine_staff0.5020-0.00681.0
73ORGANIZATION_TYPE_Military0.5019-0.01101.0
93ORGANIZATION_TYPE_Industry__type_30.50180.00961.0
94ORGANIZATION_TYPE_Bank0.5018-0.01081.0
82ORGANIZATION_TYPE_Transport__type_30.50180.01561.0
83ORGANIZATION_TYPE_Police0.5017-0.01071.0
88ORGANIZATION_TYPE_Restaurant0.50170.01211.0
70ORGANIZATION_TYPE_Kindergarten0.5017-0.00631.0
79ORGANIZATION_TYPE_Security0.50160.00881.0
87ORGANIZATION_TYPE_Agriculture0.50160.01001.0
45OCCUPATION_TYPE_Cleaning_staff0.50160.00701.0
119WALLSMATERIAL_MODE_Wooden0.50160.00651.0
75ORGANIZATION_TYPE_Security_Ministries0.5014-0.00951.0
92ORGANIZATION_TYPE_Trade__type_30.50140.00711.0
65ORGANIZATION_TYPE_Business_Entity_Type_20.50130.00411.0
61ORGANIZATION_TYPE_Other0.5013-0.00321.0
4is_REG_REGION_NOT_LIVE_REGION0.50130.00571.0
36NAME_EDUCATION_TYPE_Incomplete_higher0.50120.00371.0
121WALLSMATERIAL_MODE_Monolithic0.5012-0.00841.0
76ORGANIZATION_TYPE_Transport__type_40.50120.00481.0
51OCCUPATION_TYPE_Waiters_barmen_staff0.50110.00891.0
6is_LIVE_REGION_NOT_WORK_REGION0.50100.00271.0
81ORGANIZATION_TYPE_University0.5009-0.00761.0
142is_EMERGENCYSTATE_MODE0.50080.00521.0
95ORGANIZATION_TYPE_Industry__type_90.5008-0.00431.0
32NAME_HOUSING_TYPE_Office_apartment0.5008-0.00481.0
27FONDKAPREMONT_MODE_not_specified0.5008-0.00311.0
47OCCUPATION_TYPE_Private_service_staff0.5008-0.00451.0
133NAME_TYPE_SUITE_Spouse,_partner0.5007-0.00191.0
31NAME_HOUSING_TYPE_Municipal_apartment0.50070.00191.0
13is_FLAG_DOCUMENT_90.5006-0.00541.0
130HOUSETYPE_MODE_specific_housing0.50060.00481.0
72ORGANIZATION_TYPE_Industry__type_110.50060.00361.0
3is_FLAG_EMAIL0.5006-0.00141.0
86ORGANIZATION_TYPE_Industry__type_40.50060.00601.0
74ORGANIZATION_TYPE_Services0.5005-0.00401.0
80ORGANIZATION_TYPE_Trade__type_20.5005-0.00351.0
77ORGANIZATION_TYPE_Industry__type_10.50050.00461.0
97ORGANIZATION_TYPE_Trade__type_60.5004-0.00531.0
63ORGANIZATION_TYPE_Electricity0.5004-0.00411.0
96ORGANIZATION_TYPE_Insurance0.5004-0.00491.0
134NAME_TYPE_SUITE_Children0.5004-0.00191.0
126NAME_FAMILY_STATUS_Separated0.5004-0.00081.0
129HOUSETYPE_MODE_terraced_house0.50030.00301.0
100ORGANIZATION_TYPE_Industry__type_120.5003-0.00541.0
67ORGANIZATION_TYPE_Transport__type_20.5003-0.00201.0
10is_FLAG_DOCUMENT_50.50030.00141.0
118WALLSMATERIAL_MODE_Mixed0.5003-0.00191.0
113ORGANIZATION_TYPE_Realtor0.50030.00441.0
38NAME_EDUCATION_TYPE_Academic_degree0.5003-0.00621.0
56OCCUPATION_TYPE_HR_staff0.5002-0.00321.0
89ORGANIZATION_TYPE_Culture0.5002-0.00401.0
90ORGANIZATION_TYPE_Hotel0.5002-0.00231.0
69ORGANIZATION_TYPE_Housing0.5002-0.00131.0
55OCCUPATION_TYPE_IT_staff0.5002-0.00291.0
135NAME_TYPE_SUITE_Other_A0.50020.00211.0
78ORGANIZATION_TYPE_Emergency0.5002-0.00251.0
108ORGANIZATION_TYPE_Cleaning0.50020.00361.0
91ORGANIZATION_TYPE_Industry__type_70.5002-0.00161.0
20NAME_INCOME_TYPE_Unemployed0.50020.01171.0
85ORGANIZATION_TYPE_Postal0.5002-0.00111.0
99ORGANIZATION_TYPE_Transport__type_10.5002-0.00361.0
84ORGANIZATION_TYPE_Business_Entity_Type_10.50020.00061.0
98ORGANIZATION_TYPE_Industry__type_20.5002-0.00221.0
60ORGANIZATION_TYPE_Religion0.5001-0.00471.0
102ORGANIZATION_TYPE_Trade__type_10.50010.00221.0
103ORGANIZATION_TYPE_Industry__type_50.5001-0.00171.0
136NAME_TYPE_SUITE_Other_B0.50010.00081.0
101ORGANIZATION_TYPE_Mobile0.50010.00191.0
106ORGANIZATION_TYPE_Advertising0.5001-0.00161.0
54OCCUPATION_TYPE_Secretaries0.5001-0.00081.0
33NAME_HOUSING_TYPE_Co_op_apartment0.5001-0.00071.0
114ORGANIZATION_TYPE_Industry__type_60.5001-0.00221.0
120WALLSMATERIAL_MODE_Others0.5001-0.00061.0
105ORGANIZATION_TYPE_Legal_Services0.5001-0.00091.0
104ORGANIZATION_TYPE_Industry__type_100.5000-0.00141.0
110ORGANIZATION_TYPE_Trade__type_40.5000-0.00191.0
112ORGANIZATION_TYPE_Industry__type_80.50000.00261.0
53OCCUPATION_TYPE_Realty_agents0.50000.00041.0
109ORGANIZATION_TYPE_Industry__type_130.50000.00091.0
23NAME_INCOME_TYPE_Maternity_leave0.50000.00271.0
137NAME_TYPE_SUITE_Group_of_people0.50000.00041.0
111ORGANIZATION_TYPE_Telecom0.50000.00031.0
22NAME_INCOME_TYPE_Businessman0.5000-0.00181.0
21NAME_INCOME_TYPE_Student0.5000-0.00181.0
107ORGANIZATION_TYPE_Trade__type_50.5000-0.00081.0
127NAME_FAMILY_STATUS_Unknown0.5000-0.00061.0
\n", "
" ], "text/plain": [ " name auc corr coverage\n", "16 NAME_INCOME_TYPE_Working 0.5541 0.0591 1.0 \n", "15 is_REGION_RATING_CLIENT_W_CITY 0.5495 0.0615 1.0 \n", "14 is_REGION_RATING_CLIENT 0.5481 0.0592 1.0 \n", "141 is_CODE_GENDER 0.5466 0.0536 1.0 \n", "35 NAME_EDUCATION_TYPE_Higher_education 0.5440 -0.0560 1.0 \n", "34 NAME_EDUCATION_TYPE_Secondary___secondary_special 0.5406 0.0488 1.0 \n", "7 is_REG_CITY_NOT_WORK_CITY 0.5395 0.0512 1.0 \n", "9 is_FLAG_DOCUMENT_3 0.5379 0.0456 1.0 \n", "128 HOUSETYPE_MODE_block_of_flats 0.5379 -0.0414 1.0 \n", "19 NAME_INCOME_TYPE_Pensioner 0.5324 -0.0461 1.0 \n", "62 ORGANIZATION_TYPE_XNA 0.5322 -0.0458 1.0 \n", "0 is_FLAG_EMP_PHONE 0.5322 0.0458 1.0 \n", "39 OCCUPATION_TYPE_Laborers 0.5295 0.0420 1.0 \n", "117 WALLSMATERIAL_MODE_Panel 0.5246 -0.0327 1.0 \n", "8 is_LIVE_CITY_NOT_WORK_CITY 0.5239 0.0340 1.0 \n", "123 NAME_FAMILY_STATUS_Married 0.5220 -0.0251 1.0 \n", "1 is_FLAG_WORK_PHONE 0.5208 0.0284 1.0 \n", "2 is_FLAG_PHONE 0.5192 -0.0234 1.0 \n", "138 is_FLAG_OWN_CAR 0.5192 -0.0221 1.0 \n", "66 ORGANIZATION_TYPE_Self_employed 0.5181 0.0300 1.0 \n", "57 ORGANIZATION_TYPE_Business_Entity_Type_3 0.5180 0.0236 1.0 \n", "122 NAME_FAMILY_STATUS_Single___not_married 0.5177 0.0273 1.0 \n", "24 FONDKAPREMONT_MODE_reg_oper_account 0.5177 -0.0226 1.0 \n", "139 is_NAME_CONTRACT_TYPE 0.5164 0.0304 1.0 \n", "28 NAME_HOUSING_TYPE_House___apartment 0.5162 -0.0281 1.0 \n", "11 is_FLAG_DOCUMENT_6 0.5154 -0.0296 1.0 \n", "43 OCCUPATION_TYPE_Drivers 0.5129 0.0296 1.0 \n", "124 NAME_FAMILY_STATUS_Civil_marriage 0.5128 0.0238 1.0 \n", "30 NAME_HOUSING_TYPE_With_parents 0.5120 0.0306 1.0 \n", "17 NAME_INCOME_TYPE_State_servant 0.5115 -0.0245 1.0 \n", "40 OCCUPATION_TYPE_Core_staff 0.5112 -0.0214 1.0 \n", "44 OCCUPATION_TYPE_Sales_staff 0.5110 0.0197 1.0 \n", "18 NAME_INCOME_TYPE_Commercial_associate 0.5103 -0.0133 1.0 \n", "115 WALLSMATERIAL_MODE_Stone,_brick 0.5089 -0.0119 1.0 \n", "125 NAME_FAMILY_STATUS_Widow 0.5082 -0.0200 1.0 \n", "42 
OCCUPATION_TYPE_Managers 0.5081 -0.0173 1.0 \n", "41 OCCUPATION_TYPE_Accountants 0.5068 -0.0211 1.0 \n", "140 is_FLAG_OWN_REALTY 0.5062 -0.0074 1.0 \n", "68 ORGANIZATION_TYPE_Construction 0.5056 0.0207 1.0 \n", "131 NAME_TYPE_SUITE_Unaccompanied 0.5054 0.0076 1.0 \n", "12 is_FLAG_DOCUMENT_8 0.5051 -0.0101 1.0 \n", "132 NAME_TYPE_SUITE_Family 0.5047 -0.0077 1.0 \n", "50 OCCUPATION_TYPE_High_skill_tech_staff 0.5045 -0.0130 1.0 \n", "58 ORGANIZATION_TYPE_School 0.5045 -0.0146 1.0 \n", "29 NAME_HOUSING_TYPE_Rented_apartment 0.5045 0.0196 1.0 \n", "52 OCCUPATION_TYPE_Low_skill_Laborers 0.5044 0.0297 1.0 \n", "49 OCCUPATION_TYPE_Security_staff 0.5039 0.0146 1.0 \n", "26 FONDKAPREMONT_MODE_reg_oper_spec_account 0.5038 -0.0107 1.0 \n", "64 ORGANIZATION_TYPE_Medicine 0.5032 -0.0092 1.0 \n", "25 FONDKAPREMONT_MODE_org_spec_account 0.5031 -0.0126 1.0 \n", "116 WALLSMATERIAL_MODE_Block 0.5029 -0.0094 1.0 \n", "46 OCCUPATION_TYPE_Cooking_staff 0.5029 0.0115 1.0 \n", "5 is_REG_REGION_NOT_WORK_REGION 0.5025 0.0063 1.0 \n", "37 NAME_EDUCATION_TYPE_Lower_secondary 0.5025 0.0123 1.0 \n", "59 ORGANIZATION_TYPE_Government 0.5024 -0.0073 1.0 \n", "71 ORGANIZATION_TYPE_Trade__type_7 0.5022 0.0077 1.0 \n", "48 OCCUPATION_TYPE_Medicine_staff 0.5020 -0.0068 1.0 \n", "73 ORGANIZATION_TYPE_Military 0.5019 -0.0110 1.0 \n", "93 ORGANIZATION_TYPE_Industry__type_3 0.5018 0.0096 1.0 \n", "94 ORGANIZATION_TYPE_Bank 0.5018 -0.0108 1.0 \n", "82 ORGANIZATION_TYPE_Transport__type_3 0.5018 0.0156 1.0 \n", "83 ORGANIZATION_TYPE_Police 0.5017 -0.0107 1.0 \n", "88 ORGANIZATION_TYPE_Restaurant 0.5017 0.0121 1.0 \n", "70 ORGANIZATION_TYPE_Kindergarten 0.5017 -0.0063 1.0 \n", "79 ORGANIZATION_TYPE_Security 0.5016 0.0088 1.0 \n", "87 ORGANIZATION_TYPE_Agriculture 0.5016 0.0100 1.0 \n", "45 OCCUPATION_TYPE_Cleaning_staff 0.5016 0.0070 1.0 \n", "119 WALLSMATERIAL_MODE_Wooden 0.5016 0.0065 1.0 \n", "75 ORGANIZATION_TYPE_Security_Ministries 0.5014 -0.0095 1.0 \n", "92 ORGANIZATION_TYPE_Trade__type_3 0.5014 0.0071 
1.0 \n", "65 ORGANIZATION_TYPE_Business_Entity_Type_2 0.5013 0.0041 1.0 \n", "61 ORGANIZATION_TYPE_Other 0.5013 -0.0032 1.0 \n", "4 is_REG_REGION_NOT_LIVE_REGION 0.5013 0.0057 1.0 \n", "36 NAME_EDUCATION_TYPE_Incomplete_higher 0.5012 0.0037 1.0 \n", "121 WALLSMATERIAL_MODE_Monolithic 0.5012 -0.0084 1.0 \n", "76 ORGANIZATION_TYPE_Transport__type_4 0.5012 0.0048 1.0 \n", "51 OCCUPATION_TYPE_Waiters_barmen_staff 0.5011 0.0089 1.0 \n", "6 is_LIVE_REGION_NOT_WORK_REGION 0.5010 0.0027 1.0 \n", "81 ORGANIZATION_TYPE_University 0.5009 -0.0076 1.0 \n", "142 is_EMERGENCYSTATE_MODE 0.5008 0.0052 1.0 \n", "95 ORGANIZATION_TYPE_Industry__type_9 0.5008 -0.0043 1.0 \n", "32 NAME_HOUSING_TYPE_Office_apartment 0.5008 -0.0048 1.0 \n", "27 FONDKAPREMONT_MODE_not_specified 0.5008 -0.0031 1.0 \n", "47 OCCUPATION_TYPE_Private_service_staff 0.5008 -0.0045 1.0 \n", "133 NAME_TYPE_SUITE_Spouse,_partner 0.5007 -0.0019 1.0 \n", "31 NAME_HOUSING_TYPE_Municipal_apartment 0.5007 0.0019 1.0 \n", "13 is_FLAG_DOCUMENT_9 0.5006 -0.0054 1.0 \n", "130 HOUSETYPE_MODE_specific_housing 0.5006 0.0048 1.0 \n", "72 ORGANIZATION_TYPE_Industry__type_11 0.5006 0.0036 1.0 \n", "3 is_FLAG_EMAIL 0.5006 -0.0014 1.0 \n", "86 ORGANIZATION_TYPE_Industry__type_4 0.5006 0.0060 1.0 \n", "74 ORGANIZATION_TYPE_Services 0.5005 -0.0040 1.0 \n", "80 ORGANIZATION_TYPE_Trade__type_2 0.5005 -0.0035 1.0 \n", "77 ORGANIZATION_TYPE_Industry__type_1 0.5005 0.0046 1.0 \n", "97 ORGANIZATION_TYPE_Trade__type_6 0.5004 -0.0053 1.0 \n", "63 ORGANIZATION_TYPE_Electricity 0.5004 -0.0041 1.0 \n", "96 ORGANIZATION_TYPE_Insurance 0.5004 -0.0049 1.0 \n", "134 NAME_TYPE_SUITE_Children 0.5004 -0.0019 1.0 \n", "126 NAME_FAMILY_STATUS_Separated 0.5004 -0.0008 1.0 \n", "129 HOUSETYPE_MODE_terraced_house 0.5003 0.0030 1.0 \n", "100 ORGANIZATION_TYPE_Industry__type_12 0.5003 -0.0054 1.0 \n", "67 ORGANIZATION_TYPE_Transport__type_2 0.5003 -0.0020 1.0 \n", "10 is_FLAG_DOCUMENT_5 0.5003 0.0014 1.0 \n", "118 WALLSMATERIAL_MODE_Mixed 0.5003 -0.0019 1.0 
\n", "113 ORGANIZATION_TYPE_Realtor 0.5003 0.0044 1.0 \n", "38 NAME_EDUCATION_TYPE_Academic_degree 0.5003 -0.0062 1.0 \n", "56 OCCUPATION_TYPE_HR_staff 0.5002 -0.0032 1.0 \n", "89 ORGANIZATION_TYPE_Culture 0.5002 -0.0040 1.0 \n", "90 ORGANIZATION_TYPE_Hotel 0.5002 -0.0023 1.0 \n", "69 ORGANIZATION_TYPE_Housing 0.5002 -0.0013 1.0 \n", "55 OCCUPATION_TYPE_IT_staff 0.5002 -0.0029 1.0 \n", "135 NAME_TYPE_SUITE_Other_A 0.5002 0.0021 1.0 \n", "78 ORGANIZATION_TYPE_Emergency 0.5002 -0.0025 1.0 \n", "108 ORGANIZATION_TYPE_Cleaning 0.5002 0.0036 1.0 \n", "91 ORGANIZATION_TYPE_Industry__type_7 0.5002 -0.0016 1.0 \n", "20 NAME_INCOME_TYPE_Unemployed 0.5002 0.0117 1.0 \n", "85 ORGANIZATION_TYPE_Postal 0.5002 -0.0011 1.0 \n", "99 ORGANIZATION_TYPE_Transport__type_1 0.5002 -0.0036 1.0 \n", "84 ORGANIZATION_TYPE_Business_Entity_Type_1 0.5002 0.0006 1.0 \n", "98 ORGANIZATION_TYPE_Industry__type_2 0.5002 -0.0022 1.0 \n", "60 ORGANIZATION_TYPE_Religion 0.5001 -0.0047 1.0 \n", "102 ORGANIZATION_TYPE_Trade__type_1 0.5001 0.0022 1.0 \n", "103 ORGANIZATION_TYPE_Industry__type_5 0.5001 -0.0017 1.0 \n", "136 NAME_TYPE_SUITE_Other_B 0.5001 0.0008 1.0 \n", "101 ORGANIZATION_TYPE_Mobile 0.5001 0.0019 1.0 \n", "106 ORGANIZATION_TYPE_Advertising 0.5001 -0.0016 1.0 \n", "54 OCCUPATION_TYPE_Secretaries 0.5001 -0.0008 1.0 \n", "33 NAME_HOUSING_TYPE_Co_op_apartment 0.5001 -0.0007 1.0 \n", "114 ORGANIZATION_TYPE_Industry__type_6 0.5001 -0.0022 1.0 \n", "120 WALLSMATERIAL_MODE_Others 0.5001 -0.0006 1.0 \n", "105 ORGANIZATION_TYPE_Legal_Services 0.5001 -0.0009 1.0 \n", "104 ORGANIZATION_TYPE_Industry__type_10 0.5000 -0.0014 1.0 \n", "110 ORGANIZATION_TYPE_Trade__type_4 0.5000 -0.0019 1.0 \n", "112 ORGANIZATION_TYPE_Industry__type_8 0.5000 0.0026 1.0 \n", "53 OCCUPATION_TYPE_Realty_agents 0.5000 0.0004 1.0 \n", "109 ORGANIZATION_TYPE_Industry__type_13 0.5000 0.0009 1.0 \n", "23 NAME_INCOME_TYPE_Maternity_leave 0.5000 0.0027 1.0 \n", "137 NAME_TYPE_SUITE_Group_of_people 0.5000 0.0004 1.0 \n", "111 
ORGANIZATION_TYPE_Telecom 0.5000 0.0003 1.0 \n", "22 NAME_INCOME_TYPE_Businessman 0.5000 -0.0018 1.0 \n", "21 NAME_INCOME_TYPE_Student 0.5000 -0.0018 1.0 \n", "107 ORGANIZATION_TYPE_Trade__type_5 0.5000 -0.0008 1.0 \n", "127 NAME_FAMILY_STATUS_Unknown 0.5000 -0.0006 1.0 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "eval_agg01 = feature_evaluate(pdf_train_filtered, pdf01_baseline)\n", "display(eval_agg01)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(66, 4)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eval_agg01.query(\"auc <= 0.501\").shape" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(307511, 78)\n", "(48744, 78)\n" ] } ], "source": [ "sel_feat = eval_agg01.query(\"auc > 0.501\")[\"name\"].tolist()\n", "\n", "# for train\n", "pdf01_baseline = pdf01_baseline[[\"SK_ID_CURR\"] + sel_feat]\n", "print(pdf01_baseline.shape)\n", "\n", "# for test\n", "pdf02_baseline = pdf02_baseline[[\"SK_ID_CURR\"] + sel_feat]\n", "print(pdf02_baseline.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Keep continuous columns" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "68\n" ] } ], "source": [ "# Because extra columns were added for \"Percent credit and income\", the meta data cannot be used here; read the float64 columns from pdf_train.dtypes instead\n", "# ls_continuous_name = pdf_meta[pdf_meta[\"sub_type\"] == \"float64\"][\"name\"].tolist()\n", "s_dtype = pdf_train.dtypes\n", "ls_continuous_name = s_dtype[s_dtype == \"float64\"].index.tolist()\n", "print(len(ls_continuous_name))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "((307511, 69), (48744, 69))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
SK_ID_CURR100002.00001.0000e+05100004.0000100006.0000100007.0000
AMT_INCOME_TOTAL202500.00002.7000e+0567500.0000135000.0000121500.0000
AMT_CREDIT406597.50001.2935e+06135000.0000312682.5000513000.0000
AMT_ANNUITY24700.50003.5698e+046750.000029686.500021865.5000
AMT_GOODS_PRICE351000.00001.1295e+06135000.0000297000.0000513000.0000
REGION_POPULATION_RELATIVE0.01883.5410e-030.01000.00800.0287
DAYS_REGISTRATION-3648.0000-1.1860e+03-4260.0000-9833.0000-4311.0000
OWN_CAR_AGE0.00000.0000e+0026.00000.00000.0000
CNT_FAM_MEMBERS1.00002.0000e+001.00002.00001.0000
EXT_SOURCE_10.08303.1127e-01NaNNaNNaN
EXT_SOURCE_20.26296.2225e-010.55590.65040.3227
EXT_SOURCE_30.1394NaN0.7296NaNNaN
APARTMENTS_AVG0.02479.5900e-02NaNNaNNaN
BASEMENTAREA_AVG0.03695.2900e-02NaNNaNNaN
YEARS_BEGINEXPLUATATION_AVG0.97229.8510e-01NaNNaNNaN
YEARS_BUILD_AVG0.61927.9600e-01NaNNaNNaN
COMMONAREA_AVG0.01436.0500e-02NaNNaNNaN
ELEVATORS_AVG0.00008.0000e-02NaNNaNNaN
ENTRANCES_AVG0.06903.4500e-02NaNNaNNaN
FLOORSMAX_AVG0.08332.9170e-01NaNNaNNaN
FLOORSMIN_AVG0.12503.3330e-01NaNNaNNaN
LANDAREA_AVG0.03691.3000e-02NaNNaNNaN
LIVINGAPARTMENTS_AVG0.02027.7300e-02NaNNaNNaN
LIVINGAREA_AVG0.01905.4900e-02NaNNaNNaN
NONLIVINGAPARTMENTS_AVG0.00003.9000e-03NaNNaNNaN
NONLIVINGAREA_AVG0.00009.8000e-03NaNNaNNaN
APARTMENTS_MODE0.02529.2400e-02NaNNaNNaN
BASEMENTAREA_MODE0.03835.3800e-02NaNNaNNaN
YEARS_BEGINEXPLUATATION_MODE0.97229.8510e-01NaNNaNNaN
YEARS_BUILD_MODE0.63418.0400e-01NaNNaNNaN
COMMONAREA_MODE0.01444.9700e-02NaNNaNNaN
ELEVATORS_MODE0.00008.0600e-02NaNNaNNaN
ENTRANCES_MODE0.06903.4500e-02NaNNaNNaN
FLOORSMAX_MODE0.08332.9170e-01NaNNaNNaN
FLOORSMIN_MODE0.12503.3330e-01NaNNaNNaN
LANDAREA_MODE0.03771.2800e-02NaNNaNNaN
LIVINGAPARTMENTS_MODE0.02207.9000e-02NaNNaNNaN
LIVINGAREA_MODE0.01985.5400e-02NaNNaNNaN
NONLIVINGAPARTMENTS_MODE0.00000.0000e+00NaNNaNNaN
NONLIVINGAREA_MODE0.00000.0000e+00NaNNaNNaN
APARTMENTS_MEDI0.02509.6800e-02NaNNaNNaN
BASEMENTAREA_MEDI0.03695.2900e-02NaNNaNNaN
YEARS_BEGINEXPLUATATION_MEDI0.97229.8510e-01NaNNaNNaN
YEARS_BUILD_MEDI0.62437.9870e-01NaNNaNNaN
COMMONAREA_MEDI0.01446.0800e-02NaNNaNNaN
ELEVATORS_MEDI0.00008.0000e-02NaNNaNNaN
ENTRANCES_MEDI0.06903.4500e-02NaNNaNNaN
FLOORSMAX_MEDI0.08332.9170e-01NaNNaNNaN
FLOORSMIN_MEDI0.12503.3330e-01NaNNaNNaN
LANDAREA_MEDI0.03751.3200e-02NaNNaNNaN
LIVINGAPARTMENTS_MEDI0.02057.8700e-02NaNNaNNaN
LIVINGAREA_MEDI0.01935.5800e-02NaNNaNNaN
NONLIVINGAPARTMENTS_MEDI0.00003.9000e-03NaNNaNNaN
NONLIVINGAREA_MEDI0.00001.0000e-02NaNNaNNaN
TOTALAREA_MODE0.01497.1400e-02NaNNaNNaN
OBS_30_CNT_SOCIAL_CIRCLE2.00001.0000e+000.00002.00000.0000
DEF_30_CNT_SOCIAL_CIRCLE2.00000.0000e+000.00000.00000.0000
OBS_60_CNT_SOCIAL_CIRCLE2.00001.0000e+000.00002.00000.0000
DEF_60_CNT_SOCIAL_CIRCLE2.00000.0000e+000.00000.00000.0000
DAYS_LAST_PHONE_CHANGE-1134.0000-8.2800e+02-815.0000-617.0000-1106.0000
AMT_REQ_CREDIT_BUREAU_HOUR0.00000.0000e+000.0000NaN0.0000
AMT_REQ_CREDIT_BUREAU_DAY0.00000.0000e+000.0000NaN0.0000
AMT_REQ_CREDIT_BUREAU_WEEK0.00000.0000e+000.0000NaN0.0000
AMT_REQ_CREDIT_BUREAU_MON0.00000.0000e+000.0000NaN0.0000
AMT_REQ_CREDIT_BUREAU_QRT0.00000.0000e+000.0000NaN0.0000
AMT_REQ_CREDIT_BUREAU_YEAR1.00000.0000e+000.0000NaN0.0000
CREDIT_INCOME_PERCENT2.00794.7908e+002.00002.31624.2222
ANNUITY_INCOME_PERCENT0.12201.3222e-010.10000.21990.1800
CREDIT_TERM0.06072.7598e-020.05000.09490.0426
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "SK_ID_CURR 100002.0000 1.0000e+05 100004.0000 100006.0000 100007.0000\n", "AMT_INCOME_TOTAL 202500.0000 2.7000e+05 67500.0000 135000.0000 121500.0000\n", "AMT_CREDIT 406597.5000 1.2935e+06 135000.0000 312682.5000 513000.0000\n", "AMT_ANNUITY 24700.5000 3.5698e+04 6750.0000 29686.5000 21865.5000 \n", "AMT_GOODS_PRICE 351000.0000 1.1295e+06 135000.0000 297000.0000 513000.0000\n", "REGION_POPULATION_RELATIVE 0.0188 3.5410e-03 0.0100 0.0080 0.0287 \n", "DAYS_REGISTRATION -3648.0000 -1.1860e+03 -4260.0000 -9833.0000 -4311.0000 \n", "OWN_CAR_AGE 0.0000 0.0000e+00 26.0000 0.0000 0.0000 \n", "CNT_FAM_MEMBERS 1.0000 2.0000e+00 1.0000 2.0000 1.0000 \n", "EXT_SOURCE_1 0.0830 3.1127e-01 NaN NaN NaN \n", "EXT_SOURCE_2 0.2629 6.2225e-01 0.5559 0.6504 0.3227 \n", "EXT_SOURCE_3 0.1394 NaN 0.7296 NaN NaN \n", "APARTMENTS_AVG 0.0247 9.5900e-02 NaN NaN NaN \n", "BASEMENTAREA_AVG 0.0369 5.2900e-02 NaN NaN NaN \n", "YEARS_BEGINEXPLUATATION_AVG 0.9722 9.8510e-01 NaN NaN NaN \n", "YEARS_BUILD_AVG 0.6192 7.9600e-01 NaN NaN NaN \n", "COMMONAREA_AVG 0.0143 6.0500e-02 NaN NaN NaN \n", "ELEVATORS_AVG 0.0000 8.0000e-02 NaN NaN NaN \n", "ENTRANCES_AVG 0.0690 3.4500e-02 NaN NaN NaN \n", "FLOORSMAX_AVG 0.0833 2.9170e-01 NaN NaN NaN \n", "FLOORSMIN_AVG 0.1250 3.3330e-01 NaN NaN NaN \n", "LANDAREA_AVG 0.0369 1.3000e-02 NaN NaN NaN \n", "LIVINGAPARTMENTS_AVG 0.0202 7.7300e-02 NaN NaN NaN \n", "LIVINGAREA_AVG 0.0190 5.4900e-02 NaN NaN NaN \n", "NONLIVINGAPARTMENTS_AVG 0.0000 3.9000e-03 NaN NaN NaN \n", "NONLIVINGAREA_AVG 0.0000 9.8000e-03 NaN NaN NaN \n", "APARTMENTS_MODE 0.0252 9.2400e-02 NaN NaN NaN \n", "BASEMENTAREA_MODE 0.0383 5.3800e-02 NaN NaN NaN \n", "YEARS_BEGINEXPLUATATION_MODE 0.9722 9.8510e-01 NaN NaN NaN \n", "YEARS_BUILD_MODE 0.6341 8.0400e-01 NaN NaN NaN \n", "COMMONAREA_MODE 0.0144 4.9700e-02 NaN NaN NaN \n", "ELEVATORS_MODE 0.0000 8.0600e-02 NaN NaN NaN \n", "ENTRANCES_MODE 0.0690 3.4500e-02 NaN NaN NaN \n", "FLOORSMAX_MODE 0.0833 2.9170e-01 NaN 
NaN NaN \n", "FLOORSMIN_MODE 0.1250 3.3330e-01 NaN NaN NaN \n", "LANDAREA_MODE 0.0377 1.2800e-02 NaN NaN NaN \n", "LIVINGAPARTMENTS_MODE 0.0220 7.9000e-02 NaN NaN NaN \n", "LIVINGAREA_MODE 0.0198 5.5400e-02 NaN NaN NaN \n", "NONLIVINGAPARTMENTS_MODE 0.0000 0.0000e+00 NaN NaN NaN \n", "NONLIVINGAREA_MODE 0.0000 0.0000e+00 NaN NaN NaN \n", "APARTMENTS_MEDI 0.0250 9.6800e-02 NaN NaN NaN \n", "BASEMENTAREA_MEDI 0.0369 5.2900e-02 NaN NaN NaN \n", "YEARS_BEGINEXPLUATATION_MEDI 0.9722 9.8510e-01 NaN NaN NaN \n", "YEARS_BUILD_MEDI 0.6243 7.9870e-01 NaN NaN NaN \n", "COMMONAREA_MEDI 0.0144 6.0800e-02 NaN NaN NaN \n", "ELEVATORS_MEDI 0.0000 8.0000e-02 NaN NaN NaN \n", "ENTRANCES_MEDI 0.0690 3.4500e-02 NaN NaN NaN \n", "FLOORSMAX_MEDI 0.0833 2.9170e-01 NaN NaN NaN \n", "FLOORSMIN_MEDI 0.1250 3.3330e-01 NaN NaN NaN \n", "LANDAREA_MEDI 0.0375 1.3200e-02 NaN NaN NaN \n", "LIVINGAPARTMENTS_MEDI 0.0205 7.8700e-02 NaN NaN NaN \n", "LIVINGAREA_MEDI 0.0193 5.5800e-02 NaN NaN NaN \n", "NONLIVINGAPARTMENTS_MEDI 0.0000 3.9000e-03 NaN NaN NaN \n", "NONLIVINGAREA_MEDI 0.0000 1.0000e-02 NaN NaN NaN \n", "TOTALAREA_MODE 0.0149 7.1400e-02 NaN NaN NaN \n", "OBS_30_CNT_SOCIAL_CIRCLE 2.0000 1.0000e+00 0.0000 2.0000 0.0000 \n", "DEF_30_CNT_SOCIAL_CIRCLE 2.0000 0.0000e+00 0.0000 0.0000 0.0000 \n", "OBS_60_CNT_SOCIAL_CIRCLE 2.0000 1.0000e+00 0.0000 2.0000 0.0000 \n", "DEF_60_CNT_SOCIAL_CIRCLE 2.0000 0.0000e+00 0.0000 0.0000 0.0000 \n", "DAYS_LAST_PHONE_CHANGE -1134.0000 -8.2800e+02 -815.0000 -617.0000 -1106.0000 \n", "AMT_REQ_CREDIT_BUREAU_HOUR 0.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "AMT_REQ_CREDIT_BUREAU_DAY 0.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "AMT_REQ_CREDIT_BUREAU_WEEK 0.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "AMT_REQ_CREDIT_BUREAU_MON 0.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "AMT_REQ_CREDIT_BUREAU_QRT 0.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "AMT_REQ_CREDIT_BUREAU_YEAR 1.0000 0.0000e+00 0.0000 NaN 0.0000 \n", "CREDIT_INCOME_PERCENT 2.0079 4.7908e+00 2.0000 2.3162 4.2222 \n", 
"ANNUITY_INCOME_PERCENT 0.1220 1.3222e-01 0.1000 0.2199 0.1800 \n", "CREDIT_TERM 0.0607 2.7598e-02 0.0500 0.0949 0.0426 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# for train feat\n", "pdf11_baseline = pdf_train[[\"SK_ID_CURR\"] + ls_continuous_name].copy()\n", "\n", "# for test feat\n", "pdf12_baseline = pdf_test[[\"SK_ID_CURR\"] + ls_continuous_name].copy()\n", "\n", "# print results\n", "print(pdf11_baseline.shape, pdf12_baseline.shape)\n", "display(pdf11_baseline.head().T)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameauccorrcoverage
10EXT_SOURCE_30.6775-0.17740.8016
8EXT_SOURCE_10.6687-0.15890.4356
9EXT_SOURCE_20.6559-0.16020.9978
58DAYS_LAST_PHONE_CHANGE0.55580.05461.0000
18FLOORSMAX_AVG0.5502-0.04370.5013
46FLOORSMAX_MEDI0.5498-0.04340.5013
32FLOORSMAX_MODE0.5486-0.04250.5013
22LIVINGAREA_AVG0.5462-0.03150.4970
50LIVINGAREA_MEDI0.5456-0.03110.4970
5DAYS_REGISTRATION0.54500.04421.0000
53TOTALAREA_MODE0.5440-0.03120.5162
36LIVINGAREA_MODE0.5433-0.02900.4970
16ELEVATORS_AVG0.5385-0.03250.4663
11APARTMENTS_AVG0.5380-0.02760.4915
44ELEVATORS_MEDI0.5379-0.03220.4663
39APARTMENTS_MEDI0.5372-0.02690.4915
30ELEVATORS_MODE0.5356-0.03020.4663
19FLOORSMIN_AVG0.5355-0.03320.3214
25APARTMENTS_MODE0.5354-0.02490.4915
3AMT_GOODS_PRICE0.5353-0.04090.9991
47FLOORSMIN_MEDI0.5351-0.03290.3214
33FLOORSMIN_MODE0.5340-0.03230.3214
21LIVINGAPARTMENTS_AVG0.5337-0.02240.3167
49LIVINGAPARTMENTS_MEDI0.5331-0.02170.3167
4REGION_POPULATION_RELATIVE0.5326-0.03751.0000
67CREDIT_TERM0.53220.01281.0000
35LIVINGAPARTMENTS_MODE0.5313-0.02030.3167
41YEARS_BEGINEXPLUATATION_MEDI0.5309-0.00970.5112
13YEARS_BEGINEXPLUATATION_AVG0.5309-0.00930.5112
27YEARS_BEGINEXPLUATATION_MODE0.5304-0.00870.5112
14YEARS_BUILD_AVG0.5274-0.02330.3350
42YEARS_BUILD_MEDI0.5274-0.02330.3350
28YEARS_BUILD_MODE0.5266-0.02270.3350
12BASEMENTAREA_AVG0.5251-0.02150.4142
17ENTRANCES_AVG0.5244-0.01770.4957
40BASEMENTAREA_MEDI0.5243-0.02070.4142
45ENTRANCES_MEDI0.5243-0.01750.4957
31ENTRANCES_MODE0.5208-0.01540.4957
15COMMONAREA_AVG0.5206-0.01550.3016
1AMT_CREDIT0.5206-0.03181.0000
64AMT_REQ_CREDIT_BUREAU_YEAR0.52050.01970.8647
43COMMONAREA_MEDI0.5203-0.01550.3016
26BASEMENTAREA_MODE0.5202-0.01890.4142
55DEF_30_CNT_SOCIAL_CIRCLE0.51970.03410.9967
0AMT_INCOME_TOTAL0.5192-0.00151.0000
29COMMONAREA_MODE0.5181-0.01360.3016
20LANDAREA_AVG0.5173-0.01120.4059
48LANDAREA_MEDI0.5171-0.01200.4059
57DEF_60_CNT_SOCIAL_CIRCLE0.51700.03350.9967
66ANNUITY_INCOME_PERCENT0.51680.01231.0000
34LANDAREA_MODE0.5151-0.01110.4059
24NONLIVINGAREA_AVG0.5150-0.01090.4478
52NONLIVINGAREA_MEDI0.5144-0.01050.4478
38NONLIVINGAREA_MODE0.5137-0.01030.4478
6OWN_CAR_AGE0.51250.00291.0000
54OBS_30_CNT_SOCIAL_CIRCLE0.50860.00920.9967
56OBS_60_CNT_SOCIAL_CIRCLE0.50830.00890.9967
7CNT_FAM_MEMBERS0.50810.01091.0000
23NONLIVINGAPARTMENTS_AVG0.5061-0.00570.3058
62AMT_REQ_CREDIT_BUREAU_MON0.5060-0.01380.8647
51NONLIVINGAPARTMENTS_MEDI0.5050-0.00530.3058
63AMT_REQ_CREDIT_BUREAU_QRT0.5050-0.00140.8647
65CREDIT_INCOME_PERCENT0.5043-0.00941.0000
37NONLIVINGAPARTMENTS_MODE0.5040-0.00420.3058
2AMT_ANNUITY0.5025-0.01421.0000
60AMT_REQ_CREDIT_BUREAU_DAY0.50050.00160.8647
61AMT_REQ_CREDIT_BUREAU_WEEK0.50030.00090.8647
59AMT_REQ_CREDIT_BUREAU_HOUR0.50020.00160.8647
\n", "
" ], "text/plain": [ " name auc corr coverage\n", "10 EXT_SOURCE_3 0.6775 -0.1774 0.8016 \n", "8 EXT_SOURCE_1 0.6687 -0.1589 0.4356 \n", "9 EXT_SOURCE_2 0.6559 -0.1602 0.9978 \n", "58 DAYS_LAST_PHONE_CHANGE 0.5558 0.0546 1.0000 \n", "18 FLOORSMAX_AVG 0.5502 -0.0437 0.5013 \n", "46 FLOORSMAX_MEDI 0.5498 -0.0434 0.5013 \n", "32 FLOORSMAX_MODE 0.5486 -0.0425 0.5013 \n", "22 LIVINGAREA_AVG 0.5462 -0.0315 0.4970 \n", "50 LIVINGAREA_MEDI 0.5456 -0.0311 0.4970 \n", "5 DAYS_REGISTRATION 0.5450 0.0442 1.0000 \n", "53 TOTALAREA_MODE 0.5440 -0.0312 0.5162 \n", "36 LIVINGAREA_MODE 0.5433 -0.0290 0.4970 \n", "16 ELEVATORS_AVG 0.5385 -0.0325 0.4663 \n", "11 APARTMENTS_AVG 0.5380 -0.0276 0.4915 \n", "44 ELEVATORS_MEDI 0.5379 -0.0322 0.4663 \n", "39 APARTMENTS_MEDI 0.5372 -0.0269 0.4915 \n", "30 ELEVATORS_MODE 0.5356 -0.0302 0.4663 \n", "19 FLOORSMIN_AVG 0.5355 -0.0332 0.3214 \n", "25 APARTMENTS_MODE 0.5354 -0.0249 0.4915 \n", "3 AMT_GOODS_PRICE 0.5353 -0.0409 0.9991 \n", "47 FLOORSMIN_MEDI 0.5351 -0.0329 0.3214 \n", "33 FLOORSMIN_MODE 0.5340 -0.0323 0.3214 \n", "21 LIVINGAPARTMENTS_AVG 0.5337 -0.0224 0.3167 \n", "49 LIVINGAPARTMENTS_MEDI 0.5331 -0.0217 0.3167 \n", "4 REGION_POPULATION_RELATIVE 0.5326 -0.0375 1.0000 \n", "67 CREDIT_TERM 0.5322 0.0128 1.0000 \n", "35 LIVINGAPARTMENTS_MODE 0.5313 -0.0203 0.3167 \n", "41 YEARS_BEGINEXPLUATATION_MEDI 0.5309 -0.0097 0.5112 \n", "13 YEARS_BEGINEXPLUATATION_AVG 0.5309 -0.0093 0.5112 \n", "27 YEARS_BEGINEXPLUATATION_MODE 0.5304 -0.0087 0.5112 \n", "14 YEARS_BUILD_AVG 0.5274 -0.0233 0.3350 \n", "42 YEARS_BUILD_MEDI 0.5274 -0.0233 0.3350 \n", "28 YEARS_BUILD_MODE 0.5266 -0.0227 0.3350 \n", "12 BASEMENTAREA_AVG 0.5251 -0.0215 0.4142 \n", "17 ENTRANCES_AVG 0.5244 -0.0177 0.4957 \n", "40 BASEMENTAREA_MEDI 0.5243 -0.0207 0.4142 \n", "45 ENTRANCES_MEDI 0.5243 -0.0175 0.4957 \n", "31 ENTRANCES_MODE 0.5208 -0.0154 0.4957 \n", "15 COMMONAREA_AVG 0.5206 -0.0155 0.3016 \n", "1 AMT_CREDIT 0.5206 -0.0318 1.0000 \n", "64 AMT_REQ_CREDIT_BUREAU_YEAR 
0.5205 0.0197 0.8647 \n", "43 COMMONAREA_MEDI 0.5203 -0.0155 0.3016 \n", "26 BASEMENTAREA_MODE 0.5202 -0.0189 0.4142 \n", "55 DEF_30_CNT_SOCIAL_CIRCLE 0.5197 0.0341 0.9967 \n", "0 AMT_INCOME_TOTAL 0.5192 -0.0015 1.0000 \n", "29 COMMONAREA_MODE 0.5181 -0.0136 0.3016 \n", "20 LANDAREA_AVG 0.5173 -0.0112 0.4059 \n", "48 LANDAREA_MEDI 0.5171 -0.0120 0.4059 \n", "57 DEF_60_CNT_SOCIAL_CIRCLE 0.5170 0.0335 0.9967 \n", "66 ANNUITY_INCOME_PERCENT 0.5168 0.0123 1.0000 \n", "34 LANDAREA_MODE 0.5151 -0.0111 0.4059 \n", "24 NONLIVINGAREA_AVG 0.5150 -0.0109 0.4478 \n", "52 NONLIVINGAREA_MEDI 0.5144 -0.0105 0.4478 \n", "38 NONLIVINGAREA_MODE 0.5137 -0.0103 0.4478 \n", "6 OWN_CAR_AGE 0.5125 0.0029 1.0000 \n", "54 OBS_30_CNT_SOCIAL_CIRCLE 0.5086 0.0092 0.9967 \n", "56 OBS_60_CNT_SOCIAL_CIRCLE 0.5083 0.0089 0.9967 \n", "7 CNT_FAM_MEMBERS 0.5081 0.0109 1.0000 \n", "23 NONLIVINGAPARTMENTS_AVG 0.5061 -0.0057 0.3058 \n", "62 AMT_REQ_CREDIT_BUREAU_MON 0.5060 -0.0138 0.8647 \n", "51 NONLIVINGAPARTMENTS_MEDI 0.5050 -0.0053 0.3058 \n", "63 AMT_REQ_CREDIT_BUREAU_QRT 0.5050 -0.0014 0.8647 \n", "65 CREDIT_INCOME_PERCENT 0.5043 -0.0094 1.0000 \n", "37 NONLIVINGAPARTMENTS_MODE 0.5040 -0.0042 0.3058 \n", "2 AMT_ANNUITY 0.5025 -0.0142 1.0000 \n", "60 AMT_REQ_CREDIT_BUREAU_DAY 0.5005 0.0016 0.8647 \n", "61 AMT_REQ_CREDIT_BUREAU_WEEK 0.5003 0.0009 0.8647 \n", "59 AMT_REQ_CREDIT_BUREAU_HOUR 0.5002 0.0016 0.8647 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "eval_agg02 = feature_evaluate(pdf_train_filtered, pdf11_baseline)\n", "display(eval_agg02)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3, 4)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eval_agg02.query(\"auc <= 0.501\").shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## convert days to years" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# 
The numbers in the DAYS_BIRTH column are negative because they are recorded relative to the current loan application. \n", "# To see these stats in years, we can multiply by -1 and divide by the number of days in a year:\n", "pdf11_baseline[\"YEARS_BIRTH\"] = pdf_train[\"DAYS_BIRTH\"] / -365\n", "pdf12_baseline[\"YEARS_BIRTH\"] = pdf_test[\"DAYS_BIRTH\"] / -365\n", "\n", "# similarly\n", "pdf11_baseline[\"REGISTRATION_YEAR\"] = pdf_train[\"DAYS_REGISTRATION\"] / -365\n", "pdf12_baseline[\"REGISTRATION_YEAR\"] = pdf_test[\"DAYS_REGISTRATION\"] / -365\n", "\n", "# similarly\n", "pdf11_baseline[\"ID_PUBLISH_YEAR\"] = pdf_train[\"DAYS_ID_PUBLISH\"] / -365\n", "pdf12_baseline[\"ID_PUBLISH_YEAR\"] = pdf_test[\"DAYS_ID_PUBLISH\"] / -365\n", "\n", "# similarly\n", "pdf11_baseline[\"LAST_PHONE_CHANGE_YEAR\"] = pdf_train[\"DAYS_LAST_PHONE_CHANGE\"] / -365\n", "pdf12_baseline[\"LAST_PHONE_CHANGE_YEAR\"] = pdf_test[\"DAYS_LAST_PHONE_CHANGE\"] / -365" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# handling DAYS_EMPLOYED" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 307511.0000\n", "mean 63815.0459 \n", "std 141275.7665\n", "min -17912.0000 \n", "25% -2760.0000 \n", "50% -1213.0000 \n", "75% -289.0000 \n", "max 365243.0000\n", "Name: DAYS_EMPLOYED, dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pdf_train[\"DAYS_EMPLOYED\"].describe()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAY0AAAD8CAYAAACLrvgBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFexJREFUeJzt3W2MXOV5h/Hrrh2IC0kwpawsbNWksdQ4uKWwBUepqk3TGkM+mEggmaKwJFSuUlAT1ZViWqnQEKSkEkFCSd04wsUkaRyaF2EVU8ciXkWRwmvjYLsO9YZYYWMLi5gQnLRpN737YZ5NhmV25tmZ3Z1pff2k0Zy5z3Oec+9Zdv6ec84ukZlIklTjl/rdgCTp/w5DQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlStcX9bmCunXfeebly5coF3eePf/xjzjrrrAXd52zYX28Gub9B7g3srxcL3dtTTz31Qmb+aseBmfn/6nHppZfmQtu3b9+C73M27K83g9zfIPeWaX+9WOjegCez4j3W01OSpGqGhiSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmq1jE0ImJFROyLiMMRcSgi3l/qt0fE9yNif3lc1bTNrRExHhHPRMQVTfX1pTYeEVua6hdGxGMRcSQiPh8RZ5T6meX1eFm/ci6/eEnS7NR80pgENmfmm4G1wM0RsbqsuzszLy6P3QBl3UbgLcB64O8iYlFELAI+AVwJrAaua5rno2WuVcCLwE2lfhPwYma+Cbi7jJMk9UnHPyOSmceB42X55Yg4DFzQZpMNwM7M/Cnw3YgYBy4r68Yz81mAiNgJbCjz/T7wR2XMDuB2YGuZ6/ZS/wLw8YiI8tuLc27lloe62m7zmklu7HLbKUc/8s6etpekhRCzef8tp4e+BlwE/DlwI/Aj4Ekan0ZejIiPA49m5mfKNvcCD5cp1mfmH5f6u4HLaYTCo+XTBBGxAng4My+KiINlm4my7jvA5Zn5wrS+NgGbAIaGhi7duXPn7I5CceD7L3W13dASeP4/utr059Zc8IbeJmjj1KlTnH322fM2f6/sr3uD3BvYXy8Wure3v/3tT2XmcKdx1X+wMCLOBr4IfCAzfxQRW4E7gCzPdwHvBaLF5knrU2HZZjwd1v2ikLkN2AYwPDycIyMjbb+WmXT7aWHzmknuOtDb3348ev1IT9u3MzY2RrfHZCHYX/cGuTewv14Mam9Vd09FxGtoBMZnM/NLAJn5fGb+LDP/B/gUvzgFNQGsaNp8OXCsTf0F4JyIWDyt/oq5yvo3ACdn8wVKkuZOzd1TAdwLHM7MjzXVlzUNexdwsCzvAjaWO58uBFYBjwNPAKvKnVJn0LhYvqtcn9gHXFO2HwUebJprtCxfA3x1vq5nSJI6qzmn8jbg3cCBiNhfan9J4+6ni2mcLjoK/AlAZh6KiAeAf6Nx59XNmfkzgIi4BdgDLAK2Z+ahMt8HgZ0R8WHgmzRCivL86XIx/SSNoJEk9UnN3VNfp/W1hd1ttrkTuLNFfXer7codVZe1qP8ncG2nHiVJC8PfCJckVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVK1jaETEiojYFxGHI+JQRLy/1M+NiL0RcaQ8Ly31iIh7ImI8Ip6OiEua5hot449ExGhT/dKIOFC2uSciot0+JEn9UfNJYxLYnJlvBtYCN0fEamAL8EhmrgIeKa8BrgRWlccmYCs0AgC4DbgcuAy4rSkEtpaxU9utL/WZ9iFJ6oOOoZGZxzPzX8vyy8Bh4AJgA7CjDNsBXF2WNwD3Z8OjwDk
RsQy4AtibmScz80VgL7C+rHt9Zn4jMxO4f9pcrfYhSeqDWV3TiIiVwG8DjwFDmXkcGsECnF+GXQA817TZRKm1q0+0qNNmH5KkPlhcOzAizga+CHwgM39ULju0HNqill3Uq0XEJhqntxgaGmJsbGw2m//c5jWTXW03tKT7bad023ONU6dOzev8vbK/7g1yb2B/vRjU3qpCIyJeQyMwPpuZXyrl5yNiWWYeL6eYTpT6BLCiafPlwLFSH5lWHyv15S3Gt9vHK2TmNmAbwPDwcI6MjLQa1tGNWx7qarvNaya560B1/rZ09PqRnrZvZ2xsjG6PyUKwv+4Ncm9gf70Y1N5q7p4K4F7gcGZ+rGnVLmDqDqhR4MGm+g3lLqq1wEvl1NIeYF1ELC0XwNcBe8q6lyNibdnXDdPmarUPSVIf1Pzz+G3Au4EDEbG/1P4S+AjwQETcBHwPuLas2w1cBYwDPwHeA5CZJyPiDuCJMu5DmXmyLL8PuA9YAjxcHrTZhySpDzqGRmZ+ndbXHQDe0WJ8AjfPMNd2YHuL+pPARS3qP2i1D0lSf/gb4ZKkaoaGJKmaoSFJqmZoSJKqGRqSpGqGhiSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmqZmhIkqoZGpKkaoaGJKmaoSFJqmZoSJKqGRqSpGqGhiSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmqZmhIkqoZGpKkaoaGJKmaoSFJqmZoSJKqGRqSpGqGhiSpmqEhSapmaEiSqnUMjYjYHhEnIuJgU+32iPh+ROwvj6ua1t0aEeMR8UxEXNFUX19q4xGxpal+YUQ8FhFHIuLzEXFGqZ9ZXo+X9Svn6ouWJHWn5pPGfcD6FvW7M/Pi8tgNEBGrgY3AW8o2fxcRiyJiEfAJ4EpgNXBdGQvw0TLXKuBF4KZSvwl4MTPfBNxdxkmS+qhjaGTm14CTlfNtAHZm5k8z87vAOHBZeYxn5rOZ+V/ATmBDRATw+8AXyvY7gKub5tpRlr8AvKOMlyT1SS/XNG6JiKfL6aulpXYB8FzTmIlSm6n+K8APM3NyWv0Vc5X1L5XxkqQ+WdzldluBO4Asz3cB7wVafRJIWodTthlPh3WvEBGbgE0AQ0NDjI2NtWl9ZpvXTHYe1MLQku63ndJtzzVOnTo1r/P3yv66N8i9gf31YlB76yo0MvP5qeWI+BTwz+XlBLCiaehy4FhZblV/ATgnIhaXTxPN46fmmoiIxcAbmOE0WWZuA7YBDA8P58jISDdfFjdueair7TavmeSuA93mb8PR60d62r6dsbExuj0mC8H+ujfIvYH99WJQe+vq9FRELGt6+S5g6s6qXcDGcufThcAq4HHgCWBVuVPqDBoXy3dlZgL7gGvK9qPAg01zjZbla4CvlvGSpD7p+M/jiPgcMAKcFxETwG3ASERcTON00VHgTwAy81BEPAD8GzAJ3JyZPyvz3ALsARYB2zPzUNnFB4GdEfFh4JvAvaV+L/DpiBin8QljY89frSSpJx1DIzOva1G+t0VtavydwJ0t6ruB3S3qz9K4u2p6/T+Bazv1J0laOP5GuCSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmqZmhIkqoZGpKkaoaGJKmaoSFJqmZoSJKqGRqSpGqGhiSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmqZmhIkqoZGpKkaoaGJKmaoSFJqmZoSJKqGRqSpGqGhiSpmqEhSapmaEiSqhkakqRqhoYkqZqhIUmqZmhIkqoZGpKkah1DIyK2R8SJiDjYVDs3IvZGxJHyvLTUIyLuiYjxiHg6Ii5p2ma0jD8SEaNN9Usj4kDZ5p6IiHb7kCT1T80njfuA9dNqW4BHMnMV8Eh5DXAlsKo8NgFboREAwG3A5cBlwG1NIbC1jJ3abn2HfUiS+qRjaGTm14CT08obgB1leQdwdVP9/mx4FDgnIpYBVwB7M/NkZr4I7AXWl3Wvz8xvZGYC90+bq9U+JEl90u0
1jaHMPA5Qns8v9QuA55rGTZRau/pEi3q7fUiS+mTxHM8XLWrZRX12O43YROMUF0NDQ4yNjc12CgA2r5nsaruhJd1vO6XbnmucOnVqXufvlf11b5B7A/vrxaD21m1oPB8RyzLzeDnFdKLUJ4AVTeOWA8dKfWRafazUl7cY324fr5KZ24BtAMPDwzkyMjLT0LZu3PJQV9ttXjPJXQd6y9+j14/0tH07Y2NjdHtMFoL9dW+QewP768Wg9tbt6aldwNQdUKPAg031G8pdVGuBl8qppT3AuohYWi6ArwP2lHUvR8TactfUDdPmarUPSVKfdPzncUR8jsanhPMiYoLGXVAfAR6IiJuA7wHXluG7gauAceAnwHsAMvNkRNwBPFHGfSgzpy6uv4/GHVpLgIfLgzb7kCT1ScfQyMzrZlj1jhZjE7h5hnm2A9tb1J8ELmpR/0GrfUiS+sffCJckVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVK1xf1uQJL+P1m55aE5mWfzmklunOVcRz/yzjnZdzs9fdKIiKMRcSAi9kfEk6V2bkTsjYgj5XlpqUdE3BMR4xHxdERc0jTPaBl/JCJGm+qXlvnHy7bRS7+SpN7Mxempt2fmxZk5XF5vAR7JzFXAI+U1wJXAqvLYBGyFRsgAtwGXA5cBt00FTRmzqWm79XPQrySpS/NxTWMDsKMs7wCubqrfnw2PAudExDLgCmBvZp7MzBeBvcD6su71mfmNzEzg/qa5JEl90GtoJPCViHgqIjaV2lBmHgcoz+eX+gXAc03bTpRau/pEi7okqU96vRD+tsw8FhHnA3sj4tttxra6HpFd1F89cSOwNgEMDQ0xNjbWtumZbF4z2dV2Q0u633ZKtz3XOHXq1LzO3yv7694g9wanZ3+9vhdM6eZ9ZSGOdU+hkZnHyvOJiPgyjWsSz0fEssw8Xk4xnSjDJ4AVTZsvB46V+si0+lipL28xvlUf24BtAMPDwzkyMtJqWEezvVNhyuY1k9x1oLf8PXr9SE/btzM2Nka3x2Qh2F/3Brk3OD376/Z9ZLpu3lfm831kStenpyLirIh43dQysA44COwCpu6AGgUeLMu7gBvKXVRrgZfK6as9wLqIWFougK8D9pR1L0fE2nLX1A1Nc0mS+qCXfx4PAV8ud8EuBv4xM/8lIp4AHoiIm4DvAdeW8buBq4Bx4CfAewAy82RE3AE8UcZ9KDNPluX3AfcBS4CHy0OS1Cddh0ZmPgv8Vov6D4B3tKgncPMMc20HtreoPwlc1G2PkqS55Z8RkSRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUzdCQJFUzNCRJ1QwNSVI1Q0OSVM3QkCRVMzQkSdUMDUlSNUNDklTN0JAkVTM0JEnVDA1JUjVDQ5JUbeBDIyLWR8QzETEeEVv63Y8knc4GOjQiYhHwCeBKYDVwXUSs7m9XknT6GujQAC4DxjPz2cz8L2AnsKHPPUnSaWvQQ+MC4Lmm1xOlJknqg8X9bqCDaFHLVw2K2ARsKi9PRcQz89rVNH8G5wEv9DJHfHSOmmmt5/7mmf11b5B7A/vrWjfvKz2+j/xazaBBD40JYEXT6+XAsemDMnMbsG2hmpouIp7MzOF+7b8T++vNIPc3yL2B/fViUHsb9NNTTwCrIuLCiDgD2Ajs6nNPknTaGuhPGpk5GRG3AHuARcD2zDzU57Yk6bQ10KEBkJm7gd397qODvp0aq2R/vRnk/ga5N7C/Xgxkb5H5quv
KkiS1NOjXNCRJA8TQ6NFC/pmTiDgaEQciYn9EPFlq50bE3og4Up6XlnpExD2lr6cj4pKmeUbL+CMRMdpUv7TMP162bXXLc3M/2yPiREQcbKrNez8z7aOyv9sj4vvlGO6PiKua1t1a9vVMRFzRVG/5PS43aDxW+vh8uVmDiDizvB4v61e26G1FROyLiMMRcSgi3j9Ix69Nf4Ny/F4bEY9HxLdKf3/T7Zxz1XdFb/dFxHebjt3F/fje9iwzfXT5oHFx/jvAG4EzgG8Bq+dxf0eB86bV/hbYUpa3AB8ty1cBD9P4XZe1wGOlfi7wbHleWpaXlnWPA28t2zwMXNmhn98DLgEOLmQ/M+2jsr/bgb9oMXZ1+f6dCVxYvq+L2n2PgQeAjWX574H3leU/Bf6+LG8EPt9if8uAS8ry64B/Lz0MxPFr09+gHL8Azi7LrwEeK8dlVnPOZd8Vvd0HXNPia1nwn42e3ofm6w3udHiUb9qepte3ArfO4/6O8urQeAZYVpaXAc+U5U8C100fB1wHfLKp/slSWwZ8u6n+inFtelrJK9+U572fmfZR2d/ttH7Te8X3jsYde2+d6XtcflhfABZP/29hatuyvLiMiw7H8UHgDwft+LXob+COH/DLwL8Cl892zrnsu6K3+2gdGn393s724emp3iz0nzlJ4CsR8VQ0fgseYCgzjwOU5/M79NauPtGiPlsL0c9M+6h1SzkNsL3p4/ts+/sV4IeZOdmiv59vU9a/VMa3VE6V/DaNf5EO3PGb1h8MyPGLiEURsR84Aeyl8clgtnPOZd8z9paZU8fuznLs7o6IM6f3VtnDfP5sdGRo9Kbqz5zMobdl5iU0/urvzRHxe23GztTbbOtzZVD62Qr8OnAxcBy4ax76q+49Is4Gvgh8IDN/NHPb/Tl+LfobmOOXmT/LzItp/KWIy4A3dzHnvBzX6b1FxEU0Pqn8BvA7NE45fXCOe1sQhkZvqv7MyVzJzGPl+QTwZRo/KM9HxDKA8nyiQ2/t6stb1GdrIfqZaR8dZebz5Qf6f4BP0TiG3fT3AnBORCyeVn/FXGX9G4CT03uJiNfQeEP+bGZ+qcPXtuDHr1V/g3T8pmTmD4ExGtcDZjvnXPbdrrf1mXk8G34K/APdH7t5+dmoZWj0ZsH+zElEnBURr5taBtYBB8v+RsuwURrnnin1G8qdGWuBl8rH1T3AuohYWk4trKNxTvY48HJErC13YtzQNNdsLEQ/M+2jo6kfqOJdNI7h1Jwby102FwKraFxsbPk9zsZJ433ANTN8rVP9XQN8tYxv7iOAe4HDmfmxplUDcfxm6m+Ajt+vRsQ5ZXkJ8AfA4S7mnMu+2/X27aY38wCunnbs+v6zUW2uL5Kcbg8adz78O43zqX81j/t5I407OL4FHJraF41zrI8AR8rzuaUeNP4HVt8BDgDDTXO9Fxgvj/c01Ydp/If8HeDjdL54+zkapyj+m8a/fm5aiH5m2kdlf58u+3+axg/Ysqbxf1X29QxNd47N9D0u35PHS9//BJxZ6q8tr8fL+je26O13aZxSeBrYXx5XDcrxa9PfoBy/3wS+Wfo4CPx1t3POVd8VvX21HLuDwGf4xR1WC/6z0cvD3wiXJFXz9JQkqZqhIUmqZmhIkqoZGpKkaoaGJKmaoSFJqmZoSJKqGRqSpGr/CzE4gZ/LQMaiAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pdf_train[\"DAYS_EMPLOYED\"].hist()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tỉ lệ phần trăm TARGET của non-anomalies: 8.65997453765\n", "Tỉ lệ phần trăm TARGET của anomalies: 5.39964604327\n", "Số lượng anomalies là 55374\n" ] } ], "source": [ "# check anomaly\n", "anom = pdf_train[pdf_train[\"DAYS_EMPLOYED\"] == 365243]\n", "non_anom = pdf_train[pdf_train[\"DAYS_EMPLOYED\"] != 365243]\n", "print(\"Tỉ lệ phần trăm TARGET của non-anomalies: {}\".format(100 * non_anom[\"TARGET\"].mean()))\n", "print(\"Tỉ lệ phần trăm TARGET của anomalies: {}\".format(100 * anom[\"TARGET\"].mean()))\n", "print(\"Số lượng anomalies là {}\".format(len(anom)))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "def handling_days_employed(pdf_input, pdf_output):\n", " # Create an anomalous flag column\n", " pdf_output[\"DAYS_EMPLOYED_ANOM\"] = pdf_input[\"DAYS_EMPLOYED\"] == 365243\n", "\n", " # Replace the anomalous values with nan\n", " pdf_output[\"DAYS_EMPLOYED\"] = pdf_input[\"DAYS_EMPLOYED\"].replace({365243: np.nan})\n", "\n", " # Calculate years employed\n", " pdf_output[\"YEARS_EMPLOYED\"] = pdf_output[\"DAYS_EMPLOYED\"] / -365\n", " \n", " # percent employed over years of birth\n", " pdf_output['YEARS_EMPLOYED_PERCENT'] = pdf_output['YEARS_EMPLOYED'] / pdf_output['YEARS_BIRTH']\n", "\n", " return pdf_output\n", "\n", "\n", "pdf11_baseline = handling_days_employed(pdf_train, pdf11_baseline)\n", "pdf12_baseline = handling_days_employed(pdf_test, pdf12_baseline)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# save features" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "((307511, 78), (48744, 78))\n", "Store features completed!\n", "((307511, 77), 
(48744, 77))\n", "Store features completed!\n", "CPU times: user 30.7 s, sys: 1.44 s, total: 32.2 s\n", "Wall time: 26.2 s\n" ] } ], "source": [ "%%time\n", "def store_features(pdf_train, pdf_test, fname):\n", " print(pdf_train.shape, pdf_test.shape)\n", " fname = os.path.join(\"features\", \"{}.pkl.bz2\".format(fname))\n", " pdf_out = pd.concat([pdf_train, pdf_test]).reset_index(drop=True)\n", " pdf_out.to_pickle(fname, compression=\"bz2\")\n", " print(\"Store features completed!\")\n", "\n", "store_features(pdf01_baseline, pdf02_baseline, \"baseline\")\n", "store_features(pdf11_baseline, pdf12_baseline, \"baseline_extend\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 2 }