{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
"import os, pickle\n",
"import pandas as pd\n",
"import numpy as np\n",
"# \n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display\n",
"# \n",
"from sklearn import metrics\n",
"from sklearn.model_selection import train_test_split\n",
"# \n",
"import xgboost as xgb\n",
"from xgboost import plot_importance\n",
"\n",
"# pandas display settings: wide console output, up to 500 rows/columns, 4-digit precision\n",
"pd.set_option('display.width', 2000)\n",
"pd.set_option('display.max_rows', 500)\n",
"pd.set_option('display.max_columns', 500)\n",
"pd.set_option('display.precision', 4)\n",
"# do not truncate cell contents: None means no limit on pandas >= 1.0, where negative\n",
"# values are deprecated/rejected; the fallback keeps the legacy -1 sentinel for older\n",
"# releases whose validator refuses None\n",
"try:\n",
"    pd.set_option('display.max_colwidth', None)\n",
"except ValueError:\n",
"    pd.set_option('display.max_colwidth', -1)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load combined features with label (option 1)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "baseline_extend.pkl.bz2 (356255, 77)\n", "rows, columns (356255, 154)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRNAME_INCOME_TYPE_Workingis_REGION_RATING_CLIENT_W_CITYis_REGION_RATING_CLIENTis_CODE_GENDERNAME_EDUCATION_TYPE_Higher_educationNAME_EDUCATION_TYPE_Secondary___secondary_specialis_REG_CITY_NOT_WORK_CITYis_FLAG_DOCUMENT_3HOUSETYPE_MODE_block_of_flatsNAME_INCOME_TYPE_PensionerORGANIZATION_TYPE_XNAis_FLAG_EMP_PHONEOCCUPATION_TYPE_LaborersWALLSMATERIAL_MODE_Panelis_LIVE_CITY_NOT_WORK_CITYNAME_FAMILY_STATUS_Marriedis_FLAG_WORK_PHONEis_FLAG_PHONEis_FLAG_OWN_CARORGANIZATION_TYPE_Self_employedORGANIZATION_TYPE_Business_Entity_Type_3NAME_FAMILY_STATUS_Single___not_marriedFONDKAPREMONT_MODE_reg_oper_accountis_NAME_CONTRACT_TYPENAME_HOUSING_TYPE_House___apartmentis_FLAG_DOCUMENT_6OCCUPATION_TYPE_DriversNAME_FAMILY_STATUS_Civil_marriageNAME_HOUSING_TYPE_With_parentsNAME_INCOME_TYPE_State_servantOCCUPATION_TYPE_Core_staffOCCUPATION_TYPE_Sales_staffNAME_INCOME_TYPE_Commercial_associateWALLSMATERIAL_MODE_Stone,_brickNAME_FAMILY_STATUS_WidowOCCUPATION_TYPE_ManagersOCCUPATION_TYPE_Accountantsis_FLAG_OWN_REALTYORGANIZATION_TYPE_ConstructionNAME_TYPE_SUITE_Unaccompaniedis_FLAG_DOCUMENT_8NAME_TYPE_SUITE_FamilyOCCUPATION_TYPE_High_skill_tech_staffORGANIZATION_TYPE_SchoolNAME_HOUSING_TYPE_Rented_apartmentOCCUPATION_TYPE_Low_skill_LaborersOCCUPATION_TYPE_Security_staffFONDKAPREMONT_MODE_reg_oper_spec_accountORGANIZATION_TYPE_MedicineFONDKAPREMONT_MODE_org_spec_accountWALLSMATERIAL_MODE_BlockOCCUPATION_TYPE_Cooking_staffis_REG_REGION_NOT_WORK_REGIONNAME_EDUCATION_TYPE_Lower_secondaryORGANIZATION_TYPE_GovernmentORGANIZATION_TYPE_Trade__type_7OCCUPATION_TYPE_Medicine_staffORGANIZATION_TYPE_MilitaryORGANIZATION_TYPE_Industry__type_3ORGANIZATION_TYPE_BankORGANIZATION_TYPE_Transport__type_3ORGANIZATION_TYPE_PoliceORGANIZATION_TYPE_RestaurantORGANIZATION_TYPE_KindergartenORGANIZATION_TYPE_SecurityORGANIZATION_TYPE_AgricultureOCCUPATION_TYPE_Cleaning_staffWALLSMATERIAL_MODE_WoodenORGANIZATION_TYPE_Security_MinistriesORGANIZATION_TYPE_Trade__type_3ORGANIZATION_TYPE_Business_Entity_Type_2O
RGANIZATION_TYPE_Otheris_REG_REGION_NOT_LIVE_REGIONNAME_EDUCATION_TYPE_Incomplete_higherWALLSMATERIAL_MODE_MonolithicORGANIZATION_TYPE_Transport__type_4OCCUPATION_TYPE_Waiters_barmen_staffbaseline_extend_AMT_INCOME_TOTALbaseline_extend_AMT_CREDITbaseline_extend_AMT_ANNUITYbaseline_extend_AMT_GOODS_PRICEbaseline_extend_REGION_POPULATION_RELATIVEbaseline_extend_DAYS_REGISTRATIONbaseline_extend_OWN_CAR_AGEbaseline_extend_CNT_FAM_MEMBERSbaseline_extend_EXT_SOURCE_1baseline_extend_EXT_SOURCE_2baseline_extend_EXT_SOURCE_3baseline_extend_APARTMENTS_AVGbaseline_extend_BASEMENTAREA_AVGbaseline_extend_YEARS_BEGINEXPLUATATION_AVGbaseline_extend_YEARS_BUILD_AVGbaseline_extend_COMMONAREA_AVGbaseline_extend_ELEVATORS_AVGbaseline_extend_ENTRANCES_AVGbaseline_extend_FLOORSMAX_AVGbaseline_extend_FLOORSMIN_AVGbaseline_extend_LANDAREA_AVGbaseline_extend_LIVINGAPARTMENTS_AVGbaseline_extend_LIVINGAREA_AVGbaseline_extend_NONLIVINGAPARTMENTS_AVGbaseline_extend_NONLIVINGAREA_AVGbaseline_extend_APARTMENTS_MODEbaseline_extend_BASEMENTAREA_MODEbaseline_extend_YEARS_BEGINEXPLUATATION_MODEbaseline_extend_YEARS_BUILD_MODEbaseline_extend_COMMONAREA_MODEbaseline_extend_ELEVATORS_MODEbaseline_extend_ENTRANCES_MODEbaseline_extend_FLOORSMAX_MODEbaseline_extend_FLOORSMIN_MODEbaseline_extend_LANDAREA_MODEbaseline_extend_LIVINGAPARTMENTS_MODEbaseline_extend_LIVINGAREA_MODEbaseline_extend_NONLIVINGAPARTMENTS_MODEbaseline_extend_NONLIVINGAREA_MODEbaseline_extend_APARTMENTS_MEDIbaseline_extend_BASEMENTAREA_MEDIbaseline_extend_YEARS_BEGINEXPLUATATION_MEDIbaseline_extend_YEARS_BUILD_MEDIbaseline_extend_COMMONAREA_MEDIbaseline_extend_ELEVATORS_MEDIbaseline_extend_ENTRANCES_MEDIbaseline_extend_FLOORSMAX_MEDIbaseline_extend_FLOORSMIN_MEDIbaseline_extend_LANDAREA_MEDIbaseline_extend_LIVINGAPARTMENTS_MEDIbaseline_extend_LIVINGAREA_MEDIbaseline_extend_NONLIVINGAPARTMENTS_MEDIbaseline_extend_NONLIVINGAREA_MEDIbaseline_extend_TOTALAREA_MODEbaseline_extend_OBS_30_CNT_SOCIAL_CIRCLEbaseline_extend_DEF_30_CNT_SOCIAL_CIR
CLEbaseline_extend_OBS_60_CNT_SOCIAL_CIRCLEbaseline_extend_DEF_60_CNT_SOCIAL_CIRCLEbaseline_extend_DAYS_LAST_PHONE_CHANGEbaseline_extend_AMT_REQ_CREDIT_BUREAU_HOURbaseline_extend_AMT_REQ_CREDIT_BUREAU_DAYbaseline_extend_AMT_REQ_CREDIT_BUREAU_WEEKbaseline_extend_AMT_REQ_CREDIT_BUREAU_MONbaseline_extend_AMT_REQ_CREDIT_BUREAU_QRTbaseline_extend_AMT_REQ_CREDIT_BUREAU_YEARbaseline_extend_CREDIT_INCOME_PERCENTbaseline_extend_ANNUITY_INCOME_PERCENTbaseline_extend_CREDIT_TERMbaseline_extend_YEARS_BIRTHbaseline_extend_REGISTRATION_YEARbaseline_extend_ID_PUBLISH_YEARbaseline_extend_LAST_PHONE_CHANGE_YEARbaseline_extend_DAYS_EMPLOYED_ANOMbaseline_extend_DAYS_EMPLOYEDbaseline_extend_YEARS_EMPLOYEDbaseline_extend_YEARS_EMPLOYED_PERCENT
010000212210101100110000100111110000000010001010000000000000000000000000000000000000202500.0406597.524700.5351000.00.0188-3648.00.01.00.08300.26290.13940.02470.03690.97220.61920.01430.000.06900.08330.12500.03690.02020.01900.00000.00000.02520.03830.97220.63410.01440.00000.06900.08330.12500.03770.0220.01980.00.00.02500.03690.97220.62430.01440.000.06900.08330.12500.03750.02050.01930.00000.000.01492.02.02.02.0-1134.00.00.00.00.00.01.02.00790.12200.060725.92059.99455.80823.1068False-637.01.74520.0673
110000301101001100100010100001110000110000000000101000000100000000000000000000000000270000.01293502.535698.51129500.00.0035-1186.00.02.00.31130.6222NaN0.09590.05290.98510.79600.06050.080.03450.29170.33330.01300.07730.05490.00390.00980.09240.05380.98510.80400.04970.08060.03450.29170.33330.01280.0790.05540.00.00.09680.05290.98510.79870.06080.080.03450.29170.33330.01320.07870.05580.00390.010.07141.00.01.00.0-828.00.00.00.00.00.00.04.79080.13220.027645.93153.24930.79732.2685False-1188.03.25480.0709
21000041221010000011000111001001000000000000101000000000000001000000000000000000000067500.0135000.06750.0135000.00.0100-4260.026.01.0NaN0.55590.7296NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-815.00.00.00.00.00.00.02.00000.10000.050052.180811.67126.93422.2329False-225.00.61640.0118
310000612200101000110000000100110010000000001010000000000000000000000000000000000000135000.0312682.529686.5297000.00.0080-9833.00.02.0NaN0.6504NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.00.02.00.0-617.0NaNNaNNaNNaNNaNNaN2.31620.21990.094952.068526.93976.67671.6904False-3039.08.32600.1599
410000712210110000100100000010110000010000001011000000000000000000000000000000000000121500.0513000.021865.5513000.00.0287-4311.00.01.0NaN0.3227NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-1106.00.00.00.00.00.00.04.22220.18000.042654.608211.81109.47403.0301False-3038.08.32330.1524
\n", "
" ], "text/plain": [ " SK_ID_CURR NAME_INCOME_TYPE_Working is_REGION_RATING_CLIENT_W_CITY is_REGION_RATING_CLIENT is_CODE_GENDER NAME_EDUCATION_TYPE_Higher_education NAME_EDUCATION_TYPE_Secondary___secondary_special is_REG_CITY_NOT_WORK_CITY is_FLAG_DOCUMENT_3 HOUSETYPE_MODE_block_of_flats NAME_INCOME_TYPE_Pensioner ORGANIZATION_TYPE_XNA is_FLAG_EMP_PHONE OCCUPATION_TYPE_Laborers WALLSMATERIAL_MODE_Panel is_LIVE_CITY_NOT_WORK_CITY NAME_FAMILY_STATUS_Married is_FLAG_WORK_PHONE is_FLAG_PHONE is_FLAG_OWN_CAR ORGANIZATION_TYPE_Self_employed ORGANIZATION_TYPE_Business_Entity_Type_3 NAME_FAMILY_STATUS_Single___not_married FONDKAPREMONT_MODE_reg_oper_account is_NAME_CONTRACT_TYPE NAME_HOUSING_TYPE_House___apartment is_FLAG_DOCUMENT_6 OCCUPATION_TYPE_Drivers NAME_FAMILY_STATUS_Civil_marriage NAME_HOUSING_TYPE_With_parents NAME_INCOME_TYPE_State_servant OCCUPATION_TYPE_Core_staff OCCUPATION_TYPE_Sales_staff NAME_INCOME_TYPE_Commercial_associate WALLSMATERIAL_MODE_Stone,_brick NAME_FAMILY_STATUS_Widow OCCUPATION_TYPE_Managers OCCUPATION_TYPE_Accountants is_FLAG_OWN_REALTY ORGANIZATION_TYPE_Construction NAME_TYPE_SUITE_Unaccompanied is_FLAG_DOCUMENT_8 NAME_TYPE_SUITE_Family OCCUPATION_TYPE_High_skill_tech_staff ORGANIZATION_TYPE_School NAME_HOUSING_TYPE_Rented_apartment OCCUPATION_TYPE_Low_skill_Laborers OCCUPATION_TYPE_Security_staff FONDKAPREMONT_MODE_reg_oper_spec_account ORGANIZATION_TYPE_Medicine FONDKAPREMONT_MODE_org_spec_account WALLSMATERIAL_MODE_Block OCCUPATION_TYPE_Cooking_staff is_REG_REGION_NOT_WORK_REGION NAME_EDUCATION_TYPE_Lower_secondary ORGANIZATION_TYPE_Government ORGANIZATION_TYPE_Trade__type_7 OCCUPATION_TYPE_Medicine_staff ORGANIZATION_TYPE_Military ORGANIZATION_TYPE_Industry__type_3 ORGANIZATION_TYPE_Bank ORGANIZATION_TYPE_Transport__type_3 ORGANIZATION_TYPE_Police ORGANIZATION_TYPE_Restaurant ORGANIZATION_TYPE_Kindergarten ORGANIZATION_TYPE_Security ORGANIZATION_TYPE_Agriculture \\\n", "0 100002 1 2 2 1 0 1 0 1 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 
0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \n", "1 100003 0 1 1 0 1 0 0 1 1 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \n", "2 100004 1 2 2 1 0 1 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 \n", "3 100006 1 2 2 0 0 1 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \n", "4 100007 1 2 2 1 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \n", "\n", " OCCUPATION_TYPE_Cleaning_staff WALLSMATERIAL_MODE_Wooden ORGANIZATION_TYPE_Security_Ministries ORGANIZATION_TYPE_Trade__type_3 ORGANIZATION_TYPE_Business_Entity_Type_2 ORGANIZATION_TYPE_Other is_REG_REGION_NOT_LIVE_REGION NAME_EDUCATION_TYPE_Incomplete_higher WALLSMATERIAL_MODE_Monolithic ORGANIZATION_TYPE_Transport__type_4 OCCUPATION_TYPE_Waiters_barmen_staff baseline_extend_AMT_INCOME_TOTAL baseline_extend_AMT_CREDIT baseline_extend_AMT_ANNUITY baseline_extend_AMT_GOODS_PRICE baseline_extend_REGION_POPULATION_RELATIVE baseline_extend_DAYS_REGISTRATION baseline_extend_OWN_CAR_AGE baseline_extend_CNT_FAM_MEMBERS baseline_extend_EXT_SOURCE_1 baseline_extend_EXT_SOURCE_2 baseline_extend_EXT_SOURCE_3 baseline_extend_APARTMENTS_AVG baseline_extend_BASEMENTAREA_AVG baseline_extend_YEARS_BEGINEXPLUATATION_AVG baseline_extend_YEARS_BUILD_AVG baseline_extend_COMMONAREA_AVG baseline_extend_ELEVATORS_AVG baseline_extend_ENTRANCES_AVG baseline_extend_FLOORSMAX_AVG baseline_extend_FLOORSMIN_AVG baseline_extend_LANDAREA_AVG baseline_extend_LIVINGAPARTMENTS_AVG baseline_extend_LIVINGAREA_AVG baseline_extend_NONLIVINGAPARTMENTS_AVG baseline_extend_NONLIVINGAREA_AVG baseline_extend_APARTMENTS_MODE baseline_extend_BASEMENTAREA_MODE baseline_extend_YEARS_BEGINEXPLUATATION_MODE baseline_extend_YEARS_BUILD_MODE 
baseline_extend_COMMONAREA_MODE baseline_extend_ELEVATORS_MODE baseline_extend_ENTRANCES_MODE baseline_extend_FLOORSMAX_MODE baseline_extend_FLOORSMIN_MODE baseline_extend_LANDAREA_MODE baseline_extend_LIVINGAPARTMENTS_MODE baseline_extend_LIVINGAREA_MODE baseline_extend_NONLIVINGAPARTMENTS_MODE baseline_extend_NONLIVINGAREA_MODE baseline_extend_APARTMENTS_MEDI baseline_extend_BASEMENTAREA_MEDI baseline_extend_YEARS_BEGINEXPLUATATION_MEDI baseline_extend_YEARS_BUILD_MEDI baseline_extend_COMMONAREA_MEDI baseline_extend_ELEVATORS_MEDI baseline_extend_ENTRANCES_MEDI baseline_extend_FLOORSMAX_MEDI \\\n", "0 0 0 0 0 0 0 0 0 0 0 0 202500.0 406597.5 24700.5 351000.0 0.0188 -3648.0 0.0 1.0 0.0830 0.2629 0.1394 0.0247 0.0369 0.9722 0.6192 0.0143 0.00 0.0690 0.0833 0.1250 0.0369 0.0202 0.0190 0.0000 0.0000 0.0252 0.0383 0.9722 0.6341 0.0144 0.0000 0.0690 0.0833 0.1250 0.0377 0.022 0.0198 0.0 0.0 0.0250 0.0369 0.9722 0.6243 0.0144 0.00 0.0690 0.0833 \n", "1 0 0 0 0 0 0 0 0 0 0 0 270000.0 1293502.5 35698.5 1129500.0 0.0035 -1186.0 0.0 2.0 0.3113 0.6222 NaN 0.0959 0.0529 0.9851 0.7960 0.0605 0.08 0.0345 0.2917 0.3333 0.0130 0.0773 0.0549 0.0039 0.0098 0.0924 0.0538 0.9851 0.8040 0.0497 0.0806 0.0345 0.2917 0.3333 0.0128 0.079 0.0554 0.0 0.0 0.0968 0.0529 0.9851 0.7987 0.0608 0.08 0.0345 0.2917 \n", "2 0 0 0 0 0 0 0 0 0 0 0 67500.0 135000.0 6750.0 135000.0 0.0100 -4260.0 26.0 1.0 NaN 0.5559 0.7296 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "3 0 0 0 0 0 0 0 0 0 0 0 135000.0 312682.5 29686.5 297000.0 0.0080 -9833.0 0.0 2.0 NaN 0.6504 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "4 0 0 0 0 0 0 0 0 0 0 0 121500.0 513000.0 21865.5 513000.0 0.0287 -4311.0 0.0 1.0 NaN 0.3227 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 
NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "\n", " baseline_extend_FLOORSMIN_MEDI baseline_extend_LANDAREA_MEDI baseline_extend_LIVINGAPARTMENTS_MEDI baseline_extend_LIVINGAREA_MEDI baseline_extend_NONLIVINGAPARTMENTS_MEDI baseline_extend_NONLIVINGAREA_MEDI baseline_extend_TOTALAREA_MODE baseline_extend_OBS_30_CNT_SOCIAL_CIRCLE baseline_extend_DEF_30_CNT_SOCIAL_CIRCLE baseline_extend_OBS_60_CNT_SOCIAL_CIRCLE baseline_extend_DEF_60_CNT_SOCIAL_CIRCLE baseline_extend_DAYS_LAST_PHONE_CHANGE baseline_extend_AMT_REQ_CREDIT_BUREAU_HOUR baseline_extend_AMT_REQ_CREDIT_BUREAU_DAY baseline_extend_AMT_REQ_CREDIT_BUREAU_WEEK baseline_extend_AMT_REQ_CREDIT_BUREAU_MON baseline_extend_AMT_REQ_CREDIT_BUREAU_QRT baseline_extend_AMT_REQ_CREDIT_BUREAU_YEAR baseline_extend_CREDIT_INCOME_PERCENT baseline_extend_ANNUITY_INCOME_PERCENT baseline_extend_CREDIT_TERM baseline_extend_YEARS_BIRTH baseline_extend_REGISTRATION_YEAR baseline_extend_ID_PUBLISH_YEAR baseline_extend_LAST_PHONE_CHANGE_YEAR baseline_extend_DAYS_EMPLOYED_ANOM baseline_extend_DAYS_EMPLOYED baseline_extend_YEARS_EMPLOYED baseline_extend_YEARS_EMPLOYED_PERCENT \n", "0 0.1250 0.0375 0.0205 0.0193 0.0000 0.00 0.0149 2.0 2.0 2.0 2.0 -1134.0 0.0 0.0 0.0 0.0 0.0 1.0 2.0079 0.1220 0.0607 25.9205 9.9945 5.8082 3.1068 False -637.0 1.7452 0.0673 \n", "1 0.3333 0.0132 0.0787 0.0558 0.0039 0.01 0.0714 1.0 0.0 1.0 0.0 -828.0 0.0 0.0 0.0 0.0 0.0 0.0 4.7908 0.1322 0.0276 45.9315 3.2493 0.7973 2.2685 False -1188.0 3.2548 0.0709 \n", "2 NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0000 0.1000 0.0500 52.1808 11.6712 6.9342 2.2329 False -225.0 0.6164 0.0118 \n", "3 NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 2.0 0.0 -617.0 NaN NaN NaN NaN NaN NaN 2.3162 0.2199 0.0949 52.0685 26.9397 6.6767 1.6904 False -3039.0 8.3260 0.1599 \n", "4 NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -1106.0 0.0 0.0 0.0 0.0 0.0 0.0 4.2222 0.1800 0.0426 54.6082 11.8110 9.4740 3.0301 False -3038.0 8.3233 0.1524 " ] }, "metadata": {}, 
"output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 31.4 s, sys: 2.02 s, total: 33.4 s\n", "Wall time: 33.5 s\n" ] } ], "source": [
"%%time\n",
"# feature files to combine; the first entry is loaded as the base table\n",
"ls_feat_file = [\n",
"    'baseline.pkl.bz2',\n",
"    'baseline_extend.pkl.bz2',\n",
"]\n",
"\n",
"# the base table keeps its column names unchanged\n",
"feat_path = os.path.join(\"../04_feature_engineering/features\", ls_feat_file[0])\n",
"pdf_combined = pd.read_pickle(feat_path, compression=\"bz2\")\n",
"\n",
"# left-join each remaining feature set onto the base table via SK_ID_CURR\n",
"for fname in ls_feat_file[1:]:\n",
"    feat_path = os.path.join(\"../04_feature_engineering/features\", fname)\n",
"    pdf_feat = pd.read_pickle(feat_path, compression=\"bz2\")\n",
"    print(fname, pdf_feat.shape)\n",
"\n",
"    # prefix every column except the join key with the part of the file name before the first dot\n",
"    tbl_prefix = fname.split(\".\")[0]\n",
"    rename_col = {\n",
"        cname: \"{}_{}\".format(tbl_prefix, cname)\n",
"        for cname in pdf_feat.columns\n",
"        if cname != \"SK_ID_CURR\"\n",
"    }\n",
"    pdf_feat = pdf_feat.rename(columns=rename_col)\n",
"\n",
"    pdf_combined = pdf_combined.merge(right=pdf_feat, how=\"left\", on=\"SK_ID_CURR\")\n",
"\n",
"print(\"rows, columns\", pdf_combined.shape)\n",
"# every column except the SK_ID_CURR key is treated as a feature\n",
"ls_features = [feat for feat in pdf_combined.columns if feat != \"SK_ID_CURR\"]\n",
"display(pdf_combined.head())"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# left join: every pdf_tvt row is kept and receives the matching combined feature columns\n",
"pdf_tvt = pd.read_pickle(\"../04_feature_engineering/pdf_tvt_extend.pkl\", compression=\"bz2\")\n",
"pdf_features_label = pdf_tvt.merge(right=pdf_combined, how=\"left\", on=\"SK_ID_CURR\")\n",
"print(pdf_features_label.shape)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load combined features with label (option 2)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of features: 1042\n", "(356255, 1045)\n" ] } ], "source": [
"# NOTE(review): the always-true guard looks like a manual on/off switch for this loading path - confirm\n",
"if True:\n",
"    pdf_features_label = pd.read_csv(\n",
"        os.path.join(\"../04_feature_engineering/features\", \"pdf_features_label.csv.bz2\"),\n",
"        compression=\"bz2\",\n",
"    )\n",
"    # columns that are not listed in meta_cols are treated as features\n",
"    meta_cols = [\"SK_ID_CURR\", \"TARGET\", \"tvt_code\"]\n",
"    ls_features = [cname for cname in pdf_features_label.columns if cname not in meta_cols]\n",
"\n",
"    print(\"Number of features: {}\".format(len(ls_features)))\n",
"    print(pdf_features_label.shape)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Grid search" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
"# tag interpolated into file names via .format(version)\n",
"version = \"v07\""
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/anaconda3/lib/python3.6/site-packages/sklearn/base.py:251: UserWarning: Trying to unpickle estimator LabelEncoder from version 0.19.1 when using version 0.20.0. This might lead to breaking code or invalid results. Use at your own risk.\n", " UserWarning)\n" ] }, { "data": { "text/plain": [ "odict_keys(['auc', 'ls_tracked_auc', 'ls_curr_features', 'imp', 'ls_tracked_imp', 'model', 'features'])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# load the pickled mapping stored for this version\n",
"# NOTE: pickle.load can execute arbitrary code from the file, so only open model files you trust\n",
"with open(\"models/xgb_model_{}.mod\".format(version), \"rb\") as model_file:\n",
"    res_model = pickle.load(model_file)\n",
"res_model.keys()"
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected features: 905\n" ] } ], "source": [
"meta_cols = [\"SK_ID_CURR\", \"TARGET\", \"tvt_code\"]\n",
"# res_model[\"imp\"] is iterated as (feature, value) pairs; only the feature names are kept\n",
"ls_features = [feat for feat, _ in res_model[\"imp\"]]\n",
"# narrow the frame to the meta columns plus the selected features\n",
"pdf_features_label = pdf_features_label.loc[:, meta_cols + ls_features]\n",
"print(\"Selected features: {}\".format(len(ls_features)))"
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(307511, 908)" ] }, "execution_count": 6, "metadata": {}, "output_type":
"execute_result" } ], "source": [
"from sklearn.model_selection import GridSearchCV, StratifiedKFold\n",
"# keep the rows whose tvt_code is train, val or test; .copy() detaches the result from pdf_features_label\n",
"pdf_data = pdf_features_label.loc[pdf_features_label[\"tvt_code\"].isin([\"train\", \"val\", \"test\"])].copy()\n",
"pdf_data.shape"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 16 candidates, totalling 80 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.\n" ] } ], "source": [
"%%time\n",
"param_grid = {\n",
"    # searched: two values each -> 2 * 2 * 2 * 2 = 16 candidates\n",
"    \"max_depth\": [4, 7],\n",
"    \"subsample\": [0.6, 0.8],\n",
"    \"colsample_bytree\": [0.6, 0.8],\n",
"    \"colsample_bylevel\": [0.6, 0.8],\n",
"\n",
"    # fixed: single-value lists, identical for every candidate\n",
"    \"objective\": [\"binary:logistic\"],\n",
"    \"booster\": [\"gbtree\"],\n",
"    \"n_estimators\": [1000],\n",
"    \"learning_rate\": [0.025],\n",
"    \"min_child_weight\": [11],\n",
"    \"silent\": [True],\n",
"    # NOTE(review): random_state and seed are both pinned to 1; presumably one is an alias\n",
"    # of the other in the installed xgboost - confirm before dropping either\n",
"    \"random_state\": [1],\n",
"    \"seed\": [1],\n",
"}\n",
"\n",
"xgb_model = xgb.XGBClassifier()\n",
"# shuffled, stratified 5-fold split with a fixed seed so the folds are repeatable\n",
"cv_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n",
"grid_search = GridSearchCV(\n",
"    estimator=xgb_model,\n",
"    param_grid=param_grid,\n",
"    scoring='roc_auc',\n",
"    n_jobs=16,\n",
"    cv=cv_folds,\n",
"    verbose=2,\n",
")\n",
"\n",
"# grid_result is whatever fit() returns; later cells read best_score_ / cv_results_ from it\n",
"grid_result = grid_search.fit(pdf_data[ls_features], pdf_data[\"TARGET\"])"
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best: 0.790018 using {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.787458 (0.001713) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 4, 
'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.787668 (0.001883) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.789044 (0.002316) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.790018 (0.001945) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.787579 (0.001834) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.787813 (0.001535) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.789147 (0.002190) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.789775 (0.002219) with: {'booster': 'gbtree', 'colsample_bylevel': 0.6, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 
7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.787652 (0.001759) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.787831 (0.001788) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.788921 (0.002179) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.789692 (0.001530) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.6, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.787874 (0.001682) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.787952 (0.001679) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 4, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n", "0.789661 (0.002410) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 
'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.6}\n", "0.789685 (0.002188) with: {'booster': 'gbtree', 'colsample_bylevel': 0.8, 'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 7, 'min_child_weight': 11, 'n_estimators': 1000, 'objective': 'binary:logistic', 'random_state': 1, 'seed': 1, 'silent': True, 'subsample': 0.8}\n" ] } ], "source": [
"print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))\n",
"\n",
"# one line per candidate: mean and std of the CV test score, followed by its parameters\n",
"cv_results = grid_result.cv_results_\n",
"means = cv_results['mean_test_score']\n",
"stds = cv_results['std_test_score']\n",
"params = cv_results['params']\n",
"for idx in range(len(params)):\n",
"    print(\"%f (%f) with: %r\" % (means[idx], stds[idx], params[idx]))"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Submission" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.06975483" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# feature columns of the rows whose tvt_code is kaggle_test\n",
"X_kaggle_test = pdf_features_label.query(\"tvt_code == 'kaggle_test'\").loc[:, ls_features]\n",
"# keep column 1 of predict_proba (presumably the probability of TARGET == 1 - confirm via grid_search.classes_)\n",
"kaggle_proba = grid_search.predict_proba(X_kaggle_test)\n",
"y_test_pred = kaggle_proba[:, 1]\n",
"y_test_pred.mean()"
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGET
01000010.0267
11000050.1182
21000130.0361
31000280.0458
41000380.1852
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET\n", "0 100001 0.0267\n", "1 100005 0.1182\n", "2 100013 0.0361\n", "3 100028 0.0458\n", "4 100038 0.1852" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# ids of the kaggle_test rows, selected with the same query that built X_kaggle_test,\n",
"# so they line up row-for-row with y_test_pred\n",
"SK_IDs = pdf_features_label.query(\"tvt_code == 'kaggle_test'\")[\"SK_ID_CURR\"].tolist()\n",
"pdf_submiss = pd.DataFrame({\"SK_ID_CURR\": SK_IDs, \"TARGET\": y_test_pred})\n",
"# create the output folder first so to_csv does not fail when it is missing\n",
"os.makedirs(\"submissions\", exist_ok=True)\n",
"pdf_submiss.to_csv(\"submissions/submission_gridsearch_{}.csv\".format(version), index=False)\n",
"pdf_submiss.head()"
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
"# save the fitted search to file\n",
"# NOTE: this rebinds res_model, which earlier held the mapping loaded from models/xgb_model_<version>.mod\n",
"res_model = {\n",
"    \"grid_search\": grid_search,\n",
"    \"grid_result\": grid_result\n",
"}\n",
"os.makedirs(\"models\", exist_ok=True)\n",
"# the with-block closes the handle (flushing the pickle to disk) even if dump raises\n",
"with open(\"models/xgb_gridsearch_{}.mod\".format(version), \"wb\") as output_file:\n",
"    pickle.dump(res_model, output_file)"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }