{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os, math, subprocess\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "# some settings for displaying Pandas results\n", "pd.set_option('display.width', 2000)\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.precision', 4)\n", "pd.set_option('display.max_colwidth', -1)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# load data\n", "data_path = \"home-credit-default-risk/application_train.csv\"\n", "pdf_data = pd.read_csv(data_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SELECT, WHERE, DISTINCT, LIMIT" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT *\n", " FROM pdf_data\n", " LIMIT 3\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5351000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0188-9461-637-3648.0-2120NaN110110Laborers1.022WEDNESDAY10000000Business Entity Type 30.08300.26290.13940.02470.03690.97220.61920.01430.000.06900.08330.12500.03690.02020.01900.00000.00000.02520.03830.97220.63410.01440.00000.06900.08330.12500.03770.0220.01980.00.00.02500.03690.97220.62430.01440.000.06900.08330.12500.03750.02050.01930.00000.00reg oper accountblock of flats0.0149Stone, brickNo2.02.02.02.0-1134.0010000000000000000000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.51129500.0FamilyState servantHigher educationMarriedHouse / apartment0.0035-16765-1188-1186.0-291NaN110110Core staff2.011MONDAY11000000School0.31130.6222NaN0.09590.05290.98510.79600.06050.080.03450.29170.33330.01300.07730.05490.00390.00980.09240.05380.98510.80400.04970.08060.03450.29170.33330.01280.0790.05540.00.00.09680.05290.98510.79870.06080.080.03450.29170.33330.01320.07870.05580.00390.01reg oper accountblock of flats0.0714BlockNo1.00.01.00.0-828.0010000000000000000000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0135000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0100-19046-225-4260.0-253126.0111110Laborers1.022MONDAY9000000GovernmentNaN0.55590.7296NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-815.0000000000000000000000.00.00.00.00.00.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0188 -9461 -637 -3648.0 -2120 NaN 1 1 0 1 1 0 Laborers 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 0.0830 0.2629 0.1394 0.0247 0.0369 0.9722 0.6192 0.0143 0.00 0.0690 0.0833 0.1250 0.0369 0.0202 0.0190 0.0000 0.0000 0.0252 0.0383 0.9722 0.6341 0.0144 0.0000 0.0690 0.0833 0.1250 0.0377 0.022 0.0198 0.0 0.0 0.0250 0.0369 0.9722 0.6243 0.0144 0.00 0.0690 0.0833 0.1250 0.0375 0.0205 0.0193 0.0000 0.00 reg oper account block of flats 0.0149 Stone, brick No 2.0 2.0 2.0 2.0 -1134.0 0 1 0 0 0 0 0 0 0 0 \n", "1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.0035 -16765 -1188 -1186.0 -291 NaN 1 1 0 1 1 0 Core staff 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 0.3113 0.6222 NaN 0.0959 0.0529 0.9851 0.7960 0.0605 0.08 0.0345 0.2917 0.3333 0.0130 0.0773 0.0549 0.0039 0.0098 0.0924 0.0538 0.9851 0.8040 0.0497 0.0806 0.0345 0.2917 0.3333 0.0128 0.079 0.0554 0.0 0.0 0.0968 0.0529 0.9851 0.7987 0.0608 0.08 0.0345 0.2917 0.3333 0.0132 0.0787 0.0558 0.0039 0.01 reg oper account block of flats 0.0714 Block No 1.0 0.0 1.0 0.0 -828.0 0 1 0 0 0 0 0 0 0 0 \n", "2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0100 -19046 -225 -4260.0 -2531 26.0 1 1 1 1 1 0 Laborers 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government NaN 0.5559 0.7296 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0 0 0 0 0 0 0 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "1 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT *\n", " FROM pdf_data\n", " LIMIT 3\n", "\"\"\")\n", "pdf_data.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT NAME_CONTRACT_TYPE\n", " FROM pdf_data\n", " WHERE CODE_GENDER = 'M'\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAME_CONTRACT_TYPE
0Cash loans
2Revolving loans
4Cash loans
5Cash loans
7Cash loans
\n", "
" ], "text/plain": [ " NAME_CONTRACT_TYPE\n", "0 Cash loans \n", "2 Revolving loans \n", "4 Cash loans \n", "5 Cash loans \n", "7 Cash loans " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT NAME_CONTRACT_TYPE\n", " FROM pdf_data\n", " WHERE CODE_GENDER = 'M'\n", "\"\"\")\n", "pdf_data[pdf_data[\"CODE_GENDER\"] == 'M'][[\"NAME_CONTRACT_TYPE\"]].head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT DISTINCT NAME_CONTRACT_TYPE\n", " FROM pdf_data\n", "\n" ] }, { "data": { "text/plain": [ "array(['Cash loans', 'Revolving loans'], dtype=object)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT DISTINCT NAME_CONTRACT_TYPE\n", " FROM pdf_data\n", "\"\"\")\n", "pdf_data[\"NAME_CONTRACT_TYPE\"].unique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SELECT with multiple conditions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT NAME_INCOME_TYPE, CODE_GENDER, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " WHERE CODE_GENDER = 'M' AND AMT_INCOME_TOTAL > 200000.0\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAME_INCOME_TYPECODE_GENDERAMT_INCOME_TOTAL
0WorkingM202500.0
7State servantM360000.0
13WorkingM225000.0
28State servantM270000.0
33Commercial associateM360000.0
\n", "
" ], "text/plain": [ " NAME_INCOME_TYPE CODE_GENDER AMT_INCOME_TOTAL\n", "0 Working M 202500.0 \n", "7 State servant M 360000.0 \n", "13 Working M 225000.0 \n", "28 State servant M 270000.0 \n", "33 Commercial associate M 360000.0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT NAME_INCOME_TYPE, CODE_GENDER, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " WHERE CODE_GENDER = 'M' AND AMT_INCOME_TOTAL > 200000.0\n", "\"\"\")\n", "\n", "condition = (pdf_data[\"CODE_GENDER\"] == 'M') & (pdf_data[\"AMT_INCOME_TOTAL\"] > 200000.0)\n", "pdf_data[condition][[\"NAME_INCOME_TYPE\", \"CODE_GENDER\", \"AMT_INCOME_TOTAL\"]].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# ORDER BY" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY AMT_INCOME_TOTAL\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAME_INCOME_TYPEAMT_INCOME_TOTAL
1678Working25650.0
20727Pensioner25650.0
240137Pensioner26100.0
186643Pensioner26100.0
246104Pensioner26100.0
\n", "
" ], "text/plain": [ " NAME_INCOME_TYPE AMT_INCOME_TOTAL\n", "1678 Working 25650.0 \n", "20727 Pensioner 25650.0 \n", "240137 Pensioner 26100.0 \n", "186643 Pensioner 26100.0 \n", "246104 Pensioner 26100.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY AMT_INCOME_TOTAL\n", "\"\"\")\n", "\n", "pdf_data[[\"NAME_INCOME_TYPE\", \"AMT_INCOME_TOTAL\"]].sort_values(\"AMT_INCOME_TOTAL\").head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY AMT_INCOME_TOTAL DESC\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAME_INCOME_TYPEAMT_INCOME_TOTAL
12840Working1.1700e+08
203693Commercial associate1.8000e+07
246858Commercial associate1.3500e+07
77768Working9.0000e+06
131127Working6.7500e+06
\n", "
" ], "text/plain": [ " NAME_INCOME_TYPE AMT_INCOME_TOTAL\n", "12840 Working 1.1700e+08 \n", "203693 Commercial associate 1.8000e+07 \n", "246858 Commercial associate 1.3500e+07 \n", "77768 Working 9.0000e+06 \n", "131127 Working 6.7500e+06 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY AMT_INCOME_TOTAL DESC\n", "\"\"\")\n", "\n", "pdf_data[[\"NAME_INCOME_TYPE\", \"AMT_INCOME_TOTAL\"]].sort_values(\"AMT_INCOME_TOTAL\", ascending=False).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# IN… NOT IN" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT *\n", " FROM pdf_data\n", " WHERE SK_ID_CURR IN (100002, 100010, 100011)\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5351000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0188-9461-637-3648.0-2120NaN110110Laborers1.022WEDNESDAY10000000Business Entity Type 30.08300.26290.13940.02470.03690.97220.61920.01430.00.0690.08330.1250.03690.02020.0190.00.00.02520.03830.97220.63410.01440.00.0690.08330.1250.03770.0220.01980.00.00.0250.03690.97220.62430.01440.00.0690.08330.1250.03750.02050.01930.00.0reg oper accountblock of flats0.0149Stone, brickNo2.02.02.02.0-1134.0010000000000000000000.00.00.00.00.01.0
71000100Cash loansMYY0360000.01530000.042075.01530000.0UnaccompaniedState servantHigher educationMarriedHouse / apartment0.0031-18850-449-4597.0-23798.0111100Managers2.033MONDAY16000011OtherNaN0.71430.5407NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.00.02.00.0-1070.0010000000000000000000.00.00.00.00.00.0
81000110Cash loansFNY0112500.01019610.033826.5913500.0ChildrenPensionerSecondary / secondary specialMarriedHouse / apartment0.0186-20099365243-7427.0-3514NaN100100NaN2.022WEDNESDAY14000000XNA0.58730.20570.7517NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.00.01.00.00.0010000000000000000000.00.00.00.00.01.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0188 -9461 -637 -3648.0 -2120 NaN 1 1 0 1 1 0 Laborers 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 Business Entity Type 3 0.0830 0.2629 0.1394 0.0247 0.0369 0.9722 0.6192 0.0143 0.0 0.069 0.0833 0.125 0.0369 0.0202 0.019 0.0 0.0 0.0252 0.0383 0.9722 0.6341 0.0144 0.0 0.069 0.0833 0.125 0.0377 0.022 0.0198 0.0 0.0 0.025 0.0369 0.9722 0.6243 0.0144 0.0 0.069 0.0833 0.125 0.0375 0.0205 0.0193 0.0 0.0 reg oper account block of flats 0.0149 Stone, brick No 2.0 2.0 2.0 2.0 -1134.0 0 1 0 0 0 0 0 0 0 0 \n", "7 100010 0 Cash loans M Y Y 0 360000.0 1530000.0 42075.0 1530000.0 Unaccompanied State servant Higher education Married House / apartment 0.0031 -18850 -449 -4597.0 -2379 8.0 1 1 1 1 0 0 Managers 2.0 3 3 MONDAY 16 0 0 0 0 1 1 Other NaN 0.7143 0.5407 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 2.0 0.0 -1070.0 0 1 0 0 0 0 0 0 0 0 \n", "8 100011 0 Cash loans F N Y 0 112500.0 1019610.0 33826.5 913500.0 Children Pensioner Secondary / secondary special Married House / apartment 0.0186 -20099 365243 -7427.0 -3514 NaN 1 0 0 1 0 0 NaN 2.0 2 2 WEDNESDAY 14 0 0 0 0 0 0 XNA 0.5873 0.2057 0.7517 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0 0.0 1.0 0.0 0.0 0 1 0 0 0 0 0 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "7 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "8 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT *\n", " FROM pdf_data\n", " WHERE SK_ID_CURR IN (100002, 100010, 100011)\n", "\"\"\")\n", "\n", "condition = pdf_data[\"SK_ID_CURR\"].isin([100002, 100010, 100011])\n", "pdf_data[condition].head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT *\n", " FROM pdf_data\n", " WHERE SK_ID_CURR NOT IN (100002, 100010, 100011)\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
11000030Cash loansFNN0270000.01293502.535698.51129500.0FamilyState servantHigher educationMarriedHouse / apartment0.0035-16765-1188-1186.0-291NaN110110Core staff2.011MONDAY11000000School0.31130.6222NaN0.09590.05290.98510.7960.06050.080.03450.29170.33330.0130.07730.05490.00390.00980.09240.05380.98510.8040.04970.08060.03450.29170.33330.01280.0790.05540.00.00.09680.05290.98510.79870.06080.080.03450.29170.33330.01320.07870.05580.00390.01reg oper accountblock of flats0.0714BlockNo1.00.01.00.0-828.0010000000000000000000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0135000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0100-19046-225-4260.0-253126.0111110Laborers1.022MONDAY9000000GovernmentNaN0.55590.7296NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-815.0000000000000000000000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5297000.0UnaccompaniedWorkingSecondary / secondary specialCivil marriageHouse / apartment0.0080-19005-3039-9833.0-2437NaN110100Laborers2.022WEDNESDAY17000000Business Entity Type 3NaN0.6504NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.00.02.00.0-617.001000000000000000000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5513000.0UnaccompaniedWorkingSecondary / secondary specialSingle / not marriedHouse / apartment0.0287-19932-3038-4311.0-3458NaN110100Core staff1.022THURSDAY11000011ReligionNaN0.3227NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-1106.0000000100000000000000.00.00.00.00.00.0
51000080Cash loansMNY099000.0490495.527517.5454500.0Spouse, partnerState servantSecondary / secondary specialMarriedHouse / apartment0.0358-16941-1588-4970.0-477NaN111110Laborers2.022WEDNESDAY16000000OtherNaN0.35420.6212NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-2536.0010000000000000000000.00.00.00.01.01.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.0035 -16765 -1188 -1186.0 -291 NaN 1 1 0 1 1 0 Core staff 2.0 1 1 MONDAY 11 0 0 0 0 0 0 School 0.3113 0.6222 NaN 0.0959 0.0529 0.9851 0.796 0.0605 0.08 0.0345 0.2917 0.3333 0.013 0.0773 0.0549 0.0039 0.0098 0.0924 0.0538 0.9851 0.804 0.0497 0.0806 0.0345 0.2917 0.3333 0.0128 0.079 0.0554 0.0 0.0 0.0968 0.0529 0.9851 0.7987 0.0608 0.08 0.0345 0.2917 0.3333 0.0132 0.0787 0.0558 0.0039 0.01 reg oper account block of flats 0.0714 Block No 1.0 0.0 1.0 0.0 -828.0 0 1 0 0 0 0 0 0 0 0 \n", "2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0100 -19046 -225 -4260.0 -2531 26.0 1 1 1 1 1 0 Laborers 1.0 2 2 MONDAY 9 0 0 0 0 0 0 Government NaN 0.5559 0.7296 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0 0 0 0 0 0 0 0 0 0 \n", "3 100006 0 Cash loans F N Y 0 135000.0 312682.5 29686.5 297000.0 Unaccompanied Working Secondary / secondary special Civil marriage House / apartment 0.0080 -19005 -3039 -9833.0 -2437 NaN 1 1 0 1 0 0 Laborers 2.0 2 2 WEDNESDAY 17 0 0 0 0 0 0 Business Entity Type 3 NaN 0.6504 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 2.0 0.0 -617.0 0 1 0 0 0 0 0 0 0 0 \n", "4 100007 0 Cash loans M N Y 0 121500.0 513000.0 21865.5 513000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.0287 -19932 -3038 -4311.0 -3458 NaN 1 1 0 1 0 0 Core staff 1.0 2 2 THURSDAY 11 0 0 0 0 1 1 Religion NaN 0.3227 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -1106.0 0 0 0 0 0 0 1 0 0 0 \n", "5 100008 0 Cash loans M N Y 0 99000.0 490495.5 27517.5 454500.0 Spouse, partner State servant Secondary / secondary special Married House / apartment 0.0358 -16941 -1588 -4970.0 -477 NaN 1 1 1 1 1 0 Laborers 2.0 2 2 WEDNESDAY 16 0 0 0 0 0 0 Other NaN 0.3542 0.6212 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -2536.0 0 1 0 0 0 0 0 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "1 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN \n", "4 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "5 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 1.0 1.0 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT *\n", " FROM pdf_data\n", " WHERE SK_ID_CURR NOT IN (100002, 100010, 100011)\n", "\"\"\")\n", "\n", "condition = ~pdf_data[\"SK_ID_CURR\"].isin([100002, 100010, 100011])\n", "pdf_data[condition].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# GROUP BY, COUNT, ORDER BY" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT CODE_GENDER, COUNT(TARGET)\n", " FROM pdf_data\n", " GROUP BY CODE_GENDER\n", " ORDER BY NUM_TARGET\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TARGET
CODE_GENDER
XNA4
M105059
F202448
\n", "
" ], "text/plain": [ " TARGET\n", "CODE_GENDER \n", "XNA 4 \n", "M 105059\n", "F 202448" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT CODE_GENDER, COUNT(TARGET)\n", " FROM pdf_data\n", " GROUP BY CODE_GENDER\n", " ORDER BY NUM_TARGET\n", "\"\"\")\n", "\n", "pdf_data.groupby(\"CODE_GENDER\").agg({\"TARGET\": \"count\"}).sort_values(\"TARGET\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Top N records" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY NUM_TARGET DESC\n", " LIMIT 5\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
128401149671Cash loansFNY11.1700e+08562491.026194.5454500.0UnaccompaniedWorkingSecondary / secondary specialMarriedHouse / apartment0.0106-12615-922-6762.0-3643NaN110100Laborers3.022TUESDAY14000000Business Entity Type 30.46080.11320.14550.10310.09470.9791NaNNaN0.00.20690.1667NaN0.0688NaN0.0615NaNNaN0.1050.09830.9791NaNNaN0.00.20690.1667NaN0.0704NaN0.0641NaNNaN0.10410.09470.9791NaNNaN0.00.20690.1667NaN0.07NaN0.0626NaNNaNNaNblock of flats0.0715Stone, brickNo0.00.00.00.00.0010000000000000000000.00.00.00.00.01.0
2036933361470Cash loansMYY21.8000e+07675000.069295.5675000.0UnaccompaniedCommercial associateSecondary / secondary specialMarriedHouse / apartment0.0308-15704-4961-3338.0-47287.0110100NaN4.022THURSDAY22000000Business Entity Type 30.54340.7886NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.00.01.00.0-1133.001000000000000000000NaNNaNNaNNaNNaNNaN
2468583856740Cash loansMYY01.3500e+071400503.5130945.51368000.0UnaccompaniedCommercial associateHigher educationMarriedHouse / apartment0.0308-13551-280-3953.0-497210.0111100NaN2.022SUNDAY12000000Business Entity Type 3NaN0.7135NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-13.000000001000000000000NaNNaNNaNNaNNaNNaN
777681901600Cash loansFYN09.0000e+061431531.0132601.51377000.0UnaccompaniedWorkingHigher educationCivil marriageHouse / apartment0.0101-16425-8476-7276.0-16568.0110100Managers2.022FRIDAY13000000Business Entity Type 10.65240.50410.4330NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.00.01.00.0-3.0000000100000000000000.00.00.00.00.00.0
1311272520840Cash loansMYN06.7500e+06790830.052978.5675000.0UnaccompaniedWorkingHigher educationMarriedHouse / apartment0.0095-19341-443-7414.0-28868.0111110Laborers2.022TUESDAY14011011Transport: type 4NaN0.55280.4957NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-695.0000000100000000000000.00.01.00.00.04.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "12840 114967 1 Cash loans F N Y 1 1.1700e+08 562491.0 26194.5 454500.0 Unaccompanied Working Secondary / secondary special Married House / apartment 0.0106 -12615 -922 -6762.0 -3643 NaN 1 1 0 1 0 0 Laborers 3.0 2 2 TUESDAY 14 0 0 0 0 0 0 Business Entity Type 3 0.4608 0.1132 0.1455 0.1031 0.0947 0.9791 NaN NaN 0.0 0.2069 0.1667 NaN 0.0688 NaN 0.0615 NaN NaN 0.105 0.0983 0.9791 NaN NaN 0.0 0.2069 0.1667 NaN 0.0704 NaN 0.0641 NaN NaN 0.1041 0.0947 0.9791 NaN NaN 0.0 0.2069 0.1667 NaN 0.07 NaN 0.0626 NaN NaN NaN block of flats 0.0715 Stone, brick No 0.0 0.0 0.0 0.0 0.0 0 1 0 0 0 0 0 0 0 0 \n", "203693 336147 0 Cash loans M Y Y 2 1.8000e+07 675000.0 69295.5 675000.0 Unaccompanied Commercial associate Secondary / secondary special Married House / apartment 0.0308 -15704 -4961 -3338.0 -4728 7.0 1 1 0 1 0 0 NaN 4.0 2 2 THURSDAY 22 0 0 0 0 0 0 Business Entity Type 3 0.5434 0.7886 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0 0.0 1.0 0.0 -1133.0 0 1 0 0 0 0 0 0 0 0 \n", "246858 385674 0 Cash loans M Y Y 0 1.3500e+07 1400503.5 130945.5 1368000.0 Unaccompanied Commercial associate Higher education Married House / apartment 0.0308 -13551 -280 -3953.0 -4972 10.0 1 1 1 1 0 0 NaN 2.0 2 2 SUNDAY 12 0 0 0 0 0 0 Business Entity Type 3 NaN 0.7135 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -13.0 0 0 0 0 0 0 0 1 0 0 \n", "77768 190160 0 Cash loans F Y N 0 9.0000e+06 1431531.0 132601.5 1377000.0 Unaccompanied Working Higher education Civil marriage House / apartment 0.0101 -16425 -8476 -7276.0 -1656 8.0 1 1 0 1 0 0 Managers 2.0 2 2 FRIDAY 13 0 0 0 0 0 0 Business Entity Type 1 0.6524 0.5041 0.4330 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0 0.0 1.0 0.0 -3.0 0 0 0 0 0 0 1 0 0 0 \n", "131127 252084 0 Cash loans M Y N 0 6.7500e+06 790830.0 52978.5 675000.0 Unaccompanied Working Higher education Married House / apartment 0.0095 -19341 -443 -7414.0 -2886 8.0 1 1 1 1 1 0 Laborers 2.0 2 2 TUESDAY 14 0 1 1 0 1 1 Transport: type 4 NaN 0.5528 0.4957 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -695.0 0 0 0 0 0 0 1 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "12840 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "203693 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN \n", "246858 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN \n", "77768 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "131127 0 0 0 0 0 0 0 0 0 0 0.0 0.0 1.0 0.0 0.0 4.0 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY NUM_TARGET DESC\n", " LIMIT 5\n", "\"\"\")\n", "\n", "pdf_data.nlargest(5, columns=\"AMT_INCOME_TOTAL\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY NUM_TARGET DESC\n", " LIMIT 10\n", " OFFSET 5\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITYAMT_GOODS_PRICENAME_TYPE_SUITENAME_INCOME_TYPENAME_EDUCATION_TYPENAME_FAMILY_STATUSNAME_HOUSING_TYPEREGION_POPULATION_RELATIVEDAYS_BIRTHDAYS_EMPLOYEDDAYS_REGISTRATIONDAYS_ID_PUBLISHOWN_CAR_AGEFLAG_MOBILFLAG_EMP_PHONEFLAG_WORK_PHONEFLAG_CONT_MOBILEFLAG_PHONEFLAG_EMAILOCCUPATION_TYPECNT_FAM_MEMBERSREGION_RATING_CLIENTREGION_RATING_CLIENT_W_CITYWEEKDAY_APPR_PROCESS_STARTHOUR_APPR_PROCESS_STARTREG_REGION_NOT_LIVE_REGIONREG_REGION_NOT_WORK_REGIONLIVE_REGION_NOT_WORK_REGIONREG_CITY_NOT_LIVE_CITYREG_CITY_NOT_WORK_CITYLIVE_CITY_NOT_WORK_CITYORGANIZATION_TYPEEXT_SOURCE_1EXT_SOURCE_2EXT_SOURCE_3APARTMENTS_AVGBASEMENTAREA_AVGYEARS_BEGINEXPLUATATION_AVGYEARS_BUILD_AVGCOMMONAREA_AVGELEVATORS_AVGENTRANCES_AVGFLOORSMAX_AVGFLOORSMIN_AVGLANDAREA_AVGLIVINGAPARTMENTS_AVGLIVINGAREA_AVGNONLIVINGAPARTMENTS_AVGNONLIVINGAREA_AVGAPARTMENTS_MODEBASEMENTAREA_MODEYEARS_BEGINEXPLUATATION_MODEYEARS_BUILD_MODECOMMONAREA_MODEELEVATORS_MODEENTRANCES_MODEFLOORSMAX_MODEFLOORSMIN_MODELANDAREA_MODELIVINGAPARTMENTS_MODELIVINGAREA_MODENONLIVINGAPARTMENTS_MODENONLIVINGAREA_MODEAPARTMENTS_MEDIBASEMENTAREA_MEDIYEARS_BEGINEXPLUATATION_MEDIYEARS_BUILD_MEDICOMMONAREA_MEDIELEVATORS_MEDIENTRANCES_MEDIFLOORSMAX_MEDIFLOORSMIN_MEDILANDAREA_MEDILIVINGAPARTMENTS_MEDILIVINGAREA_MEDINONLIVINGAPARTMENTS_MEDINONLIVINGAREA_MEDIFONDKAPREMONT_MODEHOUSETYPE_MODETOTALAREA_MODEWALLSMATERIAL_MODEEMERGENCYSTATE_MODEOBS_30_CNT_SOCIAL_CIRCLEDEF_30_CNT_SOCIAL_CIRCLEOBS_60_CNT_SOCIAL_CIRCLEDEF_60_CNT_SOCIAL_CIRCLEDAYS_LAST_PHONE_CHANGEFLAG_DOCUMENT_2FLAG_DOCUMENT_3FLAG_DOCUMENT_4FLAG_DOCUMENT_5FLAG_DOCUMENT_6FLAG_DOCUMENT_7FLAG_DOCUMENT_8FLAG_DOCUMENT_9FLAG_DOCUMENT_10FLAG_DOCUMENT_11FLAG_DOCUMENT_12FLAG_DOCUMENT_13FLAG_DOCUMENT_14FLAG_DOCUMENT_15FLAG_DOCUMENT_16FLAG_DOCUMENT_17FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
1030062195630Revolving loansMYY04500000.02250000.0225000.02250000.0UnaccompaniedWorkingHigher educationSingle / not marriedHouse / apartment0.0073-10778-378-4919.0-34413.0110110Managers1.022SATURDAY15011000Construction0.22640.64800.61950.06700.07280.97760.6940NaN0.000.13790.16670.12500.0000NaN0.0606NaN0.00230.06300.06490.97770.7060NaN0.00000.13790.16670.04170.0000NaN0.0564NaN0.00240.06770.07280.97760.6981NaN0.000.13790.16670.12500.0000NaN0.0617NaN0.0023reg oper accountblock of flats0.0426Stone, brickNo1.00.01.00.0-529.0000000000000000000000.00.00.00.00.02.0
1878333177480Cash loansMNN04500000.0835380.042651.0675000.0UnaccompaniedWorkingSecondary / secondary specialMarriedHouse / apartment0.0060-18715-3331-8877.0-2104NaN111100Laborers2.022FRIDAY13110110ConstructionNaN0.74880.2822NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.00.00.00.0-613.0010000000000000000000.00.00.00.00.02.0
2045643371510Cash loansMNN04500000.0450000.047749.5450000.0UnaccompaniedWorkingHigher educationSingle / not marriedHouse / apartment0.0093-18461-8472-1179.0-1997NaN110110Managers1.022TUESDAY12000000Military0.47460.7980NaN0.16080.11110.9806NaNNaN0.000.34480.1667NaN0.0741NaN0.1360NaN0.00000.16390.11530.9806NaNNaN0.00000.34480.1667NaN0.0758NaN0.1417NaN0.00000.16240.11110.9806NaNNaN0.000.34480.1667NaN0.0754NaN0.1385NaN0.0000NaNblock of flats0.1234PanelNo0.00.00.00.0-3206.001000000000000000000NaNNaNNaNNaNNaNNaN
2874634329800Cash loansMYY04500000.01755000.061132.51755000.0UnaccompaniedWorkingHigher educationMarriedHouse / apartment0.0100-18784-3618-9447.0-231511.0111100Managers2.022SATURDAY2000000Self-employed0.42950.40740.34580.10930.10090.98710.82320.04590.120.10340.33330.04170.00000.08830.07030.00390.04590.11130.10470.98710.83010.04630.12080.10340.33330.04170.00000.09640.07320.00390.04850.11030.10090.98710.82560.04620.120.10340.33330.04170.00000.08980.07150.00390.0468reg oper accountblock of flats0.0903PanelNo3.00.03.00.0-284.0010000000000000000000.00.01.00.00.00.0
1816983106010Cash loansMYY13950059.5675000.066217.5675000.0UnaccompaniedCommercial associateHigher educationMarriedWith parents0.0326-10572-3163-10078.0-6831.0111110Managers3.011MONDAY14000000Trade: type 2NaN0.59780.73100.04640.02210.9747NaNNaN0.040.03450.3333NaN0.0192NaN0.0394NaN0.00890.04730.02290.9747NaNNaN0.04030.03450.3333NaN0.0197NaN0.0411NaN0.00940.04680.02210.9747NaNNaN0.040.03450.3333NaN0.0196NaN0.0401NaN0.0090NaNblock of flats0.0329Stone, brickNo4.00.04.00.0-902.0000000000000000010000.00.00.00.00.01.0
\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ORGANIZATION_TYPE EXT_SOURCE_1 EXT_SOURCE_2 EXT_SOURCE_3 APARTMENTS_AVG BASEMENTAREA_AVG YEARS_BEGINEXPLUATATION_AVG YEARS_BUILD_AVG COMMONAREA_AVG ELEVATORS_AVG ENTRANCES_AVG FLOORSMAX_AVG FLOORSMIN_AVG LANDAREA_AVG LIVINGAPARTMENTS_AVG LIVINGAREA_AVG NONLIVINGAPARTMENTS_AVG NONLIVINGAREA_AVG APARTMENTS_MODE BASEMENTAREA_MODE YEARS_BEGINEXPLUATATION_MODE YEARS_BUILD_MODE COMMONAREA_MODE ELEVATORS_MODE ENTRANCES_MODE FLOORSMAX_MODE FLOORSMIN_MODE LANDAREA_MODE LIVINGAPARTMENTS_MODE LIVINGAREA_MODE NONLIVINGAPARTMENTS_MODE NONLIVINGAREA_MODE APARTMENTS_MEDI BASEMENTAREA_MEDI YEARS_BEGINEXPLUATATION_MEDI YEARS_BUILD_MEDI COMMONAREA_MEDI ELEVATORS_MEDI ENTRANCES_MEDI FLOORSMAX_MEDI FLOORSMIN_MEDI LANDAREA_MEDI LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 \\\n", "103006 219563 0 Revolving loans M Y Y 0 4500000.0 2250000.0 225000.0 2250000.0 Unaccompanied Working Higher education Single / not married House / apartment 0.0073 -10778 -378 -4919.0 -3441 3.0 1 1 0 1 1 0 Managers 1.0 2 2 SATURDAY 15 0 1 1 0 0 0 Construction 0.2264 0.6480 0.6195 0.0670 0.0728 0.9776 0.6940 NaN 0.00 0.1379 0.1667 0.1250 0.0000 NaN 0.0606 NaN 0.0023 0.0630 0.0649 0.9777 0.7060 NaN 0.0000 0.1379 0.1667 0.0417 0.0000 NaN 0.0564 NaN 0.0024 0.0677 0.0728 0.9776 0.6981 NaN 0.00 0.1379 0.1667 0.1250 0.0000 NaN 0.0617 NaN 0.0023 reg oper account block of flats 0.0426 Stone, brick No 1.0 0.0 1.0 0.0 -529.0 0 0 0 0 0 0 0 0 0 0 \n", "187833 317748 0 Cash loans M N N 0 4500000.0 835380.0 42651.0 675000.0 Unaccompanied Working Secondary / secondary special Married House / apartment 0.0060 -18715 -3331 -8877.0 -2104 NaN 1 1 1 1 0 0 Laborers 2.0 2 2 FRIDAY 13 1 1 0 1 1 0 Construction NaN 0.7488 0.2822 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -613.0 0 1 0 0 0 0 0 0 0 0 \n", "204564 337151 0 Cash loans M N N 0 4500000.0 450000.0 47749.5 450000.0 Unaccompanied Working Higher education Single / not married House / apartment 0.0093 -18461 -8472 -1179.0 -1997 NaN 1 1 0 1 1 0 Managers 1.0 2 2 TUESDAY 12 0 0 0 0 0 0 Military 0.4746 0.7980 NaN 0.1608 0.1111 0.9806 NaN NaN 0.00 0.3448 0.1667 NaN 0.0741 NaN 0.1360 NaN 0.0000 0.1639 0.1153 0.9806 NaN NaN 0.0000 0.3448 0.1667 NaN 0.0758 NaN 0.1417 NaN 0.0000 0.1624 0.1111 0.9806 NaN NaN 0.00 0.3448 0.1667 NaN 0.0754 NaN 0.1385 NaN 0.0000 NaN block of flats 0.1234 Panel No 0.0 0.0 0.0 0.0 -3206.0 0 1 0 0 0 0 0 0 0 0 \n", "287463 432980 0 Cash loans M Y Y 0 4500000.0 1755000.0 61132.5 1755000.0 Unaccompanied Working Higher education Married House / apartment 0.0100 -18784 -3618 -9447.0 -2315 11.0 1 1 1 1 0 0 Managers 2.0 2 2 SATURDAY 2 0 0 0 0 0 0 Self-employed 0.4295 0.4074 0.3458 0.1093 0.1009 0.9871 0.8232 0.0459 0.12 0.1034 0.3333 0.0417 0.0000 0.0883 0.0703 0.0039 0.0459 0.1113 0.1047 0.9871 0.8301 0.0463 0.1208 0.1034 0.3333 0.0417 0.0000 0.0964 0.0732 0.0039 0.0485 0.1103 0.1009 0.9871 0.8256 0.0462 0.12 0.1034 0.3333 0.0417 0.0000 0.0898 0.0715 0.0039 0.0468 reg oper account block of flats 0.0903 Panel No 3.0 0.0 3.0 0.0 -284.0 0 1 0 0 0 0 0 0 0 0 \n", "181698 310601 0 Cash loans M Y Y 1 3950059.5 675000.0 66217.5 675000.0 Unaccompanied Commercial associate Higher education Married With parents 0.0326 -10572 -3163 -10078.0 -683 1.0 1 1 1 1 1 0 Managers 3.0 1 1 MONDAY 14 0 0 0 0 0 0 Trade: type 2 NaN 0.5978 0.7310 0.0464 0.0221 0.9747 NaN NaN 0.04 0.0345 0.3333 NaN 0.0192 NaN 0.0394 NaN 0.0089 0.0473 0.0229 0.9747 NaN NaN 0.0403 0.0345 0.3333 NaN 0.0197 NaN 0.0411 NaN 0.0094 0.0468 0.0221 0.9747 NaN NaN 0.04 0.0345 0.3333 NaN 0.0196 NaN 0.0401 NaN 0.0090 NaN block of flats 0.0329 Stone, brick No 4.0 0.0 4.0 0.0 -902.0 0 0 0 0 0 0 0 0 0 0 \n", "\n", " FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "103006 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 2.0 \n", "187833 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 2.0 \n", "204564 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN \n", "287463 0 0 0 0 0 0 0 0 0 0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "181698 0 0 0 0 0 0 1 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT AMT_INCOME_TOTAL\n", " FROM pdf_data\n", " ORDER BY NUM_TARGET DESC\n", " LIMIT 10\n", " OFFSET 5\n", "\"\"\")\n", "\n", "pdf_data.nlargest(10, columns=\"AMT_INCOME_TOTAL\").tail(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Aggregate functions (MIN, MAX, MEAN)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT MAX(AMT_INCOME_TOTAL), MIN(AMT_INCOME_TOTAL), MEAN(AMT_INCOME_TOTAL)\n", " FROM pdf_data\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
maxminmean
AMT_INCOME_TOTAL1.1700e+0825650.0168797.9193
\n", "
" ], "text/plain": [ " max min mean\n", "AMT_INCOME_TOTAL 1.1700e+08 25650.0 168797.9193" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT MAX(AMT_INCOME_TOTAL), MIN(AMT_INCOME_TOTAL), MEAN(AMT_INCOME_TOTAL)\n", " FROM pdf_data\n", "\"\"\")\n", "\n", "pdf_data.agg({\"AMT_INCOME_TOTAL\": [\"max\", \"min\", \"mean\"]}).transpose()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# JOIN\n", "\n", "\n", "- (INNER) JOIN: Returns records that have matching values in both tables\n", "- LEFT (OUTER) JOIN: Return all records from the left table, and the matched records from the right table\n", "- RIGHT (OUTER) JOIN: Return all records from the right table, and the matched records from the left table\n", "- FULL (OUTER) JOIN: Return all records when there is a match in either left or right table\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT *\n", " FROM pdf1\n", " INNER JOIN pdf2\n", " ON pdf1.SK_ID_CURR = pdf2.SK_ID_CURR\n", "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRAMT_INCOME_TOTALCODE_GENDERFLAG_OWN_CAR
0100002202500.0MN
1100003270000.0FN
210000467500.0MY
3100006135000.0FN
4100007121500.0MN
\n", "
" ], "text/plain": [ " SK_ID_CURR AMT_INCOME_TOTAL CODE_GENDER FLAG_OWN_CAR\n", "0 100002 202500.0 M N \n", "1 100003 270000.0 F N \n", "2 100004 67500.0 M Y \n", "3 100006 135000.0 F N \n", "4 100007 121500.0 M N " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"\"\"\n", " SELECT *\n", " FROM pdf1\n", " INNER JOIN pdf2\n", " ON pdf1.SK_ID_CURR = pdf2.SK_ID_CURR\n", "\"\"\")\n", "\n", "pdf1 = pdf_data[[\"SK_ID_CURR\", \"AMT_INCOME_TOTAL\"]]\n", "pdf2 = pdf_data[[\"SK_ID_CURR\", \"CODE_GENDER\", \"FLAG_OWN_CAR\"]]\n", "pdf1.merge(pdf2, on=\"SK_ID_CURR\", how=\"inner\").head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# UNION ALL and UNION" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " SELECT * FROM pdf1\n", " UNION ALL\n", " SELECT * FROM pdf2\n", "\n", "('Union all:', (615022, 2))\n", "('Union:', (6, 2))\n" ] } ], "source": [ "print(\"\"\"\n", " SELECT * FROM pdf1\n", " UNION ALL\n", " SELECT * FROM pdf2\n", "\"\"\")\n", "\n", "pdf1 = pdf_data[[\"CODE_GENDER\", \"FLAG_OWN_CAR\"]]\n", "pdf2 = pdf_data[[\"CODE_GENDER\", \"FLAG_OWN_CAR\"]]\n", "\n", "print(\"Union all:\", pd.concat([pdf1, pdf2]).shape)\n", "print(\"Union:\", pd.concat([pdf1, pdf2]).drop_duplicates().shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# INSERT\n", "\n", "```\n", "INSERT INTO table_name (column1, column2, column3, ...)\n", "VALUES (value1, value2, value3, ...); \n", "```" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idname
01Harry Potter
12Ron Weasley
23Hermione Granger
\n", "
" ], "text/plain": [ " id name\n", "0 1 Harry Potter \n", "1 2 Ron Weasley \n", "2 3 Hermione Granger" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame({'id': [1, 2], 'name': ['Harry Potter', 'Ron Weasley']})\n", "df2 = pd.DataFrame({'id': [3], 'name': ['Hermione Granger']})\n", "df3 = pd.concat([df1, df2]).reset_index(drop=True)\n", "df3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# UPDATE\n", "\n", "```\n", "UPDATE table_name\n", "SET column1 = value1, column2 = value2, ...\n", "WHERE condition; \n", "```" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idname
01Harry Potter
12Ron
23Hermione Granger
\n", "
" ], "text/plain": [ " id name\n", "0 1 Harry Potter \n", "1 2 Ron \n", "2 3 Hermione Granger" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.loc[df3[\"id\"] == 2, \"name\"] = \"Ron\"\n", "df3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# DELETE\n", "\n", "```\n", "DELETE FROM table_name WHERE condition;\n", "```" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idname
01Harry Potter
23Hermione Granger
\n", "
" ], "text/plain": [ " id name\n", "0 1 Harry Potter \n", "2 3 Hermione Granger" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.drop(df3[df3[\"name\"] == \"Ron\"].index, inplace=True)\n", "df3" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 2 }