{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-09-22 16:47:19,276 - whylogs.logs - DEBUG - whylogs.logs logging -> stdout at level DEBUG\n" ] } ], "source": [ "# Optional convenience helper: route whylogs' internal Python logging\n", "# to stdout at DEBUG level. Not required for the rest of the notebook.\n", "from whylogs.logs import display_logging\n", "display_logging('debug')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "member_id | \n", "loan_amnt | \n", "funded_amnt | \n", "funded_amnt_inv | \n", "term | \n", "int_rate | \n", "installment | \n", "grade | \n", "sub_grade | \n", "... | \n", "hardship_payoff_balance_amount | \n", "hardship_last_payment_amount | \n", "disbursement_method | \n", "debt_settlement_flag | \n", "debt_settlement_flag_date | \n", "settlement_status | \n", "settlement_date | \n", "settlement_amount | \n", "settlement_percentage | \n", "settlement_term | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "90671227 | \n", "NaN | \n", "4800.0 | \n", "4800.0 | \n", "4800.0 | \n", "36 months | \n", "13.49 | \n", "162.87 | \n", "C | \n", "C2 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "90060135 | \n", "NaN | \n", "21600.0 | \n", "21600.0 | \n", "21600.0 | \n", "60 months | \n", "9.49 | \n", "453.54 | \n", "B | \n", "B2 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "90501423 | \n", "NaN | \n", "24200.0 | \n", "24200.0 | \n", "24200.0 | \n", "36 months | \n", "9.49 | \n", "775.09 | \n", "B | \n", "B2 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "90186302 | \n", "NaN | \n", "3600.0 | \n", "3600.0 | \n", "3600.0 | \n", "36 months | \n", "11.49 | \n", "118.70 | \n", "B | \n", "B5 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "90805192 | \n", "NaN | \n", "8000.0 | \n", "8000.0 | \n", "8000.0 | \n", "36 months | \n", "10.49 | \n", "259.99 | \n", "B | \n", "B3 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 151 columns
\n", "\n", " | column | \n", "count | \n", "null_count | \n", "bool_count | \n", "numeric_count | \n", "max | \n", "mean | \n", "min | \n", "stddev | \n", "nunique_numbers | \n", "... | \n", "ununique_str_upper | \n", "quantile_0.0000 | \n", "quantile_0.0100 | \n", "quantile_0.0500 | \n", "quantile_0.2500 | \n", "quantile_0.5000 | \n", "quantile_0.7500 | \n", "quantile_0.9500 | \n", "quantile_0.9900 | \n", "quantile_1.0000 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "num_il_tl | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "43.00 | \n", "9.834171 | \n", "0.00 | \n", "8.290657 | \n", "34.0 | \n", "... | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "1.000000 | \n", "4.000000 | \n", "7.000000 | \n", "14.000000 | \n", "28.000000 | \n", "42.000000 | \n", "43.000000 | \n", "
1 | \n", "open_acc_6m | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "8.00 | \n", "1.356784 | \n", "0.00 | \n", "1.420749 | \n", "8.0 | \n", "... | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "2.000000 | \n", "4.000000 | \n", "7.000000 | \n", "8.000000 | \n", "
2 | \n", "avg_cur_bal | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "72812.00 | \n", "13079.467337 | \n", "244.00 | \n", "14001.002777 | \n", "199.0 | \n", "... | \n", "0.0 | \n", "244.00 | \n", "425.000000 | \n", "1252.000000 | \n", "3039.000000 | \n", "8200.000000 | \n", "17591.000000 | \n", "43647.000000 | \n", "68086.000000 | \n", "72812.000000 | \n", "
3 | \n", "dti_joint | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "4.0 | \n", "20.65 | \n", "14.892500 | \n", "12.35 | \n", "3.863922 | \n", "4.0 | \n", "... | \n", "0.0 | \n", "12.35 | \n", "12.350000 | \n", "12.350000 | \n", "13.220000 | \n", "13.350000 | \n", "20.650000 | \n", "20.650000 | \n", "20.650000 | \n", "20.650000 | \n", "
4 | \n", "num_accts_ever_120_pd | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "7.00 | \n", "0.542714 | \n", "0.00 | \n", "1.229657 | \n", "8.0 | \n", "... | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "3.000000 | \n", "7.000000 | \n", "7.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
146 | \n", "sec_app_collections_12_mths_ex_med | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
147 | \n", "emp_length | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "11.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
148 | \n", "last_pymnt_amnt | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "35304.76 | \n", "5068.370452 | \n", "0.00 | \n", "7696.468449 | \n", "194.0 | \n", "... | \n", "0.0 | \n", "0.00 | \n", "7.980000 | \n", "118.699997 | \n", "334.100006 | \n", "771.229980 | \n", "7585.509766 | \n", "22287.580078 | \n", "32954.308594 | \n", "35304.761719 | \n", "
149 | \n", "total_pymnt_inv | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "52583.97 | \n", "15089.057337 | \n", "0.00 | \n", "10349.878426 | \n", "198.0 | \n", "... | \n", "0.0 | \n", "0.00 | \n", "828.900024 | \n", "2734.159912 | \n", "7149.430176 | \n", "12359.349609 | \n", "20929.970703 | \n", "35261.218750 | \n", "51942.230469 | \n", "52583.968750 | \n", "
150 | \n", "debt_settlement_flag | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "2.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
151 rows × 32 columns
\n", "\n", " | column | \n", "count | \n", "null_count | \n", "bool_count | \n", "numeric_count | \n", "max | \n", "mean | \n", "min | \n", "stddev | \n", "nunique_numbers | \n", "... | \n", "ununique_str_upper | \n", "quantile_0.0000 | \n", "quantile_0.0100 | \n", "quantile_0.0500 | \n", "quantile_0.2500 | \n", "quantile_0.5000 | \n", "quantile_0.7500 | \n", "quantile_0.9500 | \n", "quantile_0.9900 | \n", "quantile_1.0000 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "funded_amnt | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "40000.00 | \n", "16479.899497 | \n", "1000.00 | \n", "9811.384942 | \n", "79.0 | \n", "... | \n", "0.0 | \n", "1000.000000 | \n", "1000.000000 | \n", "3325.000000 | \n", "9600.00 | \n", "15000.000000 | \n", "23000.000000 | \n", "35000.000000 | \n", "40000.000000 | \n", "40000.000000 | \n", "
1 | \n", "mo_sin_rcnt_tl | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "46.00 | \n", "6.195980 | \n", "0.00 | \n", "6.649735 | \n", "26.0 | \n", "... | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.00 | \n", "4.000000 | \n", "8.000000 | \n", "21.000000 | \n", "35.000000 | \n", "46.000000 | \n", "
2 | \n", "open_il_12m | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "4.00 | \n", "0.678392 | \n", "0.00 | \n", "0.845120 | \n", "5.0 | \n", "... | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "1.000000 | \n", "2.000000 | \n", "3.000000 | \n", "4.000000 | \n", "
3 | \n", "installment | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "1300.55 | \n", "486.018090 | \n", "34.96 | \n", "283.607183 | \n", "180.0 | \n", "... | \n", "0.0 | \n", "34.959999 | \n", "36.150002 | \n", "112.139999 | \n", "271.75 | \n", "413.000000 | \n", "668.859985 | \n", "1069.439941 | \n", "1204.569946 | \n", "1300.550049 | \n", "
4 | \n", "bc_open_to_buy | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "198.0 | \n", "88250.00 | \n", "11172.843434 | \n", "0.00 | \n", "14448.281979 | \n", "194.0 | \n", "... | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "118.000000 | \n", "2011.00 | \n", "5719.000000 | \n", "15374.000000 | \n", "42950.000000 | \n", "85587.000000 | \n", "88250.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
146 | \n", "num_rev_tl_bal_gt_0 | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "199.0 | \n", "18.00 | \n", "5.979899 | \n", "0.00 | \n", "3.357428 | \n", "19.0 | \n", "... | \n", "0.0 | \n", "0.000000 | \n", "1.000000 | \n", "2.000000 | \n", "4.00 | \n", "5.000000 | \n", "8.000000 | \n", "13.000000 | \n", "17.000000 | \n", "18.000000 | \n", "
147 | \n", "last_pymnt_d | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "30.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
148 | \n", "percent_bc_gt_75 | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "198.0 | \n", "100.00 | \n", "40.382323 | \n", "0.00 | \n", "33.933261 | \n", "26.0 | \n", "... | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "7.70 | \n", "33.299999 | \n", "66.699997 | \n", "100.000000 | \n", "100.000000 | \n", "100.000000 | \n", "
149 | \n", "debt_settlement_flag | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "2.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
150 | \n", "mo_sin_old_il_acct | \n", "200.0 | \n", "0.0 | \n", "0.0 | \n", "195.0 | \n", "269.00 | \n", "127.148718 | \n", "3.00 | \n", "49.477824 | \n", "114.0 | \n", "... | \n", "0.0 | \n", "3.000000 | \n", "5.000000 | \n", "28.000000 | \n", "110.00 | \n", "132.000000 | \n", "153.000000 | \n", "209.000000 | \n", "264.000000 | \n", "269.000000 | \n", "
151 rows × 32 columns
\n", "