{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"loan_data = pd.read_csv('./02-loan_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" credit_lines_outstanding | \n",
" loan_amt_outstanding | \n",
" total_debt_outstanding | \n",
" income | \n",
" years_employed | \n",
" fico_score | \n",
" default | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 8153374 | \n",
" 0 | \n",
" 5221.545193 | \n",
" 3915.471226 | \n",
" 78039.38546 | \n",
" 5 | \n",
" 605 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 7442532 | \n",
" 5 | \n",
" 1958.928726 | \n",
" 8228.752520 | \n",
" 26648.43525 | \n",
" 2 | \n",
" 572 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2256073 | \n",
" 0 | \n",
" 3363.009259 | \n",
" 2027.830850 | \n",
" 65866.71246 | \n",
" 4 | \n",
" 602 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4885975 | \n",
" 0 | \n",
" 4766.648001 | \n",
" 2501.730397 | \n",
" 74356.88347 | \n",
" 5 | \n",
" 612 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4700614 | \n",
" 1 | \n",
" 1345.827718 | \n",
" 1768.826187 | \n",
" 23448.32631 | \n",
" 6 | \n",
" 631 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" customer_id credit_lines_outstanding loan_amt_outstanding \\\n",
"0 8153374 0 5221.545193 \n",
"1 7442532 5 1958.928726 \n",
"2 2256073 0 3363.009259 \n",
"3 4885975 0 4766.648001 \n",
"4 4700614 1 1345.827718 \n",
"\n",
" total_debt_outstanding income years_employed fico_score default \n",
"0 3915.471226 78039.38546 5 605 0 \n",
"1 8228.752520 26648.43525 2 572 1 \n",
"2 2027.830850 65866.71246 4 602 0 \n",
"3 2501.730397 74356.88347 5 612 0 \n",
"4 1768.826187 23448.32631 6 631 0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loan_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" customer_id | \n",
" fico_score | \n",
" default | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 8153374 | \n",
" 605 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 7442532 | \n",
" 572 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2256073 | \n",
" 602 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4885975 | \n",
" 612 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4700614 | \n",
" 631 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" customer_id fico_score default\n",
"0 8153374 605 0\n",
"1 7442532 572 1\n",
"2 2256073 602 0\n",
"3 4885975 612 0\n",
"4 4700614 631 0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = loan_data[['customer_id', 'fico_score', 'default']]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from scipy.optimize import minimize"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: divide by zero encountered in log\n",
" LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
"/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: invalid value encountered in multiply\n",
" LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
"/Users/brhank/miniconda3/lib/python3.11/site-packages/scipy/optimize/_numdiff.py:576: RuntimeWarning: invalid value encountered in subtract\n",
" df = fun(x) - f0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimized Boundaries: [408. 435.5 490.5 545.5 600.9999951 656.5\n",
" 711.5 766.5 822. 850. ]\n",
"Rating Map: {'Rating 1': (408.0, 435.5), 'Rating 2': (435.5, 490.5), 'Rating 3': (490.5, 545.5), 'Rating 4': (545.5, 600.9999951015242), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (656.5, 711.5), 'Rating 7': (711.5, 766.5), 'Rating 8': (766.5, 822.0), 'Rating 9': (822.0, 850.0)}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:13: RuntimeWarning: invalid value encountered in divide\n",
" pi = ki / ni\n"
]
}
],
"source": [
"# Set desired number of buckets\n",
"num_buckets = 10\n",
"min_score, max_score = data.fico_score.min(), data.fico_score.max()\n",
"\n",
"def log_likelihood(boundaries, scores, defaults):\n",
" n = len(scores)\n",
" ni = np.zeros_like(boundaries)\n",
" ki = np.zeros_like(boundaries)\n",
" for i, score in enumerate(scores):\n",
" bucket = np.digitize(score, boundaries) - 1\n",
" ni[bucket] += 1\n",
" ki[bucket] += defaults[i]\n",
" pi = ki / ni\n",
" LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
" return -LL\n",
"\n",
"initial_boundaries = np.linspace(min_score, max_score, num_buckets + 1)\n",
"memo = {} # Top-down dynamic programming\n",
"threshold = 100\n",
"\n",
"def optimize_boundaries(scores, defaults, min_score, max_score):\n",
" if max_score - min_score <= threshold:\n",
" return simple_bucketing(scores, defaults, min_score, max_score)\n",
"\n",
" if (min_score, max_score) in memo:\n",
" return memo[(min_score, max_score)]\n",
"\n",
" mid = (min_score + max_score) // 2\n",
" left_boundaries = optimize_boundaries(scores, defaults, min_score, mid)\n",
" right_boundaries = optimize_boundaries(scores, defaults, mid, max_score)\n",
"\n",
" combined_boundaries = combine_boundaries(left_boundaries, right_boundaries)\n",
" optimized_boundaries = minimize(log_likelihood, combined_boundaries, args=(scores, defaults)).x\n",
"\n",
" memo[(min_score, max_score)] = optimized_boundaries\n",
" return optimized_boundaries\n",
"\n",
"def simple_bucketing(scores, defaults, min_score, max_score):\n",
" return np.linspace(min_score, max_score, 3)\n",
"\n",
"def combine_boundaries(left_boundaries, right_boundaries):\n",
" return np.concatenate((left_boundaries[:-1], right_boundaries[1:]))\n",
"\n",
"def create_rating_map(optimal_boundaries):\n",
" rating_map = {}\n",
" for i in range(len(optimal_boundaries) - 1):\n",
" rating_map[f\"Rating {i + 1}\"] = (optimal_boundaries[i], optimal_boundaries[i + 1])\n",
" return rating_map\n",
"\n",
"optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n",
"rating_map = create_rating_map(optimal_boundaries)\n",
"\n",
"print(\"Optimized Boundaries:\", optimal_boundaries)\n",
"print(\"Rating Map:\", rating_map)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimized Boundaries: [408. 435.5 490.5 545.5 600.9999951 656.5\n",
" 711.5 766.5 822. 850. ]\n",
"Rating Map: {'Rating 1': (822.0, 850.0), 'Rating 2': (766.5, 822.0), 'Rating 3': (711.5, 766.5), 'Rating 4': (656.5, 711.5), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (545.5, 600.9999951015242), 'Rating 7': (490.5, 545.5), 'Rating 8': (435.5, 490.5), 'Rating 9': (408.0, 435.5)}\n"
]
}
],
"source": [
"# We are told to create a rating map that maps the FICO score of the borrowers to a rating where a lower rating signifies a better credit score.\n",
"\n",
"def create_rating_map(optimal_boundaries):\n",
" rating_map = {}\n",
" l = len(optimal_boundaries)\n",
" for i in range(l - 1,0,-1):\n",
" rating_map[f\"Rating {l-i}\"] = (optimal_boundaries[i-1], optimal_boundaries[i])\n",
" return rating_map\n",
"\n",
"optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n",
"rating_map = create_rating_map(optimal_boundaries)\n",
"\n",
"print(\"Optimized Boundaries:\", optimal_boundaries)\n",
"print(\"Rating Map:\", rating_map)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}