{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import numpy as np " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "loan_data = pd.read_csv('./02-loan_data.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idcredit_lines_outstandingloan_amt_outstandingtotal_debt_outstandingincomeyears_employedfico_scoredefault
0815337405221.5451933915.47122678039.3854656050
1744253251958.9287268228.75252026648.4352525721
2225607303363.0092592027.83085065866.7124646020
3488597504766.6480012501.73039774356.8834756120
4470061411345.8277181768.82618723448.3263166310
\n", "
" ], "text/plain": [ " customer_id credit_lines_outstanding loan_amt_outstanding \\\n", "0 8153374 0 5221.545193 \n", "1 7442532 5 1958.928726 \n", "2 2256073 0 3363.009259 \n", "3 4885975 0 4766.648001 \n", "4 4700614 1 1345.827718 \n", "\n", " total_debt_outstanding income years_employed fico_score default \n", "0 3915.471226 78039.38546 5 605 0 \n", "1 8228.752520 26648.43525 2 572 1 \n", "2 2027.830850 65866.71246 4 602 0 \n", "3 2501.730397 74356.88347 5 612 0 \n", "4 1768.826187 23448.32631 6 631 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loan_data.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idfico_scoredefault
081533746050
174425325721
222560736020
348859756120
447006146310
\n", "
" ], "text/plain": [ " customer_id fico_score default\n", "0 8153374 605 0\n", "1 7442532 572 1\n", "2 2256073 602 0\n", "3 4885975 612 0\n", "4 4700614 631 0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = loan_data[['customer_id', 'fico_score', 'default']]\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from scipy.optimize import minimize" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: divide by zero encountered in log\n", " LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n", "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: invalid value encountered in multiply\n", " LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n", "/Users/brhank/miniconda3/lib/python3.11/site-packages/scipy/optimize/_numdiff.py:576: RuntimeWarning: invalid value encountered in subtract\n", " df = fun(x) - f0\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Optimized Boundaries: [408. 435.5 490.5 545.5 600.9999951 656.5\n", " 711.5 766.5 822. 850. ]\n", "Rating Map: {'Rating 1': (408.0, 435.5), 'Rating 2': (435.5, 490.5), 'Rating 3': (490.5, 545.5), 'Rating 4': (545.5, 600.9999951015242), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (656.5, 711.5), 'Rating 7': (711.5, 766.5), 'Rating 8': (766.5, 822.0), 'Rating 9': (822.0, 850.0)}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:13: RuntimeWarning: invalid value encountered in divide\n", " pi = ki / ni\n" ] } ], "source": [ "# Set desired number of buckets\n", "num_buckets = 10\n", "min_score, max_score = data.fico_score.min(), data.fico_score.max()\n", "\n", "def log_likelihood(boundaries, scores, defaults):\n", " n = len(scores)\n", " ni = np.zeros_like(boundaries)\n", " ki = np.zeros_like(boundaries)\n", " for i, score in enumerate(scores):\n", " bucket = np.digitize(score, boundaries) - 1\n", " ni[bucket] += 1\n", " ki[bucket] += defaults[i]\n", " pi = ki / ni\n", " LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n", " return -LL\n", "\n", "initial_boundaries = np.linspace(min_score, max_score, num_buckets + 1)\n", "memo = {} # Top-down dynamic programming\n", "threshold = 100\n", "\n", "def optimize_boundaries(scores, defaults, min_score, max_score):\n", " if max_score - min_score <= threshold:\n", " return simple_bucketing(scores, defaults, min_score, max_score)\n", "\n", " if (min_score, max_score) in memo:\n", " return memo[(min_score, max_score)]\n", "\n", " mid = (min_score + max_score) // 2\n", " left_boundaries = optimize_boundaries(scores, defaults, min_score, mid)\n", " right_boundaries = optimize_boundaries(scores, defaults, mid, max_score)\n", "\n", " combined_boundaries = combine_boundaries(left_boundaries, right_boundaries)\n", " optimized_boundaries = minimize(log_likelihood, combined_boundaries, args=(scores, defaults)).x\n", "\n", " memo[(min_score, max_score)] = optimized_boundaries\n", " return optimized_boundaries\n", "\n", "def simple_bucketing(scores, defaults, min_score, max_score):\n", " return np.linspace(min_score, max_score, 3)\n", "\n", "def combine_boundaries(left_boundaries, right_boundaries):\n", " return np.concatenate((left_boundaries[:-1], right_boundaries[1:]))\n", "\n", "def create_rating_map(optimal_boundaries):\n", " rating_map = {}\n", " for i in range(len(optimal_boundaries) - 1):\n", " rating_map[f\"Rating {i + 1}\"] = (optimal_boundaries[i], optimal_boundaries[i + 1])\n", " return rating_map\n", "\n", "optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n", "rating_map = create_rating_map(optimal_boundaries)\n", "\n", "print(\"Optimized Boundaries:\", optimal_boundaries)\n", "print(\"Rating Map:\", rating_map)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimized Boundaries: [408. 435.5 490.5 545.5 600.9999951 656.5\n", " 711.5 766.5 822. 850. ]\n", "Rating Map: {'Rating 1': (822.0, 850.0), 'Rating 2': (766.5, 822.0), 'Rating 3': (711.5, 766.5), 'Rating 4': (656.5, 711.5), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (545.5, 600.9999951015242), 'Rating 7': (490.5, 545.5), 'Rating 8': (435.5, 490.5), 'Rating 9': (408.0, 435.5)}\n" ] } ], "source": [ "# We are told to create a rating map that maps the FICO score of the borrowers to a rating where a lower rating signifies a better credit score.\n", "\n", "def create_rating_map(optimal_boundaries):\n", " rating_map = {}\n", " l = len(optimal_boundaries)\n", " for i in range(l - 1,0,-1):\n", " rating_map[f\"Rating {l-i}\"] = (optimal_boundaries[i-1], optimal_boundaries[i])\n", " return rating_map\n", "\n", "optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n", "rating_map = create_rating_map(optimal_boundaries)\n", "\n", "print(\"Optimized Boundaries:\", optimal_boundaries)\n", "print(\"Rating Map:\", rating_map)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }