{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "loan_data = pd.read_csv('./02-loan_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>credit_lines_outstanding</th>\n",
       "      <th>loan_amt_outstanding</th>\n",
       "      <th>total_debt_outstanding</th>\n",
       "      <th>income</th>\n",
       "      <th>years_employed</th>\n",
       "      <th>fico_score</th>\n",
       "      <th>default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8153374</td>\n",
       "      <td>0</td>\n",
       "      <td>5221.545193</td>\n",
       "      <td>3915.471226</td>\n",
       "      <td>78039.38546</td>\n",
       "      <td>5</td>\n",
       "      <td>605</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7442532</td>\n",
       "      <td>5</td>\n",
       "      <td>1958.928726</td>\n",
       "      <td>8228.752520</td>\n",
       "      <td>26648.43525</td>\n",
       "      <td>2</td>\n",
       "      <td>572</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2256073</td>\n",
       "      <td>0</td>\n",
       "      <td>3363.009259</td>\n",
       "      <td>2027.830850</td>\n",
       "      <td>65866.71246</td>\n",
       "      <td>4</td>\n",
       "      <td>602</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4885975</td>\n",
       "      <td>0</td>\n",
       "      <td>4766.648001</td>\n",
       "      <td>2501.730397</td>\n",
       "      <td>74356.88347</td>\n",
       "      <td>5</td>\n",
       "      <td>612</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4700614</td>\n",
       "      <td>1</td>\n",
       "      <td>1345.827718</td>\n",
       "      <td>1768.826187</td>\n",
       "      <td>23448.32631</td>\n",
       "      <td>6</td>\n",
       "      <td>631</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  credit_lines_outstanding  loan_amt_outstanding  \\\n",
       "0      8153374                         0           5221.545193   \n",
       "1      7442532                         5           1958.928726   \n",
       "2      2256073                         0           3363.009259   \n",
       "3      4885975                         0           4766.648001   \n",
       "4      4700614                         1           1345.827718   \n",
       "\n",
       "   total_debt_outstanding       income  years_employed  fico_score  default  \n",
       "0             3915.471226  78039.38546               5         605        0  \n",
       "1             8228.752520  26648.43525               2         572        1  \n",
       "2             2027.830850  65866.71246               4         602        0  \n",
       "3             2501.730397  74356.88347               5         612        0  \n",
       "4             1768.826187  23448.32631               6         631        0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loan_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>fico_score</th>\n",
       "      <th>default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8153374</td>\n",
       "      <td>605</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7442532</td>\n",
       "      <td>572</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2256073</td>\n",
       "      <td>602</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4885975</td>\n",
       "      <td>612</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4700614</td>\n",
       "      <td>631</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id  fico_score  default\n",
       "0      8153374         605        0\n",
       "1      7442532         572        1\n",
       "2      2256073         602        0\n",
       "3      4885975         612        0\n",
       "4      4700614         631        0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = loan_data[['customer_id', 'fico_score', 'default']]\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.optimize import minimize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: divide by zero encountered in log\n",
      "  LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
      "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:14: RuntimeWarning: invalid value encountered in multiply\n",
      "  LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
      "/Users/brhank/miniconda3/lib/python3.11/site-packages/scipy/optimize/_numdiff.py:576: RuntimeWarning: invalid value encountered in subtract\n",
      "  df = fun(x) - f0\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimized Boundaries: [408.        435.5       490.5       545.5       600.9999951 656.5\n",
      " 711.5       766.5       822.        850.       ]\n",
      "Rating Map: {'Rating 1': (408.0, 435.5), 'Rating 2': (435.5, 490.5), 'Rating 3': (490.5, 545.5), 'Rating 4': (545.5, 600.9999951015242), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (656.5, 711.5), 'Rating 7': (711.5, 766.5), 'Rating 8': (766.5, 822.0), 'Rating 9': (822.0, 850.0)}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/h0/722z94dd3fb0pfv4wg3qmdkh0000gn/T/ipykernel_25445/2800205351.py:13: RuntimeWarning: invalid value encountered in divide\n",
      "  pi = ki / ni\n"
     ]
    }
   ],
   "source": [
    "# Set desired number of buckets\n",
    "num_buckets = 10\n",
    "min_score, max_score = data.fico_score.min(), data.fico_score.max()\n",
    "\n",
    "def log_likelihood(boundaries, scores, defaults):\n",
    "    n = len(scores)\n",
    "    ni = np.zeros_like(boundaries)\n",
    "    ki = np.zeros_like(boundaries)\n",
    "    for i, score in enumerate(scores):\n",
    "        bucket = np.digitize(score, boundaries) - 1\n",
    "        ni[bucket] += 1\n",
    "        ki[bucket] += defaults[i]\n",
    "    pi = ki / ni\n",
    "    LL = np.sum(ni * np.log(pi) + (ni - ki) * np.log(1 - pi))\n",
    "    return -LL\n",
    "\n",
    "initial_boundaries = np.linspace(min_score, max_score, num_buckets + 1)\n",
    "memo = {} # Top-down dynamic programming\n",
    "threshold = 100\n",
    "\n",
    "def optimize_boundaries(scores, defaults, min_score, max_score):\n",
    "    if max_score - min_score <= threshold:\n",
    "        return simple_bucketing(scores, defaults, min_score, max_score)\n",
    "\n",
    "    if (min_score, max_score) in memo:\n",
    "        return memo[(min_score, max_score)]\n",
    "\n",
    "    mid = (min_score + max_score) // 2\n",
    "    left_boundaries = optimize_boundaries(scores, defaults, min_score, mid)\n",
    "    right_boundaries = optimize_boundaries(scores, defaults, mid, max_score)\n",
    "\n",
    "    combined_boundaries = combine_boundaries(left_boundaries, right_boundaries)\n",
    "    optimized_boundaries = minimize(log_likelihood, combined_boundaries, args=(scores, defaults)).x\n",
    "\n",
    "    memo[(min_score, max_score)] = optimized_boundaries\n",
    "    return optimized_boundaries\n",
    "\n",
    "def simple_bucketing(scores, defaults, min_score, max_score):\n",
    "    return np.linspace(min_score, max_score, 3)\n",
    "\n",
    "def combine_boundaries(left_boundaries, right_boundaries):\n",
    "    return np.concatenate((left_boundaries[:-1], right_boundaries[1:]))\n",
    "\n",
    "def create_rating_map(optimal_boundaries):\n",
    "    rating_map = {}\n",
    "    for i in range(len(optimal_boundaries) - 1):\n",
    "        rating_map[f\"Rating {i + 1}\"] = (optimal_boundaries[i], optimal_boundaries[i + 1])\n",
    "    return rating_map\n",
    "\n",
    "optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n",
    "rating_map = create_rating_map(optimal_boundaries)\n",
    "\n",
    "print(\"Optimized Boundaries:\", optimal_boundaries)\n",
    "print(\"Rating Map:\", rating_map)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimized Boundaries: [408.        435.5       490.5       545.5       600.9999951 656.5\n",
      " 711.5       766.5       822.        850.       ]\n",
      "Rating Map: {'Rating 1': (822.0, 850.0), 'Rating 2': (766.5, 822.0), 'Rating 3': (711.5, 766.5), 'Rating 4': (656.5, 711.5), 'Rating 5': (600.9999951015242, 656.5), 'Rating 6': (545.5, 600.9999951015242), 'Rating 7': (490.5, 545.5), 'Rating 8': (435.5, 490.5), 'Rating 9': (408.0, 435.5)}\n"
     ]
    }
   ],
   "source": [
    "# We are told to create a rating map that maps the FICO score of the borrowers to a rating where a lower rating signifies a better credit score.\n",
    "\n",
    "def create_rating_map(optimal_boundaries):\n",
    "    rating_map = {}\n",
    "    l = len(optimal_boundaries)\n",
    "    for i in range(l - 1,0,-1):\n",
    "        rating_map[f\"Rating {l-i}\"] = (optimal_boundaries[i-1], optimal_boundaries[i])\n",
    "    return rating_map\n",
    "\n",
    "optimal_boundaries = optimize_boundaries(data['fico_score'], data['default'], min(data['fico_score']), max(data['fico_score']))\n",
    "rating_map = create_rating_map(optimal_boundaries)\n",
    "\n",
    "print(\"Optimized Boundaries:\", optimal_boundaries)\n",
    "print(\"Rating Map:\", rating_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}