{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Credit Risk Classifications\n", "\n", "by: Keith Qu\n", "\n", "This notebook uses the data set from Kaggle's Give Me Some Credit competition, which contains 150,000 observations with 10 features. The objective is to predict whether a borrower will experience a serious delinquency (90+ days past due) within 2 years.\n", "\n", "Using the methods described below, we attained an AUC score of 0.866752 on the private leaderboard.\n", "\n", "Contents:\n", "\n", "" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.manifold import TSNE\n", "from scipy.stats import randint, uniform\n", "from scipy import linalg\n", "from sklearn.decomposition import PCA\n", "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", "from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split, cross_val_predict, RandomizedSearchCV\n", "from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score\n", "from sklearn.metrics import accuracy_score, r2_score, mean_squared_error\n", "import xgboost as xgb\n", "import random\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Exploration, Cleaning, Creating Features\n", "\n", "We have the following variables:\n", "\n", "