{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Customer retention - Thinkful final exam" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import confusion_matrix\n", "sns.set_style('darkgrid')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Let's import the data and have a first look.\n", "\n", "We use the id column as our index, and try to answer some initial questions.\n", "\n", "- Are there class imbalances?\n", "- Are some features irrelevant?\n", "- Are there immediately obvious trends?\n", "- Are there problems with missing or NaN values?" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | purch_amt | \n", "gender | \n", "card_on_file | \n", "age | \n", "days_since_last_purch | \n", "loyalty | \n", "
---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "19.58 | \n", "male | \n", "no | \n", "31.0 | \n", "35.0 | \n", "False | \n", "
1 | \n", "65.16 | \n", "male | \n", "yes | \n", "23.0 | \n", "61.0 | \n", "False | \n", "
2 | \n", "40.60 | \n", "female | \n", "no | \n", "36.0 | \n", "49.0 | \n", "False | \n", "
3 | \n", "38.01 | \n", "male | \n", "yes | \n", "47.0 | \n", "57.0 | \n", "False | \n", "
4 | \n", "22.32 | \n", "female | \n", "yes | \n", "5.0 | \n", "39.0 | \n", "False | \n", "