{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Credit Card Fraud Detection\n", "Jaime Avendaño\n", "\n", "This notebook trains a Naive Bayes classifier to detect credit card fraud. The data has already been through a PCA transformation.\n", "The classes are also highly imbalanced: only about 0.17% of the rows have `Class` = 1." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from matplotlib.colors import ListedColormap\n", "import seaborn as sns\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n", "\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.naive_bayes import MultinomialNB, GaussianNB \n", "from sklearn.metrics import classification_report, confusion_matrix\n", "\n", "from imblearn.over_sampling import SMOTE" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Time | \n", "V1 | \n", "V2 | \n", "V3 | \n", "V4 | \n", "V5 | \n", "V6 | \n", "V7 | \n", "V8 | \n", "V9 | \n", "... | \n", "V21 | \n", "V22 | \n", "V23 | \n", "V24 | \n", "V25 | \n", "V26 | \n", "V27 | \n", "V28 | \n", "Amount | \n", "Class | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.0 | \n", "-1.359807 | \n", "-0.072781 | \n", "2.536347 | \n", "1.378155 | \n", "-0.338321 | \n", "0.462388 | \n", "0.239599 | \n", "0.098698 | \n", "0.363787 | \n", "... | \n", "-0.018307 | \n", "0.277838 | \n", "-0.110474 | \n", "0.066928 | \n", "0.128539 | \n", "-0.189115 | \n", "0.133558 | \n", "-0.021053 | \n", "149.62 | \n", "0 | \n", "
| 1 | \n", "0.0 | \n", "1.191857 | \n", "0.266151 | \n", "0.166480 | \n", "0.448154 | \n", "0.060018 | \n", "-0.082361 | \n", "-0.078803 | \n", "0.085102 | \n", "-0.255425 | \n", "... | \n", "-0.225775 | \n", "-0.638672 | \n", "0.101288 | \n", "-0.339846 | \n", "0.167170 | \n", "0.125895 | \n", "-0.008983 | \n", "0.014724 | \n", "2.69 | \n", "0 | \n", "
| 2 | \n", "1.0 | \n", "-1.358354 | \n", "-1.340163 | \n", "1.773209 | \n", "0.379780 | \n", "-0.503198 | \n", "1.800499 | \n", "0.791461 | \n", "0.247676 | \n", "-1.514654 | \n", "... | \n", "0.247998 | \n", "0.771679 | \n", "0.909412 | \n", "-0.689281 | \n", "-0.327642 | \n", "-0.139097 | \n", "-0.055353 | \n", "-0.059752 | \n", "378.66 | \n", "0 | \n", "
| 3 | \n", "1.0 | \n", "-0.966272 | \n", "-0.185226 | \n", "1.792993 | \n", "-0.863291 | \n", "-0.010309 | \n", "1.247203 | \n", "0.237609 | \n", "0.377436 | \n", "-1.387024 | \n", "... | \n", "-0.108300 | \n", "0.005274 | \n", "-0.190321 | \n", "-1.175575 | \n", "0.647376 | \n", "-0.221929 | \n", "0.062723 | \n", "0.061458 | \n", "123.50 | \n", "0 | \n", "
| 4 | \n", "2.0 | \n", "-1.158233 | \n", "0.877737 | \n", "1.548718 | \n", "0.403034 | \n", "-0.407193 | \n", "0.095921 | \n", "0.592941 | \n", "-0.270533 | \n", "0.817739 | \n", "... | \n", "-0.009431 | \n", "0.798278 | \n", "-0.137458 | \n", "0.141267 | \n", "-0.206010 | \n", "0.502292 | \n", "0.219422 | \n", "0.215153 | \n", "69.99 | \n", "0 | \n", "
5 rows × 31 columns
\n", "| \n", " | time | \n", "v1 | \n", "v2 | \n", "v3 | \n", "v4 | \n", "v5 | \n", "v6 | \n", "v7 | \n", "v8 | \n", "v9 | \n", "... | \n", "v21 | \n", "v22 | \n", "v23 | \n", "v24 | \n", "v25 | \n", "v26 | \n", "v27 | \n", "v28 | \n", "amount | \n", "class | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "284807.000000 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "... | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "2.848070e+05 | \n", "284807.000000 | \n", "284807.000000 | \n", "
| mean | \n", "94813.859575 | \n", "3.919560e-15 | \n", "5.688174e-16 | \n", "-8.769071e-15 | \n", "2.782312e-15 | \n", "-1.552563e-15 | \n", "2.010663e-15 | \n", "-1.694249e-15 | \n", "-1.927028e-16 | \n", "-3.137024e-15 | \n", "... | \n", "1.537294e-16 | \n", "7.959909e-16 | \n", "5.367590e-16 | \n", "4.458112e-15 | \n", "1.453003e-15 | \n", "1.699104e-15 | \n", "-3.660161e-16 | \n", "-1.206049e-16 | \n", "88.349619 | \n", "0.001727 | \n", "
| std | \n", "47488.145955 | \n", "1.958696e+00 | \n", "1.651309e+00 | \n", "1.516255e+00 | \n", "1.415869e+00 | \n", "1.380247e+00 | \n", "1.332271e+00 | \n", "1.237094e+00 | \n", "1.194353e+00 | \n", "1.098632e+00 | \n", "... | \n", "7.345240e-01 | \n", "7.257016e-01 | \n", "6.244603e-01 | \n", "6.056471e-01 | \n", "5.212781e-01 | \n", "4.822270e-01 | \n", "4.036325e-01 | \n", "3.300833e-01 | \n", "250.120109 | \n", "0.041527 | \n", "
| min | \n", "0.000000 | \n", "-5.640751e+01 | \n", "-7.271573e+01 | \n", "-4.832559e+01 | \n", "-5.683171e+00 | \n", "-1.137433e+02 | \n", "-2.616051e+01 | \n", "-4.355724e+01 | \n", "-7.321672e+01 | \n", "-1.343407e+01 | \n", "... | \n", "-3.483038e+01 | \n", "-1.093314e+01 | \n", "-4.480774e+01 | \n", "-2.836627e+00 | \n", "-1.029540e+01 | \n", "-2.604551e+00 | \n", "-2.256568e+01 | \n", "-1.543008e+01 | \n", "0.000000 | \n", "0.000000 | \n", "
| 25% | \n", "54201.500000 | \n", "-9.203734e-01 | \n", "-5.985499e-01 | \n", "-8.903648e-01 | \n", "-8.486401e-01 | \n", "-6.915971e-01 | \n", "-7.682956e-01 | \n", "-5.540759e-01 | \n", "-2.086297e-01 | \n", "-6.430976e-01 | \n", "... | \n", "-2.283949e-01 | \n", "-5.423504e-01 | \n", "-1.618463e-01 | \n", "-3.545861e-01 | \n", "-3.171451e-01 | \n", "-3.269839e-01 | \n", "-7.083953e-02 | \n", "-5.295979e-02 | \n", "5.600000 | \n", "0.000000 | \n", "
| 50% | \n", "84692.000000 | \n", "1.810880e-02 | \n", "6.548556e-02 | \n", "1.798463e-01 | \n", "-1.984653e-02 | \n", "-5.433583e-02 | \n", "-2.741871e-01 | \n", "4.010308e-02 | \n", "2.235804e-02 | \n", "-5.142873e-02 | \n", "... | \n", "-2.945017e-02 | \n", "6.781943e-03 | \n", "-1.119293e-02 | \n", "4.097606e-02 | \n", "1.659350e-02 | \n", "-5.213911e-02 | \n", "1.342146e-03 | \n", "1.124383e-02 | \n", "22.000000 | \n", "0.000000 | \n", "
| 75% | \n", "139320.500000 | \n", "1.315642e+00 | \n", "8.037239e-01 | \n", "1.027196e+00 | \n", "7.433413e-01 | \n", "6.119264e-01 | \n", "3.985649e-01 | \n", "5.704361e-01 | \n", "3.273459e-01 | \n", "5.971390e-01 | \n", "... | \n", "1.863772e-01 | \n", "5.285536e-01 | \n", "1.476421e-01 | \n", "4.395266e-01 | \n", "3.507156e-01 | \n", "2.409522e-01 | \n", "9.104512e-02 | \n", "7.827995e-02 | \n", "77.165000 | \n", "0.000000 | \n", "
| max | \n", "172792.000000 | \n", "2.454930e+00 | \n", "2.205773e+01 | \n", "9.382558e+00 | \n", "1.687534e+01 | \n", "3.480167e+01 | \n", "7.330163e+01 | \n", "1.205895e+02 | \n", "2.000721e+01 | \n", "1.559499e+01 | \n", "... | \n", "2.720284e+01 | \n", "1.050309e+01 | \n", "2.252841e+01 | \n", "4.584549e+00 | \n", "7.519589e+00 | \n", "3.517346e+00 | \n", "3.161220e+01 | \n", "3.384781e+01 | \n", "25691.160000 | \n", "1.000000 | \n", "
8 rows × 31 columns
\n", "