{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "61ef78a3", "metadata": {}, "outputs": [], "source": [ "from pycaret.classification import *\n", "from pycaret.datasets import get_data\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "a5e8eefc", "metadata": {}, "source": [ "## Getting the dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "7e484728", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedeposit
058managementmarriedtertiaryno2143yesnounknown5may2611-10unknownno
144techniciansinglesecondaryno29yesnounknown5may1511-10unknownno
233entrepreneurmarriedsecondaryno2yesyesunknown5may761-10unknownno
347blue-collarmarriedunknownno1506yesnounknown5may921-10unknownno
433unknownsingleunknownno1nonounknown5may1981-10unknownno
\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "0 58 management married tertiary no 2143 yes no \n", "1 44 technician single secondary no 29 yes no \n", "2 33 entrepreneur married secondary no 2 yes yes \n", "3 47 blue-collar married unknown no 1506 yes no \n", "4 33 unknown single unknown no 1 no no \n", "\n", " contact day month duration campaign pdays previous poutcome deposit \n", "0 unknown 5 may 261 1 -1 0 unknown no \n", "1 unknown 5 may 151 1 -1 0 unknown no \n", "2 unknown 5 may 76 1 -1 0 unknown no \n", "3 unknown 5 may 92 1 -1 0 unknown no \n", "4 unknown 5 may 198 1 -1 0 unknown no " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#get dataset\n", "dataset = get_data('bank')" ] }, { "cell_type": "code", "execution_count": 3, "id": "c9db90e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(45211, 17)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check the shape of data\n", "dataset.shape" ] }, { "cell_type": "code", "execution_count": 5, "id": "0fb2e781", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age 0\n", "job 0\n", "marital 0\n", "education 0\n", "default 0\n", "balance 0\n", "housing 0\n", "loan 0\n", "contact 0\n", "day 0\n", "month 0\n", "duration 0\n", "campaign 0\n", "pdays 0\n", "previous 0\n", "poutcome 0\n", "deposit 0\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.isna().sum()" ] }, { "cell_type": "code", "execution_count": 6, "id": "a18b10c3", "metadata": {}, "outputs": [], "source": [ "## Hold out unseen data: sample() draws a random 85% of the rows for modeling (38,429 of the 45,211 rows)\n", "data = dataset.sample(frac=0.85, random_state=456)" ] }, { "cell_type": "code", "execution_count": 7, "id": "cb03c457", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedeposit
886329blue-collarmarriedprimaryno25yesnounknown4jun1882-10unknownno
2268840techniciandivorcedsecondaryno237nonocellular25aug875-10unknownno
96157retiredmarriedtertiaryno906yesnounknown7may1171-10unknownno
1022945servicesmarriedprimaryno116yesnounknown11jun2873-10unknownno
2118948blue-collarmarriedprimaryno-83nonocellular14aug1363-10unknownno
......................................................
2572858blue-collardivorcedprimaryno8218yesnocellular19nov141211110failureno
3343028studentsinglesecondaryno0nonocellular20apr1851-10unknownyes
748144blue-collarsingleprimaryno1593yesnounknown29may8283-10unknownyes
459340servicesmarriedprimaryno3559yesnounknown20may1388-10unknownno
4268037managementmarriedtertiaryno0nonocellular15jan42621961otheryes
\n", "

38429 rows × 17 columns

\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "8863 29 blue-collar married primary no 25 yes no \n", "22688 40 technician divorced secondary no 237 no no \n", "961 57 retired married tertiary no 906 yes no \n", "10229 45 services married primary no 116 yes no \n", "21189 48 blue-collar married primary no -83 no no \n", "... ... ... ... ... ... ... ... ... \n", "25728 58 blue-collar divorced primary no 8218 yes no \n", "33430 28 student single secondary no 0 no no \n", "7481 44 blue-collar single primary no 1593 yes no \n", "4593 40 services married primary no 3559 yes no \n", "42680 37 management married tertiary no 0 no no \n", "\n", " contact day month duration campaign pdays previous poutcome \\\n", "8863 unknown 4 jun 188 2 -1 0 unknown \n", "22688 cellular 25 aug 87 5 -1 0 unknown \n", "961 unknown 7 may 117 1 -1 0 unknown \n", "10229 unknown 11 jun 287 3 -1 0 unknown \n", "21189 cellular 14 aug 136 3 -1 0 unknown \n", "... ... ... ... ... ... ... ... ... \n", "25728 cellular 19 nov 141 2 111 10 failure \n", "33430 cellular 20 apr 185 1 -1 0 unknown \n", "7481 unknown 29 may 828 3 -1 0 unknown \n", "4593 unknown 20 may 138 8 -1 0 unknown \n", "42680 cellular 15 jan 426 2 196 1 other \n", "\n", " deposit \n", "8863 no \n", "22688 no \n", "961 no \n", "10229 no \n", "21189 no \n", "... ... \n", "25728 no \n", "33430 yes \n", "7481 yes \n", "4593 no \n", "42680 yes \n", "\n", "[38429 rows x 17 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 8, "id": "35afe966", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedeposit
233entrepreneurmarriedsecondaryno2yesyesunknown5may761-10unknownno
433unknownsingleunknownno1nonounknown5may1981-10unknownno
1645admin.singleunknownno13yesnounknown5may981-10unknownno
4150managementmarriedsecondaryno49yesnounknown5may1802-10unknownno
5232managementmarriedtertiaryno0yesnounknown5may1791-10unknownno
......................................................
4518463retiredmarriedsecondaryno1495nonocellular16nov1381225successno
4518925servicessinglesecondaryno199nonocellular16nov1731925failureno
4520153managementmarriedtertiaryno583nonocellular17nov22611844successyes
4520771retireddivorcedprimaryno1729nonocellular17nov4562-10unknownyes
4521037entrepreneurmarriedsecondaryno2971nonocellular17nov361218811otherno
\n", "

6782 rows × 17 columns

\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "2 33 entrepreneur married secondary no 2 yes yes \n", "4 33 unknown single unknown no 1 no no \n", "16 45 admin. single unknown no 13 yes no \n", "41 50 management married secondary no 49 yes no \n", "52 32 management married tertiary no 0 yes no \n", "... ... ... ... ... ... ... ... ... \n", "45184 63 retired married secondary no 1495 no no \n", "45189 25 services single secondary no 199 no no \n", "45201 53 management married tertiary no 583 no no \n", "45207 71 retired divorced primary no 1729 no no \n", "45210 37 entrepreneur married secondary no 2971 no no \n", "\n", " contact day month duration campaign pdays previous poutcome \\\n", "2 unknown 5 may 76 1 -1 0 unknown \n", "4 unknown 5 may 198 1 -1 0 unknown \n", "16 unknown 5 may 98 1 -1 0 unknown \n", "41 unknown 5 may 180 2 -1 0 unknown \n", "52 unknown 5 may 179 1 -1 0 unknown \n", "... ... ... ... ... ... ... ... ... \n", "45184 cellular 16 nov 138 1 22 5 success \n", "45189 cellular 16 nov 173 1 92 5 failure \n", "45201 cellular 17 nov 226 1 184 4 success \n", "45207 cellular 17 nov 456 2 -1 0 unknown \n", "45210 cellular 17 nov 361 2 188 11 other \n", "\n", " deposit \n", "2 no \n", "4 no \n", "16 no \n", "41 no \n", "52 no \n", "... ... 
\n", "45184 no \n", "45189 no \n", "45201 yes \n", "45207 yes \n", "45210 no \n", "\n", "[6782 rows x 17 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# remove from the original dataset this random data\n", "data_unseen = dataset.drop(data.index)\n", "data_unseen" ] }, { "cell_type": "code", "execution_count": 9, "id": "31f3169f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data for Modeling: (38429, 17)\n", "Unseen Data For Predictions: (6782, 17)\n" ] } ], "source": [ "# Reseting the index of both datasets\n", "data.reset_index(inplace=True, drop=True)\n", "data_unseen.reset_index(inplace=True, drop=True)\n", "print('Data for Modeling: ' + str(data.shape))\n", "print('Unseen Data For Predictions: ' + str(data_unseen.shape))" ] }, { "cell_type": "code", "execution_count": 10, "id": "8fa0278a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0Session id321
1Targetdeposit
2Target typeBinary
3Target mappingno: 0, yes: 1
4Original data shape(38429, 17)
5Transformed data shape(38429, 49)
6Transformed train set shape(26900, 49)
7Transformed test set shape(11529, 49)
8Ordinal features3
9Numeric features7
10Categorical features9
11PreprocessTrue
12Imputation typesimple
13Numeric imputationmean
14Categorical imputationmode
15Maximum one-hot encoding25
16Encoding methodNone
17Fold GeneratorStratifiedKFold
18Fold Number10
19CPU Jobs-1
20Use GPUFalse
21Log ExperimentFalse
22Experiment Nameclf-default-name
23USIdfd7
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model_setup = setup(data=data, target='deposit', session_id=321)" ] }, { "cell_type": "markdown", "id": "a2e17f35", "metadata": {}, "source": [ "## Compare Model" ] }, { "cell_type": "code", "execution_count": 11, "id": "c5ff181d", "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelAccuracyAUCRecallPrec.F1KappaMCCTT (Sec)
catboostCatBoost Classifier0.90910.93640.48390.64950.55410.50470.51181.4950
lightgbmLight Gradient Boosting Machine0.90810.93470.48140.64320.54990.50000.50691.4960
xgboostExtreme Gradient Boosting0.90720.93090.48840.63440.55130.50050.50621.2350
gbcGradient Boosting Classifier0.90560.92620.40540.65560.50030.45140.46771.4290
rfRandom Forest Classifier0.90390.92670.36530.66220.47030.42230.44531.5270
lrLogistic Regression0.90150.90380.34750.64590.45100.40230.42612.5670
ldaLinear Discriminant Analysis0.90020.90780.44320.59870.50890.45480.46141.2640
ridgeRidge Classifier0.90000.00000.28380.67160.39820.35340.39311.4140
adaAda Boost Classifier0.89970.90930.37990.61430.46860.41680.43191.3580
etExtra Trees Classifier0.89840.90500.32480.62920.42790.37830.40361.6490
dummyDummy Classifier0.88320.50000.00000.00000.00000.00000.00001.2130
knnK Neighbors Classifier0.88190.75840.26440.48990.34320.28470.30122.0930
dtDecision Tree Classifier0.87360.70370.48200.46110.47090.39920.39961.4360
nbNaive Bayes0.85970.82500.52150.41960.46490.38530.38841.3590
qdaQuadratic Discriminant Analysis0.85860.82310.47730.42400.44300.36400.36791.4850
svmSVM - Linear Kernel0.83020.00000.20810.26740.20540.12240.13471.4500
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/69 [00:00\n" ] } ], "source": [ "print(best_model)" ] }, { "cell_type": "markdown", "id": "bddc36a6", "metadata": {}, "source": [ "## Create the Model" ] }, { "cell_type": "code", "execution_count": 13, "id": "b3dcb641", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameReferenceTurbo
ID
lrLogistic Regressionsklearn.linear_model._logistic.LogisticRegressionTrue
knnK Neighbors Classifiersklearn.neighbors._classification.KNeighborsCl...True
nbNaive Bayessklearn.naive_bayes.GaussianNBTrue
dtDecision Tree Classifiersklearn.tree._classes.DecisionTreeClassifierTrue
svmSVM - Linear Kernelsklearn.linear_model._stochastic_gradient.SGDC...True
rbfsvmSVM - Radial Kernelsklearn.svm._classes.SVCFalse
gpcGaussian Process Classifiersklearn.gaussian_process._gpc.GaussianProcessC...False
mlpMLP Classifiersklearn.neural_network._multilayer_perceptron....False
ridgeRidge Classifiersklearn.linear_model._ridge.RidgeClassifierTrue
rfRandom Forest Classifiersklearn.ensemble._forest.RandomForestClassifierTrue
qdaQuadratic Discriminant Analysissklearn.discriminant_analysis.QuadraticDiscrim...True
adaAda Boost Classifiersklearn.ensemble._weight_boosting.AdaBoostClas...True
gbcGradient Boosting Classifiersklearn.ensemble._gb.GradientBoostingClassifierTrue
ldaLinear Discriminant Analysissklearn.discriminant_analysis.LinearDiscrimina...True
etExtra Trees Classifiersklearn.ensemble._forest.ExtraTreesClassifierTrue
xgboostExtreme Gradient Boostingxgboost.sklearn.XGBClassifierTrue
lightgbmLight Gradient Boosting Machinelightgbm.sklearn.LGBMClassifierTrue
catboostCatBoost Classifiercatboost.core.CatBoostClassifierTrue
dummyDummy Classifiersklearn.dummy.DummyClassifierTrue
\n", "
" ], "text/plain": [ " Name \\\n", "ID \n", "lr Logistic Regression \n", "knn K Neighbors Classifier \n", "nb Naive Bayes \n", "dt Decision Tree Classifier \n", "svm SVM - Linear Kernel \n", "rbfsvm SVM - Radial Kernel \n", "gpc Gaussian Process Classifier \n", "mlp MLP Classifier \n", "ridge Ridge Classifier \n", "rf Random Forest Classifier \n", "qda Quadratic Discriminant Analysis \n", "ada Ada Boost Classifier \n", "gbc Gradient Boosting Classifier \n", "lda Linear Discriminant Analysis \n", "et Extra Trees Classifier \n", "xgboost Extreme Gradient Boosting \n", "lightgbm Light Gradient Boosting Machine \n", "catboost CatBoost Classifier \n", "dummy Dummy Classifier \n", "\n", " Reference Turbo \n", "ID \n", "lr sklearn.linear_model._logistic.LogisticRegression True \n", "knn sklearn.neighbors._classification.KNeighborsCl... True \n", "nb sklearn.naive_bayes.GaussianNB True \n", "dt sklearn.tree._classes.DecisionTreeClassifier True \n", "svm sklearn.linear_model._stochastic_gradient.SGDC... True \n", "rbfsvm sklearn.svm._classes.SVC False \n", "gpc sklearn.gaussian_process._gpc.GaussianProcessC... False \n", "mlp sklearn.neural_network._multilayer_perceptron.... False \n", "ridge sklearn.linear_model._ridge.RidgeClassifier True \n", "rf sklearn.ensemble._forest.RandomForestClassifier True \n", "qda sklearn.discriminant_analysis.QuadraticDiscrim... True \n", "ada sklearn.ensemble._weight_boosting.AdaBoostClas... True \n", "gbc sklearn.ensemble._gb.GradientBoostingClassifier True \n", "lda sklearn.discriminant_analysis.LinearDiscrimina... 
True \n", "et sklearn.ensemble._forest.ExtraTreesClassifier True \n", "xgboost xgboost.sklearn.XGBClassifier True \n", "lightgbm lightgbm.sklearn.LGBMClassifier True \n", "catboost catboost.core.CatBoostClassifier True \n", "dummy sklearn.dummy.DummyClassifier True " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "models()" ] }, { "cell_type": "code", "execution_count": 14, "id": "90173a54", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 AccuracyAUCRecallPrec.F1KappaMCC
Fold       
00.91640.93360.48410.70700.57470.53010.5418
10.90780.92760.38540.68750.49390.44760.4703
20.90040.92530.36620.62500.46180.41100.4289
30.90630.92070.42360.65200.51350.46430.4775
40.90780.93690.41080.67190.50990.46220.4793
50.90590.92680.40760.65640.50290.45410.4699
60.90110.93090.42360.61010.50000.44710.4563
70.90480.92210.37140.66860.47760.42990.4524
80.90110.91400.36510.63540.46370.41360.4329
90.90450.92400.41590.64220.50480.45460.4678
Mean0.90560.92620.40540.65560.50030.45140.4677
Std0.00440.00630.03420.02780.03030.03170.0297
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/4 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 AccuracyAUCRecallPrec.F1KappaMCC
Fold       
00.91040.93730.48410.65800.55780.50920.5167
10.90820.92950.42990.66500.52220.47400.4879
20.90450.92780.48410.61540.54190.48940.4938
30.90970.92530.51590.64030.57140.52160.5254
40.91000.94180.47130.66070.55020.50180.5106
50.90410.92410.46820.61760.53260.48030.4860
60.90590.93260.49040.62350.54900.49740.5018
70.90370.92880.46030.61970.52820.47590.4824
80.90110.92070.45400.60340.51810.46420.4701
90.90710.92830.48570.63490.55040.49960.5051
Mean0.90650.92960.47440.63380.54220.49130.4980
Std0.00300.00590.02220.02040.01590.01690.0162
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/7 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## AUC Plot\n", "\n", "plot_model(tuned_gbc, plot = 'auc')\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "92a67823", "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Confusion matrix\n", "\n", "plot_model(tuned_gbc, plot = 'confusion_matrix')" ] }, { "cell_type": "markdown", "id": "bda74342", "metadata": {}, "source": [ "## Evaluating the model\n" ] }, { "cell_type": "code", "execution_count": 21, "id": "00249ace", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "53ad94081d7142a9948bf6bb6cfb7d1d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## evaluate_model() displays an interactive widget for exploring the model's performance plots\n", "evaluate_model(tuned_gbc)" ] }, { "cell_type": "markdown", "id": "ebaf37a8", "metadata": {}, "source": [ "## Finalizing the Model" ] }, { "cell_type": "code", "execution_count": 22, "id": "ae1c129c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(memory=FastMemory(location=C:\\Users\\owner\\AppData\\Local\\Temp\\joblib),\n",
       "         steps=[('label_encoding',\n",
       "                 TransformerWrapperWithInverse(exclude=None, include=None,\n",
       "                                               transformer=LabelEncoder())),\n",
       "                ('numerical_imputer',\n",
       "                 TransformerWrapper(exclude=None,\n",
       "                                    include=['age', 'balance', 'day',\n",
       "                                             'duration', 'campaign', 'pdays',\n",
       "                                             'previous'],\n",
       "                                    transformer=SimpleImputer(add_indica...\n",
       "                                            criterion='friedman_mse', init=None,\n",
       "                                            learning_rate=0.3, loss='log_loss',\n",
       "                                            max_depth=5, max_features='sqrt',\n",
       "                                            max_leaf_nodes=None,\n",
       "                                            min_impurity_decrease=0.01,\n",
       "                                            min_samples_leaf=3,\n",
       "                                            min_samples_split=4,\n",
       "                                            min_weight_fraction_leaf=0.0,\n",
       "                                            n_estimators=80,\n",
       "                                            n_iter_no_change=None,\n",
       "                                            random_state=321, subsample=0.95,\n",
       "                                            tol=0.0001, validation_fraction=0.1,\n",
       "                                            verbose=0, warm_start=False))],\n",
       "         verbose=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(memory=FastMemory(location=C:\\Users\\owner\\AppData\\Local\\Temp\\joblib),\n", " steps=[('label_encoding',\n", " TransformerWrapperWithInverse(exclude=None, include=None,\n", " transformer=LabelEncoder())),\n", " ('numerical_imputer',\n", " TransformerWrapper(exclude=None,\n", " include=['age', 'balance', 'day',\n", " 'duration', 'campaign', 'pdays',\n", " 'previous'],\n", " transformer=SimpleImputer(add_indica...\n", " criterion='friedman_mse', init=None,\n", " learning_rate=0.3, loss='log_loss',\n", " max_depth=5, max_features='sqrt',\n", " max_leaf_nodes=None,\n", " min_impurity_decrease=0.01,\n", " min_samples_leaf=3,\n", " min_samples_split=4,\n", " min_weight_fraction_leaf=0.0,\n", " n_estimators=80,\n", " n_iter_no_change=None,\n", " random_state=321, subsample=0.95,\n", " tol=0.0001, validation_fraction=0.1,\n", " verbose=0, warm_start=False))],\n", " verbose=False)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_gbc = finalize_model(tuned_gbc)\n", "final_gbc" ] }, { "cell_type": "code", "execution_count": 23, "id": "7a4bcc6e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pipeline(memory=FastMemory(location=C:\\Users\\owner\\AppData\\Local\\Temp\\joblib),\n", " steps=[('label_encoding',\n", " TransformerWrapperWithInverse(exclude=None, include=None,\n", " transformer=LabelEncoder())),\n", " ('numerical_imputer',\n", " TransformerWrapper(exclude=None,\n", " include=['age', 'balance', 'day',\n", " 'duration', 'campaign', 'pdays',\n", " 'previous'],\n", " transformer=SimpleImputer(add_indica...\n", " criterion='friedman_mse', init=None,\n", " learning_rate=0.3, loss='log_loss',\n", " max_depth=5, max_features='sqrt',\n", " max_leaf_nodes=None,\n", " min_impurity_decrease=0.01,\n", " min_samples_leaf=3,\n", " min_samples_split=4,\n", " min_weight_fraction_leaf=0.0,\n", " n_estimators=80,\n", " n_iter_no_change=None,\n", " 
random_state=321, subsample=0.95,\n", " tol=0.0001, validation_fraction=0.1,\n", " verbose=0, warm_start=False))],\n", " verbose=False)\n" ] } ], "source": [ "#Final gbc parameters for deployment\n", "print(final_gbc)" ] }, { "cell_type": "markdown", "id": "36670c58", "metadata": {}, "source": [ "## Predicting with the model" ] }, { "cell_type": "code", "execution_count": 24, "id": "1295a1e6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelAccuracyAUCRecallPrec.F1KappaMCC
0Gradient Boosting Classifier0.93000.95630000.62170.6289
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedepositprediction_labelprediction_score
652842managementmarriedtertiaryno144nonocellular4mar1481874failureyesno0.5555
3403251unemployedmarriedsecondaryno636nonocellular30jan3211-10unknownnono0.9481
3075436blue-collarsinglesecondaryno2235yesnocellular20nov2872-10unknownnono0.9765
3445648servicesmarriedsecondaryno116yesnotelephone20apr704-10unknownnono0.9978
1345930unknownsingletertiaryno6836nonocellular27feb303-10unknownnono0.7563
............................................................
572856servicesmarriedsecondaryno83nonocellular27aug2611-10unknownnono0.9970
2552042blue-collardivorcedunknownno0nonocellular7jul642-10unknownnono0.9964
2623231servicesmarriedsecondaryno428yesnounknown21may2721-10unknownnono0.9913
1943430blue-collarmarriedsecondaryno664noyestelephone14may571-10unknownnono0.9900
3019255managementmarriedtertiaryno236nonocellular4aug2001-10unknownnono0.9249
\n", "

11529 rows × 19 columns

\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "6528 42 management married tertiary no 144 no no \n", "34032 51 unemployed married secondary no 636 no no \n", "30754 36 blue-collar single secondary no 2235 yes no \n", "34456 48 services married secondary no 116 yes no \n", "13459 30 unknown single tertiary no 6836 no no \n", "... ... ... ... ... ... ... ... ... \n", "5728 56 services married secondary no 83 no no \n", "25520 42 blue-collar divorced unknown no 0 no no \n", "26232 31 services married secondary no 428 yes no \n", "19434 30 blue-collar married secondary no 664 no yes \n", "30192 55 management married tertiary no 236 no no \n", "\n", " contact day month duration campaign pdays previous poutcome \\\n", "6528 cellular 4 mar 148 1 87 4 failure \n", "34032 cellular 30 jan 321 1 -1 0 unknown \n", "30754 cellular 20 nov 287 2 -1 0 unknown \n", "34456 telephone 20 apr 70 4 -1 0 unknown \n", "13459 cellular 27 feb 30 3 -1 0 unknown \n", "... ... ... ... ... ... ... ... ... \n", "5728 cellular 27 aug 26 11 -1 0 unknown \n", "25520 cellular 7 jul 64 2 -1 0 unknown \n", "26232 unknown 21 may 272 1 -1 0 unknown \n", "19434 telephone 14 may 57 1 -1 0 unknown \n", "30192 cellular 4 aug 200 1 -1 0 unknown \n", "\n", " deposit prediction_label prediction_score \n", "6528 yes no 0.5555 \n", "34032 no no 0.9481 \n", "30754 no no 0.9765 \n", "34456 no no 0.9978 \n", "13459 no no 0.7563 \n", "... ... ... ... 
\n", "5728 no no 0.9970 \n", "25520 no no 0.9964 \n", "26232 no no 0.9913 \n", "19434 no no 0.9900 \n", "30192 no no 0.9249 \n", "\n", "[11529 rows x 19 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_model(final_gbc)" ] }, { "cell_type": "code", "execution_count": 25, "id": "c1816ccd", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelAccuracyAUCRecallPrec.F1KappaMCC
0Gradient Boosting Classifier0.90680.93400000.50020.5059
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedepositprediction_labelprediction_score
033entrepreneurmarriedsecondaryno2yesyesunknown5may761-10unknownnono0.9993
133unknownsingleunknownno1nonounknown5may1981-10unknownnono0.9978
245admin.singleunknownno13yesnounknown5may981-10unknownnono0.9979
350managementmarriedsecondaryno49yesnounknown5may1802-10unknownnono0.9967
432managementmarriedtertiaryno0yesnounknown5may1791-10unknownnono0.9967
\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "0 33 entrepreneur married secondary no 2 yes yes \n", "1 33 unknown single unknown no 1 no no \n", "2 45 admin. single unknown no 13 yes no \n", "3 50 management married secondary no 49 yes no \n", "4 32 management married tertiary no 0 yes no \n", "\n", " contact day month duration campaign pdays previous poutcome deposit \\\n", "0 unknown 5 may 76 1 -1 0 unknown no \n", "1 unknown 5 may 198 1 -1 0 unknown no \n", "2 unknown 5 may 98 1 -1 0 unknown no \n", "3 unknown 5 may 180 2 -1 0 unknown no \n", "4 unknown 5 may 179 1 -1 0 unknown no \n", "\n", " prediction_label prediction_score \n", "0 no 0.9993 \n", "1 no 0.9978 \n", "2 no 0.9979 \n", "3 no 0.9967 \n", "4 no 0.9967 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "unseen_predictions = predict_model(final_gbc, data=data_unseen)\n", "unseen_predictions.head()" ] }, { "cell_type": "markdown", "id": "1b576255", "metadata": {}, "source": [ "## Save Model " ] }, { "cell_type": "code", "execution_count": 26, "id": "c584299c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Saved\n" ] }, { "data": { "text/plain": [ "(Pipeline(memory=FastMemory(location=C:\\Users\\owner\\AppData\\Local\\Temp\\joblib),\n", " steps=[('label_encoding',\n", " TransformerWrapperWithInverse(exclude=None, include=None,\n", " transformer=LabelEncoder())),\n", " ('numerical_imputer',\n", " TransformerWrapper(exclude=None,\n", " include=['age', 'balance', 'day',\n", " 'duration', 'campaign', 'pdays',\n", " 'previous'],\n", " transformer=SimpleImputer(add_indica...\n", " criterion='friedman_mse', init=None,\n", " learning_rate=0.3, loss='log_loss',\n", " max_depth=5, max_features='sqrt',\n", " max_leaf_nodes=None,\n", " min_impurity_decrease=0.01,\n", " min_samples_leaf=3,\n", " min_samples_split=4,\n", " 
min_weight_fraction_leaf=0.0,\n", " n_estimators=80,\n", " n_iter_no_change=None,\n", " random_state=321, subsample=0.95,\n", " tol=0.0001, validation_fraction=0.1,\n", " verbose=0, warm_start=False))],\n", " verbose=False),\n", " './Pycaret/Final_gbc.pkl')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "save_model(final_gbc, './Pycaret/Final_gbc')" ] }, { "cell_type": "markdown", "id": "3ec75d79", "metadata": {}, "source": [ "## Load Model" ] }, { "cell_type": "code", "execution_count": 27, "id": "ec0008ee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Loaded\n" ] } ], "source": [ "saved_final_gbc = load_model('./Pycaret/Final_gbc')" ] }, { "cell_type": "code", "execution_count": 28, "id": "8b1d03d7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelAccuracyAUCRecallPrec.F1KappaMCC
0Gradient Boosting Classifier0.90680.93400000.50020.5059
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "new_prediction = predict_model(saved_final_gbc, data=data_unseen)" ] }, { "cell_type": "code", "execution_count": 29, "id": "509c9d77", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedepositprediction_labelprediction_score
033entrepreneurmarriedsecondaryno2yesyesunknown5may761-10unknownnono0.9993
133unknownsingleunknownno1nonounknown5may1981-10unknownnono0.9978
245admin.singleunknownno13yesnounknown5may981-10unknownnono0.9979
350managementmarriedsecondaryno49yesnounknown5may1802-10unknownnono0.9967
432managementmarriedtertiaryno0yesnounknown5may1791-10unknownnono0.9967
\n", "
" ], "text/plain": [ " age job marital education default balance housing loan \\\n", "0 33 entrepreneur married secondary no 2 yes yes \n", "1 33 unknown single unknown no 1 no no \n", "2 45 admin. single unknown no 13 yes no \n", "3 50 management married secondary no 49 yes no \n", "4 32 management married tertiary no 0 yes no \n", "\n", " contact day month duration campaign pdays previous poutcome deposit \\\n", "0 unknown 5 may 76 1 -1 0 unknown no \n", "1 unknown 5 may 198 1 -1 0 unknown no \n", "2 unknown 5 may 98 1 -1 0 unknown no \n", "3 unknown 5 may 180 2 -1 0 unknown no \n", "4 unknown 5 may 179 1 -1 0 unknown no \n", "\n", " prediction_label prediction_score \n", "0 no 0.9993 \n", "1 no 0.9978 \n", "2 no 0.9979 \n", "3 no 0.9967 \n", "4 no 0.9967 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_prediction.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "beeb95ab", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }