# PyCaret 2 Classification Example
This notebook was created using PyCaret 2.0. Last updated: 28-07-2020

In [1]:
# Report which PyCaret build this notebook runs against
from pycaret.utils import version
version()

pycaret-nightly-0.39


# 1. Data Repository

In [2]:
# Load the 'index' entry, which lists the datasets available through get_data
from pycaret.datasets import get_data
index = get_data("index")

Unnamed: 0,Dataset,Data Types,Default Task,Target Variable,# Instances,# Attributes,Missing Values
0,anomaly,Multivariate,Anomaly Detection,,1000,10,N
1,france,Multivariate,Association Rule Mining,"InvoiceNo, Description",8557,8,N
2,germany,Multivariate,Association Rule Mining,"InvoiceNo, Description",9495,8,N
3,bank,Multivariate,Classification (Binary),deposit,45211,17,N
4,blood,Multivariate,Classification (Binary),Class,748,5,N
5,cancer,Multivariate,Classification (Binary),Class,683,10,N
6,credit,Multivariate,Classification (Binary),default,24000,24,N
7,diabetes,Multivariate,Classification (Binary),Class variable,768,9,N
8,electrical_grid,Multivariate,Classification (Binary),stabf,10000,14,N
9,employee,Multivariate,Classification (Binary),left,14999,10,N


In [4]:
# Load the 'juice' dataset used for the rest of the notebook
data = get_data("juice")

Unnamed: 0,Id,Purchase,WeekofPurchase,StoreID,PriceCH,PriceMM,DiscCH,DiscMM,SpecialCH,SpecialMM,LoyalCH,SalePriceMM,SalePriceCH,PriceDiff,Store7,PctDiscMM,PctDiscCH,ListPriceDiff,STORE
0,1,CH,237,1,1.75,1.99,0.0,0.0,0,0,0.5,1.99,1.75,0.24,No,0.0,0.0,0.24,1
1,2,CH,239,1,1.75,1.99,0.0,0.3,0,1,0.6,1.69,1.75,-0.06,No,0.150754,0.0,0.24,1
2,3,CH,245,1,1.86,2.09,0.17,0.0,0,0,0.68,2.09,1.69,0.4,No,0.0,0.091398,0.23,1
3,4,MM,227,1,1.69,1.69,0.0,0.0,0,0,0.4,1.69,1.69,0.0,No,0.0,0.0,0.0,1
4,5,CH,228,7,1.69,1.69,0.0,0.0,0,0,0.956535,1.69,1.69,0.0,Yes,0.0,0.0,0.0,0


# 2. Initialize Setup

In [5]:
from pycaret.classification import *
# Initialize the experiment: 'Purchase' is the target and session_id fixes the seed.
# log_experiment=True so that the MLflow UI and get_logs() cells at the end of the
# notebook have logged runs to show; experiment_name matches the 'juice' dataset
# loaded above (the previous 'bank1' was a leftover from another dataset).
clf1 = setup(
    data,
    target='Purchase',
    session_id=123,
    log_experiment=True,
    experiment_name='juice1',
)

Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,123
1,Target Type,Binary
2,Label Encoded,"CH: 0, MM: 1"
3,Original Data,"(1070, 19)"
4,Missing Values,False
5,Numeric Features,13
6,Categorical Features,5
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


# 3. Compare Baseline

In [6]:
# Compare the available models and keep 5 of them (n_select=5); the result is iterated over in the next cell
top5 = compare_models(n_select=5)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
0,Logistic Regression,0.8263,0.8959,0.7262,0.8139,0.7644,0.628,0.6338,0.0429
1,Linear Discriminant Analysis,0.8263,0.8938,0.7536,0.7938,0.7713,0.6317,0.6342,0.0089
2,Ridge Classifier,0.8236,0.0,0.7499,0.792,0.768,0.6262,0.6292,0.0045
3,Ada Boost Classifier,0.8075,0.8637,0.7053,0.7837,0.7398,0.5881,0.5924,0.078
4,Gradient Boosting Classifier,0.8062,0.8869,0.7363,0.7651,0.7479,0.5909,0.5939,0.1205
5,CatBoost Classifier,0.8049,0.8932,0.7326,0.7629,0.7457,0.5878,0.5899,3.5314
6,Extreme Gradient Boosting,0.7914,0.8716,0.7294,0.7367,0.7309,0.5609,0.5633,0.0675
7,Light Gradient Boosting Machine,0.7861,0.8806,0.7053,0.7393,0.7195,0.5471,0.5497,0.0898
8,Quadratic Discriminant Analysis,0.7621,0.824,0.6267,0.7397,0.6678,0.4863,0.5,0.006
9,Random Forest Classifier,0.7608,0.8397,0.6674,0.7124,0.6848,0.4928,0.4974,0.1142


In [7]:
# Tune each of the selected models in turn, preserving their order
top5_tuned = [tune_model(candidate) for candidate in top5]

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7467,0.8426,0.6897,0.6667,0.678,0.4693,0.4695
1,0.8267,0.9475,0.7241,0.8077,0.7636,0.6274,0.6298
2,0.76,0.8201,0.6552,0.7037,0.6786,0.4875,0.4883
3,0.8133,0.907,0.8276,0.7273,0.7742,0.6162,0.62
4,0.7867,0.8973,0.8621,0.6757,0.7576,0.572,0.5856
5,0.8133,0.8906,0.7241,0.7778,0.75,0.6014,0.6023
6,0.8133,0.8833,0.8333,0.7353,0.7812,0.6196,0.6233
7,0.84,0.9222,0.8,0.8,0.8,0.6667,0.6667
8,0.7973,0.8759,0.6897,0.7692,0.7273,0.5667,0.5689
9,0.8514,0.9111,0.7586,0.8462,0.8,0.6823,0.6849


# 4. Create Model

In [None]:
# Train the 'lr' estimator using 5 folds
lr = create_model("lr", fold=5)

In [None]:
# Train the 'dt' estimator with create_model's default settings
dt = create_model("dt")

In [None]:
# Train the 'rf' estimator using 5 folds
rf = create_model("rf", fold=5)

In [None]:
# Display what models() returns with no filter (the following cells read its index as model IDs)
models()

In [None]:
# IDs of the estimators listed under type='ensemble', as a plain list
models(type="ensemble").index.tolist()

In [None]:
# Compare only the estimators listed under type='ensemble', using 3 folds
ensembled_models = compare_models(
    whitelist=models(type="ensemble").index.tolist(),
    fold=3,
)

# 5. Tune Hyperparameters

In [None]:
# Tune the lr model created in section 4, using tune_model's defaults
tuned_lr = tune_model(lr)

In [None]:
# Tune the rf model created in section 4, using tune_model's defaults
tuned_rf = tune_model(rf)

# 6. Ensemble Model

In [None]:
# Ensemble the dt model with ensemble_model's default method
# NOTE(review): the variable name suggests the default is bagging — confirm
bagged_dt = ensemble_model(dt)

In [None]:
# Ensemble the dt model again, this time explicitly with the 'Boosting' method
boosted_dt = ensemble_model(dt, method="Boosting")

# 7. Blend Models

In [None]:
# Blend the boosted and bagged decision-tree ensembles using the 'soft' method
blender = blend_models(
    estimator_list=[boosted_dt, bagged_dt],
    method="soft",
)

# 8. Stack Models

In [None]:
# Stack the two dt ensembles and the tuned rf, with the untuned rf as the meta model
stacker = stack_models(
    estimator_list=[boosted_dt, bagged_dt, tuned_rf],
    meta_model=rf,
)

# 9. Analyze Model

In [None]:
# Plot the rf model using plot_model's default plot type
plot_model(rf)

In [None]:
# Plot the rf model with the 'confusion_matrix' plot type
plot_model(rf, plot="confusion_matrix")

In [None]:
# Plot the rf model with the 'boundary' plot type
plot_model(rf, plot="boundary")

In [None]:
# Run evaluate_model on the rf model created in section 4
evaluate_model(rf)

# 10. Interpret Model

In [None]:
# Train the 'catboost' estimator with cross-validation switched off
catboost = create_model("catboost", cross_validation=False)

In [None]:
# Interpret the catboost model using interpret_model's default plot
interpret_model(catboost)

In [None]:
# Interpret the catboost model with the 'correlation' plot
interpret_model(catboost, plot="correlation")

In [None]:
# Interpret the catboost model with the 'reason' plot for observation 12
interpret_model(catboost, plot="reason", observation=12)

# 11. AutoML()

In [None]:
# Ask automl for the model that optimizes 'Recall', then display it
best = automl(optimize="Recall")
best

# 12. Predict Model

In [None]:
# Score lr with predict_model without passing any data, then preview the first rows
# NOTE(review): the variable name suggests this scores the hold-out set — confirm
pred_holdouts = predict_model(lr)
pred_holdouts.head()

In [None]:
# Build an unlabeled copy of the data and score it as if it were new data.
# The target of this experiment is 'Purchase' (see the setup cell); the previous
# version dropped 'deposit', a column the juice dataset does not have, which
# raises KeyError. DataFrame.drop returns a new frame, so `data` is left untouched
# and the cell can be re-run safely.
new_data = data.drop(columns=['Purchase'])
predict_new = predict_model(lr, data=new_data)
predict_new.head()

# 13. Save / Load Model

In [None]:
# Persist the lr model under the name 'best-model'
save_model(lr, model_name="best-model")

In [None]:
# Load the object saved as 'best-model' and print its text representation
loaded_bestmodel = load_model("best-model")
print(loaded_bestmodel)

In [None]:
# Switch scikit-learn's display mode to 'diagram' and show element 0 of the loaded object.
# sklearn's set_config is called through the module instead of being imported by name,
# so it does not shadow the PyCaret set_config that this notebook uses later on.
import sklearn
sklearn.set_config(display='diagram')
loaded_bestmodel[0]

In [None]:
# Switch scikit-learn's display mode back to 'text'.
# Called through the module so the name set_config is not rebound to sklearn's
# function (the notebook later calls PyCaret's set_config).
import sklearn
sklearn.set_config(display='text')

# 14. Deploy Model

In [None]:
# Deploy lr under the name 'best-aws', passing the target bucket via `authentication`
deploy_model(
    lr,
    model_name="best-aws",
    authentication={"bucket": "pycaret-test"},
)

# 15. Get Config / Set Config

In [None]:
# Read the 'X_train' entry from the experiment's config and preview its first rows
X_train = get_config("X_train")
X_train.head()

In [None]:
# Show the current value of the 'seed' config entry
get_config("seed")

In [None]:
# Rebind set_config to PyCaret's version (an earlier cell imports a function of the
# same name from sklearn), then overwrite the 'seed' config entry with 999
from pycaret.classification import set_config
set_config("seed", 999)

In [None]:
# Read 'seed' again to check the value written by set_config
get_config("seed")

# 16. Get System Logs

In [None]:
# Retrieve the system logs via get_system_logs, with no arguments
get_system_logs()

# 17. MLFlow UI

In [None]:
# Shell escape: runs the `mlflow ui` command from the notebook's working directory
# NOTE(review): this presumably keeps the cell busy until the server is interrupted — confirm
!mlflow ui

In [None]:
# Fetch the experiment logs
# NOTE(review): get_logs() is called with its defaults here — confirm whether
# save=True must be passed for the csv file to actually be written
get_logs()

# End
Thank you. For more information / tutorials on PyCaret, please visit https://www.pycaret.org