{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PyCaret 2.0x New Features" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#pip install pycaret-nightly" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pycaret-nightly-0.26\n" ] } ], "source": [ "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdPurchaseWeekofPurchaseStoreIDPriceCHPriceMMDiscCHDiscMMSpecialCHSpecialMMLoyalCHSalePriceMMSalePriceCHPriceDiffStore7PctDiscMMPctDiscCHListPriceDiffSTORE
01CH23711.751.990.000.0000.5000001.991.750.24No0.0000000.0000000.241
12CH23911.751.990.000.3010.6000001.691.75-0.06No0.1507540.0000000.241
23CH24511.862.090.170.0000.6800002.091.690.40No0.0000000.0913980.231
34MM22711.691.690.000.0000.4000001.691.690.00No0.0000000.0000000.001
45CH22871.691.690.000.0000.9565351.691.690.00Yes0.0000000.0000000.000
\n", "
" ], "text/plain": [ " Id Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM \\\n", "0 1 CH 237 1 1.75 1.99 0.00 0.0 \n", "1 2 CH 239 1 1.75 1.99 0.00 0.3 \n", "2 3 CH 245 1 1.86 2.09 0.17 0.0 \n", "3 4 MM 227 1 1.69 1.69 0.00 0.0 \n", "4 5 CH 228 7 1.69 1.69 0.00 0.0 \n", "\n", " SpecialCH SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 \\\n", "0 0 0 0.500000 1.99 1.75 0.24 No \n", "1 0 1 0.600000 1.69 1.75 -0.06 No \n", "2 0 0 0.680000 2.09 1.69 0.40 No \n", "3 0 0 0.400000 1.69 1.69 0.00 No \n", "4 0 0 0.956535 1.69 1.69 0.00 Yes \n", "\n", " PctDiscMM PctDiscCH ListPriceDiff STORE \n", "0 0.000000 0.000000 0.24 1 \n", "1 0.150754 0.000000 0.24 1 \n", "2 0.000000 0.091398 0.23 1 \n", "3 0.000000 0.000000 0.00 1 \n", "4 0.000000 0.000000 0.00 0 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.datasets import get_data\n", "data = get_data('juice')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Setup Succesfully Completed!\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Description Value
0session_id7267
1Target TypeBinary
2Label EncodedCH: 0, MM: 1
3Original Data(1070, 19)
4Missing Values False
5Numeric Features 13
6Categorical Features 5
7Ordinal Features False
8High Cardinality Features False
9High Cardinality Method None
10Sampled Data(1070, 19)
11Transformed Train Set(748, 16)
12Transformed Test Set(322, 16)
13Numeric Imputer mean
14Categorical Imputer constant
15Normalize False
16Normalize Method None
17Transformation False
18Transformation Method None
19PCA False
20PCA Method None
21PCA Components None
22Ignore Low Variance False
23Combine Rare Levels False
24Rare Level Threshold None
25Numeric Binning False
26Remove Outliers False
27Outliers Threshold None
28Remove Multicollinearity False
29Multicollinearity Threshold None
30Clustering False
31Clustering Iteration None
32Polynomial Features False
33Polynomial Degree None
34Trignometry Features False
35Polynomial Threshold None
36Group Features False
37Feature Selection False
38Features Selection Threshold None
39Feature Interaction False
40Feature Ratio False
41Interaction Threshold None
42Fix ImbalanceFalse
43Fix Imbalance MethodSMOTE
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.classification import *\n", "clf1 = setup(data, target = 'Purchase', session_id=7267, log_experiment=True, experiment_name='pycaret2-juice')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Behavior of compare_models" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.1 Default" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy AUC Recall Prec. F1 Kappa MCC TT (Sec)
0Linear Discriminant Analysis0.83830.90830.79780.79330.79340.66070.66320.0161
1Ridge Classifier0.83560.00000.79790.78730.79070.65550.65780.0295
2Ada Boost Classifier0.82760.89130.76370.79220.77600.63600.63800.2087
3CatBoost Classifier0.82230.90260.75020.78840.76720.62380.62615.0027
4Logistic Regression0.82220.90710.74300.79270.76430.62210.62550.1138
5Gradient Boosting Classifier0.82220.89670.76370.77770.76970.62510.62610.2643
6Light Gradient Boosting Machine0.80890.88650.74690.76090.75290.59730.59820.3430
7Random Forest Classifier0.80350.86720.71630.76720.73930.58240.58470.1235
8Extreme Gradient Boosting0.80350.87760.74640.75330.74800.58720.58920.2332
9Extra Trees Classifier0.79810.84680.71590.75580.73430.57180.57340.3468
10Naive Bayes0.78590.84880.81850.69200.74930.56490.57180.0068
11Decision Tree Classifier0.75660.74290.67800.69420.68470.48690.48810.0181
12K Neighbors Classifier0.73930.78900.60980.69100.64470.44090.44540.0138
13Quadratic Discriminant Analysis0.72860.79170.68440.66990.64600.43400.45040.0121
14SVM - Linear Kernel0.56430.00000.20000.07730.11150.00000.00000.0319
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "best_model = compare_models()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,\n", " solver='svd', store_covariance=False, tol=0.0001)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "best_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.2 n_select parameter" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "top5 = compare_models(n_select=5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.1, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "w" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.3 Whitelist parameter" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy AUC Recall Prec. F1 Kappa MCC TT (Sec)
0Light Gradient Boosting Machine0.80890.88650.74690.76090.75290.59730.59820.3285
1Random Forest Classifier0.80350.86720.71630.76720.73930.58240.58470.1270
2Extreme Gradient Boosting0.80350.87760.74640.75330.74800.58720.58920.2405
3Decision Tree Classifier0.75660.74290.67800.69420.68470.48690.48810.0081
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "w = compare_models(whitelist=['dt','rf','xgboost','lightgbm'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2. Behavior of tune_model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Accuracy AUC Recall Prec. F1 Kappa MCC
00.84000.94450.72410.84000.77780.65380.6582
10.78670.86210.65520.76000.70370.53850.5421
20.82670.88080.72410.80770.76360.62740.6298
30.85330.94530.86210.78120.81970.69660.6990
40.80000.88080.65520.79170.71700.56450.5705
50.81330.92200.72410.77780.75000.60140.6023
60.82670.88300.70000.84000.76360.62860.6351
70.81330.91780.70000.80770.75000.60230.6062
80.83780.87200.75860.81480.78570.65550.6566
90.81080.90690.68970.80000.74070.59310.5971
Mean0.82090.90150.71930.80210.75720.61620.6197
SD0.01900.02850.05640.02450.03170.04400.0435
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tuned_best_model = tune_model(best_model, optimize='Precision')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tuned_best_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "____" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 3. Train custom models" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from gplearn.genetic import SymbolicClassifier\n", "sc = SymbolicClassifier()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a6d18eba3cf94413bed177e05628aec3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "IntProgress(value=0, description='Processing: ', max=9)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Initiated. . . . . . . . . . . . . . . . . .12:55:14
Status. . . . . . . . . . . . . . . . . .Creating Logs
ETC. . . . . . . . . . . . . . . . . .Almost Finished
\n", "
" ], "text/plain": [ " \n", " \n", "Initiated . . . . . . . . . . . . . . . . . . 12:55:14\n", "Status . . . . . . . . . . . . . . . . . . Creating Logs\n", "ETC . . . . . . . . . . . . . . . . . . Almost Finished" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccuracyAUCRecallPrec.F1KappaMCC
00.82670.92130.72410.80770.76360.62740.6298
\n", "
" ], "text/plain": [ " Accuracy AUC Recall Prec. F1 Kappa MCC\n", "0 0.8267 0.9213 0.7241 0.8077 0.7636 0.6274 0.6298" ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcreate_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfold\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\pycaret\\classification.py\u001b[0m in \u001b[0;36mcreate_model\u001b[1;34m(estimator, ensemble, method, fold, round, cross_validation, verbose, system, **kwargs)\u001b[0m\n\u001b[0;32m 2422\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'predict_proba'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2423\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Fitting Model\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2424\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mXtrain\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mytrain\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2425\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Evaluating 
Metrics\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2426\u001b[0m \u001b[0mpred_prob\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mXtest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\genetic.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 469\u001b[0m \u001b[0mseeds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstarts\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mstarts\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 470\u001b[0m params)\n\u001b[1;32m--> 471\u001b[1;33m for i in range(n_jobs))\n\u001b[0m\u001b[0;32m 472\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 473\u001b[0m \u001b[1;31m# Reduce, maintaining order across different n_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1027\u001b[0m \u001b[1;31m# remaining jobs.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1028\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1029\u001b[1;33m \u001b[1;32mif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1030\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1031\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[1;34m(self, iterator)\u001b[0m\n\u001b[0;32m 845\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 846\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 847\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 848\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 849\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[1;34m(self, batch)\u001b[0m\n\u001b[0;32m 763\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 764\u001b[0m \u001b[0mjob_idx\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 765\u001b[1;33m \u001b[0mjob\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 766\u001b[0m \u001b[1;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 767\u001b[0m \u001b[1;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[1;34m(self, func, callback)\u001b[0m\n\u001b[0;32m 204\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[1;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 206\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 207\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[0mcallback\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, batch)\u001b[0m\n\u001b[0;32m 568\u001b[0m \u001b[1;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 569\u001b[0m \u001b[1;31m# arguments in memory\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 570\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 571\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 572\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 251\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 252\u001b[0m return [func(*args, **kwargs)\n\u001b[1;32m--> 253\u001b[1;33m for func, args, kwargs in 
self.items]\n\u001b[0m\u001b[0;32m 254\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 255\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__reduce__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 251\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 252\u001b[0m return [func(*args, **kwargs)\n\u001b[1;32m--> 253\u001b[1;33m for func, args, kwargs in self.items]\n\u001b[0m\u001b[0;32m 254\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 255\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__reduce__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\genetic.py\u001b[0m in \u001b[0;36m_parallel_evolve\u001b[1;34m(n_programs, parents, X, y, sample_weight, seeds, params)\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[0moob_sample_weight\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mindices\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 145\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 146\u001b[1;33m \u001b[0mprogram\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraw_fitness_\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mprogram\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraw_fitness\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcurr_sample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 147\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmax_samples\u001b[0m \u001b[1;33m<\u001b[0m \u001b[0mn_samples\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 148\u001b[0m \u001b[1;31m# Calculate OOB fitness\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\_program.py\u001b[0m in \u001b[0;36mraw_fitness\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 460\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 461\u001b[0m \"\"\"\n\u001b[1;32m--> 462\u001b[1;33m \u001b[0my_pred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 463\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransformer\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 464\u001b[0m \u001b[0my_pred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransformer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\_program.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 378\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mt\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mt\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 379\u001b[0m else t for t in apply_stack[-1][1:]]\n\u001b[1;32m--> 380\u001b[1;33m \u001b[0mintermediate_result\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunction\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mterminals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 381\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapply_stack\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 382\u001b[0m \u001b[0mapply_stack\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\functions.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 46\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 47\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32m~\\Anaconda3\\envs\\pycaret-nightly-env\\lib\\site-packages\\gplearn\\functions.py\u001b[0m in \u001b[0;36m_protected_division\u001b[1;34m(x1, x2)\u001b[0m\n\u001b[0;32m 125\u001b[0m \u001b[1;34m\"\"\"Closure of division (x1/x2) for zero denominator.\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 126\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdivide\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minvalid\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 127\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx2\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0.001\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdivide\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1.\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 128\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 129\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "sc = create_model(sc, fold=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predict_model(sc);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plot_model(sc, plot = 'confusion_matrix')" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "plot_model(sc, plot = 'auc')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plot_model(sc, plot = 'threshold')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 4. Customize parameters in create_model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Accuracy AUC Recall Prec. F1 Kappa MCC
00.78670.90780.65520.76000.70370.53850.5421
10.78670.87410.75860.70970.73330.55590.5567
20.76000.85010.72410.67740.70000.50040.5011
30.88000.91750.86210.83330.84750.74860.7489
40.82670.87030.79310.76670.77970.63690.6371
50.84000.90250.79310.79310.79310.66270.6627
60.74670.85410.66670.68970.67800.46930.4695
70.81330.92440.73330.78570.75860.60670.6077
80.82430.85060.72410.80770.76360.62450.6269
90.82430.91380.75860.78570.77190.62910.6294
Mean0.80890.88650.74690.76090.75290.59730.5982
SD0.03750.02820.05820.04940.04800.07840.0782
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lightgbm = create_model('lightgbm')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.1, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lightgbm" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "lgbms = []\n", "\n", "for i in np.arange(0.1,1,0.1):\n", " m = create_model('lightgbm', learning_rate=i, verbose=False)\n", " lgbms.append(m)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "9\n" ] } ], "source": [ "print(len(lgbms))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.1, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.2, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, 
num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.30000000000000004,\n", " max_depth=-1, min_child_samples=20, min_child_weight=0.001,\n", " min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31,\n", " objective=None, random_state=7267, reg_alpha=0.0, reg_lambda=0.0,\n", " silent=True, subsample=1.0, subsample_for_bin=200000,\n", " subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.4, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.5, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.6, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', 
learning_rate=0.7000000000000001,\n", " max_depth=-1, min_child_samples=20, min_child_weight=0.001,\n", " min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31,\n", " objective=None, random_state=7267, reg_alpha=0.0, reg_lambda=0.0,\n", " silent=True, subsample=1.0, subsample_for_bin=200000,\n", " subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.8, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n", " LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n", " importance_type='split', learning_rate=0.9, max_depth=-1,\n", " min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n", " n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,\n", " random_state=7267, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n", " subsample=1.0, subsample_for_bin=200000, subsample_freq=0)]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lgbms" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "____" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 5. 
AutoML()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,\n", " solver='svd', store_covariance=False, tol=0.0001)\n" ] } ], "source": [ "best_model_cv = automl()\n", "print(best_model_cv)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,\n", " solver='svd', store_covariance=False, tol=0.0001)\n" ] } ], "source": [ "best_model_holdout = automl(use_holdout=True)\n", "print(best_model_holdout)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 6. MLflow UI" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "^C\n" ] } ], "source": [ "!mlflow ui" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 7. 
Log plots" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Setup Succesfully Completed!\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Description Value
0session_id7267
1Target TypeBinary
2Label EncodedCH: 0, MM: 1
3Original Data(1070, 19)
4Missing Values False
5Numeric Features 13
6Categorical Features 5
7Ordinal Features False
8High Cardinality Features False
9High Cardinality Method None
10Sampled Data(1070, 19)
11Transformed Train Set(748, 16)
12Transformed Test Set(322, 16)
13Numeric Imputer mean
14Categorical Imputer constant
15Normalize False
16Normalize Method None
17Transformation False
18Transformation Method None
19PCA False
20PCA Method None
21PCA Components None
22Ignore Low Variance False
23Combine Rare Levels False
24Rare Level Threshold None
25Numeric Binning False
26Remove Outliers False
27Outliers Threshold None
28Remove Multicollinearity False
29Multicollinearity Threshold None
30Clustering False
31Clustering Iteration None
32Polynomial Features False
33Polynomial Degree None
34Trignometry Features False
35Polynomial Threshold None
36Group Features False
37Feature Selection False
38Features Selection Threshold None
39Feature Interaction False
40Feature Ratio False
41Interaction Threshold None
42Fix ImbalanceFalse
43Fix Imbalance MethodSMOTE
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.classification import *\n", "clf1 = setup(data, target = 'Purchase', session_id=7267, log_experiment=True, experiment_name='pycaret2-juice',\n", " log_plots = True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Accuracy AUC Recall Prec. F1 Kappa MCC
00.82670.94680.65520.86360.74510.61750.6310
10.76000.87780.72410.67740.70000.50040.5011
20.82670.87710.68970.83330.75470.62250.6292
30.88000.94000.89660.81250.85250.75170.7543
40.81330.88080.65520.82610.73080.59080.6001
50.80000.91380.72410.75000.73680.57560.5759
60.78670.88890.70000.75000.72410.55060.5514
70.85330.92960.83330.80650.81970.69610.6964
80.82430.89040.75860.78570.77190.62910.6294
90.85140.92610.79310.82140.80700.68620.6865
Mean0.82220.90710.74300.79270.76430.62210.6255
SD0.03300.02580.07440.05120.04560.07040.0703
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lr = create_model('lr')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "___" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 8. Logs in Excel" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# useful for remote runs like Kaggle Kernels or GitHub Actions\n", "xl_logs = get_logs(save=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 9. Databricks integration" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#see example on Databricks" ] } ], "metadata": { "kernelspec": { "display_name": "pycaret-nightly-env", "language": "python", "name": "pycaret-nightly-env" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 2 }