{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !pip install pycaret" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !pip install pycaret[full] " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.3.0'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdPurchaseWeekofPurchaseStoreIDPriceCHPriceMMDiscCHDiscMMSpecialCHSpecialMMLoyalCHSalePriceMMSalePriceCHPriceDiffStore7PctDiscMMPctDiscCHListPriceDiffSTORE
01CH23711.751.990.000.0000.5000001.991.750.24No0.0000000.0000000.241
12CH23911.751.990.000.3010.6000001.691.75-0.06No0.1507540.0000000.241
23CH24511.862.090.170.0000.6800002.091.690.40No0.0000000.0913980.231
34MM22711.691.690.000.0000.4000001.691.690.00No0.0000000.0000000.001
45CH22871.691.690.000.0000.9565351.691.690.00Yes0.0000000.0000000.000
\n", "
" ], "text/plain": [ " Id Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM \\\n", "0 1 CH 237 1 1.75 1.99 0.00 0.0 \n", "1 2 CH 239 1 1.75 1.99 0.00 0.3 \n", "2 3 CH 245 1 1.86 2.09 0.17 0.0 \n", "3 4 MM 227 1 1.69 1.69 0.00 0.0 \n", "4 5 CH 228 7 1.69 1.69 0.00 0.0 \n", "\n", " SpecialCH SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 \\\n", "0 0 0 0.500000 1.99 1.75 0.24 No \n", "1 0 1 0.600000 1.69 1.75 -0.06 No \n", "2 0 0 0.680000 2.09 1.69 0.40 No \n", "3 0 0 0.400000 1.69 1.69 0.00 No \n", "4 0 0 0.956535 1.69 1.69 0.00 Yes \n", "\n", " PctDiscMM PctDiscCH ListPriceDiff STORE \n", "0 0.000000 0.000000 0.24 1 \n", "1 0.150754 0.000000 0.24 1 \n", "2 0.000000 0.091398 0.23 1 \n", "3 0.000000 0.000000 0.00 1 \n", "4 0.000000 0.000000 0.00 0 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.datasets import get_data\n", "data = get_data('juice')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Id int64\n", "Purchase object\n", "WeekofPurchase int64\n", "StoreID int64\n", "PriceCH float64\n", "PriceMM float64\n", "DiscCH float64\n", "DiscMM float64\n", "SpecialCH int64\n", "SpecialMM int64\n", "LoyalCH float64\n", "SalePriceMM float64\n", "SalePriceCH float64\n", "PriceDiff float64\n", "Store7 object\n", "PctDiscMM float64\n", "PctDiscCH float64\n", "ListPriceDiff float64\n", "STORE int64\n", "dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dtypes" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CH 653\n", "MM 417\n", "Name: Purchase, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['Purchase'].value_counts()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Description Value
0session_id8572
1TargetPurchase
2Target TypeBinary
3Label EncodedCH: 0, MM: 1
4Original Data(1070, 19)
5Missing ValuesFalse
6Numeric Features13
7Categorical Features5
8Ordinal FeaturesFalse
9High Cardinality FeaturesFalse
10High Cardinality MethodNone
11Transformed Train Set(748, 16)
12Transformed Test Set(322, 16)
13Shuffle Train-TestTrue
14Stratify Train-TestFalse
15Fold GeneratorStratifiedKFold
16Fold Number3
17CPU Jobs-1
18Use GPUFalse
19Log ExperimentTrue
20Experiment Namedsc1
21USIdedd
22Imputation Typesimple
23Iterative Imputation IterationNone
24Numeric Imputermean
25Iterative Imputation Numeric ModelNone
26Categorical Imputerconstant
27Iterative Imputation Categorical ModelNone
28Unknown Categoricals Handlingleast_frequent
29NormalizeFalse
30Normalize MethodNone
31TransformationFalse
32Transformation MethodNone
33PCAFalse
34PCA MethodNone
35PCA ComponentsNone
36Ignore Low VarianceFalse
37Combine Rare LevelsFalse
38Rare Level ThresholdNone
39Numeric BinningFalse
40Remove OutliersFalse
41Outliers ThresholdNone
42Remove MulticollinearityFalse
43Multicollinearity ThresholdNone
44ClusteringFalse
45Clustering IterationNone
46Polynomial FeaturesFalse
47Polynomial DegreeNone
48Trignometry FeaturesFalse
49Polynomial ThresholdNone
50Group FeaturesFalse
51Feature SelectionFalse
52Feature Selection Methodclassic
53Features Selection ThresholdNone
54Feature InteractionFalse
55Feature RatioFalse
56Interaction ThresholdNone
57Fix ImbalanceFalse
58Fix Imbalance MethodSMOTE
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "2021/04/15 17:27:44 WARNING mlflow.tracking.context.git_context: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. Error: No module named 'repository'\n" ] } ], "source": [ "from pycaret.classification import *\n", "s = setup(data, target = 'Purchase', log_experiment = True, experiment_name = 'dsc1',\n", " fold = 3)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Accuracy AUC Recall Prec. F1 Kappa MCC
00.79200.87460.68090.74420.71110.54910.5504
10.81930.91010.68820.80000.73990.60260.6066
20.84340.92400.88170.74550.80790.67720.6840
Mean0.81820.90290.75020.76320.75300.60970.6137
SD0.02100.02080.09300.02600.04060.05250.0548
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "xgboost = create_model('xgboost')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model Succesfully Deployed on AWS S3\n" ] } ], "source": [ "deploy_model(xgboost, 'dsc123', platform = 'aws', authentication = {'bucket' : 'pycaret-test'})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# add_metric()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "best = compare_models()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "asd = LogisticRegression()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "create_model(asd)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr = create_model('lr')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tuned_lr = tune_model(lr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tuned_lr = tune_model(lr, search_library = 'optuna')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tuned_lr = tune_model(lr, search_library = 'scikit-optimize')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dt = create_model('dt')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bagged_dt = ensemble_model(dt, method = 'Boosting')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bagged_dt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plot_model(lr, plot = 'feature')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "evaluate_model(lr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dt = create_model('dt')\n", "plot_model(dt, plot = 'boundary')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xgboost = create_model('xgboost')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "interpret_model(xgboost)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "save_model(xgboost, model_name = 'abc')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!mlflow ui" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }