{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install pycaret"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install pycaret[full] "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.3.0'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Report the installed PyCaret version (the recorded output is '2.3.0').\n",
"from pycaret.utils import version\n",
"version()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" Purchase | \n",
" WeekofPurchase | \n",
" StoreID | \n",
" PriceCH | \n",
" PriceMM | \n",
" DiscCH | \n",
" DiscMM | \n",
" SpecialCH | \n",
" SpecialMM | \n",
" LoyalCH | \n",
" SalePriceMM | \n",
" SalePriceCH | \n",
" PriceDiff | \n",
" Store7 | \n",
" PctDiscMM | \n",
" PctDiscCH | \n",
" ListPriceDiff | \n",
" STORE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" CH | \n",
" 237 | \n",
" 1 | \n",
" 1.75 | \n",
" 1.99 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0 | \n",
" 0 | \n",
" 0.500000 | \n",
" 1.99 | \n",
" 1.75 | \n",
" 0.24 | \n",
" No | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.24 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" CH | \n",
" 239 | \n",
" 1 | \n",
" 1.75 | \n",
" 1.99 | \n",
" 0.00 | \n",
" 0.3 | \n",
" 0 | \n",
" 1 | \n",
" 0.600000 | \n",
" 1.69 | \n",
" 1.75 | \n",
" -0.06 | \n",
" No | \n",
" 0.150754 | \n",
" 0.000000 | \n",
" 0.24 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" CH | \n",
" 245 | \n",
" 1 | \n",
" 1.86 | \n",
" 2.09 | \n",
" 0.17 | \n",
" 0.0 | \n",
" 0 | \n",
" 0 | \n",
" 0.680000 | \n",
" 2.09 | \n",
" 1.69 | \n",
" 0.40 | \n",
" No | \n",
" 0.000000 | \n",
" 0.091398 | \n",
" 0.23 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" MM | \n",
" 227 | \n",
" 1 | \n",
" 1.69 | \n",
" 1.69 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0 | \n",
" 0 | \n",
" 0.400000 | \n",
" 1.69 | \n",
" 1.69 | \n",
" 0.00 | \n",
" No | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.00 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" CH | \n",
" 228 | \n",
" 7 | \n",
" 1.69 | \n",
" 1.69 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0 | \n",
" 0 | \n",
" 0.956535 | \n",
" 1.69 | \n",
" 1.69 | \n",
" 0.00 | \n",
" Yes | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.00 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM \\\n",
"0 1 CH 237 1 1.75 1.99 0.00 0.0 \n",
"1 2 CH 239 1 1.75 1.99 0.00 0.3 \n",
"2 3 CH 245 1 1.86 2.09 0.17 0.0 \n",
"3 4 MM 227 1 1.69 1.69 0.00 0.0 \n",
"4 5 CH 228 7 1.69 1.69 0.00 0.0 \n",
"\n",
" SpecialCH SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 \\\n",
"0 0 0 0.500000 1.99 1.75 0.24 No \n",
"1 0 1 0.600000 1.69 1.75 -0.06 No \n",
"2 0 0 0.680000 2.09 1.69 0.40 No \n",
"3 0 0 0.400000 1.69 1.69 0.00 No \n",
"4 0 0 0.956535 1.69 1.69 0.00 Yes \n",
"\n",
" PctDiscMM PctDiscCH ListPriceDiff STORE \n",
"0 0.000000 0.000000 0.24 1 \n",
"1 0.150754 0.000000 0.24 1 \n",
"2 0.000000 0.091398 0.23 1 \n",
"3 0.000000 0.000000 0.00 1 \n",
"4 0.000000 0.000000 0.00 0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Load the 'juice' sample dataset; the recorded output previews its first five rows (19 columns).\n",
"from pycaret.datasets import get_data\n",
"data = get_data('juice')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Id int64\n",
"Purchase object\n",
"WeekofPurchase int64\n",
"StoreID int64\n",
"PriceCH float64\n",
"PriceMM float64\n",
"DiscCH float64\n",
"DiscMM float64\n",
"SpecialCH int64\n",
"SpecialMM int64\n",
"LoyalCH float64\n",
"SalePriceMM float64\n",
"SalePriceCH float64\n",
"PriceDiff float64\n",
"Store7 object\n",
"PctDiscMM float64\n",
"PctDiscCH float64\n",
"ListPriceDiff float64\n",
"STORE int64\n",
"dtype: object"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check column dtypes: 'Purchase' and 'Store7' are object columns, the rest are int64/float64.\n",
"data.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CH 653\n",
"MM 417\n",
"Name: Purchase, dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class balance of the 'Purchase' column, used as the target below (recorded run: CH 653, MM 417).\n",
"data['Purchase'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Description | Value |
\n",
" \n",
" 0 | \n",
" session_id | \n",
" 8572 | \n",
"
\n",
" \n",
" 1 | \n",
" Target | \n",
" Purchase | \n",
"
\n",
" \n",
" 2 | \n",
" Target Type | \n",
" Binary | \n",
"
\n",
" \n",
" 3 | \n",
" Label Encoded | \n",
" CH: 0, MM: 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Original Data | \n",
" (1070, 19) | \n",
"
\n",
" \n",
" 5 | \n",
" Missing Values | \n",
" False | \n",
"
\n",
" \n",
" 6 | \n",
" Numeric Features | \n",
" 13 | \n",
"
\n",
" \n",
" 7 | \n",
" Categorical Features | \n",
" 5 | \n",
"
\n",
" \n",
" 8 | \n",
" Ordinal Features | \n",
" False | \n",
"
\n",
" \n",
" 9 | \n",
" High Cardinality Features | \n",
" False | \n",
"
\n",
" \n",
" 10 | \n",
" High Cardinality Method | \n",
" None | \n",
"
\n",
" \n",
" 11 | \n",
" Transformed Train Set | \n",
" (748, 16) | \n",
"
\n",
" \n",
" 12 | \n",
" Transformed Test Set | \n",
" (322, 16) | \n",
"
\n",
" \n",
" 13 | \n",
" Shuffle Train-Test | \n",
" True | \n",
"
\n",
" \n",
" 14 | \n",
" Stratify Train-Test | \n",
" False | \n",
"
\n",
" \n",
" 15 | \n",
" Fold Generator | \n",
" StratifiedKFold | \n",
"
\n",
" \n",
" 16 | \n",
" Fold Number | \n",
" 3 | \n",
"
\n",
" \n",
" 17 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" 18 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" 19 | \n",
" Log Experiment | \n",
" True | \n",
"
\n",
" \n",
" 20 | \n",
" Experiment Name | \n",
" dsc1 | \n",
"
\n",
" \n",
" 21 | \n",
" USI | \n",
" dedd | \n",
"
\n",
" \n",
" 22 | \n",
" Imputation Type | \n",
" simple | \n",
"
\n",
" \n",
" 23 | \n",
" Iterative Imputation Iteration | \n",
" None | \n",
"
\n",
" \n",
" 24 | \n",
" Numeric Imputer | \n",
" mean | \n",
"
\n",
" \n",
" 25 | \n",
" Iterative Imputation Numeric Model | \n",
" None | \n",
"
\n",
" \n",
" 26 | \n",
" Categorical Imputer | \n",
" constant | \n",
"
\n",
" \n",
" 27 | \n",
" Iterative Imputation Categorical Model | \n",
" None | \n",
"
\n",
" \n",
" 28 | \n",
" Unknown Categoricals Handling | \n",
" least_frequent | \n",
"
\n",
" \n",
" 29 | \n",
" Normalize | \n",
" False | \n",
"
\n",
" \n",
" 30 | \n",
" Normalize Method | \n",
" None | \n",
"
\n",
" \n",
" 31 | \n",
" Transformation | \n",
" False | \n",
"
\n",
" \n",
" 32 | \n",
" Transformation Method | \n",
" None | \n",
"
\n",
" \n",
" 33 | \n",
" PCA | \n",
" False | \n",
"
\n",
" \n",
" 34 | \n",
" PCA Method | \n",
" None | \n",
"
\n",
" \n",
" 35 | \n",
" PCA Components | \n",
" None | \n",
"
\n",
" \n",
" 36 | \n",
" Ignore Low Variance | \n",
" False | \n",
"
\n",
" \n",
" 37 | \n",
" Combine Rare Levels | \n",
" False | \n",
"
\n",
" \n",
" 38 | \n",
" Rare Level Threshold | \n",
" None | \n",
"
\n",
" \n",
" 39 | \n",
" Numeric Binning | \n",
" False | \n",
"
\n",
" \n",
" 40 | \n",
" Remove Outliers | \n",
" False | \n",
"
\n",
" \n",
" 41 | \n",
" Outliers Threshold | \n",
" None | \n",
"
\n",
" \n",
" 42 | \n",
" Remove Multicollinearity | \n",
" False | \n",
"
\n",
" \n",
" 43 | \n",
" Multicollinearity Threshold | \n",
" None | \n",
"
\n",
" \n",
" 44 | \n",
" Clustering | \n",
" False | \n",
"
\n",
" \n",
" 45 | \n",
" Clustering Iteration | \n",
" None | \n",
"
\n",
" \n",
" 46 | \n",
" Polynomial Features | \n",
" False | \n",
"
\n",
" \n",
" 47 | \n",
" Polynomial Degree | \n",
" None | \n",
"
\n",
" \n",
" 48 | \n",
" Trignometry Features | \n",
" False | \n",
"
\n",
" \n",
" 49 | \n",
" Polynomial Threshold | \n",
" None | \n",
"
\n",
" \n",
" 50 | \n",
" Group Features | \n",
" False | \n",
"
\n",
" \n",
" 51 | \n",
" Feature Selection | \n",
" False | \n",
"
\n",
" \n",
" 52 | \n",
" Feature Selection Method | \n",
" classic | \n",
"
\n",
" \n",
" 53 | \n",
" Features Selection Threshold | \n",
" None | \n",
"
\n",
" \n",
" 54 | \n",
" Feature Interaction | \n",
" False | \n",
"
\n",
" \n",
" 55 | \n",
" Feature Ratio | \n",
" False | \n",
"
\n",
" \n",
" 56 | \n",
" Interaction Threshold | \n",
" None | \n",
"
\n",
" \n",
" 57 | \n",
" Fix Imbalance | \n",
" False | \n",
"
\n",
" \n",
" 58 | \n",
" Fix Imbalance Method | \n",
" SMOTE | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021/04/15 17:27:44 WARNING mlflow.tracking.context.git_context: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. Error: No module named 'repository'\n"
]
}
],
"source": [
"from pycaret.classification import *\n",
"s = setup(data, target = 'Purchase', log_experiment = True, experiment_name = 'dsc1',\n",
" fold = 3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
" \n",
" 0 | \n",
" 0.7920 | \n",
" 0.8746 | \n",
" 0.6809 | \n",
" 0.7442 | \n",
" 0.7111 | \n",
" 0.5491 | \n",
" 0.5504 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8193 | \n",
" 0.9101 | \n",
" 0.6882 | \n",
" 0.8000 | \n",
" 0.7399 | \n",
" 0.6026 | \n",
" 0.6066 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.8434 | \n",
" 0.9240 | \n",
" 0.8817 | \n",
" 0.7455 | \n",
" 0.8079 | \n",
" 0.6772 | \n",
" 0.6840 | \n",
"
\n",
" \n",
" Mean | \n",
" 0.8182 | \n",
" 0.9029 | \n",
" 0.7502 | \n",
" 0.7632 | \n",
" 0.7530 | \n",
" 0.6097 | \n",
" 0.6137 | \n",
"
\n",
" \n",
" SD | \n",
" 0.0210 | \n",
" 0.0208 | \n",
" 0.0930 | \n",
" 0.0260 | \n",
" 0.0406 | \n",
" 0.0525 | \n",
" 0.0548 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Create a model from the built-in ID 'xgboost'; the recorded output lists the metrics\n",
"# for each of the 3 folds plus their mean and standard deviation.\n",
"xgboost = create_model('xgboost')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Succesfully Deployed on AWS S3\n"
]
}
],
"source": [
"# Upload the trained model under the name 'dsc123' to the S3 bucket 'pycaret-test'.\n",
"# NOTE(review): this has an external side effect on every run and presumably needs AWS\n",
"# credentials configured in the environment - confirm before using Run All.\n",
"deploy_model(xgboost, 'dsc123', platform = 'aws', authentication = {'bucket' : 'pycaret-test'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add_metric()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"best = compare_models()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# An unfitted scikit-learn estimator with default arguments, to hand to PyCaret.\n",
"custom_lr = LogisticRegression()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# create_model() is given an estimator object here rather than a string ID such as 'lr'.\n",
"create_model(custom_lr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lr = create_model('lr')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tuned_lr = tune_model(lr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tune `lr` again with 'optuna' as the search library. The result gets its own name\n",
"# so it does not overwrite the default-search result and the runs can be compared.\n",
"tuned_lr_optuna = tune_model(lr, search_library = 'optuna')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tune `lr` again with 'scikit-optimize' as the search library. The result gets its own\n",
"# name so it does not overwrite the other tuning results and the runs can be compared.\n",
"tuned_lr_skopt = tune_model(lr, search_library = 'scikit-optimize')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dt = create_model('dt')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ensemble `dt` with method = 'Boosting'; the variable is named for the method actually used.\n",
"boosted_dt = ensemble_model(dt, method = 'Boosting')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bare expression: displays the repr of the boosted ensemble.\n",
"boosted_dt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_model(lr, plot = 'feature')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"evaluate_model(lr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dt = create_model('dt')\n",
"plot_model(dt, plot = 'boundary')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"xgboost = create_model('xgboost')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"interpret_model(xgboost)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_model(xgboost, model_name = 'abc')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!mlflow ui"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}