{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# install pycaret\n", "# pip install pycaret\n", "\n", "# pip install pycaret[full]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.2.3'" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "
| \n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
|---|---|---|---|---|---|---|---|---|
| Col1 | \n", "1000.0 | \n", "0.491362 | \n", "0.259138 | \n", "0.000000 | \n", "0.287458 | \n", "0.492070 | \n", "0.694192 | \n", "0.994431 | \n", "
| Col2 | \n", "1000.0 | \n", "0.490200 | \n", "0.251931 | \n", "0.000000 | \n", "0.291449 | \n", "0.488656 | \n", "0.686531 | \n", "1.000000 | \n", "
| Col3 | \n", "1000.0 | \n", "0.509077 | \n", "0.256606 | \n", "0.000000 | \n", "0.337802 | \n", "0.510077 | \n", "0.686914 | \n", "1.000000 | \n", "
| Col4 | \n", "1000.0 | \n", "0.497362 | \n", "0.263562 | \n", "0.000000 | \n", "0.256147 | \n", "0.497537 | \n", "0.731949 | \n", "1.000000 | \n", "
| Col5 | \n", "1000.0 | \n", "0.586120 | \n", "0.334658 | \n", "0.000000 | \n", "0.169680 | \n", "0.782019 | \n", "0.847956 | \n", "1.000000 | \n", "
| Col6 | \n", "1000.0 | \n", "0.514636 | \n", "0.317470 | \n", "0.000000 | \n", "0.142587 | \n", "0.537953 | \n", "0.856512 | \n", "1.000000 | \n", "
| Col7 | \n", "1000.0 | \n", "0.508270 | \n", "0.278483 | \n", "0.000000 | \n", "0.246021 | \n", "0.465679 | \n", "0.804935 | \n", "1.000000 | \n", "
| Col8 | \n", "1000.0 | \n", "0.457541 | \n", "0.220129 | \n", "0.000000 | \n", "0.245539 | \n", "0.515619 | \n", "0.626757 | \n", "1.000000 | \n", "
| Col9 | \n", "1000.0 | \n", "0.477685 | \n", "0.241432 | \n", "0.000000 | \n", "0.291452 | \n", "0.387753 | \n", "0.723674 | \n", "0.988732 | \n", "
| Col10 | \n", "1000.0 | \n", "0.495760 | \n", "0.211677 | \n", "0.014495 | \n", "0.329904 | \n", "0.488891 | \n", "0.659528 | \n", "1.000000 | \n", "
| Description | Value | |
|---|---|---|
| 0 | \n", "session_id | \n", "123 | \n", "
| 1 | \n", "Original Data | \n", "(1000, 10) | \n", "
| 2 | \n", "Missing Values | \n", "False | \n", "
| 3 | \n", "Numeric Features | \n", "10 | \n", "
| 4 | \n", "Categorical Features | \n", "0 | \n", "
| 5 | \n", "Ordinal Features | \n", "False | \n", "
| 6 | \n", "High Cardinality Features | \n", "False | \n", "
| 7 | \n", "High Cardinality Method | \n", "None | \n", "
| 8 | \n", "Transformed Data | \n", "(1000, 10) | \n", "
| 9 | \n", "CPU Jobs | \n", "-1 | \n", "
| 10 | \n", "Use GPU | \n", "False | \n", "
| 11 | \n", "Log Experiment | \n", "True | \n", "
| 12 | \n", "Experiment Name | \n", "anomaly-demo | \n", "
| 13 | \n", "USI | \n", "5118 | \n", "
| 14 | \n", "Imputation Type | \n", "simple | \n", "
| 15 | \n", "Iterative Imputation Iteration | \n", "None | \n", "
| 16 | \n", "Numeric Imputer | \n", "mean | \n", "
| 17 | \n", "Iterative Imputation Numeric Model | \n", "None | \n", "
| 18 | \n", "Categorical Imputer | \n", "mode | \n", "
| 19 | \n", "Iterative Imputation Categorical Model | \n", "None | \n", "
| 20 | \n", "Unknown Categoricals Handling | \n", "least_frequent | \n", "
| 21 | \n", "Normalize | \n", "False | \n", "
| 22 | \n", "Normalize Method | \n", "None | \n", "
| 23 | \n", "Transformation | \n", "False | \n", "
| 24 | \n", "Transformation Method | \n", "None | \n", "
| 25 | \n", "PCA | \n", "False | \n", "
| 26 | \n", "PCA Method | \n", "None | \n", "
| 27 | \n", "PCA Components | \n", "None | \n", "
| 28 | \n", "Ignore Low Variance | \n", "False | \n", "
| 29 | \n", "Combine Rare Levels | \n", "False | \n", "
| 30 | \n", "Rare Level Threshold | \n", "None | \n", "
| 31 | \n", "Numeric Binning | \n", "False | \n", "
| 32 | \n", "Remove Outliers | \n", "False | \n", "
| 33 | \n", "Outliers Threshold | \n", "None | \n", "
| 34 | \n", "Remove Multicollinearity | \n", "False | \n", "
| 35 | \n", "Multicollinearity Threshold | \n", "None | \n", "
| 36 | \n", "Clustering | \n", "False | \n", "
| 37 | \n", "Clustering Iteration | \n", "None | \n", "
| 38 | \n", "Polynomial Features | \n", "False | \n", "
| 39 | \n", "Polynomial Degree | \n", "None | \n", "
| 40 | \n", "Trignometry Features | \n", "False | \n", "
| 41 | \n", "Polynomial Threshold | \n", "None | \n", "
| 42 | \n", "Group Features | \n", "False | \n", "
| 43 | \n", "Feature Selection | \n", "False | \n", "
| 44 | \n", "Features Selection Threshold | \n", "None | \n", "
| 45 | \n", "Feature Interaction | \n", "False | \n", "
| 46 | \n", "Feature Ratio | \n", "False | \n", "
| 47 | \n", "Interaction Threshold | \n", "None | \n", "
| \n", " | Name | \n", "Reference | \n", "
|---|---|---|
| ID | \n", "\n", " | \n", " |
| abod | \n", "Angle-base Outlier Detection | \n", "pyod.models.abod.ABOD | \n", "
| cluster | \n", "Clustering-Based Local Outlier | \n", "pyod.models.cblof.CBLOF | \n", "
| cof | \n", "Connectivity-Based Local Outlier | \n", "pyod.models.cof.COF | \n", "
| iforest | \n", "Isolation Forest | \n", "pyod.models.iforest.IForest | \n", "
| histogram | \n", "Histogram-based Outlier Detection | \n", "pyod.models.hbos.HBOS | \n", "
| knn | \n", "K-Nearest Neighbors Detector | \n", "pyod.models.knn.KNN | \n", "
| lof | \n", "Local Outlier Factor | \n", "pyod.models.lof.LOF | \n", "
| svm | \n", "One-class SVM detector | \n", "pyod.models.ocsvm.OCSVM | \n", "
| pca | \n", "Principal Component Analysis | \n", "pyod.models.pca.PCA | \n", "
| mcd | \n", "Minimum Covariance Determinant | \n", "pyod.models.mcd.MCD | \n", "
| sod | \n", "Subspace Outlier Detection | \n", "pyod.models.sod.SOD | \n", "
| sos | \n", "Stochastic Outlier Selection | \n", "pyod.models.sos.SOS | \n", "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.030361 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.078290 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.026938 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.053551 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.015639 | \n", "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.030361 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.078290 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.026938 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.053551 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.015639 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 995 | \n", "0.305055 | \n", "0.656837 | \n", "0.331665 | \n", "0.822525 | \n", "0.907127 | \n", "0.882276 | \n", "0.855732 | \n", "0.584786 | \n", "0.808640 | \n", "0.242762 | \n", "0 | \n", "-0.082756 | \n", "
| 996 | \n", "0.812627 | \n", "0.864258 | \n", "0.616604 | \n", "0.167966 | \n", "0.811223 | \n", "0.938071 | \n", "0.418462 | \n", "0.472306 | \n", "0.348347 | \n", "0.671129 | \n", "0 | \n", "-0.065453 | \n", "
| 997 | \n", "0.250967 | \n", "0.138627 | \n", "0.919703 | \n", "0.461234 | \n", "0.886555 | \n", "0.869888 | \n", "0.800908 | \n", "0.530324 | \n", "0.779433 | \n", "0.234952 | \n", "0 | \n", "-0.055405 | \n", "
| 998 | \n", "0.502436 | \n", "0.936820 | \n", "0.580062 | \n", "0.540773 | \n", "0.151995 | \n", "0.059452 | \n", "0.225220 | \n", "0.242755 | \n", "0.279385 | \n", "0.538755 | \n", "0 | \n", "-0.068005 | \n", "
| 999 | \n", "0.457991 | \n", "0.017755 | \n", "0.714113 | \n", "0.125992 | \n", "0.063316 | \n", "0.154739 | \n", "0.922974 | \n", "0.692299 | \n", "0.816777 | \n", "0.307592 | \n", "0 | \n", "-0.012268 | \n", "
1000 rows × 12 columns
\n", "| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
Pipeline(memory=None,\n",
" steps=[('dtypes',\n",
" DataTypes_Auto_infer(categorical_features=[],\n",
" display_types=True, features_todrop=[],\n",
" id_columns=[], ml_usecase='regression',\n",
" numerical_features=[],\n",
" target='UNSUPERVISED_DUMMY_TARGET',\n",
" time_features=[])),\n",
" ('imputer',\n",
" Simple_Imputer(categorical_strategy='most frequent',\n",
" fill_value_categorical=None,\n",
" fill_value_numerical=None...\n",
" ('fix_perfect', 'passthrough'),\n",
" ('clean_names', Clean_Colum_Names()),\n",
" ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),\n",
" ('dfs', 'passthrough'), ('pca', 'passthrough'),\n",
" ['trained_model',\n",
" IForest(behaviour='new', bootstrap=False, contamination=0.05,\n",
" max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1,\n",
" random_state=123, verbose=0)]],\n",
" verbose=False)DataTypes_Auto_infer(ml_usecase='regression',\n",
" target='UNSUPERVISED_DUMMY_TARGET')Simple_Imputer(categorical_strategy='most frequent',\n",
" fill_value_categorical=None, fill_value_numerical=None,\n",
" numeric_strategy='mean', target_variable=None)New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n",
" target='UNSUPERVISED_DUMMY_TARGET')passthrough
passthrough
passthrough
passthrough
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n",
" target='UNSUPERVISED_DUMMY_TARGET')Make_Time_Features(list_of_features=None,\n",
" time_feature=Index([], dtype='object'))passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
Dummify(target='UNSUPERVISED_DUMMY_TARGET')
passthrough
Clean_Colum_Names()
passthrough
passthrough
passthrough
passthrough
IForest(behaviour='new', bootstrap=False, contamination=0.05,\n",
" max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1,\n",
" random_state=123, verbose=0)| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "