{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# install pycaret\n", "# pip install pycaret\n", "\n", "# pip install pycaret[full]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.2.3'" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "
\n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
---|---|---|---|---|---|---|---|---|
Col1 | \n", "1000.0 | \n", "0.491362 | \n", "0.259138 | \n", "0.000000 | \n", "0.287458 | \n", "0.492070 | \n", "0.694192 | \n", "0.994431 | \n", "
Col2 | \n", "1000.0 | \n", "0.490200 | \n", "0.251931 | \n", "0.000000 | \n", "0.291449 | \n", "0.488656 | \n", "0.686531 | \n", "1.000000 | \n", "
Col3 | \n", "1000.0 | \n", "0.509077 | \n", "0.256606 | \n", "0.000000 | \n", "0.337802 | \n", "0.510077 | \n", "0.686914 | \n", "1.000000 | \n", "
Col4 | \n", "1000.0 | \n", "0.497362 | \n", "0.263562 | \n", "0.000000 | \n", "0.256147 | \n", "0.497537 | \n", "0.731949 | \n", "1.000000 | \n", "
Col5 | \n", "1000.0 | \n", "0.586120 | \n", "0.334658 | \n", "0.000000 | \n", "0.169680 | \n", "0.782019 | \n", "0.847956 | \n", "1.000000 | \n", "
Col6 | \n", "1000.0 | \n", "0.514636 | \n", "0.317470 | \n", "0.000000 | \n", "0.142587 | \n", "0.537953 | \n", "0.856512 | \n", "1.000000 | \n", "
Col7 | \n", "1000.0 | \n", "0.508270 | \n", "0.278483 | \n", "0.000000 | \n", "0.246021 | \n", "0.465679 | \n", "0.804935 | \n", "1.000000 | \n", "
Col8 | \n", "1000.0 | \n", "0.457541 | \n", "0.220129 | \n", "0.000000 | \n", "0.245539 | \n", "0.515619 | \n", "0.626757 | \n", "1.000000 | \n", "
Col9 | \n", "1000.0 | \n", "0.477685 | \n", "0.241432 | \n", "0.000000 | \n", "0.291452 | \n", "0.387753 | \n", "0.723674 | \n", "0.988732 | \n", "
Col10 | \n", "1000.0 | \n", "0.495760 | \n", "0.211677 | \n", "0.014495 | \n", "0.329904 | \n", "0.488891 | \n", "0.659528 | \n", "1.000000 | \n", "
Description | Value | |
---|---|---|
0 | \n", "session_id | \n", "123 | \n", "
1 | \n", "Original Data | \n", "(1000, 10) | \n", "
2 | \n", "Missing Values | \n", "False | \n", "
3 | \n", "Numeric Features | \n", "10 | \n", "
4 | \n", "Categorical Features | \n", "0 | \n", "
5 | \n", "Ordinal Features | \n", "False | \n", "
6 | \n", "High Cardinality Features | \n", "False | \n", "
7 | \n", "High Cardinality Method | \n", "None | \n", "
8 | \n", "Transformed Data | \n", "(1000, 10) | \n", "
9 | \n", "CPU Jobs | \n", "-1 | \n", "
10 | \n", "Use GPU | \n", "False | \n", "
11 | \n", "Log Experiment | \n", "True | \n", "
12 | \n", "Experiment Name | \n", "anomaly-demo | \n", "
13 | \n", "USI | \n", "5118 | \n", "
14 | \n", "Imputation Type | \n", "simple | \n", "
15 | \n", "Iterative Imputation Iteration | \n", "None | \n", "
16 | \n", "Numeric Imputer | \n", "mean | \n", "
17 | \n", "Iterative Imputation Numeric Model | \n", "None | \n", "
18 | \n", "Categorical Imputer | \n", "mode | \n", "
19 | \n", "Iterative Imputation Categorical Model | \n", "None | \n", "
20 | \n", "Unknown Categoricals Handling | \n", "least_frequent | \n", "
21 | \n", "Normalize | \n", "False | \n", "
22 | \n", "Normalize Method | \n", "None | \n", "
23 | \n", "Transformation | \n", "False | \n", "
24 | \n", "Transformation Method | \n", "None | \n", "
25 | \n", "PCA | \n", "False | \n", "
26 | \n", "PCA Method | \n", "None | \n", "
27 | \n", "PCA Components | \n", "None | \n", "
28 | \n", "Ignore Low Variance | \n", "False | \n", "
29 | \n", "Combine Rare Levels | \n", "False | \n", "
30 | \n", "Rare Level Threshold | \n", "None | \n", "
31 | \n", "Numeric Binning | \n", "False | \n", "
32 | \n", "Remove Outliers | \n", "False | \n", "
33 | \n", "Outliers Threshold | \n", "None | \n", "
34 | \n", "Remove Multicollinearity | \n", "False | \n", "
35 | \n", "Multicollinearity Threshold | \n", "None | \n", "
36 | \n", "Clustering | \n", "False | \n", "
37 | \n", "Clustering Iteration | \n", "None | \n", "
38 | \n", "Polynomial Features | \n", "False | \n", "
39 | \n", "Polynomial Degree | \n", "None | \n", "
40 | \n", "Trignometry Features | \n", "False | \n", "
41 | \n", "Polynomial Threshold | \n", "None | \n", "
42 | \n", "Group Features | \n", "False | \n", "
43 | \n", "Feature Selection | \n", "False | \n", "
44 | \n", "Features Selection Threshold | \n", "None | \n", "
45 | \n", "Feature Interaction | \n", "False | \n", "
46 | \n", "Feature Ratio | \n", "False | \n", "
47 | \n", "Interaction Threshold | \n", "None | \n", "
\n", " | Name | \n", "Reference | \n", "
---|---|---|
ID | \n", "\n", " | \n", " |
abod | \n", "Angle-base Outlier Detection | \n", "pyod.models.abod.ABOD | \n", "
cluster | \n", "Clustering-Based Local Outlier | \n", "pyod.models.cblof.CBLOF | \n", "
cof | \n", "Connectivity-Based Local Outlier | \n", "pyod.models.cof.COF | \n", "
iforest | \n", "Isolation Forest | \n", "pyod.models.iforest.IForest | \n", "
histogram | \n", "Histogram-based Outlier Detection | \n", "pyod.models.hbos.HBOS | \n", "
knn | \n", "K-Nearest Neighbors Detector | \n", "pyod.models.knn.KNN | \n", "
lof | \n", "Local Outlier Factor | \n", "pyod.models.lof.LOF | \n", "
svm | \n", "One-class SVM detector | \n", "pyod.models.ocsvm.OCSVM | \n", "
pca | \n", "Principal Component Analysis | \n", "pyod.models.pca.PCA | \n", "
mcd | \n", "Minimum Covariance Determinant | \n", "pyod.models.mcd.MCD | \n", "
sod | \n", "Subspace Outlier Detection | \n", "pyod.models.sod.SOD | \n", "
sos | \n", "Stochastic Outlier Selection | \n", "pyod.models.sos.SOS | \n", "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.030361 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.078290 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.026938 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.053551 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.015639 | \n", "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.030361 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.078290 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.026938 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.053551 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.015639 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "0.305055 | \n", "0.656837 | \n", "0.331665 | \n", "0.822525 | \n", "0.907127 | \n", "0.882276 | \n", "0.855732 | \n", "0.584786 | \n", "0.808640 | \n", "0.242762 | \n", "0 | \n", "-0.082756 | \n", "
996 | \n", "0.812627 | \n", "0.864258 | \n", "0.616604 | \n", "0.167966 | \n", "0.811223 | \n", "0.938071 | \n", "0.418462 | \n", "0.472306 | \n", "0.348347 | \n", "0.671129 | \n", "0 | \n", "-0.065453 | \n", "
997 | \n", "0.250967 | \n", "0.138627 | \n", "0.919703 | \n", "0.461234 | \n", "0.886555 | \n", "0.869888 | \n", "0.800908 | \n", "0.530324 | \n", "0.779433 | \n", "0.234952 | \n", "0 | \n", "-0.055405 | \n", "
998 | \n", "0.502436 | \n", "0.936820 | \n", "0.580062 | \n", "0.540773 | \n", "0.151995 | \n", "0.059452 | \n", "0.225220 | \n", "0.242755 | \n", "0.279385 | \n", "0.538755 | \n", "0 | \n", "-0.068005 | \n", "
999 | \n", "0.457991 | \n", "0.017755 | \n", "0.714113 | \n", "0.125992 | \n", "0.063316 | \n", "0.154739 | \n", "0.922974 | \n", "0.692299 | \n", "0.816777 | \n", "0.307592 | \n", "0 | \n", "-0.012268 | \n", "
1000 rows × 12 columns
\n", "\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
Pipeline(memory=None,\n", " steps=[('dtypes',\n", " DataTypes_Auto_infer(categorical_features=[],\n", " display_types=True, features_todrop=[],\n", " id_columns=[], ml_usecase='regression',\n", " numerical_features=[],\n", " target='UNSUPERVISED_DUMMY_TARGET',\n", " time_features=[])),\n", " ('imputer',\n", " Simple_Imputer(categorical_strategy='most frequent',\n", " fill_value_categorical=None,\n", " fill_value_numerical=None...\n", " ('fix_perfect', 'passthrough'),\n", " ('clean_names', Clean_Colum_Names()),\n", " ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),\n", " ('dfs', 'passthrough'), ('pca', 'passthrough'),\n", " ['trained_model',\n", " IForest(behaviour='new', bootstrap=False, contamination=0.05,\n", " max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1,\n", " random_state=123, verbose=0)]],\n", " verbose=False)
DataTypes_Auto_infer(ml_usecase='regression',\n", " target='UNSUPERVISED_DUMMY_TARGET')
Simple_Imputer(categorical_strategy='most frequent',\n", " fill_value_categorical=None, fill_value_numerical=None,\n", " numeric_strategy='mean', target_variable=None)
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n", " target='UNSUPERVISED_DUMMY_TARGET')
passthrough
passthrough
passthrough
passthrough
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n", " target='UNSUPERVISED_DUMMY_TARGET')
Make_Time_Features(list_of_features=None,\n", " time_feature=Index([], dtype='object'))
passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
Dummify(target='UNSUPERVISED_DUMMY_TARGET')
passthrough
Clean_Colum_Names()
passthrough
passthrough
passthrough
passthrough
IForest(behaviour='new', bootstrap=False, contamination=0.05,\n", " max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1,\n", " random_state=123, verbose=0)
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Anomaly | \n", "Anomaly_Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "