{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PyCaret 2 Anomaly Example\n", "This notebook was created using PyCaret 2.0. Last updated: 28-07-2020" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pycaret-nightly-0.39\n" ] } ], "source": [ "# check version\n", "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Loading Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "
Description | Value | |
---|---|---|
0 | \n", "session_id | \n", "123 | \n", "
1 | \n", "Original Data | \n", "(1000, 10) | \n", "
2 | \n", "Missing Values | \n", "False | \n", "
3 | \n", "Numeric Features | \n", "10 | \n", "
4 | \n", "Categorical Features | \n", "0 | \n", "
5 | \n", "Ordinal Features | \n", "False | \n", "
6 | \n", "High Cardinality Features | \n", "False | \n", "
7 | \n", "Transformed Data | \n", "(1000, 10) | \n", "
8 | \n", "Numeric Imputer | \n", "mean | \n", "
9 | \n", "Categorical Imputer | \n", "constant | \n", "
10 | \n", "Normalize | \n", "False | \n", "
11 | \n", "Normalize Method | \n", "None | \n", "
12 | \n", "Transformation | \n", "False | \n", "
13 | \n", "Transformation Method | \n", "None | \n", "
14 | \n", "PCA | \n", "False | \n", "
15 | \n", "PCA Method | \n", "None | \n", "
16 | \n", "PCA components | \n", "None | \n", "
17 | \n", "Ignore Low Variance | \n", "False | \n", "
18 | \n", "Combine Rare Levels | \n", "False | \n", "
19 | \n", "Rare Level Threshold | \n", "None | \n", "
20 | \n", "Numeric Binning | \n", "False | \n", "
21 | \n", "Remove Multicollinearity | \n", "False | \n", "
22 | \n", "Multicollinearity Threshold | \n", "None | \n", "
23 | \n", "Group Features | \n", "False | \n", "
\n", " | Name | \n", "Reference | \n", "
---|---|---|
ID | \n", "\n", " | \n", " |
abod | \n", "Angle-base Outlier Detection | \n", "pyod.models.abod.ABOD | \n", "
iforest | \n", "Isolation Forest | \n", "pyod.models.iforest | \n", "
cluster | \n", "Clustering-Based Local Outlier | \n", "pyod.models.cblof | \n", "
cof | \n", "Connectivity-Based Outlier Factor | \n", "pyod.models.cof | \n", "
histogram | \n", "Histogram-based Outlier Detection | \n", "pyod.models.hbos | \n", "
knn | \n", "k-Nearest Neighbors Detector | \n", "pyod.models.knn | \n", "
lof | \n", "Local Outlier Factor | \n", "pyod.models.lof | \n", "
svm | \n", "One-class SVM detector | \n", "pyod.models.ocsvm | \n", "
pca | \n", "Principal Component Analysis | \n", "pyod.models.pca | \n", "
mcd | \n", "Minimum Covariance Determinant | \n", "pyod.models.mcd | \n", "
sod | \n", "Subspace Outlier Detection | \n", "pyod.models.sod | \n", "
sos | \n", "Stochastic Outlier Selection | \n", "pyod.models.sos | \n", "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Label | \n", "Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Label | \n", "Score | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
Pipeline(memory=None,\n", " steps=[('dtypes',\n", " DataTypes_Auto_infer(categorical_features=[],\n", " display_types=True, features_todrop=[],\n", " ml_usecase='regression',\n", " numerical_features=[],\n", " target='dummy_target',\n", " time_features=[])),\n", " ('imputer',\n", " Simple_Imputer(categorical_strategy='not_available',\n", " numeric_strategy='mean',\n", " target_variable=None)),\n", " ('new_levels1',\n", " New_Catagorical_L...\n", " target='dummy_target')),\n", " ('feature_time',\n", " Make_Time_Features(list_of_features=None, time_feature=[])),\n", " ('group', Empty()), ('scaling', Empty()),\n", " ('P_transform', Empty()), ('binn', Empty()),\n", " ('fix_perfect', Empty()), ('rem_outliers', Empty()),\n", " ('dummy', Dummify(target='dummy_target')),\n", " ('clean_names', Clean_Colum_Names()), ('fix_multi', Empty()),\n", " ('pca', Empty())],\n", " verbose=False)
DataTypes_Auto_infer(ml_usecase='regression', target='dummy_target')
Simple_Imputer(categorical_strategy='not_available', numeric_strategy='mean',\n", " target_variable=None)
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n", " target='dummy_target')
Empty()
Empty()
Empty()
Empty()
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n", " target='dummy_target')
Make_Time_Features(list_of_features=None)
Empty()
Empty()
Empty()
Empty()
Empty()
Empty()
Dummify(target='dummy_target')
Clean_Colum_Names()
Empty()
Empty()
\n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "