{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PyCaret 2 Anomaly Example\n", "This notebook was created using PyCaret 2.0. Last updated: 28-07-2020" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pycaret-nightly-0.39\n" ] } ], "source": [ "# check version\n", "from pycaret.utils import version\n", "version()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Loading Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "
| Description | Value | |
|---|---|---|
| 0 | \n", "session_id | \n", "123 | \n", "
| 1 | \n", "Original Data | \n", "(1000, 10) | \n", "
| 2 | \n", "Missing Values | \n", "False | \n", "
| 3 | \n", "Numeric Features | \n", "10 | \n", "
| 4 | \n", "Categorical Features | \n", "0 | \n", "
| 5 | \n", "Ordinal Features | \n", "False | \n", "
| 6 | \n", "High Cardinality Features | \n", "False | \n", "
| 7 | \n", "Transformed Data | \n", "(1000, 10) | \n", "
| 8 | \n", "Numeric Imputer | \n", "mean | \n", "
| 9 | \n", "Categorical Imputer | \n", "constant | \n", "
| 10 | \n", "Normalize | \n", "False | \n", "
| 11 | \n", "Normalize Method | \n", "None | \n", "
| 12 | \n", "Transformation | \n", "False | \n", "
| 13 | \n", "Transformation Method | \n", "None | \n", "
| 14 | \n", "PCA | \n", "False | \n", "
| 15 | \n", "PCA Method | \n", "None | \n", "
| 16 | \n", "PCA components | \n", "None | \n", "
| 17 | \n", "Ignore Low Variance | \n", "False | \n", "
| 18 | \n", "Combine Rare Levels | \n", "False | \n", "
| 19 | \n", "Rare Level Threshold | \n", "None | \n", "
| 20 | \n", "Numeric Binning | \n", "False | \n", "
| 21 | \n", "Remove Multicollinearity | \n", "False | \n", "
| 22 | \n", "Multicollinearity Threshold | \n", "None | \n", "
| 23 | \n", "Group Features | \n", "False | \n", "
| \n", " | Name | \n", "Reference | \n", "
|---|---|---|
| ID | \n", "\n", " | \n", " |
| abod | \n", "Angle-base Outlier Detection | \n", "pyod.models.abod.ABOD | \n", "
| iforest | \n", "Isolation Forest | \n", "pyod.models.iforest | \n", "
| cluster | \n", "Clustering-Based Local Outlier | \n", "pyod.models.cblof | \n", "
| cof | \n", "Connectivity-Based Outlier Factor | \n", "pyod.models.cof | \n", "
| histogram | \n", "Histogram-based Outlier Detection | \n", "pyod.models.hbos | \n", "
| knn | \n", "k-Nearest Neighbors Detector | \n", "pyod.models.knn | \n", "
| lof | \n", "Local Outlier Factor | \n", "pyod.models.lof | \n", "
| svm | \n", "One-class SVM detector | \n", "pyod.models.ocsvm | \n", "
| pca | \n", "Principal Component Analysis | \n", "pyod.models.pca | \n", "
| mcd | \n", "Minimum Covariance Determinant | \n", "pyod.models.mcd | \n", "
| sod | \n", "Subspace Outlier Detection | \n", "pyod.models.sod | \n", "
| sos | \n", "Stochastic Outlier Selection | \n", "pyod.models.sos | \n", "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Label | \n", "Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "Label | \n", "Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "0 | \n", "-0.035865 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "0 | \n", "-0.084927 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "1 | \n", "0.025356 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "1 | \n", "0.042415 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "0 | \n", "-0.023408 | \n", "
Pipeline(memory=None,\n",
" steps=[('dtypes',\n",
" DataTypes_Auto_infer(categorical_features=[],\n",
" display_types=True, features_todrop=[],\n",
" ml_usecase='regression',\n",
" numerical_features=[],\n",
" target='dummy_target',\n",
" time_features=[])),\n",
" ('imputer',\n",
" Simple_Imputer(categorical_strategy='not_available',\n",
" numeric_strategy='mean',\n",
" target_variable=None)),\n",
" ('new_levels1',\n",
" New_Catagorical_L...\n",
" target='dummy_target')),\n",
" ('feature_time',\n",
" Make_Time_Features(list_of_features=None, time_feature=[])),\n",
" ('group', Empty()), ('scaling', Empty()),\n",
" ('P_transform', Empty()), ('binn', Empty()),\n",
" ('fix_perfect', Empty()), ('rem_outliers', Empty()),\n",
" ('dummy', Dummify(target='dummy_target')),\n",
" ('clean_names', Clean_Colum_Names()), ('fix_multi', Empty()),\n",
" ('pca', Empty())],\n",
" verbose=False)DataTypes_Auto_infer(ml_usecase='regression', target='dummy_target')
Simple_Imputer(categorical_strategy='not_available', numeric_strategy='mean',\n",
" target_variable=None)New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n",
" target='dummy_target')Empty()
Empty()
Empty()
Empty()
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent',\n",
" target='dummy_target')Make_Time_Features(list_of_features=None)
Empty()
Empty()
Empty()
Empty()
Empty()
Empty()
Dummify(target='dummy_target')
Clean_Colum_Names()
Empty()
Empty()
| \n", " | Col1 | \n", "Col2 | \n", "Col3 | \n", "Col4 | \n", "Col5 | \n", "Col6 | \n", "Col7 | \n", "Col8 | \n", "Col9 | \n", "Col10 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.263995 | \n", "0.764929 | \n", "0.138424 | \n", "0.935242 | \n", "0.605867 | \n", "0.518790 | \n", "0.912225 | \n", "0.608234 | \n", "0.723782 | \n", "0.733591 | \n", "
| 1 | \n", "0.546092 | \n", "0.653975 | \n", "0.065575 | \n", "0.227772 | \n", "0.845269 | \n", "0.837066 | \n", "0.272379 | \n", "0.331679 | \n", "0.429297 | \n", "0.367422 | \n", "
| 2 | \n", "0.336714 | \n", "0.538842 | \n", "0.192801 | \n", "0.553563 | \n", "0.074515 | \n", "0.332993 | \n", "0.365792 | \n", "0.861309 | \n", "0.899017 | \n", "0.088600 | \n", "
| 3 | \n", "0.092108 | \n", "0.995017 | \n", "0.014465 | \n", "0.176371 | \n", "0.241530 | \n", "0.514724 | \n", "0.562208 | \n", "0.158963 | \n", "0.073715 | \n", "0.208463 | \n", "
| 4 | \n", "0.325261 | \n", "0.805968 | \n", "0.957033 | \n", "0.331665 | \n", "0.307923 | \n", "0.355315 | \n", "0.501899 | \n", "0.558449 | \n", "0.885169 | \n", "0.182754 | \n", "