{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# CS5481 - Tutorial 8\n", "## Social Network and Anomaly Detection\n", "\n", "Welcome to CS5481 tutorial 8. In this tutorial, you will learn how to represent a graph, compute some important graph attributes with networkx, and perform anomaly detection with pyod.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Contents\n", "1. Anomaly Detection\n", "2. Create a graph and obtain its attributes with networkx\n", "3. Practice" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Anomaly Detection" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "* KMeans\n", "* PCA\n", "* IsolationForest\n", "* SVM\n", "* EllipticEnvelope" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from numpy import percentile\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "sns.set_theme();\n", "sns.set_style(\"darkgrid\",{\"font.sans-serif\":['simhei','Droid Sans Fallback']})\n", "from sklearn.ensemble import IsolationForest\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "from pyod.models.abod import ABOD\n", "from pyod.models.cblof import CBLOF\n", "from pyod.models.feature_bagging import FeatureBagging\n", "from pyod.models.hbos import HBOS\n", "from pyod.models.iforest import IForest\n", "from pyod.models.knn import KNN\n", "from pyod.models.lof import LOF\n", "from pyod.models.mcd import MCD\n", "from pyod.models.ocsvm import OCSVM\n", "from pyod.models.pca import PCA\n", "from pyod.models.lscp import LSCP\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | date | \n", "num_people | \n", "num_order | \n", "
|---|---|---|---|
| 0 | \n", "2019-01-01 | \n", "1046.0 | \n", "295.0 | \n", "
| 1 | \n", "2019-01-02 | \n", "733.0 | \n", "191.0 | \n", "
| 2 | \n", "2019-01-03 | \n", "695.0 | \n", "222.0 | \n", "
| 3 | \n", "2019-01-04 | \n", "580.0 | \n", "193.0 | \n", "
| 4 | \n", "2019-01-05 | \n", "844.0 | \n", "285.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 890 | \n", "2020-10-31 | \n", "NaN | \n", "346.0 | \n", "
| 891 | \n", "2020-11-30 | \n", "NaN | \n", "204.0 | \n", "
| 892 | \n", "2020-12-31 | \n", "NaN | \n", "288.0 | \n", "
| 893 | \n", "2021-01-31 | \n", "NaN | \n", "254.0 | \n", "
| 894 | \n", "2021-02-28 | \n", "NaN | \n", "285.0 | \n", "
895 rows × 3 columns
\n", "