{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Detecting Dataset Drift with whylogs\n", "\n", "We will be using data from Kaggle (https://www.kaggle.com/yugagrawal95/sample-media-spends-data) that is packaged with this notebook." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import datetime\n", "import math\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as ticker\n", "\n", "from whylogs import get_or_create_session" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the Media Spend dataset into a pandas DataFrame, parsing Calendar_Week as datetimes.\n", "# `infer_datetime_format` is omitted: pandas >= 2.0 deprecates it and infers a consistent format by default.\n", "data = pd.read_csv(\"MediaSpendDataset.csv\",\n", " parse_dates=[\"Calendar_Week\"])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Division | \n", "Calendar_Week | \n", "Paid_Views | \n", "Organic_Views | \n", "Google_Impressions | \n", "Email_Impressions | \n", "Facebook_Impressions | \n", "Affiliate_Impressions | \n", "Overall_Views | \n", "Sales | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "A | \n", "2018-01-06 | \n", "392 | \n", "422 | \n", "408 | \n", "3.498950e+05 | \n", "73580 | \n", "12072 | \n", "682 | \n", "59417 | \n", "
1 | \n", "A | \n", "2018-01-13 | \n", "787 | \n", "904 | \n", "110 | \n", "5.062702e+05 | \n", "11804 | \n", "9499 | \n", "853 | \n", "56806 | \n", "
2 | \n", "A | \n", "2018-01-20 | \n", "81 | \n", "970 | \n", "742 | \n", "4.300422e+05 | \n", "52232 | \n", "17048 | \n", "759 | \n", "48715 | \n", "
3 | \n", "A | \n", "2018-01-27 | \n", "25 | \n", "575 | \n", "65 | \n", "4.177457e+05 | \n", "78640 | \n", "10207 | \n", "942 | \n", "72047 | \n", "
4 | \n", "A | \n", "2018-02-03 | \n", "565 | \n", "284 | \n", "295 | \n", "4.085058e+05 | \n", "40561 | \n", "5834 | \n", "658 | \n", "56235 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
3046 | \n", "Z | \n", "2020-02-01 | \n", "29239 | \n", "25311 | \n", "622406 | \n", "1.459071e+06 | \n", "45026 | \n", "12098 | \n", "53667 | \n", "82707 | \n", "
3047 | \n", "Z | \n", "2020-02-08 | \n", "26230 | \n", "28031 | \n", "624409 | \n", "5.342505e+05 | \n", "227070 | \n", "9548 | \n", "53665 | \n", "84503 | \n", "
3048 | \n", "Z | \n", "2020-02-15 | \n", "24749 | \n", "31281 | \n", "439362 | \n", "4.227182e+05 | \n", "393685 | \n", "9861 | \n", "55561 | \n", "147325 | \n", "
3049 | \n", "Z | \n", "2020-02-22 | \n", "20713 | \n", "30356 | \n", "464178 | \n", "6.085799e+05 | \n", "424676 | \n", "10221 | \n", "49221 | \n", "111525 | \n", "
3050 | \n", "Z | \n", "2020-02-29 | \n", "15990 | \n", "26993 | \n", "449032 | \n", "4.390165e+05 | \n", "161439 | \n", "10294 | \n", "42994 | \n", "98187 | \n", "
3051 rows × 10 columns
\n", "Calendar_Week | \n", "2018-01-06 | \n", "2018-01-13 | \n", "2018-01-20 | \n", "2018-01-27 | \n", "2018-02-03 | \n", "2018-02-10 | \n", "2018-02-17 | \n", "2018-02-24 | \n", "2018-03-03 | \n", "2018-03-10 | \n", "... | \n", "2019-12-28 | \n", "2020-01-04 | \n", "2020-01-11 | \n", "2020-01-18 | \n", "2020-01-25 | \n", "2020-02-01 | \n", "2020-02-08 | \n", "2020-02-15 | \n", "2020-02-22 | \n", "2020-02-29 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Division | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Paid_Views | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Organic_Views | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Google_Impressions | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Email_Impressions | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Facebook_Impressions | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Affiliate_Impressions | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Overall_Views | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
Sales | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "... | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "
9 rows × 113 columns
\n", "Division | \n", "A | \n", "B | \n", "C | \n", "D | \n", "E | \n", "F | \n", "G | \n", "H | \n", "I | \n", "J | \n", "... | \n", "Q | \n", "R | \n", "S | \n", "T | \n", "U | \n", "V | \n", "W | \n", "X | \n", "Y | \n", "Z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Calendar_Week | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Paid_Views | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Organic_Views | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Google_Impressions | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Email_Impressions | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Facebook_Impressions | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Affiliate_Impressions | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Overall_Views | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
Sales | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "... | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "113 | \n", "226 | \n", "
9 rows × 26 columns
\n", "\n", " | column | \n", "count | \n", "null_count | \n", "bool_count | \n", "numeric_count | \n", "max | \n", "mean | \n", "min | \n", "stddev | \n", "nunique_numbers | \n", "... | \n", "ununique_str_upper | \n", "quantile_0.0000 | \n", "quantile_0.0100 | \n", "quantile_0.0500 | \n", "quantile_0.2500 | \n", "quantile_0.5000 | \n", "quantile_0.7500 | \n", "quantile_0.9500 | \n", "quantile_0.9900 | \n", "quantile_1.0000 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Facebook_Impressions | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "7.558435e+06 | \n", "269330.694088 | \n", "29.00000 | \n", "4.800746e+05 | \n", "2793.0 | \n", "... | \n", "0.0 | \n", "29.000000 | \n", "3092.000000 | \n", "13177.00000 | \n", "55651.00000 | \n", "122368.00 | \n", "279480.0 | \n", "1068670.000 | \n", "2549912.0 | \n", "7558435.0 | \n", "
1 | \n", "Overall_Views | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "6.350570e+05 | \n", "24357.878561 | \n", "2.00000 | \n", "4.901499e+04 | \n", "2360.0 | \n", "... | \n", "0.0 | \n", "2.000000 | \n", "32.000000 | \n", "159.00000 | \n", "698.00000 | \n", "5772.00 | \n", "27299.0 | \n", "106580.000 | \n", "249012.0 | \n", "635057.0 | \n", "
2 | \n", "Division | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000e+00 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000e+00 | \n", "0.0 | \n", "... | \n", "26.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "Sales | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "3.575430e+06 | \n", "186838.814459 | \n", "15436.00000 | \n", "2.372908e+05 | \n", "2788.0 | \n", "... | \n", "0.0 | \n", "15436.000000 | \n", "25081.000000 | \n", "44106.00000 | \n", "72069.00000 | \n", "112454.00 | \n", "201010.0 | \n", "579378.000 | \n", "997182.0 | \n", "3575430.0 | \n", "
4 | \n", "Paid_Views | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "5.181900e+05 | \n", "13424.596510 | \n", "1.00000 | \n", "2.974463e+04 | \n", "2111.0 | \n", "... | \n", "0.0 | \n", "1.000000 | \n", "17.000000 | \n", "120.00000 | \n", "497.00000 | \n", "982.00 | \n", "14169.0 | \n", "54496.000 | \n", "140432.0 | \n", "518190.0 | \n", "
5 | \n", "Email_Impressions | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "5.160764e+06 | \n", "755149.145825 | \n", "40894.44732 | \n", "6.095038e+05 | \n", "2808.0 | \n", "... | \n", "0.0 | \n", "40894.449219 | \n", "72250.273438 | \n", "133280.15625 | \n", "377753.90625 | \n", "590957.25 | \n", "955171.0 | \n", "1884721.125 | \n", "3123674.5 | \n", "5160763.5 | \n", "
6 | \n", "Google_Impressions | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "1.715044e+07 | \n", "898847.349715 | \n", "7.00000 | \n", "1.391820e+06 | \n", "2678.0 | \n", "... | \n", "0.0 | \n", "7.000000 | \n", "71.000000 | \n", "262.00000 | \n", "164461.00000 | \n", "490842.00 | \n", "1042217.0 | \n", "3213254.000 | \n", "5978991.0 | \n", "17150440.0 | \n", "
7 | \n", "Calendar_Week | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000e+00 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000e+00 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
8 | \n", "Affiliate_Impressions | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "1.757910e+05 | \n", "23426.283476 | \n", "910.00000 | \n", "2.198081e+04 | \n", "2715.0 | \n", "... | \n", "0.0 | \n", "910.000000 | \n", "1678.000000 | \n", "3255.00000 | \n", "9202.00000 | \n", "17048.00 | \n", "28283.0 | \n", "69402.000 | \n", "112846.0 | \n", "175791.0 | \n", "
9 | \n", "Organic_Views | \n", "2808.0 | \n", "0.0 | \n", "0.0 | \n", "2808.0 | \n", "2.704530e+05 | \n", "11406.093305 | \n", "1.00000 | \n", "2.218563e+04 | \n", "2316.0 | \n", "... | \n", "0.0 | \n", "1.000000 | \n", "28.000000 | \n", "138.00000 | \n", "667.00000 | \n", "3075.00 | \n", "12929.0 | \n", "47239.000 | \n", "100749.0 | \n", "270453.0 | \n", "
10 rows × 32 columns
\n", "\n", " | column | \n", "count | \n", "null_count | \n", "bool_count | \n", "numeric_count | \n", "max | \n", "mean | \n", "min | \n", "stddev | \n", "nunique_numbers | \n", "... | \n", "ununique_str_upper | \n", "quantile_0.0000 | \n", "quantile_0.0100 | \n", "quantile_0.0500 | \n", "quantile_0.2500 | \n", "quantile_0.5000 | \n", "quantile_0.7500 | \n", "quantile_0.9500 | \n", "quantile_0.9900 | \n", "quantile_1.0000 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Google_Impressions | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "4924815.000 | \n", "739724.279835 | \n", "156.00000 | \n", "806022.000918 | \n", "243.0 | \n", "... | \n", "0.0 | \n", "156.000000 | \n", "196.000000 | \n", "531.000000 | \n", "274410.000 | \n", "504923.0000 | \n", "926291.0000 | \n", "2542278.0 | \n", "4304549.00 | \n", "4924815.0 | \n", "
1 | \n", "Facebook_Impressions | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "1884146.000 | \n", "266771.794239 | \n", "6987.00000 | \n", "263839.011975 | \n", "243.0 | \n", "... | \n", "0.0 | \n", "6987.000000 | \n", "13480.000000 | \n", "33168.000000 | \n", "103418.000 | \n", "180094.0000 | \n", "331702.0000 | \n", "792423.0 | \n", "1073992.00 | \n", "1884146.0 | \n", "
2 | \n", "Sales | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "1181536.000 | \n", "175069.012346 | \n", "23012.00000 | \n", "162280.683991 | \n", "243.0 | \n", "... | \n", "0.0 | \n", "23012.000000 | \n", "25043.000000 | \n", "48057.000000 | \n", "86953.000 | \n", "124325.0000 | \n", "206752.0000 | \n", "430355.0 | \n", "968892.00 | \n", "1181536.0 | \n", "
3 | \n", "Calendar_Week | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "Affiliate_Impressions | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "79685.000 | \n", "16955.390947 | \n", "1139.00000 | \n", "15712.996483 | \n", "241.0 | \n", "... | \n", "0.0 | \n", "1139.000000 | \n", "1529.000000 | \n", "2498.000000 | \n", "7719.000 | \n", "12525.0000 | \n", "20478.0000 | \n", "59464.0 | \n", "74730.00 | \n", "79685.0 | \n", "
5 | \n", "Paid_Views | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "195738.000 | \n", "34384.152263 | \n", "374.00000 | \n", "35731.933116 | \n", "242.0 | \n", "... | \n", "0.0 | \n", "374.000000 | \n", "1033.000000 | \n", "4106.000000 | \n", "13171.000 | \n", "23623.0000 | \n", "41052.0000 | \n", "118075.0 | \n", "183982.00 | \n", "195738.0 | \n", "
6 | \n", "Email_Impressions | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "7317730.249 | \n", "822449.835680 | \n", "61334.57103 | \n", "791161.059026 | \n", "243.0 | \n", "... | \n", "0.0 | \n", "61334.570312 | \n", "65779.898438 | \n", "137562.171875 | \n", "387119.625 | \n", "580975.6875 | \n", "997442.8125 | \n", "2390848.0 | \n", "3884578.75 | \n", "7317730.0 | \n", "
7 | \n", "Organic_Views | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "198041.000 | \n", "35884.152263 | \n", "1917.00000 | \n", "32322.531757 | \n", "242.0 | \n", "... | \n", "0.0 | \n", "1917.000000 | \n", "2064.000000 | \n", "7516.000000 | \n", "17112.000 | \n", "26315.0000 | \n", "44050.0000 | \n", "104290.0 | \n", "173100.00 | \n", "198041.0 | \n", "
8 | \n", "Division | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "26.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
9 | \n", "Overall_Views | \n", "243.0 | \n", "0.0 | \n", "0.0 | \n", "243.0 | \n", "381029.000 | \n", "69847.102881 | \n", "2767.00000 | \n", "66124.495307 | \n", "243.0 | \n", "... | \n", "0.0 | \n", "2767.000000 | \n", "2934.000000 | \n", "12456.000000 | \n", "31996.000 | \n", "49221.0000 | \n", "82206.0000 | \n", "227079.0 | \n", "337803.00 | \n", "381029.0 | \n", "
10 rows × 32 columns
\n", "