{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's go on to our modeling step. As a reminder, our plan of action was as follows:\n", "\n", "1. Perform EDA on the dataset to extract valuable insight about the process generating the time series **(COMPLETED)**.\n", "2. Build a baseline model (univariate model without exogenous variables) for benchmarking purposes. **(Covered in this notebook)**\n", "3. Build a univariate model with all exogenous variables to check best possible performance. **(Covered in this notebook)**\n", "4. Evaluate the model with exogenous variables and discuss any potential issues. **(Covered in this notebook)**\n", "5. Overcome issues identified above. **(Covered in this notebook)**\n", "6. Make future predictions with the best model. **(Covered in this notebook)**\n", "7. Replicate flow with Automated Time Series Modeling (AutoML)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Only enable critical logging (Optional)\n", "import os\n", "os.environ[\"PYCARET_CUSTOM_LOGGING_LEVEL\"] = \"CRITICAL\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "System:\n", " python: 3.10.11 | packaged by Anaconda, Inc. 
| (main, May 16 2023, 00:55:32) [MSC v.1916 64 bit (AMD64)]\n", "executable: C:\\Users\\Nikhil\\.conda\\envs\\pycaret_dev_sktime_19p1\\python.exe\n", " machine: Windows-10-10.0.19045-SP0\n", "\n", "PyCaret required dependencies:\n", " pip: 23.1.2\n", " setuptools: 67.8.0\n", " pycaret: 3.0.2\n", " IPython: 7.34.0\n", " ipywidgets: 7.7.5\n", " tqdm: 4.65.0\n", " numpy: 1.23.0\n", " pandas: 1.5.3\n", " jinja2: 3.1.2\n", " scipy: 1.10.1\n", " joblib: 1.2.0\n", " sklearn: 1.2.2\n", " pyod: 1.0.9\n", " imblearn: 0.10.1\n", " category_encoders: 2.6.1\n", " lightgbm: 3.3.5\n", " numba: 0.57.0\n", " requests: 2.31.0\n", " matplotlib: 3.7.1\n", " scikitplot: 0.3.7\n", " yellowbrick: 1.5\n", " plotly: 5.15.0\n", " plotly-resampler: Not installed\n", " kaleido: 0.2.1\n", " schemdraw: 0.15\n", " statsmodels: 0.14.0\n", " sktime: 0.19.1\n", " tbats: 1.1.3\n", " pmdarima: 2.0.3\n", " psutil: 5.9.5\n", " markupsafe: 2.1.3\n", " pickle5: Not installed\n", " cloudpickle: 2.2.1\n", " deprecation: 2.1.0\n", " xxhash: 3.2.0\n", " wurlitzer: Not installed\n", "\n", "PyCaret optional dependencies:\n", " shap: 0.41.0\n", " interpret: 0.4.2\n", " umap: 0.5.3\n", " pandas_profiling: 4.2.0\n", " explainerdashboard: 0.4.2.2\n", " autoviz: 0.1.720\n", " fairlearn: 0.7.0\n", " deepchecks: 0.17.2\n", " xgboost: 1.7.5\n", " catboost: 1.2\n", " kmodes: 0.12.2\n", " mlxtend: 0.22.0\n", " statsforecast: 1.5.0\n", " tune_sklearn: 0.4.5\n", " ray: 2.5.0\n", " hyperopt: 0.2.7\n", " optuna: 3.2.0\n", " skopt: 0.9.0\n", " mlflow: 1.30.1\n", " gradio: 3.34.0\n", " fastapi: 0.96.0\n", " uvicorn: 0.22.0\n", " m2cgen: 0.10.0\n", " evidently: 0.2.8\n", " fugue: 0.8.4\n", " streamlit: Not installed\n", " prophet: 1.1.4\n" ] } ], "source": [ "def what_is_installed():\n", " from pycaret import show_versions\n", " show_versions()\n", "\n", "try:\n", " what_is_installed()\n", "except ModuleNotFoundError:\n", " !pip install pycaret\n", " what_is_installed()" ] }, { "cell_type": "code", "execution_count": 3, 
"metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pycaret.datasets import get_data\n", "from pycaret.time_series import TSForecastingExperiment" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Global Figure Settings for notebook ----\n", "# Depending on whether you are using jupyter notebook, jupyter lab, Google Colab, you may have to set the renderer appropriately\n", "# NOTE: Setting to a static renderer here so that the notebook saved size is reduced.\n", "global_fig_settings = {\n", " # \"renderer\": \"notebook\",\n", " \"renderer\": \"png\",\n", " \"width\": 1000,\n", " \"height\": 600,\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateTimeCO(GT)PT08.S1(CO)NMHC(GT)C6H6(GT)PT08.S2(NMHC)NOx(GT)PT08.S3(NOx)NO2(GT)PT08.S4(NO2)PT08.S5(O3)TRHAH
02004-03-1018:00:002.6136015011.9104616610561131692126813.648.90.7578
12004-03-1019:00:002.012921129.4955103117492155997213.347.70.7255
22004-03-1020:00:002.21402889.093913111401141555107411.954.00.7502
32004-03-1021:00:002.21376809.294817210921221584120311.060.00.7867
42004-03-1022:00:001.61272516.583613112051161490111011.259.60.7888
\n", "
" ], "text/plain": [ " Date Time CO(GT) PT08.S1(CO) NMHC(GT) C6H6(GT) \\\n", "0 2004-03-10 18:00:00 2.6 1360 150 11.9 \n", "1 2004-03-10 19:00:00 2.0 1292 112 9.4 \n", "2 2004-03-10 20:00:00 2.2 1402 88 9.0 \n", "3 2004-03-10 21:00:00 2.2 1376 80 9.2 \n", "4 2004-03-10 22:00:00 1.6 1272 51 6.5 \n", "\n", " PT08.S2(NMHC) NOx(GT) PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3) \\\n", "0 1046 166 1056 113 1692 1268 \n", "1 955 103 1174 92 1559 972 \n", "2 939 131 1140 114 1555 1074 \n", "3 948 172 1092 122 1584 1203 \n", "4 836 131 1205 116 1490 1110 \n", "\n", " T RH AH \n", "0 13.6 48.9 0.7578 \n", "1 13.3 47.7 0.7255 \n", "2 11.9 54.0 0.7502 \n", "3 11.0 60.0 0.7867 \n", "4 11.2 59.6 0.7888 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CO(GT)PT08.S1(CO)C6H6(GT)PT08.S2(NMHC)NOx(GT)PT08.S3(NOx)NO2(GT)PT08.S4(NO2)PT08.S5(O3)TRHindex
86371.5983.05.9806.0180.0820.0132.0966.0615.013.528.32005-03-05 15:00:00
86381.81018.07.3868.0255.0751.0162.01015.0804.013.029.72005-03-05 16:00:00
86392.01101.08.4916.0251.0721.0159.01125.0861.011.638.72005-03-05 17:00:00
86401.91116.07.7888.0258.0695.0156.01176.0980.08.656.32005-03-05 18:00:00
86412.51161.09.1945.0344.0654.0177.01205.01077.08.557.92005-03-05 19:00:00
\n", "
" ], "text/plain": [ " CO(GT) PT08.S1(CO) C6H6(GT) PT08.S2(NMHC) NOx(GT) PT08.S3(NOx) \\\n", "8637 1.5 983.0 5.9 806.0 180.0 820.0 \n", "8638 1.8 1018.0 7.3 868.0 255.0 751.0 \n", "8639 2.0 1101.0 8.4 916.0 251.0 721.0 \n", "8640 1.9 1116.0 7.7 888.0 258.0 695.0 \n", "8641 2.5 1161.0 9.1 945.0 344.0 654.0 \n", "\n", " NO2(GT) PT08.S4(NO2) PT08.S5(O3) T RH index \n", "8637 132.0 966.0 615.0 13.5 28.3 2005-03-05 15:00:00 \n", "8638 162.0 1015.0 804.0 13.0 29.7 2005-03-05 16:00:00 \n", "8639 159.0 1125.0 861.0 11.6 38.7 2005-03-05 17:00:00 \n", "8640 156.0 1176.0 980.0 8.6 56.3 2005-03-05 18:00:00 \n", "8641 177.0 1205.0 1077.0 8.5 57.9 2005-03-05 19:00:00 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = get_data(\"airquality\")\n", "\n", "# Limiting the data for demonstration purposes.\n", "data = data.iloc[-720:]\n", "data[\"index\"] = pd.to_datetime(data[\"Date\"] + \" \" + data[\"Time\"])\n", "data.drop(columns=[\"Date\", \"Time\"], inplace=True)\n", "data.replace(-200, np.nan, inplace=True)\n", "target = \"CO(GT)\"\n", "\n", "exclude = ['NMHC(GT)', 'AH']\n", "data.drop(columns=exclude, inplace=True)\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 2: Baseline Model - Univariate forecasting without exogenous variables" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetCO(GT)
2ApproachUnivariate
3Exogenous VariablesNot Present
4Original data shape(720, 1)
5Transformed data shape(720, 1)
6Transformed train set shape(672, 1)
7Transformed test set shape(48, 1)
8Rows with missing values2.8%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
18Significant Seasonal Period(s)[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
19Significant Seasonal Period(s) without Harmonics[48, 46, 50, 22, 47, 49, 26, 21]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[24]
24Primary Seasonality24
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d0
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36CPU Jobs-1
37Use GPUFalse
38Log ExperimentFalse
39Experiment Namets-default-name
40USI3960
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_uni = data.copy()\n", "data_uni.set_index(\"index\", inplace=True)\n", "data_uni = data_uni[target]\n", "\n", "exp_uni = TSForecastingExperiment()\n", "exp_uni.setup(\n", " data=data_uni, fh=48,\n", " numeric_imputation_target=\"ffill\", numeric_imputation_exogenous=\"ffill\",\n", " fig_kwargs=global_fig_settings, session_id=42\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 cutoffMASERMSSEMAERMSEMAPESMAPER2
02005-03-27 14:001.56061.49041.14661.60390.65981.0119-1.7616
12005-03-29 14:002.59191.97671.86552.11051.49910.7419-2.7317
22005-03-31 14:001.46051.15391.04011.21241.28601.2780-6.2314
MeanNaT1.87101.54031.35071.64221.14831.0106-3.5749
SDNaT0.51140.33780.36660.36760.35620.21891.9197
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/4 [00:00 1 indicates that the model is performing worse than even a naive model with one step ahead forecasts. This model needs more improvement. Let's see if adding exogenous variables can help improve the model performance." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 3: Improved Model - Univariate forecasting with exogenous variables" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetCO(GT)
2ApproachUnivariate
3Exogenous VariablesPresent
4Original data shape(720, 11)
5Transformed data shape(720, 11)
6Transformed train set shape(672, 11)
7Transformed test set shape(48, 11)
8Rows with missing values3.8%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
18Significant Seasonal Period(s)[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
19Significant Seasonal Period(s) without Harmonics[48, 46, 50, 22, 47, 49, 26, 21]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[24]
24Primary Seasonality24
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d0
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36Numerical Imputation (Exogenous)ffill
37Transformation (Exogenous)None
38Scaling (Exogenous)None
39CPU Jobs-1
40Use GPUFalse
41Log ExperimentFalse
42Experiment Namets-default-name
43USI31bf
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exp_exo = TSForecastingExperiment()\n", "exp_exo.setup(\n", " data=data, target=target, index=\"index\", fh=48,\n", " numeric_imputation_target=\"ffill\", numeric_imputation_exogenous=\"ffill\",\n", " fig_kwargs=global_fig_settings, session_id=42\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 cutoffMASERMSSEMAERMSEMAPESMAPER2
02005-03-27 14:000.25120.22820.18460.24550.10890.11920.9353
12005-03-29 14:000.23210.22270.16700.23780.11000.12190.9526
22005-03-31 14:000.25250.27330.17980.28720.17230.15720.5943
MeanNaT0.24530.24140.17710.25680.13040.13280.8274
SDNaT0.00930.02270.00740.02170.02960.01730.1650
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/4 [00:00\n", "#T_e6fdc_row25_col1, #T_e6fdc_row31_col1 {\n", " background-color: lightgreen;\n", "}\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetCO(GT)
2ApproachUnivariate
3Exogenous VariablesPresent
4Original data shape(720, 4)
5Transformed data shape(720, 4)
6Transformed train set shape(672, 4)
7Transformed test set shape(48, 4)
8Rows with missing values3.8%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
18Significant Seasonal Period(s)[24, 23, 25, 2, 48, 22, 47, 49, 26, 3, 12, 11, 21, 13, 46, 10, 50]
19Significant Seasonal Period(s) without Harmonics[48, 46, 50, 22, 47, 49, 26, 21]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[24]
24Primary Seasonality24
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d0
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36Numerical Imputation (Exogenous)ffill
37Transformation (Exogenous)None
38Scaling (Exogenous)None
39CPU Jobs-1
40Use GPUFalse
41Log ExperimentFalse
42Experiment Namets-default-name
43USI3ab8
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exp_slim = TSForecastingExperiment()\n", "keep = [target, \"index\", 'NOx(GT)', \"PT08.S3(NOx)\", \"RH\"]\n", "data_slim = data[keep]\n", "exp_slim.setup(\n", " data=data_slim, target=target, index=\"index\", fh=48,\n", " numeric_imputation_target=\"ffill\", numeric_imputation_exogenous=\"ffill\",\n", " fig_kwargs=global_fig_settings, session_id=42 \n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 cutoffMASERMSSEMAERMSEMAPESMAPER2
02005-03-27 14:000.50140.40280.36840.43340.26630.22550.7983
12005-03-29 14:000.26570.23690.19120.25290.10040.09930.9464
22005-03-31 14:000.30230.28470.21530.29920.21320.19670.5596
MeanNaT0.35650.30810.25830.32850.19330.17380.7681
SDNaT0.10350.06970.07840.07660.06920.05400.1593
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/4 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexNOx(GT)PT08.S3(NOx)RH
86372005-03-05 15:00:00180.0820.028.3
86382005-03-05 16:00:00255.0751.029.7
86392005-03-05 17:00:00251.0721.038.7
86402005-03-05 18:00:00258.0695.056.3
86412005-03-05 19:00:00344.0654.057.9
\n", "" ], "text/plain": [ " index NOx(GT) PT08.S3(NOx) RH\n", "8637 2005-03-05 15:00:00 180.0 820.0 28.3\n", "8638 2005-03-05 16:00:00 255.0 751.0 29.7\n", "8639 2005-03-05 17:00:00 251.0 721.0 38.7\n", "8640 2005-03-05 18:00:00 258.0 695.0 56.3\n", "8641 2005-03-05 19:00:00 344.0 654.0 57.9" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exog_vars = ['NOx(GT)', 'PT08.S3(NOx)', 'RH']\n", "data = data[[\"index\"] + exog_vars]\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 6: Making Future Predictions\n", "\n", "# Step 6A: Get future exogenous variable values using forecasting" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetNOx(GT)
2ApproachUnivariate
3Exogenous VariablesNot Present
4Original data shape(720, 1)
5Transformed data shape(720, 1)
6Transformed train set shape(672, 1)
7Transformed test set shape(48, 1)
8Rows with missing values0.8%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[24, 48, 23, 25, 47, 49, 13, 12, 36, 11, 35, 60]
18Significant Seasonal Period(s)[24, 48, 23, 25, 47, 49, 13, 12, 36, 11, 35, 60]
19Significant Seasonal Period(s) without Harmonics[48, 23, 25, 47, 49, 13, 60, 36, 11, 35]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[24]
24Primary Seasonality24
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d1
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36CPU Jobs-1
37Use GPUFalse
38Log ExperimentFalse
39Experiment Namets-default-name
40USI6166
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelMASERMSSEMAERMSEMAPESMAPER2TT (Sec)
arimaARIMA0.84060.915887.3689133.06420.42730.3443-1.30720.3200
exp_smoothExponential Smoothing0.89540.840093.0132121.97600.48280.5917-0.93110.1667
thetaTheta Forecaster1.02790.9437107.4620137.68860.51920.4990-0.40720.0533
lightgbm_cds_dtLight Gradient Boosting w/ Cond. Deseasonalize & Detrending1.20211.1033124.6215160.10690.69950.5078-2.57171.5100
etsETS1.64661.5514171.0757225.41930.92840.5548-4.32061.5600
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/25 [00:00\n", "#T_b2c3b_row25_col1, #T_b2c3b_row31_col1 {\n", " background-color: lightgreen;\n", "}\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetPT08.S3(NOx)
2ApproachUnivariate
3Exogenous VariablesNot Present
4Original data shape(720, 1)
5Transformed data shape(720, 1)
6Transformed train set shape(672, 1)
7Transformed test set shape(48, 1)
8Rows with missing values0.1%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[24, 48, 25, 23, 47, 49, 12, 36, 11]
18Significant Seasonal Period(s)[24, 48, 25, 23, 47, 49, 12, 36, 11]
19Significant Seasonal Period(s) without Harmonics[48, 25, 23, 47, 49, 36, 11]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[24]
24Primary Seasonality24
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d1
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36CPU Jobs-1
37Use GPUFalse
38Log ExperimentFalse
39Experiment Namets-default-name
40USI9b1d
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelMASERMSSEMAERMSEMAPESMAPER2TT (Sec)
exp_smoothExponential Smoothing1.24351.2056126.5383158.92410.17380.1695-0.02110.1533
etsETS1.36301.3140138.7259173.25450.19060.1879-0.20910.7067
thetaTheta Forecaster1.37161.3079139.5929172.42720.19090.1878-0.19630.0533
arimaARIMA1.39291.3245141.6775174.62110.17920.1953-0.39850.4133
lightgbm_cds_dtLight Gradient Boosting w/ Cond. Deseasonalize & Detrending1.67781.5491170.7442204.25880.21970.2541-0.76661.4900
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/25 [00:00\n", "#T_5996e_row25_col1, #T_5996e_row31_col1 {\n", " background-color: lightgreen;\n", "}\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0session_id42
1TargetRH
2ApproachUnivariate
3Exogenous VariablesNot Present
4Original data shape(720, 1)
5Transformed data shape(720, 1)
6Transformed train set shape(672, 1)
7Transformed test set shape(48, 1)
8Rows with missing values0.1%
9Fold GeneratorExpandingWindowSplitter
10Fold Number3
11Enforce Prediction IntervalFalse
12Splits used for hyperparametersall
13User Defined Seasonal Period(s)None
14Ignore Seasonality TestFalse
15Seasonality Detection Algoauto
16Max Period to Consider60
17Seasonal Period(s) Tested[2, 3, 24, 23, 25, 22, 4, 26, 21, 48, 47, 49, 46, 5, 27, 50, 20, 45, 51, 28, 19, 6, 44, 52]
18Significant Seasonal Period(s)[2, 3, 24, 23, 25, 22, 4, 26, 21, 48, 47, 49, 46, 5, 27, 50, 20, 45, 51, 28, 19, 6, 44, 52]
19Significant Seasonal Period(s) without Harmonics[52, 51, 48, 46, 50, 44, 21, 47, 49, 27, 20, 45, 28, 19]
20Remove HarmonicsFalse
21Harmonics Order Methodharmonic_max
22Num Seasonalities to Use1
23All Seasonalities to Use[2]
24Primary Seasonality2
25Seasonality PresentTrue
26Seasonality Typemul
27Target Strictly PositiveTrue
28Target White NoiseNo
29Recommended d0
30Recommended Seasonal D0
31PreprocessTrue
32Numerical Imputation (Target)ffill
33Transformation (Target)None
34Scaling (Target)None
35Feature Engineering (Target) - Reduced RegressionFalse
36CPU Jobs-1
37Use GPUFalse
38Log ExperimentFalse
39Experiment Namets-default-name
40USIb42b
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelMASERMSSEMAERMSEMAPESMAPER2TT (Sec)
thetaTheta Forecaster1.62181.476511.374913.15780.24810.2286-0.05850.0533
arimaARIMA1.80011.616512.631014.41080.25480.2523-0.26950.0867
lightgbm_cds_dtLight Gradient Boosting w/ Cond. Deseasonalize & Detrending2.77972.611519.466723.25190.52410.3609-4.87110.8100
exp_smoothExponential Smoothing5.29724.759237.242342.49180.71880.9298-10.52610.1400
etsETS5.32354.781237.425942.68720.72280.9349-10.59110.1233
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/25 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NOx(GT)PT08.S3(NOx)RH
2005-04-04 15:00247.2036651.183013.0838
2005-04-04 16:00274.0699644.277013.1056
2005-04-04 17:00298.6685613.330913.0876
2005-04-04 18:00231.8742558.197013.1094
2005-04-04 19:00398.1459499.699413.0913
2005-04-04 20:00378.3662524.638513.1131
2005-04-04 21:00252.7246596.223613.0950
2005-04-04 22:00246.6336684.652313.1169
2005-04-04 23:00180.6670721.417413.0988
2005-04-05 00:00125.5157756.124113.1206
2005-04-05 01:0076.9556832.293613.1025
2005-04-05 02:0074.8243889.789913.1244
2005-04-05 03:0067.0040960.893113.1063
2005-04-05 04:0056.40911017.308913.1281
2005-04-05 05:0080.97771023.558613.1100
2005-04-05 06:00185.6649969.106913.1318
2005-04-05 07:00597.4380824.132813.1138
2005-04-05 08:00589.2735721.392613.1356
2005-04-05 09:00526.1542666.404413.1175
2005-04-05 10:00475.0677706.371513.1393
2005-04-05 11:00356.0050747.218113.1212
2005-04-05 12:00295.9595774.706513.1431
2005-04-05 13:00237.9265799.278613.1250
2005-04-05 14:00267.9026800.729113.1468
2005-04-05 15:00250.0888793.498913.1287
2005-04-05 16:00276.9426783.813113.1506
2005-04-05 17:00301.5320744.976713.1325
2005-04-05 18:00234.7311676.946813.1543
2005-04-05 19:00400.9980605.070513.1362
2005-04-05 20:00381.2148634.304913.1581
2005-04-05 21:00255.5708719.777513.1399
2005-04-05 22:00249.4779825.316613.1618
2005-04-05 23:00183.5099868.377113.1437
2005-04-06 00:00128.3577908.857513.1656
2005-04-06 01:0079.7969999.009713.1474
2005-04-06 02:0077.66511066.547813.1693
2005-04-06 03:0069.84441150.208713.1512
2005-04-06 04:0059.24921216.107613.1731
2005-04-06 05:0083.81771221.963113.1549
2005-04-06 06:00188.50471155.451513.1768
2005-04-06 07:00600.2777981.341613.1586
2005-04-06 08:00592.1131857.917913.1806
2005-04-06 09:00528.9938791.536313.1624
2005-04-06 10:00477.9073837.978413.1843
2005-04-06 11:00358.8445885.362913.1661
2005-04-06 12:00298.7990916.838413.1880
2005-04-06 13:00240.7660944.806313.1699
2005-04-06 14:00270.7421945.423113.1918
\n", "" ], "text/plain": [ " NOx(GT) PT08.S3(NOx) RH\n", "2005-04-04 15:00 247.2036 651.1830 13.0838\n", "2005-04-04 16:00 274.0699 644.2770 13.1056\n", "2005-04-04 17:00 298.6685 613.3309 13.0876\n", "2005-04-04 18:00 231.8742 558.1970 13.1094\n", "2005-04-04 19:00 398.1459 499.6994 13.0913\n", "2005-04-04 20:00 378.3662 524.6385 13.1131\n", "2005-04-04 21:00 252.7246 596.2236 13.0950\n", "2005-04-04 22:00 246.6336 684.6523 13.1169\n", "2005-04-04 23:00 180.6670 721.4174 13.0988\n", "2005-04-05 00:00 125.5157 756.1241 13.1206\n", "2005-04-05 01:00 76.9556 832.2936 13.1025\n", "2005-04-05 02:00 74.8243 889.7899 13.1244\n", "2005-04-05 03:00 67.0040 960.8931 13.1063\n", "2005-04-05 04:00 56.4091 1017.3089 13.1281\n", "2005-04-05 05:00 80.9777 1023.5586 13.1100\n", "2005-04-05 06:00 185.6649 969.1069 13.1318\n", "2005-04-05 07:00 597.4380 824.1328 13.1138\n", "2005-04-05 08:00 589.2735 721.3926 13.1356\n", "2005-04-05 09:00 526.1542 666.4044 13.1175\n", "2005-04-05 10:00 475.0677 706.3715 13.1393\n", "2005-04-05 11:00 356.0050 747.2181 13.1212\n", "2005-04-05 12:00 295.9595 774.7065 13.1431\n", "2005-04-05 13:00 237.9265 799.2786 13.1250\n", "2005-04-05 14:00 267.9026 800.7291 13.1468\n", "2005-04-05 15:00 250.0888 793.4989 13.1287\n", "2005-04-05 16:00 276.9426 783.8131 13.1506\n", "2005-04-05 17:00 301.5320 744.9767 13.1325\n", "2005-04-05 18:00 234.7311 676.9468 13.1543\n", "2005-04-05 19:00 400.9980 605.0705 13.1362\n", "2005-04-05 20:00 381.2148 634.3049 13.1581\n", "2005-04-05 21:00 255.5708 719.7775 13.1399\n", "2005-04-05 22:00 249.4779 825.3166 13.1618\n", "2005-04-05 23:00 183.5099 868.3771 13.1437\n", "2005-04-06 00:00 128.3577 908.8575 13.1656\n", "2005-04-06 01:00 79.7969 999.0097 13.1474\n", "2005-04-06 02:00 77.6651 1066.5478 13.1693\n", "2005-04-06 03:00 69.8444 1150.2087 13.1512\n", "2005-04-06 04:00 59.2492 1216.1076 13.1731\n", "2005-04-06 05:00 83.8177 1221.9631 13.1549\n", "2005-04-06 06:00 188.5047 1155.4515 13.1768\n", "2005-04-06 07:00 
600.2777 981.3416 13.1586\n", "2005-04-06 08:00 592.1131 857.9179 13.1806\n", "2005-04-06 09:00 528.9938 791.5363 13.1624\n", "2005-04-06 10:00 477.9073 837.9784 13.1843\n", "2005-04-06 11:00 358.8445 885.3629 13.1661\n", "2005-04-06 12:00 298.7990 916.8384 13.1880\n", "2005-04-06 13:00 240.7660 944.8063 13.1699\n", "2005-04-06 14:00 270.7421 945.4231 13.1918" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "future_exog" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 6B: Load Model and make future predictions for the target variable" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "exp_future = TSForecastingExperiment()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Loaded\n" ] } ], "source": [ "final_slim_model = exp_future.load_model(\"final_slim_model\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "future_preds = exp_future.predict_model(final_slim_model, X=future_exog)\n", "future_preds.plot()" ] } ], "metadata": { "interpreter": { "hash": "c161a91f6f4623a54f30c5492a42e7cf0592610fb90c8abd312086f09f8fbe0f" }, "kernelspec": { "display_name": "pycaret_dev_sktime_19p1", "language": "python", "name": "pycaret_dev_sktime_19p1" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 4 }