{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have built our model, let's make future predictions. As a reminder, our plan of action was as follows:\n", "\n", "1. Perform EDA on the dataset to extract valuable insight about the process generating the time series **(COMPLETED)**.\n", "2. Build a baseline model (univariate model without exogenous variables) for benchmarking purposes **(COMPLETED)**.\n", "3. Build a univariate model with all exogenous variables to check best possible performance **(COMPLETED)**.\n", "4. Evaluate the model with exogenous variables and discuss any potential issues **(COMPLETED)**.\n", "5. Overcome issues identified above **(COMPLETED)**.\n", "6. Make future predictions with the best model. **(Covered in this notebook)**\n", "7. Replicate flow with Automated Time Series Modeling (AutoML)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Only enable critical logging (Optional)\n", "import os\n", "os.environ[\"PYCARET_CUSTOM_LOGGING_LEVEL\"] = \"CRITICAL\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "System:\n", " python: 3.8.13 (default, Mar 28 2022, 06:59:08) [MSC v.1916 64 bit (AMD64)]\n", "executable: C:\\Users\\Nikhil\\.conda\\envs\\pycaret_dev_sktime_0p11_2\\python.exe\n", " machine: Windows-10-10.0.19044-SP0\n", "\n", "PyCaret required dependencies:\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Nikhil\\.conda\\envs\\pycaret_dev_sktime_0p11_2\\lib\\site-packages\\_distutils_hack\\__init__.py:30: UserWarning: Setuptools is replacing distutils.\n", " warnings.warn(\"Setuptools is replacing distutils.\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " pip: 21.2.2\n", " setuptools: 61.2.0\n", " pycaret: 3.0.0\n", " ipython: Not installed\n", " ipywidgets: 7.7.0\n", " numpy: 1.21.6\n", " pandas: 1.4.2\n", " jinja2: 
3.1.2\n", " scipy: 1.8.0\n", " joblib: 1.1.0\n", " sklearn: 1.0.2\n", " pyod: Installed but version unavailable\n", " imblearn: 0.9.0\n", " category_encoders: 2.4.1\n", " lightgbm: 3.3.2\n", " numba: 0.55.1\n", " requests: 2.27.1\n", " matplotlib: 3.5.2\n", " scikitplot: 0.3.7\n", " yellowbrick: 1.4\n", " plotly: 5.8.0\n", " kaleido: 0.2.1\n", " statsmodels: 0.13.2\n", " sktime: 0.11.4\n", " tbats: Installed but version unavailable\n", " pmdarima: 1.8.5\n", "\n", "PyCaret optional dependencies:\n", " shap: Not installed\n", " interpret: Not installed\n", " umap: Not installed\n", " pandas_profiling: Not installed\n", " explainerdashboard: Not installed\n", " autoviz: Not installed\n", " fairlearn: Not installed\n", " xgboost: Not installed\n", " catboost: Not installed\n", " kmodes: Not installed\n", " mlxtend: Not installed\n", " statsforecast: 0.5.5\n", " tune_sklearn: Not installed\n", " ray: Not installed\n", " hyperopt: Not installed\n", " optuna: Not installed\n", " skopt: Not installed\n", " mlflow: 1.25.1\n", " gradio: Not installed\n", " fastapi: Not installed\n", " uvicorn: Not installed\n", " m2cgen: Not installed\n", " evidently: Not installed\n", " nltk: Not installed\n", " pyLDAvis: Not installed\n", " gensim: Not installed\n", " spacy: Not installed\n", " wordcloud: Not installed\n", " textblob: Not installed\n", " psutil: 5.9.0\n", " fugue: Not installed\n", " streamlit: Not installed\n", " prophet: Not installed\n" ] } ], "source": [ "def what_is_installed():\n", " from pycaret import show_versions\n", " show_versions()\n", "\n", "try:\n", " what_is_installed()\n", "except ModuleNotFoundError:\n", " !pip install pycaret\n", " what_is_installed()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pycaret.datasets import get_data\n", "from pycaret.time_series import TSForecastingExperiment" ] }, { "cell_type": "code", "execution_count": 4, "metadata": 
{}, "outputs": [], "source": [ "# Global Figure Settings for notebook ----\n", "global_fig_settings = {\"renderer\": \"notebook\", \"width\": 1000, \"height\": 600}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NOx(GT)PT08.S3(NOx)RH
index
2004-03-10 18:00:00166.01056.048.9
2004-03-10 19:00:00103.01174.047.7
2004-03-10 20:00:00131.01140.054.0
2004-03-10 21:00:00172.01092.060.0
2004-03-10 22:00:00131.01205.059.6
\n", "
" ], "text/plain": [ " NOx(GT) PT08.S3(NOx) RH\n", "index \n", "2004-03-10 18:00:00 166.0 1056.0 48.9\n", "2004-03-10 19:00:00 103.0 1174.0 47.7\n", "2004-03-10 20:00:00 131.0 1140.0 54.0\n", "2004-03-10 21:00:00 172.0 1092.0 60.0\n", "2004-03-10 22:00:00 131.0 1205.0 59.6" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = get_data(\"airquality\", verbose=False)\n", "data[\"index\"] = pd.to_datetime(data[\"Date\"] + \" \" + data[\"Time\"])\n", "data.drop(columns=[\"Date\", \"Time\"], inplace=True)\n", "data.replace(-200, np.nan, inplace=True)\n", "data.set_index(\"index\", inplace=True)\n", "\n", "exog_vars = ['NOx(GT)', 'PT08.S3(NOx)', 'RH']\n", "data = data[exog_vars]\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 6: Making Future Predictions\n", "\n", "# Step 6A: Get future exogenous variable values using forecasting" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelMASERMSSEMAERMSEMAPESMAPER2TT (Sec)
lightgbm_cds_dtLight Gradient Boosting w/ Cond. Deseasonalize & Detrending1.19901.048611.083613.58790.28620.2304-0.95025.1700
exp_smoothExponential Smoothing1.48901.190413.751015.41400.30370.2478-0.42483.2700
arimaARIMA1.56571.369314.469617.74120.35390.2739-2.11913.6833
thetaTheta Forecaster1.91021.490217.642019.29580.37130.2986-1.26293.1567
etsETS3.45042.655731.865234.38390.62950.4585-6.17339.8033
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "exog_exps = []\n", "exog_models = []\n", "for exog_var in exog_vars:\n", " exog_exp = TSForecastingExperiment()\n", " exog_exp.setup(\n", " data=data[exog_var], fh=48,\n", " numeric_imputation_target=\"ffill\", numeric_imputation_exogenous=\"ffill\",\n", " fig_kwargs=global_fig_settings, session_id=42\n", " )\n", "\n", " # Users can customize how to model future exogenous variables i.e. add\n", " # more steps and models to potentially get better models at the expense\n", " # of higher modeling time.\n", " best = exog_exp.compare_models(\n", " sort=\"mase\", include=[\"arima\", \"ets\", \"exp_smooth\", \"theta\", \"lightgbm_cds_dt\",] \n", " )\n", " final_exog_model = exog_exp.finalize_model(best)\n", "\n", " exog_exps.append(exog_exp)\n", " exog_models.append(final_exog_model)\n", "\n", "# Step 2: Get future predictions for exog variables ----\n", "future_exog = [\n", " exog_exp.predict_model(exog_model)\n", " for exog_exp, exog_model in zip(exog_exps, exog_models)\n", "]\n", "future_exog = pd.concat(future_exog, axis=1)\n", "future_exog.columns = exog_vars" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NOx(GT)PT08.S3(NOx)RH
2005-04-04 15:00262.1091656.104414.5065
2005-04-04 16:00297.1918638.665915.6333
2005-04-04 17:00325.5872596.722717.5673
2005-04-04 18:00259.7444556.067222.7364
2005-04-04 19:00425.2039534.210828.3385
2005-04-04 20:00403.5829553.198233.7580
2005-04-04 21:00275.5622611.599337.0870
2005-04-04 22:00266.8754652.885739.7600
2005-04-04 23:00198.3008662.624641.3155
2005-04-05 00:00140.6534680.422142.4415
2005-04-05 01:0089.7790730.955044.4348
2005-04-05 02:0085.5490799.794546.8401
2005-04-05 03:0075.8562861.373548.6809
2005-04-05 04:0063.6113890.852649.7074
2005-04-05 05:0086.7398878.592150.1808
2005-04-05 06:00190.1795796.509050.4606
2005-04-05 07:00600.8788663.640648.9828
2005-04-05 08:00591.7944561.329546.7280
2005-04-05 09:00527.8903565.203941.8545
2005-04-05 10:00476.1367599.865637.9259
2005-04-05 11:00356.5084622.072935.3302
2005-04-05 12:00295.9845630.007834.0590
2005-04-05 13:00237.5478632.999835.7544
2005-04-05 14:00267.1838653.693836.6735
2005-04-05 15:00263.9894655.809535.8249
2005-04-05 16:00298.8190638.378835.1843
2005-04-05 17:00327.0035596.454535.7987
2005-04-05 18:00260.9848555.817338.7220
2005-04-05 19:00426.2978533.970641.7648
2005-04-05 20:00404.5546552.949644.0184
2005-04-05 21:00276.4319611.324447.1953
2005-04-05 22:00267.6602652.592249.4721
2005-04-05 23:00199.0148662.326851.0524
2005-04-06 00:00141.3084680.116253.1152
2005-04-06 01:0090.3847730.626454.2431
2005-04-06 02:0086.1137799.434955.4766
2005-04-06 03:0076.3867860.986356.8362
2005-04-06 04:0064.1132890.452158.0996
2005-04-06 05:0087.2179878.197159.2240
2005-04-06 06:00190.6379796.151059.8861
2005-04-06 07:00601.3206663.342259.1575
2005-04-06 08:00592.2224561.077157.2327
2005-04-06 09:00528.3069564.949852.8706
2005-04-06 10:00476.5436599.595846.1781
2005-04-06 11:00356.9073621.793241.6050
2005-04-06 12:00296.3769629.724538.9799
2005-04-06 13:00237.9346632.715237.8304
2005-04-06 14:00267.5660653.399937.2366
\n", "
" ], "text/plain": [ " NOx(GT) PT08.S3(NOx) RH\n", "2005-04-04 15:00 262.1091 656.1044 14.5065\n", "2005-04-04 16:00 297.1918 638.6659 15.6333\n", "2005-04-04 17:00 325.5872 596.7227 17.5673\n", "2005-04-04 18:00 259.7444 556.0672 22.7364\n", "2005-04-04 19:00 425.2039 534.2108 28.3385\n", "2005-04-04 20:00 403.5829 553.1982 33.7580\n", "2005-04-04 21:00 275.5622 611.5993 37.0870\n", "2005-04-04 22:00 266.8754 652.8857 39.7600\n", "2005-04-04 23:00 198.3008 662.6246 41.3155\n", "2005-04-05 00:00 140.6534 680.4221 42.4415\n", "2005-04-05 01:00 89.7790 730.9550 44.4348\n", "2005-04-05 02:00 85.5490 799.7945 46.8401\n", "2005-04-05 03:00 75.8562 861.3735 48.6809\n", "2005-04-05 04:00 63.6113 890.8526 49.7074\n", "2005-04-05 05:00 86.7398 878.5921 50.1808\n", "2005-04-05 06:00 190.1795 796.5090 50.4606\n", "2005-04-05 07:00 600.8788 663.6406 48.9828\n", "2005-04-05 08:00 591.7944 561.3295 46.7280\n", "2005-04-05 09:00 527.8903 565.2039 41.8545\n", "2005-04-05 10:00 476.1367 599.8656 37.9259\n", "2005-04-05 11:00 356.5084 622.0729 35.3302\n", "2005-04-05 12:00 295.9845 630.0078 34.0590\n", "2005-04-05 13:00 237.5478 632.9998 35.7544\n", "2005-04-05 14:00 267.1838 653.6938 36.6735\n", "2005-04-05 15:00 263.9894 655.8095 35.8249\n", "2005-04-05 16:00 298.8190 638.3788 35.1843\n", "2005-04-05 17:00 327.0035 596.4545 35.7987\n", "2005-04-05 18:00 260.9848 555.8173 38.7220\n", "2005-04-05 19:00 426.2978 533.9706 41.7648\n", "2005-04-05 20:00 404.5546 552.9496 44.0184\n", "2005-04-05 21:00 276.4319 611.3244 47.1953\n", "2005-04-05 22:00 267.6602 652.5922 49.4721\n", "2005-04-05 23:00 199.0148 662.3268 51.0524\n", "2005-04-06 00:00 141.3084 680.1162 53.1152\n", "2005-04-06 01:00 90.3847 730.6264 54.2431\n", "2005-04-06 02:00 86.1137 799.4349 55.4766\n", "2005-04-06 03:00 76.3867 860.9863 56.8362\n", "2005-04-06 04:00 64.1132 890.4521 58.0996\n", "2005-04-06 05:00 87.2179 878.1971 59.2240\n", "2005-04-06 06:00 190.6379 796.1510 59.8861\n", "2005-04-06 07:00 601.3206 663.3422 
59.1575\n", "2005-04-06 08:00 592.2224 561.0771 57.2327\n", "2005-04-06 09:00 528.3069 564.9498 52.8706\n", "2005-04-06 10:00 476.5436 599.5958 46.1781\n", "2005-04-06 11:00 356.9073 621.7932 41.6050\n", "2005-04-06 12:00 296.3769 629.7245 38.9799\n", "2005-04-06 13:00 237.9346 632.7152 37.8304\n", "2005-04-06 14:00 267.5660 653.3999 37.2366" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "future_exog" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 6B: Load Model and make future predictions for the target variable" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "exp_future = TSForecastingExperiment()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Loaded\n" ] } ], "source": [ "final_slim_model = exp_future.load_model(\"final_slim_model\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "future_preds = exp_future.predict_model(final_slim_model, X=future_exog)\n", "future_preds.plot()" ] } ], "metadata": { "interpreter": { "hash": "c161a91f6f4623a54f30c5492a42e7cf0592610fb90c8abd312086f09f8fbe0f" }, "kernelspec": { "display_name": "pycaret_sktime_0p11_2", "language": "python", "name": "pycaret_sktime_0p11_2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 2 }