{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "pycaret_ts_architecture.ipynb", "provenance": [], "collapsed_sections": [], "authorship_tag": "ABX9TyP5Ri9a6A6rI9T3SotaFVkq", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "9980f0f1fe054bd7b0e456745c359bcf": { "model_module": "@jupyter-widgets/controls", "model_name": "IntProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_516c8ff6302849dbb3ac8a97ca89ff3c", "_dom_classes": [], "description": "Processing: ", "_model_name": "IntProgressModel", "bar_style": "", "max": 3, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 3, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_78cf30b0d262460ebbfa30c6c82fc9ad" } }, "516c8ff6302849dbb3ac8a97ca89ff3c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "78cf30b0d262460ebbfa30c6c82fc9ad": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, 
"_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "27cc64d7b762434c83e262ca14708f4d": { "model_module": "@jupyter-widgets/controls", "model_name": "IntProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_e4e25112bf6945a8ba2df14307b7bfff", "_dom_classes": [], "description": "Processing: ", "_model_name": "IntProgressModel", "bar_style": "", "max": 4, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 4, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_bf57f05fdac642fcab8339321952f6e5" } }, "e4e25112bf6945a8ba2df14307b7bfff": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "bf57f05fdac642fcab8339321952f6e5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", 
"grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9809210362bf479abd6e28d015dc46f9": { "model_module": "@jupyter-widgets/controls", "model_name": "IntProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_15d608a77faa4464a2a28d24d1b95adf", "_dom_classes": [], "description": "Processing: ", "_model_name": "IntProgressModel", "bar_style": "", "max": 4, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 4, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_dcd168e55f8c427d80f0ac3eae746088" } }, "15d608a77faa4464a2a28d24d1b95adf": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, 
"_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "dcd168e55f8c427d80f0ac3eae746088": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "89546f23763544119e3f2684c986ecc2": { "model_module": "@jupyter-widgets/controls", "model_name": "IntProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_c60317db8b684206bdbe3b50b5e6681c", "_dom_classes": [], "description": "Processing: ", "_model_name": "IntProgressModel", "bar_style": "", "max": 4, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 4, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_9fa888acaa094ef8bb6e74c20b600485" } }, "c60317db8b684206bdbe3b50b5e6681c": { "model_module": 
"@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "9fa888acaa094ef8bb6e74c20b600485": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a98fcdc0d73443ddad2d6a986a430501": { "model_module": "@jupyter-widgets/controls", "model_name": "IntProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_4f1c43a4ce1d4f4e81433e652849cb96", "_dom_classes": [], "description": "Processing: ", "_model_name": "IntProgressModel", "bar_style": "", "max": 4, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 4, 
"_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_18cb971747a54ab0a6acd4bc23afb2ca" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "<a href=\"https://colab.research.google.com/github/ngupta23/medium_articles/blob/main/time_series/pycaret/pycaret_ts_architecture.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { "cell_type": "code", "metadata": { "id": "-tCKH8JxbGu2" }, "source": [ "try:\n", " import pycaret\n", "except:\n", " !pip install pycaret-ts-alpha" ], "execution_count": 1, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "1W1wYHilbKHC", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "e78dc408-28f9-4f8e-a03a-28b0b2093321" }, "source": [ "#### Import libraries ----\n", "from pprint import pprint\n", "from pycaret.datasets import get_data\n", "from pycaret.internal.pycaret_experiment import TimeSeriesExperiment" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", " defaults = yaml.load(f)\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 146 }, "id": "g8NyAM33bvc8", "outputId": "37f6dd0a-35ce-462b-a2ae-8c4afbb29e04" }, "source": [ "#### Get the data ---\n", "y = get_data(\"airline\")\n" ], "execution_count": 3, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Period\n", "1949-01 112.0\n", "1949-02 118.0\n", "1949-03 132.0\n", "1949-04 129.0\n", "1949-05 121.0\n", "Freq: M, Name: Number of airline passengers, dtype: float64" ] }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 695, "referenced_widgets": [ "9980f0f1fe054bd7b0e456745c359bcf", "516c8ff6302849dbb3ac8a97ca89ff3c", "78cf30b0d262460ebbfa30c6c82fc9ad" ] }, "id": "WXuVXJdGb0Pb", "outputId": "3d7a99e0-314a-4491-e8f8-8dd7fa826531" }, "source": [ "#### Setup the experiment ----\n", "exp = TimeSeriesExperiment()\n", "exp.setup(data=y, fh=12, seasonal_period=12, session_id=42)" ], "execution_count": 4, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Description</th>\n", " <th>Value</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>session_id</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Original Data</td>\n", " <td>(144, 1)</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Missing Values</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Transformed Train Set</td>\n", 
" <td>(132,)</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Transformed Test Set</td>\n", " <td>(12,)</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>Fold Generator</td>\n", " <td>ExpandingWindowSplitter</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>Fold Number</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>Enforce Prediction Interval</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>Seasonal Period Tested</td>\n", " <td>12</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>Seasonality Detected</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>Target Strictly Positive</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>Target White Noise</td>\n", " <td>No</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>Recommended d</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>Recommended Seasonal D</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>CPU Jobs</td>\n", " <td>-1</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>Use GPU</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>Log Experiment</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>Experiment Name</td>\n", " <td>ts-default-name</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>USI</td>\n", " <td>f18d</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>Imputation Type</td>\n", " <td>simple</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Description Value\n", "0 session_id 42\n", "1 Original Data (144, 1)\n", "2 Missing Values False\n", "3 Transformed Train Set (132,)\n", "4 Transformed Test Set (12,)\n", "5 Fold Generator ExpandingWindowSplitter\n", "6 Fold Number 3\n", "7 Enforce Prediction Interval False\n", "8 Seasonal Period Tested 12\n", "9 Seasonality Detected True\n", "10 Target Strictly Positive True\n", "11 Target White Noise 
No\n", "12 Recommended d 1\n", "13 Recommended Seasonal D 1\n", "14 CPU Jobs -1\n", "15 Use GPU False\n", "16 Log Experiment False\n", "17 Experiment Name ts-default-name\n", "18 USI f18d\n", "19 Imputation Type simple" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x7f39adfe0f90>" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206, "referenced_widgets": [ "27cc64d7b762434c83e262ca14708f4d", "e4e25112bf6945a8ba2df14307b7bfff", "bf57f05fdac642fcab8339321952f6e5", "9809210362bf479abd6e28d015dc46f9", "15d608a77faa4464a2a28d24d1b95adf", "dcd168e55f8c427d80f0ac3eae746088", "89546f23763544119e3f2684c986ecc2", "c60317db8b684206bdbe3b50b5e6681c", "9fa888acaa094ef8bb6e74c20b600485", "a98fcdc0d73443ddad2d6a986a430501" ] }, "id": "_2bvrfpIb4nt", "outputId": "518b0374-19e0-4ec9-f0e3-e4a95ced3e3d" }, "source": [ "#### Create different types of models ----\n", "\n", "# ARIMA model from `pmdarima`\n", "arima_model = exp.create_model(\"arima\")\n", "\n", "# ETS and Exponential Smoothing models from `statsmodels`\n", "ets_model = exp.create_model(\"ets\")\n", "exp_smooth_model = exp.create_model(\"exp_smooth\")\n", "\n", "# Reduced Regression model using `sklearn` Linear Regression\n", "lr_model = exp.create_model(\"lr_cds_dt\")" ], "execution_count": 5, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>cutoff</th>\n", " <th>MAE</th>\n", " <th>RMSE</th>\n", " 
<th>MAPE</th>\n", " <th>SMAPE</th>\n", " <th>R2</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1956-12</td>\n", " <td>38.6824</td>\n", " <td>45.0820</td>\n", " <td>0.0998</td>\n", " <td>0.1051</td>\n", " <td>0.3384</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1957-12</td>\n", " <td>28.0608</td>\n", " <td>34.6867</td>\n", " <td>0.0751</td>\n", " <td>0.0734</td>\n", " <td>0.6848</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1958-12</td>\n", " <td>32.1693</td>\n", " <td>38.2681</td>\n", " <td>0.0737</td>\n", " <td>0.0753</td>\n", " <td>0.6724</td>\n", " </tr>\n", " <tr>\n", " <th>Mean</th>\n", " <td>NaN</td>\n", " <td>32.9708</td>\n", " <td>39.3456</td>\n", " <td>0.0828</td>\n", " <td>0.0846</td>\n", " <td>0.5652</td>\n", " </tr>\n", " <tr>\n", " <th>SD</th>\n", " <td>NaN</td>\n", " <td>4.3731</td>\n", " <td>4.3117</td>\n", " <td>0.0120</td>\n", " <td>0.0145</td>\n", " <td>0.1604</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " cutoff MAE RMSE MAPE SMAPE R2\n", "0 1956-12 38.6824 45.0820 0.0998 0.1051 0.3384\n", "1 1957-12 28.0608 34.6867 0.0751 0.0734 0.6848\n", "2 1958-12 32.1693 38.2681 0.0737 0.0753 0.6724\n", "Mean NaN 32.9708 39.3456 0.0828 0.0846 0.5652\n", "SD NaN 4.3731 4.3117 0.0120 0.0145 0.1604" ] }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wfUW_tM9ibk4", "outputId": "e170c7a1-ac4c-4099-aac2-9ff8e65ce142" }, "source": [ "#### Check model types ----\n", "print(type(arima_model)) # <-- sktime `pmdarima` adapter \n", "print(type(ets_model)) # <-- sktime `statsmodels` adapter\n", "print(type(exp_smooth_model)) # <-- sktime `statsmodels` adapter\n", "print(type(lr_model)) # <-- Your custom sktime compatible model pipeline" ], "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "<class 'sktime.forecasting.arima.ARIMA'>\n", "<class 
'sktime.forecasting.ets.AutoETS'>\n", "<class 'sktime.forecasting.exp_smoothing.ExponentialSmoothing'>\n", "<class 'pycaret.containers.models.time_series.BaseCdsDtForecaster'>\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "44fIXKpMb81M", "outputId": "eaaabf5a-9ac5-492f-e4e3-bf33bc4fe1f3" }, "source": [ "#### Access internal models using `_forecaster` ----\n", "print(type(arima_model._forecaster))\n", "print(type(ets_model._forecaster))\n", "print(type(exp_smooth_model._forecaster))\n", "print(type(lr_model._forecaster))" ], "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "<class 'pmdarima.arima.arima.ARIMA'>\n", "<class 'statsmodels.tsa.exponential_smoothing.ets.ETSModel'>\n", "<class 'statsmodels.tsa.holtwinters.model.ExponentialSmoothing'>\n", "<class 'sktime.forecasting.compose._pipeline.TransformedTargetForecaster'>\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RvfOa4K0NGuI", "outputId": "2b7a8053-730f-4408-e4b3-e9b84bf8ca7c" }, "source": [ "#### What hyperparameters were used to train the model? 
----\n", "print(arima_model)" ], "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "ARIMA(maxiter=50, method='lbfgs', order=(1, 0, 0), out_of_sample_size=0,\n", " scoring='mse', scoring_args=None, seasonal_order=(0, 1, 0, 12),\n", " start_params=None, suppress_warnings=False, trend=None,\n", " with_intercept=True)\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 430 }, "id": "WMiBrLwwMHNs", "outputId": "81ee60d7-b1fc-4294-8240-c2700c42773e" }, "source": [ "#### Access statistical fit properties using underlying `pmdarima`\n", "arima_model._forecaster.summary()\n", "\n", "#### Alternately, use sktime's convenient wrapper to do so ---- \n", "arima_model.summary()" ], "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>SARIMAX Results</caption>\n", "<tr>\n", " <th>Dep. Variable:</th> <td>y</td> <th> No. Observations: </th> <td>132</td> \n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>SARIMAX(1, 0, 0)x(0, 1, 0, 12)</td> <th> Log Likelihood </th> <td>-450.590</td>\n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Tue, 16 Nov 2021</td> <th> AIC </th> <td>907.180</td>\n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>11:26:51</td> <th> BIC </th> <td>915.542</td>\n", "</tr>\n", "<tr>\n", " <th>Sample:</th> <td>0</td> <th> HQIC </th> <td>910.576</td>\n", "</tr>\n", "<tr>\n", " <th></th> <td> - 132</td> <th> </th> <td> </td> \n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>opg</td> <th> </th> <td> </td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>intercept</th> <td> 5.7982</td> <td> 2.005</td> <td> 2.892</td> <td> 0.004</td> <td> 1.869</td> <td> 9.727</td>\n", "</tr>\n", "<tr>\n", " <th>ar.L1</th> <td> 0.8100</td> <td> 0.061</td> 
<td> 13.261</td> <td> 0.000</td> <td> 0.690</td> <td> 0.930</td>\n", "</tr>\n", "<tr>\n", " <th>sigma2</th> <td> 105.9407</td> <td> 12.533</td> <td> 8.453</td> <td> 0.000</td> <td> 81.377</td> <td> 130.505</td>\n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <th>Ljung-Box (L1) (Q):</th> <td>2.30</td> <th> Jarque-Bera (JB): </th> <td>1.04</td> \n", "</tr>\n", "<tr>\n", " <th>Prob(Q):</th> <td>0.13</td> <th> Prob(JB): </th> <td>0.60</td> \n", "</tr>\n", "<tr>\n", " <th>Heteroskedasticity (H):</th> <td>1.34</td> <th> Skew: </th> <td>-0.07</td>\n", "</tr>\n", "<tr>\n", " <th>Prob(H) (two-sided):</th> <td>0.36</td> <th> Kurtosis: </th> <td>3.43</td> \n", "</tr>\n", "</table><br/><br/>Warnings:<br/>[1] Covariance matrix calculated using the outer product of gradients (complex-step)." ], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " SARIMAX Results \n", "==========================================================================================\n", "Dep. Variable: y No. 
Observations: 132\n", "Model: SARIMAX(1, 0, 0)x(0, 1, 0, 12) Log Likelihood -450.590\n", "Date: Tue, 16 Nov 2021 AIC 907.180\n", "Time: 11:26:51 BIC 915.542\n", "Sample: 0 HQIC 910.576\n", " - 132 \n", "Covariance Type: opg \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "intercept 5.7982 2.005 2.892 0.004 1.869 9.727\n", "ar.L1 0.8100 0.061 13.261 0.000 0.690 0.930\n", "sigma2 105.9407 12.533 8.453 0.000 81.377 130.505\n", "===================================================================================\n", "Ljung-Box (L1) (Q): 2.30 Jarque-Bera (JB): 1.04\n", "Prob(Q): 0.13 Prob(JB): 0.60\n", "Heteroskedasticity (H): 1.34 Skew: -0.07\n", "Prob(H) (two-sided): 0.36 Kurtosis: 3.43\n", "===================================================================================\n", "\n", "Warnings:\n", "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n", "\"\"\"" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "markdown", "metadata": { "id": "JM4wF9EBNS6A" }, "source": [ "**You can now start correlating these properties to the forecasts that you see. I will write about it in a subsequent post.** " ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CoDik7uHnxBp", "outputId": "77182486-0108-4e4f-dd03-7299fec8ad9a" }, "source": [ "#### What hyperparameters were used to train the model? 
----\n", "print(ets_model)" ], "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,\n", " bounds=None, callback=None, damped_trend=False, dates=None, disp=False,\n", " error='add', freq=None, full_output=True, ignore_inf_ic=True,\n", " information_criterion='aic', initial_level=None, initial_seasonal=None,\n", " initial_trend=None, initialization_method='estimated', maxiter=1000,\n", " missing='none', n_jobs=None, restrict=True, return_params=False,\n", " seasonal='mul', sp=12, start_params=None, trend='add')\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 729 }, "id": "XRJ7iyNRKAUi", "outputId": "2fca8159-0001-4f9e-ec78-ee162cc0e783" }, "source": [ "#### Access statistical fit properties using underlying `statsmodels`\n", "ets_model._forecaster.fit().summary()\n", "\n", "#### Alternatively, use sktime's convenient wrapper to do so ---- \n", "ets_model.summary()" ], "execution_count": 11, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>ETS Results</caption>\n", "<tr>\n", " <th>Dep. Variable:</th> <td>Number of airline passengers</td> <th> No. 
Observations: </th> <td>132</td> \n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>ETS(AAM)</td> <th> Log Likelihood </th> <td>-488.626</td>\n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Tue, 16 Nov 2021</td> <th> AIC </th> <td>1013.253</td>\n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>11:26:51</td> <th> BIC </th> <td>1065.143</td>\n", "</tr>\n", "<tr>\n", " <th>Sample:</th> <td>01-31-1949</td> <th> HQIC </th> <td>1034.339</td>\n", "</tr>\n", "<tr>\n", " <th></th> <td>- 12-31-1959</td> <th> Scale </th> <td>96.116</td> \n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>approx</td> <th> </th> <td> </td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>smoothing_level</th> <td> 0.3734</td> <td> 0.067</td> <td> 5.550</td> <td> 0.000</td> <td> 0.242</td> <td> 0.505</td>\n", "</tr>\n", "<tr>\n", " <th>smoothing_trend</th> <td> 3.734e-05</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>smoothing_seasonal</th> <td> 0.6265</td> <td> 0.067</td> <td> 9.296</td> <td> 0.000</td> <td> 0.494</td> <td> 0.759</td>\n", "</tr>\n", "<tr>\n", " <th>initial_level</th> <td> 109.3470</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_trend</th> <td> 2.6555</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.0</th> <td> 0.9773</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.1</th> <td> 0.8482</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.2</th> <td> 0.9508</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.3</th> <td> 
1.0885</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.4</th> <td> 1.1927</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.5</th> <td> 1.2076</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.6</th> <td> 1.1092</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.7</th> <td> 1.0129</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.8</th> <td> 1.0970</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.9</th> <td> 1.1541</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.10</th> <td> 1.0517</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "<tr>\n", " <th>initial_seasonal.11</th> <td> 1.0000</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td> <td> nan</td>\n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <th>Ljung-Box (Q):</th> <td>41.34</td> <th> Jarque-Bera (JB): </th> <td>1.25</td>\n", "</tr>\n", "<tr>\n", " <th>Prob(Q):</th> <td>0.02</td> <th> Prob(JB): </th> <td>0.54</td>\n", "</tr>\n", "<tr>\n", " <th>Heteroskedasticity (H):</th> <td>2.21</td> <th> Skew: </th> <td>0.11</td>\n", "</tr>\n", "<tr>\n", " <th>Prob(H) (two-sided):</th> <td>0.01</td> <th> Kurtosis: </th> <td>3.42</td>\n", "</tr>\n", "</table><br/><br/>Warnings:<br/>[1] Covariance matrix calculated using numerical (complex-step) differentiation." 
], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " ETS Results \n", "========================================================================================\n", "Dep. Variable: Number of airline passengers No. Observations: 132\n", "Model: ETS(AAM) Log Likelihood -488.626\n", "Date: Tue, 16 Nov 2021 AIC 1013.253\n", "Time: 11:26:51 BIC 1065.143\n", "Sample: 01-31-1949 HQIC 1034.339\n", " - 12-31-1959 Scale 96.116\n", "Covariance Type: approx \n", "=======================================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------\n", "smoothing_level 0.3734 0.067 5.550 0.000 0.242 0.505\n", "smoothing_trend 3.734e-05 nan nan nan nan nan\n", "smoothing_seasonal 0.6265 0.067 9.296 0.000 0.494 0.759\n", "initial_level 109.3470 nan nan nan nan nan\n", "initial_trend 2.6555 nan nan nan nan nan\n", "initial_seasonal.0 0.9773 nan nan nan nan nan\n", "initial_seasonal.1 0.8482 nan nan nan nan nan\n", "initial_seasonal.2 0.9508 nan nan nan nan nan\n", "initial_seasonal.3 1.0885 nan nan nan nan nan\n", "initial_seasonal.4 1.1927 nan nan nan nan nan\n", "initial_seasonal.5 1.2076 nan nan nan nan nan\n", "initial_seasonal.6 1.1092 nan nan nan nan nan\n", "initial_seasonal.7 1.0129 nan nan nan nan nan\n", "initial_seasonal.8 1.0970 nan nan nan nan nan\n", "initial_seasonal.9 1.1541 nan nan nan nan nan\n", "initial_seasonal.10 1.0517 nan nan nan nan nan\n", "initial_seasonal.11 1.0000 nan nan nan nan nan\n", "===================================================================================\n", "Ljung-Box (Q): 41.34 Jarque-Bera (JB): 1.25\n", "Prob(Q): 0.02 Prob(JB): 0.54\n", "Heteroskedasticity (H): 2.21 Skew: 0.11\n", "Prob(H) (two-sided): 0.01 Kurtosis: 3.42\n", "===================================================================================\n", "\n", "Warnings:\n", "[1] Covariance matrix calculated 
using numerical (complex-step) differentiation.\n", "\"\"\"" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JwUusEPSc44V", "outputId": "2bb38069-9062-439b-f407-2893e647e8d7" }, "source": [ "#### sktime pipelines are similar to sklearn.\n", "#### Access steps using `named_steps` attribute\n", "print(lr_model._forecaster.named_steps.keys(), \"\\n\\n\")\n", "\n", "#### Details about the steps ----\n", "pprint(lr_model._forecaster.named_steps)" ], "execution_count": 12, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "dict_keys(['conditional_deseasonalise', 'detrend', 'forecast']) \n", "\n", "\n", "{'conditional_deseasonalise': ConditionalDeseasonalizer(model='additive', seasonality_test=None, sp=1),\n", " 'detrend': Detrender(forecaster=PolynomialTrendForecaster(degree=1, regressor=None,\n", " with_intercept=True)),\n", " 'forecast': RecursiveTabularRegressionForecaster(estimator=LinearRegression(copy_X=True,\n", " fit_intercept=True,\n", " n_jobs=-1,\n", " normalize=False,\n", " positive=False),\n", " window_length=10)}\n" ] } ] } ] }