{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from statsforecast import StatsForecast\n",
"from statsforecast.models import AutoARIMA, AutoETS, AutoCES, AutoTheta, Naive, SeasonalNaive\n",
"from utilsforecast.plotting import plot_series\n",
"from utilsforecast.evaluation import evaluate\n",
"from utilsforecast.losses import mae, rmse, smape, mase\n",
"\n",
"from utils import plot_metric_bar_multi, evaluate_cv, get_best_model_forecast"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1) Load Data"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Load the M4 hourly train/test splits. `ds` is read as a count of hours and\n",
"# shifted onto real timestamps starting at 2024-01-01.\n",
"Y_train_df = (\n",
"    pd.read_csv('https://auto-arima-results.s3.amazonaws.com/M4-Hourly.csv')\n",
"    .assign(ds=lambda frame: pd.to_datetime('2024-01-01') + pd.to_timedelta(frame['ds'], unit='h'))\n",
")\n",
"Y_test_df = (\n",
"    pd.read_csv('https://auto-arima-results.s3.amazonaws.com/M4-Hourly-test.csv')\n",
"    .assign(ds=lambda frame: pd.to_datetime('2024-01-01') + pd.to_timedelta(frame['ds'], unit='h'))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Randomly select `n_series` series; the fixed seed keeps the sample reproducible\n",
"n_series = 8\n",
"uids = Y_train_df['unique_id'].drop_duplicates().sample(n_series, random_state=23).values\n",
"\n",
"# Keep only the sampled series in both the training and test splits\n",
"df_train = Y_train_df.query('unique_id in @uids')\n",
"df_test = Y_test_df.query('unique_id in @uids')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Define Error Metrics\n",
"from functools import partial\n",
"hourly_mase = partial(mase, seasonality=24)\n",
"metrics = [hourly_mase, rmse, smape]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Plot up to four of the sampled series. The test split is passed in the\n",
"# forecasts slot with `y` renamed to `actual`.\n",
"fig = plot_series(\n",
"    df=df_train,\n",
"    forecasts_df=df_test.rename(columns={'y': 'actual'}),\n",
"    max_ids=4,\n",
")\n",
"# NOTE(review): absolute path on the author's machine; adjust before re-running elsewhere.\n",
"fig.savefig(\n",
"    '/Users/khuyentran/nixtla_blog/images/statsforecast-automatic-model-selection/selected-series.svg',\n",
"    format='svg',\n",
"    bbox_inches='tight',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2) Baseline: Naive & SeasonalNaive\n",
"Before diving into more sophisticated models, we begin with two classical and interpretable statistical baselines:\n",
"\n",
"**Naive model**: always predicts the last observed value.\n",
"\n",
"**SeasonalNaive model**: predicts that each point will repeat the value observed one season ago (e.g., the same hour yesterday for hourly data)."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/utilsforecast/processing.py:384: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
" freq = pd.tseries.frequencies.to_offset(freq)\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/utilsforecast/processing.py:440: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
" freq = pd.tseries.frequencies.to_offset(freq)\n"
]
}
],
"source": [
"# Use the Naive and SeasonalNaive models as baselines\n",
"sf_base = StatsForecast(\n",
"    models=[Naive(), SeasonalNaive(season_length=24)],\n",
"    freq='h',  # lowercase hourly alias; uppercase 'H' raises a pandas FutureWarning\n",
"    n_jobs=-1,\n",
")\n",
"\n",
"# Forecast the next 48 steps (hours) for every selected series\n",
"fcst_base = sf_base.forecast(df=df_train, h=48)\n",
"\n",
"# Join the forecasts onto the held-out test observations for evaluation\n",
"eval_base = df_test.merge(fcst_base, on=['unique_id', 'ds'])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Plot the baseline forecasts against the last 5 days (5*24 steps) of history\n",
"fig = plot_series(\n",
"    df=df_train,\n",
"    forecasts_df=eval_base,\n",
"    max_ids=4,\n",
"    max_insample_length=5*24,\n",
")\n",
"# NOTE(review): absolute path on the author's machine; adjust before re-running elsewhere.\n",
"fig.savefig(\n",
"    '/Users/khuyentran/nixtla_blog/images/statsforecast-automatic-model-selection/baseline-forecasts.svg',\n",
"    format='svg',\n",
"    bbox_inches='tight',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
Naive
\n",
"
SeasonalNaive
\n",
"
\n",
"
\n",
"
metric
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"
\n",
"
mase
\n",
"
8.029174
\n",
"
0.993421
\n",
"
\n",
"
\n",
"
rmse
\n",
"
179.520049
\n",
"
66.529088
\n",
"
\n",
"
\n",
"
smape
\n",
"
0.252074
\n",
"
0.065754
\n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Naive SeasonalNaive\n",
"metric \n",
"mase 8.029174 0.993421\n",
"rmse 179.520049 66.529088\n",
"smape 0.252074 0.065754"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Score the baselines on the test window with the metrics defined earlier,\n",
"# aggregated with the mean, and index the result by metric name.\n",
"metrics_base = (\n",
"    evaluate(df=eval_base, metrics=metrics, train_df=df_train, agg_fn='mean')\n",
"    .set_index('metric')\n",
")\n",
"metrics_base"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3) StatsForecast Models\n",
"After establishing the baselines, we now move on to a set of classical statistical forecasting models implemented efficiently in StatsForecast. These models automatically estimate optimal parameters for each series.\n",
"\n",
"**AutoARIMA**: Captures autocorrelation patterns and adjusts for both trend and seasonality through differencing and autoregressive components.\n",
"\n",
"**AutoETS**: Exponential smoothing model that automatically selects additive or multiplicative trend/seasonal components. Excellent for smooth, structured data.\n",
"\n",
"**AutoCES**: Complex Exponential Smoothing, which extends ETS by modeling cyclical components in a more flexible way.\n",
"\n",
"**AutoTheta**: A robust, fast method derived from the Theta forecasting model.\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Define models: each automatic model gets season_length=24 (seasonality for hourly data)\n",
"models = [\n",
"    model_cls(season_length=24)\n",
"    for model_cls in (AutoARIMA, AutoETS, AutoCES, AutoTheta)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/utilsforecast/processing.py:384: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
" freq = pd.tseries.frequencies.to_offset(freq)\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/utilsforecast/processing.py:440: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
" freq = pd.tseries.frequencies.to_offset(freq)\n"
]
}
],
"source": [
"# Initialize StatsForecast with the automatic models\n",
"sf = StatsForecast(\n",
"    models=models,\n",
"    freq='h',  # lowercase hourly alias; uppercase 'H' raises a pandas FutureWarning\n",
"    n_jobs=-1,\n",
")\n",
"\n",
"# Fit every model and forecast 48 steps ahead in one call,\n",
"# requesting 90% prediction intervals alongside the point forecasts.\n",
"fcst_sf_models = sf.forecast(df=df_train, h=48, level=[90])\n",
"\n",
"# Join the forecasts onto the held-out test observations for evaluation\n",
"eval_sf_models = df_test.merge(fcst_sf_models, on=['unique_id', 'ds'])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Plot the model forecasts against the last 5 days (5*24 steps) of history\n",
"fig = plot_series(\n",
"    df=df_train,\n",
"    forecasts_df=eval_sf_models,\n",
"    max_ids=4,\n",
"    max_insample_length=5*24,\n",
")\n",
"# NOTE(review): absolute path on the author's machine; adjust before re-running elsewhere.\n",
"fig.savefig(\n",
"    '/Users/khuyentran/nixtla_blog/images/statsforecast-automatic-model-selection/statsforecast-predictions.svg',\n",
"    format='svg',\n",
"    bbox_inches='tight',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Score the StatsForecast models on the test window, exactly as the baselines\n",
"# were scored. Without this, `metrics_sf_models` is undefined on a fresh kernel.\n",
"metrics_sf_models = evaluate(\n",
"    df=eval_sf_models,\n",
"    train_df=df_train,\n",
"    metrics=metrics,\n",
"    agg_fn='mean',\n",
").set_index('metric')\n",
"\n",
"# Compare the models against the baselines on MASE\n",
"fig = plot_metric_bar_multi(dfs=[metrics_sf_models, metrics_base], metric='mase')\n",
"# NOTE(review): absolute path on the author's machine; adjust before re-running elsewhere.\n",
"fig.savefig('/Users/khuyentran/nixtla_blog/images/statsforecast-automatic-model-selection/model-comparison-bar-chart.svg', format='svg', bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4) Cross-Validation with a Rolling Window\n",
"In this section, we use rolling-origin cross-validation to select the best model for each time series. Nixtla’s implementation of cross-validation ensures the temporal order is respected, avoiding data leakage and producing a more stable and streamlined evaluation process.\n",
"\n",
"**How it works:** \n",
"1. Start with an initial training window and forecast the next *h* steps. \n",
"2. Slide the window forward by *step_size* and repeat. \n",
"3. Compute error metrics for each window and model. \n",
"4. Select the model with the **lowest average error** for each series.\n",
"\n",
""
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n",
"/Users/khuyentran/nixtla_blog/.venv/lib/python3.11/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n"
]
}
],
"source": [
"# Run cross-validation with 2 rolling windows of 24 steps each:\n",
"#   h=24         -> forecast horizon of every window\n",
"#   step_size=24 -> roll the window forward by 24 steps each time\n",
"#   n_windows=2  -> number of evaluation windows\n",
"cv_df = sf.cross_validation(df=df_train, n_windows=2, step_size=24, h=24)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
best_statsforecast_model
\n",
"
count
\n",
"
\n",
" \n",
" \n",
"
\n",
"
0
\n",
"
AutoARIMA
\n",
"
3
\n",
"
\n",
"
\n",
"
1
\n",
"
AutoETS
\n",
"
2
\n",
"
\n",
"
\n",
"
2
\n",
"
AutoTheta
\n",
"
2
\n",
"
\n",
"
\n",
"
3
\n",
"
CES
\n",
"
1
\n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" best_statsforecast_model count\n",
"0 AutoARIMA 3\n",
"1 AutoETS 2\n",
"2 AutoTheta 2\n",
"3 CES 1"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Evaluate model performance using MSE across cross-validation windows\n",
"evaluation_df = evaluate_cv(cv_df, mae)\n",
"\n",
"# Count how many times each model was selected as best\n",
"evaluation_df['best_statsforecast_model'].value_counts().to_frame().reset_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After selecting the best model for each series, we visualize the final forecasts alongside the actual test data.\n",
"\n",
"Here, the 90% **prediction interval** (shown as the shaded band) provides an estimate of forecast uncertainty,\n",
"reflecting how much variation we can expect in future observations based on past residuals."
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Pull, for each series, the forecasts of its best-performing model and\n",
"# join them onto the held-out test observations.\n",
"best_fcst_sf = get_best_model_forecast(fcst_sf_models, evaluation_df)\n",
"eval_best_sf = df_test.merge(best_fcst_sf, on=['unique_id', 'ds'])\n",
"\n",
"# Plot the forecasts with their 90% interval against the last 5 days of history\n",
"fig = plot_series(\n",
"    df=df_train,\n",
"    forecasts_df=eval_best_sf,\n",
"    max_ids=4,\n",
"    max_insample_length=5*24,\n",
"    level=[90],\n",
")\n",
"# NOTE(review): absolute path on the author's machine; adjust before re-running elsewhere.\n",
"fig.savefig(\n",
"    '/Users/khuyentran/nixtla_blog/images/statsforecast-automatic-model-selection/best-model-forecasts.svg',\n",
"    format='svg',\n",
"    bbox_inches='tight',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"
"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pull, for each series, the forecasts of its best-performing model and\n",
"# join them onto the held-out test observations.\n",
"best_fcst_sf = get_best_model_forecast(fcst_sf_models, evaluation_df)\n",
"eval_best_sf = df_test.merge(best_fcst_sf, on=['unique_id', 'ds'])\n",
"\n",
"# Narrow both frames to the single series H25\n",
"df_train_h25 = df_train.loc[df_train['unique_id'] == 'H25']\n",
"eval_best_sf_h25 = eval_best_sf.loc[eval_best_sf['unique_id'] == 'H25']\n",
"\n",
"# Plot the H25 forecast with its 90% interval against the last 5 days of history\n",
"fig = plot_series(\n",
"    df=df_train_h25,\n",
"    forecasts_df=eval_best_sf_h25,\n",
"    max_insample_length=5*24,\n",
"    level=[90],\n",
")\n",
"fig"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"