library(mvgam)
library(forecast)
library(janitor)
library(gratia)
library(marginaleffects)
library(ggplot2); theme_set(theme_bw())
library(patchwork)
# Load the AirPassengers dataset and plot the time series
data("AirPassengers")
plot(AirPassengers, bty = 'l', lwd = 1.5,
col = 'darkred')
# Look at an STL decomposition
stl(AirPassengers, s.window = 9) %>%
autoplot()
# This plot suggests that the seasonal pattern changes over time,
# not just in magnitude but also perhaps a bit in shape. Convert
# to a data.frame() object that is suitable for mgcv / mvgam modeling
airdat <- series_to_mvgam(AirPassengers, freq = frequency(AirPassengers))
dplyr::glimpse(airdat$data_train)
# Plot features of the series
plot_mvgam_series(data = airdat$data_train,
newdata = airdat$data_test)
# Now fit a model. First the traditional way to model time-varying
# seasonality using a tensor product of 'time' and 'season',
# where 'season' is modeled with a cyclic spline
mod1 <- mvgam(y ~ te(season, time,
bs = c('cc', 'tp'),
k = c(6, 15)),
knots = list(season = c(0.5, 12.5)),
family = nb(),
data = airdat$data_train,
newdata = airdat$data_test)
# Predictions aren't too bad here, but perhaps the uncertainty
# is a bit too wide
plot(mod1, type = 'forecast')
# We can conveniently use gratia::draw() to view the time * season
# smooth function
draw(mod1$mgcv_model)
# This model is not too bad, but it might not be preferable for a few
# reasons:
# 1. We are capturing the temporal dimension with a spline, and we all
# know that splines generally give poor extrapolation behaviours
# 2. This model assumes that the seasonal pattern changes at the same
# rate that the temporal spline changes, and this might not always
# be the most suitable model
# There is another way we can model time-varying seasonality, in this
# case using a fourier transform. First compute sine and cosine functions
# using the forecast::fourier() function
data.frame(forecast::fourier(AirPassengers, K = 4)) %>%
# Use clean_names as fourier() gives terrible name types
janitor::clean_names() -> fourier_terms
dplyr::glimpse(fourier_terms)
# Now add these new predictors to the training and testing sets
airdat$data_train = airdat$data_train %>%
dplyr::bind_cols(fourier_terms[1:NROW(airdat$data_train), ])
airdat$data_test = airdat$data_test %>%
dplyr::bind_cols(fourier_terms[(NROW(airdat$data_train) + 1):
NROW(fourier_terms), ])
# Next we can fit an mvgam model that includes a smooth nonlinear
# temporal trend and time-varying seasonality. This is done by first
# using a monotonic smooth of time that ensures the temporal trend never
# declines, which seems appropriate here. The time-varying seasonality
# is captured by allowing the coefficients on the fourier terms to vary
# over time using separate Gaussian Processes
mod2 <- mvgam(y ~
# Monotonic smooth of time to ensure the trend
# either increases or flattens, but does not
# decrease
s(time, bs = 'moi', k = 10) +
# Time-varying fourier coefficients to capture
# possibly changing seasonality
s(time, by = s1_12, k = 5) +
s(time, by = c1_12, k = 5) +
s(time, by = s2_12, k = 5) +
s(time, by = c2_12, k = 5) +
s(time, by = s3_12, k = 5) +
s(time, by = c3_12, k = 5),
family = nb(),
data = airdat$data_train,
newdata = airdat$data_test)
# Of course this model still uses a spline of 'time' for the trend,
# but the monotonic restriction ensures that the predictions are
# at least somewhat controllable
# This second model is preferred based on in-sample fits
loo_compare(mod1, mod2)
# But what about forecasts? Second model is again slightly preferred
layout(matrix(1:2, nrow = 2))
plot(mod1, type = 'forecast')
plot(mod2, type = 'forecast')
layout(1)
# Look at the smooths to see how the fourier coefficients change
# over time
plot(mod2, type = 'smooths')
summary(mod2, include_betas = FALSE)
# Plotting this model takes a bit more work, as we need to use predictions
# to visualise the effects
# First the trend, using the condition argument in plot_predictions()
p1 <- plot_predictions(mod2,
condition = 'time',
type = 'expected') +
labs(y = 'Expected passengers',
title = 'Long-term trend')
# And the time-varying seasonal pattern, which requires subtracting
# the conditional trend predictions from the total predictions
with_season <- predict(mod2, summary = FALSE,
type = 'expected')
agg_over_season <- predict(mod2,
newdata = datagrid(model = mod2,
time = unique),
summary = FALSE,
type = 'expected')
season_preds <- with_season - agg_over_season
p2 <- ggplot(airdat$data_train %>%
dplyr::mutate(pred = apply(season_preds, 2, mean),
upper = apply(season_preds, 2, function(x)
quantile(x, probs = 0.975)),
lower = apply(season_preds, 2, function(x)
quantile(x, probs = 0.025))),
aes(x = time, y = pred)) +
geom_ribbon(aes(ymax = upper,
ymin = lower),
alpha = 0.2) +
geom_line()+
labs(y = '',
title = 'Time-varying seasonality')
# Plot these effects together using patchwork
p1 + p2
#### Bonus tasks ####
# 1. Plot the 1st derivative of the temporal trend using plot_slopes()
# to ensure the monotonic constraint is being respected
?marginaleffects::plot_slopes
# 2. Extract median residuals from both models and plot these against 'season'.
# Does our model contain enough complexity to capture seasonal patterns well?
?residuals.mvgam
# 3. Since we might not still be comfortable forecasting from a monotonic
# spline, fit a competing model that uses a piecewise logistic trend
# instead
# This requires a 'cap' variable in the data, which specifies the
# maximum value we'd expect the series to take
airdat$data_train$cap <- 700
airdat$data_test$cap <- 700
mod3 <- mvgam(y ~
# Time-varying fourier coefficients to capture
# possibly changing seasonality
s(time, by = s1_12, k = 5) +
s(time, by = c1_12, k = 5) +
s(time, by = s2_12, k = 5) +
s(time, by = c2_12, k = 5) +
s(time, by = s3_12, k = 5) +
s(time, by = c3_12, k = 5) - 1,
# Specify a piecewise logistic growth model;
# note how we suppressed the intercept in the formula,
# which is usually necessary in piecewise trend models
trend_model = PW(growth = 'logistic',
n_changepoints = 5),
family = poisson(),
data = airdat$data_train)
# Plot the trend estimates
plot(mod3, type = 'trend')
# Compute and plot the forecasts from this model
fc <- mvgam::forecast(object = mod3, newdata = airdat$data_test)
plot(fc)
# Look at estimates of key logistic growth parameters
summary(mod3, include_betas = FALSE)