library(forecast) library(mvgam) library(janitor) library(gratia) library(marginaleffects) library(ggplot2) theme_set(theme_bw(base_size = 12)) library(patchwork) # Load the AirPassengers dataset and plot the time series data("AirPassengers") plot(AirPassengers, bty = "l", lwd = 2, col = "darkred" ) # Look at an STL decomposition stl(AirPassengers, s.window = 9) %>% autoplot() # This plot suggests that the seasonal pattern changes over time, # not just in magnitude but also perhaps a bit in shape. Convert # to a data.frame() object that is suitable for mgcv / mvgam modeling airdat <- series_to_mvgam( AirPassengers, freq = frequency(AirPassengers) ) dplyr::glimpse(airdat$data_train) # Plot features of the series plot_mvgam_series( data = airdat$data_train, newdata = airdat$data_test ) # Now fit a model. First the traditional way to model time-varying # seasonality using a tensor product of 'time' and 'season', # where 'season' is modeled with a cyclic spline mod1 <- mvgam( y ~ te(season, time, bs = c("cc", "tp"), k = c(6, 15)), knots = list(season = c(0.5, 12.5)), family = nb(), data = airdat$data_train, newdata = airdat$data_test ) # Predictions aren't too bad here, but perhaps the uncertainty # is a bit too wide plot(forecast(mod1)) + theme_bw(base_size = 12) # We can conveniently use gratia::draw() to view the time * season # smooth function draw(mod1$mgcv_model) # This model is not too bad, but it might not be preferable for a few # reasons: # 1. We are capturing the temporal dimension with a spline, and we all # know that splines generally give poor extrapolation behaviours # 2. This model assumes that the seasonal pattern changes at the same # rate that the temporal spline changes, and this might not always # be the most suitable model # There is another way we can model time-varying seasonality, in this # case using a fourier transform. First compute sine and cosine functions # using the forecast::fourier() function data.frame( forecast::fourier(AirPassengers, K = 4) ) %>% # Use clean_names as fourier() gives terrible name types janitor::clean_names() -> fourier_terms dplyr::glimpse(fourier_terms) # Now add these new predictors to the training and testing sets airdat$data_train <- airdat$data_train %>% dplyr::bind_cols( fourier_terms[1:NROW(airdat$data_train), ] ) airdat$data_test <- airdat$data_test %>% dplyr::bind_cols( fourier_terms[(NROW(airdat$data_train) + 1): NROW(fourier_terms), ] ) # Next we can fit an mvgam model that includes a smooth nonlinear # temporal trend and time-varying seasonality. This is done by first # using a monotonic smooth of time that ensures the temporal trend never # declines, which seems appropriate here. The time-varying seasonality # is captured by allowing the coefficients on the fourier terms to vary # over time using separate smooths mod2 <- mvgam( y ~ # Monotonic smooth of time to ensure the trend # either increases or flattens, but does not # decrease s(time, bs = "moi", k = 10) + # Time-varying fourier coefficients to capture # possibly changing seasonality s(time, by = s1_12, k = 5) + s(time, by = c1_12, k = 5) + s(time, by = s2_12, k = 5) + s(time, by = c2_12, k = 5) + s(time, by = s3_12, k = 5) + s(time, by = c3_12, k = 5), family = nb(), data = airdat$data_train, newdata = airdat$data_test ) # Of course this model still uses a spline of 'time' for the trend, # but the monotonic restriction ensures that the predictions are # at least somewhat controllable # This second model is preferred based on in-sample fits loo_compare(mod1, mod2) # But what about forecasts? Second model is again slightly preferred patchwork::wrap_plots( plot(forecast(mod1)) + labs(title = 'Tensor product'), plot(forecast(mod2)) + labs(title = 'Fourier terms'), ncol = 1 ) & theme_bw(base_size = 12) # Look at the smooths to see how the fourier coefficients change # over time draw(mod2) # Use predictions to visualise the effects of seasonality and trend # First the trend, using the condition argument in plot_predictions() p1 <- plot_predictions( mod2, condition = "time", type = "expected" ) + labs( y = "Expected passengers", title = "Long-term trend" ) # And the time-varying seasonal pattern, which requires subtracting # the conditional trend predictions from the total predictions with_season <- predict( mod2, summary = FALSE, type = "expected" ) agg_over_season <- predict( mod2, newdata = datagrid( model = mod2, time = unique ), summary = FALSE, type = "expected" ) season_preds <- with_season - agg_over_season p2 <- ggplot( airdat$data_train %>% dplyr::mutate( pred = apply(season_preds, 2, mean), upper = apply(season_preds, 2, function(x) { quantile(x, probs = 0.975) }), lower = apply(season_preds, 2, function(x) { quantile(x, probs = 0.025) }) ), aes(x = time, y = pred) ) + geom_ribbon( aes( ymax = upper, ymin = lower ), alpha = 0.2 ) + geom_line() + labs( y = "", title = "Time-varying seasonality" ) # Plot these effects together using patchwork p1 + p2 #### Bonus tasks #### # 1. Plot the 1st derivative of the temporal trend using plot_slopes() # to ensure the monotonic constraint is being respected ?marginaleffects::plot_slopes # 2. Extract median residuals from both models and plot these against 'season'. # Does our model contain enough complexity to capture seasonal patterns well? ?residuals.mvgam ?pp_check.mvgam # 3. Since we might not still be comfortable forecasting from a monotonic # spline, fit a competing model that uses a GP trend # instead mod3 <- mvgam( y ~ # Time-varying fourier coefficients to capture # possibly changing seasonality time + s(time, by = s1_12, k = 5) + s(time, by = c1_12, k = 5) + s(time, by = s2_12, k = 5) + s(time, by = c2_12, k = 5) + s(time, by = s3_12, k = 5) + s(time, by = c3_12, k = 5) - 1, trend_model = GP(), family = poisson(), data = airdat$data_train, newdata = airdat$data_test ) # Compute and plot the forecasts from this model plot(forecast(mod3)) # Look at estimates of key logistic growth parameters summary(mod3, include_betas = FALSE)