# Exercise 5.1 
# 
library(nlme)
data(renergie, package = "hecstatmod")
cols <- c(gray.colors(n = 16), 2)[c(1:10,17,11:16)]

# Plot the time series using interaction plots 
par(mar = c(5,6,1,1), mfrow = c(1,2), bty = "l", pch = 20)
with(renergie,
     interaction.plot(response = ave, 
                      trace.factor = factor(region), 
                      x.factor = as.factor(date),
                      col = cols, lty = 1, legend = FALSE,
                      xlab = "date", ylab = "weekly average price (in CAD)")
)
with(renergie,
     interaction.plot(response = ave-min, 
                      trace.factor = factor(region), 
                      x.factor = as.factor(date),
                      col = cols, lty = 1, legend = FALSE,
                      xlab = "date", ylab = "weekly average of retailer\n margin of profit (in CAD)")
)

# Produce prettier plots (using time series package!)
library(xts)
par(mar = c(3,3,1,1), mfrow = c(1,2), bty = "l", pch = 20)
plot(xts(x = ren[,19:35], order.by = ren$date), main =  "Average retailers margin of profit")
plot(xts(x = ren[,2:18], order.by = ren$date), main =  "Average sale price")


# Transform from long to wide format
ren <- renergie
ren$marg <- renergie$ave - ren$min
ren$min <- NULL
ren <- tidyr::pivot_wider(data = ren, values_from = c("ave","marg"), names_from = region)


rener_mod1 <- nlme::gls(I(ave-min) ~ factor(region), data = renergie, 
                        correlation = corAR1(form=~1 | date))
plot(ACF(rener_mod1, resType = "normalized"))
likH0 <- logLik(rener_mod1)

# assessing the hypothesis requires essentially 
# fitting 17 different models and adding their REML
likH1 <- sum(sapply(1:17, function(i){
  logLik(nlme::gls(I(ave-min) ~ 1, data = renergie[renergie$region==i,],
                   correlation = corAR1(form=~1 | date)))}))
pchisq(2*as.numeric(likH0-likH1), df = 32, lower.tail = FALSE)


# Exercise 5.2
data(baumann, package = "hecstatmod")
# Cast factor and create ID variable
baumann$id <- factor(1:nrow(baumann))
# Long format
baumann_long <- tidyr::pivot_longer(data = baumann, 
                                    cols = c("mpre","mpost"),
                                    names_to = "test",
                                    names_prefix = "m",
                                    values_to = "score")

# Cast factors
baumann_long$test <- factor(baumann_long$test)
baumann$dpp <- baumann$mpost - baumann$mpre
# Preliminary ANOVA - check equality of mean pre-intervention
anova(lm(mpre ~ group, data = baumann))

# Two competing models (one way ANOVA and linear model)
model1 <- lm(dpp ~ group, data = baumann)
car::Anova(model1, type = 3)
model2 <- lm(mpost ~ group + mpre, data = baumann)
# Test for significance using the confidence interval
confint(modele2)["mpre",]
# Or tweek the test to get the p-value and F-statistic 
# (keep the offset, add mpre as covariate)
car::Anova(lm(mpost ~ group + mpre, 
              offset=mpre, data = baumann), type=3)[3,]
LRT <- as.numeric(2*(logLik(model2) - logLik(model1)))
pchisq(LRT, df = 1, lower.tail = FALSE)


model3 <- gls(score ~ group*test, 
              data = baumann_long,
              correlation = nlme::corSymm(form = ~ 1 | id))
model4 <- gls(score ~ group*test, 
              data = baumann_long,
              correlation = nlme::corSymm(form = ~ 1 | id),
              weights = varIdent(form=~1|test))
#Likelihood ratio test comparing model 3 and 4 (test of heteroscedasticity)
anova(model3, model4)
#Covariance matrix of block
getVarCov(model4)

# We can fit different parameters in each group to compare Model 3
ll <- rep(0,3)
for(i in 1:3){
  ll[i] <- logLik(gls(score ~ test,
                      data = baumann_long,
                      subset = which(baumann_long$group == levels(baumann_long$group)[i]),
                      correlation = corSymm(form = ~ 1 | id)))
}
pchisq(sum(ll) - as.vector(logLik(model3)), df = 4, lower.tail = FALSE)

#Finally, check the effect of the teaching method
summary(model4)
car::Anova(model4, type = 3)[4,]
# Exercise 5.3

data(tolerance, package = "hecstatmod")
tolerance$sex <- factor(tolerance$sex, labels = c("female","male"))
# EDA plots
library(ggplot2)
library(patchwork)
g1 <- ggplot(data = tolerance) + 
  geom_boxplot(aes(y = tolerance, x = sex))
g2 <- ggplot(data = tolerance) + 
  geom_point(aes(y = tolerance, x = exposure))
g3 <- ggplot(data = tolerance) + 
  geom_boxplot(aes(y = tolerance, x = factor(age))) + 
  xlab("age (in years)")

# To save to pdf, use the following argument
# pdf("E5p3-tolerance_eda.pdf", width = 8, height = 3)
g1 + g2 + g3
dev.off()

# pdf("E5p3-tolerance_spagh.pdf", width = 8, height = 5)
ggplot(data = tolerance, aes(x = age, y = tolerance, group = id, color=id)) +
  geom_line() + theme(legend.position="none")
dev.off()
## Base R plots
# par(mfrow = c(1,3), mar = c(4,4,0.5,0.5), bty = "l")
# boxplot(tolerance ~ sex, data = tolerance)
# plot(tolerance ~ exposure, data = tolerance)
# boxplot(tolerance ~ factor(age), xlab = "age (in years)", data = tolerance)
# dev.off()
# Spaghetti plot
# with(tolerance,
#      interaction.plot(x.factor = age,
#                  trace.factor = id,
#                  response = tolerance,
#                  xlab = "age (in years)", 
#                  ylab = "tolerance score",
#                  col = 1:16,
#                  legend=FALSE, bty = "l")
# )
mod1 <- gls(tolerance ~ sex + exposure + age, data = tolerance, 
    correlation = corCompSymm(form= ~ 1 | id))
mod2 <- gls(tolerance ~ sex + exposure + age, data = tolerance, 
    correlation = corAR1(form= ~ 1 | id))
mod3 <- gls(tolerance ~ sex + exposure + age, data = tolerance)
anova(mod1, mod3)
anova(mod2, mod3)
cbind("AIC" = c(AIC(mod1), AIC(mod2), AIC(mod3)), 
      "BIC" = c(BIC(mod1), BIC(mod2), BIC(mod3)))