library(hecstatmod) library(ggplot2) # grammar of graphics library(poorman) # poorman version of dplyr library(coin) # nonparametric tests ## Exercise 1.1 data(renfe, package = "hecstatmod") # Select subsample with only high-speed trains data <- renfe |> filter(type %in% c("AVE","AVE-TGV")) # Transform duration in hours duration <- data$duration / 60 # Compute summary statistics of the duration mean_d <- mean(duration); # Power simulation B <- 1e4L # number of simulations n <- nrow(data) m <- 100L # subsample size alpha <- 0.05 # level of test # Create a grid of duration times for the alternative hypothesis time <- seq(from = mean_d, to = 3, by = 0.005) power <- matrix(nrow = B, ncol = length(time)) # container to store results # fix random number generation seed for reproducibility set.seed(2020) for(i in seq_len(B)){ # for each replication for(j in seq_along(time)){ # for each alternative mean duration # Compute the p-value of the one-sample t-test power[i,j] <- t.test(x = duration[sample.int(n = n, size = m)], mu = time[j])$p.value # Store p-values } } # Plot the power curve as a function of time ggplot(data = data.frame(time = time, # Proportion of rejection of H0 at level alpha power = colMeans(power < alpha)), mapping = aes(x = time, y = power)) + geom_point() + geom_vline(xintercept = 2.845, linetype = "dashed") + geom_hline(yintercept = alpha) + labs(x = "average duration time (in hours)", y = "", subtitle = "Power as a function of the mean alternative time") + scale_y_continuous(limits = c(0,1), expand = expansion(add = 0.01)) + theme_classic() # Exercise 1.2 # Coverage rate summarise(renfe_simu, coverage = mean((cilb < -0.28) & (ciub > -0.28))) # Histogram of mean price difference as a function of destination ggplot(data = renfe_simu, aes(x = meandif)) + geom_histogram(bins = 30) + geom_vline(xintercept = -0.28, col = "blue") + xlab("mean price difference (in euros)") + ylab("count") # Power of test (alternative is true!) summarise(renfe_simu, power = mean(pval < 0.05)) # Alternative code # with(renfe_simu, mean(pval < 0.05)) # mean(renfe_simu$pval < 0.05) # Exercise 1.3 with(renfe, t.test(x = price, mu = 43.25, conf.level = 0.9, subset = type %in% "AVE-TGV"))