####################################################### ### Statistical Computing with R 2e ### ### Maria L. Rizzo ### ### Chapman & Hall/CRC The R Series ### ### ISBN 9781466553323 - CAT# K15269 ### ### March 6, 2019 ### ### ### ### R code for Chapter 7 ### ### Monte Carlo Methods in Inference ### ####################################################### # packages to install: energy, ggplot2 ### Example 7.1 (Basic Monte Carlo estimation) m <- 1000 g <- numeric(m) for (i in 1:m) { x <- rnorm(2) g[i] <- abs(x[1] - x[2]) } est <- mean(g) est ### Example 7.2 (Estimating the MSE of a trimmed mean) n <- 20 m <- 1000 mean <- numeric(m) for (i in 1:m) { x <- rnorm(n); #x[1] <- rt(1,df=1); #x[1:2] <- rt(2,df=1) mean[i] <- mean(x) } mse <- mean(mean^2) mse sqrt(sum((mean - mean(mean))^2)) / m #se n <- 20 m <- 1000 tmean <- numeric(m) for (i in 1:m) { x <- rnorm(n); #x[1] <- rt(1,df=1); #x[1:2] <- rt(4,df=1) x <- sort(x) tmean[i] <- sum(x[2:(n-1)]) / (n-2) } mse <- mean(tmean^2) mse sqrt(sum((tmean - mean(tmean))^2)) / m #se n <- 20 m <- 1000 tmean <- numeric(m) for (i in 1:m) { x <- rnorm(n); #x[1] <- rt(1,df=1); #x[1:2] <- rt(2,df=1) tmean[i] <- median(x) } mse <- mean(tmean^2) mse sqrt(sum((tmean - mean(tmean))^2)) / m #se ### Example 7.3 (MSE of a trimmed mean, cont.) set.seed(522) n <- 20 K <- n/2 - 1 m <- 1000 mse <- matrix(0, n/2, 6) trimmed.mse <- function(n, m, k, p) { #MC est of mse for k-level trimmed mean of #contaminated normal pN(0,1) + (1-p)N(0,100) tmean <- numeric(m) for (i in 1:m) { sigma <- sample(c(1, 10), size = n, replace = TRUE, prob = c(p, 1-p)) x <- sort(rnorm(n, 0, sigma)) tmean[i] <- sum(x[(k+1):(n-k)]) / (n-2*k) } mse.est <- mean(tmean^2) se.mse <- sqrt(mean((tmean-mean(tmean))^2)) / sqrt(m) return(c(mse.est, se.mse)) } for (k in 0:K) { mse[k+1, 1:2] <- trimmed.mse(n=n, m=m, k=k, p=1.0) mse[k+1, 3:4] <- trimmed.mse(n=n, m=m, k=k, p=.95) mse[k+1, 5:6] <- trimmed.mse(n=n, m=m, k=k, p=.9) } ### Example 7.4 (Confidence interval for variance) n <- 20 alpha <- .05 x <- rnorm(n, mean=0, sd=2) UCL <- (n-1) * var(x) / qchisq(alpha, df=n-1) ### Example 7.5 (MC estimate of confidence level) n <- 20 alpha <- .05 UCL <- replicate(1000, expr = { x <- rnorm(n, mean = 0, sd = 2) (n-1) * var(x) / qchisq(alpha, df = n-1) } ) #count the number of intervals that contain sigma^2=4 sum(UCL > 4) #or compute the mean to get the confidence level mean(UCL > 4) ### Example 7.6 (Empirical confidence level) n <- 20 alpha <- .05 UCL <- replicate(1000, expr = { x <- rchisq(n, df = 2) (n-1) * var(x) / qchisq(alpha, df = n-1) } ) sum(UCL > 4) mean(UCL > 4) ### Example 7.7 (Empirical Type I error rate) n <- 20 alpha <- .05 mu0 <- 500 sigma <- 100 m <- 10000 #number of replicates p <- numeric(m) #storage for p-values for (j in 1:m) { x <- rnorm(n, mu0, sigma) ttest <- t.test(x, alternative = "greater", mu = mu0) p[j] <- ttest$p.value } p.hat <- mean(p < alpha) se.hat <- sqrt(p.hat * (1 - p.hat) / m) print(c(p.hat, se.hat)) ### Example 7.8 (Skewness test of normality) n <- c(10, 20, 30, 50, 100, 500) #sample sizes cv <- qnorm(.975, 0, sqrt(6/n)) #crit. values for each n #cv <- qnorm(.975, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) sk <- function(x) { #computes the sample skewness coeff. xbar <- mean(x) m3 <- mean((x - xbar)^3) m2 <- mean((x - xbar)^2) return( m3 / m2^1.5 ) } #n is a vector of sample sizes #we are doing length(n) different simulations p.reject <- numeric(length(n)) #to store sim. results m <- 10000 #num. repl. each sim. for (i in 1:length(n)) { sktests <- numeric(m) #test decisions for (j in 1:m) { x <- rnorm(n[i]) #test decision is 1 (reject) or 0 sktests[j] <- as.integer(abs(sk(x)) >= cv[i] ) } p.reject[i] <- mean(sktests) #proportion rejected } p.reject ### Example 7.9 (Empirical power) #set.seed(521) n <- 20 m <- 1000 mu0 <- 500 sigma <- 100 mu <- c(seq(450, 650, 10)) #alternatives M <- length(mu) power <- numeric(M) for (i in 1:M) { mu1 <- mu[i] pvalues <- replicate(m, expr = { #simulate under alternative mu1 x <- rnorm(n, mean = mu1, sd = sigma) ttest <- t.test(x, alternative = "greater", mu = mu0) ttest$p.value } ) power[i] <- mean(pvalues <= .05) } se <- sqrt(power * (1-power) / m) df <- data.frame(mean=mu, power=power, upper=power+2*se, lower=power-2*se) library(ggplot2) ggplot(df, aes(x=mean, y=power)) + geom_line() + geom_vline(xintercept=500, lty=2) + geom_hline(yintercept=c(0,.05), lty=1:2) + geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.2, lwd=1.5) ### Example 7.10 (Power of the skewness test of normality) #set.seed(111) alpha <- .1 n <- 30 m <- 2500 epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05)) N <- length(epsilon) pwr <- numeric(N) #critical value for the skewness test cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) for (j in 1:N) { #for each epsilon e <- epsilon[j] sktests <- numeric(m) for (i in 1:m) { #for each replicate sigma <- sample(c(1, 10), replace = TRUE, size = n, prob = c(1-e, e)) x <- rnorm(n, 0, sigma) sktests[i] <- as.integer(abs(sk(x)) >= cv) } pwr[j] <- mean(sktests) } se <- sqrt(pwr * (1-pwr) / m) df <- data.frame(epsilon=epsilon, power=pwr, upper=pwr+2*se, lower=pwr-2*se) #plot power vs epsilon library(ggplot2) ggplot(df, aes(x=epsilon, y=power)) + geom_line() + labs(x=bquote(epsilon)) + geom_hline(yintercept=.1, lty=2) + geom_pointrange(aes(ymin=lower, ymax=upper)) #+ #geom_ribbon(aes(ymin=lower, ymax=upper),alpha=.1) ### Example 7.11 (Power comparison of tests of normality) #only one loop, for epsilon=0.1, was shown in the text #the simulation below takes several minutes to run # initialize input and output library(energy) alpha <- .1 n <- 30 m <- 500 #try small m for a trial run test1 <- test2 <- test3 <- numeric(m) #critical value for the skewness test cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) sim <- matrix(0, 11, 4) # estimate power for (i in 0:10) { epsilon <- i * .1 for (j in 1:m) { e <- epsilon sigma <- sample(c(1, 10), replace = TRUE, size = n, prob = c(1-e, e)) x <- rnorm(n, 0, sigma) test1[j] <- as.integer(abs(sk(x)) >= cv) test2[j] <- as.integer( shapiro.test(x)$p.value <= alpha) test3[j] <- as.integer( mvnorm.etest(x, R=200)$p.value <= alpha) } print(c(epsilon, mean(test1), mean(test2), mean(test3))) sim[i+1, ] <- c(epsilon, mean(test1), mean(test2), mean(test3)) } detach(package:energy) # plot the empirical estimates of power plot(sim[,1], sim[,2], ylim = c(0, 1), type = "l", xlab = bquote(epsilon), ylab = "power") lines(sim[,1], sim[,3], lty = 2) lines(sim[,1], sim[,4], lty = 4) abline(h = alpha, lty = 3) legend("topright", 1, c("skewness", "S-W", "energy"), lty = c(1,2,4), inset = .02) ### Example 7.12 (Count Five test statistic) x1 <- rnorm(20, 0, sd = 1) x2 <- rnorm(20, 0, sd = 1.5) y <- c(x1, x2) group <- rep(1:2, each = length(x1)) boxplot(y ~ group, boxwex = .3, xlim = c(.5, 2.5), main = "") points(group, y) # now identify the extreme points range(x1) range(x2) i <- which(x1 < min(x2)) j <- which(x2 > max(x1)) x1[i] x2[j] out1 <- sum(x1 > max(x2)) + sum(x1 < min(x2)) out2 <- sum(x2 > max(x1)) + sum(x2 < min(x1)) max(c(out1, out2)) ### Example 7.13 (Count Five test statistic, cont.) maxout <- function(x, y) { X <- x - mean(x) Y <- y - mean(y) outx <- sum(X > max(Y)) + sum(X < min(Y)) outy <- sum(Y > max(X)) + sum(Y < min(X)) return(max(c(outx, outy))) } n1 <- n2 <- 20 mu1 <- mu2 <- 0 sigma1 <- sigma2 <- 1 m <- 1000 # generate samples under H0 stat <- replicate(m, expr={ x <- rnorm(n1, mu1, sigma1) y <- rnorm(n2, mu2, sigma2) maxout(x, y) }) print(cumsum(table(stat)) / m) print(quantile(stat, c(.8, .9, .95))) ### Example 7.14 (Count Five test) count5test <- function(x, y) { X <- x - mean(x) Y <- y - mean(y) outx <- sum(X > max(Y)) + sum(X < min(Y)) outy <- sum(Y > max(X)) + sum(Y < min(X)) # return 1 (reject) or 0 (do not reject H0) return(as.integer(max(c(outx, outy)) > 5)) } n1 <- n2 <- 20 mu1 <- mu2 <- 0 sigma1 <- sigma2 <- 1 m <- 10000 tests <- replicate(m, expr = { x <- rnorm(n1, mu1, sigma1) y <- rnorm(n2, mu2, sigma2) x <- x - mean(x) #centered by sample mean y <- y - mean(y) count5test(x, y) } ) alphahat <- mean(tests) print(alphahat) ### Example 7.15 (Count Five test, cont.) n1 <- 20 n2 <- 30 mu1 <- mu2 <- 0 sigma1 <- sigma2 <- 1 m <- 10000 alphahat <- mean(replicate(m, expr={ x <- rnorm(n1, mu1, sigma1) y <- rnorm(n2, mu2, sigma2) x <- x - mean(x) #centered by sample mean y <- y - mean(y) count5test(x, y) })) print(alphahat) ### Example 7.16 (Count Five, cont.) # generate samples under H1 to estimate power sigma1 <- 1 sigma2 <- 1.5 power <- mean(replicate(m, expr={ x <- rnorm(20, 0, sigma1) y <- rnorm(20, 0, sigma2) count5test(x, y) })) print(power)