## ----ch1, child = "src/ch1.Rnw"------------------------------------------ ## ----init1, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch01-', self.contained = FALSE) suppressPackageStartupMessages(library(mice, warn.conflicts = FALSE, quietly = TRUE)) source("R/mi.hist.R") ## ----mean1--------------------------------------------------------------- y <- c(1, 2, 4) mean(y) ## ----mean2--------------------------------------------------------------- y <- c(1, 2, NA) mean(y) ## ----mean3--------------------------------------------------------------- mean(y, na.rm = TRUE) ## ----air2, eval = FALSE-------------------------------------------------- fit <- lm(Ozone ~ Wind, data = airquality) # Error in na.fail.default: missing values in object} ## ----air3---------------------------------------------------------------- fit <- lm(Ozone ~ Wind, data = airquality, na.action = na.omit) ## ----air3b--------------------------------------------------------------- options(na.action = na.omit) ## ----air4, eval = FALSE-------------------------------------------------- airquality2 <- cbind(airquality, predict(fit)) # Error: arguments imply differing number of rows: 153, 116 ## ----air5---------------------------------------------------------------- head(na.action(fit)) ## ----air6---------------------------------------------------------------- naprint(na.action(fit)) ## ----air7---------------------------------------------------------------- colSums(is.na(airquality)) ## ----air8, include = FALSE----------------------------------------------- airquality2 <- cbind(na.omit(airquality[, c("Ozone", "Wind")]), predicted = predict(fit)) ## ----plotair9, echo=FALSE, fig.width=4.5, fig.height=2.25, duo = TRUE---- lwd <- 0.6 plot(predicted ~ Ozone, data = airquality2, type = "n", lwd = lwd, ylim = c(-20, 165), xlim = c(-20, 165), ylab = "Ozone predicted (ppb)", xlab = "Ozone measured (ppb)", axes = FALSE) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) abline(0, 1, lty = 3, lwd = 0.6) points(predicted ~ Ozone, data = airquality2, lwd = 1.5, col = mdc(1)) box(lwd = lwd) fit2 <- lm(Ozone ~ Wind, data = airquality, na.action = na.exclude) plot(predicted ~ Ozone, data = airquality2, type = "n", ylim = c(-20, 165), xlim = c(-20, 165), ylab = " Ozone predicted (ppb)", xlab = "Ozone measured (ppb)", axes = FALSE) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) abline(0, 1, lty = 3, lwd = 0.6) pred2 <- cbind(1, airquality$Wind) %*% coef(fit) abline(h = pred2[ici(predict(fit2))], col = mdc(2), lwd = 0.6, lty = 1) box(lwd = lwd) ## ----airx6--------------------------------------------------------------- fit2 <- lm(Ozone ~ Wind + Solar.R, data = airquality) naprint(na.action(fit2)) ## ----pairwise1, eval=FALSE----------------------------------------------- data <- airquality[, c("Ozone", "Solar.R", "Wind")] mu <- colMeans(data, na.rm = TRUE) cv <- cov(data, use = "pairwise") ## ----pairwise2, eval = FALSE--------------------------------------------- library(lavaan) fit <- lavaan("Ozone ~ 1 + Wind + Solar.R Ozone ~~ Ozone", sample.mean = mu, sample.cov = cv, sample.nobs = sum(complete.cases(data))) ## ----install, eval=FALSE------------------------------------------------- install.packages("mice") ## ----load, eval = FALSE-------------------------------------------------- library("mice") ## ----meanimp, echo=TRUE-------------------------------------------------- imp <- mice(airquality, method = "mean", m = 1, maxit = 1) ## ----plotmeanimp, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25---- lwd <- 0.6 data <- complete(imp) Yobs <- airquality[,"Ozone"] Yimp <- data[,"Ozone"] mi.hist(Yimp, Yobs,b=seq(-20,200,10),type="continuous", gray=F,lwd = lwd, obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5, obs.col=mdc(4), mis.col=mdc(5), imp.col="transparent", mlt=0.08,main="",xlab="Ozone (ppb)", axes = FALSE) box(lwd = 1) plot(data[cci(imp),2:1],col=mdc(1), lwd=1.5,ylab="Ozone (ppb)", xlab="Solar Radiation (lang)",ylim=c(-10,170), axes = FALSE) points(data[ici(imp),2:1],col=mdc(2),lwd=1.5) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = 1) ## ----regimp, echo=TRUE--------------------------------------------------- fit <- lm(Ozone ~ Solar.R, data = airquality) pred <- predict(fit, newdata = ic(airquality)) ## ----plotregimp, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25---- lwd <- 0.6 Yobs <- airquality[,"Ozone"] Yimp <- Yobs Yimp[ici(airquality)] <- pred ss <- cci(airquality$Solar.R) data <- data.frame(Ozone=Yimp, Solar.R=airquality$Solar.R) mi.hist(Yimp[ss], Yobs[ss],b=seq(-20,200,10),type="continuous", gray=F, lwd = lwd, obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5, obs.col=mdc(4),mis.col=mdc(5), imp.col="transparent", mlt=0.08,main="",xlab="Ozone (ppb)", axes = FALSE) box(lwd = 1) plot(data[cci(imp),2:1],col=mdc(1),lwd=1.5, ylab="Ozone (ppb)", xlab="Solar Radiation (lang)", ylim=c(-10,170), axes = FALSE) points(data[ici(imp),2:1],col=mdc(2),lwd=1.5) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = 1) ## ----regimp2, eval = FALSE----------------------------------------------- data <- airquality[, c("Ozone", "Solar.R")] imp <- mice(data, method = "norm.predict", seed = 1, m = 1, print = FALSE) xyplot(imp, Ozone ~ Solar.R) ## ----sri----------------------------------------------------------------- data <- airquality[, c("Ozone", "Solar.R")] imp <- mice(data, method = "norm.nob", m = 1, maxit = 1, seed = 1, print = FALSE) ## ----plotsri, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25----- lwd <- 0.6 data <- complete(imp) Yobs <- airquality[, "Ozone"] Yimp <- data[, "Ozone"] mi.hist(Yimp, Yobs, b = seq(-20, 200, 10), type = "continuous", gray = FALSE, lwd = lwd, obs.lwd = 1.5, mis.lwd = 1.5, imp.lwd = 1.5, obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent", mlt = 0.08, main = "", xlab = "Ozone (ppb)") box(lwd = 1) plot(data[cci(imp), 2:1], col = mdc(1), lwd = 1.5, ylab = "Ozone (ppb)", xlab = "Solar Radiation (lang)", ylim = c(-10, 170), axes = FALSE) points(data[ici(imp), 2:1], col = mdc(2), lwd = 1.5) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = 1) ## ----locf1--------------------------------------------------------------- airquality2 <- tidyr::fill(airquality, Ozone) ## ----locf2,echo=FALSE,fig.width=4.5,fig.height=2.5, solo=TRUE------------ lwd <- 0.6 Oz <- airquality$Oz Ozi <- airquality2$Oz colvec <- ifelse(is.na(Oz), mdc(2), mdc(1)) plot(Ozi[1:80], col = colvec, type = "l", xlab = "Day number", ylab = "Ozone (ppb)", lwd = lwd, axes = FALSE) points(Ozi[1:80], col = colvec, pch = 20) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) ## ----indicator1---------------------------------------------------------- imp <- mice(airquality, method = "mean", m = 1, maxit = 1, print = FALSE) airquality2 <- cbind(complete(imp), r.Ozone = is.na(airquality[, "Ozone"])) fit <- lm(Wind ~ Ozone + r.Ozone, data = airquality2) ## ----airx8--------------------------------------------------------------- imp <- mice(airquality, seed = 1, m = 20, print = FALSE) fit <- with(imp, lm(Ozone ~ Wind + Temp + Solar.R)) summary(pool(fit)) ## ----airx9--------------------------------------------------------------- fit <- lm(Ozone ~ Wind + Temp + Solar.R, data = airquality) coef(summary(fit)) ## ----plotairmi, echo=FALSE, fig.width=4.5, fig.height=2.25, duo = TRUE---- lwd <- 0.6 data <- complete(imp) Yobs <- airquality[,"Ozone"] Yimp <- data[,"Ozone"] mi.hist(Yimp, Yobs,b=seq(-20,200,10),type="continuous", gray=F,lwd = lwd, obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5, obs.col=mdc(4),mis.col=mdc(5), imp.col="transparent", mlt=0.08,main="",xlab="Ozone (ppb)") box(lwd = 1) plot(data[cci(imp),2:1], col = mdc(1),lwd = 1.5, ylab = "Ozone (ppb)", xlab = "Solar Radiation (lang)", ylim = c(-10, 170), axes = FALSE) points(data[ici(imp),2:1], col = mdc(2), lwd = 1.5) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = 1) ## ----air10, echo=FALSE,fig.width=4.5,fig.height=2.5, solo=TRUE----------- Oz <- airquality$Ozone colvec <- ifelse(is.na(Oz),mdc(2),mdc(1)) plot(Oz[1:80],col=mdc(1),type="l",xlab="Day number",ylab="Ozone (ppb)", axes = FALSE,lwd = lwd) points(Oz[1:80],col=mdc(1),pch=20) lwd <- 0.6 axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) idx <- ici(airquality$Ozone) & (1:153)<81 x <- (1:153)[idx] points(x=x,y=complete(imp,1)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20) points(x=x,y=complete(imp,2)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20) points(x=x,y=complete(imp,3)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20) points(x=x,y=complete(imp,4)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20) points(x=x,y=complete(imp,5)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20) ## ----acf, echo=FALSE----------------------------------------------------- par(mfrow=c(2,5)) acf.ozone <- with(imp, acf(Ozone, plot = FALSE)) model <- expression(acf(resid(lm(Ozone ~ Wind + Temp + Solar.R)), plot = FALSE)) acf.resid <- with(imp,model, plot = FALSE) calcacf <- function(acf.list) { k <- length(acf.list) acc <- acf.list[[1]]$acf for (i in 2:k) acc <- acc + acf.list[[i]]$acf return(acc/k) } oz <- round(calcacf(acf.ozone$analyses),2) re <- round(calcacf(acf.resid$analyses),2) ## ----ch2, child = "src/ch2.Rnw"------------------------------------------ ## ----init2, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch02-', self.contained = FALSE) pkg <- c("mice", "lattice") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----publications, echo=FALSE, fig.width=4.5, fig.height=2.25, solo=TRUE---- cit <- c( 2017, 75, 381, NA, 2016, 81, 328, NA, 2015, 59, 306, NA, 2014, 55, 281, NA, 2013, 45, 233, NA, 2012, 47, 214, NA, 2011, 55, 181, NA, 2010, 44, 157, NA, 2009, 38, 111, NA, 2008, 28, 102, NA, 2007, 34, 113, NA, 2006, 19, 75, NA, 2005, 21, 63, NA, 2004, 7, 44, NA, 2003, 18, 38, NA, 2002, 15, 36, NA, 2001, 14, 35, 57, 2000, 8, 19, 33, 1999, 6, 18, 47, 1998, 6, 12, 22, 1997, 6, 16, 29, 1996, 5, 12, 28, 1995, 3, 5, 20, 1994, 4, 7, 34, 1993, 3, 6, 15, 1992, NA, 4, NA, 1991, 3, 4, 19, 1990, 2, 3, 15, 1989, NA, 2, 11, 1988, NA, 1, 13, 1987, NA, 3, 10, 1986, 2, 3, 5, 1985, NA, NA, 1, 1984, NA, 1, 2, 1983, NA, NA, 5, 1982, NA, NA, 2, 1981, NA, NA, 1, 1980, NA, NA, 5, 1979, NA, NA, 2, 1978, NA, NA, 1, 1977, NA, NA, 2) cit <- matrix(cit, nr=2018-1977, nc=4, byrow=TRUE) cit <- as.data.frame(cit) names(cit) <- c("Year","Title","Abstract","All") par(cex = 0.7, lwd = 0.5) plot(x = cit$Year, y = cit$Abstract, type="o",log="y", xlim = c(1975,2017), ylim = c(1,400), ylab="Number of publications (log)", xlab="Year", pch=24, bg = "white", axes=FALSE) axis(1, at = seq(1977, 2017, 5), lwd = par("lwd")) axis(2, lwd = par("lwd"), las=1) lines(x=cit$Year, y=cit$Title, pch=15, type="o") lines(x=cit$Year, y=cit$All, pch=16, type="o") legend(x=1975,y=200,legend=c('early publications', '"multiple imputation" in abstract', '"multiple imputation" in title'), pch=c(16,2,15), bty="n") ## ----mar----------------------------------------------------------------- logistic <- function(x) exp(x) / (1 + exp(x)) set.seed(80122) n <- 300 y <- MASS::mvrnorm(n = n, mu = c(0, 0), Sigma = matrix(c(1, 0.5, 0.5, 1), nrow = 2)) r2.mcar <- 1 - rbinom(n, 1, 0.5) r2.mar <- 1 - rbinom(n, 1, logistic(y[, 1])) r2.mnar <- 1 - rbinom(n, 1, logistic(y[, 2])) ## ----marplot, echo=FALSE, fig.height=4, fig.width=6, solo = TRUE--------- library(lattice) y1 <- y[, 1] y2 <- y[, 2] y3 <- rbind(y,y,y) r2 <- c(r2.mcar,r2.mar,r2.mnar) r2 <- factor(r2, labels=c("Ymis","Yobs")) typ <- factor(rep(3:1,each=n),labels=c("MNAR","MAR","MCAR")) d <- data.frame(y1=y3[,1],y2=y3[,2],r2=r2,typ=typ) trellis.par.set(box.rectangle=list(col=c(mdc(2),mdc(1)),lwd=1.2)) trellis.par.set(box.umbrella=list(col=c(mdc(2),mdc(1)),lwd=1.2)) trellis.par.set(plot.symbol=list(col=mdc(3),lwd=1)) tp <- bwplot(r2~y2|typ, data=d, horizontal=TRUE, layout=c(1,3), xlab=expression(Y[2]), col=c(mdc(2),mdc(1)),strip=FALSE, xlim=c(-3,3), strip.left = strip.custom(bg="grey95")) print(tp) ## ----theory1------------------------------------------------------------- library(mice) imp <- mice(nhanes, print = FALSE, m = 10, seed = 24415) fit <- with(imp, lm(bmi ~ age)) est <- pool(fit) est ## ----theory2------------------------------------------------------------- summary(est, conf.int = TRUE) ## ----lin.sim1------------------------------------------------------------ create.data <- function(beta = 1, sigma2 = 1, n = 50, run = 1) { set.seed(seed = run) x <- rnorm(n) y <- beta * x + rnorm(n, sd = sqrt(sigma2)) cbind(x = x, y = y) } ## ----lin.sim2------------------------------------------------------------ make.missing <- function(data, p = 0.5){ rx <- rbinom(nrow(data), 1, p) data[rx == 0, "x"] <- NA data } ## ----lin.sim3------------------------------------------------------------ test.impute <- function(data, m = 5, method = "norm", ...) { imp <- mice(data, method = method, m = m, print = FALSE, ...) fit <- with(imp, lm(y ~ x)) tab <- summary(pool(fit), "all", conf.int = TRUE) as.numeric(tab["x", c("estimate", "2.5 %", "97.5 %")]) } ## ----lin.sim4------------------------------------------------------------ simulate <- function(runs = 10) { res <- array(NA, dim = c(2, runs, 3)) dimnames(res) <- list(c("norm.predict", "norm.nob"), as.character(1:runs), c("estimate", "2.5 %","97.5 %")) for(run in 1:runs) { data <- create.data(run = run) data <- make.missing(data) res[1, run, ] <- test.impute(data, method = "norm.predict", m = 2) res[2, run, ] <- test.impute(data, method = "norm.nob") } res } ## ----lin.sim6a, cache=TRUE----------------------------------------------- res <- simulate(1000) ## ----lin.sim6c----------------------------------------------------------- apply(res, c(1, 3), mean, na.rm = TRUE) ## ----lin.bias------------------------------------------------------------ true <- 1 RB <- rowMeans(res[,, "estimate"]) - true PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true) CR <- rowMeans(res[,, "2.5 %"] < true & true < res[,, "97.5 %"]) AW <- rowMeans(res[,, "97.5 %"] - res[,, "2.5 %"]) RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2)) data.frame(RB, PB, CR, AW, RMSE) ## ----mse1---------------------------------------------------------------- rmse <- function(truedata, imp, v = "x") { mx <- is.na(mice::complete(imp, 0))[, v] mse <- rep(NA, imp$m) for (k in seq_len(imp$m)) { filled <- mice::complete(imp, k)[mx, v] true <- truedata[mx, v] mse[k] <- mean((filled - true)^2) } sqrt(mean(mse)) } ## ----mse2---------------------------------------------------------------- simulate2 <- function(runs = 10) { res <- array(NA, dim = c(2, runs, 1)) dimnames(res) <- list(c("norm.predict", "norm.nob"), as.character(1:runs), "RMSE") for(run in 1:runs) { truedata <- create.data(run = run) data <- make.missing(truedata) imp <- mice(data, method = "norm.predict", m = 1, print = FALSE) res[1, run, ] <- rmse(truedata, imp) imp <- mice(data, method = "norm.nob", print = FALSE) res[2, run, ] <- rmse(truedata, imp) } res } ## ----mse.sim6a, cache = TRUE--------------------------------------------- res2 <- simulate2(1000) apply(res2, c(1, 3), mean, na.rm = TRUE) ## ----ch3, child = "src/ch3.Rnw"------------------------------------------ ## ----init3, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch03-', self.contained = FALSE) suppressPackageStartupMessages(library(gamlss, warn.conflicts = FALSE, quietly = TRUE)) pkg <- c("mice", "lattice", "MASS", "ImputeRobust", "rpart") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) source("R/mice.impute.x.R") ## ----gas1, six=TRUE, echo=FALSE, fig.width=4.5, fig.height=6.75--------- library("MASS") data <- whiteside lwd <- 1.5 plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) points(x=5, y=3.6, pch=4, cex=2, lwd=lwd, col=mdc(2)) legend(x="bottomleft", legend="deleted observation", pch=4, col=mdc(2), pt.lwd=lwd, bty="n", pt.cex=2, cex = 1.6) text(x=9, y=6.5, label="a",cex=2) data[47,"Gas"] <- NA plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4)) points(5,4.04, lwd=lwd, col=mdc(2),pch=19) text(x=9, y=6.5, label="b",cex=2) plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) imp <- mice(data, m=1, maxit=0) pred <- imp$pred pred["Gas","Insul"] <- 0 imp <- mice(data, m=5, pred=pred, meth="norm.nob", maxit=1, print=FALSE, seed=45433) abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4)) points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19) text(x=9, y=6.5, label="c",cex=2) plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) imp <- mice(data, m=1, maxit=0) pred <- imp$pred pred["Gas","Insul"] <- 0 betadump <- vector("list", 0) imp <- mice(data, m=5, pred=pred, meth="normdump", maxit=1, print=FALSE, seed=83126) abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4)) betadump <- matrix(betadump, nc=2, byrow=TRUE) for (i in 1:5) abline(coef=unlist(betadump[i,]), col=mdc(5)) points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19) text(x=9, y=6.5, label="d",cex=2) pch <- c(rep(3,26),rep(1,30)) plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) imp <- mice(data, m=5, meth="norm", maxit=1, print=FALSE, seed=11727) abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit, subset=Insul=="Before"), col=mdc(4)) abline(m2<-lm(Gas~Temp, data=data, na.action=na.omit, subset=Insul=="After"), col=mdc(4)) points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19) legend(x="bottomleft", legend=c("before insulation","after insulation"), pch=c(3,1),bty="n", pt.lwd=lwd, cex = 1.6, pt.cex = 1, y.intersp = 0.6) text(x=9, y=6.5, label="e",cex=2) pch <- c(rep(3,26),rep(1,30)) plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", cex.lab = 1.6, axes = FALSE) axis(1, lwd = 0.7) axis(2, lwd = 0.7, las = 1) box(lwd = 0.7) betadump <- vector("list", 0) imp <- mice(data, m=5, meth="pmmdump", maxit=1, print=FALSE, seed=68006) betadump <- matrix(betadump, nc=3, byrow=TRUE) m1<-lm(Gas~Temp+Insul, data=data, na.action=na.omit) an <- coef(m1)[1] ai <- an + coef(m1)[3] b <- coef(m1)[2] abline(a=ai, b=b, col=mdc(4)) abline(a=an, b=b, col=mdc(4)) # for (i in 1:1) { # abline(a=unlist(betadump[i,1]), b=unlist(betadump[i,2]), col=mdc(5)) # abline(a=unlist(betadump[i,1])+unlist(betadump[i,3]), b=unlist(betadump[i,2]), col=mdc(5)) # } # points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2), pch=20) eta <- 0.6 ylo <- ai+b*(5-eta) yhi <- ai+b*(5+eta) lines(x=c(5-eta,5+eta),y=c(ylo,yhi),lwd=3,col=mdc(5)) xlo <- (ylo-an)/b xhi <- (yhi-an)/b lines(x=c(xlo,xhi),y=c(ylo,yhi),lwd=3,col=mdc(5)) donors <- subset(data, (Insul=="After"&Temp>5-eta&Temp<5+eta) | (Insul=="Before"&Temp>xlo&Temp 1 & age < 2, c("age","head")) names(data) <- c("age", "hc") truehist(data$hc, col = mdc(1), border = "white", xlab="Head circumference (cm)", ylab="Density", ylim=c(0,0.3), xlim=c(38,60), h = 0.5, axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) mn <- gamlss(hc~1, data=na.omit(data), family=NO, trace=FALSE) mu <- coef(mn) sigma <- exp(coef(mn, "sigma")) cmfine <- seq(38,60,0.1) lines(cmfine, dNO(cmfine, mu, sigma), lwd=0.8, lty=2) mt <- gamlss(hc~1, data=data, family=TF, trace=FALSE) mu <- coef(mt) sigma <- exp(coef(mt, "sigma")) nu <- exp(coef(mt, "nu")) lines(cmfine, dTF(cmfine, mu, sigma, nu), lwd=0.8, lty=1) legend(x="right",legend=c("normal","t, df=6.7"), lwd=0.8, lty=c(2,1), bty="n", cex=0.7) plot(x=data$age, y=data$hc, col=mdc(1), cex=0.3, xlab="Age (in years)", ylab="Head circumference (cm)", ylim=c(39,60), axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) box(lwd = 0.7) } ## ----linhc2, cache=TRUE, warning = FALSE--------------------------------- library("ImputeRobust") library("gamlss") data(db) data <- subset(db, age > 1 & age < 2, c("age", "head")) names(data) <- c("age", "hc") synthetic <- rep(c(FALSE, TRUE), each = nrow(data)) data2 <- rbind(data, data) row.names(data2) <- 1:nrow(data2) data2[synthetic, "hc"] <- NA imp <- mice(data2, m = 1, meth = "gamlssTF", seed = 88009, print = FALSE) syn <- subset(mice::complete(imp), synthetic) ## ----linhc3, echo=FALSE, duo = TRUE, fig.width=4.5, fig.height=2.25----- if (empty_figure) { plot.new() plot.new() } else { data <- syn truehist(data$hc, col=mdc(2), border="white", xlab="Head circumference (cm)", ylab="Density", ylim=c(0,0.3), xlim=c(38,60), h=0.5, axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) mn <- gamlss(hc~1, data=na.omit(data), family=NO, trace=FALSE) mu <- coef(mn) sigma <- exp(coef(mn, "sigma")) cmfine <- seq(38,60,0.1) lines(cmfine, dNO(cmfine, mu, sigma), lwd=0.8, lty=2) mt <- gamlss(hc~1, data=data, family=TF, trace=FALSE) mu <- coef(mt) sigma <- exp(coef(mt, "sigma")) nu <- exp(coef(mt, "nu")) lines(cmfine, dTF(cmfine, mu, sigma, nu), lwd=0.8, lty=1) legend(x="right",legend=c("normal",paste("t, df=",round(nu,1),sep="")), lwd=0.8, lty=c(2,1), bty="n", cex=0.7) plot(x=data$age, y=data$hc, col=mdc(2), cex=0.3, xlab="Age (in years)", ylab="Head circumference (cm)", ylim=c(39,60), axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) box(lwd = 0.7) } ## ----misspecify, echo=FALSE, duo = TRUE, fig.height=2.25, fig.width=4.5---- data <- boys[boys$age<=2,c("age","bmi")] set.seed(87120) data[sample(92:136,10),"bmi"] <- NA imp <- mice(data, meth="norm", m=1, seed=32212, print=FALSE) cd1 <- mice::complete(imp) imp <- mice(data, m=1, seed=32212, print=FALSE) cd2 <- mice::complete(imp) r <- !is.na(data$bmi) plot(data, col=mdc(1), xlab="Age", ylab="BMI", cex = 0.7, axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) box(lwd = 0.7) points(cd1[!r,], col=mdc(2), pch=19, cex=0.7) plot(data, col=mdc(1), xlab="Age", ylab="BMI", cex = 0.7, axes = FALSE) axis(1, lwd = 0.7, cex.axis = 0.7) axis(2, lwd = 0.7, las = 1, cex.axis = 0.7) box(lwd = 0.7) points(cd2[!r,], col=mdc(2), pch=19, cex=0.7) ## ----pmmfigure, echo=FALSE, solo=TRUE, fig.height=3.5, fig.width=3.5----- data <- whiteside lwd <- 1.5 data[47,"Gas"] <- NA pch <- c(rep(3,26),rep(1,30)) plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch, xlab=expression(paste("Temperature (", degree, "C)")), ylab="Gas consumption (cubic feet)", axes = FALSE) axis(1, lwd = 0.7, cex.axis = 1) axis(2, lwd = 0.7, las = 1, cex.axis = 1) box(lwd = 0.7) betadump <- vector("list", 0) imp <- mice(data, m=5, meth="pmmdump", maxit=1, print=FALSE, seed=68006) betadump <- matrix(unlist(betadump), nc=3, byrow=TRUE) m1<-lm(Gas~Temp+Insul, data=data, na.action=na.omit) an <- coef(m1)[1] ai <- an + coef(m1)[3] b <- coef(m1)[2] abline(a=ai, b=b, col=mdc(4)) abline(a=an, b=b, col=mdc(4)) # for (i in 56:56) { # abline(a=unlist(betadump[i,1]), b=unlist(betadump[i,2]), col=mdc(5)) # abline(a=unlist(betadump[i,1])+unlist(betadump[i,3]), b=unlist(betadump[i,2]), col=mdc(5)) # } # points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2), pch=20) eta <- 0.6 ylo <- ai+b*(5-eta) yhi <- ai+b*(5+eta) lines(x=c(5-eta,5+eta),y=c(ylo,yhi),lwd=3,col=mdc(4)) an <- 7.05; ai<-an-1.7; b <- -0.38 xlo1 <- (ylo-ai)/b xhi1 <- (yhi-ai)/b xlo2 <- (ylo-an)/b xhi2 <- (yhi-an)/b abline(a=an, b=b, col=mdc(5)) abline(a=ai, b=b, col=mdc(5)) lines(x=c(xlo1,xhi1),y=c(ylo,yhi),lwd=3,col=mdc(5)) lines(x=c(xlo2,xhi2),y=c(ylo,yhi),lwd=3,col=mdc(5)) abline(v=c(5-eta,5+eta),h=c(ylo,yhi),col=mdc(4),lty=3) rect(xlo1,0,xhi1,8,col=hcl(0,100,40,0.05),border=NA) rect(xlo2,0,xhi2,8,col=hcl(0,100,40,0.05),border=NA) donors <- subset(data, (Insul=="After"&Temp>xlo1&Tempxlo2&Temp 0) data[(cs[1]+1):cs[2],"Y1"] <- NA if (n[3] > 0) data[(cs[2]+1):cs[3],"Y2"] <- NA if (n[4] > 0) data[(cs[3]+1):cs[4],c("Y1","Y2")] <- NA return(data) } ## ----slow2--------------------------------------------------------------- impute <- function(data, m = 5, method = "norm", print = FALSE, maxit = 10, ...) { statistic <- matrix(NA, nrow = maxit, ncol = m) for (iter in 1:maxit) { if (iter==1) imp <- mice(data, m = m, method = method, print = print, maxit = 1, ...) else imp <- mice.mids(imp, maxit = 1, print = print, ...) statistic[iter, ] <- unlist(with(imp, cor(Y1, Y2))$analyses) } return(list(imp = imp, statistic = statistic)) } ## ----slow3--------------------------------------------------------------- simulate <- function( ns = matrix(c(1000, 500, 250, 100, 50, 0, rep(c(4500, 4750, 4875, 4950, 4975, 5000), 2), rep(0, 6)), nrow = 6), m = 5, maxit = 10, seed = 1, ...) { if (!missing(seed)) set.seed(seed) s <- cbind(rep(1:nrow(ns), each = maxit * m), apply(ns, 2, rep, each = maxit * m), rep(1:maxit, each = m), 1:m, NA) colnames(s) <- c("k", "n111", "n101", "n110", "n100", "iteration", "m", "rY1Y2") for (k in 1:nrow(ns)) { data <- generate(ns[k, ], ...) r <- impute(data, m = m, maxit = maxit, ...) s[s[,"k"] == k, "rY1Y2"] <- t(r$statistic) } return(data.frame(s)) } ## ----slow4, cache = TRUE------------------------------------------------- slow.demo <- simulate(maxit = 150, seed = 62771) ## ----slowplot, echo=FALSE, fig.height=5---------------------------------- labels <- c("90% missing", "95% missing", "97.5% missing", "99% missing", "99.5% missing", "100% missing") xyplot(rY1Y2 ~ iteration | as.factor(k), group = m, data = slow.demo, layout = c(3,2), type="l", as.table = TRUE, ylab = "Correlation between Y1 and Y2", xlab = "Iteration", col = mdc(3), scales = list(y = list(alternating = 1, tck = c(1, 0))), strip = strip.custom(bg = "grey95", style = 1, factor.levels = labels)) ## ----pubimpjm, eval=TRUE, cache=TRUE------------------------------------- select <- with(boys, age >= 8 & age <= 21.0) djm <- boys[select, -4] djm$gen <- as.integer(djm$gen) djm$phb <- as.integer(djm$phb) djm$reg <- as.integer(djm$reg) dfcs <- boys[select, -4] ## impute under jm and fcs jm <- mice(djm, method = "norm", seed = 93005, m = 10, print = FALSE) pmm <- mice(djm, method = "pmm", seed = 71332, m = 10, print = FALSE) fcs <- mice(dfcs, seed = 81420, m = 10, print = FALSE) ## ----pubfigjm, echo=TRUE, fig.width=6, fig.height=5-------------------- xyplot(jm, gen ~ age | as.factor(.imp), subset = .imp < 6, xlab = "Age", ylab = "Genital stage", col = mdc(1:2), ylim = c(0, 6)) ## ----pubfigfcs, echo=FALSE, fig.width=6, fig.height=5------------------- xyplot(fcs, gen ~ age | as.factor(.imp), subset = .imp < 6, xlab = "Age", ylab = "Genital stage", col = mdc(1:2), ylim = c(0, 6)) ## ----pubfig2, echo=FALSE, cache=TRUE------------------------------------ drawgen <- function(imp, var="gen", doround=FALSE) { m <- imp$m ires <- vector("list", m) for (i in 1:m){ cda <- mice::complete(imp,i) if (doround) cda[,var] <- round(squeeze(cda[,var],c(1,5))) count.boys(cda) ires[[i]] <-fitTannerStages(data=cda,var=var,nstages=5,counts=boy.count.gen) } ## plot the fitted models for the CCA and imputed analyses plot(c(8,21),c(0,1),type="n",xlab="Age (years)", ylab="Proportion",lab=c(10,12,5), las=1,cex=1,tck=-0.01) abline(0.1,0,lty=2) abline(0.5,0,lty=2) abline(0.9,0,lty=2) x <- seq(8.75,20.5,0.25) for (s in 1:4) { lines(x=x,y=boys.gen.cc[[s]][[2]]$predicted,lwd=6,col=mdc(1)) for (i in 1:m) { lines(x=x,y=ires[[i]][[s]][[3]]$predicted,lwd=1,col=mdc(2)) } } } four.gen <- function(formula=gen~age|reorder(factor(c("JM: multivariate normal","JM: rounded","FCS: predictive mean matching","FCS: proportional odds")),1:4), data=list(jm=jm, jmr=jm, pmm=pmm, fcs=fcs), doround=FALSE, layout=c(1,4), xlab="Age (years)", ylab="Proportion", as.table=TRUE, ...) { prepanel.gen <- function(x, y, ...){ list(xlim=c(9,20), ylim=c(0,1)) } panel.gen <- function(x, y, ...){ pan <- panel.number() doround <- ifelse(pan==2, TRUE, FALSE) imp <- data[[pan]] m <- imp$m ires <- vector("list", m) for (i in 1:m){ cda <- mice::complete(imp,i) if (doround) cda[,"gen"] <- round(squeeze(cda[,"gen"],c(1,5))) count.boys(cda) ires[[i]] <-fitTannerStages(data=cda,var="gen",nstages=5,counts=boy.count.gen) } panel.grid(h=-1, v=-1) # panel.abline(0.1,0,lty=2,...) # panel.abline(0.5,0,lty=2,...) # panel.abline(0.9,0,lty=2,...) x <- seq(8.75,20.5,0.25) for (s in 1:4) { panel.lines(x=x,y=boys.gen.cc[[s]][[2]]$predicted,lwd=4,col=mdc(1)) for (i in 1:m) { panel.lines(x=x,y=ires[[i]][[s]][[3]]$predicted,lwd=1,col=mdc(2)) } } } gen <- 0; age <- 0; x <- 0 ## complete cases idat <- data[[1]]$data idat <- idat[cci(idat),] count.boys(idat) boys.gen.cc <-fitTannerStages(data=idat,var="gen",nstages=5,counts=boy.count.gen) tp <- xyplot(formula, data, layout=layout, prepanel=prepanel.gen, panel=panel.gen, xlab=xlab, ylab=ylab, as.table=as.table, ...) return(tp) } tp<-four.gen(layout=c(2,2),ylab="",strip=function(...,bg) strip.default(..., bg="grey95")) print(tp) ## ----ex.slow1, eval=FALSE------------------------------------------------ slow2 <- simulate(ns = ns2, maxit = 50, seed = 62771) ## ----ch5, child = "src/ch5.Rnw"------------------------------------------ ## ----init5, echo=FALSE, results='hide'----------------------------------- opts_chunk$set(fig.path = 'fig/ch05-', self.contained = FALSE) pkg <- c("mice", "magrittr", "purrr", "dplyr") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----workflow1, warning = FALSE------------------------------------------ # mids workflow using saved objects library(mice) imp <- mice(nhanes, seed = 123, print = FALSE) fit <- with(imp, lm(chl ~ age + bmi + hyp)) est1 <- pool(fit) ## ----workflow2----------------------------------------------------------- # mids workflow using pipes library(magrittr) est2 <- nhanes %>% mice(seed = 123, print = FALSE) %>% with(lm(chl ~ age + bmi + hyp)) %>% pool() ## ----workflow3----------------------------------------------------------- # mild workflow using base::lapply est3 <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("all") %>% lapply(lm, formula = chl ~ age + bmi + hyp) %>% pool() ## ----workflow4----------------------------------------------------------- # mild workflow using pipes and base::Map est4 <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("all") %>% Map(f = lm, MoreArgs = list(f = chl ~ age + bmi + hyp)) %>% pool() ## ----workflow5, warning = FALSE------------------------------------------ # mild workflow using purrr::map library(purrr) est5 <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("all") %>% map(lm, formula = chl ~ age + bmi + hyp) %>% pool() ## ----workflow6----------------------------------------------------------- # long workflow using base::by est6 <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("long") %>% by(as.factor(.$.imp), lm, formula = chl ~ age + bmi + hyp) %>% pool() ## ----workflow7----------------------------------------------------------- # long workflow using a dplyr list-column library(dplyr) est7 <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("long") %>% group_by(.imp) %>% do(model = lm(formula = chl ~ age + bmi + hyp, data = .)) %>% as.list() %>% .[[-1]] %>% pool() ## ----workflow8----------------------------------------------------------- # incorrect workflow: averaging data, no pooling ave <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("long") %>% group_by(.id) %>% summarise_all(.funs = mean) %>% select(-.id, -.imp) est8 <- lm(formula = chl ~ age + bmi + hyp, data = ave) ## ----corave-------------------------------------------------------------- cor(ave) ## ----corimp-------------------------------------------------------------- cor <- nhanes %>% mice(seed = 123, print = FALSE) %>% mice::complete("all") %>% lapply(cor) Reduce("+", cor) / length(cor) ## ----stack--------------------------------------------------------------- est9 <- nhanes2 %>% mice(seed = 123, print = FALSE) %>% mice::complete("long") %>% lm(formula = chl ~ age + bmi + hyp) ## ----repeatedanalyses---------------------------------------------------- fit <- with(imp, lm(chl~bmi+age)) coef(fit$analyses[[1]]) coef(fit$analyses[[2]]) ## ----poolstandard-------------------------------------------------------- est <- pool(fit) summary(est) ## ----frequency----------------------------------------------------------- expr <- expression(freq <- table(hyp), freq[1] - freq[2]) fit <- with(imp, eval(expr)) unlist(fit$analyses) ## ----d1-1---------------------------------------------------------------- imp <- mice(nhanes2, m = 10, print = FALSE, seed = 71242) m2 <- with(imp, lm(chl ~ age + bmi)) pool(m2) ## ----d1-2---------------------------------------------------------------- m1 <- with(imp, lm(chl ~ bmi)) summary(D1(m2, m1)) ## ----d1-3---------------------------------------------------------------- D2(m2, m1) ## ----d3-1---------------------------------------------------------------- D3(m2, m1) ## ----select3, cache = TRUE, results = 'hide'----------------------------- data <- boys[boys$age >= 8, -4] imp <- mice(data, seed = 28382, m = 10, print = FALSE) scope <- list(upper = ~ age + hgt + wgt + hc + gen + phb + reg, lower = ~1) expr <- expression(f1 <- lm(tv ~ 1), f2 <- step(f1, scope = scope)) fit <- with(imp, expr) ## ----select3votes-------------------------------------------------------- formulas <- lapply(fit$analyses, formula) terms <- lapply(formulas, terms) votes <- unlist(lapply(terms, labels)) table(votes) ## ----select5------------------------------------------------------------- fit.without <- with(imp, lm(tv ~ age + gen + reg + phb)) fit.with <- with(imp, lm(tv ~ age + gen + reg + phb + hgt)) D1(fit.with, fit.without) ## ----select6------------------------------------------------------------- fit.without <- with(imp, lm(tv ~ age + gen + reg)) fit.with <- with(imp, lm(tv ~ age + gen + reg + phb)) D1(fit.with, fit.without) ## ----ch6, child = "src/ch6.Rnw"------------------------------------------ ## ----init6, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch6-', self.contained = FALSE) pkg <- c("mice", "lattice", "gamlss", "smcfcs") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) suppressPackageStartupMessages(library(AGD, warn.conflicts = FALSE, quietly = TRUE)) source("R/mi.hist.R") ## ----pred1--------------------------------------------------------------- imp <- mice(nhanes, print = FALSE) imp$predictorMatrix ## ----multi--------------------------------------------------------------- imp <- mice(cbind(nhanes, chl2 = 2 * nhanes$chl), print = FALSE, maxit = 1, m = 3, seed = 1) imp$loggedEvents ## ----multi2-------------------------------------------------------------- imp <- mice(cbind(nhanes, chl2 = 2 * nhanes$chl), print = FALSE, maxit = 1, m = 3, seed = 1, remove.collinear = FALSE) imp$loggedEvents ## ----chlchl, solo = TRUE, fig.width =7, echo = FALSE, fig.height = 7---- xyplot(imp, chl2 ~ chl | as.factor(.imp), pch = 21) ## ----derived1, cache=TRUE------------------------------------------------ data <- boys[, c("age", "hgt", "wgt", "hc", "reg")] imp <- mice(data, print = FALSE, seed = 71712) long <- mice::complete(imp, "long", include = TRUE) long$whr <- with(long, 100 * wgt / hgt) imp.itt <- as.mids(long) ## ----derived2, cache = TRUE---------------------------------------------- data$whr <- 100 * data$wgt / data$hgt imp.jav1 <- mice(data, seed = 32093, print = FALSE) ## ----derived3------------------------------------------------------------ tail(imp.jav1$loggedEvents, 3) ## ----derivedjavpred------------------------------------------------------ pred <- make.predictorMatrix(data) pred[c("wgt", "whr"), c("wgt", "whr")] <- 0 pred[c("hgt", "whr"), c("hgt", "whr")] <- 0 pred imp.jav2 <- mice(data, pred = pred, seed = 32093, print = FALSE) ## ----derived5, cache = TRUE---------------------------------------------- data <- boys[, c("age", "hgt", "wgt", "hc", "reg")] data$whr <- 100 * data$wgt / data$hgt meth <- make.method(data) meth["whr"] <- "~I(100 * wgt / hgt)" pred <- make.predictorMatrix(data) pred[c("wgt", "hgt"), "whr"] <- 0 imp.pas <- mice(data, meth = meth, pred = pred, print = FALSE, seed = 32093) ## ----passive, echo=FALSE, fig.width=7,fig.height=3.5------------------ if (empty_figure) { plot.new() } else { c0 <- cbind(model = "Impute, then transform", mice::complete(imp.itt)) c1 <- cbind(model = "JAV", mice::complete(imp.jav2)) c2 <- cbind(model = "Passive imputation", mice::complete(imp.pas)) cd <- rbind(c0, c1, c2) trellis.par.set(mice.theme()) tp <- xyplot(I(wgt / hgt * 100) ~ whr | model, data = cd, layout = c(3, 1), groups = rep(is.na(imp.itt$data$whr), 3), pch = c(21, 20), cex = c(0.3, 1), ylab = "Calculated Weight/Height (kg/m)", xlab = "Imputed Weight/Height (kg/m)") print(tp) } ## ----simratio------------------------------------------------------------ pop <- na.omit(boys[, c("age", "hgt", "wgt", "hc", "reg")]) pop$whr <- with(pop, 100 * wgt / hgt) broom::tidy(lm(hc ~ age + hgt + wgt + whr, data = pop)) ## ----ratiofcs, eval = FALSE---------------------------------------------- library(smcfcs) data <- pop data[sample(nrow(data), size = 100), "wgt"] <- NA data[sample(nrow(data), size = 100), "hgt"] <- NA data$whr <- 100 * data$wgt / data$hgt meth <- c("", "norm", "norm", "", "", "norm") imps <- smcfcs(originaldata = data, meth = meth, smtype = "lm", smformula = "hc ~ age + hgt + wgt + whr") fit <- lapply(imps$impDatasets, lm, formula = hc ~ age + hgt + wgt + whr) summary(pool(fit)) ## ----inter1, cache = TRUE------------------------------------------------ expr <- expression((wgt - 40) * (hc - 50)) boys$wgt.hc <- with(boys, eval(expr)) meth <- make.method(boys) meth["wgt.hc"] <- paste("~I(", expr, ")", sep = "") meth["bmi"] <- "" pred <- make.predictorMatrix(boys) pred[c("wgt", "hc"), "wgt.hc"] <- 0 imp.int <- mice(boys, m = 1, meth = meth, pred = pred, print = FALSE, seed = 62587, maxit = 10) ## ----interaction, echo=FALSE, duo=TRUE, fig.width=4.5, fig.height=2.25-------- if (empty_figure) { plot.new() } else { cd <- mice::complete(imp.int) lwd <- 0.6 miss <- is.na(imp.int$data$wgt.hc) plot(x=cd[!miss,c("wgt.hc","wgt")],col=mdc(1),lwd=0.8,cex=0.3, ylab="Weight (kg)", xlab="Interaction",pch=1, axes = FALSE) points(x=cd[miss,c("wgt.hc","wgt")],col=mdc(2),lwd=0.8,cex=0.8, pch=20) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) plot(x=cd[!miss,c("wgt.hc","hc")],col=mdc(1),lwd=0.8,cex=0.3, ylab="Head circumference (cm)", xlab="Interaction", axes = FALSE) points(x=cd[miss,c("wgt.hc","hc")],col=mdc(2),lwd=0.8,cex=0.8, pch=20) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) } ## ----compos1------------------------------------------------------------- set.seed(43112) n <- 400 Y1 <- sample(1:10, size = n, replace = TRUE) Y2 <- sample(1:20, size = n, replace = TRUE) Y3 <- 10 + 2 * Y1 + 0.6 * Y2 + sample(-10:10, size = n, replace = TRUE) Y <- data.frame(Y1, Y2, Y3) Y[1:100, 1:2] <- NA md.pattern(Y, plot = FALSE) ## ----compos2------------------------------------------------------------- Y123 <- Y1 + Y2 + Y3 Y12 <- Y123 - Y[,3] P1 <- Y[,1] / Y12 data <- data.frame(Y, Y123, Y12, P1) ## ----compos3------------------------------------------------------------- meth <- make.method(data) meth["Y1"] <- "~ I(P1 * Y12)" meth["Y2"] <- "~ I((1 - P1) * Y12)" meth["Y12"] <- "~ I(Y123 - Y3)" pred <- make.predictorMatrix(data) pred["P1", ] <- 0 pred[c("P1"), c("Y12", "Y3")] <- 1 imp1 <- mice(data, meth = meth, pred = pred, m = 10, print = FALSE) ## ----compos4------------------------------------------------------------- round(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], 2) ## ----composition, echo=FALSE, fig.width=7, fig.height=4--------------------- meth <- make.method(data) meth["Y1"] <- "~ I(P1 * Y12)" meth["Y2"] <- "~ I((1 - P1) * Y12)" meth["Y12"] <- "~ I(Y123 - Y3)" pred <- make.predictorMatrix(data) pred["P1", ] <- 0 pred[c("P1"), c("Y12", "Y3")] <- 1 imp1 <- mice(data, meth = meth, pred = pred, m = 1, print = FALSE) pred["P1", "Y3"] <- 0 imp2 <- mice(data, meth = meth, pred = pred, m = 1, print = FALSE) imp <- rbind(imp1, imp2) model <- rep(c("Y12 and Y3", "Y12 only"),each=n) tp <-xyplot(imp, P1~Y12|model, pch=c(1,19), xlab=expression(italic(Y)[1] + italic(Y)[2]), ylab = expression(italic(P)[1]), col=mdc(1:2), strip=strip.custom(factor.levels=c(expression(italic(Y)[12] + italic(Y)[3]), expression(italic(Y)[12])))) plot(tp) ## ----squeezeprep, eval=TRUE, echo=TRUE, results='hide'----------------------- data <- airquality[, 1:2] post <- make.post(data) post["Ozone"] <- "imp[[j]][, i] <- squeeze(imp[[j]][, i], c(1, 200))" imp <- mice(data, method = "norm.nob", m = 1, maxit = 1, seed = 1, post = post) ## ----squeeze, echo=FALSE, duo=TRUE, fig.width=4.5, fig.height=2.25---- lwd <- 1.5 data <- mice::complete(imp) Yobs <- airquality[, "Ozone"] Yimp <- data[, "Ozone"] mi.hist(Yimp, Yobs, b=seq(-20, 200, 10), type = "continuous", gray = FALSE, obs.lwd = lwd, mis.lwd = lwd, imp.lwd = lwd, obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent", mlt = 0.08, main = "", xlab = "Ozone (ppb)") box(lwd = 1) plot(data[cci(imp),2:1],col=mdc(1),lwd=lwd,cex=0.75, ylab="Ozone (ppb)", xlab="Solar Radiation (lang)",ylim=c(-15,170), axes=FALSE) points(data[ici(imp),2:1],col=mdc(2),lwd=lwd,cex=0.75) axis(1, lwd = 1) axis(2, lwd = 1, las = 1) box(lwd = 1) ## ----doif---------------------------------------------------------------- post["Ozone"] <- "ifdo(c(Ozone < 1, Ozone > 200), c(1, 200))" ## ----post2, eval=TRUE, echo=FALSE, cache = TRUE-------------------------- post <- make.post(boys) post["gen"] <- "imp[[j]][data$age[!r[, j]] < 8, i] <- levels(boys$gen)[1]" post["phb"] <- "imp[[j]][data$age[!r[, j]] < 8, i] <- levels(boys$phb)[1]" post["tv"] <- "imp[[j]][data$age[!r[, j]] < 8, i] <- 1" free <- mice(boys, m = 1, seed = 85444, print = FALSE) restricted <- mice(boys, m = 1, post = post, seed = 85444, print = FALSE) ## ----plotgen, echo=FALSE, fig.width=7, fig.height=4--------------------- imp3 <- rbind(free, restricted) model <- rep(c("Free","Restricted"),each=nrow(boys)) tp <-xyplot(imp3, gen~age|model, pch=c(3,1), cex=c(3,1.5), ylab="Genital development", xlab="Age (years)", col=mdc(1:2)) print(tp) ## ----visit1-------------------------------------------------------------- imp.int$visitSequence ## ----visit2a, echo=FALSE------------------------------------------------- rm(boys) ## ----visit2-------------------------------------------------------------- vis <- c("hgt", "wgt", "hc", "wgt.hc", "gen", "phb", "tv", "reg") expr <- expression((wgt - 40) * (hc - 50)) boys$wgt.hc <- with(boys, eval(expr)) imp.int2 <- mice(boys, m = 1, maxit = 1, visitSequence = vis, meth = imp.int$meth, pred = imp.int$pred, seed = 23390) ## ----visit3-------------------------------------------------------------- imp.int2 <- mice(boys, m = 1, maxit = 1, vis = "monotone", meth = imp.int$meth, pred = imp.int$pred, seed = 23390) ## ----convergence1, echo = FALSE, cache = TRUE----------------------------- imp <- mice(nhanes, seed = 62006, maxit = 20, print = FALSE) print(plot(imp, lwd = 1)) ## ----convergence0, eval = FALSE, echo = TRUE--------------------------------- imp <- mice(nhanes, seed = 62006, maxit = 20, print = FALSE) plot(imp) ## ----convergence2, cache = TRUE------------------------------------------ meth <- make.method(boys) meth["bmi"] <- "~I(wgt / (hgt / 100)^2)" imp.bmi1 <- mice(boys, meth = meth, maxit = 20, print = FALSE, seed = 60109) ## ----convergence3, echo=FALSE------------------------------------------- print(plot(imp.bmi1, c("hgt", "wgt", "bmi"))) ## ----convergence4, cache = TRUE------------------------------------------ pred <- make.predictorMatrix(boys) pred[c("hgt", "wgt"), "bmi"] <- 0 imp.bmi2 <- mice(boys, meth = meth, pred = pred, maxit = 20, print = FALSE, seed = 60109) ## ----convergence5, echo=FALSE------------------------------------------- print(plot(imp.bmi2, c("hgt", "wgt", "bmi"))) ## ----wormplot5, echo=FALSE, results = 'hide'----------------------------- set.seed(24417) rm(boys) boys$wgt[sample(1:nrow(boys), nrow(boys) / 2)] <- NA imp <- mice(boys, m = 1, seed = 53882, print = FALSE) cd <- mice::complete(imp)[, -4] isobs <- !is.na(boys$wgt) cdobs <- cd[isobs, ] cdmis <- cd[!isobs, ] obs <- gamlss(wgt ~ age + hgt + hc + gen + phb + tv + reg, data = cdobs) mis <- gamlss(wgt ~ age + hgt + hc + gen + phb + tv + reg, data = cdmis) ## ----wormplot6, solo=TRUE, echo=FALSE, fig.width = 4.4, fig.height=4.4---- wp.twin(obs, mis, xvar = NULL, xvar.column = 2, n.inter = 9, col1 = mdc(4), col2 = mdc(5), ylim = 0.9, cex = 0.7, pch = 1) ## ----stripplot, echo=FALSE, fig.width = 7, fig.height=5---------------- imp <- mice(nhanes, seed = 29981, print = FALSE) stripplot(imp, pch = c(21, 20), cex = c(1, 1.5)) ## ----stripplot1, eval=FALSE, echo=TRUE----------------------------------- imp <- mice(nhanes, seed = 29981) stripplot(imp, pch = c(21, 20), cex = c(1, 1.5)) ## ----densityplot1, eval=FALSE, echo=TRUE--------------------------------- densityplot(imp, layout = c(3, 1)) ## ----densityplot, solo=TRUE, echo=FALSE, fig.width = 7, fig.height=3---- densityplot(imp, layout = c(3, 1), col = c(mdc(4), rep(mdc(5), 5))) ## ----propensityplot1, eval = FALSE--------------------------------------- fit <- with(imp, glm(ici(imp) ~ age + bmi + hyp + chl, family = binomial)) ps <- rep(rowMeans(sapply(fit$analyses, fitted.values)), imp$m + 1) xyplot(imp, bmi ~ ps | as.factor(.imp), xlab = "Probability that record is incomplete", ylab = "BMI", pch = c(1, 19), col = mdc(1:2)) ## ----propensityplot, solo=TRUE, echo=FALSE, fig.width = 7, fig.height=5---- fit <- with(imp, glm(ici(imp)~age+bmi+hyp+chl,family=binomial)) ps <- rep(rowMeans(sapply(fit$analyses, fitted.values)),imp$m+1) tp <- xyplot(imp, bmi~ps|as.factor(.imp), pch=c(1,19), xlab="Probability that record is incomplete", ylab="BMI", col=mdc(1:2)) print(tp) ## ----ch7, child = "src/ch7.Rnw"------------------------------------------ ## ----init7, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch7-', self.contained = FALSE) pkg <- c("mice", "micemd", "lme4", "tidyr", "dplyr", "purrr", "gridExtra") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) suppressPackageStartupMessages(library(miceadds, warn.conflicts = FALSE, quietly = TRUE)) suppressPackageStartupMessages(library(DPpackage, warn.conflicts = FALSE, quietly = TRUE)) suppressPackageStartupMessages(library(mitml, warn.conflicts = FALSE, quietly = TRUE)) ## ----mla.data0----------------------------------------------------------- library(mice) data("brandsma", package = "mice") d <- brandsma[, c("sch", "lpo", "sex", "den")] ## ----mla.data1----------------------------------------------------------- d <- brandsma[, c("sch", "lpo")] ## ----mla.data2----------------------------------------------------------- md.pattern(d, plot = FALSE) ## ----mla.empty1, cache=TRUE---------------------------------------------- library(miceadds) methods <- c("sample", "pmm", "2l.pan", "2l.norm", "2l.pmm") result <- vector("list", length(methods)) names(result) <- methods for (meth in methods) { d <- brandsma[, c("sch", "lpo")] pred <- make.predictorMatrix(d) pred["lpo", "sch"] <- -2 result[[meth]] <- mice(d, pred = pred, meth = meth, m = 10, maxit = 1, print = FALSE, seed = 82828) } ## ----mlsd, fig.width=4.5, fig.height=2.25, solo=TRUE, echo = FALSE---- group_by(d, sch) %>% summarise(sdg = sd(lpo, na.rm = TRUE)) %>% pull(sdg) %>% hist(main = "", xlab = "SD(language score) per school") ## ----mldist, echo = FALSE, fig.width = 6, fig.height = 8, solo = TRUE---- imp <- result[[1]] imp <- cbind(imp, mis = is.na(imp$data$lpo)) yobs <- mice::complete(imp, "long") %>% filter(.imp == 1) %>% group_by(sch) %>% mutate( nm = sum(mis), ng = cut(nm, c(0:2, 4, 100), right = FALSE, labels = c("0", "1", "2-3", ">3")), method = "observed") %>% filter(!mis) %>% ungroup() dl <- vector("list", length(methods)) names(dl) <- methods for (meth in methods) { imp <- result[[meth]] imp <- cbind(imp, mis = is.na(imp$data$lpo)) ym <- mice::complete(imp, "long") %>% group_by(sch, .imp) %>% mutate( nm = sum(mis), ng = cut(nm, c(0:2, 4, 100), right = FALSE, labels = c("0", "1", "2-3", ">3")), method = meth) %>% filter(mis) %>% ungroup() %>% mutate( sch = as.integer(sch) ) dl[[meth]] <- ym } ymis <- do.call(rbind.data.frame, dl) yom <- rbind.data.frame(yobs, ymis) yom$method <- factor(yom$method, levels = c("observed", "sample", "pmm", "2l.pan", "2l.norm", "2l.pmm")) trellis.par.set(box.rectangle = list(col = c(rep(mdc(1), 4), mdc(2)),lwd = 0)) trellis.par.set(box.umbrella = list(col = c(mdc(3)), lwd = 1.2)) trellis.par.set(plot.symbol = list(col = "white", lwd = 1)) colors <- c(mdc(4), rep(mdc(5), 5)) bwplot(ng ~ lpo | method, data = yom, do.out = FALSE, as.table = TRUE, layout = c( 1, 6), xlim = c(16, 64), xlab = "Language score (grade 8)", ylab = "Number of missing values per school", col = colors, pch = "|", panel = function(x, y, col = col, ...) { panel.grid(v = -1, h = 0) panel.bwplot(x, y, col = col, fill = col[packet.number()], ...) meds <- tapply(x, y, median) ylocs <- seq_along(meds) panel.segments(meds, ylocs - 1/4, meds, ylocs + 1/4, lwd = 2, col = "white") } ) ## ----mldens, echo = FALSE, fig.width = 6, fig.height = 6, solo = TRUE---- imp <- result[["2l.pan"]] imp2 <- cbind(imp, mis = is.na(imp$data$lpo)) ym <- mice::complete(imp2, "long", include = TRUE) %>% group_by(sch, .imp) %>% mutate( nm = sum(mis), ng = cut(nm, c(0:2, 4, 100), right = FALSE, labels = c("0", "1", "2-3", ">3")), method = meth) %>% ungroup() imp3 <- as.mids(cbind(mice::complete(imp2, "long", include = TRUE), ng = ym$ng)) p.pan <- densityplot(imp3, ~ lpo | ng, subset = ng %in% c("1", "2-3"), xlim = c(16, 64), ylim = c(0, 0.06), xlab = "Language score (grade 8)", ylab = "Density (2l.pan)") imp <- result[["2l.pmm"]] imp2 <- cbind(imp, mis = is.na(imp$data$lpo)) ym <- mice::complete(imp2, "long", include = TRUE) %>% group_by(sch, .imp) %>% mutate( nm = sum(mis), ng = cut(nm, c(0:2, 4, 100), right = FALSE, labels = c("0", "1", "2-3", ">3")), method = meth) %>% ungroup() imp3 <- as.mids(cbind(mice::complete(imp2, "long", include = TRUE), ng = ym$ng)) p.pmm <- densityplot(imp3, ~ lpo | ng, subset = ng %in% c("1", "2-3"), xlim = c(16, 64), ylim = c(0, 0.06), xlab = "Language score (grade 8)", ylab = "Density (2l.pmm)") grid.arrange(p.pan, p.pmm) ## ----toenail.2l.3, cache=TRUE-------------------------------------------- library(tidyr) data("toenail", package = "DPpackage") data <- tidyr::complete(toenail, ID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-month) table(data$outcome, useNA = "always") ## ----toenail.2l.4, cache=TRUE-------------------------------------------- pred <- make.predictorMatrix(data) pred["outcome", "ID"] <- -2 imp <- mice(data, method = "2l.bin", pred = pred, seed = 12102, maxit = 1, m = 5, print = FALSE) table(mice::complete(imp)$outcome, useNA = "always") ## ----toenailprofiles, echo = FALSE, solo = TRUE, fig.width = 6, fig.height = 6---- trellis.par.set(strip.background=list(col="grey95")) ids <- data %>% group_by(ID) %>% summarize(n = sum(is.na(outcome))) %>% filter(n >= 4) %>% pull(ID) long <- mice::complete(imp, "long", include = TRUE) miss <- is.na(long[long$.imp == 0, "outcome"]) miss <- ifelse(miss, 2, 1) long <- cbind(long, miss = miss) long <- long[long$.imp != 0, ] ids <- c(9, 12, 13, 117, 51, 168, 188, 230, 21, 31, 214, 309, 41, 45, 48, 99) tp1 <- xyplot(outcome ~ visit | factor(ID), data = long, type = "p", groups = miss, subset = ID %in% ids, col = mdc(1:2), ylab = "Severe infection (Y/N)", xlab = "Visit", pch = 19, cex = 0.9, as.table = TRUE) fit <- with(imp, lme4::glmer(outcome ~ treatment * visit + (1 | ID), family = binomial)) prob <- imp %>% mice::complete("all") %>% purrr::map(lme4::glmer, formula = outcome ~ treatment * visit + (1 | ID), family = binomial) %>% purrr:::map(predict, type = "response") %>% unlist() long$prob <- prob tp2 <- xyplot(prob ~ visit | factor(ID), data = long, type = "l", lwd = 1.5, groups = factor(.imp), subset = ID %in% ids, col = c(mdc(3), mdc(3), "#009E73", mdc(3), mdc(3)), ylab = "Severe infection (Y/N)", xlab = "Visit", as.table = TRUE) print(tp2) print(tp1, newpage = FALSE) ## ----toenail.est, cache=TRUE--------------------------------------------- library(purrr) mice::complete(imp, "all") %>% purrr::map(lme4::glmer, formula = outcome ~ treatment * visit + (1 | ID), family = binomial) %>% pool() %>% summary() ## ----mla.data3----------------------------------------------------------- data("brandsma", package = "mice") dat <- brandsma[, c("sch", "pup", "lpo", "iqv", "ses", "ssi")] ## ----mdp, echo = FALSE, solo = TRUE, fig.height=5------------------ z <- md.pattern(dat) ## ----mla.empty10, cache=TRUE--------------------------------------------- d <- brandsma[, c("sch", "lpo")] pred <- make.predictorMatrix(d) pred["lpo", "sch"] <- -2 imp <- mice(d, pred = pred, meth = "2l.pmm", m = 10, maxit = 1, print = FALSE, seed = 152) ## ----mla.empty5---------------------------------------------------------- library(lme4) fit <- with(imp, lmer(lpo ~ (1 | sch), REML = FALSE)) summary(pool(fit)) ## ----mla.empty6---------------------------------------------------------- library(mitml) testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp ## ----mla.ri1, cache=TRUE------------------------------------------------- d <- brandsma[, c("sch", "lpo", "iqv")] pred <- make.predictorMatrix(d) pred["lpo", ] <- c(-2, 0, 3) pred["iqv", ] <- c(-2, 3, 0) imp <- mice(d, pred = pred, meth = "2l.pmm", seed = 919, m = 10, print = FALSE) ## ----mla.ri1b, eval = FALSE---------------------------------------------- d$lpo <- as.vector(scale(d$lpo, scale = FALSE)) ## ----mla.ri2, cache = TRUE----------------------------------------------- fm1 <- lpo + iqv ~ 1 + (1 | sch) mit <- mitml::panImpute(data = d, formula = fm1, m = 5, silent = TRUE) ## ----mla.ri3, cache = TRUE----------------------------------------------- blk <- make.blocks(d, "collect") fm2 <- list(collect = fm1) imp2 <- mice(d, meth = "panImpute", blocks = blk, form = fm2, print = FALSE, seed = 711) ## ----mla.ri4------------------------------------------------------------- fit <- with(imp, lmer(lpo ~ iqv + (1 | sch), REML = FALSE)) summary(pool(fit)) testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp ## ----mla.riwb1----------------------------------------------------------- res <- mice::complete(imp, "long") %>% group_by(sch, .imp) %>% mutate(iqm = mean(iqv)) %>% group_by(.imp) %>% do(model = lmer(lpo ~ iqv + iqm + (1 | sch), REML = FALSE, data = .)) %>% as.list() %>% .[[-1]] summary(pool(res)) testEstimates(res, var.comp = TRUE)$var.comp ## ----mla.ril2p1---------------------------------------------------------- d <- brandsma[, c("sch", "lpo", "iqv", "den")] meth <- make.method(d) meth[c("lpo", "iqv", "den")] <- c("2l.pmm", "2l.pmm", "2lonly.pmm") pred <- make.predictorMatrix(d) pred["lpo", ] <- c(-2, 0, 3, 1) pred["iqv", ] <- c(-2, 3, 0, 1) pred["den", ] <- c(-2, 1, 1, 0) imp <- mice(d, pred = pred, meth = meth, seed = 418, m = 10, print = FALSE) ## ----mla.ril2p3, cache = TRUE-------------------------------------------- d$den <- as.factor(d$den) fml <- list(lpo + iqv ~ 1 + (1 | sch), den ~ 1) mit <- mitml::jomoImpute(data = d, formula = fml, m = 10, silent = TRUE) ## ----mla.ril2p4, cache = TRUE-------------------------------------------- blk <- make.blocks(d, "collect") fm2 <- list(collect = fml) imp2 <- mice(d, meth = "jomoImpute", blocks = blk, form = fm2, print = FALSE, seed = 418, maxit = 1, m = 10, n.burn = 100) ## ----mladens, echo = FALSE, solo = TRUE, fig.height=6, fig.width=6---- # convert den back to numeric for plotting z <- mice::complete(imp2, "long", include = TRUE) z$den <- as.numeric(levels(z$den))[z$den] imp3 <- as.mids(z) p.jomo <- densityplot(imp3, ~ lpo + den, ylab = "jomoImpute", ylim = list(c(0, 0.05), c(0, 1.5))) p.pmm <- densityplot(imp, ~ lpo + den, ylab = "2l.pmm", ylim = list(c(0, 0.05), c(0, 1.5))) p.jomo2 <- update(p.jomo, xlim = list(c(16, 64), c(0, 5))) p.pmm2 <- update(p.pmm, xlim = list(c(16, 64), c(0, 5))) grid.arrange(p.jomo2, p.pmm2) ## ----mla.ril2p6---------------------------------------------------------- fit <- with(imp, lmer(lpo ~ 1 + iqv + as.factor(den) + (1 | sch), REML = FALSE)) summary(pool(fit)) testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp ## ----mla.int1------------------------------------------------------------ d <- brandsma[, c("sch", "lpo", "iqv", "sex", "den")] d <- data.frame(d, lpm = NA, iqm = NA, sxm = NA, iqd = NA, lpd = NA, iqd.sex = NA, lpd.sex = NA, iqd.lpd = NA, iqd.den = NA, sex.den = NA, lpd.den = NA, iqm.den = NA, sxm.den = NA, lpm.den = NA) ## ----mla.int2------------------------------------------------------------ # level-1 variables meth <- make.method(d) meth[c("lpo", "iqv", "sex")] <- "2l.pmm" pred <- make.predictorMatrix(d) pred[,] <- 0 pred[, "sch"] <- -2 codes <- c(3, 3, rep(1, 6)) pred["lpo", c("iqv", "sex", "iqd.sex", "sex.den", "iqd.den", "den", "iqm.den", "sxm.den")] <- codes pred["iqv", c("lpo", "sex", "lpd.sex", "sex.den", "lpd.den", "den", "lpm.den", "sxm.den")] <- codes pred["sex", c("lpo", "iqv", "iqd.lpd", "lpd.den", "iqd.den", "den", "iqm.den", "lpm.den")] <- codes ## ----mla.int2b----------------------------------------------------------- # level-2 variables meth["den"] <- "2lonly.pmm" pred["den", c("lpo", "iqv", "sex", "iqd.sex", "lpd.sex", "iqd.lpd")] <- 1 ## ----mla.int2c, echo = FALSE--------------------------------------------- t(pred[c("lpo", "iqv", "sex", "den"), c("sch", "lpo", "iqv", "sex", "den", "iqd.sex", "lpd.sex", "iqd.lpd", "iqd.den", "sex.den", "lpd.den", "iqm.den", "sxm.den", "lpm.den")]) ## ----mla.int4------------------------------------------------------------ # derive group means meth[c("iqm", "sxm", "lpm")] <- "2l.groupmean" pred[c("iqm", "sxm", "lpm"), c("iqv", "sex", "lpo")] <- diag(3) # derive deviations from cluster mean meth["iqd"] <- "~ I(iqv - iqm)" meth["lpd"] <- "~ I(lpo - lpm)" ## ----mla.int4a----------------------------------------------------------- # derive interactions meth["iqd.sex"] <- "~ I(iqd * sex)" meth["lpd.sex"] <- "~ I(lpd * sex)" meth["iqd.lpd"] <- "~ I(iqd * lpd)" meth["iqd.den"] <- "~ I(iqd * den)" meth["sex.den"] <- "~ I(sex * den)" meth["lpd.den"] <- "~ I(lpd * den)" meth["iqm.den"] <- "~ I(iqm * den)" meth["sxm.den"] <- "~ I(sxm * den)" meth["lpm.den"] <- "~ I(lpm * den)" ## ----mla.int5, cache=TRUE------------------------------------------------ visit <- c("lpo", "lpm", "lpd", "lpd.sex", "iqd.lpd", "lpd.den", "lpm.den", "iqv", "iqm", "iqd", "iqd.sex", "iqd.lpd", "iqd.den", "iqm.den", "sex", "sxm", "iqd.sex", "lpd.sex", "sex.den", "sxm.den", "den", "iqd.den", "sex.den", "lpd.den", "iqm.den", "sxm.den", "lpm.den") imp <- mice(d, pred = pred, meth = meth, seed = 188, visit = visit, m = 10, print = FALSE, allow.na = TRUE) ## ----mla.int7------------------------------------------------------------ long <- mice::complete(imp, "long", include = TRUE) long$den <- as.factor(long$den) imp2 <- as.mids(long) fit <- with(imp2, lmer(lpo ~ 1 + iqv*sex + iqm*den + sex*den + (1 | sch), REML = FALSE)) summary(pool(fit)) ## ----mla.rs1, cache=TRUE------------------------------------------------- d <- brandsma[, c("sch", "lpo", "iqv")] d$lpo <- as.vector(scale(d$lpo, scale = FALSE)) pred <- make.predictorMatrix(d) pred["lpo", ] <- c(-2, 0, 4) pred["iqv", ] <- c(-2, 4, 0) pred imp <- mice(d, pred = pred, meth = "2l.pmm", seed = 441, m = 10, print = FALSE, maxit = 20) ## ----mla.rs2------------------------------------------------------------- imp2 <- mice::complete(imp, "long", include = TRUE) %>% group_by(sch) %>% mutate(iqm = mean(iqv, na.rm = TRUE), lpo = lpo + mean(brandsma$lpo, na.rm = TRUE)) %>% as.mids() fit <- with(imp2, lmer(lpo ~ iqv + iqm + (1 + iqv | sch), REML = FALSE)) summary(pool(fit)) testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp ## ----mla.rs3------------------------------------------------------------- d <- brandsma[, c("sch", "lpo", "iqv", "ses")] d$lpo <- as.vector(scale(d$lpo, scale = FALSE)) d <- data.frame(d, iqv.ses = NA, ses.lpo = NA, iqv.lpo = NA, lpm = NA, iqm = NA, sem = NA, iqv.iqm = NA, ses.sem = NA, lpo.lpm = NA, iqv.sem = NA, iqv.lpm = NA, ses.iqm = NA, ses.lpm = NA, lpo.iqm = NA, lpo.sem = NA, iqm.sem = NA, lpm.sem = NA, iqm.lpm = NA) ## ----mla.rs4, echo = FALSE----------------------------------------------- pred <- make.predictorMatrix(d) meth <- make.method(d) # level-1 variables meth[c("lpo", "iqv", "ses")] <- "2l.pmm" pred["lpo", ] <- c(-2, 0, 3, 3, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0) pred["iqv", ] <- c(-2, 3, 0, 3, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0) pred["ses", ] <- c(-2, 3, 3, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1) t(pred[c("lpo", "iqv", "ses"), -(8:10)]) ## ----mla.rs6------------------------------------------------------------- meth[c("iqm", "sem", "lpm")] <- "2l.groupmean" pred[c("iqm", "sem", "lpm"), ] <- 0 pred["iqm", c("sch", "iqv")] <- c(-2, 1) pred["sem", c("sch", "ses")] <- c(-2, 1) pred["lpm", c("sch", "lpo")] <- c(-2, 1) ## ----mla.rs5------------------------------------------------------------- meth["iqv.ses"] <- "~ I(iqv * ses)" meth["iqv.lpo"] <- "~ I(iqv * lpo)" meth["ses.lpo"] <- "~ I(ses * lpo)" ## ----mla.rs7, echo = FALSE----------------------------------------------- meth["iqv.iqm"] <- "~ I(iqv * iqm)" meth["ses.sem"] <- "~ I(ses * sem)" meth["lpo.lpm"] <- "~ I(lpo * lpm)" meth["iqv.sem"] <- "~ I(iqv * sem)" meth["iqv.lpm"] <- "~ I(iqv * lpm)" meth["ses.iqm"] <- "~ I(ses * iqm)" meth["ses.lpm"] <- "~ I(ses * lpm)" meth["lpo.iqm"] <- "~ I(lpo * iqm)" meth["lpo.sem"] <- "~ I(lpo * sem)" meth["iqm.sem"] <- "~ I(iqm * sem)" meth["iqm.lpm"] <- "~ I(iqm * lpm)" meth["lpm.sem"] <- "~ I(lpm * sem)" ## ----mla.rs8, cache=TRUE------------------------------------------------- visit <- c("lpo", "iqv.lpo", "ses.lpo", "lpm", "lpo.lpm", "iqv.lpm", "ses.lpm", "lpo.iqm", "lpo.sem", "iqm.lpm", "lpm.sem", "iqv", "iqv.ses", "iqv.lpo", "iqm", "iqv.iqm", "iqv.sem", "iqv.lpm", "ses.iqm", "lpo.iqm", "iqm.sem", "iqm.lpm", "ses", "iqv.ses", "ses.lpo", "sem", "ses.sem", "iqv.sem", "ses.iqm", "ses.lpm", "lpo.sem", "iqm.sem", "lpm.sem") imp <- mice(d, pred = pred, meth = meth, seed = 211, visit = visit, m = 10, print = FALSE, maxit = 10, allow.na = TRUE) ## ----mla.rs9, warning=FALSE---------------------------------------------- fit <- with(imp, lmer(lpo ~ iqv * ses + iqm * sem + iqv * iqm + iqv * sem + ses * iqm + ses * sem + (1 + ses + iqv | sch), REML = FALSE)) ## ----mla.rs10------------------------------------------------------------ summary(pool(fit)) testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp ## ----ch8, child = "src/ch8.Rnw"------------------------------------------ ## ----init8, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch8-', self.contained = FALSE) pkg <- c("mice", "dplyr", "gridExtra") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----ice.definedata------------------------------------------------------ ideal <- data.frame( x = c(68, 76, 66, 81, 70, 72, 81, 72), y1 = c(14, 0, 1, 2, 3, 1, 10, 9), y0 = c(13, 6, 4, 5, 6, 6, 8, 8), row.names = c("John", "Caren", "Joyce", "Robert", "Ruth", "Nick", "Peter", "Torey") ) # assign first three units to trt data <- ideal data[1:3, "y0"] <- NA data[4:8, "y1"] <- NA ## ----ice.firsttry-------------------------------------------------------- library(mice) data2 <- data[, -1] imp <- mice(data2, method = "norm", seed = 188, print = FALSE) ## ----iceplot1, fig.width=7, fig.height=3.5, echo=FALSE, solo=TRUE------- trellis.par.set(list(layout.heights = list(strip = 1.3))) stripplot(imp, y1 + y0 ~ as.factor(.imp), pch = c(1, 19), col = c(mdc(1:2)), ylim = c(-100, 100)) ## ----ice.corr1----------------------------------------------------------- sapply(mice::complete(imp, "all"), function(x) cor(x$y1, x$y0)) ## ----ice.makeprior------------------------------------------------------- set.seed(84409) rho <- 0.9 mu <- mean(unlist(data2[, c("y1", "y0")]), na.rm = TRUE) sigma2 <- var(unlist(data2), na.rm = TRUE) cv <- rho * sigma2 s2 <- matrix(c(sigma2, cv, cv, sigma2), nrow = 2) prior <- data.frame(MASS::mvrnorm(n = 100, mu = rep(mu, 2), Sigma = s2)) names(prior) <- c("y1", "y0") ## ----ice.stack----------------------------------------------------------- # combine data and prior stacked <- dplyr::bind_rows(prior, data2, .id = "d") stacked$d <- as.numeric(stacked$d) - 1 ## ----ice.impute---------------------------------------------------------- stacked$tau <- stacked$y1 - stacked$y0 pred <- make.predictorMatrix(stacked) pred[, "tau"] <- 0 meth <- c("", "norm", "norm", "~ I(y1 - y0)") imp <- mice(stacked, maxit = 100, pred = pred, meth = meth, print = FALSE) ## ----icetrace, fig.width=7, fig.height=5, echo=FALSE, solo=TRUE---- plot(imp) ## ----iceplot2, fig.width=7, fig.height=3.5, echo=FALSE, solo=TRUE------- stripplot(imp, y1 + y0 ~ as.factor(.imp), pch = c(1, 19), col = c("grey70", mdc(2)), ylim = c(-5, 15)) ## ----ice.corr2----------------------------------------------------------- sapply(mice::complete(imp, "all"), function(x) { x <- x[x$d == 1, ]; cor(x$y0, x$y1)}) ## ----potential1, fig.height = 5, echo = FALSE--------------------------- xyplot(imp, y1 ~ y0 | as.factor(.imp), layout = c(3, 2), groups = d, col = c("grey70", mdc(2)), pch = c(1, 19)) ## ----potential1.show, eval = FALSE-------------------------------------- xyplot(imp, y1 ~ y0 | as.factor(.imp), layout = c(3, 2), groups = d, col = c("grey70", mdc(2)), pch = c(1, 19)) ## ----ice.iceplotfunction, echo = FALSE----------------------------------- iceplot <- function(data, rho, ynames = c("y1", "y0"), maxit = 100, m =100, seed = NULL) { # no covariates if (!is.null(seed)) set.seed(seed) # define prior dataset mu <- mean(unlist(data[, ynames]), na.rm = TRUE) s2 <- matrix(c(1, rho, rho, 1), nrow = 2) prior <- data.frame(MASS::mvrnorm(n = 100, mu = rep(mu, 2), Sigma = s2)) names(prior) <- ynames # combine data and prior stacked <- dplyr::bind_rows(prior, data[, ynames], .id = "d") stacked$d <- as.numeric(stacked$d) - 1 stacked$tau <- stacked[, ynames[1]] - stacked[, ynames[2]] # define imputation model pred <- make.predictorMatrix(stacked) pred[, "tau"] <- 0 meth <- c("", "norm", "norm", "~ I(y1 - y0)") imp <- mice(stacked, maxit = maxit, pred = pred, meth = meth, m = m, print = FALSE) long <- mice::complete(imp, "long") long <- long[long$d == 1, ] long$pat <- factor(long$.id, labels = row.names(data)) p <- parallelplot(~ long[, 4:5] | pat, data = long, horizontal.axis = FALSE, common.scale = TRUE, layout = c(8, 1), as.table = TRUE, between = list(x = 0.8, y = 0.5), par.settings = mice.theme(), col = mdc(2), lwd = 1, varnames = c("1", "0"), scales = list(cex = 0.8)) p } ## ----ice.calciceplots, cache=TRUE, echo = FALSE-------------------------- p50 <- iceplot(data, rho = 0.5, maxit = 10, seed = 1) p90 <- iceplot(data, rho = 0.9, seed = 1) p99 <- iceplot(data, rho = 0.99, seed = 1) ## ----fanplot, fig.width=7, fig.height=10, solo = TRUE, echo = FALSE---- library(gridExtra, warn.conflicts = FALSE) if (empty_figure) { plot.new() } else { grid.arrange(p50, p90, p99, nrow = 3) } ## ----ch9, child = "src/ch9.Rnw"------------------------------------------ ## ----init9, echo = FALSE, results = 'hide'------------------------------- opts_chunk$set(fig.path = 'fig/ch9-', self.contained = FALSE) pkg <- c("mice", "MASS", "foreign") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----c85imputeblind, eval=FALSE------------------------------------------ library(mice) ## DO NOT DO THIS imp <- mice(data) # not recommended ## ----c85readdata1-------------------------------------------------------- library(foreign) file.sas <- "data/c85/master85.xport" original.sas <- read.xport(file.sas) names(original.sas) <- tolower(names(original.sas)) dim(original.sas) ## ----c85readdata2-------------------------------------------------------- # remove 15 columns (text, administrative) all <- names(original.sas) drop <- c(3, 22, 58, 162:170, 206:208) keep <- !(1:length(all) %in% drop) leiden85 <- original.sas[original.sas$abr == "1", keep] data <- leiden85 ## ----c85inspect1--------------------------------------------------------- ini <- mice(data, maxit = 0) # recommended table(ini$nmis) ## ----c85inspect2--------------------------------------------------------- table(data$beroep1, useNA = "always") ## ----c85inspect4a-------------------------------------------------------- v1 <- names(ini$nmis[ini$nmis == 0]) outlist1 <- v1[c(1, 3:5, 7:10, 16:47, 51:60, 62, 64:65, 69:72)] length(outlist1) ## ----c85flux, solo=TRUE, echo=FALSE, fig.height=4, fig.width=4---------- if (empty_figure) { fx <- fluxplot(data, main=NULL, cex=0.7, lwd = 0.6, col = mdc(4), plot = FALSE) plot.new() } else { fx <- fluxplot(data, main=NULL, cex=0.7, lwd = 0.6, col = mdc(4)) } ## ----c85inspect6--------------------------------------------------------- outlist2 <- row.names(fx)[fx$outflux < 0.5] length(outlist2) ## ----c85inspect7, eval=TRUE---------------------------------------------- data2 <- data[, !names(data) %in% outlist2] fx2 <- flux(data2) outlist3 <- row.names(fx2)[fx2$outflux < 0.5] ## ----c85inspect8--------------------------------------------------------- head(ini$loggedEvents, 2) tail(ini$loggedEvents, 2) ## ----c85inspect9--------------------------------------------------------- outlist4 <- as.character(ini$loggedEvents[, "out"]) ## ----c85inspect10-------------------------------------------------------- outlist <- unique(c(outlist1, outlist2, outlist4)) length(outlist) ## ----c85inspect11-------------------------------------------------------- data2 <- data[, !names(data) %in% outlist] ## ----c85quickpred-------------------------------------------------------- inlist <- c("sex", "lftanam", "rrsyst", "rrdiast") pred <- quickpred(data2, minpuc = 0.5, include = inlist) ## ----c85predinspect1----------------------------------------------------- table(rowSums(pred)) ## ----c85predinspect2----------------------------------------------------- rowSums(pred[c("rrsyst", "rrdiast"),]) ## ----c85predinspect3, eval=FALSE----------------------------------------- names(data2)[pred["rrsyst", ] == 1] ## ----c85predinspect4, eval=FALSE----------------------------------------- vname <- "rrsyst" y <- cbind(data2[vname], r =! is.na(data2[, vname])) vdata <- data2[,pred[vname,] == 1] round(cor(y = y, x = vdata, use = "pair"), 2) ## ----c85fetch, echo=FALSE------------------------------------------------ load("data/c85/imp.blind") load("data/c85/imp.smart") imp.qp <- imp.smart imp <- imp.blind rm(imp.blind, imp.smart) ## ----c85imputesmart, eval=FALSE------------------------------------------ imp.qp <- mice(data2, pred = pred, seed = 29725) ## ----blindvsquickpred, echo=FALSE, fig.width=6, fig.height=4-------------- vnames <- c("rrsyst", "rrdiast") cd1 <- mice::complete(imp)[, vnames] cd2 <- mice::complete(imp.qp)[, vnames] typ <- factor(rep(c("blind imputation","quickpred"), each = nrow(cd1))) mis <- ici(data2[,vnames]) mis <- is.na(imp$data$rrsyst)|is.na(imp$data$rrdiast) cd <- data.frame(typ=typ,mis=mis,rbind(cd1, cd2)) if (empty_figure) { plot.new() } else { tp <- xyplot(jitter(rrdiast,10) ~ jitter(rrsyst,10) | typ, data = cd, groups = mis, xlab = "Systolic BP (mmHg)", ylab = "Diastolic BP (mmHg)", col=c(mdc(1),mdc(2)), pch=c(1,19),type=c("g","p"), strip = strip.custom(bg="grey95"), scales = list(alternating=1, tck=c(1,0))) print(tp) } ## ----c85trellis, eval=FALSE---------------------------------------------- vnames <- c("rrsyst", "rrdiast") cd1 <- mice::complete(imp)[, vnames] cd2 <- mice::complete(imp.qp)[, vnames] typ <- factor(rep(c("blind imputation", "quickpred"), each = nrow(cd1))) mis <- ici(data2[, vnames]) mis <- is.na(imp$data$rrsyst) | is.na(imp$data$rrdiast) cd <- data.frame(typ = typ, mis = mis, rbind(cd1, cd2)) xyplot(jitter(rrdiast, 10) ~ jitter(rrsyst, 10) | typ, data = cd, groups = mis, col = c(mdc(1), mdc(2)), xlab = "Systolic BP (mmHg)", type = c("g","p"), ylab = "Diastolic BP (mmHg)", pch = c(1, 19), strip = strip.custom(bg = "grey95"), scales = list(alternating = 1, tck = c(1, 0))) ## ----c85nelson----------------------------------------------------------- dat <- cbind(data2, dead = 1 - data2$dwa) hazard <- nelsonaalen(dat, survda, dead) ## ----c85nelsoncorrelation, eval=FALSE, echo=FALSE------------------------ tmp <- data.frame(hazard, t = data2$survda, logt = log(data2$survda), SBP = data2$rrsyst, DBP = data2$rrdiast) round(cor(tmp, use = "pair"), 3) ## ----c85km, echo=FALSE, fig.height=4------------------------------------- library(survival) fit <- survfit(Surv(survda / 365, 1 - dwa) ~ is.na(rrsyst), data = data2) lwd <- 0.6 plot(fit, lty = 1, lwd = 0.9, xlab = "Years since Intake", ylab = "K-M Survival Probability", col = c(mdc(4), mdc(5)), mark.time = FALSE, axes = FALSE) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) text(4, 0.7, "BP measured") text(2, 0.3, "BP missing") ## ----c85undamped, eval=FALSE--------------------------------------------- delta <- c(0, -5, -10, -15, -20) post <- imp.qp$post imp.all.undamped <- vector("list", length(delta)) for (i in 1:length(delta)) { d <- delta[i] cmd <- paste("imp[[j]][,i] <- imp[[j]][,i] +", d) post["rrsyst"] <- cmd imp <- mice(data2, pred = pred, post = post, maxit = 10, seed = i * 22) imp.all.undamped[[i]] <- imp } ## ----c85damper, eval=FALSE----------------------------------------------- cmd <- paste("fit <- lm(y ~ as.matrix(x)); damp <- sqrt(1 - summary(fit)$r.squared); imp[[j]][, i] <- imp[[j]][, i] + damp * ", d) ## ----c85readimp, echo=FALSE---------------------------------------------- load("data/c85/imp.all.damper2") imp.all.damped <- imp.all.damper2 rm(imp.all.damper2) ## ----c85cda-------------------------------------------------------------- cda <- expression( sbpgp <- cut(rrsyst, breaks = c(50, 124, 144, 164, 184, 200, 500)), agegp <- cut(lftanam, breaks = c(85, 90, 95, 110)), dead <- 1 - dwa, coxph(Surv(survda, dead) ~ C(sbpgp, contr.treatment(6, base = 3)) + strata(sexe, agegp))) imp <- imp.all.damped[[1]] fit <- with(imp, cda) ## ----c85hazardratio------------------------------------------------------ as.vector(exp(summary(pool(fit))[, 1])) ## ----readdata7a, echo=FALSE---------------------------------------------- library(mice) bmi <- function(h, w) w / (h / 100)^2 krul <- selfreport[selfreport$src == "krul",] males <- krul[krul$sex=="Male",] ## ----plotbmi, fig.width=4.5, fig.height=3,solo=TRUE,echo=FALSE---------- lwd <- 0.6 xy <- xy.coords(krul$bm, krul$br-krul$bm) if (empty_figure) { plot.new() } else { plot(xy, col = mdc(1), xlab = "Measured BMI", ylab = "Self-Reported - Measured BMI", xlim = c(17, 45), ylim = c(-5,5), type = "n",lwd = lwd, axes = FALSE) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) polygon(x = c(30, 20, 30), y = c(0, 10, 10), col = "grey95", border = NA) polygon(x = c(30, 40, 30), y = c(0, -10, -10), col = "grey95", border = NA) abline(0, 0, lty = 2, lwd = lwd) points(xy, col = mdc(1),cex = 0.7) lines(lowess(xy), lwd = 1.5, col = mdc(4)) text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3) box(lwd = lwd) } ## ----plotexplain,fig.height=3, fig.width=3,solo=TRUE,echo=FALSE---------- lwd = 0.6 fit <- lm(bm~br,data=males) plot(x=males$br,y=males$bm, xlim=c(26,34),ylim=c(26,34), xlab="Self-Reported BMI", ylab="Measured BMI",col=mdc(1), cex.lab=1, cex = 0.7, bty = "n", axes = FALSE) axis(side = 1, lwd = lwd) axis(side = 2, lwd = lwd, las = 1) box(lwd = lwd) abline(h=30,v=30,lty=2, lwd = lwd) abline(coef(fit),col=mdc(4), lwd = lwd) abline(v=(30-coef(fit)[1])/coef(fit)[2],col=mdc(4), lwd = lwd) text(1:4,x=c(33.8,33.8,26.2,26.2),y=c(33.8,26.2,26.2,33.8),cex=2.5) text(c("a","b"),x=c(29.1,29.7,29.1,29.7),y=c(34.1,34.1,26.4,26.4),cex=1.8,adj=c(0.5,1)) ## ----srcpattern---------------------------------------------------------- data <- selfreport[, c("age", "sex", "hm", "hr", "wm", "wr")] md.pattern(data, plot = FALSE) ## ----imputebmi, cache=TRUE----------------------------------------------- bmi <- function(h, w) w / (h / 100)^2 meth <- make.method(selfreport) meth[c("prg", "edu", "etn")] <- "" meth["bm"] <- "~ bmi(hm, wm)" pred <- make.predictorMatrix(selfreport) pred[, c("src", "id", "pop", "prg", "edu", "etn", "web", "bm", "br")] <- 0 imp <- mice(selfreport, pred = pred, meth = meth, m = 10, seed = 66573, maxit = 20, print = FALSE) ## ----plotimpbmi, fig.width=4.5, fig.height=3,solo=TRUE,echo=FALSE------- lwd <- 0.6 cd <- mice::complete(imp, 1) xy <- xy.coords(cd$bm, cd$br - cd$bm) if (empty_figure) { plot.new() } else { plot(xy, col = mdc(2), xlab = "Measured BMI", ylab = "Self-Reported - Measured BMI", xlim = c(17, 45), ylim = c(-5, 5), type = "n", lwd = lwd, axes = FALSE) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) polygon(x = c(30, 20, 30), y = c(0, 10, 10), col = "grey95", border = NA) polygon(x = c(30, 40, 30), y = c(0, -10, -10), col = "grey95", border = NA) abline(0, 0, lty = 2, lwd = lwd) idx <- cd$src == "krul" xyc <- xy; xyc$x <- xy$x[idx]; xyc$y <- xy$y[idx] xys <- xy; xys$x <- xy$x[!idx]; xys$y <- xy$y[!idx] points(xyc, col = mdc(1), cex=0.7) points(xys, col = mdc(2), cex=0.7) lines(lowess(xyc), col = mdc(4), lwd = 1.5) lines(lowess(xys), col = mdc(5), lwd = 1.5) text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3) box(lwd = lwd) } ## ----walkinginit, echo=FALSE--------------------------------------------- library(mice) library(foreign) dataproject <- "Data/wad" .store <- file.path(dataproject,"R/store/") file <- file.path(dataproject, "original/walklink.sav") original <- read.spss(file=file, to.data.frame=TRUE) data <- original names(data) <- c("src","sex","age","sip1","haq8","grs9") data$src <- factor(data$src, levels=2:3, labels=c("Ergo","Euri")) data$sip1 <- as.factor(data$sip1) data$haq8 <- as.ordered(data$haq8) data$grs9 <- as.ordered(data$grs9) ## ----walkingimpute1------------------------------------------------------ fA <- c(242, 43, 15, 0, 6) fB <- c(145, 110, 29, 8) YA <- rep(ordered(c(0:3, NA)), fA) YB <- rep(ordered(c(0:3)), fB) Y <- rbind(data.frame(YA, YB = ordered(NA)), data.frame(YB, YA = ordered(NA))) ## ----walkingpattern, echo=FALSE, fig.height=2.5-------------------------- z <- md.pattern(Y) ## ----walkingimpute2------------------------------------------------------ ra <- function(x, simplify = FALSE) { if (!is.mira(x)) return(NULL) ra <- x$analyses if (simplify) ra <- unlist(ra) return(ra) } micemill <- function(n) { for (i in 1:n) { imp <<- mice.mids(imp, print = FALSE) cors <- with(imp, cor(as.numeric(YA), as.numeric(YB), method = "kendall")) tau <<- rbind(tau, ra(cors, simplify = TRUE)) } } ## ----walkingimpute3, cache = TRUE---------------------------------------- tau <- NULL imp <- mice(Y, maxit = 0, m = 10, seed = 32662, print = FALSE) micemill(50) ## ----walkingimpute5a, eval = FALSE--------------------------------------- plotit <- function() matplot(x = 1:nrow(tau), y = tau, ylab = expression(paste("Kendall's ", tau)), xlab = "Iteration", type = "l", las = 1) plotit() ## ----walkingimpute5b, solo=TRUE, echo=FALSE, fig.width = 6, fig.height = 3---- plotit2 <- function(lwd = 0.6) { matplot(x = 1:nrow(tau), y = tau, ylab = expression(paste("Kendall's ", tau)), xlab = "Iteration", type = "l", axes = FALSE, lwd = lwd, cex.lab = 1.25) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) } plotit2() ## ----walkingimpute5d, fig.height=3, echo=FALSE--------------------------- z <- md.pattern(walking) ## ----wasimpute7a, cache = TRUE------------------------------------------- tau <- NULL pred <- make.predictorMatrix(walking) pred[, c("src", "age", "sex")] <- 0 imp <- mice(walking, maxit = 0, m = 10, pred = pred, seed = 92786, print = FALSE) micemill(20) ## ----walkingimpute7b, echo=FALSE, fig.height=3, fig.width=6, solo=TRUE---- plotit2() ## ----walkingimpute8, eval = FALSE---- props <- with(imp, mean(YB[src == "A"] == '0')) thetaAB <<- rbind(thetaAB, ra(props, simplify = TRUE)) ## ----walkingimpute9, echo=FALSE------------------------------------------ micemill <- function(n) { for (i in 1:n) { imp <<- mice.mids(imp, print = FALSE) cors <- with(imp, cor(as.numeric(YA[src=="A"]), as.numeric(YB[src=="A"]), method="kendall")) tau <<- rbind(tau, ra(cors, s=TRUE)) # global assignment means <- with(imp, mean(as.numeric(YA[src=="A"]), na.rm=TRUE)) thetaBA <<- rbind(thetaBA, ra(means, s=TRUE)-1) props <- with(imp, mean(YB[src=="A"]=='0')) thetaAB <<- rbind(thetaAB, ra(props, s=TRUE)) tabs <- with(imp, ftable(addmargins( table(YA[src=="A"],YB[src=="A"], useNA="ifany", dnn=c("YA","YB"))))) } } thetaBA <- NULL ## ----walkingimpute10, cache = TRUE--------------------------------------- tau <- NULL thetaAB <- NULL pred2 <- pred1 <- make.predictorMatrix(walking) pred1[, c("src", "age", "sex")] <- 0 pred2[, "src"] <- 0 imp <- mice(walking, maxit = 0, m = 10, pred = pred1, seed = 99786) micemill(20) imp <- mice(walking, maxit = 0, m = 10, pred = pred2) micemill(20) ## ----walkingplotthetaAB, fig.height=3, fig.width=6, solo=TRUE, echo=FALSE---- lwd <- 0.6 matplot(x = 1:nrow(thetaAB), y = thetaAB, xlab = "Iteration", ylab = expression(hat(theta)[AB]), type = "l", axes = FALSE, lwd = lwd, cex.lab = 1.25, ylim = c(0.42, 0.67)) axis(1, lwd = lwd) axis(2, lwd = lwd, las = 1) box(lwd = lwd) abline(h = 0.497, v = 20, lty = 2, lwd = lwd, col = "grey") text(x = 5, y = 0.488, expression(hat(theta)[BB]), adj = 0) text(x = 5, y = 0.430, "Without covariates", adj = 0) text(x = 25, y = 0.430, "With covariates", adj = 0) arrows(x0 = 4.5, y0 = 0.488, x1 = 0, y1 = 0.495, length = 0.1, angle = 20) points(x = -0.4, y = 0.497, pch = 20) ## ----ch10, child = "src/ch10.Rnw"---------------------------------------- ## ----init10, echo = FALSE, results = 'hide'------------------------------ opts_chunk$set(fig.path = 'fig/ch10-', self.contained = FALSE) pkg <- c("mice", "haven") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----popreaddata, echo=FALSE--------------------------------------------- ## Incomplete POPS-19 data (n=959, nvar=88) file.spss <- file.path("data/pop/incompleet_v3.sav") pops <- read_spss(file=file.spss) pops <- as_factor(pops) data <- pops[,-(87:88)] ## Read predictor matrix file.pred <- file.path("data/pop/predictor matrix_v3.txt") pred <- read.table(file=file.pred, sep="\t") pred <- as.matrix(pred[-c(87:89)]) ## Label data vnames <- tolower(names(data)) names(data) <- vnames dimnames(pred) <- list(vnames, vnames) ## some edits pred[1,] <- 0 pred[,1] <- 0 ## ----popimpute, cache=TRUE----------------------------------------------- imp1 <- mice(data, pred = pred, maxit = 20, seed = 51121, print = FALSE) ## ----popstrace, echo=FALSE, fig.height=4------------------------------- trellis.par.set(list(layout.heights = list(strip = 3))) plot(imp1, c("a10u"), layout = c(2, 1)) trellis.par.set(list(layout.heights = list(strip = 1.3))) ## ----popsbwplot1, echo=FALSE, fig.height=4------------------------------- thickBoxSettings <- list(box.rectangle=list(lwd=1.5), box.umbrella=list(lwd=1.5)) bwplot(imp1, iq + coping ~ .imp, par.settings = thickBoxSettings) ## ----impute2, cache=TRUE------------------------------------------------- pred[61:86, 61:86] <- 0 imp2 <- mice(data, pred = pred, maxit = 20, seed = 51121, print = FALSE) ## ----popsbwplot2, echo=FALSE, fig.height=4------------------------------- bwplot(imp2, iq + coping ~ .imp, par.settings = thickBoxSettings) ## ----vlginit, echo=FALSE------------------------------------------------- dataproject <- "Data/vlg" .store <- file.path(dataproject,"R/store/") ## ----vlgaugment---------------------------------------------------------- nimp <- c(400, 600, 75, 300, 200, 400) regcat <- c("North", "City", "North", "East", "North", "City") reg <- rep(regcat, nimp) nimp2 <- floor(rep(nimp, each = 2)/2) nimp2[5:6] <- c(38, 37) sex <- rep(rep(c("boy", "girl"), 6), nimp2) minage <- rep(c(0, 0, 10, 10, 14, 14), nimp) maxage <- rep(c(10, 10, 14, 14, 21, 21), nimp) set.seed(42444) age <- runif(length(minage), minage, maxage) id <- 600001:601975 data("fdgs", package = "mice") pad <- data.frame(id, reg, age, sex, hgt = NA, wgt = NA, hgt.z = NA, wgt.z = NA) data <- rbind(fdgs, pad) ## ----sdsbyregion, echo=FALSE, fig.height=4------------------------- ## inspect the age by region pattern means <- aggregate(data$hgt.z, by=list(reg=data$reg, age=floor(data$age)), mean, na.rm=TRUE) col <- c("skyblue", "green", "red", "brown", "grey") tp <- xyplot(x~age, means, group=reg, type=c("g","l"), lwd = 1.3, lty=1:5, col = col, pch = 19, xlim=c(-1,22), ylim=c(-0.6, 0.8), ylab="Height (SDS)", xlab="Age (years)", key=list( text=list(levels(means$reg)), lines=list(lty=1:5, col = col), x=0.1, y=0.98, background="white", columns=3, between.columns=0), scales=list(x=list(tck=c(1,0)), y=list(tck=c(1,0)))) print(tp) ## ----vlgimpute, cache = TRUE--------------------------------------------- form <- list(hgt.z ~ reg + age + sex + wgt.z + I((age - 10) * wgt.z) + age * reg, wgt.z ~ reg + age + sex + hgt.z + I((age - 10) * hgt.z) + age * reg) imp <- mice(data, meth = "norm", form = form, m = 10, maxit = 20, seed = 28107, print = FALSE) ## ----vlginspect, echo=FALSE, fig.height=4-------------------------------- cda <- mice::complete(imp, "long", include=TRUE) means2 <- aggregate(cda$hgt.z, by=list(reg=cda$reg, age=floor(cda$age), imp=cda$.imp), mean, na.rm=TRUE) xyplot(x~age|reg,means2, group=imp, subset=(reg=="North"|reg=="City"), type=c("g","l"),lwd=c(4, rep(1,imp$m)), lty=1:5, col=c(mdc(4), rep(mdc(6),imp$m)), ylab="Height (SDS)", xlab="Age (years)", ylim=c(-0.5,0.8), xlim=c(-2,23), scales=list(x=list(alternating=FALSE, tck=c(1,0)), y=list(tck=c(1,0))), strip = strip.custom(bg="grey95")) ## ----finalheight, echo=FALSE, fig.height=4------------------------ load("data/vlg/boyslms") load("data/vlg/girlslms") ## plot the mcurves for 16+ lms <- boyslms mcurvesm <- data.frame(sex="Boys", imp=rep(0:10,each=nrow(lms[[1]])), age=rep(lms[[1]]$age,11), m=c(lms[[1]]$m, lms[[2]]$m, lms[[3]]$m, lms[[4]]$m, lms[[5]]$m, lms[[6]]$m, lms[[7]]$m, lms[[8]]$m, lms[[9]]$m, lms[[10]]$m, lms[[11]]$m)) lms <- girlslms mcurvesf <- data.frame(sex="Girls", imp=rep(0:10,each=nrow(lms[[1]])), age=rep(lms[[1]]$age,11), m=c(lms[[1]]$m, lms[[2]]$m, lms[[3]]$m, lms[[4]]$m, lms[[5]]$m, lms[[6]]$m, lms[[7]]$m, lms[[8]]$m, lms[[9]]$m, lms[[10]]$m, lms[[11]]$m)) mcurves <- rbind(mcurvesm, mcurvesf) tpm <- xyplot(m~age|sex, groups=imp, data=mcurves, xlab="Age (years)", ylab="Height (cm)", col=c(mdc(4),rep(mdc(6),10)), lwd=c(5,rep(0.8,10)), lty=c(2,rep(1,10)), type=c("l"), xlim=c(15.8,21.2), ylim=list(c(178,185),c(165,172)), scales=list(y = list(relation="free", at=list(seq(from=178,to=185,2), seq(165,172,2))), x = list(alternating=FALSE), tck=c(1,0)), panel = function(...) { panel.abline(h=165:185, v=16:21, col="grey90") panel.xyplot(...) }, strip = strip.custom(bg="grey95")) print(tpm) ## ----ch11, child = "src/ch11.Rnw"---------------------------------------- ## ----init11, echo = FALSE, results = 'hide'------------------------------ opts_chunk$set(fig.path = 'fig/ch11-', self.contained = FALSE) pkg <- c("mice", "haven", "lme4", "splines") loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) ## ----fddpattern, duo = TRUE, fig.height=4, echo=FALSE-------------------- yvars <- c("yc1", "yc2", "yc3", "yp1", "yp2", "yp3") z <- md.pattern(fdd[fdd$pp == "Y", yvars]) z <- md.pattern(fdd[fdd$pp == "N", yvars]) ## ----fddpred------------------------------------------------------------- vars <-c("ypa1", "ypb1", "ypc1", "ypa2", "ypb2", "ypc2", "ypa3", "ypb3", "ypc3") fdd.pred[vars[1:3], vars] ## ----fddimpute, cache = TRUE, warning = FALSE---------------------------- meth <- make.method(fdd) meth["yc1"] <- "~I(yca1 + ycb1 + ycc1)" meth["yc2"] <- "~I(yca2 + ycb2 + ycc2)" meth["yc3"] <- "~I(yca3 + ycb3 + ycc3)" meth["yp1"] <- "~I(ypa1 + ypb1 + ypc1)" meth["yp2"] <- "~I(ypa2 + ypb2 + ypc2)" meth["yp3"] <- "~I(ypa3 + ypb3 + ypc3)" imp <- mice(fdd, pred = fdd.pred, meth = meth, maxit = 20, seed = 54434, print = FALSE) ## ----fddreshape---------------------------------------------------------- lowi <- mice::complete(imp, "long", inc=TRUE) lowi <- data.frame(lowi,cbcl2=NA, cbin2=NA,cbex2=NA) lolo <- reshape(lowi, idvar = 'id', varying = 11:ncol(lowi), direction = "long", new.row.names = 1:(nrow(lowi)*3), sep="") lolo <- lolo[order(lolo$.imp, lolo$id, lolo$time),] row.names(lolo) <- 1:nrow(lolo) ## ----fddplotimp, echo = FALSE-------------------------------------------- iv <- is.na(lolo[lolo$.imp==0,]$yp) ivn <- ifelse(iv,1,0) col12 <- c("grey80","grey80", mdc(2),mdc(1), mdc(2),"transparent", mdc(2),"transparent", mdc(2),"transparent", mdc(2),"transparent") ic <- unique(lolo$id[iv]) ss <- lolo$id %in% ic grp <- 2*as.integer(lolo$.imp) - ivn loloss <- data.frame(lolo, grp=grp) trellis.par.set(strip.background=list(col="grey95")) trellis.par.set(list(layout.heights = list(strip = 1))) tp1 <- xyplot(yp~time|factor(id), data=loloss, type="l", layout = c(4,4), groups=factor(.imp), col="grey80", subset=ss, ylab="UCLA-RI Parent Score", pch=19, cex=1, xlab="Time", xlim=c("T1","T2","T3"), as.table=TRUE) print(tp1) tp2 <- xyplot(yp~time|factor(id), data=loloss, type="p", layout = c(4,4), groups=grp, col=col12, subset=ss, ylab="UCLA-RI Parent Score", pch=19, cex=0.8, xlab="Time", xlim=c("T1","T2","T3"), as.table=TRUE) print(tp2, newpage=FALSE) ## ----fddplotparent, fig.height=3.5, echo=FALSE--------------------------- means <- aggregate(lolo$yp, list(lolo$.imp!=0,lolo$trt,lolo$time), mean, na.rm=TRUE) names(means) <- c(".imp","trt","time","yp") levels(means$trt) <- c("EMDR","CBT") tp <- xyplot(yp~time|trt, data=means, type="o", pch=19, groups=factor(.imp), col=c(mdc(4), mdc(6)), ylab="UCLA-RI Parent Score", lwd=2, xlab="Time", xlim=c("T1","T2","T3")) print(tp) ## ----fddplotchild, fig.height=3.5, echo=FALSE---------------------------- means <- aggregate(lolo$yc, list(lolo$.imp!=0,lolo$trt,lolo$time), mean, na.rm=TRUE) names(means) <- c(".imp","trt","time","yc") levels(means$trt) <- c("EMDR","CBT") tp <- xyplot(yc~time|trt, data=means, type="o", pch=19, groups=factor(.imp), col=c(mdc(4),mdc(6)), ylab="UCLA-RI Child Score", lwd=2, xlab="Time", xlim=c("T1","T2","T3")) print(tp) ## ----tbcinit, echo=FALSE------------------------------------------------- load("data/tbc/imp.1745") load("data/tbc/fit.hgt") load("data/tbc/fit.wgt") load("data/tbc/fit.bmi") original <- read_spss(file = "data/tbc/long_spss2splus.sav") names(original) <- c("id", "age", "occ", "nocc", "sex", "hgt", "wgt", "bmi", "hgt.z", "wgt.z", "bmi.z", "wfh.z", "sys140") broad <- read_spss(file = "data/tbc/broad_spss2splus.sav") broad <- as_factor(broad) names(broad) <- tolower(names(broad)) target <- data.frame(id=broad$koppel, ao = broad$ovjv, bmi.z.jv = broad$b_sdjv) # merge the target variables original <- merge(original, target, all.x = TRUE) ### define subsets subset <- !is.na(original$age) & !(is.na(original$hgt.z) & is.na(original$wgt.z) & is.na(original$bmi.z)) tbc <- original[subset, c("id","occ","nocc","age","sex", "hgt.z","wgt.z","bmi.z","ao")] tbc <- tbc[order(tbc$id, tbc$age),] ### define useful administrative variables ord <- tbc$id first <- diff(ord)>=1 typ <- factor(rep("obs",nrow(tbc)),levels=c("obs","sup","pred")) tbc <- data.frame(tbc[,1:3],first=c(TRUE,first),typ,tbc[4:ncol(tbc)]) ### make small dataset six <- c(1259,7019,2447,7460,8046,7646) set.seed(23221) sample <- unique(c(six,sample(unique(tbc$id),300))) idx <- (tbc$id) %in% sample data <- tbc[idx,] ### select all data data <- tbc ### remove those with nocc 1 or 2 or 3 data <- data[data$nocc >= 3,] ### missing data heat map tbc <- data ## ----tbcspline----------------------------------------------------------- library(splines) data <- tbc ### specify break ages brk <- c(0, 8/365, 1/3, 1, 2, 6, 10, 18, 29) k <- length(brk) ### calculate B-spline X <- bs(data$age, knots = brk, B = c(brk[1], brk[k] + 0.0001), degree = 1) X <- X[,-(k + 1)] dimnames(X)[[2]] <- paste("x", 1:ncol(X), sep = "") data <- cbind(data, X) round(head(X, 3), 2) ## ----tbcfitstick, eval=FALSE--------------------------------------------- library(lme4) fit <- lmer(wgt.z ~ 0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + (0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 | id), data = data) ### calculate size and increment per person tsiz <- t(ranef(fit)$id) + fixef(fit) tinc <- diff(tsiz) round(head(t(tsiz)), 2) ## ----tbctimewarp, echo=FALSE--------------------------------------------- warp.setup <- data.frame(age = brk, age2 = seq(0, 29, length.out=k)) warp.model <- lm( age2 ~ bs(age, knots = brk[c(-1, -k)], degree = 1) - 1, data = warp.setup, x = TRUE, y = TRUE) warped.knots <- warp.model$y maxage <- max(warped.knots) age2 <- predict(warp.model, newdata = data) data <- cbind(data, age2 = age2) rm(age2) id <- unique(data$id) data2 <- appendbreak(data, brk, id = id, warp.model = warp.model, typ = "sup") table(data2$typ) ## ----tbcdecimal, echo=FALSE---------------------------------------------- options(digits = 2) ## ----tbcappenddata------------------------------------------------------- head(data2) ## ----tbcimpmeth---------------------------------------------------------- Y <- c("hgt.z", "wgt.z", "bmi.z") meth <- make.method(data2) meth[1:length(meth)] <- "" meth[Y] <- "2l.pan" ## ----tbcimppred---------------------------------------------------------- pred <- make.predictorMatrix(data2) pred[1:nrow(pred), 1:ncol(pred)] <- 0 pred[Y, "id"] <- (-2) pred[Y, "sex"] <- 1 pred[Y, paste("x", 2:9, sep = "")] <- 1 pred[Y[1], Y[2]] <- 1 pred[Y[2], Y[1]] <- 1 pred[Y[3], Y[1:2]] <- 1 ## ----tbcimpvis, echo=FALSE----------------------------------------------- vis <- c("hgt.z","wgt.z","bmi.z") ## ----tbcimpmice, eval=FALSE---------------------------------------------- imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10, maxit = 10, seed = 52711, print = FALSE) ## ----tbcplotimp, echo=FALSE--------------------------------------------- cd <- mice::complete(imp.1745, "long") sup <- cd[cd$typ=="sup", ] # sup <- cd[cd$typ=="imp",-c(2,10:12)] # sup$ao <- NA data3 <- data.frame(.imp=0,data2) data3 <- rbind(data3, sup[,-2]) # prepare for plotting idx <- data3$id %in% six pd <- data3[idx,] pd$id <- as.factor(pd$id) pd$grp <- pd$.imp pd$grp[pd$grp==0] <- NA pd$grp[pd$typ=="obs"] <- 11 pd$grp <- reorder(pd$grp, as.numeric(pd$grp)) # now plot tbcimp <- xyplot(wgt.z~age2|id, data=pd, xlim=c(-1,maxage+1), ylim=c(-4,4), as.table = TRUE, scales = list(x=list(at=warped.knots, labels=c("0","8d","4m","1y","2y","6y","10y","18y","29y"))), xlab = "Age", ylab = "Weight SDS", groups=grp, layout = c(2,3), pch=c(rep(20,10), 19), type=c(rep("l",10),"p"), lwd=c(rep(1,10),1), col=c(rep(mdc(5),10),mdc(1)), distribute.type=TRUE, panel = function(...) { panel.abline(v=warped.knots, lty=2, col="grey80") panel.abline(h=c(-2,0,2), lty=1, col="grey80") panel.xyplot(...) }, strip=strip.custom(bg="grey95") ) print(tbcimp) ## ----tbchw, eval=TRUE, echo=FALSE--------------------------------------- ### prepare for plotting cd <- mice::complete(imp.1745, 1) idx <- (cd$id) %in% sample cd <- cd[idx,] shingle <- cut(cd$age,breaks=c(brk,29.01),right=FALSE, inc=TRUE, labels=c("0d-8d","8d-4m","4m-1y","1y-2y","2y-6y", "6y-10y","10y-18y","18y-29y","29y")) if (empty_figure) { plot.new() } else { tbchw <- xyplot(wgt.z~hgt.z|shingle,data=cd, xlim=c(-4,4), ylim=c(-4,4), type=c("p","g"), group=(typ=="sup"),pch=c(1,20), col=c(mdc(1:2)), xlab="Height SDS", ylab="Weight SDS",pty="s", strip=strip.custom(bg="grey95")) print(tbchw) } ## ----tbccomplete,echo=FALSE,eval=FALSE----------------------------------- imp <- imp.1745 cd <- mice::complete(imp,"long") sup <- cd[cd$typ=="sup",] sup$age <- round(sup$age,2) sup$hgt.z <- round(sup$hgt.z,2) sup$wgt.z <- round(sup$wgt.z,2) sup$bmi.z <- round(sup$bmi.z,2) lowi <- reshape(sup, idvar=c('id','.imp'), timevar = 'age', v.names=c('hgt.z','wgt.z','bmi.z'), direction="wide", drop=c(".id","occ","first","typ","hgt","wgt","bmi", paste("x",1:9,sep=""),"age2")) hsiz <- lowi[,c(".imp","id","nocc","sex", "hgt.z.0","hgt.z.0.02","hgt.z.0.33", "hgt.z.1","hgt.z.2","hgt.z.6","hgt.z.10","hgt.z.18", "hgt.z.29")] wsiz <- lowi[,c(".imp","id","nocc","sex", "wgt.z.0","wgt.z.0.02","wgt.z.0.33", "wgt.z.1","wgt.z.2","wgt.z.6","wgt.z.10","wgt.z.18", "wgt.z.29")] bsiz <- lowi[,c(".imp","id","nocc","sex", "bmi.z.0","bmi.z.0.02","bmi.z.0.33", "bmi.z.1","bmi.z.2","bmi.z.6","bmi.z.10","bmi.z.18", "bmi.z.29")] # merge outcome data bsiz <- merge(bsiz, target,all.x=TRUE) hinc <- cbind(hsiz[,1:4],t(diff(t(hsiz[,-1:-4])))) winc <- cbind(wsiz[,1:4],t(diff(t(wsiz[,-1:-4])))) binc <- cbind(bsiz[,1:4],t(diff(t(bsiz[,-1:-4])))) # merge outcome data binc <- merge(binc, target,all.x=TRUE) bmi.z.0.02 <- by(binc, binc$.imp, function(x) lm(bmi.z.0.02~ao, data = x, na.action='na.omit')) bmi.z.0.33 <- by(binc, binc$.imp, function(x) lm(bmi.z.0.33~ao, data = x, na.action='na.omit')) bmi.z.1 <- by(binc, binc$.imp, function(x) lm(bmi.z.1 ~ao, data = x, na.action='na.omit')) bmi.z.2 <- by(binc, binc$.imp, function(x) lm(bmi.z.2 ~ao, data = x, na.action='na.omit')) bmi.z.6 <- by(binc, binc$.imp, function(x) lm(bmi.z.6 ~ao, data = x, na.action='na.omit')) bmi.z.10 <- by(binc, binc$.imp, function(x) lm(bmi.z.10 ~ao, data = x, na.action='na.omit')) bmi.z.18 <- by(binc, binc$.imp, function(x) lm(bmi.z.18 ~ao, data = x, na.action='na.omit')) bmi.z.29 <- by(binc, binc$.imp, function(x) lm(bmi.z.29 ~ao, data = x, na.action='na.omit')) tab.bmi.z.0.02 <- summary(pool(as.mira(bmi.z.0.02))) tab.bmi.z.0.33 <- summary(pool(as.mira(bmi.z.0.33))) tab.bmi.z.1 <- summary(pool(as.mira(bmi.z.1))) tab.bmi.z.2 <- summary(pool(as.mira(bmi.z.2))) tab.bmi.z.6 <- summary(pool(as.mira(bmi.z.6))) tab.bmi.z.10 <- summary(pool(as.mira(bmi.z.10))) tab.bmi.z.18 <- summary(pool(as.mira(bmi.z.18))) tab.bmi.z.29 <- summary(pool(as.mira(bmi.z.29))) (bmi.z.0.33 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.0.33+bmi.z.0.02, data = x, na.action='na.omit')))))) (bmi.z.1 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.1+bmi.z.0.33, data = x, na.action='na.omit')))))) (bmi.z.2 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.2+bmi.z.1, data = x, na.action='na.omit')))))) (bmi.z.6 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.6+bmi.z.2, data = x, na.action='na.omit')))))) (bmi.z.10 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.10+bmi.z.6, data = x, na.action='na.omit')))))) (bmi.z.18 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.18+bmi.z.10, data = x, na.action='na.omit')))))) (bmi.z.6 <- summary(pool(as.mira(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.6+bmi.z.2, data = x, na.action='na.omit')))))) summary(by(bsiz, bsiz$.imp, function(x) lm(bmi.z.jv~bmi.z.6+bmi.z.2, data = x, na.action='na.omit'))[[2]]) # repeat the analysis for the broken stick bsiz.bs <- as.data.frame(t(t(ranef(fit.bmi)[[1]]) + fixef(fit.bmi))) dimnames(bsiz.bs)[[2]] <- names(bsiz)[5:13] binc.bs <- t(diff(t(bsiz.bs))) bsiz.bs <- data.frame(id=bsiz[bsiz$.imp==1,"id"], bsiz.bs) binc.bs <- data.frame(id=bsiz[bsiz$.imp==1,"id"], binc.bs) bsiz.bs <- merge(bsiz.bs, target) binc.bs <- merge(binc.bs, target) bmi.z.0.02.bs <- lm(bmi.z.0.02~ao, data = binc.bs, na.action='na.omit') bmi.z.0.33.bs <- lm(bmi.z.0.33~ao, data = binc.bs, na.action='na.omit') bmi.z.1.bs <- lm(bmi.z.1 ~ao, data = binc.bs, na.action='na.omit') bmi.z.2.bs <- lm(bmi.z.2 ~ao, data = binc.bs, na.action='na.omit') bmi.z.6.bs <- lm(bmi.z.6 ~ao, data = binc.bs, na.action='na.omit') bmi.z.10.bs <- lm(bmi.z.10 ~ao, data = binc.bs, na.action='na.omit') bmi.z.18.bs <- lm(bmi.z.18 ~ao, data = binc.bs, na.action='na.omit') summary(bmi.z.0.02.bs) summary(bmi.z.0.33.bs) summary(bmi.z.1.bs) summary(bmi.z.2.bs) summary(bmi.z.6.bs) summary(bmi.z.10.bs) summary(bmi.z.18.bs) a<-round(cor(bsiz[,-(1:4)],use="pair"),2) b<-round(cor(bsiz.bs,use="pair"),2) a2<-round(cor(bsiz[!is.na(bsiz$ao),-(1:4)],use="complete.obs"),2) b2<-round(cor(bsiz.bs[!is.na(bsiz$ao),],use="complete.obs"),2) ## ----ch12, child = "src/ch12.Rnw"---------------------------------------- ## ----init12, echo = FALSE, results = 'hide'------------------------------ opts_chunk$set(fig.path = 'fig/ch12-', self.contained = FALSE)