####################################################################################################
## EPID 7500
## Steve Bellan
## Fall 2017
## UGA
## Licensed for reuse with attribution as CC-BY NC
## (https://creativecommons.org/licenses/by-nc/4.0/legalcode)
####################################################################################################

## library() stops with an error if the package is missing; require() would only return FALSE and
## let the script fail later at the first tidyverse call.
library(tidyverse)

## Let's say we have two continuous variables:
##   - number of eggs per gram (epg) of a parasitic worm
##   - blood hemoglobin level (hb, g/dl)

## Simulate one study in which hemoglobin depends linearly on egg count and on sex.
##
## Arguments:
##   sampSize  number of individuals to simulate
##   epg_mu    epg is log-normal with meanlog = log(epg_mu), so epg_mu is the median of epg
##   epg_sigma sets the log-scale sd: sdlog = log(epg_mu) - log(epg_mu - epg_sigma);
##             must satisfy 0 <= epg_sigma < epg_mu for that log to be defined
##   hb_slope  change in expected hb per unit of epg
##   hb_int    expected hb for a female with epg = 0
##   hb_sd     sd of the normal noise added to expected hb
##   hb_male   amount added to expected hb for males
##   doPlot    if TRUE, print a scatterplot of hb against epg, colored by sex
##   browse    if TRUE, drop into browser() at the top of the function for debugging
##
## Returns a tibble with columns epg, sex, hb_expected, hb_noise and hb.
createMyStudy <- function(sampSize = 50, epg_mu = 40, epg_sigma = 10,
                          hb_slope = -1/20, hb_int = 14, hb_sd = 1.2,
                          hb_male = 2,
                          doPlot = TRUE, browse = FALSE) {
  if (browse) browser()
  ## Fail early: otherwise log(epg_mu - epg_sigma) is NaN and every simulated epg is NaN too.
  stopifnot(epg_mu > 0, epg_sigma >= 0, epg_sigma < epg_mu)
  dat <- tibble(epg = rlnorm(sampSize,
                             meanlog = log(epg_mu),
                             sdlog = log(epg_mu) - log(epg_mu - epg_sigma)),
                sex = sample(c('m', 'f'), size = sampSize, replace = TRUE))
  dat <- dat %>%
    mutate(hb_expected = hb_int + hb_slope * epg + hb_male * as.numeric(sex == 'm'))
  dat <- dat %>%
    mutate(hb_noise = rnorm(n(), 0, sd = hb_sd),
           hb = hb_expected + hb_noise)
  if (doPlot) {
    ## The y axis shows hb, so its limits must come from hb (not epg). The lower limit also
    ## follows the data so that negative simulated hb values are not dropped from the plot.
    p1 <- ggplot(dat, aes(epg, hb, col = sex)) +
      geom_point() +
      xlim(min(0, dat$epg), max(dat$epg, 100)) +
      ylim(min(0, dat$hb), max(20, dat$hb))
    print(p1)
  }
  return(dat)
}

createMyStudy(browse = FALSE, epg_sigma = 20)
createMyStudy(browse = FALSE, epg_sigma = 4)

## Fit hb ~ epg by ordinary least squares and collect the estimates.
##
## Arguments:
##   dat  data with numeric columns hb and epg (e.g. the output of createMyStudy)
##
## Returns a one-row tibble with the intercept (int) and slope (epg) estimates and the bounds of
## their 95% confidence intervals (intlower/intupper, epglower/epgupper).
createMyMod <- function(dat) {
  mod <- lm(hb ~ epg, data = dat)
  est <- coef(mod)
  ci <- confint(mod) ## computed once; rows are coefficients, columns are '2.5 %' and '97.5 %'
  modfit <- tibble(int = est[['(Intercept)']],
                   intlower = ci['(Intercept)', '2.5 %'],
                   intupper = ci['(Intercept)', '97.5 %'],
                   epg = est[['epg']],
                   epglower = ci['epg', '2.5 %'],
                   epgupper = ci['epg', '97.5 %'])
  return(modfit)
}

tempdat <- createMyStudy(browse = FALSE, epg_sigma = 4)
createMyMod(tempdat)

####################################################################################################
## Nov 9

## Create a function, simulateLM() that simulates a data set, then fits a linear model to it, and
## does this multiple times within a for loop.
## Use do.call() to be able to flexibly feed a list of arguments to simulateLM() that get fed to
## createMyStudy, and a separate list of arguments to createMyMod. Have the option to plot the main
## effects and confidence intervals from all the runs. The function returns a list containing 1) a
## tibble of the linear model estimates for each run, and 2) the estimate of statistical power
## across runs.
##
## Arguments:
##   nRuns     number of simulate-then-fit repetitions (at least 1)
##   studyArgs named list of arguments passed to studyFun via do.call()
##   modArgs   named list of extra arguments passed to modFun via do.call(), after the data
##   doPlot    if TRUE, print the epg estimate and its confidence interval for every run
##   studyFun  function that simulates one data set (defaults to createMyStudy)
##   modFun    function that takes a data set as its first argument and returns one row with
##             at least the columns epg, epglower and epgupper (defaults to createMyMod)
##
## Returns a list with elements:
##   estimates one row of model estimates per run, plus a 'run' index column
##   power     proportion of runs whose epg confidence interval excludes 0
simulateLM <- function(nRuns = 100, studyArgs = list(), modArgs = list(), doPlot = FALSE,
                       studyFun = createMyStudy, modFun = createMyMod) {
  stopifnot(is.numeric(nRuns), length(nRuns) == 1, nRuns >= 1,
            is.list(studyArgs), is.list(modArgs))

  ## Unless the caller asks otherwise, turn off the per-study scatterplot so that a simulation
  ## does not print one plot per run.
  if ("doPlot" %in% names(formals(studyFun)) && is.null(studyArgs[["doPlot"]])) {
    studyArgs[["doPlot"]] <- FALSE
  }

  ## Preallocate, then simulate and fit once per run.
  fits <- vector("list", nRuns)
  for (ii in seq_len(nRuns)) {
    dat <- do.call(studyFun, studyArgs)
    fits[[ii]] <- do.call(modFun, c(list(dat), modArgs))
  }
  estimates <- do.call(rbind, fits)
  estimates$run <- seq_len(nRuns)

  ## An effect counts as detected when the confidence interval for the epg slope excludes zero.
  detected <- estimates$epglower > 0 | estimates$epgupper < 0
  power <- mean(detected)

  if (doPlot) {
    plotDat <- estimates
    plotDat$detected <- detected
    p1 <- ggplot(plotDat, aes(run, epg, ymin = epglower, ymax = epgupper, col = detected)) +
      geom_hline(yintercept = 0, linetype = "dashed") +
      geom_pointrange() +
      labs(x = "run", y = "estimated effect of epg on hb")
    print(p1)
  }

  list(estimates = estimates, power = power)
}