# ============================================================================ # PSYC 434 — Lab 5: Average Treatment Effects # self-standing script — run from top to bottom # ============================================================================ # --- packages --------------------------------------------------------------- # install causalworkshop if needed: # install.packages("remotes") # remotes::install_github("go-bayes/causalworkshop") library(causalworkshop) library(grf) library(tidyverse) # --- simulate data ---------------------------------------------------------- d <- simulate_nzavs_data(n = 5000, seed = 2026) dim(d) names(d) # separate waves d0 <- d |> filter(wave == 0) # baseline confounders d1 <- d |> filter(wave == 1) # exposure assignment d2 <- d |> filter(wave == 2) # outcomes stopifnot(all(d0$id == d1$id), all(d0$id == d2$id)) # ground truth true_ate <- mean(d0$tau_community_wellbeing) cat("True ATE:", round(true_ate, 3), "\n") # --- naive ATE (biased) ---------------------------------------------------- fit_naive <- lm(d2$wellbeing ~ d1$community_group) naive_ate <- coef(fit_naive)[2] cat("Naive ATE:", round(naive_ate, 3), "\n") cat("True ATE: ", round(true_ate, 3), "\n") cat("Bias: ", round(naive_ate - true_ate, 3), "\n") # --- adjusted ATE (regression) --------------------------------------------- df <- data.frame( y = d2$wellbeing, a = d1$community_group, age = d0$age, male = d0$male, nz_european = d0$nz_european, education = d0$education, partner = d0$partner, employed = d0$employed, log_income = d0$log_income, nz_dep = d0$nz_dep, agreeableness = d0$agreeableness, conscientiousness = d0$conscientiousness, extraversion = d0$extraversion, neuroticism = d0$neuroticism, openness = d0$openness, community_t0 = d0$community_group, wellbeing_t0 = d0$wellbeing ) fit_adj <- lm(y ~ a + age + male + nz_european + education + partner + employed + log_income + nz_dep + agreeableness + conscientiousness + extraversion + neuroticism + openness + community_t0 + wellbeing_t0, data = df) adj_ate <- coef(fit_adj)["a"] cat("Adjusted ATE:", round(adj_ate, 3), "\n") cat("True ATE: ", round(true_ate, 3), "\n") cat("Bias: ", round(adj_ate - true_ate, 3), "\n") # --- g-computation by hand ------------------------------------------------- df_treated <- df df_treated$a <- 1 df_control <- df df_control$a <- 0 y_hat_treated <- predict(fit_adj, newdata = df_treated) y_hat_control <- predict(fit_adj, newdata = df_control) gcomp_ate <- mean(y_hat_treated - y_hat_control) cat("G-computation ATE:", round(gcomp_ate, 3), "\n") cat("True ATE: ", round(true_ate, 3), "\n") # --- causal forest ATE ------------------------------------------------------ covariate_cols <- c( "age", "male", "nz_european", "education", "partner", "employed", "log_income", "nz_dep", "agreeableness", "conscientiousness", "extraversion", "neuroticism", "openness", "community_t0", "wellbeing_t0" ) X <- as.matrix(df[, covariate_cols]) Y <- df$y W <- df$a cf <- causal_forest( X, Y, W, num.trees = 1000, honesty = TRUE, tune.parameters = "all", seed = 2026 ) ate_cf <- average_treatment_effect(cf) cat("Causal forest ATE:", round(ate_cf["estimate"], 3), "(SE:", round(ate_cf["std.err"], 3), ")\n") cat("True ATE: ", round(true_ate, 3), "\n") # --- compare all estimates -------------------------------------------------- results <- data.frame( method = c("Naive", "Adjusted regression", "G-computation", "Causal forest"), estimate = c(naive_ate, adj_ate, gcomp_ate, ate_cf["estimate"]), bias = c(naive_ate - true_ate, adj_ate - true_ate, gcomp_ate - true_ate, ate_cf["estimate"] - true_ate) ) results$estimate <- round(results$estimate, 3) results$bias <- round(results$bias, 3) print(results) cat("\nTrue ATE:", round(true_ate, 3), "\n")