# ============================================================================ # PSYC 434 — Lab 6: Conditional Average Treatment Effects # self-standing script — run from top to bottom # ============================================================================ # --- packages --------------------------------------------------------------- library(causalworkshop) library(grf) library(tidyverse) # --- why functional form matters -------------------------------------------- # simulate data with non-linear treatment effects d_nl <- simulate_nonlinear_data(n = 2000, seed = 2026) # compare four estimation methods result <- compare_ate_methods(d_nl) # compare RMSE for individual-level predictions print(result$summary) # --- individual treatment effects from causal forest ------------------------ # simulate NZAVS data (same as Lab 5) d <- simulate_nzavs_data(n = 5000, seed = 2026) d0 <- d |> filter(wave == 0) d1 <- d |> filter(wave == 1) d2 <- d |> filter(wave == 2) # construct matrices covariate_cols <- c( "age", "male", "nz_european", "education", "partner", "employed", "log_income", "nz_dep", "agreeableness", "conscientiousness", "extraversion", "neuroticism", "openness", "community_group", "wellbeing" ) X <- as.matrix(d0[, covariate_cols]) Y <- d2$wellbeing W <- d1$community_group # fit causal forest cf <- causal_forest( X, Y, W, num.trees = 1000, honesty = TRUE, tune.parameters = "all", seed = 2026 ) # predicted treatment effects for each individual tau_hat <- predict(cf)$predictions cat("Mean tau_hat: ", round(mean(tau_hat), 3), "\n") cat("SD tau_hat: ", round(sd(tau_hat), 3), "\n") cat("Range tau_hat: ", round(range(tau_hat), 3), "\n") # compare with true individual effects tau_true <- d0$tau_community_wellbeing cat("Correlation(tau_hat, tau_true):", round(cor(tau_hat, tau_true), 3), "\n") cat("RMSE:", round(sqrt(mean((tau_hat - tau_true)^2)), 3), "\n") # histogram of predicted treatment effects ggplot(data.frame(tau_hat = tau_hat), aes(x = tau_hat)) + geom_histogram(bins = 40, fill = "steelblue", alpha = 0.7) + geom_vline(xintercept = mean(tau_hat), colour = "red", linetype = "dashed") + labs( title = "Distribution of predicted treatment effects", x = expression(hat(tau)(x)), y = "Count" ) + theme_minimal() # --- test for heterogeneity ------------------------------------------------- cal_test <- test_calibration(cf) print(cal_test) # --- variable importance ---------------------------------------------------- var_imp <- variable_importance(cf) importance_df <- data.frame( variable = colnames(X), importance = as.numeric(var_imp) ) |> arrange(desc(importance)) print(importance_df) # --- subgroup analysis ------------------------------------------------------ # compare effects by extraversion high_extra <- tau_hat[d0$extraversion > 0] low_extra <- tau_hat[d0$extraversion <= 0] cat("Mean tau_hat (high extraversion):", round(mean(high_extra), 3), "\n") cat("Mean tau_hat (low extraversion): ", round(mean(low_extra), 3), "\n") cat("Difference: ", round(mean(high_extra) - mean(low_extra), 3), "\n") # compare effects by partner status partnered <- tau_hat[d0$partner == 1] unpartnered <- tau_hat[d0$partner == 0] cat("\nMean tau_hat (partnered): ", round(mean(partnered), 3), "\n") cat("Mean tau_hat (unpartnered):", round(mean(unpartnered), 3), "\n") cat("Difference: ", round(mean(partnered) - mean(unpartnered), 3), "\n") # --- scatter plot: predicted vs true effects -------------------------------- ggplot(data.frame(true = tau_true, predicted = tau_hat), aes(x = true, y = predicted)) + geom_point(alpha = 0.1, colour = "steelblue") + geom_abline(slope = 1, intercept = 0, linetype = "dashed", colour = "red") + labs( title = "Predicted vs true individual treatment effects", x = expression(tau(x)), y = expression(hat(tau)(x)) ) + theme_minimal()