# ============================================================================ # PSYC 434 — Lab 8: RATE and QINI Curves # self-standing script — run from top to bottom # ============================================================================ # --- packages --------------------------------------------------------------- library(causalworkshop) library(grf) library(tidyverse) # --- fit causal forest (from Labs 5-6) -------------------------------------- d <- simulate_nzavs_data(n = 5000, seed = 2026) d0 <- d |> filter(wave == 0) d1 <- d |> filter(wave == 1) d2 <- d |> filter(wave == 2) covariate_cols <- c( "age", "male", "nz_european", "education", "partner", "employed", "log_income", "nz_dep", "agreeableness", "conscientiousness", "extraversion", "neuroticism", "openness", "community_group", "wellbeing" ) X <- as.matrix(d0[, covariate_cols]) Y <- d2$wellbeing W <- d1$community_group cf <- causal_forest( X, Y, W, num.trees = 1000, honesty = TRUE, tune.parameters = "all", seed = 2026 ) tau_hat <- predict(cf)$predictions # --- rank individuals by predicted benefit ---------------------------------- n <- length(tau_hat) tau_order <- order(tau_hat, decreasing = TRUE) tau_sorted <- tau_hat[tau_order] cat("Top 5 predicted effects: ", round(head(tau_sorted, 5), 3), "\n") cat("Bottom 5 predicted effects:", round(tail(tau_sorted, 5), 3), "\n") cat("Overall mean: ", round(mean(tau_hat), 3), "\n") # --- RATE curve ------------------------------------------------------------- rates <- seq(0.05, 1.00, by = 0.05) rate_results <- tibble( rate = numeric(), avg_tau_targeted = numeric(), gain_over_random = numeric() ) for (r in rates) { n_targeted <- floor(r * n) targeted_idx <- tau_order[seq_len(n_targeted)] avg_targeted <- mean(tau_hat[targeted_idx]) gain <- avg_targeted - mean(tau_hat) rate_results <- bind_rows( rate_results, tibble(rate = r, avg_tau_targeted = avg_targeted, gain_over_random = gain) ) } print(rate_results |> mutate(across(where(is.numeric), \(x) round(x, 3)))) # plot RATE curve ggplot(rate_results, aes(x = rate, y = gain_over_random)) + geom_line(colour = "#E69F00", linewidth = 1) + geom_point(colour = "#E69F00", size = 2) + scale_x_continuous(labels = scales::percent_format()) + labs( title = "RATE curve: gain from targeting", x = "Targeting rate (proportion treated)", y = "Gain over random assignment" ) + theme_minimal() # --- QINI curve ------------------------------------------------------------- qini_results <- tibble( percentile = numeric(), cumulative_gain = numeric() ) for (p in rates) { n_top <- floor(p * n) top_idx <- tau_order[seq_len(n_top)] cum_gain <- sum(tau_hat[top_idx]) - p * sum(tau_hat) qini_results <- bind_rows( qini_results, tibble(percentile = p, cumulative_gain = cum_gain) ) } print(qini_results |> mutate(across(where(is.numeric), \(x) round(x, 3)))) # plot QINI curve ggplot(qini_results, aes(x = percentile, y = cumulative_gain)) + geom_line(colour = "#56B4E9", linewidth = 1) + geom_point(colour = "#56B4E9", size = 2) + scale_x_continuous(labels = scales::percent_format()) + labs( title = "QINI curve: cumulative targeting gain", x = "Population percentile", y = "Cumulative gain over random" ) + theme_minimal() # area under QINI curve auqc <- sum(qini_results$cumulative_gain * 0.05) cat("Area Under QINI Curve (AUQC):", round(auqc, 3), "\n") # --- targeting efficiency --------------------------------------------------- top_10_idx <- tau_order[seq_len(floor(0.10 * n))] top_20_idx <- tau_order[seq_len(floor(0.20 * n))] top_50_idx <- tau_order[seq_len(floor(0.50 * n))] efficiency <- tibble( group = c("Top 10%", "Top 20%", "Top 50%", "Everyone"), avg_effect = c( mean(tau_hat[top_10_idx]), mean(tau_hat[top_20_idx]), mean(tau_hat[top_50_idx]), mean(tau_hat) ), lift_vs_random = c( mean(tau_hat[top_10_idx]) / mean(tau_hat), mean(tau_hat[top_20_idx]) / mean(tau_hat), mean(tau_hat[top_50_idx]) / mean(tau_hat), 1 ) ) |> mutate(efficiency_gain_pct = round((lift_vs_random - 1) * 100, 1)) print(efficiency |> mutate(across(where(is.numeric), \(x) round(x, 3)))) # --- covariate profile of high-benefit individuals -------------------------- top_10_data <- d0[tau_order[seq_len(floor(0.10 * n))], ] everyone <- d0 cat("Top 10% vs everyone:\n") cat(" Extraversion: ", round(mean(top_10_data$extraversion), 2), "vs", round(mean(everyone$extraversion), 2), "\n") cat(" Neuroticism: ", round(mean(top_10_data$neuroticism), 2), "vs", round(mean(everyone$neuroticism), 2), "\n") cat(" Partner (prop): ", round(mean(top_10_data$partner), 2), "vs", round(mean(everyone$partner), 2), "\n") cat(" Agreeableness: ", round(mean(top_10_data$agreeableness), 2), "vs", round(mean(everyone$agreeableness), 2), "\n")