#' @title Compute Aggregated Treatment Effect Parameters #' #' @description Does the heavy lifting on computing aggregated group-time #' average treatment effects #' #' @inheritParams att_gt #' @inheritParams aggte #' @param call The function call to aggte #' #' @return [`AGGTEobj`] object #' #' @keywords internal #' #' @export compute.aggte <- function(MP, type = "group", balance_e = NULL, min_e = -Inf, max_e = Inf, na.rm = FALSE, bstrap = NULL, biters = NULL, cband = NULL, alp = NULL, clustervars = NULL, call = NULL) { if (!inherits(MP, "MP")) { stop("MP must be an MP object produced by att_gt().") } #----------------------------------------------------------------------------- # unpack MP object #----------------------------------------------------------------------------- # load parameters group <- MP$group t <- MP$t att <- MP$att dp <- MP$DIDparams inffunc1 <- MP$inffunc n <- MP$n validate_logical_scalar(na.rm, "na.rm") validate_choice_scalar( type, "type", c("simple", "dynamic", "group", "calendar"), '`type` must be one of c("simple", "dynamic", "group", "calendar")' ) validate_numeric_scalar(min_e, "min_e") validate_numeric_scalar(max_e, "max_e") if (!is.null(balance_e)) validate_nonnegative_whole_number(balance_e, "balance_e") # aggte() needs the influence functions to aggregate and to compute standard errors. # They are absent when att_gt() was run with compute_inffunc = FALSE (point estimates only). if (is.null(inffunc1)) { stop("This att_gt() result was produced with compute_inffunc = FALSE (point estimates ", "only), so it has no influence functions and cannot be aggregated by aggte(). ", "Re-run att_gt() with compute_inffunc = TRUE (the default) to use aggte().") } if (length(group) != length(t) || length(att) != length(group)) { stop("MP object has inconsistent group, time, and att lengths.") } if (NCOL(inffunc1) != length(att)) { stop("MP object has inconsistent influence-function columns and att estimates.") } if (!is.null(n) && NROW(inffunc1) != n) { stop("MP object has inconsistent influence-function rows and n.") } gname <- dp$gname tname <- dp$tname idname <- dp$idname panel <- dp$panel if (is.null(clustervars)) { clustervars <- dp$clustervars } # Aggregation-level clustering reuses the per-unit cluster information that att_gt() stored when # clustervars was supplied to it. If clustering is requested here (inherited from the att_gt object or # via an aggte-level clustervars override) but that information is unavailable -- because the cluster # variable was not passed to att_gt() (so neither the cluster column nor cluster_vector were retained), # or because an override names a different variable than att_gt clustered on -- it cannot be honored. # Warn and fall back to non-clustered standard errors, instead of silently returning the i.i.d. SE # (analytic path) or erroring in mboot() (bootstrap path). To make this a hard error instead, replace # warning() with stop() below. extra_cl_req <- clustervars[!(clustervars %in% c(idname, ""))] if (length(extra_cl_req) > 0) { cv <- dp$cluster_vector can_cluster <- !is.null(cv) && length(cv) == nrow(inffunc1) && identical(extra_cl_req, dp$cluster_vector_var) if (!can_cluster) { warning(paste0( "Clustered standard errors were requested in aggte() (clustervars = '", paste(extra_cl_req, collapse = "', '"), "'), but the cluster information needed is not available ", "in this att_gt object", if (!is.null(dp$cluster_vector_var)) { paste0(" (att_gt() clustered on '", paste(dp$cluster_vector_var, collapse = "', '"), "', and aggte() cannot switch to a different cluster variable)") } else { " (clustervars was not supplied to att_gt(), so aggte() cannot introduce clustering)" }, ". Reporting standard errors that do NOT account for clustering; re-run att_gt() with ", "clustervars = '", paste(extra_cl_req, collapse = "', '"), "' for clustered inference.")) clustervars <- NULL } } if (is.null(bstrap)) { bstrap <- dp$bstrap } if (is.null(biters)) { biters <- dp$biters } if (is.null(alp)) { alp <- dp$alp } if (is.null(cband)) { cband <- dp$cband } validate_logical_scalar(bstrap, "bstrap") validate_logical_scalar(cband, "cband") validate_alp(alp) if (bstrap || cband) validate_positive_whole_number(biters, "biters") if (isTRUE(dp$faster_mode)) { tlist <- dp$time_periods glist <- dp$treated_groups } else { data <- as.data.frame(dp$data) tlist <- dp$tlist glist <- dp$glist } # In faster_mode, the full long data is only needed on the two branches below that # actually consume `data` (most faster-mode panel calls use time_invariant_data # instead). Materialize it lazily there, recoding gname Inf -> 0 (the old # convention) on the converted copy so the data.table stored inside the user's # MP object is never modified by reference. faster_mode_long_data <- function() { data <- as.data.frame(dp$data) data[data[, gname] == Inf, gname] <- 0 data } # overwrite MP objects (so we can actually compute bootstrap) MP$DIDparams$clustervars <- clustervars MP$DIDparams$bstrap <- bstrap MP$DIDparams$biters <- biters MP$DIDparams$alp <- alp MP$DIDparams$cband <- cband dp <- MP$DIDparams if (na.rm) { notna <- !is.na(att) if (!any(notna)) { stop("All att_gt() estimates are NA. Cannot compute aggregated treatment effects. ", "Check your att_gt() results for estimation problems (e.g., too few observations, ", "singular matrices, or overlap violations).") } group <- group[notna] t <- t[notna] att <- att[notna] inffunc1 <- inffunc1[, notna, drop = FALSE] # tlist <- sort(unique(t)) glist <- sort(unique(group)) # If aggte is of the group type, ensure we have non-missing post-treatment ATTs for each group if (type == "group") { # Get the groups that have some non-missing ATT(g,t) in post-treatmemt periods gnotna <- sapply(glist, function(g) { # look at post-treatment periods for group g, restricted to the SAME # max_e window used by the group-specific estimate below (selective.att.g / # selective.se.inner). Without the (t <= group + max_e) condition a group # whose only non-NA ATT(g,t) lies PAST max_e would pass this filter but then # have an all-NA (hence, under na.rm, empty) selection in the estimate, # erroring in get_agg_inf_func(). (max_e defaults to Inf, so this is a no-op # unless the user sets a finite max_e.) whichg <- which((group == g) & (g <= t) & (t <= (group + max_e))) attg <- att[whichg] group_select <- !is.na(mean(attg)) return(group_select) }) gnotna <- glist[gnotna] # indicator for not all post-treatment ATT(g,t) missing not_all_na <- group %in% gnotna if (!any(not_all_na)) { stop("No groups have non-missing post-treatment att_gt() estimates. ", "Cannot compute group aggregation. Check your att_gt() results.") } # Re-do the na.rm thing to update the groups group <- group[not_all_na] t <- t[not_all_na] att <- att[not_all_na] inffunc1 <- inffunc1[, not_all_na, drop = FALSE] # tlist <- sort(unique(t)) glist <- sort(unique(group)) } } if ((na.rm == FALSE) && base::anyNA(att)) stop("Missing values found in att_gt() estimates. To remove them, set `na.rm = TRUE`.") # data from first period # ifelse(panel, # dta <- data[ data[,tname]==tlist[1], ], # dta <- data # ) if (panel) { if (isTRUE(dp$faster_mode) && !is.null(dp$time_invariant_data)) { # use pre-computed time-invariant data (one row per unit) instead of subsetting dta <- as.data.frame(dp$time_invariant_data) # map Inf back to 0 for gname to match the old convention dta[dta[, gname] == Inf, gname] <- 0 } else { if (isTRUE(dp$faster_mode)) data <- faster_mode_long_data() # data from first period dta <- data[data[, tname] == tlist[1], ] } } else { if (isTRUE(dp$faster_mode)) data <- faster_mode_long_data() # Aggregate to one row per unit. aggregate() returns rows sorted by idname, but the influence # function (inffunc1) is in the data's *first-appearance* unit order -- which differs from sorted for # unbalanced panels under faster_mode. Restore that order so the estimated-weight influence term (wif) # is added to the matching units in get_agg_inf_func(); otherwise wif lands on the wrong units and the # aggregated standard error is wrong (the point estimate is unaffected because wif is mean-zero). dta_agg <- base::suppressWarnings(stats::aggregate(data, list((data[, idname])), mean)) dta <- dta_agg[match(unique(data[, idname]), dta_agg[, 1L]), -1L, drop = FALSE] } #----------------------------------------------------------------------------- # data organization and recoding #----------------------------------------------------------------------------- # do some recoding to make sure time periods are 1 unit apart # and then put these back together at the end originalt <- t originalgroup <- group originalglist <- glist originaltlist <- tlist # In case g's are not part of tlist originalgtlist <- sort(unique(c(originaltlist, originalglist))) uniquet <- seq(1, length(unique(originalgtlist))) # lookup vectors for vectorized mapping between original and new t values orig_with_zero <- c(originalgtlist, 0) new_with_zero <- c(uniquet, 0) # vectorized function to switch from "new" t values to original t values t2orig <- function(t) { orig_with_zero[match(t, new_with_zero)] } # vectorized function to switch from original t values to new sequential t values orig2t_vec <- function(orig) { new_with_zero[match(orig, orig_with_zero)] } t <- orig2t_vec(originalt) group <- orig2t_vec(originalgroup) glist <- orig2t_vec(originalglist) tlist <- unique(t) maxT <- max(t) # Set the weights if (".w" %in% names(dta)) { weights.ind <- dta$.w } else if (isTRUE(dp$faster_mode)) { weights.ind <- dta$weights } else { weights.ind <- dta$.w } # we can work in overall probabilities because conditioning will cancel out # cause it shows up in numerator and denominator gvar <- dta[[gname]] # extract the group column once, reused below pg <- sapply(originalglist, function(g) mean(weights.ind * (gvar == g))) # length of this is equal to number of groups pgg <- pg # same but length is equal to the number of ATT(g,t) pg <- pg[match(group, glist)] # which group time average treatment effects are post-treatment keepers <- which(group <= t & t <= (group + max_e)) ### added second condition to allow for limit on longest period included in att # n x 1 vector of group variable G <- orig2t_vec(gvar) #----------------------------------------------------------------------------- # Compute the simple ATT summary #----------------------------------------------------------------------------- if (type == "simple") { # simple att # averages all post-treatment ATT(g,t) with weights # given by group size simple.att <- sum(att[keepers] * pg[keepers]) / (sum(pg[keepers])) if (is.nan(simple.att)) simple.att <- NA # get the part of the influence function coming from estimated weights simple.wif <- wif(keepers, pg, weights.ind, G, group) # get the overall influence function simple.if <- get_agg_inf_func( att = att, inffunc1 = inffunc1, whichones = keepers, weights.agg = pg[keepers] / sum(pg[keepers]), wif = simple.wif ) # Make it as vector simple.if <- as.numeric(simple.if) # get standard errors from overall influence function simple.se <- getSE(simple.if, dp) if (!is.na(simple.se)) { if (simple.se <= sqrt(.Machine$double.eps) * 10) simple.se <- NA } return(AGGTEobj( overall.att = simple.att, overall.se = simple.se, type = type, inf.function = list(simple.att = simple.if), call = call, DIDparams = dp )) } #----------------------------------------------------------------------------- # Compute the group (i.e., selective) treatment timing estimators #----------------------------------------------------------------------------- if (type == "group") { # get group specific ATTs # note: there are no estimated weights here selective.att.g <- sapply(glist, function(g) { # look at post-treatment periods for group g whichg <- which((group == g) & (g <= t) & (t <= (group + max_e))) ### added last condition to allow for limit on longest period included in att attg <- att[whichg] mean(attg) }) selective.att.g[is.nan(selective.att.g)] <- NA # get standard errors for each group specific ATT selective.se.inner <- lapply(glist, function(g) { whichg <- which((group == g) & (g <= t) & (t <= (group + max_e))) ### added last condition to allow for limit on longest period included in att inf.func.g <- as.numeric(get_agg_inf_func( att = att, inffunc1 = inffunc1, whichones = whichg, weights.agg = pg[whichg] / sum(pg[whichg]), wif = NULL )) se.g <- getSE(inf.func.g, dp) list(inf.func = inf.func.g, se = se.g) }) # recover standard errors separately by group selective.se.g <- unlist(BMisc::get_list_element(selective.se.inner, "se")) selective.se.g[selective.se.g <= sqrt(.Machine$double.eps) * 10] <- NA # recover influence function separately by group selective.inf.func.g <- simplify2array(BMisc::get_list_element(selective.se.inner, "inf.func")) # use multiplier bootstrap (across groups) to get critical value # for constructing uniform confidence bands selective.crit.val <- stats::qnorm(1 - alp / 2) if (dp$cband == TRUE) { if (dp$bstrap == FALSE) { warning("Used bootstrap procedure to compute simultaneous confidence band") } selective.crit.val <- mboot(selective.inf.func.g, dp, return_V = FALSE)$crit.val if (is.na(selective.crit.val) | is.infinite(selective.crit.val)) { warning("Simultaneous critical value is NA, likely because the standard errors could not be computed. Falling back to pointwise confidence intervals.") selective.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (selective.crit.val < stats::qnorm(1 - alp / 2)) { warning("The simultaneous confidence band is narrower than the pointwise confidence interval, which is unexpected. Falling back to pointwise confidence intervals.") selective.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (selective.crit.val >= 7) { warning("Simultaneous critical value is very large, suggesting it may be unreliable. This typically happens when the number of observations per group is small and/or there is not much variation in outcomes. Consider using pointwise confidence intervals instead (set `cband = FALSE`).") } } # get overall att under selective treatment timing # (here use pgg instead of pg because we can just look at each group) selective.att <- sum(selective.att.g * pgg) / sum(pgg) # account for having to estimate pgg in the influence function selective.wif <- wif( keepers = 1:length(glist), pg = pgg, weights.ind = weights.ind, G = G, group = glist ) # get overall influence function selective.inf.func <- get_agg_inf_func( att = selective.att.g, inffunc1 = selective.inf.func.g, whichones = (1:length(glist)), weights.agg = pgg / sum(pgg), wif = selective.wif ) selective.inf.func <- as.numeric(selective.inf.func) # get overall standard error selective.se <- getSE(selective.inf.func, dp) if (!is.na(selective.se)) { if ((selective.se <= sqrt(.Machine$double.eps) * 10)) selective.se <- NA } return(AGGTEobj( overall.att = selective.att, overall.se = selective.se, type = type, egt = originalglist, att.egt = selective.att.g, se.egt = selective.se.g, crit.val.egt = selective.crit.val, inf.function = list( selective.inf.func.g = selective.inf.func.g, selective.inf.func = selective.inf.func ), call = call, DIDparams = dp )) } #----------------------------------------------------------------------------- # Compute the event-study estimators #----------------------------------------------------------------------------- if (type == "dynamic") { # event times # this looks at all available event times # note: event times can be negative here. # note: event time = 0 corresponds to "on impact" # eseq <- unique(t-group) eseq <- unique(originalt - originalgroup) eseq <- eseq[order(eseq)] # if the user specifies balance_e, then we are going to # drop some event times and some groups; if not, we just # keep everything (that is what this variable is for) include.balanced.gt <- rep(TRUE, length(originalgroup)) # if we balance the sample with resepect to event time if (!is.null(balance_e)) { include.balanced.gt <- (t2orig(maxT) - originalgroup >= balance_e) eseq <- unique(originalt[include.balanced.gt] - originalgroup[include.balanced.gt]) eseq <- eseq[order(eseq)] eseq <- eseq[(eseq <= balance_e) & (eseq >= balance_e - t2orig(maxT) + t2orig(1))] } # only looks at some event times eseq <- eseq[(eseq >= min_e) & (eseq <= max_e)] # Guard the empty window (e.g. min_e/max_e exclude every event time): # downstream sapply(eseq, ...) would otherwise return a list and fail later # with a cryptic "Not compatible with requested type" error. if (length(eseq) == 0) { stop("No event times fall within the requested window. ", "Adjust 'min_e'/'max_e' (and 'balance_e') so at least one event ", "time is included.") } # compute atts that are specific to each event time dynamic.att.e <- sapply(eseq, function(e) { # keep att(g,t) for the right g&t as well as ones that # are not trimmed out from balancing the sample whiche <- which((originalt - originalgroup == e) & (include.balanced.gt)) atte <- att[whiche] pge <- pg[whiche] / (sum(pg[whiche])) sum(atte * pge) }) # compute standard errors for dynamic effects dynamic.se.inner <- lapply(eseq, function(e) { whiche <- which((originalt - originalgroup == e) & (include.balanced.gt)) pge <- pg[whiche] / (sum(pg[whiche])) wif.e <- wif(whiche, pg, weights.ind, G, group) inf.func.e <- as.numeric(get_agg_inf_func( att = att, inffunc1 = inffunc1, whichones = whiche, weights.agg = pge, wif = wif.e )) se.e <- getSE(inf.func.e, dp) list(inf.func = inf.func.e, se = se.e) }) dynamic.se.e <- unlist(BMisc::get_list_element(dynamic.se.inner, "se")) dynamic.se.e[dynamic.se.e <= sqrt(.Machine$double.eps) * 10] <- NA dynamic.inf.func.e <- simplify2array(BMisc::get_list_element(dynamic.se.inner, "inf.func")) dynamic.crit.val <- stats::qnorm(1 - alp / 2) if (dp$cband == TRUE) { if (dp$bstrap == FALSE) { warning("Used bootstrap procedure to compute simultaneous confidence band") } dynamic.crit.val <- mboot(dynamic.inf.func.e, dp, return_V = FALSE)$crit.val if (is.na(dynamic.crit.val) | is.infinite(dynamic.crit.val)) { warning("Simultaneous critical value is NA, likely because the standard errors could not be computed. Falling back to pointwise confidence intervals.") dynamic.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (dynamic.crit.val < stats::qnorm(1 - alp / 2)) { warning("The simultaneous confidence band is narrower than the pointwise confidence interval, which is unexpected. Falling back to pointwise confidence intervals.") dynamic.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (dynamic.crit.val >= 7) { warning("Simultaneous critical value is very large, suggesting it may be unreliable. This typically happens when the number of observations per group is small and/or there is not much variation in outcomes. Consider using pointwise confidence intervals instead (set `cband = FALSE`).") } } # get overall average treatment effect # by averaging over positive dynamics epos <- eseq >= 0 dynamic.att <- mean(dynamic.att.e[epos]) dynamic.inf.func <- get_agg_inf_func( att = dynamic.att.e[epos], inffunc1 = as.matrix(dynamic.inf.func.e[, epos]), whichones = seq_len(sum(epos)), weights.agg = (rep(1 / sum(epos), sum(epos))), wif = NULL ) dynamic.inf.func <- as.numeric(dynamic.inf.func) dynamic.se <- getSE(dynamic.inf.func, dp) if (!is.na(dynamic.se)) { if (dynamic.se <= sqrt(.Machine$double.eps) * 10) dynamic.se <- NA } return(AGGTEobj( overall.att = dynamic.att, overall.se = dynamic.se, type = type, egt = eseq, att.egt = dynamic.att.e, se.egt = dynamic.se.e, crit.val.egt = dynamic.crit.val, inf.function = list( dynamic.inf.func.e = dynamic.inf.func.e, dynamic.inf.func = dynamic.inf.func ), call = call, min_e = min_e, max_e = max_e, balance_e = balance_e, DIDparams = dp )) } #----------------------------------------------------------------------------- # calendar time effects #----------------------------------------------------------------------------- if (type == "calendar") { # min_e / max_e / balance_e have no effect on calendar-time aggregation; # warn if the user set them so the (correct) unrestricted result is not # mistaken for a windowed one. if (is.finite(max_e) || is.finite(min_e) || !is.null(balance_e)) { warning("`min_e`, `max_e`, and `balance_e` are ignored for type = \"calendar\"; ", "returning the unrestricted calendar-time effects.") } # drop time periods where no one is treated yet # (can't get treatment effects in those periods) minG <- min(group) calendar.tlist <- tlist[tlist >= minG] # Drop calendar periods with no non-missing post-treatment ATT(g,t) cell # (e.g. after na.rm removed all of a period's cells, common with unbalanced # panels that have genuinely-NA cells). Analogous to the `gnotna` guard in # the group branch above; without it wif()/get_agg_inf_func() are called on # an empty selection and error with a cryptic message. has_post <- vapply(calendar.tlist, function(t1) any((t == t1) & (group <= t)), logical(1)) calendar.tlist <- calendar.tlist[has_post] if (length(calendar.tlist) == 0) { stop("No calendar periods have non-missing post-treatment att_gt() ", "estimates. Cannot compute calendar aggregation. Check your ", "att_gt() results.") } # calendar time specific atts calendar.att.t <- sapply(calendar.tlist, function(t1) { # look at post-treatment periods for group g whicht <- which((t == t1) & (group <= t)) attt <- att[whicht] pgt <- pg[whicht] / (sum(pg[whicht])) sum(pgt * attt) }) # get standard errors and influence functions # for each time specific att calendar.se.inner <- lapply(calendar.tlist, function(t1) { whicht <- which((t == t1) & (group <= t)) pgt <- pg[whicht] / (sum(pg[whicht])) wif.t <- wif( keepers = whicht, pg = pg, weights.ind = weights.ind, G = G, group = group ) inf.func.t <- as.numeric(get_agg_inf_func( att = att, inffunc1 = inffunc1, whichones = whicht, weights.agg = pgt, wif = wif.t )) se.t <- getSE(inf.func.t, dp) list(inf.func = inf.func.t, se = se.t) }) # recover standard errors separately by time calendar.se.t <- unlist(BMisc::get_list_element(calendar.se.inner, "se")) calendar.se.t[calendar.se.t <= sqrt(.Machine$double.eps) * 10] <- NA # recover influence function separately by time calendar.inf.func.t <- simplify2array(BMisc::get_list_element(calendar.se.inner, "inf.func")) # use multiplier boostrap (across groups) to get critical value # for constructing uniform confidence bands calendar.crit.val <- stats::qnorm(1 - alp / 2) if (dp$cband == TRUE) { if (dp$bstrap == FALSE) { warning("Used bootstrap procedure to compute simultaneous confidence band") } calendar.crit.val <- mboot(calendar.inf.func.t, dp, return_V = FALSE)$crit.val if (is.na(calendar.crit.val) | is.infinite(calendar.crit.val)) { warning("Simultaneous critical value is NA, likely because the standard errors could not be computed. Falling back to pointwise confidence intervals.") calendar.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (calendar.crit.val < stats::qnorm(1 - alp / 2)) { warning("The simultaneous confidence band is narrower than the pointwise confidence interval, which is unexpected. Falling back to pointwise confidence intervals.") calendar.crit.val <- stats::qnorm(1 - alp / 2) dp$cband <- FALSE } if (calendar.crit.val >= 7) { warning("Simultaneous critical value is very large, suggesting it may be unreliable. This typically happens when the number of observations per group is small and/or there is not much variation in outcomes. Consider using pointwise confidence intervals instead (set `cband = FALSE`).") } } # get overall att under calendar time effects # this is just average over all time periods calendar.att <- mean(calendar.att.t) # get overall influence function calendar.inf.func <- get_agg_inf_func( att = calendar.att.t, inffunc1 = calendar.inf.func.t, whichones = (1:length(calendar.tlist)), weights.agg = rep(1 / length(calendar.tlist), length(calendar.tlist)), wif = NULL ) calendar.inf.func <- as.numeric(calendar.inf.func) # get overall standard error calendar.se <- getSE(calendar.inf.func, dp) if (!is.na(calendar.se)) { if (calendar.se <= sqrt(.Machine$double.eps) * 10) calendar.se <- NA } return(AGGTEobj( overall.att = calendar.att, overall.se = calendar.se, type = type, egt = t2orig(calendar.tlist), att.egt = calendar.att.t, se.egt = calendar.se.t, crit.val.egt = calendar.crit.val, inf.function = list( calendar.inf.func.t = calendar.inf.func.t, calendar.inf.func = calendar.inf.func ), call = call, DIDparams = dp )) } } #----------------------------------------------------------------------------- # Internal functions for getting standard errors #----------------------------------------------------------------------------- #' @title Compute extra term in influence function due to estimating weights #' #' @description A function to compute the extra term that shows up in the #' influence function for aggregated treatment effect parameters #' due to estimating the weights #' #' @param keepers a vector of indices for which group-time average #' treatment effects are used to compute a particular aggregated parameter #' @param pg a vector with same length as total number of group-time average #' treatment effects that contains the probability of being in particular group #' @param weights.ind additional sampling weights (nx1) #' @param G vector containing which group a unit belongs to (nx1) #' @param group vector of groups #' #' @return nxk influence function matrix #' #' @keywords internal wif <- function(keepers, pg, weights.ind, G, group) { # note: weights are all of the form P(G=g|cond)/sum_cond(P(G=g|cond)) # this is equal to P(G=g)/sum_cond(P(G=g)) which simplifies things here # The (n x k) matrix [ weights.ind * 1{G == group[k]} - pg[k] ] appears in both # the numerator and the denominator terms below; build it once and reuse it. # This is identical to the previous code, which constructed it twice via two # separate sapply() calls. sum(pg[keepers]) is likewise hoisted out of the loop. # No keepers (e.g. an empty post-treatment selection under na.rm or a # min_e/max_e window that excludes all cells): return a 0-column influence # matrix so the caller's get_agg_inf_func() emits its clean "no valid # estimates" error instead of `list() / 0` throwing a cryptic # "non-numeric argument to binary operator" here. if (length(keepers) == 0L) { return(matrix(numeric(0), nrow = length(weights.ind), ncol = 0L)) } Spg <- sum(pg[keepers]) centered <- sapply(keepers, function(k) { weights.ind * 1 * BMisc::TorF(G == group[k]) - pg[k] }) # effect of estimating weights in the numerator if1 <- centered / Spg # effect of estimating weights in the denominator if2 <- base::rowSums(centered) %*% t(pg[keepers] / (Spg^2)) # return the influence function for the weights if1 - if2 } #' @title Get an influence function for particular aggregate parameters #' #' @title This is a generic internal function for combining influence #' functions across ATT(g,t)'s to return an influence function for #' various aggregated treatment effect parameters. #' #' @param att vector of group-time average treatment effects #' @param inffunc1 influence function for all group-time average treatment effects #' (matrix) #' @param whichones which elements of att will be used to compute the aggregated #' treatment effect parameter #' @param weights.agg the weights to apply to each element of att(whichones); #' should have the same dimension as att(whichones) #' @param wif extra influence function term coming from estimating the weights; #' should be n x k matrix where k is dimension of whichones #' #' @return nx1 influence function #' #' @keywords internal get_agg_inf_func <- function(att, inffunc1, whichones, weights.agg, wif = NULL) { # enforce weights are in matrix form weights.agg <- as.matrix(weights.agg) # check dimensions if (length(whichones) == 0) { stop("No valid att_gt() estimates found for this aggregation. ", "This may happen if all estimates for a particular group or time period are NA.") } if (max(whichones) > ncol(inffunc1)) { stop("Internal error: influence function column indices (", max(whichones), ") exceed available columns (", ncol(inffunc1), "). Please report this as a bug.") } # multiplies influence function times weights and sums to get vector of weighted IF (of length n) thisinffunc <- inffunc1[, whichones, drop = FALSE] %*% weights.agg # Incorporate influence function of the weights if (!is.null(wif)) { thisinffunc <- thisinffunc + wif %*% as.matrix(att[whichones]) } # return influence function return(thisinffunc) } #' @title Take influence function and return standard errors #' #' @description Function to take an nx1 influence function and return #' a standard error #' #' @param thisinffunc An influence function #' @inheritParams compute.aggte #' #' @return scalar standard error #' #' @keywords internal getSE <- function(thisinffunc, DIDparams = NULL) { alp <- .05 bstrap <- FALSE n <- length(thisinffunc) if (!is.null(DIDparams)) { bstrap <- DIDparams$bstrap alp <- DIDparams$alp cband <- DIDparams$cband } if (bstrap) { bout <- mboot(thisinffunc, DIDparams, return_V = FALSE) return(bout$se) } else { # Analytical cluster-robust SE (no bootstrap): cluster sums of the aggregated influence function, # matching the cluster-sum aggregation of the multiplier bootstrap (CS 2021, Remark 10). Falls back to # the i.i.d. form when there is no cluster variable beyond idname or cluster identifiers are unavailable. # dp$cluster_vector is set by att_gt() for both faster_mode = TRUE and FALSE. cv <- if (!is.null(DIDparams)) DIDparams$cluster_vector else NULL extra_cl <- if (!is.null(DIDparams)) DIDparams$clustervars[!(DIDparams$clustervars %in% c(DIDparams$idname, ""))] else character(0) if (length(extra_cl) > 0 && !is.null(cv) && length(cv) == length(thisinffunc)) { S <- rowsum(thisinffunc, cv) return(sqrt(sum(S^2)) / n) } return(sqrt(mean((thisinffunc)^2) / n)) } }