---
title: "Lab 6 Tutorial | PAF 516"
subtitle: "Spatio-Temporal Analysis — LISA Trajectories & Space-Time Moran's I"
format:
  html:
    theme: readable
    highlight-style: tango
    toc: true
    toc-title: "Steps"
    self-contained: true
    number-sections: false
execute:
  echo: true
  warning: false
  message: false
  fig-width: 10
---

::: {.callout-tip}
## How to use this file

**First time:** Click **Render** to see all results. Then run chunk by chunk to understand each step.

This file is for learning — do not edit it. When ready for the assignment, go to:
[Lab 6 Assignment](https://antjam-howell.github.io/paf-516-labs/lab6-assignment.html)
:::

```{r}
#| label: setup
#| include: false

# Global knitr defaults plus tigris caching, so repeated renders reuse
# downloaded boundary files instead of re-fetching them.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
                      fig.width = 10)
options(tigris_use_cache = TRUE)
options(tigris_progress_bar = FALSE)
```

```{r}
#| label: packages

# ── One-time course environment setup ─────────────────────────────────────
# Sentinel lives in user home (~/.paf516_ready) — persists across all lab
# folders. renv restores ONCE for the entire course, not once per lab.
# To force re-install: delete ~/.paf516_ready and re-render.
if (!file.exists("~/.paf516_ready")) {
  if (!requireNamespace("renv", quietly = TRUE)) install.packages("renv")
  tmp <- tempfile(fileext = ".lock")
  download.file(
    url = "https://raw.githubusercontent.com/AntJam-Howell/paf-516-labs/main/renv.lock",
    destfile = tmp,
    mode = "wb"
  )
  renv::restore(lockfile = tmp, prompt = FALSE)
  file.create("~/.paf516_ready")
}

library(tidyverse)
library(tidycensus)
library(sf)
library(spdep)
library(viridis)
library(patchwork)
```

```{r}
#| label: api-key
# Run ONCE: census_api_key("YOUR_KEY", install = TRUE), then restart R.
```

# Overview

Lab 5 answered **what changed** in Maricopa County between 2013 and 2019. Lab 6 asks the deeper question: **is change spatially structured?**

Three new analyses, all using the same 2013 and 2019 data from Lab 5:

1. **LISA trajectories** — classify each tract by how its cluster membership changed
2. **Space-time Moran's I** — test whether change scores are spatially autocorrelated
3. **Three-point trend** — add a 2016 middle observation to distinguish sustained trends from noise

All three time points (2013, 2016, 2019) use 2010 TIGER/Line tract boundaries, so GEOIDs match cleanly across all three years.

# Step 1: Rebuild the Lab 5 Dataset

We reconstruct the `change_df` object using the same 3-variable index, pooled standardization, and 2013/2019 time points from Lab 5.

```{r}
#| label: step.01-vars

# 3-variable economic hardship index — consistent across 2013, 2016, and 2019 ACS
hardship_vars <- c(
  pov_below50   = "C17002_002",  # Income-to-poverty ratio < 0.50
  pov_50to99    = "C17002_003",  # Income-to-poverty ratio 0.50–0.99
  pov_den       = "C17002_001",  # Poverty universe denominator
  unemp_num     = "B23025_005",  # Unemployed
  unemp_den     = "B23025_002",  # In labor force
  median_income = "B19013_001"   # Median household income
)
```

```{r}
#| label: step.01-pull
#| results: hide

# ── Load from shared data cache or pull from Census API ───────────────────
# Pre-built RDS files in /data/ are shared across Labs 5 and 6 — no duplicate
# API calls. If running outside the repo structure, the fallback pulls live.
load_or_pull <- function(rds_path, pull_fn) {
  # Cache-aware loader: return the RDS file when it exists; otherwise run
  # the supplied zero-argument pull function, persist its result for next
  # time, and return it.
  if (!file.exists(rds_path)) {
    fetched <- pull_fn()
    dir.create(dirname(rds_path), showWarnings = FALSE, recursive = TRUE)
    saveRDS(fetched, rds_path)
    return(fetched)
  }
  readRDS(rds_path)
}

# year = 2013: 2009–2013 ACS, 2010 TIGER/Line boundaries
tract_2013_raw <- load_or_pull(
  "../data/maricopa_tracts_2013.rds",
  function() get_acs(
    geography = "tract", variables = hardship_vars,
    state = "AZ", county = "Maricopa",
    year = 2013, survey = "acs5",
    geometry = TRUE, progress_bar = FALSE
  )
)

# year = 2019: 2015–2019 ACS, also 2010 TIGER/Line boundaries
tract_2019_raw <- load_or_pull(
  "../data/maricopa_tracts_2019.rds",
  function() get_acs(
    geography = "tract", variables = hardship_vars,
    state = "AZ", county = "Maricopa",
    year = 2019, survey = "acs5",
    geometry = TRUE, progress_bar = FALSE
  )
)
```

```{r}
#| label: step.01-pull-offline
#| eval: false

# ── Offline fallback — run this block instead if the Census API is down ──
# Replace the get_acs() chunk above with these readRDS() calls.
# All datasets are pre-built and stored in data/ for offline use.
#
# tract_2013_raw <- readRDS("../data/maricopa_tracts_2013.rds")
# tract_2019_raw <- readRDS("../data/maricopa_tracts_2019.rds")
```

```{r}
#| label: step.01-wrangle

# ── Reshape, compute rates, standardize on pooled scale ──────────────────

# One year's long ACS pull -> wide tract table with poverty/unemployment
# rates. Tracts missing any index input are dropped (0/0 denominators
# produce NaN, which is.na() also catches).
widen_and_rate <- function(df) {
  df %>%
    select(-moe) %>%
    pivot_wider(names_from = variable, values_from = estimate) %>%
    mutate(
      poverty_rate = (pov_below50 + pov_50to99) / pov_den,
      unemp_rate   = unemp_num / unemp_den
    ) %>%
    filter(!is.na(poverty_rate), !is.na(unemp_rate), !is.na(median_income))
}

tract_2013_wide <- widen_and_rate(tract_2013_raw)
tract_2019_wide <- widen_and_rate(tract_2019_raw)

r2013 <- tract_2013_wide %>%
  st_drop_geometry() %>%
  select(GEOID, poverty_rate, unemp_rate, median_income)
r2019 <- tract_2019_wide %>%
  st_drop_geometry() %>%
  select(GEOID, poverty_rate, unemp_rate, median_income)

vars_to_pool <- c("poverty_rate", "unemp_rate", "median_income")

# Pooled mean and SD across 2013 + 2019 combined, so both years (and later
# 2016) are scored on the SAME scale.
pooled_stats <- map_dfr(vars_to_pool, function(v) {
  vals <- c(r2013[[v]], r2019[[v]])
  tibble(variable = v,
         mean = mean(vals, na.rm = TRUE),
         sd   = sd(vals, na.rm = TRUE))
})

# Z-score each input on the pooled scale, flip income so higher always means
# more hardship, then average the three z-scores into a single index.
apply_pooled_z <- function(df, stats) {
  for (v in stats$variable) {
    pm <- stats$mean[stats$variable == v]
    ps <- stats$sd[stats$variable == v]
    df[[paste0("z_", v)]] <- (df[[v]] - pm) / ps
  }
  df$z_median_income <- -1 * df$z_median_income
  df$hardship_index <- rowMeans(
    df[, c("z_poverty_rate", "z_unemp_rate", "z_median_income")],
    na.rm = TRUE)
  df
}

r2013_z <- apply_pooled_z(r2013, pooled_stats)
r2019_z <- apply_pooled_z(r2019, pooled_stats)

# Join and compute change score (positive = hardship worsened 2013 -> 2019)
change_df <- tract_2019_wide %>%
  select(GEOID, NAME) %>%
  left_join(r2019_z %>% select(GEOID, hardship_2019 = hardship_index),
            by = "GEOID") %>%
  inner_join(r2013_z %>% select(GEOID, hardship_2013 = hardship_index),
             by = "GEOID") %>%
  mutate(hardship_change = hardship_2019 - hardship_2013)

cat("Tracts in change_df:", nrow(change_df), "\n")
cat("Mean change 2013→2019:",
    round(mean(change_df$hardship_change), 3), "\n")
```

# Step 2: Build Spatial Weights Matrix

```{r}
#| label: step.02-weights

# Queen contiguity, row-standardized — same approach as Lab 4.
# zero.policy = TRUE keeps islands (zero-neighbor tracts) in the analysis.
nb <- poly2nb(change_df, queen = TRUE)
w  <- nb2listw(nb, style = "W", zero.policy = TRUE)

cat("Weights matrix built.\n")
cat("Tracts with zero neighbors:", sum(card(nb) == 0), "\n")
```

# Step 3: Run LISA on Both Periods

We classify each tract into HH/LL/HL/LH/NS separately for 2013 and 2019.

```{r}
#| label: step.03-lisa-2013

# ── LISA on the 2013 economic hardship index ──────────────────────────────

# Shared quadrant classifier, used for BOTH years so the two runs cannot
# drift apart (previously the same case_when() was duplicated verbatim).
# NA inputs — e.g. zero-neighbor tracts whose local statistics are
# undefined — fall through to "NS", matching the old case_when() behavior.
classify_lisa <- function(z, lag_z, p, alpha = 0.05) {
  out <- rep("NS", length(z))
  sig <- !is.na(z) & !is.na(lag_z) & !is.na(p) & p < alpha
  out[sig & z > 0 & lag_z > 0] <- "HH"
  out[sig & z < 0 & lag_z < 0] <- "LL"
  out[sig & z > 0 & lag_z < 0] <- "HL"
  out[sig & z < 0 & lag_z > 0] <- "LH"
  out
}

lisa_2013 <- localmoran(change_df$hardship_2013, w, zero.policy = TRUE)

z13  <- scale(change_df$hardship_2013)[, 1]
wz13 <- lag.listw(w, z13, zero.policy = TRUE)
p13  <- lisa_2013[, "Pr(z != E(Ii))"]

change_df <- change_df %>%
  mutate(lisa_2013 = classify_lisa(z13, wz13, p13))

cat("2013 LISA distribution:\n")
print(table(change_df$lisa_2013))
```

```{r}
#| label: step.03-lisa-2019

# ── LISA on the 2019 economic hardship index ──────────────────────────────
lisa_2019 <- localmoran(change_df$hardship_2019, w, zero.policy = TRUE)

z19  <- scale(change_df$hardship_2019)[, 1]
wz19 <- lag.listw(w, z19, zero.policy = TRUE)
p19  <- lisa_2019[, "Pr(z != E(Ii))"]

change_df <- change_df %>%
  mutate(lisa_2019 = classify_lisa(z19, wz19, p19))

cat("2019 LISA distribution:\n")
print(table(change_df$lisa_2019))
```

# Step 4: LISA Transition Matrix

```{r}
#| label: step.04-transition

cat("── LISA Transition Matrix (rows = 2013, columns = 2019) ──\n")
trans_mat <- table(`2013` = change_df$lisa_2013,
                   `2019` = change_df$lisa_2019)
print(trans_mat)

cat("\n── As row proportions (% of 2013 category that ended in each 2019 category) ──\n")
print(round(prop.table(trans_mat, margin = 1) * 100, 1))
```

# Step 5: Trajectory Classification

```{r}
#| label: step.05-trajectory

# ── Classify each tract into a policy-relevant trajectory ────────────────
# Order matters: HH transitions are matched first, then LL, then outliers.
change_df <- change_df %>%
  mutate(
    trajectory = case_when(
      lisa_2013 == "HH" & lisa_2019 == "HH" ~ "Persistent HH",
      lisa_2013 != "HH" & lisa_2019 == "HH" ~ "Emerging HH",
      lisa_2013 == "HH" & lisa_2019 != "HH" ~ "Dissolving HH",
      lisa_2013 == "LL" & lisa_2019 == "LL" ~ "Persistent LL",
      lisa_2013 != "LL" & lisa_2019 == "LL" ~ "Emerging LL",
      lisa_2013 == "HL" | lisa_2019 == "HL" ~ "HL Outlier",
      TRUE ~ "Stable NS"
    )
  )

cat("── Trajectory distribution ──\n")
change_df %>%
  st_drop_geometry() %>%
  count(trajectory, sort = TRUE) %>%
  mutate(pct = round(100 * n / sum(n), 1)) %>%
  print()
```

# Step 6: Trajectory Map

```{r}
#| label: step.06-trajectory-map
#| fig-height: 9

traj_colors <- c(
  "Persistent HH" = "#D7191C",
  "Emerging HH"   = "#FDAE61",
  "Dissolving HH" = "#FEE08B",
  "Persistent LL" = "#2C7BB6",
  "Emerging LL"   = "#ABD9E9",
  "HL Outlier"    = "#9E5BB4",
  "Stable NS"     = "#CCCCCC"
)

traj_labels <- c(
  "Persistent HH" = "Persistent Hot Spot (HH→HH)",
  "Emerging HH"   = "Emerging Hot Spot (→HH)",
  "Dissolving HH" = "Dissolving Hot Spot (HH→)",
  "Persistent LL" = "Persistent Cold Spot (LL→LL)",
  "Emerging LL"   = "Emerging Cold Spot (→LL)",
  "HL Outlier"    = "Spatial Outlier (HL)",
  "Stable NS"     = "No Dominant Cluster Pattern"
)

ggplot(change_df) +
  geom_sf(aes(fill = trajectory), color = NA) +
  scale_fill_manual(values = traj_colors, labels = traj_labels,
                    name = "Trajectory\n(2013 → 2019)") +
  theme_void(base_size = 11) +
  labs(
    title = "Neighborhood Hardship Trajectories, 2013–2019",
    subtitle = "Maricopa County Census Tracts | LISA Cluster Transition Classification",
    caption = "LISA run separately on 2013 and 2019 pooled-standardized economic hardship index (queen contiguity, p < 0.05)"
  )
```

```{r}
#| label: step.06-sidebyside
#| fig-width: 16
#| fig-height: 7

# ── Side-by-side: 2013 LISA | 2019 LISA | Trajectories ───────────────────
lisa_colors <- c("HH" = "#D7191C", "LL" = "#2C7BB6", "HL" = "#FDAE61",
                 "LH" = "#ABD9E9", "NS" = "#CCCCCC")

p_l13 <- ggplot(change_df) +
  geom_sf(aes(fill = lisa_2013), color = NA) +
  scale_fill_manual(values = lisa_colors, name = "LISA") +
  theme_void() +
  labs(title = "LISA Clusters 2013")

p_l19 <- ggplot(change_df) +
  geom_sf(aes(fill = lisa_2019), color = NA) +
  scale_fill_manual(values = lisa_colors, name = "LISA") +
  theme_void() +
  labs(title = "LISA Clusters 2019")

p_traj <- ggplot(change_df) +
  geom_sf(aes(fill = trajectory), color = NA) +
  scale_fill_manual(values = traj_colors, name = "Trajectory") +
  theme_void() +
  labs(title = "Trajectories 2013→2019")

p_l13 + p_l19 + p_traj +
  plot_annotation(title = "LISA Clusters and Trajectories: Post-Recession Decade",
                  subtitle = "Maricopa County Census Tracts")
```

# Step 7: Space-Time Moran's I

Is the *change score itself* spatially autocorrelated? If tracts that improved cluster together and tracts that worsened cluster together, then decline and recovery are spreading processes — not random.
```{r}
#| label: step.07-moran

set.seed(42)

moran_chg <- moran.mc(
  change_df$hardship_change, w,
  nsim = 999, zero.policy = TRUE
)

cat("── Space-Time Moran's I (applied to 2013→2019 change score) ──\n")
cat("Moran's I:", round(moran_chg$statistic, 4), "\n")
cat("p-value (permutation, 999 simulations):", round(moran_chg$p.value, 4), "\n")

# FIX: moran.mc() defaults to alternative = "greater" — a ONE-SIDED
# permutation test for POSITIVE autocorrelation. Under that alternative a
# strongly negative Moran's I produces a p-value near 1, so the old
# "SIGNIFICANT NEGATIVE" branch inside p < 0.05 could never run. The
# interpretation below now matches the test actually performed; to probe
# for dispersion, rerun moran.mc() with alternative = "less".
if (moran_chg$p.value < 0.05) {
  cat("\nInterpretation: SIGNIFICANT POSITIVE autocorrelation in change.\n")
  cat("Tracts that improved cluster together; tracts that worsened cluster together.\n")
  cat("Change is a spatially spreading process, not random.\n")
} else {
  cat("\nInterpretation: NOT SIGNIFICANT.\n")
  cat("Change is spatially random — driven by local factors, not spatial spread.\n")
}
```

```{r}
#| label: step.07-moran-plot
#| fig-height: 5

# ── Moran scatterplot for the change score ────────────────────────────────
# Upper-right / lower-left points drive positive spatial autocorrelation.
z_chg  <- scale(change_df$hardship_change)[, 1]
wz_chg <- lag.listw(w, z_chg, zero.policy = TRUE)

ggplot(tibble(z = z_chg, wz = wz_chg, trajectory = change_df$trajectory),
       aes(x = z, y = wz)) +
  geom_hline(yintercept = 0, color = "gray70") +
  geom_vline(xintercept = 0, color = "gray70") +
  geom_point(aes(color = trajectory), alpha = 0.5, size = 1.2) +
  geom_smooth(method = "lm", se = FALSE, color = "#8C1D40", linewidth = 1) +
  scale_color_manual(values = traj_colors) +
  theme_minimal() +
  labs(
    title = "Moran Scatterplot: Hardship Change Scores (2013 → 2019)",
    subtitle = paste0("Moran's I = ", round(moran_chg$statistic, 3),
                      " | p = ", round(moran_chg$p.value, 3)),
    x = "Standardized Change Score",
    y = "Spatial Lag of Change Score",
    color = "Trajectory"
  )
```

# Step 8: Three-Point Trend Analysis

Adding the 2012–2016 ACS (`year = 2016`) as a middle observation lets us distinguish sustained trends from endpoint noise.
All three years — 2013, 2016, 2019 — use 2010 TIGER/Line tract boundaries.

```{r}
#| label: step.08-pull-2016
#| results: hide

# year = 2016: 2012–2016 ACS | uses 2010 TIGER/Line boundaries (same as 2013 and 2019)
# Loads from shared cache if available — same load_or_pull() helper as Step 1
tract_2016_raw <- load_or_pull(
  "../data/maricopa_tracts_2016.rds",
  function() get_acs(
    geography = "tract", variables = hardship_vars,
    state = "AZ", county = "Maricopa",
    year = 2016, survey = "acs5",
    geometry = FALSE, progress_bar = FALSE
  )
)
```

```{r}
#| label: step.08-pull-2016-offline
#| eval: false

# ── Offline fallback — run this block instead if the Census API is down ──
# Replace the get_acs() chunk above with these readRDS() calls.
# All datasets are pre-built and stored in data/ for offline use.
#
# tract_2016_raw <- readRDS("../data/maricopa_tracts_2016.rds")
```

```{r}
#| label: step.08-build-2016

# ── Apply same pooled standardization (pooled_stats from Step 1) ──────────

# FIX: the live pull uses geometry = FALSE, but a cached/offline RDS (the
# data/ cache is shared with other labs) may still carry sf geometry.
# st_drop_geometry() is the supported way to strip it; the previous
# mutate(geometry = NULL) is not reliable on sf objects, whose geometry
# column is "sticky" under dplyr verbs.
tract_2016_flat <- if (inherits(tract_2016_raw, "sf")) {
  st_drop_geometry(tract_2016_raw)
} else {
  tract_2016_raw
}

tract_2016_wide <- widen_and_rate(tract_2016_flat)
r2016 <- tract_2016_wide %>%
  select(GEOID, poverty_rate, unemp_rate, median_income)
r2016_z <- apply_pooled_z(r2016, pooled_stats)

cat("2016 tracts with valid index:", nrow(r2016_z), "\n")
```

```{r}
#| label: step.08-trend

# ── Join all three years; classify monotonicity ───────────────────────────
# Strict inequalities: ties (exact equality) land in "Flat / non-monotone".
trend_df <- change_df %>%
  st_drop_geometry() %>%
  select(GEOID, hardship_2013, hardship_2019) %>%
  inner_join(r2016_z %>% select(GEOID, hardship_2016 = hardship_index),
             by = "GEOID") %>%
  mutate(
    trend_type = case_when(
      hardship_2013 > hardship_2016 & hardship_2016 > hardship_2019 ~ "Consistently improving",
      hardship_2013 < hardship_2016 & hardship_2016 < hardship_2019 ~ "Consistently worsening",
      hardship_2013 > hardship_2016 & hardship_2016 < hardship_2019 ~ "V-shaped (improved mid)",
      hardship_2013 < hardship_2016 & hardship_2016 > hardship_2019 ~ "Inverted-V (worsened mid)",
      TRUE ~ "Flat / non-monotone"
    )
  )

cat("── Three-point trend distribution (2013 | 2016 | 2019) ──\n")
trend_df %>%
  count(trend_type, sort = TRUE) %>%
  mutate(pct = round(100 * n / sum(n), 1)) %>%
  print()

change_df <- change_df %>%
  left_join(trend_df %>% select(GEOID, trend_type), by = "GEOID")
```

```{r}
#| label: step.08-trend-map
#| fig-height: 9

trend_colors <- c(
  "Consistently improving"    = "#2166AC",
  "Consistently worsening"    = "#D7191C",
  "V-shaped (improved mid)"   = "#74ADD1",
  "Inverted-V (worsened mid)" = "#F46D43",
  "Flat / non-monotone"       = "#CCCCCC"
)

ggplot(change_df) +
  geom_sf(aes(fill = trend_type), color = NA) +
  scale_fill_manual(values = trend_colors, na.value = "#EEEEEE",
                    name = "Trend (2013|2016|2019)") +
  theme_void(base_size = 11) +
  labs(
    title = "Neighborhood Hardship Trend Type (2013 | 2016 | 2019)",
    subtitle = "Maricopa County Census Tracts | Three-point monotonicity classification",
    caption = "Based on pooled-standardized economic hardship index at three ACS vintages"
  )
```

---

::: {.callout-note}
## Ready for the Assignment

Head to the [Lab 6 Assignment](https://antjam-howell.github.io/paf-516-labs/lab6-assignment.html) to download `Lab6_Assignment.qmd`.
:::