---
title: "Visualizing and Maintaining the Green Canopy of NYC"
author: "Akashdeep Singh"
---

## Task 1: Download NYC City Council District Boundaries

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| output: false
library(sf)
library(tidyverse)
library(httr2)

#' Download (if needed) and load the NYC City Council district boundaries.
#'
#' The zip archive and the files extracted from it are cached under
#' `data/mp03`, so repeated renders do not hit the network again.
#'
#' @return An `sf` object read from the first `.shp` file in the archive,
#'   transformed to WGS 84.
get_nyc_council_districts <- function() {
  url <- "https://s-media.nyc.gov/agencies/dcp/assets/files/zip/data-tools/bytes/city-council/nycc_25c.zip"

  data_dir <- file.path("data", "mp03")
  zip_file <- file.path(data_dir, "nycc_25c.zip")

  # Step 1: make sure the cache directory exists
  dir.create(data_dir, showWarnings = FALSE, recursive = TRUE)

  # Step 2: download only if the archive is not cached yet.
  # The transfer goes to a temporary name first so that an interrupted
  # download can never be mistaken for a complete cached archive later.
  if (!file.exists(zip_file)) {
    message("Downloading NYC City Council District boundaries...")
    tmp_zip <- tempfile(tmpdir = data_dir, fileext = ".zip")
    on.exit(unlink(tmp_zip), add = TRUE)
    status <- download.file(url, tmp_zip, mode = "wb")
    if (status != 0 || !file.rename(tmp_zip, zip_file)) {
      stop("Failed to download ", url, call. = FALSE)
    }
  }

  # Step 3: locate the shapefile inside the archive and unzip if needed
  zip_contents <- unzip(zip_file, list = TRUE)
  shp_files <- zip_contents$Name[
    grepl("\\.shp$", zip_contents$Name, ignore.case = TRUE)
  ]
  if (length(shp_files) == 0) {
    stop("No .shp file found in the zip archive")
  }

  shp_file <- file.path(data_dir, shp_files[1])
  if (!file.exists(shp_file)) {
    message("Unzipping district boundaries...")
    unzip(zip_file, exdir = data_dir)
  }

  # Steps 4-6: read the shapefile and return it in WGS 84
  districts <- st_read(shp_file, quiet = TRUE)
  st_transform(districts, crs = "WGS84")
}

# Usage:
nyc_districts <- get_nyc_council_districts()
```

## Task 2: Download Tree Points

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| output: false
library(httr2)
library(sf)
library(dplyr)
library(glue)

#' Download the tree points dataset page by page, caching each page on disk.
#'
#' Pages are requested with `$limit` / `$offset` and written to
#' `trees_001.geojson`, `trees_002.geojson`, ... inside `out_dir`. A page whose
#' file already exists is not requested again. Paging stops at the first page
#' that cannot be read, is empty, or has fewer than `limit` rows.
#'
#' @param base_url Endpoint to query.
#' @param limit Number of rows requested per page.
#' @param out_dir Directory in which the page files are cached.
#' @return All pages read during this call, combined with `bind_rows()`.
download_tree_points <- function(
  base_url = "https://data.cityofnewyork.us/resource/hn5i-inap.json",
  limit = 5000,
  out_dir = "data/mp03"
) {
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  # Query values are sent as plain digit strings: a bare numeric such as
  # 100000 may otherwise be serialised as "1e+05".
  as_query_number <- function(x) format(x, scientific = FALSE, trim = TRUE)

  chunks <- list()
  offset <- 0
  chunk_id <- 1

  repeat {
    out_file <- file.path(out_dir, sprintf("trees_%03d.geojson", chunk_id))

    if (file.exists(out_file)) {
      cat(sprintf("Chunk %d already exists — skipping.\n", chunk_id))
    } else {
      cat(sprintf(
        "Downloading chunk %d (offset = %s)...\n",
        chunk_id, as_query_number(offset)
      ))
      resp <- request(base_url) |>
        req_url_query(
          `$limit` = as_query_number(limit),
          `$offset` = as_query_number(offset),
          # A fixed ordering keeps pages stable across requests, so no row
          # is skipped or returned twice while paging.
          `$order` = ":id"
        ) |>
        req_perform()
      resp_body_raw(resp) |>
        writeBin(con = out_file)
    }

    dat <- tryCatch(
      st_read(out_file, quiet = TRUE),
      error = function(e) {
        # Report why the page was unusable instead of hiding it.
        cat(sprintf("Could not read %s: %s\n", out_file, conditionMessage(e)))
        NULL
      }
    )

    if (is.null(dat) || nrow(dat) == 0) {
      cat("No more data returned — stopping download.\n")
      break
    }

    # Keep the page that was just validated. Re-listing the directory later
    # would also pick up the unreadable terminal page and stale pages from
    # earlier runs.
    chunks[[length(chunks) + 1]] <- dat

    if (nrow(dat) < limit) {
      cat("Final chunk downloaded — reached end of dataset.\n")
      break
    }

    offset <- offset + limit
    chunk_id <- chunk_id + 1
  }

  cat("Combining all chunks...\n")
  tree_data <- bind_rows(chunks)
  cat(sprintf("✔ Finished — total rows: %d\n", nrow(tree_data)))

  tree_data
}

trees <- download_tree_points()
```

## Task 3-Plot All Tree Points

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| output: false
library(sf)
library(dplyr)
library(ggplot2)

# Rebuild point geometries from the longitude/latitude columns, dropping
# whatever geometry came with the download and any row missing a coordinate.
trees_fixed <- trees %>%
  st_drop_geometry() %>%
  filter(!is.na(longitude) & !is.na(latitude)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

print(st_bbox(trees_fixed))
print(head(trees_fixed))

trees_transformed <- st_transform(trees_fixed, st_crs(nyc_districts))
```

```{r}
#| code-fold: true
#| code-summary: "Show code"
ggplot() +
  geom_sf(
    data = nyc_districts,
    fill = "lightgray", color = "black", linewidth = 0.5
  ) +
  # head() caps the sample at 20,000 rows without producing NA rows when
  # fewer rows are available (which `[1:20000, ]` would do).
  geom_sf(
    data = head(trees_transformed, 20000),
    color = "darkgreen", alpha = 0.3, size = 0.5
  ) +
  labs(
    title = "NYC Street Trees by Council District",
    subtitle = "20,000 trees"
  ) +
  theme_minimal()

# Attach the attributes of the containing council district to every tree.
trees_with_districts <- st_join(
  trees_transformed,
  nyc_districts,
  join = st_intersects
)
```

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| output: false
# check districts
final <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  count(CounDist, name = "count")

print(final)
```

## Task 4- District-Level Analysis of Tree Coverage

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| output: false
library(dplyr)
library(sf)

head(trees_with_districts)
nrow(trees_with_districts)
```

### Question 1: Which council district has the most trees?

```{r}
#| code-fold: true
#| code-summary: "Show code"
# Trees that matched no district (NA CounDist) are excluded so they cannot
# win as a "district". The zip_city values of a district are collapsed into
# one string, so the answer stays a single row.
q1 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist) %>%
  summarize(
    tree_count = n(),
    zip_city = paste(sort(unique(zip_city)), collapse = ", ")
  ) %>%
  arrange(desc(tree_count)) %>%
  slice(1)

print(q1)
```

**The district with the most trees is the 51st district, which is located in Staten Island. This makes sense, as Staten Island is often considered the most suburban borough.**

### Question 2: Which district has highest density of trees?

```{r}
#| code-fold: true
#| code-summary: "Show code"
# Density is the tree count divided by the district's Shape_Area, in whatever
# units Shape_Area is stored in.
q2 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist, Shape_Area) %>%
  summarize(
    tree_count = n(),
    zip_city = paste(sort(unique(zip_city)), collapse = ", "),
    .groups = "drop"
  ) %>%
  mutate(tree_density = tree_count / Shape_Area) %>%
  relocate(zip_city, .after = tree_density) %>%
  arrange(desc(tree_density)) %>%
  slice(1)

print(q2)
```

**The district with the highest tree density is district 9, in Manhattan. This is very interesting because Manhattan would seem to be the last place the highest density of trees would be in.**

### Question 3: Which district has highest fraction of dead trees?
```{r}
#| code-fold: true
#| code-summary: "Show code"
# Trees that matched no district (NA CounDist) are excluded, and the zip_city
# values of a district are collapsed into one string so the answer stays a
# single row instead of one row per zip_city.
q3 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist) %>%
  summarize(
    total_trees = n(),
    dead_trees = sum(status == "Dead", na.rm = TRUE),
    fraction_dead = dead_trees / total_trees,
    zip_city = paste(sort(unique(zip_city)), collapse = ", ")
  ) %>%
  arrange(desc(fraction_dead)) %>%
  slice(1)

print(q3)
```

**The highest fraction of dead trees is in district 16, in the Bronx.**

### Question 4: Most common tree species in Manhattan

```{r}
# Label each tree with a borough derived from its council district number.
trees_with_districts <- trees_with_districts %>%
  mutate(borough = case_when(
    between(CounDist, 1, 10) ~ "Manhattan",
    between(CounDist, 11, 18) ~ "Bronx",
    between(CounDist, 19, 32) ~ "Queens",
    between(CounDist, 33, 48) ~ "Brooklyn",
    between(CounDist, 49, 51) ~ "Staten Island",
    TRUE ~ NA_character_
  ))

# Rows without a recorded species are dropped so that NA cannot be reported
# as the most common "species".
q4 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(borough == "Manhattan", !is.na(spc_common)) %>%
  count(spc_common, name = "count", sort = TRUE) %>%
  slice(1)

print(q4)
```

**The most common species in Manhattan is the Honeylocust.**

### Question 5: Species of tree closest to Baruch's campus

```{r}
# Baruch coordinates: 40.7402 N, 73.9834 W

#' Build a single WGS 84 point geometry from a latitude and a longitude.
#'
#' @param lat Latitude in decimal degrees.
#' @param lon Longitude in decimal degrees.
#' @param ... Accepted but not used.
#' @return An `sfc` holding one point with CRS WGS 84.
new_st_point <- function(lat, lon, ...) {
  st_sfc(st_point(c(lon, lat))) |>
    st_set_crs("WGS84")
}

baruch_point <- new_st_point(40.7402, -73.9834)
baruch_point <- st_transform(baruch_point, st_crs(trees_with_districts))

# st_distance() returns a matrix with one column per target geometry; taking
# column 1 stores the distances as an ordinary vector column. Trees without a
# recorded species are skipped because the question asks for a species.
q5 <- trees_with_districts %>%
  filter(!is.na(spc_common)) %>%
  mutate(distance = st_distance(geometry, baruch_point)[, 1]) %>%
  arrange(distance) %>%
  slice(1) %>%
  select(spc_common, distance, address)

print(q5)
```

**The tree closest to Baruch College is a Callery pear.**

## Task 5-NYC Parks Proposal

**Park Proposal:** We propose a comprehensive tree health initiative for Manhattan's District 2 to address the district's critical tree mortality crisis.
This program will **plant 500 new street trees** and **remove all dead trees**, with priority given to under-served corridors east of Second Avenue where tree coverage falls significantly below city standards. The initiative will focus on diversifying species composition by introducing climate-resilient varieties such as London Planetree and Red Oak, while reducing dependence on overrepresented species like Honeylocust.

District 2 faces a tree health emergency that demands immediate intervention. Our analysis reveals that District 2 has one of the highest percentages of dead trees among comparable Lower Manhattan districts. Furthermore, District 2's tree density falls below neighboring districts despite having among the highest pedestrian traffic volumes. The eastern corridor particularly suffers, with streets having significantly fewer trees per block compared to recommended standards of 12-15 trees per block.

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| echo: false
library(sf); library(dplyr); library(ggplot2); library(patchwork)

# The district the proposal is about, and the districts it is compared with.
my_district <- 2
comparison_districts <- c(1, 2, 3, 4)
district_label <- paste("District", my_district)
new_plantings <- 500L

# 5280 ft per mile, squared (= 27,878,400).
# NOTE(review): assumes Shape_Area is stored in square feet — confirm.
sqft_per_sqmi <- 5280^2

# Per-district totals, share of dead trees and tree density. `%in%` never
# matches NA, so no separate NA filter is needed.
district_stats <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(CounDist %in% comparison_districts) %>%
  group_by(CounDist) %>%
  summarize(
    total_trees = n(),
    dead_trees = sum(status == "Dead", na.rm = TRUE),
    pct_dead = (dead_trees / total_trees) * 100,
    area = first(Shape_Area)
  ) %>%
  mutate(
    highlight = if_else(CounDist == my_district, district_label, "Other"),
    trees_per_sqmi = (total_trees / area) * sqft_per_sqmi
  )

d2 <- district_stats %>%
  filter(CounDist == my_district)

cat(sprintf(
  "Scope: %d dead trees removed + 500 new plantings = %d total trees\n",
  d2$dead_trees, d2$dead_trees + new_plantings
))
```

```{r}
#| code-fold: true
#| code-summary: "Show code"
#| echo: false
#| fig-width: 10
#| fig-height: 2.5
# The proposal district is drawn in red, every other district in green.
highlight_fill <- setNames(c("#dc2626", "#10b981"), c(district_label, "Other"))

p1 <- ggplot(
  district_stats,
  aes(x = factor(CounDist), y = pct_dead, fill = highlight)
) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = sprintf("%.1f%%", pct_dead)),
    vjust = -0.5, size = 3.5, fontface = "bold"
  ) +
  scale_fill_manual(values = highlight_fill) +
  labs(title = "Highest Dead Tree %", x = "District", y = "% Dead") +
  theme_minimal(base_size = 9) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 10)
  )

p2 <- ggplot(
  district_stats,
  aes(x = factor(CounDist), y = trees_per_sqmi, fill = highlight)
) +
  geom_col(width = 0.6) +
  # Dashed line: mean density across the compared districts.
  geom_hline(
    yintercept = mean(district_stats$trees_per_sqmi),
    linetype = "dashed", color = "gray40"
  ) +
  geom_text(
    aes(label = format(round(trees_per_sqmi), big.mark = ",")),
    vjust = -0.5, size = 3.5, fontface = "bold"
  ) +
  scale_fill_manual(values = highlight_fill) +
  labs(title = "Below-Avg Density", x = "District", y = "Trees/sq mi") +
  theme_minimal(base_size = 9) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 10)
  )

# patchwork places the two charts side by side in the single wide figure
# this chunk is sized for.
p1 + p2
```

Our district requires urgent intervention. **Tree Mortality Crisis:** District 2 has the highest rate of dead trees among comparable districts.

```{r}
# NOTE(review): every tree that is neither status "Dead" nor health "Poor"
# is labelled "Healthy", including rows with a missing health value or any
# other status — confirm this is the intended grouping.
district2_trees <- trees_with_districts %>%
  filter(CounDist == my_district) %>%
  mutate(tree_status = case_when(
    status == "Dead" ~ "Dead",
    health == "Poor" ~ "Poor",
    TRUE ~ "Healthy"
  ))

district2_boundary <- nyc_districts %>%
  filter(CounDist == my_district)

m1 <- ggplot() +
  geom_sf(
    data = district2_boundary,
    fill = "gray95", color = "black", linewidth = 0.8
  ) +
  geom_sf(
    data = district2_trees,
    aes(color = tree_status), size = 0.5, alpha = 0.6
  ) +
  scale_color_manual(
    values = c("Healthy" = "#10b981", "Poor" = "#fbbf24", "Dead" = "#dc2626"),
    name = ""
  ) +
  labs(title = paste(district_label, "Tree Health")) +
  theme_void(base_size = 9) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    legend.position = "right"
  )

m1
```

The zoomed map reveals that tree health problems are concentrated in the eastern corridor of the district, particularly east of Second Avenue, where lower-income residents face the worst conditions.
By removing more than 300 dead trees and planting 500 new trees of climate-resilient species, this program will bring District 2's urban canopy more in line with that of neighboring districts, improve air quality, reduce urban heat effects, and eliminate safety hazards. The side-by-side comparison with Districts 1, 3, and 4 demonstrates the stark disparity in tree health that this initiative will address.