library(glue) ; g=glue ; len=length
library(magrittr)
library(purrr)
library(tidyr)
library(dplyr)
library(data.table)
library(stringr)
library(rdist)
library(ggplot2)
library(cowplot)
# library(ggrastr)
# library(rhdf5)
library(viridisLite)
library(viridis)
library(qpdf)

### Helper methods #############################################################

gdraw <- function(text, s=14) {cowplot::ggdraw()+cowplot::draw_label(text, size=s)}
plot.tab <- function(df) {return(cowplot::plot_grid(gridExtra::tableGrob(df, rows=NULL)))}
dec2pct <- function(x, d=2) {return(paste0(round(x*100, digits=d), "%"))}
add.commas <- function(num) {prettyNum(num, big.mark=",")}
h_index <- function(vec) {
  return(data.table(x=vec)[,.(n=.N),x][order(-x), max(pmin(x, cumsum(n)), default=0)])
}
make.pdf <- function(plots, name, w, h) {
  if (any(c("gg", "ggplot", "Heatmap") %in% class(plots))) {plots = list(plots)}
  pdf(file=name, width=w, height=h)
  lapply(plots, print)
  dev.off()
}

remap_10X_CB <- function(vec) {
  basemap <- setNames(c("AG","TC","CA","GT"), c("TC","AG","GT","CA"))
  return(paste0(substr(vec,1,7), basemap[substr(vec,8,9)], substr(vec,10,16)))
}

determine_remap_10X_CB <- function(vec, dt) {
  if (class(vec) != "character") {
    return(FALSE)
  } else if (!all(nchar(vec) == 16)) {
    return(FALSE)
  } else if (!all(unique(substr(vec,8,9)) %in% c("TC","AG","GT","CA"))) {
    return(FALSE)
  }
  
  # Determine which mapping has more reads
  reads_noremap <- dt[cb %in% cb_whitelist, sum(reads)]
  reads_remap <- dt[cb %in% remap_10X_CB(cb_whitelist), sum(reads)]
  return(reads_remap > reads_noremap)
}

listHD1neighbors <- function(input_string) {
  bases <- c('A','C','G','T','N')
  lapply(1:nchar(input_string), function(i){
    paste0(substr(input_string, 1, i-1),
           setdiff(bases, substr(input_string, i, i)),
           substr(input_string, i+1, nchar(input_string)))
  }) %>% list_c
}

################################################################################
### Loading methods ############################################################
################################################################################

ReadOptimus <- function(matrix_path) {
  fetch <- function(x){return(rhdf5::h5read(matrix_path, x))}
  stopifnot("Unrecognized .h5ad matrix" = c("data","indices","indptr") %in% rhdf5::h5ls(matrix_path)$name)
  
  data <- fetch("X/data") %>% as.numeric
  indices <- fetch("X/indices") %>% as.numeric
  indptr <- fetch("X/indptr") %>% as.numeric
  
  row_names <- fetch("var/_index") %>% as.character %>% make.unique # var/Gene and var/gene_names were too short
  col_names <- fetch("obs/_index") %>% as.character %>% make.unique # obs/CellID and obs/cell_names were the same
  star_IsCell <- fetch("obs/star_IsCell") %>% as.logical
  
  mat <- Matrix::sparseMatrix(
    j = indices,
    p = indptr,
    x = data,
    dimnames = list(row_names, col_names),
    index1 = F,
    check = T
  )
  
  stopifnot(ncol(mat) == len(star_IsCell))
  return(mat[,star_IsCell])
}


ReadIntronic <- function(intronic_path, cb_list) {
  if (!file.exists(intronic_path)) {
    return(rep(NA, length(cb_list)))
  }
  
  fields <- rhdf5::h5ls(intronic_path)$name
  fetch <- function(x){return(rhdf5::h5read(intronic_path, x))}
  
  if (all(c("barcodes", "barcode_idx", "umi_type") %in% fields)) { # 10X
    barcodes = fetch("barcodes")
    stopifnot(!any(duplicated(barcodes)), cb_list %in% barcodes)
    dt <- data.table(barcode_idx=fetch("barcode_idx")+1,
                     umi_type=fetch("umi_type"))
    pct.intronic <- dt[, .(total_umi=.N, intronic_umi=sum(umi_type==0)), barcode_idx][cb_list %>% match(barcodes) %>% match(barcode_idx), intronic_umi/total_umi*100]
  } else if (all(c("reads_mapped_intronic", "reads_mapped_exonic") %in% fields)) { # Optimus
    dt <- data.table(barcode = fetch("/obs/CellID"),
                     intronic = fetch("/obs/reads_mapped_intronic"),
                     exonic = fetch("/obs/reads_mapped_exonic"))
    stopifnot(!any(duplicated(dt[,barcode])), cb_list %in% dt[,barcode])
    pct.intronic <- dt[, .(pct.intronic=intronic/(intronic+exonic)*100), keyby=barcode][cb_list, pct.intronic]
  } else {
    print("WARNING: no intronic information found in the .h5")
    pct.intronic <- rep(NA, length(cb_list))
  }
  return(pct.intronic)
}


ReadLibraryMetrics <- function(metrics_path) {
  if (!file.exists(metrics_path)) {
    return(data.frame())
  }
  
  df = read.table(metrics_path, header=F, sep=",", comment.char="")
  if (ncol(df) > nrow(df)) { df %<>% t } # 10X
  df %<>% as.data.frame
  rownames(df) <- NULL
  colnames(df) <- NULL
  
  if (is.numeric(df[,2])) {
    df[,2] %<>% {case_when(
      . > 0 & . < 1 ~ dec2pct(.),
      . > 1 ~ add.commas(.),
      TRUE ~ as.character(.)
    )}
  }
  
  return(df)
}


ReadSpatialMatrix <- function(f) {
  cb_list <- f("lists/cb_list")
  sb_list <- f("lists/sb_list")
  
  dt <- data.table(cb = f("matrix/cb_index") %>% as.integer %>% factor(levels=seq_along(cb_list), labels=cb_list),
                   umi = f("matrix/umi") %>% as.integer,
                   sb = f("matrix/sb_index") %>% as.integer %>% factor(levels=seq_along(sb_list), labels=sb_list),
                   reads = f("matrix/reads") %>% as.integer)
  
  return(dt)
}


ReadSpatialMetadata <- function(f) {
  metadata = list(SB_info = list(R1s=f("lists/R1_list"),
                                 R2s=f("lists/R2_list"),
                                 pucks=f("lists/puck_list"),
                                 UMI_downsampling=f("metadata/downsampling"),
                                 switch_R1R2=f("metadata/switch") %>% as.logical,
                                 bead_type=f("metadata/bead")),
                  UMI_filtering = setNames(f("metadata/UMI/count"), f("metadata/UMI/type")),
                  UP_matching = setNames(f("metadata/UP/count"), f("metadata/UP/type")),
                  SB_matching = setNames(f("metadata/SB/count"), f("metadata/SB/type")),
                  SB_fuzzy_position = setNames(f("metadata/SB_HD/count"), f("metadata/SB_HD/type")) %>% {.[order(as.integer(names(.)))]},
                  puck_info = list(num_lowQ=f("metadata/num_lowQbeads"))
  )
  metadata$SB_filtering = c(reads_total=f("metadata/reads"),
                            reads_tooshort=f("metadata/R1_tooshort")+f("metadata/R2_tooshort"),
                            reads_noumi=sum(metadata$UMI[c("N","homopolymer")]),
                            reads_noup=sum(metadata$UP[c("none","GG")]),
                            reads_nosb=sum(metadata$SB_matching[c("none","HD1ambig")]))
  return(metadata)
}


ReadPuck <- function(f) {
  # Read matrix from .h5 file
  puckdf <- data.table(sb=f("puck/sb") %>% as.character,
                       x=f("puck/x") %>% as.numeric,
                       y=f("puck/y") %>% as.numeric,
                       puck_index=f("puck/puck_index") %>% as.integer)
  
  # Add degeneracy statistics
  most_character_count <- function(vec) {
    stopifnot(typeof(vec) == "character")
    degenA = str_count(vec, "A")
    degenC = str_count(vec, "C")
    degenG = str_count(vec, "G")
    degenT = str_count(vec, "T")
    degenN = str_count(vec, "N")
    return(pmax.int(degenA, degenC, degenG, degenT, degenN))
  }
  puckdf[, mc := most_character_count(sb)]
  
  longest_run_length <- function(vec) {
    stopifnot(typeof(vec) == "character", nchar(vec) > 0)
    ret = stringr::str_extract_all(vec, "(.)\\1*")
    return(map_int(ret, ~max(nchar(.))))
  }
  puckdf[, lr := longest_run_length(sb)]
  
  sb_len = max(nchar(puckdf$sb))
  mc_tol = round(sb_len*4/5)
  lr_tol = round(sb_len*2/3)
  
  dups = unique(puckdf$sb[duplicated(puckdf$sb)])
  Ns = puckdf[grepl("N", sb), sb]
  
  # split into pucks
  puckdfs <- split(puckdf, by = "puck_index", sorted = TRUE)
  for (puck in puckdfs) {puck[, puck_index := NULL]}
  
  # save metadata
  puckmeta <- list()
  puckmeta$puck_name = as.character(f("lists/puck_list")) %>% unname
  puckmeta$num_beads = map_int(puckdfs, nrow) %>% unname
  puckmeta$num_dup = map_int(puckdfs, ~.[sb %in% dups, .N]) %>% unname
  puckmeta$num_N = map_int(puckdfs, ~.[sb %in% Ns, .N]) %>% unname
  puckmeta$num_degen = map_int(puckdfs, ~.[mc > mc_tol | lr > lr_tol, .N]) %>% unname
  
  # remove duplicated or low-quality beads
  puckdfs %<>% map(~.[!sb %in% dups])
  puckdfs %<>% map(~.[!sb %in% Ns])
  puckdfs %<>% map(~.[mc <= mc_tol & lr <= lr_tol])
  for (puck in puckdfs) {puck[, mc := NULL]}
  for (puck in puckdfs) {puck[, lr := NULL]}
  
  # for multiplexed pucks, stack along "x" to prevent overlapping
  if (len(puckdfs) > 1) {
    maxs <- map_dbl(puckdfs, ~max(.$x, na.rm=TRUE)) %>% unname
    mins <- map_dbl(puckdfs, ~min(.$x, na.rm=TRUE)) %>% unname
    starts <- cumsum(maxs-mins) %>% lag(1, 0)
    puckdfs %>% map2(starts, ~.x[, x := x-min(x)+.y])
  } else {
    starts <- min(puckdfs[[1]]$x)
  }
  
  puckdf <- rbindlist(puckdfs)
  stopifnot(!any(duplicated(puckdf[, sb])))
  puckmeta$puck_boundaries = c(starts, max(puckdf$x))
  puckdf[, sb := factor(sb)]
  
  return(list(puckdf, puckmeta))
}

################################################################################
### RNA plots ##################################################################
################################################################################

# Page 1: 10X/Optimus RNA metrics
plot_metrics_csv <- function(df) {
  if (nrow(df) == 0) {
    return(gdraw("No metrics_summary.csv found"))
  }
  
  hide_cols <- c("NHashID","keeper_cells","percent_keeper","percent_target","percent_usable","keeper_mean_reads_per_cell","keeper_median_genes","total_genes_unique_detected")
  plot <- plot_grid(ggdraw()+draw_label(""),
                    ggdraw()+draw_label(g("Metrics Summary")),
                    plot.tab(df[!df[,1] %in% hide_cols,]),
                    ggdraw()+draw_label(""),
                    ncol=1, rel_heights=c(0.1,0.1,0.7,0.2))
  return(plot)
}


# Page 2: Cell calling
plot_cellcalling <- function(intronic_path, cb_list, ct = 200) {
  # Check if the supplementary .h5 exists
  if (!file.exists(intronic_path) || nchar(intronic_path) == 0) {
    plot <- gdraw("No supplementary .h5 found")
    return(plot)
  }
  
  # Load the supplementary .h5
  fields <- rhdf5::h5ls(intronic_path)$name
  fetch <- function(x){return(rhdf5::h5read(intronic_path, x))}
  if (all(c("barcodes", "barcode_idx", "umi_type") %in% fields)) { # 10X
    dt <- data.table(barcode_idx=fetch("barcode_idx")+1,
                     umi_type=fetch("umi_type")
                    )[,.(umi=.N, pct.intronic=sum(umi_type==0)/.N*100), barcode_idx]
    dt[, called := barcode_idx %in% match(cb_list, fetch("barcodes"))]
    dt[, barcode_idx := NULL]
    dt2 <- data.table(x=fetch("count"))[, .(N=.N), x]
  } else if (all(c("reads_mapped_intronic", "reads_mapped_exonic") %in% fields)) { # Optimus
    dt <- data.table(barcode = fetch("/obs/CellID"),
                     umi = data.table(data=fetch("X/data"),indices=fetch("X/indices"))[,.(umi=sum(data)),indices][order(indices), umi],
                     pct.intronic = fetch("/obs/reads_mapped_intronic") %>% {./(.+fetch("/obs/reads_mapped_exonic"))*100}
                    )
    dt[, called := barcode %in% cb_list]
    dt[, barcode := NULL]
    dt2 <- data.table(x=c(), N=c())
  } else {
    plot <- gdraw("Unrecognized supplementary .h5 file")
    return(plot)
  }
  # dt: umi pct.intronic called
  # dt2: x N
  
  # Panel 1: cell barcode rank plot
  plotdf <- dt[order(-umi), .(umi,called)]
  plotdf[, index := .I]
  plotdf <- plotdf[plotdf$umi != lag(plotdf$umi,1,0) | plotdf$umi != lead(plotdf$umi,1,0)]
  p1 <- ggplot(plotdf, aes(x=index,y=umi,col=called)) + geom_line() + 
    theme_bw() + scale_x_log10() + scale_y_log10() +
    ggtitle("Cell barcode rank plot") + xlab("Barcode rank") + ylab("UMI count") +
    theme(legend.position = "inside",
          legend.position.inside = c(0.05, 0.05),
          legend.justification.inside = c("left", "bottom"),
          legend.background = element_blank(),
          legend.spacing.y = unit(0.1, "lines"))
  
  # Panel 2: cell calling cloud
  if (is.null(dt$pct.intronic) || all(dt$pct.intronic==0)) {
    p2 <- gdraw(g("No intronic information"))
  } else if (all(dt$umi < ct)) {
    p2 <- gdraw(g("No cells with {ct}+ UMI"))
  } else {
    p2 <- dt[umi >= ct] %>% ggplot(aes(x=log10(umi+1), y=pct.intronic, color=called)) + 
      ggrastr::rasterize(geom_point(alpha = 0.1, size = 0.5), dpi = 300) +
      theme_minimal() +
      labs(title = g("Cell calling"), x = "log10(UMI)", y = "%Intronic") +
      theme(legend.position = "inside",
            legend.position.inside = c(0.95, 0.05),
            legend.justification.inside = c("right", "bottom"),
            legend.background = element_blank(),
            legend.spacing.y = unit(0.1,"lines"))
  }
  
  # Panel 3: Read per UMI histogram
  if (nrow(dt2) > 0) {
    dt2[, x := pmin(x, 10)]
    dt2 <- dt2[, .(N=sum(N)), x][order(x)]
    p3 <- ggplot(dt2, aes(x=as.factor(x),y=N)) + geom_col() +
      scale_x_discrete(breaks=min(dt2$x):max(dt2$x), labels=(min(dt2$x):max(dt2$x)) %>% {ifelse(.==10, "10+", .)}) +
      xlab("Reads per UMI") + ylab("UMI Count") + ggtitle("Reads per UMI distribution") +
      theme_minimal()
  } else {
    p3 <- gdraw("No Read per UMI data")
  }
  
  # Panel 4: Intronic density
  max_density_x = dt[called==T, pct.intronic] %>% density %>% {.$x[which.max(.$y)]}
  p4 <- dt[umi>=ct] %>% ggplot(aes(x=pct.intronic, color=called)) + geom_density() + 
    labs(title = g("Intronic density (UMI>={ct})"), x = "%Intronic", y = "Density") + 
    geom_vline(xintercept=max_density_x, color="red", linetype="dashed") +
    annotate(geom='text', label=round(max_density_x, 2), x=max_density_x-1, y=Inf, hjust=1, vjust=1, col="red") +
    theme_minimal() +
    theme(legend.position = "inside",
          legend.position.inside = c(0.05, 0.95),
          legend.justification.inside = c("left", "top"),
          legend.background = element_blank(),
          legend.spacing.y = unit(0.1,"lines"))
  
  plot <- plot_grid(p1, p2, p3, p4, ncol=2)
  return(plot)
}


# Page 3: UMAP + QC
plot_umaps <- function(obj) {
  mytheme <- function(){theme(plot.title=element_text(hjust=0.5), axis.title.x=element_blank(), axis.title.y=element_blank(), legend.position="top", legend.justification="center", legend.key.width=unit(2, "lines"))}
  p1 <- DimPlot(obj, label=T) + ggtitle(g("UMAP")) + mytheme() + NoLegend()  + coord_fixed(ratio=1)
  p2 <- VlnPlot(obj,"logumi", alpha=0) + mytheme() + NoLegend() 
  p3 <- FeaturePlot(obj,"percent.mt") + ggtitle("%MT") + mytheme() + coord_fixed(ratio=1) + 
    annotate("text", x = Inf, y = Inf, label = g("Median: {round(median(obj$percent.mt), 2)}%\nMean: {round(mean(obj$percent.mt), 2)}%"), hjust=1, vjust=1, size=2.5, color="black")
  if ("pct.intronic" %in% names(obj@meta.data)) {
    p4 <- FeaturePlot(obj, "pct.intronic") + ggtitle("%Intronic") + mytheme() + coord_fixed(ratio=1) +
      annotate("text", x = Inf, y = Inf, label = g("Median: {round(median(obj$pct.intronic), 2)}%\nMean: {round(mean(obj$pct.intronic), 2)}%"), hjust=1, vjust=1, size=2.5, color="black")
  } else {
    p4 <- gdraw("No intronic information")
  }
  plot <- plot_grid(p1, p2, p3, p4, ncol=2)
  return(plot)
}

################################################################################
### SB plots ###################################################################
################################################################################

# Page 4: Spatial library
plot_SBlibrary <- function(dt, f) {
  sb_pct_in_called_cells <- round(nrow(dt[!is.na(cb)])/nrow(dt) * 100, digits=2) %>% paste0("%")
  
  # Panel 1: Spatial barcodes per cell
  cb.data <- rbindlist(list(dt[is.na(cb), .N, cb_raw][,.(N)], dt[!is.na(cb), .N, cb][,.(N)]))[order(-N)]
  cb.data[, index := .I]
  cb.data <- cb.data[N != lag(N,1,0) | N != lead(N,1,0)]
  cb.data[, filter := "all cell barcodes"]
  
  cb.data2 <- dt[!is.na(cb), .N, cb][,.(N)][order(-N)]
  cb.data2[, index := .I]
  cb.data2 <- cb.data2[N != lag(N,1,0) | N != lead(N,1,0)]
  cb.data2[, filter := "called cell barcodes only"]
  
  p1 <- ggplot(mapping=aes(x=index, y=N, col=filter)) + 
    geom_line(data=cb.data) + geom_line(data=cb.data2) +
    scale_x_log10() + scale_y_log10() + theme_bw() + 
    ggtitle("SB UMI per cell") + ylab("SB UMI counts") + xlab("Cell barcode rank") +
    theme(legend.position = "inside",
          legend.position.inside = c(0.05, 0.05),
          legend.justification.inside = c("left", "bottom"),
          legend.background = element_blank(),
          legend.spacing.y = unit(0.1,"lines"),
          legend.title=element_blank()) +
    annotate("text", x = Inf, y = Inf, label = g("SB UMI in called cells: {sb_pct_in_called_cells}"), hjust = 1.02, vjust = 1.33)
  rm(cb.data, cb.data2) ; invisible(gc())
  
  # Panel 2: Spatial barcodes per bead
  sb.data <- dt[, .N, sb][, .(N)][order(-N)]
  sb.data[, index := .I]
  sb.data <- sb.data[N != lag(N,1,0) | N != lead(N,1,0)]
  sb.data[, filter := "all cell barcodes"]
  
  sb.data2 <- dt[!is.na(cb), .N, sb][, .(N)][order(-N)]
  sb.data2[, index := .I]
  sb.data2 <- sb.data2[N != lag(N,1,0) | N != lead(N,1,0)]
  sb.data2[, filter := "called cell barcodes only"]
  
  p2 <- ggplot(mapping=aes(x=index, y=N, col=filter)) + 
    geom_line(data=sb.data) + geom_line(data=sb.data2) +
    scale_x_log10() + scale_y_log10() + theme_bw() + 
    ggtitle("SB UMI per bead") + ylab("SB UMI counts") + xlab("Beads") +
    theme(legend.position = "inside",
          legend.position.inside = c(0.05, 0.05),
          legend.justification.inside = c("left", "bottom"),
          legend.background = element_blank(),
          legend.spacing.y = unit(0.1,"lines"),
          legend.title=element_blank())
  rm(sb.data, sb.data2) ; invisible(gc())
  
  # Panel 3: Spatial barcode library downsampling curve
  p3 <- data.frame(x = seq(0, 1, 0.05) * f("metadata/reads")/1000000,
                   y = f("metadata/downsampling")/1000000) %>% 
        ggplot(aes(x=x,y=y)) + geom_point() + theme_bw() + 
        xlab("Millions of SB reads") + ylab("Millions of filtered SB UMIs") + ggtitle("SB downsampling curve")
  
  # Panel 4: Reads per UMI distribution
  d <- dt[, .N, reads]
  d[, reads := pmin(reads, 10)]
  d <- d[, .(N = sum(N)), reads][order(reads)]
  
  total_reads = add.commas(f('metadata/reads'))
  sequencing_saturation = round((1 - nrow(dt) / sum(dt$reads)) * 100, 2) %>% paste0("%")
  
  p4 <- ggplot(d, aes(x=reads, y=N/1000/1000)) + geom_col() +
    theme_bw() + xlab("Reads per UMI") + ylab("Millions of filtered SB UMIs") + ggtitle("Reads per UMI") + 
    annotate("text", x = Inf, y = Inf, label = g("sequencing saturation = {sequencing_saturation}\ntotal reads = {total_reads}"), hjust = 1.02, vjust = 1.33) +
    scale_x_continuous(breaks=min(d$reads):max(d$reads), labels=(min(d$reads):max(d$reads)) %>% {ifelse(.==10, "10+", .)})
  
  return(plot_grid(p1, p2, p3, p4, ncol=2))
}


# Page 5: Bead plots
beadplot <- function(sb.data) {
  ggplot(sb.data, aes(x=x, y=y, col=umi)) +
    ggrastr::rasterize(geom_point(size=0.1, shape=16), dpi=200) +
    coord_fixed(ratio=1) +
    theme_classic() +
    labs(x="x", y="y") +
    scale_color_viridis(trans="log", option="B", name="UMI") + 
    ggtitle(g("SB UMI per bead"))
}
plot_SBplot <- function(dt, puckdf) {
  p1 <- merge(dt[, .(umi=.N), sb], puckdf, by = "sb")[order(umi)] %>% 
    beadplot() + ggtitle(g("SB UMI per bead (total)"))
  
  p2 <- merge(dt[!is.na(cb), .(umi=.N), sb], puckdf, by = "sb")[order(umi)] %>% 
    beadplot() + ggtitle(g("SB UMI per bead (called cells only)"))

  return(plot_grid(p1, p2, ncol=1))
}


# Page 6: Spatial library metrics
plot_SBmetrics <- function(metadata) {
  
  # Panel A1
  plot.df = list(
    c("Total Reads", metadata$SB_filtering[["reads_total"]] %>% add.commas),
    c("Final UMIs", metadata$SB_filtering[["UMIs_final"]] %>% add.commas),
    c("R1 <-> R2", metadata$SB_info$switch_R1R2),
    c("Remap CB", metadata$SB_info$remap_10X_CB),
    c("Bead type", metadata$SB_info$bead_type)
  ) %>% {do.call(rbind,.)} %>% as.data.frame %>% setNames(c("Metric", "Value"))
  A1 <- plot_grid(gdraw("Library information", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1, 0.6))
  
  # Panel A2
  header = c("Metric", metadata$puck_info$puck_name %>% str_remove("^Puck_") %>% str_remove("\\.csv$"))
  plot.df = list(c("Beads total", metadata$puck_info$num_beads %>% add.commas),
                 c("Beads removed", Reduce(`+`,metadata$puck_info[c("num_dup","num_N","num_degen","num_lowQ")]) %>% add.commas),
                 # c("Size", metadata$puck_info$puck_sizes),
                 c("Diameter", metadata$puck_info$puck_boundaries %>% {map2_dbl(head(.,-1), tail(.,-1), ~round(.y-.x, 2))}),
                 #c("Scaling factor", metadata$puck_info$scaling_factors %>% round(2)),
                 c("Final UMIs", metadata$puck_info$umi_final %>% add.commas)
  ) %>% {do.call(rbind,.)} %>% as.data.frame %>% setNames(header)
  A2 <- plot_grid(gdraw("Puck information", 13), plot.tab(plot.df), gdraw(""), ncol=1, rel_heights=c(0.1 ,0.5, 0.1))
  
  # Panel B1
  plot.df = data.frame(a=c("exact", "fuzzy", "GG", "none"),
                       b=metadata$UP_matching[c("exact", "fuzzy", "GG", "none")]) %>%
                       mutate(b=(b/sum(b)*100) %>% round(2) %>% paste0("%")) %>% unname
  B1 <- plot_grid(gdraw("UP matching", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1, 0.4))
  
  # Panel B2
  plot.df = data.frame(a=c("exact","fuzzy","ambig","none"),
                       b=metadata$SB_matching[c("exact","HD1","HD1ambig","none")] %>% {./sum(.)*100} %>% round(2) %>% unname %>% paste0("%")) %>% unname
  B2 <- plot_grid(gdraw("SB matching", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1, 0.4))
  
  # Panel B3
  plot.df = data.frame(a=c("exact","fuzzy","ambig","none"),
                       b=metadata$CB_matching[c("exact","fuzzy","ambig","none")] %>% {./sum(.)*100} %>% round(2) %>% unname %>% paste0("%")) %>% unname
  B3 <- plot_grid(gdraw("CB matching", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1, 0.4))
  
  # Panel C1
  plot.df = data.frame(a=c("LQ UMI", "No UP", "No SB", "LQ SB", "Uncalled CB", "Chimeric"),
                       b=metadata$SB_filtering[c("reads_noumi", "reads_noup", "reads_nosb", "reads_lqsb", "reads_nocb", "reads_chimeric")] %>% unname) %>% 
            setNames(c("Filter","Percent"))
  plot.df$Percent = as.numeric(plot.df$Percent) / (metadata$SB_filtering[["reads_total"]] - lag(cumsum(plot.df$Percent),1,0))
  plot.df$Percent = round(plot.df$Percent * 100, 2) %>% paste0("%")
  C1 <- plot_grid(gdraw("SB filtering", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1,0.7))
  
  # Panel C2
  if (metadata$SB_info$bead_type %in% c("V10","V17","V15","V16")) {
    plot.df = metadata$SB_fuzzy_position %>% {data.frame(pos=as.numeric(names(.)),count=as.numeric(unname(.)))}
    switch(metadata$SB_info$bead_type, 
           "V10"={u1=9  ; u2=26},
           "V17"={u1=10 ; u2=27},
           "V15"={u1=16 ; u2=25},
           "V16"={u1=18 ; u2=27}
    )
    plot.df$pos[plot.df$pos>=u1] %<>% add(u2-u1+1)
    C2 <- ggplot(plot.df, aes(x=pos,y=count)) + geom_col() + theme_bw() +
      geom_rect(aes(xmin=u1, xmax=u2, ymin=-Inf, ymax=Inf), fill="grey") +
      annotate("text", x=(u1+u2)/2, y=max(plot.df$count, na.rm=T)*0.1, label="UP Site", color="black") +
      xlab("Spatial barcode base position") + ylab("Fuzzy matches") + ggtitle("Location of spatial barcode fuzzy match")
  } else {
    C2 <- gdraw("Unrecognized bead type")
  }

  # Panel D
  plot.df <- data.frame(R1s=metadata$SB_info$R1s %>% basename,
                        R2s=metadata$SB_info$R1s %>% basename)
  if (nrow(plot.df) > 4) {
    plot.df <- rbind(plot.df[1:3,], data.frame(R1s=g("({nrow(plot.df)-3} more)"), R2s=g("({nrow(plot.df)-3} more)")))
  }
  D1 <- plot_grid(gdraw("Spatial library FASTQs", 13), plot.tab(plot.df), ncol=1, rel_heights=c(0.1, 0.5))
  
  plot <- plot_grid(
    gdraw("Spatial library metadata", 15),
    plot_grid(A1, A2, ncol=2, rel_widths = c(0.38, 0.62)),
    plot_grid(B1, B2, B3, ncol=3),
    plot_grid(C1, C2, ncol=2, rel_widths = c(0.35, 0.65)),
    D1,
    ncol=1,
    rel_heights = c(0.1*1.1, 0.7, 0.5, 0.8, 0.6)
  )
  
  return(plot)
}

################################################################################
### DBSCAN plots ###############################################################
################################################################################

plot_gdbscan_opt <- function(coords_global, mranges) {
  # Panel 1: DBSCAN cluster distribution
  d <- coords_global[, .(pct=.N/len(cb_whitelist)*100), pmin(clusters, 5)]
  d[, pmin := factor(pmin, levels=0:5, labels=c(0:4, "5+"))]
  p1 <- ggplot(d, aes(x=pmin, y=pct)) + geom_col() +
               geom_text(aes(label=sprintf("%1.0f%%",pct), y=pct), vjust=-0.5) +
               theme_classic() +
               scale_y_continuous(limits=c(0,100)) +
               labs(x="DBSCAN clusters", y="Percent", title="Cluster distribution")
  
  # Panel 2: minPts search
  d <- IRanges::IRanges(start=mranges$i2+1L, end=mranges$i1) %>% IRanges::coverage() %>% as.integer %>% {data.table(i=seq_along(.), p=.)}
  i_opt <- d[p==max(p), tail(i, 1)]
  minPts <- i_opt * ms
  pct.placed <- d[, round(max(p)/len(cb_whitelist)*100, 2)]
  d <- d[i >= 2 & i <= max(20, i_opt*2)]
  p2 <- ggplot(d, aes(x=i*ms, y=p/len(cb_whitelist)*100)) + geom_line() +
               geom_vline(xintercept=minPts, color="red", linetype="dashed") +
               annotate(geom='text',
                        label=g("minPts: {minPts}\nplaced: {pct.placed}%\neps: {round(eps,2)} | k: {k}"),
                        x=minPts+ms, y=0,
                        hjust=0, vjust=0, col="red") + 
               theme_bw() +
               labs(x="minPts", y="%Placed", title="Parameter optimization")
  
  # Panel 3: SB UMI distribution
  p3 <- ggplot(coords_global, aes(x=factor(pmin(clusters,5), levels=0:5, labels=c(0:4,"5+")), y=log10(umi))) + 
               geom_violin(scale="count") + 
               theme_classic() +
               labs(x="DBSCAN clusters", y="log10 SB UMI", title="SB UMI distribution") +
               annotate("text", x=Inf, y=-Inf,
                        label=g("Mean SB UMI per cell: {round(coords_global[,sum(umi)/len(cb_whitelist)],1)}"), 
                        hjust=1.05, vjust=-0.75, size=3)
  
  # Panel 4: Start vs. End
  p4 <- ggplot() + geom_density(data=mranges, aes(x=log10((i2+1L)*ms), color=factor("min",levels=c("min","max")))) +
                   geom_density(data=mranges, aes(x=log10(i1*ms), color=factor("max",levels=c("min","max")))) + 
                   geom_vline(xintercept=log10(minPts), color="blue", linetype="dashed") + 
                   theme_bw() +
                   labs(x="log10(minPts)", y="Density", title="minPts placement intervals") + 
                   scale_color_manual(name=NULL, values=c("min"="green3", "max"="red3")) +
                   theme(legend.position = "inside",
                         legend.position.inside = c(.99, .99),
                         legend.justification.inside = c("right", "top"),
                         legend.background = element_blank(),
                         legend.spacing.y = unit(0.1,"lines"))
  
  plot <- plot_grid(gdraw("Global DBSCAN Optimization"),
                   plot_grid(p1, p2, p3, p4, ncol=2),
                   ncol=1, rel_heights=c(0.05,0.95))
  return(plot)
}

plot_gdbscan_1 <- function(coords_global) {
  coords <- coords_global[clusters==1]
  
  # Panel 1: placements
  p1 <- ggplot(coords, aes(x=x1, y=y1)) + geom_point(size=0.1) +
    coord_fixed(ratio=1) + theme_classic() + 
    xlim(range(puckdf$x)) + ylim(range(puckdf$y)) +
    labs(x="x-position", y="y-position", title="DBSCAN=1 Placements")
  
  # Helper
  mydensplot <- function(coords, var, name, percent) {
    stopifnot(var %in% names(coords))
    max_density_x = mean(coords[[var]], na.rm=TRUE)
    ggplot(coords, aes(x = .data[[var]])) + geom_density() + 
      theme_minimal() +
      labs(x=name, y="Density", title=g("{name} density")) + 
      geom_vline(xintercept=max_density_x, color="red", linetype="dashed") +
      annotate(geom='text',
               label=ifelse(percent, g(" Mean: {round(max_density_x*100, 1)}%"), g(" Mean: {round(max_density_x, 2)}")),
               x=max_density_x, y=Inf,
               hjust=0, vjust=1, col="red")
  }
  
  # Panel 2: SNR
  coords[, SNR := umi1/umi]
  p2 <- mydensplot(coords, "SNR", "SNR", TRUE)
  
  # Panel 3: RMS
  p3 <- mydensplot(coords, "rmsd1", "RMSD", FALSE)
  
  # Panel 4: h-index
  p4 <- mydensplot(coords, "h1", "h-index", FALSE)
  
  plot <- plot_grid(gdraw("Global DBSCAN=1 Results"),
                    plot_grid(p1, p2, p3, p4, ncol=2),
                    ncol=1, rel_heights=c(0.05,0.95))
  return(plot)
}

plot_gdbscan_2 <- function(coords_global) {
  coords <- coords_global[clusters==2]
  
  # Panel 1: DBSCAN=1 placements
  p1 <- ggplot(coords, aes(x=x1, y=y1)) + geom_point(size=0.1) +
    coord_fixed(ratio=1) + theme_classic() + 
    xlim(range(puckdf$x)) + ylim(range(puckdf$y)) +
    labs(x="x-position", y="y-position", title="DBSCAN=1 Positions")
  
  # Panel 2: DBSCAN=2 placements
  p2 <- ggplot(coords, aes(x=x2, y=y2)) + geom_point(size=0.1) +
    coord_fixed(ratio=1) + theme_classic() + 
    xlim(range(puckdf$x)) + ylim(range(puckdf$y)) +
    labs(x="x-position", y="y-position", title="DBSCAN=2 Positions")
  
  # Panel 3: Violins
  p3 <- plot_grid(
    coords[,.(umi1,umi2)] %>% melt(measure.vars=c("umi1","umi2"), variable.name="pair", value.name="value") %>% 
      ggplot(aes(x=pair, y=log10(value), fill=pair)) + geom_violin(scale="count") + 
      theme_minimal() + 
      labs(x=NULL, y="log10(UMI)", title=NULL) + 
      guides(fill="none"),
    coords[,.(h1,h2)] %>% melt(measure.vars=c("h1","h2"), variable.name="pair", value.name="value") %>% 
      ggplot(aes(x=pair, y=value, fill=pair)) + geom_violin(scale="count") + 
      theme_minimal() + 
      labs(x=NULL, y="h-index", title=NULL) + 
      guides(fill="none"),
    coords[,.(rmsd1,rmsd2)] %>% melt(measure.vars=c("rmsd1","rmsd2"), variable.name="pair", value.name="value") %>% 
      ggplot(aes(x=pair, y=value, fill=pair)) + geom_violin(scale="count") + 
      theme_minimal() + 
      labs(x=NULL, y="RMSD", title=NULL) + 
      guides(fill="none"),
    nrow=1
  )
  
  # Panel 4: DBSCAN 1-2 Distance
  p4 <- ggplot(coords, aes(x=sqrt((x1-x2)^2+(y1-y2)^2))) + geom_histogram() + 
    theme_classic() +
    labs(x="Distance", y="Cells", title="DBSCAN 1-2 Distance")
  
  plot <- plot_grid(gdraw("Global DBSCAN=2 Results"),
                    plot_grid(p1, p2, plot_grid(gdraw("Distributions"),p3,ncol=1,rel_heights=c(0.05,0.95)), p4, ncol=2),
                    ncol=1, rel_heights=c(0.05,0.95))
  return(plot)
}

plot_ddbscan_xy <- function(coords_dynamic) {
  d <- coords_dynamic[!is.na(x1) & !is.na(y1)]
  p1 <- ggplot(d, aes(x=x1, y=y1, col=log10(umi1))) + geom_point(size=0.2) + 
               theme_classic() + coord_fixed(ratio=1) + 
               labs(title="1st DBSCAN", x="x", y="y", col="UMI")
  
  d <- coords_dynamic[!is.na(x2) & !is.na(y2)]
  p2 <- ggplot(d, aes(x=x2, y=y2,col=log10(umi2))) + geom_point(size=0.2) + 
               theme_classic() + coord_fixed(ratio=1) + 
               labs(title="2nd DBSCAN", x="x", y="y", col="UMI")
  
  d <- coords_dynamic[, .(r=sqrt((x2-x1)^2+(y2-y1)^2))][is.finite(r)]
  p3 <- ggplot(d, aes(x=r)) + geom_histogram(bins=30) +
               theme_classic() +
               labs(title="Distance between top-2 densities", x="Distance", y="Cells")
  
  plot <- plot_grid(gdraw("Dynamic DBSCAN Positions"),
                    plot_grid(p1, p2, p3, ncol=1),
                    ncol=1, rel_heights=c(0.05,0.95))
  return(plot)
}

# plot_ddbscan_stats <- function(coords_dynamic) {
#   d <- melt(coords_dynamic[,.(sumi1,sumi2)], measure.vars = c("sumi1","sumi2"), variable.name="var", value.name="val")
#   ggplot(d, aes(x=log10(val), col=var)) + geom_density()
# }

# plot_ddbscan_1 <- function() {}
# plot_ddbscan_opt <- function() {}

# plot_dbscan_comp <- function(gcoords, dcoords) {
#   stopifnot(nrow(gcoords) == nrow(dcoords),
#             "cb" %in% names(gcoords), "cb" %in% names(dcoords),
#             gcoords$cb == dcoords$cb)
# }
# 

################################################################################
### Per-cell plots #############################################################
################################################################################

cellplottheme <- theme(plot.title=element_text(hjust=0.5, size=12),
                       legend.background=element_blank(),
                       legend.margin=margin(0,0,0,0),
                       legend.key=element_blank(),
                       legend.key.height=unit(1,"lines"),    # colorbar height
                       legend.key.width=unit(0.5,"lines"),   # colorbar width
                       legend.key.spacing=unit(0.25,"lines"),# colorbar-lab spacing (x/y?)
                       legend.frame=element_blank(),
                       # legend.ticks*, legend.axis*, legend.text*
                       legend.title=element_blank(),
                       legend.position="right",
                       legend.direction="vertical"
                       #legend.byrow*, legend.justification*, legend.location*, legend.box*
                      )

my_geom_circle <- function(cx, cy, r) {
  geom_path(
    data = data.frame(x = cx + r*cos(seq(0, 2*pi, length.out=100)),
                      y = cy + r*sin(seq(0, 2*pi, length.out=100))),
    aes(x = x, y = y),
    color = "blue",
    linewidth = 0.3,
    linetype = "dashed"
  )
}

global_cellplot <- function(CB) {
  subdf <- data.list[[CB]][!is.na(x) & !is.na(y)][order(umi)]
  subdf1 <- subdf[cluster==1]
  subdf2 <- subdf[cluster==2]
  
  plot <- ggplot() + coord_fixed(ratio=1 ,xlim=range(puckdf$x), ylim=range(puckdf$y)) + theme_void() +
    geom_point(data=subdf, mapping=aes(x=x, y=y, col=umi), size=1, shape=20) +
    labs(title=g("[{CB}]"), col=NULL) +
    cellplottheme
    
  
  # Add the DBSCAN=1 point
  if(nrow(subdf1) > 0) {
    plot <- plot + geom_point(aes(x=weighted.mean(subdf1$x, w=subdf1$umi),
                                  y=weighted.mean(subdf1$y, w=subdf1$umi)),
                              color="red", shape=0, size=2)
  }
  # Add the DBSCAN=2 point
  if(nrow(subdf2) > 0) {
    plot <- plot + geom_point(aes(x=weighted.mean(subdf2$x, w=subdf2$umi),
                                  y=weighted.mean(subdf2$y, w=subdf2$umi)),
                              color="green", shape=0, size=2)
  }
  return(plot)
}

plot_global_cellplots <- function(data.list) {
  list0 <- map_lgl(data.list, ~max(.$cluster)==0 & nrow(.)>0) %>% {names(.)[.]}
  list1 <- map_lgl(data.list, ~max(.$cluster)==1 & nrow(.)>0) %>% {names(.)[.]}
  list2 <- map_lgl(data.list, ~max(.$cluster)==2 & nrow(.)>0) %>% {names(.)[.]}
  
  if(len(list0) > 0) {
    p0s <- sample(list0, min(12,len(list0)), replace=FALSE) %>% map(global_cellplot)
    p0 <- plot_grid(ggdraw()+draw_label("DBSCAN=0"),
                    plot_grid(plotlist=p0s, ncol=3),
                    ncol=1, rel_heights=c(0.1,2))
  } else {p0 <- gdraw("No DBSCAN=0")}
  if(len(list1) > 0) {
    p1s <- sample(list1, min(12,len(list1)), replace=FALSE) %>% map(global_cellplot)
    p1 <- plot_grid(ggdraw()+draw_label("DBSCAN=1"),
                    plot_grid(plotlist=p1s, ncol=3),
                    ncol=1, rel_heights=c(0.1,2))
  } else {p1 <- gdraw("No DBSCAN=1")}
  if(len(list2) > 0) {
    p2s <- sample(list2, min(12,len(list2)), replace=FALSE) %>% map(global_cellplot)
    p2 <- plot_grid(ggdraw()+draw_label("DBSCAN=2"),
                    plot_grid(plotlist=p2s, ncol=3),
                    ncol=1, rel_heights=c(0.1,2))
  } else {p2 <- gdraw("No DBSCAN=2")}
  
  plots <- list(p0, p1, p2)
  
  return(plots)
}

dynamic_cellplot <- function(CB) {
  dl <- data.list[[CB]][!is.na(x) & !is.na(y)][order(umi)]
  row <- coords_dynamic[cb == CB] %T>% {stopifnot(nrow(.) == 1)}
  
  # Zoomed out
  p1 <- ggplot() + geom_point(data=dl, mapping=aes(x=x, y=y, col=umi)) + 
    geom_point(data=row, mapping=aes(x=x1,y=y1),col="red",shape=0) + 
    geom_point(data=row, mapping=aes(x=x2,y=y2),col="green",shape=0) +
    geom_point(data=row, mapping=aes(x=x,y=y),col="red",shape=5) +
    theme_void() +
    coord_fixed(ratio=1) +
    xlim(range(puckdf$x)) + ylim(range(puckdf$y)) +
    cellplottheme
  
  # Zoomed in
  s <- row[, max(abs(x1-x2), abs(y1-y2))]
  ggplot() + geom_point(data=puckdf, mapping=aes(x=x,y=y), shape=1, col="grey") + 
    geom_point(data=dl, mapping=aes(x=x, y=y, col=umi)) + 
    geom_point(data=dl[clusters2==1], mapping=aes(x=x, y=y), col="red",   shape=1, stroke=0.2, size=1.5) +
    geom_point(data=dl[clusters2==2], mapping=aes(x=x, y=y), col="green", shape=1, stroke=0.2, size=1.5) + 
    geom_point(data=row, mapping=aes(x=x1,y=y1),col="red",shape=0) + 
    geom_point(data=row, mapping=aes(x=x2,y=y2),col="green",shape=0) +
    geom_point(data=row, mapping=aes(x=x,y=y),col="red",shape=5) + 
    my_geom_circle(row$x, row$y, eps) +
    theme_void() +
    coord_fixed(ratio=1) +
    xlim(row[,c(min(x1,x2)-s, max(x1,x2)+s)]) + 
    ylim(row[,c(min(y1,y2)-s, max(y1,y2)+s)]) + 
    cellplottheme
  
  p2 <- ggplot()
  
}

# data.list requires x/y/umi and clusters2
# coords_dynamic requires
plot_dynamic_cellplots <- function(data.list, coords_dynamic) {
  stopifnot(len(data.list) == nrow(coords_dynamic),
            names(data.list) == coords_dynamic$cb)
  
}

################################################################################
### RNA+Spatial plots ##########################################################
################################################################################

# Create DimPlot
plot_clusters <- function(obj, reduction) {
  npucks = (max(obj$x, na.rm=TRUE) - min(obj$x, na.rm=TRUE))/(max(obj$y, na.rm=TRUE) - min(obj$y, na.rm=TRUE))
  nclusters = len(unique(obj$seurat_clusters))
  ncols = round(sqrt(npucks*nclusters/2)/npucks*2) 
  
  m = obj@reductions[[reduction]]@cell.embeddings %>% {!is.na(.[,1]) & !is.na(.[,2])}
  title = g("%placed: {round(sum(m)/len(m)*100,2)} ({sum(m)}/{len(m)}) [{reduction}]")
  p1 = DimPlot(obj, reduction=reduction) + coord_fixed(ratio=1) +
    ggtitle(title) + NoLegend() + xlab("x-position") + ylab("y-position") + 
    theme(axis.title.x=element_text(size=12), axis.text.x=element_text(size=10)) +
    theme(axis.title.y=element_text(size=12), axis.text.y=element_text(size=10))
  p2 = DimPlot(obj, reduction=reduction, split.by="seurat_clusters", ncol=ncols) + theme_void() + coord_fixed(ratio=1) + NoLegend()
  plot = plot_grid(p1, p2, ncol=1, rel_heights=c(0.4,0.6))
  return(plot)
}

# RNA vs SB metrics
plot_RNAvsSB <- function(obj) {
  if (!is.null(Misc(obj,"coords_global")$umi)) {
    obj$sb_umi <- Misc(obj,"coords_global")$umi %>% tidyr::replace_na(0)
  } else if (!is.null(Misc(obj,"coords_global")$umi_dbscan)) {
    obj$sb_umi <- Misc(obj,"coords_global")$umi_dbscan %>% tidyr::replace_na(0)
  } else {
    obj$sb_umi = 0
  }
  
  obj$clusters <- Misc(obj,"coords_global")$clusters %>% tidyr::replace_na(0)
  obj$placed <- !is.na(obj$x) & !is.na(obj$y)
  
  p1 <- ggplot(obj@meta.data, aes(x=log10(nCount_RNA), y=log10(sb_umi), col=placed)) + 
    geom_point(size=0.2) + theme_bw() + xlab("log10 RNA UMI") + ylab("log10 SB UMI") + ggtitle("SB UMI vs. RNA UMI") + 
    labs(color = "placed") +
    theme(legend.position = c(0.95, 0.05),
          legend.justification = c("right", "bottom"),
          legend.background = element_blank(),
          legend.title=element_text(size=10),
          legend.text=element_text(size=8),
          legend.margin=margin(0,0,0,0,"pt"),
          legend.box.margin=margin(0,0,0,0,"pt"),
          legend.spacing.y = unit(0.1,"lines"),
          legend.key.size = unit(0.5, "lines"))
  
  d = obj@meta.data %>% rowwise %>% mutate(x=min(clusters,5)) %>% ungroup
  p2 <- ggplot(d, aes(x=as.factor(x), y=log10(nCount_RNA))) + geom_violin(scale="count") + 
    scale_x_discrete(breaks=min(d$x):max(d$x), labels=(min(d$x):max(d$x)) %>% {ifelse(.==5, "5+", .)}) +
    xlab("DBSCAN clusters") + ylab("log10 RNA UMI") + ggtitle("RNA UMI vs. DBSCAN cluster") + theme_classic()
  
  d = obj@meta.data %>% group_by(seurat_clusters) %>% summarize(pct.placed=paste0(round(sum(placed)/n()*100,2),"%")) %>% setNames(c("cluster","placed"))
  m = ceiling(nrow(d)/2) ; d1 = d[1:m,] ; d2 = d[(m+1):nrow(d),]
  p3 <- plot_grid(plot.tab(d1), plot.tab(d2), ncol=2)
  
  plot = plot_grid(gdraw("RNA vs. SB metrics"),
                   plot_grid(plot_grid(p1,p2,ncol=1), p3, ncol=2, rel_widths=c(0.5,0.5)),
                   ncol=1, rel_heights=c(0.05,0.95))
  return(plot)
}

# dbscan_vs_kde <- function(coords) {
#   coords %<>% mutate(dist=sqrt((x_um_dbscan-x_um_kde)^2+(y_um_dbscan-y_um_kde)^2))
#   
#   # Panel 1: Distance between DBSCAN and KDE assignments
#   d = filter(coords, !is.na(dist))
#   max_density_x = median(d$dist+1, na.rm=T) %>% log10
#   p1 <- ggplot(d, aes(x=log10(dist+1)))+geom_histogram(bins=30)+theme_bw()+
#     labs(title = "DBSCAN vs KDE distance", x = "log1p Distance (\u00B5m)", y = "Frequency") + 
#     geom_vline(xintercept = max_density_x, color = "red", linetype = "dashed") +
#     annotate(geom = 'text', label = round(10^max_density_x-1, 2) %>% paste0("\u00B5m"), x = max_density_x+0.1, y = Inf, hjust = 0, vjust = 1.3, col="red")
#   
#   # Panel 2: Distribution of KDE ratio for each DBSCAN cluster
#   d = coords %>% rowwise %>% mutate(clusters=min(clusters,5)) %>% ungroup
#   p2 <- ggplot(d, aes(x=clusters %>% as.factor, y=ratio)) + geom_violin() +
#     theme_classic() + xlab("DBSCAN clusters") + ylab("KDE ratio") +
#     scale_x_discrete(breaks=min(d$clusters):max(d$clusters), labels=(min(d$clusters):max(d$clusters)) %>% {ifelse(.==5, "5+", .)}) +
#     ggtitle("KDE ratio per DBSCAN cluster") + 
#     geom_hline(yintercept = 1/3, color = "red", linetype = "dashed")
#   
#   # Panel 3: KDE ratio for disagreeing placements
#   p3 <- coords %>% filter(clusters==1) %>% ggplot(aes(x=dist, y=ratio))+geom_point(size=0.5)+theme_bw()+
#     xlab("Distance between assignments")+ylab("KDE ratio")+ggtitle("Density ratio of disagreements")+
#     geom_hline(yintercept = 1/3, color = "red", linetype = "dashed")
#   
#   # Panel 4: Contingency table of placements
#   d = coords %>% mutate(dbscan_pass=clusters==1, kde_pass=ratio<1/3) %>% group_by(dbscan_pass, kde_pass) %>% summarize(n=n(), .groups="drop") %>% mutate(pct=g("{round(n/sum(n)*100,2)}%\n{n}"))
#   p4 <- ggplot(d, aes(x=dbscan_pass,y=kde_pass,fill=n))+geom_tile()+geom_text(label=d$pct)+theme_bw()+
#     xlab("DBSCAN=1")+ylab("KDE < 1/3")+theme(legend.position="none")+coord_fixed()+ggtitle("Placement table")
#   
#   plot = plot_grid(gdraw("DBSCAN vs. KDE Comparison"),
#                    plot_grid(p1, p2, p3, p4, ncol=2),
#                    ncol=1, rel_heights=c(0.05,0.95))
#   return(plot)
# }