## ---- include = FALSE--------------------------------------------------------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)


## ----setup, results = "hide"-------------------------------------------------------------------------------------------------------------
# Attach the packages you will need for the analysis.
library(blaseRtools)
library(blaseRdata)
library(monocle3)
library(Seurat)
library(tidyverse)
library(cowplot)
theme_set(theme_cowplot(font_size = 12))


## ----------------------------------------------------------------------------------------------------------------------------------------
# Read in and inspect the configuration file.
vignette_config <- read_csv(system.file("extdata/vignette_config.csv", 
                                        package = "blaseRdata"), 
                            col_type = list(.default = col_character()))
vignette_config



## ----------------------------------------------------------------------------------------------------------------------------------------
# Fix the windows-style file path.
vignette_config <- vignette_config %>%
  mutate(targz_path = bb_fix_file_path(targz_path))
vignette_config


## ---- eval = FALSE-----------------------------------------------------------------------------------------------------------------------
## # Generate a list of CDS objects using purrr::map
## cds_list <- map(
##   .x = vignette_config$sample,
##   .f = function(x, conf = vignette_config) {
##     conf_filtered <- conf %>%
##       filter(sample == x)
##     cds <- bb_load_tenx_targz(targz_file = conf_filtered$targz_path,
##                               sample_metadata_tbl = conf_filtered %>%
##                                 select(-c(sample, targz_path))
##                               )
##     return(cds)
##   }
## ) %>%
##   set_names(nm = vignette_config$sample)


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # generate a list of qc results for individual CDS objects
## vig_qc_res <- pmap(.l = list(cds = cds_list,
##                              cds_name = names(cds_list),
##                              genome = rep("human", times = length(cds_list))),
##                    .f = bb_qc
##                    ) %>%
##   set_names(nm = names(cds_list))
## 


## ---- eval = FALSE, dev='png', fig.align='center', fig.width=4.5, fig.height=3.5---------------------------------------------------------
## vig_qc_res$chromium_controller[3]
## vig_qc_res$chromium_controller[4]


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## 
## # gets the number of cells in each cds and divides it by 100000
## anticipated_doublet_rate <- unlist(map(cds_list, ncol))/100000
## 
## # extracts the first element of the qc result list for each cds
## qc_calls <- map(vig_qc_res,1)
## 
## # generates a list of tables with doubletfinder results
## doubletfinder_list <-
##   pmap(
##     .l = list(
##       cds = cds_list,
##       doublet_prediction = anticipated_doublet_rate,
##       qc_table = qc_calls
##     ),
##     .f = bb_doubletfinder
##   ) %>%
##   set_names(names(cds_list))
## 
## 


## ---- eval = FALSE-----------------------------------------------------------------------------------------------------------------------
## # rejoins doubletfinder and qc data onto the list of CDS objects
## cds_list <- pmap(
##   .l = list(
##     cds = cds_list,
##     qc_data = qc_calls,
##     doubletfinder_data = doubletfinder_list
##   ),
##   .f = bb_rejoin
## )


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Merge the CDS list into a single CDS
## vignette_cds <- monocle3::combine_cds(cds_list = cds_list)


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Remove mitochondrial and ribosomal genes.
## vignette_cds <-
##   vignette_cds[rowData(vignette_cds)$gene_short_name %notin% hg38_remove_genes,]


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Remove the low-quality cells
## vignette_cds <- vignette_cds[,colData(vignette_cds)$qc.any == FALSE]
## 
## # Remove the high-confidence doublets
## vignette_cds <-
##   vignette_cds[,colData(vignette_cds)$doubletfinder_high_conf == "Singlet"]
## 
## # Now remove the qc and doubletfinder columns from the cell metadata because we are done with them.
## colData(vignette_cds)$qc.any <- NULL
## colData(vignette_cds)$doubletfinder_low_conf <- NULL
## colData(vignette_cds)$doubletfinder_high_conf <- NULL
## 


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Calculate the PCA dimensions
## vignette_cds <- preprocess_cds(vignette_cds)


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Calculate UMAP dimensions
## vignette_cds <- reduce_dimension(vignette_cds, cores = 40)


## ----------------------------------------------------------------------------------------------------------------------------------------
# Cell metadata
bb_cellmeta(vignette_cds) 

# Gene metadata
bb_rowmeta(vignette_cds)


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Align samples according to the equipment metadata column
## vignette_cds_aligned_temp <- bb_align(vignette_cds, align_by = "sample")
## 
## # Replace the original CDS with the Aligned CDS
## vignette_cds <- vignette_cds_aligned_temp
## rm(vignette_cds_aligned_temp)


## ---- dev='png', fig.align='center', fig.width=6.0, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "sample")


## ----------------------------------------------------------------------------------------------------------------------------------------
bb_cellmeta(vignette_cds)


## ---- dev='png', fig.align='center', fig.width=6.0, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "sample", 
            alt_dim_x = "prealignment_dim1", 
            alt_dim_y = "prealignment_dim2")


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Identify clusters and calculate top markers
## marker_file <- tempfile()
## vignette_cds <- bb_triplecluster(vignette_cds, n_top_markers = 50, outfile = marker_file, n_cores = 8)
## vignette_top_markers <- read_csv(marker_file)


## ----------------------------------------------------------------------------------------------------------------------------------------
vignette_top_markers


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "partition")
bb_var_umap(vignette_cds, var = "leiden")
bb_var_umap(vignette_cds, var = "louvain")


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Identify gene modules and add them to the gene metadata.
## vignette_cds <- bb_gene_modules(vignette_cds, n_cores = 24)


## ----------------------------------------------------------------------------------------------------------------------------------------
bb_rowmeta(vignette_cds)


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Annotate the PBMC data
## vignette_cds <- bb_seurat_anno(vignette_cds, reference = "~/workspace_pipelines/sc_refdata/seurat_pbmc_reference_20210506/pbmc_multimodal.h5seurat")


## ----------------------------------------------------------------------------------------------------------------------------------------
bb_cellmeta(vignette_cds)


## ---- dev='png', fig.align='center', fig.width=5.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, 
            var = "seurat_celltype_l1", 
            alt_dim_x = "seurat_dim1", 
            alt_dim_y = "seurat_dim2", 
            overwrite_labels = TRUE, 
            group_label_size = 4)


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "seurat_celltype_l1")


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "leiden", plot_title = "Leiden Clusters")


## ----------------------------------------------------------------------------------------------------------------------------------------
leiden_seurat <- bb_cellmeta(vignette_cds) %>%
  group_by(leiden, seurat_celltype_l1) %>%
  summarise(n = n())
leiden_seurat


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
ggplot(leiden_seurat, 
       mapping = aes(x = leiden, 
                     y = n, 
                     fill = seurat_celltype_l1)) +
  geom_bar(position="fill", stat="identity") + 
  scale_fill_brewer(palette = "Set1")


## ----eval = FALSE------------------------------------------------------------------------------------------------------------------------
## # Recode the leiden clusters with our cell assignments
## colData(vignette_cds)$leiden_assignment <- recode(colData(vignette_cds)$leiden,
##                                                   "1" = "T/NK",
##                                                   "2" = "DC/Mono",
##                                                   "3" = "B")


## ---- dev='png', fig.align='center', fig.width=6.0, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, var = "leiden_assignment")


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, 
            var = "leiden_assignment", 
            value_to_highlight = "T/NK", 
            cell_size = 2, 
            foreground_alpha = 0.4)


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, 
            var = "leiden_assignment", 
            value_to_highlight = "T/NK", 
            palette = "green4", 
            cell_size = 2, 
            foreground_alpha = 0.4)


## ---- dev='png', fig.align='center', fig.width=7.0, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, 
            var = "density", 
            facet_by = "equipment", 
            cell_size = 2, 
            plot_title = "Local Cell Density")


## ---- dev='png', fig.align='center', fig.width=7.0, fig.height=3.5-----------------------------------------------------------------------
bb_var_umap(vignette_cds, 
            var = "local_n", 
            nbin = 10, sample_equally = T, 
            facet_by = "equipment", 
            cell_size = 2, 
            plot_title = "Local N in 10 Bins")

bb_var_umap(vignette_cds, 
            var = "log_local_n", 
            nbin = 10, 
            sample_equally = T, 
            facet_by = "equipment", 
            cell_size = 2, 
            plot_title = "Log Local N in 10 Bins")


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_cluster_representation(cds = vignette_cds, 
                          cluster_var = "leiden_assignment", 
                          class_var = "equipment", 
                          experimental_class = "X", 
                          control_class = "chromium", 
                          return_value = "plot")


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_cluster_representation(cds = vignette_cds, 
                          cluster_var = "leiden_assignment", 
                          class_var = "equipment", 
                          experimental_class = "X", 
                          control_class = "chromium", 
                          return_value = "table")



## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_gene_umap(vignette_cds, 
             gene_or_genes = "CD3D")


## ---- dev='png', fig.align='center', fig.width=7.5, fig.height=3-------------------------------------------------------------------------
bb_gene_umap(vignette_cds, gene_or_genes = c("CD19", "CD3D", "CD14"))



## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
agg_genes <- 
  bb_rowmeta(vignette_cds) %>%
  select(id, module_labeled) %>%
  filter(module_labeled == "Module 1")
  

bb_gene_umap(vignette_cds, 
             gene_or_genes = agg_genes)


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_gene_dotplot(vignette_cds, 
                markers = c("CD3E", "CD14", "CD19"), 
                group_cells_by = "leiden_assignment")


## ---- dev='png', fig.align='center', fig.width=4.5, fig.height=3.5-----------------------------------------------------------------------
bb_gene_dotplot(vignette_cds, 
                markers = c("CD3E", "CD14", "CD19"), 
                group_cells_by = "leiden_assignment", 
                gene_ordering = c("CD19", "CD14", "CD3E"),
                group_ordering = c("T/NK", "DC/Mono", "B"))



## ---- dev='png', fig.align='center', fig.width=6.0, fig.height=3.5-----------------------------------------------------------------------
cell_groupings <-
  tribble(
  ~aesthetic,~variable, ~value, ~level,
  "facet","equipment", "chromium", 1,
  "facet","equipment", "X", 2,
  "axis", "leiden_assignment", "B", 1,
  "axis", "leiden_assignment", "T/NK", 2,
  "axis", "leiden_assignment", "DC/Mono", 3 
  )
    

bb_gene_dotplot(vignette_cds, 
                markers = c("CD3E", "CD14", "CD19"), 
                group_cells_by = "multifactorial", 
                gene_ordering = c("CD19", "CD14", "CD3E"),
                group_ordering = cell_groupings)


## ----------------------------------------------------------------------------------------------------------------------------------------
vignette_exp_design <- 
  bb_cellmeta(vignette_cds) %>%
  group_by(sample, leiden_assignment) %>%
  summarise()
vignette_exp_design


## ---- eval=FALSE-------------------------------------------------------------------------------------------------------------------------
## vignette_pseudobulk_res <-
##   bb_pseudobulk_mf(cds = vignette_cds,
##                    pseudosample_table = vignette_exp_design,
##                    design_formula = "~ leiden_assignment",
##                    result_recipe = c("leiden_assignment", "T/NK", "DC/Mono"))


## ----------------------------------------------------------------------------------------------------------------------------------------
# Detailed column headers for the results tables.
vignette_pseudobulk_res$Header 


## ----------------------------------------------------------------------------------------------------------------------------------------
# Differential expression results.  Positive L2FC indicates up in T/NK vs DC/Mono

vignette_pseudobulk_res$Result %>%
  filter(log2FoldChange > 0) %>%
  arrange(padj)
  
vignette_pseudobulk_res$Result %>%
  filter(log2FoldChange < 0) %>%
  arrange(padj)


## ----------------------------------------------------------------------------------------------------------------------------------------
vignette_regression_res <- bb_monocle_regression(cds = vignette_cds, 
                      gene_or_genes = c("CD19", "CD3D", "CD14"), 
                      form = "~leiden_assignment")
vignette_regression_res



## ----------------------------------------------------------------------------------------------------------------------------------------
vignette_regression_res$term