require(STEGO.R)

# large file merging and annotating
# as the program window may disappear, we recommend merging and annotating using the following commands
###### Step 3a. Automated file filtering ------
automated_sc_filtering(dataset_type = "10x")

# BD Rhapsody (immune panel)
automated_sc_filtering(dataset_type = "BD_rap",features.min = 45,features.max = 160, percent.mt = 0, percent.rb = 0)

#For those with many file (>10) or large number of cells (>50,000), we recommend using the command line process, rather than the Shiny window. This is becasue of the multiple merging process which be more efficient with these commands.

###### Step 3b. merging Seurat object ------
# Check that you are in the correct working directory with your RDS files
merging_multi_SeuratRDS(set_directory = "3_SCobj/3a/", merge_RDS = F, pattern_RDS = ".rds$")

# once that is check, switch merge_RDS to TRUE or T

# if you have a focused sample

sc_merge <- merging_multi_SeuratRDS(set_directory = "3_SCobj/3a/", merge_RDS = T, pattern_RDS = ".rds$",Seurat_Version = "V5")

sc <- readRDS("3_SCobj/3b/sc_merge.rds")
## perform the harmony batch correction ------
sc <- harmony_batch_correction_1_variableFeatures(file = sc)
sc <- harmony_batch_correction_2_Scaling(file = sc, Seruat_version = "V5")
sc <- harmony_batch_correction_3_PC(file = sc)
sc <- harmony_batch_correction_4_Harmony(file = sc)

saveRDS(sc,paste0("3_SCobj/3b/sc_harmony.rds"))

#### Step 3c. annotating Seurat object (human) -----
sc <- readRDS("3_SCobj/3b/sc_harmony.rds")
# if working with BD Rhapsody focused panel check the feature expression and
scGate_annotating(
  file = sc,
  Threshold_test = T,
  signature_for_testing = c("CD8A","CD8B"),
  Version = "V5",
  reductionType = "harmony",
  threshold = 0.25,
  chunk_size = dim(sc@meta.data)[1]
)

# once checking the thresholds update here. Also change the chunk as required.
sc <- scGate_annotating(
          file = sc,
          TcellFunction = T,
          immune_checkpoint = T,
          senescence = T,
          cycling = T,
          Th1_cytokines = T,
          TCRseq = T,
          threshold = 0.25, # change to 0.55 if you use the focused immuen panel from BD rhapsody
          reductionType = "harmony",
          # set number of cells for each loop of the annotations. We recommended to use a maximum of 100,000 with 32 Gb of RAM.
          # This will prevent R from crashing, as testing on 1.13M cells found a limit of ~150,000 cells and quite slow.
          Version = "V5",
          chunk_size = 50000
        )

sc@meta.data$Cell_Index_old <- sc@meta.data$Cell_Index
sc@meta.data$Cell_Index <- rownames(sc@meta.data)
head(sc@meta.data)
saveRDS(sc,"3_SCobj/3c/sc_anno.rds")

# use the interface for step 3d if you need to remove additional samples (e.g., keep only the TCR and GEx)

# save the keep file or the 3c into the 4_Analysis file and proceed to the analysis in the interface

# loading RDS file
sc <- readRDS("path/to/RDS/file")
