---
# Run configuration for the ENACT pipeline ("colon-demo" analysis).
#
# Layout:
#   analysis_name / run_synthetic / cache_dir - run identity and cache location
#   paths        - input files for the sample
#   steps        - which pipeline stages to execute
#   params       - analysis parameters
#   stardist     - StarDist segmentation settings
#   cell_markers - marker genes per cell type (alternative marker sets are
#                  kept below as commented-out blocks)

analysis_name: "colon-demo"
# true to run bin-to-cell assignment on the synthetic dataset, false otherwise.
run_synthetic: false
cache_dir: "/home/oneai/enact-pipeline/ENACT_supporting_files/output_files"

paths:
  wsi_path: "/home/oneai/enact-pipeline/ENACT_supporting_files/public_data/human_colorectal/input_files/Visium_HD_Human_Colon_Cancer_tissue_image.btf"
  visiumhd_h5_path: "/home/oneai/enact-pipeline/ENACT_supporting_files/public_data/human_colorectal/input_files/filtered_feature_bc_matrix.h5"
  tissue_positions_path: "/home/oneai/enact-pipeline/ENACT_supporting_files/public_data/human_colorectal/input_files/tissue_positions.parquet"

steps:
  segmentation: true  # true to run segmentation
  bin_to_geodataframes: true  # true to convert bins to geodataframes
  bin_to_cell_assignment: true  # true to run bin-to-cell assignment
  cell_type_annotation: true  # true to run cell type annotation

params:
  seg_method: "stardist"  # StarDist is the only option for now
  # Image type. Options: "he" (H&E image) or "if" (IF image).
  image_type: "he"
  # Enable nuclei expansion to get cell boundaries.
  nucleus_expansion: true
  # Number of bins to expand the nuclei by to get cell boundaries.
  expand_by_nbins: 2
  # Patch size: the whole-resolution image is broken into patches of this size.
  patch_size: 4000
  # "polygon" or "point". TODO: Remove support for anything else
  bin_representation: "polygon"
  # Options: "weighted_by_area", "naive", "weighted_by_cluster",
  # "weighted_by_gene".
  bin_to_cell_method: "weighted_by_area"
  cell_annotation_method: "celltypist"
  # Only needed when cell_annotation_method is "celltypist".
  cell_typist_model: "Human_Colorectal_Cancer.pkl"
  # Only run the analysis on highly variable genes + the cell markers specified.
  use_hvg: true
  n_hvg: 1000  # number of highly variable genes to use
  # Enable destripe normalization (Bin2cell normalization).
  destripe_norm: false
  n_clusters: 4  # number of clusters for Weighted-by-Cluster
  # Number of principal components before clustering for Weighted-by-Cluster.
  n_pcs: 250
  # Chunks to run ENACT on (specific patches).
  chunks_to_run: []

stardist:
  # Size of the image blocks the model processes at a time.
  block_size: 4096
  # Value between 0 and 1; higher values lead to fewer segmented objects, but
  # will likely avoid false positives.
  prob_thresh: 0.005
  # Value between 0 and 1; higher values allow segmented objects to overlap
  # substantially.
  overlap_thresh: 0.001
  # Overlap between blocks; should be larger than the size of a cell.
  min_overlap: 128
  # Context pixels around the blocks to be included during prediction.
  context: 128
  # The input image is broken up into (overlapping) tiles that are processed
  # independently and re-assembled. This denotes a tuple of the number of tiles
  # for every image axis.
  # NOTE(review): YAML has no tuple type, so this loads as the string
  # "(4,4,1)"; presumably the consumer parses it - confirm before changing the
  # format.
  n_tiles: "(4,4,1)"
  # One of the available StarDist models: 2D_versatile_fluo (for IF images) or
  # 2D_versatile_he (for H&E images).
  stardist_modelname: "2D_versatile_he"
  # Only applicable for IF images. This is the image channel to segment
  # (usually the DAPI channel).
  channel_to_segment: 2

cell_markers:
  # Human Colon
  Epithelial: ["CDH1", "EPCAM", "CLDN1", "CD2"]
  Enterocytes: ["CD55", "ELF3", "PLIN2", "GSTM3", "KLF5", "CBR1", "APOA1", "CA1", "PDHA1", "EHF"]
  Goblet cells: ["MANF", "KRT7", "AQP3", "AGR2", "BACE2", "TFF3", "PHGR1", "MUC4", "MUC13", "GUCA2A"]
  Enteroendocrine cells: ["NUCB2", "FABP5", "CPE", "ALCAM", "GCG", "SST", "CHGB", "IAPP", "CHGA", "ENPP2"]
  Crypt cells: ["HOPX", "SLC12A2", "MSI1", "SMOC2", "OLFM4", "ASCL2", "PROM1", "BMI1", "EPHB2", "LRIG1"]
  Endothelial: ["PECAM1", "CD34", "KDR", "CDH5", "PROM1", "PDPN", "TEK", "FLT1",
    "VCAM1", "PTPRC", "VWF", "ENG", "MCAM", "ICAM1", "FLT4"]
  Fibroblast: ["COL1A1", "COL3A1", "COL5A2", "PDGFRA", "ACTA2", "TCF21", "FN"]
  Smooth muscle cell: ["BGN", "MYL9", "MYLK", "FHL2", "ITGA1", "ACTA2", "EHD2", "OGN", "SNCG", "FABP4"]
  B cells: ["CD74", "HMGA1", "CD52", "PTPRC", "HLA-DRA", "CD24", "CXCR4", "SPCS3", "LTB", "IGKC"]
  T cells: ["JUNB", "S100A4", "CD52", "PFN1P1", "CD81", "EEF1B2P3", "CXCR4", "CREM", "IL32", "TGIF1"]
  NK cells: ["S100A4", "IL32", "CXCR4", "FHL2", "IL2RG", "CD69", "CD7", "NKG7", "CD2", "HOPX"]

  # # Human Pancreas
  # Acinar_cell: ["PRSS1", "KLK1","CTRC", "PNLIP"]
  # Alpha_cell: ["GCG", "ARX", "CLIM1", "CRYBA2", "FEV", "GBA", "HMGB3"]
  # Beta_cell: ["INS", "BMP-5", "CDKN1C", "CRTR1", "DLK1", "NPTX2", "PACAP"]
  # Delta_cell: ["SST", "CHE1", "ESE3B", "ETV1", "GABRG2", "HER4", "ISL1"]
  # Ductal_cell: ["PROM1"]
  # Epsilon cell: ["GHRL", "TM4SF5"]
  # Mesenchymal_cell: ["THY1"]
  # Pancreatic_polypeptide_cell: [
  #   "AQP3", "ARHGAP3", "ARX", "BHLHB26", "BHLHB27",
  #   "CARTPT", "EGR3", "ENTPD2", "ETV1", "MEIS1",
  #   "MEIS2", "PAX6", "PTGFR", "RBTN3", "SERTM1",
  #   "SLITRK6", "THSD7A", "ZNF506"
  # ]
  # PP_cell: ["PPY"]

  # # Human breast cancer
  # Cancer stem cell: ["CD133", "ALDH1", "SOX2", "OCT4", "CD44"]
  # Epithelial cell: ["EPCAM", "KRT8", "KRT18", "CDH1", "CLDN1", "MUC1"]
  # Immune cell: ["CD45", "CD3", "CD19", "CD14", "CD56"]
  # Natural killer cell: ["CD56", "CD16", "NKp46", "NKG2D", "CD94"]
  # Progenitor cell: ["Nestin", "CD34", "Sox2", "GATA2", "LGR5"]
  # Stem cell: ["OCT4", "SOX2", "NANOG", "KLF4", "CD34"]

  # # Mouse intestine
  # Enterocytes: ["Cbr1", "Plin2", "Gls", "Plin3", "Dab1", "Pmepa1", "Acsl5", "Hmox1", "Abcg2", "Cd36"]
  # Goblet cells: ["Manf", "Krt7", "Ccl9", "Muc13", "Phgr1", "Cdx2", "Aqp3", "Creb3L1", "Guca2A", "Klk1"]
  # Enteroendocrine cells: ["Fabp5", "Cpe", "Enpp2", "Chgb", "Alcam", "Chga", "Pax6", "Neurod1", "Cck", "Isl1"]
  # Paneth cells: ["Gpx2", "Fabp4", "Lyz1", "Kcnn4", "Lgals2", "Guca2B", "Lgr4", "Defa24", "Il4Ra", "Guca2A"]
  # Crypt cells: ["Prom1", "Hopx", "Msi1", "Olfm4", "Kcne3", "Bmi1", "Axin2", "Kcnq1", "Ascl2", "Lrig1"]
  # Smooth muscle cells: ["Bgn", "Myl9", "Pcp4L1", "Itga1", "Nrp2", "Mylk", "Ehd2", "Fabp4", "Acta2", "Ogn"]
  # B cells: ["Cd52", "Bcl11A", "Ebf1", "Cd74", "Ptprc", "Pold4", "Ighm", "Cd14", "Creld2", "Fli1"]
  # T cells: ["Cd81", "Junb", "Cd52", "Ptprcap", "H2-Q7", "Ccl6", "Bcl2", "Maff", "Ccl4", "Ccl3"]
  # NK cells: ["Ctla2A", "Ccl4", "Cd3G", "Ccl3", "Nkg7", "Lat", "Dusp2", "Itgam", "Fhl2", "Ccl5"]

  # # Mouse embryo
  # 1-cell stage cell (Blastomere): ['Accsl', 'Acvr1b', 'Asf1b', 'Bcl2l10', 'Blcap', 'Cdk2ap2', 'Ciapin1', 'Dclk2', 'Dusp7', 'H1foo']
  # Blood progenitor cell: ['Flk1', 'Runx1', 'Tal1', 'Runx1']
  # Cardiomyocyte: ['Bmp4', 'Emcn', 'Fbn1', 'Gata4', 'Hand1', 'Hand2', 'Mef2c', 'Myl4', 'Neb', 'Nid1']
  # Fibroblast: ['Col5a2', 'Thy1']
  # Oocyte: ['Abi3bp', 'Ampd3', 'Ankra2', 'Cep78', 'Cnn3', 'Dclre1a', 'Dcun1d5', 'Depdc7', 'Dnajc3', 'Dpy30']
  # Pharyngeal mesoderm cell: ['Prdm1', 'Tbx1']
  # Pre-haematopoietic stem cell: ['2410004N09Rik', '9030617O03Rik', '9030619P08Rik', 'Ablim1', 'Acot11', 'Akr1c14', 'Angpt1', 'Ank', 'Anpep', 'Art4']
  # Primitive erythroid cell: ['Gata1', 'Hbb-bh1', 'Klf1']
  # Primitive streak cell: ['Nanog', 'Pou5f1']
  # Venous cell: ['Apj', 'Coup-tf2', 'Dab2', 'EphB4', 'Nrp2', 'Tie-2']

  # # Human Tonsil
  # Epithelial: ["EPCAM"]
  # Endothelial: ["PECAM1", "CD34", "KDR", "CDH5", "PROM1", "PDPN", "TEK", "FLT1", "VCAM1", "PTPRC", "VWF", "ENG", "MCAM", "ICAM1", "FLT4"]
  # Fibroblast: ["COL1A1", "COL3A1", "COL5A2", "PDGFRA", "ACTA2", "TCF21", "FN"]
  # B_cells: ["CD74", "HMGA1", "CD52", "PTPRC", "HLA-DRA", "CD24", "CXCR4", "SPCS3", "LTB", "IGKC"]
  # T_cells: ["JUNB", "S100A4", "CD52", "PFN1P1", "CD81", "EEF1B2P3", "CXCR4", "CREM", "IL32", "TGIF1"]
  # NK_cells: ["S100A4", "IL32", "CXCR4", "FHL2", "IL2RG", "CD69", "CD7", "NKG7", "CD2", "HOPX"]