# Configuration for the HATCHet pipeline.
# Layout: one "key = value" pair per line; comments are full lines starting with '#'.

[run]
# What individual steps of HATCHet should we run in the pipeline?
# Valid values are True or False
download_panel = True
count_reads = True
genotype_snps = True
phase_snps = True
# True uses older fixed-width versions of some commands
fixed_width = False
count_alleles = True
combine_counts = True
cluster_bins = True
# True uses new locality-aware clustering
loc_clust = True
plot_bins = True
compute_cn = True
plot_cn = True

# What chromosome(s) do we wish to process in the pipeline? Leave unspecified to process
# all chromosomes found in the normal/tumor bam files
chromosomes =

# Path to reference genome
# Make sure you have also generated the reference dictionary as /path/to/reference.dict
reference = /path/to/reference.fa

# Make sure you have generated the .bam.bai files at the same locations as these bam files
normal = /path/to/normal.bam

# Space-delimited list of tumor BAM locations
bams = /path/to/tumor1.bam /path/to/tumor2.bam

# Space-delimited list of tumor names
samples = tumor1 tumor2

# Output path of the run script
output = output/

# How many cores to use for the end-end pipeline?
# This parameter, if specified, will override corresponding 'processes' parameters in individual sections below.
processes = 6

[download_panel]
# Reference panel name and its local directory
ref_panel = 1000GP_Phase3
refpaneldir = /path/to/reference/panel

[genotype_snps]
# Reference version used to select list of known germline SNPs;
# Possible values are "hg19" or "hg38", or leave blank "" if you wish for all positions to be genotyped by bcftools
reference_version = hg19

# Does your reference name chromosomes with "chr" prefix?; True or False
chr_notation = True

# Use 8 for WGS with >30x and 20 for WES with ~100x
mincov = 8

# Use 300 for WGS with >30x and Use 1000 for WES with ~100x
maxcov = 300

# Path to SNP list
# If unspecified, HATCHet selects a list of known germline SNPs based on
# reference_version and chr_notation (set above in this section).
# If not, please provide full path to a locally stored list (.vcf.gz) here.
snps =

[combine_counts]
# Minimum number of SNP-covering reads per bin and sample
msr = 5000

# Minimum number of total reads per bin and sample
mtr = 5000

[cluster_bins_loc]
diploidbaf = 0.08

# Minimum and maximum number of clusters to infer
# (using silhouette score for model selection)
minK = 2
maxK = 30

# You can instead specify an exact number of clusters:
# exactK = 15

[plot_bins]
sizethreshold = 0.01
# NOTE(review): the double quotes are kept as part of the value by parsers that
# do not strip quotes (e.g. Python configparser) - confirm the consumer expects them.
figsize = "6,3"

[compute_cn]
clones = 2,6
seeds = 400
minprop = 0.03
diploidcmax = 6
tetraploidcmax = 12
ghostprop = 0.35
limitinc = 0.6