// Profiles used to adjust pipeline parameters when needed.
profiles {

    // Base autorun profile: settings shared by every analysis type.
    autorun {
        // Cleanup is switched off so that runs can still be resumed.
        cleanup = false

        params {
            // nf-core/configs descriptive parameters.
            config_profile_contact     = 'Thiseas C. Lamnidis (@TCLamnidis)'
            config_profile_description = 'Autorun_eager profile for automated processing in EVA'

            // 22/10/2024: publish output files as hard links instead of copying them over.
            // This should decrease the I/O load on the server and so lower the chance of
            // filesystem hiccups.
            publish_dir_mode = 'link'
        }

        process {
            // queue = "all.q"
            queue = "archgen.q"
        }
    }

    // Local paths to all required files.
    // Anyone reproducing the results outside of the EVA filesystem has to provide these manually.
    local_paths {
        params {
            // Mapping reference and its indexes.
            // eager requires these for damage calculation etc.; no mapping takes place here.
            fasta       = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa'
            fasta_index = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa.fai'
            bwa_index   = '/mnt/archgen/Reference_Genomes/Human/hs37d5/'
            seq_dict    = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.dict'

            // Qualimap BED file for on-target coverage calculation.
            snpcapture_bed = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

            // Genotyping.
            pileupcaller_bedfile = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'
            pileupcaller_snpfile = '/mnt/archgen/public_data/Datashare_Boston_Jena_June2018.backup/1240K.snp'

            // Sex determination.
            sexdeterrmine_bedfile = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

            // 1240k depth calculation.
            anno_file = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'
        }
    }

    // Parameters for runs that take the Human_SG bams as input.
    SG {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // Ignore the SNP capture BED for coverage calculations in non-TF data.
            snpcapture_bed = null

            // BAM trimming.
            // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those).
            // dsDNA half-UDG libraries are clipped 2 bp on either side, non-UDG ones 7 bp.
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_half_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            // For dsDNA non-UDG libraries this is usually between 5 and 10. It is set to 7
            // arbitrarily, since that was a good cutoff in my own projects so far.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping.
            genotyping_source             = 'trimmed'  // Genotype from the trimmed bams.
            run_genotyping                = true
            genotyping_tool               = 'pileupcaller'
            pileupcaller_min_map_quality  = 25  // Allows reads aligning with a mismatch, to reduce reference bias in genotypes.
            pileupcaller_min_base_quality = 30

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // 1240k coverage/depth calculation (for poseidonisation).
            run_bedtools_coverage = true
        }
    }

    // Parameters for runs that take the Human_TF bams as input.
    // NOTE(review): the original comment said "Human_RP" here, presumably a copy-paste slip — confirm.
    // Currently identical to the SG profile, except that it keeps the snpcapture_bed option.
    TF {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // BAM trimming.
            // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those).
            // dsDNA half-UDG libraries are clipped 2 bp on either side, non-UDG ones 7 bp.
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_half_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            // For dsDNA non-UDG libraries this is usually between 5 and 10. It is set to 7
            // arbitrarily, since that was a good cutoff in my own projects so far.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping.
            genotyping_source             = 'trimmed'  // Genotype from the trimmed bams.
            run_genotyping                = true
            genotyping_tool               = 'pileupcaller'
            pileupcaller_min_map_quality  = 25  // Allows reads aligning with a mismatch, to reduce reference bias in genotypes.
            pileupcaller_min_base_quality = 30

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // 1240k coverage/depth calculation (for poseidonisation).
            run_bedtools_coverage = true
        }
    }

    // Parameters for runs that take the Human_RP bams as input.
    // Currently identical to the TF profile; it only keeps the RP data separate for comparison.
    RP {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // BAM trimming.
            // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those).
            // dsDNA half-UDG libraries are clipped 2 bp on either side, non-UDG ones 7 bp.
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_half_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            // For dsDNA non-UDG libraries this is usually between 5 and 10. It is set to 7
            // arbitrarily, since that was a good cutoff in my own projects so far.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping.
            genotyping_source             = 'trimmed'  // Genotype from the trimmed bams.
            run_genotyping                = true
            genotyping_tool               = 'pileupcaller'
            pileupcaller_min_map_quality  = 25  // Allows reads aligning with a mismatch, to reduce reference bias in genotypes.
            pileupcaller_min_base_quality = 30

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // 1240k coverage/depth calculation (for poseidonisation).
            run_bedtools_coverage = true
        }
    }

    // Parameters for runs that take the Human_RM bams as input.
    // Currently identical to the TF profile; it only keeps the RM data separate for comparison.
    // NOTE(review): the original comment said "RP data" here, presumably a copy-paste slip — confirm.
    RM {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // BAM trimming.
            // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those).
            // dsDNA half-UDG libraries are clipped 2 bp on either side, non-UDG ones 7 bp.
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_half_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            // For dsDNA non-UDG libraries this is usually between 5 and 10. It is set to 7
            // arbitrarily, since that was a good cutoff in my own projects so far.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping.
            genotyping_source             = 'trimmed'  // Genotype from the trimmed bams.
            run_genotyping                = true
            genotyping_tool               = 'pileupcaller'
            pileupcaller_min_map_quality  = 25  // Allows reads aligning with a mismatch, to reduce reference bias in genotypes.
            pileupcaller_min_base_quality = 30

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // 1240k coverage/depth calculation (for poseidonisation).
            run_bedtools_coverage = true
        }
    }

    // Parameters for runs that take the Human_Y bams as input.
    YC {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // BAM trimming.
            // ssDNA libraries are left untrimmed; dsDNA half-UDG libraries are clipped 2 bp
            // on either side, non-UDG ones 7 bp.
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_half_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_left  = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_single_stranded_none_udg_right = 0  // 0 so ssDNA libraries do not get trimmed.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            // For dsDNA non-UDG libraries this is usually between 5 and 10. It is set to 7
            // arbitrarily, since that was a good cutoff in my own projects so far.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping is switched off in this profile.
            run_genotyping = false

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // 1240k coverage/depth calculation (for poseidonisation).
            run_bedtools_coverage = true

            // Local paths (these overwrite the ones set in the local_paths profile).
            // Qualimap BED file for on-target coverage calculation.
            snpcapture_bed = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/ISOGG_20_05_08_hs37d5.bed'
            // SNP depth calculation.
            anno_file      = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/ISOGG_20_05_08_hs37d5.bed'
        }
    }

    // Parameters for runs that take the Human_IM bams as input.
    IM {
        params {
            // BAM filtering.
            // Unmapped reads are filtered out so the MultiQC barplots are not completely
            // overtaken by them.
            run_bam_filtering             = true
            bam_mapping_quality_threshold = 0      // Keep all mapped reads.
            bam_unmapped_type             = 'bam'  // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
            bam_filter_minreadlength      = 30     // Do we need to add length filtering here at all? Does Kay's pre-processing do this?

            // mtDNA to nuclear ratio.
            run_mtnucratio    = true
            mtnucratio_header = "MT"

            // BAM trimming.
            // Trim 2 bp on all half-UDG libraries and 7 bp on all non-UDG libraries.
            // Since genotyping happens with GATK HC for IM results, no exception is made
            // for ssDNA libraries (unlike the profiles that genotype with pileupcaller).
            run_trim_bam = true
            bamutils_clip_single_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_single_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_single_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_single_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_half_udg_left  = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_half_udg_right = 2  // Trim 2 bp off either side of half-UDG libraries.
            bamutils_clip_double_stranded_none_udg_left  = 7  // Trim 7 bp off either side of non-UDG libraries.
            bamutils_clip_double_stranded_none_udg_right = 7  // Trim 7 bp off either side of non-UDG libraries.

            // Damage calculation.
            damage_calculation_tool = 'mapdamage'
            mapdamage_downsample    = 100000  // Use 100k reads for damage calculation to lower runtime.

            // Genotyping.
            genotyping_source   = 'trimmed'  // Genotype from the trimmed bams.
            run_genotyping      = true
            genotyping_tool     = 'hc'
            gatk_hc_out_mode    = 'EMIT_ALL_ACTIVE_SITES'
            gatk_hc_emitrefconf = 'GVCF'
            //gatk_dbsnp = null // Decided not to add a dbSNP file for IM data, as it only provides some annotations.

            // BCF stats.
            run_bcftools_stats = true

            // Sex determination.
            run_sexdeterrmine = true

            // Nuclear contamination.
            run_nuclear_contamination = true
            contamination_chrom_name  = 'X'

            // Coverage/depth calculation.
            run_bedtools_coverage = true

            // Local paths (these overwrite the ones set in the local_paths profile).
            // Qualimap BED file for on-target coverage calculation.
            snpcapture_bed = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/IM_capture_hs37d5_HLAremoved.bed'
            // SNP depth calculation.
            anno_file      = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/IM_capture_hs37d5_HLAremoved.bed'
        }

        process {
            withName: genotyping_hc {
                // Memory request grows with the attempt number: 24 GB by default,
                // 32 GB on the second attempt and 48 GB on the third.
                memory = { task.attempt == 3 ? 48.GB : (task.attempt == 2 ? 32.GB : 24.GB) }
            }
        }
    }
}