// Profile to change parameters if needed
profiles {
  // Base autorun profile with settings that apply to all analysis types.
  autorun {
    // No cleanup, so that intermediate files are kept and runs can be resumed.
    cleanup = false
    params {
      // Specific nf-core/configs params
      config_profile_contact = 'Thiseas C. Lamnidis (@TCLamnidis)'
      config_profile_description = 'Autorun_eager profile for automated processing in EVA'
      // 22/10/2024 Use hard links to publish the output files instead of copying them over.
      //   This should decrease the I/O load to the server, thus lowering the chances of filesystem hiccups.
      publish_dir_mode = 'link'
    }

    process {
      // Submit all processes to the archgen.q queue.
      // The alternative all.q setting is kept commented out below for reference.
      // queue = "all.q"
      queue = "archgen.q"
    }
  }

  // A profile with all the local paths to required files. 
  // These will need to be provided manually by anyone wanting to reproduce the results outside of the EVA filesystem.
  local_paths {
    params {
      // Mapping reference and reference indexes
      // These are required by eager for Damage calculation etc. No mapping is taking place here.
      fasta                 = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa'
      fasta_index           = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa.fai'
      bwa_index             = '/mnt/archgen/Reference_Genomes/Human/hs37d5/'
      seq_dict              = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.dict'

      // NOTE: The same 1240K BED file is used below for on-target coverage, genotyping,
      //   sex determination and depth calculation. Profiles for other capture types
      //   (e.g. YC, IM) overwrite snpcapture_bed and anno_file with their own BED file.

      // Qualimap bedfile for on-target coverage calculation
      snpcapture_bed        = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'
      
      // Genotyping: positions to call (BED) and the matching SNP file.
      pileupcaller_bedfile  = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'
      pileupcaller_snpfile  = '/mnt/archgen/public_data/Datashare_Boston_Jena_June2018.backup/1240K.snp'

      // Sex determination
      sexdeterrmine_bedfile = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

      // 1240k depth calculation
      anno_file             = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

    }
  }
  // Profile with parameters for runs using the Human_SG bams as input.
  SG {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio    = true
      mtnucratio_header = "MT"

      // Ignore SNP capture bed for coverage calculations in non TF data.
      // NOTE(review): this is meant to unset the snpcapture_bed value from the local_paths profile;
      //   presumably that relies on how the two profiles are merged when used together - verify.
      snpcapture_bed    = null

      // Bam Trimming
      // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those)
      // dsDNA half-UDG are clipped 2bp on either side, while non-UDG are clipped 7bp
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_half_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      // Usually for dsDNA non-UDG libraries this is between 5 and 10. I have set it to 7 arbitrarily since that was a good cutoff in my own projects so far.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping
      genotyping_source             = 'trimmed'      // Use trimmed bams for genotyping
      run_genotyping                = true
      genotyping_tool               = 'pileupcaller'
      pileupcaller_min_map_quality  = 25             // To allow for reads aligning with a mismatch, and reduce reference bias in genotypes.
      pileupcaller_min_base_quality = 30

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // 1240k Coverage/Depth calculation (for poseidonisation)
      run_bedtools_coverage = true
    }
  }

  // Profile with parameters for runs using the Human_TF bams as input.
  // Currently identical to SG profile, except it keeps the snpcapture_bed option.
  TF {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio    = true
      mtnucratio_header = "MT"

      // Bam Trimming
      // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those)
      // dsDNA half-UDG are clipped 2bp on either side, while non-UDG are clipped 7bp
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_half_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      // Usually for dsDNA non-UDG libraries this is between 5 and 10. I have set it to 7 arbitrarily since that was a good cutoff in my own projects so far.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping
      genotyping_source             = 'trimmed'      // Use trimmed bams for genotyping
      run_genotyping                = true
      genotyping_tool               = 'pileupcaller'
      pileupcaller_min_map_quality  = 25             // To allow for reads aligning with a mismatch, and reduce reference bias in genotypes.
      pileupcaller_min_base_quality = 30

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // 1240k Coverage/Depth calculation (for poseidonisation)
      run_bedtools_coverage = true
    }
  }

  // Profile with parameters for runs using the Human_RP bams as input.
  // Currently identical to TF profile. Just keeps the RP data separate for comparison.
  RP {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio    = true
      mtnucratio_header = "MT"

      // Bam Trimming
      // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those)
      // dsDNA half-UDG are clipped 2bp on either side, while non-UDG are clipped 7bp
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_half_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      // Usually for dsDNA non-UDG libraries this is between 5 and 10. I have set it to 7 arbitrarily since that was a good cutoff in my own projects so far.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping
      genotyping_source             = 'trimmed'      // Use trimmed bams for genotyping
      run_genotyping                = true
      genotyping_tool               = 'pileupcaller'
      pileupcaller_min_map_quality  = 25             // To allow for reads aligning with a mismatch, and reduce reference bias in genotypes.
      pileupcaller_min_base_quality = 30

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // 1240k Coverage/Depth calculation (for poseidonisation)
      run_bedtools_coverage = true
    }
  }

  // Profile with parameters for runs using the Human_RM bams as input.
  // Currently identical to TF profile. Just keeps the RM data separate for comparison.
  RM {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio    = true
      mtnucratio_header = "MT"

      // Bam Trimming
      // ssDNA libraries are left untrimmed (pileupcaller deals with damage in those)
      // dsDNA half-UDG are clipped 2bp on either side, while non-UDG are clipped 7bp
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_half_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      // Usually for dsDNA non-UDG libraries this is between 5 and 10. I have set it to 7 arbitrarily since that was a good cutoff in my own projects so far.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping
      genotyping_source             = 'trimmed'      // Use trimmed bams for genotyping
      run_genotyping                = true
      genotyping_tool               = 'pileupcaller'
      pileupcaller_min_map_quality  = 25             // To allow for reads aligning with a mismatch, and reduce reference bias in genotypes.
      pileupcaller_min_base_quality = 30

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // 1240k Coverage/Depth calculation (for poseidonisation)
      run_bedtools_coverage = true
    }
  }

  // Profile with parameters for runs using the Human_Y bams as input.
  YC {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio    = true
      mtnucratio_header = "MT"

      // Bam Trimming
      // ssDNA libraries are left untrimmed; dsDNA half-UDG are clipped 2bp on either side, while non-UDG are clipped 7bp.
      // NOTE: Genotyping is disabled in this profile (see below), so the trimmed BAMs are not genotyped here.
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_half_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_left  = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_single_stranded_none_udg_right = 0    // Set to 0 so ssDNA do not get trimmed.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      // Usually for dsDNA non-UDG libraries this is between 5 and 10. I have set it to 7 arbitrarily since that was a good cutoff in my own projects so far.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping (disabled for this profile)
      run_genotyping = false

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // SNP Coverage/Depth calculation (uses the anno_file set at the end of this profile)
      run_bedtools_coverage = true

      // Local paths (to overwrite the ones in local_paths profile)
      // Qualimap bedfile for on-target coverage calculation
      snpcapture_bed        = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/ISOGG_20_05_08_hs37d5.bed'

      // SNP depth calculation
      anno_file             = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/ISOGG_20_05_08_hs37d5.bed'
    }
  }

  // Profile with parameters for runs using the Human_IM bams as input.
  IM {
    params{
      // BAM filtering
      run_bam_filtering             = true  // Filter out unmapped reads, so barplots in MultiQC are not completely overtaken by unmapped reads.
      bam_mapping_quality_threshold = 0     // Keep all mapped reads
      bam_unmapped_type             = 'bam' // Keep unmapped reads as a separate BAM file for possible future pathogen screening.
      bam_filter_minreadlength      = 30    // Open question: Do we need to add length filtering here at all? Does Kay's pre-processing do this?

      // mtDNA to nuclear ratio
      run_mtnucratio = true
      mtnucratio_header = "MT"

      // Bam Trimming
      // Trim 2 bp on all half-UDG libs, and 7 bp on all non-UDG libs, regardless of strandedness.
      // Since genotyping happens with GATK HC for IM results, no exception is made for ssDNA libs.
      run_trim_bam                                 = true
      bamutils_clip_single_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_single_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_single_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_single_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_half_udg_left  = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_half_udg_right = 2    // Trim 2 bp of either side for half-UDG libraries.
      bamutils_clip_double_stranded_none_udg_left  = 7    // Trim 7 bp of either side for non-UDG libraries.
      bamutils_clip_double_stranded_none_udg_right = 7    // Trim 7 bp of either side for non-UDG libraries.

      // Damage Calculation
      damage_calculation_tool = 'mapdamage'
      mapdamage_downsample    = 100000 // Use 100k reads for damage calculation to lower runtime.

      // Genotyping
      genotyping_source   = 'trimmed'  // Use trimmed bams for genotyping
      run_genotyping      = true
      genotyping_tool     = 'hc'
      gatk_hc_out_mode    = 'EMIT_ALL_ACTIVE_SITES'
      gatk_hc_emitrefconf = 'GVCF'
      //gatk_dbsnp = null  // Decided not to add a dbSNP file for IM data, as it only provides some annotations.

      // BCF stats
      run_bcftools_stats = true

      // Sex determination
      run_sexdeterrmine = true

      // Nuclear contamination
      run_nuclear_contamination = true
      contamination_chrom_name  = 'X'

      // SNP Coverage/Depth calculation (uses the anno_file set at the end of this params scope)
      run_bedtools_coverage = true

      // Local paths (to overwrite the ones in local_paths profile)
      // Qualimap bedfile for on-target coverage calculation
      snpcapture_bed        = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/IM_capture_hs37d5_HLAremoved.bed'

      // SNP depth calculation
      anno_file             = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/IM_capture_hs37d5_HLAremoved.bed'
    }

    process {
      // Memory requested for genotyping_hc escalates with the retry attempt:
      //   24 GB on the first attempt, 32 GB on the second, 48 GB from the third attempt onwards.
      // '>= 3' (rather than '== 3') ensures that any attempt beyond the third keeps the 48 GB
      //   request instead of falling back to the 24 GB base value.
      withName: genotyping_hc {
        memory = { task.attempt >= 3 ? 48.GB : task.attempt == 2 ? 32.GB : 24.GB }
      }
    }
  }
}