params.outdir = 'results' params.genome_build = "" //* @dropdown @options:"human_hg19, mouse_mm10, mousetest_mm10, custom" params.run_ChIP_MACS2 = "yes" //* @dropdown @options:"yes","no" @show_settings:"ChIP_Prep","bedtools_coverage" params.run_IGV_TDF_Conversion = "no" //* @dropdown @options:"yes","no" @show_settings:"IGV_BAM2TDF_converter" params.run_RSeQC = "no" //* @dropdown @options:"yes","no" params.run_Picard_CollectMultipleMetrics = "no" //* @dropdown @options:"yes","no" params.run_BigWig_Conversion = "no" //* @dropdown @options:"yes","no" def _species; def _build; def _share; //* autofill _nucleicAcidType = "dna" params.nucleicAcidType = "dna" if (params.genome_build == "mousetest_mm10"){ _species = "mousetest" _build = "mm10" } else if (params.genome_build == "human_hg19"){ _species = "human" _build = "hg19" } else if (params.genome_build == "mouse_mm10"){ _species = "mouse" _build = "mm10" } if ($HOSTNAME == "default"){ _share = "/mnt/efs/share/genome_data" $SINGULARITY_IMAGE = "shub://UMMS-Biocore/singularitysc" $SINGULARITY_OPTIONS = "--bind /mnt" } //* platform if ($HOSTNAME == "garberwiki.umassmed.edu"){ _share = "/share/dolphin/genome_data" $SINGULARITY_IMAGE = "shub://UMMS-Biocore/singularitysc" $SINGULARITY_OPTIONS = "--bind /project --bind /share" } else if ($HOSTNAME == "ghpcc06.umassrc.org"){ _share = "/share/data/umw_biocore/genome_data" $SINGULARITY_IMAGE = "/project/umw_biocore/singularity/UMMS-Biocore-singularitysc-master-latest.simg" $SINGULARITY_OPTIONS = "--bind /project --bind /share --bind /nl" $TIME = 500 $CPU = 1 $MEMORY = 32 $QUEUE = "long" } //* platform if (params.genome_build && $HOSTNAME){ params.genomeDir ="${_share}/${_species}/${_build}/" params.genome ="${_share}/${_species}/${_build}/${_build}.fa" params.bed_file_genome ="${_share}/${_species}/${_build}/${_build}.bed" params.ref_flat ="${_share}/${_species}/${_build}/ref_flat" params.genomeSizePath ="${_share}/${_species}/${_build}/${_build}.chrom.sizes" params.gtfFilePath ="${_share}/${_species}/${_build}/ucsc.gtf" params.genomeIndexPath ="${_share}/${_species}/${_build}/${_build}" params.bowtieInd_rRNA = "${_share}/${_species}/${_build}/commondb/rRNA/rRNA" params.bowtieInd_ercc = "${_share}/${_species}/${_build}/commondb/ercc/ercc" params.bowtieInd_miRNA ="${_share}/${_species}/${_build}/commondb/miRNA/miRNA" params.bowtieInd_tRNA = "${_share}/${_species}/${_build}/commondb/tRNA/tRNA" params.bowtieInd_piRNA = "${_share}/${_species}/${_build}/commondb/piRNA/piRNA" params.bowtieInd_snRNA = "${_share}/${_species}/${_build}/commondb/snRNA/snRNA" params.bowtieInd_rmsk = "${_share}/${_species}/${_build}/commondb/rmsk/rmsk" } if ($HOSTNAME){ params.igvtools_path = "/usr/local/bin/dolphin-bin/IGVTools/igvtools.jar" params.picard_path = "/usr/local/bin/dolphin-bin/picard-tools-1.131/picard.jar" params.bedtools_path = "/usr/local/bin/dolphin-bin/bedtools2/bedtools" params.samtools_path = "/usr/local/bin/dolphin-bin/samtools-1.2/samtools" params.bowtie2_path = "/usr/local/bin/dolphin-bin/bowtie2" params.trimmomatic_path = "/usr/local/bin/dolphin-bin/trimmomatic-0.32.jar" params.fastx_trimmer_path = "/usr/local/bin/dolphin-bin/fastx_trimmer" params.peakrescore_path = "/usr/local/bin/dolphin-bin/scripture/peakrescore.jar" params.peakrescore_class_path = "/usr/local/bin/dolphin-bin/scripture" params.pdfbox_path = "/usr/local/bin/dolphin-bin/pdfbox-app-2.0.0-RC2.jar" params.genomeCoverageBed_path = "/usr/local/bin/dolphin-bin/genomeCoverageBed" params.wigToBigWig_path = "/usr/local/bin/dolphin-bin/wigToBigWig" } //* if (!params.reads){params.reads = ""} if (!params.mate){params.mate = ""} Channel .fromFilePairs( params.reads , size: (params.mate != "pair") ? 1 : 2 ) .ifEmpty { error "Cannot find any reads matching: ${params.reads}" } .into{g_1_reads_g51_0;g_1_reads_g51_3} Channel.value(params.mate).into{g_2_mate_g_43;g_2_mate_g_48;g_2_mate_g_68;g_2_mate_g51_0;g_2_mate_g51_3;g_2_mate_g51_1;g_2_mate_g51_2;g_2_mate_g53_82;g_2_mate_g53_95;g_2_mate_g53_111;g_2_mate_g67_3;g_2_mate_g67_10;g_2_mate_g69_19;g_2_mate_g69_20} params.run_FastQC = "no" //* @dropdown @options:"yes","no" if (params.run_FastQC == "no") { println "INFO: FastQC will be skipped"} process Adapter_Trimmer_Quality_Module_FastQC { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.(html|zip)$/) "fastqc/$filename" } input: val mate from g_2_mate_g51_3 set val(name), file(reads) from g_1_reads_g51_3 output: file '*.{html,zip}' into g51_3_FastQCout_g_79 errorStrategy 'retry' maxRetries 3 when: (params.run_FastQC && (params.run_FastQC == "yes")) script: nameAll = reads.toString() if (nameAll.contains('.gz')) { file = nameAll - '.gz' - '.gz' runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { file = nameAll runGzip = '' } """ ${runGzip} fastqc ${file} """ } params.run_Adapter_Removal = "no" //* @dropdown @options:"yes","no" @show_settings:"Adapter_Removal" params.trimmomatic_path = "" //* @input Adapter_Sequence = "" //* @textbox @description:"Removes 3' Adapter Sequences. You can enter a single sequence or multiple sequences in different lines. Reverse sequences will not be removed." @tooltip:"Trimmomatic is used for adapter removal" if (!((params.run_Adapter_Removal && (params.run_Adapter_Removal == "yes")) || !params.run_Adapter_Removal)){ g_1_reads_g51_0.into{g51_0_reads_g51_1} } else { process Adapter_Trimmer_Quality_Module_Adapter_Removal { input: set val(name), file(reads) from g_1_reads_g51_0 val mate from g_2_mate_g51_0 output: set val(name), file("reads/*") into g51_0_reads_g51_1 when: (params.run_Adapter_Removal && (params.run_Adapter_Removal == "yes")) || !params.run_Adapter_Removal shell: nameAll = reads.toString() nameArray = nameAll.split(' ') file2 = "" if (nameAll.contains('.gz')) { newName = nameArray[0] - ~/(\.fastq.gz)?(\.fq.gz)?$/ file1 = nameArray[0] - '.gz' if (mate == "pair") {file2 = nameArray[1] - '.gz'} runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { newName = nameArray[0] - ~/(\.fastq)?(\.fq)?$/ file1 = nameArray[0] if (mate == "pair") {file2 = nameArray[1]} runGzip = '' } ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; system("mkdir reads adapter unpaired"); open(OUT, ">adapter/adapter.fa"); my @adaps=split(/\n/,"!{Adapter_Sequence}"); my $i=1; foreach my $adap (@adaps) { print OUT ">adapter$i\\n$adap\\n"; $i++; } close(OUT); my $cmd = ""; system("!{runGzip}"); $cmd="java -jar !{params.trimmomatic_path}"; if ("!{mate}" eq "pair") { system("$cmd PE -threads 1 -phred64 -trimlog !{name}.log !{file1} !{file2} reads/!{name}.1.fastq unpaired/!{name}.1.fastq.unpaired reads/!{name}.2.fastq unpaired/!{name}.1.fastq.unpaired ILLUMINACLIP:adapter/adapter.fa:1:30:5 MINLEN:20"); } else { system("$cmd SE -threads 1 -phred64 -trimlog !{name}.log !{file1} reads/!{name}.fastq ILLUMINACLIP:adapter/adapter.fa:1:30:5 MINLEN:15"); } ''' } } params.run_Trimmer = "no" //* @dropdown @options:"yes","no" @show_settings:"Trimmer" if (params.run_Trimmer != "yes") {println "INFO: Trimmer will be skipped"} params.fastx_trimmer_path = "" //* @input single_or_paired_end_reads = "" //* @dropdown @options:"single","pair" trim_length_5prime = 0 //* @input @description:"Trimming length from 5' end" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" trim_length_3prime = 0 //* @input @description:"Trimming length from 3' end" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" trim_length_5prime_R1 = 0 //* @input @description:"Trimming length from 5' end of R1 reads" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" trim_length_3prime_R1 = 0 //* @input @description:"Trimming length from 3' end of R1 reads" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" trim_length_5prime_R2 = 0 //* @input @description:"Trimming length from 5' end of R2 reads" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" trim_length_3prime_R2 = 0 //* @input @description:"Trimming length from 3' end of R2 reads" @tooltip:"Fastx trimmer (Version: 0.0.13) is used for trimming" //* @style @multicolumn:{trim_length_5prime,trim_length_3prime}, {trim_length_5prime_R1,trim_length_3prime_R1}, {trim_length_5prime_R2,trim_length_3prime_R2} @condition:{single_or_paired_end_reads="single", trim_length_5prime,trim_length_3prime}, {single_or_paired_end_reads="pair", trim_length_5prime_R1,trim_length_3prime_R1,trim_length_5prime_R2,trim_length_3prime_R2} //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill if (!((params.run_Trimmer && (params.run_Trimmer == "yes")) || !params.run_Trimmer)){ g51_0_reads_g51_1.into{g51_1_reads_g51_2} } else { process Adapter_Trimmer_Quality_Module_Trimmer { input: set val(name), file(reads) from g51_0_reads_g51_1 val mate from g_2_mate_g51_1 output: set val(name), file("reads/*") into g51_1_reads_g51_2 when: (params.run_Trimmer && (params.run_Trimmer == "yes")) || !params.run_Trimmer shell: nameAll = reads.toString() nameArray = nameAll.split(' ') file2 = "" if (nameAll.contains('.gz')) { newName = nameArray[0] - ~/(\.fastq.gz)?(\.fq.gz)?$/ file1 = nameArray[0] - '.gz' if (mate == "pair") {file2 = nameArray[1] - '.gz'} runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { newName = nameArray[0] - ~/(\.fastq)?(\.fq)?$/ file1 = nameArray[0] if (mate == "pair") {file2 = nameArray[1]} runGzip = '' } ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; system("mkdir reads"); system("!{runGzip}"); if ("!{mate}" eq "pair") { my $file1 = "!{file1}"; my $file2 = "!{file2}"; my $trim1 = "!{trim_length_5prime_R1}:!{trim_length_3prime_R1}"; my $trim2 = "!{trim_length_5prime_R2}:!{trim_length_3prime_R2}"; my ($format, $len)=getFormat($file1); print "length of $file1: $len\\n"; trimFiles($file1, $trim1, $format, $len); my ($format, $len)=getFormat($file2); print "length of $file2: $len\\n"; trimFiles($file2, $trim2, $format, $len); } else { my $file1 = "!{file1}"; my $trim1 = "!{trim_length_5prime}:!{trim_length_3prime}"; my ($format, $len)=getFormat($file1); print "length of file1 $len\\n"; trimFiles($file1, $trim1, $format, $len); } sub trimFiles { my ($file, $trim, $format, $len)=@_; my $cmd="!{params.fastx_trimmer_path}"; my @nts=split(/[,:\\s\\t]+/,$trim); my $inpfile=""; my $com=""; my $i=1; my $outfile=""; my $param=""; my $quality=""; #if ($format eq "sanger") #{ $quality="-Q33"; #} if (scalar(@nts)==2) { $param = "-f ".($nts[0]+1) if (exists($nts[0]) && $nts[0] >= 0 ); $param .= " -l ".($len-$nts[1]) if (exists($nts[0]) && $nts[1] > 0 ); print "parameters for $file: $param \\n "; $outfile="reads/$file"; $com="$cmd $quality -v $param -o $outfile -i $file " if ((exists($nts[0]) && $nts[0] > 0) || (exists($nts[0]) && $nts[1] > 0 )); print "$com\\n"; if ($com eq ""){ print "trimmer skipped for $file \\n"; system("mv $file reads/."); } else { system("$com"); print "trimmer executed for $file \\n"; } } } # automatic format detection sub getFormat { my ($filename)=@_; # set function variables open (IN, $filename); my $j=1; my $qualities=""; my $len=0; while(my $line=) { chomp($line); if ($j >50) { last;} if ($j%4==0) { $qualities.=$line; $len=length($line); } $j++; } close(IN); my $format = ""; # set regular expressions my $sanger_regexp = qr/[!"#$%&'()*+,-.\\/0123456789:]/; my $solexa_regexp = qr/[\\;<=>\\?]/; my $solill_regexp = qr/[JKLMNOPQRSTUVWXYZ\\[\\]\\^\\_\\`abcdefgh]/; my $all_regexp = qr/[\\@ABCDEFGHI]/; # set counters my $sanger_counter = 0; my $solexa_counter = 0; my $solill_counter = 0; # check qualities if( $qualities =~ m/$sanger_regexp/ ){ $sanger_counter = 1; } if( $qualities =~ m/$solexa_regexp/ ){ $solexa_counter = 1; } if( $qualities =~ m/$solill_regexp/ ){ $solill_counter = 1; } # determine format if( $sanger_counter ){ $format = "sanger"; }elsif($solexa_counter ){ $format = "solexa"; }elsif($solill_counter ){ $format = "illumina"; } # return file format return( $format,$len ); } ''' } } params.run_Quality_Filtering = "no" //* @dropdown @options:"yes","no" @show_settings:"Quality_Filtering" if (params.run_Quality_Filtering != "yes") {println "INFO: Quality_Filtering will be skipped"} params.trimmomatic_path = "" //* @input window_size = 10 //* @input @description:"Performs a sliding window trimming approach. It starts scanning at the 5' end and clips the read once the average quality within the window falls below a threshold (=required_quality). " @tooltip:"Trimmomatic 0.32 is used for quality filtering" required_quality_for_window_trimming = 15 //* @input @description:"specifies the average quality required for window trimming approach" @tooltip:"Trimmomatic 0.32 is used for quality filtering" leading = 5 //* @input @description:"Cut bases off the start of a read, if below a threshold quality" @tooltip:"Trimmomatic 0.32 is used for quality filtering" trailing = 5 //* @input @description:"Cut bases off the end of a read, if below a threshold quality" @tooltip:"Trimmomatic 0.32 is used for quality filtering" minlen = 36 //* @input @description:"Specifies the minimum length of reads to be kept" @tooltip:"Trimmomatic 0.32 is used for quality filtering" //* @style @multicolumn:{window_size,required_quality}, {leading,trailing,minlen} //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill if (!((params.run_Quality_Filtering && (params.run_Quality_Filtering == "yes")) || !params.run_Quality_Filtering)){ g51_1_reads_g51_2.into{g51_2_reads_g69_19} } else { process Adapter_Trimmer_Quality_Module_Quality_Filtering { input: set val(name), file(reads) from g51_1_reads_g51_2 val mate from g_2_mate_g51_2 output: set val(name), file("reads/*") into g51_2_reads_g69_19 when: (params.run_Quality_Filtering && (params.run_Quality_Filtering == "yes")) || !params.run_Quality_Filtering shell: nameAll = reads.toString() nameArray = nameAll.split(' ') file2 =""; if (nameAll.contains('.gz')) { newName = nameArray[0] - ~/(\.fastq.gz)?(\.fq.gz)?$/ file1 = nameArray[0] - '.gz' if (mate == "pair") {file2 = nameArray[1] - '.gz'} runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { newName = nameArray[0] - ~/(\.fastq)?(\.fq)?$/ file1 = nameArray[0] if (mate == "pair") {file2 = nameArray[1]} runGzip = '' } ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; system("mkdir reads unpaired publish"); if ("!{params.run_Quality_Filtering}" eq "yes") { system("!{runGzip}"); my $param = "SLIDINGWINDOW:"."!{window_size}".":"."!{required_quality_for_window_trimming}"; $param.=" LEADING:"."!{leading}"; $param.=" TRAILING:"."!{trailing}"; $param.=" MINLEN:"."!{minlen}"; my $format=getFormat("!{file1}"); my $quality=""; if ($format eq "sanger") { $quality="-phred33"; } elsif ($format eq "illumina") { $quality="-phred64"; } else { $quality="-phred33"; } print($quality); my $cmd="java -jar !{params.trimmomatic_path}"; if ("!{mate}" eq "pair") { system("$cmd PE -threads 1 $quality -trimlog !{name}.log !{file1} !{file2} reads/!{name}.1.fastq unpaired/!{name}.1.fastq.unpaired reads/!{name}.2.fastq unpaired/!{name}.1.fastq.unpaired $param"); } else { system("$cmd SE -threads 1 $quality -trimlog !{name}.log !{file1} reads/!{name}.fastq $param"); } system("cd publish && ln -s ./../reads/* ."); } else { system("mv !{reads} reads/."); system("touch publish/Quality_Filtering.skipped"); } # automatic format detection sub getFormat { my ($filename)=@_; # set function variables open (IN, $filename); my $j=1; my $qualities=""; while(my $line=) { if ($j >50) { last;} if ($j%4==0) { $qualities.=$line; } $j++; } close(IN); my $format = ""; # set regular expressions my $sanger_regexp = qr/[!"#$%&'()*+,-.\\/0123456789:]/; my $solexa_regexp = qr/[\\;<=>\\?]/; my $solill_regexp = qr/[JKLMNOPQRSTUVWXYZ\\[\\]\\^\\_\\`abcdefgh]/; my $all_regexp = qr/[\\@ABCDEFGHI]/; # set counters my $sanger_counter = 0; my $solexa_counter = 0; my $solill_counter = 0; # check qualities if( $qualities =~ m/$sanger_regexp/ ){ $sanger_counter = 1; } if( $qualities =~ m/$solexa_regexp/ ){ $solexa_counter = 1; } if( $qualities =~ m/$solill_regexp/ ){ $solill_counter = 1; } # determine format if( $sanger_counter ){ $format = "sanger"; }elsif($solexa_counter ){ $format = "solexa"; }elsif($solill_counter ){ $format = "illumina"; } # return file format return( $format ); } ''' } } params.bowtie2_path = "" //* @input params.samtools_path = "" //* @input params.bowtieInd_rRNA = "" //* @input params.bowtieInd_ercc = "" //* @input params.bowtieInd_miRNA = "" //* @input params.bowtieInd_tRNA = "" //* @input params.bowtieInd_piRNA = "" //* @input params.bowtieInd_snRNA = "" //* @input params.bowtieInd_rmsk = "" //* @input params.run_Sequential_Mapping = "no" //* @dropdown @options:"yes","no" @show_settings:"Sequential_Mapping" bowtieIndexes = [rRNA: params.bowtieInd_rRNA, ercc: params.bowtieInd_ercc, miRNA: params.bowtieInd_miRNA, tRNA: params.bowtieInd_tRNA, piRNA: params.bowtieInd_piRNA, snRNA: params.bowtieInd_snRNA, rmsk: params.bowtieInd_rmsk] //_nucleicAcidType="dna" should be defined in the autofill section of pipeline header in case dna is used. _select_sequence = "" //* @dropdown @description:"Select sequence for mapping" @title:"Sequence Set for Mapping" @options:{"rRNA","ercc","miRNA","tRNA","piRNA","snRNA","rmsk","custom"},{_nucleicAcidType="dna","ercc","rmsk","custom"} index_directory = "" //* @input @description:"index directory of sequence(full path)" @tooltip:"In order to map your reads to a custom sequence, you first must create an index file and that fasta must be in the same folder. Please remove all spacing from the naming of sequences within your fasta file in order for our pipeline to properly prepare quantification tables. The index directory must include the full path and the name of the index file must only be the prefix of the fasta. Index files and Fasta files also need to have the same prefix." name_of_the_index_file = "" //* @input @autofill:{_select_sequence=("rRNA","ercc","miRNA","tRNA","piRNA","snRNA","rmsk"), _select_sequence},{_select_sequence="custom", " "} @description:"Name of the index file (prefix)" @tooltip:"In order to map your reads to a custom sequence, you first must create an index file and that fasta must be in the same folder. Please remove all spacing from the naming of sequences within your fasta file in order for our pipeline to properly prepare quantification tables. The index directory must include the full path and the name of the index file must only be the prefix of the fasta. Index files and Fasta files also need to have the same prefix." bowtie_Parameters = "-N 1" //* @input @description:"Bowtie2 parameters." description = "" //* @input @autofill:{_select_sequence=("rRNA","ercc","miRNA","tRNA","piRNA","snRNA","rmsk"), _select_sequence},{_select_sequence="custom", " "} @description:"Description of index file (please don't use comma or quotes in this field" filter_Out = "Yes" //* @dropdown @dropdown @options:"Yes","No" @description:"Select whether or not you want the reads mapped to this index filtered out of your total reads." desc_all=[] description.eachWithIndex() {param,i -> if (param.isEmpty()){ desc_all[i] = name_of_the_index_file[i] } else { desc_all[i] = param.replaceAll("[ |.|;]", "_") } } custom_index=[] index_directory.eachWithIndex() {param,i -> if (_select_sequence[i] != "custom"){ custom_index[i] = bowtieIndexes[_select_sequence[i]] } else { custom_index[i] = param+"/"+name_of_the_index_file[i] } } mapList = [] paramList = [] filterList = [] indexList = [] //concat default mapping and custom mapping mapList = (desc_all) paramList = (bowtie_Parameters) filterList = (filter_Out) indexList = (custom_index) mappingList = mapList.join(" ") // convert into space separated format in order to use in bash for loop paramsList = paramList.join(",") // convert into comma separated format in order to use in as array in bash filtersList = filterList.join(",") // convert into comma separated format in order to use in as array in bash indexesList = indexList.join(",") // convert into comma separated format in order to use in as array in bash //* @style @condition:{_select_sequence="custom", index_directory,name_of_the_index_file,description,bowtie_Parameters,filter_Out},{_select_sequence=("rRNA","ercc","miRNA","tRNA","piRNA","snRNA","rmsk"),bowtie_Parameters,filter_Out} @array:{_select_sequence,_select_sequence, index_directory,name_of_the_index_file,bowtie_Parameters,filter_Out,description} @multicolumn:{_select_sequence,_select_sequence,index_directory,name_of_the_index_file,bowtie_Parameters,filter_Out, description} //* autofill if ($HOSTNAME == "default"){ $CPU = 1 $MEMORY = 20 } //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 2500 $CPU = 1 $MEMORY = 20 $QUEUE = "long" } //* platform //* autofill if (!(params.run_Sequential_Mapping == "yes")){ g51_2_reads_g69_19.into{g69_19_reads_g_68} g69_19_bowfiles_g69_20 = Channel.empty() g69_19_bowfiles_g_79 = Channel.empty() g69_19_bam_file_g69_23 = Channel.empty() g69_19_bam_index_g69_23 = Channel.empty() g69_19_filter_g69_20 = Channel.empty() } else { process Sequential_Mapping_Module_Sequential_Mapping { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /bowfiles\/.*$/) "sequential_mapping/$filename" else if (filename =~ /.*\/.*sorted.bam$/) "sequential_mapping/$filename" else if (filename =~ /.*\/.*sorted.bam.bai$/) "sequential_mapping/$filename" } input: set val(name), file(reads) from g51_2_reads_g69_19 val mate from g_2_mate_g69_19 output: set val(name), file("final_reads/*") into g69_19_reads_g_68 set val(name), file("bowfiles/*") optional true into g69_19_bowfiles_g69_20, g69_19_bowfiles_g_79 file "*/*sorted.bam" optional true into g69_19_bam_file_g69_23 file "*/*sorted.bam.bai" optional true into g69_19_bam_index_g69_23 val filtersList into g69_19_filter_g69_20 when: params.run_Sequential_Mapping == "yes" script: nameAll = reads.toString() nameArray = nameAll.split(' ') def file2; if (nameAll.contains('.gz')) { newName = nameArray[0] - ~/(\.fastq.gz)?(\.fq.gz)?$/ file1 = nameArray[0] - '.gz' if (mate == "pair") {file2 = nameArray[1] - '.gz'} runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { newName = nameArray[0] - ~/(\.fastq)?(\.fq)?$/ file1 = nameArray[0] if (mate == "pair") {file2 = nameArray[1]} runGzip = '' } """ #!/bin/bash mkdir reads final_reads bowfiles if [ -n "${mappingList}" ]; then $runGzip #rename files to standart format if [ "${mate}" == "pair" ]; then mv $file1 ${name}.1.fastq 2>/dev/null mv $file2 ${name}.2.fastq 2>/dev/null mv ${name}.1.fastq ${name}.2.fastq reads/. else mv $file1 ${name}.fastq 2>/dev/null mv ${name}.fastq reads/. fi #sequential mapping k=0 prev="reads" IFS=',' read -r -a paramsListAr <<< "${paramsList}" #create comma separated array IFS=',' read -r -a filtersListAr <<< "${filtersList}" IFS=',' read -r -a indexesListAr <<< "${indexesList}" wrkDir=\$(pwd) for rna_set in ${mappingList} do ((k++)) printf -v k2 "%02d" "\$k" #turn into two digit format mkdir -p \${rna_set}/unmapped cd \$rna_set if [ "\${filtersListAr[\$k-1]}" == "Yes" ]; then ln -s \${wrkDir}/\${prev}/* . prev=\${rna_set}/unmapped else ln -s \${wrkDir}/\${prev}/* . fi if [ -e "\${indexesListAr[\$k-1]}.1.bt2" -o -e "\${indexesListAr[\$k-1]}.fa" -o -e "\${indexesListAr[\$k-1]}.fasta" ]; then if [ -e "\${indexesListAr[\$k-1]}.fa" ] ; then fasta=\${indexesListAr[\$k-1]}.fa elif [ -e "\${indexesListAr[\$k-1]}.fasta" ] ; then fasta=\${indexesListAr[\$k-1]}.fasta fi if [ -e "\${indexesListAr[\$k-1]}.1.bt2" ] ; then echo "\${indexesListAr[\$k-1]}.1.bt2 index found." elif [ -e "\${indexesListAr[\$k-1]}.fa" ] ; then ${params.bowtie2_path}-build \${indexesListAr[\$k-1]}.fa \${indexesListAr[\$k-1]} elif [ -e "\${indexesListAr[\$k-1]}.fasta" ] ; then ${params.bowtie2_path}-build \${indexesListAr[\$k-1]}.fasta \${indexesListAr[\$k-1]} fi if [ "${mate}" == "pair" ]; then ${params.bowtie2_path} \${paramsListAr[\$k-1]} -x \${indexesListAr[\$k-1]} --no-unal --un-conc unmapped/${name}.unmapped.fastq -1 ${name}.1.fastq -2 ${name}.2.fastq --al-conc ${name}.fq.mapped -S \${rna_set}_${name}_alignment.sam > \${k2}_${name}.bow_\${rna_set} 2>&1 else ${params.bowtie2_path} \${paramsListAr[\$k-1]} -x \${indexesListAr[\$k-1]} --no-unal --un unmapped/${name}.unmapped.fastq -U ${name}.fastq --al ${name}.fq.mapped -S \${rna_set}_${name}_alignment.sam > \${k2}_${name}.bow_\${rna_set} 2>&1 fi ${params.samtools_path} view -bT \$fasta \${rna_set}_${name}_alignment.sam > \${rna_set}_${name}_alignment.bam if [ "${mate}" == "pair" ]; then mv \${rna_set}_${name}_alignment.bam \${rna_set}_${name}_alignment.tmp1.bam ${params.samtools_path} sort -n \${rna_set}_${name}_alignment.tmp1.bam \${rna_set}_${name}_alignment.tmp2 ${params.samtools_path} view -bf 0x02 \${rna_set}_${name}_alignment.tmp2.bam >\${rna_set}_${name}_alignment.bam fi ${params.samtools_path} sort \${rna_set}_${name}_alignment.bam \${rna_set}@${name}_sorted ${params.samtools_path} index \${rna_set}@${name}_sorted.bam for file in unmapped/*; do mv \$file \${file/.unmapped/}; done ##remove .unmapped from filename grep -v Warning \${k2}_${name}.bow_\${rna_set} > ${name}.tmp mv ${name}.tmp \${k2}_${name}.bow_\${rna_set} cp \${k2}_${name}.bow_\${rna_set} ./../bowfiles/. cd .. else cd unmapped ln -s \${wrkDir}/\${rna_set}/*fastq . cd .. cd .. fi done cd final_reads && ln -s \${wrkDir}/\${prev}/* . else mv ${reads} final_reads/. fi """ } } mappingListQuoteSep = mapList.collect{ '"' + it + '"'}.join(",") rawIndexList = indexList.collect{ '"' + it + '"'}.join(",") process Sequential_Mapping_Module_Sequential_Mapping_Bam_count { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.counts.tsv$/) "sequential_mapping_counts/$filename" } input: file bam from g69_19_bam_file_g69_23.collect() file index from g69_19_bam_index_g69_23.collect() output: file "*.counts.tsv" into g69_23_outputFileTSV shell: ''' #!/usr/bin/env perl use List::Util qw[min max]; use File::Basename; use Getopt::Long; use Pod::Usage; use Data::Dumper; my @header; my %all_files; my @mappingList = (!{mappingListQuoteSep}); my @rawIndexList = (!{rawIndexList}); my %indexHash; my $dedup = ""; @indexHash{@mappingList} = @rawIndexList; chomp(my $contents = `ls *.bam`); my @files = split(/[\\n]+/, $contents); foreach my $file (@files){ $file=~/(.*)@(.*)_sorted(.*)\\.bam/; my $mapper = $1; my $name = $2; ##header print $3; if ($3 eq ".dedup"){ $dedup = "dedup."; } push(@header, $name) unless grep{$_ eq $name} @header; #mapped element header push @{$all_files{$mapper}}, $file; } open OUT, ">header.tsv"; print OUT join ("\\t", "id","len",@header),"\\n"; close OUT; foreach my $key (sort keys %all_files) { my @array = @{ $all_files{$key} }; unless (-e "$indexHash{$key}.bed") { print "2: bed not found run makeBed\\n"; if (-e "$indexHash{$key}.fa") { makeBed("$indexHash{$key}.fa", $key, "$indexHash{$key}.bed"); } elsif(-e "$indexHash{$key}.fasta"){ makeBed("$indexHash{$key}.fasta", $key, "$indexHash{$key}.bed"); } } my $bamFiles = join ' ', @array; print "bedtools multicov -bams $bamFiles -bed $indexHash{$key}.bed >$key.${dedup}counts.tmp\\n"; `bedtools multicov -bams $bamFiles -bed $indexHash{$key}.bed >$key.${dedup}counts.tmp`; my $iniResColumn = int(countColumn("$indexHash{$key}.bed")) + 1; `awk -F \\"\\\\t\\" \\'{a=\\"\\";for (i=$iniResColumn;i<=NF;i++){a=a\\"\\\\t\\"\\$i;} print \\$4\\"\\\\t\\"(\\$3-\\$2)\\"\\"a}\\' $key.${dedup}counts.tmp> $key.${dedup}counts.tsv`; `sort -k3,3nr $key.${dedup}counts.tsv>$key.${dedup}sorted.tsv`; `cat header.tsv $key.${dedup}sorted.tsv> $key.${dedup}counts.tsv`; } sub countColumn { my ( \$file) = @_; open(IN, \$file); my $line=; chomp($line); my @cols = split('\\t', $line); my $n = @cols; close OUT; return $n; } sub makeBed { my ( \$fasta, \$type, \$bed) = @_; print "makeBed $fasta\\n"; print "makeBed $bed\\n"; open OUT, ">$bed"; open(IN, \$fasta); my $name=""; my $seq=""; my $i=0; while(my $line=){ chomp($line); if($line=~/^>(.*)/){ $i++ if (length($seq)>0); print OUT "$name\\t1\\t".length($seq)."\\t$name\\t0\\t+\\n" if (length($seq)>0); $name="$1"; $seq=""; } elsif($line=~/[ACGTNacgtn]+/){ $seq.=$line; } } print OUT "$name\\t1\\t".length($seq)."\\t$name\\t0\\t+\\n" if (length($seq)>0); close OUT; } ''' } //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill process Sequential_Mapping_Module_parseBow { input: set val(name), file(bowfile) from g69_19_bowfiles_g69_20 val mate from g_2_mate_g69_20 val filtersList from g69_19_filter_g69_20 output: file '*.tsv' into g69_20_outputFileTSV_g69_13 val "sequential_mapping_sum" into g69_20_name_g69_13 shell: ''' #!/usr/bin/env perl open(my \$fh, '>', "!{name}.tsv"); print $fh "Sample\\tGroup\\tTotal Reads\\tReads After Filtering\\tUniquely Mapped\\tMultimapped\\tMapped\\n"; my @bowArray = split(' ', "!{bowfile}"); my $group= "\\t"; my @filterArray = (!{filtersList}); foreach my $bowitem(@bowArray) { # get mapping id my @bowAr = $bowitem.split("_"); $bowCount = $bowAr[0] + -1; # if bowfiles ends with underscore (eg. bow_rRNA), parse rRNA as a group. if ($bowitem =~ m/bow_([^\\.]+)$/) { $group = "$1\\t"; } open(IN, $bowitem); my $i = 0; my ($RDS_T, $RDS_P, $RDS_C1, $RDS_C2, $ALGN_T, $a, $b, $aPer, $bPer)=(0, 0, 0, 0, 0, 0, 0, 0, 0); while(my $line=) { chomp($line); $line=~s/^ +//; my @arr=split(/ /, $line); $RDS_T=$arr[0] if ($i=~/^1$/); if ($i == 2){ if ($filterArray[$bowCount] eq "Yes"){ $RDS_P=$arr[0]; } else { $RDS_P=$RDS_T; } } # Reads After Filtering column depends on filtering type if ($i == 3) { $a=$arr[0]; $aPer=$arr[1]; $aPer=~ s/([()])//g; $RDS_C1=$arr[0]; } if ($i == 4) { $b=$arr[0]; $bPer=$arr[1]; $bPer=~ s/([()])//g; $RDS_C2=$arr[0]; } $ALGN_T=($a+$b) if (($i == 5 && "!{mate}" ne "pair" ) || ($i == 13 && "!{mate}" eq "pair" )) ; $i++; } close(IN); print $fh "!{name}\\t$group$RDS_T\\t$RDS_P\\t$RDS_C1\\t$RDS_C2\\t$ALGN_T\\n"; } close($fh); ''' } params.run_Split_Fastq = "no" //* @dropdown @options:"yes","no" @show_settings:"SplitFastq" readsPerFile = 5000000 //* @input @description:"The number of reads per file" //Since splitFastq operator requires flat file structure, first convert grouped structure to flat, execute splitFastq, and then return back to original grouped structure //.map(flatPairsClosure).splitFastq(splitFastqParams).map(groupPairsClosure) //Mapping grouped read structure to flat structure flatPairsClosure = {row -> if(row[1] instanceof Collection) { if (row[1][1]){ tuple(row[0], file(row[1][0]), file(row[1][1])) } else { tuple(row[0], file(row[1][0])) } } else { tuple(row[0], file(row[1])) } } //Mapping flat read structure to grouped read structure groupPairsClosure = {row -> tuple(row[0], (row[2]) ? [file(row[1]), file(row[2])] : [file(row[1])])} // if mate of split process different than rest of the pipeline, use "mate_split" as input parameter. Otherwise use default "mate" as input parameter mateParamName = (params.mate_split) ? "mate_split" : "mate" splitFastqParams = "" if (params[mateParamName] != "pair"){ splitFastqParams = [by: readsPerFile, file:true] }else { splitFastqParams = [by: readsPerFile, pe:true, file:true] } //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill if (!(params.run_Split_Fastq == "yes")){ g69_19_reads_g_68.into{g_68_reads_g67_3} } else { process SplitFastq { input: val mate from g_2_mate_g_68 set val(name), file(reads) from g69_19_reads_g_68.map(flatPairsClosure).splitFastq(splitFastqParams).map(groupPairsClosure) output: set val(name), file("split/*") into g_68_reads_g67_3 when: params.run_Split_Fastq == "yes" script: """ mkdir -p split mv ${reads} split/. """ } } params.bowtie2_path = "" //* @input params.genomeIndexPath = "" //* @input Map_Bowtie2_parameters = "" //* @input @description:"Bowtie2 parameters" //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 2000 $CPU = 1 $MEMORY = 32 $QUEUE = "long" } //* platform //* autofill process Bowtie2_Module_Map_Bowtie2 { input: set val(name), file(reads) from g_68_reads_g67_3 val mate from g_2_mate_g67_3 output: set val(name), file("${newName}.mapped*.fastq") into g67_3_mapped_fastq set val(name), file("${newName}.bow") into g67_3_bowfiles_g67_10, g67_3_bowfiles_g_79 set val(name), file("${newName}.unmap*.fastq") into g67_3_unmapped_fastq set val(name), file("${newName}_alignment.bam") into g67_3_bam_file_g67_4 when: (params.run_Bowtie2 && (params.run_Bowtie2 == "yes")) || !params.run_Bowtie2 script: nameAll = reads.toString() nameArray = nameAll.split(' ') file2 = ""; if (nameAll.contains('.gz')) { newName = nameArray[0] - ~/(\.fastq.gz)?(\.fq.gz)?$/ file1 = nameArray[0] - '.gz' if (mate == "pair") {file2 = nameArray[1] - '.gz'} runGzip = "ls *.gz | xargs -i echo gzip -df {} | sh" } else { newName = nameArray[0] - ~/(\.fastq)?(\.fq)?$/ file1 = nameArray[0] if (mate == "pair") {file2 = nameArray[1]} runGzip = '' } """ if [ "${mate}" == "pair" ]; then ${params.bowtie2_path} -x ${params.genomeIndexPath} ${Map_Bowtie2_parameters} --no-unal --un-conc ${newName}.unmapped.fastq -1 ${file1} -2 ${file2} --al-conc ${newName}.mapped.fastq -S ${newName}_alignment.sam > ${newName}.bow 2>&1 else ${params.bowtie2_path} -x ${params.genomeIndexPath} ${Map_Bowtie2_parameters} --un ${newName}.unmapped.fastq -U ${file1} --al ${newName}.mapped.fastq -S ${newName}_alignment.sam > ${newName}.bow 2>&1 fi grep -v Warning ${newName}.bow > ${newName}.tmp mv ${newName}.tmp ${newName}.bow samtools view -bS ${newName}_alignment.sam > ${newName}_alignment.bam """ } //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill process Bowtie2_Module_Bowtie_Summary { input: set val(name), file(bowfile) from g67_3_bowfiles_g67_10.groupTuple() val mate from g_2_mate_g67_10 output: file '*.tsv' into g67_10_outputFileTSV_g67_11 val "bowtie_sum" into g67_10_name_g67_11 shell: ''' #!/usr/bin/env perl open(my \$fh, '>', "!{name}.tsv"); print $fh "Sample\\tTotal Reads\\tUnique Reads Aligned (Bowtie2)\\tMultimapped Reads Aligned (Bowtie2)\\n"; my @bowArray = split(' ', "!{bowfile}"); my ($RDS_T, $RDS_C1, $RDS_C2)=(0, 0, 0); foreach my $bowitem(@bowArray) { # get mapping id open(IN, $bowitem); my $i = 0; while(my $line=) { chomp($line); $line=~s/^ +//; my @arr=split(/ /, $line); $RDS_T+=$arr[0] if ($i=~/^1$/); if ($i == 3){ $RDS_C1+=$arr[0]; } if ($i == 4){ $RDS_C2+=$arr[0]; } $i++; } close(IN); } print $fh "!{name}\\t$RDS_T\\t$RDS_C1\\t$RDS_C2\\n"; close($fh); ''' } process Bowtie2_Module_Merge_TSV_Files { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /${name}.tsv$/) "bowtie_summary/$filename" } input: file tsv from g67_10_outputFileTSV_g67_11.collect() val outputFileName from g67_10_name_g67_11.collect() output: file "${name}.tsv" into g67_11_outputFileTSV_g_75 script: name = outputFileName[0] """ awk 'FNR==1 && NR!=1 { getline; } 1 {print} ' *.tsv > ${name}.tsv """ } params.samtools_path = "" //* @input //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 8 $QUEUE = "short" } //* platform //* autofill process Bowtie2_Module_Merge_Bam { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*_sorted.*bam$/) "bowtie2/$filename" } input: set val(oldname), file(bamfiles) from g67_3_bam_file_g67_4.groupTuple() output: set val(oldname), file("${oldname}.bam") into g67_4_merged_bams set val(oldname), file("*_sorted*bai") into g67_4_bam_index set val(oldname), file("*_sorted*bam") into g67_4_sorted_bam_g_64 shell: ''' num=$(echo "!{bamfiles.join(" ")}" | awk -F" " '{print NF-1}') if [ "${num}" -gt 0 ]; then !{params.samtools_path} merge !{oldname}.bam !{bamfiles.join(" ")} && !{params.samtools_path} sort -O bam -T !{oldname} -o !{oldname}_sorted.bam !{oldname}.bam && !{params.samtools_path} index !{oldname}_sorted.bam else mv !{bamfiles.join(" ")} !{oldname}.bam 2>/dev/null || true !{params.samtools_path} sort -T !{oldname} -O bam -o !{oldname}_sorted.bam !{oldname}.bam && !{params.samtools_path} index !{oldname}_sorted.bam fi ''' } params.run_Remove_Multimappers = "no" //* @dropdown @options:"yes","no" @show_settings:"Remove_Multimappers" params.samtools_path = "" //* @input MAPQ_quality = "4" //* @input @description:"It removes reads with MAPQ quality less than entered value (4 by default)" if (!(params.run_Remove_Multimappers == "yes")){ g67_4_sorted_bam_g_64.into{g_64_mapped_reads_g_9} } else { process Remove_Multimappers { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /bam\/${name}.bam$/) "unique_bams/$filename" } input: set val(name), file(bam) from g67_4_sorted_bam_g_64 output: set val(name), file("bam/${name}.bam") into g_64_mapped_reads_g_9 when: params.run_Remove_Multimappers == "yes" script: """ mkdir bam head ${bam} ${params.samtools_path} view -hb -q ${MAPQ_quality} ${bam} > ${name}_unique.bam mv ${name}_unique.bam bam/${name}.bam """ } } params.picard_path = "" //* @input params.run_Picard_MarkDuplicates = "yes" //* @dropdown @options:"yes","no" //* autofill if ($HOSTNAME == "default"){ $CPU = 1 $MEMORY = 32 } //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 32 $QUEUE = "short" } //* platform //* autofill if (!((params.run_Picard_MarkDuplicates && (params.run_Picard_MarkDuplicates == "yes")) || !params.run_Picard_MarkDuplicates)){ g_64_mapped_reads_g_9.into{g_9_mapped_reads_g_43; g_9_mapped_reads_g53_109; g_9_mapped_reads_g53_110; g_9_mapped_reads_g53_111; g_9_mapped_reads_g53_112} g_9_publish = Channel.empty() } else { process Picard_MarkDuplicates { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /bam\/${name}.bam$/) "picard_deduplication/$filename" else if (filename =~ /${name}.*$/) "picard_deduplication/$filename" } input: set val(name), file(bam) from g_64_mapped_reads_g_9 output: set val(name), file("bam/${name}.bam") into g_9_mapped_reads_g_43, g_9_mapped_reads_g53_109, g_9_mapped_reads_g53_110, g_9_mapped_reads_g53_111, g_9_mapped_reads_g53_112 set val(name), file("${name}*") into g_9_publish when: (params.run_Picard_MarkDuplicates && (params.run_Picard_MarkDuplicates == "yes")) || !params.run_Picard_MarkDuplicates script: """ mkdir bam java -jar ${params.picard_path} MarkDuplicates OUTPUT=${name}_dedup.bam METRICS_FILE=${name}_PCR_duplicates VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true INPUT=${bam} mv ${name}_dedup.bam bam/${name}.bam grep "Unknown" *_PCR_duplicates|awk '{print "${name}\t" \$9}' > ${name}_picardDedup_summary.txt """ } } params.bedtools_path = "" //* @input params.genomeSizePath = "" //* @input macs2_callpeak_parameters = "--nomodel" //* @input @description:"macs2 callpeak parameters" peak_calling_type = "narrow" //* @dropdown @options:"narrow","broad" band_width = "300" //* @input @description:"Band width for picking regions to compute fragment size." bedtoolsCoverage_Parameters = "-sorted -nobuf -hist " //* @input @description:"bedtools Coverage parameters" compare_Custom_Bed = "" //* @input @description:"Enter custom bed file for comparison" output_prefix = "" //* @input @title:"Input Definitions" @description:"Output files will be created by using output_prefix" sample_prefix = "" //* @description:"Use prefix of the sample to match files. You can use comma separated format to enter multiples files. Eg.Sample1_L001,Sample2_L001" @input input_prefix = "" //* @description:"Use prefix of the input to match files. You can use comma separated format to enter multiples files. Eg.Sample1_L001,Sample2_L001" @input //* @array:{output_prefix,sample_prefix,input_prefix} @multicolumn:{output_prefix,sample_prefix,input_prefix},{macs2_callpeak_parameters,peak_calling_type,band_width,bedtoolsCoverage_Parameters} samplehash = [:] inputhash = [:] output_prefix.eachWithIndex { key, i -> inputhash[key] = input_prefix[i] } output_prefix.eachWithIndex { key, i -> samplehash[key] = sample_prefix[i] } // String nameList = output_prefix.collect { "\"$it\"" }.join( ' ' ) // String samplesList = sample_prefix.collect { "\"$it\"" }.join( ' ' ) // String inputsList = input_prefix.collect { "\"$it\"" }.join( ' ' ) process ChIP_Prep { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /bam\/.*.bam$/) "chip/$filename" } input: val mate from g_2_mate_g_43 set val(name), file(bam) from g_9_mapped_reads_g_43 output: file "bam/*.bam" into g_43_bam_file_g_48 val output_prefix into g_43_name_g_48 when: (params.run_ChIP_MACS2 && (params.run_ChIP_MACS2 == "yes")) || !params.run_ChIP_MACS2 script: """ mkdir -p bam mv ${bam} bam/${name}.bam """ } params.genomeSizePath = "" //* @input params.bedtools_path = "" //* @input params.samtools_path = "" //* @input process ChIP_MACS2 { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /${name}.*$/) "chip/$filename" } input: val mate from g_2_mate_g_48 file bam from g_43_bam_file_g_48.collect() val name from g_43_name_g_48.unique().flatten() output: val compare_bed into g_48_compare_bed_g_47 file "*${peak_calling_type}Peak" into g_48_bed_g_49 set val(name), file("bam/*.bam") into g_48_bam_file_g_49, g_48_bam_file_g_47 file "${name}*" into g_48_resultsdir_g_79 script: genomeSizeText = "" if (_build == "mm10"){ genomeSizeText = "-g mm" } else if (_build == "hg19"){ genomeSizeText = "-g hs" } if (peak_calling_type == "narrow"){ peakcallingType = "" } else if (peak_calling_type == "broad"){ peakcallingType = "--broad" } compare_bed = "merged.bed" compare_Custom_Bed = compare_Custom_Bed.trim(); if (compare_Custom_Bed != ""){ compare_bed = compare_Custom_Bed } inputsList = inputhash[name] samplesList = samplehash[name] """ echo ${samplesList} echo ${inputsList} echo $name mkdir -p bam #samplesList samplesList="\$(echo -e "${samplesList}" | tr -d '[:space:]')" IFS=',' read -ra eachSampleAr <<< "\${samplesList}" numSamples=\${#eachSampleAr[@]} eachSampleArBam=( "\${eachSampleAr[@]/%/.bam }" ) sample_set=\${eachSampleArBam[@]} bam_set=\${eachSampleArBam[@]} #inputsList input_set="" inputsList="\$(echo -e "${inputsList}" | tr -d '[:space:]')" if [ "\${inputsList}" != "" ]; then IFS=',' read -ra eachInputAr <<< "\${inputsList}" eachInputArbam=( "\${eachInputAr[@]/%/.bam }" ) input_set="-c \${eachInputArbam[@]}" fi echo \${eachSampleArBam[@]} macs2 callpeak --bw ${band_width} -t \${sample_set} \${input_set} -n ${name} ${genomeSizeText} ${macs2_callpeak_parameters} ${peakcallingType} #bam files if [ "\$numSamples" -gt "1" ]; then samtools merge bam/${name}.bam \$bam_set else rsync -a \$bam_set bam/${name}.bam fi """ } params.run_Scripture = "yes" //* @dropdown @options:"yes","no" @show_settings:"Scripture_peakrescore" params.peakrescore_path = "" //* @input params.peakrescore_class_path = "" //* @input window = "200" //* @input trimFraction = "0.15" //* @input if (!(params.run_Scripture == "yes")){ g_48_bed_g_49.into{g_49_bed_g_45} } else { process Scripture_peakrescore { input: file bed from g_48_bed_g_49 set val(name), file(bam) from g_48_bam_file_g_49 output: file "${name}_trim.bed" into g_49_bed_g_45 when: params.run_Scripture == "yes" script: windowText = (window.toString() != "") ? "-window ${window}" : "" trimFractionText = (trimFraction.toString() != "") ? "-trimFraction ${trimFraction}" : "" """ samtools index ${bam} cat ${bed} | awk '{print \$1"\t"\$2"\t"\$3"\t"\$4"\t"\$5}' > ${name}_clean java -cp ${params.peakrescore_path}:${params.peakrescore_class_path} peaks.PeakTrim -task trimByFractionOfScale -in ${name}_clean -libAlignment ${bam} $windowText $trimFractionText -out ${name}_trim.bed """ } } process bed_merge { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /merged.bed$/) "chip/$filename" } input: file bed from g_49_bed_g_45.collect() output: file "merged.bed" into g_45_bed_g_47 """ cat ${bed} | bedtools sort -i stdin | bedtools slop -i stdin -b 100 -g ${params.genomeSizePath} | bedtools merge -i stdin | awk '{print \$0"\t"\$1"_"\$2"_"\$3}' > merged.bed """ } bedtoolsCoverage_Parameters = "-sorted -hist " //* @input @description:"bedtools Coverage parameters" bedtoolsIntersect_Parameters = "" //* @input @description:"bedtools Intersect optional parameters" params.bedtools_path = "" //* @input params.samtools_path = "" //* @input process bedtools_coverage { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.mean.txt$/) "chip/$filename" } input: val compare_bed from g_48_compare_bed_g_47 file bed from g_45_bed_g_47 set val(name), file(bam) from g_48_bam_file_g_47 output: file "*.mean.txt" into g_47_outputFileTxt_g_50 """ echo ${compare_bed} if [ -s "${compare_bed}" ]; then echo " bed file exists and is not empty " ${params.samtools_path} view -H ${name}.bam | grep -P "@SQ\\tSN:" | sed 's/@SQ\\tSN://' | sed 's/\\tLN:/\\t/' > ${name}_chroms ${params.bedtools_path} intersect -abam ${name}.bam -b ${compare_bed} > temp_${name}.bam ${params.bedtools_path} sort -faidx ${name}_chroms -i ${compare_bed} | ${params.bedtools_path} coverage ${bedtoolsCoverage_Parameters} -a stdin -b temp_${name}.bam > temp_${name}.bed awk '{\$NF=\$(NF-3)*\$NF;print }' OFS="\\t" temp_${name}.bed | grep -v all > temp_${name}_hist.bed l=`awk '{print NF}' temp_${name}_hist.bed | head -1 | awk '{print \$1-4}'` k=`awk '{print NF}' temp_${name}_hist.bed | head -1` bedtools groupby -i temp_${name}_hist.bed -g 1-\$l -c \$k -o sum > ${name}.mean.txt #rm -rf temp_* else echo " bed file does not exist, or is empty " touch ${name}_empty.mean.txt fi """ } process ATAC_CHIP_summary { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.tsv$/) "chip_summary/$filename" } input: file file from g_47_outputFileTxt_g_50.collect() output: file "*.tsv" into g_50_outputFile shell: ''' #!/usr/bin/env perl my $indir = $ENV{'PWD'}; opendir D, $indir or die "Could not open $indir\n"; my @alndirs = sort { $a cmp $b } grep /.txt/, readdir(D); closedir D; my @a=(); my %b=(); my %c=(); my $i=0; foreach my $d (@alndirs){ my $file = "${indir}/$d"; print $d."\n"; my $libname=$d; $libname=~s/\\.mean\\.txt//; print $libname."\n"; $i++; $a[$i]=$libname; open IN,"${indir}/$d"; $_=; while() { my @v=split; $b{$v[3]}{$i}=$v[4]; } close IN; } my $outfile="${indir}/"."mean_counts.tsv"; open OUT, ">$outfile"; print OUT "Feature"; for(my $j=1;$j<=$i;$j++) { print OUT "\t$a[$j]"; } print OUT "\n"; foreach my $key (keys %b){ print OUT "$key"; for(my $j=1;$j<=$i;$j++){ print OUT "\t$b{$key}{$j}"; } print OUT "\n"; } close OUT; ''' } params.genomeCoverageBed_path = "" //* @input params.wigToBigWig_path = "" //* @input params.genomeSizePath = "" //* @input process BAM_Analysis_Module_UCSC_BAM2BigWig_converter { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.bw$/) "ucsc_bigwig/$filename" } input: set val(name), file(bam) from g_9_mapped_reads_g53_112 output: file "*.bw" into g53_112_outputFileBw when: (params.run_BigWig_Conversion && (params.run_BigWig_Conversion == "yes")) || !params.run_BigWig_Conversion script: nameAll = bam.toString() if (nameAll.contains('_sorted.bam')) { runSamtools = '' } else { runSamtools = "samtools sort " +bam+ " "+ name +"_sorted && samtools index "+ name+"_sorted.bam " } """ $runSamtools ${params.genomeCoverageBed_path} -split -bg -ibam ${name}_sorted.bam -g ${params.genomeSizePath} > ${name}.bg ${params.wigToBigWig_path} -clip -itemsPerSlot=1 ${name}.bg ${params.genomeSizePath} ${name}.bw """ } igv_extention_factor = "0" //* @input @description:"The read or feature is extended by the specified distance in bp prior to counting. This option is useful for chip-seq and rna-seq applications. The value is generally set to the average fragment length of the library minus the average read length." @tooltip:"igvtools is used" igv_window_size = "5" //* @input @description:"The window size over which coverage is averaged." @tooltip:"igvtools is used" params.genome = "" //* @input params.igvtools_path = "" //* @input process BAM_Analysis_Module_IGV_BAM2TDF_converter { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.tdf$/) "igv_tdf/$filename" } input: val mate from g_2_mate_g53_111 set val(name), file(bam) from g_9_mapped_reads_g53_111 output: file "*.tdf" into g53_111_outputFileOut when: (params.run_IGV_TDF_Conversion && (params.run_IGV_TDF_Conversion == "yes")) || !params.run_IGV_TDF_Conversion script: pairedText = (params.nucleicAcidType == "dna" && mate == "pair") ? " --pairs " : "" nameAll = bam.toString() if (nameAll.contains('_sorted.bam')) { runSamtools = '' } else { runSamtools = "samtools sort " +bam+ " "+ name +"_sorted && samtools index "+ name+"_sorted.bam " } """ $runSamtools java -Xmx1500m -jar ${params.igvtools_path} count -w ${igv_window_size} -e ${igv_extention_factor} ${pairedText} ${name}_sorted.bam ${name}.tdf ${params.genome} """ } params.bed_file_genome = "" //* @input process BAM_Analysis_Module_RSeQC { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /result\/.*.out$/) "rseqc/$filename" } input: set val(name), file(bam) from g_9_mapped_reads_g53_110 output: file "result/*.out" into g53_110_outputFileOut_g53_95, g53_110_outputFileOut_g_79 when: (params.run_RSeQC && (params.run_RSeQC == "yes")) || !params.run_RSeQC script: """ mkdir result read_distribution.py -i ${bam} -r ${params.bed_file_genome}> result/RSeQC.${name}.out """ } process BAM_Analysis_Module_RSeQC_Summary { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.tsv$/) "rseqc_summary/$filename" } input: file rseqcOut from g53_110_outputFileOut_g53_95.collect() val mate from g_2_mate_g53_95 output: file "*.tsv" into g53_95_outputFileTSV shell: ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; use Data::Dumper; my $indir = $ENV{'PWD'}; my $outd = $ENV{'PWD'}; my @files = (); my @outtypes = ("RSeQC"); my @order=( "Total Reads", "Total Tags" , "Total Assigned Tags", "CDS_Exons", "5'UTR_Exons", "3'UTR_Exons", "Introns", "TSS_up_1kb", "TSS_up_5kb", "TSS_up_10kb", "TES_down_1kb", "TES_down_5kb", "TES_down_10kb"); my %lines=( "Total Reads" => 1, "Total Tags" => 1, "Total Assigned Tags" => 1, "CDS_Exons" => 2, "5'UTR_Exons" => 2, "3'UTR_Exons" => 2, "Introns" => 2, "TSS_up_1kb" => 2, "TSS_up_5kb" => 2, "TSS_up_10kb" => 2, "TES_down_1kb" => 2, "TES_down_5kb" => 2, "TES_down_10kb" => 2 ); foreach my $outtype (@outtypes) { my $ext=".out"; @files = <$indir/$outtype*$ext>; my @rowheaders=(); my @libs=(); my %vals=(); my %normvals=(); my $type = "rsem"; foreach my $d (@files){ my $libname=basename($d, $ext); $libname=~s/RSeQC.//g; $libname=~s/rsem.out.//g; $libname=~s/.genome//g; print $libname."\\n"; push(@libs, $libname); getVals($d, $libname, \\%vals, \\%normvals, \\%lines); } #print Dumper(%vals); #print Dumper(%normvals); my $sizemetrics = keys %vals; write_results("$outd/$outtype.$type.counts.tsv", \\@libs,\\%vals, \\@order, "region") if ($sizemetrics>0); write_results("$outd/$outtype.$type.tagskb.tsv", \\@libs,\\%normvals, \\@order, "region") if ($sizemetrics>0); } sub write_results { my ($outfile, $libs, $vals, $order, $name )=@_; open(OUT, ">$outfile"); print OUT "$name\\t".join("\\t", @{$libs})."\\n"; my $lib=${$libs}[0]; foreach my $key ( @order ) { if (exists ${$vals}{$lib}{$key}) { print OUT $key; foreach my $lib (@{$libs}) { print OUT "\\t".${$vals}{$lib}{$key}; } print OUT "\\n"; } } close(OUT); } sub getVals{ my ($filename, $libname, $vals, $normvals, $lines)=@_; if (-e $filename){ open(IN, $filename); while(my $line=) { chomp($line); my @vals_arr=split(/\\s{2,}/,$line); if (exists ${$lines}{$vals_arr[0]}) { my $idx=${$lines}{$vals_arr[0]}; ${$vals}{$libname}{$vals_arr[0]}=$vals_arr[$idx] if (exists $vals_arr[$idx]); if ($idx==2) { ${$normvals}{$libname}{$vals_arr[0]}=$vals_arr[3] if (exists $vals_arr[3]); } } } } } ''' } //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 10 $QUEUE = "short" } //* platform //* autofill process MultiQC { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /multiqc_report.html$/) "multiQC/$filename" } input: file "fastqc/*" from g51_3_FastQCout_g_79.flatten().toList() file "sequential_mapping/*" from g69_19_bowfiles_g_79.flatten().toList() file "macs/*" from g_48_resultsdir_g_79.flatten().toList() file "rseqc_bowtie/*" from g53_110_outputFileOut_g_79.flatten().toList() file "bowtie/*" from g67_3_bowfiles_g_79.flatten().toList() output: file "multiqc_report.html" optional true into g_79_htmlout """ multiqc -e general_stats -d -dd 2 . """ } params.pdfbox_path = "" //* @input params.picard_path = "" //* @input //* autofill if ($HOSTNAME == "default"){ $CPU = 1 $MEMORY = 32 } //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 240 $CPU = 1 $MEMORY = 32 $QUEUE = "short" } //* platform //* autofill process BAM_Analysis_Module_Picard { input: set val(name), file(bam) from g_9_mapped_reads_g53_109 output: file "*_metrics" into g53_109_outputFileOut_g53_82 file "results/*.pdf" into g53_109_outputFilePdf_g53_82 when: (params.run_Picard_CollectMultipleMetrics && (params.run_Picard_CollectMultipleMetrics == "yes")) || !params.run_Picard_CollectMultipleMetrics script: """ java -jar ${params.picard_path} CollectMultipleMetrics OUTPUT=${name}_multiple.out VALIDATION_STRINGENCY=LENIENT INPUT=${bam} mkdir results && java -jar ${params.pdfbox_path} PDFMerger *.pdf results/${name}_multi_metrics.pdf """ } process BAM_Analysis_Module_Picard_Summary { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /.*.tsv$/) "picard_summary/$filename" else if (filename =~ /results\/.*.pdf$/) "picard/$filename" } input: file picardOut from g53_109_outputFileOut_g53_82.collect() val mate from g_2_mate_g53_82 file picardPdf from g53_109_outputFilePdf_g53_82.collect() output: file "*.tsv" into g53_82_outputFileTSV file "results/*.pdf" into g53_82_outputFilePdf shell: ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; use Data::Dumper; system("mkdir results && mv *.pdf results/. "); my $indir = $ENV{'PWD'}; my $outd = $ENV{'PWD'}; my @files = (); my @outtypes = ("CollectRnaSeqMetrics", "alignment_summary_metrics", "base_distribution_by_cycle_metrics", "insert_size_metrics", "quality_by_cycle_metrics", "quality_distribution_metrics" ); foreach my $outtype (@outtypes) { my $ext="_multiple.out"; $ext.=".$outtype" if ($outtype ne "CollectRnaSeqMetrics"); @files = <$indir/*$ext>; my @rowheaders=(); my @libs=(); my %metricvals=(); my %histvals=(); my $pdffile=""; my $libname=""; foreach my $d (@files){ my $libname=basename($d, $ext); print $libname."\\n"; push(@libs, $libname); getMetricVals($d, $libname, \\%metricvals, \\%histvals, \\@rowheaders); } my $sizemetrics = keys %metricvals; write_results("$outd/$outtype.stats.tsv", \\@libs,\\%metricvals, \\@rowheaders, "metric") if ($sizemetrics>0); my $sizehist = keys %histvals; write_results("$outd/$outtype.hist.tsv", \\@libs,\\%histvals, "none", "nt") if ($sizehist>0); } sub write_results { my ($outfile, $libs, $vals, $rowheaders, $name )=@_; open(OUT, ">$outfile"); print OUT "$name\\t".join("\\t", @{$libs})."\\n"; my $size=0; $size=scalar(@{${$vals}{${$libs}[0]}}) if(exists ${$libs}[0] and exists ${$vals}{${$libs}[0]} ); for (my $i=0; $i<$size;$i++) { my $rowname=$i; $rowname = ${$rowheaders}[$i] if ($name=~/metric/); print OUT $rowname; foreach my $lib (@{$libs}) { print OUT "\\t".${${$vals}{$lib}}[$i]; } print OUT "\\n"; } close(OUT); } sub getMetricVals{ my ($filename, $libname, $metricvals, $histvals,$rowheaders)=@_; if (-e $filename){ my $nextisheader=0; my $nextisvals=0; my $nexthist=0; open(IN, $filename); while(my $line=) { chomp($line); @{$rowheaders}=split(/\\t/, $line) if ($nextisheader && !scalar(@{$rowheaders})); if ($nextisvals) { @{${$metricvals}{$libname}}=split(/\\t/, $line); $nextisvals=0; } if($nexthist){ my @vals=split(/[\\s\\t]+/,$line); push(@{${$histvals}{$libname}}, $vals[1]) if (exists $vals[1]); } $nextisvals=1 if ($nextisheader); $nextisheader=0; $nextisheader=1 if ($line=~/METRICS CLASS/); $nexthist=1 if ($line=~/normalized_position/); } } } ''' } process Sequential_Mapping_Module_Merge_TSV_Files { input: file tsv from g69_20_outputFileTSV_g69_13.collect() val outputFileName from g69_20_name_g69_13.collect() output: file "${name}.tsv" into g69_13_outputFileTSV_g69_14 script: name = outputFileName[0] """ awk 'FNR==1 && NR!=1 { getline; } 1 {print} ' *.tsv > ${name}.tsv """ } process Sequential_Mapping_Module_Sequential_Mapping_Short_Summary { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /sequential_mapping_detailed_sum.tsv$/) "sequential_mapping_summary/$filename" } input: file mainSum from g69_13_outputFileTSV_g69_14 output: file "sequential_mapping_short_sum.tsv" into g69_14_outputFileTSV_g_75 file "sequential_mapping_detailed_sum.tsv" into g69_14_outputFile shell: ''' #!/usr/bin/env perl use List::Util qw[min max]; use File::Basename; use Getopt::Long; use Pod::Usage; use Data::Dumper; my @header; my %all_rows; my @seen_cols_short; my @seen_cols_detailed; my $ID_header; chomp(my $contents = `ls *.tsv`); my @files = split(/[\\n]+/, $contents); foreach my $file (@files){ open IN,"$file"; my $line1 = ; chomp($line1); ( $ID_header, my @h) = ( split("\\t", $line1) ); my $totalHeader = $h[1]; my $afterFilteringHeader = $h[2]; my $uniqueHeader = $h[3]; my $multiHeader = $h[4]; my $mappedHeader = $h[5]; push(@seen_cols_short, $totalHeader) unless grep{$_ eq $totalHeader} @seen_cols_short; #Total reads Header push(@seen_cols_detailed, $totalHeader) unless grep{$_ eq $totalHeader} @seen_cols_detailed; #Total reads Header my $n=0; while (my $line=) { chomp($line); my ( $ID, @fields ) = ( split("\\t", $line) ); #SHORT push(@seen_cols_short, $fields[0]) unless grep{$_ eq $fields[0]} @seen_cols_short; #mapped element header $all_rows{$ID}{$fields[0]} = $fields[5];#Mapped Reads #Grep first line $fields[1] as total reads. if (!exists $all_rows{$ID}{$totalHeader}){ $all_rows{$ID}{$totalHeader} = $fields[1]; } $all_rows{$ID}{$afterFilteringHeader} = $fields[2]; #only use last entry #DETAILED $uniqueHeadEach = "$fields[0] (${uniqueHeader})"; $multiHeadEach = "$fields[0] (${multiHeader})"; $mappedHeadEach = "$fields[0] (${mappedHeader})"; push(@seen_cols_detailed, $mappedHeadEach) unless grep{$_ eq $mappedHeadEach} @seen_cols_detailed; push(@seen_cols_detailed, $uniqueHeadEach) unless grep{$_ eq $uniqueHeadEach} @seen_cols_detailed; push(@seen_cols_detailed, $multiHeadEach) unless grep{$_ eq $multiHeadEach} @seen_cols_detailed; $all_rows{$ID}{$mappedHeadEach} = $fields[5]; $all_rows{$ID}{$uniqueHeadEach} = $fields[3]; $all_rows{$ID}{$multiHeadEach} = $fields[4]; } close IN; push(@seen_cols_short, $afterFilteringHeader) unless grep{$_ eq $afterFilteringHeader} @seen_cols_short; #After filtering Header } #print Dumper \\%all_rows; #print Dumper \\%seen_cols_short; printFiles("sequential_mapping_short_sum.tsv",@seen_cols_short,); printFiles("sequential_mapping_detailed_sum.tsv",@seen_cols_detailed); sub printFiles { my($summary, @cols_to_print) = @_; open OUT, ">$summary"; print OUT join ("\\t", $ID_header,@cols_to_print),"\\n"; foreach my $key ( keys %all_rows ) { print OUT join ("\\t", $key, (map { $all_rows{$key}{$_} // '' } @cols_to_print)),"\\n"; } close OUT; } ''' } g67_11_outputFileTSV_g_75= g67_11_outputFileTSV_g_75.ifEmpty(file('starSum', type: 'any')) g69_14_outputFileTSV_g_75= g69_14_outputFileTSV_g_75.ifEmpty(file('sequentialSum', type: 'any')) //* autofill //* platform if ($HOSTNAME == "ghpcc06.umassrc.org"){ $TIME = 30 $CPU = 1 $MEMORY = 10 $QUEUE = "short" } //* platform //* autofill process Alignment_Summary { publishDir params.outdir, overwrite: true, mode: 'copy', saveAs: {filename -> if (filename =~ /summary_data.tsv$/) "alignment_summary/$filename" } input: file starSum from g67_11_outputFileTSV_g_75 file sequentialSum from g69_14_outputFileTSV_g_75 output: file "summary_data.tsv" into g_75_outputFileTSV shell: ''' #!/usr/bin/env perl use List::Util qw[min max]; use strict; use File::Basename; use Getopt::Long; use Pod::Usage; use Data::Dumper; my @header; my %all_rows; my @seen_cols; my $ID_header; chomp(my $contents = `ls *.tsv`); my @files = split(/[\\n]+/, $contents); # if sequential_mapping_short_sum.tsv is exist push to the beginning of the array my $checkSeqMap = "false"; my $checkSeqMapVal = ""; for my $index (reverse 0..$#files) { if ( $files[$index] =~ /sequential_mapping/ ) { $checkSeqMap = "true"; $checkSeqMapVal = $files[$index]; splice(@files, $index, 1, ()); } } if ($checkSeqMap == "true"){ unshift @files, $checkSeqMapVal; } ##Merge each file according to array order foreach my $file (@files){ open IN,"$file"; my $line1 = ; chomp($line1); ( $ID_header, my @header) = ( split("\\t", $line1) ); push @seen_cols, @header; while (my $line=) { chomp($line); my ( $ID, @fields ) = ( split("\\t", $line) ); my %this_row; @this_row{@header} = @fields; #print Dumper \\%this_row; foreach my $column (@header) { if (! exists $all_rows{$ID}{$column}) { $all_rows{$ID}{$column} = $this_row{$column}; } } } close IN; } #print for debugging #print Dumper \\%all_rows; #print Dumper \\%seen_cols; #grab list of column headings we've seen, and order them. my @cols_to_print = uniq(@seen_cols); my $summary = "summary_data.tsv"; open OUT, ">$summary"; print OUT join ("\\t", $ID_header,@cols_to_print),"\\n"; foreach my $key ( keys %all_rows ) { #map iterates all the columns, and gives the value or an empty string. if it's undefined. (prevents errors) print OUT join ("\\t", $key, (map { $all_rows{$key}{$_} // '' } @cols_to_print)),"\\n"; } close OUT; sub uniq { my %seen; grep ! $seen{$_}++, @_; } ''' } workflow.onComplete { println "##Pipeline execution summary##" println "---------------------------" println "##Completed at: $workflow.complete" println "##Duration: ${workflow.duration}" println "##Success: ${workflow.success ? 'OK' : 'failed' }" println "##Exit status: ${workflow.exitStatus}" }