## Configuration file: to detect SNPs from BAM files with GATK HaplotypeCaller ## http://toggle.southgreen.fr ############################### # 1 - PIPELINE CONFIGURATION ############################### #------------------------------ # Building Workflow #------------------------------ # After the tag $order, define the steps composing the pipeline and # their relative order. # Each line consists of the step number followed by # an equals sign, then by the software's name (e.g. 1=FastQC). # # - All the steps with a number <1000 # are carried out for each sample separately # # - All the steps with a number > or = 1000 # are performed as a global analysis # # - Step names are not case-sensitive #------------------------------ $order #----------- sort sam 1=picardToolsSortSam #----------- extracting correctly mapped reads 2=samtoolsview 3=samToolsIndex 4=gatkRealignerTargetCreator 5=gatkIndelRealigner 6=picardToolsMarkDuplicates 1000=gatkHaplotypeCaller #----------- Filtering step - see gatkVariantFiltration parameters 1001=gatkVariantFiltration #----------- SNP selection 1002=gatkSelectVariants #------------------------------ # Providing software parameters #------------------------------ # For each software, parameters can be provided after the line # composed of the symbol $ followed by the software name # # - If no software parameter is provided, the default ones are # used. # - TOGGLe itself handles the input and output files as # well as the references. 
# - See each software's manual for more details about its parameters. #------------------------------ $picardToolsSortSam SORT_ORDER=coordinate VALIDATION_STRINGENCY=SILENT CREATE_INDEX=TRUE $samToolsView -h -b -f=0x02 $gatkRealignerTargetCreator -T=RealignerTargetCreator $gatkIndelRealigner -T=IndelRealigner $picardToolsMarkDuplicates VALIDATION_STRINGENCY=SILENT CREATE_INDEX=TRUE REMOVE_DUPLICATES=TRUE $gatkHaplotypeCaller -T=HaplotypeCaller -rf BadCigar $gatkVariantFiltration -T=VariantFiltration --filterName 'FILTER-DP' --filterExpression 'DP<10 || DP>600' --filterName 'LowQual' --filterExpression 'QUAL<30' $gatkSelectVariants -T=SelectVariants -selectType=SNP ############################### # 2 - DATA MANAGEMENT ############################### #------------------------------ # Removing intermediate data #------------------------------ # Users can specify, after the tag $cleaner, # the list of step numbers (one per line) #------------------------------ $cleaner #put the step numbers whose data TOGGLe has to manage #------------------------------ # Compressing data #------------------------------ # Users can specify, after the tag $compress, # the list of step numbers (one per line) whose data have to be compressed #------------------------------ #$compress #------------------------------ # Transferring data from project directory to work directory #------------------------------ # Users can specify, after the tag $scp, # the directory name where the data have to be copied (by scp) #------------------------------ #$scp #/scratch ############################### # 3 - CLUSTER CONFIGURATION ############################### #------------------------------ # Setting up the job scheduler #------------------------------ # TOGGLe can run with either LSF, MPRUN, SLURM or SGE # job schedulers. 
# - Depending on the scheduler, after the tag $LSF, $MPRUN, $SLURM or $SGE, # list the scheduler options #------------------------------ #Example for SGE #$sge #-q YOURQUEUE.q #-b Y #------------------------------ # Providing specific environment variables #------------------------------ # Users can specify ENVIRONMENT variables in their jobs (e.g. export or module load) # using the tag $env #------------------------------ #$env