# -*- coding: utf-8 -*-
# Copyright (c) 2021, Farid Rashidi Mehrabadi All rights reserved.
# ======================================================================================
# Author     : Farid Rashidi Mehrabadi (farid.rashidimehrabadi@nih.gov)
# Last Update: July 11, 2020
# Description: config file includes paths, directories, formats and parameters
#
# **NOTICE**: for mouse: 'mmDBSNPs142' and 'mmDBIndels142'
#             for human: 'hgMillsIndels', 'hgKGIndels' and 'hgDBSNPs138'
# ======================================================================================
---

## PATHS
infq: "./trisicell/datasets/test/mcalling"
outdir: "./trisicell/datasets/test/_map"
tmpdir: "./trisicell/datasets/test/_tmp"
normals: []  # normal cells

## for paired-end
infqpre1: ""
infqpost1: "_1.fastq.gz"
infqpre2: ""
infqpost2: "_2.fastq.gz"

## for single-end
# infqpre: ""
# infqpost: ".fastq.gz"

## PARAMETERS
isrna: true
# NOTE(review): buildver is "mm10" while the active entries under
# "REFERENCES and INDICES" below are the hg19 set (the mm10 set is commented
# out). Confirm which genome is intended and make the two agree.
buildver: "mm10"  # [mm10 for mouse | hg19 for human]
dotrimming: true
pairedend: true
email: "myemail@gmail.com"

## STEPS
# One boolean switch per pipeline step.
steps:
  s01indexing: true  # False by default when `isrna` False
  s02mapping: true
  s03indexing: true  # False by default when `isrna` False
  s04mapping: true
  s05calling: true
  s06jointcalling: true
  s07merging: true
  s08annotating: true
  s09expressing: true  # False by default when `isrna` False
  s10velocitying: true  # False by default when `isrna` False

## TIME and MEM
# Presumably the numeric suffix (01..10) matches the step number under
# `steps` -- TODO confirm against the consuming workflow. Values are kept as
# quoted strings so the parser does not retype them (e.g. "02:00:00").
mem01: "50"
time01: "02:00:00"

mem02: "90"
time02: "20:00:00"

mem03: "50"
time03: "02:00:00"

mem04: "100"
time04: "1-12:00:00"
java04: "java -Xmx90g -jar"

mem05: "100"
time05: "2-00:00:00"
java05: "java -Xmx90g -jar"

mem06: "100"
time06: "23:00:00"
java06: "java -Xmx90g -jar"
threads06: 16

mem07: "20"
time07: "02:00:00"

mem08: "20"
time08: "15:00:00"

mem09: "20"
time09: "15:00:00"

mem10: "100"
time10: "2-00:00:00"

## TOOLS
GATK: "/data/rashidimehrabf2/_software/GenomeAnalysisTK.jar"  # version: 3.8-1-0-gf15c1c3ef
PICARD: "/data/rashidimehrabf2/_software/picard.jar"  # version: 2.22.3
ANNOVAR: "/data/rashidimehrabf2/_software/annovar.2019.10.24"  # version: 2019.10.24
BWA: "bwa"  # version: 0.7.17
STAR: "STAR"  # version: 2.7.3a
RSEM: "rsem"  # version: 1.3.2
FASTQC: "fastqc"  # version: 0.11.9
CUTADAPT: "cutadapt"  # version: 2.9
TRIMGALORE: "trim_galore"  # version: 0.6.5
TRIMMOMATIC: "trimmomatic"  # version: 0.39
samtools: "samtools"  # version: 1.11
bcftools: "bcftools"  # version: 1.9
velocyto: "velocyto"  # version: 0.17.17

## REFERENCES and INDICES
# Active set: human (hg19). The mouse (mm10) alternative is kept commented
# out directly below; enable exactly one of the two sets.
ref: "/data/rashidimehrabf2/_database/hg19/genome/GRCh37.p13.fa"
fdict: "/data/rashidimehrabf2/_database/hg19/genome/GRCh37.p13.dict"
annot: "/data/rashidimehrabf2/_database/hg19/annotation/gencode.v19.annotation.gtf"
rsemdb: "/data/rashidimehrabf2/_database/hg19/rsem/rsem"
whichdb: "humandb"
dbtype: "wgEncodeGencodeBasicV19"
velo: "/data/rashidimehrabf2/_database/hg19/velocyto/ucsc.v19.repeat_mask.gtf"

# ref: "/data/rashidimehrabf2/_database/mm10/genome/GRCm38.p6.fa"
# fdict: "/data/rashidimehrabf2/_database/mm10/genome/GRCm38.p6.dict"
# annot: "/data/rashidimehrabf2/_database/mm10/annotation/gencode.vM25.annotation.gtf"
# rsemdb: "/data/rashidimehrabf2/_database/mm10/rsem/rsem"
# whichdb: "mousedb"
# dbtype: "wgEncodeGencodeBasicVM25"
# velo: "/data/rashidimehrabf2/_database/mm10/velocyto/ucsc.vM25.repeat_mask.gtf"

## DB SNPS and INDELS
hgMillsIndels: "/data/rashidimehrabf2/_database/hg19/dbsnp/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz"  # for human
hgKGIndels: "/data/rashidimehrabf2/_database/hg19/dbsnp/1000G_phase1.indels.hg19.sites.vcf.gz"  # for human
hgDBSNPs138: "/data/rashidimehrabf2/_database/hg19/dbsnp/dbsnp_138.hg19.vcf.gz"  # for human
mmDBSNPs142: "/data/rashidimehrabf2/_database/mm10/dbsnp/mgp.v5.merged.snps_all.dbSNP142.chr.vcf.gz"  # for mouse
mmDBIndels142: "/data/rashidimehrabf2/_database/mm10/dbsnp/mgp.v5.merged.indels.dbSNP142.normed.chr.vcf.gz"  # for mouse