# ERVsearch settings
#
# Layout rules for this file: one section header, one key=value pair or one
# comment per line. Comments start with "#" in the first column; trailing
# comments on a key=value line are not used.

# Required parameters (no defaults)

[genome]
# Path to a single fasta file containing the genome or other sequence
# you would like to screen for ORFs
# e.g. /home/katy/genomes/hg38.fasta
file=?!

[paths]
# Path to usearch executable
# e.g. /usr/bin/usearch11.0.667_u86linux32
path_to_usearch=?!

# Path to exonerate executable
# e.g. /usr/bin/exonerate
path_to_exonerate=?!

# The following parameters are optional (as they have default values)

[genomesplits]
# If the genome has more than ~100 contigs or scaffolds, it is recommended
# to batch these rather than running Exonerate on each contig individually,
# to avoid creating an excessive number of output files.
# The default number of batches is 50, which is a manageable number for
# most systems.
# If split is True the contigs will be batched; if it is False Exonerate
# will run once for every gene-contig combination (usually 3x the number
# of contigs).
# If there are fewer contigs than batches this will be ignored.
# Default True
split=True

# Number of batches to split the contigs into.
# Default 50.
split_n=50

# The pipeline will error if running with these genome split settings would
# run Exonerate more than 500 times. If you want to force the pipeline
# to run despite this, change force to True.
# Default False
force=False

[output]
# Log files will have this as a prefix (e.g. ERVsearch_log.txt)
# Default ERVsearch
outfile_stem=ERVsearch

[database]
# When screening using Exonerate, query sequences are used to identify
# ERV-like regions of the genome.
# It is possible to use the default ERV database provided with the pipeline
# (recommended) or to use a custom database.
# False - use the default database
# True - use a custom database
# Default False
use_custom_db=False

# Path to a custom fasta file of gag amino acid sequences
# To skip this gene use None as this value (only if use_custom_db is True)
gag=None

# Path to a custom fasta file of pol amino acid sequences
# To skip this gene use None as this value (only if use_custom_db is True)
pol=None

# Path to a custom fasta file of env amino acid sequences
# To skip this gene use None as this value (only if use_custom_db is True)
env=None

[exonerate]
# Minimum Exonerate hit length on the chromosome. Shorter hits are
# filtered out.
# Default 100.
min_hit_length=100

# Maximum distance between chromosome regions identified with Exonerate
# which are merged into single regions.
# Default 30.
overlap=30

# Minimum score in the second Exonerate pass (with ungapped algorithm).
# Default 100.
min_score=100

[usearch]
# Minimum identity used by the UBLAST algorithm, expressed as a fraction
# between 0 and 1 (0.5 = 50% identity).
# Used to set the -id UBLAST parameter
# Default 0.5
min_id=0.5

# Minimum hit length for UBLAST
# Used to set the -mincols UBLAST parameter
# Default 100.
min_hit_length=100

# Minimum proportion of the query sequence which should be covered
# using UBLAST
# Used to set the -query_cov UBLAST parameter
# Default 0.5
min_coverage=0.5

[plots]
# Dots per inch for output plots (from the summarise functions).
dpi=300

# File format for summary plot files, can be svg, png, pdf or jpg
format=png

# Colour for *gag* gene in summary plots. Default is orange.
gag_colour=#f38918

# Colour for *pol* gene in summary plots. Default is blue.
pol_colour=#4876f9

# Colour for *env* gene in summary plots. Default is red.
env_colour=#d61f54

# Colour for any plot combining genes. Default is green.
other_colour=#33b54d

# If True, when gag, pol and env are shown as subplots on the same figure
# they will all have the same axis limits. With matched axes some subplots
# can appear very small, but the genes are more directly comparable.
match_axes=False

[orfs]
# Minimum length of ORFs to characterise.
# Documented default: 50.
# NOTE(review): the value set below is 100, not 50 - confirm which of the
# two is the intended default.
min_orf_len=100

# Translation table to use when identifying ORFs - listed here
# https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
# Usually table 1 is fine - default plus alternative initiation codons
# Default 1.
translation_table=1

[trees]
# Use the colours specified in the plots section for each gene to highlight
# newly identified ERVs.
# If this is True, highlightcolour will be ignored and the gene colours will
# be used instead.
# If it is False, highlightcolour is used to highlight newly identified ERVs.
# Documented default: False.
# NOTE(review): the value set below is True, not False - confirm which of
# the two is the intended default.
use_gene_colour=True

# Main colour for text in tree images (hex code starting with #)
# Default #000000 (black)
maincolour=#000000

# Text colour for leaves highlighted in tree images - newly identified
# ERV-like regions.
# This is ignored if use_gene_colour above is True
# Default #382bfe (blue)
highlightcolour=#382bfe

# Text colour for outgroup in tree images
# Default #0e954b (green)
outgroupcolour=#0e954b

# Dots per inch for phylogenetic tree images.
# Default 300
dpi=300

# File format for phylogenetic tree images, can be svg, png, pdf or jpg
# Default png
format=png

[regions]
# Maximum distance (in nucleotides) between ORFs to be defined as part
# of the same ERV region.
# Default: 3000
maxdist=3000

# Maximum proportion of an ORF which can overlap with an ORF from another
# gene before they are filtered out.
# Default: 0.5
maxoverlap=0.5