# This Shasta assembly configuration for phased diploid assembly
# was tested under the following conditions:

#  - ONT R10 chemistry, fast mode, chimera rate around 2%.
#  - Guppy 6 basecaller with "super" accuracy.
#  - Human genome at low coverage (HG002, one flowcell, coverage around 35x).
#  - Phased diploid assembly.

# Under these conditions, a test run for HG002 produced an assembly 
# consisting of about 2.7 Gb in bubble chains and 0.3 Gb outside bubble chains.
# Of the 2.7 Gb in bubble chains, about 2.0 Gb were assembled diploid and phased. 

# The N50 for bubble chains was about 5 Mb, and the N50 for phased bubbles was about 0.5 Mb.
# This is longer than most genes, which means that for many genes 
# the assembly contains both haplotypes, entirely phased.

# When mapping each branch of a phased bubble against the correct reference haplotype
# for HG002, base level quality was around Q = 47 dB for mismatches,
# and around Q = 40 dB for indels.

# The fraction assembled diploid and phased can be improved with the use of
# Ultra-Long reads (e.g. 2.7 Gb were assembled diploid and phased with R9
# Ultra-Long reads at high coverage). A separate assembly configuration
# for phased diploid assembly using R10 Ultra-Long reads will
# be provided when possible.

# This configuration might also be usable under different conditions,
# but that was not tested.


# Options controlling how input reads are loaded.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
[Reads]
# Presumably 0 selects raw (not run-length encoded) read representation - TODO confirm.
representation = 0
# Presumably reads shorter than this many bases are discarded - TODO confirm.
minReadLength = 10000
# Presumably bypasses the OS page cache while reading input files - TODO confirm.
noCache = True

# Options controlling the k-mers used by the assembler.
[Kmers]
# NOTE(review): presumably the length, in bases, of the k-mers used as markers;
# confirm against the Shasta documentation.
k = 14

# Options for the MinHash step.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
[MinHash]
# Presumably the number of MinHash iterations to run - TODO confirm.
minHashIterationCount = 100
# Presumably buckets smaller than minBucketSize or larger than maxBucketSize
# are ignored when collecting candidate read pairs - TODO confirm.
minBucketSize = 10
maxBucketSize = 40
# Presumably the minimum number of times a read pair must be found across
# iterations to be kept as an alignment candidate - TODO confirm.
minFrequency = 5

# Options for computing read-to-read alignments and for deciding which
# alignments are kept.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
[Align]
# Numeric selector for the alignment algorithm; the meaning of 3 is not
# visible in this file - TODO confirm.
alignMethod = 3
# Presumably only relevant to the alignment method selected above - TODO confirm.
downsamplingFactor = 0.05
# Presumably the score awarded to a matching marker pair - TODO confirm.
matchScore = 6
# Presumably suppresses alignments between reads from the same sequencing
# channel whose read numbers differ by less than this value - TODO confirm.
sameChannelReadAlignment.suppressDeltaThreshold = 30
# Presumably thresholds an alignment must meet to be kept: minimum number of
# aligned markers and minimum aligned fraction - TODO confirm.
minAlignedMarkerCount = 1000
minAlignedFraction = 0.85
# Presumably limits, in markers, on skip, drift and unaligned trim allowed
# in an alignment - TODO confirm.
maxSkip = 20
maxDrift = 10
maxTrim = 20

# Options for building the read graph.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
[ReadGraph]
# Numeric selector for the read graph creation algorithm; the meaning of 0 is
# not visible in this file - TODO confirm.
creationMethod = 0
# Presumably the maximum number of alignments kept per read - TODO confirm.
maxAlignmentCount = 15
# Numeric selector for the strand separation algorithm; the meaning of 2 is
# not visible in this file - TODO confirm.
strandSeparationMethod = 2

# Coverage thresholds for the marker graph.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
# If these are tuned for the roughly 35x coverage described in the header,
# they may need revisiting for data at a different coverage.
[MarkerGraph]
# Presumably minimum coverage (total and per strand) for a marker graph
# vertex to be created - TODO confirm.
minCoverage = 6
minCoveragePerStrand = 1
# Presumably minimum coverage (total and per strand) for a marker graph
# edge to be kept - TODO confirm.
minEdgeCoverage = 6
minEdgeCoveragePerStrand = 1

# Options for the final assembly stage.
# NOTE(review): the per-key notes in this section are inferred from the option
# names and recollection of the Shasta documentation; confirm them there.
[Assembly]
# Presumably mode 2 is the phased diploid assembly mode this configuration
# targets (the "mode2." option below belongs to it) - TODO confirm.
mode = 2
# Name of the consensus caller used to compute assembled sequence.
consensusCaller = Modal
# Presumably dead-end branches shorter than this length are pruned from the
# assembly graph; units are not visible in this file - TODO confirm.
pruneLength = 100
# Presumably the minimum number of concordant reads considered during
# mode 2 bubble removal - TODO confirm.
mode2.bubbleRemoval.minConcordantReadCount = 2