# This Properties map is specified in the Java 'property list' text format
# http://java.sun.com/javase/6/docs/api/java/util/Properties.html#load%28java.io.Reader%29
###
### Some of these overrides are actually just the default values, so they can be skipped.
###
## Q: can overrides like 'fetchDns.enabled=false' be used to disable the beans?
metadata.jobName=crawlrss_cxml
metadata.description=Default Profile
metadata.operator=Admin
metadata.userAgentTemplate=Mozilla/5.0 (compatible; heritrix/3.3.0 +@OPERATOR_CONTACT_URL@)
## Edit the two following lines to match your setup.
metadata.operatorContactUrl=http://netarkivet.dk/webcrawler/
metadata.operatorFrom=info@netarkivet.dk
loggerModule.path=logs
crawlLimiter.maxBytesDownload=0
crawlLimiter.maxDocumentsDownload=0
## The maxTimeSeconds value is inserted by NetarchiveSuite (delete the following line if this behaviour is unwanted)
crawlLimiter.maxTimeSeconds=%{MAX_TIME_SECONDS_PLACEHOLDER}
crawlController.maxToeThreads=50
crawlController.recorderOutBufferBytes=4096
crawlController.recorderInBufferBytes=65536
crawlController.pauseAtStart=false
crawlController.scratchDir=scratch
## org.archive.bdb.BdbModule overrides
bdb.dir=state
bdb.cachePercent=40
## seeds properties
seeds.sourceTagSeeds=false
## Politeness overrides
disposition.delayFactor=1.0
disposition.maxDelayMs=1000
disposition.minDelayMs=300
disposition.maxPerHostBandwidthUsageKbSec=500
## preparer.preferenceEmbedHops=1
## preparer.preferenceDepthHops=-1
frontier.maxRetries=3
frontier.retryDelaySeconds=300
frontier.recoveryLogEnabled=false
frontier.balanceReplenishAmount=3000
frontier.errorPenaltyAmount=100
## frontier.queueTotalBudget can be used instead of the QuotaEnforcer module. In that case the
## following line should read: frontier.queueTotalBudget=%{FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER}
## rather than being left empty, i.e.: frontier.queueTotalBudget=
frontier.queueTotalBudget=%{FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER}
frontier.snoozeLongMs=300000
preselector.enabled=true
preselector.logToFile=false
preselector.recheckScope=true
preselector.blockAll=false
preconditions.enabled=true
preconditions.ipValidityDurationSeconds=21600
preconditions.robotsValidityDurationSeconds=86400
preconditions.calculateRobotsOnly=false
fetchDns.enabled=true
fetchDns.acceptNonDnsResolves=false
fetchDns.digestContent=true
fetchDns.digestAlgorithm=sha1
fetchHttp.enabled=true
fetchHttp.timeoutSeconds=1200
fetchHttp.soTimeoutMs=20000
fetchHttp.maxFetchKBSec=0
fetchHttp.maxLengthBytes=0
fetchHttp.ignoreCookies=false
fetchHttp.sslTrustLevel=OPEN
fetchHttp.defaultEncoding=ISO-8859-1
fetchHttp.digestContent=true
fetchHttp.digestAlgorithm=sha1
fetchHttp.sendIfModifiedSince=true
fetchHttp.sendIfNoneMatch=true
fetchHttp.sendConnectionClose=true
fetchHttp.sendReferer=true
fetchHttp.sendRange=false
extractorHttp.enabled=true
extractorHtml.enabled=true
extractorHtml.extractJavascript=true
extractorHtml.treatFramesAsEmbedLinks=false
extractorHtml.ignoreFormActionUrls=true
extractorHtml.extractValueAttributes=false
extractorHtml.ignoreUnexpectedHtml=true
extractorCss.enabled=true
extractorJs.enabled=true
extractorSwf.enabled=true
candidates.seedsRedirectNewSeeds=false
statisticsTracker.intervalSeconds=20
warcWriter.compress=true
%{ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER}
%{ARCHIVER_BEAN_REFERENCE_PLACEHOLDER}
https://www.dr.dk/nyheder/
http://www.dr.dk/nyheder/allenyheder/indland
http://www.dr.dk/nyheder/allenyheder/udland
http://www.dr.dk/nyheder/allenyheder/penge