## Template variables declared with Velocity-style #set directives. Each
## directive binds a name to a string so the rest of the file can write $name
## instead of repeating the full text.
##
## Root working directory. Other lines of this file build paths from it
## (for example "$workdir\progress" and "$workdir\logs"). The value is a
## placeholder Windows-style path to be replaced with a real location.
#set($workdir = "c:\path\to\your\workdir")
## Package-name prefixes. They are interpolated as ${core}, ${http} and
## ${committer} by the definitions that follow, so they must stay above them.
#set($core = "com.norconex.collector.core")
#set($http = "com.norconex.collector.http")
#set($committer = "com.norconex.committer")
## Fully qualified names built from the prefixes above.
## NOTE(review): presumably these are used as implementation class names by
## configuration elements that are not visible in this view -- confirm.
##
## HTTP client factory.
#set($httpClientFactory = "${http}.client.impl.GenericHttpClientFactory")
## Reference/metadata filters (all under the core "filter.impl" package).
#set($filterExtension = "${core}.filter.impl.ExtensionReferenceFilter")
#set($filterRegexRef = "${core}.filter.impl.RegexReferenceFilter")
#set($filterRegexMeta = "${core}.filter.impl.RegexMetadataFilter")
## robots.txt and robots-meta providers.
#set($robotsTxt = "${http}.robot.impl.StandardRobotsTxtProvider")
#set($robotsMeta = "${http}.robot.impl.StandardRobotsMetaProvider")
## Redirect and recrawl handling.
#set($redirectProvider = "${http}.redirect.impl.GenericRedirectURLProvider")
#set($recrawlResolver = "${http}.recrawl.impl.GenericRecrawlableResolver")
## Metadata and document fetchers.
#set($metaFetcher = "${http}.fetch.impl.GenericMetadataFetcher")
#set($docFetcher = "${http}.fetch.impl.GenericDocumentFetcher")
## URL handling: link extraction, canonical-link detection, normalization.
#set($linkExtractor = "${http}.url.impl.GenericLinkExtractor")
#set($canonLinkDetector = "${http}.url.impl.GenericCanonicalLinkDetector")
#set($urlNormalizer = "${http}.url.impl.GenericURLNormalizer")
## Delay resolver and sitemap resolver factory.
#set($delayResolver = "${http}.delay.impl.GenericDelayResolver")
#set($sitemapFactory = "${http}.sitemap.impl.StandardSitemapResolverFactory")
## Checksummers: the metadata one comes from the http package, the document
## one from the core package.
#set($metaChecksummer = "${http}.checksum.impl.HttpMetadataChecksummer")
#set($docChecksummer = "${core}.checksum.impl.MD5DocumentChecksummer")
## Crawl data store factory and spoiled-reference strategizer (core package).
#set($dataStoreFactory = "${core}.data.store.impl.mvstore.MVStoreCrawlDataStoreFactory")
#set($spoiledStrategy = "${core}.spoil.impl.GenericSpoiledReferenceStrategizer")
## File-system committer; the only definition built on the ${committer} prefix.
#set($fsCommitter = "${committer}.core.impl.FileSystemCommitter")
$workdir\progress
$workdir\logs
-1
http://www.example.com
http://www.sample.com
/local/path/to/a/file/full/of/urls.txt
http://www.somewhere.com/sitemap.xml
Please identify your Crawler
lowerCaseSchemeHost, upperCaseEscapeSequence,
decodeUnreservedCharacters, removeDefaultPort
&view=print
&view=html
10000
2
5
-1
/crawler/workdir/path
false
false
false
PROCESS
com.norconex.committer.core.CommitterException
myusername
mypassword
field_username
field_password
https://www.example.com/login.php
www.example.com
80
PRIVATE
jpg,gif,png,ico,css,js
https://www.example.com/.*
/blogs/sitemap.xml
.*css.*
text/html, application/xhtml+xml, vnd.wap.xhtml+xml, x-asp
$workdir\crawledFiles