## Velocity preamble: each #set directive below assigns a string to a template variable.
##   workdir                - base directory, given as a Windows-style placeholder path; it is
##                            referenced again further down with \progress, \logs and
##                            \crawledFiles appended.
##   core, http, committer  - Java package prefixes.
##   all other variables    - fully-qualified class names, built by interpolating one of the
##                            three package prefixes into a double-quoted string.
## The prefix variables are assigned before the class-name variables that interpolate them,
## so keep that order when adding or moving entries.
## NOTE(review): the plain values after the last #set read like element text from a
## configuration file whose surrounding markup is not visible in this view - confirm against
## the complete file before editing them.
#set($workdir = "c:\path\to\your\workdir") #set($core = "com.norconex.collector.core") #set($http = "com.norconex.collector.http") #set($committer = "com.norconex.committer") #set($httpClientFactory = "${http}.client.impl.GenericHttpClientFactory") #set($filterExtension = "${core}.filter.impl.ExtensionReferenceFilter") #set($filterRegexRef = "${core}.filter.impl.RegexReferenceFilter") #set($filterRegexMeta = "${core}.filter.impl.RegexMetadataFilter") #set($robotsTxt = "${http}.robot.impl.StandardRobotsTxtProvider") #set($robotsMeta = "${http}.robot.impl.StandardRobotsMetaProvider") #set($redirectProvider = "${http}.redirect.impl.GenericRedirectURLProvider") #set($recrawlResolver = "${http}.recrawl.impl.GenericRecrawlableResolver") #set($metaFetcher = "${http}.fetch.impl.GenericMetadataFetcher") #set($docFetcher = "${http}.fetch.impl.GenericDocumentFetcher") #set($linkExtractor = "${http}.url.impl.GenericLinkExtractor") #set($canonLinkDetector = "${http}.url.impl.GenericCanonicalLinkDetector") #set($urlNormalizer = "${http}.url.impl.GenericURLNormalizer") #set($delayResolver = "${http}.delay.impl.GenericDelayResolver") #set($sitemapFactory = "${http}.sitemap.impl.StandardSitemapResolverFactory") #set($metaChecksummer = "${http}.checksum.impl.HttpMetadataChecksummer") #set($docChecksummer = "${core}.checksum.impl.MD5DocumentChecksummer") #set($dataStoreFactory = "${core}.data.store.impl.mvstore.MVStoreCrawlDataStoreFactory") #set($spoiledStrategy = "${core}.spoil.impl.GenericSpoiledReferenceStrategizer") #set($fsCommitter = "${committer}.core.impl.FileSystemCommitter") $workdir\progress $workdir\logs -1 http://www.example.com http://www.sample.com /local/path/to/a/file/full/of/urls.txt http://www.somewhere.com/sitemap.xml Please identify your Crawler lowerCaseSchemeHost, upperCaseEscapeSequence, decodeUnreservedCharacters, removeDefaultPort &view=print &view=html 10000 2 5 -1 /crawler/workdir/path false false false PROCESS 
com.norconex.committer.core.CommitterException myusername mypassword field_username field_password https://www.example.com/login.php www.example.com 80 PRIVATE jpg,gif,png,ico,css,js https://www.example.com/.* /blogs/sitemap.xml .*css.* text/html, application/xhtml+xml, vnd.wap.xhtml+xml, x-asp text/html, application/xhtml+xml, vnd.wap.xhtml+xml, x-asp $workdir\crawledFiles