...
This list of Nutch configuration properties is intended for development. It includes deprecated properties and properties used only "internally". The list is generated from nutch-default.xml and Java sources.
Legend:
*Def. *
...
Def: defined in nutch-default.xml
...
*Used *
indent |
---|
read or set from Java code
|
*Temp. *
indent |
---|
temporarily used to pass settings (eg, from command-line arguments) to map or reduce jobs
|
*Depr. *
indent |
---|
deprecated
|
*(owr.) *
...
Used: read or set from Java code
Temp: temporarily used to pass settings (eg, from command-line arguments) to map or reduce jobs
Depr.: deprecated
(owr.): some properties are defined in nutch-default.xml (and may be set in nutch-site.xml) but are overwritten programmatically (tests and benchmarks are excluded), e.g. via a command-line argument in some Nutch tools
(test): overwritten only in tests and benchmarks
Trunk
2.x
Property
Def.
1.X (master Branch) | 2.x (deprecated codebase) | |||||||
Property | Def. |
Used | Temp. | Depr. | Def. | Used | Temp. | Depr. |
anchorIndexingFilter. |
CrawlDBScanner.status
deduplicate | X | X |
X | X |
any23.content_types | X | X |
(test)
any23.extractors | X | X |
arc.url.version | - | X |
batch.proxy.port | - | X | ||||||
content.server.port | - | X | - | X |
cosine.goldstandard.file | X |
X |
crawl.datum.processor.overdue.time.limit | - | X |
crawl.gen.delay | X | X |
X |
X |
X
crawldb.inject.filter.normalize.all | - | X |
crawldb.url.filters | X | X |
(owr.) | X |
crawldb.url.normalizers | X |
X | (owr.) | |||||
crawldb.url.normalizers.scope |
X
- | X | ||||||
creativecommons.exclude.unlicensed | - | X |
- |
X |
db.fetch.interval.default | X | X |
NUTCH-1409
X | X |
NUTCH-1409
db.fetch.interval.max | X | X |
(test)
X | X |
db.fetch.retry.max | X | X |
X | X |
db.fetch. |
X
X
X
X
schedule.adaptive.dec_rate | X | X |
X | X |
db.fetch.schedule.adaptive.inc_rate | X | X |
X | X |
db.fetch.schedule.adaptive.max_interval | X | X |
X | X |
db.fetch.schedule.adaptive.min_interval | X | X |
X | X |
db.fetch.schedule.adaptive.sync_delta | X | X |
X | X |
db.fetch.schedule.adaptive.sync_delta_rate | X |
X
- |
X |
X
- | ||
db.fetch.schedule.class | X | X |
(test)
X | X |
db.fetch.schedule.mime.file | X | X |
db.ignore.also.redirects | X | X | ||||||
db.ignore.external.exemptions.file | X | - |
db.ignore.external.links | X | X | X |
X | ||
db.ignore.external.links.mode |
X | X |
db.ignore.internal.links | X | X |
X |
- | ||
db.injector.overwrite | X | X |
(owr.) | |||||
db.injector.update | X | X | (owr.) | |||||
db.max.anchor.length | X |
X
X
- | ||
db.max.outlink.length | X |
X |
NUTCH-1409
db.max.outlinks.per.page | X | X |
X |
X |
db.parsemeta.to.crawldb | X | X |
X
X
db.parsemeta.to.crawldb
X
X
X
X | - | ||||
db.preserve.backup | X | X |
db.reader.stats.sort | - | X | X |
- | X | X |
db.reader.topn | - | X | X |
db.reader.topn.min | - | X | X |
db.score.count.filtered | X | X |
X | X |
db.score.injected | X | X |
X | X |
db.score.link.external | X | X |
X | X |
db.score.link.internal | X | X |
X | X |
db.signature.class | X | X |
X | X |
db.signature.text_profile.min_token_len | X |
X
- |
X |
- |
db.signature.text_profile.quant_rate | X | - | X |
- | ||
db.stats.score.quantiles |
X | X |
db.update.additions.allowed | X | X |
X | X |
db.update.max.inlinks | X | X |
X | X |
db.update.purge.404 | X | X |
db.update.purge.orphans | X | X | ||||||
dc.language | - | X |
domain.statistics.mode | - | X | X |
- | X | X |
elastic.cluster | X | - | ||||||
elastic.host |
X |
- |
elastic.index |
X |
- |
elastic.max.bulk.docs |
X
X | - | ||||||
elastic.max.bulk.size |
X
X | - | |||||||
elastic.port | X | - | ||||||
elasticsearch.conf | - | X |
encodingdetector.charset.min.confidence | X |
X |
X | X |
exchanges.exchanges.file | X | X |
fail.on.job.failure |
- |
X | ||
fetcher.bandwidth.target | X |
X |
fetcher.bandwidth.target.check.everyNSecs | X |
X | ||||||||
fetcher.filter.urls | X | X | ||||||
fetcher.follow.outlinks.depth | X | X |
fetcher.follow.outlinks.depth.divisor | X |
X
- | ||||||
fetcher.follow.outlinks.ignore.external | X |
X
- | ||||||
fetcher.follow.outlinks.num.links | X |
X
- | ||||||||
fetcher.job.resume | - | X | ||||||
fetcher.job.sitemap | - | X |
X
fetcher.job.sitemap.detect | - | X |
fetcher.max.crawl.delay | X | X |
X | X |
fetcher.max.exceptions.per.queue | X | - | X |
- | |||
fetcher.maxNum.threads | X |
X |
fetcher.min.crawl.delay | X | X |
fetcher.normalize.urls | X | X |
(test)
fetcher.parse | X | X |
X | X |
fetcher.publisher | X | X |
fetcher.queue.depth.multiplier | X | X |
X | X |
fetcher.queue.mode | X | X |
X | X |
fetcher.queue.use.host.settings |
X |
X | ||
fetcher.redirect.dedupcache.seconds | X |
X | |||||||
fetcher.redirect.dedupcache.size | X |
X |
fetcher.server.delay | X | X |
X | X |
fetcher.server.min.delay | X | X |
X | X | ||||
fetcher.signature |
X | X |
fetcher.store.content | X | X |
X | X | ||||
fetcher.store.robotstxt |
X | X |
fetcher.threads.fetch | X | X | (owr.) |
fetcher.threads.per.host
NUTCH-1409
NUTCH-1409
X | X |
fetcher.threads.per.host.by.ip |
X
- | X |
fetcher.threads.per.queue | X | X |
X | X |
fetcher.threads.timeout.divisor | X | X |
fetcher.throughput.threshold.check.after | X | X | (owr.) |
X |
- |
fetcher.throughput.threshold.pages | X | X |
X
X
X | - | ||||
fetcher.throughput.threshold.retries | X | X |
fetcher.throughput.threshold.sequence |
X |
- |
fetcher.timelimit | - | X | X |
- | X | X |
fetcher.timelimit.mins | X | X |
X | X |
fetcher.verbose | X |
X
X
- | ||
file.content.ignored | X |
- | X |
- |
file.content.limit | X | X | (owr.) |
X | X |
(test)
file.crawl.parent | X | X |
X | X | ||||
file.crawl.redirect_noncanonical |
X | - | X |
- | ||
free.generator.filter | - | X |
free.generator.normalize | - | X |
ftp.content.limit | X | X |
X | X |
ftp.follow.talk | X | X |
X | X |
ftp.keep.connection | X | X |
X | X |
ftp.password | X | X |
X | X |
ftp.server.timeout | X | X |
X | X |
ftp.timeout | X | X |
X | X |
ftp.username | X | X |
X | X |
generate.batch.id |
- |
X |
generate.count |
- | X |
generate.count.mode | X | X |
X | X |
generate.curTime | - | X |
- |
X |
generate.expr | - | X |
generate.fetch.delay.expr | X | X | ||||||
generate.filter | - | X | - |
X |
generate.hostdb | X | X |
generate.max.count | X | X |
X | X | ||||
generate.max.count.expr |
X | X |
generate.max.distance |
X | X |
generate.max.num.segments | - | X |
generate.min.interval | X | X |
NUTCH-1409
NUTCH-1409
generate.max.per.host.by.ip
X
NUTCH-1409
NUTCH-1409
generate.min.score | X | X | X | X | ||||
generate.normalise | - | X | - | X | ||||
generate.partition.seed | - | X | ||||||
generate.restrict.status | X | X | ||||||
generate.sitemap | - | X | ||||||
generate.topN | - | X | - | X | ||||
generate.update.crawldb |
X | X |
generate.normalise
X
X
generate.partition.seed
X
generate.restrict.status
X
generate.topN
X
X
generate.update.crawldb
X
X
X
X
X |
generate.min.score
X
X
X
X | ||||||||
gora.buffer.read.limit | X | - | ||||||
gora.buffer.write.limit | X | - | ||||||
hbase.indexer.commit.size | X | - | ||||||
hbase.indexer.mapping.file | X | - | ||||||
hbase.indexer.zookeeper.property.clientPort | X | - | ||||||
hbase.indexer.zookeeper.quorum | X | - | ||||||
headings | X | - | ||||||
headings.multivalued | X | X | ||||||
hostdb.check.failed | X | X | ||||||
hostdb.check.known | X | X | ||||||
hostdb.check.new | X | X |
hostdb.concurrency.level |
- |
X | ||
hostdb.crawldatum.processors | X |
X |
hostdb.dump.field.header |
- |
X
htmlparsefilter.order
X
X
X
X
http.accept
X
X
X
X
http.accept.language
X
X
X
X
http.agent.description
X
X
X
X
http.agent.email
X
X
X
X
http.agent.host
X
X
X
X
http.agent.name
X
X
(test)
X
X
(test)
http.agent.url
X
X
X
X
http.agent.version
X
X
X
X
http.auth.file
X
X
X
X
http.auth.verbose
X
X
http.content.limit
X
X
X
X
http.max.delays
X
X
http.proxy.host
X
X
(test)
X
X
(test)
http.proxy.password
X
X
X
X
http.proxy.port
X
X
(test)
X
X
(test)
http.proxy.realm
X
X
X
X
http.proxy.username
X
X
X
X
http.redirect.max
X
X
http.robots.403.allow
X
X
X
X
http.robots.agents
X
X
(test)
X
X
(test)
http.timeout
X
X
X
X
http.useHttp11
X
X
X
X
http.verbose
X
X
X
X
index.content.md
X
X
index.db.md
X
X
index.parse.md
X
X
(test)
index.replace.regexp
X
X
index.static
X
X
indexer.add.domain
X
X
indexer.delete
X
indexer.delete.robots.noindex
X
indexer.max.content.length
X
X
indexer.max.title.length
X
X
X
X
(test)
indexer.score.power
X
X
X
X
indexer.skip.notmodified
X
X
indexer.url.filters
X
X
X
indexer.url.normalizers
X
indexer.writer.classes
X
X
X
X
indexingfilter.order
X
X
X
X
injector.current.time
X
X
X
X
lang.analyze.max.length
X
X
X
lang.extraction.policy
X
X
X
X
lang.identification.only.certain
X
X
X
X
lang.ngram.max.length
X
lang.ngram.min.length
X
link.analyze.damping.factor
X
X
link.analyze.initial.score
X
X
link.analyze.iteration
X
X
link.analyze.normalize.score
X
X
link.analyze.num.iterations
X
X
link.analyze.rank.one
X
X
X | ||||||||
hostdb.dump.homepages | - | X | ||||||
hostdb.dump.hostnames | - | X | ||||||
hostdb.filter.expression | - | X | ||||||
hostdb.force.check | X | X | ||||||
hostdb.lru.size | - | X | ||||||
hostdb.num.resolvers.threads | X | X | ||||||
hostdb.numeric.fields | X | X | ||||||
hostdb.percentiles | X | X | ||||||
hostdb.purge.failed.hosts.threshold | X | X | ||||||
hostdb.reading.crawldb | - | X | X | |||||
hostdb.recheck.interval | X | X | ||||||
hostdb.string.fields | X | X | ||||||
hostdb.url.filter | X | X | ||||||
hostdb.url.normalize | X | X | ||||||
htmlparsefilter.order | X | X | X | X | ||||
htmlunit.enable.css | X | X | ||||||
htmlunit.enable.javascript | X | X | ||||||
htmlunit.javascript.timeout | X | X | ||||||
http.accept | X | X | X | X | ||||
http.accept.charset | X | X | X | X | ||||
http.accept.language | X | X | X | X | ||||
http.agent.description | X | X | X | X | ||||
http.agent.email | X | X | X | X | ||||
http.agent.host | X | X | X | X | ||||
http.agent.host.cookie.file | X | X | ||||||
http.agent.name | X | X | (owr.) | X | X | |||
http.agent.rotate | X | X | X | X | ||||
http.agent.rotate.file | X | X | X | X | ||||
http.agent.url | X | X | X | X | ||||
http.agent.version | X | X | X | X | ||||
http.auth.file | X | X | X | X | ||||
http.auth.verbose | - | X | - | X | ||||
http.content.limit | X | X | (owr.) | X | X | |||
http.content.truncated | - | X | ||||||
http.content.truncated.reason | - | X | ||||||
http.enable.cookie.header | X | X | ||||||
http.enable.if.modified.since.header | X | X | ||||||
http.log.exceptions.suppress.stack | X | X | ||||||
http.max.delays | X | - | ||||||
http.partial.truncated | X | X | ||||||
http.proxy.exception.list | X | X | ||||||
http.proxy.host | X | X | X | X | ||||
http.proxy.password | X | X | X | X | ||||
http.proxy.port | X | X | X | X | ||||
http.proxy.realm | X | X | X | X | ||||
http.proxy.type | X | X | ||||||
http.proxy.username | X | X | X | X | ||||
http.redirect.max | X | X | ||||||
http.redirect.max.exceeded.skip | X | X | ||||||
http.robot.rules.whitelist | X | X | ||||||
http.robots.403.allow | X | X | X | X | ||||
http.robots.agents | X | X | (owr.) | X | X | |||
http.store.responsetime | X | X | X | X | ||||
http.time.limit | X | X | ||||||
http.timeout | X | X | X | X | ||||
http.tls.certificates.check | X | X | ||||||
http.tls.supported.cipher.suites | - | X | - | X | ||||
http.tls.supported.protocols | - | X | - | X | ||||
http.useHttp11 | X | X | X | X | ||||
http.useHttp2 | X | X | ||||||
http.verbose | X | X | ||||||
index.content.md | X | X | ||||||
index.db.md | X | X | ||||||
index.geoip.licensekey | X | X | ||||||
index.geoip.usage | X | X | ||||||
index.geoip.userid | X | X | ||||||
index.jexl.filter | X | X | ||||||
index.links.hosts.only | X | - | ||||||
index.links.inlinks.host.ignore | X | - | ||||||
index.links.outlinks.host.ignore | X | - | ||||||
index.metadata | X | X | ||||||
index.metadata.multivalued.fields | - | X | ||||||
index.metadata.separator | X | X | ||||||
index.parse.md | X | X | ||||||
index.replace.regexp | X | X | ||||||
index.static | X | X | ||||||
index.static.fieldsep | X | X | ||||||
index.static.keysep | X | X | ||||||
index.static.valuesep | X | X | ||||||
indexer.add.domain | X | X | ||||||
indexer.additional.params | - | X | ||||||
indexer.binary.base64 | - | X | ||||||
indexer.delete | - | X | ||||||
indexer.delete.robots.noindex | X | X | ||||||
indexer.delete.skipped.by.indexingfilter | X | X | ||||||
indexer.indexwriters.file | X | X | ||||||
indexer.max.content.length | X | X | ||||||
indexer.max.title.length | X | X | X | X | ||||
indexer.nocommit | - | X | ||||||
indexer.score.power | X | X | X | X | ||||
indexer.skip.notmodified | X | X | ||||||
indexer.url.filters | - | X | X | X | ||||
indexer.url.normalizers | - | X | ||||||
indexingfilter.order | X | X | X | X | ||||
injector.current.time | - | X | X | - | X | X | ||
interactiveselenium.handlers | X | X | ||||||
io.file.buffer.size | - | X | ||||||
io.serializations | X | - | X | - | ||||
jsoup.extractor.property.file | X | X | ||||||
lang.analyze.max.length | X | X | X | - | ||||
lang.extraction.policy | X | X | X | X | ||||
lang.identification.only.certain | X | X | X | X | ||||
lang.index.languages | X | X | ||||||
lang.ngram.max.length | X | - | ||||||
lang.ngram.min.length | X | - | ||||||
libselenium.page.load.delay | - | X | ||||||
link.analyze.damping.factor | X | X | ||||||
link.analyze.initial.score | X | X | ||||||
link.analyze.iteration | - | X | X | |||||
link.analyze.normalize.score | - | X | - | X | ||||
link.analyze.num.iterations | X | X | ||||||
link.analyze.rank.one | - | X | X | |||||
link.delete.gone | X | X | ||||||
link.ignore.internal.domain | X | X | ||||||
link.ignore.internal.host | X | X | ||||||
link.ignore.limit.domain | X | X | ||||||
link.ignore.limit.page | X | X | ||||||
link.score.updater.clear.score | X | X | ||||||
linkdb.ignore.external.links | X | X | ||||||
linkdb.ignore.internal.links | X | X | ||||||
linkdb.max.anchor.length | X | X | ||||||
linkdb.max.inlinks | X | X | ||||||
linkdb.regex | - | X | X | |||||
linkdb.url.filters | - | X | X | X | ||||
linkdb.url.normalizer | - | X | ||||||
linkdb.url.normalizer.scope | - | X | ||||||
metatag.description | - | X | ||||||
metatag.keyword | - | X | ||||||
metatag.keywords | - | X | ||||||
metatags.names | X | X | X | X | ||||
mime.type.magic | X | X | X | X | ||||
mime.types.file | X | X | X | X | ||||
mimetype.filter.file | X | X | ||||||
moreIndexingFilter.indexMimeTypeParts | X | X | X | X | ||||
moreIndexingFilter.mapMimeTypes | X | X | ||||||
moreIndexingFilter.mapMimeTypes.field | X | X | ||||||
nutch.conf.uuid | - | X | - | X | ||||
nutch.fetch.time | - | X | ||||||
org.apache.nutch.webui | - | X | ||||||
page.load.delay | X | X | ||||||
parse.filter.urls | X | X | (owr.) | |||||
parse.job.force | - | X | ||||||
parse.job.resume | - | X | ||||||
parse.normalize.urls | X | X | (owr.) | |||||
parse.plugin.file | X | X | X | X | ||||
parse.sitemap | - | X | ||||||
parsefilter.naivebayes.trainfile | X | X | ||||||
parsefilter.naivebayes.wordlist | X | X | ||||||
parsefilter.regex.file | - | X | ||||||
parsefilter.regex.rules | - | X | ||||||
parser.caching.forbidden.policy | X | X | X | X | ||||
parser.character.encoding.default | X | X | X | X | ||||
parser.html.form.use_action | X | X | X | X | ||||
parser.html.impl | X | X | X | X | ||||
parser.html.line.separators | X | X | ||||||
parser.html.outlinks.htmlnode_metadata_name | X | X | ||||||
parser.html.outlinks.ignore_tags | X | X | X | X | ||||
parser.html.outlinks.max.target.length | X | X | ||||||
parser.skip.truncated | X | X | X | X | ||||
parser.store.text | X | X | ||||||
parser.timeout | X | X | X | X | ||||
partition.url.mode | X | X | X | X | ||||
partition.url.seed | - | X | X | - | X | |||
plugin.auto-activation | X | X | X | X | ||||
plugin.excludes | X | X | X | X | ||||
plugin.folders | X | X | X | X | ||||
plugin.includes | X | X | X | X | ||||
preferred.schema.name | X | |||||||
publisher.order | X | - | ||||||
rabbitmq.publisher.binding | X | X | ||||||
rabbitmq.publisher.binding.arguments | X | X | ||||||
rabbitmq.publisher.exchange.name | X | X | ||||||
rabbitmq.publisher.exchange.options | X | X | ||||||
rabbitmq.publisher.headers.static | X | X | ||||||
rabbitmq.publisher.queue.name | X | X | ||||||
rabbitmq.publisher.queue.options | X | X | ||||||
rabbitmq.publisher.routingkey | X | X | ||||||
rabbitmq.publisher.server.uri | X | X | ||||||
restapi.auth | X | X | ||||||
restapi.auth.ssl.keypass | X | X | ||||||
restapi.auth.ssl.storepass | X | X | ||||||
restapi.auth.ssl.storepath | X | X | ||||||
restapi.auth.users | X | X | ||||||
scoring.content.md | X | X | ||||||
scoring.db.md | X | X | ||||||
scoring.depth.max | X | X | ||||||
scoring.filter.order | X | - | X | X | ||||
scoring.orphan.mark.gone.after | X | X | ||||||
scoring.orphan.mark.orphan.after | X | X | ||||||
scoring.parse.md | X | X | ||||||
scoring.similarity.model | X | X | ||||||
scoring.similarity.ngrams | X | X | ||||||
scoring.similarity.stopword.file | X | X | ||||||
screenshot.location | X | X | ||||||
segment.dump.dir | - | X | ||||||
segment.merger.filter | - | X | X | |||||
segment.merger.normalizer | - | X | X | |||||
segment.merger.segmentName | - | X | X | |||||
segment.merger.slice | - | X | X | |||||
segment.proxy.port | - | X | ||||||
segment.reader.content.recode | X | X | (owr.) | |||||
selenium.driver | X | X | ||||||
selenium.enable.headless | X | X | ||||||
selenium.firefox.allowed.hosts | X | - | ||||||
selenium.firefox.binary.timeout | X | - | ||||||
selenium.firefox.enable.flash | X | - | ||||||
selenium.firefox.load.image | X | - | ||||||
selenium.firefox.load.stylesheet | X | - | ||||||
selenium.grid.binary | X | X | ||||||
selenium.grid.driver | X | X | ||||||
selenium.hub.host | X | X | ||||||
selenium.hub.path | X | X | ||||||
selenium.hub.port | X | X | ||||||
selenium.hub.protocol | X | X | ||||||
sftp.password | - | X | ||||||
sftp.port | - | X | ||||||
sftp.server | - | X | ||||||
sftp.user | - | X | ||||||
sitemap.content.limit | X | - | ||||||
sitemap.parser.timeout | X | X | ||||||
sitemap.redir.max | X | X | ||||||
sitemap.size.max | X | X | ||||||
sitemap.strict.parsing | X | X | ||||||
sitemap.url.default.sitemap.xml | X | X | ||||||
sitemap.url.filter | X | X | ||||||
sitemap.url.normalize | X | X | ||||||
sitemap.url.overwrite.existing | X | X | ||||||
solr.auth | X | X | ||||||
solr.auth.password | - | X | ||||||
solr.auth.username | - | X | ||||||
solr.commit.index | X | X |
link.delete.gone
X
X
link.ignore.internal.domain
X
X
link.ignore.internal.host
X
X
link.ignore.limit.domain
X
X
link.ignore.limit.page
X
X
link.loops.depth
X
X
link.score.updater.clear.score
X
X
linkdb.url.filters
X
X
X
linkdb.url.normalizer
X
linkdb.url.normalizer.scope
X
metatag.description
X
metatag.keywords
X
metatags.names
X
X
(test)
mime.type.magic
X
X
X
X
mime.types.file
X
X
X
X
moreIndexingFilter.indexMimeTypeParts
X
X
(test)
X
X
(test)
moreIndexingFilter.mapMimeTypes
X
X
nutch.conf.uuid
X
X
parse.filter.urls
X
X
(owr.)
parse.job.force
X
parse.job.resume
X
parse.normalize.urls
X
X
(owr.)
parse.plugin.file
X
X
(test)
X
X
(test)
parser.caching.forbidden.policy
X
X
X
X
parser.character.encoding.default
X
X
X
X
parser.fix.embeddedparams
X
parser.html.form.use_action
X
X
(test)
X
X
(test)
parser.html.impl
X
X
X
X
parser.html.outlinks.ignore_tags
X
X
X
X
parser.skip.truncated
X
X
X
X
parser.timeout
X
X
X
X
partition.url.mode
X
X
X
X
partition.url.seed
X
X
X
plugin.auto-activation
X
X
X
X
plugin.excludes
X
X
X
X
plugin.folders
X
X
X
X
plugin.includes
X
X
(test)
X
X
(test)
schema.prefix
X
scoring.filter.order
X
X
X
X
segment.dump.dir
X
segment.merger.filter
X
X
segment.merger.normalizer
X
X
segment.merger.segmentName
X
X
segment.merger.slice
X
X
segment.proxy.port
X
X
segment.reader.co
X
X
segment.reader.fe
X
X
segment.reader.ge
X
X
segment.reader.pa
X
X
segment.reader.pd
X
X
segment.reader.pt
X
X
sftp.password
X
sftp.port
X
sftp.server
X
sftp.user
X
solr.auth
X
X
solr.auth.password
X
solr.auth.username
X
solr.commit.index
X
X
X
X
solr.commit.size | X | X |
solr.mapping.file | X | X |
solr.server.url | - | X |
storage.crawl.id | X |
X | |||||
storage.data.store.class | X |
- | |||||
storage.schema.host | X |
X |
solr.server.url
X
X
storage.crawl.id
X
X
storage.data.store.class
X
X
(test)
storage.schema.host
X
X
storage.schema.webpage
X
X
subcollection.default.field
X
subcollection.default.fieldname
X
subcollections.config
X
X
subcollections.xml
X
X
tika.config.file
X
storage.schema.webpage | X | X | ||||||
store.http.headers | X | X | ||||||
store.http.request | X | X | ||||||
store.ip.address | X | X | X | X | ||||
subcollection.case.insensitive | X | X | ||||||
subcollection.default.fieldname | X | X | ||||||
subcollection.metadata.source | - | X | ||||||
subcollections.config | - | X | - | X | ||||
subcollections.xml | - | X | - | X | ||||
take.screenshot | X | X | ||||||
tika.boilerpipe | X | X | ||||||
tika.boilerpipe.extractor | X | X | ||||||
tika.config.file | X | X | ||||||
tika.extractor | X | X | ||||||
tika.extractor.boilerpipe.algorithm | X | X | ||||||
tika.extractor.boilerpipe.mime.types | X | X | ||||||
tika.htmlmapper.classname | X | X | X | X | ||||
tika.parse.embedded | X | X | ||||||
tika.uppercase.element.names | X | X |
urlfilter.automaton.file | X | X |
X | X |
urlfilter.automaton.rules | - | X |
- |
X |
urlfilter.domain.file |
X
X
X | X |
X |
X | ||
urlfilter.domain.rules | - | X |
- |
X |
urlfilter. |
urlfilter.domainblacklist.rules
X
domaindenylist.file | - | X |
urlfilter.domaindenylist.rules | - | X | ||||||
urlfilter.fast.file | X | X |
urlfilter.order | X | X |
X | X |
urlfilter.prefix.file | X | X |
X | X |
urlfilter.prefix.rules |
- |
X |
- |
X | ||
urlfilter.regex.file | X | X |
X | X |
urlfilter.regex.rules | - | X |
- |
X |
urlfilter.suffix.file | X | X |
(test)
X | X |
(test)
urlfilter.suffix.rules | - | X |
- | X | ||||
urlfilter.tld.length |
X |
X |
urlmeta.tags |
X | X | |||||||
urlnormalizer.basic.host.idn | X | - | ||||||
urlnormalizer.basic.host.trim-trailing-dot | X |
- | ||||||
urlnormalizer.hosts.file |
X
- | X | ||||||
urlnormalizer.hosts.rules | - | X |
urlnormalizer.loop.count | X | X | X |
X |
urlnormalizer.order | X | X |
X |
X | ||
urlnormalizer.protocols.file | X | X | ||||||
urlnormalizer.protocols.rules | X |
(test)
X | ||||||
urlnormalizer.regex.file |
X | X |
X |
X | ||
urlnormalizer.regex.rules | - | X | - | X |
urlnormalizer.slashes.file | - |
X |
X
urlnormalizer.regex.rules
X
X
urlnormalizer.slashes.rules | - | X | ||||||
warc.exporter.only.successful.responses | - | X | ||||||
warc.file.size.max | - | X | ||||||
webdriver.chrome.driver | X | - |
webgraph.url.filters | - | X | X |
X |
webgraph.url.normalizers |
X
- |
X | ||||||
webgui.auth.users | X | X |
webtable.dump.content |
X
- | X | ||||||
webtable.dump.headers |
X
- | X | ||||||
webtable.dump.links |
- | X |
webtable.dump.text |
- | X |
webtable.url.regex |
X
- | X |
back to FrontPage