From a1a2bbf952d44575afebe1ad4436bfb8753872d8 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Thu, 26 May 2016 10:19:46 +0200
Subject: [PATCH] doc:added multithreading section

---
 packaging/debian/buildppa.sh |   6 +-
 src/doc/user/usermanual.xml  |  97 ++++
 src/sampleconf/recoll.conf   | 995 ++++++++++++++++++++++-------------
 3 files changed, 732 insertions(+), 366 deletions(-)
diff --git a/packaging/debian/buildppa.sh b/packaging/debian/buildppa.sh
index cc86724e..e7bb4cd0 100644
--- a/packaging/debian/buildppa.sh
+++ b/packaging/debian/buildppa.sh
@@ -19,7 +19,7 @@ case $RCLVERS in
     1.14*) PPANAME=recoll-ppa;;
     *)     PPANAME=recoll15-ppa;;
 esac
-PPANAME=recollexp-ppa
+#PPANAME=recollexp-ppa
 echo "PPA: $PPANAME. Type CR if Ok, else ^C"
 read rep
 
@@ -42,7 +42,7 @@ check_recoll_orig()
 debdir=debian
 # Note: no new releases for lucid: no webkit. Or use old debianrclqt4 dir.
 series="precise trusty utopic vivid wily xenial"
-series=trusty
+series=
 
 if test "X$series" != X ; then
     check_recoll_orig
@@ -141,7 +141,7 @@ done
 
 ### Unity Scope
 series="trusty utopic vivid wily xenial"
-series=
+series=xenial
 
 debdir=debianunityscope
 if test ! -d ${debdir}/ ; then
diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml
index cee8e2ea..f196efb9 100644
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@@ -800,6 +800,103 @@ indexedmimetypes = application/pdf
       </sect2>
 
 
+
+
+
+      <sect2 id="RCL.INDEXING.CONFIG.THREADS">
+        <title>Indexing threads configuration</title>
+
+        <para>The &RCL; indexing process 
+          <command>recollindex</command> can use multiple threads to
+          speed up indexing on multiprocessor systems. The work done
+          to index files is divided in several stages and some of the
+          stages can be executed by multiple threads. The stages are:
+          <orderedlist>
+            <listitem>File system walking: this is always performed by
+              the main thread.</listitem>
+            <listitem>File conversion and data extraction.</listitem>
+            <listitem>Text processing (splitting, stemming,
+            etc.)</listitem>
+            <listitem>&XAP; index update.</listitem>
+          </orderedlist>
+        </para>
+        <para>You can also read a 
+          <ulink url="http://www.recoll.org/idxthreads/threadingRecoll.html">
+            longer document</ulink> about the transformation of
+          &RCL; indexing to multithreading.</para>
+
+        <para>The threads configuration is controlled by two
+          configuration file parameters.</para>
+
+	 <variablelist>
+
+          <varlistentry><term><varname>thrQSizes</varname></term>
+            <listitem><para>This variable defines the job input queues
+                configuration. There are three possible queues for stages
+                2, 3 and 4, and this parameter should give the queue depth
+                for each stage (three integer values). If a value of -1 is
+                used for a given stage, no queue is used, and the thread
+                will go on performing the next stage. In practice, deep
+                queues have not been shown to increase performance. A value
+                of 0 for the first queue tells &RCL; to perform
+                autoconfiguration (no need for anything else in this case,
+                thrTCounts is not used) - this is the default
+                configuration.</para>
+            </listitem>
+          </varlistentry>
+
+          <varlistentry><term><varname>thrTCounts</varname></term>
+            <listitem><para>This defines the number of threads used
+                for each stage. If a value of -1 is used for one of
+                the queue depths, the corresponding thread count is
+                ignored. It makes no sense to use a value other than 1
+                for the last stage because updating the &XAP; index is
+                necessarily single-threaded (and protected by a
+                mutex).</para>
+            </listitem>
+          </varlistentry>
+
+         </variablelist>
+
+         <para>The following example would use three queues (of depth 2),
+         and 4 threads for converting source documents, 2 for
+         processing their text, and one to update the index. This was
+         tested to be the best configuration on the test system
+         (quad-core processor with multiple disks).
+<programlisting>
+thrQSizes = 2 2 2
+thrTCounts =  4 2 1
+</programlisting>
+         </para>
+
+         <para>The following example would use a single queue, and the
+           complete processing for each document would be performed by
+           a single thread (several documents will still be processed
+           in parallel in most cases). The threads will use mutual
+           exclusion when entering the index update stage. In practice
+           the performance would be close to the previous case in
+           general, but worse in certain cases (e.g. a Zip archive
+           would be processed purely sequentially), so the previous
+           approach is preferred. YMMV...  The 2 last values for
+           thrTCounts are ignored.
+<programlisting>
+thrQSizes = 2 -1 -1
+thrTCounts =  6 1 1
+</programlisting>
+         </para>
+
+         <para>The following example would disable
+           multithreading. Indexing will be performed by a single
+           thread.
+<programlisting>
+thrQSizes = -1 -1 -1
+</programlisting>
+         </para>
+
+         </sect2>
+
+
+
       <sect2 id="RCL.INDEXING.CONFIG.GUI">
         <title>The index configuration GUI</title>
 
diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf
index feeb6868..38f34896 100644
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@@ -1,43 +1,52 @@
-# (C) 2004 J.F.Dockes. License: GPL
-#
-# Recoll default configuration file. This typically lives in
-# $prefix/share/recoll/examples and provides default values. You can
-# override selected parameters by adding assigments to
-# ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
-#
-# Almost all values in this file can be set from the GUI configuration
-# menus, which may be an easier approach than direct editing.
-#
+# <filetitle>Recoll default main configuration file</filetitle>
 
-# Space-separated list of directories to index. Next line indexes $HOME
+# The XML tags in the comments are used to help produce the documentation
+# from the sample/reference file, and not at all at run time, where
+# comments are just comments. Edit at will.
+
+# This typically lives in $prefix/share/recoll/examples and provides
+# default values. You can override selected parameters by adding assignments
+# to ~/.recoll/recoll.conf (or $RECOLL_CONFDIR/recoll.conf)
+#
+# Most of the important values in this file can be set from the GUI
+# configuration menus, which may be an easier approach than direct editing.
+
+# <grouptitle>Parameters affecting what documents we index</grouptitle>
+
+# <var name="topdirs" type="string"><brief>Space-separated list of files or
+# directories to recursively index.</brief><descr>Default to ~ (indexes
+# $HOME). You can use symbolic links in the list, they will be followed,
+# independently of the value of the followLinks variable.</descr></var>
 topdirs = ~
 
-# Wildcard expressions for names of files and directories that we should
-# ignore. If you need index mozilla/thunderbird mail folders, don't put
-# ".*" in there (as was the case with an older sample config)
-# These are simple names, not paths (must contain no / )
+# <var name="skippedNames" type="string"><brief>Wildcard expressions for
+# names of files and directories that we should ignore.</brief>
+# <descr> White space separated list of wildcard patterns (simple
+# ones, not paths, must contain no / ), which will be tested against file
+# and directory names.  The list in the default configuration does not
+# exclude hidden directories (names beginning with a dot), which means that
+# it may index quite a few things that you do not want. On the other hand,
+# email user agents like Thunderbird usually store messages in hidden
+# directories, and you probably want this indexed. One possible solution is
+# to have '.*' in 'skippedNames', and add things like '~/.thunderbird'
+# '~/.evolution' to 'topdirs'.  Not even the file names are indexed for
+# patterns in this list, see the 'noContentSuffixes' variable for an
+# alternative approach which indexes the file names. Can be redefined for
+# any subtree.</descr></var>
 skippedNames = #* bin CVS  Cache cache* .cache caughtspam tmp \
      .thumbnails .svn \
      *~ .beagle .git .hg .bzr loop.ps .xsession-errors \
      .recoll* xapiandb recollrc recoll.conf
 
-# Wildcard expressions for paths we shouldn't go into. The database and
-# configuration directories will automatically be added in there.  
-# We add the usual mount point for removable media by default to remind
-# people that it is a bad idea to naively have recoll work on these
-# (esp. with the monitor: media gets indexed on mount, all data gets erased
-# on unmount...). Typically the presence of /media is mostly a reminder, it
-# would only have effect for someone who's indexing / ...
-# Explicitely adding /media/xxx to the topdirs will override this.
-skippedPaths = /media
-
-# List of suffixes for which we don't try mime type identification (and
-# don't uncompress or index content obviously). This complements the now
-# obsoleted mimemap recoll_noindex list, which will go away in a future
-# release (the move from mimemap to recoll.conf allows editing the list
-# through the GUI). This is different from skippedNames because these are
-# name ending matches only (not wildcard patterns), and the file name
-# itself gets indexed normally.
+# <var name="noContentSuffixes" type="string"><brief>List of name endings (not
+# necessarily dot-separated suffixes) for which we don't try MIME type
+# identification, and don't uncompress or index content.</brief><descr>Only
+# the names will be indexed. This complements the now obsoleted mimemap
+# recoll_noindex list, which will go away in a future release (the move
+# from mimemap to recoll.conf allows editing the list through the
+# GUI). This is different from skippedNames because these are name ending
+# matches only (not wildcard patterns), and the file name itself gets
+# indexed normally. This can be redefined for subdirectories.</descr></var>
 noContentSuffixes = .md5 .map \
        .o .lib .dll .a .sys .exe .com \
        .mpp .mpt .vsd \
@@ -45,89 +54,196 @@ noContentSuffixes = .md5 .map \
        .dat .bak .rdf .log.gz .log .db .msf .pid \
        ,v ~ #
 
-# Same for real time indexing. The idea here is that there is stuff that
-# you might want to initially index but not monitor. If daemSkippedPaths is
-# not set, the daemon uses skippedPaths.
-#daemSkippedPaths = 
+# <var name="skippedPaths" type="string"><brief>Space-separated list of
+# wildcard expressions for paths we shouldn't go into.</brief><descr>Can
+# contain files and directories. The database and configuration directories
+# will automatically be added.  The expressions are matched using 'fnmatch(3)'
+# with the FNM_PATHNAME flag set by default. This means that '/' characters
+# must be matched explicitly. You can set 'skippedPathsFnmPathname' to 0
+# to disable the use of FNM_PATHNAME (meaning that '/*/dir3' will match
+# '/dir1/dir2/dir3').  The default value contains the usual mount point for
+# removable media to remind people that it is a bad idea to
+# naively have recoll work on these (esp. with the monitor: media gets
+# indexed on mount, all data gets erased on unmount). Typically the
+# presence of '/media' is mostly a reminder, it would only have effect for
+# someone who is indexing '/'.  Explicitly adding '/media/xxx' to the
+# topdirs will override this.</descr></var>
+skippedPaths = /media
 
-# Recoll uses FNM_PATHNAME by default when matching skipped paths, which
-# means that /dir1/dir2/dir3 is not matched by */dir3. Can't change the
-# default now, but you can set the following variable to 0 to disable the
-# use of FNM_PATHNAME (see fnmatch(3) man page)
+# <var name="skippedPathsFnmPathname" type="bool"><brief>Set to 0 to
+# override use of FNM_PATHNAME for matching skipped
+# paths.</brief><descr></descr></var> 
 #skippedPathsFnmPathname = 1
 
-# Option to follow symbolic links. We normally don't, to avoid duplicated
-# indexing (in any case, no effort is made to identify or avoid multiple
-# indexing of linked files)
+# <var name="daemSkippedPaths" type="string"><brief>skippedPaths equivalent specific to
+# real time indexing.</brief><descr>This enables having parts of the tree
+# which are initially indexed but not monitored. If daemSkippedPaths is
+# not set, the daemon uses skippedPaths.</descr></var>
+#daemSkippedPaths = 
+
+
+# <var name="zipSkippedNames" type="string"><brief>Space-separated list of
+# wildcard expressions for names that should be ignored
+# inside zip archives.</brief><descr>This is used directly by the zip
+# handler, and has a function similar to skippedNames, but
+# works independently. Can be redefined for subdirectories. Supported by
+# recoll 1.20 and newer. See
+# https://bitbucket.org/medoc/recoll/wiki/Filtering%20out%20Zip%20archive%20members
+# </descr></var>
+#zipSkippedNames =
+
+# <var name="followLinks" type="bool"><brief>Follow symbolic links during
+# indexing.</brief><descr>The default is to ignore symbolic links to avoid
+# multiple indexing of linked files. No effort is made to avoid duplication
+# when this option is set to true. This option can be set individually for
+# each of the 'topdirs' members by using sections. It can not be changed
+# below the 'topdirs' level. Links in the 'topdirs' list itself are always
+# followed.</descr></var>
 #followLinks = 0
 
-# Debug messages. 2 is errors/warnings only. 3 information like doc
-# updates, 4 is quite verbose and 6 very verbose
-loglevel = 3
-logfilename = stderr
+# <var name="indexedmimetypes" type="string"><brief>Restrictive list of
+# indexed mime types.</brief><descr>Normally not set (in which case all
+# supported types are indexed). If it is set, 
+# only the types from the list will have their contents indexed. The names
+# will be indexed anyway if indexallfilenames is set (default). MIME
+# type names should be taken from the mimemap file. Can be redefined for
+# subtrees.</descr></var>
+#indexedmimetypes = 
 
-# The following can be used to set different values for logging by the
-# indexer (recollindex). The default is to use loglevel/logfilename
-#idxloglevel = 3
-#idxlogfilename = stderr
+# <var name="excludedmimetypes" type="string"><brief>List of excluded MIME
+# types.</brief><descr>Lets you exclude some types from indexing. Can be
+# redefined for subtrees.</descr></var>
+#excludedmimetypes = 
 
-# Specific versions of log file name and level for the indexing daemon. The 
-# default is to use the idx... values if set, else the log... values.
-#daemloglevel = 3
-#daemlogfilename = /dev/null
+# <var name="compressedfilemaxkbs" type="int"><brief>Size limit for compressed
+# files.</brief><descr>We need to decompress these in a
+# temporary directory for identification, which can be wasteful in some
+# cases. Limit the waste. Negative means no limit. 0 results in no
+# processing of any compressed file.</descr></var>
+compressedfilemaxkbs = 50000
 
-# Run directory for the indexing process. The filters sometimes leave
-# garbage in the current directory, so it makes sense to have recollindex
-# chdir to some garbage bin. 3 possible values:
-#  - (literal) tmp : go to temp dir as set by env (RECOLL_TMPDIR else
-#    TMPDIR else /tmp)
-#  - Empty: stay where started
-#  - Absolute path value: go there.
-idxrundir = tmp
+# <var name="textfilemaxmbs" type="int"><brief>Size limit for text
+# files.</brief><descr>Mostly for skipping monster
+# logs.</descr></var> 
+textfilemaxmbs = 20
 
-# Decide if we store character case and diacritics in the index. If we do, 
+# <var name="indexallfilenames" type="bool"><brief>Index the file names of
+# unprocessed files</brief><descr>Index the names of files the contents of
+# which we don't index because of an excluded or unsupported MIME
+# type.</descr></var>
+indexallfilenames = 1
+
+# <var name="usesystemfilecommand" type="bool"><brief>Use a system command
+# for file MIME type guessing as a final step in file type
+# identification</brief><descr>This is generally useful, but will usually
+# cause the indexing of many bogus 'text' files. See 'systemfilecommand'
+# for the command used.</descr></var>
+usesystemfilecommand = 1
+
+# <var name="systemfilecommand" type="string"><brief>Command used to guess
+# MIME types if the internal methods fail</brief><descr>This should be a
+# "file -i" workalike.  The file path will be added as a last parameter to
+# the command line. 'xdg-mime' works better than the traditional 'file'
+# command, and is now the configured default (with a hard-coded fallback to
+# 'file')</descr></var>
+systemfilecommand = xdg-mime query filetype
+
+# <var name="processwebqueue" type="bool"><brief>Decide if we process the
+# Web queue.</brief><descr>The queue is a directory where the Recoll Web
+# browser plugins create the copies of visited pages.</descr></var>
+processwebqueue = 0
+
+# <var name="textfilepagekbs" type="int"><brief>Page size for text
+# files.</brief><descr>If this is set, text/plain files will be divided
+# into documents of approximately this size. Will reduce memory usage at
+# index time and help with loading data in the preview window at query
+# time. Particularly useful with very big files, such as application or
+# system logs.</descr></var>
+textfilepagekbs = 1000
+
+# <var name="membermaxkbs" type="int"><brief>Size limit for archive
+# members.</brief><descr>This is passed to the filters in the environment
+# as RECOLL_FILTER_MAXMEMBERKB.</descr></var>
+membermaxkbs = 50000
+
+
+
+# <grouptitle>Parameters affecting how we generate terms</grouptitle>
+
+# Changing some of these parameters will imply a full
+# reindex. Also, when using multiple indexes, it may not make sense
+# to search indexes that don't share the values for these parameters,
+# because they usually affect both search and index operations.
+
+
+# <var name="indexStripChars" type="bool"><brief>Decide if we store
+# character case and diacritics in the index.</brief><descr>If we do,
 # searches sensitive to case and diacritics can be performed, but the index
-# will be bigger, and some marginal weirdness may sometimes occur. We
-# default to a stripped index for now.
+# will be bigger, and some marginal weirdness may sometimes occur. The
+# default is a stripped index. When using multiple indexes for a search,
+# this parameter must be defined identically for all. Changing the value
+# implies an index reset.</descr></var>
 indexStripChars = 1
 
-# IF the index is not stripped. Decide if we automatically trigger
-# diacritics sensitivity if the search term has accented characters (not in
-# unac_except_trans). Else you need to use the query language and the "D"
-# modifier to specify diacritics sensitivity. Default is no.
-autodiacsens = 0
+# <var name="nonumbers" type="bool"><brief>Decides if terms will be
+# generated for numbers.</brief><descr>For example "123", "1.5e6",
+# 192.168.1.4, would not be indexed if nonumbers is set ("value123" would
+# still be). Numbers are often quite interesting to search for, and this
+# should probably not be set except for special situations, ie, scientific
+# documents with huge amounts of numbers in them, where setting nonumbers
+# will reduce the index size. This can only be set for a whole index, not
+# for a subtree.</descr></var>
+#nonumbers = 0
 
-# IF the index is not stripped. Decide if we automatically trigger
-# character case sensitivity if the search term has upper-case characters
-# in any but the first position. Else you need to use the query language
-# and the "C" modifier to specify character-case sensitivity. Default is
-# yes.
-autocasesens = 1
+# <var name="dehyphenate" type="bool"><brief>Determines if we index
+# 'coworker' also when the input is 'co-worker'.</brief><descr>This is new
+# in version 1.22, and on by default. Setting the variable to off allows
+# restoring the previous behaviour.</descr></var>
+#dehyphenate = 1
 
-# Languages for which to build stemming databases at the end of
-# indexing. Stemmer names can be found on http://www.xapian.org 
-# The flag to perform stem expansion at query time is now set from the GUI
+# <var name="nocjk" type="bool"><brief>Decides if specific East Asian
+# (Chinese Korean Japanese) characters/word splitting is turned
+# off.</brief><descr>This will save a small amount of cpu if you have no CJK
+# documents. If your document base does include such text but you are not
+# interested in searching it, setting nocjk may be a
+# significant time and space saver.</descr></var>
+#nocjk = 0
+
+# <var name="cjkngramlen" type="int"><brief>This lets you adjust the size of
+# n-grams used for indexing CJK text.</brief><descr>The default value of 2 is
+# probably appropriate in most cases. A value of 3 would allow more precision
+# and efficiency on longer words, but the index will be approximately twice
+# as large.</descr></var>
+#cjkngramlen = 2
+
+# <var name="indexstemminglanguages" type="string"><brief>Languages for
+# which to create stemming expansion data.</brief><descr>Stemmer names can
+# be found on http://www.xapian.org, or by executing 'recollindex -l', or
+# this can also be set from a list in the GUI</descr></var>
 indexstemminglanguages = english 
 
-# Default character set. Values found inside files, ie content tag in html
-# documents, will override this. It can be specified per directory (see
-# below). Used when converting to utf-8 (internal storage format), so it
-# may be quite important for pure text files.
-# The default used to be set to iso8859-1, but we now take it from the nls 
-# environment (LC_ALL/LC_CTYPE/LANG). The ultimate hardwired default is
-# still 8859-1. If for some reason you want a general default which doesnt
-# match your LANG and is not 8859-1, set it here.
-# defaultcharset = iso-8859-1
+# <var name="defaultcharset" type="string"><brief>Default character
+# set.</brief><descr>This is used for files which do not contain a
+# character set definition (e.g.: text/plain). Values found inside files,
+# e.g. a 'charset' tag in HTML documents, will override it. If this is not
+# set, the default character set is the one defined by the NLS environment
+# ($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
+# If for some reason you want a general default which does not match your
+# LANG and is not 8859-1, use this variable. This can be redefined for any
+# sub-directory.</descr></var>
+#defaultcharset = iso-8859-1
 
-# A list of characters, encoded in UTF-8, which should be handled specially
-# when converting text to unaccented lowercase. For example, in Swedish,
-# the letter a with diaeresis has full alphabet citizenship and should not
-# be turned into an a. 
+# <var name="unac_except_trans" type="string"><brief>A list of characters,
+# encoded in UTF-8, which should be handled specially 
+# when converting text to unaccented lowercase.</brief><descr>For
+# example, in Swedish, the letter a with diaeresis has full alphabet
+# citizenship and should not be turned into an a. 
 # Each element in the space-separated list has the special character as
 # first element and the translation following. The handling of both the
 # lowercase and upper-case versions of a character should be specified, as
 # appartenance to the list will turn-off both standard accent and case
-# processing. Examples: 
+# processing. The value is global and affects both indexing and querying.
+# Examples: 
 # Swedish:
 # unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl åå Åå
 # German:
@@ -138,289 +254,48 @@ indexstemminglanguages = english
 # Reasonable default for all until someone protests. These decompositions
 # are not performed by unac, but I cant imagine someone typing the composed
 # forms in a search.
+# unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl</descr></var>
 unac_except_trans = ßss œoe Œoe æae Æae ﬀff ﬁfi ﬂfl
-# Turn off the indexing of numbers: may reduce the index size if you have
-# no use for them 
-# nonumbers = 0
 
-# Turn off indexing "coworker" for an input of "co-worker" (in addition to
-# co, worker, "co worker". Default is on as of version 1.22
-# dehyphenate = 1
+# <var name="maildefcharset" type="string"><brief>Overrides the default
+# character set for email messages which don't specify
+# one.</brief><descr>This is mainly useful for readpst (libpst) dumps,
+# which are utf-8 but do not say so.</descr></var>
+#maildefcharset=
 
-# Maximum expansion count for a single term (ie: when using wildcards).
-# We used to not limit this at all (except for filenames where the limit
-# was too low at 1000), but it is unreasonable with a big index. 
-# Default 10 000
-maxTermExpand = 10000
+# <var name="localfields" type="string"><brief>Set fields on all files
+# (usually of a specific fs area).</brief><descr>Syntax is the usual:
+# name = value ; attr1 = val1 ; [...]
+# value is empty so this needs an initial semi-colon. This is useful, e.g.,
+# for setting the rclaptg field for application selection inside
+# mimeview.</descr></var>
+#[/some/app/directory]
+#localfields = ; rclaptg = someapp; otherfield = somevalue
 
-# Maximum number of clauses we add to a single Xapian query. In some cases,
-# the result of term expansion can be multiplicative, and we want to avoid
-# eating all the memory. Default 50000
-maxXapianClauses = 50000
-
-# Recoll data directories are normally stored relative to the configuration
-# directory (e.g. ~/.recoll/xapiandb, ~/.recoll/mboxcache). If this is set,
-# the directories are stored under the specified value instead
-# (e.g. if cachedir is ~/.cache/recoll, the default dbdir would be
-# ~/.cache/recoll/xapiandb).
-# This affects dbdir, webcachedir, mboxcachedir, aspellDicDir, which can
-# still be individually specified to override cachedir.
-# Note that if you have multiple configurations, each must have a different
-# cachedir, there is no automatic computation of a subpath under cachedir.
-#cachedir = ~/.cache/recoll
-
-# Where to store the database (directory). This may be an absolute path,
-# else it is taken as relative to cachedir if set, or the configuration
-# directory (-c argument or $RECOLL_CONFDIR).  If nothing is specified, the
-# default is then ~/.recoll/xapiandb/
-dbdir = xapiandb
-
-# Indexing process threads configuration. If Recoll is configured for 
-# multithreading, this defines what queues are active and how many threads
-# to start for any of them. The default values were found good on a
-# quad-core processor. The three steps are file conversion, term extraction
-# and conversion and Xapian index update. The three queue values define the
-# max number of jobs waiting on one of the corresponding queues. Setting a
-# value to -1 disables a queue (replaced by a direct call). The thrTcounts
-# values define the number of threads to start for each queue. The last
-# value can only be one (as Xapian is single-threaded).
-# If the first element in thrQSizes is 0, recollindex will attempt to set
-# roughly guestimated values based on the number of CPUs. 
-#
-# The following are the best setup on my core i5 system (4 cores, no
-# hyperthreading, multiple disks).
-#thrQSizes = 2 2 2
-#thrTCounts =  4 2 1
-# The default is to let recoll guess.
-thrQSizes = 0
-
-# Maximum file system occupation before we stop indexing. The default value
-# is 0, meaning no checking. The value is a percentage, corresponding to
-# what the "Capacity" df output column shows.
-maxfsoccuppc = 0
-
-# Threshold (megabytes of new data) where we flush from memory to disk
-# index. Setting this (ie to 10) can help control memory usage. 
-#
-# A value of 0 means no explicit flushing, which lets Xapian perform its
-# own thing, meaning flushing every XAPIAN_FLUSH_THRESHOLD documents
-# created, modified or deleted. XAPIAN_FLUSH_THRESHOLD is an environment
-# variable. As memory usage depends on average document size, not only
-# document count, this is not very useful. 
-#
-# The default value of 10 MB may be a bit low. If you are looking for
-# maximum speed, you may want to experiment with values between 20 and
-# 80. In my experience, values beyond 100 are always counterproductive. If
-# you find otherwise, please drop me a note.
-idxflushmb = 10
-
-# Place to search for executable filters. If RECOLL_FILTERSDIR is set in
-# the environment, we use it instead. Defaults to $prefix/share/recoll/filters
-# filtersdir = /path/to/my/filters
-
-# Additional places to search for helper executables. This is only used on
-# Windows for now
-# recollhelperpath = c:/someprog/bin;c:/someotherprog/bin
-
-# Place to search for icons. The only reason to change this would be if you
-# want to change the icons displayed in the result list.
-# Defaults to $prefix/share/recoll/images
-# iconsdir = /path/to/my/icons
-
-# Should we use the system's 'file -i' command as a final step in file type
-# identification ? This may be useful, but will usually cause the
-# indexation of many bogus 'text' files
-usesystemfilecommand = 1
-# Actual command to use as "file -i" workalike.
-# The file path will be added as a last parameter to the command line. If
-# that's not what your preferred command would like, use an intermediary
-# script.
-# xdg-mime now works better than the traditional "file" command, and is now
-# the configured default (with a hard-coded fallback to "file")
-systemfilecommand = xdg-mime query filetype
-# systemfilecommand = file -i filetype
-
-# Should we index the file names of files with mime types we don't
-# know? (we can otherwise just ignore them)
-indexallfilenames = 1
-
-# A restrictive list of indexed mime types. Normally not set. If it is set,
-# only the types from the list will have their contents indexed (the names
-# will be indexed anyway if indexallfilenames is set as by default). Mime
-# type names should be taken from the mimemap file.
-#
-# indexedmimetypes = 
-
-# An excluded list of mime types. It can be redefined in subdirectories, 
-# so can be used to locally exclude some types.
-#
-# excludedmimetypes = 
-
-#
-# Size limit for archive members. This is passed to the filters in the
-# environment as RECOLL_FILTER_MAXMEMBERKB
-# 
-membermaxkbs = 50000
-
-# Size limit for compressed files. We need to decompress these in a
-# temporary directory for identification, which can be wasteful in some
-# cases. Limit the waste. Negative means no limit. 0 results in no
-# processing of any compressed file. Used to be -1 by default.
-compressedfilemaxkbs = 50000
-
-# Size limit for text files. This is for skipping monster logs
-textfilemaxmbs = 20
-
-# Page size for text files. If this is set, text/plain files will be
-# divided into documents of approximately this size. May be useful to
-# access pieces of big text files which would be problematic to load as one
-# piece into the preview window. Might be useful for big logs
-textfilepagekbs = 1000
-
-# Maximum external filter execution time. Default 20mn. This is mainly
-# to avoid infinite loops in postscript files (loop.ps)
-filtermaxseconds = 1200
-# Maximum virtual memory space for filter process (setrlimit(RLIMIT_AS)),
-# in megabytes. Note that this includes any mapped libs (there is no
-# reliable Linux way to limit the data space only), so we need to be a
-# bit generous here. Anything over 2000 will be ignored on 32 bits machines.
-filtermaxmbytes = 2000
-
-# Length of abstracts we store while indexing. Longer will make for a
-# bigger db
-# idxabsmlen = 250
-
-# Truncation length of stored metadata fields. This does not affect
-# indexing, just what can be displayed inside results.
-# idxmetastoredlen = 150
-
-# Language definitions to use when creating the aspell dictionary. 
-# The value must match a set of aspell language definition files. 
-# You can type "aspell dicts"  to see a list
-# The default if this is not set is to use the NLS environment to guess the
-# value
-# aspellLanguage = en
-
-# Somme aspell packages may need an additional option (e.g. on Debian
-# Jessie). See Debian bug 772415
-# aspellAddCreateParam = --local-data-dir=/usr/lib/aspell
-
-# The aspell dictionary (aspdict.(lang).rws) is normally stored in the
-# directory specified by cachedir if set, or under the configuration
-# directory. Set the following to change: 
-#aspellDicDir = 
-
-# You may also want to set this to have a look at aspell dictionary
-# creation errors. But there are always many, so this is mostly for debugging
-# aspellKeepStderr = 1
-
-# Disabling aspell use. The aspell dictionary generation takes some time,
-# and some combinations of aspell version, language, and local terms,
-# result in aspell dumping core each time. You can disable the aspell
-# dictionary generation by setting the following variable:
-# noaspell = 1
-
-# Timing parameters for the real time mode:
-#
-# Seconds between auxiliary databases updates (stemdb, aspell):
-# monauxinterval = 3600
-#
-# Resting time (seconds) during which we let the queue accumulate, in hope
-# that events to the same file will merge, before we start indexing:
-# monixinterval = 30
-#
-# Definitions for files which get a longer delay before reindexing is
-# allowed. This is for fast-changing files, that should only be reindexed
-# once in a while. A list of wildcardPattern:seconds pairs. The patterns
-# are matched with fnmatch(pattern, path, 0) You can quote entries containing
-# white space with double quotes. The default is empty, here follows an
-# example:
-# mondelaypatterns = *.log:20  "*with spaces.*:30"
-
-# ionice class for monitor (on platforms where this is supported)
-# monioniceclass = 3
-# ionice class param for monitor (on platforms where this is supported)
-# monioniceclassdata = 
-
-# If this is set, process the directory where the Recoll Web browser plugins
-# copy visited pages for indexing.
-processwebqueue = 0
-# The path to the Web indexing queue. This is hard-coded in the
-# plugin as ~/.recollweb/ToIndex so there should be no need to change it. 
-#webqueuedir = ~/.recollweb/ToIndex
-# This is only used by the web history indexing code, and
-# defines where the cache for visited pages will live. Default:
-# cachedir/webcache if cachedir is set, else $RECOLL_CONFDIR/webcache 
-webcachedir = webcache
-# This is only used by the web history indexing code, and
-# defines the maximum size for the web page cache. Default: 40 MB.
-# Reducing the size will not physically truncate the file.
-webcachemaxmbs = 40
-
-# The directory where mbox message offsets cache files are held. This is
-# normally named mboxcache under cachedir if set, or else under the
-# configuration directory, but it may be useful to share a
-# directory between different configurations. 
-#mboxcachedir = mboxcache
-
-# The minimum mbox file size over which we cache the offsets. There is
-# really no sense in caching offsets for small files. The default is 5 MB.
-#mboxcacheminmbs = 5
-
-# Maximum number of positions we walk while populating a snippet for the
-# result list. The default of 1 000 000 may be insufficient for big
-# documents, the consequence would be snippets with possibly
-# meaning-altering missing words. 
-snippetMaxPosWalk = 1000000
-
-# Use mtime instead of default ctime to determine if a file has been
-# modified (in addition to size, which is always used).
+# <var name="testmodifusemtime" type="bool"><brief>Use mtime instead of
+# ctime to test if a file has been modified.</brief><descr>The time is used
+# in addition to the size, which is always used.
 # Setting this can reduce re-indexing on systems where extended attributes
-# are used (by some other applications), but not indexed (changing
-# ext. attrs. only affects ctime). 
+# are used (by some other application), but not indexed, because changing
+# extended attributes only affects ctime.
 # Notes:
-# - this may prevent detection of change in some marginal file rename cases
+# - This may prevent detection of change in some marginal file rename cases
 #   (the target would need to have the same size and mtime).
 # - You should probably also set noxattrfields to 1 in this case, except if
 #   you still prefer to perform xattr indexing, for example if the local
 #   file update pattern makes it of value (as in general, there is a risk
 #   for pure extended attributes updates without file modification to go
 #   undetected). Perform a full index reset after changing this.
+# </descr></var>
 testmodifusemtime = 0
 
-# Disable extended attributes conversion to metadata fields. This probably
-# needs to be set if testmodifusemtime is set.
+# <var name="noxattrfields" type="bool"><brief>Disable extended attributes
+# conversion to metadata fields.</brief><descr>This probably needs to be
+# set if testmodifusemtime is set.</descr></var>
 noxattrfields = 0
 
-# Script used to heuristically check if we need to retry indexing files
-# which previously failed. The default script checks the modified dates on
-# /usr/bin and /usr/local/bin. A relative path will be looked up in the
-# filters dirs, then in the path. Use an absolute path to do otherwise.
-checkneedretryindexscript = rclcheckneedretry.sh
-
-# Parameters for the PDF input script
-# Attempt OCR of PDF files with no text content if both tesseract and
-# pdftoppm are installed. The default is not to do it because OCR is so
-# very slow
-#pdfocr = 0
-# Enable PDF attachment extraction, using pdftk (if available). This is
-# normally disabled, because it does slow down PDF indexing a bit even if
-# not one attachment is ever found. 
-#pdfattach = 0
-
-# You could specify different parameters for a subdirectory like this:
-#[~/hungariandocs/plain]
-#defaultcharset = iso-8859-2
-
-# You can set fields on all files of a specific fs area. (rclaptg can be
-# used for application selection inside mimeview). 
-# Syntax is the usual name = value ; attr1 = val1 ; ... with an empty value
-# so needs initial semi-colon
-#[/some/app/directory]
-#localfields = ; rclaptg = someapp; otherfield = somevalue
-
-# It's also possible to execute external commands to gather external
-# metadata, for example tmsu tags.
+# <var name="metadatacmds" type="string"><brief>Define commands to
+# gather external metadata, e.g. tmsu tags.</brief><descr>
 # There can be several entries, separated by semi-colons, each defining
 # which field name the data goes into and the command to use. Don't forget the
 # initial semi-colon. All the field names must be different. You can use
@@ -430,14 +305,408 @@ checkneedretryindexscript = rclcheckneedretry.sh
 # returns multiple field values inside a text blob formatted as a recoll
 # configuration file ("fieldname = fieldvalue" lines). The rclmultixx name
 # will be ignored, and field names and values will be parsed from the data.
+# </descr></var>
 #[/some/area/of/the/fs]
 #metadatacmds = ; tags = tmsu tags %f; rclmulti1 = cmdOutputsConf %f
 
+
+
+
+# <grouptitle>Parameters affecting where and how we store things</grouptitle>
+
+# <var name="cachedir" type="dfn"><brief>Top directory for Recoll
+# data</brief><descr>Recoll data directories are normally located relative
+# to the configuration directory (e.g. ~/.recoll/xapiandb,
+# ~/.recoll/mboxcache). If 'cachedir' is set, the directories are stored under
+# the specified value instead (e.g. if cachedir is ~/.cache/recoll, the
+# default dbdir would be ~/.cache/recoll/xapiandb).  This affects dbdir,
+# webcachedir, mboxcachedir, aspellDicDir, which can still be individually
+# specified to override cachedir.  Note that if you have multiple
+# configurations, each must have a different cachedir, there is no
+# automatic computation of a subpath under cachedir.</descr></var>
+#cachedir = ~/.cache/recoll
+
+# <var name="maxfsoccuppc" type="int"><brief>Maximum file system occupation
+# over which we stop indexing.</brief><descr>The value is a percentage,
+# corresponding to what the "Capacity" df output column shows. The default
+# value is 0, meaning no checking.</descr></var>
+maxfsoccuppc = 0
+
+# <var name="dbdir" type="dfn"><brief>Xapian database directory
+# location.</brief><descr>This will be created on first indexing. If the
+# value is not an absolute path, it will be interpreted as relative to
+# cachedir if set, or the configuration directory (-c argument or
+# $RECOLL_CONFDIR).  If nothing is specified, the default is then
+# ~/.recoll/xapiandb/</descr></var>
+dbdir = xapiandb
+
+# <var name="idxstatusfile" type="fn"><brief>Name of the scratch file where
+# the indexer process updates its status.</brief><descr>Default:
+# idxstatus.txt inside the configuration directory.</descr></var>
+#idxstatusfile = idxstatus.txt
+
+# <var name="mboxcachedir" type="dfn">
+#
+# <brief>Directory location for storing mbox message offsets cache
+# files.</brief><descr>This is normally 'mboxcache' under cachedir if set,
+# or else under the configuration directory, but it may be useful to share
+# a directory between different configurations.</descr></var>
+#mboxcachedir = mboxcache
+
+# <var name="mboxcacheminmbs" type="int">
+#
+# <brief>Minimum mbox file size over which we cache the offsets.</brief>
+# <descr>There is really no sense in caching offsets for small files. The
+# default is 5 MB.</descr></var>
+#mboxcacheminmbs = 5
+
+# <var name="webcachedir" type="dfn">
+#
+# <brief>Directory where we store the archived web pages.</brief>
+# <descr>This is only used by the web history indexing code.
+# Default: cachedir/webcache if cachedir is set, else
+# $RECOLL_CONFDIR/webcache</descr></var>
+webcachedir = webcache
+
+# <var name="webcachemaxmbs" type="int">
+# <brief>Maximum size in MB of the Web archive.</brief>
+# <descr>This is only used by the web history indexing code.
+# Default: 100 MB.
+# Reducing the size will not physically truncate the file.</descr></var>
+webcachemaxmbs = 100
+
+# <var name="webqueuedir" type="fn">
+#
+# <brief>The path to the Web indexing queue.</brief><descr>This is
+# hard-coded in the plugin as ~/.recollweb/ToIndex so there should be no
+# need or possibility to change it.</descr></var>
+#webqueuedir = ~/.recollweb/ToIndex
+
+# <var name="aspellDicDir" type="dfn">
+#
+# <brief>Aspell dictionary storage directory location.</brief> <descr>The
+# aspell dictionary (aspdict.(lang).rws) is normally stored in the
+# directory specified by cachedir if set, or under the configuration
+# directory.</descr></var>
+#aspellDicDir = 
+
+# <var name="filtersdir" type="dfn">
+#
+# <brief>Directory location for executable input handlers.</brief><descr>If
+# RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
+# to $prefix/share/recoll/filters. Can be redefined for
+# subdirectories.</descr></var>
+#filtersdir = /path/to/my/filters
+
+# <var name="iconsdir" type="dfn">
+#
+# <brief>Directory location for icons.</brief><descr>The only reason to
+# change this would be if you want to change the icons displayed in the
+# result list. Defaults to $prefix/share/recoll/images</descr></var>
+#iconsdir = /path/to/my/icons
+
+# <grouptitle>Parameters affecting indexing performance and resource
+# usage</grouptitle> 
+
+# <var name="idxflushmb" type="int">
+#
+# <brief>Threshold (megabytes of new data) where we flush from memory to disk
+# index.</brief>
+# <descr>Setting this allows some control over memory usage by the indexer
+# process. A value of 0 means no explicit flushing, which lets Xapian
+# perform its own thing, meaning flushing every XAPIAN_FLUSH_THRESHOLD
+# documents created, modified or deleted. XAPIAN_FLUSH_THRESHOLD is an
+# environment variable. As memory usage depends on average document size,
+# not only document count, this is not very useful.
+# The default value of 10 MB may be a bit low. If you are looking for
+# maximum speed, you may want to experiment with values between 20 and
+# 80. In my experience, values beyond 100 are always counterproductive. If
+# you find otherwise, please drop me a note.</descr></var>
+idxflushmb = 10
+
+# <var name="filtermaxseconds" type="int">
+# 
+# <brief>Maximum external filter execution time in
+# seconds.</brief><descr>Default 1200 (20mn). Set to 0 for no limit. This
+# is mainly to avoid infinite loops in postscript files
+# (loop.ps)</descr></var>
+filtermaxseconds = 1200
+
+# <var name="filtermaxmbytes" type="int">
+# 
+# <brief>Maximum virtual memory space for filter processes
+# (setrlimit(RLIMIT_AS)), in megabytes.</brief> <descr>Note that this
+# includes any mapped libs (there is no reliable Linux way to limit the
+# data space only), so we need to be a bit generous here. Anything over
+# 2000 will be ignored on 32 bits machines.</descr></var>
+filtermaxmbytes = 2000
+
+# <var name="thrQSizes" type="string">
+# 
+# <brief>Stage input queues configuration.</brief> <descr>There are three
+# internal queues in the indexing pipeline stages (file data extraction,
+# terms generation, index update). This parameter defines the queue depths
+# for each stage (three integer values). If a value of -1 is given for a
+# given stage, no queue is used, and the thread will go on performing the
+# next stage. In practice, deep queues have not been shown to increase
+# performance. Default: a value of 0 for the first queue tells &RCL; to
+# perform autoconfiguration based on the detected number of CPUs (no need
+# for the two other values in this case).  Use thrQSizes = -1 -1 -1 to
+# disable multithreading entirely.</descr></var>
+thrQSizes = 0
+
+# <var name="thrTCounts" type="string">
+#
+# <brief>Number of threads used for each indexing stage.</brief> <descr>The
+# three stages are: file data extraction, terms generation, index
+# update. The use of the counts is also controlled by some special values
+# in thrQSizes: if the first queue depth is 0, all counts are ignored
+# (autoconfigured); if a value of -1 is used for a queue depth, the
+# corresponding thread count is ignored. It makes no sense to use a value
+# other than 1 for the last stage because updating the &XAP; index is
+# necessarily single-threaded (and protected by a mutex).</descr></var>
+#thrTCounts = 4 2 1
+
+
+# <grouptitle>Miscellaneous parameters</grouptitle>
+
+# <var name="loglevel" type="int">
+#
+# <brief>Debug log verbosity 1-6</brief> <descr>2 is errors/warnings
+# only. 3 information like document updates, 4 is quite verbose and 6 very
+# verbose.</descr></var>
+loglevel = 3
+
+# <var name="logfilename" type="fn">
+#
+# <brief>Debug log destination. Use 'stderr' (default) to write to the
+# console.</brief><descr></descr></var>
+logfilename = stderr
+
+# <var name="idxloglevel" type="int">
+#
+# <brief>Override loglevel for the indexer.</brief><descr></descr></var>
+#idxloglevel = 3
+
+# <var name="idxlogfilename" type="fn">
+#
+# <brief>Override logfilename for the indexer.</brief><descr></descr></var>
+#idxlogfilename = stderr
+
+# <var name="daemloglevel" type="int">
+#
+# <brief>Override loglevel for the indexer in real time
+# mode.</brief><descr>The default is to use the idx... values if set, else
+# the log... values.</descr></var>
+#daemloglevel = 3
+
+# <var name="daemlogfilename" type="fn">
+#
+# <brief>Override logfilename for the indexer in real time
+# mode.</brief><descr>The default is to use the idx... values if set, else
+# the log... values.</descr></var>
+#daemlogfilename = /dev/null
+
+# <var name="idxrundir" type="dfn">
+#
+# <brief>Indexing process current directory.</brief> <descr>The input
+# handlers sometimes leave temporary files in the current directory, so it
+# makes sense to have recollindex chdir to some temporary directory. Three
+# possible types of values:
+#  - (literal) tmp : go to temp dir as set by environment (RECOLL_TMPDIR else
+#    TMPDIR else /tmp)
+#  - Empty: stay where started
+#  - Absolute path value: go there.</descr></var>
+idxrundir = tmp
+
+# <var name="checkneedretryindexscript" type="fn">
+#
+# <brief>Script used to heuristically check if we need to retry indexing
+# files which previously failed. </brief> <descr>The default script checks
+# the modified dates on /usr/bin and /usr/local/bin. A relative path will
+# be looked up in the filters dirs, then in the path. Use an absolute path
+# to do otherwise.</descr></var>
+checkneedretryindexscript = rclcheckneedretry.sh
+
+# <var name="recollhelperpath" type="string">
+#
+# <brief>Additional places to search for helper executables.</brief>
+# <descr>This is only used on Windows for now.</descr></var>
+#recollhelperpath = c:/someprog/bin;c:/someotherprog/bin
+
+# <var name="idxabsmlen" type="int">
+#
+# <brief>Length of abstracts we store while indexing.</brief>
+# <descr>Recoll stores an abstract for each indexed file.
+# The text can come from an actual 'abstract' section in the
+# document or will just be the beginning of the document. It is stored in
+# the index so that it can be displayed inside the result lists without
+# decoding the original file. The idxabsmlen parameter
+# defines the size of the stored abstract. The default value is 250
+# bytes. The search interface gives you the choice to display this stored
+# text or a synthetic abstract built by extracting text around the search
+# terms. If you always prefer the synthetic abstract, you can reduce this
+# value and save a little space.</descr></var>
+#idxabsmlen = 250
+
+# <var name="idxmetastoredlen" type="int">
+#
+# <brief>Truncation length of stored metadata fields.</brief><descr>This
+# does not affect indexing (the whole field is processed anyway), just the
+# amount of data stored in the index for the purpose of displaying fields
+# inside result lists or previews. The default value is 150 bytes which
+# may be too low if you have custom fields.</descr></var> 
+#idxmetastoredlen = 150
+
+# <var name="aspellLanguage" type="string">
+#
+# <brief>Language definitions to use when creating the aspell
+# dictionary.</brief><descr>The value must match a set of aspell language
+# definition files. You can type "aspell dicts" to see a list. The default
+# if this is not set is to use the NLS environment to guess the
+# value.</descr></var>
+#aspellLanguage = en
+
+# <var name="aspellAddCreateParam" type="string">
+#
+# <brief>Additional parameter to aspell dictionary creation
+# command.</brief><descr>Some aspell packages may need an additional option
+# (e.g. on Debian Jessie). See Debian bug 772415.</descr></var>
+#aspellAddCreateParam = --local-data-dir=/usr/lib/aspell
+
+# <var name="aspellKeepStderr" type="bool">
+#
+# <brief>Set this to have a look at aspell dictionary creation
+# errors.</brief><descr>There are always many, so this is mostly for 
+# debugging.</descr></var>
+#aspellKeepStderr = 1
+
+# <var name="noaspell" type="bool">
+#
+# <brief>Disable aspell use.</brief><descr>The aspell dictionary generation
+# takes time, and some combinations of aspell version, language, and local
+# terms, result in aspell crashing, so it sometimes makes sense to just
+# disable the thing.</descr></var>
+#noaspell = 1
+
+# <var name="monauxinterval" type="int">
+#
+# <brief>Seconds between auxiliary databases updates (stemdb,
+# aspell).</brief><descr>The default is one hour.</descr></var>
+#monauxinterval = 3600
+
+# <var name="monixinterval" type="int">
+# 
+# <brief>Minimum interval (seconds) between processings of the indexing
+# queue.</brief> <descr>The real time monitor does not process each event
+# when it comes in, but lets the queue accumulate, to diminish overhead and
+# to aggregate multiple events to the same file. Default 30 S.</descr></var>
+#monixinterval = 30
+
+# <var name="mondelaypatterns" type="string">
+#
+# <brief>Timing parameters for the real time indexing.</brief>
+# <descr>Definitions for files which get a longer delay before reindexing
+# is allowed. This is for fast-changing files, that should only be
+# reindexed once in a while. A list of wildcardPattern:seconds pairs. The
+# patterns are matched with fnmatch(pattern, path, 0). You can quote entries
+# containing white space with double quotes (quote the whole entry, not the
+# pattern). The default is empty.  Example: mondelaypatterns = *.log:20
+# "*with spaces.*:30"</descr></var>
+#mondelaypatterns = *.log:20  "*with spaces.*:30"
+
+# <var name="monioniceclass" type="int">
+#
+# <brief>ionice class for the real time indexing process</brief>
+# <descr>On platforms where this is supported, the default value is
+# 3.</descr></var> 
+# monioniceclass = 3
+
+# <var name="monioniceclassdata" type="string">
+#
+# <brief>ionice class parameter for the real time indexing process.</brief>
+# <descr>On platforms where this is supported. The default is
+# empty.</descr></var>
+#monioniceclassdata = 
+
+
+
+# <grouptitle>Query-time parameters (no impact on the index)</grouptitle>
+
+# <var name="autodiacsens" type="bool">
+#
+# <brief>auto-trigger diacritics sensitivity (raw index only)</brief>
+# <descr>If the index is not stripped, decide if we automatically trigger
+# diacritics sensitivity if the search term has accented characters (not in
+# unac_except_trans). Else you need to use the query language and the "D"
+# modifier to specify diacritics sensitivity. Default is no.</descr></var>
+autodiacsens = 0
+
+# <var name="autocasesens" type="bool">
+#
+# <brief>auto-trigger case sensitivity (raw index only)</brief> <descr>If
+# the index is not stripped (see indexStripChars), decide if we
+# automatically trigger character case sensitivity if the search term has
+# upper-case characters in any but the first position. Else you need to use
+# the query language and the "C" modifier to specify character-case
+# sensitivity. Default is yes.</descr></var>
+autocasesens = 1
+
+# <var name="maxTermExpand" type="int"><brief>Maximum query expansion count
+# for a single term (e.g.: when using wildcards).</brief><descr>This only
+# affects queries, not indexing. We used to not limit this at all (except
+# for filenames where the limit was too low at 1000), but it is
+# unreasonable with a big index. Default 10000.</descr></var>
+maxTermExpand = 10000
+
+# <var name="maxXapianClauses" type="int"><brief>Maximum number of clauses
+# we add to a single Xapian query.</brief><descr>This only affects queries,
+# not indexing. In some cases, the result of term expansion can be
+# multiplicative, and we want to avoid eating all the memory. Default
+# 50000.</descr></var>
+maxXapianClauses = 50000
+
+# <var name="snippetMaxPosWalk" type="int">
+#
+# <brief>Maximum number of positions we walk while populating a snippet for the
+# result list.</brief><descr>The default of 1,000,000 may be insufficient
+# for big documents, the consequence would be snippets with possibly
+# meaning-altering missing words.</descr></var>
+snippetMaxPosWalk = 1000000
+
+
+# <grouptitle>Parameters for the PDF input script</grouptitle>
+
+# <var name="pdfocr" type="bool">
+#
+# <brief>Attempt OCR of PDF files with no text content if both tesseract and
+# pdftoppm are installed.</brief><descr>The default is off because OCR is so
+# very slow.</descr></var>
+#pdfocr = 0
+
+# <var name="pdfattach" type="bool">
+#
+# <brief>Enable PDF attachment extraction by executing pdftk (if
+# available).</brief><descr>This is
+# normally disabled, because it does slow down PDF indexing a bit even if
+# not one attachment is ever found.</descr></var>
+#pdfattach = 0
+
+
+# <grouptitle>Parameters set for specific locations</grouptitle>
+
+# You could specify different parameters for a subdirectory like this:
+#[~/hungariandocs/plain]
+#defaultcharset = iso-8859-2
+
 [/usr/share/man]
 followLinks = 1
 
-# Enable thunderbird mbox format quirks where appropriate, and same for
-# mozilla/seamonkey
+# <var name="mhmboxquirks" type="string">
+#
+# <brief>Enable thunderbird/mozilla-seamonkey mbox format quirks</brief>
+# <descr>Set this for the directory where the email mbox files are
+# stored.</descr></var>
 [~/.thunderbird]
 mhmboxquirks = tbird
 [~/.mozilla]