Delete the RCL_USE_XATTR configure/compile-time variable; it was not

useful. Add a configuration variable to use mtime instead of ctime for update
detection. This is useful on a system where xattrs would be modified but not
indexed, to avoid excessive reindexing.
This commit is contained in:
Jean-Francois Dockes 2014-12-09 11:15:17 +01:00
parent f321a1582b
commit dc7ec0ac9f
11 changed files with 56 additions and 44 deletions

View file

@ -93,9 +93,6 @@
/* Compile the inotify interface */
#undef RCL_USE_INOTIFY
/* Use file extended attributes */
#undef RCL_USE_XATTR
/* Use multiple threads for indexing */
#undef IDX_THREADS

View file

@ -177,19 +177,6 @@ if test X$withFam != Xno ; then
fi
fi
# Disable use of file extended attributes. With xattrs disabled we can use
# mtime instead of ctime to look for file mods. Default is enabled.
AC_ARG_ENABLE(xattr,
AC_HELP_STRING([--disable-xattr],
[Enable fetching metadata from file extended attributes. This is only
useful if some application creates them on (part of) your data set. You also
need to set up appropriate mappings in the configuration.]),
xattrEnabled=$enableval, xattrEnabled=yes)
if test X$xattrEnabled = Xyes ; then
AC_DEFINE(RCL_USE_XATTR, 1, [Use file extended attributes])
fi
# Enable use of threads in the indexing pipeline.
# This is disabled by default as we usually care little about indexing
# absolute performance (more about impact on usability and total

View file

@ -5394,6 +5394,32 @@ unac_except_trans = åå Åå ää Ää öö Öö
</listitem>
</varlistentry>
<varlistentry><term><varname>testmodifusemtime</varname></term>
<listitem><para>If true, use mtime instead of default ctime to
determine if a file has been modified (in addition to
size, which is always used). Setting this can reduce
re-indexing on systems where extended attributes are
modified (by some other application), but not indexed
(changing extended attributes only affects
ctime). Notes:
<itemizedlist>
<listitem><para>This may prevent detection of change
in some marginal file rename cases (the target would
need to have the same size and
mtime).</para></listitem>
<listitem><para>You should probably also set
noxattrfields to 1 in this case, unless you still
prefer to perform xattr indexing, for example if the
local file update pattern makes it worthwhile (in
general, there is a risk that pure extended attribute
updates, without file modification, go
undetected).</para></listitem>
</itemizedlist>
Perform a full index reset after changing the value of
this parameter.
</para></listitem>
</varlistentry>
<varlistentry><term><varname>noxattrfields</varname></term>
<listitem><para>Recoll versions 1.19 and later
automatically translate file extended attributes into

View file

@ -47,16 +47,7 @@
#include "execmd.h"
#include "extrameta.h"
// When using extended attributes, we have to use the ctime, because
// this is all that gets set when the attributes are modified.
// As of 1.19 we use ctime in all cases as this allows to detect a
// file renamed into an existing file (e.g. when shifting logs or
// other archives).
#ifdef RCL_USE_XATTR
#define RCL_STTIME st_ctime
#else
#define RCL_STTIME st_ctime
#endif // RCL_USE_XATTR
int FsIndexer::o_tstupdusemtime = -1;
using namespace std;
@ -119,6 +110,12 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
LOGDEB1(("FsIndexer::FsIndexer\n"));
m_havelocalfields = m_config->hasNameAnywhere("localfields");
m_config->getConfParam("detectxattronly", &m_detectxattronly);
if (o_tstupdusemtime == -1) {
bool b(false);
m_config->getConfParam("testmodifusemtime", &b);
o_tstupdusemtime = b ? 1 : 0;
}
#ifdef IDX_THREADS
m_stableconfig = new RclConfig(*m_config);
@ -499,7 +496,8 @@ void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
// Build the up-to-date signature for a file from its stat data.
//
// The signature is the decimal file size immediately followed by the decimal
// value of one timestamp, with no separator between the two numbers.
// The timestamp is st_mtime when o_tstupdusemtime is non-zero (it is loaded
// from the "testmodifusemtime" configuration parameter by the FsIndexer
// constructor), and st_ctime otherwise.
//
// @param stp stat data for the file; must not be null.
// @param out receives the signature string (previous content is replaced).
//
// NOTE(review): the exact output format is kept unchanged on purpose;
// presumably signatures are compared with previously stored ones, so
// altering the format would look like a modification of every file --
// confirm against the callers before changing it.
void FsIndexer::makesig(const struct stat *stp, string& out)
{
    char cbuf[100];
    // Select the timestamp once, then format a single time with a bounded
    // write (the buffer is far larger than two decimal 64-bit values need).
    const long sigtime =
        o_tstupdusemtime ? (long)stp->st_mtime : (long)stp->st_ctime;
    snprintf(cbuf, sizeof(cbuf), "%lld" "%ld",
             (long long)stp->st_size, sigtime);
    out = cbuf;
}

View file

@ -135,6 +135,10 @@ class FsIndexer : public FsTreeWalkerCB {
// Activate detection of xattr-only document updates. Experimental, so
// needs a config option
bool m_detectxattronly;
// Use mtime instead of ctime for up-to-date tests. This is mostly
// incompatible with xattr indexing, in addition to other
// issues. See recoll.conf comments.
static int o_tstupdusemtime;
#ifdef IDX_THREADS
friend void *FsIndexerDbUpdWorker(void*);

View file

@ -601,12 +601,10 @@ bool RclIntf::addWatch(const string& path, bool)
// CLOSE_WRITE is covered through MODIFY. CREATE is needed for mkdirs
uint32_t mask = IN_MODIFY | IN_CREATE
| IN_MOVED_FROM | IN_MOVED_TO | IN_DELETE
#ifdef RCL_USE_XATTR
// IN_ATTRIB used to be not needed to receive extattr
// modification events, which was a bit weird because only ctime is
// set, and now it is...
| IN_ATTRIB
#endif
#ifdef IN_DONT_FOLLOW
| IN_DONT_FOLLOW
#endif

View file

@ -17,8 +17,6 @@
#include "autoconfig.h"
#ifdef RCL_USE_XATTR
#include <errno.h>
#include "rclconfig.h"
@ -145,5 +143,3 @@ void docFieldsFromMetaCmds(RclConfig *cfg, const map<string, string>& cfields,
}
}
}
#endif // RCL_USE_XATTR

View file

@ -19,7 +19,6 @@
#include "autoconfig.h"
#ifdef RCL_USE_XATTR
/** Extended attributes processing helper functions */
#include <map>
@ -47,6 +46,4 @@ extern void docFieldsFromMetaCmds(
RclConfig *cfg, const std::map<std::string, std::string>& xfields,
Rcl::Doc& doc);
#endif /* RCL_USE_XATTR */
#endif /* _REAPXATTRS_H_INCLUDED_ */

View file

@ -228,13 +228,11 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
m_forPreview ? "view" : "index");
df->set_property(Dijon::Filter::DJF_UDI, udi);
#ifdef RCL_USE_XATTR
// Get fields computed from extended attributes. We use the
// original file, not the m_fn which may be the uncompressed temp
// file
if (!m_noxattrs)
reapXAttrs(m_cfg, f, m_XAttrsFields);
#endif //RCL_USE_XATTR
// Gather metadata from external commands as configured.
reapMetaCmds(m_cfg, f, m_cmdFields);
@ -574,11 +572,9 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
LOGDEB2(("FileInterner::collectIpathAndMT\n"));
bool hasipath = false;
#ifdef RCL_USE_XATTR
if (!m_noxattrs) {
docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc);
}
#endif //RCL_USE_XATTR
docFieldsFromMetaCmds(m_cfg, m_cmdFields, doc);

View file

@ -16,7 +16,6 @@
*/
#ifndef _INTERNFILE_H_INCLUDED_
#define _INTERNFILE_H_INCLUDED_
// The class changes according to RCL_USE_XATTR
#include "autoconfig.h"
#include <string>
@ -252,13 +251,11 @@ class FileInterner {
string m_reachedMType; // target or text/plain
string m_tfile;
bool m_ok; // Set after construction if ok
#ifdef RCL_USE_XATTR
// Fields found in file extended attributes. This is kept here,
// not in the file-level handler because we are only interested in
// the top-level file, not any temp file necessitated by
// processing the internal doc hierarchy.
map<string, string> m_XAttrsFields;
#endif // RCL_USE_XATTR
// Fields gathered by executing configured external commands
map<string, string> m_cmdFields;

View file

@ -311,7 +311,23 @@ webcachemaxmbs = 40
# meaning-altering missing words.
snippetMaxPosWalk = 1000000
# Disable extended attributes conversion to metadata fields
# Use mtime instead of the default ctime to determine if a file has been
# modified (in addition to size, which is always used).
# Setting this can reduce re-indexing on systems where extended attributes
# are modified (by some other application), but not indexed (changing
# extended attributes only affects ctime).
# Notes:
# - This may prevent detection of change in some marginal file rename cases
# (the target would need to have the same size and mtime).
# - You should probably also set noxattrfields to 1 in this case, unless
# you still prefer to perform xattr indexing, for example if the local
# file update pattern makes it worthwhile (in general, there is a risk
# that pure extended attribute updates, without file modification, go
# undetected).
# Perform a full index reset after changing this.
testmodifusemtime = 0
# Disable extended attributes conversion to metadata fields. This probably
# needs to be set if testmodifusemtime is set.
noxattrfields = 0
# You could specify different parameters for a subdirectory like this: