Move the execution of external metadata-gathering commands from fsindexer to internfile, so that their output is handled consistently with filter-generated metadata

This commit is contained in:
medoc 2013-09-06 11:51:00 +02:00
parent b2a485b95a
commit 70179d5189
7 changed files with 108 additions and 96 deletions

View file

@ -67,7 +67,7 @@ bool ParamStale::needrecompute()
{
LOGDEB2(("ParamStale:: needrecompute. parent gen %d mine %d\n",
parent->m_keydirgen, savedkeydirgen));
if (parent->m_keydirgen != savedkeydirgen) {
if (active && parent->m_keydirgen != savedkeydirgen) {
LOGDEB2(("ParamState:: needrecompute. conffile %p\n", conffile));
savedkeydirgen = parent->m_keydirgen;
@ -90,6 +90,9 @@ void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
parent = rconf;
conffile = cnf;
paramname = nm;
active = false;
if (conffile)
active = conffile->hasNameAnywhere(nm);
savedkeydirgen = -1;
}
@ -107,6 +110,7 @@ void RclConfig::zeroMe() {
m_stpsuffstate.init(this, 0, "recoll_noindex");
m_skpnstate.init(this, 0, "skippedNames");
m_rmtstate.init(this, 0, "indexedmimetypes");
m_mdrstate.init(this, 0, "metadatacmds");
}
bool RclConfig::isDefaultConfig() const
@ -246,6 +250,7 @@ RclConfig::RclConfig(const string *argcnf)
m_stpsuffstate.init(this, mimemap, "recoll_noindex");
m_skpnstate.init(this, m_conf, "skippedNames");
m_rmtstate.init(this, m_conf, "indexedmimetypes");
m_mdrstate.init(this, m_conf, "metadatacmds");
return;
}
@ -262,13 +267,14 @@ bool RclConfig::updateMainConfig()
m_ok = false;
m_skpnstate.init(this, 0, "skippedNames");
m_rmtstate.init(this, 0, "indexedmimetypes");
m_mdrstate.init(this, 0, "metadatacmds");
return false;
}
delete m_conf;
m_conf = newconf;
m_skpnstate.init(this, m_conf, "skippedNames");
m_rmtstate.init(this, m_conf, "indexedmimetypes");
m_mdrstate.init(this, m_conf, "metadatacmds");
setKeyDir(cstr_null);
bool nocjk = false;
@ -666,6 +672,32 @@ string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes)
return hs;
}
// Return the external metadata-gathering commands described by the
// "metadatacmds" configuration parameter.
//
// The parsed list is kept in m_mdreapers and is only rebuilt when
// m_mdrstate.needrecompute() reports that the parameter value has to be
// recomputed; otherwise the previously built list is returned as is.
// Each attribute name found in the parameter value becomes a
// (canonicalized) field name, and the attribute value is split into the
// command vector for that field.
const vector<MDReaper>& RclConfig::getMDReapers()
{
    if (m_mdrstate.needrecompute()) {
        m_mdreapers.clear();
        // New value now stored in m_mdrstate.savedvalue
        string& sreapers = m_mdrstate.savedvalue;
        if (sreapers.empty())
            return m_mdreapers;
        // Only the attributes part is of interest here, the main value
        // is not used.
        string value;
        ConfSimple attrs;
        valueSplitAttributes(sreapers, value, attrs);
        vector<string> nmlst = attrs.getNames(cstr_null);
        for (vector<string>::const_iterator it = nmlst.begin();
             it != nmlst.end(); ++it) {
            MDReaper reaper;
            reaper.fieldname = fieldCanon(*it);
            string s;
            attrs.get(*it, s);
            stringToStrings(s, reaper.cmdv);
            m_mdreapers.push_back(reaper);
        }
    }
    return m_mdreapers;
}
bool RclConfig::getGuiFilterNames(vector<string>& cats) const
{
if (!mimeconf)
@ -1346,6 +1378,7 @@ void RclConfig::initFrom(const RclConfig& r)
m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
m_mdrstate.init(this, m_conf, r.m_mdrstate.paramname);
m_thrConf = r.m_thrConf;
}

View file

@ -44,6 +44,7 @@ public:
RclConfig *parent;
ConfNull *conffile;
string paramname;
bool active; // Check at init if config defines name at all
int savedkeydirgen;
string savedvalue;
@ -51,6 +52,12 @@ public:
bool needrecompute();
};
// Hold the description for an external metadata-gathering command:
// the name of the field which receives the command output, and the
// command to execute.
struct MDReaper {
// Field name, stored in canonical form (set through fieldCanon())
string fieldname;
// Command and arguments, one element per vector entry. Each element goes
// through pcSubst() with 'f' mapped to the file name before execution.
vector<string> cmdv;
};
// Data associated with an indexed field name:
struct FieldTraits {
string pfx; // indexing prefix,
@ -244,6 +251,9 @@ class RclConfig {
* exceptions are found in the nouncompforviewmts mimeview list */
bool mimeViewerNeedsUncomp(const string &mimetype) const;
/** Retrieve extra metadata-gathering commands */
const vector<MDReaper>& getMDReapers();
/** Store/retrieve missing helpers description string */
bool getMissingHelperDesc(string&) const;
void storeMissingHelperDesc(const string &s);
@ -319,6 +329,11 @@ class RclConfig {
set<string> m_restrictMTypes;
vector<pair<int, int> > m_thrConf;
// Same idea with the metadata-gathering external commands,
// (e.g. used to reap tagging info: "tmsu tags %f")
ParamStale m_mdrstate;
vector<MDReaper> m_mdreapers;
/** Create initial user configuration */
bool initUserConfig();
/** Copy from other */

View file

@ -75,13 +75,12 @@ extern void *FsIndexerDbUpdWorker(void*);
class InternfileTask {
public:
InternfileTask(const std::string &f, const struct stat *i_stp,
map<string,string> lfields, vector<FsIndexer::MDReaper> reapers)
: fn(f), statbuf(*i_stp), localfields(lfields), mdreapers(reapers)
map<string,string> lfields)
: fn(f), statbuf(*i_stp), localfields(lfields)
{}
string fn;
struct stat statbuf;
map<string,string> localfields;
vector<FsIndexer::MDReaper> mdreapers;
};
extern void *FsIndexerInternfileWorker(void*);
#endif // IDX_THREADS
@ -113,7 +112,6 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
{
LOGDEB1(("FsIndexer::FsIndexer\n"));
m_havelocalfields = m_config->hasNameAnywhere("localfields");
m_havemdreapers = m_config->hasNameAnywhere("metadatacmds");
#ifdef IDX_THREADS
m_stableconfig = new RclConfig(*m_config);
@ -326,8 +324,6 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
m_config->setKeyDir(path_getfather(*it));
if (m_havelocalfields)
localfieldsfromconf();
if (m_havemdreapers)
mdreapersfromconf();
bool follow = false;
m_config->getConfParam("followLinks", &follow);
@ -465,58 +461,6 @@ void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
}
}
// Metadata gathering commands
void FsIndexer::mdreapersfromconf()
{
LOGDEB1(("FsIndexer::mdreapersfromconf\n"));
string sreapers;
m_config->getConfParam("metadatacmds", sreapers);
if (!sreapers.compare(m_smdreapers))
return;
m_smdreapers = sreapers;
m_mdreapers.clear();
if (sreapers.empty())
return;
string value;
ConfSimple attrs;
m_config->valueSplitAttributes(sreapers, value, attrs);
vector<string> nmlst = attrs.getNames(cstr_null);
for (vector<string>::const_iterator it = nmlst.begin();
it != nmlst.end(); it++) {
MDReaper reaper;
reaper.fieldname = m_config->fieldCanon(*it);
string s;
attrs.get(*it, s);
stringToStrings(s, reaper.cmdv);
m_mdreapers.push_back(reaper);
}
}
void FsIndexer::reapmetadata(const vector<MDReaper>& reapers, const string& fn,
Rcl::Doc& doc)
{
map<char,string> smap = create_map<char, string>('f', fn);
for (vector<MDReaper>::const_iterator rp = reapers.begin();
rp != reapers.end(); rp++) {
vector<string> cmd;
for (vector<string>::const_iterator it = rp->cmdv.begin();
it != rp->cmdv.end(); it++) {
string s;
pcSubst(*it, s, smap);
cmd.push_back(s);
}
string output;
if (ExecCmd::backtick(cmd, output)) {
// addmeta() creates or appends. fieldname is already
// canonic (see above)
doc.addmeta(rp->fieldname, output);
}
}
}
void FsIndexer::makesig(const struct stat *stp, string& out)
{
char cbuf[100];
@ -569,7 +513,7 @@ void *FsIndexerInternfileWorker(void * fsp)
}
LOGDEB0(("FsIndexerInternfileWorker: task fn %s\n", tsk->fn.c_str()));
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
tsk->localfields, tsk->mdreapers) !=
tsk->localfields) !=
FsTreeWalker::FtwOk) {
LOGERR(("FsIndexerInternfileWorker: processone failed\n"));
tqp->workerExit();
@ -617,8 +561,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
// Adjust local fields from config for this subtree
if (m_havelocalfields)
localfieldsfromconf();
if (m_havemdreapers)
mdreapersfromconf();
if (flg == FsTreeWalker::FtwDirReturn)
return FsTreeWalker::FtwOk;
@ -626,8 +568,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
#ifdef IDX_THREADS
if (m_haveInternQ) {
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields,
m_mdreapers);
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
if (m_iwqueue.put(tp)) {
return FsTreeWalker::FtwOk;
} else {
@ -636,7 +577,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
}
#endif
return processonefile(m_config, fn, stp, m_localfields, m_mdreapers);
return processonefile(m_config, fn, stp, m_localfields);
}
// File name transcoded to utf8 for indexing. If this fails, the file
@ -664,8 +605,7 @@ static string compute_utf8fn(RclConfig *config, const string& fn)
FsTreeWalker::Status
FsIndexer::processonefile(RclConfig *config,
const std::string &fn, const struct stat *stp,
const map<string, string>& localfields,
const vector<MDReaper>& mdreapers)
const map<string, string>& localfields)
{
////////////////////
// Check db up to date ? Doing this before file type
@ -742,8 +682,6 @@ FsIndexer::processonefile(RclConfig *config,
// for the main file.
if (doc.ipath.empty()) {
hadNullIpath = true;
if (m_havemdreapers)
reapmetadata(mdreapers, fn, doc);
if (hadNonNullIpath) {
// Note that only the filters can reliably compute
// this. What we do is dependent on the doc order (if
@ -843,8 +781,6 @@ FsIndexer::processonefile(RclConfig *config,
fileDoc.url = cstr_fileu + fn;
if (m_havelocalfields)
setlocalfields(localfields, fileDoc);
if (m_havemdreapers)
reapmetadata(mdreapers, fn, fileDoc);
char cbuf[100];
sprintf(cbuf, OFFTPC, stp->st_size);
fileDoc.pcbytes = cbuf;

View file

@ -76,12 +76,6 @@ class FsIndexer : public FsTreeWalkerCB {
/** Make signature for file up to date checks */
static void makesig(const struct stat *stp, string& out);
/* Hold the description for an external metadata-gathering command */
struct MDReaper {
string fieldname;
vector<string> cmdv;
};
private:
class PurgeCandidateRecorder {
@ -131,16 +125,13 @@ class FsIndexer : public FsTreeWalkerCB {
// all files in a file system area. Ie: set "rclaptg = thunderbird"
// inside ~/.thunderbird. The boolean is set at init to avoid
// further wasteful processing if no local fields are set.
// This should probably be moved to internfile so that the
// localfields get exactly the same processing as those generated by the
// filters (as was done for metadatacmds fields)
bool m_havelocalfields;
string m_slocalfields;
map<string, string> m_localfields;
// Same idea with the metadata-gathering external commands,
// (e.g. used to reap tagging info: "tmsu tags %f")
bool m_havemdreapers;
string m_smdreapers;
vector<MDReaper> m_mdreapers;
#ifdef IDX_THREADS
friend void *FsIndexerDbUpdWorker(void*);
friend void *FsIndexerInternfileWorker(void*);
@ -154,15 +145,11 @@ class FsIndexer : public FsTreeWalkerCB {
bool init();
void localfieldsfromconf();
void mdreapersfromconf();
void setlocalfields(const map<string, string>& flds, Rcl::Doc& doc);
void reapmetadata(const vector<MDReaper>& reapers, const string &fn,
Rcl::Doc& doc);
string getDbDir() {return m_config->getDbDir();}
FsTreeWalker::Status
processonefile(RclConfig *config, const string &fn,
const struct stat *, const map<string,string>& localfields,
const vector<MDReaper>& mdreapers);
const struct stat *, const map<string,string>& localfields);
};
#endif /* _fsindexer_h_included_ */

View file

@ -118,6 +118,28 @@ void FileInterner::reapXAttrs(const string& path)
}
#endif // RCL_USE_XATTR
void FileInterner::reapCmdMetadata(const string& fn)
{
const vector<MDReaper>& reapers = m_cfg->getMDReapers();
if (reapers.empty())
return;
map<char,string> smap = create_map<char, string>('f', fn);
for (vector<MDReaper>::const_iterator rp = reapers.begin();
rp != reapers.end(); rp++) {
vector<string> cmd;
for (vector<string>::const_iterator it = rp->cmdv.begin();
it != rp->cmdv.end(); it++) {
string s;
pcSubst(*it, s, smap);
cmd.push_back(s);
}
string output;
if (ExecCmd::backtick(cmd, output)) {
m_cmdFields[rp->fieldname] = output;
}
}
}
// This is used when the user wants to retrieve a search result doc's parent
// (ie message having a given attachment)
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
@ -279,6 +301,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
// file
reapXAttrs(f);
#endif //RCL_USE_XATTR
reapCmdMetadata(f);
df->set_docsize(docsize);
if (!df->set_document_file(l_mime, m_fn)) {
@ -625,6 +648,21 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
}
#endif //RCL_USE_XATTR
// Set fields from external commands
// These override those from xattrs and can be later augmented by
// values from inside the file
for (map<string,string>::const_iterator it = m_cmdFields.begin();
it != m_cmdFields.end(); it++) {
string fieldname = m_cfg->fieldCanon(it->first);
LOGDEB0(("Internfile:: setting [%s] from cmd value [%s]\n",
fieldname.c_str(), it->second.c_str()));
if (fieldname == cstr_dj_keymd) {
doc.dmtime = it->second;
} else {
doc.meta[fieldname] = it->second;
}
}
// If there is no ipath stack, the mimetype is the one from the file
doc.mimetype = m_mimetype;

View file

@ -259,6 +259,8 @@ class FileInterner {
// processing the internal doc hierarchy.
map<string, string> m_XAttrsFields;
#endif // RCL_USE_XATTR
// Fields gathered by executing configured external commands
map<string, string> m_cmdFields;
// Filter stack, path to the current document from which we're
// fetching subdocs
@ -289,6 +291,7 @@ class FileInterner {
#ifdef RCL_USE_XATTR
void reapXAttrs(const string& fn);
#endif
void reapCmdMetadata(const string& fn);
};

View file

@ -4,7 +4,7 @@ include $(depth)/mk/sysconf
LIBRECOLL=librecoll.so.$(RCLLIBVERSION)
LIBS = librecoll.a
#LIBS = librecoll.a $(LIBRECOLL)
LIBS = librecoll.a $(LIBRECOLL)
all: $(LIBS)