move the execution of external metadata-gathering commands from fsindexer to internfile for consistency of handling with filter-generated metadata
This commit is contained in:
parent
b2a485b95a
commit
70179d5189
7 changed files with 108 additions and 96 deletions
|
@ -67,7 +67,7 @@ bool ParamStale::needrecompute()
|
|||
{
|
||||
LOGDEB2(("ParamStale:: needrecompute. parent gen %d mine %d\n",
|
||||
parent->m_keydirgen, savedkeydirgen));
|
||||
if (parent->m_keydirgen != savedkeydirgen) {
|
||||
if (active && parent->m_keydirgen != savedkeydirgen) {
|
||||
LOGDEB2(("ParamState:: needrecompute. conffile %p\n", conffile));
|
||||
|
||||
savedkeydirgen = parent->m_keydirgen;
|
||||
|
@ -90,6 +90,9 @@ void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
|
|||
parent = rconf;
|
||||
conffile = cnf;
|
||||
paramname = nm;
|
||||
active = false;
|
||||
if (conffile)
|
||||
active = conffile->hasNameAnywhere(nm);
|
||||
savedkeydirgen = -1;
|
||||
}
|
||||
|
||||
|
@ -107,6 +110,7 @@ void RclConfig::zeroMe() {
|
|||
m_stpsuffstate.init(this, 0, "recoll_noindex");
|
||||
m_skpnstate.init(this, 0, "skippedNames");
|
||||
m_rmtstate.init(this, 0, "indexedmimetypes");
|
||||
m_mdrstate.init(this, 0, "metadatacmds");
|
||||
}
|
||||
|
||||
bool RclConfig::isDefaultConfig() const
|
||||
|
@ -246,6 +250,7 @@ RclConfig::RclConfig(const string *argcnf)
|
|||
m_stpsuffstate.init(this, mimemap, "recoll_noindex");
|
||||
m_skpnstate.init(this, m_conf, "skippedNames");
|
||||
m_rmtstate.init(this, m_conf, "indexedmimetypes");
|
||||
m_mdrstate.init(this, m_conf, "metadatacmds");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -262,13 +267,14 @@ bool RclConfig::updateMainConfig()
|
|||
m_ok = false;
|
||||
m_skpnstate.init(this, 0, "skippedNames");
|
||||
m_rmtstate.init(this, 0, "indexedmimetypes");
|
||||
m_mdrstate.init(this, 0, "metadatacmds");
|
||||
return false;
|
||||
}
|
||||
delete m_conf;
|
||||
m_conf = newconf;
|
||||
m_skpnstate.init(this, m_conf, "skippedNames");
|
||||
m_rmtstate.init(this, m_conf, "indexedmimetypes");
|
||||
|
||||
m_mdrstate.init(this, m_conf, "metadatacmds");
|
||||
|
||||
setKeyDir(cstr_null);
|
||||
bool nocjk = false;
|
||||
|
@ -666,6 +672,32 @@ string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes)
|
|||
return hs;
|
||||
}
|
||||
|
||||
/**
 * Return the list of external metadata-gathering commands.
 *
 * The list is cached in m_mdreapers and only rebuilt when m_mdrstate
 * reports that the "metadatacmds" value needs recomputing. The raw value
 * is then parsed as an attribute list: each attribute name is mapped to
 * its canonical field name, and each attribute value is split into the
 * words of the command to execute. The non-attribute part of the value
 * is not used.
 *
 * @return reference to the cached list; empty if no command is configured.
 */
const vector<MDReaper>& RclConfig::getMDReapers()
{
    if (m_mdrstate.needrecompute()) {
        m_mdreapers.clear();
        // New value now stored in m_mdrstate.savedvalue
        string& sreapers = m_mdrstate.savedvalue;
        if (sreapers.empty())
            return m_mdreapers;
        string value;
        ConfSimple attrs;
        valueSplitAttributes(sreapers, value, attrs);
        vector<string> nmlst = attrs.getNames(cstr_null);
        for (vector<string>::const_iterator it = nmlst.begin();
             it != nmlst.end(); ++it) {
            MDReaper reaper;
            reaper.fieldname = fieldCanon(*it);
            // The attribute value is the command line, split into words
            string s;
            attrs.get(*it, s);
            stringToStrings(s, reaper.cmdv);
            m_mdreapers.push_back(reaper);
        }
    }
    return m_mdreapers;
}
|
||||
|
||||
bool RclConfig::getGuiFilterNames(vector<string>& cats) const
|
||||
{
|
||||
if (!mimeconf)
|
||||
|
@ -1346,6 +1378,7 @@ void RclConfig::initFrom(const RclConfig& r)
|
|||
m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
|
||||
m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
|
||||
m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
|
||||
m_mdrstate.init(this, m_conf, r.m_mdrstate.paramname);
|
||||
m_thrConf = r.m_thrConf;
|
||||
}
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ public:
|
|||
RclConfig *parent;
|
||||
ConfNull *conffile;
|
||||
string paramname;
|
||||
bool active; // Check at init if config defines name at all
|
||||
int savedkeydirgen;
|
||||
string savedvalue;
|
||||
|
||||
|
@ -51,6 +52,12 @@ public:
|
|||
bool needrecompute();
|
||||
};
|
||||
|
||||
// Hold the description for an external metadata-gathering command
|
||||
struct MDReaper {
|
||||
string fieldname; // Name of the document field which receives the command output
|
||||
vector<string> cmdv; // Command line split into words (program, then arguments)
|
||||
};
|
||||
|
||||
// Data associated to an indexed field name:
|
||||
struct FieldTraits {
|
||||
string pfx; // indexing prefix,
|
||||
|
@ -244,6 +251,9 @@ class RclConfig {
|
|||
* exceptions are found in the nouncompforviewmts mimeview list */
|
||||
bool mimeViewerNeedsUncomp(const string &mimetype) const;
|
||||
|
||||
/** Retrieve extra metadata-gathering commands */
|
||||
const vector<MDReaper>& getMDReapers();
|
||||
|
||||
/** Store/retrieve missing helpers description string */
|
||||
bool getMissingHelperDesc(string&) const;
|
||||
void storeMissingHelperDesc(const string &s);
|
||||
|
@ -319,6 +329,11 @@ class RclConfig {
|
|||
set<string> m_restrictMTypes;
|
||||
vector<pair<int, int> > m_thrConf;
|
||||
|
||||
// Same idea with the metadata-gathering external commands,
|
||||
// (e.g. used to reap tagging info: "tmsu tags %f")
|
||||
ParamStale m_mdrstate;
|
||||
vector<MDReaper> m_mdreapers;
|
||||
|
||||
/** Create initial user configuration */
|
||||
bool initUserConfig();
|
||||
/** Copy from other */
|
||||
|
|
|
@ -75,13 +75,12 @@ extern void *FsIndexerDbUpdWorker(void*);
|
|||
class InternfileTask {
|
||||
public:
|
||||
InternfileTask(const std::string &f, const struct stat *i_stp,
|
||||
map<string,string> lfields, vector<FsIndexer::MDReaper> reapers)
|
||||
: fn(f), statbuf(*i_stp), localfields(lfields), mdreapers(reapers)
|
||||
map<string,string> lfields)
|
||||
: fn(f), statbuf(*i_stp), localfields(lfields)
|
||||
{}
|
||||
string fn;
|
||||
struct stat statbuf;
|
||||
map<string,string> localfields;
|
||||
vector<FsIndexer::MDReaper> mdreapers;
|
||||
};
|
||||
extern void *FsIndexerInternfileWorker(void*);
|
||||
#endif // IDX_THREADS
|
||||
|
@ -113,7 +112,6 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
|||
{
|
||||
LOGDEB1(("FsIndexer::FsIndexer\n"));
|
||||
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
||||
m_havemdreapers = m_config->hasNameAnywhere("metadatacmds");
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
m_stableconfig = new RclConfig(*m_config);
|
||||
|
@ -326,8 +324,6 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
|
|||
m_config->setKeyDir(path_getfather(*it));
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
if (m_havemdreapers)
|
||||
mdreapersfromconf();
|
||||
|
||||
bool follow = false;
|
||||
m_config->getConfParam("followLinks", &follow);
|
||||
|
@ -465,58 +461,6 @@ void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
|
|||
}
|
||||
}
|
||||
|
||||
// Metadata gathering commands
|
||||
void FsIndexer::mdreapersfromconf()
|
||||
{
|
||||
LOGDEB1(("FsIndexer::mdreapersfromconf\n"));
|
||||
|
||||
string sreapers;
|
||||
m_config->getConfParam("metadatacmds", sreapers);
|
||||
if (!sreapers.compare(m_smdreapers))
|
||||
return;
|
||||
|
||||
m_smdreapers = sreapers;
|
||||
m_mdreapers.clear();
|
||||
if (sreapers.empty())
|
||||
return;
|
||||
|
||||
string value;
|
||||
ConfSimple attrs;
|
||||
m_config->valueSplitAttributes(sreapers, value, attrs);
|
||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||
for (vector<string>::const_iterator it = nmlst.begin();
|
||||
it != nmlst.end(); it++) {
|
||||
MDReaper reaper;
|
||||
reaper.fieldname = m_config->fieldCanon(*it);
|
||||
string s;
|
||||
attrs.get(*it, s);
|
||||
stringToStrings(s, reaper.cmdv);
|
||||
m_mdreapers.push_back(reaper);
|
||||
}
|
||||
}
|
||||
|
||||
void FsIndexer::reapmetadata(const vector<MDReaper>& reapers, const string& fn,
|
||||
Rcl::Doc& doc)
|
||||
{
|
||||
map<char,string> smap = create_map<char, string>('f', fn);
|
||||
for (vector<MDReaper>::const_iterator rp = reapers.begin();
|
||||
rp != reapers.end(); rp++) {
|
||||
vector<string> cmd;
|
||||
for (vector<string>::const_iterator it = rp->cmdv.begin();
|
||||
it != rp->cmdv.end(); it++) {
|
||||
string s;
|
||||
pcSubst(*it, s, smap);
|
||||
cmd.push_back(s);
|
||||
}
|
||||
string output;
|
||||
if (ExecCmd::backtick(cmd, output)) {
|
||||
// addmeta() creates or appends. fieldname is already
|
||||
// canonic (see above)
|
||||
doc.addmeta(rp->fieldname, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FsIndexer::makesig(const struct stat *stp, string& out)
|
||||
{
|
||||
char cbuf[100];
|
||||
|
@ -569,7 +513,7 @@ void *FsIndexerInternfileWorker(void * fsp)
|
|||
}
|
||||
LOGDEB0(("FsIndexerInternfileWorker: task fn %s\n", tsk->fn.c_str()));
|
||||
if (fip->processonefile(&myconf, tsk->fn, &tsk->statbuf,
|
||||
tsk->localfields, tsk->mdreapers) !=
|
||||
tsk->localfields) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR(("FsIndexerInternfileWorker: processone failed\n"));
|
||||
tqp->workerExit();
|
||||
|
@ -617,8 +561,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
// Adjust local fields from config for this subtree
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
if (m_havemdreapers)
|
||||
mdreapersfromconf();
|
||||
|
||||
if (flg == FsTreeWalker::FtwDirReturn)
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
@ -626,8 +568,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ) {
|
||||
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields,
|
||||
m_mdreapers);
|
||||
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
|
||||
if (m_iwqueue.put(tp)) {
|
||||
return FsTreeWalker::FtwOk;
|
||||
} else {
|
||||
|
@ -636,7 +577,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
}
|
||||
#endif
|
||||
|
||||
return processonefile(m_config, fn, stp, m_localfields, m_mdreapers);
|
||||
return processonefile(m_config, fn, stp, m_localfields);
|
||||
}
|
||||
|
||||
// File name transcoded to utf8 for indexing. If this fails, the file
|
||||
|
@ -664,8 +605,7 @@ static string compute_utf8fn(RclConfig *config, const string& fn)
|
|||
FsTreeWalker::Status
|
||||
FsIndexer::processonefile(RclConfig *config,
|
||||
const std::string &fn, const struct stat *stp,
|
||||
const map<string, string>& localfields,
|
||||
const vector<MDReaper>& mdreapers)
|
||||
const map<string, string>& localfields)
|
||||
{
|
||||
////////////////////
|
||||
// Check db up to date ? Doing this before file type
|
||||
|
@ -742,8 +682,6 @@ FsIndexer::processonefile(RclConfig *config,
|
|||
// for the main file.
|
||||
if (doc.ipath.empty()) {
|
||||
hadNullIpath = true;
|
||||
if (m_havemdreapers)
|
||||
reapmetadata(mdreapers, fn, doc);
|
||||
if (hadNonNullIpath) {
|
||||
// Note that only the filters can reliably compute
|
||||
// this. What we do is dependent on the doc order (if
|
||||
|
@ -843,8 +781,6 @@ FsIndexer::processonefile(RclConfig *config,
|
|||
fileDoc.url = cstr_fileu + fn;
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, fileDoc);
|
||||
if (m_havemdreapers)
|
||||
reapmetadata(mdreapers, fn, fileDoc);
|
||||
char cbuf[100];
|
||||
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||
fileDoc.pcbytes = cbuf;
|
||||
|
|
|
@ -76,12 +76,6 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||
/** Make signature for file up to date checks */
|
||||
static void makesig(const struct stat *stp, string& out);
|
||||
|
||||
/* Hold the description for an external metadata-gathering command */
|
||||
struct MDReaper {
|
||||
string fieldname; // Name of the document field which receives the command output
|
||||
vector<string> cmdv; // Command line split into words (program, then arguments)
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
class PurgeCandidateRecorder {
|
||||
|
@ -131,16 +125,13 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||
// all files in a file system area. Ie: set "rclaptg = thunderbird"
|
||||
// inside ~/.thunderbird. The boolean is set at init to avoid
|
||||
// further wasteful processing if no local fields are set.
|
||||
// This should probably be moved to internfile so that the
|
||||
// localfields get exactly the same processing as those generated by the
|
||||
// filters (as was done for metadatacmds fields)
|
||||
bool m_havelocalfields;
|
||||
string m_slocalfields;
|
||||
map<string, string> m_localfields;
|
||||
|
||||
// Same idea with the metadata-gathering external commands,
|
||||
// (e.g. used to reap tagging info: "tmsu tags %f")
|
||||
bool m_havemdreapers;
|
||||
string m_smdreapers;
|
||||
vector<MDReaper> m_mdreapers;
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
friend void *FsIndexerDbUpdWorker(void*);
|
||||
friend void *FsIndexerInternfileWorker(void*);
|
||||
|
@ -154,15 +145,11 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||
|
||||
bool init();
|
||||
void localfieldsfromconf();
|
||||
void mdreapersfromconf();
|
||||
void setlocalfields(const map<string, string>& flds, Rcl::Doc& doc);
|
||||
void reapmetadata(const vector<MDReaper>& reapers, const string &fn,
|
||||
Rcl::Doc& doc);
|
||||
string getDbDir() {return m_config->getDbDir();}
|
||||
FsTreeWalker::Status
|
||||
processonefile(RclConfig *config, const string &fn,
|
||||
const struct stat *, const map<string,string>& localfields,
|
||||
const vector<MDReaper>& mdreapers);
|
||||
const struct stat *, const map<string,string>& localfields);
|
||||
};
|
||||
|
||||
#endif /* _fsindexer_h_included_ */
|
||||
|
|
|
@ -118,6 +118,28 @@ void FileInterner::reapXAttrs(const string& path)
|
|||
}
|
||||
#endif // RCL_USE_XATTR
|
||||
|
||||
void FileInterner::reapCmdMetadata(const string& fn)
|
||||
{
|
||||
const vector<MDReaper>& reapers = m_cfg->getMDReapers();
|
||||
if (reapers.empty())
|
||||
return;
|
||||
map<char,string> smap = create_map<char, string>('f', fn);
|
||||
for (vector<MDReaper>::const_iterator rp = reapers.begin();
|
||||
rp != reapers.end(); rp++) {
|
||||
vector<string> cmd;
|
||||
for (vector<string>::const_iterator it = rp->cmdv.begin();
|
||||
it != rp->cmdv.end(); it++) {
|
||||
string s;
|
||||
pcSubst(*it, s, smap);
|
||||
cmd.push_back(s);
|
||||
}
|
||||
string output;
|
||||
if (ExecCmd::backtick(cmd, output)) {
|
||||
m_cmdFields[rp->fieldname] = output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is used when the user wants to retrieve a search result doc's parent
|
||||
// (ie message having a given attachment)
|
||||
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
|
||||
|
@ -279,6 +301,7 @@ void FileInterner::init(const string &f, const struct stat *stp, RclConfig *cnf,
|
|||
// file
|
||||
reapXAttrs(f);
|
||||
#endif //RCL_USE_XATTR
|
||||
reapCmdMetadata(f);
|
||||
|
||||
df->set_docsize(docsize);
|
||||
if (!df->set_document_file(l_mime, m_fn)) {
|
||||
|
@ -625,6 +648,21 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
|||
}
|
||||
#endif //RCL_USE_XATTR
|
||||
|
||||
// Set fields from external commands
|
||||
// These override those from xattrs and can be later augmented by
|
||||
// values from inside the file
|
||||
for (map<string,string>::const_iterator it = m_cmdFields.begin();
|
||||
it != m_cmdFields.end(); it++) {
|
||||
string fieldname = m_cfg->fieldCanon(it->first);
|
||||
LOGDEB0(("Internfile:: setting [%s] from cmd value [%s]\n",
|
||||
fieldname.c_str(), it->second.c_str()));
|
||||
if (fieldname == cstr_dj_keymd) {
|
||||
doc.dmtime = it->second;
|
||||
} else {
|
||||
doc.meta[fieldname] = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no ipath stack, the mimetype is the one from the file
|
||||
doc.mimetype = m_mimetype;
|
||||
|
||||
|
|
|
@ -259,6 +259,8 @@ class FileInterner {
|
|||
// processing the internal doc hierarchy.
|
||||
map<string, string> m_XAttrsFields;
|
||||
#endif // RCL_USE_XATTR
|
||||
// Fields gathered by executing configured external commands
|
||||
map<string, string> m_cmdFields;
|
||||
|
||||
// Filter stack, path to the current document from which we're
|
||||
// fetching subdocs
|
||||
|
@ -289,6 +291,7 @@ class FileInterner {
|
|||
#ifdef RCL_USE_XATTR
|
||||
void reapXAttrs(const string& fn);
|
||||
#endif
|
||||
void reapCmdMetadata(const string& fn);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ include $(depth)/mk/sysconf
|
|||
|
||||
LIBRECOLL=librecoll.so.$(RCLLIBVERSION)
|
||||
LIBS = librecoll.a
|
||||
#LIBS = librecoll.a $(LIBRECOLL)
|
||||
LIBS = librecoll.a $(LIBRECOLL)
|
||||
|
||||
all: $(LIBS)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue