Add new kind of aliases for field names, to be used only at query time

This commit is contained in:
Jean-Francois Dockes 2014-07-23 16:52:08 +02:00
parent e1576b8b53
commit 80c0787e3d
9 changed files with 57 additions and 14 deletions

View file

@ -879,6 +879,21 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
}
}
// Query aliases map: fill m_aliastoqcanon from the "queryaliases" section
// of the fields configuration. Each entry name in the section is a canonic
// field name, and its value lists the aliases which resolve to it. This map
// is consulted by fieldQCanon().
tps = m_fields->getNames("queryaliases");
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++){
string canonic = stringtolower(*it); // canonic name
string aliases;
// NOTE(review): the value is fetched with the lowercased name, not the
// name as returned by getNames() -- presumably names are lowercase in the
// configuration or the lookup is case-insensitive; confirm.
m_fields->get(canonic, aliases, "queryaliases");
vector<string> l;
// Break the value up into individual alias names (presumably
// whitespace-separated, confirm against stringToStrings()).
stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin();
ait != l.end(); ait++) {
// Aliases are stored lowercased, mapped to the lowercased canonic name.
m_aliastoqcanon[stringtolower(*ait)] = canonic;
}
}
#if 0
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
it != m_fldtotraits.end(); it++) {
@ -910,10 +925,10 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
}
// Return specifics for field name:
bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp)
const
bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp,
bool isquery) const
{
string fld = fieldCanon(_fld);
string fld = isquery ? fieldQCanon(_fld) : fieldCanon(_fld);
map<string, FieldTraits>::const_iterator pit = m_fldtotraits.find(fld);
if (pit != m_fldtotraits.end()) {
*ftpp = &pit->second;
@ -952,6 +967,18 @@ string RclConfig::fieldCanon(const string& f) const
return fld;
}
// Translate a field name into its canonic form for query purposes.
// The lowercased name is first looked up in the query-only aliases map
// (m_aliastoqcanon). If it is not found there, the result is whatever
// fieldCanon() returns for the original name.
string RclConfig::fieldQCanon(const string& f) const
{
    const string lowered = stringtolower(f);
    map<string, string>::const_iterator qalias = m_aliastoqcanon.find(lowered);

    // Not a query-only alias: defer to the regular alias resolution.
    if (qalias == m_aliastoqcanon.end()) {
        return fieldCanon(f);
    }

    LOGDEB1(("RclConfig::fieldQCanon: [%s] -> [%s]\n",
             f.c_str(), qalias->second.c_str()));
    return qalias->second;
}
vector<string> RclConfig::getFieldSectNames(const string &sk, const char* patrn)
const
{
@ -1423,6 +1450,7 @@ void RclConfig::initFrom(const RclConfig& r)
m_ptrans = new ConfSimple(*(r.m_ptrans));
m_fldtotraits = r.m_fldtotraits;
m_aliastocanon = r.m_aliastocanon;
m_aliastoqcanon = r.m_aliastoqcanon;
m_storedFields = r.m_storedFields;
m_xattrtofld = r.m_xattrtofld;
if (r.m_stopsuffixes)

View file

@ -241,8 +241,10 @@ class RclConfig {
/** mimeconf: get query lang frag for named filter */
bool getGuiFilter(const string& filtername, string& frag) const;
/** fields: get field prefix from field name */
bool getFieldTraits(const string& fldname, const FieldTraits **) const;
/** fields: get the field traits (including the prefix) for a field name.
Query-only aliases are also used if isquery is set */
bool getFieldTraits(const string& fldname, const FieldTraits **,
bool isquery = false) const;
const set<string>& getStoredFields() const {return m_storedFields;}
@ -251,6 +253,9 @@ class RclConfig {
/** Get canonic name for possible alias */
string fieldCanon(const string& fld) const;
/** Get canonic name for possible alias, including query-only aliases */
string fieldQCanon(const string& fld) const;
/** Get xattr name to field names translations */
const map<string, string>& getXattrToField() const {return m_xattrtofld;}
@ -323,6 +328,7 @@ class RclConfig {
ConfSimple *m_ptrans; // Paths translations
map<string, FieldTraits> m_fldtotraits; // Field to field params
map<string, string> m_aliastocanon;
map<string, string> m_aliastoqcanon;
set<string> m_storedFields;
map<string, string> m_xattrtofld;

View file

@ -518,7 +518,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
Py_RETURN_NONE;
}
key = rclconfig->fieldCanon(string(name));
key = rclconfig->fieldQCanon(string(name));
switch (key.at(0)) {
case 'u':
@ -640,7 +640,7 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
}
char* uvalue = PyBytes_AsString(putf8);
Py_DECREF(putf8);
string key = rclconfig->fieldCanon(string(name));
string key = rclconfig->fieldQCanon(string(name));
LOGDEB0(("Doc_setattr: [%s] (%s) -> [%s]\n", key.c_str(), name, uvalue));
// We set the value in the meta array in all cases. Good idea ? or do it

View file

@ -1033,9 +1033,10 @@ bool Db::isopen()
}
// Try to translate field specification into field prefix.
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp,
bool isquery)
{
if (m_config && m_config->getFieldTraits(fld, ftpp))
if (m_config && m_config->getFieldTraits(fld, ftpp, isquery))
return true;
*ftpp = 0;

View file

@ -223,7 +223,8 @@ class Db {
/* Return configured stop words */
const StopList& getStopList() const {return m_stops;}
/* Field name to prefix translation (ie: author -> 'A') */
bool fieldToTraits(const string& fldname, const FieldTraits **ftpp);
bool fieldToTraits(const string& fldname, const FieldTraits **ftpp,
bool isquery = false);
/* Update-related methods ******************************************/

View file

@ -160,7 +160,7 @@ void Query::setSortBy(const string& fld, bool ascending) {
if (fld.empty()) {
m_sortField.erase();
} else {
m_sortField = m_db->getConf()->fieldCanon(fld);
m_sortField = m_db->getConf()->fieldQCanon(fld);
m_sortAscending = ascending;
}
LOGDEB0(("RclQuery::setSortBy: [%s] %s\n", m_sortField.c_str(),

View file

@ -328,7 +328,7 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
string prefix;
if (!field.empty()) {
const FieldTraits *ftp = 0;
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) {
if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
field.c_str()));
} else {

View file

@ -686,7 +686,7 @@ void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
string prefix;
const FieldTraits *ftp;
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
prefix = wrap_prefix(ftp->pfx);
}
@ -743,7 +743,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
string prefix;
const FieldTraits *ftp;
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
prefix = wrap_prefix(ftp->pfx);
}

View file

@ -109,6 +109,13 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
recipient = to xesam:recipient
url = dc:identifier xesam:url
##################
# The queryaliases section defines aliases which are used exclusively at
# query time, so there is no risk of picking up a random field from a
# document (e.g. an HTML meta field) and indexing it.
[queryaliases]
#filename = fn
[xattrtofields]
######################
# Processing for extended file attributes.