Prevent highlighting of bogus terms in results (prevent path elements, negative queries or internal stuff)
This commit is contained in:
parent
fe6174652b
commit
e37284f05f
8 changed files with 58 additions and 46 deletions
|
@ -867,6 +867,8 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||||
ft.boost = atof(tval.c_str());
|
ft.boost = atof(tval.c_str());
|
||||||
if (attrs.get("pfxonly", tval))
|
if (attrs.get("pfxonly", tval))
|
||||||
ft.pfxonly = stringToBool(tval);
|
ft.pfxonly = stringToBool(tval);
|
||||||
|
if (attrs.get("noterms", tval))
|
||||||
|
ft.noterms = stringToBool(tval);
|
||||||
m_fldtotraits[stringtolower(*it)] = ft;
|
m_fldtotraits[stringtolower(*it)] = ft;
|
||||||
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
||||||
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
||||||
|
|
|
@ -66,9 +66,9 @@ struct FieldTraits {
|
||||||
int wdfinc; // Index time term frequency increment (default 1)
|
int wdfinc; // Index time term frequency increment (default 1)
|
||||||
double boost; // Query time boost (default 1.0)
|
double boost; // Query time boost (default 1.0)
|
||||||
bool pfxonly; // Suppress prefix-less indexing
|
bool pfxonly; // Suppress prefix-less indexing
|
||||||
|
bool noterms; // Don't add term to highlight data (e.g.: rclbes)
|
||||||
FieldTraits()
|
FieldTraits()
|
||||||
: wdfinc(1), boost(1.0), pfxonly(false)
|
: wdfinc(1), boost(1.0), pfxonly(false), noterms(false)
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -71,8 +71,8 @@ static const string xapday_prefix = "D";
|
||||||
static const string xapmonth_prefix = "M";
|
static const string xapmonth_prefix = "M";
|
||||||
static const string xapyear_prefix = "Y";
|
static const string xapyear_prefix = "Y";
|
||||||
const string pathelt_prefix = "XP";
|
const string pathelt_prefix = "XP";
|
||||||
const string udi_prefix("Q");
|
static const string udi_prefix("Q");
|
||||||
const string parent_prefix("F");
|
static const string parent_prefix("F");
|
||||||
|
|
||||||
// Special terms to mark begin/end of field (for anchored searches), and
|
// Special terms to mark begin/end of field (for anchored searches), and
|
||||||
// page breaks
|
// page breaks
|
||||||
|
|
|
@ -533,8 +533,6 @@ private:
|
||||||
string version_string();
|
string version_string();
|
||||||
|
|
||||||
extern const string pathelt_prefix;
|
extern const string pathelt_prefix;
|
||||||
extern const string udi_prefix;
|
|
||||||
extern const string parent_prefix;
|
|
||||||
extern const string mimetype_prefix;
|
extern const string mimetype_prefix;
|
||||||
extern const string unsplitFilenameFieldName;
|
extern const string unsplitFilenameFieldName;
|
||||||
extern string start_of_field_term;
|
extern string start_of_field_term;
|
||||||
|
|
|
@ -181,7 +181,8 @@ bool SearchData::addClause(SearchDataClause* cl)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Am I a file name only search ? This is to turn off term highlighting
|
// Am I a file name only search ? This is to turn off term highlighting.
|
||||||
|
// There can't be a subclause in a filename search: no possible need to recurse
|
||||||
bool SearchData::fileNameOnly()
|
bool SearchData::fileNameOnly()
|
||||||
{
|
{
|
||||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
||||||
|
@ -190,6 +191,7 @@ bool SearchData::fileNameOnly()
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The query language creates a lot of subqueries. See if we can merge them.
|
||||||
void SearchData::simplify()
|
void SearchData::simplify()
|
||||||
{
|
{
|
||||||
for (unsigned int i = 0; i < m_query.size(); i++) {
|
for (unsigned int i = 0; i < m_query.size(); i++) {
|
||||||
|
@ -249,30 +251,35 @@ void SearchData::simplify()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SearchData::singleSimple()
|
// Extract terms and groups for highlighting
|
||||||
{
|
|
||||||
if (m_query.size() != 1 || !m_filetypes.empty() || !m_nfiletypes.empty() ||
|
|
||||||
m_haveDates || m_maxSize != size_t(-1) || m_minSize != size_t(-1) ||
|
|
||||||
m_haveWildCards)
|
|
||||||
return false;
|
|
||||||
SearchDataClause *clp = *m_query.begin();
|
|
||||||
if (clp->getTp() != SCLT_AND && clp->getTp() != SCLT_OR) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract all term data
|
|
||||||
void SearchData::getTerms(HighlightData &hld) const
|
void SearchData::getTerms(HighlightData &hld) const
|
||||||
{
|
{
|
||||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||||
(*it)->getTerms(hld);
|
if (!((*it)->getmodifiers() & SearchDataClause::SDCM_NOTERMS) &&
|
||||||
|
!(*it)->getexclude()) {
|
||||||
|
(*it)->getTerms(hld);
|
||||||
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char * tpToString(SClType t)
|
||||||
|
{
|
||||||
|
switch (t) {
|
||||||
|
case SCLT_AND: return "AND";
|
||||||
|
case SCLT_OR: return "OR";
|
||||||
|
case SCLT_FILENAME: return "FILENAME";
|
||||||
|
case SCLT_PHRASE: return "PHRASE";
|
||||||
|
case SCLT_NEAR: return "NEAR";
|
||||||
|
case SCLT_PATH: return "PATH";
|
||||||
|
case SCLT_SUB: return "SUB";
|
||||||
|
default: return "UNKNOWN";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SearchData::dump(ostream& o) const
|
void SearchData::dump(ostream& o) const
|
||||||
{
|
{
|
||||||
o << "SearchData: " << " qs " << int(m_query.size()) <<
|
o << "SearchData: " << tpToString(m_tp) << " qs " << int(m_query.size()) <<
|
||||||
" ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() <<
|
" ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() <<
|
||||||
" hd " << m_haveDates << " maxs " << int(m_maxSize) << " mins " <<
|
" hd " << m_haveDates << " maxs " << int(m_maxSize) << " mins " <<
|
||||||
int(m_minSize) << " wc " << m_haveWildCards << "\n";
|
int(m_minSize) << " wc " << m_haveWildCards << "\n";
|
||||||
|
@ -291,7 +298,7 @@ void SearchDataClause::dump(ostream& o) const
|
||||||
|
|
||||||
void SearchDataClauseSimple::dump(ostream& o) const
|
void SearchDataClauseSimple::dump(ostream& o) const
|
||||||
{
|
{
|
||||||
o << "ClauseSimple: ";
|
o << "ClauseSimple: " << tpToString(m_tp) << " ";
|
||||||
if (m_exclude)
|
if (m_exclude)
|
||||||
o << "- ";
|
o << "- ";
|
||||||
o << "[" ;
|
o << "[" ;
|
||||||
|
@ -319,9 +326,9 @@ void SearchDataClausePath::dump(ostream& o) const
|
||||||
void SearchDataClauseDist::dump(ostream& o) const
|
void SearchDataClauseDist::dump(ostream& o) const
|
||||||
{
|
{
|
||||||
if (m_tp == SCLT_NEAR)
|
if (m_tp == SCLT_NEAR)
|
||||||
o << "ClauseDist: NEAR: ";
|
o << "ClauseDist: NEAR ";
|
||||||
else
|
else
|
||||||
o << "ClauseDist: PHRA: ";
|
o << "ClauseDist: PHRA ";
|
||||||
|
|
||||||
if (m_exclude)
|
if (m_exclude)
|
||||||
o << " - ";
|
o << " - ";
|
||||||
|
|
|
@ -96,9 +96,6 @@ public:
|
||||||
/** Is there anything but a file name search in here ? */
|
/** Is there anything but a file name search in here ? */
|
||||||
bool fileNameOnly();
|
bool fileNameOnly();
|
||||||
|
|
||||||
/** Are we a simple query with one clause? */
|
|
||||||
bool singleSimple();
|
|
||||||
|
|
||||||
/** Do we have wildcards anywhere apart from filename searches ? */
|
/** Do we have wildcards anywhere apart from filename searches ? */
|
||||||
bool haveWildCards() {return m_haveWildCards;}
|
bool haveWildCards() {return m_haveWildCards;}
|
||||||
|
|
||||||
|
@ -228,7 +225,9 @@ private:
|
||||||
class SearchDataClause {
|
class SearchDataClause {
|
||||||
public:
|
public:
|
||||||
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
||||||
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
|
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16,
|
||||||
|
SDCM_NOTERMS=32 // Don't include terms for highlighting
|
||||||
|
};
|
||||||
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
|
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
|
||||||
|
|
||||||
SearchDataClause(SClType tp)
|
SearchDataClause(SClType tp)
|
||||||
|
@ -278,13 +277,12 @@ public:
|
||||||
{
|
{
|
||||||
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
|
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
|
||||||
}
|
}
|
||||||
virtual void setModifiers(Modifier mod)
|
|
||||||
{
|
|
||||||
m_modifiers = mod;
|
|
||||||
}
|
|
||||||
virtual void addModifier(Modifier mod)
|
virtual void addModifier(Modifier mod)
|
||||||
{
|
{
|
||||||
m_modifiers = Modifier(m_modifiers | mod);
|
m_modifiers = m_modifiers | mod;
|
||||||
|
}
|
||||||
|
virtual unsigned int getmodifiers() {
|
||||||
|
return m_modifiers;
|
||||||
}
|
}
|
||||||
virtual void setWeight(float w)
|
virtual void setWeight(float w)
|
||||||
{
|
{
|
||||||
|
@ -312,7 +310,7 @@ protected:
|
||||||
SClType m_tp;
|
SClType m_tp;
|
||||||
SearchData *m_parentSearch;
|
SearchData *m_parentSearch;
|
||||||
bool m_haveWildCards;
|
bool m_haveWildCards;
|
||||||
Modifier m_modifiers;
|
unsigned int m_modifiers;
|
||||||
float m_weight;
|
float m_weight;
|
||||||
bool m_exclude;
|
bool m_exclude;
|
||||||
Relation m_rel;
|
Relation m_rel;
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "xapian.h"
|
#include "xapian.h"
|
||||||
|
@ -53,9 +54,10 @@ typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
||||||
|
|
||||||
static const int original_term_wqf_booster = 10;
|
static const int original_term_wqf_booster = 10;
|
||||||
|
|
||||||
// Expand categories and mime type wild card exps Categories are
|
// Expand doc categories and mime type wild card expressions
|
||||||
// expanded against the configuration, mimetypes against the index
|
//
|
||||||
// (for wildcards).
|
// Categories are expanded against the configuration, mimetypes
|
||||||
|
// against the index.
|
||||||
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
||||||
{
|
{
|
||||||
const RclConfig *cfg = db.getConf();
|
const RclConfig *cfg = db.getConf();
|
||||||
|
@ -101,6 +103,8 @@ static const char *maxXapClauseCaseDiacMsg =
|
||||||
"wildcards ?"
|
"wildcards ?"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
// Walk the clauses list, translate each and add to top Xapian Query
|
||||||
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
||||||
vector<SearchDataClause*>& query,
|
vector<SearchDataClause*>& query,
|
||||||
string& reason, void *d)
|
string& reason, void *d)
|
||||||
|
@ -484,7 +488,8 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
||||||
if (noexpansion) {
|
if (noexpansion) {
|
||||||
oexp.push_back(prefix + term);
|
oexp.push_back(prefix + term);
|
||||||
m_hldata.terms[term] = term;
|
m_hldata.terms[term] = term;
|
||||||
LOGDEB(("ExpandTerm: noexpansion: final: %s\n", stringsToString(oexp).c_str()));
|
LOGDEB(("ExpandTerm: noexpansion: final: %s\n",
|
||||||
|
stringsToString(oexp).c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -568,6 +573,8 @@ void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
|
||||||
string prefix;
|
string prefix;
|
||||||
const FieldTraits *ftp;
|
const FieldTraits *ftp;
|
||||||
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
|
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
|
||||||
|
if (ftp->noterms)
|
||||||
|
addModifier(SDCM_NOTERMS);
|
||||||
prefix = wrap_prefix(ftp->pfx);
|
prefix = wrap_prefix(ftp->pfx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,12 +43,12 @@ keywords= K
|
||||||
xapyearmon = M
|
xapyearmon = M
|
||||||
title = S ; wdfinc = 10
|
title = S ; wdfinc = 10
|
||||||
mtype = T
|
mtype = T
|
||||||
ext = XE
|
ext = XE; noterms = 1
|
||||||
rclmd5 = XM
|
rclmd5 = XM
|
||||||
dir = XP
|
dir = XP ; noterms = 1
|
||||||
abstract = XS
|
abstract = XS
|
||||||
filename = XSFN
|
filename = XSFN ; noterms = 1
|
||||||
containerfilename = XCFN ; pfxonly = 1
|
containerfilename = XCFN ; pfxonly = 1 ; noterms = 1
|
||||||
rclUnsplitFN = XSFS
|
rclUnsplitFN = XSFS
|
||||||
xapyear = Y
|
xapyear = Y
|
||||||
recipient = XTO
|
recipient = XTO
|
||||||
|
@ -58,7 +58,7 @@ recipient = XTO
|
||||||
# by default.
|
# by default.
|
||||||
# Some values are internally reserved by recoll:
|
# Some values are internally reserved by recoll:
|
||||||
# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG
|
# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG
|
||||||
rclbes = XB
|
rclbes = XB ; noterms = 1
|
||||||
# Using XX was not a good idea.
|
# Using XX was not a good idea.
|
||||||
#
|
#
|
||||||
# I hereby commit to not using XY for Recoll:
|
# I hereby commit to not using XY for Recoll:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue