Prevent highlighting of bogus terms in results (prevent path elements, negative queries or internal terms from being highlighted)
This commit is contained in:
parent
fe6174652b
commit
e37284f05f
8 changed files with 58 additions and 46 deletions
|
@ -867,6 +867,8 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
|||
ft.boost = atof(tval.c_str());
|
||||
if (attrs.get("pfxonly", tval))
|
||||
ft.pfxonly = stringToBool(tval);
|
||||
if (attrs.get("noterms", tval))
|
||||
ft.noterms = stringToBool(tval);
|
||||
m_fldtotraits[stringtolower(*it)] = ft;
|
||||
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
||||
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
||||
|
|
|
@ -66,9 +66,9 @@ struct FieldTraits {
|
|||
int wdfinc; // Index time term frequency increment (default 1)
|
||||
double boost; // Query time boost (default 1.0)
|
||||
bool pfxonly; // Suppress prefix-less indexing
|
||||
|
||||
bool noterms; // Don't add term to highlight data (e.g.: rclbes)
|
||||
FieldTraits()
|
||||
: wdfinc(1), boost(1.0), pfxonly(false)
|
||||
: wdfinc(1), boost(1.0), pfxonly(false), noterms(false)
|
||||
{}
|
||||
};
|
||||
|
||||
|
|
|
@ -71,8 +71,8 @@ static const string xapday_prefix = "D";
|
|||
static const string xapmonth_prefix = "M";
|
||||
static const string xapyear_prefix = "Y";
|
||||
const string pathelt_prefix = "XP";
|
||||
const string udi_prefix("Q");
|
||||
const string parent_prefix("F");
|
||||
static const string udi_prefix("Q");
|
||||
static const string parent_prefix("F");
|
||||
|
||||
// Special terms to mark begin/end of field (for anchored searches), and
|
||||
// page breaks
|
||||
|
|
|
@ -533,8 +533,6 @@ private:
|
|||
string version_string();
|
||||
|
||||
extern const string pathelt_prefix;
|
||||
extern const string udi_prefix;
|
||||
extern const string parent_prefix;
|
||||
extern const string mimetype_prefix;
|
||||
extern const string unsplitFilenameFieldName;
|
||||
extern string start_of_field_term;
|
||||
|
|
|
@ -181,7 +181,8 @@ bool SearchData::addClause(SearchDataClause* cl)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Am I a file name only search ? This is to turn off term highlighting
|
||||
// Am I a file name only search ? This is to turn off term highlighting.
|
||||
// There can't be a subclause in a filename search: no possible need to recurse
|
||||
bool SearchData::fileNameOnly()
|
||||
{
|
||||
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
|
@ -190,6 +191,7 @@ bool SearchData::fileNameOnly()
|
|||
return true;
|
||||
}
|
||||
|
||||
// The query language creates a lot of subqueries. See if we can merge them.
|
||||
void SearchData::simplify()
|
||||
{
|
||||
for (unsigned int i = 0; i < m_query.size(); i++) {
|
||||
|
@ -249,30 +251,35 @@ void SearchData::simplify()
|
|||
}
|
||||
}
|
||||
|
||||
bool SearchData::singleSimple()
|
||||
{
|
||||
if (m_query.size() != 1 || !m_filetypes.empty() || !m_nfiletypes.empty() ||
|
||||
m_haveDates || m_maxSize != size_t(-1) || m_minSize != size_t(-1) ||
|
||||
m_haveWildCards)
|
||||
return false;
|
||||
SearchDataClause *clp = *m_query.begin();
|
||||
if (clp->getTp() != SCLT_AND && clp->getTp() != SCLT_OR) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Extract all term data
|
||||
// Extract terms and groups for highlighting
|
||||
void SearchData::getTerms(HighlightData &hld) const
|
||||
{
|
||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
(*it)->getTerms(hld);
|
||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++) {
|
||||
if (!((*it)->getmodifiers() & SearchDataClause::SDCM_NOTERMS) &&
|
||||
!(*it)->getexclude()) {
|
||||
(*it)->getTerms(hld);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static const char * tpToString(SClType t)
|
||||
{
|
||||
switch (t) {
|
||||
case SCLT_AND: return "AND";
|
||||
case SCLT_OR: return "OR";
|
||||
case SCLT_FILENAME: return "FILENAME";
|
||||
case SCLT_PHRASE: return "PHRASE";
|
||||
case SCLT_NEAR: return "NEAR";
|
||||
case SCLT_PATH: return "PATH";
|
||||
case SCLT_SUB: return "SUB";
|
||||
default: return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
void SearchData::dump(ostream& o) const
|
||||
{
|
||||
o << "SearchData: " << " qs " << int(m_query.size()) <<
|
||||
o << "SearchData: " << tpToString(m_tp) << " qs " << int(m_query.size()) <<
|
||||
" ft " << m_filetypes.size() << " nft " << m_nfiletypes.size() <<
|
||||
" hd " << m_haveDates << " maxs " << int(m_maxSize) << " mins " <<
|
||||
int(m_minSize) << " wc " << m_haveWildCards << "\n";
|
||||
|
@ -291,7 +298,7 @@ void SearchDataClause::dump(ostream& o) const
|
|||
|
||||
void SearchDataClauseSimple::dump(ostream& o) const
|
||||
{
|
||||
o << "ClauseSimple: ";
|
||||
o << "ClauseSimple: " << tpToString(m_tp) << " ";
|
||||
if (m_exclude)
|
||||
o << "- ";
|
||||
o << "[" ;
|
||||
|
@ -319,9 +326,9 @@ void SearchDataClausePath::dump(ostream& o) const
|
|||
void SearchDataClauseDist::dump(ostream& o) const
|
||||
{
|
||||
if (m_tp == SCLT_NEAR)
|
||||
o << "ClauseDist: NEAR: ";
|
||||
o << "ClauseDist: NEAR ";
|
||||
else
|
||||
o << "ClauseDist: PHRA: ";
|
||||
o << "ClauseDist: PHRA ";
|
||||
|
||||
if (m_exclude)
|
||||
o << " - ";
|
||||
|
|
|
@ -96,9 +96,6 @@ public:
|
|||
/** Is there anything but a file name search in here ? */
|
||||
bool fileNameOnly();
|
||||
|
||||
/** Are we a simple query with one clause? */
|
||||
bool singleSimple();
|
||||
|
||||
/** Do we have wildcards anywhere apart from filename searches ? */
|
||||
bool haveWildCards() {return m_haveWildCards;}
|
||||
|
||||
|
@ -228,7 +225,9 @@ private:
|
|||
class SearchDataClause {
|
||||
public:
|
||||
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
||||
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
|
||||
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16,
|
||||
SDCM_NOTERMS=32 // Don't include terms for highlighting
|
||||
};
|
||||
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};
|
||||
|
||||
SearchDataClause(SClType tp)
|
||||
|
@ -278,13 +277,12 @@ public:
|
|||
{
|
||||
return m_parentSearch ? m_parentSearch->getSoftMaxExp() : -1;
|
||||
}
|
||||
virtual void setModifiers(Modifier mod)
|
||||
{
|
||||
m_modifiers = mod;
|
||||
}
|
||||
virtual void addModifier(Modifier mod)
|
||||
{
|
||||
m_modifiers = Modifier(m_modifiers | mod);
|
||||
m_modifiers = m_modifiers | mod;
|
||||
}
|
||||
virtual unsigned int getmodifiers() {
|
||||
return m_modifiers;
|
||||
}
|
||||
virtual void setWeight(float w)
|
||||
{
|
||||
|
@ -312,7 +310,7 @@ protected:
|
|||
SClType m_tp;
|
||||
SearchData *m_parentSearch;
|
||||
bool m_haveWildCards;
|
||||
Modifier m_modifiers;
|
||||
unsigned int m_modifiers;
|
||||
float m_weight;
|
||||
bool m_exclude;
|
||||
Relation m_rel;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
#include "xapian.h"
|
||||
|
@ -53,9 +54,10 @@ typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
|||
|
||||
static const int original_term_wqf_booster = 10;
|
||||
|
||||
// Expand categories and mime type wild card exps Categories are
|
||||
// expanded against the configuration, mimetypes against the index
|
||||
// (for wildcards).
|
||||
// Expand doc categories and mime type wild card expressions
|
||||
//
|
||||
// Categories are expanded against the configuration, mimetypes
|
||||
// against the index.
|
||||
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
||||
{
|
||||
const RclConfig *cfg = db.getConf();
|
||||
|
@ -101,6 +103,8 @@ static const char *maxXapClauseCaseDiacMsg =
|
|||
"wildcards ?"
|
||||
;
|
||||
|
||||
|
||||
// Walk the clauses list, translate each and add to top Xapian Query
|
||||
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
||||
vector<SearchDataClause*>& query,
|
||||
string& reason, void *d)
|
||||
|
@ -484,7 +488,8 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
|||
if (noexpansion) {
|
||||
oexp.push_back(prefix + term);
|
||||
m_hldata.terms[term] = term;
|
||||
LOGDEB(("ExpandTerm: noexpansion: final: %s\n", stringsToString(oexp).c_str()));
|
||||
LOGDEB(("ExpandTerm: noexpansion: final: %s\n",
|
||||
stringsToString(oexp).c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -568,6 +573,8 @@ void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
|
|||
string prefix;
|
||||
const FieldTraits *ftp;
|
||||
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
|
||||
if (ftp->noterms)
|
||||
addModifier(SDCM_NOTERMS);
|
||||
prefix = wrap_prefix(ftp->pfx);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,12 +43,12 @@ keywords= K
|
|||
xapyearmon = M
|
||||
title = S ; wdfinc = 10
|
||||
mtype = T
|
||||
ext = XE
|
||||
ext = XE; noterms = 1
|
||||
rclmd5 = XM
|
||||
dir = XP
|
||||
dir = XP ; noterms = 1
|
||||
abstract = XS
|
||||
filename = XSFN
|
||||
containerfilename = XCFN ; pfxonly = 1
|
||||
filename = XSFN ; noterms = 1
|
||||
containerfilename = XCFN ; pfxonly = 1 ; noterms = 1
|
||||
rclUnsplitFN = XSFS
|
||||
xapyear = Y
|
||||
recipient = XTO
|
||||
|
@ -58,7 +58,7 @@ recipient = XTO
|
|||
# by default.
|
||||
# Some values are internally reserved by recoll:
|
||||
# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG
|
||||
rclbes = XB
|
||||
rclbes = XB ; noterms = 1
|
||||
# Using XX was not a good idea.
|
||||
#
|
||||
# I hereby commit to not using XY for Recoll:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue