implemented proper limitation and error reporting in case of truncation for term and query expansions

This commit is contained in:
Jean-Francois Dockes 2012-10-05 12:36:19 +02:00
parent 86515ce52a
commit a4a7246a12
13 changed files with 217 additions and 105 deletions

View file

@ -569,9 +569,9 @@ recoll
<sect2 id="rcl.indexing.config.gui">
<title>The indexing configuration GUI</title>
<title>The index configuration GUI</title>
<para>Most parameters for a given indexing configuration can
<para>Most parameters for a given index configuration can
be set from a <command>recoll</command> GUI running on this
configuration (either as default, or by setting
<envar>RECOLL_CONFDIR</envar> or the <option>-c</option>
@ -4219,6 +4219,24 @@ skippedPaths = ~/somedir/&lowast;.txt
</listitem>
</varlistentry>
<varlistentry><term><varname>maxTermExpand</varname></term>
<listitem><para>Maximum expansion count for a single term (e.g.:
when using wildcards). The default of 10000 is reasonable and
will avoid queries that appear frozen while the engine is
walking the term list.</para>
</listitem>
</varlistentry>
<varlistentry><term><varname>maxXapianClauses</varname></term>
<listitem><para>Maximum number of elementary clauses we can add
to a single Xapian query. In some cases, the result of term
expansion can be multiplicative, and we want to avoid using
excessive memory. The default of 100 000 should be both
high enough in most cases and compatible with current
typical hardware configurations.</para>
</listitem>
</varlistentry>
<varlistentry><term><varname>nonumbers</varname></term>
<listitem><para>If this is set to true, no terms will be generated
for numbers. For example "123", "1.5e6", 192.168.1.4, would not

View file

@ -195,6 +195,34 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
));
vboxLayout->addWidget(cp2);
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
ConfParamIntW* cp3 =
new ConfParamIntW(this, lnk3,
tr("Maximum term expansion count"),
tr("<p>Maximum expansion count for a single term "
"(e.g.: when using wildcards). The default "
"of 10 000 is reasonable and will avoid "
"queries that appear frozen while the engine is "
"walking the term list."
));
vboxLayout->addWidget(cp3);
ConfLink lnk4(new ConfLinkRclRep(config, "maxXapianClauses"));
ConfParamIntW* cp4 =
new ConfParamIntW(this, lnk4,
tr("Maximum Xapian clauses count"),
tr("<p>Maximum number of elementary clauses we "
"add to a single Xapian query. In some cases, "
"the result of term expansion can be "
"multiplicative, and we want to avoid using "
"excessive memory. The default of 100 000 "
"should be both high enough in most cases "
"and compatible with current typical hardware "
"configurations."
));
vboxLayout->addWidget(cp4);
vboxLayout->insertStretch(-1);
}

View file

@ -138,7 +138,10 @@ class DocSequence {
{
return std::list<std::string>();
}
/** Return the explanatory message currently stored in m_reason (may be empty). */
virtual std::string getReason() {return m_reason;}
/** Optional functionality. */
virtual bool canFilter() {return false;}
virtual bool canSort() {return false;}
@ -154,6 +157,7 @@ class DocSequence {
protected:
static std::string o_sort_trans;
static std::string o_filt_trans;
std::string m_reason;
private:
std::string m_title;
};
@ -206,6 +210,12 @@ public:
return false;
return m_seq->getEnclosing(doc, pdoc);
}
/**
 * Forward the reason string of the wrapped sequence.
 * @return m_seq->getReason(), or an empty string when no sequence is set.
 */
virtual std::string getReason()
{
    // Must return a string here: 'return false' would either fail to
    // compile or build a std::string from a null pointer.
    if (m_seq.isNull())
        return std::string();
    return m_seq->getReason();
}
virtual std::string title() {return m_seq->title();}
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}

View file

@ -51,14 +51,16 @@ string DocSequenceDb::getDescription()
bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, string *sh)
{
setQuery();
if (!setQuery())
return false;
if (sh) sh->erase();
return m_q->getDoc(num, doc);
}
int DocSequenceDb::getResCnt()
{
setQuery();
if (!setQuery())
return false;
if (m_rescnt < 0) {
m_rescnt= m_q->getResCnt();
}
@ -71,7 +73,8 @@ static const string cstr_mre("[...]");
bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<Rcl::Snippet>& vpabs)
{
LOGDEB(("DocSequenceDb::getAbstract/pair\n"));
setQuery();
if (!setQuery())
return false;
// Have to put the limit somewhere.
int maxoccs = 500;
@ -93,7 +96,8 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<Rcl::Snippet>& vpabs)
bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
{
setQuery();
if (!setQuery())
return false;
if (m_q->whatDb() &&
m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
m_q->makeDocAbstract(doc, vabs);
@ -105,7 +109,8 @@ bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector<string>& vabs)
int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
{
setQuery();
if (!setQuery())
return false;
if (m_q->whatDb()) {
return m_q->getFirstMatchPage(doc, term);
}
@ -114,7 +119,8 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
bool DocSequenceDb::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
{
setQuery();
if (!setQuery())
return false;
string udi;
if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath,
udi))
@ -124,7 +130,8 @@ bool DocSequenceDb::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
list<string> DocSequenceDb::expand(Rcl::Doc &doc)
{
setQuery();
if (!setQuery())
return list<string>();
vector<string> v = m_q->expand(doc);
return list<string>(v.begin(), v.end());
}
@ -209,13 +216,10 @@ bool DocSequenceDb::setQuery()
return true;
m_rescnt = -1;
m_needSetQuery = !m_q->setQuery(m_fsdata);
#if 0
HighlightData hld;
m_fsdata->getTerms(hld);
string str;
hld.toString(str);
fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
#endif
if (m_needSetQuery) {
m_reason = m_q->getReason();
LOGERR(("DocSequenceDb::setQuery: rclquery::setQuery failed: %s\n",
m_reason.c_str()));
}
return !m_needSetQuery;
}

View file

@ -67,6 +67,7 @@ class DocSequenceDb : public DocSequence {
bool m_isFiltered;
bool m_isSorted;
bool m_needSetQuery; // search data changed, need to reapply before fetch
bool setQuery();
};

View file

@ -319,7 +319,10 @@ int recollq(RclConfig **cfp, int argc, char **argv)
query.setSortBy(sortfield, (op_flags & OPT_D) ? false : true);
}
Chrono chron;
query.setQuery(rq);
if (!query.setQuery(rq)) {
cerr << "Query setup failed: " << query.getReason() << endl;
return(1);
}
int cnt = query.getResCnt();
if (!(op_flags & OPT_b)) {
cout << "Recoll query: " << rq->getDescription() << endl;

View file

@ -337,6 +337,11 @@ void ResListPager::displayPage(RclConfig *config)
if (pageEmpty()) {
chunk << trans("<p><b>No results found</b><br>");
string reason = m_docSource->getReason();
if (!reason.empty()) {
chunk << "<blockquote>" << escapeHtml(reason) <<
"</blockquote></p>";
} else {
HighlightData hldata;
m_docSource->getTerms(hldata);
vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
@ -368,6 +373,7 @@ void ResListPager::displayPage(RclConfig *config)
chunk << "</blockquote></p>";
}
}
}
} else {
unsigned int resCnt = m_docSource->getResCnt();
if (m_winfirst + m_respage.size() < resCnt) {

View file

@ -1431,7 +1431,7 @@ bool Db::purgeFile(const string &udi, bool *existed)
}
// File name wild card expansion. This is a specialisation of termMatch
bool Db::filenameWildExp(const string& fnexp, vector<string>& names)
bool Db::filenameWildExp(const string& fnexp, vector<string>& names, int max)
{
string pattern = fnexp;
names.clear();
@ -1449,7 +1449,7 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names)
LOGDEB(("Rcl::Db::filenameWildExp: pattern: [%s]\n", pattern.c_str()));
TermMatchResult result;
if (!termMatch(ET_WILD, string(), pattern, result, -1,
if (!termMatch(ET_WILD, string(), pattern, result, max,
unsplitFilenameFieldName))
return false;
for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
@ -1459,7 +1459,7 @@ bool Db::filenameWildExp(const string& fnexp, vector<string>& names)
if (names.empty()) {
// Build an impossible query: we know it's impossible because we
// control the prefixes!
names.push_back("XNONENoMatchingTerms");
names.push_back(wrap_prefix("XNONE") + "NoMatchingTerms");
}
return true;
}

View file

@ -315,7 +315,7 @@ class Db {
bool maxYearSpan(int *minyear, int *maxyear);
/** Wildcard expansion specific to file names. Internal/sdata use only */
bool filenameWildExp(const string& exp, vector<string>& names);
bool filenameWildExp(const string& exp, vector<string>& names, int max);
/** Set parameters for synthetic abstract generation */
void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);

View file

@ -193,8 +193,13 @@ bool Query::setQuery(RefCntr<SearchData> sdata)
m_nq->clear();
m_sd = sdata;
int maxexp = 10000;
m_db->getConf()->getConfParam("maxTermExpand", &maxexp);
int maxcl = 100000;
m_db->getConf()->getConfParam("maxXapianClauses", &maxcl);
Xapian::Query xq;
if (!sdata->toNativeQuery(*m_db, &xq)) {
if (!sdata->toNativeQuery(*m_db, &xq, maxexp, maxcl)) {
m_reason += sdata->getReason();
return false;
}

View file

@ -201,14 +201,16 @@ bool SearchData::expandFileTypes(RclConfig *cfg, vector<string>& tps)
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
vector<SearchDataClause*>& query,
string& reason, void *d)
string& reason, void *d,
int maxexp, int maxcl)
{
Xapian::Query xq;
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
Xapian::Query nq;
if (!(*it)->toNativeQuery(db, &nq)) {
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed\n"));
reason = (*it)->getReason();
if (!(*it)->toNativeQuery(db, &nq, maxexp, maxcl)) {
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
(*it)->getReason().c_str()));
reason += (*it)->getReason() + " ";
return false;
}
if (nq.empty()) {
@ -236,6 +238,13 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
} else {
xq = Xapian::Query(op, xq, nq);
}
if (int(xq.get_length()) >= maxcl) {
LOGERR(("Maximum Xapian query size exceeded."
" Maybe increase maxXapianClauses."));
m_reason += "Maximum Xapian query size exceeded."
" Maybe increase maxXapianClauses.";
return false;
}
}
if (xq.empty())
xq = Xapian::Query::MatchAll;
@ -244,7 +253,7 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
return true;
}
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
bool SearchData::toNativeQuery(Rcl::Db &db, void *d, int maxexp, int maxcl)
{
LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
m_reason.erase();
@ -252,8 +261,9 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
// Walk the clause list translating each in turn and building the
// Xapian query tree
Xapian::Query xq;
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed\n"));
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq, maxexp, maxcl)) {
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n",
m_reason.c_str()));
return false;
}
@ -620,10 +630,10 @@ private:
class StringToXapianQ {
public:
StringToXapianQ(Db& db, HighlightData& hld, const string& field,
const string &stmlng, bool boostUser)
const string &stmlng, bool boostUser, int maxexp, int maxcl)
: m_db(db), m_field(field), m_stemlang(stmlng),
m_doBoostUserTerms(boostUser), m_hld(hld), m_autodiacsens(false),
m_autocasesens(true)
m_autocasesens(true), m_maxexp(maxexp), m_maxcl(maxcl), m_curcl(0)
{
m_db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
m_db.getConf()->getConfParam("autocasesens", &m_autocasesens);
@ -635,15 +645,15 @@ public:
vector<Xapian::Query> &pqueries,
int slack = 0, bool useNear = false);
private:
void expandTerm(int mods,
bool expandTerm(string& ermsg, int mods,
const string& term, vector<string>& exp,
string& sterm, const string& prefix);
// After splitting entry on whitespace: process non-phrase element
void processSimpleSpan(const string& span,
void processSimpleSpan(string& ermsg, const string& span,
int mods,
vector<Xapian::Query> &pqueries);
// Process phrase/near element
void processPhraseOrNear(TextSplitQ *splitData,
void processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
int mods,
vector<Xapian::Query> &pqueries,
bool useNear, int slack);
@ -655,6 +665,9 @@ private:
HighlightData& m_hld;
bool m_autodiacsens;
bool m_autocasesens;
int m_maxexp;
int m_maxcl;
int m_curcl;
};
#if 1
@ -679,7 +692,7 @@ static void listVector(const string& what, const vector<string>&l)
* has it already. Used in the simple case where there is nothing to expand,
* and we just return the prefixed term (else Db::termMatch deals with it).
*/
void StringToXapianQ::expandTerm(int mods,
bool StringToXapianQ::expandTerm(string& ermsg, int mods,
const string& term,
vector<string>& oexp, string &sterm,
const string& prefix)
@ -689,7 +702,7 @@ void StringToXapianQ::expandTerm(int mods,
sterm.clear();
oexp.clear();
if (term.empty())
return;
return true;
bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
@ -753,7 +766,7 @@ void StringToXapianQ::expandTerm(int mods,
oexp.push_back(prefix + term);
m_hld.terms[term] = m_hld.uterms.size() - 1;
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
return;
return true;
}
// Make objects before the goto jungle to avoid compiler complaints
@ -770,7 +783,7 @@ void StringToXapianQ::expandTerm(int mods,
// expansion, which means that we are casediac-sensitive. There
// would be nothing to prevent us from expanding from the casediac
// synonyms first. To be done later
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang,term,res,m_maxexp,m_field);
goto termmatchtoresult;
}
@ -778,14 +791,14 @@ void StringToXapianQ::expandTerm(int mods,
#ifdef RCL_INDEX_STRIPCHARS
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, m_maxexp, m_field);
#else
if (o_index_stripchars) {
// If the index is raw, we can only come here if nostemexp is unset
// and we just need stem expansion.
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang,term,res,m_maxexp,m_field);
goto termmatchtoresult;
}
@ -854,12 +867,17 @@ exptotermatch:
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
for (vector<string>::const_iterator it = lexp.begin();
it != lexp.end(); it++) {
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, -1, m_field);
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res,m_maxexp,m_field);
}
#endif
// Term match entries to vector of terms
termmatchtoresult:
if (int(res.entries.size()) >= m_maxexp) {
ermsg = "Maximum term expansion size exceeded."
" Maybe increase maxTermExpand.";
return false;
}
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
it != res.entries.end(); it++) {
oexp.push_back(it->term);
@ -876,6 +894,7 @@ termmatchtoresult:
m_hld.terms[strip_prefix(*it)] = term;
}
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
return true;
}
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -912,7 +931,7 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
}
}
void StringToXapianQ::processSimpleSpan(const string& span,
void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span,
int mods,
vector<Xapian::Query> &pqueries)
{
@ -927,7 +946,8 @@ void StringToXapianQ::processSimpleSpan(const string& span,
prefix = wrap_prefix(ftp->pfx);
}
expandTerm(mods, span, exp, sterm, prefix);
if (!expandTerm(ermsg, mods, span, exp, sterm, prefix))
return;
// Set up the highlight data. No prefix should go in there
for (vector<string>::const_iterator it = exp.begin();
@ -939,6 +959,7 @@ void StringToXapianQ::processSimpleSpan(const string& span,
// Push either term or OR of stem-expanded set
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
m_curcl += exp.size();
// If sterm (simplified original user term) is not null, give it a
// relevance boost. We do this even if no expansion occurred (else
@ -957,7 +978,7 @@ void StringToXapianQ::processSimpleSpan(const string& span,
// NEAR xapian query, the elements of which can themselves be OR
// queries if the terms get expanded by stemming or wildcards (we
// don't do stemming for PHRASE though)
void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
int mods,
vector<Xapian::Query> &pqueries,
bool useNear, int slack)
@ -999,7 +1020,8 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
lmods |= SearchDataClause::SDCM_NOSTEMMING;
string sterm;
vector<string> exp;
expandTerm(lmods, *it, exp, sterm, prefix);
if (!expandTerm(ermsg, lmods, *it, exp, sterm, prefix))
return;
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
listVector("", exp);
// groups is used for highlighting, we don't want prefixes in there.
@ -1011,6 +1033,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
groups.push_back(noprefs);
orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
exp.begin(), exp.end()));
m_curcl += exp.size();
if (m_curcl >= m_maxcl)
return;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
if (exp.size() > 1)
hadmultiple = true;
@ -1099,7 +1124,7 @@ bool StringToXapianQ::processUserString(const string &iq,
"slack %d near %d\n",
iq.c_str(), m_field.c_str(), mods, slack, useNear));
ermsg.erase();
m_curcl = 0;
const StopList stops = m_db.getStopList();
// Simple whitespace-split input into user-level words and
@ -1165,12 +1190,18 @@ bool StringToXapianQ::processUserString(const string &iq,
if (splitter.nostemexps.front())
lmods |= SearchDataClause::SDCM_NOSTEMMING;
m_hld.ugroups.push_back(vector<string>(1, *it));
processSimpleSpan(splitter.terms.front(), lmods, pqueries);
processSimpleSpan(ermsg,splitter.terms.front(),lmods, pqueries);
}
break;
default:
m_hld.ugroups.push_back(vector<string>(1, *it));
processPhraseOrNear(&splitter, mods, pqueries, useNear, slack);
processPhraseOrNear(ermsg, &splitter, mods, pqueries,
useNear, slack);
}
if (m_curcl >= m_maxcl) {
ermsg = "Maximum Xapian query size exceeded."
" Maybe increase maxXapianClauses.";
break;
}
}
} catch (const Xapian::Error &e) {
@ -1190,7 +1221,8 @@ bool StringToXapianQ::processUserString(const string &iq,
}
// Translate a simple OR, AND, or EXCL search clause.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
int maxexp, int maxcl)
{
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
getStemLang().c_str()));
@ -1216,7 +1248,8 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
(m_parentSearch == 0 && !m_haveWildCards);
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
maxexp, maxcl);
if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
return false;
if (pqueries.empty()) {
@ -1240,13 +1273,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
// about expanding multiple fragments in the past. We just take the
// value blanks and all and expand this against the indexed unsplit
// file names
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
int maxexp, int)
{
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
vector<string> names;
db.filenameWildExp(m_text, names);
db.filenameWildExp(m_text, names, maxexp);
*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
if (m_weight != 1.0) {
@ -1256,7 +1290,8 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
}
// Translate NEAR or PHRASE clause.
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
int maxexp, int maxcl)
{
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
@ -1281,7 +1316,8 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
}
string s = cstr_dquote + m_text + cstr_dquote;
bool useNear = (m_tp == SCLT_NEAR);
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
maxexp, maxcl);
if (!tr.processUserString(s, getModifiers(), m_reason, pqueries,
m_slack, useNear))
return false;

View file

@ -89,8 +89,7 @@ public:
bool haveWildCards() {return m_haveWildCards;}
/** Translate to Xapian query. rcldb knows about the void* */
bool toNativeQuery(Rcl::Db &db, void *);
bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl);
/** We become the owner of cl and will delete it */
bool addClause(SearchDataClause *cl);
@ -175,7 +174,7 @@ private:
bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
bool clausesToQuery(Rcl::Db &db, SClType tp,
std::vector<SearchDataClause*>& query,
string& reason, void *d);
string& reason, void *d, int, int);
/* Copyconst and assignment private and forbidden */
SearchData(const SearchData &) {}
@ -192,7 +191,7 @@ public:
m_modifiers(SDCM_NONE), m_weight(1.0)
{}
virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
virtual bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl) = 0;
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
virtual std::string getReason() const {return m_reason;}
virtual void getTerms(HighlightData & hldata) const = 0;
@ -266,7 +265,7 @@ public:
}
/** Translate to Xapian query */
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
virtual void getTerms(HighlightData& hldata) const
{
@ -307,7 +306,7 @@ public:
{
}
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
};
/**
@ -326,7 +325,7 @@ public:
{
}
virtual bool toNativeQuery(Rcl::Db &, void *);
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
private:
int m_slack;
};
@ -338,9 +337,12 @@ public:
: SearchDataClause(tp), m_sub(sub)
{
}
virtual bool toNativeQuery(Rcl::Db &db, void *p)
virtual bool toNativeQuery(Rcl::Db &db, void *p, int maxexp, int maxcl)
{
return m_sub->toNativeQuery(db, p);
bool ret = m_sub->toNativeQuery(db, p, maxexp, maxcl);
if (!ret)
m_reason = m_sub->getReason();
return ret;
}
virtual void getTerms(HighlightData& hldata) const

View file

@ -103,6 +103,17 @@ indexstemminglanguages = english
# Actually, this seems a reasonable default for all until someone protests.
unac_except_trans = åå Åå ää Ää öö Öö üü Üü ßss œoe Œoe æae ÆAE fifi flfl
# Maximum expansion count for a single term (e.g. when using wildcards).
# We used to not limit this at all (except for filenames where the limit
# was too low at 1000), but it is unreasonable with a big index.
# Default 10 000
maxTermExpand = 10000
# Maximum number of clauses we add to a single Xapian query. In some cases,
# the result of term expansion can be multiplicative, and we want to avoid
# eating all the memory. Default 100 000
maxXapianClauses = 100000
# Where to store the database (directory). This may be an absolute path,
# else it is taken as relative to the configuration directory (-c argument
# or $RECOLL_CONFDIR).
@ -132,18 +143,6 @@ filtersdir = @prefix@/share/recoll/filters
# want to change the icons displayed in the result list
iconsdir = @prefix@/share/recoll/images
# A list of characters, encoded in UTF-8, which should be handled specially
# when converting text to unaccented lowercase. For example, in Swedish,
# the letter a with diaeresis has full alphabet citizenship and should not
# be turned into an a. Each element in the space-separated list has the
# special character as first element and the translation following
# (multiple chars allowed). The handling of both the lowercase and
# upper-case versions of a character should be specified, as membership
# in the list will turn off both standard accent and case
# processing. ** Changing the list implies a full reindex **
# Example for Swedish:
# unac_except_trans = åå Åå ää Ää öö Öö
# Should we use the system's 'file -i' command as a final step in file type
# identification ? This may be useful, but will usually cause the
# indexation of many bogus 'text' files