Snippet generation: limit the positions walk to the max hit position. Return a status code when a truncated walk possibly generated incomplete snippets. Implement a config variable for the max positions walk.
This commit is contained in:
parent
46b7f87e51
commit
97bc58201b
5 changed files with 32 additions and 15 deletions
|
@ -342,6 +342,10 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||
// them with their snippets.
|
||||
unordered_set<unsigned int> searchTermPositions;
|
||||
|
||||
// Remember max position. Used to stop walking positions lists while
|
||||
// populating the adjacent slots.
|
||||
unsigned int maxpos = 0;
|
||||
|
||||
// Total number of occurences for all terms. We stop when we have too much
|
||||
unsigned int totaloccs = 0;
|
||||
|
||||
|
@ -419,6 +423,8 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||
if (ii == (unsigned int)ipos) {
|
||||
sparseDoc[ii] = qterm;
|
||||
searchTermPositions.insert(ii);
|
||||
if (ii > maxpos)
|
||||
maxpos = ii;
|
||||
} else if (ii > (unsigned int)ipos &&
|
||||
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
||||
sparseDoc[ii] = occupiedmarker;
|
||||
|
@ -460,6 +466,7 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||
}
|
||||
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
|
||||
chron.millis(), totaloccs));
|
||||
maxpos += ctxwords + 1;
|
||||
|
||||
// This can happen if there are term occurences in the keywords
|
||||
// etc. but not elsewhere ?
|
||||
|
@ -475,28 +482,34 @@ abstract_result Query::Native::makeAbstract(Xapian::docid docid,
|
|||
// which is bad.
|
||||
{
|
||||
Xapian::TermIterator term;
|
||||
int cutoff = 500 * 1000;
|
||||
|
||||
int cutoff = m_q->m_snipMaxPosWalk;
|
||||
for (term = xrdb.termlist_begin(docid);
|
||||
term != xrdb.termlist_end(docid); term++) {
|
||||
// Ignore prefixed terms
|
||||
if (has_prefix(*term))
|
||||
continue;
|
||||
if (cutoff-- < 0) {
|
||||
ret = ABSRES_TRUNC;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||
ret = ABSRES_TERMMISS;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||
m_q->m_snipMaxPosWalk));
|
||||
break;
|
||||
}
|
||||
|
||||
map<unsigned int, string>::iterator vit;
|
||||
Xapian::PositionIterator pos;
|
||||
for (pos = xrdb.positionlist_begin(docid, *term);
|
||||
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
||||
if (cutoff-- < 0) {
|
||||
ret = ABSRES_TRUNC;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff\n"));
|
||||
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
||||
ret = ABSRES_TERMMISS;
|
||||
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
||||
m_q->m_snipMaxPosWalk));
|
||||
break;
|
||||
}
|
||||
// If we are beyond the max possible position, stop
|
||||
// for this term
|
||||
if (*pos > maxpos) {
|
||||
break;
|
||||
}
|
||||
map<unsigned int, string>::iterator vit;
|
||||
if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) {
|
||||
// Don't replace a term: the terms list is in
|
||||
// alphabetic order, and we may have several terms
|
||||
|
|
|
@ -1618,7 +1618,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
|||
case 0: is = prefix; break;
|
||||
default: is = prefix + droot.substr(0, es); break;
|
||||
}
|
||||
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
LOGDEB1(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
|
|
|
@ -141,8 +141,10 @@ private:
|
|||
|
||||
Query::Query(Db *db)
|
||||
: m_nq(new Native(this)), m_db(db), m_sorter(0), m_sortAscending(true),
|
||||
m_collapseDuplicates(false), m_resCnt(-1)
|
||||
m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
|
||||
{
|
||||
if (db)
|
||||
db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
|
||||
}
|
||||
|
||||
Query::~Query()
|
||||
|
|
|
@ -32,7 +32,8 @@ class Doc;
|
|||
enum abstract_result {
|
||||
ABSRES_ERROR = 0,
|
||||
ABSRES_OK = 1,
|
||||
ABSRES_TRUNC = 2
|
||||
ABSRES_TRUNC = 2,
|
||||
ABSRES_TERMMISS = 3
|
||||
};
|
||||
|
||||
// Snippet entry for makeDocAbstract
|
||||
|
@ -126,6 +127,7 @@ private:
|
|||
bool m_collapseDuplicates;
|
||||
int m_resCnt;
|
||||
RefCntr<SearchData> m_sd;
|
||||
int m_snipMaxPosWalk;
|
||||
|
||||
/* Copyconst and assignement private and forbidden */
|
||||
Query(const Query &) {}
|
||||
|
|
|
@ -598,7 +598,7 @@ public:
|
|||
if (m_ts->lastpos < pos)
|
||||
m_ts->lastpos = pos;
|
||||
bool noexpand = be ? m_ts->curnostemexp : true;
|
||||
LOGDEB(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",
|
||||
LOGDEB1(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",
|
||||
term.c_str(), pos, noexpand));
|
||||
if (m_terms[pos].size() < term.size()) {
|
||||
m_terms[pos] = term;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue