For case-sensitive indexes, wildcard expansion was not performed correctly for lowercase terms if no upper/mixed case version existed
This commit is contained in:
parent
5f057e61c6
commit
cb1e6a56be
3 changed files with 30 additions and 6 deletions
|
@ -81,8 +81,14 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
// Walk the list of all terms, and stem/unac each.
|
||||
string ermsg;
|
||||
try {
|
||||
for (Xapian::TermIterator it = wdb.allterms_begin();
|
||||
it != wdb.allterms_end(); it++) {
|
||||
Xapian::TermIterator it = wdb.allterms_begin();
|
||||
// We'd want to skip to the first non-prefixed term, but this is a bit
|
||||
// complicated, so we just jump over most of the prefixed terms and then
|
||||
// skip the rest one by one.
|
||||
it.skip_to(wrap_prefix("Z"));
|
||||
for ( ;it != wdb.allterms_end(); it++) {
|
||||
if (has_prefix(*it))
|
||||
continue;
|
||||
|
||||
// Detect and skip CJK terms.
|
||||
Utf8Iter utfit(*it);
|
||||
|
|
|
@ -129,6 +129,17 @@ static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
|||
it->term.insert(0, prefix);
|
||||
}
|
||||
|
||||
// Map a term match type value (one of the Db::ET_* constants) to a short
// string literal naming it: "wildcard", "regexp", "stem" or "none".
// Db::ET_NONE and any value not listed in the switch both yield "none".
// The returned pointer refers to a string literal, so the caller must not
// free or modify it.
static const char *tmtptostr(int typ)
|
||||
{
|
||||
switch (typ) {
|
||||
case Db::ET_WILD: return "wildcard";
|
||||
case Db::ET_REGEXP: return "regexp";
|
||||
case Db::ET_STEM: return "stem";
|
||||
// ET_NONE shares the default label's return value.
case Db::ET_NONE:
|
||||
default: return "none";
|
||||
}
|
||||
}
|
||||
|
||||
// Find all index terms that match an input along different expansion modes:
|
||||
// wildcard, regular expression, or stemming. Depending on flags we perform
|
||||
// case and/or diacritics expansion (this can be the only thing requested).
|
||||
|
@ -157,9 +168,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
stripped = o_index_stripchars;
|
||||
#endif
|
||||
|
||||
LOGDEB(("Db::TermMatch: typ %d diacsens %d casesens %d lang [%s] term [%s] "
|
||||
LOGDEB(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s] "
|
||||
"max %d field [%s] stripped %d\n",
|
||||
matchtyp, diac_sensitive, case_sensitive, lang.c_str(),
|
||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||
_term.c_str(), max, field.c_str(), stripped));
|
||||
|
||||
// If index is stripped, no case or diac expansion can be needed:
|
||||
|
@ -213,6 +224,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
it != exp.end(); it++) {
|
||||
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
||||
}
|
||||
// And also expand the original expression against the
|
||||
// main index: for the common case where the expression
|
||||
// had no case/diac expansion (no entry in the exp db if
|
||||
// the original term is lowercase and without accents).
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
} else {
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
}
|
||||
|
|
|
@ -203,7 +203,7 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp,
|
|||
string::size_type es = inexp->baseprefixlen();
|
||||
string is = inexp->exp().substr(0, es);
|
||||
string::size_type preflen = m_prefix.size();
|
||||
LOGDEB2(("XapCompSynFam::is: [%s]\n", is.c_str()));
|
||||
LOGDEB2(("XapCompSynFam::synKeyExpand: init section: [%s]\n", is.c_str()));
|
||||
|
||||
string ermsg;
|
||||
try {
|
||||
|
@ -246,9 +246,11 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp,
|
|||
}
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
LOGERR(("XapCompSynFam::keyWildExpand: xapian: [%s]\n", ermsg.c_str()));
|
||||
LOGERR(("XapCompSynFam::synKeyExpand: xapian: [%s]\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
LOGDEB1(("XapCompSynFam::synKeyExpand: final: [%s]\n",
|
||||
stringsToString(result).c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue