For case-sensitive indexes, wildcard expansion was not performed correctly for lowercase terms if no uppercase or mixed-case version existed

This commit is contained in:
Jean-Francois Dockes 2013-02-16 16:44:14 +01:00
parent 5f057e61c6
commit cb1e6a56be
3 changed files with 30 additions and 6 deletions

View file

@ -81,8 +81,14 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// Walk the list of all terms, and stem/unac each. // Walk the list of all terms, and stem/unac each.
string ermsg; string ermsg;
try { try {
for (Xapian::TermIterator it = wdb.allterms_begin(); Xapian::TermIterator it = wdb.allterms_begin();
it != wdb.allterms_end(); it++) { // We'd want to skip to the first non-prefixed term, but this is a bit
// complicated, so we just jump over most of the prefixed terms and then
// skip the rest one by one.
it.skip_to(wrap_prefix("Z"));
for ( ;it != wdb.allterms_end(); it++) {
if (has_prefix(*it))
continue;
// Detect and skip CJK terms. // Detect and skip CJK terms.
Utf8Iter utfit(*it); Utf8Iter utfit(*it);

View file

@ -129,6 +129,17 @@ static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
it->term.insert(0, prefix); it->term.insert(0, prefix);
} }
// Translate an expansion match type (one of the Db::ET_* values) into a
// short readable name, for use in debug log messages. Any value other
// than ET_WILD, ET_REGEXP or ET_STEM (this includes ET_NONE) yields "none".
static const char *tmtptostr(int typ)
{
    if (typ == Db::ET_WILD)
        return "wildcard";
    if (typ == Db::ET_REGEXP)
        return "regexp";
    if (typ == Db::ET_STEM)
        return "stem";
    // ET_NONE and any unrecognized value share the same label.
    return "none";
}
// Find all index terms that match an input along different expansion modes: // Find all index terms that match an input along different expansion modes:
// wildcard, regular expression, or stemming. Depending on flags we perform // wildcard, regular expression, or stemming. Depending on flags we perform
// case and/or diacritics expansion (this can be the only thing requested). // case and/or diacritics expansion (this can be the only thing requested).
@ -157,9 +168,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
stripped = o_index_stripchars; stripped = o_index_stripchars;
#endif #endif
LOGDEB(("Db::TermMatch: typ %d diacsens %d casesens %d lang [%s] term [%s] " LOGDEB(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s] "
"max %d field [%s] stripped %d\n", "max %d field [%s] stripped %d\n",
matchtyp, diac_sensitive, case_sensitive, lang.c_str(), tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
_term.c_str(), max, field.c_str(), stripped)); _term.c_str(), max, field.c_str(), stripped));
// If index is stripped, no case or diac expansion can be needed: // If index is stripped, no case or diac expansion can be needed:
@ -213,6 +224,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
it != exp.end(); it++) { it != exp.end(); it++) {
idxTermMatch(ET_NONE, "", *it, res, max, field); idxTermMatch(ET_NONE, "", *it, res, max, field);
} }
// And also expand the original expression against the
// main index: for the common case where the expression
// had no case/diac expansion (no entry in the exp db if
// the original term is lowercase and without accents).
idxTermMatch(typ_sens, lang, term, res, max, field);
} else { } else {
idxTermMatch(typ_sens, lang, term, res, max, field); idxTermMatch(typ_sens, lang, term, res, max, field);
} }

View file

@ -203,7 +203,7 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp,
string::size_type es = inexp->baseprefixlen(); string::size_type es = inexp->baseprefixlen();
string is = inexp->exp().substr(0, es); string is = inexp->exp().substr(0, es);
string::size_type preflen = m_prefix.size(); string::size_type preflen = m_prefix.size();
LOGDEB2(("XapCompSynFam::is: [%s]\n", is.c_str())); LOGDEB2(("XapCompSynFam::synKeyExpand: init section: [%s]\n", is.c_str()));
string ermsg; string ermsg;
try { try {
@ -246,9 +246,11 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp,
} }
} XCATCHERROR(ermsg); } XCATCHERROR(ermsg);
if (!ermsg.empty()) { if (!ermsg.empty()) {
LOGERR(("XapCompSynFam::keyWildExpand: xapian: [%s]\n", ermsg.c_str())); LOGERR(("XapCompSynFam::synKeyExpand: xapian: [%s]\n", ermsg.c_str()));
return false; return false;
} }
LOGDEB1(("XapCompSynFam::synKeyExpand: final: [%s]\n",
stringsToString(result).c_str()));
return true; return true;
} }