diff --git a/src/rcldb/expansiondbs.cpp b/src/rcldb/expansiondbs.cpp index 35e35773..742f9b67 100644 --- a/src/rcldb/expansiondbs.cpp +++ b/src/rcldb/expansiondbs.cpp @@ -81,8 +81,14 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, // Walk the list of all terms, and stem/unac each. string ermsg; try { - for (Xapian::TermIterator it = wdb.allterms_begin(); - it != wdb.allterms_end(); it++) { + Xapian::TermIterator it = wdb.allterms_begin(); + // We'd want to skip to the first non-prefixed term, but this is a bit + // complicated, so we just jump over most of the prefixed terms and then + // skip the rest one by one. + it.skip_to(wrap_prefix("Z")); + for ( ;it != wdb.allterms_end(); it++) { + if (has_prefix(*it)) + continue; // Detect and skip CJK terms. Utf8Iter utfit(*it); diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp index 3eb84ab8..8d380efb 100644 --- a/src/rcldb/rclterms.cpp +++ b/src/rcldb/rclterms.cpp @@ -129,6 +129,17 @@ static void addPrefix(vector& terms, const string& prefix) it->term.insert(0, prefix); } +static const char *tmtptostr(int typ) +{ + switch (typ) { + case Db::ET_WILD: return "wildcard"; + case Db::ET_REGEXP: return "regexp"; + case Db::ET_STEM: return "stem"; + case Db::ET_NONE: + default: return "none"; + } +} + // Find all index terms that match an input along different expansion modes: // wildcard, regular expression, or stemming. Depending on flags we perform // case and/or diacritics expansion (this can be the only thing requested). 
@@ -157,9 +168,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, stripped = o_index_stripchars; #endif - LOGDEB(("Db::TermMatch: typ %d diacsens %d casesens %d lang [%s] term [%s] " + LOGDEB(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s] " "max %d field [%s] stripped %d\n", - matchtyp, diac_sensitive, case_sensitive, lang.c_str(), + tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), _term.c_str(), max, field.c_str(), stripped)); // If index is stripped, no case or diac expansion can be needed: @@ -213,6 +224,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, it != exp.end(); it++) { idxTermMatch(ET_NONE, "", *it, res, max, field); } + // And also expand the original expression against the + // main index: for the common case where the expression + // had no case/diac expansion (no entry in the exp db if + // the original term is lowercase and without accents). + idxTermMatch(typ_sens, lang, term, res, max, field); } else { idxTermMatch(typ_sens, lang, term, res, max, field); } diff --git a/src/rcldb/synfamily.cpp b/src/rcldb/synfamily.cpp index 3f7cf09c..1cf079fd 100644 --- a/src/rcldb/synfamily.cpp +++ b/src/rcldb/synfamily.cpp @@ -203,7 +203,7 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp, string::size_type es = inexp->baseprefixlen(); string is = inexp->exp().substr(0, es); string::size_type preflen = m_prefix.size(); - LOGDEB2(("XapCompSynFam::is: [%s]\n", is.c_str())); + LOGDEB2(("XapCompSynFam::synKeyExpand: init section: [%s]\n", is.c_str())); string ermsg; try { @@ -246,9 +246,11 @@ bool XapComputableSynFamMember::synKeyExpand(StrMatcher* inexp, } } XCATCHERROR(ermsg); if (!ermsg.empty()) { - LOGERR(("XapCompSynFam::keyWildExpand: xapian: [%s]\n", ermsg.c_str())); + LOGERR(("XapCompSynFam::synKeyExpand: xapian: [%s]\n", ermsg.c_str())); return false; } + LOGDEB1(("XapCompSynFam::synKeyExpand: final: [%s]\n", + stringsToString(result).c_str())); 
return true; }