diff --git a/src/aspell/rclaspell.cpp b/src/aspell/rclaspell.cpp index d2df6781..1db433f7 100644 --- a/src/aspell/rclaspell.cpp +++ b/src/aspell/rclaspell.cpp @@ -263,14 +263,12 @@ public: LOGDEB2(("Aspell::buildDict: SKIP\n")); continue; } -#ifndef RCL_INDEX_STRIPCHARS if (!o_index_stripchars) { string lower; if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD)) continue; m_input->swap(lower); } -#endif // Got a non-empty sort-of appropriate term, let's send it to // aspell LOGDEB2(("Apell::buildDict: SEND\n")); @@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason) if (iterm.empty()) return true; //?? -#ifndef RCL_INDEX_STRIPCHARS if (!o_index_stripchars) { string lower; if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) { @@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason) } mterm.swap(lower); } -#endif int ret = aapi.aspell_speller_check(m_data->m_speller, mterm.c_str(), mterm.length()); @@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term, if (mterm.empty()) return true; //?? -#ifndef RCL_INDEX_STRIPCHARS if (!o_index_stripchars) { string lower; if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) { @@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term, } mterm.swap(lower); } -#endif AspellCanHaveError *ret; diff --git a/src/common/autoconfig.h.in b/src/common/autoconfig.h.in index aaca9445..eb3dd173 100644 --- a/src/common/autoconfig.h.in +++ b/src/common/autoconfig.h.in @@ -99,9 +99,6 @@ /* Use multiple threads for indexing */ #undef IDX_THREADS -/* Remove case and accents from terms */ -#undef RCL_INDEX_STRIPCHARS - /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index c1c50ec9..ece3add9 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -51,10 +51,10 @@ using namespace std; // Static, logically const, RclConfig members are initialized once from the // first object build during process initialization. -#ifndef RCL_INDEX_STRIPCHARS + // We default to a case- and diacritics-less index for now bool o_index_stripchars = true; -#endif + string RclConfig::o_localecharset; bool ParamStale::needrecompute() @@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf) } else { const char *cp = getenv("RECOLL_CONFDIR"); if (cp) { - m_confdir = cp; + m_confdir = path_canon(cp); } else { autoconfdir = true; m_confdir = path_cat(path_home(), ".recoll/"); @@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig() FsTreeWalker::setNoFnmPathname(); } -#ifndef RCL_INDEX_STRIPCHARS static int m_index_stripchars_init = 0; if (!m_index_stripchars_init) { getConfParam("indexStripChars", &o_index_stripchars); m_index_stripchars_init = 1; } -#endif return true; } diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index 750f2c87..b914f77a 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -319,9 +319,8 @@ class RclConfig { // stripped of accents and case or a raw one. Ideally, it should be // constant, but it needs to be initialized from the configuration, so // there is no way to do this. It never changes after initialization -// of course. When set, it is supposed to get all of recoll to behave like if -// if was compiled with RCL_INDEX_STRIPCHARS -#ifndef RCL_INDEX_STRIPCHARS +// of course. Changing the value on a given index imposes a +// reset. When using multiple indexes, all must have the same value extern bool o_index_stripchars; -#endif + #endif /* _RCLCONFIG_H_INCLUDED_ */ diff --git a/src/configure b/src/configure index 853c8497..ff0148c6 100755 --- a/src/configure +++ b/src/configure @@ -717,7 +717,6 @@ with_fam enable_xattr enable_idxthreads enable_camelcase -enable_stripchars enable_python_module enable_pic enable_qtgui @@ -1367,9 +1366,6 @@ Optional Features: manual" and "my sql manual" are the same, but not the same as "mysql manual" (in phrases only and you could raise the phrase slack to get a match). - --enable-stripchars Remove diacritics and fold character case in indexed - terms. This will yield less precise searches but the - index will be smaller --disable-python-module Do not build the Python module. --disable-pic Do not compile library objects as position independant code. This is incompatible with the php @@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h fi -# Not by default as these are little used for now. -# Check whether --enable-stripchars was given. -if test "${enable_stripchars+set}" = set; then : - enableval=$enable_stripchars; stripcharsEnabled=$enableval -else - stripcharsEnabled=no -fi - - -if test X$stripcharsEnabled = Xyes ; then - -$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h - -fi - # Disable building the python module. This is built by default, because # it's really the easiest way to interface and extend recoll. It forces PIC # objects for everything (indexing performance impact: 1%), because it's diff --git a/src/configure.ac b/src/configure.ac index 0dff1ac9..b322e1a1 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words]) fi -# Not by default as these are little used for now. -AC_ARG_ENABLE(stripchars, - AC_HELP_STRING([--enable-stripchars], - [Remove diacritics and fold character case in indexed terms. This will - yield less precise searches but the index will be smaller]), - stripcharsEnabled=$enableval, stripcharsEnabled=no) - -if test X$stripcharsEnabled = Xyes ; then - AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms]) -fi - # Disable building the python module. This is built by default, because # it's really the easiest way to interface and extend recoll. It forces PIC # objects for everything (indexing performance impact: 1%), because it's diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp index 8f80e814..74dd88d9 100644 --- a/src/qtgui/confgui/confguiindex.cpp +++ b/src/qtgui/confgui/confguiindex.cpp @@ -164,34 +164,32 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config) vboxLayout->setSpacing(spacing); vboxLayout->setMargin(margin); -#ifndef RCL_INDEX_STRIPCHARS if (!o_index_stripchars) { - ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens")); - ConfParamBoolW* cp1 = - new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"), - tr("

Automatically trigger diacritics sensitivity " - "if the search term has accented characters " - "(not in unac_except_trans). Else you need to " - "use the query language and the D " - "modifier to specify " - "diacritics sensitivity." - )); - vboxLayout->addWidget(cp1); + ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens")); + ConfParamBoolW* cp1 = + new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"), + tr("

Automatically trigger diacritics sensitivity " + "if the search term has accented characters " + "(not in unac_except_trans). Else you need to " + "use the query language and the D " + "modifier to specify " + "diacritics sensitivity." + )); + vboxLayout->addWidget(cp1); - ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens")); - ConfParamBoolW* cp2 = - new ConfParamBoolW(this, lnk2, - tr("Automatic character case sensitivity"), - tr("

Automatically trigger character case " - "sensitivity if the entry has upper-case " - "characters in any but the first position. " - "Else you need to use the query language and " - "the C modifier to specify character-case " - "sensitivity." - )); - vboxLayout->addWidget(cp2); + ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens")); + ConfParamBoolW* cp2 = + new ConfParamBoolW(this, lnk2, + tr("Automatic character case sensitivity"), + tr("

Automatically trigger character case " + "sensitivity if the entry has upper-case " + "characters in any but the first position. " + "Else you need to use the query language and " + "the C modifier to specify character-case " + "sensitivity." + )); + vboxLayout->addWidget(cp2); } -#endif ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand")); ConfParamIntW* cp3 = diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp index fa51bd36..b5d3548a 100644 --- a/src/qtgui/spell_w.cpp +++ b/src/qtgui/spell_w.cpp @@ -119,13 +119,7 @@ void SpellW::init() resTW->setColumnWidth(1, 150); resTW->installEventFilter(this); - bool stripped = false; -#ifdef RCL_INDEX_STRIPCHARS - stripped = true; -#else - stripped = o_index_stripchars; -#endif - if (stripped) { + if (o_index_stripchars) { caseSensCB->setEnabled(false); caseSensCB->setEnabled(false); } diff --git a/src/query/plaintorich.cpp b/src/query/plaintorich.cpp index 76e6997b..9ea08fdc 100644 --- a/src/query/plaintorich.cpp +++ b/src/query/plaintorich.cpp @@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit { // (phrase or near), update positions list. virtual bool takeword(const std::string& term, int pos, int bts, int bte) { string dumb = term; -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#endif if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) { LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n", term.c_str())); return true; } -#ifndef RCL_INDEX_STRIPCHARS } -#endif //LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(), // pos, bts, bte)); diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp index ab3493c5..71e134df 100644 --- a/src/query/reslistpager.cpp +++ b/src/query/reslistpager.cpp @@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config) map > spellings; suggest(uterms, spellings); if (!spellings.empty()) { -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#else - if (true) { -#endif chunk << trans("

Alternate spellings (accents suppressed): ") << "

"; diff --git a/src/query/xadump.cpp b/src/query/xadump.cpp index dd64a9ef..6fad27ae 100644 --- a/src/query/xadump.cpp +++ b/src/query/xadump.cpp @@ -116,21 +116,15 @@ static void sigcleanup(int sig) exit(1); } -#ifndef RCL_INDEX_STRIPCHARS bool o_index_stripchars; -#endif inline bool has_prefix(const string& trm) { -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#endif return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z'; -#ifndef RCL_INDEX_STRIPCHARS } else { return trm.size() > 0 && trm[0] == ':'; } -#endif } int main(int argc, char **argv) @@ -212,7 +206,6 @@ int main(int argc, char **argv) cout << "DB: ndocs " << db->get_doccount() << " lastdocid " << db->get_lastdocid() << " avglength " << db->get_avlength() << endl; -#ifndef RCL_INDEX_STRIPCHARS // If we have terms with a leading ':' it's a new style, // unstripped index { @@ -223,7 +216,6 @@ int main(int argc, char **argv) o_index_stripchars = false; cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")< unacstemdbs; // We can reuse the same stemmer pointers, the objects are stateless. @@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, diacasedb(wdb, synFamDiCa, "all", &transunac); if (!o_index_stripchars) diacasedb.recreate(); -#endif // Walk the list of all terms, and stem/unac each. string ermsg; @@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, } string lower = *it; -#ifndef RCL_INDEX_STRIPCHARS // If the index is raw, compute the case-folded term which // is the input to the stem db, and add a synonym from the // stripped term to the cased and accented one, for accent @@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD); diacasedb.addSynonym(*it); } -#endif // Dont' apply stemming to terms which don't look like // natural language words. @@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, stemdbs[i].addSynonym(lower); } -#ifndef RCL_INDEX_STRIPCHARS // For a raw index, also maybe create a stem expansion for // the unaccented term. While this may be incorrect, it is // also necessary for searching in a diacritic-unsensitive @@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb, } } } -#endif } } XCATCHERROR(ermsg); if (!ermsg.empty()) { diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 65ac5251..2d5804b5 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -76,15 +76,9 @@ const string parent_prefix("F"); // Special terms to mark begin/end of field (for anchored searches), and // page breaks -#ifdef RCL_INDEX_STRIPCHARS -const string start_of_field_term = "XXST"; -const string end_of_field_term = "XXND"; -static const string page_break_term = "XXPG"; -#else string start_of_field_term; string end_of_field_term; const string page_break_term = "XXPG/"; -#endif // Field name for the unsplit file name. Has to exist in the field file // because of usage in termmatch() @@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp) m_flushMb(-1), m_maxFsOccupPc(0) { m_config = new RclConfig(*cfp); -#ifndef RCL_INDEX_STRIPCHARS if (start_of_field_term.empty()) { if (o_index_stripchars) { start_of_field_term = "XXST"; @@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp) end_of_field_term = "XXND/"; } } -#endif m_ndb = new Native(this); if (m_config) { @@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error) m_reason = "Null configuration or Xapian Db"; return false; } - LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, - m_ndb->m_iswritable)); + LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen, + m_ndb->m_iswritable, mode)); if (m_ndb->m_isopen) { // We used to return an error here but I see no reason to @@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term) return -1; string term = _term; -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) -#endif if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str())); return 0; @@ -851,13 +841,11 @@ string Db::getSpellingSuggestion(const string& word) string term = word; -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) -#endif - if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) { - LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str())); - return string(); - } + if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) { + LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str())); + return string(); + } if (!isSpellingCandidate(term)) return string(); @@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) //TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon; TermProcPrep tpprep(nxt); -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) -#endif nxt = &tpprep; TextSplitDb splitter(newdocument, nxt); diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 91863d50..06e87531 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -133,15 +133,11 @@ public: inline bool has_prefix(const string& trm) { -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#endif return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z'; -#ifndef RCL_INDEX_STRIPCHARS } else { return !trm.empty() && trm[0] == ':'; } -#endif } inline string strip_prefix(const string& trm) @@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm) if (trm.empty()) return trm; string::size_type st = 0; -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#endif st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ"); if (st == string::npos) return string(); -#ifndef RCL_INDEX_STRIPCHARS } else { if (has_prefix(trm)) { st = trm.find_last_of(":") + 1; @@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm) return trm; } } -#endif return trm.substr(st); } inline string wrap_prefix(const string& pfx) { -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) { -#endif return pfx; -#ifndef RCL_INDEX_STRIPCHARS } else { return cstr_colon + pfx + cstr_colon; } -#endif } /** @@ -462,13 +450,9 @@ extern const string udi_prefix; extern const string parent_prefix; extern const string mimetype_prefix; extern const string unsplitFilenameFieldName; -#ifdef RCL_INDEX_STRIPCHARS -extern const string start_of_field_term; -extern const string end_of_field_term; -#else extern string start_of_field_term; extern string end_of_field_term; -#endif + } #endif /* _DB_H_INCLUDED_ */ diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp index 82d02fb6..786858de 100644 --- a/src/rcldb/rclterms.cpp +++ b/src/rcldb/rclterms.cpp @@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0; bool case_sensitive = (typ_sens & ET_CASESENS) != 0; - bool stripped = false; -#ifdef RCL_INDEX_STRIPCHARS - stripped = true; -#else - stripped = o_index_stripchars; -#endif - LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]" " max %d field [%s] stripped %d init res.size %u\n", tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), - _term.c_str(), max, field.c_str(), stripped, res.entries.size())); + _term.c_str(), max, field.c_str(), o_index_stripchars, + res.entries.size())); // If index is stripped, no case or diac expansion can be needed: // for the processing inside this routine, everything looks like // we're all-sensitive: no use of expansion db. // Also, convert input to lowercase and strip its accents. string term = _term; - if (stripped) { + if (o_index_stripchars) { diac_sensitive = case_sensitive = true; if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str())); @@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, } } -#ifndef RCL_INDEX_STRIPCHARS // The case/diac expansion db SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD); XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans); -#endif // RCL_INDEX_STRIPCHARS - if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) { -#ifdef RCL_INDEX_STRIPCHARS - idxTermMatch(typ_sens, lang, term, res, max, field); -#else RefCntr matcher; if (matchtyp == ET_WILD) { matcher = RefCntr(new StrWildMatcher(term)); @@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, idxTermMatch(typ_sens, lang, term, res, max, field); } -#endif // RCL_INDEX_STRIPCHARS - } else { // Expansion is STEM or NONE (which may still need case/diac exp) -#ifdef RCL_INDEX_STRIPCHARS - - idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field); - -#else vector lexp; if (diac_sensitive && case_sensitive) { // No case/diac expansion @@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term, idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field); } } -#endif TermMatchCmpByTerm tcmp; sort(res.entries.begin(), res.entries.end(), tcmp); @@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root, tmtptostr(typ), lang.c_str(), root.c_str(), max, field.c_str(), res.entries.size())); -#ifndef RCL_INDEX_STRIPCHARS if (typ == ET_STEM) { LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n")); abort(); } -#endif Xapian::Database xdb = m_ndb->xrdb; @@ -346,109 +324,87 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root, } res.prefix = prefix; -#ifdef RCL_INDEX_STRIPCHARS - if (typ == ET_STEM) { - vector exp; - StemDb db(m_ndb->xrdb); - if (!db.stemExpand(langs, term, exp)) - return false; - res.entries.insert(result.entries.end(), exp.begin(), exp.end()); - for (vector::iterator it = res.entries.begin(); - it != res.entries.end(); it++) { - XAPTRY(it->wcf = xdb.get_collection_freq(it->term); - it->docs = xdb.get_termfreq(it->term), - xdb, m_reason); - if (!m_reason.empty()) - return false; - LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str())); - } - if (!prefix.empty()) - addPrefix(res.entries, prefix); - } else -#endif - { - RefCntr matcher; - if (typ == ET_REGEXP) { - matcher = RefCntr(new StrRegexpMatcher(root)); - if (!matcher->ok()) { - LOGERR(("termMatch: regcomp failed: %s\n", - matcher->getreason().c_str())) - return false; - } - } else if (typ == ET_WILD) { - matcher = RefCntr(new StrWildMatcher(root)); + RefCntr matcher; + if (typ == ET_REGEXP) { + matcher = RefCntr(new StrRegexpMatcher(root)); + if (!matcher->ok()) { + LOGERR(("termMatch: regcomp failed: %s\n", + matcher->getreason().c_str())) + return false; } + } else if (typ == ET_WILD) { + matcher = RefCntr(new StrWildMatcher(root)); + } - // Find the initial section before any special char - string::size_type es = string::npos; - if (matcher.isNotNull()) { - es = matcher->baseprefixlen(); - } + // Find the initial section before any special char + string::size_type es = string::npos; + if (matcher.isNotNull()) { + es = matcher->baseprefixlen(); + } - // Initial section: the part of the prefix+expr before the - // first wildcard character. We only scan the part of the - // index where this matches - string is; - switch (es) { - case string::npos: is = prefix + root; break; - case 0: is = prefix; break; - default: is = prefix + root.substr(0, es); break; - } - LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str())); + // Initial section: the part of the prefix+expr before the + // first wildcard character. We only scan the part of the + // index where this matches + string is; + switch (es) { + case string::npos: is = prefix + root; break; + case 0: is = prefix; break; + default: is = prefix + root.substr(0, es); break; + } + LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str())); - for (int tries = 0; tries < 2; tries++) { - try { - Xapian::TermIterator it = xdb.allterms_begin(); - if (!is.empty()) - it.skip_to(is.c_str()); - for (int rcnt = 0; it != xdb.allterms_end(); it++) { - // If we're beyond the terms matching the initial - // section, end - if (!is.empty() && (*it).find(is) != 0) - break; + for (int tries = 0; tries < 2; tries++) { + try { + Xapian::TermIterator it = xdb.allterms_begin(); + if (!is.empty()) + it.skip_to(is.c_str()); + for (int rcnt = 0; it != xdb.allterms_end(); it++) { + // If we're beyond the terms matching the initial + // section, end + if (!is.empty() && (*it).find(is) != 0) + break; - // Else try to match the term. The matcher content - // is without prefix, so we remove this if any. We - // just checked that the index term did begin with - // the prefix. - string term; - if (!prefix.empty()) { - term = (*it).substr(prefix.length()); - } else { - if (has_prefix(*it)) { - continue; - } - term = *it; - } - - if (matcher.isNotNull() && !matcher->match(term)) + // Else try to match the term. The matcher content + // is without prefix, so we remove this if any. We + // just checked that the index term did begin with + // the prefix. + string term; + if (!prefix.empty()) { + term = (*it).substr(prefix.length()); + } else { + if (has_prefix(*it)) { continue; + } + term = *it; + } - res.entries.push_back( - TermMatchEntry(*it, xdb.get_collection_freq(*it), - it.get_termfreq())); + if (matcher.isNotNull() && !matcher->match(term)) + continue; - // The problem with truncating here is that this is done - // alphabetically and we may not keep the most frequent - // terms. OTOH, not doing it may stall the program if - // we are walking the whole term list. We compromise - // by cutting at 2*max - if (max > 0 && ++rcnt >= 2*max) - break; - } - m_reason.erase(); - break; - } catch (const Xapian::DatabaseModifiedError &e) { - m_reason = e.get_msg(); - xdb.reopen(); - continue; - } XCATCHERROR(m_reason); - break; - } - if (!m_reason.empty()) { - LOGERR(("termMatch: %s\n", m_reason.c_str())); - return false; - } + res.entries.push_back( + TermMatchEntry(*it, xdb.get_collection_freq(*it), + it.get_termfreq())); + + // The problem with truncating here is that this is done + // alphabetically and we may not keep the most frequent + // terms. OTOH, not doing it may stall the program if + // we are walking the whole term list. We compromise + // by cutting at 2*max + if (max > 0 && ++rcnt >= 2*max) + break; + } + m_reason.erase(); + break; + } catch (const Xapian::DatabaseModifiedError &e) { + m_reason = e.get_msg(); + xdb.reopen(); + continue; + } XCATCHERROR(m_reason); + break; + } + if (!m_reason.empty()) { + LOGERR(("termMatch: %s\n", m_reason.c_str())); + return false; } return true; diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index fb44f04e..7581fb19 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, int termmatchsens = 0; -#ifndef RCL_INDEX_STRIPCHARS bool diac_sensitive = (mods & SDCM_DIACSENS) != 0; bool case_sensitive = (mods & SDCM_CASESENS) != 0; @@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, termmatchsens |= Db::ET_CASESENS; if (diac_sensitive) termmatchsens |= Db::ET_DIACSENS; -#endif if (noexpansion) { oexp.push_back(prefix + term); @@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon; //tpcommon.onlygrams(true); TermProcPrep tpprep(nxt); -#ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) -#endif nxt = &tpprep; TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp index e1e0d304..1a5d40fe 100644 --- a/src/rcldb/stemdb.cpp +++ b/src/rcldb/stemdb.cpp @@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term, (void)expander.synExpand(term, result); } -#ifndef RCL_INDEX_STRIPCHARS if (!o_index_stripchars) { string unac; unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC); @@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term, (void)expander.synExpand(unac, result); } } -#endif if (result.empty()) result.push_back(term); diff --git a/src/rcldb/synfamily.h b/src/rcldb/synfamily.h index a6169b06..219eb0ee 100644 --- a/src/rcldb/synfamily.h +++ b/src/rcldb/synfamily.h @@ -212,7 +212,6 @@ private: // Lowercase accented stem to expansion. Family member name: language static const std::string synFamStem("Stm"); -#ifndef RCL_INDEX_STRIPCHARS // Lowercase unaccented stem to expansion. Family member name: language static const std::string synFamStemUnac("StU"); @@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU"); // member, named "all". This set is used for separate case/diac // expansion by post-filtering the results of dual expansion. static const std::string synFamDiCa("DCa"); -#endif // !RCL_INDEX_STRIPCHARS } // end namespace Rcl diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 44917de6..b2a8d4fc 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -580,7 +580,7 @@ int Pidfile::flopen() { const char *path = m_path.c_str(); if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) { - m_reason = "Open failed"; + m_reason = "Open failed: [" + m_path + "]: " + strerror(errno); return -1; }