diff --git a/src/aspell/rclaspell.cpp b/src/aspell/rclaspell.cpp
index d2df6781..1db433f7 100644
--- a/src/aspell/rclaspell.cpp
+++ b/src/aspell/rclaspell.cpp
@@ -263,14 +263,12 @@ public:
LOGDEB2(("Aspell::buildDict: SKIP\n"));
continue;
}
-#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
continue;
m_input->swap(lower);
}
-#endif
// Got a non-empty sort-of appropriate term, let's send it to
// aspell
LOGDEB2(("Apell::buildDict: SEND\n"));
@@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason)
if (iterm.empty())
return true; //??
-#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason)
}
mterm.swap(lower);
}
-#endif
int ret = aapi.aspell_speller_check(m_data->m_speller,
mterm.c_str(), mterm.length());
@@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
if (mterm.empty())
return true; //??
-#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
}
mterm.swap(lower);
}
-#endif
AspellCanHaveError *ret;
diff --git a/src/common/autoconfig.h.in b/src/common/autoconfig.h.in
index aaca9445..eb3dd173 100644
--- a/src/common/autoconfig.h.in
+++ b/src/common/autoconfig.h.in
@@ -99,9 +99,6 @@
/* Use multiple threads for indexing */
#undef IDX_THREADS
-/* Remove case and accents from terms */
-#undef RCL_INDEX_STRIPCHARS
-
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
index c1c50ec9..ece3add9 100644
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@@ -51,10 +51,10 @@ using namespace std;
// Static, logically const, RclConfig members are initialized once from the
// first object build during process initialization.
-#ifndef RCL_INDEX_STRIPCHARS
+
// We default to a case- and diacritics-less index for now
bool o_index_stripchars = true;
-#endif
+
string RclConfig::o_localecharset;
bool ParamStale::needrecompute()
@@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf)
} else {
const char *cp = getenv("RECOLL_CONFDIR");
if (cp) {
- m_confdir = cp;
+ m_confdir = path_canon(cp);
} else {
autoconfdir = true;
m_confdir = path_cat(path_home(), ".recoll/");
@@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig()
FsTreeWalker::setNoFnmPathname();
}
-#ifndef RCL_INDEX_STRIPCHARS
static int m_index_stripchars_init = 0;
if (!m_index_stripchars_init) {
getConfParam("indexStripChars", &o_index_stripchars);
m_index_stripchars_init = 1;
}
-#endif
return true;
}
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h
index 750f2c87..b914f77a 100644
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@@ -319,9 +319,8 @@ class RclConfig {
// stripped of accents and case or a raw one. Ideally, it should be
// constant, but it needs to be initialized from the configuration, so
// there is no way to do this. It never changes after initialization
-// of course. When set, it is supposed to get all of recoll to behave like if
-// if was compiled with RCL_INDEX_STRIPCHARS
-#ifndef RCL_INDEX_STRIPCHARS
+// of course. Changing the value for an existing index requires that the
+// index be reset. When using multiple indexes, all must use the same value.
extern bool o_index_stripchars;
-#endif
+
#endif /* _RCLCONFIG_H_INCLUDED_ */
diff --git a/src/configure b/src/configure
index 853c8497..ff0148c6 100755
--- a/src/configure
+++ b/src/configure
@@ -717,7 +717,6 @@ with_fam
enable_xattr
enable_idxthreads
enable_camelcase
-enable_stripchars
enable_python_module
enable_pic
enable_qtgui
@@ -1367,9 +1366,6 @@ Optional Features:
manual" and "my sql manual" are the same, but not
the same as "mysql manual" (in phrases only and you
could raise the phrase slack to get a match).
- --enable-stripchars Remove diacritics and fold character case in indexed
- terms. This will yield less precise searches but the
- index will be smaller
--disable-python-module Do not build the Python module.
--disable-pic Do not compile library objects as position
independant code. This is incompatible with the php
@@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
fi
-# Not by default as these are little used for now.
-# Check whether --enable-stripchars was given.
-if test "${enable_stripchars+set}" = set; then :
- enableval=$enable_stripchars; stripcharsEnabled=$enableval
-else
- stripcharsEnabled=no
-fi
-
-
-if test X$stripcharsEnabled = Xyes ; then
-
-$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
-
-fi
-
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's
diff --git a/src/configure.ac b/src/configure.ac
index 0dff1ac9..b322e1a1 100644
--- a/src/configure.ac
+++ b/src/configure.ac
@@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
fi
-# Not by default as these are little used for now.
-AC_ARG_ENABLE(stripchars,
- AC_HELP_STRING([--enable-stripchars],
- [Remove diacritics and fold character case in indexed terms. This will
- yield less precise searches but the index will be smaller]),
- stripcharsEnabled=$enableval, stripcharsEnabled=no)
-
-if test X$stripcharsEnabled = Xyes ; then
- AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
-fi
-
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's
diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp
index 8f80e814..74dd88d9 100644
--- a/src/qtgui/confgui/confguiindex.cpp
+++ b/src/qtgui/confgui/confguiindex.cpp
@@ -164,34 +164,32 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
vboxLayout->setSpacing(spacing);
vboxLayout->setMargin(margin);
-#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
- ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
- ConfParamBoolW* cp1 =
- new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"),
- tr("
Automatically trigger diacritics sensitivity "
- "if the search term has accented characters "
- "(not in unac_except_trans). Else you need to "
- "use the query language and the D "
- "modifier to specify "
- "diacritics sensitivity."
- ));
- vboxLayout->addWidget(cp1);
+ ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
+ ConfParamBoolW* cp1 =
+ new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"),
+ tr("
Automatically trigger diacritics sensitivity "
+ "if the search term has accented characters "
+ "(not in unac_except_trans). Else you need to "
+ "use the query language and the D "
+ "modifier to specify "
+ "diacritics sensitivity."
+ ));
+ vboxLayout->addWidget(cp1);
- ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens"));
- ConfParamBoolW* cp2 =
- new ConfParamBoolW(this, lnk2,
- tr("Automatic character case sensitivity"),
- tr("
Automatically trigger character case "
- "sensitivity if the entry has upper-case "
- "characters in any but the first position. "
- "Else you need to use the query language and "
- "the C modifier to specify character-case "
- "sensitivity."
- ));
- vboxLayout->addWidget(cp2);
+ ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens"));
+ ConfParamBoolW* cp2 =
+ new ConfParamBoolW(this, lnk2,
+ tr("Automatic character case sensitivity"),
+ tr("
Automatically trigger character case "
+ "sensitivity if the entry has upper-case "
+ "characters in any but the first position. "
+ "Else you need to use the query language and "
+ "the C modifier to specify character-case "
+ "sensitivity."
+ ));
+ vboxLayout->addWidget(cp2);
}
-#endif
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
ConfParamIntW* cp3 =
diff --git a/src/qtgui/spell_w.cpp b/src/qtgui/spell_w.cpp
index fa51bd36..b5d3548a 100644
--- a/src/qtgui/spell_w.cpp
+++ b/src/qtgui/spell_w.cpp
@@ -119,13 +119,7 @@ void SpellW::init()
resTW->setColumnWidth(1, 150);
resTW->installEventFilter(this);
- bool stripped = false;
-#ifdef RCL_INDEX_STRIPCHARS
- stripped = true;
-#else
- stripped = o_index_stripchars;
-#endif
- if (stripped) {
+ if (o_index_stripchars) {
caseSensCB->setEnabled(false);
caseSensCB->setEnabled(false);
}
diff --git a/src/query/plaintorich.cpp b/src/query/plaintorich.cpp
index 76e6997b..9ea08fdc 100644
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit {
// (phrase or near), update positions list.
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
string dumb = term;
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#endif
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
term.c_str()));
return true;
}
-#ifndef RCL_INDEX_STRIPCHARS
}
-#endif
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
// pos, bts, bte));
diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp
index ab3493c5..71e134df 100644
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config)
map > spellings;
suggest(uterms, spellings);
if (!spellings.empty()) {
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#else
- if (true) {
-#endif
chunk <<
trans("Alternate spellings (accents suppressed): ")
<< "
";
diff --git a/src/query/xadump.cpp b/src/query/xadump.cpp
index dd64a9ef..6fad27ae 100644
--- a/src/query/xadump.cpp
+++ b/src/query/xadump.cpp
@@ -116,21 +116,15 @@ static void sigcleanup(int sig)
exit(1);
}
-#ifndef RCL_INDEX_STRIPCHARS
bool o_index_stripchars;
-#endif
inline bool has_prefix(const string& trm)
{
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#endif
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
-#ifndef RCL_INDEX_STRIPCHARS
} else {
return trm.size() > 0 && trm[0] == ':';
}
-#endif
}
int main(int argc, char **argv)
@@ -212,7 +206,6 @@ int main(int argc, char **argv)
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
-#ifndef RCL_INDEX_STRIPCHARS
// If we have terms with a leading ':' it's a new style,
// unstripped index
{
@@ -223,7 +216,6 @@ int main(int argc, char **argv)
o_index_stripchars = false;
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")< unacstemdbs;
// We can reuse the same stemmer pointers, the objects are stateless.
@@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
diacasedb(wdb, synFamDiCa, "all", &transunac);
if (!o_index_stripchars)
diacasedb.recreate();
-#endif
// Walk the list of all terms, and stem/unac each.
string ermsg;
@@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
}
string lower = *it;
-#ifndef RCL_INDEX_STRIPCHARS
// If the index is raw, compute the case-folded term which
// is the input to the stem db, and add a synonym from the
// stripped term to the cased and accented one, for accent
@@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
diacasedb.addSynonym(*it);
}
-#endif
// Dont' apply stemming to terms which don't look like
// natural language words.
@@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
stemdbs[i].addSynonym(lower);
}
-#ifndef RCL_INDEX_STRIPCHARS
// For a raw index, also maybe create a stem expansion for
// the unaccented term. While this may be incorrect, it is
// also necessary for searching in a diacritic-unsensitive
@@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
}
}
}
-#endif
}
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 65ac5251..2d5804b5 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -76,15 +76,9 @@ const string parent_prefix("F");
// Special terms to mark begin/end of field (for anchored searches), and
// page breaks
-#ifdef RCL_INDEX_STRIPCHARS
-const string start_of_field_term = "XXST";
-const string end_of_field_term = "XXND";
-static const string page_break_term = "XXPG";
-#else
string start_of_field_term;
string end_of_field_term;
const string page_break_term = "XXPG/";
-#endif
// Field name for the unsplit file name. Has to exist in the field file
// because of usage in termmatch()
@@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp)
m_flushMb(-1), m_maxFsOccupPc(0)
{
m_config = new RclConfig(*cfp);
-#ifndef RCL_INDEX_STRIPCHARS
if (start_of_field_term.empty()) {
if (o_index_stripchars) {
start_of_field_term = "XXST";
@@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp)
end_of_field_term = "XXND/";
}
}
-#endif
m_ndb = new Native(this);
if (m_config) {
@@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error)
m_reason = "Null configuration or Xapian Db";
return false;
}
- LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
- m_ndb->m_iswritable));
+ LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
+ m_ndb->m_iswritable, mode));
if (m_ndb->m_isopen) {
// We used to return an error here but I see no reason to
@@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term)
return -1;
string term = _term;
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
-#endif
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
return 0;
@@ -851,13 +841,11 @@ string Db::getSpellingSuggestion(const string& word)
string term = word;
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
-#endif
- if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
- LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
- return string();
- }
+ if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
+ LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
+ return string();
+ }
if (!isSpellingCandidate(term))
return string();
@@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
TermProcPrep tpprep(nxt);
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
-#endif
nxt = &tpprep;
TextSplitDb splitter(newdocument, nxt);
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index 91863d50..06e87531 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -133,15 +133,11 @@ public:
inline bool has_prefix(const string& trm)
{
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#endif
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
-#ifndef RCL_INDEX_STRIPCHARS
} else {
return !trm.empty() && trm[0] == ':';
}
-#endif
}
inline string strip_prefix(const string& trm)
@@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm)
if (trm.empty())
return trm;
string::size_type st = 0;
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#endif
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
if (st == string::npos)
return string();
-#ifndef RCL_INDEX_STRIPCHARS
} else {
if (has_prefix(trm)) {
st = trm.find_last_of(":") + 1;
@@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm)
return trm;
}
}
-#endif
return trm.substr(st);
}
inline string wrap_prefix(const string& pfx)
{
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
-#endif
return pfx;
-#ifndef RCL_INDEX_STRIPCHARS
} else {
return cstr_colon + pfx + cstr_colon;
}
-#endif
}
/**
@@ -462,13 +450,9 @@ extern const string udi_prefix;
extern const string parent_prefix;
extern const string mimetype_prefix;
extern const string unsplitFilenameFieldName;
-#ifdef RCL_INDEX_STRIPCHARS
-extern const string start_of_field_term;
-extern const string end_of_field_term;
-#else
extern string start_of_field_term;
extern string end_of_field_term;
-#endif
+
}
#endif /* _DB_H_INCLUDED_ */
diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp
index 82d02fb6..786858de 100644
--- a/src/rcldb/rclterms.cpp
+++ b/src/rcldb/rclterms.cpp
@@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
- bool stripped = false;
-#ifdef RCL_INDEX_STRIPCHARS
- stripped = true;
-#else
- stripped = o_index_stripchars;
-#endif
-
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
" max %d field [%s] stripped %d init res.size %u\n",
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
- _term.c_str(), max, field.c_str(), stripped, res.entries.size()));
+ _term.c_str(), max, field.c_str(), o_index_stripchars,
+ res.entries.size()));
// If index is stripped, no case or diac expansion can be needed:
// for the processing inside this routine, everything looks like
// we're all-sensitive: no use of expansion db.
// Also, convert input to lowercase and strip its accents.
string term = _term;
- if (stripped) {
+ if (o_index_stripchars) {
diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
@@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
}
}
-#ifndef RCL_INDEX_STRIPCHARS
// The case/diac expansion db
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
-#endif // RCL_INDEX_STRIPCHARS
-
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
-#ifdef RCL_INDEX_STRIPCHARS
- idxTermMatch(typ_sens, lang, term, res, max, field);
-#else
RefCntr matcher;
if (matchtyp == ET_WILD) {
matcher = RefCntr(new StrWildMatcher(term));
@@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(typ_sens, lang, term, res, max, field);
}
-#endif // RCL_INDEX_STRIPCHARS
-
} else {
// Expansion is STEM or NONE (which may still need case/diac exp)
-#ifdef RCL_INDEX_STRIPCHARS
-
- idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
-
-#else
vector lexp;
if (diac_sensitive && case_sensitive) {
// No case/diac expansion
@@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
}
}
-#endif
TermMatchCmpByTerm tcmp;
sort(res.entries.begin(), res.entries.end(), tcmp);
@@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
tmtptostr(typ), lang.c_str(), root.c_str(),
max, field.c_str(), res.entries.size()));
-#ifndef RCL_INDEX_STRIPCHARS
if (typ == ET_STEM) {
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
abort();
}
-#endif
Xapian::Database xdb = m_ndb->xrdb;
@@ -346,109 +324,87 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
}
res.prefix = prefix;
-#ifdef RCL_INDEX_STRIPCHARS
- if (typ == ET_STEM) {
- vector exp;
- StemDb db(m_ndb->xrdb);
- if (!db.stemExpand(langs, term, exp))
- return false;
- res.entries.insert(result.entries.end(), exp.begin(), exp.end());
- for (vector::iterator it = res.entries.begin();
- it != res.entries.end(); it++) {
- XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
- it->docs = xdb.get_termfreq(it->term),
- xdb, m_reason);
- if (!m_reason.empty())
- return false;
- LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
- }
- if (!prefix.empty())
- addPrefix(res.entries, prefix);
- } else
-#endif
- {
- RefCntr matcher;
- if (typ == ET_REGEXP) {
- matcher = RefCntr(new StrRegexpMatcher(root));
- if (!matcher->ok()) {
- LOGERR(("termMatch: regcomp failed: %s\n",
- matcher->getreason().c_str()))
- return false;
- }
- } else if (typ == ET_WILD) {
- matcher = RefCntr(new StrWildMatcher(root));
+ RefCntr matcher;
+ if (typ == ET_REGEXP) {
+ matcher = RefCntr(new StrRegexpMatcher(root));
+ if (!matcher->ok()) {
+ LOGERR(("termMatch: regcomp failed: %s\n",
+ matcher->getreason().c_str()))
+ return false;
}
+ } else if (typ == ET_WILD) {
+ matcher = RefCntr(new StrWildMatcher(root));
+ }
- // Find the initial section before any special char
- string::size_type es = string::npos;
- if (matcher.isNotNull()) {
- es = matcher->baseprefixlen();
- }
+ // Find the initial section before any special char
+ string::size_type es = string::npos;
+ if (matcher.isNotNull()) {
+ es = matcher->baseprefixlen();
+ }
- // Initial section: the part of the prefix+expr before the
- // first wildcard character. We only scan the part of the
- // index where this matches
- string is;
- switch (es) {
- case string::npos: is = prefix + root; break;
- case 0: is = prefix; break;
- default: is = prefix + root.substr(0, es); break;
- }
- LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
+ // Initial section: the part of the prefix+expr before the
+ // first wildcard character. We only scan the part of the
+ // index where this matches
+ string is;
+ switch (es) {
+ case string::npos: is = prefix + root; break;
+ case 0: is = prefix; break;
+ default: is = prefix + root.substr(0, es); break;
+ }
+ LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
- for (int tries = 0; tries < 2; tries++) {
- try {
- Xapian::TermIterator it = xdb.allterms_begin();
- if (!is.empty())
- it.skip_to(is.c_str());
- for (int rcnt = 0; it != xdb.allterms_end(); it++) {
- // If we're beyond the terms matching the initial
- // section, end
- if (!is.empty() && (*it).find(is) != 0)
- break;
+ for (int tries = 0; tries < 2; tries++) {
+ try {
+ Xapian::TermIterator it = xdb.allterms_begin();
+ if (!is.empty())
+ it.skip_to(is.c_str());
+ for (int rcnt = 0; it != xdb.allterms_end(); it++) {
+ // If we're beyond the terms matching the initial
+ // section, end
+ if (!is.empty() && (*it).find(is) != 0)
+ break;
- // Else try to match the term. The matcher content
- // is without prefix, so we remove this if any. We
- // just checked that the index term did begin with
- // the prefix.
- string term;
- if (!prefix.empty()) {
- term = (*it).substr(prefix.length());
- } else {
- if (has_prefix(*it)) {
- continue;
- }
- term = *it;
- }
-
- if (matcher.isNotNull() && !matcher->match(term))
+ // Else try to match the term. The matcher content
+ // is without prefix, so we remove this if any. We
+ // just checked that the index term did begin with
+ // the prefix.
+ string term;
+ if (!prefix.empty()) {
+ term = (*it).substr(prefix.length());
+ } else {
+ if (has_prefix(*it)) {
continue;
+ }
+ term = *it;
+ }
- res.entries.push_back(
- TermMatchEntry(*it, xdb.get_collection_freq(*it),
- it.get_termfreq()));
+ if (matcher.isNotNull() && !matcher->match(term))
+ continue;
- // The problem with truncating here is that this is done
- // alphabetically and we may not keep the most frequent
- // terms. OTOH, not doing it may stall the program if
- // we are walking the whole term list. We compromise
- // by cutting at 2*max
- if (max > 0 && ++rcnt >= 2*max)
- break;
- }
- m_reason.erase();
- break;
- } catch (const Xapian::DatabaseModifiedError &e) {
- m_reason = e.get_msg();
- xdb.reopen();
- continue;
- } XCATCHERROR(m_reason);
- break;
- }
- if (!m_reason.empty()) {
- LOGERR(("termMatch: %s\n", m_reason.c_str()));
- return false;
- }
+ res.entries.push_back(
+ TermMatchEntry(*it, xdb.get_collection_freq(*it),
+ it.get_termfreq()));
+
+ // The problem with truncating here is that this is done
+ // alphabetically and we may not keep the most frequent
+ // terms. OTOH, not doing it may stall the program if
+ // we are walking the whole term list. We compromise
+ // by cutting at 2*max
+ if (max > 0 && ++rcnt >= 2*max)
+ break;
+ }
+ m_reason.erase();
+ break;
+ } catch (const Xapian::DatabaseModifiedError &e) {
+ m_reason = e.get_msg();
+ xdb.reopen();
+ continue;
+ } XCATCHERROR(m_reason);
+ break;
+ }
+ if (!m_reason.empty()) {
+ LOGERR(("termMatch: %s\n", m_reason.c_str()));
+ return false;
}
return true;
diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
index fb44f04e..7581fb19 100644
--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
int termmatchsens = 0;
-#ifndef RCL_INDEX_STRIPCHARS
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
bool case_sensitive = (mods & SDCM_CASESENS) != 0;
@@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
termmatchsens |= Db::ET_CASESENS;
if (diac_sensitive)
termmatchsens |= Db::ET_DIACSENS;
-#endif
if (noexpansion) {
oexp.push_back(prefix + term);
@@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
//tpcommon.onlygrams(true);
TermProcPrep tpprep(nxt);
-#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
-#endif
nxt = &tpprep;
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp
index e1e0d304..1a5d40fe 100644
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(term, result);
}
-#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string unac;
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
@@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(unac, result);
}
}
-#endif
if (result.empty())
result.push_back(term);
diff --git a/src/rcldb/synfamily.h b/src/rcldb/synfamily.h
index a6169b06..219eb0ee 100644
--- a/src/rcldb/synfamily.h
+++ b/src/rcldb/synfamily.h
@@ -212,7 +212,6 @@ private:
// Lowercase accented stem to expansion. Family member name: language
static const std::string synFamStem("Stm");
-#ifndef RCL_INDEX_STRIPCHARS
// Lowercase unaccented stem to expansion. Family member name: language
static const std::string synFamStemUnac("StU");
@@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU");
// member, named "all". This set is used for separate case/diac
// expansion by post-filtering the results of dual expansion.
static const std::string synFamDiCa("DCa");
-#endif // !RCL_INDEX_STRIPCHARS
} // end namespace Rcl
diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp
index 44917de6..b2a8d4fc 100644
--- a/src/utils/pathut.cpp
+++ b/src/utils/pathut.cpp
@@ -580,7 +580,7 @@ int Pidfile::flopen()
{
const char *path = m_path.c_str();
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
- m_reason = "Open failed";
+ m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
return -1;
}