Got rid of the RCL_INDEX_STRIPCHARS compile-time variable, and the corresponding configure option, to make the code more readable. Also make sure that RECOLL_CONFDIR from the environment gets translated to an absolute path.

This commit is contained in:
Jean-Francois Dockes 2013-03-27 17:38:11 +01:00
parent b3c602db73
commit 09c6ae2d60
20 changed files with 114 additions and 275 deletions

View file

@ -263,14 +263,12 @@ public:
LOGDEB2(("Aspell::buildDict: SKIP\n")); LOGDEB2(("Aspell::buildDict: SKIP\n"));
continue; continue;
} }
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) { if (!o_index_stripchars) {
string lower; string lower;
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD)) if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
continue; continue;
m_input->swap(lower); m_input->swap(lower);
} }
#endif
// Got a non-empty sort-of appropriate term, let's send it to // Got a non-empty sort-of appropriate term, let's send it to
// aspell // aspell
LOGDEB2(("Apell::buildDict: SEND\n")); LOGDEB2(("Apell::buildDict: SEND\n"));
@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason)
if (iterm.empty()) if (iterm.empty())
return true; //?? return true; //??
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) { if (!o_index_stripchars) {
string lower; string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) { if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason)
} }
mterm.swap(lower); mterm.swap(lower);
} }
#endif
int ret = aapi.aspell_speller_check(m_data->m_speller, int ret = aapi.aspell_speller_check(m_data->m_speller,
mterm.c_str(), mterm.length()); mterm.c_str(), mterm.length());
@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
if (mterm.empty()) if (mterm.empty())
return true; //?? return true; //??
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) { if (!o_index_stripchars) {
string lower; string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) { if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
} }
mterm.swap(lower); mterm.swap(lower);
} }
#endif
AspellCanHaveError *ret; AspellCanHaveError *ret;

View file

@ -99,9 +99,6 @@
/* Use multiple threads for indexing */ /* Use multiple threads for indexing */
#undef IDX_THREADS #undef IDX_THREADS
/* Remove case and accents from terms */
#undef RCL_INDEX_STRIPCHARS
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS #undef STDC_HEADERS

View file

@ -51,10 +51,10 @@ using namespace std;
// Static, logically const, RclConfig members are initialized once from the // Static, logically const, RclConfig members are initialized once from the
// first object build during process initialization. // first object build during process initialization.
#ifndef RCL_INDEX_STRIPCHARS
// We default to a case- and diacritics-less index for now // We default to a case- and diacritics-less index for now
bool o_index_stripchars = true; bool o_index_stripchars = true;
#endif
string RclConfig::o_localecharset; string RclConfig::o_localecharset;
bool ParamStale::needrecompute() bool ParamStale::needrecompute()
@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf)
} else { } else {
const char *cp = getenv("RECOLL_CONFDIR"); const char *cp = getenv("RECOLL_CONFDIR");
if (cp) { if (cp) {
m_confdir = cp; m_confdir = path_canon(cp);
} else { } else {
autoconfdir = true; autoconfdir = true;
m_confdir = path_cat(path_home(), ".recoll/"); m_confdir = path_cat(path_home(), ".recoll/");
@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig()
FsTreeWalker::setNoFnmPathname(); FsTreeWalker::setNoFnmPathname();
} }
#ifndef RCL_INDEX_STRIPCHARS
static int m_index_stripchars_init = 0; static int m_index_stripchars_init = 0;
if (!m_index_stripchars_init) { if (!m_index_stripchars_init) {
getConfParam("indexStripChars", &o_index_stripchars); getConfParam("indexStripChars", &o_index_stripchars);
m_index_stripchars_init = 1; m_index_stripchars_init = 1;
} }
#endif
return true; return true;
} }

View file

@ -319,9 +319,8 @@ class RclConfig {
// stripped of accents and case or a raw one. Ideally, it should be // stripped of accents and case or a raw one. Ideally, it should be
// constant, but it needs to be initialized from the configuration, so // constant, but it needs to be initialized from the configuration, so
// there is no way to do this. It never changes after initialization // there is no way to do this. It never changes after initialization
// of course. When set, it is supposed to get all of recoll to behave like if // of course. Changing the value on a given index imposes a
// if was compiled with RCL_INDEX_STRIPCHARS // reset. When using multiple indexes, all must have the same value
#ifndef RCL_INDEX_STRIPCHARS
extern bool o_index_stripchars; extern bool o_index_stripchars;
#endif
#endif /* _RCLCONFIG_H_INCLUDED_ */ #endif /* _RCLCONFIG_H_INCLUDED_ */

19
src/configure vendored
View file

@ -717,7 +717,6 @@ with_fam
enable_xattr enable_xattr
enable_idxthreads enable_idxthreads
enable_camelcase enable_camelcase
enable_stripchars
enable_python_module enable_python_module
enable_pic enable_pic
enable_qtgui enable_qtgui
@ -1367,9 +1366,6 @@ Optional Features:
manual" and "my sql manual" are the same, but not manual" and "my sql manual" are the same, but not
the same as "mysql manual" (in phrases only and you the same as "mysql manual" (in phrases only and you
could raise the phrase slack to get a match). could raise the phrase slack to get a match).
--enable-stripchars Remove diacritics and fold character case in indexed
terms. This will yield less precise searches but the
index will be smaller
--disable-python-module Do not build the Python module. --disable-python-module Do not build the Python module.
--disable-pic Do not compile library objects as position --disable-pic Do not compile library objects as position
independant code. This is incompatible with the php independant code. This is incompatible with the php
@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
fi fi
# Not by default as these are little used for now.
# Check whether --enable-stripchars was given.
if test "${enable_stripchars+set}" = set; then :
enableval=$enable_stripchars; stripcharsEnabled=$enableval
else
stripcharsEnabled=no
fi
if test X$stripcharsEnabled = Xyes ; then
$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
fi
# Disable building the python module. This is built by default, because # Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC # it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's # objects for everything (indexing performance impact: 1%), because it's

View file

@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words]) AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
fi fi
# Not by default as these are little used for now.
AC_ARG_ENABLE(stripchars,
AC_HELP_STRING([--enable-stripchars],
[Remove diacritics and fold character case in indexed terms. This will
yield less precise searches but the index will be smaller]),
stripcharsEnabled=$enableval, stripcharsEnabled=no)
if test X$stripcharsEnabled = Xyes ; then
AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
fi
# Disable building the python module. This is built by default, because # Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC # it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's # objects for everything (indexing performance impact: 1%), because it's

View file

@ -164,7 +164,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
vboxLayout->setSpacing(spacing); vboxLayout->setSpacing(spacing);
vboxLayout->setMargin(margin); vboxLayout->setMargin(margin);
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) { if (!o_index_stripchars) {
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens")); ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
ConfParamBoolW* cp1 = ConfParamBoolW* cp1 =
@ -191,7 +190,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
)); ));
vboxLayout->addWidget(cp2); vboxLayout->addWidget(cp2);
} }
#endif
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand")); ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
ConfParamIntW* cp3 = ConfParamIntW* cp3 =

View file

@ -119,13 +119,7 @@ void SpellW::init()
resTW->setColumnWidth(1, 150); resTW->setColumnWidth(1, 150);
resTW->installEventFilter(this); resTW->installEventFilter(this);
bool stripped = false; if (o_index_stripchars) {
#ifdef RCL_INDEX_STRIPCHARS
stripped = true;
#else
stripped = o_index_stripchars;
#endif
if (stripped) {
caseSensCB->setEnabled(false); caseSensCB->setEnabled(false);
caseSensCB->setEnabled(false); caseSensCB->setEnabled(false);
} }

View file

@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit {
// (phrase or near), update positions list. // (phrase or near), update positions list.
virtual bool takeword(const std::string& term, int pos, int bts, int bte) { virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
string dumb = term; string dumb = term;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#endif
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) { if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n", LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
term.c_str())); term.c_str()));
return true; return true;
} }
#ifndef RCL_INDEX_STRIPCHARS
} }
#endif
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(), //LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
// pos, bts, bte)); // pos, bts, bte));

View file

@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config)
map<string, vector<string> > spellings; map<string, vector<string> > spellings;
suggest(uterms, spellings); suggest(uterms, spellings);
if (!spellings.empty()) { if (!spellings.empty()) {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#else
if (true) {
#endif
chunk << chunk <<
trans("<p><i>Alternate spellings (accents suppressed): </i>") trans("<p><i>Alternate spellings (accents suppressed): </i>")
<< "<br /><blockquote>"; << "<br /><blockquote>";

View file

@ -116,21 +116,15 @@ static void sigcleanup(int sig)
exit(1); exit(1);
} }
#ifndef RCL_INDEX_STRIPCHARS
bool o_index_stripchars; bool o_index_stripchars;
#endif
inline bool has_prefix(const string& trm) inline bool has_prefix(const string& trm)
{ {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#endif
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z'; return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
#ifndef RCL_INDEX_STRIPCHARS
} else { } else {
return trm.size() > 0 && trm[0] == ':'; return trm.size() > 0 && trm[0] == ':';
} }
#endif
} }
int main(int argc, char **argv) int main(int argc, char **argv)
@ -212,7 +206,6 @@ int main(int argc, char **argv)
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " << cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
db->get_lastdocid() << " avglength " << db->get_avlength() << endl; db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
#ifndef RCL_INDEX_STRIPCHARS
// If we have terms with a leading ':' it's a new style, // If we have terms with a leading ':' it's a new style,
// unstripped index // unstripped index
{ {
@ -223,7 +216,6 @@ int main(int argc, char **argv)
o_index_stripchars = false; o_index_stripchars = false;
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl; cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
} }
#endif
if (op_flags & OPT_T) { if (op_flags & OPT_T) {
Xapian::TermIterator term; Xapian::TermIterator term;

View file

@ -36,10 +36,6 @@ using namespace std;
namespace Rcl { namespace Rcl {
#ifdef RCL_INDEX_STRIPCHARS
#define bufprefix(BUF, L) {(BUF)[0] = L;}
#define bpoffs() 1
#else
static inline void bufprefix(char *buf, char c) static inline void bufprefix(char *buf, char c)
{ {
if (o_index_stripchars) { if (o_index_stripchars) {
@ -54,7 +50,6 @@ static inline int bpoffs()
{ {
return o_index_stripchars ? 1 : 3; return o_index_stripchars ? 1 : 3;
} }
#endif
Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2) Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{ {

View file

@ -48,9 +48,7 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// If langs is empty and we don't need casediac expansion, then no need to // If langs is empty and we don't need casediac expansion, then no need to
// walk the big list // walk the big list
if (langs.empty()) { if (langs.empty()) {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) if (o_index_stripchars)
#endif
return true; return true;
} }
@ -68,7 +66,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
stemdbs.back().recreate(); stemdbs.back().recreate();
} }
#ifndef RCL_INDEX_STRIPCHARS
// Unaccented stem dbs // Unaccented stem dbs
vector<XapWritableComputableSynFamMember> unacstemdbs; vector<XapWritableComputableSynFamMember> unacstemdbs;
// We can reuse the same stemmer pointers, the objects are stateless. // We can reuse the same stemmer pointers, the objects are stateless.
@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
diacasedb(wdb, synFamDiCa, "all", &transunac); diacasedb(wdb, synFamDiCa, "all", &transunac);
if (!o_index_stripchars) if (!o_index_stripchars)
diacasedb.recreate(); diacasedb.recreate();
#endif
// Walk the list of all terms, and stem/unac each. // Walk the list of all terms, and stem/unac each.
string ermsg; string ermsg;
@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
} }
string lower = *it; string lower = *it;
#ifndef RCL_INDEX_STRIPCHARS
// If the index is raw, compute the case-folded term which // If the index is raw, compute the case-folded term which
// is the input to the stem db, and add a synonym from the // is the input to the stem db, and add a synonym from the
// stripped term to the cased and accented one, for accent // stripped term to the cased and accented one, for accent
@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD); unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
diacasedb.addSynonym(*it); diacasedb.addSynonym(*it);
} }
#endif
// Dont' apply stemming to terms which don't look like // Dont' apply stemming to terms which don't look like
// natural language words. // natural language words.
@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
stemdbs[i].addSynonym(lower); stemdbs[i].addSynonym(lower);
} }
#ifndef RCL_INDEX_STRIPCHARS
// For a raw index, also maybe create a stem expansion for // For a raw index, also maybe create a stem expansion for
// the unaccented term. While this may be incorrect, it is // the unaccented term. While this may be incorrect, it is
// also necessary for searching in a diacritic-unsensitive // also necessary for searching in a diacritic-unsensitive
@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
} }
} }
} }
#endif
} }
} XCATCHERROR(ermsg); } XCATCHERROR(ermsg);
if (!ermsg.empty()) { if (!ermsg.empty()) {

View file

@ -76,15 +76,9 @@ const string parent_prefix("F");
// Special terms to mark begin/end of field (for anchored searches), and // Special terms to mark begin/end of field (for anchored searches), and
// page breaks // page breaks
#ifdef RCL_INDEX_STRIPCHARS
const string start_of_field_term = "XXST";
const string end_of_field_term = "XXND";
static const string page_break_term = "XXPG";
#else
string start_of_field_term; string start_of_field_term;
string end_of_field_term; string end_of_field_term;
const string page_break_term = "XXPG/"; const string page_break_term = "XXPG/";
#endif
// Field name for the unsplit file name. Has to exist in the field file // Field name for the unsplit file name. Has to exist in the field file
// because of usage in termmatch() // because of usage in termmatch()
@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp)
m_flushMb(-1), m_maxFsOccupPc(0) m_flushMb(-1), m_maxFsOccupPc(0)
{ {
m_config = new RclConfig(*cfp); m_config = new RclConfig(*cfp);
#ifndef RCL_INDEX_STRIPCHARS
if (start_of_field_term.empty()) { if (start_of_field_term.empty()) {
if (o_index_stripchars) { if (o_index_stripchars) {
start_of_field_term = "XXST"; start_of_field_term = "XXST";
@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp)
end_of_field_term = "XXND/"; end_of_field_term = "XXND/";
} }
} }
#endif
m_ndb = new Native(this); m_ndb = new Native(this);
if (m_config) { if (m_config) {
@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error)
m_reason = "Null configuration or Xapian Db"; m_reason = "Null configuration or Xapian Db";
return false; return false;
} }
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
m_ndb->m_iswritable)); m_ndb->m_iswritable, mode));
if (m_ndb->m_isopen) { if (m_ndb->m_isopen) {
// We used to return an error here but I see no reason to // We used to return an error here but I see no reason to
@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term)
return -1; return -1;
string term = _term; string term = _term;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) if (o_index_stripchars)
#endif
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str())); LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
return 0; return 0;
@ -851,9 +841,7 @@ string Db::getSpellingSuggestion(const string& word)
string term = word; string term = word;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) if (o_index_stripchars)
#endif
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) { if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str())); LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
return string(); return string();
@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon; //TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
TermProcPrep tpprep(nxt); TermProcPrep tpprep(nxt);
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) if (o_index_stripchars)
#endif
nxt = &tpprep; nxt = &tpprep;
TextSplitDb splitter(newdocument, nxt); TextSplitDb splitter(newdocument, nxt);

View file

@ -133,15 +133,11 @@ public:
inline bool has_prefix(const string& trm) inline bool has_prefix(const string& trm)
{ {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#endif
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z'; return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
#ifndef RCL_INDEX_STRIPCHARS
} else { } else {
return !trm.empty() && trm[0] == ':'; return !trm.empty() && trm[0] == ':';
} }
#endif
} }
inline string strip_prefix(const string& trm) inline string strip_prefix(const string& trm)
@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm)
if (trm.empty()) if (trm.empty())
return trm; return trm;
string::size_type st = 0; string::size_type st = 0;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#endif
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ"); st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
if (st == string::npos) if (st == string::npos)
return string(); return string();
#ifndef RCL_INDEX_STRIPCHARS
} else { } else {
if (has_prefix(trm)) { if (has_prefix(trm)) {
st = trm.find_last_of(":") + 1; st = trm.find_last_of(":") + 1;
@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm)
return trm; return trm;
} }
} }
#endif
return trm.substr(st); return trm.substr(st);
} }
inline string wrap_prefix(const string& pfx) inline string wrap_prefix(const string& pfx)
{ {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) { if (o_index_stripchars) {
#endif
return pfx; return pfx;
#ifndef RCL_INDEX_STRIPCHARS
} else { } else {
return cstr_colon + pfx + cstr_colon; return cstr_colon + pfx + cstr_colon;
} }
#endif
} }
/** /**
@ -462,13 +450,9 @@ extern const string udi_prefix;
extern const string parent_prefix; extern const string parent_prefix;
extern const string mimetype_prefix; extern const string mimetype_prefix;
extern const string unsplitFilenameFieldName; extern const string unsplitFilenameFieldName;
#ifdef RCL_INDEX_STRIPCHARS
extern const string start_of_field_term;
extern const string end_of_field_term;
#else
extern string start_of_field_term; extern string start_of_field_term;
extern string end_of_field_term; extern string end_of_field_term;
#endif
} }
#endif /* _DB_H_INCLUDED_ */ #endif /* _DB_H_INCLUDED_ */

View file

@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0; bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
bool case_sensitive = (typ_sens & ET_CASESENS) != 0; bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
bool stripped = false;
#ifdef RCL_INDEX_STRIPCHARS
stripped = true;
#else
stripped = o_index_stripchars;
#endif
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]" LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
" max %d field [%s] stripped %d init res.size %u\n", " max %d field [%s] stripped %d init res.size %u\n",
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
_term.c_str(), max, field.c_str(), stripped, res.entries.size())); _term.c_str(), max, field.c_str(), o_index_stripchars,
res.entries.size()));
// If index is stripped, no case or diac expansion can be needed: // If index is stripped, no case or diac expansion can be needed:
// for the processing inside this routine, everything looks like // for the processing inside this routine, everything looks like
// we're all-sensitive: no use of expansion db. // we're all-sensitive: no use of expansion db.
// Also, convert input to lowercase and strip its accents. // Also, convert input to lowercase and strip its accents.
string term = _term; string term = _term;
if (stripped) { if (o_index_stripchars) {
diac_sensitive = case_sensitive = true; diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str())); LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
} }
} }
#ifndef RCL_INDEX_STRIPCHARS
// The case/diac expansion db // The case/diac expansion db
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD); SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans); XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
#endif // RCL_INDEX_STRIPCHARS
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) { if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
#ifdef RCL_INDEX_STRIPCHARS
idxTermMatch(typ_sens, lang, term, res, max, field);
#else
RefCntr<StrMatcher> matcher; RefCntr<StrMatcher> matcher;
if (matchtyp == ET_WILD) { if (matchtyp == ET_WILD) {
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term)); matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(typ_sens, lang, term, res, max, field); idxTermMatch(typ_sens, lang, term, res, max, field);
} }
#endif // RCL_INDEX_STRIPCHARS
} else { } else {
// Expansion is STEM or NONE (which may still need case/diac exp) // Expansion is STEM or NONE (which may still need case/diac exp)
#ifdef RCL_INDEX_STRIPCHARS
idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
#else
vector<string> lexp; vector<string> lexp;
if (diac_sensitive && case_sensitive) { if (diac_sensitive && case_sensitive) {
// No case/diac expansion // No case/diac expansion
@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field); idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
} }
} }
#endif
TermMatchCmpByTerm tcmp; TermMatchCmpByTerm tcmp;
sort(res.entries.begin(), res.entries.end(), tcmp); sort(res.entries.begin(), res.entries.end(), tcmp);
@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
tmtptostr(typ), lang.c_str(), root.c_str(), tmtptostr(typ), lang.c_str(), root.c_str(),
max, field.c_str(), res.entries.size())); max, field.c_str(), res.entries.size()));
#ifndef RCL_INDEX_STRIPCHARS
if (typ == ET_STEM) { if (typ == ET_STEM) {
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n")); LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
abort(); abort();
} }
#endif
Xapian::Database xdb = m_ndb->xrdb; Xapian::Database xdb = m_ndb->xrdb;
@ -346,27 +324,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
} }
res.prefix = prefix; res.prefix = prefix;
#ifdef RCL_INDEX_STRIPCHARS
if (typ == ET_STEM) {
vector<string> exp;
StemDb db(m_ndb->xrdb);
if (!db.stemExpand(langs, term, exp))
return false;
res.entries.insert(result.entries.end(), exp.begin(), exp.end());
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
it != res.entries.end(); it++) {
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
it->docs = xdb.get_termfreq(it->term),
xdb, m_reason);
if (!m_reason.empty())
return false;
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
}
if (!prefix.empty())
addPrefix(res.entries, prefix);
} else
#endif
{
RefCntr<StrMatcher> matcher; RefCntr<StrMatcher> matcher;
if (typ == ET_REGEXP) { if (typ == ET_REGEXP) {
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root)); matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
@ -449,7 +406,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
LOGERR(("termMatch: %s\n", m_reason.c_str())); LOGERR(("termMatch: %s\n", m_reason.c_str()));
return false; return false;
} }
}
return true; return true;
} }

View file

@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
int termmatchsens = 0; int termmatchsens = 0;
#ifndef RCL_INDEX_STRIPCHARS
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0; bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
bool case_sensitive = (mods & SDCM_CASESENS) != 0; bool case_sensitive = (mods & SDCM_CASESENS) != 0;
@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
termmatchsens |= Db::ET_CASESENS; termmatchsens |= Db::ET_CASESENS;
if (diac_sensitive) if (diac_sensitive)
termmatchsens |= Db::ET_DIACSENS; termmatchsens |= Db::ET_DIACSENS;
#endif
if (noexpansion) { if (noexpansion) {
oexp.push_back(prefix + term); oexp.push_back(prefix + term);
@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon; //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
//tpcommon.onlygrams(true); //tpcommon.onlygrams(true);
TermProcPrep tpprep(nxt); TermProcPrep tpprep(nxt);
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) if (o_index_stripchars)
#endif
nxt = &tpprep; nxt = &tpprep;
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |

View file

@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(term, result); (void)expander.synExpand(term, result);
} }
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) { if (!o_index_stripchars) {
string unac; string unac;
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC); unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(unac, result); (void)expander.synExpand(unac, result);
} }
} }
#endif
if (result.empty()) if (result.empty())
result.push_back(term); result.push_back(term);

View file

@ -212,7 +212,6 @@ private:
// Lowercase accented stem to expansion. Family member name: language // Lowercase accented stem to expansion. Family member name: language
static const std::string synFamStem("Stm"); static const std::string synFamStem("Stm");
#ifndef RCL_INDEX_STRIPCHARS
// Lowercase unaccented stem to expansion. Family member name: language // Lowercase unaccented stem to expansion. Family member name: language
static const std::string synFamStemUnac("StU"); static const std::string synFamStemUnac("StU");
@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU");
// member, named "all". This set is used for separate case/diac // member, named "all". This set is used for separate case/diac
// expansion by post-filtering the results of dual expansion. // expansion by post-filtering the results of dual expansion.
static const std::string synFamDiCa("DCa"); static const std::string synFamDiCa("DCa");
#endif // !RCL_INDEX_STRIPCHARS
} // end namespace Rcl } // end namespace Rcl

View file

@ -580,7 +580,7 @@ int Pidfile::flopen()
{ {
const char *path = m_path.c_str(); const char *path = m_path.c_str();
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) { if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
m_reason = "Open failed"; m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
return -1; return -1;
} }