Got rid of the STRIPCHARS compile-time variable and the corresponding configure option, to make the code more readable. Also make sure that RECOLL_CONFDIR from the environment gets translated to an absolute path

This commit is contained in:
Jean-Francois Dockes 2013-03-27 17:38:11 +01:00
parent b3c602db73
commit 09c6ae2d60
20 changed files with 114 additions and 275 deletions

View file

@ -263,14 +263,12 @@ public:
LOGDEB2(("Aspell::buildDict: SKIP\n"));
continue;
}
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
continue;
m_input->swap(lower);
}
#endif
// Got a non-empty sort-of appropriate term, let's send it to
// aspell
LOGDEB2(("Apell::buildDict: SEND\n"));
@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason)
if (iterm.empty())
return true; //??
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason)
}
mterm.swap(lower);
}
#endif
int ret = aapi.aspell_speller_check(m_data->m_speller,
mterm.c_str(), mterm.length());
@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
if (mterm.empty())
return true; //??
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string lower;
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
}
mterm.swap(lower);
}
#endif
AspellCanHaveError *ret;

View file

@ -99,9 +99,6 @@
/* Use multiple threads for indexing */
#undef IDX_THREADS
/* Remove case and accents from terms */
#undef RCL_INDEX_STRIPCHARS
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS

View file

@ -51,10 +51,10 @@ using namespace std;
// Static, logically const, RclConfig members are initialized once from the
// first object build during process initialization.
#ifndef RCL_INDEX_STRIPCHARS
// We default to a case- and diacritics-less index for now
bool o_index_stripchars = true;
#endif
string RclConfig::o_localecharset;
bool ParamStale::needrecompute()
@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf)
} else {
const char *cp = getenv("RECOLL_CONFDIR");
if (cp) {
m_confdir = cp;
m_confdir = path_canon(cp);
} else {
autoconfdir = true;
m_confdir = path_cat(path_home(), ".recoll/");
@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig()
FsTreeWalker::setNoFnmPathname();
}
#ifndef RCL_INDEX_STRIPCHARS
static int m_index_stripchars_init = 0;
if (!m_index_stripchars_init) {
getConfParam("indexStripChars", &o_index_stripchars);
m_index_stripchars_init = 1;
}
#endif
return true;
}

View file

@ -319,9 +319,8 @@ class RclConfig {
// stripped of accents and case or a raw one. Ideally, it should be
// constant, but it needs to be initialized from the configuration, so
// there is no way to do this. It never changes after initialization
// of course. When set, it is supposed to get all of recoll to behave as if
// it was compiled with RCL_INDEX_STRIPCHARS
#ifndef RCL_INDEX_STRIPCHARS
// of course. Changing the value on a given index imposes a
// reset. When using multiple indexes, all must have the same value
extern bool o_index_stripchars;
#endif
#endif /* _RCLCONFIG_H_INCLUDED_ */

19
src/configure vendored
View file

@ -717,7 +717,6 @@ with_fam
enable_xattr
enable_idxthreads
enable_camelcase
enable_stripchars
enable_python_module
enable_pic
enable_qtgui
@ -1367,9 +1366,6 @@ Optional Features:
manual" and "my sql manual" are the same, but not
the same as "mysql manual" (in phrases only and you
could raise the phrase slack to get a match).
--enable-stripchars Remove diacritics and fold character case in indexed
terms. This will yield less precise searches but the
index will be smaller
--disable-python-module Do not build the Python module.
--disable-pic Do not compile library objects as position
independant code. This is incompatible with the php
@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
fi
# Not by default as these are little used for now.
# Check whether --enable-stripchars was given.
if test "${enable_stripchars+set}" = set; then :
enableval=$enable_stripchars; stripcharsEnabled=$enableval
else
stripcharsEnabled=no
fi
if test X$stripcharsEnabled = Xyes ; then
$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
fi
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's

View file

@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
fi
# Not by default as these are little used for now.
AC_ARG_ENABLE(stripchars,
AC_HELP_STRING([--enable-stripchars],
[Remove diacritics and fold character case in indexed terms. This will
yield less precise searches but the index will be smaller]),
stripcharsEnabled=$enableval, stripcharsEnabled=no)
if test X$stripcharsEnabled = Xyes ; then
AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
fi
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's

View file

@ -164,7 +164,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
vboxLayout->setSpacing(spacing);
vboxLayout->setMargin(margin);
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
ConfParamBoolW* cp1 =
@ -191,7 +190,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
));
vboxLayout->addWidget(cp2);
}
#endif
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
ConfParamIntW* cp3 =

View file

@ -119,13 +119,7 @@ void SpellW::init()
resTW->setColumnWidth(1, 150);
resTW->installEventFilter(this);
bool stripped = false;
#ifdef RCL_INDEX_STRIPCHARS
stripped = true;
#else
stripped = o_index_stripchars;
#endif
if (stripped) {
if (o_index_stripchars) {
caseSensCB->setEnabled(false);
caseSensCB->setEnabled(false);
}

View file

@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit {
// (phrase or near), update positions list.
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
string dumb = term;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#endif
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
term.c_str()));
return true;
}
#ifndef RCL_INDEX_STRIPCHARS
}
#endif
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
// pos, bts, bte));

View file

@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config)
map<string, vector<string> > spellings;
suggest(uterms, spellings);
if (!spellings.empty()) {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#else
if (true) {
#endif
chunk <<
trans("<p><i>Alternate spellings (accents suppressed): </i>")
<< "<br /><blockquote>";

View file

@ -116,21 +116,15 @@ static void sigcleanup(int sig)
exit(1);
}
#ifndef RCL_INDEX_STRIPCHARS
bool o_index_stripchars;
#endif
// Tell whether an index term begins with a prefix.
// When the index is stripped (o_index_stripchars is set, or the code was
// built with RCL_INDEX_STRIPCHARS) a prefix is signalled by a first
// character in the ASCII range 'A'..'Z'. Otherwise (raw index) it is
// signalled by a leading ':'. An empty term never has a prefix.
inline bool has_prefix(const string& trm)
{
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#endif
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
#ifndef RCL_INDEX_STRIPCHARS
} else {
return trm.size() > 0 && trm[0] == ':';
}
#endif
}
int main(int argc, char **argv)
@ -212,7 +206,6 @@ int main(int argc, char **argv)
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
#ifndef RCL_INDEX_STRIPCHARS
// If we have terms with a leading ':' it's a new style,
// unstripped index
{
@ -223,7 +216,6 @@ int main(int argc, char **argv)
o_index_stripchars = false;
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
}
#endif
if (op_flags & OPT_T) {
Xapian::TermIterator term;

View file

@ -36,10 +36,6 @@ using namespace std;
namespace Rcl {
#ifdef RCL_INDEX_STRIPCHARS
#define bufprefix(BUF, L) {(BUF)[0] = L;}
#define bpoffs() 1
#else
static inline void bufprefix(char *buf, char c)
{
if (o_index_stripchars) {
@ -54,7 +50,6 @@ static inline int bpoffs()
{
return o_index_stripchars ? 1 : 3;
}
#endif
Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{

View file

@ -48,9 +48,7 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
// If langs is empty and we don't need casediac expansion, then no need to
// walk the big list
if (langs.empty()) {
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
#endif
return true;
}
@ -68,7 +66,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
stemdbs.back().recreate();
}
#ifndef RCL_INDEX_STRIPCHARS
// Unaccented stem dbs
vector<XapWritableComputableSynFamMember> unacstemdbs;
// We can reuse the same stemmer pointers, the objects are stateless.
@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
diacasedb(wdb, synFamDiCa, "all", &transunac);
if (!o_index_stripchars)
diacasedb.recreate();
#endif
// Walk the list of all terms, and stem/unac each.
string ermsg;
@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
}
string lower = *it;
#ifndef RCL_INDEX_STRIPCHARS
// If the index is raw, compute the case-folded term which
// is the input to the stem db, and add a synonym from the
// stripped term to the cased and accented one, for accent
@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
diacasedb.addSynonym(*it);
}
#endif
// Don't apply stemming to terms which don't look like
// natural language words.
@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
stemdbs[i].addSynonym(lower);
}
#ifndef RCL_INDEX_STRIPCHARS
// For a raw index, also maybe create a stem expansion for
// the unaccented term. While this may be incorrect, it is
// also necessary for searching in a diacritic-insensitive
@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
}
}
}
#endif
}
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {

View file

@ -76,15 +76,9 @@ const string parent_prefix("F");
// Special terms to mark begin/end of field (for anchored searches), and
// page breaks
#ifdef RCL_INDEX_STRIPCHARS
const string start_of_field_term = "XXST";
const string end_of_field_term = "XXND";
static const string page_break_term = "XXPG";
#else
string start_of_field_term;
string end_of_field_term;
const string page_break_term = "XXPG/";
#endif
// Field name for the unsplit file name. Has to exist in the field file
// because of usage in termmatch()
@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp)
m_flushMb(-1), m_maxFsOccupPc(0)
{
m_config = new RclConfig(*cfp);
#ifndef RCL_INDEX_STRIPCHARS
if (start_of_field_term.empty()) {
if (o_index_stripchars) {
start_of_field_term = "XXST";
@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp)
end_of_field_term = "XXND/";
}
}
#endif
m_ndb = new Native(this);
if (m_config) {
@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error)
m_reason = "Null configuration or Xapian Db";
return false;
}
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
m_ndb->m_iswritable));
LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
m_ndb->m_iswritable, mode));
if (m_ndb->m_isopen) {
// We used to return an error here but I see no reason to
@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term)
return -1;
string term = _term;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
#endif
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
return 0;
@ -851,9 +841,7 @@ string Db::getSpellingSuggestion(const string& word)
string term = word;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
#endif
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
return string();
@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
TermProcPrep tpprep(nxt);
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
#endif
nxt = &tpprep;
TextSplitDb splitter(newdocument, nxt);

View file

@ -133,15 +133,11 @@ public:
// Tell whether an index term begins with a prefix.
// Stripped index (o_index_stripchars set, or RCL_INDEX_STRIPCHARS compiled
// in): true if the term is not empty and its first character is in the
// ASCII range 'A'..'Z'. Raw index: true if the term is not empty and its
// first character is ':'.
inline bool has_prefix(const string& trm)
{
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#endif
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
#ifndef RCL_INDEX_STRIPCHARS
} else {
return !trm.empty() && trm[0] == ':';
}
#endif
}
inline string strip_prefix(const string& trm)
@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm)
if (trm.empty())
return trm;
string::size_type st = 0;
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#endif
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
if (st == string::npos)
return string();
#ifndef RCL_INDEX_STRIPCHARS
} else {
if (has_prefix(trm)) {
st = trm.find_last_of(":") + 1;
@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm)
return trm;
}
}
#endif
return trm.substr(st);
}
// Wrap a prefix according to the index type.
// Stripped index (o_index_stripchars set, or RCL_INDEX_STRIPCHARS compiled
// in): the prefix is returned unchanged. Raw index: the prefix is returned
// enclosed between two cstr_colon values (cstr_colon is defined elsewhere,
// presumably ":" -- to be confirmed).
inline string wrap_prefix(const string& pfx)
{
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars) {
#endif
return pfx;
#ifndef RCL_INDEX_STRIPCHARS
} else {
return cstr_colon + pfx + cstr_colon;
}
#endif
}
/**
@ -462,13 +450,9 @@ extern const string udi_prefix;
extern const string parent_prefix;
extern const string mimetype_prefix;
extern const string unsplitFilenameFieldName;
#ifdef RCL_INDEX_STRIPCHARS
extern const string start_of_field_term;
extern const string end_of_field_term;
#else
extern string start_of_field_term;
extern string end_of_field_term;
#endif
}
#endif /* _DB_H_INCLUDED_ */

View file

@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
bool stripped = false;
#ifdef RCL_INDEX_STRIPCHARS
stripped = true;
#else
stripped = o_index_stripchars;
#endif
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
" max %d field [%s] stripped %d init res.size %u\n",
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
_term.c_str(), max, field.c_str(), stripped, res.entries.size()));
_term.c_str(), max, field.c_str(), o_index_stripchars,
res.entries.size()));
// If index is stripped, no case or diac expansion can be needed:
// for the processing inside this routine, everything looks like
// we're all-sensitive: no use of expansion db.
// Also, convert input to lowercase and strip its accents.
string term = _term;
if (stripped) {
if (o_index_stripchars) {
diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
}
}
#ifndef RCL_INDEX_STRIPCHARS
// The case/diac expansion db
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
#endif // RCL_INDEX_STRIPCHARS
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
#ifdef RCL_INDEX_STRIPCHARS
idxTermMatch(typ_sens, lang, term, res, max, field);
#else
RefCntr<StrMatcher> matcher;
if (matchtyp == ET_WILD) {
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(typ_sens, lang, term, res, max, field);
}
#endif // RCL_INDEX_STRIPCHARS
} else {
// Expansion is STEM or NONE (which may still need case/diac exp)
#ifdef RCL_INDEX_STRIPCHARS
idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
#else
vector<string> lexp;
if (diac_sensitive && case_sensitive) {
// No case/diac expansion
@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
}
}
#endif
TermMatchCmpByTerm tcmp;
sort(res.entries.begin(), res.entries.end(), tcmp);
@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
tmtptostr(typ), lang.c_str(), root.c_str(),
max, field.c_str(), res.entries.size()));
#ifndef RCL_INDEX_STRIPCHARS
if (typ == ET_STEM) {
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
abort();
}
#endif
Xapian::Database xdb = m_ndb->xrdb;
@ -346,27 +324,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
}
res.prefix = prefix;
#ifdef RCL_INDEX_STRIPCHARS
if (typ == ET_STEM) {
vector<string> exp;
StemDb db(m_ndb->xrdb);
if (!db.stemExpand(langs, term, exp))
return false;
res.entries.insert(result.entries.end(), exp.begin(), exp.end());
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
it != res.entries.end(); it++) {
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
it->docs = xdb.get_termfreq(it->term),
xdb, m_reason);
if (!m_reason.empty())
return false;
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
}
if (!prefix.empty())
addPrefix(res.entries, prefix);
} else
#endif
{
RefCntr<StrMatcher> matcher;
if (typ == ET_REGEXP) {
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
@ -449,7 +406,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
LOGERR(("termMatch: %s\n", m_reason.c_str()));
return false;
}
}
return true;
}

View file

@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
int termmatchsens = 0;
#ifndef RCL_INDEX_STRIPCHARS
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
bool case_sensitive = (mods & SDCM_CASESENS) != 0;
@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
termmatchsens |= Db::ET_CASESENS;
if (diac_sensitive)
termmatchsens |= Db::ET_DIACSENS;
#endif
if (noexpansion) {
oexp.push_back(prefix + term);
@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
//tpcommon.onlygrams(true);
TermProcPrep tpprep(nxt);
#ifndef RCL_INDEX_STRIPCHARS
if (o_index_stripchars)
#endif
nxt = &tpprep;
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |

View file

@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(term, result);
}
#ifndef RCL_INDEX_STRIPCHARS
if (!o_index_stripchars) {
string unac;
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
(void)expander.synExpand(unac, result);
}
}
#endif
if (result.empty())
result.push_back(term);

View file

@ -212,7 +212,6 @@ private:
// Lowercase accented stem to expansion. Family member name: language
static const std::string synFamStem("Stm");
#ifndef RCL_INDEX_STRIPCHARS
// Lowercase unaccented stem to expansion. Family member name: language
static const std::string synFamStemUnac("StU");
@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU");
// member, named "all". This set is used for separate case/diac
// expansion by post-filtering the results of dual expansion.
static const std::string synFamDiCa("DCa");
#endif // !RCL_INDEX_STRIPCHARS
} // end namespace Rcl

View file

@ -580,7 +580,7 @@ int Pidfile::flopen()
{
const char *path = m_path.c_str();
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
m_reason = "Open failed";
m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
return -1;
}