Got rid of the STRIPCHARS compile-time variable, and the corresponding configure option, to make the code more readable. Also make sure that RECOLL_CONFDIR, when taken from the environment, gets translated to an absolute path.
This commit is contained in:
parent
b3c602db73
commit
09c6ae2d60
20 changed files with 114 additions and 275 deletions
|
@ -263,14 +263,12 @@ public:
|
|||
LOGDEB2(("Aspell::buildDict: SKIP\n"));
|
||||
continue;
|
||||
}
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (!o_index_stripchars) {
|
||||
string lower;
|
||||
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
|
||||
continue;
|
||||
m_input->swap(lower);
|
||||
}
|
||||
#endif
|
||||
// Got a non-empty sort-of appropriate term, let's send it to
|
||||
// aspell
|
||||
LOGDEB2(("Apell::buildDict: SEND\n"));
|
||||
|
@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason)
|
|||
if (iterm.empty())
|
||||
return true; //??
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (!o_index_stripchars) {
|
||||
string lower;
|
||||
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
||||
|
@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason)
|
|||
}
|
||||
mterm.swap(lower);
|
||||
}
|
||||
#endif
|
||||
|
||||
int ret = aapi.aspell_speller_check(m_data->m_speller,
|
||||
mterm.c_str(), mterm.length());
|
||||
|
@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
|
|||
if (mterm.empty())
|
||||
return true; //??
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (!o_index_stripchars) {
|
||||
string lower;
|
||||
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
||||
|
@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
|
|||
}
|
||||
mterm.swap(lower);
|
||||
}
|
||||
#endif
|
||||
|
||||
AspellCanHaveError *ret;
|
||||
|
||||
|
|
|
@ -99,9 +99,6 @@
|
|||
/* Use multiple threads for indexing */
|
||||
#undef IDX_THREADS
|
||||
|
||||
/* Remove case and accents from terms */
|
||||
#undef RCL_INDEX_STRIPCHARS
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
|
|
|
@ -51,10 +51,10 @@ using namespace std;
|
|||
|
||||
// Static, logically const, RclConfig members are initialized once from the
|
||||
// first object build during process initialization.
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
|
||||
// We default to a case- and diacritics-less index for now
|
||||
bool o_index_stripchars = true;
|
||||
#endif
|
||||
|
||||
string RclConfig::o_localecharset;
|
||||
|
||||
bool ParamStale::needrecompute()
|
||||
|
@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf)
|
|||
} else {
|
||||
const char *cp = getenv("RECOLL_CONFDIR");
|
||||
if (cp) {
|
||||
m_confdir = cp;
|
||||
m_confdir = path_canon(cp);
|
||||
} else {
|
||||
autoconfdir = true;
|
||||
m_confdir = path_cat(path_home(), ".recoll/");
|
||||
|
@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig()
|
|||
FsTreeWalker::setNoFnmPathname();
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
static int m_index_stripchars_init = 0;
|
||||
if (!m_index_stripchars_init) {
|
||||
getConfParam("indexStripChars", &o_index_stripchars);
|
||||
m_index_stripchars_init = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -319,9 +319,8 @@ class RclConfig {
|
|||
// stripped of accents and case or a raw one. Ideally, it should be
|
||||
// constant, but it needs to be initialized from the configuration, so
|
||||
// there is no way to do this. It never changes after initialization
|
||||
// of course. When set, it is supposed to get all of recoll to behave like if
|
||||
// it was compiled with RCL_INDEX_STRIPCHARS
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// of course. Changing the value on a given index imposes a
|
||||
// reset. When using multiple indexes, all must have the same value
|
||||
extern bool o_index_stripchars;
|
||||
#endif
|
||||
|
||||
#endif /* _RCLCONFIG_H_INCLUDED_ */
|
||||
|
|
19
src/configure
vendored
19
src/configure
vendored
|
@ -717,7 +717,6 @@ with_fam
|
|||
enable_xattr
|
||||
enable_idxthreads
|
||||
enable_camelcase
|
||||
enable_stripchars
|
||||
enable_python_module
|
||||
enable_pic
|
||||
enable_qtgui
|
||||
|
@ -1367,9 +1366,6 @@ Optional Features:
|
|||
manual" and "my sql manual" are the same, but not
|
||||
the same as "mysql manual" (in phrases only and you
|
||||
could raise the phrase slack to get a match).
|
||||
--enable-stripchars Remove diacritics and fold character case in indexed
|
||||
terms. This will yield less precise searches but the
|
||||
index will be smaller
|
||||
--disable-python-module Do not build the Python module.
|
||||
--disable-pic Do not compile library objects as position
|
||||
independant code. This is incompatible with the php
|
||||
|
@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
|
|||
|
||||
fi
|
||||
|
||||
# Not by default as these are little used for now.
|
||||
# Check whether --enable-stripchars was given.
|
||||
if test "${enable_stripchars+set}" = set; then :
|
||||
enableval=$enable_stripchars; stripcharsEnabled=$enableval
|
||||
else
|
||||
stripcharsEnabled=no
|
||||
fi
|
||||
|
||||
|
||||
if test X$stripcharsEnabled = Xyes ; then
|
||||
|
||||
$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
# Disable building the python module. This is built by default, because
|
||||
# it's really the easiest way to interface and extend recoll. It forces PIC
|
||||
# objects for everything (indexing performance impact: 1%), because it's
|
||||
|
|
|
@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then
|
|||
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
||||
fi
|
||||
|
||||
# Not by default as these are little used for now.
|
||||
AC_ARG_ENABLE(stripchars,
|
||||
AC_HELP_STRING([--enable-stripchars],
|
||||
[Remove diacritics and fold character case in indexed terms. This will
|
||||
yield less precise searches but the index will be smaller]),
|
||||
stripcharsEnabled=$enableval, stripcharsEnabled=no)
|
||||
|
||||
if test X$stripcharsEnabled = Xyes ; then
|
||||
AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
|
||||
fi
|
||||
|
||||
# Disable building the python module. This is built by default, because
|
||||
# it's really the easiest way to interface and extend recoll. It forces PIC
|
||||
# objects for everything (indexing performance impact: 1%), because it's
|
||||
|
|
|
@ -164,34 +164,32 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
|
|||
vboxLayout->setSpacing(spacing);
|
||||
vboxLayout->setMargin(margin);
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (!o_index_stripchars) {
|
||||
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
|
||||
ConfParamBoolW* cp1 =
|
||||
new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"),
|
||||
tr("<p>Automatically trigger diacritics sensitivity "
|
||||
"if the search term has accented characters "
|
||||
"(not in unac_except_trans). Else you need to "
|
||||
"use the query language and the <i>D</i> "
|
||||
"modifier to specify "
|
||||
"diacritics sensitivity."
|
||||
));
|
||||
vboxLayout->addWidget(cp1);
|
||||
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
|
||||
ConfParamBoolW* cp1 =
|
||||
new ConfParamBoolW(this, lnk1, tr("Automatic diacritics sensitivity"),
|
||||
tr("<p>Automatically trigger diacritics sensitivity "
|
||||
"if the search term has accented characters "
|
||||
"(not in unac_except_trans). Else you need to "
|
||||
"use the query language and the <i>D</i> "
|
||||
"modifier to specify "
|
||||
"diacritics sensitivity."
|
||||
));
|
||||
vboxLayout->addWidget(cp1);
|
||||
|
||||
ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens"));
|
||||
ConfParamBoolW* cp2 =
|
||||
new ConfParamBoolW(this, lnk2,
|
||||
tr("Automatic character case sensitivity"),
|
||||
tr("<p>Automatically trigger character case "
|
||||
"sensitivity if the entry has upper-case "
|
||||
"characters in any but the first position. "
|
||||
"Else you need to use the query language and "
|
||||
"the <i>C</i> modifier to specify character-case "
|
||||
"sensitivity."
|
||||
));
|
||||
vboxLayout->addWidget(cp2);
|
||||
ConfLink lnk2(new ConfLinkRclRep(config, "autocasesens"));
|
||||
ConfParamBoolW* cp2 =
|
||||
new ConfParamBoolW(this, lnk2,
|
||||
tr("Automatic character case sensitivity"),
|
||||
tr("<p>Automatically trigger character case "
|
||||
"sensitivity if the entry has upper-case "
|
||||
"characters in any but the first position. "
|
||||
"Else you need to use the query language and "
|
||||
"the <i>C</i> modifier to specify character-case "
|
||||
"sensitivity."
|
||||
));
|
||||
vboxLayout->addWidget(cp2);
|
||||
}
|
||||
#endif
|
||||
|
||||
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
|
||||
ConfParamIntW* cp3 =
|
||||
|
|
|
@ -119,13 +119,7 @@ void SpellW::init()
|
|||
resTW->setColumnWidth(1, 150);
|
||||
resTW->installEventFilter(this);
|
||||
|
||||
bool stripped = false;
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
stripped = true;
|
||||
#else
|
||||
stripped = o_index_stripchars;
|
||||
#endif
|
||||
if (stripped) {
|
||||
if (o_index_stripchars) {
|
||||
caseSensCB->setEnabled(false);
|
||||
caseSensCB->setEnabled(false);
|
||||
}
|
||||
|
|
|
@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit {
|
|||
// (phrase or near), update positions list.
|
||||
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
||||
string dumb = term;
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#endif
|
||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
|
||||
term.c_str()));
|
||||
return true;
|
||||
}
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
}
|
||||
#endif
|
||||
|
||||
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
||||
// pos, bts, bte));
|
||||
|
|
|
@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config)
|
|||
map<string, vector<string> > spellings;
|
||||
suggest(uterms, spellings);
|
||||
if (!spellings.empty()) {
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#else
|
||||
if (true) {
|
||||
#endif
|
||||
chunk <<
|
||||
trans("<p><i>Alternate spellings (accents suppressed): </i>")
|
||||
<< "<br /><blockquote>";
|
||||
|
|
|
@ -116,21 +116,15 @@ static void sigcleanup(int sig)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
bool o_index_stripchars;
|
||||
#endif
|
||||
|
||||
inline bool has_prefix(const string& trm)
|
||||
{
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#endif
|
||||
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
} else {
|
||||
return trm.size() > 0 && trm[0] == ':';
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
@ -212,7 +206,6 @@ int main(int argc, char **argv)
|
|||
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
|
||||
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// If we have terms with a leading ':' it's a new style,
|
||||
// unstripped index
|
||||
{
|
||||
|
@ -223,7 +216,6 @@ int main(int argc, char **argv)
|
|||
o_index_stripchars = false;
|
||||
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (op_flags & OPT_T) {
|
||||
Xapian::TermIterator term;
|
||||
|
|
|
@ -36,10 +36,6 @@ using namespace std;
|
|||
|
||||
namespace Rcl {
|
||||
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
#define bufprefix(BUF, L) {(BUF)[0] = L;}
|
||||
#define bpoffs() 1
|
||||
#else
|
||||
static inline void bufprefix(char *buf, char c)
|
||||
{
|
||||
if (o_index_stripchars) {
|
||||
|
@ -54,7 +50,6 @@ static inline int bpoffs()
|
|||
{
|
||||
return o_index_stripchars ? 1 : 3;
|
||||
}
|
||||
#endif
|
||||
|
||||
Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
|
||||
{
|
||||
|
|
|
@ -48,9 +48,7 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
// If langs is empty and we don't need casediac expansion, then no need to
|
||||
// walk the big list
|
||||
if (langs.empty()) {
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars)
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -68,7 +66,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
stemdbs.back().recreate();
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// Unaccented stem dbs
|
||||
vector<XapWritableComputableSynFamMember> unacstemdbs;
|
||||
// We can reuse the same stemmer pointers, the objects are stateless.
|
||||
|
@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
diacasedb(wdb, synFamDiCa, "all", &transunac);
|
||||
if (!o_index_stripchars)
|
||||
diacasedb.recreate();
|
||||
#endif
|
||||
|
||||
// Walk the list of all terms, and stem/unac each.
|
||||
string ermsg;
|
||||
|
@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
}
|
||||
|
||||
string lower = *it;
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// If the index is raw, compute the case-folded term which
|
||||
// is the input to the stem db, and add a synonym from the
|
||||
// stripped term to the cased and accented one, for accent
|
||||
|
@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
|
||||
diacasedb.addSynonym(*it);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Don't apply stemming to terms which don't look like
|
||||
// natural language words.
|
||||
|
@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
stemdbs[i].addSynonym(lower);
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// For a raw index, also maybe create a stem expansion for
|
||||
// the unaccented term. While this may be incorrect, it is
|
||||
// also necessary for searching in a diacritic-insensitive
|
||||
|
@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
|
|
|
@ -76,15 +76,9 @@ const string parent_prefix("F");
|
|||
|
||||
// Special terms to mark begin/end of field (for anchored searches), and
|
||||
// page breaks
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
const string start_of_field_term = "XXST";
|
||||
const string end_of_field_term = "XXND";
|
||||
static const string page_break_term = "XXPG";
|
||||
#else
|
||||
string start_of_field_term;
|
||||
string end_of_field_term;
|
||||
const string page_break_term = "XXPG/";
|
||||
#endif
|
||||
|
||||
// Field name for the unsplit file name. Has to exist in the field file
|
||||
// because of usage in termmatch()
|
||||
|
@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp)
|
|||
m_flushMb(-1), m_maxFsOccupPc(0)
|
||||
{
|
||||
m_config = new RclConfig(*cfp);
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (start_of_field_term.empty()) {
|
||||
if (o_index_stripchars) {
|
||||
start_of_field_term = "XXST";
|
||||
|
@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp)
|
|||
end_of_field_term = "XXND/";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
m_ndb = new Native(this);
|
||||
if (m_config) {
|
||||
|
@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error)
|
|||
m_reason = "Null configuration or Xapian Db";
|
||||
return false;
|
||||
}
|
||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
m_ndb->m_iswritable));
|
||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
|
||||
m_ndb->m_iswritable, mode));
|
||||
|
||||
if (m_ndb->m_isopen) {
|
||||
// We used to return an error here but I see no reason to
|
||||
|
@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term)
|
|||
return -1;
|
||||
|
||||
string term = _term;
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars)
|
||||
#endif
|
||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
||||
return 0;
|
||||
|
@ -851,13 +841,11 @@ string Db::getSpellingSuggestion(const string& word)
|
|||
|
||||
string term = word;
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars)
|
||||
#endif
|
||||
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
||||
return string();
|
||||
}
|
||||
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
||||
return string();
|
||||
}
|
||||
|
||||
if (!isSpellingCandidate(term))
|
||||
return string();
|
||||
|
@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
|
||||
|
||||
TermProcPrep tpprep(nxt);
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars)
|
||||
#endif
|
||||
nxt = &tpprep;
|
||||
|
||||
TextSplitDb splitter(newdocument, nxt);
|
||||
|
|
|
@ -133,15 +133,11 @@ public:
|
|||
|
||||
inline bool has_prefix(const string& trm)
|
||||
{
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#endif
|
||||
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
} else {
|
||||
return !trm.empty() && trm[0] == ':';
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline string strip_prefix(const string& trm)
|
||||
|
@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm)
|
|||
if (trm.empty())
|
||||
return trm;
|
||||
string::size_type st = 0;
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#endif
|
||||
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
|
||||
if (st == string::npos)
|
||||
return string();
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
} else {
|
||||
if (has_prefix(trm)) {
|
||||
st = trm.find_last_of(":") + 1;
|
||||
|
@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm)
|
|||
return trm;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return trm.substr(st);
|
||||
}
|
||||
|
||||
inline string wrap_prefix(const string& pfx)
|
||||
{
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars) {
|
||||
#endif
|
||||
return pfx;
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
} else {
|
||||
return cstr_colon + pfx + cstr_colon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -462,13 +450,9 @@ extern const string udi_prefix;
|
|||
extern const string parent_prefix;
|
||||
extern const string mimetype_prefix;
|
||||
extern const string unsplitFilenameFieldName;
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
extern const string start_of_field_term;
|
||||
extern const string end_of_field_term;
|
||||
#else
|
||||
extern string start_of_field_term;
|
||||
extern string end_of_field_term;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif /* _DB_H_INCLUDED_ */
|
||||
|
|
|
@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
||||
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
||||
|
||||
bool stripped = false;
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
stripped = true;
|
||||
#else
|
||||
stripped = o_index_stripchars;
|
||||
#endif
|
||||
|
||||
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
||||
" max %d field [%s] stripped %d init res.size %u\n",
|
||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||
_term.c_str(), max, field.c_str(), stripped, res.entries.size()));
|
||||
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
||||
res.entries.size()));
|
||||
|
||||
// If index is stripped, no case or diac expansion can be needed:
|
||||
// for the processing inside this routine, everything looks like
|
||||
// we're all-sensitive: no use of expansion db.
|
||||
// Also, convert input to lowercase and strip its accents.
|
||||
string term = _term;
|
||||
if (stripped) {
|
||||
if (o_index_stripchars) {
|
||||
diac_sensitive = case_sensitive = true;
|
||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
||||
|
@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// The case/diac expansion db
|
||||
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
||||
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
||||
#endif // RCL_INDEX_STRIPCHARS
|
||||
|
||||
|
||||
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
#else
|
||||
RefCntr<StrMatcher> matcher;
|
||||
if (matchtyp == ET_WILD) {
|
||||
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
|
||||
|
@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||
}
|
||||
|
||||
#endif // RCL_INDEX_STRIPCHARS
|
||||
|
||||
} else {
|
||||
// Expansion is STEM or NONE (which may still need case/diac exp)
|
||||
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
|
||||
idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
|
||||
|
||||
#else
|
||||
vector<string> lexp;
|
||||
if (diac_sensitive && case_sensitive) {
|
||||
// No case/diac expansion
|
||||
|
@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
|||
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TermMatchCmpByTerm tcmp;
|
||||
sort(res.entries.begin(), res.entries.end(), tcmp);
|
||||
|
@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
|||
tmtptostr(typ), lang.c_str(), root.c_str(),
|
||||
max, field.c_str(), res.entries.size()));
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (typ == ET_STEM) {
|
||||
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
|
||||
Xapian::Database xdb = m_ndb->xrdb;
|
||||
|
||||
|
@ -346,109 +324,87 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
|||
}
|
||||
res.prefix = prefix;
|
||||
|
||||
#ifdef RCL_INDEX_STRIPCHARS
|
||||
if (typ == ET_STEM) {
|
||||
vector<string> exp;
|
||||
StemDb db(m_ndb->xrdb);
|
||||
if (!db.stemExpand(langs, term, exp))
|
||||
return false;
|
||||
res.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
||||
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
|
||||
it != res.entries.end(); it++) {
|
||||
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
|
||||
it->docs = xdb.get_termfreq(it->term),
|
||||
xdb, m_reason);
|
||||
if (!m_reason.empty())
|
||||
return false;
|
||||
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
|
||||
}
|
||||
if (!prefix.empty())
|
||||
addPrefix(res.entries, prefix);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
RefCntr<StrMatcher> matcher;
|
||||
if (typ == ET_REGEXP) {
|
||||
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
||||
if (!matcher->ok()) {
|
||||
LOGERR(("termMatch: regcomp failed: %s\n",
|
||||
matcher->getreason().c_str()))
|
||||
return false;
|
||||
}
|
||||
} else if (typ == ET_WILD) {
|
||||
matcher = RefCntr<StrMatcher>(new StrWildMatcher(root));
|
||||
RefCntr<StrMatcher> matcher;
|
||||
if (typ == ET_REGEXP) {
|
||||
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
||||
if (!matcher->ok()) {
|
||||
LOGERR(("termMatch: regcomp failed: %s\n",
|
||||
matcher->getreason().c_str()))
|
||||
return false;
|
||||
}
|
||||
} else if (typ == ET_WILD) {
|
||||
matcher = RefCntr<StrMatcher>(new StrWildMatcher(root));
|
||||
}
|
||||
|
||||
// Find the initial section before any special char
|
||||
string::size_type es = string::npos;
|
||||
if (matcher.isNotNull()) {
|
||||
es = matcher->baseprefixlen();
|
||||
}
|
||||
// Find the initial section before any special char
|
||||
string::size_type es = string::npos;
|
||||
if (matcher.isNotNull()) {
|
||||
es = matcher->baseprefixlen();
|
||||
}
|
||||
|
||||
// Initial section: the part of the prefix+expr before the
|
||||
// first wildcard character. We only scan the part of the
|
||||
// index where this matches
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = prefix + root; break;
|
||||
case 0: is = prefix; break;
|
||||
default: is = prefix + root.substr(0, es); break;
|
||||
}
|
||||
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
// Initial section: the part of the prefix+expr before the
|
||||
// first wildcard character. We only scan the part of the
|
||||
// index where this matches
|
||||
string is;
|
||||
switch (es) {
|
||||
case string::npos: is = prefix + root; break;
|
||||
case 0: is = prefix; break;
|
||||
default: is = prefix + root.substr(0, es); break;
|
||||
}
|
||||
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
||||
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::TermIterator it = xdb.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial
|
||||
// section, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
for (int tries = 0; tries < 2; tries++) {
|
||||
try {
|
||||
Xapian::TermIterator it = xdb.allterms_begin();
|
||||
if (!is.empty())
|
||||
it.skip_to(is.c_str());
|
||||
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
||||
// If we're beyond the terms matching the initial
|
||||
// section, end
|
||||
if (!is.empty() && (*it).find(is) != 0)
|
||||
break;
|
||||
|
||||
// Else try to match the term. The matcher content
|
||||
// is without prefix, so we remove this if any. We
|
||||
// just checked that the index term did begin with
|
||||
// the prefix.
|
||||
string term;
|
||||
if (!prefix.empty()) {
|
||||
term = (*it).substr(prefix.length());
|
||||
} else {
|
||||
if (has_prefix(*it)) {
|
||||
continue;
|
||||
}
|
||||
term = *it;
|
||||
}
|
||||
|
||||
if (matcher.isNotNull() && !matcher->match(term))
|
||||
// Else try to match the term. The matcher content
|
||||
// is without prefix, so we remove this if any. We
|
||||
// just checked that the index term did begin with
|
||||
// the prefix.
|
||||
string term;
|
||||
if (!prefix.empty()) {
|
||||
term = (*it).substr(prefix.length());
|
||||
} else {
|
||||
if (has_prefix(*it)) {
|
||||
continue;
|
||||
}
|
||||
term = *it;
|
||||
}
|
||||
|
||||
res.entries.push_back(
|
||||
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
||||
it.get_termfreq()));
|
||||
if (matcher.isNotNull() && !matcher->match(term))
|
||||
continue;
|
||||
|
||||
// The problem with truncating here is that this is done
|
||||
// alphabetically and we may not keep the most frequent
|
||||
// terms. OTOH, not doing it may stall the program if
|
||||
// we are walking the whole term list. We compromise
|
||||
// by cutting at 2*max
|
||||
if (max > 0 && ++rcnt >= 2*max)
|
||||
break;
|
||||
}
|
||||
m_reason.erase();
|
||||
break;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_reason = e.get_msg();
|
||||
xdb.reopen();
|
||||
continue;
|
||||
} XCATCHERROR(m_reason);
|
||||
break;
|
||||
}
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
res.entries.push_back(
|
||||
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
||||
it.get_termfreq()));
|
||||
|
||||
// The problem with truncating here is that this is done
|
||||
// alphabetically and we may not keep the most frequent
|
||||
// terms. OTOH, not doing it may stall the program if
|
||||
// we are walking the whole term list. We compromise
|
||||
// by cutting at 2*max
|
||||
if (max > 0 && ++rcnt >= 2*max)
|
||||
break;
|
||||
}
|
||||
m_reason.erase();
|
||||
break;
|
||||
} catch (const Xapian::DatabaseModifiedError &e) {
|
||||
m_reason = e.get_msg();
|
||||
xdb.reopen();
|
||||
continue;
|
||||
} XCATCHERROR(m_reason);
|
||||
break;
|
||||
}
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
|||
|
||||
int termmatchsens = 0;
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
|
||||
bool case_sensitive = (mods & SDCM_CASESENS) != 0;
|
||||
|
||||
|
@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
|||
termmatchsens |= Db::ET_CASESENS;
|
||||
if (diac_sensitive)
|
||||
termmatchsens |= Db::ET_DIACSENS;
|
||||
#endif
|
||||
|
||||
if (noexpansion) {
|
||||
oexp.push_back(prefix + term);
|
||||
|
@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
|
|||
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
|
||||
//tpcommon.onlygrams(true);
|
||||
TermProcPrep tpprep(nxt);
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (o_index_stripchars)
|
||||
#endif
|
||||
nxt = &tpprep;
|
||||
|
||||
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
||||
|
|
|
@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
|
|||
(void)expander.synExpand(term, result);
|
||||
}
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (!o_index_stripchars) {
|
||||
string unac;
|
||||
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
|
||||
|
@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
|
|||
(void)expander.synExpand(unac, result);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (result.empty())
|
||||
result.push_back(term);
|
||||
|
|
|
@ -212,7 +212,6 @@ private:
|
|||
// Lowercase accented stem to expansion. Family member name: language
|
||||
static const std::string synFamStem("Stm");
|
||||
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
// Lowercase unaccented stem to expansion. Family member name: language
|
||||
static const std::string synFamStemUnac("StU");
|
||||
|
||||
|
@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU");
|
|||
// member, named "all". This set is used for separate case/diac
|
||||
// expansion by post-filtering the results of dual expansion.
|
||||
static const std::string synFamDiCa("DCa");
|
||||
#endif // !RCL_INDEX_STRIPCHARS
|
||||
|
||||
} // end namespace Rcl
|
||||
|
||||
|
|
|
@ -580,7 +580,7 @@ int Pidfile::flopen()
|
|||
{
|
||||
const char *path = m_path.c_str();
|
||||
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
|
||||
m_reason = "Open failed";
|
||||
m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue