Got rid of the STRIPCHARS compile-time variable, and of the corresponding configure option, to make the code more readable. Also make sure that a CONFDIR value taken from the environment gets translated to an absolute path.
This commit is contained in:
parent
b3c602db73
commit
09c6ae2d60
20 changed files with 114 additions and 275 deletions
|
@ -263,14 +263,12 @@ public:
|
||||||
LOGDEB2(("Aspell::buildDict: SKIP\n"));
|
LOGDEB2(("Aspell::buildDict: SKIP\n"));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (!o_index_stripchars) {
|
if (!o_index_stripchars) {
|
||||||
string lower;
|
string lower;
|
||||||
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
|
if (!unacmaybefold(*m_input, lower, "UTF-8", UNACOP_FOLD))
|
||||||
continue;
|
continue;
|
||||||
m_input->swap(lower);
|
m_input->swap(lower);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
// Got a non-empty sort-of appropriate term, let's send it to
|
// Got a non-empty sort-of appropriate term, let's send it to
|
||||||
// aspell
|
// aspell
|
||||||
LOGDEB2(("Apell::buildDict: SEND\n"));
|
LOGDEB2(("Apell::buildDict: SEND\n"));
|
||||||
|
@ -382,7 +380,6 @@ bool Aspell::check(const string &iterm, string& reason)
|
||||||
if (iterm.empty())
|
if (iterm.empty())
|
||||||
return true; //??
|
return true; //??
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (!o_index_stripchars) {
|
if (!o_index_stripchars) {
|
||||||
string lower;
|
string lower;
|
||||||
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
||||||
|
@ -391,7 +388,6 @@ bool Aspell::check(const string &iterm, string& reason)
|
||||||
}
|
}
|
||||||
mterm.swap(lower);
|
mterm.swap(lower);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
int ret = aapi.aspell_speller_check(m_data->m_speller,
|
int ret = aapi.aspell_speller_check(m_data->m_speller,
|
||||||
mterm.c_str(), mterm.length());
|
mterm.c_str(), mterm.length());
|
||||||
|
@ -416,7 +412,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
|
||||||
if (mterm.empty())
|
if (mterm.empty())
|
||||||
return true; //??
|
return true; //??
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (!o_index_stripchars) {
|
if (!o_index_stripchars) {
|
||||||
string lower;
|
string lower;
|
||||||
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
if (!unacmaybefold(mterm, lower, "UTF-8", UNACOP_FOLD)) {
|
||||||
|
@ -425,7 +420,6 @@ bool Aspell::suggest(Rcl::Db &db, const string &_term,
|
||||||
}
|
}
|
||||||
mterm.swap(lower);
|
mterm.swap(lower);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
AspellCanHaveError *ret;
|
AspellCanHaveError *ret;
|
||||||
|
|
||||||
|
|
|
@ -99,9 +99,6 @@
|
||||||
/* Use multiple threads for indexing */
|
/* Use multiple threads for indexing */
|
||||||
#undef IDX_THREADS
|
#undef IDX_THREADS
|
||||||
|
|
||||||
/* Remove case and accents from terms */
|
|
||||||
#undef RCL_INDEX_STRIPCHARS
|
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#undef STDC_HEADERS
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
|
|
|
@ -51,10 +51,10 @@ using namespace std;
|
||||||
|
|
||||||
// Static, logically const, RclConfig members are initialized once from the
|
// Static, logically const, RclConfig members are initialized once from the
|
||||||
// first object built during process initialization.
|
// first object built during process initialization.
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// We default to a case- and diacritics-less index for now
|
// We default to a case- and diacritics-less index for now
|
||||||
bool o_index_stripchars = true;
|
bool o_index_stripchars = true;
|
||||||
#endif
|
|
||||||
string RclConfig::o_localecharset;
|
string RclConfig::o_localecharset;
|
||||||
|
|
||||||
bool ParamStale::needrecompute()
|
bool ParamStale::needrecompute()
|
||||||
|
@ -138,7 +138,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||||
} else {
|
} else {
|
||||||
const char *cp = getenv("RECOLL_CONFDIR");
|
const char *cp = getenv("RECOLL_CONFDIR");
|
||||||
if (cp) {
|
if (cp) {
|
||||||
m_confdir = cp;
|
m_confdir = path_canon(cp);
|
||||||
} else {
|
} else {
|
||||||
autoconfdir = true;
|
autoconfdir = true;
|
||||||
m_confdir = path_cat(path_home(), ".recoll/");
|
m_confdir = path_cat(path_home(), ".recoll/");
|
||||||
|
@ -274,13 +274,11 @@ bool RclConfig::updateMainConfig()
|
||||||
FsTreeWalker::setNoFnmPathname();
|
FsTreeWalker::setNoFnmPathname();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
static int m_index_stripchars_init = 0;
|
static int m_index_stripchars_init = 0;
|
||||||
if (!m_index_stripchars_init) {
|
if (!m_index_stripchars_init) {
|
||||||
getConfParam("indexStripChars", &o_index_stripchars);
|
getConfParam("indexStripChars", &o_index_stripchars);
|
||||||
m_index_stripchars_init = 1;
|
m_index_stripchars_init = 1;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -319,9 +319,8 @@ class RclConfig {
|
||||||
// stripped of accents and case or a raw one. Ideally, it should be
|
// stripped of accents and case or a raw one. Ideally, it should be
|
||||||
// constant, but it needs to be initialized from the configuration, so
|
// constant, but it needs to be initialized from the configuration, so
|
||||||
// there is no way to do this. It never changes after initialization
|
// there is no way to do this. It never changes after initialization
|
||||||
// of course. When set, it is supposed to get all of recoll to behave like if
|
// of course. Changing the value on a given index imposes a
|
||||||
// if was compiled with RCL_INDEX_STRIPCHARS
|
// reset. When using multiple indexes, all must have the same value
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
extern bool o_index_stripchars;
|
extern bool o_index_stripchars;
|
||||||
#endif
|
|
||||||
#endif /* _RCLCONFIG_H_INCLUDED_ */
|
#endif /* _RCLCONFIG_H_INCLUDED_ */
|
||||||
|
|
19
src/configure
vendored
19
src/configure
vendored
|
@ -717,7 +717,6 @@ with_fam
|
||||||
enable_xattr
|
enable_xattr
|
||||||
enable_idxthreads
|
enable_idxthreads
|
||||||
enable_camelcase
|
enable_camelcase
|
||||||
enable_stripchars
|
|
||||||
enable_python_module
|
enable_python_module
|
||||||
enable_pic
|
enable_pic
|
||||||
enable_qtgui
|
enable_qtgui
|
||||||
|
@ -1367,9 +1366,6 @@ Optional Features:
|
||||||
manual" and "my sql manual" are the same, but not
|
manual" and "my sql manual" are the same, but not
|
||||||
the same as "mysql manual" (in phrases only and you
|
the same as "mysql manual" (in phrases only and you
|
||||||
could raise the phrase slack to get a match).
|
could raise the phrase slack to get a match).
|
||||||
--enable-stripchars Remove diacritics and fold character case in indexed
|
|
||||||
terms. This will yield less precise searches but the
|
|
||||||
index will be smaller
|
|
||||||
--disable-python-module Do not build the Python module.
|
--disable-python-module Do not build the Python module.
|
||||||
--disable-pic Do not compile library objects as position
|
--disable-pic Do not compile library objects as position
|
||||||
independant code. This is incompatible with the php
|
independant code. This is incompatible with the php
|
||||||
|
@ -4396,21 +4392,6 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Not by default as these are little used for now.
|
|
||||||
# Check whether --enable-stripchars was given.
|
|
||||||
if test "${enable_stripchars+set}" = set; then :
|
|
||||||
enableval=$enable_stripchars; stripcharsEnabled=$enableval
|
|
||||||
else
|
|
||||||
stripcharsEnabled=no
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if test X$stripcharsEnabled = Xyes ; then
|
|
||||||
|
|
||||||
$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Disable building the python module. This is built by default, because
|
# Disable building the python module. This is built by default, because
|
||||||
# it's really the easiest way to interface and extend recoll. It forces PIC
|
# it's really the easiest way to interface and extend recoll. It forces PIC
|
||||||
# objects for everything (indexing performance impact: 1%), because it's
|
# objects for everything (indexing performance impact: 1%), because it's
|
||||||
|
|
|
@ -211,17 +211,6 @@ if test X$camelcaseEnabled = Xyes ; then
|
||||||
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Not by default as these are little used for now.
|
|
||||||
AC_ARG_ENABLE(stripchars,
|
|
||||||
AC_HELP_STRING([--enable-stripchars],
|
|
||||||
[Remove diacritics and fold character case in indexed terms. This will
|
|
||||||
yield less precise searches but the index will be smaller]),
|
|
||||||
stripcharsEnabled=$enableval, stripcharsEnabled=no)
|
|
||||||
|
|
||||||
if test X$stripcharsEnabled = Xyes ; then
|
|
||||||
AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Disable building the python module. This is built by default, because
|
# Disable building the python module. This is built by default, because
|
||||||
# it's really the easiest way to interface and extend recoll. It forces PIC
|
# it's really the easiest way to interface and extend recoll. It forces PIC
|
||||||
# objects for everything (indexing performance impact: 1%), because it's
|
# objects for everything (indexing performance impact: 1%), because it's
|
||||||
|
|
|
@ -164,7 +164,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
|
||||||
vboxLayout->setSpacing(spacing);
|
vboxLayout->setSpacing(spacing);
|
||||||
vboxLayout->setMargin(margin);
|
vboxLayout->setMargin(margin);
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (!o_index_stripchars) {
|
if (!o_index_stripchars) {
|
||||||
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
|
ConfLink lnk1(new ConfLinkRclRep(config, "autodiacsens"));
|
||||||
ConfParamBoolW* cp1 =
|
ConfParamBoolW* cp1 =
|
||||||
|
@ -191,7 +190,6 @@ ConfSearchPanelW::ConfSearchPanelW(QWidget *parent, ConfNull *config)
|
||||||
));
|
));
|
||||||
vboxLayout->addWidget(cp2);
|
vboxLayout->addWidget(cp2);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
|
ConfLink lnk3(new ConfLinkRclRep(config, "maxTermExpand"));
|
||||||
ConfParamIntW* cp3 =
|
ConfParamIntW* cp3 =
|
||||||
|
|
|
@ -119,13 +119,7 @@ void SpellW::init()
|
||||||
resTW->setColumnWidth(1, 150);
|
resTW->setColumnWidth(1, 150);
|
||||||
resTW->installEventFilter(this);
|
resTW->installEventFilter(this);
|
||||||
|
|
||||||
bool stripped = false;
|
if (o_index_stripchars) {
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
stripped = true;
|
|
||||||
#else
|
|
||||||
stripped = o_index_stripchars;
|
|
||||||
#endif
|
|
||||||
if (stripped) {
|
|
||||||
caseSensCB->setEnabled(false);
|
caseSensCB->setEnabled(false);
|
||||||
caseSensCB->setEnabled(false);
|
caseSensCB->setEnabled(false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,17 +93,13 @@ class TextSplitPTR : public TextSplit {
|
||||||
// (phrase or near), update positions list.
|
// (phrase or near), update positions list.
|
||||||
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
||||||
string dumb = term;
|
string dumb = term;
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#endif
|
|
||||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
|
LOGINFO(("PlainToRich::takeword: unac failed for [%s]\n",
|
||||||
term.c_str()));
|
term.c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
||||||
// pos, bts, bte));
|
// pos, bts, bte));
|
||||||
|
|
|
@ -358,11 +358,7 @@ void ResListPager::displayPage(RclConfig *config)
|
||||||
map<string, vector<string> > spellings;
|
map<string, vector<string> > spellings;
|
||||||
suggest(uterms, spellings);
|
suggest(uterms, spellings);
|
||||||
if (!spellings.empty()) {
|
if (!spellings.empty()) {
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#else
|
|
||||||
if (true) {
|
|
||||||
#endif
|
|
||||||
chunk <<
|
chunk <<
|
||||||
trans("<p><i>Alternate spellings (accents suppressed): </i>")
|
trans("<p><i>Alternate spellings (accents suppressed): </i>")
|
||||||
<< "<br /><blockquote>";
|
<< "<br /><blockquote>";
|
||||||
|
|
|
@ -116,21 +116,15 @@ static void sigcleanup(int sig)
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
bool o_index_stripchars;
|
bool o_index_stripchars;
|
||||||
#endif
|
|
||||||
|
|
||||||
inline bool has_prefix(const string& trm)
|
inline bool has_prefix(const string& trm)
|
||||||
{
|
{
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#endif
|
|
||||||
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
|
return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
} else {
|
} else {
|
||||||
return trm.size() > 0 && trm[0] == ':';
|
return trm.size() > 0 && trm[0] == ':';
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
|
@ -212,7 +206,6 @@ int main(int argc, char **argv)
|
||||||
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
|
cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
|
||||||
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
|
db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// If we have terms with a leading ':' it's a new style,
|
// If we have terms with a leading ':' it's a new style,
|
||||||
// unstripped index
|
// unstripped index
|
||||||
{
|
{
|
||||||
|
@ -223,7 +216,6 @@ int main(int argc, char **argv)
|
||||||
o_index_stripchars = false;
|
o_index_stripchars = false;
|
||||||
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
|
cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
if (op_flags & OPT_T) {
|
if (op_flags & OPT_T) {
|
||||||
Xapian::TermIterator term;
|
Xapian::TermIterator term;
|
||||||
|
|
|
@ -36,10 +36,6 @@ using namespace std;
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
#define bufprefix(BUF, L) {(BUF)[0] = L;}
|
|
||||||
#define bpoffs() 1
|
|
||||||
#else
|
|
||||||
static inline void bufprefix(char *buf, char c)
|
static inline void bufprefix(char *buf, char c)
|
||||||
{
|
{
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
|
@ -54,7 +50,6 @@ static inline int bpoffs()
|
||||||
{
|
{
|
||||||
return o_index_stripchars ? 1 : 3;
|
return o_index_stripchars ? 1 : 3;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
|
Xapian::Query date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
|
||||||
{
|
{
|
||||||
|
|
|
@ -48,9 +48,7 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
// If langs is empty and we don't need casediac expansion, then no need to
|
// If langs is empty and we don't need casediac expansion, then no need to
|
||||||
// walk the big list
|
// walk the big list
|
||||||
if (langs.empty()) {
|
if (langs.empty()) {
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
#endif
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,7 +66,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
stemdbs.back().recreate();
|
stemdbs.back().recreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// Unaccented stem dbs
|
// Unaccented stem dbs
|
||||||
vector<XapWritableComputableSynFamMember> unacstemdbs;
|
vector<XapWritableComputableSynFamMember> unacstemdbs;
|
||||||
// We can reuse the same stemmer pointers, the objects are stateless.
|
// We can reuse the same stemmer pointers, the objects are stateless.
|
||||||
|
@ -85,7 +82,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
diacasedb(wdb, synFamDiCa, "all", &transunac);
|
diacasedb(wdb, synFamDiCa, "all", &transunac);
|
||||||
if (!o_index_stripchars)
|
if (!o_index_stripchars)
|
||||||
diacasedb.recreate();
|
diacasedb.recreate();
|
||||||
#endif
|
|
||||||
|
|
||||||
// Walk the list of all terms, and stem/unac each.
|
// Walk the list of all terms, and stem/unac each.
|
||||||
string ermsg;
|
string ermsg;
|
||||||
|
@ -107,7 +103,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
}
|
}
|
||||||
|
|
||||||
string lower = *it;
|
string lower = *it;
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// If the index is raw, compute the case-folded term which
|
// If the index is raw, compute the case-folded term which
|
||||||
// is the input to the stem db, and add a synonym from the
|
// is the input to the stem db, and add a synonym from the
|
||||||
// stripped term to the cased and accented one, for accent
|
// stripped term to the cased and accented one, for accent
|
||||||
|
@ -116,7 +111,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
|
unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
|
||||||
diacasedb.addSynonym(*it);
|
diacasedb.addSynonym(*it);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// Don't apply stemming to terms which don't look like
|
// Don't apply stemming to terms which don't look like
|
||||||
// natural language words.
|
// natural language words.
|
||||||
|
@ -131,7 +125,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
stemdbs[i].addSynonym(lower);
|
stemdbs[i].addSynonym(lower);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// For a raw index, also maybe create a stem expansion for
|
// For a raw index, also maybe create a stem expansion for
|
||||||
// the unaccented term. While this may be incorrect, it is
|
// the unaccented term. While this may be incorrect, it is
|
||||||
// also necessary for searching in a diacritic-unsensitive
|
// also necessary for searching in a diacritic-unsensitive
|
||||||
|
@ -145,7 +138,6 @@ bool createExpansionDbs(Xapian::WritableDatabase& wdb,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
if (!ermsg.empty()) {
|
if (!ermsg.empty()) {
|
||||||
|
|
|
@ -76,15 +76,9 @@ const string parent_prefix("F");
|
||||||
|
|
||||||
// Special terms to mark begin/end of field (for anchored searches), and
|
// Special terms to mark begin/end of field (for anchored searches), and
|
||||||
// page breaks
|
// page breaks
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
const string start_of_field_term = "XXST";
|
|
||||||
const string end_of_field_term = "XXND";
|
|
||||||
static const string page_break_term = "XXPG";
|
|
||||||
#else
|
|
||||||
string start_of_field_term;
|
string start_of_field_term;
|
||||||
string end_of_field_term;
|
string end_of_field_term;
|
||||||
const string page_break_term = "XXPG/";
|
const string page_break_term = "XXPG/";
|
||||||
#endif
|
|
||||||
|
|
||||||
// Field name for the unsplit file name. Has to exist in the field file
|
// Field name for the unsplit file name. Has to exist in the field file
|
||||||
// because of usage in termmatch()
|
// because of usage in termmatch()
|
||||||
|
@ -356,7 +350,6 @@ Db::Db(const RclConfig *cfp)
|
||||||
m_flushMb(-1), m_maxFsOccupPc(0)
|
m_flushMb(-1), m_maxFsOccupPc(0)
|
||||||
{
|
{
|
||||||
m_config = new RclConfig(*cfp);
|
m_config = new RclConfig(*cfp);
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (start_of_field_term.empty()) {
|
if (start_of_field_term.empty()) {
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
start_of_field_term = "XXST";
|
start_of_field_term = "XXST";
|
||||||
|
@ -366,7 +359,6 @@ Db::Db(const RclConfig *cfp)
|
||||||
end_of_field_term = "XXND/";
|
end_of_field_term = "XXND/";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
m_ndb = new Native(this);
|
m_ndb = new Native(this);
|
||||||
if (m_config) {
|
if (m_config) {
|
||||||
|
@ -402,8 +394,8 @@ bool Db::open(OpenMode mode, OpenError *error)
|
||||||
m_reason = "Null configuration or Xapian Db";
|
m_reason = "Null configuration or Xapian Db";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
|
||||||
m_ndb->m_iswritable));
|
m_ndb->m_iswritable, mode));
|
||||||
|
|
||||||
if (m_ndb->m_isopen) {
|
if (m_ndb->m_isopen) {
|
||||||
// We used to return an error here but I see no reason to
|
// We used to return an error here but I see no reason to
|
||||||
|
@ -571,9 +563,7 @@ int Db::termDocCnt(const string& _term)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
string term = _term;
|
string term = _term;
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
#endif
|
|
||||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -851,9 +841,7 @@ string Db::getSpellingSuggestion(const string& word)
|
||||||
|
|
||||||
string term = word;
|
string term = word;
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
#endif
|
|
||||||
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
||||||
return string();
|
return string();
|
||||||
|
@ -903,9 +891,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||||
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
|
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
|
||||||
|
|
||||||
TermProcPrep tpprep(nxt);
|
TermProcPrep tpprep(nxt);
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
#endif
|
|
||||||
nxt = &tpprep;
|
nxt = &tpprep;
|
||||||
|
|
||||||
TextSplitDb splitter(newdocument, nxt);
|
TextSplitDb splitter(newdocument, nxt);
|
||||||
|
|
|
@ -133,15 +133,11 @@ public:
|
||||||
|
|
||||||
inline bool has_prefix(const string& trm)
|
inline bool has_prefix(const string& trm)
|
||||||
{
|
{
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#endif
|
|
||||||
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
|
return !trm.empty() && 'A' <= trm[0] && trm[0] <= 'Z';
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
} else {
|
} else {
|
||||||
return !trm.empty() && trm[0] == ':';
|
return !trm.empty() && trm[0] == ':';
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline string strip_prefix(const string& trm)
|
inline string strip_prefix(const string& trm)
|
||||||
|
@ -149,13 +145,10 @@ inline string strip_prefix(const string& trm)
|
||||||
if (trm.empty())
|
if (trm.empty())
|
||||||
return trm;
|
return trm;
|
||||||
string::size_type st = 0;
|
string::size_type st = 0;
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#endif
|
|
||||||
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
|
st = trm.find_first_not_of("ABCDEFIJKLMNOPQRSTUVWXYZ");
|
||||||
if (st == string::npos)
|
if (st == string::npos)
|
||||||
return string();
|
return string();
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
} else {
|
} else {
|
||||||
if (has_prefix(trm)) {
|
if (has_prefix(trm)) {
|
||||||
st = trm.find_last_of(":") + 1;
|
st = trm.find_last_of(":") + 1;
|
||||||
|
@ -163,21 +156,16 @@ inline string strip_prefix(const string& trm)
|
||||||
return trm;
|
return trm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
return trm.substr(st);
|
return trm.substr(st);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline string wrap_prefix(const string& pfx)
|
inline string wrap_prefix(const string& pfx)
|
||||||
{
|
{
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
#endif
|
|
||||||
return pfx;
|
return pfx;
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
} else {
|
} else {
|
||||||
return cstr_colon + pfx + cstr_colon;
|
return cstr_colon + pfx + cstr_colon;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -462,13 +450,9 @@ extern const string udi_prefix;
|
||||||
extern const string parent_prefix;
|
extern const string parent_prefix;
|
||||||
extern const string mimetype_prefix;
|
extern const string mimetype_prefix;
|
||||||
extern const string unsplitFilenameFieldName;
|
extern const string unsplitFilenameFieldName;
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
extern const string start_of_field_term;
|
|
||||||
extern const string end_of_field_term;
|
|
||||||
#else
|
|
||||||
extern string start_of_field_term;
|
extern string start_of_field_term;
|
||||||
extern string end_of_field_term;
|
extern string end_of_field_term;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* _DB_H_INCLUDED_ */
|
#endif /* _DB_H_INCLUDED_ */
|
||||||
|
|
|
@ -161,24 +161,18 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||||
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
||||||
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
||||||
|
|
||||||
bool stripped = false;
|
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
stripped = true;
|
|
||||||
#else
|
|
||||||
stripped = o_index_stripchars;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
||||||
" max %d field [%s] stripped %d init res.size %u\n",
|
" max %d field [%s] stripped %d init res.size %u\n",
|
||||||
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
||||||
_term.c_str(), max, field.c_str(), stripped, res.entries.size()));
|
_term.c_str(), max, field.c_str(), o_index_stripchars,
|
||||||
|
res.entries.size()));
|
||||||
|
|
||||||
// If index is stripped, no case or diac expansion can be needed:
|
// If index is stripped, no case or diac expansion can be needed:
|
||||||
// for the processing inside this routine, everything looks like
|
// for the processing inside this routine, everything looks like
|
||||||
// we're all-sensitive: no use of expansion db.
|
// we're all-sensitive: no use of expansion db.
|
||||||
// Also, convert input to lowercase and strip its accents.
|
// Also, convert input to lowercase and strip its accents.
|
||||||
string term = _term;
|
string term = _term;
|
||||||
if (stripped) {
|
if (o_index_stripchars) {
|
||||||
diac_sensitive = case_sensitive = true;
|
diac_sensitive = case_sensitive = true;
|
||||||
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
||||||
|
@ -186,17 +180,11 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// The case/diac expansion db
|
// The case/diac expansion db
|
||||||
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
||||||
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
||||||
#endif // RCL_INDEX_STRIPCHARS
|
|
||||||
|
|
||||||
|
|
||||||
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
|
||||||
#else
|
|
||||||
RefCntr<StrMatcher> matcher;
|
RefCntr<StrMatcher> matcher;
|
||||||
if (matchtyp == ET_WILD) {
|
if (matchtyp == ET_WILD) {
|
||||||
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
|
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
|
||||||
|
@ -233,16 +221,9 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||||
idxTermMatch(typ_sens, lang, term, res, max, field);
|
idxTermMatch(typ_sens, lang, term, res, max, field);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // RCL_INDEX_STRIPCHARS
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Expansion is STEM or NONE (which may still need case/diac exp)
|
// Expansion is STEM or NONE (which may still need case/diac exp)
|
||||||
|
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
|
|
||||||
idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
|
|
||||||
|
|
||||||
#else
|
|
||||||
vector<string> lexp;
|
vector<string> lexp;
|
||||||
if (diac_sensitive && case_sensitive) {
|
if (diac_sensitive && case_sensitive) {
|
||||||
// No case/diac expansion
|
// No case/diac expansion
|
||||||
|
@ -297,7 +278,6 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
|
||||||
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
TermMatchCmpByTerm tcmp;
|
TermMatchCmpByTerm tcmp;
|
||||||
sort(res.entries.begin(), res.entries.end(), tcmp);
|
sort(res.entries.begin(), res.entries.end(), tcmp);
|
||||||
|
@ -325,12 +305,10 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||||
tmtptostr(typ), lang.c_str(), root.c_str(),
|
tmtptostr(typ), lang.c_str(), root.c_str(),
|
||||||
max, field.c_str(), res.entries.size()));
|
max, field.c_str(), res.entries.size()));
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (typ == ET_STEM) {
|
if (typ == ET_STEM) {
|
||||||
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
Xapian::Database xdb = m_ndb->xrdb;
|
Xapian::Database xdb = m_ndb->xrdb;
|
||||||
|
|
||||||
|
@ -346,27 +324,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||||
}
|
}
|
||||||
res.prefix = prefix;
|
res.prefix = prefix;
|
||||||
|
|
||||||
#ifdef RCL_INDEX_STRIPCHARS
|
|
||||||
if (typ == ET_STEM) {
|
|
||||||
vector<string> exp;
|
|
||||||
StemDb db(m_ndb->xrdb);
|
|
||||||
if (!db.stemExpand(langs, term, exp))
|
|
||||||
return false;
|
|
||||||
res.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
|
||||||
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
|
|
||||||
it != res.entries.end(); it++) {
|
|
||||||
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
|
|
||||||
it->docs = xdb.get_termfreq(it->term),
|
|
||||||
xdb, m_reason);
|
|
||||||
if (!m_reason.empty())
|
|
||||||
return false;
|
|
||||||
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
|
|
||||||
}
|
|
||||||
if (!prefix.empty())
|
|
||||||
addPrefix(res.entries, prefix);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
RefCntr<StrMatcher> matcher;
|
RefCntr<StrMatcher> matcher;
|
||||||
if (typ == ET_REGEXP) {
|
if (typ == ET_REGEXP) {
|
||||||
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
||||||
|
@ -449,7 +406,6 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
||||||
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -572,7 +572,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
||||||
|
|
||||||
int termmatchsens = 0;
|
int termmatchsens = 0;
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
|
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
|
||||||
bool case_sensitive = (mods & SDCM_CASESENS) != 0;
|
bool case_sensitive = (mods & SDCM_CASESENS) != 0;
|
||||||
|
|
||||||
|
@ -616,7 +615,6 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
|
||||||
termmatchsens |= Db::ET_CASESENS;
|
termmatchsens |= Db::ET_CASESENS;
|
||||||
if (diac_sensitive)
|
if (diac_sensitive)
|
||||||
termmatchsens |= Db::ET_DIACSENS;
|
termmatchsens |= Db::ET_DIACSENS;
|
||||||
#endif
|
|
||||||
|
|
||||||
if (noexpansion) {
|
if (noexpansion) {
|
||||||
oexp.push_back(prefix + term);
|
oexp.push_back(prefix + term);
|
||||||
|
@ -936,9 +934,7 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
|
||||||
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
|
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
|
||||||
//tpcommon.onlygrams(true);
|
//tpcommon.onlygrams(true);
|
||||||
TermProcPrep tpprep(nxt);
|
TermProcPrep tpprep(nxt);
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (o_index_stripchars)
|
if (o_index_stripchars)
|
||||||
#endif
|
|
||||||
nxt = &tpprep;
|
nxt = &tpprep;
|
||||||
|
|
||||||
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
||||||
|
|
|
@ -63,7 +63,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
|
||||||
(void)expander.synExpand(term, result);
|
(void)expander.synExpand(term, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
if (!o_index_stripchars) {
|
if (!o_index_stripchars) {
|
||||||
string unac;
|
string unac;
|
||||||
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
|
unacmaybefold(term, unac, "UTF-8", UNACOP_UNAC);
|
||||||
|
@ -78,7 +77,6 @@ bool StemDb::stemExpand(const std::string& langs, const std::string& _term,
|
||||||
(void)expander.synExpand(unac, result);
|
(void)expander.synExpand(unac, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
if (result.empty())
|
if (result.empty())
|
||||||
result.push_back(term);
|
result.push_back(term);
|
||||||
|
|
|
@ -212,7 +212,6 @@ private:
|
||||||
// Lowercase accented stem to expansion. Family member name: language
|
// Lowercase accented stem to expansion. Family member name: language
|
||||||
static const std::string synFamStem("Stm");
|
static const std::string synFamStem("Stm");
|
||||||
|
|
||||||
#ifndef RCL_INDEX_STRIPCHARS
|
|
||||||
// Lowercase unaccented stem to expansion. Family member name: language
|
// Lowercase unaccented stem to expansion. Family member name: language
|
||||||
static const std::string synFamStemUnac("StU");
|
static const std::string synFamStemUnac("StU");
|
||||||
|
|
||||||
|
@ -220,7 +219,6 @@ static const std::string synFamStemUnac("StU");
|
||||||
// member, named "all". This set is used for separate case/diac
|
// member, named "all". This set is used for separate case/diac
|
||||||
// expansion by post-filtering the results of dual expansion.
|
// expansion by post-filtering the results of dual expansion.
|
||||||
static const std::string synFamDiCa("DCa");
|
static const std::string synFamDiCa("DCa");
|
||||||
#endif // !RCL_INDEX_STRIPCHARS
|
|
||||||
|
|
||||||
} // end namespace Rcl
|
} // end namespace Rcl
|
||||||
|
|
||||||
|
|
|
@ -580,7 +580,7 @@ int Pidfile::flopen()
|
||||||
{
|
{
|
||||||
const char *path = m_path.c_str();
|
const char *path = m_path.c_str();
|
||||||
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
|
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
|
||||||
m_reason = "Open failed";
|
m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue