From 1f9e9d200aedac29a9b6b573d98514db3dce52dc Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 14 Jan 2013 09:57:04 +0100 Subject: [PATCH] small fixups and compilation issues --- src/rcldb/rcldb.cpp | 16 +++++++--------- src/rcldb/searchdata.cpp | 13 ++++++++----- src/rcldb/searchdata.h | 13 ++++--------- src/rcldb/stemdb.cpp | 9 ++++++++- src/rcldb/synfamily.h | 2 ++ 5 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 8cef4821..2b6fee49 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1697,7 +1697,7 @@ static void addPrefix(vector& terms, const string& prefix) // If field is set, we return a list of appropriately prefixed terms (which // are going to be used to build a Xapian query). bool Db::termMatch(MatchType typ, const string &lang, - const string &root, + const string &_root, TermMatchResult& res, int max, const string& field) @@ -1714,15 +1714,14 @@ bool Db::termMatch(MatchType typ, const string &lang, if (!m_reason.empty()) return false; - // Get rid of capitals and accents - - string droot = root; + string droot = _root; + // If index is stripped, get rid of capitals and accents #ifndef RCL_INDEX_STRIPCHARS if (o_index_stripchars) #endif - if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) { - LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str())); + if (!unacmaybefold(_root, droot, "UTF-8", UNACOP_UNACFOLD)) { + LOGERR(("Db::termMatch: unac failed for [%s]\n", _root.c_str())); return false; } @@ -1742,7 +1741,7 @@ bool Db::termMatch(MatchType typ, const string &lang, res.prefix = prefix; if (typ == ET_STEM) { - if (!stemExpand(lang, root, res)) + if (!stemExpand(lang, droot, res)) return false; for (vector::iterator it = res.entries.begin(); it != res.entries.end(); it++) { @@ -1759,8 +1758,7 @@ bool Db::termMatch(MatchType typ, const string &lang, regex_t reg; int errcode; if (typ == ET_REGEXP) { - string mroot = droot; - if ((errcode = regcomp(®, mroot.c_str(), + if ((errcode = regcomp(®, droot.c_str(), REG_EXTENDED|REG_NOSUB))) { char errbuf[200]; regerror(errcode, ®, errbuf, 199); diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 694aab2d..bcfbf81f 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -606,10 +606,13 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, return true; } +#ifndef RCL_INDEX_STRIPCHARS // The case/diac expansion db SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD); XapComputableSynFamMember synac(db.m_ndb->xrdb, synFamDiCa, "all", &unacfoldtrans); +#endif // RCL_INDEX_STRIPCHARS + TermMatchResult res; if (haswild) { @@ -969,10 +972,11 @@ static int stringToMods(string& s) * count) */ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq, - int mods, string &ermsg, - void *pq, int slack, bool useNear) + string &ermsg, void *pq, + int slack, bool useNear) { vector &pqueries(*(vector*)pq); + int mods = m_modifiers; LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x " "slack %d near %d\n", @@ -1094,7 +1098,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p) } vector pqueries; - if (!processUserString(db, m_text, getModifiers(), m_reason, &pqueries)) + if (!processUserString(db, m_text, m_reason, &pqueries)) return false; if (pqueries.empty()) { LOGERR(("SearchDataClauseSimple: resolved to null query\n")); @@ -1185,8 +1189,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p) } string s = cstr_dquote + m_text + cstr_dquote; bool useNear = (m_tp == SCLT_NEAR); - if (!processUserString(db, s, getModifiers(), m_reason, &pqueries, - m_slack, useNear)) + if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear)) return false; if (pqueries.empty()) { LOGERR(("SearchDataClauseDist: resolved to null query\n")); diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index b3dc7b80..6747cc47 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -265,15 +265,9 @@ public: { m_modifiers = mod; } - virtual int getModifiers() - { - return m_modifiers; - } virtual void addModifier(Modifier mod) { - int imod = getModifiers(); - imod |= mod; - setModifiers(Modifier(imod)); + m_modifiers = Modifier(m_modifiers | mod); } virtual void setWeight(float w) { @@ -316,7 +310,7 @@ class SearchDataClauseSimple : public SearchDataClause { public: SearchDataClauseSimple(SClType tp, const std::string& txt, const std::string& fld = std::string()) - : SearchDataClause(tp), m_text(txt), m_field(fld) + : SearchDataClause(tp), m_text(txt), m_field(fld), m_curcl(0) { m_haveWildCards = (txt.find_first_of(cstr_minwilds) != std::string::npos); @@ -345,9 +339,10 @@ protected: std::string m_text; // Raw user entry text. std::string m_field; // Field specification if any HighlightData m_hldata; + // Current count of Xapian clauses, to check against expansion limit int m_curcl; - bool processUserString(Rcl::Db &db, const string &iq, int mods, + bool processUserString(Rcl::Db &db, const string &iq, std::string &ermsg, void* pq, int slack = 0, bool useNear = false); bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods, diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp index f2fd518b..e1e0d304 100644 --- a/src/rcldb/stemdb.cpp +++ b/src/rcldb/stemdb.cpp @@ -43,11 +43,18 @@ namespace Rcl { /** * Expand for one or several languages */ -bool StemDb::stemExpand(const std::string& langs, const std::string& term, +bool StemDb::stemExpand(const std::string& langs, const std::string& _term, vector& result) { vector llangs; stringToStrings(langs, llangs); + + // The stemdb keys may have kept their diacritics or not but they + // are always lower-case. It would be more logical for the term + // transformers to perform before doing the stemming, but this + // would be inefficient when there are several stemming languages + string term; + unacmaybefold(_term, term, "UTF-8", UNACOP_FOLD); for (vector::const_iterator it = llangs.begin(); it != llangs.end(); it++) { diff --git a/src/rcldb/synfamily.h b/src/rcldb/synfamily.h index c3689a8e..56923518 100644 --- a/src/rcldb/synfamily.h +++ b/src/rcldb/synfamily.h @@ -210,6 +210,7 @@ private: // Lowercase accented stem to expansion. Family member name: language static const std::string synFamStem("Stm"); +#ifndef RCL_INDEX_STRIPCHARS // Lowercase unaccented stem to expansion. Family member name: language static const std::string synFamStemUnac("StU"); @@ -217,6 +218,7 @@ static const std::string synFamStemUnac("StU"); // member, named "all". This set is used for separate case/diac // expansion by post-filtering the results of dual expansion. static const std::string synFamDiCa("DCa"); +#endif // !RCL_INDEX_STRIPCHARS } // end namespace Rcl