diff --git a/src/rcldb/Makefile b/src/rcldb/Makefile
index 52f5f65f..bee50bc5 100644
--- a/src/rcldb/Makefile
+++ b/src/rcldb/Makefile
@@ -3,7 +3,7 @@ depth = ..
 include $(depth)/mk/sysconf
 
 # Only test executables get build in here
-PROGS = stoplist
+PROGS = synfamily stoplist
 
 all: $(BIGLIB) $(PROGS)
 
@@ -19,6 +19,14 @@ trstoplist.o : stoplist.cpp
 	$(CXX) $(ALL_CXXFLAGS) -DTEST_STOPLIST -c -o trstoplist.o \
 	stoplist.cpp
 
+SYNFAMILY_OBJS= trsynfamily.o $(BIGLIB)
+synfamily : $(SYNFAMILY_OBJS)
+	$(CXX) $(ALL_CXXFLAGS) -o synfamily $(SYNFAMILY_OBJS) \
+	$(BIGLIB) $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
+trsynfamily.o : synfamily.cpp
+	$(CXX) $(ALL_CXXFLAGS) -DTEST_SYNFAMILY -c -o trsynfamily.o \
+	synfamily.cpp
+
 clean:
 	rm -f *.o $(PROGS)
 
diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h
index a7ca0801..6fb64261 100644
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@@ -24,39 +24,10 @@
 #include "workqueue.h"
 #endif // IDX_THREADS
 #include "xapian.h"
+#include "xmacros.h"
 
 namespace Rcl {
 
-// Generic Xapian exception catching code. We do this quite often,
-// and I have no idea how to do this except for a macro
-#define XCATCHERROR(MSG) \
-    catch (const Xapian::Error &e) { \
-        MSG = e.get_msg(); \
-        if (MSG.empty()) MSG = "Empty error message"; \
-    } catch (const string &s) { \
-        MSG = s; \
-        if (MSG.empty()) MSG = "Empty error message"; \
-    } catch (const char *s) { \
-        MSG = s; \
-        if (MSG.empty()) MSG = "Empty error message"; \
-    } catch (...) { \
-        MSG = "Caught unknown xapian exception"; \
-    }
-
-#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
-    for (int tries = 0; tries < 2; tries++) { \
-        try { \
-            STMTTOTRY; \
-            ERSTR.erase(); \
-            break; \
-        } catch (const Xapian::DatabaseModifiedError &e) { \
-            ERSTR = e.get_msg(); \
-            XAPDB.reopen(); \
-            continue; \
-        } XCATCHERROR(ERSTR); \
-        break; \
-    }
-
 class Query;
 
 #ifdef IDX_THREADS
@@ -143,16 +114,5 @@ class Db::Native {
 
 };
 
-// Xapian synonyms table abuse:
-// The Xapian synonyms mechanisms can be put to many uses, but,
-// unfortunately, it has a global name space (we'd like to be able to open
-// different synonym tables, but there is only one).
-// We use prefixes to create separate name spaces, in mostly the same way
-// that they are used in the main index. See synfamily.h
-// Prefixes are centrally defined here to avoid collisions
-//
-// Stem expansion family prefix. The family member name is the language
-static const std::string synprefStem("Stm");
-
 }
 #endif /* _rcldb_p_h_included_ */
diff --git a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp
index beaf355a..b6ec5a8e 100644
--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@@ -46,7 +46,7 @@ namespace StemDb {
 
 vector getLangs(Xapian::Database& xdb)
 {
-    XapSynFamily fam(xdb, synprefStem);
+    XapSynFamily fam(xdb, synFamStem);
     vector langs;
     (void)fam.getMembers(langs);
     return langs;
@@ -54,7 +54,7 @@ vector getLangs(Xapian::Database& xdb)
 
 bool deleteDb(Xapian::WritableDatabase& xdb, const string& lang)
 {
-    XapWritableSynFamily fam(xdb, synprefStem);
+    XapWritableSynFamily fam(xdb, synFamStem);
     return fam.deleteMember(lang);
 }
 
@@ -137,7 +137,7 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
     LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n",
              lang.c_str(), cron.secs()));
 
-    XapWritableSynFamily fam(xdb, synprefStem);
+    XapWritableSynFamily fam(xdb, synFamStem);
     fam.createMember(lang);
 
     for (map >::const_iterator it = assocs.begin();
@@ -161,16 +161,6 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
     return true;
 }
 
-static string stringlistdisp(const vector& sl)
-{
-    string s;
-    for (vector::const_iterator it = sl.begin(); it!= sl.end(); it++)
-        s += "[" + *it + "] ";
-    if (!s.empty())
-        s.erase(s.length()-1);
-    return s;
-}
-
 /**
  * Expand term to list of all terms which stem to the same term, for one
  * expansion language
@@ -186,7 +176,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
     LOGDEB(("stemExpand:%s: [%s] stem-> [%s]\n",
             lang.c_str(), term.c_str(), stem.c_str()));
 
-    XapSynFamily fam(xdb, synprefStem);
+    XapSynFamily fam(xdb, synFamStem);
     if (!fam.synExpand(lang, stem, result)) {
         // ?
     }
@@ -199,7 +189,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
             result.push_back(stem);
         }
         LOGDEB0(("stemExpand:%s: %s -> %s\n", lang.c_str(), stem.c_str(),
-                 stringlistdisp(result).c_str()));
+                 stringsToString(result).c_str()));
 
     } catch (...) {
         LOGERR(("stemExpand: error accessing stem db. lang [%s]\n",
diff --git a/src/rcldb/synfamily.cpp b/src/rcldb/synfamily.cpp
index 5027ba14..fd8ae16b 100644
--- a/src/rcldb/synfamily.cpp
+++ b/src/rcldb/synfamily.cpp
@@ -19,8 +19,7 @@
 #include "autoconfig.h"
 
 #include "debuglog.h"
-#include "rcldb.h"
-#include "rcldb_p.h"
+#include "xmacros.h"
 #include "synfamily.h"
 
 #include
@@ -148,4 +147,162 @@ bool XapWritableSynFamily::addSynonyms(const string& membername,
 }
 
 #else // TEST_SYNFAMILY
+#include "autoconfig.h"
+
+// NOTE(review): the header names on the system #include lines below were
+// not visible in the reviewed text. These are a best-effort reconstruction
+// (the code needs at least stdlib.h, iostream, string, vector) -- confirm.
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+using namespace std;
+
+#include "xapian.h"
+
+#include "smallut.h"
+#include "pathut.h"
+#include "xmacros.h"
+#include "synfamily.h"
+
+static string thisprog;
+// Bit set of the OPT_* flags seen on the command line
+static int op_flags;
+#define OPT_a 0x4
+#define OPT_c 0x8
+#define OPT_D 0x1
+#define OPT_d 0x10
+#define OPT_L 0x2
+#define OPT_l 0x20
+#define OPT_s 0x40
+#define OPT_e 0x80
+static string usage =
+    " -d <dbdir> {-s|-a|-c} database dir and synfamily: stem accents case\n"
+    " -l : list members\n"
+    " -L <member> : list entries for given member\n"
+    " -e <member> <key> : list expansion for given member and key\n"
+    " -D <member> : delete member\n"
+    " \n\n"
+    ;
+// Print the usage message on stderr and exit with status 1
+static void Usage(void)
+{
+    cerr << thisprog << ": usage:\n" << usage;
+    exit(1);
+}
+
+/**
+ * Test driver: list the members of a synonym family, list or expand the
+ * entries of one member, or delete a member. Only the stem family is
+ * supported for now. Returns 0 on success, 1 on error (bad usage exits 1).
+ */
+int main(int argc, char **argv)
+{
+    string dbdir(path_tildexpand("~/.recoll/xapiandb"));
+    string member;
+    string key;
+
+    thisprog = argv[0];
+    argc--; argv++;
+
+    while (argc > 0 && **argv == '-') {
+        (*argv)++;
+        if (!(**argv))
+            /* A lone "-" is not a valid option */
+            Usage();
+        while (**argv)
+            switch (*(*argv)++) {
+            case 'a': op_flags |= OPT_a; break;
+            case 'c': op_flags |= OPT_c; break;
+            // -D needs the name of the member to delete
+            case 'D': op_flags |= OPT_D; if (argc < 2) Usage();
+                member = *(++argv); argc--;
+                goto b1;
+            case 'd': op_flags |= OPT_d; if (argc < 2) Usage();
+                dbdir = *(++argv); argc--;
+                goto b1;
+            case 'e': op_flags |= OPT_e; if (argc < 3) Usage();
+                member = *(++argv);argc--;
+                key = *(++argv); argc--;
+                goto b1;
+            case 'l': op_flags |= OPT_l; break;
+            case 'L': op_flags |= OPT_L; if (argc < 2) Usage();
+                member = *(++argv); argc--;
+                goto b1;
+            case 's': op_flags |= OPT_s; break;
+            default: Usage(); break;
+            }
+    // Step to the next word; options taking values jump here directly
+    b1: argc--; argv++;
+    }
+
+    if (argc != 0)
+        Usage();
+
+    // We do stem only for now
+    string familyname;
+    if (op_flags & (OPT_a|OPT_c)) {
+        cerr << "Accents and case not ready" << endl;
+        return 1;
+    } else {
+        op_flags |= OPT_s;
+        familyname = Rcl::synFamStem;
+    }
+    if ((op_flags & (OPT_l|OPT_L|OPT_D|OPT_e)) == 0)
+        Usage();
+
+    string ermsg;
+    try {
+        if ((op_flags & (OPT_D)) == 0) { // Need write ?
+            Xapian::Database db(dbdir);
+            Rcl::XapSynFamily fam(db, familyname);
+            if (op_flags & OPT_l) {
+                vector<string> members;
+                if (!fam.getMembers(members)) {
+                    cerr << "getMembers error" << endl;
+                    return 1;
+                }
+                string out;
+                stringsToString(members, out);
+                cout << "Family: " << familyname << " Members: " << out << endl;
+            } else if (op_flags & OPT_L) {
+                fam.listMap(member);
+            } else if (op_flags & OPT_e) {
+                vector<string> exp;
+                if (!fam.synExpand(member, key, exp)) {
+                    cerr << "expand error" << endl;
+                    return 1;
+                }
+                string out;
+                stringsToString(exp, out);
+                cout << "Family: " << familyname << " Key: " << key
+                     << " Expansion: " << out << endl;
+            } else {
+                Usage();
+            }
+
+        } else {
+            Xapian::WritableDatabase db(dbdir, Xapian::DB_CREATE_OR_OPEN);
+            Rcl::XapWritableSynFamily fam(db, familyname);
+            if (op_flags & OPT_D) {
+                if (!fam.deleteMember(member)) {
+                    cerr << "deleteMember error" << endl;
+                    return 1;
+                }
+            } else {
+                Usage();
+            }
+        }
+    } XCATCHERROR (ermsg);
+    if (!ermsg.empty()) {
+        cerr << "Xapian Exception: " << ermsg << endl;
+        return 1;
+    }
+    return 0;
+}
+
 #endif // TEST_SYNFAMILY
diff --git a/src/rcldb/synfamily.h b/src/rcldb/synfamily.h
index 9d45753f..36e1470b 100644
--- a/src/rcldb/synfamily.h
+++ b/src/rcldb/synfamily.h
@@ -25,9 +25,9 @@
  * can provide different applications each with a family of keyspaces.
  * Two characters are reserved by the class and should not be used inside
  * either family or member names: ':' and ';'
- * A synonym key for family "stemdb", member "french", key "thisstem"
+ * A synonym key for family "stemdb", member "french", key "somestem"
  * looks like:
- * :stemdb:french:stem -> stem siblings
+ * :stemdb:french:somestem -> somestem expansions
  * A special entry is used to list all the members for a family, e.g.:
  * :stemdb;members -> french, english ...
  */
@@ -47,29 +47,30 @@ public:
     XapSynFamily(Xapian::Database xdb, const std::string& familyname)
         : m_rdb(xdb)
     {
-        m_prefix1 = string(":") + familyname;
+        m_prefix1 = std::string(":") + familyname;
     }
 
     /** Expand one term (e.g.: familier) inside one family number (e.g: french)
      */
-    bool synExpand(const std::string& fammember,
-                   const std::string& term,
+    virtual bool synExpand(const std::string& fammember,
+                   const std::string& key,
                    std::vector& result);
 
     /** Retrieve all members of this family (e.g: french english german...) */
-    bool getMembers(std::vector&);
+    virtual bool getMembers(std::vector<std::string>&);
 
     /** debug: list map for one member to stdout */
-    bool listMap(const std::string& fam);
+    virtual bool listMap(const std::string& fam);
 
 protected:
     Xapian::Database m_rdb;
     std::string m_prefix1;
-    string entryprefix(const string& member)
+
+    virtual std::string entryprefix(const std::string& member)
     {
         return m_prefix1 + ":" + member + ":";
     }
-    string memberskey()
+    virtual std::string memberskey()
     {
         return m_prefix1 + ";" + "members";
     }
@@ -86,22 +87,32 @@ public:
 
     /** Delete all entries for one member (e.g. french), and remove from list
      * of members */
-    bool deleteMember(const std::string& membername);
+    virtual bool deleteMember(const std::string& membername);
 
     /** Add to list of members. Idempotent, does not affect actual expansions */
-    bool createMember(const std::string& membername);
+    virtual bool createMember(const std::string& membername);
 
     /** Add expansion list for term inside family member (e.g., inside
      * the french member, add expansion for familier -> familier,
      * familierement, ... */
-    bool addSynonyms(const string& membername,
-                     const string& term, const vector& trans);
+    virtual bool addSynonyms(const std::string& membername,
+                             const std::string& term,
+                             const std::vector<std::string>& trans);
 
 protected:
     Xapian::WritableDatabase m_wdb;
 };
 
 
+//
+// Prefixes are centrally defined here to avoid collisions
+//
+// Stem expansion family prefix. The family member name is the language
+static const std::string synFamStem("Stm");
+static const std::string synFamDiac("Dia");
+static const std::string synFamCase("Cse");
+
+
 }
 
 #endif /* _SYNFAMILY_H_INCLUDED_ */
diff --git a/src/rcldb/xmacros.h b/src/rcldb/xmacros.h
new file mode 100644
index 00000000..030cc8fb
--- /dev/null
+++ b/src/rcldb/xmacros.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2007 J.F.Dockes
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _xmacros_h_included_
+#define _xmacros_h_included_
+
+// Generic Xapian exception catching code. We do this quite often,
+// and I have no idea how to do this except for a macro
+#define XCATCHERROR(MSG) \
+    catch (const Xapian::Error &e) { \
+        MSG = e.get_msg(); \
+        if (MSG.empty()) MSG = "Empty error message"; \
+    } catch (const std::string &s) { \
+        MSG = s; \
+        if (MSG.empty()) MSG = "Empty error message"; \
+    } catch (const char *s) { \
+        MSG = s; \
+        if (MSG.empty()) MSG = "Empty error message"; \
+    } catch (...) { \
+        MSG = "Caught unknown xapian exception"; \
+    }
+
+// Run STMTTOTRY, retrying once after XAPDB.reopen() if Xapian reports a
+// DatabaseModifiedError. ERSTR is erased on success, else it holds the
+// error message.
+#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
+    for (int tries = 0; tries < 2; tries++) { \
+        try { \
+            STMTTOTRY; \
+            ERSTR.erase(); \
+            break; \
+        } catch (const Xapian::DatabaseModifiedError &e) { \
+            ERSTR = e.get_msg(); \
+            XAPDB.reopen(); \
+            continue; \
+        } XCATCHERROR(ERSTR); \
+        break; \
+    }
+
+#endif
diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp
index 2c3d65c2..701d7b2d 100644
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@@ -321,6 +321,15 @@ template void stringsToString(const T &tokens, string &s)
 template void stringsToString >(const list &, string &);
 template void stringsToString >(const vector &,string &);
 template void stringsToString >(const set &, string &);
+template <class T> string stringsToString(const T &tokens)
+{
+    string out;
+    stringsToString(tokens, out);
+    return out;
+}
+template string stringsToString<list<string> >(const list<string> &);
+template string stringsToString<vector<string> >(const vector<string> &);
+template string stringsToString<set<string> >(const set<string> &);
 
 template void stringsToCSV(const T &tokens, string &s,
                            char sep)
diff --git a/src/utils/smallut.h b/src/utils/smallut.h
index 715f572f..424953ae 100644
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@@ -90,6 +90,7 @@ template bool stringToStrings(const string& s, T &tokens,
  * Inverse operation:
  */
 template void stringsToString(const T &tokens, string &s);
+template <class T> std::string stringsToString(const T &tokens);
 
 /**
  * Strings to CSV string. tokens containing the separator are quoted (")