small code reorg

This commit is contained in:
Jean-Francois Dockes 2012-08-25 19:08:42 +02:00
parent b149fe1e8c
commit d1d6d65ef1
8 changed files with 242 additions and 72 deletions

View file

@ -3,7 +3,7 @@ depth = ..
include $(depth)/mk/sysconf
# Only test executables get built in here
PROGS = stoplist
PROGS = synfamily stoplist
all: $(BIGLIB) $(PROGS)
@ -19,6 +19,14 @@ trstoplist.o : stoplist.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_STOPLIST -c -o trstoplist.o \
stoplist.cpp
# synfamily test driver: synfamily.cpp is compiled with -DTEST_SYNFAMILY
# into trsynfamily.o, then linked into the synfamily executable.
SYNFAMILY_OBJS= trsynfamily.o $(BIGLIB)
synfamily : $(SYNFAMILY_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o synfamily $(SYNFAMILY_OBJS) \
$(BIGLIB) $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
trsynfamily.o : synfamily.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_SYNFAMILY -c -o trsynfamily.o \
synfamily.cpp
clean:
rm -f *.o $(PROGS)

View file

@ -24,39 +24,10 @@
#include "workqueue.h"
#endif // IDX_THREADS
#include "xapian.h"
#include "xmacros.h"
namespace Rcl {
// Generic Xapian exception catching code. We do this quite often,
// and I have no idea how to do this except for a macro.
//
// Usage: place right after a try block:   try { ... } XCATCHERROR(msg);
// MSG must be an assignable string-like object with an empty() method.
// After the handlers run, MSG holds:
//  - the Xapian::Error message, the thrown std::string, or the thrown
//    C string, replaced by "Empty error message" when that text is empty;
//  - "Caught unknown xapian exception" for any other exception type.
// MSG is left untouched when nothing is thrown.
// std::string is fully qualified so that the macro does not depend on a
// using-directive being active where it is expanded.
#define XCATCHERROR(MSG) \
    catch (const Xapian::Error &e) { \
        MSG = e.get_msg(); \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (const std::string &s) { \
        MSG = s; \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (const char *s) { \
        /* Guard against a thrown null pointer */ \
        MSG = s ? s : ""; \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (...) { \
        MSG = "Caught unknown xapian exception"; \
    }
// Execute STMTTOTRY, with at most two attempts.
// - On success, ERSTR is erased and the loop is exited.
// - On Xapian::DatabaseModifiedError, the message is stored in ERSTR,
//   XAPDB.reopen() is called and the statement is tried again (if this was
//   already the second attempt, the loop ends with ERSTR set).
// - Any other exception is handled by XCATCHERROR, which stores a message
//   in ERSTR, and the loop is exited without retrying.
// Callers can test ERSTR after the macro to detect a failure.
#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
for (int tries = 0; tries < 2; tries++) { \
try { \
STMTTOTRY; \
ERSTR.erase(); \
break; \
} catch (const Xapian::DatabaseModifiedError &e) { \
ERSTR = e.get_msg(); \
XAPDB.reopen(); \
continue; \
} XCATCHERROR(ERSTR); \
break; \
}
class Query;
#ifdef IDX_THREADS
@ -143,16 +114,5 @@ class Db::Native {
};
// Xapian synonyms table abuse:
// The Xapian synonyms mechanisms can be put to many uses, but,
// unfortunately, it has a global name space (we'd like to be able to open
// different synonym tables, but there is only one).
// We use prefixes to create separate name spaces, in mostly the same way
// that they are used in the main index. See synfamily.h
// Prefixes are centrally defined here to avoid collisions
//
// Stem expansion family prefix. The family member name is the language
static const std::string synprefStem("Stm");
}
#endif /* _rcldb_p_h_included_ */

View file

@ -46,7 +46,7 @@ namespace StemDb {
vector<string> getLangs(Xapian::Database& xdb)
{
XapSynFamily fam(xdb, synprefStem);
XapSynFamily fam(xdb, synFamStem);
vector<string> langs;
(void)fam.getMembers(langs);
return langs;
@ -54,7 +54,7 @@ vector<string> getLangs(Xapian::Database& xdb)
bool deleteDb(Xapian::WritableDatabase& xdb, const string& lang)
{
XapWritableSynFamily fam(xdb, synprefStem);
XapWritableSynFamily fam(xdb, synFamStem);
return fam.deleteMember(lang);
}
@ -137,7 +137,7 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n",
lang.c_str(), cron.secs()));
XapWritableSynFamily fam(xdb, synprefStem);
XapWritableSynFamily fam(xdb, synFamStem);
fam.createMember(lang);
for (map<string, vector<string> >::const_iterator it = assocs.begin();
@ -161,16 +161,6 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
return true;
}
// Format a list of strings for display: each element is wrapped in square
// brackets and the elements are separated by single spaces, e.g. "[a] [b]".
// An empty list yields an empty string.
static string stringlistdisp(const vector<string>& sl)
{
    string shown;
    for (vector<string>::size_type idx = 0; idx < sl.size(); ++idx) {
        // Separator goes between elements only, never after the last one
        if (idx != 0)
            shown += ' ';
        shown += '[';
        shown += sl[idx];
        shown += ']';
    }
    return shown;
}
/**
* Expand term to list of all terms which stem to the same term, for one
* expansion language
@ -186,7 +176,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
LOGDEB(("stemExpand:%s: [%s] stem-> [%s]\n",
lang.c_str(), term.c_str(), stem.c_str()));
XapSynFamily fam(xdb, synprefStem);
XapSynFamily fam(xdb, synFamStem);
if (!fam.synExpand(lang, stem, result)) {
// ?
}
@ -199,7 +189,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
result.push_back(stem);
}
LOGDEB0(("stemExpand:%s: %s -> %s\n", lang.c_str(), stem.c_str(),
stringlistdisp(result).c_str()));
stringsToString(result).c_str()));
} catch (...) {
LOGERR(("stemExpand: error accessing stem db. lang [%s]\n",

View file

@ -19,8 +19,7 @@
#include "autoconfig.h"
#include "debuglog.h"
#include "rcldb.h"
#include "rcldb_p.h"
#include "xmacros.h"
#include "synfamily.h"
#include <iostream>
@ -148,4 +147,145 @@ bool XapWritableSynFamily::addSynonyms(const string& membername,
}
#else // TEST_SYNFAMILY
#include "autoconfig.h"
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <strings.h>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
#include "xapian.h"
#include "smallut.h"
#include "pathut.h"
#include "xmacros.h"
#include "synfamily.h"
static string thisprog;
static int op_flags;
#define OPT_a 0x4
#define OPT_c 0x8
#define OPT_D 0x1
#define OPT_d 0x10
#define OPT_L 0x2
#define OPT_l 0x20
#define OPT_s 0x40
#define OPT_e 0x80
static string usage =
" -d <dbdir> {-s|-a|-c} database dir and synfamily: stem accents case\n"
" -l : list members\n"
" -L <member>: list entries for given member\n"
" -e <member> <key> : list expansion for given member and key\n"
" -D <member>: delete member\n"
" \n\n"
;
// Print the program name followed by the usage text on stderr, then
// terminate the process with exit status 1. Does not return.
static void Usage(void)
{
    cerr << thisprog;
    cerr << ": usage:\n";
    cerr << usage;
    exit(1);
}
/**
 * Command-line test driver for the synonym family classes.
 *
 * Parses the options described in the usage string, then either reads from
 * the index through an XapSynFamily (-l, -L, -e) or deletes one family
 * member through an XapWritableSynFamily (-D). Only the stem family is
 * handled for now: -a and -c are rejected with an error message.
 *
 * @return 0 on success, 1 on error. Usage() exits the process with status 1
 *         on bad arguments.
 */
int main(int argc, char **argv)
{
    string dbdir(path_tildexpand("~/.recoll/xapiandb"));
    string member;
    string key;

    thisprog = argv[0];
    argc--; argv++;

    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv))
            /* A lone "-" argument (the "adb - core" case) is not accepted */
            Usage();
        while (**argv)
            switch (*(*argv)++) {
            case 'a': op_flags |= OPT_a; break;
            case 'c': op_flags |= OPT_c; break;
            // -D takes the name of the member to delete, as documented in
            // the usage text
            case 'D': op_flags |= OPT_D; if (argc < 2) Usage();
                member = *(++argv); argc--;
                goto b1;
            case 'd': op_flags |= OPT_d; if (argc < 2) Usage();
                dbdir = *(++argv); argc--;
                goto b1;
            case 'e': op_flags |= OPT_e; if (argc < 3) Usage();
                member = *(++argv); argc--;
                key = *(++argv); argc--;
                goto b1;
            case 'l': op_flags |= OPT_l; break;
            case 'L': op_flags |= OPT_L; if (argc < 2) Usage();
                member = *(++argv); argc--;
                goto b1;
            case 's': op_flags |= OPT_s; break;
            default: Usage(); break;
            }
    b1: argc--; argv++;
    }

    // No positional arguments are expected after the options
    if (argc != 0)
        Usage();

    // We do stem only for now
    string familyname;
    if (op_flags & (OPT_a|OPT_c)) {
        cerr << "Accents and case not ready" << endl;
        return 1;
    } else {
        op_flags |= OPT_s;
        familyname = Rcl::synFamStem;
    }

    // At least one operation must have been requested
    if ((op_flags & (OPT_l|OPT_L|OPT_D|OPT_e)) == 0)
        Usage();

    string ermsg;
    try {
        if (op_flags & OPT_D) {
            // Deleting a member is the only operation needing write access
            Xapian::WritableDatabase db(dbdir, Xapian::DB_CREATE_OR_OPEN);
            Rcl::XapWritableSynFamily fam(db, familyname);
            if (!fam.deleteMember(member)) {
                cerr << "deleteMember error" << endl;
                return 1;
            }
        } else {
            Xapian::Database db(dbdir);
            Rcl::XapSynFamily fam(db, familyname);
            if (op_flags & OPT_l) {
                vector<string> members;
                if (!fam.getMembers(members)) {
                    cerr << "getMembers error" << endl;
                    return 1;
                }
                string out;
                stringsToString(members, out);
                cout << "Family: " << familyname << " Members: " << out << endl;
            } else if (op_flags & OPT_L) {
                fam.listMap(member);
            } else if (op_flags & OPT_e) {
                vector<string> exp;
                if (!fam.synExpand(member, key, exp)) {
                    cerr << "expand error" << endl;
                    return 1;
                }
                string out;
                stringsToString(exp, out);
                cout << "Family: " << familyname << " Key: " << key
                     << " Expansion: " << out << endl;
            } else {
                Usage();
            }
        }
    } XCATCHERROR (ermsg);

    // XCATCHERROR leaves ermsg non-empty if any exception was caught
    if (!ermsg.empty()) {
        cerr << "Xapian Exception: " << ermsg << endl;
        return 1;
    }
    return 0;
}
#endif // TEST_SYNFAMILY

View file

@ -25,9 +25,9 @@
* can provide different applications each with a family of keyspaces.
* Two characters are reserved by the class and should not be used inside
* either family or member names: ':' and ';'
* A synonym key for family "stemdb", member "french", key "thisstem"
* A synonym key for family "stemdb", member "french", key "somestem"
* looks like:
* :stemdb:french:stem -> stem siblings
* :stemdb:french:somestem -> somestem expansions
* A special entry is used to list all the members for a family, e.g.:
* :stemdb;members -> french, english ...
*/
@ -47,29 +47,30 @@ public:
XapSynFamily(Xapian::Database xdb, const std::string& familyname)
: m_rdb(xdb)
{
m_prefix1 = string(":") + familyname;
m_prefix1 = std::string(":") + familyname;
}
/** Expand one term (e.g.: familier) inside one family member (e.g: french)
*/
bool synExpand(const std::string& fammember,
const std::string& term,
virtual bool synExpand(const std::string& fammember,
const std::string& key,
std::vector<std::string>& result);
/** Retrieve all members of this family (e.g: french english german...) */
bool getMembers(std::vector<std::string>&);
virtual bool getMembers(std::vector<std::string>&);
/** debug: list map for one member to stdout */
bool listMap(const std::string& fam);
virtual bool listMap(const std::string& fam);
protected:
Xapian::Database m_rdb;
std::string m_prefix1;
string entryprefix(const string& member)
virtual std::string entryprefix(const std::string& member)
{
return m_prefix1 + ":" + member + ":";
}
string memberskey()
virtual std::string memberskey()
{
return m_prefix1 + ";" + "members";
}
@ -86,22 +87,32 @@ public:
/** Delete all entries for one member (e.g. french), and remove from list
* of members */
bool deleteMember(const std::string& membername);
virtual bool deleteMember(const std::string& membername);
/** Add to list of members. Idempotent, does not affect actual expansions */
bool createMember(const std::string& membername);
virtual bool createMember(const std::string& membername);
/** Add expansion list for term inside family member (e.g., inside
* the french member, add expansion for familier -> familier,
* familierement, ... */
bool addSynonyms(const string& membername,
const string& term, const vector<string>& trans);
virtual bool addSynonyms(const std::string& membername,
const std::string& term,
const std::vector<std::string>& trans);
protected:
Xapian::WritableDatabase m_wdb;
};
//
// Prefixes are centrally defined here to avoid collisions
//
// These values are used as the family name when constructing an
// XapSynFamily / XapWritableSynFamily.
//
// Stem expansion family prefix. The family member name is the language
static const std::string synFamStem("Stm");
// NOTE(review): presumably the diacritics (accents) family prefix -- confirm
static const std::string synFamDiac("Dia");
// NOTE(review): presumably the character case family prefix -- confirm
static const std::string synFamCase("Cse");
}
#endif /* _SYNFAMILY_H_INCLUDED_ */

51
src/rcldb/xmacros.h Normal file
View file

@ -0,0 +1,51 @@
/* Copyright (C) 2007 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _xmacros_h_included_
#define _xmacros_h_included_
// Generic Xapian exception catching code. We do this quite often,
// and I have no idea how to do this except for a macro.
//
// Usage: place right after a try block:   try { ... } XCATCHERROR(msg);
// MSG must be an assignable string-like object with an empty() method.
// After the handlers run, MSG holds:
//  - the Xapian::Error message, the thrown std::string, or the thrown
//    C string, replaced by "Empty error message" when that text is empty;
//  - "Caught unknown xapian exception" for any other exception type.
// MSG is left untouched when nothing is thrown.
// std::string is fully qualified so that the macro does not depend on a
// using-directive being active where it is expanded.
#define XCATCHERROR(MSG) \
    catch (const Xapian::Error &e) { \
        MSG = e.get_msg(); \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (const std::string &s) { \
        MSG = s; \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (const char *s) { \
        /* Guard against a thrown null pointer */ \
        MSG = s ? s : ""; \
        if (MSG.empty()) MSG = "Empty error message"; \
    } catch (...) { \
        MSG = "Caught unknown xapian exception"; \
    }
// Execute STMTTOTRY, with at most two attempts.
// - On success, ERSTR is erased and the loop is exited.
// - On Xapian::DatabaseModifiedError, the message is stored in ERSTR,
//   XAPDB.reopen() is called and the statement is tried again (if this was
//   already the second attempt, the loop ends with ERSTR set).
// - Any other exception is handled by XCATCHERROR, which stores a message
//   in ERSTR, and the loop is exited without retrying.
// Callers can test ERSTR after the macro to detect a failure.
#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
for (int tries = 0; tries < 2; tries++) { \
try { \
STMTTOTRY; \
ERSTR.erase(); \
break; \
} catch (const Xapian::DatabaseModifiedError &e) { \
ERSTR = e.get_msg(); \
XAPDB.reopen(); \
continue; \
} XCATCHERROR(ERSTR); \
break; \
}
#endif

View file

@ -321,6 +321,15 @@ template <class T> void stringsToString(const T &tokens, string &s)
template void stringsToString<list<string> >(const list<string> &, string &);
template void stringsToString<vector<string> >(const vector<string> &,string &);
template void stringsToString<set<string> >(const set<string> &, string &);
// Convenience overload: build the string form of a container of strings by
// delegating to the two-argument stringsToString(), and return the result
// by value instead of through an output parameter.
template <class T> string stringsToString(const T &tokens)
{
    string joined;
    stringsToString<T>(tokens, joined);
    return joined;
}
template string stringsToString<list<string> >(const list<string> &);
template string stringsToString<vector<string> >(const vector<string> &);
template string stringsToString<set<string> >(const set<string> &);
template <class T> void stringsToCSV(const T &tokens, string &s,
char sep)

View file

@ -90,6 +90,7 @@ template <class T> bool stringToStrings(const string& s, T &tokens,
* Inverse operation:
*/
template <class T> void stringsToString(const T &tokens, string &s);
template <class T> std::string stringsToString(const T &tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")