small code reorg
This commit is contained in:
parent
b149fe1e8c
commit
d1d6d65ef1
8 changed files with 242 additions and 72 deletions
|
@ -3,7 +3,7 @@ depth = ..
|
|||
include $(depth)/mk/sysconf
|
||||
|
||||
# Only test executables get built in here
|
||||
PROGS = stoplist
|
||||
PROGS = synfamily stoplist
|
||||
|
||||
all: $(BIGLIB) $(PROGS)
|
||||
|
||||
|
@ -19,6 +19,14 @@ trstoplist.o : stoplist.cpp
|
|||
$(CXX) $(ALL_CXXFLAGS) -DTEST_STOPLIST -c -o trstoplist.o \
|
||||
stoplist.cpp
|
||||
|
||||
SYNFAMILY_OBJS= trsynfamily.o $(BIGLIB)
|
||||
synfamily : $(SYNFAMILY_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o synfamily $(SYNFAMILY_OBJS) \
|
||||
$(BIGLIB) $(LIBICONV) $(LIBXAPIAN) $(LIBSYS)
|
||||
trsynfamily.o : synfamily.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -DTEST_SYNFAMILY -c -o trsynfamily.o \
|
||||
synfamily.cpp
|
||||
|
||||
clean:
|
||||
rm -f *.o $(PROGS)
|
||||
|
||||
|
|
|
@ -24,39 +24,10 @@
|
|||
#include "workqueue.h"
|
||||
#endif // IDX_THREADS
|
||||
#include "xapian.h"
|
||||
#include "xmacros.h"
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
// Generic Xapian exception catching code. We do this quite often,
|
||||
// and I have no idea how to do this except for a macro
|
||||
#define XCATCHERROR(MSG) \
|
||||
catch (const Xapian::Error &e) { \
|
||||
MSG = e.get_msg(); \
|
||||
if (MSG.empty()) MSG = "Empty error message"; \
|
||||
} catch (const string &s) { \
|
||||
MSG = s; \
|
||||
if (MSG.empty()) MSG = "Empty error message"; \
|
||||
} catch (const char *s) { \
|
||||
MSG = s; \
|
||||
if (MSG.empty()) MSG = "Empty error message"; \
|
||||
} catch (...) { \
|
||||
MSG = "Caught unknown xapian exception"; \
|
||||
}
|
||||
|
||||
#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
|
||||
for (int tries = 0; tries < 2; tries++) { \
|
||||
try { \
|
||||
STMTTOTRY; \
|
||||
ERSTR.erase(); \
|
||||
break; \
|
||||
} catch (const Xapian::DatabaseModifiedError &e) { \
|
||||
ERSTR = e.get_msg(); \
|
||||
XAPDB.reopen(); \
|
||||
continue; \
|
||||
} XCATCHERROR(ERSTR); \
|
||||
break; \
|
||||
}
|
||||
|
||||
class Query;
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
|
@ -143,16 +114,5 @@ class Db::Native {
|
|||
|
||||
};
|
||||
|
||||
// Xapian synonyms table abuse:
|
||||
// The Xapian synonyms mechanisms can be put to many uses, but,
|
||||
// unfortunately, it has a global name space (we'd like to be able to open
|
||||
// different synonym tables, but there is only one).
|
||||
// We use prefixes to create separate name spaces, in mostly the same way
|
||||
// that they are used in the main index. See synfamily.h
|
||||
// Prefixes are centrally defined here to avoid collisions
|
||||
//
|
||||
// Stem expansion family prefix. The family member name is the language
|
||||
static const std::string synprefStem("Stm");
|
||||
|
||||
}
|
||||
#endif /* _rcldb_p_h_included_ */
|
||||
|
|
|
@ -46,7 +46,7 @@ namespace StemDb {
|
|||
|
||||
vector<string> getLangs(Xapian::Database& xdb)
|
||||
{
|
||||
XapSynFamily fam(xdb, synprefStem);
|
||||
XapSynFamily fam(xdb, synFamStem);
|
||||
vector<string> langs;
|
||||
(void)fam.getMembers(langs);
|
||||
return langs;
|
||||
|
@ -54,7 +54,7 @@ vector<string> getLangs(Xapian::Database& xdb)
|
|||
|
||||
bool deleteDb(Xapian::WritableDatabase& xdb, const string& lang)
|
||||
{
|
||||
XapWritableSynFamily fam(xdb, synprefStem);
|
||||
XapWritableSynFamily fam(xdb, synFamStem);
|
||||
return fam.deleteMember(lang);
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
|
|||
LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n",
|
||||
lang.c_str(), cron.secs()));
|
||||
|
||||
XapWritableSynFamily fam(xdb, synprefStem);
|
||||
XapWritableSynFamily fam(xdb, synFamStem);
|
||||
fam.createMember(lang);
|
||||
|
||||
for (map<string, vector<string> >::const_iterator it = assocs.begin();
|
||||
|
@ -161,16 +161,6 @@ bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
|
|||
return true;
|
||||
}
|
||||
|
||||
static string stringlistdisp(const vector<string>& sl)
|
||||
{
|
||||
string s;
|
||||
for (vector<string>::const_iterator it = sl.begin(); it!= sl.end(); it++)
|
||||
s += "[" + *it + "] ";
|
||||
if (!s.empty())
|
||||
s.erase(s.length()-1);
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand term to list of all terms which stem to the same term, for one
|
||||
* expansion language
|
||||
|
@ -186,7 +176,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
|
|||
LOGDEB(("stemExpand:%s: [%s] stem-> [%s]\n",
|
||||
lang.c_str(), term.c_str(), stem.c_str()));
|
||||
|
||||
XapSynFamily fam(xdb, synprefStem);
|
||||
XapSynFamily fam(xdb, synFamStem);
|
||||
if (!fam.synExpand(lang, stem, result)) {
|
||||
// ?
|
||||
}
|
||||
|
@ -199,7 +189,7 @@ static bool stemExpandOne(Xapian::Database& xdb,
|
|||
result.push_back(stem);
|
||||
}
|
||||
LOGDEB0(("stemExpand:%s: %s -> %s\n", lang.c_str(), stem.c_str(),
|
||||
stringlistdisp(result).c_str()));
|
||||
stringsToString(result).c_str()));
|
||||
|
||||
} catch (...) {
|
||||
LOGERR(("stemExpand: error accessing stem db. lang [%s]\n",
|
||||
|
|
|
@ -19,8 +19,7 @@
|
|||
#include "autoconfig.h"
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldb_p.h"
|
||||
#include "xmacros.h"
|
||||
#include "synfamily.h"
|
||||
|
||||
#include <iostream>
|
||||
|
@ -148,4 +147,145 @@ bool XapWritableSynFamily::addSynonyms(const string& membername,
|
|||
}
|
||||
|
||||
#else // TEST_SYNFAMILY
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
#include "xapian.h"
|
||||
|
||||
#include "smallut.h"
|
||||
#include "pathut.h"
|
||||
#include "xmacros.h"
|
||||
#include "synfamily.h"
|
||||
|
||||
static string thisprog;
|
||||
static int op_flags;
|
||||
#define OPT_a 0x4
|
||||
#define OPT_c 0x8
|
||||
#define OPT_D 0x1
|
||||
#define OPT_d 0x10
|
||||
#define OPT_L 0x2
|
||||
#define OPT_l 0x20
|
||||
#define OPT_s 0x40
|
||||
#define OPT_e 0x80
|
||||
static string usage =
|
||||
" -d <dbdir> {-s|-a|-c} database dir and synfamily: stem accents case\n"
|
||||
" -l : list members\n"
|
||||
" -L <member>: list entries for given member\n"
|
||||
" -e <member> <key> : list expansion for given member and key\n"
|
||||
" -D <member>: delete member\n"
|
||||
" \n\n"
|
||||
;
|
||||
static void Usage(void)
|
||||
{
|
||||
cerr << thisprog << ": usage:\n" << usage;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
string dbdir(path_tildexpand("~/.recoll/xapiandb"));
|
||||
string outencoding = "UTF-8";
|
||||
string member;
|
||||
string key;
|
||||
|
||||
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
while (argc > 0 && **argv == '-') {
|
||||
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Cas du "adb - core" */
|
||||
Usage();
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'a': op_flags |= OPT_a; break;
|
||||
case 'c': op_flags |= OPT_c; break;
|
||||
case 'D': op_flags |= OPT_D; break;
|
||||
case 'd': op_flags |= OPT_d; if (argc < 2) Usage();
|
||||
dbdir = *(++argv); argc--;
|
||||
goto b1;
|
||||
case 'e': op_flags |= OPT_e; if (argc < 3) Usage();
|
||||
member = *(++argv);argc--;
|
||||
key = *(++argv); argc--;
|
||||
goto b1;
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
case 'L': op_flags |= OPT_L; if (argc < 2) Usage();
|
||||
member = *(++argv); argc--;
|
||||
goto b1;
|
||||
case 's': op_flags |= OPT_s; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
b1: argc--; argv++;
|
||||
}
|
||||
|
||||
if (argc != 0)
|
||||
Usage();
|
||||
|
||||
// We do stem only for now
|
||||
string familyname;
|
||||
if (op_flags & (OPT_a|OPT_c)) {
|
||||
cerr << "Accents and case not ready" << endl;
|
||||
return 1;
|
||||
} else {
|
||||
op_flags |= OPT_s;
|
||||
familyname = Rcl::synFamStem;
|
||||
}
|
||||
if ((op_flags & (OPT_l|OPT_L|OPT_D|OPT_e)) == 0)
|
||||
Usage();
|
||||
|
||||
string ermsg;
|
||||
try {
|
||||
if ((op_flags & (OPT_D)) == 0) { // Need write ?
|
||||
Xapian::Database db(dbdir);
|
||||
Rcl::XapSynFamily fam(db, familyname);
|
||||
if (op_flags & OPT_l) {
|
||||
vector<string> members;
|
||||
if (!fam.getMembers(members)) {
|
||||
cerr << "getMembers error" << endl;
|
||||
return 1;
|
||||
}
|
||||
string out;
|
||||
stringsToString(members, out);
|
||||
cout << "Family: " << familyname << " Members: " << out << endl;
|
||||
} else if (op_flags & OPT_L) {
|
||||
fam.listMap(member);
|
||||
} else if (op_flags & OPT_e) {
|
||||
vector<string> exp;
|
||||
if (!fam.synExpand(member, key, exp)) {
|
||||
cerr << "expand error" << endl;
|
||||
return 1;
|
||||
}
|
||||
string out;
|
||||
stringsToString(exp, out);
|
||||
cout << "Family: " << familyname << " Key: " << key
|
||||
<< " Expansion: " << out << endl;
|
||||
} else {
|
||||
Usage();
|
||||
}
|
||||
|
||||
} else {
|
||||
Xapian::WritableDatabase db(dbdir, Xapian::DB_CREATE_OR_OPEN);
|
||||
Rcl::XapWritableSynFamily fam(db, familyname);
|
||||
if (op_flags & OPT_D) {
|
||||
} else {
|
||||
Usage();
|
||||
}
|
||||
}
|
||||
} XCATCHERROR (ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
cerr << "Xapian Exception: " << ermsg << endl;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // TEST_SYNFAMILY
|
||||
|
|
|
@ -25,9 +25,9 @@
|
|||
* can provide different applications each with a family of keyspaces.
|
||||
* Two characters are reserved by the class and should not be used inside
|
||||
* either family or member names: ':' and ';'
|
||||
* A synonym key for family "stemdb", member "french", key "thisstem"
|
||||
* A synonym key for family "stemdb", member "french", key "somestem"
|
||||
* looks like:
|
||||
* :stemdb:french:stem -> stem siblings
|
||||
* :stemdb:french:somestem -> somestem expansions
|
||||
* A special entry is used to list all the members for a family, e.g.:
|
||||
* :stemdb;members -> french, english ...
|
||||
*/
|
||||
|
@ -47,29 +47,30 @@ public:
|
|||
XapSynFamily(Xapian::Database xdb, const std::string& familyname)
|
||||
: m_rdb(xdb)
|
||||
{
|
||||
m_prefix1 = string(":") + familyname;
|
||||
m_prefix1 = std::string(":") + familyname;
|
||||
}
|
||||
|
||||
/** Expand one term (e.g.: familier) inside one family member (e.g: french)
|
||||
*/
|
||||
bool synExpand(const std::string& fammember,
|
||||
const std::string& term,
|
||||
virtual bool synExpand(const std::string& fammember,
|
||||
const std::string& key,
|
||||
std::vector<std::string>& result);
|
||||
|
||||
/** Retrieve all members of this family (e.g: french english german...) */
|
||||
bool getMembers(std::vector<std::string>&);
|
||||
virtual bool getMembers(std::vector<std::string>&);
|
||||
|
||||
/** debug: list map for one member to stdout */
|
||||
bool listMap(const std::string& fam);
|
||||
virtual bool listMap(const std::string& fam);
|
||||
|
||||
protected:
|
||||
Xapian::Database m_rdb;
|
||||
std::string m_prefix1;
|
||||
string entryprefix(const string& member)
|
||||
|
||||
virtual std::string entryprefix(const std::string& member)
|
||||
{
|
||||
return m_prefix1 + ":" + member + ":";
|
||||
}
|
||||
string memberskey()
|
||||
virtual std::string memberskey()
|
||||
{
|
||||
return m_prefix1 + ";" + "members";
|
||||
}
|
||||
|
@ -86,22 +87,32 @@ public:
|
|||
|
||||
/** Delete all entries for one member (e.g. french), and remove from list
|
||||
* of members */
|
||||
bool deleteMember(const std::string& membername);
|
||||
virtual bool deleteMember(const std::string& membername);
|
||||
|
||||
/** Add to list of members. Idempotent, does not affect actual expansions */
|
||||
bool createMember(const std::string& membername);
|
||||
virtual bool createMember(const std::string& membername);
|
||||
|
||||
/** Add expansion list for term inside family member (e.g., inside
|
||||
* the french member, add expansion for familier -> familier,
|
||||
* familierement, ... */
|
||||
bool addSynonyms(const string& membername,
|
||||
const string& term, const vector<string>& trans);
|
||||
virtual bool addSynonyms(const std::string& membername,
|
||||
const std::string& term,
|
||||
const std::vector<std::string>& trans);
|
||||
|
||||
protected:
|
||||
Xapian::WritableDatabase m_wdb;
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// Prefixes are centrally defined here to avoid collisions
|
||||
//
|
||||
// Stem expansion family prefix. The family member name is the language
|
||||
static const std::string synFamStem("Stm");
|
||||
static const std::string synFamDiac("Dia");
|
||||
static const std::string synFamCase("Cse");
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif /* _SYNFAMILY_H_INCLUDED_ */
|
||||
|
|
51
src/rcldb/xmacros.h
Normal file
51
src/rcldb/xmacros.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* Copyright (C) 2007 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef _xmacros_h_included_
|
||||
#define _xmacros_h_included_
|
||||
|
||||
// Generic Xapian exception catching code. We do this quite often, and a
// macro is the only way to share a chain of catch clauses.
//
// Usage: try { ... } XCATCHERROR(msg);
// MSG must be an assignable std::string lvalue. It is only written to when
// an exception is caught, so callers wanting to detect failure should make
// sure it is empty before the try block. A caught exception never leaves MSG
// empty: a placeholder text is stored when the exception carries no message.
//
// std::string is fully qualified so that this header does not depend on the
// including file having a "using namespace std" in effect.
#define XCATCHERROR(MSG)                                        \
    catch (const Xapian::Error &e) {                            \
        MSG = e.get_msg();                                      \
        if (MSG.empty()) MSG = "Empty error message";           \
    } catch (const std::string &s) {                            \
        MSG = s;                                                \
        if (MSG.empty()) MSG = "Empty error message";           \
    } catch (const char *s) {                                   \
        /* Guard against a thrown null pointer */               \
        MSG = s ? s : "";                                       \
        if (MSG.empty()) MSG = "Empty error message";           \
    } catch (...) {                                             \
        MSG = "Caught unknown xapian exception";                \
    }
|
||||
|
||||
// Run STMTTOTRY, retrying once if the database was modified under us.
//
// STMTTOTRY: statement executed inside the try block.
// XAPDB:     object on which reopen() is called before the retry.
// ERSTR:     string receiving the outcome: erased when STMTTOTRY completes,
//            else set to the message of the exception that was caught.
//
// At most two attempts are made. Xapian::DatabaseModifiedError stores its
// message in ERSTR, reopens XAPDB and moves on to the next attempt; any
// other exception is recorded through XCATCHERROR and ends the loop.
// Note that the expansion declares a loop variable named "tries".
#define XAPTRY(STMTTOTRY, XAPDB, ERSTR) \
|
||||
for (int tries = 0; tries < 2; tries++) { \
|
||||
try { \
|
||||
STMTTOTRY; \
|
||||
ERSTR.erase(); \
|
||||
break; \
|
||||
} catch (const Xapian::DatabaseModifiedError &e) { \
|
||||
ERSTR = e.get_msg(); \
|
||||
XAPDB.reopen(); \
|
||||
continue; \
|
||||
} XCATCHERROR(ERSTR); \
|
||||
break; \
|
||||
}
|
||||
|
||||
#endif
|
|
@ -321,6 +321,15 @@ template <class T> void stringsToString(const T &tokens, string &s)
|
|||
template void stringsToString<list<string> >(const list<string> &, string &);
|
||||
template void stringsToString<vector<string> >(const vector<string> &,string &);
|
||||
template void stringsToString<set<string> >(const set<string> &, string &);
|
||||
// Value-returning convenience form: builds the string representation of
// the token container through the two-argument overload and returns it.
template <class T> string stringsToString(const T &tokens)
{
    string joined;
    stringsToString<T>(tokens, joined);
    return joined;
}
|
||||
template string stringsToString<list<string> >(const list<string> &);
|
||||
template string stringsToString<vector<string> >(const vector<string> &);
|
||||
template string stringsToString<set<string> >(const set<string> &);
|
||||
|
||||
template <class T> void stringsToCSV(const T &tokens, string &s,
|
||||
char sep)
|
||||
|
|
|
@ -90,6 +90,7 @@ template <class T> bool stringToStrings(const string& s, T &tokens,
|
|||
* Inverse operation:
|
||||
*/
|
||||
template <class T> void stringsToString(const T &tokens, string &s);
|
||||
template <class T> std::string stringsToString(const T &tokens);
|
||||
|
||||
/**
|
||||
* Strings to CSV string. tokens containing the separator are quoted (")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue