more delistification
parent a17b7523e7
commit 44732da80e

16 changed files with 92 additions and 96 deletions
@@ -780,9 +780,9 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
     return true;
 }
 
-bool TextSplit::stringToStrings(const string &s, list<string> &tokens)
+bool TextSplit::stringToStrings(const string &s, vector<string> &tokens)
 {
-    return u8stringToStrings<list<string> >(s, tokens);
+    return u8stringToStrings<vector<string> >(s, tokens);
 }
 
 #else // TEST driver ->
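Note on the hunk above: for callers the only visible change is the container type in the signature. A minimal usage sketch, assuming the declaration shown in the header hunk below (the include name and the sample input are illustrative, not taken from this commit):

    #include <string>
    #include <vector>
    #include "textsplit.h"  // assumed location of the TextSplit declaration

    // Hypothetical caller: tokens are now collected into a std::vector
    // instead of a std::list.
    bool demoSplit()
    {
        std::vector<std::string> tokens;
        return TextSplit::stringToStrings("some config value", tokens);
    }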
@@ -18,11 +18,11 @@
 #define _TEXTSPLIT_H_INCLUDED_
 
 #include <string>
-#include <list>
+#include <vector>
 
 #ifndef NO_NAMESPACES
 using std::string;
-using std::list;
+using std::vector;
 #endif
 
 class Utf8Iter;
@@ -94,7 +94,7 @@ public:
      * non-utf-8 input (iso-8859 config files work ok). This hopefully
      * handles all Unicode whitespace, but needs correct utf-8 input
      */
-    static bool stringToStrings(const string &s, list<string> &tokens);
+    static bool stringToStrings(const string &s, vector<string> &tokens);
 
     /** Is char CJK ? */
     static bool isCJK(int c);
@@ -310,7 +310,7 @@ bool ConfIndexer::createAspellDict()
     return true;
 }
 
-list<string> ConfIndexer::getStemmerNames()
+vector<string> ConfIndexer::getStemmerNames()
 {
     return Rcl::Db::getStemmerNames();
 }
@@ -107,7 +107,7 @@ class ConfIndexer {
     bool createAspellDict();
 
     /** List possible stemmer names */
-    static list<string> getStemmerNames();
+    static vector<string> getStemmerNames();
 
     /** Index a list of files. No db cleaning or stemdb updating */
     bool indexFiles(list<string> &files, IxFlag f = IxFNone);
@@ -35,6 +35,9 @@
 #include <cstdio>
 #include <cstdlib>
 #include <list>
+#include <vector>
+using std::list;
+using std::vector;
 
 #include "debuglog.h"
 #include "rclmon.h"
@@ -501,7 +504,7 @@ bool startMonitor(RclConfig *conf, int opts)
         // purge.
         deleted.push_back(ev.m_path);
         if (ev.evflags() & RclMonEvent::RCLEVT_ISDIR) {
-            list<string> paths;
+            vector<string> paths;
             if (subtreelist(conf, ev.m_path, paths)) {
                 deleted.insert(deleted.end(),
                                paths.begin(), paths.end());
@@ -394,8 +394,8 @@ int main(int argc, char **argv)
     } else if (op_flags & OPT_l) {
         if (argc != 0)
             Usage();
-        list<string> stemmers = ConfIndexer::getStemmerNames();
-        for (list<string>::const_iterator it = stemmers.begin();
+        vector<string> stemmers = ConfIndexer::getStemmerNames();
+        for (vector<string>::const_iterator it = stemmers.begin();
              it != stemmers.end(); it++) {
            cout << *it << endl;
        }
@@ -25,7 +25,7 @@
 #include "debuglog.h"
 
 bool subtreelist(RclConfig *config, const string& top,
-                 list<string>& paths)
+                 vector<string>& paths)
 {
     LOGDEB(("subtreelist: top: [%s]\n", top.c_str()));
     Rcl::Db rcldb(config);
@@ -65,7 +65,7 @@ bool subtreelist(RclConfig *config, const string& top,
 #include <string.h>
 
 #include <iostream>
-#include <list>
+#include <vector>
 #include <string>
 using namespace std;
 
@@ -115,12 +115,12 @@ int main(int argc, char **argv)
         exit(1);
     }
 
-    list<string> paths;
+    vector<string> paths;
     if (!subtreelist(config, top, paths)) {
         cerr << "subtreelist failed" << endl;
         exit(1);
     }
-    for (list<string>::const_iterator it = paths.begin();
+    for (vector<string>::const_iterator it = paths.begin();
          it != paths.end(); it++) {
         cout << *it << endl;
     }
@@ -18,8 +18,8 @@
 #ifndef _SUBTREELIST_H_INCLUDED_
 #define _SUBTREELIST_H_INCLUDED_
 
-#include <list>
-using std::list;
+#include <vector>
+#include <string>
 
 class RclConfig;
 
@@ -28,6 +28,6 @@ class RclConfig;
 // the real time indexer to purge entries when a top directory is
 // renamed. This is really convoluted, I'd like a better way.
 extern bool subtreelist(RclConfig *config, const string& top,
-                        list<string>& paths);
+                        std::vector<std::string>& paths);
 
 #endif /* _SUBTREELIST_H_INCLUDED_ */
@@ -184,9 +184,9 @@ ConfTopPanelW::ConfTopPanelW(QWidget *parent, ConfNull *config)
     setSzPol(eskp, QSizePolicy::Preferred, QSizePolicy::Preferred, 1, 3);
     gl1->addWidget(eskp, 1, 0, 1, 2);
 
-    list<string> cstemlangs = Rcl::Db::getStemmerNames();
+    vector<string> cstemlangs = Rcl::Db::getStemmerNames();
     QStringList stemlangs;
-    for (list<string>::const_iterator it = cstemlangs.begin();
+    for (vector<string>::const_iterator it = cstemlangs.begin();
          it != cstemlangs.end(); it++) {
         stemlangs.push_back(QString::fromUtf8(it->c_str()));
     }
@@ -1083,7 +1083,8 @@ void RclMain::showActiveTypes()
 
     // Build the set of mtypes, stripping the prefix
     set<string> mtypesfromdb;
-    for (list<Rcl::TermMatchEntry>::const_iterator it = matches.entries.begin();
+    for (vector<Rcl::TermMatchEntry>::const_iterator it =
+             matches.entries.begin();
          it != matches.entries.end(); it++) {
         mtypesfromdb.insert(it->term.substr(prefix.size()));
     }
@@ -184,7 +184,7 @@ void SpellW::doExpand()
         resTW->setItem(0, 0, new QTableWidgetItem(tr("No expansion found")));
     } else {
         int row = 0;
-        for (list<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
+        for (vector<Rcl::TermMatchEntry>::iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
             LOGDEB(("SpellW::expand: %6d [%s]\n", it->wcf, it->term.c_str()));
             char num[30];
@@ -299,7 +299,7 @@ void SSearch::completion()
         ok = true;
     } else {
         QStringList lst;
-        for (list<Rcl::TermMatchEntry>::iterator it = tmres.entries.begin();
+        for (vector<Rcl::TermMatchEntry>::iterator it = tmres.entries.begin();
             it != tmres.entries.end(); it++) {
            lst.push_back(QString::fromUtf8(it->term.c_str()));
        }
@@ -227,7 +227,7 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
     return true;
 }
 
-// Remove prefixes (caps) from a list of terms.
+// Remove prefixes (caps) from terms.
 static void noPrefixList(const vector<string>& in, vector<string>& out)
 {
     for (vector<string>::const_iterator qit = in.begin();
@@ -592,9 +592,9 @@ Db::~Db()
     i_close(true);
 }
 
-list<string> Db::getStemmerNames()
+vector<string> Db::getStemmerNames()
 {
-    list<string> res;
+    vector<string> res;
     stringToStrings(Xapian::Stem::get_available_languages(), res);
     return res;
 }
@@ -652,7 +652,7 @@ bool Db::open(OpenMode mode, OpenError *error)
     default:
         m_ndb->m_iswritable = false;
         m_ndb->xrdb = Xapian::Database(dir);
-        for (list<string>::iterator it = m_extraDbs.begin();
+        for (vector<string>::iterator it = m_extraDbs.begin();
              it != m_extraDbs.end(); it++) {
             if (error)
                 *error = DbOpenExtraDb;
@@ -814,7 +814,7 @@ bool Db::rmQueryDb(const string &dir)
     if (dir.empty()) {
         m_extraDbs.clear();
     } else {
-        list<string>::iterator it = find(m_extraDbs.begin(),
+        vector<string>::iterator it = find(m_extraDbs.begin(),
                                          m_extraDbs.end(), dir);
         if (it != m_extraDbs.end()) {
             m_extraDbs.erase(it);
@@ -1454,7 +1454,7 @@ bool Db::needUpdate(const string &udi, const string& sig)
     // Set the existence flag for all the subdocs (if any)
     vector<Xapian::docid> docids;
     if (!m_ndb->subDocs(udi, docids)) {
-        LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
+        LOGERR(("Rcl::Db::needUpdate: can't get subdocs\n"));
         return true;
     }
     for (vector<Xapian::docid>::iterator it = docids.begin();
@@ -1480,7 +1480,7 @@ bool Db::needUpdate(const string &udi, const string& sig)
 }
 
 
-// Return list of existing stem db languages
+// Return existing stem db languages
 vector<string> Db::getStemLangs()
 {
     LOGDEB(("Db::getStemLang\n"));
@@ -1645,7 +1645,7 @@ bool Db::purgeFile(const string &udi, bool *existed)
 }
 
 // File name wild card expansion. This is a specialisation ot termMatch
-bool Db::filenameWildExp(const string& fnexp, list<string>& names)
+bool Db::filenameWildExp(const string& fnexp, vector<string>& names)
 {
     string pattern = fnexp;
     names.clear();
@@ -1665,7 +1665,7 @@ bool Db::filenameWildExp(const string& fnexp, list<string>& names)
     TermMatchResult result;
     if (!termMatch(ET_WILD, string(), pattern, result, 1000, Doc::keyfn))
         return false;
-    for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
+    for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
          it != result.entries.end(); it++)
         names.push_back(it->term);
 
@@ -1685,7 +1685,7 @@ bool Db::maxYearSpan(int *minyear, int *maxyear)
     TermMatchResult result;
     if (!termMatch(ET_WILD, string(), "*", result, 5000, "xapyear"))
         return false;
-    for (list<TermMatchEntry>::const_iterator it = result.entries.begin();
+    for (vector<TermMatchEntry>::const_iterator it = result.entries.begin();
          it != result.entries.end(); it++) {
         if (!it->term.empty()) {
             int year = atoi(it->term.c_str()+1);
@@ -1721,9 +1721,9 @@ public:
 bool Db::stemExpand(const string &lang, const string &term,
                     TermMatchResult& result, int max)
 {
-    list<string> dirs = m_extraDbs;
-    dirs.push_front(m_basedir);
-    for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
+    vector<string> dirs(1, m_basedir);
+    dirs.insert(dirs.end(), m_extraDbs.begin(), m_extraDbs.end());
+    for (vector<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
         vector<string> more;
         StemDb::stemExpand(*it, lang, term, more);
         LOGDEB1(("Db::stemExpand: Got %d from %s\n",
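std::vector has no push_front(), so the hunk above replaces "copy m_extraDbs, then push m_basedir in front" with "construct the vector holding m_basedir, then append m_extraDbs". A minimal sketch of that equivalence, with placeholder arguments standing in for the members:

    #include <string>
    #include <vector>
    using std::string;
    using std::vector;

    // Build { basedir, extra[0], extra[1], ... } without list::push_front().
    vector<string> baseThenExtras(const string& basedir,
                                  const vector<string>& extra)
    {
        vector<string> dirs(1, basedir);                     // dirs = { basedir }
        dirs.insert(dirs.end(), extra.begin(), extra.end()); // append the extras
        return dirs;
    }

The same constraint shows up again further down, where exp.push_front(prefix + term) becomes exp.resize(1); exp[0] = prefix + term;.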
@@ -1737,11 +1737,11 @@ bool Db::stemExpand(const string &lang, const string &term,
 }
 
 /** Add prefix to all strings in list */
-static void addPrefix(list<TermMatchEntry>& terms, const string& prefix)
+static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
 {
     if (prefix.empty())
         return;
-    for (list<TermMatchEntry>::iterator it = terms.begin();
+    for (vector<TermMatchEntry>::iterator it = terms.begin();
          it != terms.end(); it++)
         it->term.insert(0, prefix);
 }
@@ -1795,9 +1795,9 @@ bool Db::termMatch(MatchType typ, const string &lang,
     if (typ == ET_STEM) {
         if (!stemExpand(lang, root, res, max))
             return false;
-        res.entries.sort();
-        res.entries.unique();
-        for (list<TermMatchEntry>::iterator it = res.entries.begin();
+        sort(res.entries.begin(), res.entries.end());
+        unique(res.entries.begin(), res.entries.end());
+        for (vector<TermMatchEntry>::iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
             XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
                    it->docs = xdb.get_termfreq(it->term),
@@ -1884,11 +1884,11 @@ bool Db::termMatch(MatchType typ, const string &lang,
     }
 
     TermMatchCmpByTerm tcmp;
-    res.entries.sort(tcmp);
+    sort(res.entries.begin(), res.entries.end(), tcmp);
     TermMatchTermEqual teq;
-    res.entries.unique(teq);
+    unique(res.entries.begin(), res.entries.end(), teq);
     TermMatchCmpByWcf wcmp;
-    res.entries.sort(wcmp);
+    sort(res.entries.begin(), res.entries.end(), wcmp);
     if (max > 0) {
         res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
     }
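The two hunks above swap the list::sort()/list::unique() members for the std::sort/std::unique algorithms. One point worth keeping in mind when reading them: std::unique does not shrink the container, it only moves the duplicates past the iterator it returns, so the usual vector idiom finishes with erase(). A minimal sketch on a plain vector<string> (not a claim about anything beyond the calls shown above):

    #include <algorithm>
    #include <string>
    #include <vector>
    using std::string;
    using std::vector;

    // Vector counterpart of list::sort() + list::unique(): sort, move the
    // duplicates to the tail with std::unique, then erase the tail.
    void sortUnique(vector<string>& v)
    {
        std::sort(v.begin(), v.end());
        v.erase(std::unique(v.begin(), v.end()), v.end());
    }

    // With a comparator and an equality predicate (the second hunk uses
    // TermMatchCmpByTerm / TermMatchTermEqual), the shape is the same:
    //     std::sort(v.begin(), v.end(), cmp);
    //     v.erase(std::unique(v.begin(), v.end(), eq), v.end());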
@@ -18,7 +18,6 @@
 #define _DB_H_INCLUDED_
 
 #include <string>
-#include <list>
 #include <vector>
 
 #include "cstr.h"
@@ -29,7 +28,6 @@
 
 #ifndef NO_NAMESPACES
 using std::string;
-using std::list;
 using std::vector;
 #endif
 
@@ -80,8 +78,8 @@ public:
     TermMatchEntry() : wcf(0) {}
     TermMatchEntry(const string&t, int f, int d) : term(t), wcf(f), docs(d) {}
     TermMatchEntry(const string&t) : term(t), wcf(0) {}
-    bool operator==(const TermMatchEntry &o) { return term == o.term;}
-    bool operator<(const TermMatchEntry &o) { return term < o.term;}
+    bool operator==(const TermMatchEntry &o) const { return term == o.term;}
+    bool operator<(const TermMatchEntry &o) const { return term < o.term;}
     string term;
     int wcf; // Total count of occurrences within collection.
     int docs; // Number of documents countaining term.
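The const qualifiers added above matter once TermMatchEntry objects are compared through const references, which is how comparator functors and const iterators see the elements after the move to vector and the standard algorithms. A small sketch with a simplified stand-in type (Entry and CmpByTerm are illustrative, not the real classes):

    #include <algorithm>
    #include <string>
    #include <vector>

    // Simplified stand-in: the comparisons do not modify the object, so they
    // are const members and can be invoked on const references.
    struct Entry {
        std::string term;
        bool operator==(const Entry& o) const { return term == o.term; }
        bool operator<(const Entry& o) const { return term < o.term; }
    };

    // A comparator in the spirit of TermMatchCmpByTerm: it receives the
    // elements as const references, so it needs the const operator<.
    struct CmpByTerm {
        bool operator()(const Entry& l, const Entry& r) const { return l < r; }
    };

    void sortByTerm(std::vector<Entry>& v)
    {
        std::sort(v.begin(), v.end(), CmpByTerm());
    }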
@@ -91,7 +89,7 @@ class TermMatchResult {
 public:
     TermMatchResult() {clear();}
     void clear() {entries.clear(); dbdoccount = 0; dbavgdoclen = 0;}
-    list<TermMatchEntry> entries;
+    vector<TermMatchEntry> entries;
     unsigned int dbdoccount;
     double dbavgdoclen;
 };
@@ -124,8 +122,11 @@ class Db {
     /** Get explanation about last error */
     string getReason() const {return m_reason;}
 
-    /** List possible stemmer names */
-    static list<string> getStemmerNames();
+    /** Return all possible stemmer names */
+    static vector<string> getStemmerNames();
 
+    /** Return existing stemming databases */
+    vector<string> getStemLangs();
+
     /** Test word for spelling correction candidate: not too long, no
       special chars... */
@@ -139,8 +140,6 @@ class Db {
         return true;
     }
 
-    /** List existing stemming databases */
-    std::vector<std::string> getStemLangs();
 
 #ifdef TESTING_XAPIAN_SPELL
     /** Return spelling suggestion */
@@ -148,7 +147,7 @@ class Db {
 #endif
 
     /* The next two, only for searchdata, should be somehow hidden */
-    /* Return list of configured stop words */
+    /* Return configured stop words */
     const StopList& getStopList() const {return m_stops;}
     /* Field name to prefix translation (ie: author -> 'A') */
     bool fieldToTraits(const string& fldname, const FieldTraits **ftpp);
@@ -201,7 +200,7 @@ class Db {
     /** Tell if directory seems to hold xapian db */
     static bool testDbDir(const string &dir);
 
-    /** Return a list of index terms that match the input string
+    /** Return the index terms that match the input string
      * Expansion is performed either with either wildcard or regexp processing
      * Stem expansion is performed if lang is not empty */
     enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
@@ -215,7 +214,7 @@ class Db {
 
     /** Special filename wildcard to XSFN terms expansion.
         internal/searchdata use only */
-    bool filenameWildExp(const string& exp, list<string>& names);
+    bool filenameWildExp(const string& exp, vector<string>& names);
 
     /** Set parameters for synthetic abstract generation */
     void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
@@ -287,8 +286,8 @@ private:
     int m_maxFsOccupPc;
     // Database directory
     string m_basedir;
-    // List of directories for additional databases to query
-    list<string> m_extraDbs;
+    // Xapian directories for additional databases to query
+    vector<string> m_extraDbs;
     OpenMode m_mode;
     // File existence vector: this is filled during the indexing pass. Any
     // document whose bit is not set at the end is purged
@@ -364,7 +364,7 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
     }
 
     string field;
-    list<string> words;
+    vector<string> words;
     // Walk the clause list. If we find any non simple clause or different
     // field names, bail out.
     for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
@@ -409,7 +409,7 @@ bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)
     if (!doccnt)
         doccnt = 1;
     string swords;
-    for (list<string>::iterator it = words.begin();
+    for (vector<string>::iterator it = words.begin();
          it != words.end(); it++) {
         double freq = double(db.termDocCnt(*it)) / doccnt;
         if (freq < freqThreshold) {
@@ -598,7 +598,7 @@ public:
 
     bool processUserString(const string &iq,
                            string &ermsg,
-                           list<Xapian::Query> &pqueries,
+                           vector<Xapian::Query> &pqueries,
                            const StopList &stops,
                            int slack = 0, bool useNear = false);
     // After processing the string: return search terms and term
@@ -616,13 +616,14 @@ public:
     }
 
 private:
-    void expandTerm(bool dont, const string& term, list<string>& exp,
+    void expandTerm(bool dont, const string& term, vector<string>& exp,
                     string& sterm, const string& prefix);
     // After splitting entry on whitespace: process non-phrase element
-    void processSimpleSpan(const string& span, bool nostemexp, list<Xapian::Query> &pqueries);
+    void processSimpleSpan(const string& span, bool nostemexp,
+                           vector<Xapian::Query> &pqueries);
     // Process phrase/near element
     void processPhraseOrNear(TextSplitQ *splitData,
-                             list<Xapian::Query> &pqueries,
+                             vector<Xapian::Query> &pqueries,
                              bool useNear, int slack, int mods);
 
     Db& m_db;
@@ -644,14 +645,6 @@ static void listVector(const string& what, const vector<string>&l)
     }
     LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
 }
-static void listList(const string& what, const list<string>& l)
-{
-    string a;
-    for (list<string>::const_iterator it = l.begin(); it != l.end(); it++) {
-        a = a + *it + " ";
-    }
-    LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
-}
 #endif
 
 /** Expand stem and wildcards
@@ -668,7 +661,7 @@ static void listList(const string& what, const list<string>& l)
 */
 void StringToXapianQ::expandTerm(bool nostemexp,
                                  const string& term,
-                                 list<string>& exp,
+                                 vector<string>& exp,
                                  string &sterm, const string& prefix)
 {
     LOGDEB2(("expandTerm: field [%s] term [%s] stemlang [%s] nostemexp %d\n",
@@ -690,8 +683,8 @@ void StringToXapianQ::expandTerm(bool nostemexp,
     if (nostemexp && !haswild) {
         sterm = term;
         m_uterms.push_back(sterm);
-        exp.push_front(prefix + term);
         exp.resize(1);
+        exp[0] = prefix + term;
     } else {
         TermMatchResult res;
         if (haswild) {
@@ -703,7 +696,7 @@ void StringToXapianQ::expandTerm(bool nostemexp,
             m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
                            m_field);
         }
-        for (list<TermMatchEntry>::const_iterator it = res.entries.begin();
+        for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
             exp.push_back(it->term);
         }
@@ -746,11 +739,11 @@ void multiply_groups(vector<vector<string> >::const_iterator vvit,
 }
 
 void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
-                                        list<Xapian::Query> &pqueries)
+                                        vector<Xapian::Query> &pqueries)
 {
     LOGDEB2(("StringToXapianQ::processSimpleSpan: [%s] nostemexp %d\n",
              span.c_str(), int(nostemexp)));
-    list<string> exp;
+    vector<string> exp;
     string sterm; // dumb version of user term
 
     string prefix;
@@ -762,7 +755,7 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
     expandTerm(nostemexp, span, exp, sterm, prefix);
 
     // m_terms is used for highlighting, we don't want prefixes in there.
-    for (list<string>::const_iterator it = exp.begin();
+    for (vector<string>::const_iterator it = exp.begin();
          it != exp.end(); it++) {
         m_terms.push_back(it->substr(prefix.size()));
     }
@@ -787,12 +780,12 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
 // queries if the terms get expanded by stemming or wildcards (we
 // don't do stemming for PHRASE though)
 void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
-                                          list<Xapian::Query> &pqueries,
+                                          vector<Xapian::Query> &pqueries,
                                           bool useNear, int slack, int mods)
 {
     Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
         Xapian::Query::OP_PHRASE;
-    list<Xapian::Query> orqueries;
+    vector<Xapian::Query> orqueries;
     bool hadmultiple = false;
     vector<vector<string> >groups;
 
@@ -818,13 +811,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
         bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple;
 
         string sterm;
-        list<string> exp;
+        vector<string> exp;
         expandTerm(nostemexp, *it, exp, sterm, prefix);
         LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size()));
-        listList("", exp);
+        listVector("", exp);
         // groups is used for highlighting, we don't want prefixes in there.
         vector<string> noprefs;
-        for (list<string>::const_iterator it = exp.begin();
+        for (vector<string>::const_iterator it = exp.begin();
              it != exp.end(); it++) {
             noprefs.push_back(it->substr(prefix.size()));
         }
@@ -894,7 +887,7 @@ static int stringToMods(string& s)
 */
 bool StringToXapianQ::processUserString(const string &iq,
                                         string &ermsg,
-                                        list<Xapian::Query> &pqueries,
+                                        vector<Xapian::Query> &pqueries,
                                         const StopList& stops,
                                         int slack,
                                         bool useNear
@@ -913,13 +906,13 @@ bool StringToXapianQ::processUserString(const string &iq,
     // "words" are really phrases, this depends on separators:
     // [paul@dom.net] would still be a word (span), but [about:me]
     // will probably be handled as a phrase.
-    list<string> phrases;
+    vector<string> phrases;
     TextSplit::stringToStrings(iq, phrases);
 
     // Process each element: textsplit into terms, handle stem/wildcard
     // expansion and transform into an appropriate Xapian::Query
     try {
-        for (list<string>::iterator it = phrases.begin();
+        for (vector<string>::iterator it = phrases.begin();
              it != phrases.end(); it++) {
             LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
             int mods = stringToMods(*it);
@@ -1006,7 +999,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
         LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
         return false;
     }
-    list<Xapian::Query> pqueries;
+    vector<Xapian::Query> pqueries;
 
     // We normally boost the original term in the stem expansion list. Don't
     // do it if there are wildcards anywhere, this would skew the results.
@@ -1046,12 +1039,12 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
     Xapian::Query *qp = (Xapian::Query *)p;
     *qp = Xapian::Query();
 
-    list<string> patterns;
+    vector<string> patterns;
     TextSplit::stringToStrings(m_text, patterns);
-    list<string> names;
-    for (list<string>::iterator it = patterns.begin();
+    vector<string> names;
+    for (vector<string>::iterator it = patterns.begin();
          it != patterns.end(); it++) {
-        list<string> more;
+        vector<string> more;
         db.filenameWildExp(*it, more);
         Xapian::Query tq = Xapian::Query(Xapian::Query::OP_OR, more.begin(),
                                          more.end());
@@ -1076,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
     Xapian::Query *qp = (Xapian::Query *)p;
     *qp = Xapian::Query();
 
-    list<Xapian::Query> pqueries;
+    vector<Xapian::Query> pqueries;
     Xapian::Query nq;
 
     // We normally boost the original term in the stem expansion list. Don't
@@ -90,13 +90,13 @@ p_notlowerascii(unsigned int c)
 }
 
 static bool addAssoc(Xapian::WritableDatabase &sdb, const string& stem,
-                     const list<string>& derivs)
+                     const vector<string>& derivs)
 {
     Xapian::Document newdocument;
     newdocument.add_term(stem);
     // The doc data is just parents=blank-separated-list
     string record = "parents=";
-    for (list<string>::const_iterator it = derivs.begin();
+    for (vector<string>::const_iterator it = derivs.begin();
          it != derivs.end(); it++) {
         record += *it + " ";
     }
@@ -215,7 +215,7 @@ bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
 
     // Enter pseud-docs in db by walking the multimap.
     string stem;
-    list<string> derivs;
+    vector<string> derivs;
     for (multimap<string,string>::const_iterator it = assocs.begin();
          it != assocs.end(); it++) {
         if (stem == it->first) {
@@ -350,9 +350,9 @@ bool stemExpand(const std::string& dbdir,
                 vector<string>& result)
 {
 
-    list<string> llangs;
+    vector<string> llangs;
     stringToStrings(langs, llangs);
-    for (list<string>::const_iterator it = llangs.begin();
+    for (vector<string>::const_iterator it = llangs.begin();
          it != llangs.end(); it++) {
         vector<string> oneexp;
         stemExpandOne(dbdir, *it, term, oneexp);