more config isolation. Still crashing
This commit is contained in:
parent
45d56f17de
commit
bc94f9f83f
8 changed files with 66 additions and 67 deletions
|
@ -219,7 +219,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
|||
if (!stringlowercmp("bookmark", hittype)) {
|
||||
// Just index the dotdoc
|
||||
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
return m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc);
|
||||
return m_db->addOrUpdate(udi, cstr_null, dotdoc);
|
||||
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
|
||||
(dotdoc.mimetype.compare("text/html") &&
|
||||
dotdoc.mimetype.compare(cstr_textplain))) {
|
||||
|
@ -249,7 +249,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
|||
doc.pcbytes = dotdoc.pcbytes;
|
||||
doc.sig.clear();
|
||||
doc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
return m_db->addOrUpdate(m_config, udi, cstr_null, doc);
|
||||
return m_db->addOrUpdate(udi, cstr_null, doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -420,7 +420,7 @@ BeagleQueueIndexer::processone(const string &path,
|
|||
dotdoc.sig.clear();
|
||||
|
||||
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
if (!m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc))
|
||||
if (!m_db->addOrUpdate(udi, cstr_null, dotdoc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
|
||||
|
@ -467,7 +467,7 @@ BeagleQueueIndexer::processone(const string &path,
|
|||
doc.url = dotdoc.url;
|
||||
|
||||
doc.meta[Rcl::Doc::keybcknd] = "BGL";
|
||||
if (!m_db->addOrUpdate(m_config, udi, cstr_null, doc))
|
||||
if (!m_db->addOrUpdate(udi, cstr_null, doc))
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
|
||||
|
|
|
@ -59,24 +59,24 @@ using namespace std;
|
|||
#ifdef IDX_THREADS
|
||||
class DbUpdTask {
|
||||
public:
|
||||
DbUpdTask(RclConfig *cnf, const string& u, const string& p,
|
||||
const Rcl::Doc& d)
|
||||
: udi(u), parent_udi(p), doc(d), config(cnf)
|
||||
DbUpdTask(const string& u, const string& p, const Rcl::Doc& d)
|
||||
: udi(u), parent_udi(p), doc(d)
|
||||
{}
|
||||
string udi;
|
||||
string parent_udi;
|
||||
Rcl::Doc doc;
|
||||
RclConfig *config;
|
||||
};
|
||||
extern void *FsIndexerDbUpdWorker(void*);
|
||||
|
||||
class InternfileTask {
|
||||
public:
|
||||
InternfileTask(const std::string &f, const struct stat *i_stp)
|
||||
: fn(f), statbuf(*i_stp)
|
||||
InternfileTask(const std::string &f, const struct stat *i_stp,
|
||||
map<string,string> lfields)
|
||||
: fn(f), statbuf(*i_stp), localfields(lfields)
|
||||
{}
|
||||
string fn;
|
||||
struct stat statbuf;
|
||||
map<string,string> localfields;
|
||||
};
|
||||
extern void *FsIndexerInternfileWorker(void*);
|
||||
#endif // IDX_THREADS
|
||||
|
@ -110,6 +110,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
|
|||
m_havelocalfields = m_config->hasNameAnywhere("localfields");
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
m_stableconfig = new RclConfig(*m_config);
|
||||
m_loglevel = DebugLog::getdbl()->getlevel();
|
||||
m_haveInternQ = m_haveSplitQ = false;
|
||||
int internqlen = cnf->getThrConf(RclConfig::ThrIntern).first;
|
||||
|
@ -152,6 +153,7 @@ FsIndexer::~FsIndexer()
|
|||
LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n",
|
||||
long(status)));
|
||||
}
|
||||
delete m_stableconfig;
|
||||
#endif // IDX_THREADS
|
||||
|
||||
delete m_missing;
|
||||
|
@ -178,7 +180,7 @@ bool FsIndexer::index()
|
|||
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker locker(m_mutex);
|
||||
PTMutexLocker locker(m_updater->m_mutex);
|
||||
#endif
|
||||
m_updater->status.reset();
|
||||
m_updater->status.dbtotdocs = m_db->docCnt();
|
||||
|
@ -293,6 +295,10 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
|
|||
if (!init())
|
||||
return false;
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
|
||||
// We use an FsTreeWalker just for handling the skipped path/name lists
|
||||
FsTreeWalker walker;
|
||||
walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||
|
@ -323,10 +329,6 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
|
|||
it++; continue;
|
||||
}
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR(("FsIndexer::indexFiles: processone failed\n"));
|
||||
|
@ -383,10 +385,10 @@ void FsIndexer::localfieldsfromconf()
|
|||
}
|
||||
|
||||
//
|
||||
void FsIndexer::setlocalfields(Rcl::Doc& doc)
|
||||
void FsIndexer::setlocalfields(map<string, string> fields, Rcl::Doc& doc)
|
||||
{
|
||||
for (map<string, string>::const_iterator it = m_localfields.begin();
|
||||
it != m_localfields.end(); it++) {
|
||||
for (map<string, string>::const_iterator it = fields.begin();
|
||||
it != fields.end(); it++) {
|
||||
// Should local fields override those coming from the document
|
||||
// ? I think not, but not too sure
|
||||
if (doc.meta.find(it->second) == doc.meta.end()) {
|
||||
|
@ -422,8 +424,7 @@ void *FsIndexerDbUpdWorker(void * fsp)
|
|||
return (void*)1;
|
||||
}
|
||||
LOGDEB0(("FsIndexerDbUpdWorker: task ql %d\n", int(qsz)));
|
||||
if (!fip->m_db->addOrUpdate(tsk->config, tsk->udi, tsk->parent_udi,
|
||||
tsk->doc)) {
|
||||
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
|
||||
LOGERR(("FsIndexerDbUpdWorker: addOrUpdate failed\n"));
|
||||
tqp->workerExit();
|
||||
return (void*)0;
|
||||
|
@ -439,7 +440,7 @@ void *FsIndexerInternfileWorker(void * fsp)
|
|||
WorkQueue<InternfileTask*> *tqp = &fip->m_iwqueue;
|
||||
DebugLog::getdbl()->setloglevel(fip->m_loglevel);
|
||||
TempDir tmpdir;
|
||||
RclConfig *myconf = new RclConfig(*(fip->m_config));
|
||||
RclConfig myconf(*(fip->m_stableconfig));
|
||||
|
||||
InternfileTask *tsk;
|
||||
for (;;) {
|
||||
|
@ -448,7 +449,8 @@ void *FsIndexerInternfileWorker(void * fsp)
|
|||
return (void*)1;
|
||||
}
|
||||
LOGDEB0(("FsIndexerInternfileWorker: task fn %s\n", tsk->fn.c_str()));
|
||||
if (fip->processonefile(myconf, tmpdir, tsk->fn, &tsk->statbuf) !=
|
||||
if (fip->processonefile(&myconf, tmpdir, tsk->fn, &tsk->statbuf,
|
||||
tsk->localfields) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR(("FsIndexerInternfileWorker: processone failed\n"));
|
||||
tqp->workerExit();
|
||||
|
@ -477,7 +479,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
{
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker locker(m_mutex);
|
||||
PTMutexLocker locker(m_updater->m_mutex);
|
||||
#endif
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
|
@ -493,10 +495,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
// Set up skipped patterns for this subtree.
|
||||
m_walker.setSkippedNames(m_config->getSkippedNames());
|
||||
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db->setAbstractParams(abslen, -1, -1);
|
||||
|
||||
// Adjust local fields from config for this subtree
|
||||
if (m_havelocalfields)
|
||||
localfieldsfromconf();
|
||||
|
@ -507,7 +505,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ) {
|
||||
InternfileTask *tp = new InternfileTask(fn, stp);
|
||||
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
|
||||
if (m_iwqueue.put(tp)) {
|
||||
return FsTreeWalker::FtwOk;
|
||||
} else {
|
||||
|
@ -516,20 +514,15 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||
}
|
||||
#endif
|
||||
|
||||
return processonefile(m_config, m_tmpdir, fn, stp);
|
||||
|
||||
return processonefile(m_config, m_tmpdir, fn, stp, m_localfields);
|
||||
}
|
||||
|
||||
|
||||
FsTreeWalker::Status
|
||||
FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
||||
const std::string &fn, const struct stat *stp)
|
||||
const std::string &fn, const struct stat *stp,
|
||||
map<string, string> localfields)
|
||||
{
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
config->setKeyDir(path_getfather(fn));
|
||||
#endif
|
||||
|
||||
////////////////////
|
||||
// Check db up to date ? Doing this before file type
|
||||
// identification means that, if usesystemfilecommand is switched
|
||||
|
@ -551,7 +544,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
LOGDEB0(("processone: up to date: %s\n", fn.c_str()));
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker locker(m_mutex);
|
||||
PTMutexLocker locker(m_updater->m_mutex);
|
||||
#endif
|
||||
// Status bar update, abort request etc.
|
||||
m_updater->status.fn = fn;
|
||||
|
@ -579,7 +572,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
// Note that we used to do the full path here, but I ended up believing
|
||||
// that it made more sense to use only the file name
|
||||
// The charset used is the one from the locale.
|
||||
string charset = m_config->getDefCharset(true);
|
||||
string charset = config->getDefCharset(true);
|
||||
string utf8fn; int ercnt;
|
||||
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
|
||||
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
|
||||
|
@ -646,7 +639,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
|
||||
// Possibly add fields from local config
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(doc);
|
||||
setlocalfields(localfields, doc);
|
||||
// Add document to database. If there is an ipath, add it as a child
|
||||
// of the file document.
|
||||
string udi;
|
||||
|
@ -654,16 +647,15 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveSplitQ) {
|
||||
DbUpdTask *tp = new DbUpdTask(config, udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc);
|
||||
DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc);
|
||||
if (!m_dwqueue.put(tp)) {
|
||||
LOGERR(("processonefile: wqueue.put failed\n"));
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
if (!m_db->addOrUpdate(config, udi, doc.ipath.empty() ? cstr_null :
|
||||
parent_udi, doc)) {
|
||||
if (!m_db->addOrUpdate(udi, doc.ipath.empty() ?
|
||||
cstr_null : parent_udi, doc)) {
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
|
@ -673,7 +665,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
// Tell what we are doing and check for interrupt request
|
||||
if (m_updater) {
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker locker(m_mutex);
|
||||
PTMutexLocker locker(m_updater->m_mutex);
|
||||
#endif
|
||||
++(m_updater->status.docsdone);
|
||||
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
|
||||
|
@ -697,6 +689,8 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
fileDoc.meta[Rcl::Doc::keyfn] = utf8fn;
|
||||
fileDoc.mimetype = interner.getMimetype();
|
||||
fileDoc.url = cstr_fileu + fn;
|
||||
if (m_havelocalfields)
|
||||
setlocalfields(localfields, fileDoc);
|
||||
|
||||
char cbuf[100];
|
||||
sprintf(cbuf, OFFTPC, stp->st_size);
|
||||
|
@ -706,15 +700,14 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
|
|||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveSplitQ) {
|
||||
DbUpdTask *tp = new DbUpdTask(config, parent_udi, cstr_null,
|
||||
fileDoc);
|
||||
DbUpdTask *tp = new DbUpdTask(parent_udi, cstr_null, fileDoc);
|
||||
if (!m_dwqueue.put(tp))
|
||||
return FsTreeWalker::FtwError;
|
||||
else
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
#endif
|
||||
if (!m_db->addOrUpdate(config, parent_udi, cstr_null, fileDoc))
|
||||
if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc))
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
|
||||
|
|
|
@ -97,11 +97,6 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||
map<string, string> m_localfields;
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
// Used to protect all ops from processonefile to class members:
|
||||
// m_missing, m_db. It would be possible to be more fine-grained
|
||||
// but probably not worth it. m_config and m_updater have separate
|
||||
// protections
|
||||
PTMutexInit m_mutex;
|
||||
friend void *FsIndexerDbUpdWorker(void*);
|
||||
friend void *FsIndexerInternfileWorker(void*);
|
||||
int m_loglevel;
|
||||
|
@ -109,15 +104,16 @@ class FsIndexer : public FsTreeWalkerCB {
|
|||
WorkQueue<DbUpdTask*> m_dwqueue;
|
||||
bool m_haveInternQ;
|
||||
bool m_haveSplitQ;
|
||||
RclConfig *m_stableconfig;
|
||||
#endif // IDX_THREADS
|
||||
|
||||
bool init();
|
||||
void localfieldsfromconf();
|
||||
void setlocalfields(Rcl::Doc& doc);
|
||||
void setlocalfields(const map<string, string> flds, Rcl::Doc& doc);
|
||||
string getDbDir() {return m_config->getDbDir();}
|
||||
FsTreeWalker::Status
|
||||
processonefile(RclConfig *config, TempDir& tmpdir, const string &fn,
|
||||
const struct stat *);
|
||||
const struct stat *, map<string,string> localfields);
|
||||
};
|
||||
|
||||
#endif /* _fsindexer_h_included_ */
|
||||
|
|
|
@ -32,6 +32,9 @@ using std::vector;
|
|||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
#include "rcldoc.h"
|
||||
#ifdef IDX_THREADS
|
||||
#include "ptmutex.h"
|
||||
#endif
|
||||
|
||||
class FsIndexer;
|
||||
class BeagleQueueIndexer;
|
||||
|
@ -60,12 +63,18 @@ class DbIxStatus {
|
|||
* stop as soon as possible without corrupting state */
|
||||
class DbIxStatusUpdater {
|
||||
public:
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexInit m_mutex;
|
||||
#endif
|
||||
DbIxStatus status;
|
||||
virtual ~DbIxStatusUpdater(){}
|
||||
|
||||
// Convenience: change phase/fn and update
|
||||
virtual bool update(DbIxStatus::Phase phase, const string& fn)
|
||||
{
|
||||
#ifdef IDX_THREADS
|
||||
PTMutexLocker lock(m_mutex);
|
||||
#endif
|
||||
status.phase = phase;
|
||||
status.fn = fn;
|
||||
return update();
|
||||
|
|
|
@ -88,7 +88,7 @@ int stopindexing;
|
|||
// should subsequently orderly terminate what it is doing.
|
||||
class MyUpdater : public DbIxStatusUpdater {
|
||||
public:
|
||||
MyUpdater(RclConfig *config)
|
||||
MyUpdater(const RclConfig *config)
|
||||
: m_prevphase(DbIxStatus::DBIXS_NONE)
|
||||
{
|
||||
m_fd = open(config->getIdxStatusFile().c_str(),
|
||||
|
@ -165,7 +165,7 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
|
|||
}
|
||||
}
|
||||
|
||||
void rclIxIonice(RclConfig *config)
|
||||
void rclIxIonice(const RclConfig *config)
|
||||
{
|
||||
string clss, classdata;
|
||||
if (!config->getConfParam("monioniceclass", clss) || clss.empty())
|
||||
|
|
|
@ -1149,7 +1149,7 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
|
|||
PyErr_SetString(PyExc_AttributeError, "doc");
|
||||
return 0;
|
||||
}
|
||||
if (!self->db->addOrUpdate(rclconfig, udi, parent_udi, *pydoc->doc)) {
|
||||
if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
|
||||
LOGERR(("Db_addOrUpdate: rcldb error\n"));
|
||||
PyErr_SetString(PyExc_AttributeError, "rcldb error");
|
||||
return 0;
|
||||
|
|
|
@ -338,11 +338,12 @@ int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
|
|||
bool Db::o_inPlaceReset;
|
||||
|
||||
Db::Db(const RclConfig *cfp)
|
||||
: m_ndb(0), m_config(cfp), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
|
||||
: m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
|
||||
m_occtxtsz(0), m_occFirstCheck(1),
|
||||
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
|
||||
m_flushMb(-1), m_maxFsOccupPc(0)
|
||||
{
|
||||
m_config = new RclConfig(*cfp);
|
||||
#ifndef RCL_INDEX_STRIPCHARS
|
||||
if (start_of_field_term.empty()) {
|
||||
if (o_index_stripchars) {
|
||||
|
@ -370,6 +371,7 @@ Db::~Db()
|
|||
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
||||
m_ndb->m_iswritable));
|
||||
i_close(true);
|
||||
delete m_config;
|
||||
}
|
||||
|
||||
vector<string> Db::getStemmerNames()
|
||||
|
@ -867,8 +869,7 @@ static const string cstr_nc("\n\r\x0c");
|
|||
// the title abstract and body and add special terms for file name,
|
||||
// date, mime type, etc., create the document data record (more
|
||||
// metadata), and update database
|
||||
bool Db::addOrUpdate(RclConfig *config, const string &udi,
|
||||
const string &parent_udi, Doc &doc)
|
||||
bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||
{
|
||||
LOGDEB(("Db::add: udi [%s] parent [%s]\n",
|
||||
udi.c_str(), parent_udi.c_str()));
|
||||
|
@ -1097,10 +1098,10 @@ bool Db::addOrUpdate(RclConfig *config, const string &udi,
|
|||
if (!doc.meta[Doc::keyabs].empty())
|
||||
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
|
||||
const set<string>& stored = config->getStoredFields();
|
||||
const set<string>& stored = m_config->getStoredFields();
|
||||
for (set<string>::const_iterator it = stored.begin();
|
||||
it != stored.end(); it++) {
|
||||
string nm = config->fieldCanon(*it);
|
||||
string nm = m_config->fieldCanon(*it);
|
||||
if (!doc.meta[*it].empty()) {
|
||||
string value =
|
||||
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
|
||||
|
|
|
@ -256,7 +256,7 @@ class Db {
|
|||
* @param doc container for document data. Should have been filled as
|
||||
* much as possible depending on the document type.
|
||||
*/
|
||||
bool addOrUpdate(RclConfig *config, const string &udi,
|
||||
bool addOrUpdate(const string &udi,
|
||||
const string &parent_udi, Doc &doc);
|
||||
#ifdef IDX_THREADS
|
||||
void waitUpdIdle();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue