more config isolation. Still crashing

This commit is contained in:
Jean-Francois Dockes 2012-11-30 16:45:02 +01:00
parent 45d56f17de
commit bc94f9f83f
8 changed files with 66 additions and 67 deletions

View file

@ -219,7 +219,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
if (!stringlowercmp("bookmark", hittype)) {
// Just index the dotdoc
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc);
return m_db->addOrUpdate(udi, cstr_null, dotdoc);
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
(dotdoc.mimetype.compare("text/html") &&
dotdoc.mimetype.compare(cstr_textplain))) {
@ -249,7 +249,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
doc.pcbytes = dotdoc.pcbytes;
doc.sig.clear();
doc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(m_config, udi, cstr_null, doc);
return m_db->addOrUpdate(udi, cstr_null, doc);
}
}
@ -420,7 +420,7 @@ BeagleQueueIndexer::processone(const string &path,
dotdoc.sig.clear();
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc))
if (!m_db->addOrUpdate(udi, cstr_null, dotdoc))
return FsTreeWalker::FtwError;
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
@ -467,7 +467,7 @@ BeagleQueueIndexer::processone(const string &path,
doc.url = dotdoc.url;
doc.meta[Rcl::Doc::keybcknd] = "BGL";
if (!m_db->addOrUpdate(m_config, udi, cstr_null, doc))
if (!m_db->addOrUpdate(udi, cstr_null, doc))
return FsTreeWalker::FtwError;
}

View file

@ -59,24 +59,24 @@ using namespace std;
#ifdef IDX_THREADS
class DbUpdTask {
public:
DbUpdTask(RclConfig *cnf, const string& u, const string& p,
const Rcl::Doc& d)
: udi(u), parent_udi(p), doc(d), config(cnf)
DbUpdTask(const string& u, const string& p, const Rcl::Doc& d)
: udi(u), parent_udi(p), doc(d)
{}
string udi;
string parent_udi;
Rcl::Doc doc;
RclConfig *config;
};
extern void *FsIndexerDbUpdWorker(void*);
class InternfileTask {
public:
InternfileTask(const std::string &f, const struct stat *i_stp)
: fn(f), statbuf(*i_stp)
InternfileTask(const std::string &f, const struct stat *i_stp,
map<string,string> lfields)
: fn(f), statbuf(*i_stp), localfields(lfields)
{}
string fn;
struct stat statbuf;
map<string,string> localfields;
};
extern void *FsIndexerInternfileWorker(void*);
#endif // IDX_THREADS
@ -110,6 +110,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
m_havelocalfields = m_config->hasNameAnywhere("localfields");
#ifdef IDX_THREADS
m_stableconfig = new RclConfig(*m_config);
m_loglevel = DebugLog::getdbl()->getlevel();
m_haveInternQ = m_haveSplitQ = false;
int internqlen = cnf->getThrConf(RclConfig::ThrIntern).first;
@ -152,6 +153,7 @@ FsIndexer::~FsIndexer()
LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n",
long(status)));
}
delete m_stableconfig;
#endif // IDX_THREADS
delete m_missing;
@ -178,7 +180,7 @@ bool FsIndexer::index()
if (m_updater) {
#ifdef IDX_THREADS
PTMutexLocker locker(m_mutex);
PTMutexLocker locker(m_updater->m_mutex);
#endif
m_updater->status.reset();
m_updater->status.dbtotdocs = m_db->docCnt();
@ -293,6 +295,10 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
if (!init())
return false;
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
// We use an FsTreeWalker just for handling the skipped path/name lists
FsTreeWalker walker;
walker.setSkippedPaths(m_config->getSkippedPaths());
@ -323,10 +329,6 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
it++; continue;
}
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
FsTreeWalker::FtwOk) {
LOGERR(("FsIndexer::indexFiles: processone failed\n"));
@ -383,10 +385,10 @@ void FsIndexer::localfieldsfromconf()
}
//
void FsIndexer::setlocalfields(Rcl::Doc& doc)
void FsIndexer::setlocalfields(map<string, string> fields, Rcl::Doc& doc)
{
for (map<string, string>::const_iterator it = m_localfields.begin();
it != m_localfields.end(); it++) {
for (map<string, string>::const_iterator it = fields.begin();
it != fields.end(); it++) {
// Should local fields override those coming from the document?
// I think not, but not too sure
if (doc.meta.find(it->second) == doc.meta.end()) {
@ -422,8 +424,7 @@ void *FsIndexerDbUpdWorker(void * fsp)
return (void*)1;
}
LOGDEB0(("FsIndexerDbUpdWorker: task ql %d\n", int(qsz)));
if (!fip->m_db->addOrUpdate(tsk->config, tsk->udi, tsk->parent_udi,
tsk->doc)) {
if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) {
LOGERR(("FsIndexerDbUpdWorker: addOrUpdate failed\n"));
tqp->workerExit();
return (void*)0;
@ -439,7 +440,7 @@ void *FsIndexerInternfileWorker(void * fsp)
WorkQueue<InternfileTask*> *tqp = &fip->m_iwqueue;
DebugLog::getdbl()->setloglevel(fip->m_loglevel);
TempDir tmpdir;
RclConfig *myconf = new RclConfig(*(fip->m_config));
RclConfig myconf(*(fip->m_stableconfig));
InternfileTask *tsk;
for (;;) {
@ -448,7 +449,8 @@ void *FsIndexerInternfileWorker(void * fsp)
return (void*)1;
}
LOGDEB0(("FsIndexerInternfileWorker: task fn %s\n", tsk->fn.c_str()));
if (fip->processonefile(myconf, tmpdir, tsk->fn, &tsk->statbuf) !=
if (fip->processonefile(&myconf, tmpdir, tsk->fn, &tsk->statbuf,
tsk->localfields) !=
FsTreeWalker::FtwOk) {
LOGERR(("FsIndexerInternfileWorker: processone failed\n"));
tqp->workerExit();
@ -477,7 +479,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
{
if (m_updater) {
#ifdef IDX_THREADS
PTMutexLocker locker(m_mutex);
PTMutexLocker locker(m_updater->m_mutex);
#endif
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
@ -493,10 +495,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
// Set up skipped patterns for this subtree.
m_walker.setSkippedNames(m_config->getSkippedNames());
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
// Adjust local fields from config for this subtree
if (m_havelocalfields)
localfieldsfromconf();
@ -507,7 +505,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
#ifdef IDX_THREADS
if (m_haveInternQ) {
InternfileTask *tp = new InternfileTask(fn, stp);
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
if (m_iwqueue.put(tp)) {
return FsTreeWalker::FtwOk;
} else {
@ -516,20 +514,15 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
}
#endif
return processonefile(m_config, m_tmpdir, fn, stp);
return processonefile(m_config, m_tmpdir, fn, stp, m_localfields);
}
FsTreeWalker::Status
FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
const std::string &fn, const struct stat *stp)
const std::string &fn, const struct stat *stp,
map<string, string> localfields)
{
#ifdef IDX_THREADS
config->setKeyDir(path_getfather(fn));
#endif
////////////////////
// Check db up to date ? Doing this before file type
// identification means that, if usesystemfilecommand is switched
@ -551,7 +544,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
LOGDEB0(("processone: up to date: %s\n", fn.c_str()));
if (m_updater) {
#ifdef IDX_THREADS
PTMutexLocker locker(m_mutex);
PTMutexLocker locker(m_updater->m_mutex);
#endif
// Status bar update, abort request etc.
m_updater->status.fn = fn;
@ -579,7 +572,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
// Note that we used to do the full path here, but I ended up believing
// that it made more sense to use only the file name
// The charset used is the one from the locale.
string charset = m_config->getDefCharset(true);
string charset = config->getDefCharset(true);
string utf8fn; int ercnt;
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
@ -646,7 +639,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
// Possibly add fields from local config
if (m_havelocalfields)
setlocalfields(doc);
setlocalfields(localfields, doc);
// Add document to database. If there is an ipath, add it as a child
// of the file document.
string udi;
@ -654,16 +647,15 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
#ifdef IDX_THREADS
if (m_haveSplitQ) {
DbUpdTask *tp = new DbUpdTask(config, udi, doc.ipath.empty() ?
cstr_null : parent_udi, doc);
DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc);
if (!m_dwqueue.put(tp)) {
LOGERR(("processonefile: wqueue.put failed\n"));
return FsTreeWalker::FtwError;
}
} else {
#endif
if (!m_db->addOrUpdate(config, udi, doc.ipath.empty() ? cstr_null :
parent_udi, doc)) {
if (!m_db->addOrUpdate(udi, doc.ipath.empty() ?
cstr_null : parent_udi, doc)) {
return FsTreeWalker::FtwError;
}
#ifdef IDX_THREADS
@ -673,7 +665,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
// Tell what we are doing and check for interrupt request
if (m_updater) {
#ifdef IDX_THREADS
PTMutexLocker locker(m_mutex);
PTMutexLocker locker(m_updater->m_mutex);
#endif
++(m_updater->status.docsdone);
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
@ -697,6 +689,8 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
fileDoc.meta[Rcl::Doc::keyfn] = utf8fn;
fileDoc.mimetype = interner.getMimetype();
fileDoc.url = cstr_fileu + fn;
if (m_havelocalfields)
setlocalfields(localfields, fileDoc);
char cbuf[100];
sprintf(cbuf, OFFTPC, stp->st_size);
@ -706,15 +700,14 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
#ifdef IDX_THREADS
if (m_haveSplitQ) {
DbUpdTask *tp = new DbUpdTask(config, parent_udi, cstr_null,
fileDoc);
DbUpdTask *tp = new DbUpdTask(parent_udi, cstr_null, fileDoc);
if (!m_dwqueue.put(tp))
return FsTreeWalker::FtwError;
else
return FsTreeWalker::FtwOk;
}
#endif
if (!m_db->addOrUpdate(config, parent_udi, cstr_null, fileDoc))
if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc))
return FsTreeWalker::FtwError;
}

View file

@ -97,11 +97,6 @@ class FsIndexer : public FsTreeWalkerCB {
map<string, string> m_localfields;
#ifdef IDX_THREADS
// Used to protect all ops from processonefile to class members:
// m_missing, m_db. It would be possible to be more fine-grained
// but probably not worth it. m_config and m_updater have separate
// protections
PTMutexInit m_mutex;
friend void *FsIndexerDbUpdWorker(void*);
friend void *FsIndexerInternfileWorker(void*);
int m_loglevel;
@ -109,15 +104,16 @@ class FsIndexer : public FsTreeWalkerCB {
WorkQueue<DbUpdTask*> m_dwqueue;
bool m_haveInternQ;
bool m_haveSplitQ;
RclConfig *m_stableconfig;
#endif // IDX_THREADS
bool init();
void localfieldsfromconf();
void setlocalfields(Rcl::Doc& doc);
void setlocalfields(const map<string, string> flds, Rcl::Doc& doc);
string getDbDir() {return m_config->getDbDir();}
FsTreeWalker::Status
processonefile(RclConfig *config, TempDir& tmpdir, const string &fn,
const struct stat *);
const struct stat *, map<string,string> localfields);
};
#endif /* _fsindexer_h_included_ */

View file

@ -32,6 +32,9 @@ using std::vector;
#include "rclconfig.h"
#include "rcldb.h"
#include "rcldoc.h"
#ifdef IDX_THREADS
#include "ptmutex.h"
#endif
class FsIndexer;
class BeagleQueueIndexer;
@ -60,12 +63,18 @@ class DbIxStatus {
* stop as soon as possible without corrupting state */
class DbIxStatusUpdater {
public:
#ifdef IDX_THREADS
PTMutexInit m_mutex;
#endif
DbIxStatus status;
virtual ~DbIxStatusUpdater(){}
// Convenience: change phase/fn and update
virtual bool update(DbIxStatus::Phase phase, const string& fn)
{
#ifdef IDX_THREADS
PTMutexLocker lock(m_mutex);
#endif
status.phase = phase;
status.fn = fn;
return update();

View file

@ -88,7 +88,7 @@ int stopindexing;
// should subsequently orderly terminate what it is doing.
class MyUpdater : public DbIxStatusUpdater {
public:
MyUpdater(RclConfig *config)
MyUpdater(const RclConfig *config)
: m_prevphase(DbIxStatus::DBIXS_NONE)
{
m_fd = open(config->getIdxStatusFile().c_str(),
@ -165,7 +165,7 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset)
}
}
void rclIxIonice(RclConfig *config)
void rclIxIonice(const RclConfig *config)
{
string clss, classdata;
if (!config->getConfParam("monioniceclass", clss) || clss.empty())

View file

@ -1149,7 +1149,7 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *)
PyErr_SetString(PyExc_AttributeError, "doc");
return 0;
}
if (!self->db->addOrUpdate(rclconfig, udi, parent_udi, *pydoc->doc)) {
if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
LOGERR(("Db_addOrUpdate: rcldb error\n"));
PyErr_SetString(PyExc_AttributeError, "rcldb error");
return 0;

View file

@ -338,11 +338,12 @@ int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
bool Db::o_inPlaceReset;
Db::Db(const RclConfig *cfp)
: m_ndb(0), m_config(cfp), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
: m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
m_occtxtsz(0), m_occFirstCheck(1),
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
m_flushMb(-1), m_maxFsOccupPc(0)
{
m_config = new RclConfig(*cfp);
#ifndef RCL_INDEX_STRIPCHARS
if (start_of_field_term.empty()) {
if (o_index_stripchars) {
@ -370,6 +371,7 @@ Db::~Db()
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
m_ndb->m_iswritable));
i_close(true);
delete m_config;
}
vector<string> Db::getStemmerNames()
@ -867,8 +869,7 @@ static const string cstr_nc("\n\r\x0c");
// the title abstract and body and add special terms for file name,
// date, mime type etc. , create the document data record (more
// metadata), and update database
bool Db::addOrUpdate(RclConfig *config, const string &udi,
const string &parent_udi, Doc &doc)
bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
{
LOGDEB(("Db::add: udi [%s] parent [%s]\n",
udi.c_str(), parent_udi.c_str()));
@ -1097,10 +1098,10 @@ bool Db::addOrUpdate(RclConfig *config, const string &udi,
if (!doc.meta[Doc::keyabs].empty())
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
const set<string>& stored = config->getStoredFields();
const set<string>& stored = m_config->getStoredFields();
for (set<string>::const_iterator it = stored.begin();
it != stored.end(); it++) {
string nm = config->fieldCanon(*it);
string nm = m_config->fieldCanon(*it);
if (!doc.meta[*it].empty()) {
string value =
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);

View file

@ -256,7 +256,7 @@ class Db {
* @param doc container for document data. Should have been filled as
* much as possible depending on the document type.
*/
bool addOrUpdate(RclConfig *config, const string &udi,
bool addOrUpdate(const string &udi,
const string &parent_udi, Doc &doc);
#ifdef IDX_THREADS
void waitUpdIdle();