Implemented configuration for thread structure. Still crashes, but not often...

This commit is contained in:
Jean-Francois Dockes 2012-11-29 17:26:48 +01:00
parent 30516ceb52
commit 2f800e3eb3
10 changed files with 368 additions and 177 deletions

View file

@ -127,6 +127,29 @@ static inline string make_parentterm(const string& udi)
return pterm;
}
// Construct the Xapian-private part of a Db. The object starts out
// not open and not writable.
//
// With IDX_THREADS, the update work queue is constructed here, sized
// with the first element of the ThrDbWrite thread configuration pair.
// No worker thread is started by the constructor.
//
// @param db the owning Db object; its m_config is dereferenced here
//     when IDX_THREADS is defined, so it must be set by then.
Db::Native::Native(Db *db)
    : m_rcldb(db), m_isopen(false), m_iswritable(false),
      m_noversionwrite(false)
#ifdef IDX_THREADS
    // Go through the constructor argument rather than m_rcldb, so that
    // this does not depend on m_rcldb being declared (hence
    // initialized) before m_wqueue in the class.
    , m_wqueue("DbUpd",
               db->m_config->getThrConf(RclConfig::ThrDbWrite).first),
      m_totalworkns(0LL)
#endif // IDX_THREADS
{
#ifdef IDX_THREADS
    // The destructor tests m_haveWriteQ before shutting down the queue.
    // Give the flag a defined value now, so that an object which never
    // gets to start its worker does not test an indeterminate bool.
    // Assigned in the body (not the initializer list) so that the
    // member declaration order does not matter.
    m_haveWriteQ = false;
#endif // IDX_THREADS
    LOGDEB1(("Native::Native: me %p\n", this));
}
// Destructor. With IDX_THREADS, and only if the write queue is flagged
// as in use (m_haveWriteQ), ask the queue to terminate and wait for
// it, logging the status value it hands back.
Db::Native::~Native()
{
    LOGDEB1(("Native::~Native: me %p\n", this));
#ifdef IDX_THREADS
    // Nothing to shut down if the write queue is not in use.
    if (!m_haveWriteQ)
        return;
    void *workerStatus = m_wqueue.setTerminateAndWait();
    LOGDEB2(("Native::~Native: worker status %ld\n", long(workerStatus)));
#endif // IDX_THREADS
}
#ifdef IDX_THREADS
void *DbUpdWorker(void* vdbp)
{
@ -143,8 +166,14 @@ void *DbUpdWorker(void* vdbp)
return (void*)1;
}
LOGDEB(("DbUpdWorker: got task, ql %d\n", int(qsz)));
if (!ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,
tsk->doc, tsk->txtlen)) {
bool status;
if (tsk->txtlen == (size_t)-1) {
status = ndbp->m_rcldb->purgeFileWrite(tsk->udi, tsk->uniterm);
} else {
status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,
tsk->doc, tsk->txtlen);
}
if (!status) {
LOGERR(("DbUpdWorker: addOrUpdateWrite failed\n"));
tqp->workerExit();
delete tsk;
@ -153,35 +182,31 @@ void *DbUpdWorker(void* vdbp)
delete tsk;
}
}
#endif // IDX_THREADS
Db::Native::Native(Db *db)
: m_rcldb(db), m_isopen(false), m_iswritable(false),
m_noversionwrite(false)
#ifdef IDX_THREADS
, m_wqueue("DbUpd", 2), m_totalworkns(0LL)
#endif // IDX_THREADS
{
LOGDEB1(("Native::Native: me %p\n", this));
#ifdef IDX_THREADS
void Db::Native::maybeStartThreads()
{
m_loglevel = DebugLog::getdbl()->getlevel();
if (!m_wqueue.start(1, DbUpdWorker, this)) {
LOGERR(("Db::Db: Worker start failed\n"));
return;
m_haveWriteQ = false;
const RclConfig *cnf = m_rcldb->m_config;
int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;
int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;
if (writethreads > 1) {
LOGINFO(("RclDb: write threads count was forced down to 1\n"));
writethreads = 1;
}
#endif // IDX_THREADS
if (writeqlen >= 0 && writethreads > 0) {
if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {
LOGERR(("Db::Db: Worker start failed\n"));
return;
}
m_haveWriteQ = true;
}
LOGDEB(("RclDb:: threads: haveWriteQ %d, wqlen %d wqts %d\n",
m_haveWriteQ, writeqlen, writethreads));
}
Db::Native::~Native()
{
LOGDEB1(("Native::~Native: me %p\n", this));
#ifdef IDX_THREADS
if (m_iswritable) {
void *status = m_wqueue.setTerminateAndWait();
LOGDEB2(("Native::~Native: worker status %ld\n", long(status)));
}
#endif // IDX_THREADS
}
/* See comment in class declaration: return all subdocuments of a
* document given by its unique id.
@ -313,11 +338,11 @@ int Db::Native::getPageNumberForPosition(const vector<int>& pbreaks,
bool Db::o_inPlaceReset;
Db::Db(RclConfig *cfp)
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
m_synthAbsWordCtxLen(4), m_flushMb(-1),
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
m_maxFsOccupPc(0), m_mode(Db::DbRO)
Db::Db(const RclConfig *cfp)
: m_ndb(0), m_config(cfp), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
m_occtxtsz(0), m_occFirstCheck(1),
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
m_flushMb(-1), m_maxFsOccupPc(0)
{
#ifndef RCL_INDEX_STRIPCHARS
if (start_of_field_term.empty()) {
@ -390,6 +415,7 @@ bool Db::open(OpenMode mode, OpenError *error)
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
cstr_RCL_IDX_VERSION);
m_ndb->m_iswritable = true;
m_ndb->maybeStartThreads();
// We open a readonly object in all cases (possibly in
// addition to the r/w one) because some operations
// are faster when performed through a Database: no
@ -424,8 +450,8 @@ bool Db::open(OpenMode mode, OpenError *error)
// Check index format version. Must not try to check a just created or
// truncated db
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
string version = m_ndb->xdb().get_metadata(cstr_RCL_IDX_VERSION_KEY);
if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {
string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);
if (version.compare(cstr_RCL_IDX_VERSION)) {
m_ndb->m_noversionwrite = true;
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
@ -541,7 +567,7 @@ int Db::termDocCnt(const string& _term)
return 0;
}
XAPTRY(res = m_ndb->xdb().get_termfreq(term), m_ndb->xrdb, m_reason);
XAPTRY(res = m_ndb->xrdb.get_termfreq(term), m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::termDocCnt: got error: %s\n", m_reason.c_str()));
@ -1110,16 +1136,20 @@ bool Db::addOrUpdate(RclConfig *config, const string &udi,
newdocument.set_data(record);
#ifdef IDX_THREADS
DbUpdTask *tp = new DbUpdTask(udi, uniterm, newdocument, doc.text.length());
if (!m_ndb->m_wqueue.put(tp)) {
LOGERR(("Db::addOrUpdate:Cant queue task\n"));
return false;
if (m_ndb->m_haveWriteQ) {
DbUpdTask *tp = new DbUpdTask(udi, uniterm, newdocument,
doc.text.length());
if (!m_ndb->m_wqueue.put(tp)) {
LOGERR(("Db::addOrUpdate:Cant queue task\n"));
return false;
} else {
return true;
}
}
return true;
#else
#endif
return m_ndb->addOrUpdateWrite(udi, uniterm, newdocument,
doc.text.length());
#endif // IDX_THREADS
}
bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
@ -1127,7 +1157,13 @@ bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
{
#ifdef IDX_THREADS
Chrono chron;
// In the case where there is a separate (single) db update
// thread, we only need to protect the update map update below
// (against interaction with threads calling needUpdate()). Else,
// all threads from above need to synchronize here
PTMutexLocker lock(m_mutex, m_haveWriteQ);
#endif
// Check file system full every mbyte of indexed text. It's a bit wasteful
// to do this after having prepared the document, but it needs to be in
// the single-threaded section.
@ -1155,7 +1191,7 @@ bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
#ifdef IDX_THREADS
// Need to protect against interaction with the up-to-date checks
// which also update the existence map
PTMutexLocker lock(m_rcldb->m_ndb->m_mutex);
PTMutexLocker lock(m_mutex, !m_haveWriteQ);
#endif
if (did < m_rcldb->updated.size()) {
m_rcldb->updated[did] = true;
@ -1191,18 +1227,21 @@ bool Db::Native::addOrUpdateWrite(const string& udi, const string& uniterm,
#ifdef IDX_THREADS
void Db::waitUpdIdle()
{
Chrono chron;
m_ndb->m_wqueue.waitIdle();
string ermsg;
try {
m_ndb->xwdb.flush();
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("Db::waitUpdIdle: flush() failed: %s\n", ermsg.c_str()));
if (m_ndb->m_haveWriteQ) {
Chrono chron;
m_ndb->m_wqueue.waitIdle();
// We flush here just for correct measurement of the thread work time
string ermsg;
try {
m_ndb->xwdb.flush();
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("Db::waitUpdIdle: flush() failed: %s\n", ermsg.c_str()));
}
m_ndb->m_totalworkns += chron.nanos();
LOGDEB(("Db::waitUpdIdle: total work %lld mS\n",
m_ndb->m_totalworkns/1000000));
}
m_ndb->m_totalworkns += chron.nanos();
LOGDEB(("Db::waitUpdIdle: total work %lld mS\n",
m_ndb->m_totalworkns/1000000));
}
#endif
@ -1243,6 +1282,13 @@ bool Db::needUpdate(const string &udi, const string& sig)
string uniterm = make_uniterm(udi);
string ermsg;
#ifdef IDX_THREADS
// Need to protect against interaction with the doc update/insert
// thread which also updates the existence map, and even multiple
// accesses to the readonly Xapian::Database are not allowed
// anyway
PTMutexLocker lock(m_ndb->m_mutex);
#endif
// We look up the document indexed by the uniterm. This is either
// the actual document file, or, for a multi-document file, the
// pseudo-doc we create to stand for the file itself.
@ -1277,12 +1323,6 @@ bool Db::needUpdate(const string &udi, const string& sig)
// Set the uptodate flag for doc / pseudo doc
if (m_mode != DbRO) {
#ifdef IDX_THREADS
// Need to protect against interaction with the doc
// update/insert thread which also updates the
// existence map
PTMutexLocker lock(m_ndb->m_mutex);
#endif
updated[*docid] = true;
// Set the existence flag for all the subdocs (if any)
@ -1372,7 +1412,13 @@ bool Db::purge()
return false;
#ifdef IDX_THREADS
m_ndb->m_wqueue.waitIdle();
// If we manage our own write queue, make sure it's drained and closed
if (m_ndb->m_haveWriteQ)
m_ndb->m_wqueue.setTerminateAndWait();
// else we need to lock out other top level threads. This is just
// a precaution as they should have been waited for by the top
// level actor at this point
PTMutexLocker lock(m_ndb->m_mutex, m_ndb->m_haveWriteQ);
#endif // IDX_THREADS
// For xapian versions up to 1.0.1, deleting a non-existant
@ -1390,8 +1436,6 @@ bool Db::purge()
// Walk the document array and delete any xapian document whose
// flag is not set (we did not see its source during indexing).
// Threads: we do not need a mutex here as the indexing threads
// are necessarily done at this point.
int purgecount = 0;
for (Xapian::docid docid = 1; docid < updated.size(); ++docid) {
if (!updated[docid]) {
@ -1436,6 +1480,30 @@ bool Db::purge()
return true;
}
// Test for doc existence.
bool Db::docExists(const string& uniterm)
{
#ifdef IDX_THREADS
// If we're not running our own (single) thread, need to protect
// read db against multiaccess (e.g. from needUpdate(), or this method).
PTMutexLocker lock(m_ndb->m_mutex, m_ndb->m_haveWriteQ);
#endif
string ermsg;
try {
Xapian::PostingIterator docid = m_ndb->xrdb.postlist_begin(uniterm);
if (docid == m_ndb->xrdb.postlist_end(uniterm)) {
return false;
} else {
return true;
}
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
LOGERR(("Db::docExists(%s) %s\n", uniterm.c_str(), ermsg.c_str()));
}
return false;
}
/* Delete document(s) for given unique identifier (doc and descendents) */
bool Db::purgeFile(const string &udi, bool *existed)
{
@ -1443,21 +1511,44 @@ bool Db::purgeFile(const string &udi, bool *existed)
if (m_ndb == 0 || !m_ndb->m_iswritable)
return false;
string uniterm = make_uniterm(udi);
bool exists = docExists(uniterm);
if (existed)
*existed = exists;
if (!exists)
return true;
#ifdef IDX_THREADS
m_ndb->m_wqueue.waitIdle();
if (m_ndb->m_haveWriteQ) {
Xapian::Document xdoc;
DbUpdTask *tp = new DbUpdTask(udi, uniterm, xdoc, (size_t)-1);
if (!m_ndb->m_wqueue.put(tp)) {
LOGERR(("Db::purgeFile:Cant queue task\n"));
return false;
} else {
return true;
}
}
#endif
return purgeFileWrite(udi, uniterm);
}
bool Db::purgeFileWrite(const string& udi, const string& uniterm)
{
#if defined(IDX_THREADS)
// If we have a write queue we're called from there, and single threaded, no locking.
// Else need to mutex other threads from above
PTMutexLocker lock(m_ndb->m_mutex, m_ndb->m_haveWriteQ);
#endif // IDX_THREADS
Xapian::WritableDatabase db = m_ndb->xwdb;
string uniterm = make_uniterm(udi);
string ermsg;
try {
Xapian::PostingIterator docid = db.postlist_begin(uniterm);
if (docid == db.postlist_end(uniterm)) {
if (existed)
*existed = false;
return true;
}
*existed = true;
LOGDEB(("purgeFile: delete docid %d\n", *docid));
if (m_flushMb > 0) {
Xapian::termcount trms = m_ndb->xwdb.get_doclength(*docid);
@ -1613,7 +1704,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
{
if (!m_ndb || !m_ndb->m_isopen)
return false;
Xapian::Database xdb = m_ndb->xdb();
Xapian::Database xdb = m_ndb->xrdb;
XAPTRY(res.dbdoccount = xdb.get_doccount();
res.dbavgdoclen = xdb.get_avlength(), xdb, m_reason);
@ -1769,7 +1860,7 @@ TermIter *Db::termWalkOpen()
return 0;
TermIter *tit = new TermIter;
if (tit) {
tit->db = m_ndb->xdb();
tit->db = m_ndb->xrdb;
XAPTRY(tit->it = tit->db.allterms_begin(), tit->db, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::termWalkOpen: xapian error: %s\n", m_reason.c_str()));
@ -1804,7 +1895,7 @@ bool Db::termExists(const string& word)
if (!m_ndb || !m_ndb->m_isopen)
return 0;
XAPTRY(if (!m_ndb->xdb().term_exists(word)) return false,
XAPTRY(if (!m_ndb->xrdb.term_exists(word)) return false,
m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {