get rid of numerous probably inocuous valgrind/helgrind messages by ensuring that actual string copies are passed between threads, without refcount/shared data magic

This commit is contained in:
Jean-Francois Dockes 2014-05-05 19:01:58 +02:00
parent 43c5c4c59f
commit f70e760759
8 changed files with 73 additions and 17 deletions

View file

@ -63,9 +63,12 @@ using namespace std;
#ifdef IDX_THREADS
class DbUpdTask {
public:
// Take some care to avoid sharing string data (if string impl is cow)
DbUpdTask(const string& u, const string& p, const Rcl::Doc& d)
: udi(u), parent_udi(p), doc(d)
{}
: udi(u.begin(), u.end()), parent_udi(p.begin(), p.end())
{
d.copyto(&doc);
}
string udi;
string parent_udi;
Rcl::Doc doc;
@ -74,10 +77,13 @@ extern void *FsIndexerDbUpdWorker(void*);
class InternfileTask {
public:
// Take some care to avoid sharing string data (if string impl is cow)
InternfileTask(const std::string &f, const struct stat *i_stp,
map<string,string> lfields)
: fn(f), statbuf(*i_stp), localfields(lfields)
{}
: fn(f.begin(), f.end()), statbuf(*i_stp)
{
map_ss_cp_noshr(lfields, &localfields);
}
string fn;
struct stat statbuf;
map<string,string> localfields;

View file

@ -373,6 +373,7 @@ MyHtmlParser::opening_tag(const string &tag)
// Specific to Recoll filters.
decode_entities(content);
struct tm tm;
memset(&tm, 0, sizeof(tm));
if (strptime(content.c_str(),
" %Y-%m-%d %H:%M:%S ", &tm) ||
strptime(content.c_str(),

View file

@ -1419,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
// Dates etc.
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
doc.dmtime.c_str());
struct tm *tm = localtime(&mtime);
struct tm tmb;
localtime_r(&mtime, &tmb);
char buf[9];
snprintf(buf, 9, "%04d%02d%02d",
tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
// Date (YYYYMMDD)
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
// Month (YYYYMM)

View file

@ -50,9 +50,11 @@ public:
// Note that udi and uniterm are strictly equivalent and are
// passed both just to avoid recomputing uniterm which is
// available on the caller site.
// Take some care to avoid sharing string data (if string impl is cow)
DbUpdTask(Op _op, const string& ud, const string& un,
Xapian::Document *d, size_t tl)
: op(_op), udi(ud), uniterm(un), doc(d), txtlen(tl)
: op(_op), udi(ud.begin(), ud.end()), uniterm(un.begin(), un.end()),
doc(d), txtlen(tl)
{}
// Udi and uniterm equivalently designate the doc
Op op;

View file

@ -19,12 +19,12 @@
#include <string>
#include <map>
#ifndef NO_NAMESPACES
using std::string;
using std::map;
#include "smallut.h"
namespace Rcl {
#endif
/**
* Dumb holder for document attributes and data.
@ -139,6 +139,8 @@ class Doc {
void erase() {
url.erase();
idxurl.erase();
idxi = 0;
ipath.erase();
mimetype.erase();
fmtime.erase();
@ -160,6 +162,30 @@ class Doc {
haschildren = false;
onlyxattr = false;
}
// Copy ensuring no shared string data, for threading issues.
void copyto(Doc *d) const {
d->url.assign(url.begin(), url.end());
d->idxurl.assign(idxurl.begin(), idxurl.end());
d->idxi = idxi;
d->ipath.assign(ipath.begin(), ipath.end());
d->mimetype.assign(mimetype.begin(), mimetype.end());
d->fmtime.assign(fmtime.begin(), fmtime.end());
d->dmtime.assign(dmtime.begin(), dmtime.end());
d->origcharset.assign(origcharset.begin(), origcharset.end());
map_ss_cp_noshr(meta, &d->meta);
d->syntabs = syntabs;
d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
d->fbytes.assign(fbytes.begin(), fbytes.end());
d->dbytes.assign(dbytes.begin(), dbytes.end());
d->sig.assign(sig.begin(), sig.end());
d->text.assign(text.begin(), text.end());
d->pc = pc;
d->xdocid = xdocid;
d->idxi = idxi;
d->haspages = haspages;
d->haschildren = haschildren;
d->onlyxattr = onlyxattr;
}
Doc()
: idxi(0), syntabs(false), pc(0), xdocid(0),
haspages(false), haschildren(false), onlyxattr(false)
@ -247,8 +273,6 @@ class Doc {
};
#ifndef NO_NAMESPACES
}
#endif
#endif /* _RCLDOC_H_INCLUDED_ */

View file

@ -30,6 +30,18 @@
using namespace std;
#endif /* NO_NAMESPACES */
// Bogus code to avoid bogus valgrind mt warnings about the
// initialization of treat_mbox_... which I can't even remember the
// use of (it's not documented or ever set)
static int treat_mbox_as_rfc822;
class InitTMAR {
public:
InitTMAR() {
treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1;
}
};
static InitTMAR initTM;
/**
* This code is currently ONLY used to identify mbox and mail message files
* which are badly handled by standard mime type identifiers
@ -49,11 +61,6 @@ const int wantnhead = 3;
// fn is for message printing
static string idFileInternal(istream& input, const char *fn)
{
static int treat_mbox_as_rfc822;
if (treat_mbox_as_rfc822 == 0) {
treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1;
}
bool line1HasFrom = false;
bool gotnonempty = false;
int lookslikemail = 0;

View file

@ -42,6 +42,16 @@ using namespace std;
#include "hldata.h"
#include "cstr.h"
void map_ss_cp_noshr(const map<string,string> s, map<string,string> *d)
{
for (map<string,string>::const_iterator it= s.begin();
it != s.end(); it++) {
d->insert(
pair<string,string>(string(it->first.begin(), it->first.end()),
string(it->second.begin(), it->second.end())));
}
}
int stringicmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();

View file

@ -197,6 +197,11 @@ inline void leftzeropad(string& s, unsigned len)
s = s.insert(0, len - s.length(), '0');
}
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string,std::string> s,
std::map<std::string,std::string> *d);
// Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);