get rid of numerous probably inocuous valgrind/helgrind messages by ensuring that actual string copies are passed between threads, without refcount/shared data magic

This commit is contained in:
Jean-Francois Dockes 2014-05-05 19:01:58 +02:00
parent 43c5c4c59f
commit f70e760759
8 changed files with 73 additions and 17 deletions

View file

@ -63,9 +63,12 @@ using namespace std;
#ifdef IDX_THREADS #ifdef IDX_THREADS
class DbUpdTask { class DbUpdTask {
public: public:
// Take some care to avoid sharing string data (if string impl is cow)
DbUpdTask(const string& u, const string& p, const Rcl::Doc& d) DbUpdTask(const string& u, const string& p, const Rcl::Doc& d)
: udi(u), parent_udi(p), doc(d) : udi(u.begin(), u.end()), parent_udi(p.begin(), p.end())
{} {
d.copyto(&doc);
}
string udi; string udi;
string parent_udi; string parent_udi;
Rcl::Doc doc; Rcl::Doc doc;
@ -74,10 +77,13 @@ extern void *FsIndexerDbUpdWorker(void*);
class InternfileTask { class InternfileTask {
public: public:
// Take some care to avoid sharing string data (if string impl is cow)
InternfileTask(const std::string &f, const struct stat *i_stp, InternfileTask(const std::string &f, const struct stat *i_stp,
map<string,string> lfields) map<string,string> lfields)
: fn(f), statbuf(*i_stp), localfields(lfields) : fn(f.begin(), f.end()), statbuf(*i_stp)
{} {
map_ss_cp_noshr(lfields, &localfields);
}
string fn; string fn;
struct stat statbuf; struct stat statbuf;
map<string,string> localfields; map<string,string> localfields;

View file

@ -373,6 +373,7 @@ MyHtmlParser::opening_tag(const string &tag)
// Specific to Recoll filters. // Specific to Recoll filters.
decode_entities(content); decode_entities(content);
struct tm tm; struct tm tm;
memset(&tm, 0, sizeof(tm));
if (strptime(content.c_str(), if (strptime(content.c_str(),
" %Y-%m-%d %H:%M:%S ", &tm) || " %Y-%m-%d %H:%M:%S ", &tm) ||
strptime(content.c_str(), strptime(content.c_str(),

View file

@ -1419,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
// Dates etc. // Dates etc.
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() : time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
doc.dmtime.c_str()); doc.dmtime.c_str());
struct tm *tm = localtime(&mtime); struct tm tmb;
localtime_r(&mtime, &tmb);
char buf[9]; char buf[9];
snprintf(buf, 9, "%04d%02d%02d", snprintf(buf, 9, "%04d%02d%02d",
tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday); tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday);
// Date (YYYYMMDD) // Date (YYYYMMDD)
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf)); newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
// Month (YYYYMM) // Month (YYYYMM)

View file

@ -50,9 +50,11 @@ public:
// Note that udi and uniterm are strictly equivalent and are // Note that udi and uniterm are strictly equivalent and are
// passed both just to avoid recomputing uniterm which is // passed both just to avoid recomputing uniterm which is
// available on the caller site. // available on the caller site.
// Take some care to avoid sharing string data (if string impl is cow)
DbUpdTask(Op _op, const string& ud, const string& un, DbUpdTask(Op _op, const string& ud, const string& un,
Xapian::Document *d, size_t tl) Xapian::Document *d, size_t tl)
: op(_op), udi(ud), uniterm(un), doc(d), txtlen(tl) : op(_op), udi(ud.begin(), ud.end()), uniterm(un.begin(), un.end()),
doc(d), txtlen(tl)
{} {}
// Udi and uniterm equivalently designate the doc // Udi and uniterm equivalently designate the doc
Op op; Op op;

View file

@ -19,12 +19,12 @@
#include <string> #include <string>
#include <map> #include <map>
#ifndef NO_NAMESPACES
using std::string; using std::string;
using std::map; using std::map;
#include "smallut.h"
namespace Rcl { namespace Rcl {
#endif
/** /**
* Dumb holder for document attributes and data. * Dumb holder for document attributes and data.
@ -139,6 +139,8 @@ class Doc {
void erase() { void erase() {
url.erase(); url.erase();
idxurl.erase();
idxi = 0;
ipath.erase(); ipath.erase();
mimetype.erase(); mimetype.erase();
fmtime.erase(); fmtime.erase();
@ -160,6 +162,30 @@ class Doc {
haschildren = false; haschildren = false;
onlyxattr = false; onlyxattr = false;
} }
// Copy ensuring no shared string data, for threading issues.
void copyto(Doc *d) const {
d->url.assign(url.begin(), url.end());
d->idxurl.assign(idxurl.begin(), idxurl.end());
d->idxi = idxi;
d->ipath.assign(ipath.begin(), ipath.end());
d->mimetype.assign(mimetype.begin(), mimetype.end());
d->fmtime.assign(fmtime.begin(), fmtime.end());
d->dmtime.assign(dmtime.begin(), dmtime.end());
d->origcharset.assign(origcharset.begin(), origcharset.end());
map_ss_cp_noshr(meta, &d->meta);
d->syntabs = syntabs;
d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
d->fbytes.assign(fbytes.begin(), fbytes.end());
d->dbytes.assign(dbytes.begin(), dbytes.end());
d->sig.assign(sig.begin(), sig.end());
d->text.assign(text.begin(), text.end());
d->pc = pc;
d->xdocid = xdocid;
d->idxi = idxi;
d->haspages = haspages;
d->haschildren = haschildren;
d->onlyxattr = onlyxattr;
}
Doc() Doc()
: idxi(0), syntabs(false), pc(0), xdocid(0), : idxi(0), syntabs(false), pc(0), xdocid(0),
haspages(false), haschildren(false), onlyxattr(false) haspages(false), haschildren(false), onlyxattr(false)
@ -247,8 +273,6 @@ class Doc {
}; };
#ifndef NO_NAMESPACES
} }
#endif
#endif /* _RCLDOC_H_INCLUDED_ */ #endif /* _RCLDOC_H_INCLUDED_ */

View file

@ -30,6 +30,18 @@
using namespace std; using namespace std;
#endif /* NO_NAMESPACES */ #endif /* NO_NAMESPACES */
// Bogus code to avoid bogus valgrind mt warnings about the
// initialization of treat_mbox_... which I can't even remember the
// use of (it's not documented or ever set)
static int treat_mbox_as_rfc822;
class InitTMAR {
public:
InitTMAR() {
treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1;
}
};
static InitTMAR initTM;
/** /**
* This code is currently ONLY used to identify mbox and mail message files * This code is currently ONLY used to identify mbox and mail message files
* which are badly handled by standard mime type identifiers * which are badly handled by standard mime type identifiers
@ -49,11 +61,6 @@ const int wantnhead = 3;
// fn is for message printing // fn is for message printing
static string idFileInternal(istream& input, const char *fn) static string idFileInternal(istream& input, const char *fn)
{ {
static int treat_mbox_as_rfc822;
if (treat_mbox_as_rfc822 == 0) {
treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1;
}
bool line1HasFrom = false; bool line1HasFrom = false;
bool gotnonempty = false; bool gotnonempty = false;
int lookslikemail = 0; int lookslikemail = 0;

View file

@ -42,6 +42,16 @@ using namespace std;
#include "hldata.h" #include "hldata.h"
#include "cstr.h" #include "cstr.h"
void map_ss_cp_noshr(const map<string,string> s, map<string,string> *d)
{
for (map<string,string>::const_iterator it= s.begin();
it != s.end(); it++) {
d->insert(
pair<string,string>(string(it->first.begin(), it->first.end()),
string(it->second.begin(), it->second.end())));
}
}
int stringicmp(const string & s1, const string& s2) int stringicmp(const string & s1, const string& s2)
{ {
string::const_iterator it1 = s1.begin(); string::const_iterator it1 = s1.begin();

View file

@ -197,6 +197,11 @@ inline void leftzeropad(string& s, unsigned len)
s = s.insert(0, len - s.length(), '0'); s = s.insert(0, len - s.length(), '0');
} }
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string,std::string> s,
std::map<std::string,std::string> *d);
// Code for static initialization of an stl map. Somewhat like Boost.assign. // Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c // Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8); // Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);