moved code around to make smallut and pathut less recoll-specific and reusable. No actual changes

2016-03-21 12:55:31 +01:00 · 2016-03-21 12:55:31 +01:00 · 35de51985b
commit 35de51985b
parent 7b2a455b80
26 changed files with 1821 additions and 1493 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -207,6 +207,7 @@ utils/fileudi.h \
 utils/fstreewalk.cpp \
 utils/fstreewalk.h \
 utils/hldata.h \
+utils/hldata.cpp \
 utils/idfile.cpp \
 utils/idfile.h \
 utils/md5.cpp \
@ -224,6 +225,8 @@ utils/pxattr.cpp \
 utils/pxattr.h \
 utils/rclionice.cpp \
 utils/rclionice.h \
+utils/rclutil.h \
+utils/rclutil.cpp \
 utils/readfile.cpp \
 utils/readfile.h \
 utils/refcntr.h \
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -39,6 +39,7 @@

 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclconfig.h"
 #include "conftree.h"
 #include "debuglog.h"
@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf)
    }

    // Compute our data dir name, typically /usr/local/share/recoll
-    m_datadir = path_sharedatadir();
+    m_datadir = path_pkgdatadir();
    // We only do the automatic configuration creation thing for the default
    // config dir, not if it was specified through -c or RECOLL_CONFDIR
    bool autoconfdir = false;
--- a/src/common/rclinit.cpp
+++ b/src/common/rclinit.cpp
@ -32,6 +32,7 @@
 #include "rclconfig.h"
 #include "rclinit.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "unac.h"
 #include "smallut.h"
 #include "execmd.h"
@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags,
    // Init smallut and pathut static values
    pathut_init_mt();
    smallut_init_mt();
+    rclutil_init_mt();
+    
    // Init execmd.h static PATH and PATHELT splitting
    {string bogus;
        ExecCmd::which("nosuchcmd", bogus);
@ -389,4 +392,3 @@ bool recoll_ismainthread()
    return pthread_equal(pthread_self(), mainthread_id);
 }

-
--- a/src/common/rclinit.h
+++ b/src/common/rclinit.h
@ -18,9 +18,6 @@
 #define _RCLINIT_H_INCLUDED_

 #include <string>
-#ifndef NO_NAMESPACES
-using std::string;
-#endif

 class RclConfig;
 /**
@ -42,12 +39,14 @@ class RclConfig;
 *               default and environment
 * @return the parsed configuration.
 */
-enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2};
+enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2};
 extern RclConfig *recollinit(RclInitFlags flags,
                              void (*cleanup)(void), void (*sigcleanup)(int),
-			     string &reason, const string *argcnf = 0);
+                             std::string& reason,
+                             const std::string *argcnf = 0);
 inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
-			     string &reason, const string *argcnf = 0) {
+                             std::string& reason,
+                             const std::string *argcnf = 0)
+{
    return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf);
 }

--- a/src/index/beaglequeue.cpp
+++ b/src/index/beaglequeue.cpp
@ -23,6 +23,7 @@

 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "debuglog.h"
 #include "fstreewalk.h"
 #include "beaglequeue.h"
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@ -28,6 +28,7 @@

 #include "cstr.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "conftree.h"
 #include "rclconfig.h"
 #include "fstreewalk.h"
--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@ -42,6 +42,7 @@ using namespace std;
 #include "smallut.h"
 #include "chrono.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclmon.h"
 #include "x11mon.h"
 #include "cancelcheck.h"
--- a/src/internfile/uncomp.h
+++ b/src/internfile/uncomp.h
@ -21,6 +21,7 @@
 #include <string>

 #include "pathut.h"
+#include "rclutil.h"
 #include "ptmutex.h"

 /// Uncompression script interface.
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@ -32,6 +32,7 @@ using namespace std;
 #include "searchdata.h"
 #include "rclquery.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "wasatorcl.h"
 #include "debuglog.h"
 #include "pathut.h"
--- a/src/qtgui/preview_load.h
+++ b/src/qtgui/preview_load.h
@ -23,6 +23,7 @@

 #include "rcldoc.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "rclconfig.h"

 /* 
--- a/src/qtgui/recoll.h
+++ b/src/qtgui/recoll.h
@ -21,6 +21,7 @@

 #include "rclconfig.h"
 #include "rcldb.h"
+#include "rclutil.h"
 #include "ptmutex.h"

 #include <QString>
--- a/src/query/reslistpager.cpp
+++ b/src/query/reslistpager.cpp
@ -34,6 +34,7 @@ using std::list;
 #include "debuglog.h"
 #include "rclconfig.h"
 #include "smallut.h"
+#include "rclutil.h"
 #include "plaintorich.h"
 #include "mimehandler.h"

--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -42,6 +42,7 @@ using namespace std;
 #include "unacpp.h"
 #include "conftree.h"
 #include "pathut.h"
+#include "rclutil.h"
 #include "smallut.h"
 #include "chrono.h"
 #include "utf8iter.h"
@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi)
    return pterm;
 }

+// Shorten s in place so that it holds at most maxlen bytes. A string
+// which already fits is left untouched. Otherwise s is cut at the last
+// byte position reported by Utf8Iter::getBpos() which is below maxlen
+// (presumably a character start: confirm against Utf8Iter), or emptied
+// if no such position is found.
+static void utf8truncate(string& s, int maxlen)
+{
+    const string::size_type limit = string::size_type(maxlen);
+    if (s.size() <= limit) {
+        return;
+    }
+
+    string::size_type cut = 0;
+    for (Utf8Iter walker(s); walker++ != string::npos;) {
+        if (walker.getBpos() < limit) {
+            cut = walker.getBpos();
+        }
+    }
+    s.erase(cut);
+}
+
 Db::Native::Native(Db *db) 
    : m_rcldb(db), m_isopen(false), m_iswritable(false),
      m_noversionwrite(false)
--- a/src/rcldb/rcldoc.cpp
+++ b/src/rcldb/rcldoc.cpp
@ -14,9 +14,11 @@
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
+#include "autoconfig.h"

 #include "rcldoc.h"
 #include "debuglog.h"
+#include "rclutil.h"

 namespace Rcl {
    const string Doc::keyabs("abstract");
@ -70,5 +72,31 @@ namespace Rcl {
        if (dotext)
            LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str()));
    }
+
+    // Copy all fields to *d while ensuring that no string data ends up
+    // shared with this object, for threading issues. This is why the
+    // members below are assigned from iterator ranges, and the metadata
+    // goes through map_ss_cp_noshr(), instead of using plain assignment.
+    void Doc::copyto(Doc *d) const
+    {
+        Doc& dst = *d;
+
+        // Members copied through assign(begin, end)
+        dst.url.assign(url.begin(), url.end());
+        dst.idxurl.assign(idxurl.begin(), idxurl.end());
+        dst.ipath.assign(ipath.begin(), ipath.end());
+        dst.mimetype.assign(mimetype.begin(), mimetype.end());
+        dst.fmtime.assign(fmtime.begin(), fmtime.end());
+        dst.dmtime.assign(dmtime.begin(), dmtime.end());
+        dst.origcharset.assign(origcharset.begin(), origcharset.end());
+        dst.pcbytes.assign(pcbytes.begin(), pcbytes.end());
+        dst.fbytes.assign(fbytes.begin(), fbytes.end());
+        dst.dbytes.assign(dbytes.begin(), dbytes.end());
+        dst.sig.assign(sig.begin(), sig.end());
+        dst.text.assign(text.begin(), text.end());
+
+        // Metadata
+        map_ss_cp_noshr(meta, &dst.meta);
+
+        // Members copied by plain assignment
+        dst.idxi = idxi;
+        dst.syntabs = syntabs;
+        dst.pc = pc;
+        dst.xdocid = xdocid;
+        dst.haspages = haspages;
+        dst.haschildren = haschildren;
+        dst.onlyxattr = onlyxattr;
+    }
 }

--- a/src/rcldb/rcldoc.h
+++ b/src/rcldb/rcldoc.h
@ -163,33 +163,11 @@ class Doc {
 	onlyxattr = false;
    }
    // Copy ensuring no shared string data, for threading issues.
-    void copyto(Doc *d) const {
-	d->url.assign(url.begin(), url.end());
-        d->idxurl.assign(idxurl.begin(), idxurl.end());
-        d->idxi = idxi;
-	d->ipath.assign(ipath.begin(), ipath.end());
-	d->mimetype.assign(mimetype.begin(), mimetype.end());
-	d->fmtime.assign(fmtime.begin(), fmtime.end());
-	d->dmtime.assign(dmtime.begin(), dmtime.end());
-	d->origcharset.assign(origcharset.begin(), origcharset.end());
-        map_ss_cp_noshr(meta, &d->meta);
-	d->syntabs = syntabs;
-	d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
-	d->fbytes.assign(fbytes.begin(), fbytes.end());
-	d->dbytes.assign(dbytes.begin(), dbytes.end());
-	d->sig.assign(sig.begin(), sig.end());
-        d->text.assign(text.begin(), text.end());
-	d->pc = pc;
-	d->xdocid = xdocid;
-	d->idxi = idxi;
-	d->haspages = haspages;
-	d->haschildren = haschildren;
-	d->onlyxattr = onlyxattr;
-    }
+    void copyto(Doc *d) const;
+
    Doc()
 	: idxi(0), syntabs(false), pc(0), xdocid(0),
-	  haspages(false), haschildren(false), onlyxattr(false)
-    {
+	  haspages(false), haschildren(false), onlyxattr(false) {
    }
    /** Get value for named field. If value pointer is 0, just test existence */
    bool getmeta(const string& nm, string *value = 0) const
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@ -32,10 +32,12 @@
 #include <iostream>
 #include UNORDERED_MAP_INCLUDE

-using std::string;

 #include "smallut.h"

+using std::string;
+using std::vector;
+
 /* 
   Storage for the exception translations. These are chars which
   should not be translated according to what UnicodeData says, but
--- a/src/utils/circache.cpp
+++ b/src/utils/circache.cpp
@ -71,6 +71,28 @@ typedef unsigned char UCHAR;
 typedef unsigned int UINT;
 typedef unsigned long ULONG;

+/**
+ * Temp buffer with automatic deallocation.
+ *
+ * Holds a single malloc()/realloc() allocation which is released by the
+ * destructor. No copy operations are declared here, so a copied instance
+ * would free the same pointer a second time: pass it around by reference
+ * or pointer only.
+ */
+struct TempBuf {
+    /** Start without a buffer: buf() returns 0 until setsize() succeeds. */
+    TempBuf()
+        : m_buf(0) {
+    }
+    /** Allocate n bytes. buf() returns 0 if the allocation failed. */
+    TempBuf(int n) {
+        m_buf = (char *)malloc(n);
+    }
+    ~TempBuf() {
+        if (m_buf) {
+            free(m_buf);
+        }
+    }
+    /**
+     * Resize the buffer to n bytes, keeping the existing contents.
+     * @return the new buffer address, or 0 on failure, in which case the
+     *   previous buffer has been released and buf() also returns 0.
+     */
+    char *setsize(int n) {
+        char *nbuf = (char *)realloc(m_buf, n);
+        if (nbuf == 0 && n != 0) {
+            // A failed realloc() leaves the old block allocated: free it,
+            // else storing the null result below would leak it. The zero
+            // size case is left alone because what realloc() then does
+            // with the old block is implementation-defined.
+            free(m_buf);
+        }
+        return (m_buf = nbuf);
+    }
+    /** @return the current buffer address, 0 if there is none. */
+    char *buf() {
+        return m_buf;
+    }
+    char *m_buf;
+};
+
 static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);

 /*
--- a/src/utils/cpuconf.cpp
+++ b/src/utils/cpuconf.cpp
@ -18,10 +18,16 @@
 #ifndef TEST_CPUCONF

 #include "autoconfig.h"
+
+#include <stdlib.h>
+
 #include "cpuconf.h"
 #include "execmd.h"
 #include "smallut.h"

+using std::string;
+using std::vector;
+
 #if defined(__gnu_linux__) 
 bool getCpuConf(CpuConf& conf)
 {
--- a/src/utils/hldata.cpp
+++ b/src/utils/hldata.cpp
@ -0,0 +1,78 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include "autoconfig.h"
+
+#include <stdio.h>
+
+#include "hldata.h"
+
+using std::string;
+using std::map;
+
+// Append a human-readable dump of the highlight data to 'out': the user
+// terms, the user term to query term map, then each group of terms
+// followed by its slack value. Every time the user group index changes,
+// the terms of the new user group are listed ahead of the groups.
+void HighlightData::toString(string& out)
+{
+    typedef std::set<string>::const_iterator UtermIter;
+    typedef map<string, string>::const_iterator TermIter;
+
+    out += "\nUser terms (orthograph): ";
+    for (UtermIter ut = uterms.begin(); ut != uterms.end(); ++ut) {
+        out += " [";
+        out += *ut;
+        out += "]";
+    }
+
+    out += "\nUser terms to Query terms:";
+    for (TermIter tm = terms.begin(); tm != terms.end(); ++tm) {
+        out += "[";
+        out += tm->first;
+        out += "]->[";
+        out += tm->second;
+        out += "] ";
+    }
+
+    out += "\nGroups: ";
+    char numbuf[200];
+    sprintf(numbuf, "Groups size %d grpsugidx size %d ugroups size %d",
+            int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
+    out += numbuf;
+
+    // No user group printed yet: start with an index which can't match
+    size_t curug = (size_t) - 1;
+    for (size_t gi = 0; gi < groups.size(); ++gi) {
+        if (grpsugidx[gi] != curug) {
+            // First group for this user group: print the user terms
+            curug = grpsugidx[gi];
+            out += "\n(";
+            for (size_t k = 0; k < ugroups[curug].size(); ++k) {
+                out += "[";
+                out += ugroups[curug][k];
+                out += "] ";
+            }
+            out += ") ->";
+        }
+        out += " {";
+        for (size_t k = 0; k < groups[gi].size(); ++k) {
+            out += "[";
+            out += groups[gi][k];
+            out += "]";
+        }
+        sprintf(numbuf, "%d", slacks[gi]);
+        out += "}";
+        out += numbuf;
+    }
+    out += "\n";
+}
+
+// Merge the data from 'hl' into this object. The user group indices
+// copied from hl.grpsugidx are shifted by the number of user groups held
+// before the merge, so that they keep designating the entries copied
+// from hl.ugroups.
+void HighlightData::append(const HighlightData& hl)
+{
+    // Must be read before ugroups grows
+    const size_t ugbase = ugroups.size();
+
+    uterms.insert(hl.uterms.begin(), hl.uterms.end());
+    terms.insert(hl.terms.begin(), hl.terms.end());
+
+    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
+    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
+    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
+
+    for (size_t i = 0; i < hl.grpsugidx.size(); ++i) {
+        grpsugidx.push_back(hl.grpsugidx[i] + ugbase);
+    }
+}
--- a/src/utils/hldata.h
+++ b/src/utils/hldata.h
@ -4,6 +4,7 @@
 #include <vector>
 #include <string>
 #include <set>
+#include <map>

 /** Store data about user search terms and their expansions. This is used
 * mostly for highlighting result text and walking the matches, generating 
--- a/src/utils/pathut.cpp
+++ b/src/utils/pathut.cpp
@ -16,25 +16,31 @@
 */

 #ifndef TEST_PATHUT
+#ifdef BUILDING_RECOLL
 #include "autoconfig.h"
+#else
+#include "config.h"
+#endif

 #include <stdio.h>
+#ifdef _WIN32
+#include "dirent.h"
 #include "safefcntl.h"
 #include "safeunistd.h"
-#include "dirent.h"
-#include "cstr.h"
-#ifdef _WIN32
 #include "safewindows.h"
+#include "safesysstat.h"
 #else
+#include <fcntl.h>
+#include <unistd.h>
 #include <sys/param.h>
 #include <pwd.h>
 #include <sys/file.h>
+#include <sys/stat.h>
+#include <dirent.h>
 #endif
 #include <math.h>
 #include <errno.h>
 #include <sys/types.h>
-#include "safesysstat.h"
-#include "ptmutex.h"

 // Let's include all files where statfs can be defined and hope for no
 // conflict...
@ -60,9 +66,6 @@
 #include <vector>

 #include "pathut.h"
-#include "transcode.h"
-#include "wipedir.h"
-#include "md5ut.h"

 using namespace std;

@ -71,9 +74,10 @@ using namespace std;
 void path_slashize(string& s)
 {
    for (string::size_type i = 0; i < s.size(); i++) {
-        if (s[i] == '\\')
+        if (s[i] == '\\') {
            s[i] = '/';
        }
+    }
 }
 static bool path_strlookslikedrive(const string& s)
 {
@ -82,14 +86,16 @@ static bool path_strlookslikedrive(const string& s)

 static bool path_hasdrive(const string& s)
 {
-    if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':')
+    if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':') {
        return true;
+    }
    return false;
 }
 static bool path_isdriveabs(const string& s)
 {
-    if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/')
+    if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/') {
        return true;
+    }
    return false;
 }

@ -130,17 +136,18 @@ string path_thisexecpath()
    PathRemoveFileSpec(text);
 #endif
    string path = path_tchartoutf8(text);
-    if (path.empty())
+    if (path.empty()) {
        path = "c:/";
+    }

    return path;
 }

 string path_wingettempfilename(TCHAR *pref)
 {
-    TCHAR buf[(MAX_PATH +1)*sizeof(TCHAR)];
-    TCHAR dbuf[(MAX_PATH +1)*sizeof(TCHAR)];
-    GetTempPath(MAX_PATH+1, dbuf);
+    TCHAR buf[(MAX_PATH + 1)*sizeof(TCHAR)];
+    TCHAR dbuf[(MAX_PATH + 1)*sizeof(TCHAR)];
+    GetTempPath(MAX_PATH + 1, dbuf);
    GetTempFileName(dbuf, pref, 0, buf);
    // Windows will have created a temp file, we delete it.
    string filename = path_tchartoutf8(buf);
@ -150,10 +157,11 @@ string path_wingettempfilename(TCHAR *pref)
 }
 #endif

-
-bool fsocc(const string &path, int *pc, long long *avmbs)
+#if defined(HAVE_SYS_MOUNT_H) || defined(HAVE_SYS_STATFS_H) || \
+    defined(HAVE_SYS_STATVFS_H) || defined(HAVE_SYS_VFS_H)
+bool fsocc(const string& path, int *pc, long long *avmbs)
 {
-    static const int FSOCC_MB = 1024*1024;
+    static const int FSOCC_MB = 1024 * 1024;
 #ifdef _WIN32
    ULARGE_INTEGER freebytesavail;
    ULARGE_INTEGER totalbytes;
@ -161,10 +169,12 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
                            &totalbytes, NULL)) {
        return false;
    }
-    if (pc)
+    if (pc) {
        *pc = int((100 * freebytesavail.QuadPart) / totalbytes.QuadPart);
-    if (avmbs)
+    }
+    if (avmbs) {
        *avmbs = int(totalbytes.QuadPart / FSOCC_MB);
+    }
    return true;
 #else
 #ifdef sun
@ -186,8 +196,9 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
    if (FSOCC_TOTAVAIL > 0) {
        fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL;
    }
-    if (pc)
+    if (pc) {
        *pc = int(fpc);
+    }
    if (avmbs) {
        *avmbs = 0;
        if (buf.f_bsize > 0) {
@ -202,54 +213,7 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
    return true;
 #endif
 }
-
-const string& tmplocation()
-{
-    static string stmpdir;
-    if (stmpdir.empty()) {
-        const char *tmpdir = getenv("RECOLL_TMPDIR");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TMPDIR");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TMP");
-        if (tmpdir == 0) 
-            tmpdir = getenv("TEMP");
-        if (tmpdir == 0) {
-#ifdef _WIN32
-            TCHAR bufw[(MAX_PATH+1)*sizeof(TCHAR)];
-            GetTempPath(MAX_PATH+1, bufw);
-            stmpdir = path_tchartoutf8(bufw);
-#else
-            stmpdir = "/tmp";
-#endif
-        } else {
-            stmpdir = tmpdir;
-        }
-        stmpdir = path_canon(stmpdir);
-    }
-
-    return stmpdir;
-}
-
-// Location for sample config, filters, etc. (e.g. /usr/share/recoll/)
-const string& path_sharedatadir()
-{
-    static string datadir;
-    if (datadir.empty()) {
-#ifdef _WIN32
-        datadir = path_cat(path_thisexecpath(), "Share");
-#else
-        const char *cdatadir = getenv("RECOLL_DATADIR");
-        if (cdatadir == 0) {
-            // If not in environment, use the compiled-in constant. 
-            datadir = RECOLL_DATADIR;
-        } else {
-            datadir = cdatadir;
-        }
-#endif
-    }
-    return datadir;
-}
+#endif // we have found an appropriate include file

 string path_PATHsep()
 {
@ -262,150 +226,17 @@ string path_PATHsep()
 #endif
 }

-bool maketmpdir(string& tdir, string& reason)
-{
-#ifndef _WIN32
-    tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
-
-    char *cp = strdup(tdir.c_str());
-    if (!cp) {
-	reason = "maketmpdir: out of memory (for file name !)\n";
-	tdir.erase();
-	return false;
-    }
-
-    // There is a race condition between name computation and
-    // mkdir. try to make sure that we at least don't shoot ourselves
-    // in the foot
-#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-#endif
-
-    if (!
-#ifdef HAVE_MKDTEMP
-	mkdtemp(cp)
-#else
-	mktemp(cp)
-#endif // HAVE_MKDTEMP
-	) {
-	free(cp);
-	reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
-	    strerror(errno);
-	tdir.erase();
-	return false;
-    }	
-    tdir = cp;
-    free(cp);
-#else // _WIN32
-    // There is a race condition between name computation and
-    // mkdir. try to make sure that we at least don't shoot ourselves
-    // in the foot
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-    tdir = path_wingettempfilename(TEXT("rcltmp"));
-#endif
-
-    // At this point the directory does not exist yet except if we used
-    // mkdtemp
-
-#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
-    if (mkdir(tdir.c_str(), 0700) < 0) {
-	reason = string("maketmpdir: mkdir ") + tdir + " failed";
-	tdir.erase();
-	return false;
-    }
-#endif
-
-    return true;
-}
-
-TempFileInternal::TempFileInternal(const string& suffix)
-    : m_noremove(false)
-{
-    // Because we need a specific suffix, can't use mkstemp
-    // well. There is a race condition between name computation and
-    // file creation. try to make sure that we at least don't shoot
-    // our own selves in the foot. maybe we'll use mkstemps one day.
-    static PTMutexInit mlock;
-    PTMutexLocker lock(mlock);
-
-#ifndef _WIN32
-    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
-    char *cp = strdup(filename.c_str());
-    if (!cp) {
-	m_reason = "Out of memory (for file name !)\n";
-	return;
-    }
-
-    // Using mkstemp this way is awful (bot the suffix adding and
-    // using mkstemp() instead of mktemp just to avoid the warnings)
-    int fd;
-    if ((fd = mkstemp(cp)) < 0) {
-	free(cp);
-	m_reason = "TempFileInternal: mkstemp failed\n";
-	return;
-    }
-    close(fd);
-    unlink(cp);
-    filename = cp;
-    free(cp);
-#else
-    string filename = path_wingettempfilename(TEXT("recoll"));
-#endif
-
-    m_filename = filename + suffix;
-    if (close(open(m_filename.c_str(), O_CREAT|O_EXCL, 0600)) != 0) {
-	m_reason = string("Could not open/create") + m_filename;
-	m_filename.erase();
-    }
-}
-
-TempFileInternal::~TempFileInternal()
-{
-    if (!m_filename.empty() && !m_noremove)
-	unlink(m_filename.c_str());
-}
-
-TempDir::TempDir()
-{
-    if (!maketmpdir(m_dirname, m_reason)) {
-	m_dirname.erase();
-	return;
-    }
-}
-
-TempDir::~TempDir()
-{
-    if (!m_dirname.empty()) {
-	(void)wipedir(m_dirname, true, true);
-	m_dirname.erase();
-    }
-}
-
-bool TempDir::wipe()
-{
-    if (m_dirname.empty()) {
-	m_reason = "TempDir::wipe: no directory !\n";
-	return false;
-    }
-    if (wipedir(m_dirname, false, true)) {
-	m_reason = "TempDir::wipe: wipedir failed\n";
-	return false;
-    }
-    return true;
-}
-
-void path_catslash(string &s)
+void path_catslash(string& s)
 {
 #ifdef _WIN32
    path_slashize(s);
 #endif
-    if (s.empty() || s[s.length() - 1] != '/')
+    if (s.empty() || s[s.length() - 1] != '/') {
        s += '/';
+    }
 }

-string path_cat(const string &s1, const string &s2)
+string path_cat(const string& s1, const string& s2)
 {
    string res = s1;
    path_catslash(res);
@ -413,7 +244,7 @@ string path_cat(const string &s1, const string &s2)
    return res;
 }

-string path_getfather(const string &s)
+string path_getfather(const string& s)
 {
    string father = s;
 #ifdef _WIN32
@ -421,62 +252,69 @@ string path_getfather(const string &s)
 #endif

    // ??
-    if (father.empty())
+    if (father.empty()) {
        return "./";
+    }

-    if (path_isroot(father))
+    if (path_isroot(father)) {
        return father;
+    }

    if (father[father.length() - 1] == '/') {
        // Input ends with /. Strip it, root special case was tested above
-	father.erase(father.length()-1);
+        father.erase(father.length() - 1);
    }

    string::size_type slp = father.rfind('/');
-    if (slp == string::npos)
+    if (slp == string::npos) {
        return "./";
+    }

    father.erase(slp);
    path_catslash(father);
    return father;
 }

-string path_getsimple(const string &s)
+string path_getsimple(const string& s)
 {
    string simple = s;
 #ifdef _WIN32
    path_slashize(simple);
 #endif

-    if (simple.empty())
+    if (simple.empty()) {
        return simple;
+    }

    string::size_type slp = simple.rfind('/');
-    if (slp == string::npos)
+    if (slp == string::npos) {
        return simple;
+    }

-    simple.erase(0, slp+1);
+    simple.erase(0, slp + 1);
    return simple;
 }

-string path_basename(const string &s, const string &suff)
+string path_basename(const string& s, const string& suff)
 {
    string simple = path_getsimple(s);
    string::size_type pos = string::npos;
    if (suff.length() && simple.length() > suff.length()) {
        pos = simple.rfind(suff);
-	if (pos != string::npos && pos + suff.length() == simple.length())
+        if (pos != string::npos && pos + suff.length() == simple.length()) {
            return simple.substr(0, pos);
        }
+    }
    return simple;
 }

 string path_suffix(const string& s)
 {
    string::size_type dotp = s.rfind('.');
-    if (dotp == string::npos)
+    if (dotp == string::npos) {
        return string();
-    return s.substr(dotp+1);
+    }
+    return s.substr(dotp + 1);
 }

 string path_home()
@ -508,11 +346,12 @@ string path_home()
    struct passwd *entry = getpwuid(uid);
    if (entry == 0) {
        const char *cp = getenv("HOME");
-	if (cp)
+        if (cp) {
            return cp;
-	else 
+        } else {
            return "/";
        }
+    }

    string homedir = entry->pw_dir;
    path_catslash(homedir);
@ -539,10 +378,11 @@ string path_homedata()
 #endif
 }

-string path_tildexpand(const string &s) 
+string path_tildexpand(const string& s)
 {
-    if (s.empty() || s[0] != '~')
+    if (s.empty() || s[0] != '~') {
        return s;
+    }
    string o = s;
 #ifdef _WIN32
    path_slashize(o);
@ -557,11 +397,12 @@ string path_tildexpand(const string &s)
        string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1;
 #ifdef _WIN32
        // Dont know what this means. Just replace with HOME
-        o.replace(0, l+1, path_home());
+        o.replace(0, l + 1, path_home());
 #else
        struct passwd *entry = getpwnam(s.substr(1, l).c_str());
-	if (entry)
-	    o.replace(0, l+1, entry->pw_dir);
+        if (entry) {
+            o.replace(0, l + 1, entry->pw_dir);
+        }
 #endif
    }
    return o;
@ -569,17 +410,19 @@ string path_tildexpand(const string &s)

 bool path_isroot(const string& path)
 {
-    if (path.size() == 1 && path[0] == '/')
+    if (path.size() == 1 && path[0] == '/') {
        return true;
+    }
 #ifdef _WIN32
    if (path.size() == 3 && isalpha(path[0]) && path[1] == ':' &&
-        (path[2] == '/' || path[2] == '\\'))
+            (path[2] == '/' || path[2] == '\\')) {
        return true;
+    }
 #endif
    return false;
 }

-bool path_isabsolute(const string &path)
+bool path_isabsolute(const string& path)
 {
    if (!path.empty() && (path[0] == '/'
 #ifdef _WIN32
@ -591,10 +434,11 @@ bool path_isabsolute(const string &path)
    return false;
 }

-string path_absolute(const string &is)
+string path_absolute(const string& is)
 {
-    if (is.length() == 0)
+    if (is.length() == 0) {
        return is;
+    }
    string s = is;
    if (!path_isabsolute(s)) {
        char buf[MAXPATHLEN];
@ -610,10 +454,11 @@ string path_absolute(const string &is)
 }

 #include <smallut.h>
-string path_canon(const string &is, const string* cwd)
+string path_canon(const string& is, const string* cwd)
 {
-    if (is.length() == 0)
+    if (is.length() == 0) {
        return is;
+    }
    string s = is;
 #ifdef _WIN32
    path_slashize(s);
@ -639,10 +484,11 @@ string path_canon(const string &is, const string* cwd)
    stringToTokens(s, elems, "/");
    vector<string> cleaned;
    for (vector<string>::const_iterator it = elems.begin();
-	 it != elems.end(); it++){
+            it != elems.end(); it++) {
        if (*it == "..") {
-	    if (!cleaned.empty())
+            if (!cleaned.empty()) {
                cleaned.pop_back();
+            }
        } else if (it->empty() || *it == ".") {
        } else {
            cleaned.push_back(*it);
@ -674,10 +520,11 @@ bool makepath(const string& ipath)
    stringToTokens(path, elems, "/");
    path = "/";
    for (vector<string>::const_iterator it = elems.begin();
-	 it != elems.end(); it++){
+            it != elems.end(); it++) {
 #ifdef _WIN32
-        if (it == elems.begin() && path_strlookslikedrive(*it))
+        if (it == elems.begin() && path_strlookslikedrive(*it)) {
            path = "";
+        }
 #endif
        path += *it;
        // Not using path_isdir() here, because this cant grok symlinks
@ -695,30 +542,35 @@ bool makepath(const string& ipath)
 bool path_isdir(const string& path)
 {
    struct stat st;
-    if (lstat(path.c_str(), &st) < 0) 
+    if (lstat(path.c_str(), &st) < 0) {
        return false;
-    if (S_ISDIR(st.st_mode))
+    }
+    if (S_ISDIR(st.st_mode)) {
        return true;
+    }
    return false;
 }

 long long path_filesize(const string& path)
 {
    struct stat st;
-    if (stat(path.c_str(), &st) < 0) 
+    if (stat(path.c_str(), &st) < 0) {
        return -1;
+    }
    return (long long)st.st_size;
 }

 int path_fileprops(const std::string path, struct stat *stp, bool follow)
 {
-    if (!stp)
+    if (!stp) {
        return -1;
+    }
    memset(stp, 0, sizeof(struct stat));
    struct stat mst;
    int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
-    if (ret != 0)
+    if (ret != 0) {
        return ret;
+    }
    stp->st_size = mst.st_size;
    stp->st_mode = mst.st_mode;
    stp->st_mtime = mst.st_mtime;
@ -807,7 +659,7 @@ string url_encode(const string& url, string::size_type offs)
                c == '`' ||
                c == '{' ||
                c == '|' ||
-	   c == '}' ) {
+                c == '}') {
            out += '%';
            out += h[(c >> 4) & 0xf];
            out += h[c & 0xf];
@ -822,45 +674,24 @@ string url_gpath(const string& url)
 {
    // Remove the access schema part (or whatever it's called)
    string::size_type colon = url.find_first_of(":");
-    if (colon == string::npos || colon == url.size() - 1)
+    if (colon == string::npos || colon == url.size() - 1) {
        return url;
+    }
    // If there are non-alphanum chars before the ':', then there
    // probably is no scheme. Whatever...
    for (string::size_type i = 0; i < colon; i++) {
-        if (!isalnum(url.at(i)))
+        if (!isalnum(url.at(i))) {
            return url;
        }
+    }

    // In addition we canonize the path to remove empty host parts
    // (for compatibility with older versions of recoll where file://
    // was hardcoded, but the local path was used for doc
    // identification.
-    return path_canon(url.substr(colon+1));
+    return path_canon(url.substr(colon + 1));
 }

-string url_gpathS(const string& url)
-{
-#ifdef _WIN32
-    string u = url_gpath(url);
-    string nu;
-    if (path_hasdrive(u)) {
-        nu.append(1, '/');
-        nu.append(1, u[0]);
-        if (path_isdriveabs(u)) {
-            nu.append(u.substr(2));
-        } else {
-            // This should be an error really
-            nu.append(1, '/');
-            nu.append(u.substr(2));
-        }
-    }
-    return nu;
-#else
-    return url_gpath(url);
-#endif
-}
-
-
 string url_parentfolder(const string& url)
 {
    // In general, the parent is the directory above the full path
@ -876,24 +707,16 @@ string url_parentfolder(const string& url)
 }


-string path_defaultrecollconfsubdir()
-{
-#ifdef _WIN32
-    return "Recoll";
-#else
-    return ".recoll";
-#endif
-}
-
 // Convert to file path if url is like file:
 // Note: this only works with our internal pseudo-urls which are not
 // encoded/escaped
 string fileurltolocalpath(string url)
 {
-    if (url.find("file://") == 0)
+    if (url.find("file://") == 0) {
        url = url.substr(7, string::npos);
-    else
+    } else {
        return string();
+    }

 #ifdef _WIN32
    // Absolute file urls are like: file:///c:/mydir/...
@ -908,21 +731,24 @@ string fileurltolocalpath(string url)
    // part after # if it is preceded by .html
    string::size_type pos;
    if ((pos = url.rfind(".html#")) != string::npos) {
-        url.erase(pos+5);
+        url.erase(pos + 5);
    } else if ((pos = url.rfind(".htm#")) != string::npos) {
-        url.erase(pos+4);
+        url.erase(pos + 4);
    }

    return url;
 }

+static const string cstr_fileu("file://");
+
 string path_pathtofileurl(const string& path)
 {
    // We're supposed to receive a canonic absolute path, but on windows we
    // may need to add a '/' in front of the drive spec
    string url(cstr_fileu);
-	if (path.empty() || path[0] != '/')
+    if (path.empty() || path[0] != '/') {
        url.push_back('/');
+    }
    url += path;
    return url;
 }
@ -932,17 +758,6 @@ bool urlisfileurl(const string& url)
    return url.find("file://") == 0;
 }

-// Printable url: this is used to transcode from the system charset
-// into either utf-8 if transcoding succeeds, or url-encoded
-bool printableUrl(const string &fcharset, const string &in, string &out)
-{
-    int ecnt = 0;
-    if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
-	out = url_encode(in, 7);
-    }
-    return true;
-}
-
 bool readdir(const string& dir, string& reason, set<string>& entries)
 {
    struct stat st;
@ -971,17 +786,20 @@ bool readdir(const string& dir, string& reason, set<string>& entries)

    struct dirent *ent;
    while ((ent = readdir(d)) != 0) {
-	if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) 
+        if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
            continue;
+        }
        entries.insert(ent->d_name);
    }

 out:
-    if (d)
+    if (d) {
        closedir(d);
+    }
    reason = msg.str();
-    if (reason.empty())
+    if (reason.empty()) {
        return true;
+    }
    return false;
 }

@ -992,34 +810,38 @@ out:
 // alone.
 Pidfile::~Pidfile()
 {
-    if (m_fd >= 0)
+    if (m_fd >= 0) {
        ::close(m_fd);
+    }
    m_fd = -1;
 }

 pid_t Pidfile::read_pid()
 {
    int fd = ::open(m_path.c_str(), O_RDONLY);
-    if (fd == -1)
-	return (pid_t)-1;
+    if (fd == -1) {
+        return (pid_t) - 1;
+    }

    char buf[16];
    int i = read(fd, buf, sizeof(buf) - 1);
    ::close(fd);
-    if (i <= 0)
-	return (pid_t)-1;
+    if (i <= 0) {
+        return (pid_t) - 1;
+    }
    buf[i] = '\0';
    char *endptr;
    pid_t pid = strtol(buf, &endptr, 10);
-    if (endptr != &buf[i])
-	return (pid_t)-1;
+    if (endptr != &buf[i]) {
+        return (pid_t) - 1;
+    }
    return pid;
 }

 int Pidfile::flopen()
 {
    const char *path = m_path.c_str();
-    if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
+    if ((m_fd = ::open(path, O_RDWR | O_CREAT, 0644)) == -1) {
        m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
        return -1;
    }
@ -1098,79 +920,10 @@ int Pidfile::remove()
    return unlink(m_path.c_str());
 }

-
-// Freedesktop standard paths for cache directory (thumbnails are now in there)
-static const string& xdgcachedir()
-{
-    static string xdgcache;
-    if (xdgcache.empty()) {
-	const char *cp = getenv("XDG_CACHE_HOME");
-	if (cp == 0) 
-	    xdgcache = path_cat(path_home(), ".cache");
-	else
-	    xdgcache = string(cp);
-    }
-    return xdgcache;
-}
-static const string& thumbnailsdir()
-{
-    static string thumbnailsd;
-    if (thumbnailsd.empty()) {
-	thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
-	if (access(thumbnailsd.c_str(), 0) != 0) {
-	    thumbnailsd = path_cat(path_home(), ".thumbnails");
-	}
-    }
-    return thumbnailsd;
-}
-
-// Place for 256x256 files
-static const string thmbdirlarge = "large";
-// 128x128
-static const string thmbdirnormal = "normal";
-
-static void thumbname(const string& url, string& name)
-{
-    string digest;
-    string l_url = url_encode(url);
-    MD5String(l_url, digest);
-    MD5HexPrint(digest, name);
-    name += ".png";
-}
-
-bool thumbPathForUrl(const string& url, int size, string& path)
-{
-    string name;
-    thumbname(url, name);
-    if (size <= 128) {
-	path = path_cat(thumbnailsdir(), thmbdirnormal);
-	path = path_cat(path, name);
-	if (access(path.c_str(), R_OK) == 0) {
-	    return true;
-	}
-    } 
-    path = path_cat(thumbnailsdir(), thmbdirlarge);
-    path = path_cat(path, name);
-    if (access(path.c_str(), R_OK) == 0) {
-	return true;
-    }
-
-    // File does not exist. Path corresponds to the large version at this point,
-    // fix it if needed.
-    if (size <= 128) {
-	path = path_cat(path_home(), thmbdirnormal);
-	path = path_cat(path, name);
-    }
-    return false;
-}
-
 // Call funcs that need static init (not initially reentrant)
 void pathut_init_mt()
 {
    path_home();
-    tmplocation();
-    thumbnailsdir();
-    path_sharedatadir();
 }


@ -1185,8 +938,9 @@ void path_to_thumb(const string& _input)
 {
    string input(_input);
    // Make absolute path if needed
-    if (input[0] != '/')
+    if (input[0] != '/') {
        input = path_absolute(input);
+    }

    input = string("file://") + path_canon(input);

@ -1200,28 +954,30 @@ const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
                        "/dir1/dir2",
                        "./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
                        "/dir/.c", "/dir/toto.txt", "toto.txt1"
-};
+                       };

 const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub",
-		 "~nosuch", "~nosuch/sub"};
+                        "~nosuch", "~nosuch/sub"
+                       };
 int nttvec = sizeof(ttvec) / sizeof(string);

 const char *thisprog;

 int main(int argc, const char **argv)
 {
-    thisprog = *argv++;argc--;
+    thisprog = *argv++;
+    argc--;

    string s;
    vector<string>::const_iterator it;
 #if 0
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
        cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl;
    }
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
        cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl;
    }
-    for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
+    for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
        cout << tstvec[i] << " Basename " <<
             path_basename(tstvec[i], ".txt") << endl;
    }
@ -1251,10 +1007,12 @@ int main(int argc, const char **argv)
        cerr << "Usage: trpathut <dir> <pattern>" << endl;
        exit(1);
    }
-    string dir = *argv++;argc--;
-    string pattern =  *argv++;argc--;
+    string dir = *argv++;
+    argc--;
+    string pattern =  *argv++;
+    argc--;
    vector<string> matched = path_dirglob(dir, pattern);
-    for (it = matched.begin(); it != matched.end();it++) {
+    for (it = matched.begin(); it != matched.end(); it++) {
        cout << *it << endl;
    }
 #endif
@ -1264,7 +1022,8 @@ int main(int argc, const char **argv)
        fprintf(stderr, "Usage: fsocc: trpathut <path>\n");
        exit(1);
    }
-  string path = *argv++;argc--;
+    string path = *argv++;
+    argc--;

    int pc;
    long long blocks;
@ -1303,9 +1062,10 @@ int main(int argc, const char **argv)
        }
        path_to_thumb(input);
    } else {
-      while (getline(cin, input))
+        while (getline(cin, input)) {
            path_to_thumb(input);
        }
+    }


    exit(0);
@ -1316,7 +1076,8 @@ int main(int argc, const char **argv)
        cerr << "Usage: trpathut <filename>" << endl;
        exit(1);
    }
-    string fn = *argv++;argc--;
+    string fn = *argv++;
+    argc--;
    string ext = path_suffix(fn);
    cout << "Suffix: [" << ext << "]" << endl;
    return 0;
@ -1327,7 +1088,8 @@ int main(int argc, const char **argv)
        cerr << "Usage: trpathut url" << endl;
        exit(1);
    }
-    string url = *argv++;argc--;
+    string url = *argv++;
+    argc--;

    cout << "File: [" << fileurltolocalpath(url) << "]\n";
    return 0;
--- a/src/utils/pathut.h
+++ b/src/utils/pathut.h
@ -16,45 +16,42 @@
 */
 #ifndef _PATHUT_H_INCLUDED_
 #define _PATHUT_H_INCLUDED_
-#include "autoconfig.h"

 #include <string>
 #include <vector>
 #include <set>

-#include MEMORY_INCLUDE
+// Must be called in main thread before starting other threads
+extern void pathut_init_mt();

 /// Add a / at the end if none there yet.
-extern void path_catslash(std::string &s);
+extern void path_catslash(std::string& s);
 /// Concatenate 2 paths
-extern std::string path_cat(const std::string &s1, const std::string &s2);
+extern std::string path_cat(const std::string& s1, const std::string& s2);
 /// Get the simple file name (get rid of any directory path prefix)
-extern std::string path_getsimple(const std::string &s);
+extern std::string path_getsimple(const std::string& s);
 /// Simple file name + optional suffix stripping
-extern std::string path_basename(const std::string &s, 
-				 const std::string &suff = std::string());
+extern std::string path_basename(const std::string& s,
+                                 const std::string& suff = std::string());
 /// Component after last '.'
-extern std::string path_suffix(const std::string &s);
+extern std::string path_suffix(const std::string& s);
 /// Get the father directory
-extern std::string path_getfather(const std::string &s);
+extern std::string path_getfather(const std::string& s);
 /// Get the current user's home directory
 extern std::string path_home();
 /// Expand ~ at the beginning of std::string
-extern std::string path_tildexpand(const std::string &s);
+extern std::string path_tildexpand(const std::string& s);
 /// Use getcwd() to make absolute path if needed. Beware: ***this can fail***
 /// we return an empty path in this case.
-extern std::string path_absolute(const std::string &s);
+extern std::string path_absolute(const std::string& s);
 /// Clean up path by removing duplicated / and resolving ../ + make it absolute
-extern std::string path_canon(const std::string &s, const std::string *cwd=0);
+extern std::string path_canon(const std::string& s, const std::string *cwd = 0);
 /// Use glob(3) to return the file names matching pattern inside dir
-extern std::vector<std::string> path_dirglob(const std::string &dir, 
+extern std::vector<std::string> path_dirglob(const std::string& dir,
        const std::string pattern);
 /// Encode according to rfc 1738
 extern std::string url_encode(const std::string& url,
                              std::string::size_type offs = 0);
-/// Transcode to utf-8 if possible or url encoding, for display.
-extern bool printableUrl(const std::string &fcharset, 
-			 const std::string &in, std::string &out);
 //// Convert to file path if url is like file://. This modifies the
 //// input (and returns a copy for convenience)
 extern std::string fileurltolocalpath(std::string url);
@ -67,12 +64,6 @@ extern std::string url_parentfolder(const std::string& url);
 /// routine, it does the right thing only in the recoll context
 extern std::string url_gpath(const std::string& url);

-/// Same but, in the case of a Windows local path, also turn "c:/" into
-/// "/c/" This should be used only for splitting the path in rcldb, it
-/// would better be local in there, but I prefer to keep all the
-/// system-specific path stuff in pathut
-extern std::string url_gpathS(const std::string& url);
-
 /// Stat parameter and check if it's a directory
 extern bool path_isdir(const std::string& path);

@ -103,26 +94,17 @@ extern bool readdir(const std::string& dir, std::string& reason,
                    std::set<std::string>& entries);

 /** A small wrapper around statfs et al, to return percentage of disk
-    occupation */
-bool fsocc(const std::string &path, int *pc, // Percent occupied
-	   long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024
-	   );
-
-/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
-extern const std::string& tmplocation();
-
-/// Create temporary directory (inside the temp location)
-extern bool maketmpdir(std::string& tdir, std::string& reason);
+    occupation
+    @param[out] pc percent occupied
+    @param[out] avmbs Mbs available to non-superuser. Mb=1024*1024
+*/
+bool fsocc(const std::string& path, int *pc, long long *avmbs = 0);

 /// mkdir -p
 extern bool makepath(const std::string& path);

-/// Sub-directory for default recoll config (e.g: .recoll)
-extern std::string path_defaultrecollconfsubdir();
 /// Where we create the user data subdirs
 extern std::string path_homedata();
-/// e.g. /usr/share/recoll. Depends on OS and config
-extern const std::string& path_sharedatadir();
 /// Test if path is absolute
 extern bool path_isabsolute(const std::string& s);

@ -137,52 +119,6 @@ extern std::string path_pathtofileurl(const std::string& path);
 void path_slashize(std::string& s);
 #endif

-/// Temporary file class
-class TempFileInternal {
-public:
-    TempFileInternal(const std::string& suffix);
-    ~TempFileInternal();
-    const char *filename() 
-    {
-	return m_filename.c_str();
-    }
-    const std::string &getreason() 
-    {
-	return m_reason;
-    }
-    void setnoremove(bool onoff)
-    {
-	m_noremove = onoff;
-    }
-    bool ok() 
-    {
-	return !m_filename.empty();
-    }
-private:
-    std::string m_filename;
-    std::string m_reason;
-    bool m_noremove;
-};
-
-typedef STD_SHARED_PTR<TempFileInternal> TempFile;
-
-/// Temporary directory class. Recursively deleted by destructor.
-class TempDir {
-public:
-    TempDir();
-    ~TempDir();
-    const char *dirname() {return m_dirname.c_str();}
-    const std::string &getreason() {return m_reason;}
-    bool ok() {return !m_dirname.empty();}
-    /// Recursively delete contents but not self.
-    bool wipe();
-private:
-    std::string m_dirname;
-    std::string m_reason;
-    TempDir(const TempDir &) {}
-    TempDir& operator=(const TempDir &) {return *this;};
-};
-
 /// Lock/pid file class. This is quite close to the pidfile_xxx
 /// utilities in FreeBSD with a bit more encapsulation. I'd have used
 /// the freebsd code if it was available elsewhere
@ -200,7 +136,9 @@ public:
    int close();
    /// Delete the pid file
    int remove();
-    const std::string& getreason() {return m_reason;}
+    const std::string& getreason() {
+        return m_reason;
+    }
 private:
    std::string m_path;
    int    m_fd;
@ -209,14 +147,4 @@ private:
    int flopen();
 };

-
-
-// Freedesktop thumbnail standard path routine
-// On return, path will have the appropriate value in all cases,
-// returns true if the file already exists
-extern bool thumbPathForUrl(const std::string& url, int size, std::string& path);
-
-// Must be called in main thread before starting other threads
-extern void pathut_init_mt();
-
 #endif /* _PATHUT_H_INCLUDED_ */
--- a/src/utils/rclutil.cpp
+++ b/src/utils/rclutil.cpp
@ -0,0 +1,411 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef TEST_RCLUTIL
+#include "autoconfig.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "safefcntl.h"
+#include "safeunistd.h"
+#include "dirent.h"
+#include "cstr.h"
+#ifdef _WIN32
+#include "safewindows.h"
+#else
+#include <sys/param.h>
+#include <pwd.h>
+#include <sys/file.h>
+#endif
+#include <math.h>
+#include <errno.h>
+#include <sys/types.h>
+#include "safesysstat.h"
+#include "ptmutex.h"
+
+#include "rclutil.h"
+#include "pathut.h"
+#include "wipedir.h"
+#include "transcode.h"
+#include "md5ut.h"
+
+using namespace std;
+
+
+// Copy all entries of s into *d. Every key and value is rebuilt through the
+// iterator-range string constructor instead of being copy-constructed: as
+// stated with the declaration in rclutil.h, the point is that the copies
+// must not share string data with the originals, so that they can be handed
+// to another thread.
+// Entries are added with insert(), so a key which already exists in *d
+// keeps its current value.
+void map_ss_cp_noshr(const map<string, string> s, map<string, string> *d)
+{
+    for (map<string, string>::const_iterator it = s.begin();
+            it != s.end(); it++) {
+        d->insert(
+            pair<string, string>(string(it->first.begin(), it->first.end()),
+                                 string(it->second.begin(), it->second.end())));
+    }
+}
+
+// Name of the default configuration sub-directory: ".recoll", except on
+// Windows where it is "Recoll".
+string path_defaultrecollconfsubdir()
+{
+#ifndef _WIN32
+    return ".recoll";
+#else
+    return "Recoll";
+#endif
+}
+
+// Location for sample config, filters, etc. (e.g. /usr/share/recoll/)
+// The value is computed on the first call and kept in a function static.
+const string& path_pkgdatadir()
+{
+    static string pkgdir;
+    if (!pkgdir.empty()) {
+        return pkgdir;
+    }
+#ifdef _WIN32
+    pkgdir = path_cat(path_thisexecpath(), "Share");
+#else
+    // The environment wins over the compiled-in constant.
+    const char *envdir = getenv("RECOLL_DATADIR");
+    if (envdir != 0) {
+        pkgdir = envdir;
+    } else {
+        pkgdir = RECOLL_DATADIR;
+    }
+#endif
+    return pkgdir;
+}
+
+// Printable url: transcode 'in' from the system charset (fcharset) to
+// UTF-8. If the transcoding fails or reports errors, 'out' receives the
+// url-encoded input instead. Always returns true.
+bool printableUrl(const string& fcharset, const string& in, string& out)
+{
+    int errcount = 0;
+    const bool converted = transcode(in, out, fcharset, "UTF-8", &errcount);
+    if (converted && errcount == 0) {
+        return true;
+    }
+    out = url_encode(in, 7);
+    return true;
+}
+
+// Like url_gpath(), except on Windows where a drive spec is folded into
+// the path ("c:/..." becomes "/c/...").
+string url_gpathS(const string& url)
+{
+#ifndef _WIN32
+    return url_gpath(url);
+#else
+    const string gpath = url_gpath(url);
+    string slashed;
+    if (!path_hasdrive(gpath)) {
+        // Nothing is produced for a path without a drive spec
+        return slashed;
+    }
+    slashed += '/';
+    slashed += gpath[0];
+    if (!path_isdriveabs(gpath)) {
+        // This should be an error really
+        slashed += '/';
+    }
+    slashed += gpath.substr(2);
+    return slashed;
+#endif
+}
+
+// Location for temporary files, computed once and kept in a function
+// static. The first variable set among RECOLL_TMPDIR, TMPDIR, TMP and TEMP
+// is used, else the system default. The result goes through path_canon().
+const string& tmplocation()
+{
+    static string stmpdir;
+    if (!stmpdir.empty()) {
+        return stmpdir;
+    }
+
+    // Environment variables, by decreasing priority
+    static const char *const envnames[] = {
+        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
+    };
+    const char *envdir = 0;
+    for (unsigned int i = 0;
+            envdir == 0 && i < sizeof(envnames) / sizeof(envnames[0]); i++) {
+        envdir = getenv(envnames[i]);
+    }
+
+    if (envdir != 0) {
+        stmpdir = envdir;
+    } else {
+#ifdef _WIN32
+        TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)];
+        GetTempPath(MAX_PATH + 1, bufw);
+        stmpdir = path_tchartoutf8(bufw);
+#else
+        stmpdir = "/tmp";
+#endif
+    }
+    stmpdir = path_canon(stmpdir);
+
+    return stmpdir;
+}
+
+// Create a new, uniquely named directory inside tmplocation().
+// On success, tdir holds the directory path and true is returned. On
+// failure, tdir is emptied, reason receives an explanation and false is
+// returned.
+bool maketmpdir(string& tdir, string& reason)
+{
+#ifndef _WIN32
+    tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
+
+    // mkdtemp()/mktemp() rewrite the template in place: work on a copy
+    char *cp = strdup(tdir.c_str());
+    if (!cp) {
+        reason = "maketmpdir: out of memory (for file name !)\n";
+        tdir.erase();
+        return false;
+    }
+
+    // There is a race condition between name computation and
+    // mkdir. try to make sure that we at least don't shoot ourselves
+    // in the foot
+    // (the lock is only compiled in when mkdtemp() is not available: we are
+    // inside the non-Windows branch here)
+#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+#endif
+
+    if (!
+#ifdef HAVE_MKDTEMP
+            mkdtemp(cp)
+#else
+            mktemp(cp)
+#endif // HAVE_MKDTEMP
+       ) {
+        free(cp);
+        // Note: the message says mktemp whichever call was compiled in
+        reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
+                 strerror(errno);
+        tdir.erase();
+        return false;
+    }
+    tdir = cp;
+    free(cp);
+#else // _WIN32
+    // There is a race condition between name computation and
+    // mkdir. try to make sure that we at least don't shoot ourselves
+    // in the foot
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+    tdir = path_wingettempfilename(TEXT("rcltmp"));
+#endif
+
+    // At this point the directory does not exist yet except if we used
+    // mkdtemp
+
+#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
+    // Mode 0700: access for the owner only
+    if (mkdir(tdir.c_str(), 0700) < 0) {
+        reason = string("maketmpdir: mkdir ") + tdir + " failed";
+        tdir.erase();
+        return false;
+    }
+#endif
+
+    return true;
+}
+
+// Create an empty temporary file inside tmplocation(), with a name ending
+// with the requested suffix. On failure, m_filename is left empty (ok()
+// returns false) and m_reason holds an explanation.
+TempFileInternal::TempFileInternal(const string& suffix)
+    : m_noremove(false)
+{
+    // Because we need a specific suffix, can't use mkstemp
+    // well. There is a race condition between name computation and
+    // file creation. try to make sure that we at least don't shoot
+    // our own selves in the foot. maybe we'll use mkstemps one day.
+    static PTMutexInit mlock;
+    PTMutexLocker lock(mlock);
+
+#ifndef _WIN32
+    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
+    char *cp = strdup(filename.c_str());
+    if (!cp) {
+        m_reason = "Out of memory (for file name !)\n";
+        return;
+    }
+
+    // Using mkstemp this way is awful (both the suffix adding and
+    // using mkstemp() instead of mktemp just to avoid the warnings)
+    int fd;
+    if ((fd = mkstemp(cp)) < 0) {
+        free(cp);
+        m_reason = "TempFileInternal: mkstemp failed\n";
+        return;
+    }
+    // Only the unique name is wanted: get rid of the suffix-less file
+    close(fd);
+    unlink(cp);
+    filename = cp;
+    free(cp);
+#else
+    string filename = path_wingettempfilename(TEXT("recoll"));
+#endif
+
+    m_filename = filename + suffix;
+    // O_EXCL: fail rather than reuse a file which appeared under the same
+    // name in the meantime. open() and close() are checked separately so
+    // that errno still describes the call which actually failed.
+    const int nfd = open(m_filename.c_str(), O_CREAT | O_EXCL, 0600);
+    if (nfd < 0 || close(nfd) != 0) {
+        m_reason = string("Could not open/create ") + m_filename + ": " +
+                   strerror(errno);
+        m_filename.erase();
+    }
+}
+
+// Delete the file, except if none was created or if the owner asked for it
+// to be kept through setnoremove().
+TempFileInternal::~TempFileInternal()
+{
+    if (m_noremove || m_filename.empty()) {
+        return;
+    }
+    unlink(m_filename.c_str());
+}
+
+// Create the directory through maketmpdir(). An empty m_dirname records a
+// failure, with the explanation left in m_reason.
+TempDir::TempDir()
+{
+    const bool created = maketmpdir(m_dirname, m_reason);
+    if (!created) {
+        m_dirname.erase();
+    }
+}
+
+// Get rid of the directory if one was created. The wipedir() result is
+// deliberately ignored.
+TempDir::~TempDir()
+{
+    if (m_dirname.empty()) {
+        return;
+    }
+    (void)wipedir(m_dirname, true, true);
+    m_dirname.erase();
+}
+
+// Empty the directory through wipedir(). Returns false, with m_reason set,
+// if there is no directory or if wipedir() returns a non-zero value.
+bool TempDir::wipe()
+{
+    if (m_dirname.empty()) {
+        m_reason = "TempDir::wipe: no directory !\n";
+        return false;
+    }
+    if (!wipedir(m_dirname, false, true)) {
+        return true;
+    }
+    m_reason = "TempDir::wipe: wipedir failed\n";
+    return false;
+}
+
+// Freedesktop standard paths for cache directory (thumbnails are now in there)
+// The value is $XDG_CACHE_HOME, or the ".cache" sub-directory of the home
+// directory when the variable is unset or empty: the XDG Base Directory
+// specification requires both cases to be handled alike. The result is
+// computed on the first call and kept in a function static.
+static const string& xdgcachedir()
+{
+    static string xdgcache;
+    if (xdgcache.empty()) {
+        const char *cp = getenv("XDG_CACHE_HOME");
+        // An empty value would otherwise leave xdgcache empty: never
+        // cached, and yielding paths relative to nothing.
+        if (cp == 0 || *cp == 0) {
+            xdgcache = path_cat(path_home(), ".cache");
+        } else {
+            xdgcache = string(cp);
+        }
+    }
+    return xdgcache;
+}
+// Thumbnails top directory: "thumbnails" inside the XDG cache directory if
+// it exists, else ".thumbnails" in the home directory. Computed on the
+// first call and kept in a function static.
+static const string& thumbnailsdir()
+{
+    static string thumbsdir;
+    if (!thumbsdir.empty()) {
+        return thumbsdir;
+    }
+    thumbsdir = path_cat(xdgcachedir(), "thumbnails");
+    const bool present = access(thumbsdir.c_str(), 0) == 0;
+    if (!present) {
+        thumbsdir = path_cat(path_home(), ".thumbnails");
+    }
+    return thumbsdir;
+}
+
+// Sub-directory for the 256x256 thumbnails
+static const string thmbdirlarge("large");
+// Sub-directory for the 128x128 thumbnails
+static const string thmbdirnormal("normal");
+
+// Compute the thumbnail file name for url: hexadecimal print of the MD5 of
+// the url-encoded url, followed by ".png".
+static void thumbname(const string& url, string& name)
+{
+    const string encoded = url_encode(url);
+    string md5;
+    MD5String(encoded, md5);
+    MD5HexPrint(md5, name);
+    name.append(".png");
+}
+
+// Compute the thumbnail path for url.
+// @param url the document url
+// @param size requested size: up to 128, the "normal" thumbnail is looked
+//   for first, then the "large" one. Above 128, only the "large" one is.
+// @param[out] path path of the existing thumbnail if one was found, else
+//   the path where the thumbnail for the requested size would be stored.
+// @return true if a readable thumbnail file exists at path.
+bool thumbPathForUrl(const string& url, int size, string& path)
+{
+    string name;
+    thumbname(url, name);
+
+    const bool wantnormal = size <= 128;
+    string normalpath;
+    if (wantnormal) {
+        normalpath = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
+        path = normalpath;
+        if (access(path.c_str(), R_OK) == 0) {
+            return true;
+        }
+    }
+
+    path = path_cat(path_cat(thumbnailsdir(), thmbdirlarge), name);
+    if (access(path.c_str(), R_OK) == 0) {
+        return true;
+    }
+
+    // File does not exist. Path corresponds to the large version at this
+    // point, fix it if needed. This must be the very path which was probed
+    // above, inside the thumbnails directory (not the home directory).
+    if (wantnormal) {
+        path = normalpath;
+    }
+    return false;
+}
+
+// Call once the functions which keep their result in a function static, so
+// that the values are already computed afterwards. The return values are
+// not needed here.
+void rclutil_init_mt()
+{
+    (void)path_pkgdatadir();
+    (void)tmplocation();
+    (void)thumbnailsdir();
+}
+
+#else // TEST_RCLUTIL
+
+// Test driver helper: print the thumbnail path computed for a file path.
+void path_to_thumb(const string& _input)
+{
+    // Make absolute path if needed
+    string abspath(_input);
+    const bool isabsolute = abspath[0] == '/';
+    if (!isabsolute) {
+        abspath = path_absolute(abspath);
+    }
+
+    const string fileurl = string("file://") + path_canon(abspath);
+
+    string thumbpath;
+    thumbPathForUrl(fileurl, 7, thumbpath);
+    cout << thumbpath << endl;
+}
+
+// Program name, taken from argv[0] by main()
+const char *thisprog;
+
+// Test driver entry point, compiled when TEST_RCLUTIL is defined. The only
+// test it holds (thumbnail path computation) is currently disabled by the
+// "#if 0" below, so that nothing is done apart from recording the program
+// name.
+int main(int argc, const char **argv)
+{
+    // Skip the program name
+    thisprog = *argv++;
+    argc--;
+
+    string s;
+    vector<string>::const_iterator it;
+
+#if 0
+    // Print the thumbnail path for the file path given as argument or,
+    // without an argument, for each line read from stdin.
+    if (argc > 1) {
+        cerr <<  "Usage: thumbpath <filepath>" << endl;
+        exit(1);
+    }
+    string input;
+    if (argc == 1) {
+        input = *argv++;
+        if (input.empty())  {
+            cerr << "Usage: thumbpath <filepath>" << endl;
+            exit(1);
+        }
+        path_to_thumb(input);
+    } else {
+        while (getline(cin, input)) {
+            path_to_thumb(input);
+        }
+    }
+    exit(0);
+#endif
+}
+
+#endif // TEST_RCLUTIL
+
--- a/src/utils/rclutil.h
+++ b/src/utils/rclutil.h
@ -0,0 +1,112 @@
+/* Copyright (C) 2016 J.F.Dockes
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef _RCLUTIL_H_INCLUDED_
+#define _RCLUTIL_H_INCLUDED_
+#include "autoconfig.h"
+
+// Misc stuff not generic enough to get into smallut or pathut
+
+#include <map>
+#include <string>
+#include MEMORY_INCLUDE
+
+
+extern void rclutil_init_mt();
+
+/// Sub-directory for default recoll config (e.g: .recoll)
+extern std::string path_defaultrecollconfsubdir();
+
+/// e.g. /usr/share/recoll. Depends on OS and config
+extern const std::string& path_pkgdatadir();
+
+/// Transcode to utf-8 if possible or url encoding, for display.
+extern bool printableUrl(const std::string& fcharset,
+                         const std::string& in, std::string& out);
+/// Same but, in the case of a Windows local path, also turn "c:/" into
+/// "/c/" This should be used only for splitting the path in rcldb.
+extern std::string url_gpathS(const std::string& url);
+
+/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else $TMP
+/// else $TEMP, else the system default (/tmp except on Windows)
+extern const std::string& tmplocation();
+
+/// Create temporary directory (inside the temp location)
+extern bool maketmpdir(std::string& tdir, std::string& reason);
+
+/// Temporary file class. The constructor creates an empty file inside the
+/// temp location (see tmplocation()), the destructor deletes it unless
+/// setnoremove(true) was called.
+class TempFileInternal {
+public:
+    /// Create the file. The name will end with suffix. Check ok() for
+    /// success.
+    TempFileInternal(const std::string& suffix);
+    ~TempFileInternal();
+    /// Path of the file. Empty string if the creation failed.
+    const char *filename() {
+        return m_filename.c_str();
+    }
+    /// Explanation for a creation failure
+    const std::string& getreason() {
+        return m_reason;
+    }
+    /// Ask for the file to be kept (true) or deleted (false) by the
+    /// destructor
+    void setnoremove(bool onoff) {
+        m_noremove = onoff;
+    }
+    /// True if the file could be created
+    bool ok() {
+        return !m_filename.empty();
+    }
+private:
+    // Empty if the creation failed
+    std::string m_filename;
+    std::string m_reason;
+    // If set, the destructor leaves the file alone
+    bool m_noremove;
+};
+
+typedef STD_SHARED_PTR<TempFileInternal> TempFile;
+
+/// Temporary directory class. Recursively deleted by destructor.
+/// The directory is created by the constructor through maketmpdir().
+class TempDir {
+public:
+    /// Create the directory. Check ok() for success.
+    TempDir();
+    ~TempDir();
+    /// Path of the directory. Empty string if the creation failed.
+    const char *dirname() {
+        return m_dirname.c_str();
+    }
+    /// Explanation for the last failure (creation or wipe())
+    const std::string& getreason() {
+        return m_reason;
+    }
+    /// True if the directory could be created
+    bool ok() {
+        return !m_dirname.empty();
+    }
+    /// Recursively delete contents but not self.
+    bool wipe();
+private:
+    // Empty if the creation failed
+    std::string m_dirname;
+    std::string m_reason;
+    // Copy constructor and assignment are private: code outside of the
+    // class can't copy a TempDir
+    TempDir(const TempDir&) {}
+    TempDir& operator=(const TempDir&) {
+        return *this;
+    };
+};
+
+// Freedesktop thumbnail standard path routine
+// On return, path will have the appropriate value in all cases,
+// returns true if the file already exists
+extern bool thumbPathForUrl(const std::string& url, int size,
+                            std::string& path);
+
+// Duplicate map<string,string> while ensuring no shared string data (to pass
+// to other thread):
+void map_ss_cp_noshr(const std::map<std::string, std::string> s,
+                     std::map<std::string, std::string> *d);
+
+
+#endif /* _RCLUTIL_H_INCLUDED_ */
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2004 J.F.Dockes
+/* Copyright (C) 2004-2016 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -16,7 +16,11 @@
 */

 #ifndef TEST_SMALLUT
+#ifdef BUILDING_RECOLL
 #include "autoconfig.h"
+#else
+#include "config.h"
+#endif

 #include <stdio.h>
 #include <stdlib.h>
@ -34,23 +38,10 @@
 #include UNORDERED_SET_INCLUDE

 #include "smallut.h"
-#include "utf8iter.h"
-#include "hldata.h"
-#include "cstr.h"

 using namespace std;

-void map_ss_cp_noshr(const map<string,string> s, map<string,string> *d)
-{
-    for (map<string,string>::const_iterator it= s.begin();
-         it != s.end(); it++) {
-        d->insert(
-            pair<string,string>(string(it->first.begin(), it->first.end()),
-                                string(it->second.begin(), it->second.end())));
-    }
-}
-
-int stringicmp(const string & s1, const string& s2) 
+int stringicmp(const string& s1, const string& s2)
 {
    string::const_iterator it1 = s1.begin();
    string::const_iterator it2 = s2.begin();
@ -64,7 +55,8 @@ int stringicmp(const string & s1, const string& s2)
            if (c1 != c2) {
                return c1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : -1;
    } else {
@ -74,7 +66,8 @@ int stringicmp(const string & s1, const string& s2)
            if (c1 != c2) {
                return c1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : 1;
    }
@ -104,13 +97,14 @@ extern int stringisuffcmp(const string& s1, const string& s2)
        if (c1 != c2) {
            return c1 > c2 ? 1 : -1;
        }
-	++r1; ++r2;
+        ++r1;
+        ++r2;
    }
    return 0;
 }

 //  s1 is already lowercase
-int stringlowercmp(const string & s1, const string& s2) 
+int stringlowercmp(const string& s1, const string& s2)
 {
    string::const_iterator it1 = s1.begin();
    string::const_iterator it2 = s2.begin();
@ -123,7 +117,8 @@ int stringlowercmp(const string & s1, const string& s2)
            if (*it1 != c2) {
                return *it1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : -1;
    } else {
@ -132,14 +127,15 @@ int stringlowercmp(const string & s1, const string& s2)
            if (*it1 != c2) {
                return *it1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : 1;
    }
 }

 //  s1 is already uppercase
-int stringuppercmp(const string & s1, const string& s2) 
+int stringuppercmp(const string& s1, const string& s2)
 {
    string::const_iterator it1 = s1.begin();
    string::const_iterator it2 = s2.begin();
@ -152,7 +148,8 @@ int stringuppercmp(const string & s1, const string& s2)
            if (*it1 != c2) {
                return *it1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : -1;
    } else {
@ -161,23 +158,24 @@ int stringuppercmp(const string & s1, const string& s2)
            if (*it1 != c2) {
                return *it1 > c2 ? 1 : -1;
            }
-	    ++it1; ++it2;
+            ++it1;
+            ++it2;
        }
        return size1 == size2 ? 0 : 1;
    }
 }

 // Compare charset names, removing the more common spelling variations
-bool samecharset(const string &cs1, const string &cs2)
+bool samecharset(const string& cs1, const string& cs2)
 {
    string mcs1, mcs2;
    // Remove all - and _, turn to lowercase
-    for (unsigned int i = 0; i < cs1.length();i++) {
+    for (unsigned int i = 0; i < cs1.length(); i++) {
        if (cs1[i] != '_' && cs1[i] != '-') {
            mcs1 += ::tolower(cs1[i]);
        }
    }
-    for (unsigned int i = 0; i < cs2.length();i++) {
+    for (unsigned int i = 0; i < cs2.length(); i++) {
        if (cs2[i] != '_' && cs2[i] != '-') {
            mcs2 += ::tolower(cs2[i]);
        }
@ -185,7 +183,7 @@ bool samecharset(const string &cs1, const string &cs2)
    return mcs1 == mcs2;
 }

-template <class T> bool stringToStrings(const string &s, T &tokens, 
+template <class T> bool stringToStrings(const string& s, T& tokens,
                                        const string& addseps)
 {
    string current;
@ -195,9 +193,10 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
    for (unsigned int i = 0; i < s.length(); i++) {
        switch (s[i]) {
        case '"':
-	    switch(state) {
+            switch (state) {
            case SPACE:
-		state=INQUOTE; continue;
+                state = INQUOTE;
+                continue;
            case TOKEN:
                current += '"';
                continue;
@ -213,11 +212,11 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
            }
            break;
        case '\\':
-	    switch(state) {
+            switch (state) {
            case SPACE:
            case TOKEN:
                current += '\\';
-                state=TOKEN; 
+                state = TOKEN;
                continue;
            case INQUOTE:
                state = ESCAPE;
@ -233,7 +232,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
        case '\t':
        case '\n':
        case '\r':
-	    switch(state) {
+            switch (state) {
            case SPACE:
                continue;
            case TOKEN:
@ -250,7 +249,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,

        default:
            if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
-                switch(state) {
+                switch (state) {
                case ESCAPE:
                    state = INQUOTE;
                    break;
@ -266,7 +265,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
                    state = SPACE;
                    continue;
                }
-            } else switch(state) {
+            } else switch (state) {
                case ESCAPE:
                    state = INQUOTE;
                    break;
@ -280,7 +279,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
            current += s[i];
        }
    }
-    switch(state) {
+    switch (state) {
    case SPACE:
        break;
    case TOKEN:
@ -293,26 +292,29 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
    return true;
 }

-template bool stringToStrings<list<string> >(const string &, 
-					     list<string> &, const string&);
-template bool stringToStrings<vector<string> >(const string &, 
-					       vector<string> &,const string&);
-template bool stringToStrings<set<string> >(const string &,
-					    set<string> &, const string&);
+template bool stringToStrings<list<string> >(const string&,
+        list<string>&, const string&);
+template bool stringToStrings<vector<string> >(const string&,
+        vector<string>&, const string&);
+template bool stringToStrings<set<string> >(const string&,
+        set<string>&, const string&);
 template bool stringToStrings<STD_UNORDERED_SET<string> >
-(const string &, STD_UNORDERED_SET<string> &, const string&);
+(const string&, STD_UNORDERED_SET<string>&, const string&);

-template <class T> void stringsToString(const T &tokens, string &s) 
+template <class T> void stringsToString(const T& tokens, string& s)
 {
    for (typename T::const_iterator it = tokens.begin();
            it != tokens.end(); it++) {
        bool hasblanks = false;
-	if (it->find_first_of(" \t\n") != string::npos)
+        if (it->find_first_of(" \t\n") != string::npos) {
            hasblanks = true;
-	if (it != tokens.begin())
+        }
+        if (it != tokens.begin()) {
            s.append(1, ' ');
-	if (hasblanks)
+        }
+        if (hasblanks) {
            s.append(1, '"');
+        }
        for (unsigned int i = 0; i < it->length(); i++) {
            char car = it->at(i);
            if (car == '"') {
@ -322,24 +324,25 @@ template <class T> void stringsToString(const T &tokens, string &s)
                s.append(1, car);
            }
        }
-	if (hasblanks)
+        if (hasblanks) {
            s.append(1, '"');
        }
+    }
 }
-template void stringsToString<list<string> >(const list<string> &, string &);
-template void stringsToString<vector<string> >(const vector<string> &,string &);
-template void stringsToString<set<string> >(const set<string> &, string &);
-template <class T> string stringsToString(const T &tokens)
+template void stringsToString<list<string> >(const list<string>&, string&);
+template void stringsToString<vector<string> >(const vector<string>&, string&);
+template void stringsToString<set<string> >(const set<string>&, string&);
+template <class T> string stringsToString(const T& tokens)
 {
    string out;
    stringsToString<T>(tokens, out);
    return out;
 }
-template string stringsToString<list<string> >(const list<string> &);
-template string stringsToString<vector<string> >(const vector<string> &);
-template string stringsToString<set<string> >(const set<string> &);
+template string stringsToString<list<string> >(const list<string>&);
+template string stringsToString<vector<string> >(const vector<string>&);
+template string stringsToString<set<string> >(const set<string>&);

-template <class T> void stringsToCSV(const T &tokens, string &s, 
+template <class T> void stringsToCSV(const T& tokens, string& s,
                                     char sep)
 {
    s.erase();
@ -347,12 +350,15 @@ template <class T> void stringsToCSV(const T &tokens, string &s,
            it != tokens.end(); it++) {
        bool needquotes = false;
        if (it->empty() ||
-	    it->find_first_of(string(1, sep) + "\"\n") != string::npos)
+                it->find_first_of(string(1, sep) + "\"\n") != string::npos) {
            needquotes = true;
-	if (it != tokens.begin())
+        }
+        if (it != tokens.begin()) {
            s.append(1, sep);
-	if (needquotes)
+        }
+        if (needquotes) {
            s.append(1, '"');
+        }
        for (unsigned int i = 0; i < it->length(); i++) {
            char car = it->at(i);
            if (car == '"') {
@ -361,12 +367,13 @@ template <class T> void stringsToCSV(const T &tokens, string &s,
                s.append(1, car);
            }
        }
-	if (needquotes)
+        if (needquotes) {
            s.append(1, '"');
        }
+    }
 }
-template void stringsToCSV<list<string> >(const list<string> &, string &, char);
-template void stringsToCSV<vector<string> >(const vector<string> &,string &, 
+template void stringsToCSV<list<string> >(const list<string>&, string&, char);
+template void stringsToCSV<vector<string> >(const vector<string>&, string&,
        char);

 void stringToTokens(const string& str, vector<string>& tokens,
@ -389,8 +396,9 @@ void stringToTokens(const string& str, vector<string>& tokens,
            break;
        } else if (pos == startPos) {
            // Dont' push empty tokens after first
-	    if (tokens.empty())
+            if (tokens.empty()) {
                tokens.push_back(string());
+            }
            startPos = ++pos;
        } else {
            tokens.push_back(str.substr(startPos, pos - startPos));
@ -399,20 +407,22 @@ void stringToTokens(const string& str, vector<string>& tokens,
    }
 }

-bool stringToBool(const string &s)
+bool stringToBool(const string& s)
 {
-    if (s.empty())
+    if (s.empty()) {
        return false;
+    }
    if (isdigit(s[0])) {
        int val = atoi(s.c_str());
        return val ? true : false;
    }
-    if (s.find_first_of("yYtT") == 0)
+    if (s.find_first_of("yYtT") == 0) {
        return true;
+    }
    return false;
 }

-void trimstring(string &s, const char *ws)
+void trimstring(string& s, const char *ws)
 {
    string::size_type pos = s.find_first_not_of(ws);
    if (pos == string::npos) {
@ -422,25 +432,27 @@ void trimstring(string &s, const char *ws)
    s.replace(0, pos, string());

    pos = s.find_last_not_of(ws);
-    if (pos != string::npos && pos != s.length()-1)
-	s.replace(pos+1, string::npos, string());
+    if (pos != string::npos && pos != s.length() - 1) {
+        s.replace(pos + 1, string::npos, string());
+    }
 }

 // Remove some chars and replace them with spaces
-string neutchars(const string &str, const string &chars)
+string neutchars(const string& str, const string& chars)
 {
    string out;
    neutchars(str, out, chars);
    return out;
 }
-void neutchars(const string &str, string &out, const string& chars)
+void neutchars(const string& str, string& out, const string& chars)
 {
    string::size_type startPos, pos;

    for (pos = 0;;) {
        // Skip initial chars, break if this eats all.
-        if ((startPos = str.find_first_not_of(chars, pos)) == string::npos)
+        if ((startPos = str.find_first_not_of(chars, pos)) == string::npos) {
            break;
+        }
        // Find next delimiter or end of string (end of token)
        pos = str.find_first_of(chars, startPos);
        // Add token to the output. Note: token cant be empty here
@ -458,7 +470,7 @@ void neutchars(const string &str, string &out, const string& chars)
 * we have enough, this would be cleanly utf8-aware but would remove
 * punctuation */
 static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
-string truncate_to_word(const string &input, string::size_type maxlen)
+string truncate_to_word(const string& input, string::size_type maxlen)
 {
    string output;
    if (input.length() <= maxlen) {
@ -481,25 +493,12 @@ string truncate_to_word(const string &input, string::size_type maxlen)
    return output;
 }

-void utf8truncate(string &s, int maxlen)
-{
-    if (s.size() <= string::size_type(maxlen))
-	return;
-    Utf8Iter iter(s);
-    string::size_type pos = 0;
-    while (iter++ != string::npos) 
-	if (iter.getBpos() < string::size_type(maxlen))
-	    pos = iter.getBpos();
-
-    s.erase(pos);
-}
-
 // Escape things that would look like markup
-string escapeHtml(const string &in)
+string escapeHtml(const string& in)
 {
    string out;
    for (string::size_type pos = 0; pos < in.length(); pos++) {
-	switch(in.at(pos)) {
+        switch (in.at(pos)) {
        case '<':
            out += "&lt;";
            break;
@ -513,12 +512,12 @@ string escapeHtml(const string &in)
    return out;
 }

-string escapeShell(const string &in)
+string escapeShell(const string& in)
 {
    string out;
    out += "\"";
    for (string::size_type pos = 0; pos < in.length(); pos++) {
-	switch(in.at(pos)) {
+        switch (in.at(pos)) {
        case '$':
            out += "\\$";
            break;
@ -547,7 +546,7 @@ string escapeShell(const string &in)
 bool pcSubst(const string& in, string& out, const map<char, string>& subs)
 {
    string::const_iterator it;
-    for (it = in.begin(); it != in.end();it++) {
+    for (it = in.begin(); it != in.end(); it++) {
        if (*it == '%') {
            if (++it == in.end()) {
                out += '%';
@ -557,7 +556,7 @@ bool pcSubst(const string& in, string& out, const map<char, string>& subs)
                out += '%';
                continue;
            }
-	    map<char,string>::const_iterator tr;
+            map<char, string>::const_iterator tr;
            if ((tr = subs.find(*it)) != subs.end()) {
                out += tr->second;
            } else {
@ -594,15 +593,15 @@ bool pcSubst(const string& in, string& out, const map<string, string>& subs)
                string::size_type j = in.find_first_of(")", i);
                if (j == string::npos) {
                    // ??concatenate remaining part and stop
-                    out += in.substr(i-2);
+                    out += in.substr(i - 2);
                    break;
                }
-                key = in.substr(i, j-i);
+                key = in.substr(i, j - i);
                i = j;
            } else {
                key = in[i];
            }
-	    map<string,string>::const_iterator tr;
+            map<string, string>::const_iterator tr;
            if ((tr = subs.find(key)) != subs.end()) {
                out += tr->second;
            } else {
@ -622,14 +621,15 @@ inline static int ulltorbuf(unsigned long long val, char *rbuf)
    for (idx = 0; val; idx++) {
        rbuf[idx] = '0' + val % 10;
        val /= 10;
-    } while (val);
+    }
+    while (val);
    rbuf[idx] = 0;
    return idx;
 }

 inline static void ullcopyreverse(const char *rbuf, string& buf, int idx)
 {
-    buf.reserve(idx+1);
+    buf.reserve(idx + 1);
    for (int i = idx - 1; i >= 0; i--) {
        buf.push_back(rbuf[i]);
    }
@ -659,14 +659,16 @@ void lltodecstr(long long val, string& buf)
    }

    bool neg = val < 0;
-    if (neg)
+    if (neg) {
        val = -val;
+    }

    char rbuf[30];
    int idx = ulltorbuf(val, rbuf);

-    if (neg)
+    if (neg) {
        rbuf[idx++] = '-';
+    }
    rbuf[idx] = 0;

    ullcopyreverse(rbuf, buf, idx);
@ -722,12 +724,13 @@ string breakIntoLines(const string& in, unsigned int ll,
            string::size_type pos = ss.find_last_of(" ");
            if (pos == string::npos) {
                pos = query.find_first_of(" ");
-		if (pos != string::npos)
-		    ss = query.substr(0, pos+1);
-		else 
-		    ss = query;
+                if (pos != string::npos) {
+                    ss = query.substr(0, pos + 1);
                } else {
-		ss = ss.substr(0, pos+1);
+                    ss = query;
+                }
+            } else {
+                ss = ss.substr(0, pos + 1);
            }
        }
        // This cant happen, but anyway. Be very sure to avoid an infinite loop
@ -740,7 +743,7 @@ string breakIntoLines(const string& in, unsigned int ll,
            oq += " ... \n";
            break;
        }
-	query= query.substr(ss.length());
+        query = query.substr(ss.length());
    }
    return oq;
 }
@ -757,8 +760,9 @@ static bool parsedate(vector<string>::const_iterator& it,
    if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
        return false;
    }
-    if (it == end || *it == "/")
+    if (it == end || *it == "/") {
        return true;
+    }
    if (*it++ != "-") {
        return false;
    }
@ -770,8 +774,9 @@ static bool parsedate(vector<string>::const_iterator& it,
    if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
        return false;
    }
-    if (it == end || *it == "/")
+    if (it == end || *it == "/") {
        return true;
+    }
    if (*it++ != "-") {
        return false;
    }
@ -802,17 +807,29 @@ static bool parseperiod(vector<string>::const_iterator& it,
        if (sscanf(it++->c_str(), "%d", &value) != 1) {
            return false;
        }
-        if (it == end || it->empty())
+        if (it == end || it->empty()) {
            return false;
+        }
        switch (it->at(0)) {
-        case 'Y': case 'y': dip->y1 = value;break;
-        case 'M': case 'm': dip->m1 = value;break;
-        case 'D': case 'd': dip->d1 = value;break;
-        default: return false;
+        case 'Y':
+        case 'y':
+            dip->y1 = value;
+            break;
+        case 'M':
+        case 'm':
+            dip->m1 = value;
+            break;
+        case 'D':
+        case 'd':
+            dip->d1 = value;
+            break;
+        default:
+            return false;
        }
        it++;
-        if (it == end)
+        if (it == end) {
            return true;
+        }
        if (*it == "/") {
            return true;
        }
@ -823,11 +840,12 @@ static bool parseperiod(vector<string>::const_iterator& it,
 #ifdef _WIN32
 int setenv(const char *name, const char *value, int overwrite)
 {
-    if(!overwrite) {
+    if (!overwrite) {
        const char *cp = getenv(name);
-        if (cp)
+        if (cp) {
            return -1;
        }
+    }
    return _putenv_s(name, value);
 }
 void unsetenv(const char *name)
@ -845,10 +863,11 @@ time_t portable_timegm(struct tm *tm)
    setenv("TZ", "", 1);
    tzset();
    ret = mktime(tm);
-    if (tz)
+    if (tz) {
        setenv("TZ", tz, 1);
-    else
+    } else {
        unsetenv("TZ");
+    }
    tzset();
    return ret;
 }
@ -873,7 +892,7 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
    // timegm sort it out
    memset(&tm, 0, sizeof(tm));
    tm.tm_year = dp->y1 - 1900 + pp->y1;
-    tm.tm_mon = dp->m1 + pp->m1 -1;
+    tm.tm_mon = dp->m1 + pp->m1 - 1;
    tm.tm_mday = dp->d1 + pp->d1;
    time_t tres = mktime(&tm);
    localtime_r(&tres, &tm);
@ -886,10 +905,19 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
 int monthdays(int mon, int year)
 {
    switch (mon) {
-    // We are returning a few two many 29 days februaries, no problem
-    case 2: return (year % 4) == 0 ? 29 : 28;
-    case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31;
-    default: return 30;
+    // A few too many 29-day Februaries (century rule ignored): no problem
+    case 2:
+        return (year % 4) == 0 ? 29 : 28;
+    case 1:
+    case 3:
+    case 5:
+    case 7:
+    case 8:
+    case 10:
+    case 12:
+        return 31;
+    default:
+        return 30;
    }
 }
 bool parsedateinterval(const string& s, DateInterval *dip)
@ -904,8 +932,9 @@ bool parsedateinterval(const string& s, DateInterval *dip)
    if (!stringToStrings(s, vs, "PYMDpymd-/")) {
        return false;
    }
-    if (vs.empty())
+    if (vs.empty()) {
        return false;
+    }

    vector<string>::const_iterator it = vs.begin();
    if (*it == "P" || *it == "p") {
@ -1042,10 +1071,12 @@ secondelt:

 void catstrerror(string *reason, const char *what, int _errno)
 {
-    if (!reason)
+    if (!reason) {
        return;
-    if (what)
+    }
+    if (what) {
        reason->append(what);
+    }

    reason->append(": errno: ");

@ -1080,59 +1111,6 @@ void catstrerror(string *reason, const char *what, int _errno)
 #endif
 }

-void HighlightData::toString(std::string& out)
-{
-    out.append("\nUser terms (orthograph): ");
-    for (std::set<std::string>::const_iterator it = uterms.begin();
-	 it != uterms.end(); it++) {
-	out.append(" [").append(*it).append("]");
-    }
-    out.append("\nUser terms to Query terms:");
-    for (map<string, string>::const_iterator it = terms.begin();
-	 it != terms.end(); it++) {
-	out.append("[").append(it->first).append("]->[");
-	out.append(it->second).append("] ");
-    }
-    out.append("\nGroups: ");
-    char cbuf[200];
-    sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
-	    int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
-    out.append(cbuf);
-
-    size_t ugidx = (size_t)-1;
-    for (unsigned int i = 0; i < groups.size(); i++) {
-	if (ugidx != grpsugidx[i]) {
-	    ugidx = grpsugidx[i];
-	    out.append("\n(");
-	    for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
-		out.append("[").append(ugroups[ugidx][j]).append("] ");
-	    }
-	    out.append(") ->");
-	}
-	out.append(" {");
-	for (unsigned int j = 0; j < groups[i].size(); j++) {
-	    out.append("[").append(groups[i][j]).append("]");
-	}
-	sprintf(cbuf, "%d", slacks[i]);
-	out.append("}").append(cbuf);
-    }
-    out.append("\n");
-}
-
-void HighlightData::append(const HighlightData& hl)
-{
-    uterms.insert(hl.uterms.begin(), hl.uterms.end());
-    terms.insert(hl.terms.begin(), hl.terms.end());
-    size_t ugsz0 = ugroups.size();
-    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
-
-    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
-    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
-    for (std::vector<size_t>::const_iterator it = hl.grpsugidx.begin(); 
-	 it != hl.grpsugidx.end(); it++) {
-	grpsugidx.push_back(*it + ugsz0);
-    }
-}

 static const char *vlang_to_code[] = {
    "be", "cp1251",
@ -1159,21 +1137,24 @@ static const char *vlang_to_code[] = {
    "uk", "koi8-u",
 };

+static const string cstr_cp1252("CP1252");
+
 string langtocode(const string& lang)
 {
    static STD_UNORDERED_MAP<string, string> lang_to_code;
    if (lang_to_code.empty()) {
        for (unsigned int i = 0;
                i < sizeof(vlang_to_code) / sizeof(char *); i += 2) {
-	    lang_to_code[vlang_to_code[i]] = vlang_to_code[i+1];
+            lang_to_code[vlang_to_code[i]] = vlang_to_code[i + 1];
        }
    }
-    STD_UNORDERED_MAP<string,string>::const_iterator it = 
+    STD_UNORDERED_MAP<string, string>::const_iterator it =
        lang_to_code.find(lang);

    // Use cp1252 by default...
-    if (it == lang_to_code.end())
+    if (it == lang_to_code.end()) {
        return cstr_cp1252;
+    }

    return it->second;
 }
@ -1182,12 +1163,15 @@ string localelang()
 {
    const char *lang = getenv("LANG");

-    if (lang == 0 || *lang == 0 || !strcmp(lang, "C") || !strcmp(lang, "POSIX"))
+    if (lang == 0 || *lang == 0 || !strcmp(lang, "C") ||
+            !strcmp(lang, "POSIX")) {
        return "en";
+    }
    string locale(lang);
    string::size_type under = locale.find_first_of("_");
-    if (under == string::npos)
+    if (under == string::npos) {
        return locale;
+    }
    return locale.substr(0, under);
 }

@ -1262,41 +1246,47 @@ static void cerrdip(const string& s, DateInterval *dip)

 int main(int argc, char **argv)
 {
-    thisprog = *argv++;argc--;
+    thisprog = *argv++;
+    argc--;

 #if 1
-    if (argc <=0 ) {
+    if (argc <= 0) {
        cerr << "Usage: smallut <stringtosplit>" << endl;
        exit(1);
    }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
    vector<string> vs;
    stringToTokens(s, vs, "/");
-    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
+    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
        cerr << "[" << *it << "] ";
+    }
    cerr << endl;
    exit(0);
 #elif 0
-    if (argc <=0 ) {
+    if (argc <= 0) {
        cerr << "Usage: smallut <stringtosplit>" << endl;
        exit(1);
    }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
    vector<string> vs;
    if (!stringToStrings(s, vs, ":-()")) {
        cerr << "Bad entry" << endl;
        exit(1);
    }
-    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
+    for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
        cerr << "[" << *it << "] ";
+    }
    cerr << endl;
    exit(0);
 #elif 0
-    if (argc <=0 ) {
+    if (argc <= 0) {
        cerr << "Usage: smallut <dateinterval>" << endl;
        exit(1);
    }
-    string s = *argv++;argc--;
+    string s = *argv++;
+    argc--;
    DateInterval di;
    if (!parsedateinterval(s, &di)) {
        cerr << "Parse failed" << endl;
@ -1386,26 +1376,26 @@ int main(int argc, char **argv)
    string slong("ABCD");
    string sshortsmaller("ABB");

-    vector<pair<string,string> > cmps;
-    cmps.push_back(pair<string,string>(sshort,sshort));
-    cmps.push_back(pair<string,string>(sshort,slong));
-    cmps.push_back(pair<string,string>(slong,sshort));
-    cmps.push_back(pair<string,string>(sshortsmaller,sshort));
-    cmps.push_back(pair<string,string>(sshort, sshortsmaller));
+    vector<pair<string, string> > cmps;
+    cmps.push_back(pair<string, string>(sshort, sshort));
+    cmps.push_back(pair<string, string>(sshort, slong));
+    cmps.push_back(pair<string, string>(slong, sshort));
+    cmps.push_back(pair<string, string>(sshortsmaller, sshort));
+    cmps.push_back(pair<string, string>(sshort, sshortsmaller));

-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
            it != cmps.end(); it++) {
        cout << it->first << " " << it->second << " " <<
             stringicmp(it->first, it->second) << endl;
    }
    cout << endl;
-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
            it != cmps.end(); it++) {
        cout << it->first << " " << it->second << " " <<
             stringlowercmp(stringtolower(it->first), it->second) << endl;
    }
    cout << endl;
-    for (vector<pair<string,string> >::const_iterator it = cmps.begin();
+    for (vector<pair<string, string> >::const_iterator it = cmps.begin();
            it != cmps.end(); it++) {
        cout << it->first << " " << it->second << " " <<
             stringuppercmp(it->first, it->second) << endl;
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2004 J.F.Dockes
+/* Copyright (C) 2004-2016 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -17,47 +17,63 @@
 #ifndef _SMALLUT_H_INCLUDED_
 #define _SMALLUT_H_INCLUDED_

-#include <stdlib.h>
+#include <sys/types.h>

 #include <string>
 #include <vector>
 #include <map>
 #include <set>

-using std::string;
-using std::vector;
-using std::map;
-using std::set;
+// Miscellaneous mostly string-oriented small utilities
+// Note that none of the following code knows about utf-8.
+
+// Call this before going multithreaded.
+void smallut_init_mt();
+
+#ifndef SMALLUT_DISABLE_MACROS
+#ifndef MIN
+#define MIN(A,B) (((A)<(B)) ? (A) : (B))
+#endif
+#ifndef MAX
+#define MAX(A,B) (((A)>(B)) ? (A) : (B))
+#endif
+#ifndef deleteZ
+#define deleteZ(X) {delete X;X = 0;}
+#endif
+#endif /* SMALLUT_DISABLE_MACROS */
+
+// Case-insensitive compare. ASCII ONLY !
+extern int stringicmp(const std::string& s1, const std::string& s2);

-// Note these are all ascii routines
-extern int stringicmp(const string& s1, const string& s2);
 // For find_if etc.
 struct StringIcmpPred {
-    StringIcmpPred(const string& s1) 
-        : m_s1(s1) 
-    {}
-    bool operator()(const string& s2) {
+    StringIcmpPred(const std::string& s1)
+        : m_s1(s1) {
+    }
+    bool operator()(const std::string& s2) {
        return stringicmp(m_s1, s2) == 0;
    }
-    const string& m_s1;
+    const std::string& m_s1;
 };

-extern int stringlowercmp(const string& alreadylower, const string& s2);
-extern int stringuppercmp(const string& alreadyupper, const string& s2); 
+extern int stringlowercmp(const std::string& alreadylower,
+                          const std::string& s2);
+extern int stringuppercmp(const std::string& alreadyupper,
+                          const std::string& s2);

-extern void stringtolower(string& io);
-extern string stringtolower(const string& io);
+extern void stringtolower(std::string& io);
+extern std::string stringtolower(const std::string& io);

 // Is one string the end part of the other ?
-extern int stringisuffcmp(const string& s1, const string& s2);
+extern int stringisuffcmp(const std::string& s1, const std::string& s2);

 // Divine language from locale
 extern std::string localelang();
 // Divine 8bit charset from language
-extern std::string langtocode(const string& lang);
+extern std::string langtocode(const std::string& lang);

 // Compare charset names, removing the more common spelling variations
-extern bool samecharset(const string &cs1, const string &cs2);
+extern bool samecharset(const std::string& cs1, const std::string& cs2);

 // Parse date interval specifier into pair of y,m,d dates.  The format
 // for the time interval is based on a subset of iso 8601 with
@ -71,9 +87,14 @@ extern bool samecharset(const string &cs1, const string &cs2);
 // YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
 // This returns a pair of y,m,d dates.
 struct DateInterval {
-    int y1;int m1;int d1; int y2;int m2;int d2;
+    int y1;
+    int m1;
+    int d1;
+    int y2;
+    int m2;
+    int d2;
 };
-extern bool parsedateinterval(const string&s, DateInterval *di);
+extern bool parsedateinterval(const std::string& s, DateInterval *di);
 extern int monthdays(int mon, int year);

 /**
@ -81,172 +102,132 @@ extern int monthdays(int mon, int year);
 *
 * Token delimiter is " \t\n" except inside dquotes. dquote inside
 * dquotes can be escaped with \ etc...
- * Input is handled a byte at a time, things will work as long as space tab etc.
- * have the ascii values and can't appear as part of a multibyte char. utf-8 ok
- * but so are the iso-8859-x and surely others. addseps do have to be 
- * single-bytes
+ * Input is handled a byte at a time, so things will work as long as
+ * space, tab, etc. have their ASCII values and can't appear as part of
+ * a multibyte char. UTF-8 is ok, but so are the iso-8859-x charsets and
+ * surely others. addseps must be single-byte characters.
 */
-template <class T> bool stringToStrings(const string& s, T &tokens, 
-					const string& addseps = "");
+template <class T> bool stringToStrings(const std::string& s, T& tokens,
+                                        const std::string& addseps = "");

 /**
 * Inverse operation:
 */
-template <class T> void stringsToString(const T &tokens, string &s);
-template <class T> std::string stringsToString(const T &tokens);
+template <class T> void stringsToString(const T& tokens, std::string& s);
+template <class T> std::string stringsToString(const T& tokens);

 /**
 * Strings to CSV string. Tokens containing the separator are quoted (").
 * " inside tokens is escaped as "" ([word "quote"] => ["word ""quote"""])
 */
-template <class T> void stringsToCSV(const T &tokens, string &s, 
+template <class T> void stringsToCSV(const T& tokens, std::string& s,
                                     char sep = ',');

 /**
 * Split input string. No handling of quoting
 */
-extern void stringToTokens(const string &s, vector<string> &tokens, 
-			   const string &delims = " \t", bool skipinit=true);
+extern void stringToTokens(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           const std::string& delims = " \t",
+                           bool skipinit = true);

 /** Convert string to boolean */
-extern bool stringToBool(const string &s);
+extern bool stringToBool(const std::string& s);

 /** Remove instances of characters belonging to set (default {space,
    tab}) at beginning and end of input string */
-extern void trimstring(string &s, const char *ws = " \t");
+extern void trimstring(std::string& s, const char *ws = " \t");

 /** Escape things like < or & by turning them into entities */
-extern string escapeHtml(const string &in);
+extern std::string escapeHtml(const std::string& in);

-/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
- *  so chars should only contain ascii */
-extern string neutchars(const string &str, const string &chars);
-extern void neutchars(const string &str, string& out, const string &chars);
+/** Replace some chars with spaces (e.g. newline chars). */
+extern std::string neutchars(const std::string& str, const std::string& chars);
+extern void neutchars(const std::string& str, std::string& out,
+                      const std::string& chars);

 /** Turn string into something that won't be expanded by a shell. In practice
 *  quote with double-quotes and escape $`\ */
-extern string escapeShell(const string &str);
+extern std::string escapeShell(const std::string& str);

 /** Truncate a string to a given maxlength, avoiding cutting off midword
 *  if reasonably possible. */
-extern string truncate_to_word(const string &input, string::size_type maxlen);
+extern std::string truncate_to_word(const std::string& input,
+                                    std::string::size_type maxlen);

-/** Truncate in place in an utf8-legal way */
-extern void utf8truncate(string &s, int maxlen);
-
-void ulltodecstr(unsigned long long val, string& buf);
-void lltodecstr(long long val, string& buf);
-string lltodecstr(long long val);
-string ulltodecstr(unsigned long long val);
+void ulltodecstr(unsigned long long val, std::string& buf);
+void lltodecstr(long long val, std::string& buf);
+std::string lltodecstr(long long val);
+std::string ulltodecstr(unsigned long long val);

 /** Convert byte count into unit (KB/MB...) appropriate for display */
-string displayableBytes(off_t size);
+std::string displayableBytes(off_t size);

 /** Break big string into lines */
-string breakIntoLines(const string& in, unsigned int ll = 100, 
-		      unsigned int maxlines= 50);
+std::string breakIntoLines(const std::string& in, unsigned int ll = 100,
+                           unsigned int maxlines = 50);
+
 /** Small utility to substitute printf-like percents cmds in a string */
-bool pcSubst(const string& in, string& out, const map<char, string>& subs);
+bool pcSubst(const std::string& in, std::string& out,
+             const std::map<char, std::string>& subs);
 /** Substitute printf-like percents and also %(key) */
-bool pcSubst(const string& in, string& out, const map<string, string>& subs);
+bool pcSubst(const std::string& in, std::string& out,
+             const std::map<std::string, std::string>& subs);

 /** Append system error message */
-void catstrerror(string *reason, const char *what, int _errno);
+void catstrerror(std::string *reason, const char *what, int _errno);

 /** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
 * makes it inaccessible */
 struct tm;
 time_t portable_timegm(struct tm *tm);

-/** Temp buffer with automatic deallocation */
-struct TempBuf {
-    TempBuf() 
-        : m_buf(0)
-    {}
-    TempBuf(int n)
-    {
-        m_buf = (char *)malloc(n);
-    }
-    ~TempBuf()
-    { 
-        if (m_buf)
-            free(m_buf);
-    }
-    char *setsize(int n) { return (m_buf = (char *)realloc(m_buf, n)); }
-    char *buf() {return m_buf;}
-    char *m_buf;
-};
-
-inline void leftzeropad(string& s, unsigned len)
+inline void leftzeropad(std::string& s, unsigned len)
 {
-    if (s.length() && s.length() < len)
-        s = s.insert(0, len - s.length(), '0');
+    if (s.length() && s.length() < len) { // pad only non-empty, too-short strings
+        s.insert(0, len - s.length(), '0'); // insert() already modifies s in place
+    }
 }

-// Duplicate map<string,string> while ensuring no shared string data (to pass
-// to other thread):
-void map_ss_cp_noshr(const std::map<std::string,std::string> s,
-                      std::map<std::string,std::string> *d);
-
 // Code for static initialization of an stl map. Somewhat like Boost.assign.
 // Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
 // Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);

 template <typename T, typename U>
-class create_map
-{
+class create_map { // chainable builder: create_map<K, V>(k1, v1)(k2, v2)...
 private:
    std::map<T, U> m_map;
 public:
-    create_map(const T& key, const U& val)
-    {
+    create_map(const T& key, const U& val) { // seed with the first pair
        m_map[key] = val;
    }

-    create_map<T, U>& operator()(const T& key, const U& val)
-    {
+    create_map<T, U>& operator()(const T& key, const U& val) { // add one more pair
        m_map[key] = val;
        return *this;
    }

-    operator std::map<T, U>()
-    {
+    operator std::map<T, U>() const { // const so that const builders convert too
        return m_map;
    }
 };
 template <typename T>
-class create_vector
-{
+class create_vector { // chainable builder: create_vector<T>(v1)(v2)...
 private:
    std::vector<T> m_vector;
 public:
-    create_vector(const T& val)
-    {
+    create_vector(const T& val) { // seed with the first element
        m_vector.push_back(val);
    }

-    create_vector<T>& operator()(const T& val)
-    {
+    create_vector<T>& operator()(const T& val) { // append one more element
        m_vector.push_back(val);
        return *this;
    }

-    operator std::vector<T>()
-    {
+    operator std::vector<T>() const { // const so that const builders convert too
        return m_vector;
    }
 };

-#ifndef MIN
-#define MIN(A,B) (((A)<(B)) ? (A) : (B))
-#endif
-#ifndef MAX
-#define MAX(A,B) (((A)>(B)) ? (A) : (B))
-#endif
-#ifndef deleteZ
-#define deleteZ(X) {delete X;X = 0;}
-#endif
-
-void smallut_init_mt();
-
 #endif /* _SMALLUT_H_INCLUDED_ */