Moved code around to make smallut and pathut less recoll-specific and more reusable. No functional changes.

This commit is contained in:
Jean-Francois Dockes 2016-03-21 12:55:31 +01:00
parent 7b2a455b80
commit 35de51985b
26 changed files with 1821 additions and 1493 deletions

View file

@ -207,6 +207,7 @@ utils/fileudi.h \
utils/fstreewalk.cpp \
utils/fstreewalk.h \
utils/hldata.h \
utils/hldata.cpp \
utils/idfile.cpp \
utils/idfile.h \
utils/md5.cpp \
@ -224,6 +225,8 @@ utils/pxattr.cpp \
utils/pxattr.h \
utils/rclionice.cpp \
utils/rclionice.h \
utils/rclutil.h \
utils/rclutil.cpp \
utils/readfile.cpp \
utils/readfile.h \
utils/refcntr.h \

View file

@ -39,6 +39,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h"
#include "conftree.h"
#include "debuglog.h"
@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf)
}
// Compute our data dir name, typically /usr/local/share/recoll
m_datadir = path_sharedatadir();
m_datadir = path_pkgdatadir();
// We only do the automatic configuration creation thing for the default
// config dir, not if it was specified through -c or RECOLL_CONFDIR
bool autoconfdir = false;

View file

@ -32,6 +32,7 @@
#include "rclconfig.h"
#include "rclinit.h"
#include "pathut.h"
#include "rclutil.h"
#include "unac.h"
#include "smallut.h"
#include "execmd.h"
@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags,
// Init smallut and pathut static values
pathut_init_mt();
smallut_init_mt();
rclutil_init_mt();
// Init execmd.h static PATH and PATHELT splitting
{string bogus;
ExecCmd::which("nosuchcmd", bogus);
@ -389,4 +392,3 @@ bool recoll_ismainthread()
return pthread_equal(pthread_self(), mainthread_id);
}

View file

@ -18,9 +18,6 @@
#define _RCLINIT_H_INCLUDED_
#include <string>
#ifndef NO_NAMESPACES
using std::string;
#endif
class RclConfig;
/**
@ -42,12 +39,14 @@ class RclConfig;
* default and environment
* @return the parsed configuration.
*/
enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2};
enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2};
extern RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0);
std::string& reason, const string *argcnf = 0);
inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0) {
std::string& reason,
const std::string *argcnf = 0)
{
return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf);
}

View file

@ -23,6 +23,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "debuglog.h"
#include "fstreewalk.h"
#include "beaglequeue.h"

View file

@ -28,6 +28,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "conftree.h"
#include "rclconfig.h"
#include "fstreewalk.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "smallut.h"
#include "chrono.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclmon.h"
#include "x11mon.h"
#include "cancelcheck.h"

View file

@ -21,6 +21,7 @@
#include <string>
#include "pathut.h"
#include "rclutil.h"
#include "ptmutex.h"
/// Uncompression script interface.

View file

@ -32,6 +32,7 @@ using namespace std;
#include "searchdata.h"
#include "rclquery.h"
#include "pathut.h"
#include "rclutil.h"
#include "wasatorcl.h"
#include "debuglog.h"
#include "pathut.h"

View file

@ -23,6 +23,7 @@
#include "rcldoc.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h"
/*

View file

@ -21,6 +21,7 @@
#include "rclconfig.h"
#include "rcldb.h"
#include "rclutil.h"
#include "ptmutex.h"
#include <QString>

View file

@ -34,6 +34,7 @@ using std::list;
#include "debuglog.h"
#include "rclconfig.h"
#include "smallut.h"
#include "rclutil.h"
#include "plaintorich.h"
#include "mimehandler.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "unacpp.h"
#include "conftree.h"
#include "pathut.h"
#include "rclutil.h"
#include "smallut.h"
#include "chrono.h"
#include "utf8iter.h"
@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi)
return pterm;
}
static void utf8truncate(string& s, int maxlen)
{
if (s.size() <= string::size_type(maxlen)) {
return;
}
Utf8Iter iter(s);
string::size_type pos = 0;
while (iter++ != string::npos)
if (iter.getBpos() < string::size_type(maxlen)) {
pos = iter.getBpos();
}
s.erase(pos);
}
Db::Native::Native(Db *db)
: m_rcldb(db), m_isopen(false), m_iswritable(false),
m_noversionwrite(false)

View file

@ -14,9 +14,11 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include "rcldoc.h"
#include "debuglog.h"
#include "rclutil.h"
namespace Rcl {
const string Doc::keyabs("abstract");
@ -70,5 +72,31 @@ namespace Rcl {
if (dotext)
LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str()));
}
// Copy all fields to *d while ensuring that no string data is shared
// between the two objects (threading issues): every string member is
// duplicated through assign(begin, end) instead of plain assignment, and
// the metadata map goes through map_ss_cp_noshr(). Scalar members are
// copied by value.
// @param d destination document, must not be null.
void Doc::copyto(Doc *d) const
{
    d->url.assign(url.begin(), url.end());
    d->idxurl.assign(idxurl.begin(), idxurl.end());
    d->idxi = idxi;
    d->ipath.assign(ipath.begin(), ipath.end());
    d->mimetype.assign(mimetype.begin(), mimetype.end());
    d->fmtime.assign(fmtime.begin(), fmtime.end());
    d->dmtime.assign(dmtime.begin(), dmtime.end());
    d->origcharset.assign(origcharset.begin(), origcharset.end());
    map_ss_cp_noshr(meta, &d->meta);
    d->syntabs = syntabs;
    d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
    d->fbytes.assign(fbytes.begin(), fbytes.end());
    d->dbytes.assign(dbytes.begin(), dbytes.end());
    d->sig.assign(sig.begin(), sig.end());
    d->text.assign(text.begin(), text.end());
    d->pc = pc;
    d->xdocid = xdocid;
    // idxi was already copied above: the former second assignment was
    // redundant and has been dropped.
    d->haspages = haspages;
    d->haschildren = haschildren;
    d->onlyxattr = onlyxattr;
}
}

View file

@ -163,33 +163,11 @@ class Doc {
onlyxattr = false;
}
// Copy all fields to *d while ensuring that no string data is shared
// between the two objects (threading issues): string members are
// duplicated through assign(begin, end), the metadata map through
// map_ss_cp_noshr(), scalar members by value.
// @param d destination document, must not be null.
void copyto(Doc *d) const {
    d->url.assign(url.begin(), url.end());
    d->idxurl.assign(idxurl.begin(), idxurl.end());
    d->idxi = idxi;
    d->ipath.assign(ipath.begin(), ipath.end());
    d->mimetype.assign(mimetype.begin(), mimetype.end());
    d->fmtime.assign(fmtime.begin(), fmtime.end());
    d->dmtime.assign(dmtime.begin(), dmtime.end());
    d->origcharset.assign(origcharset.begin(), origcharset.end());
    map_ss_cp_noshr(meta, &d->meta);
    d->syntabs = syntabs;
    d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
    d->fbytes.assign(fbytes.begin(), fbytes.end());
    d->dbytes.assign(dbytes.begin(), dbytes.end());
    d->sig.assign(sig.begin(), sig.end());
    d->text.assign(text.begin(), text.end());
    d->pc = pc;
    d->xdocid = xdocid;
    // idxi is copied once, above (the second assignment was redundant).
    d->haspages = haspages;
    d->haschildren = haschildren;
    d->onlyxattr = onlyxattr;
}
void copyto(Doc *d) const;
Doc()
: idxi(0), syntabs(false), pc(0), xdocid(0),
haspages(false), haschildren(false), onlyxattr(false)
{
haspages(false), haschildren(false), onlyxattr(false) {
}
/** Get value for named field. If value pointer is 0, just test existence */
bool getmeta(const string& nm, string *value = 0) const

View file

@ -32,10 +32,12 @@
#include <iostream>
#include UNORDERED_MAP_INCLUDE
using std::string;
#include "smallut.h"
using std::string;
using std::vector;
/*
Storage for the exception translations. These are chars which
should not be translated according to what UnicodeData says, but

View file

@ -71,6 +71,28 @@ typedef unsigned char UCHAR;
typedef unsigned int UINT;
typedef unsigned long ULONG;
/** Temp buffer with automatic deallocation.
 *
 * Thin owner for a malloc()ed char buffer: the memory is released with
 * free() when the object goes out of scope. The object must not be
 * copied, as both copies would then free the same pointer.
 */
struct TempBuf {
    /** Create an empty holder (no allocation). */
    TempBuf()
        : m_buf(0) {
    }
    /** Allocate n bytes. buf() returns 0 if the allocation failed. */
    TempBuf(int n)
        : m_buf((char *)malloc(n)) {
    }
    ~TempBuf() {
        // free() accepts a null pointer, no test needed.
        free(m_buf);
    }
    /** Resize the buffer to n bytes, preserving its contents.
     * @return the (possibly moved) buffer, or 0 if the reallocation
     *   failed. On failure the previous buffer stays owned by this
     *   object (and is still returned by buf()) instead of being leaked.
     */
    char *setsize(int n) {
        char *nbuf = (char *)realloc(m_buf, n);
        if (nbuf == 0 && n != 0) {
            // realloc() left the old block untouched: keep owning it.
            return 0;
        }
        m_buf = nbuf;
        return m_buf;
    }
    /** Access the current buffer (may be 0). */
    char *buf() {
        return m_buf;
    }
    char *m_buf;
};
static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);
/*

View file

@ -18,10 +18,16 @@
#ifndef TEST_CPUCONF
#include "autoconfig.h"
#include <stdlib.h>
#include "cpuconf.h"
#include "execmd.h"
#include "smallut.h"
using std::string;
using std::vector;
#if defined(__gnu_linux__)
bool getCpuConf(CpuConf& conf)
{

78
src/utils/hldata.cpp Normal file
View file

@ -0,0 +1,78 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include "hldata.h"
using std::string;
using std::map;
/** Append a human-readable dump of the highlight data to out.
 *
 * The dump lists the user terms, the user term to query term mapping,
 * the container sizes, then each group of terms followed by its slack
 * value. Each time the user group index associated to a group changes,
 * the corresponding user group is printed in parentheses before the
 * group. Nothing already present in out is modified.
 *
 * @param out string receiving the text (appended to).
 */
void HighlightData::toString(string& out)
{
    out.append("\nUser terms (orthograph): ");
    for (std::set<string>::const_iterator it = uterms.begin();
            it != uterms.end(); ++it) {
        out.append(" [").append(*it).append("]");
    }

    out.append("\nUser terms to Query terms:");
    for (map<string, string>::const_iterator it = terms.begin();
            it != terms.end(); ++it) {
        out.append("[").append(it->first).append("]->[");
        out.append(it->second).append("] ");
    }

    out.append("\nGroups: ");
    char cbuf[200];
    // snprintf: never write past the end of the local buffer.
    snprintf(cbuf, sizeof(cbuf),
             "Groups size %d grpsugidx size %d ugroups size %d",
             int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
    out.append(cbuf);

    // Index of the user group printed last. Start with an impossible value
    // so that the first group always triggers a print.
    size_t ugidx = (size_t) - 1;
    for (size_t i = 0; i < groups.size(); i++) {
        // grpsugidx, ugroups and slacks are expected to be consistent with
        // groups, but the indices are checked so that inconsistent data
        // can't cause out of bounds accesses.
        if (i < grpsugidx.size() && ugidx != grpsugidx[i]) {
            ugidx = grpsugidx[i];
            out.append("\n(");
            if (ugidx < ugroups.size()) {
                for (size_t j = 0; j < ugroups[ugidx].size(); j++) {
                    out.append("[").append(ugroups[ugidx][j]).append("] ");
                }
            }
            out.append(") ->");
        }
        out.append(" {");
        for (size_t j = 0; j < groups[i].size(); j++) {
            out.append("[").append(groups[i][j]).append("]");
        }
        out.append("}");
        if (i < slacks.size()) {
            snprintf(cbuf, sizeof(cbuf), "%d", slacks[i]);
            out.append(cbuf);
        }
    }
    out.append("\n");
}
/** Merge the contents of hl into this object.
 *
 * Terms are inserted into the term containers, and the group lists
 * (ugroups, groups, slacks) are extended with the ones from hl. The user
 * group indices coming from hl are shifted by the number of user groups
 * which were present here before the merge, so that they designate the
 * appended entries.
 *
 * @param hl data to append; expected to be a different object.
 */
void HighlightData::append(const HighlightData& hl)
{
    // Offset to apply to the incoming user group indices.
    const size_t ugbase = ugroups.size();

    uterms.insert(hl.uterms.begin(), hl.uterms.end());
    terms.insert(hl.terms.begin(), hl.terms.end());

    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());

    for (size_t k = 0; k < hl.grpsugidx.size(); k++) {
        grpsugidx.push_back(hl.grpsugidx[k] + ugbase);
    }
}

View file

@ -4,6 +4,7 @@
#include <vector>
#include <string>
#include <set>
#include <map>
/** Store data about user search terms and their expansions. This is used
* mostly for highlighting result text and walking the matches, generating

View file

@ -16,25 +16,31 @@
*/
#ifndef TEST_PATHUT
#ifdef BUILDING_RECOLL
#include "autoconfig.h"
#else
#include "config.h"
#endif
#include <stdio.h>
#ifdef _WIN32
#include "dirent.h"
#include "safefcntl.h"
#include "safeunistd.h"
#include "dirent.h"
#include "cstr.h"
#ifdef _WIN32
#include "safewindows.h"
#include "safesysstat.h"
#else
#include <fcntl.h>
#include <unistd.h>
#include <sys/param.h>
#include <pwd.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <dirent.h>
#endif
#include <math.h>
#include <errno.h>
#include <sys/types.h>
#include "safesysstat.h"
#include "ptmutex.h"
// Let's include all files where statfs can be defined and hope for no
// conflict...
@ -60,9 +66,6 @@
#include <vector>
#include "pathut.h"
#include "transcode.h"
#include "wipedir.h"
#include "md5ut.h"
using namespace std;
@ -71,9 +74,10 @@ using namespace std;
void path_slashize(string& s)
{
for (string::size_type i = 0; i < s.size(); i++) {
if (s[i] == '\\')
if (s[i] == '\\') {
s[i] = '/';
}
}
}
static bool path_strlookslikedrive(const string& s)
{
@ -82,14 +86,16 @@ static bool path_strlookslikedrive(const string& s)
static bool path_hasdrive(const string& s)
{
if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':')
if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':') {
return true;
}
return false;
}
static bool path_isdriveabs(const string& s)
{
if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/')
if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/') {
return true;
}
return false;
}
@ -130,17 +136,18 @@ string path_thisexecpath()
PathRemoveFileSpec(text);
#endif
string path = path_tchartoutf8(text);
if (path.empty())
if (path.empty()) {
path = "c:/";
}
return path;
}
string path_wingettempfilename(TCHAR *pref)
{
TCHAR buf[(MAX_PATH +1)*sizeof(TCHAR)];
TCHAR dbuf[(MAX_PATH +1)*sizeof(TCHAR)];
GetTempPath(MAX_PATH+1, dbuf);
TCHAR buf[(MAX_PATH + 1)*sizeof(TCHAR)];
TCHAR dbuf[(MAX_PATH + 1)*sizeof(TCHAR)];
GetTempPath(MAX_PATH + 1, dbuf);
GetTempFileName(dbuf, pref, 0, buf);
// Windows will have created a temp file, we delete it.
string filename = path_tchartoutf8(buf);
@ -150,10 +157,11 @@ string path_wingettempfilename(TCHAR *pref)
}
#endif
bool fsocc(const string &path, int *pc, long long *avmbs)
#if defined(HAVE_SYS_MOUNT_H) || defined(HAVE_SYS_STATFS_H) || \
defined(HAVE_SYS_STATVFS_H) || defined(HAVE_SYS_VFS_H)
bool fsocc(const string& path, int *pc, long long *avmbs)
{
static const int FSOCC_MB = 1024*1024;
static const int FSOCC_MB = 1024 * 1024;
#ifdef _WIN32
ULARGE_INTEGER freebytesavail;
ULARGE_INTEGER totalbytes;
@ -161,10 +169,12 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
&totalbytes, NULL)) {
return false;
}
if (pc)
if (pc) {
*pc = int((100 * freebytesavail.QuadPart) / totalbytes.QuadPart);
if (avmbs)
}
if (avmbs) {
*avmbs = int(totalbytes.QuadPart / FSOCC_MB);
}
return true;
#else
#ifdef sun
@ -186,8 +196,9 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
if (FSOCC_TOTAVAIL > 0) {
fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL;
}
if (pc)
if (pc) {
*pc = int(fpc);
}
if (avmbs) {
*avmbs = 0;
if (buf.f_bsize > 0) {
@ -202,54 +213,7 @@ bool fsocc(const string &path, int *pc, long long *avmbs)
return true;
#endif
}
/** Return the directory to use for temporary files.
 *
 * The value is taken from the first set variable among RECOLL_TMPDIR,
 * TMPDIR, TMP and TEMP. If none is set, the system temp path is used on
 * Windows, "/tmp" elsewhere. The result goes through path_canon() and is
 * kept in a function-local static for the following calls.
 */
const string& tmplocation()
{
    static string stmpdir;
    if (!stmpdir.empty()) {
        return stmpdir;
    }

    // Candidate environment variables, by decreasing priority.
    static const char *const envnames[] = {
        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
    };
    const char *envval = 0;
    for (unsigned int i = 0;
            envval == 0 && i < sizeof(envnames) / sizeof(envnames[0]); i++) {
        envval = getenv(envnames[i]);
    }

    if (envval != 0) {
        stmpdir = envval;
    } else {
#ifdef _WIN32
        TCHAR bufw[(MAX_PATH+1)*sizeof(TCHAR)];
        GetTempPath(MAX_PATH+1, bufw);
        stmpdir = path_tchartoutf8(bufw);
#else
        stmpdir = "/tmp";
#endif
    }
    stmpdir = path_canon(stmpdir);
    return stmpdir;
}
// Location for sample config, filters, etc. (e.g. /usr/share/recoll/).
// On Windows this is the "Share" directory besides the executable.
// Elsewhere the RECOLL_DATADIR environment variable is used if set, else
// the compiled-in RECOLL_DATADIR constant. The value is computed on the
// first call and kept in a function-local static.
const string& path_sharedatadir()
{
    static string datadir;
    if (!datadir.empty()) {
        return datadir;
    }
#ifdef _WIN32
    datadir = path_cat(path_thisexecpath(), "Share");
#else
    const char *fromenv = getenv("RECOLL_DATADIR");
    if (fromenv != 0) {
        datadir = fromenv;
    } else {
        // Not in the environment: use the compiled-in constant.
        datadir = RECOLL_DATADIR;
    }
#endif
    return datadir;
}
#endif // we have found an appropriate include file
string path_PATHsep()
{
@ -262,150 +226,17 @@ string path_PATHsep()
#endif
}
/** Create a new temporary directory.
 *
 * On non-Windows systems the name is built from a "rcltmpXXXXXX" template
 * located under tmplocation(), using mkdtemp() if HAVE_MKDTEMP is defined,
 * else mktemp() followed by an explicit mkdir(). On Windows the name comes
 * from path_wingettempfilename() and the directory is created with mkdir().
 *
 * @param tdir output: name of the created directory, erased on failure.
 * @param reason output: error message, set on failure only.
 * @return true if the directory was created, false on error.
 */
bool maketmpdir(string& tdir, string& reason)
{
#ifndef _WIN32
tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
// mkdtemp()/mktemp() modify their argument in place: work on a private
// writable copy of the template.
char *cp = strdup(tdir.c_str());
if (!cp) {
reason = "maketmpdir: out of memory (for file name !)\n";
tdir.erase();
return false;
}
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
// NOTE(review): this block is inside #ifndef _WIN32, so the
// defined(_WIN32) part of the test can't be true here.
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
#endif
// The preprocessor selects the function called inside the if() condition.
if (!
#ifdef HAVE_MKDTEMP
mkdtemp(cp)
#else
mktemp(cp)
#endif // HAVE_MKDTEMP
) {
free(cp);
// Note that the message mentions mktemp in both cases.
reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
strerror(errno);
tdir.erase();
return false;
}
tdir = cp;
free(cp);
#else // _WIN32
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
tdir = path_wingettempfilename(TEXT("rcltmp"));
#endif
// At this point the directory does not exist yet except if we used
// mkdtemp
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
if (mkdir(tdir.c_str(), 0700) < 0) {
reason = string("maketmpdir: mkdir ") + tdir + " failed";
tdir.erase();
return false;
}
#endif
return true;
}
/** Create an empty temporary file whose name ends with the given suffix.
 *
 * A unique base name is first obtained (mkstemp() on a "rcltmpfXXXXXX"
 * template under tmplocation(), or path_wingettempfilename() on Windows),
 * the file created while doing so is removed, then the final file (base
 * name + suffix) is created with O_CREAT|O_EXCL.
 * On success m_filename holds the file name. On failure m_reason is set,
 * and m_filename is left or made empty.
 *
 * @param suffix string appended to the generated name (e.g. an extension).
 */
TempFileInternal::TempFileInternal(const string& suffix)
: m_noremove(false)
{
// Because we need a specific suffix, can't use mkstemp
// well. There is a race condition between name computation and
// file creation. try to make sure that we at least don't shoot
// our own selves in the foot. maybe we'll use mkstemps one day.
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
#ifndef _WIN32
string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
// mkstemp() modifies its argument in place: use a writable copy.
char *cp = strdup(filename.c_str());
if (!cp) {
m_reason = "Out of memory (for file name !)\n";
return;
}
// Using mkstemp this way is awful (both the suffix adding and
// using mkstemp() instead of mktemp just to avoid the warnings)
int fd;
if ((fd = mkstemp(cp)) < 0) {
free(cp);
m_reason = "TempFileInternal: mkstemp failed\n";
return;
}
// Only the generated name is of interest: get rid of the file created
// by mkstemp().
close(fd);
unlink(cp);
filename = cp;
free(cp);
#else
string filename = path_wingettempfilename(TEXT("recoll"));
#endif
m_filename = filename + suffix;
// Create the final file. If open() fails, close(-1) fails too, so that
// both errors end up in the same branch.
if (close(open(m_filename.c_str(), O_CREAT|O_EXCL, 0600)) != 0) {
m_reason = string("Could not open/create") + m_filename;
m_filename.erase();
}
}
/** Delete the temporary file, except if no file name was set or if
 *  m_noremove is true. */
TempFileInternal::~TempFileInternal()
{
    if (m_filename.empty() || m_noremove) {
        return;
    }
    unlink(m_filename.c_str());
}
/** Create the temporary directory through maketmpdir(). On failure the
 *  directory name is left empty and m_reason holds the error message. */
TempDir::TempDir()
{
    const bool created = maketmpdir(m_dirname, m_reason);
    if (!created) {
        m_dirname.erase();
    }
}
/** Call wipedir() on the directory if one was created. The result of the
 *  call is ignored. */
TempDir::~TempDir()
{
    if (m_dirname.empty()) {
        return;
    }
    (void)wipedir(m_dirname, true, true);
    m_dirname.erase();
}
/** Call wipedir() on the temporary directory.
 * @return true if wipedir() returned a zero/false value, false (with
 *   m_reason set) if there is no directory or wipedir() reported a problem.
 */
bool TempDir::wipe()
{
    if (m_dirname.empty()) {
        m_reason = "TempDir::wipe: no directory !\n";
        return false;
    }
    if (!wipedir(m_dirname, false, true)) {
        return true;
    }
    m_reason = "TempDir::wipe: wipedir failed\n";
    return false;
}
void path_catslash(string &s)
void path_catslash(string& s)
{
#ifdef _WIN32
path_slashize(s);
#endif
if (s.empty() || s[s.length() - 1] != '/')
if (s.empty() || s[s.length() - 1] != '/') {
s += '/';
}
}
string path_cat(const string &s1, const string &s2)
string path_cat(const string& s1, const string& s2)
{
string res = s1;
path_catslash(res);
@ -413,7 +244,7 @@ string path_cat(const string &s1, const string &s2)
return res;
}
string path_getfather(const string &s)
string path_getfather(const string& s)
{
string father = s;
#ifdef _WIN32
@ -421,62 +252,69 @@ string path_getfather(const string &s)
#endif
// ??
if (father.empty())
if (father.empty()) {
return "./";
}
if (path_isroot(father))
if (path_isroot(father)) {
return father;
}
if (father[father.length() - 1] == '/') {
// Input ends with /. Strip it, root special case was tested above
father.erase(father.length()-1);
father.erase(father.length() - 1);
}
string::size_type slp = father.rfind('/');
if (slp == string::npos)
if (slp == string::npos) {
return "./";
}
father.erase(slp);
path_catslash(father);
return father;
}
string path_getsimple(const string &s)
string path_getsimple(const string& s)
{
string simple = s;
#ifdef _WIN32
path_slashize(simple);
#endif
if (simple.empty())
if (simple.empty()) {
return simple;
}
string::size_type slp = simple.rfind('/');
if (slp == string::npos)
if (slp == string::npos) {
return simple;
}
simple.erase(0, slp+1);
simple.erase(0, slp + 1);
return simple;
}
string path_basename(const string &s, const string &suff)
string path_basename(const string& s, const string& suff)
{
string simple = path_getsimple(s);
string::size_type pos = string::npos;
if (suff.length() && simple.length() > suff.length()) {
pos = simple.rfind(suff);
if (pos != string::npos && pos + suff.length() == simple.length())
if (pos != string::npos && pos + suff.length() == simple.length()) {
return simple.substr(0, pos);
}
}
return simple;
}
string path_suffix(const string& s)
{
string::size_type dotp = s.rfind('.');
if (dotp == string::npos)
if (dotp == string::npos) {
return string();
return s.substr(dotp+1);
}
return s.substr(dotp + 1);
}
string path_home()
@ -508,11 +346,12 @@ string path_home()
struct passwd *entry = getpwuid(uid);
if (entry == 0) {
const char *cp = getenv("HOME");
if (cp)
if (cp) {
return cp;
else
} else {
return "/";
}
}
string homedir = entry->pw_dir;
path_catslash(homedir);
@ -539,10 +378,11 @@ string path_homedata()
#endif
}
string path_tildexpand(const string &s)
string path_tildexpand(const string& s)
{
if (s.empty() || s[0] != '~')
if (s.empty() || s[0] != '~') {
return s;
}
string o = s;
#ifdef _WIN32
path_slashize(o);
@ -557,11 +397,12 @@ string path_tildexpand(const string &s)
string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1;
#ifdef _WIN32
// Dont know what this means. Just replace with HOME
o.replace(0, l+1, path_home());
o.replace(0, l + 1, path_home());
#else
struct passwd *entry = getpwnam(s.substr(1, l).c_str());
if (entry)
o.replace(0, l+1, entry->pw_dir);
if (entry) {
o.replace(0, l + 1, entry->pw_dir);
}
#endif
}
return o;
@ -569,17 +410,19 @@ string path_tildexpand(const string &s)
bool path_isroot(const string& path)
{
if (path.size() == 1 && path[0] == '/')
if (path.size() == 1 && path[0] == '/') {
return true;
}
#ifdef _WIN32
if (path.size() == 3 && isalpha(path[0]) && path[1] == ':' &&
(path[2] == '/' || path[2] == '\\'))
(path[2] == '/' || path[2] == '\\')) {
return true;
}
#endif
return false;
}
bool path_isabsolute(const string &path)
bool path_isabsolute(const string& path)
{
if (!path.empty() && (path[0] == '/'
#ifdef _WIN32
@ -591,10 +434,11 @@ bool path_isabsolute(const string &path)
return false;
}
string path_absolute(const string &is)
string path_absolute(const string& is)
{
if (is.length() == 0)
if (is.length() == 0) {
return is;
}
string s = is;
if (!path_isabsolute(s)) {
char buf[MAXPATHLEN];
@ -610,10 +454,11 @@ string path_absolute(const string &is)
}
#include <smallut.h>
string path_canon(const string &is, const string* cwd)
string path_canon(const string& is, const string* cwd)
{
if (is.length() == 0)
if (is.length() == 0) {
return is;
}
string s = is;
#ifdef _WIN32
path_slashize(s);
@ -639,10 +484,11 @@ string path_canon(const string &is, const string* cwd)
stringToTokens(s, elems, "/");
vector<string> cleaned;
for (vector<string>::const_iterator it = elems.begin();
it != elems.end(); it++){
it != elems.end(); it++) {
if (*it == "..") {
if (!cleaned.empty())
if (!cleaned.empty()) {
cleaned.pop_back();
}
} else if (it->empty() || *it == ".") {
} else {
cleaned.push_back(*it);
@ -674,10 +520,11 @@ bool makepath(const string& ipath)
stringToTokens(path, elems, "/");
path = "/";
for (vector<string>::const_iterator it = elems.begin();
it != elems.end(); it++){
it != elems.end(); it++) {
#ifdef _WIN32
if (it == elems.begin() && path_strlookslikedrive(*it))
if (it == elems.begin() && path_strlookslikedrive(*it)) {
path = "";
}
#endif
path += *it;
// Not using path_isdir() here, because this cant grok symlinks
@ -695,30 +542,35 @@ bool makepath(const string& ipath)
bool path_isdir(const string& path)
{
struct stat st;
if (lstat(path.c_str(), &st) < 0)
if (lstat(path.c_str(), &st) < 0) {
return false;
if (S_ISDIR(st.st_mode))
}
if (S_ISDIR(st.st_mode)) {
return true;
}
return false;
}
long long path_filesize(const string& path)
{
struct stat st;
if (stat(path.c_str(), &st) < 0)
if (stat(path.c_str(), &st) < 0) {
return -1;
}
return (long long)st.st_size;
}
int path_fileprops(const std::string path, struct stat *stp, bool follow)
{
if (!stp)
if (!stp) {
return -1;
}
memset(stp, 0, sizeof(struct stat));
struct stat mst;
int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
if (ret != 0)
if (ret != 0) {
return ret;
}
stp->st_size = mst.st_size;
stp->st_mode = mst.st_mode;
stp->st_mtime = mst.st_mtime;
@ -807,7 +659,7 @@ string url_encode(const string& url, string::size_type offs)
c == '`' ||
c == '{' ||
c == '|' ||
c == '}' ) {
c == '}') {
out += '%';
out += h[(c >> 4) & 0xf];
out += h[c & 0xf];
@ -822,45 +674,24 @@ string url_gpath(const string& url)
{
// Remove the access schema part (or whatever it's called)
string::size_type colon = url.find_first_of(":");
if (colon == string::npos || colon == url.size() - 1)
if (colon == string::npos || colon == url.size() - 1) {
return url;
}
// If there are non-alphanum chars before the ':', then there
// probably is no scheme. Whatever...
for (string::size_type i = 0; i < colon; i++) {
if (!isalnum(url.at(i)))
if (!isalnum(url.at(i))) {
return url;
}
}
// In addition we canonize the path to remove empty host parts
// (for compatibility with older versions of recoll where file://
// was hardcoded, but the local path was used for doc
// identification.
return path_canon(url.substr(colon+1));
return path_canon(url.substr(colon + 1));
}
/** Variant of url_gpath().
 *
 * Outside Windows this just returns url_gpath(url). On Windows, the path
 * part is rewritten so that a leading drive spec "c:" becomes "/c"
 * ("c:/dir" yields "/c/dir"). A slash is inserted after the drive letter
 * if the path is not drive-absolute. If the path has no drive
 * specification, the result is empty.
 */
string url_gpathS(const string& url)
{
#ifdef _WIN32
    const string gp = url_gpath(url);
    string res;
    if (!path_hasdrive(gp)) {
        return res;
    }
    res.append(1, '/');
    res.append(1, gp[0]);
    if (!path_isdriveabs(gp)) {
        // This should be an error really
        res.append(1, '/');
    }
    // Everything after the "x:" prefix
    res.append(gp.substr(2));
    return res;
#else
    return url_gpath(url);
#endif
}
string url_parentfolder(const string& url)
{
// In general, the parent is the directory above the full path
@ -876,24 +707,16 @@ string url_parentfolder(const string& url)
}
/** Return the default name of the configuration subdirectory: "Recoll"
 *  on Windows, ".recoll" elsewhere. */
string path_defaultrecollconfsubdir()
{
#ifdef _WIN32
    static const char subdir[] = "Recoll";
#else
    static const char subdir[] = ".recoll";
#endif
    return string(subdir);
}
// Convert to file path if url is like file:
// Note: this only works with our internal pseudo-urls which are not
// encoded/escaped
string fileurltolocalpath(string url)
{
if (url.find("file://") == 0)
if (url.find("file://") == 0) {
url = url.substr(7, string::npos);
else
} else {
return string();
}
#ifdef _WIN32
// Absolute file urls are like: file:///c:/mydir/...
@ -908,21 +731,24 @@ string fileurltolocalpath(string url)
// part after # if it is preceded by .html
string::size_type pos;
if ((pos = url.rfind(".html#")) != string::npos) {
url.erase(pos+5);
url.erase(pos + 5);
} else if ((pos = url.rfind(".htm#")) != string::npos) {
url.erase(pos+4);
url.erase(pos + 4);
}
return url;
}
static const string cstr_fileu("file://");
string path_pathtofileurl(const string& path)
{
// We're supposed to receive a canonic absolute path, but on windows we
// may need to add a '/' in front of the drive spec
string url(cstr_fileu);
if (path.empty() || path[0] != '/')
if (path.empty() || path[0] != '/') {
url.push_back('/');
}
url += path;
return url;
}
@ -932,17 +758,6 @@ bool urlisfileurl(const string& url)
return url.find("file://") == 0;
}
// Printable url: transcode the input from the given charset to UTF-8.
// If the transcoding fails or reports errors, the output is set to the
// result of url_encode(in, 7) instead.
// Always returns true.
bool printableUrl(const string &fcharset, const string &in, string &out)
{
    int errcnt = 0;
    const bool converted = transcode(in, out, fcharset, "UTF-8", &errcnt);
    if (converted && errcnt == 0) {
        return true;
    }
    out = url_encode(in, 7);
    return true;
}
bool readdir(const string& dir, string& reason, set<string>& entries)
{
struct stat st;
@ -971,17 +786,20 @@ bool readdir(const string& dir, string& reason, set<string>& entries)
struct dirent *ent;
while ((ent = readdir(d)) != 0) {
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
continue;
}
entries.insert(ent->d_name);
}
out:
if (d)
if (d) {
closedir(d);
}
reason = msg.str();
if (reason.empty())
if (reason.empty()) {
return true;
}
return false;
}
@ -992,34 +810,38 @@ out:
// alone.
Pidfile::~Pidfile()
{
if (m_fd >= 0)
if (m_fd >= 0) {
::close(m_fd);
}
m_fd = -1;
}
pid_t Pidfile::read_pid()
{
int fd = ::open(m_path.c_str(), O_RDONLY);
if (fd == -1)
return (pid_t)-1;
if (fd == -1) {
return (pid_t) - 1;
}
char buf[16];
int i = read(fd, buf, sizeof(buf) - 1);
::close(fd);
if (i <= 0)
return (pid_t)-1;
if (i <= 0) {
return (pid_t) - 1;
}
buf[i] = '\0';
char *endptr;
pid_t pid = strtol(buf, &endptr, 10);
if (endptr != &buf[i])
return (pid_t)-1;
if (endptr != &buf[i]) {
return (pid_t) - 1;
}
return pid;
}
int Pidfile::flopen()
{
const char *path = m_path.c_str();
if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) {
if ((m_fd = ::open(path, O_RDWR | O_CREAT, 0644)) == -1) {
m_reason = "Open failed: [" + m_path + "]: " + strerror(errno);
return -1;
}
@ -1098,79 +920,10 @@ int Pidfile::remove()
return unlink(m_path.c_str());
}
// Freedesktop standard paths for cache directory (thumbnails are now in
// there): value of XDG_CACHE_HOME if it is set, else ".cache" inside the
// home directory. Computed on the first call and kept in a static.
static const string& xdgcachedir()
{
    static string xdgcache;
    if (!xdgcache.empty()) {
        return xdgcache;
    }
    const char *envval = getenv("XDG_CACHE_HOME");
    if (envval != 0) {
        xdgcache = string(envval);
    } else {
        xdgcache = path_cat(path_home(), ".cache");
    }
    return xdgcache;
}
// Thumbnails top directory: "thumbnails" inside the cache directory if
// access() succeeds on it, else ".thumbnails" inside the home directory.
// Computed on the first call and kept in a static.
static const string& thumbnailsdir()
{
    static string thumbnailsd;
    if (!thumbnailsd.empty()) {
        return thumbnailsd;
    }
    thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
    const bool missing = access(thumbnailsd.c_str(), 0) != 0;
    if (missing) {
        thumbnailsd = path_cat(path_home(), ".thumbnails");
    }
    return thumbnailsd;
}
// Place for 256x256 files
static const string thmbdirlarge = "large";
// 128x128
static const string thmbdirnormal = "normal";
// Compute the thumbnail file name for an url: hexadecimal print of the
// MD5 digest of the url_encode()d url, followed by ".png".
static void thumbname(const string& url, string& name)
{
    string encoded = url_encode(url);
    string md5bin;
    MD5String(encoded, md5bin);
    MD5HexPrint(md5bin, name);
    name.append(".png");
}
/** Compute the path of the thumbnail file for an url.
 *
 * For sizes up to 128, the file in the "normal" subdirectory of the
 * thumbnails directory is tried first, then the one in the "large"
 * subdirectory. For bigger sizes only "large" is tried.
 *
 * @param url the document url.
 * @param size requested thumbnail size.
 * @param path output: path of the thumbnail file. If no readable file
 *   was found, this is set to the location matching the requested size
 *   ("normal" for sizes up to 128, else "large").
 * @return true if a readable thumbnail file exists at path.
 */
bool thumbPathForUrl(const string& url, int size, string& path)
{
    string name;
    thumbname(url, name);

    const bool wantsmall = size <= 128;
    string normalpath;
    if (wantsmall) {
        normalpath = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
        path = normalpath;
        if (access(path.c_str(), R_OK) == 0) {
            return true;
        }
    }

    path = path_cat(path_cat(thumbnailsdir(), thmbdirlarge), name);
    if (access(path.c_str(), R_OK) == 0) {
        return true;
    }

    // File does not exist. Path corresponds to the large version at this
    // point, fix it if needed. The normal version lives under the
    // thumbnails directory like the large one (the code used to build
    // this path under the home directory, which never matched the
    // location tested above).
    if (wantsmall) {
        path = normalpath;
    }
    return false;
}
// Call funcs that need static init (not initially reentrant)
void pathut_init_mt()
{
path_home();
tmplocation();
thumbnailsdir();
path_sharedatadir();
}
@ -1185,8 +938,9 @@ void path_to_thumb(const string& _input)
{
string input(_input);
// Make absolute path if needed
if (input[0] != '/')
if (input[0] != '/') {
input = path_absolute(input);
}
input = string("file://") + path_canon(input);
@ -1200,28 +954,30 @@ const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
"/dir1/dir2",
"./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
"/dir/.c", "/dir/toto.txt", "toto.txt1"
};
};
const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub",
"~nosuch", "~nosuch/sub"};
"~nosuch", "~nosuch/sub"
};
int nttvec = sizeof(ttvec) / sizeof(string);
const char *thisprog;
int main(int argc, const char **argv)
{
thisprog = *argv++;argc--;
thisprog = *argv++;
argc--;
string s;
vector<string>::const_iterator it;
#if 0
for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl;
}
for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl;
}
for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) {
cout << tstvec[i] << " Basename " <<
path_basename(tstvec[i], ".txt") << endl;
}
@ -1251,10 +1007,12 @@ int main(int argc, const char **argv)
cerr << "Usage: trpathut <dir> <pattern>" << endl;
exit(1);
}
string dir = *argv++;argc--;
string pattern = *argv++;argc--;
string dir = *argv++;
argc--;
string pattern = *argv++;
argc--;
vector<string> matched = path_dirglob(dir, pattern);
for (it = matched.begin(); it != matched.end();it++) {
for (it = matched.begin(); it != matched.end(); it++) {
cout << *it << endl;
}
#endif
@ -1264,7 +1022,8 @@ int main(int argc, const char **argv)
fprintf(stderr, "Usage: fsocc: trpathut <path>\n");
exit(1);
}
string path = *argv++;argc--;
string path = *argv++;
argc--;
int pc;
long long blocks;
@ -1303,9 +1062,10 @@ int main(int argc, const char **argv)
}
path_to_thumb(input);
} else {
while (getline(cin, input))
while (getline(cin, input)) {
path_to_thumb(input);
}
}
exit(0);
@ -1316,7 +1076,8 @@ int main(int argc, const char **argv)
cerr << "Usage: trpathut <filename>" << endl;
exit(1);
}
string fn = *argv++;argc--;
string fn = *argv++;
argc--;
string ext = path_suffix(fn);
cout << "Suffix: [" << ext << "]" << endl;
return 0;
@ -1327,7 +1088,8 @@ int main(int argc, const char **argv)
cerr << "Usage: trpathut url" << endl;
exit(1);
}
string url = *argv++;argc--;
string url = *argv++;
argc--;
cout << "File: [" << fileurltolocalpath(url) << "]\n";
return 0;

View file

@ -16,45 +16,42 @@
*/
#ifndef _PATHUT_H_INCLUDED_
#define _PATHUT_H_INCLUDED_
#include "autoconfig.h"
#include <string>
#include <vector>
#include <set>
#include MEMORY_INCLUDE
// Must be called in main thread before starting other threads
extern void pathut_init_mt();
/// Add a / at the end if none there yet.
extern void path_catslash(std::string &s);
extern void path_catslash(std::string& s);
/// Concatenate 2 paths
extern std::string path_cat(const std::string &s1, const std::string &s2);
extern std::string path_cat(const std::string& s1, const std::string& s2);
/// Get the simple file name (get rid of any directory path prefix
extern std::string path_getsimple(const std::string &s);
extern std::string path_getsimple(const std::string& s);
/// Simple file name + optional suffix stripping
extern std::string path_basename(const std::string &s,
const std::string &suff = std::string());
extern std::string path_basename(const std::string& s,
const std::string& suff = std::string());
/// Component after last '.'
extern std::string path_suffix(const std::string &s);
extern std::string path_suffix(const std::string& s);
/// Get the father directory
extern std::string path_getfather(const std::string &s);
extern std::string path_getfather(const std::string& s);
/// Get the current user's home directory
extern std::string path_home();
/// Expand ~ at the beginning of std::string
extern std::string path_tildexpand(const std::string &s);
extern std::string path_tildexpand(const std::string& s);
/// Use getcwd() to make absolute path if needed. Beware: ***this can fail***
/// we return an empty path in this case.
extern std::string path_absolute(const std::string &s);
extern std::string path_absolute(const std::string& s);
/// Clean up path by removing duplicated / and resolving ../ + make it absolute
extern std::string path_canon(const std::string &s, const std::string *cwd=0);
extern std::string path_canon(const std::string& s, const std::string *cwd = 0);
/// Use glob(3) to return the file names matching pattern inside dir
extern std::vector<std::string> path_dirglob(const std::string &dir,
extern std::vector<std::string> path_dirglob(const std::string& dir,
const std::string pattern);
/// Encode according to rfc 1738
extern std::string url_encode(const std::string& url,
std::string::size_type offs = 0);
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string &fcharset,
const std::string &in, std::string &out);
//// Convert to file path if url is like file://. This modifies the
//// input (and returns a copy for convenience)
extern std::string fileurltolocalpath(std::string url);
@ -67,12 +64,6 @@ extern std::string url_parentfolder(const std::string& url);
/// routine, it does the right thing only in the recoll context
extern std::string url_gpath(const std::string& url);
/// Same but, in the case of a Windows local path, also turn "c:/" into
/// "/c/" This should be used only for splitting the path in rcldb, it
/// would better be local in there, but I prefer to keep all the
/// system-specific path stuff in pathut
extern std::string url_gpathS(const std::string& url);
/// Stat parameter and check if it's a directory
extern bool path_isdir(const std::string& path);
@ -103,26 +94,17 @@ extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries);
/** A small wrapper around statfs et al, to return percentage of disk
occupation */
bool fsocc(const std::string &path, int *pc, // Percent occupied
long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024
);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
occupation
@param[output] pc percent occupied
@param[output] avmbs Mbs available to non-superuser. Mb=1024*1024
*/
bool fsocc(const std::string& path, int *pc, long long *avmbs = 0);
/// mkdir -p
extern bool makepath(const std::string& path);
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// Where we create the user data subdirs
extern std::string path_homedata();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_sharedatadir();
/// Test if path is absolute
extern bool path_isabsolute(const std::string& s);
@ -137,52 +119,6 @@ extern std::string path_pathtofileurl(const std::string& path);
void path_slashize(std::string& s);
#endif
/// Temporary file class
class TempFileInternal {
public:
TempFileInternal(const std::string& suffix);
~TempFileInternal();
const char *filename()
{
return m_filename.c_str();
}
const std::string &getreason()
{
return m_reason;
}
void setnoremove(bool onoff)
{
m_noremove = onoff;
}
bool ok()
{
return !m_filename.empty();
}
private:
std::string m_filename;
std::string m_reason;
bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
class TempDir {
public:
TempDir();
~TempDir();
const char *dirname() {return m_dirname.c_str();}
const std::string &getreason() {return m_reason;}
bool ok() {return !m_dirname.empty();}
/// Recursively delete contents but not self.
bool wipe();
private:
std::string m_dirname;
std::string m_reason;
TempDir(const TempDir &) {}
TempDir& operator=(const TempDir &) {return *this;};
};
/// Lock/pid file class. This is quite close to the pidfile_xxx
/// utilities in FreeBSD with a bit more encapsulation. I'd have used
/// the freebsd code if it was available elsewhere
@ -200,7 +136,9 @@ public:
int close();
/// Delete the pid file
int remove();
const std::string& getreason() {return m_reason;}
const std::string& getreason() {
return m_reason;
}
private:
std::string m_path;
int m_fd;
@ -209,14 +147,4 @@ private:
int flopen();
};
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size, std::string& path);
// Must be called in main thread before starting other threads
extern void pathut_init_mt();
#endif /* _PATHUT_H_INCLUDED_ */

411
src/utils/rclutil.cpp Normal file
View file

@ -0,0 +1,411 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_RCLUTIL
#include "autoconfig.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "safefcntl.h"
#include "safeunistd.h"
#include "dirent.h"
#include "cstr.h"
#ifdef _WIN32
#include "safewindows.h"
#else
#include <sys/param.h>
#include <pwd.h>
#include <sys/file.h>
#endif
#include <math.h>
#include <errno.h>
#include <sys/types.h>
#include "safesysstat.h"
#include "ptmutex.h"
#include "rclutil.h"
#include "pathut.h"
#include "wipedir.h"
#include "transcode.h"
#include "md5ut.h"
using namespace std;
// Copy every entry of s into *d. Each key and value is rebuilt from an
// iterator range instead of being copy-constructed, so that the strings
// stored in *d do not share their data with the ones in s (the copy is
// meant to be handed to another thread).
// Entries whose key already exists in *d are left untouched (insert()).
void map_ss_cp_noshr(const map<string, string> s, map<string, string> *d)
{
    typedef map<string, string>::const_iterator EntryIt;
    const EntryIt last = s.end();
    for (EntryIt cur = s.begin(); cur != last; ++cur) {
        const string& key = cur->first;
        const string& value = cur->second;
        d->insert(pair<string, string>(string(key.begin(), key.end()),
                                       string(value.begin(), value.end())));
    }
}
// Name of the sub-directory holding the default recoll configuration:
// "Recoll" on Windows, ".recoll" elsewhere.
string path_defaultrecollconfsubdir()
{
#ifdef _WIN32
    static const char subdir[] = "Recoll";
#else
    static const char subdir[] = ".recoll";
#endif
    return string(subdir);
}
// Location for sample config, filters, etc. (e.g. /usr/share/recoll/).
// The value is computed on the first call and cached in a function-local
// static. On Windows it is the "Share" directory next to the executable
// path; elsewhere it is $RECOLL_DATADIR if set in the environment, else
// the compiled-in RECOLL_DATADIR constant.
const string& path_pkgdatadir()
{
    static string datadir;
    if (!datadir.empty()) {
        // Already computed
        return datadir;
    }
#ifdef _WIN32
    datadir = path_cat(path_thisexecpath(), "Share");
#else
    const char *envvalue = getenv("RECOLL_DATADIR");
    if (envvalue != 0) {
        datadir = envvalue;
    } else {
        // If not in environment, use the compiled-in constant.
        datadir = RECOLL_DATADIR;
    }
#endif
    return datadir;
}
// Printable url: this is used to transcode from the system charset
// into either utf-8 if transcoding succeeds, or url-encoded
bool printableUrl(const string& fcharset, const string& in, string& out)
{
int ecnt = 0;
if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
out = url_encode(in, 7);
}
return true;
}
// Return the path computed by url_gpath() for the url. On Windows, a path
// which has a drive is additionally rewritten to begin with
// "/<first character of the path>" followed by the rest of the path.
// NOTE(review): on Windows, when path_hasdrive() is false the result is an
// empty string (nothing is ever appended to nu) -- confirm this is intended.
string url_gpathS(const string& url)
{
#ifdef _WIN32
string u = url_gpath(url);
string nu;
if (path_hasdrive(u)) {
// Output begins with '/' followed by the first character of the path
nu.append(1, '/');
nu.append(1, u[0]);
if (path_isdriveabs(u)) {
// Append everything after the first two characters (presumably the
// drive letter and ':' -- verify against path_isdriveabs())
nu.append(u.substr(2));
} else {
// This should be an error really
// Drive-relative path: insert the missing '/' before the remainder
nu.append(1, '/');
nu.append(u.substr(2));
}
}
return nu;
#else
return url_gpath(url);
#endif
}
// Return the directory under which temporary files and directories are
// created. The first of $RECOLL_TMPDIR, $TMPDIR, $TMP, $TEMP which is set
// in the environment is used; if none is, the fallback is GetTempPath() on
// Windows and "/tmp" elsewhere. The result goes through path_canon() and
// is cached in a function-local static.
const string& tmplocation()
{
    static string stmpdir;
    if (!stmpdir.empty()) {
        // Already computed
        return stmpdir;
    }

    // Candidate environment variables, by decreasing priority
    static const char *const envnames[] = {
        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
    };
    const unsigned int nenvnames = sizeof(envnames) / sizeof(envnames[0]);
    const char *fromenv = 0;
    for (unsigned int i = 0; fromenv == 0 && i < nenvnames; i++) {
        fromenv = getenv(envnames[i]);
    }

    if (fromenv != 0) {
        stmpdir = fromenv;
    } else {
#ifdef _WIN32
        TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)];
        GetTempPath(MAX_PATH + 1, bufw);
        stmpdir = path_tchartoutf8(bufw);
#else
        stmpdir = "/tmp";
#endif
    }
    stmpdir = path_canon(stmpdir);
    return stmpdir;
}
// Create a temporary directory inside tmplocation().
// @param[out] tdir   path of the created directory. Erased on failure.
// @param[out] reason error description, set only on failure.
// @return true if the directory was created, false otherwise.
bool maketmpdir(string& tdir, string& reason)
{
#ifndef _WIN32
    tdir = path_cat(tmplocation(), "rcltmpXXXXXX");

    // mkdtemp()/mktemp() modify the template in place: work on a copy
    char *cp = strdup(tdir.c_str());
    if (!cp) {
        reason = "maketmpdir: out of memory (for file name !)\n";
        tdir.erase();
        return false;
    }

    // There is a race condition between name computation and
    // mkdir. try to make sure that we at least don't shoot ourselves
    // in the foot
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
    static PTMutexInit mlock;
    PTMutexLocker lock(mlock);
#endif

#ifdef HAVE_MKDTEMP
    const bool named = mkdtemp(cp) != 0;
#else
    // Some mktemp() implementations signal failure by emptying the
    // template instead of returning a null pointer: check for both.
    const bool named = mktemp(cp) != 0 && *cp != 0;
#endif // HAVE_MKDTEMP
    if (!named) {
        // Save errno before free() gets a chance to modify it
        const int err = errno;
        free(cp);
        reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
                 strerror(err);
        tdir.erase();
        return false;
    }
    tdir = cp;
    free(cp);
#else // _WIN32
    // There is a race condition between name computation and
    // mkdir. try to make sure that we at least don't shoot ourselves
    // in the foot
    static PTMutexInit mlock;
    PTMutexLocker lock(mlock);
    tdir = path_wingettempfilename(TEXT("rcltmp"));
#endif

    // At this point the directory does not exist yet except if we used
    // mkdtemp
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
    if (mkdir(tdir.c_str(), 0700) < 0) {
        const int err = errno;
        reason = string("maketmpdir: mkdir ") + tdir + " failed : " +
                 strerror(err);
        tdir.erase();
        return false;
    }
#endif

    return true;
}
// Create an empty temporary file, with a name ending in 'suffix', inside
// tmplocation(). On failure, the file name is left empty (ok() returns
// false) and the reason string describes the problem.
TempFileInternal::TempFileInternal(const string& suffix)
    : m_noremove(false)
{
    // Because we need a specific suffix, can't use mkstemp
    // well. There is a race condition between name computation and
    // file creation. try to make sure that we at least don't shoot
    // our own selves in the foot. maybe we'll use mkstemps one day.
    static PTMutexInit mlock;
    PTMutexLocker lock(mlock);

#ifndef _WIN32
    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
    char *cp = strdup(filename.c_str());
    if (!cp) {
        m_reason = "Out of memory (for file name !)\n";
        return;
    }

    // Using mkstemp this way is awful (both the suffix adding and
    // using mkstemp() instead of mktemp just to avoid the warnings)
    int fd;
    if ((fd = mkstemp(cp)) < 0) {
        free(cp);
        m_reason = "TempFileInternal: mkstemp failed\n";
        return;
    }
    // Only the unique name is of interest: get rid of the file
    close(fd);
    unlink(cp);
    filename = cp;
    free(cp);
#else
    string filename = path_wingettempfilename(TEXT("recoll"));
#endif

    m_filename = filename + suffix;
    // Create the file exclusively. The descriptor is only closed if the
    // open succeeded, so that errno describes the actual failure.
    const int newfd = open(m_filename.c_str(), O_CREAT | O_EXCL, 0600);
    if (newfd < 0 || close(newfd) != 0) {
        const int err = errno;
        m_reason = string("Could not open/create ") + m_filename + " : " +
                   strerror(err);
        m_filename.erase();
    }
}
// Remove the file, unless it could not be created in the first place or
// removal was disabled with setnoremove().
TempFileInternal::~TempFileInternal()
{
    const bool keep = m_filename.empty() || m_noremove;
    if (keep) {
        return;
    }
    unlink(m_filename.c_str());
}
// Create the directory with maketmpdir(). On failure the directory name
// is left empty and the reason string holds the error set by maketmpdir().
TempDir::TempDir()
{
    const bool created = maketmpdir(m_dirname, m_reason);
    if (!created) {
        m_dirname.erase();
    }
}
// Call wipedir() on the directory (its result is ignored), then forget
// the name. Nothing to do if the directory name is empty.
TempDir::~TempDir()
{
    if (m_dirname.empty()) {
        return;
    }
    (void)wipedir(m_dirname, true, true);
    m_dirname.erase();
}
// Call wipedir() on the directory. Returns false, with the reason string
// set, if there is no directory or if wipedir() returns a non-zero value.
bool TempDir::wipe()
{
    if (!m_dirname.empty()) {
        if (wipedir(m_dirname, false, true) != 0) {
            m_reason = "TempDir::wipe: wipedir failed\n";
            return false;
        }
        return true;
    }
    m_reason = "TempDir::wipe: no directory !\n";
    return false;
}
// Freedesktop standard path for the cache directory (thumbnails are now in
// there): $XDG_CACHE_HOME if set in the environment, else the ".cache"
// sub-directory of the home directory. Computed once and cached.
static const string& xdgcachedir()
{
    static string xdgcache;
    if (!xdgcache.empty()) {
        return xdgcache;
    }
    const char *envvalue = getenv("XDG_CACHE_HOME");
    if (envvalue != 0) {
        xdgcache = string(envvalue);
    } else {
        xdgcache = path_cat(path_home(), ".cache");
    }
    return xdgcache;
}
// Top thumbnails directory: the "thumbnails" sub-directory of the XDG
// cache directory if it can be accessed, else the ".thumbnails"
// sub-directory of the home directory. Computed once and cached.
static const string& thumbnailsdir()
{
    static string thumbnailsd;
    if (!thumbnailsd.empty()) {
        return thumbnailsd;
    }
    thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
    const bool accessible = access(thumbnailsd.c_str(), 0) == 0;
    if (!accessible) {
        thumbnailsd = path_cat(path_home(), ".thumbnails");
    }
    return thumbnailsd;
}
// Name of the sub-directory of the thumbnails directory holding the
// 256x256 files
static const string thmbdirlarge = "large";
// Name of the sub-directory of the thumbnails directory holding the
// 128x128 files
static const string thmbdirnormal = "normal";
// Compute the thumbnail file name for a url: the hexadecimal print of the
// MD5 digest of the url-encoded url, followed by ".png".
static void thumbname(const string& url, string& name)
{
    string encoded = url_encode(url);
    string md5;
    MD5String(encoded, md5);
    MD5HexPrint(md5, name);
    name.append(".png");
}
// Compute the thumbnail file path for a url.
// @param url   the document url.
// @param size  requested thumbnail size. Up to 128, the "normal"
//     thumbnail is looked up first, then the "large" one. Above 128, only
//     the "large" one is considered.
// @param[out] path  path of the first readable thumbnail found. If none
//     exists, the path where the thumbnail for the requested size would be
//     stored, inside the thumbnails directory.
// @return true if a readable thumbnail file exists at the returned path.
bool thumbPathForUrl(const string& url, int size, string& path)
{
    string name;
    thumbname(url, name);

    const bool wantnormal = size <= 128;
    string normalpath;
    if (wantnormal) {
        normalpath = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
        if (access(normalpath.c_str(), R_OK) == 0) {
            path = normalpath;
            return true;
        }
    }

    path = path_cat(path_cat(thumbnailsdir(), thmbdirlarge), name);
    if (access(path.c_str(), R_OK) == 0) {
        return true;
    }

    // File does not exist. Path corresponds to the large version at this
    // point, fix it if needed. The normal version lives in the thumbnails
    // directory too, not directly inside the home directory.
    if (wantnormal) {
        path = normalpath;
    }
    return false;
}
// Call each of the accessors below once, so that their function-local
// static values get computed here. The returned references are not used.
void rclutil_init_mt()
{
    (void)path_pkgdatadir();
    (void)tmplocation();
    (void)thumbnailsdir();
}
#else // TEST_RCLUTIL
void path_to_thumb(const string& _input)
{
string input(_input);
// Make absolute path if needed
if (input[0] != '/') {
input = path_absolute(input);
}
input = string("file://") + path_canon(input);
string path;
//path = url_encode(input, 7);
thumbPathForUrl(input, 7, path);
cout << path << endl;
}
// Program name, set from argv[0] in main()
const char *thisprog;
// Test driver, only compiled when TEST_RCLUTIL is defined. The only test
// it contains (thumbnail path computation) is currently disabled by the
// #if 0 below, so that the program does nothing beyond consuming argv[0].
int main(int argc, const char **argv)
{
// Skip the program name
thisprog = *argv++;
argc--;
string s;
vector<string>::const_iterator it;
#if 0
// Thumbnail path test: accept at most one file path argument
if (argc > 1) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
string input;
if (argc == 1) {
input = *argv++;
if (input.empty()) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
path_to_thumb(input);
} else {
// No argument: process one file path per line of standard input
while (getline(cin, input)) {
path_to_thumb(input);
}
}
exit(0);
#endif
}
#endif // TEST_RCLUTIL

112
src/utils/rclutil.h Normal file
View file

@ -0,0 +1,112 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLUTIL_H_INCLUDED_
#define _RCLUTIL_H_INCLUDED_
#include "autoconfig.h"
// Misc stuff not generic enough to get into smallut or pathut
#include <map>
#include <string>
#include MEMORY_INCLUDE
extern void rclutil_init_mt();
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_pkgdatadir();
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string& fcharset,
const std::string& in, std::string& out);
/// Same as url_gpath() but, in the case of a Windows local path, also turn
/// "c:/" into "/c/". This should be used only for splitting the path in rcldb.
extern std::string url_gpathS(const std::string& url);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else $TMP
/// else $TEMP, else the system default (GetTempPath() on Windows, /tmp
/// elsewhere)
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
/// Temporary file class. The constructor creates an empty file with the
/// requested name suffix inside the temp location. The destructor removes
/// the file unless setnoremove(true) was called.
class TempFileInternal {
public:
    /// Create the file. Check ok() afterwards, and getreason() on failure.
    TempFileInternal(const std::string& suffix);
    ~TempFileInternal();
    /// Path of the temporary file, empty string if the creation failed.
    const char *filename() const {
        return m_filename.c_str();
    }
    /// Error description, set when the creation failed.
    const std::string& getreason() const {
        return m_reason;
    }
    /// If set to true, the destructor leaves the file in place.
    void setnoremove(bool onoff) {
        m_noremove = onoff;
    }
    /// True if the file could be created.
    bool ok() const {
        return !m_filename.empty();
    }
private:
    std::string m_filename;
    std::string m_reason;
    bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
class TempDir {
public:
    /// Create the directory. Check ok() afterwards, and getreason() on
    /// failure.
    TempDir();
    ~TempDir();
    /// Path of the directory, empty string if the creation failed.
    const char *dirname() const {
        return m_dirname.c_str();
    }
    /// Description of the last error.
    const std::string& getreason() const {
        return m_reason;
    }
    /// True if the directory could be created.
    bool ok() const {
        return !m_dirname.empty();
    }
    /// Recursively delete contents but not self.
    bool wipe();
private:
    std::string m_dirname;
    std::string m_reason;
    // Copy operations are private: the class is not meant to be copied
    TempDir(const TempDir&) {}
    TempDir& operator=(const TempDir&) {
        return *this;
    }
};
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size,
std::string& path);
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string, std::string> s,
std::map<std::string, std::string> *d);
#endif /* _RCLUTIL_H_INCLUDED_ */

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes
/* Copyright (C) 2004-2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -16,7 +16,11 @@
*/
#ifndef TEST_SMALLUT
#ifdef BUILDING_RECOLL
#include "autoconfig.h"
#else
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
@ -34,23 +38,10 @@
#include UNORDERED_SET_INCLUDE
#include "smallut.h"
#include "utf8iter.h"
#include "hldata.h"
#include "cstr.h"
using namespace std;
void map_ss_cp_noshr(const map<string,string> s, map<string,string> *d)
{
for (map<string,string>::const_iterator it= s.begin();
it != s.end(); it++) {
d->insert(
pair<string,string>(string(it->first.begin(), it->first.end()),
string(it->second.begin(), it->second.end())));
}
}
int stringicmp(const string & s1, const string& s2)
int stringicmp(const string& s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
@ -64,7 +55,8 @@ int stringicmp(const string & s1, const string& s2)
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : -1;
} else {
@ -74,7 +66,8 @@ int stringicmp(const string & s1, const string& s2)
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : 1;
}
@ -104,13 +97,14 @@ extern int stringisuffcmp(const string& s1, const string& s2)
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++r1; ++r2;
++r1;
++r2;
}
return 0;
}
// s1 is already lowercase
int stringlowercmp(const string & s1, const string& s2)
int stringlowercmp(const string& s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
@ -123,7 +117,8 @@ int stringlowercmp(const string & s1, const string& s2)
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : -1;
} else {
@ -132,14 +127,15 @@ int stringlowercmp(const string & s1, const string& s2)
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : 1;
}
}
// s1 is already uppercase
int stringuppercmp(const string & s1, const string& s2)
int stringuppercmp(const string& s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
@ -152,7 +148,8 @@ int stringuppercmp(const string & s1, const string& s2)
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : -1;
} else {
@ -161,23 +158,24 @@ int stringuppercmp(const string & s1, const string& s2)
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
++it1;
++it2;
}
return size1 == size2 ? 0 : 1;
}
}
// Compare charset names, removing the more common spelling variations
bool samecharset(const string &cs1, const string &cs2)
bool samecharset(const string& cs1, const string& cs2)
{
string mcs1, mcs2;
// Remove all - and _, turn to lowecase
for (unsigned int i = 0; i < cs1.length();i++) {
for (unsigned int i = 0; i < cs1.length(); i++) {
if (cs1[i] != '_' && cs1[i] != '-') {
mcs1 += ::tolower(cs1[i]);
}
}
for (unsigned int i = 0; i < cs2.length();i++) {
for (unsigned int i = 0; i < cs2.length(); i++) {
if (cs2[i] != '_' && cs2[i] != '-') {
mcs2 += ::tolower(cs2[i]);
}
@ -185,7 +183,7 @@ bool samecharset(const string &cs1, const string &cs2)
return mcs1 == mcs2;
}
template <class T> bool stringToStrings(const string &s, T &tokens,
template <class T> bool stringToStrings(const string& s, T& tokens,
const string& addseps)
{
string current;
@ -195,9 +193,10 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
for (unsigned int i = 0; i < s.length(); i++) {
switch (s[i]) {
case '"':
switch(state) {
switch (state) {
case SPACE:
state=INQUOTE; continue;
state = INQUOTE;
continue;
case TOKEN:
current += '"';
continue;
@ -213,11 +212,11 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
}
break;
case '\\':
switch(state) {
switch (state) {
case SPACE:
case TOKEN:
current += '\\';
state=TOKEN;
state = TOKEN;
continue;
case INQUOTE:
state = ESCAPE;
@ -233,7 +232,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
case '\t':
case '\n':
case '\r':
switch(state) {
switch (state) {
case SPACE:
continue;
case TOKEN:
@ -250,7 +249,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
default:
if (!addseps.empty() && addseps.find(s[i]) != string::npos) {
switch(state) {
switch (state) {
case ESCAPE:
state = INQUOTE;
break;
@ -266,7 +265,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
state = SPACE;
continue;
}
} else switch(state) {
} else switch (state) {
case ESCAPE:
state = INQUOTE;
break;
@ -280,7 +279,7 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
current += s[i];
}
}
switch(state) {
switch (state) {
case SPACE:
break;
case TOKEN:
@ -293,26 +292,29 @@ template <class T> bool stringToStrings(const string &s, T &tokens,
return true;
}
template bool stringToStrings<list<string> >(const string &,
list<string> &, const string&);
template bool stringToStrings<vector<string> >(const string &,
vector<string> &,const string&);
template bool stringToStrings<set<string> >(const string &,
set<string> &, const string&);
template bool stringToStrings<list<string> >(const string&,
list<string>&, const string&);
template bool stringToStrings<vector<string> >(const string&,
vector<string>&, const string&);
template bool stringToStrings<set<string> >(const string&,
set<string>&, const string&);
template bool stringToStrings<STD_UNORDERED_SET<string> >
(const string &, STD_UNORDERED_SET<string> &, const string&);
(const string&, STD_UNORDERED_SET<string>&, const string&);
template <class T> void stringsToString(const T &tokens, string &s)
template <class T> void stringsToString(const T& tokens, string& s)
{
for (typename T::const_iterator it = tokens.begin();
it != tokens.end(); it++) {
bool hasblanks = false;
if (it->find_first_of(" \t\n") != string::npos)
if (it->find_first_of(" \t\n") != string::npos) {
hasblanks = true;
if (it != tokens.begin())
}
if (it != tokens.begin()) {
s.append(1, ' ');
if (hasblanks)
}
if (hasblanks) {
s.append(1, '"');
}
for (unsigned int i = 0; i < it->length(); i++) {
char car = it->at(i);
if (car == '"') {
@ -322,24 +324,25 @@ template <class T> void stringsToString(const T &tokens, string &s)
s.append(1, car);
}
}
if (hasblanks)
if (hasblanks) {
s.append(1, '"');
}
}
}
template void stringsToString<list<string> >(const list<string> &, string &);
template void stringsToString<vector<string> >(const vector<string> &,string &);
template void stringsToString<set<string> >(const set<string> &, string &);
template <class T> string stringsToString(const T &tokens)
template void stringsToString<list<string> >(const list<string>&, string&);
template void stringsToString<vector<string> >(const vector<string>&, string&);
template void stringsToString<set<string> >(const set<string>&, string&);
template <class T> string stringsToString(const T& tokens)
{
string out;
stringsToString<T>(tokens, out);
return out;
}
template string stringsToString<list<string> >(const list<string> &);
template string stringsToString<vector<string> >(const vector<string> &);
template string stringsToString<set<string> >(const set<string> &);
template string stringsToString<list<string> >(const list<string>&);
template string stringsToString<vector<string> >(const vector<string>&);
template string stringsToString<set<string> >(const set<string>&);
template <class T> void stringsToCSV(const T &tokens, string &s,
template <class T> void stringsToCSV(const T& tokens, string& s,
char sep)
{
s.erase();
@ -347,12 +350,15 @@ template <class T> void stringsToCSV(const T &tokens, string &s,
it != tokens.end(); it++) {
bool needquotes = false;
if (it->empty() ||
it->find_first_of(string(1, sep) + "\"\n") != string::npos)
it->find_first_of(string(1, sep) + "\"\n") != string::npos) {
needquotes = true;
if (it != tokens.begin())
}
if (it != tokens.begin()) {
s.append(1, sep);
if (needquotes)
}
if (needquotes) {
s.append(1, '"');
}
for (unsigned int i = 0; i < it->length(); i++) {
char car = it->at(i);
if (car == '"') {
@ -361,12 +367,13 @@ template <class T> void stringsToCSV(const T &tokens, string &s,
s.append(1, car);
}
}
if (needquotes)
if (needquotes) {
s.append(1, '"');
}
}
}
template void stringsToCSV<list<string> >(const list<string> &, string &, char);
template void stringsToCSV<vector<string> >(const vector<string> &,string &,
template void stringsToCSV<list<string> >(const list<string>&, string&, char);
template void stringsToCSV<vector<string> >(const vector<string>&, string&,
char);
void stringToTokens(const string& str, vector<string>& tokens,
@ -389,8 +396,9 @@ void stringToTokens(const string& str, vector<string>& tokens,
break;
} else if (pos == startPos) {
// Dont' push empty tokens after first
if (tokens.empty())
if (tokens.empty()) {
tokens.push_back(string());
}
startPos = ++pos;
} else {
tokens.push_back(str.substr(startPos, pos - startPos));
@ -399,20 +407,22 @@ void stringToTokens(const string& str, vector<string>& tokens,
}
}
bool stringToBool(const string &s)
bool stringToBool(const string& s)
{
if (s.empty())
if (s.empty()) {
return false;
}
if (isdigit(s[0])) {
int val = atoi(s.c_str());
return val ? true : false;
}
if (s.find_first_of("yYtT") == 0)
if (s.find_first_of("yYtT") == 0) {
return true;
}
return false;
}
void trimstring(string &s, const char *ws)
void trimstring(string& s, const char *ws)
{
string::size_type pos = s.find_first_not_of(ws);
if (pos == string::npos) {
@ -422,25 +432,27 @@ void trimstring(string &s, const char *ws)
s.replace(0, pos, string());
pos = s.find_last_not_of(ws);
if (pos != string::npos && pos != s.length()-1)
s.replace(pos+1, string::npos, string());
if (pos != string::npos && pos != s.length() - 1) {
s.replace(pos + 1, string::npos, string());
}
}
// Remove some chars and replace them with spaces
string neutchars(const string &str, const string &chars)
string neutchars(const string& str, const string& chars)
{
string out;
neutchars(str, out, chars);
return out;
}
void neutchars(const string &str, string &out, const string& chars)
void neutchars(const string& str, string& out, const string& chars)
{
string::size_type startPos, pos;
for (pos = 0;;) {
// Skip initial chars, break if this eats all.
if ((startPos = str.find_first_not_of(chars, pos)) == string::npos)
if ((startPos = str.find_first_not_of(chars, pos)) == string::npos) {
break;
}
// Find next delimiter or end of string (end of token)
pos = str.find_first_of(chars, startPos);
// Add token to the output. Note: token cant be empty here
@ -458,7 +470,7 @@ void neutchars(const string &str, string &out, const string& chars)
* we have enough, this would be cleanly utf8-aware but would remove
* punctuation */
static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
string truncate_to_word(const string &input, string::size_type maxlen)
string truncate_to_word(const string& input, string::size_type maxlen)
{
string output;
if (input.length() <= maxlen) {
@ -481,25 +493,12 @@ string truncate_to_word(const string &input, string::size_type maxlen)
return output;
}
void utf8truncate(string &s, int maxlen)
{
if (s.size() <= string::size_type(maxlen))
return;
Utf8Iter iter(s);
string::size_type pos = 0;
while (iter++ != string::npos)
if (iter.getBpos() < string::size_type(maxlen))
pos = iter.getBpos();
s.erase(pos);
}
// Escape things that would look like markup
string escapeHtml(const string &in)
string escapeHtml(const string& in)
{
string out;
for (string::size_type pos = 0; pos < in.length(); pos++) {
switch(in.at(pos)) {
switch (in.at(pos)) {
case '<':
out += "&lt;";
break;
@ -513,12 +512,12 @@ string escapeHtml(const string &in)
return out;
}
string escapeShell(const string &in)
string escapeShell(const string& in)
{
string out;
out += "\"";
for (string::size_type pos = 0; pos < in.length(); pos++) {
switch(in.at(pos)) {
switch (in.at(pos)) {
case '$':
out += "\\$";
break;
@ -547,7 +546,7 @@ string escapeShell(const string &in)
bool pcSubst(const string& in, string& out, const map<char, string>& subs)
{
string::const_iterator it;
for (it = in.begin(); it != in.end();it++) {
for (it = in.begin(); it != in.end(); it++) {
if (*it == '%') {
if (++it == in.end()) {
out += '%';
@ -557,7 +556,7 @@ bool pcSubst(const string& in, string& out, const map<char, string>& subs)
out += '%';
continue;
}
map<char,string>::const_iterator tr;
map<char, string>::const_iterator tr;
if ((tr = subs.find(*it)) != subs.end()) {
out += tr->second;
} else {
@ -594,15 +593,15 @@ bool pcSubst(const string& in, string& out, const map<string, string>& subs)
string::size_type j = in.find_first_of(")", i);
if (j == string::npos) {
// ??concatenate remaining part and stop
out += in.substr(i-2);
out += in.substr(i - 2);
break;
}
key = in.substr(i, j-i);
key = in.substr(i, j - i);
i = j;
} else {
key = in[i];
}
map<string,string>::const_iterator tr;
map<string, string>::const_iterator tr;
if ((tr = subs.find(key)) != subs.end()) {
out += tr->second;
} else {
@ -622,14 +621,15 @@ inline static int ulltorbuf(unsigned long long val, char *rbuf)
for (idx = 0; val; idx++) {
rbuf[idx] = '0' + val % 10;
val /= 10;
} while (val);
}
while (val);
rbuf[idx] = 0;
return idx;
}
inline static void ullcopyreverse(const char *rbuf, string& buf, int idx)
{
buf.reserve(idx+1);
buf.reserve(idx + 1);
for (int i = idx - 1; i >= 0; i--) {
buf.push_back(rbuf[i]);
}
@ -659,14 +659,16 @@ void lltodecstr(long long val, string& buf)
}
bool neg = val < 0;
if (neg)
if (neg) {
val = -val;
}
char rbuf[30];
int idx = ulltorbuf(val, rbuf);
if (neg)
if (neg) {
rbuf[idx++] = '-';
}
rbuf[idx] = 0;
ullcopyreverse(rbuf, buf, idx);
@ -722,12 +724,13 @@ string breakIntoLines(const string& in, unsigned int ll,
string::size_type pos = ss.find_last_of(" ");
if (pos == string::npos) {
pos = query.find_first_of(" ");
if (pos != string::npos)
ss = query.substr(0, pos+1);
else
ss = query;
if (pos != string::npos) {
ss = query.substr(0, pos + 1);
} else {
ss = ss.substr(0, pos+1);
ss = query;
}
} else {
ss = ss.substr(0, pos + 1);
}
}
// This can't happen, but anyway. Be very sure to avoid an infinite loop
@ -740,7 +743,7 @@ string breakIntoLines(const string& in, unsigned int ll,
oq += " ... \n";
break;
}
query= query.substr(ss.length());
query = query.substr(ss.length());
}
return oq;
}
@ -757,8 +760,9 @@ static bool parsedate(vector<string>::const_iterator& it,
if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) {
return false;
}
if (it == end || *it == "/")
if (it == end || *it == "/") {
return true;
}
if (*it++ != "-") {
return false;
}
@ -770,8 +774,9 @@ static bool parsedate(vector<string>::const_iterator& it,
if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) {
return false;
}
if (it == end || *it == "/")
if (it == end || *it == "/") {
return true;
}
if (*it++ != "-") {
return false;
}
@ -802,17 +807,29 @@ static bool parseperiod(vector<string>::const_iterator& it,
if (sscanf(it++->c_str(), "%d", &value) != 1) {
return false;
}
if (it == end || it->empty())
if (it == end || it->empty()) {
return false;
}
switch (it->at(0)) {
case 'Y': case 'y': dip->y1 = value;break;
case 'M': case 'm': dip->m1 = value;break;
case 'D': case 'd': dip->d1 = value;break;
default: return false;
case 'Y':
case 'y':
dip->y1 = value;
break;
case 'M':
case 'm':
dip->m1 = value;
break;
case 'D':
case 'd':
dip->d1 = value;
break;
default:
return false;
}
it++;
if (it == end)
if (it == end) {
return true;
}
if (*it == "/") {
return true;
}
@ -823,11 +840,12 @@ static bool parseperiod(vector<string>::const_iterator& it,
#ifdef _WIN32
// Replacement for setenv() on Windows builds, implemented with _putenv_s().
// @param name      name of the environment variable
// @param value     value to assign
// @param overwrite when zero, an already defined variable is left untouched
// @return the _putenv_s() result, or -1 when overwrite is zero and the
//         variable is already set.
// NOTE(review): POSIX setenv() reports success (0) when it leaves an
// existing variable alone; this version returns -1 in that case -- confirm
// that no caller treats this as a failure.
int setenv(const char *name, const char *value, int overwrite)
{
if(!overwrite) {
if (!overwrite) {
const char *cp = getenv(name);
if (cp)
if (cp) {
return -1;
}
}
return _putenv_s(name, value);
}
void unsetenv(const char *name)
@ -845,10 +863,11 @@ time_t portable_timegm(struct tm *tm)
setenv("TZ", "", 1);
tzset();
ret = mktime(tm);
if (tz)
if (tz) {
setenv("TZ", tz, 1);
else
} else {
unsetenv("TZ");
}
tzset();
return ret;
}
@ -873,7 +892,7 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
// timegm sort it out
memset(&tm, 0, sizeof(tm));
tm.tm_year = dp->y1 - 1900 + pp->y1;
tm.tm_mon = dp->m1 + pp->m1 -1;
tm.tm_mon = dp->m1 + pp->m1 - 1;
tm.tm_mday = dp->d1 + pp->d1;
time_t tres = mktime(&tm);
localtime_r(&tres, &tm);
@ -886,10 +905,19 @@ static bool addperiod(DateInterval *dp, DateInterval *pp)
// Return the number of days in month `mon` of `year`.
// Months are numbered from 1: 2 is February, and 1, 3, 5, 7, 8, 10, 12 are
// the 31-day months. Every other value of `mon` takes the default branch
// and yields 30.
// February only applies the "divisible by 4" leap test, without the
// century corrections.
int monthdays(int mon, int year)
{
switch (mon) {
// We are returning a few two many 29 days februaries, no problem
case 2: return (year % 4) == 0 ? 29 : 28;
case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31;
default: return 30;
// We are returning a few too many 29 days februaries, no problem
case 2:
return (year % 4) == 0 ? 29 : 28;
case 1:
case 3:
case 5:
case 7:
case 8:
case 10:
case 12:
return 31;
default:
return 30;
}
}
bool parsedateinterval(const string& s, DateInterval *dip)
@ -904,8 +932,9 @@ bool parsedateinterval(const string& s, DateInterval *dip)
if (!stringToStrings(s, vs, "PYMDpymd-/")) {
return false;
}
if (vs.empty())
if (vs.empty()) {
return false;
}
vector<string>::const_iterator it = vs.begin();
if (*it == "P" || *it == "p") {
@ -1042,10 +1071,12 @@ secondelt:
void catstrerror(string *reason, const char *what, int _errno)
{
if (!reason)
if (!reason) {
return;
if (what)
}
if (what) {
reason->append(what);
}
reason->append(": errno: ");
@ -1080,59 +1111,6 @@ void catstrerror(string *reason, const char *what, int _errno)
#endif
}
void HighlightData::toString(std::string& out)
{
out.append("\nUser terms (orthograph): ");
for (std::set<std::string>::const_iterator it = uterms.begin();
it != uterms.end(); it++) {
out.append(" [").append(*it).append("]");
}
out.append("\nUser terms to Query terms:");
for (map<string, string>::const_iterator it = terms.begin();
it != terms.end(); it++) {
out.append("[").append(it->first).append("]->[");
out.append(it->second).append("] ");
}
out.append("\nGroups: ");
char cbuf[200];
sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
out.append(cbuf);
size_t ugidx = (size_t)-1;
for (unsigned int i = 0; i < groups.size(); i++) {
if (ugidx != grpsugidx[i]) {
ugidx = grpsugidx[i];
out.append("\n(");
for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
out.append("[").append(ugroups[ugidx][j]).append("] ");
}
out.append(") ->");
}
out.append(" {");
for (unsigned int j = 0; j < groups[i].size(); j++) {
out.append("[").append(groups[i][j]).append("]");
}
sprintf(cbuf, "%d", slacks[i]);
out.append("}").append(cbuf);
}
out.append("\n");
}
void HighlightData::append(const HighlightData& hl)
{
uterms.insert(hl.uterms.begin(), hl.uterms.end());
terms.insert(hl.terms.begin(), hl.terms.end());
size_t ugsz0 = ugroups.size();
ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
for (std::vector<size_t>::const_iterator it = hl.grpsugidx.begin();
it != hl.grpsugidx.end(); it++) {
grpsugidx.push_back(*it + ugsz0);
}
}
static const char *vlang_to_code[] = {
"be", "cp1251",
@ -1159,21 +1137,24 @@ static const char *vlang_to_code[] = {
"uk", "koi8-u",
};
static const string cstr_cp1252("CP1252");
// Map a language code to the name of an 8-bit character set.
// The lookup table is a function-local static, filled on the first call
// from the vlang_to_code array, which is read as consecutive
// (language, charset) pairs. Languages absent from the table yield the
// value of cstr_cp1252.
// NOTE(review): the lazy fill is not protected by any lock inside this
// function; presumably an init routine calls it once before threads are
// started -- confirm.
string langtocode(const string& lang)
{
static STD_UNORDERED_MAP<string, string> lang_to_code;
if (lang_to_code.empty()) {
for (unsigned int i = 0;
i < sizeof(vlang_to_code) / sizeof(char *); i += 2) {
lang_to_code[vlang_to_code[i]] = vlang_to_code[i+1];
lang_to_code[vlang_to_code[i]] = vlang_to_code[i + 1];
}
}
STD_UNORDERED_MAP<string,string>::const_iterator it =
STD_UNORDERED_MAP<string, string>::const_iterator it =
lang_to_code.find(lang);
// Use cp1252 by default...
if (it == lang_to_code.end())
if (it == lang_to_code.end()) {
return cstr_cp1252;
}
return it->second;
}
@ -1182,12 +1163,15 @@ string localelang()
{
const char *lang = getenv("LANG");
if (lang == 0 || *lang == 0 || !strcmp(lang, "C") || !strcmp(lang, "POSIX"))
if (lang == 0 || *lang == 0 || !strcmp(lang, "C") ||
!strcmp(lang, "POSIX")) {
return "en";
}
string locale(lang);
string::size_type under = locale.find_first_of("_");
if (under == string::npos)
if (under == string::npos) {
return locale;
}
return locale.substr(0, under);
}
@ -1262,41 +1246,47 @@ static void cerrdip(const string& s, DateInterval *dip)
int main(int argc, char **argv)
{
thisprog = *argv++;argc--;
thisprog = *argv++;
argc--;
#if 1
if (argc <=0 ) {
if (argc <= 0) {
cerr << "Usage: smallut <stringtosplit>" << endl;
exit(1);
}
string s = *argv++;argc--;
string s = *argv++;
argc--;
vector<string> vs;
stringToTokens(s, vs, "/");
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
cerr << "[" << *it << "] ";
}
cerr << endl;
exit(0);
#elif 0
if (argc <=0 ) {
if (argc <= 0) {
cerr << "Usage: smallut <stringtosplit>" << endl;
exit(1);
}
string s = *argv++;argc--;
string s = *argv++;
argc--;
vector<string> vs;
if (!stringToStrings(s, vs, ":-()")) {
cerr << "Bad entry" << endl;
exit(1);
}
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++)
for (vector<string>::const_iterator it = vs.begin(); it != vs.end(); it++) {
cerr << "[" << *it << "] ";
}
cerr << endl;
exit(0);
#elif 0
if (argc <=0 ) {
if (argc <= 0) {
cerr << "Usage: smallut <dateinterval>" << endl;
exit(1);
}
string s = *argv++;argc--;
string s = *argv++;
argc--;
DateInterval di;
if (!parsedateinterval(s, &di)) {
cerr << "Parse failed" << endl;
@ -1386,26 +1376,26 @@ int main(int argc, char **argv)
string slong("ABCD");
string sshortsmaller("ABB");
vector<pair<string,string> > cmps;
cmps.push_back(pair<string,string>(sshort,sshort));
cmps.push_back(pair<string,string>(sshort,slong));
cmps.push_back(pair<string,string>(slong,sshort));
cmps.push_back(pair<string,string>(sshortsmaller,sshort));
cmps.push_back(pair<string,string>(sshort, sshortsmaller));
vector<pair<string, string> > cmps;
cmps.push_back(pair<string, string>(sshort, sshort));
cmps.push_back(pair<string, string>(sshort, slong));
cmps.push_back(pair<string, string>(slong, sshort));
cmps.push_back(pair<string, string>(sshortsmaller, sshort));
cmps.push_back(pair<string, string>(sshort, sshortsmaller));
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
it != cmps.end(); it++) {
cout << it->first << " " << it->second << " " <<
stringicmp(it->first, it->second) << endl;
}
cout << endl;
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
it != cmps.end(); it++) {
cout << it->first << " " << it->second << " " <<
stringlowercmp(stringtolower(it->first), it->second) << endl;
}
cout << endl;
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
for (vector<pair<string, string> >::const_iterator it = cmps.begin();
it != cmps.end(); it++) {
cout << it->first << " " << it->second << " " <<
stringuppercmp(it->first, it->second) << endl;

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes
/* Copyright (C) 2004-2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -17,47 +17,63 @@
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
#include <stdlib.h>
#include <sys/types.h>
#include <string>
#include <vector>
#include <map>
#include <set>
using std::string;
using std::vector;
using std::map;
using std::set;
// Miscellaneous mostly string-oriented small utilities
// Note that none of the following code knows about utf-8.
// Call this before going multithread.
void smallut_init_mt();
#ifndef SMALLUT_DISABLE_MACROS
#ifndef MIN
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
#endif
#ifndef MAX
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
#endif
#ifndef deleteZ
#define deleteZ(X) {delete X;X = 0;}
#endif
#endif /* SMALLUT_DISABLE_MACROS */
// Case-insensitive compare. ASCII ONLY !
extern int stringicmp(const std::string& s1, const std::string& s2);
// Note these are all ascii routines
extern int stringicmp(const string& s1, const string& s2);
// For find_if etc.
// Unary predicate: operator() returns true when its argument compares equal
// to the string given at construction, according to stringicmp().
// Only a reference to the constructor argument is stored (m_s1), so that
// string must outlive the predicate object.
struct StringIcmpPred {
StringIcmpPred(const string& s1)
: m_s1(s1)
{}
bool operator()(const string& s2) {
StringIcmpPred(const std::string& s1)
: m_s1(s1) {
}
bool operator()(const std::string& s2) {
return stringicmp(m_s1, s2) == 0;
}
const string& m_s1;
const std::string& m_s1;
};
extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2);
extern int stringlowercmp(const std::string& alreadylower,
const std::string& s2);
extern int stringuppercmp(const std::string& alreadyupper,
const std::string& s2);
extern void stringtolower(string& io);
extern string stringtolower(const string& io);
extern void stringtolower(std::string& io);
extern std::string stringtolower(const std::string& io);
// Is one string the end part of the other ?
extern int stringisuffcmp(const string& s1, const string& s2);
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
// Divine language from locale
extern std::string localelang();
// Divine 8bit charset from language
extern std::string langtocode(const string& lang);
extern std::string langtocode(const std::string& lang);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const string &cs1, const string &cs2);
extern bool samecharset(const std::string& cs1, const std::string& cs2);
// Parse date interval specifier into pair of y,m,d dates. The format
// for the time interval is based on a subset of iso 8601 with
@ -71,9 +87,14 @@ extern bool samecharset(const string &cs1, const string &cs2);
// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
// This returns a pair of y,m,d dates.
// Pair of dates, each stored as separate year (y), month (m) and day (d)
// integers: y1/m1/d1 for the first date, y2/m2/d2 for the second one.
// NOTE(review): the period parsing code also stores a count of years,
// months and days in y1/m1/d1 -- confirm before relying on these always
// being calendar values.
struct DateInterval {
int y1;int m1;int d1; int y2;int m2;int d2;
int y1;
int m1;
int d1;
int y2;
int m2;
int d2;
};
extern bool parsedateinterval(const string&s, DateInterval *di);
extern bool parsedateinterval(const std::string& s, DateInterval *di);
extern int monthdays(int mon, int year);
/**
@ -81,172 +102,132 @@ extern int monthdays(int mon, int year);
*
* Token delimiter is " \t\n" except inside dquotes. dquote inside
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as space tab etc.
* have the ascii values and can't appear as part of a multibyte char. utf-8 ok
* but so are the iso-8859-x and surely others. addseps do have to be
* single-bytes
* Input is handled a byte at a time, things will work as long as
* space tab etc. have the ascii values and can't appear as part of a
* multibyte char. utf-8 ok but so are the iso-8859-x and surely
* others. addseps do have to be single-bytes
*/
template <class T> bool stringToStrings(const string& s, T &tokens,
const string& addseps = "");
template <class T> bool stringToStrings(const std::string& s, T& tokens,
const std::string& addseps = "");
/**
* Inverse operation:
*/
template <class T> void stringsToString(const T &tokens, string &s);
template <class T> std::string stringsToString(const T &tokens);
template <class T> void stringsToString(const T& tokens, std::string& s);
template <class T> std::string stringsToString(const T& tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")
* " inside tokens is escaped as "" ([word "quote"] => ["word ""quote"""])
*/
template <class T> void stringsToCSV(const T &tokens, string &s,
template <class T> void stringsToCSV(const T& tokens, std::string& s,
char sep = ',');
/**
* Split input string. No handling of quoting
*/
extern void stringToTokens(const string &s, vector<string> &tokens,
const string &delims = " \t", bool skipinit=true);
extern void stringToTokens(const std::string& s,
std::vector<std::string>& tokens,
const std::string& delims = " \t",
bool skipinit = true);
/** Convert string to boolean */
extern bool stringToBool(const string &s);
extern bool stringToBool(const std::string& s);
/** Remove instances of characters belonging to set (default {space,
tab}) at beginning and end of input string */
extern void trimstring(string &s, const char *ws = " \t");
extern void trimstring(std::string& s, const char *ws = " \t");
/** Escape things like < or & by turning them into entities */
extern string escapeHtml(const string &in);
extern std::string escapeHtml(const std::string& in);
/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
* so chars should only contain ascii */
extern string neutchars(const string &str, const string &chars);
extern void neutchars(const string &str, string& out, const string &chars);
/** Replace some chars with spaces (ie: newline chars). */
extern std::string neutchars(const std::string& str, const std::string& chars);
extern void neutchars(const std::string& str, std::string& out,
const std::string& chars);
/** Turn string into something that won't be expanded by a shell. In practise
* quote with double-quotes and escape $`\ */
extern string escapeShell(const string &str);
extern std::string escapeShell(const std::string& str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern string truncate_to_word(const string &input, string::size_type maxlen);
extern std::string truncate_to_word(const std::string& input,
std::string::size_type maxlen);
/** Truncate in place in an utf8-legal way */
extern void utf8truncate(string &s, int maxlen);
void ulltodecstr(unsigned long long val, string& buf);
void lltodecstr(long long val, string& buf);
string lltodecstr(long long val);
string ulltodecstr(unsigned long long val);
void ulltodecstr(unsigned long long val, std::string& buf);
void lltodecstr(long long val, std::string& buf);
std::string lltodecstr(long long val);
std::string ulltodecstr(unsigned long long val);
/** Convert byte count into unit (KB/MB...) appropriate for display */
string displayableBytes(off_t size);
std::string displayableBytes(off_t size);
/** Break big string into lines */
string breakIntoLines(const string& in, unsigned int ll = 100,
unsigned int maxlines= 50);
std::string breakIntoLines(const std::string& in, unsigned int ll = 100,
unsigned int maxlines = 50);
/** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const string& in, string& out, const map<char, string>& subs);
bool pcSubst(const std::string& in, std::string& out,
const std::map<char, std::string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const string& in, string& out, const map<string, string>& subs);
bool pcSubst(const std::string& in, std::string& out,
const std::map<std::string, std::string>& subs);
/** Append system error message */
void catstrerror(string *reason, const char *what, int _errno);
void catstrerror(std::string *reason, const char *what, int _errno);
/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
* makes it inaccessible */
struct tm;
time_t portable_timegm(struct tm *tm);
/** Temp buffer with automatic deallocation.
 *
 * Thin owner of a malloc()ed byte area which is released by the destructor.
 * The object does not record the size of the area.
 *
 * Copying is not handled: a copy would hold the same pointer and both
 * destructors would free it, so pass TempBuf objects by reference only.
 */
struct TempBuf {
    /** Create an empty object: buf() returns a null pointer. */
    TempBuf()
        : m_buf(0) {
    }

    /** Allocate n bytes. buf() returns a null pointer if malloc() failed. */
    TempBuf(int n)
        : m_buf((char *)malloc(n)) {
    }

    ~TempBuf() {
        if (m_buf) {
            free(m_buf);
        }
    }

    /** Resize the area to n bytes with realloc(), keeping the current
     * contents as far as realloc() does.
     * @return the new buffer address, or a null pointer on failure, in
     *   which case the object no longer holds any buffer.
     */
    char *setsize(int n) {
        char *nbuf = (char *)realloc(m_buf, n);
        if (nbuf == 0 && n != 0) {
            // Allocation failure: realloc() left the previous block
            // allocated, and it would leak once m_buf is overwritten.
            // (A zero size is excluded because realloc() may then return
            // null after having released the block.)
            free(m_buf);
        }
        m_buf = nbuf;
        return m_buf;
    }

    /** Current buffer address, possibly null. */
    char *buf() {
        return m_buf;
    }

    char *m_buf;
};
// Left-pad s with '0' characters, in place, up to a length of len.
// Nothing is done when s is empty or when it is already at least len
// characters long.
inline void leftzeropad(string& s, unsigned len)
inline void leftzeropad(std::string& s, unsigned len)
{
if (s.length() && s.length() < len)
if (s.length() && s.length() < len) {
s = s.insert(0, len - s.length(), '0');
}
}
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string,std::string> s,
std::map<std::string,std::string> *d);
// Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
// The constructor stores the first pair and each chained operator() call
// stores one more, returning the builder itself so that calls can be
// chained. Entries are set through operator[], so when a key is given
// several times the last value wins. The conversion operator returns a
// copy of the accumulated map.
template <typename T, typename U>
class create_map
{
class create_map {
private:
std::map<T, U> m_map;
public:
create_map(const T& key, const U& val)
{
create_map(const T& key, const U& val) {
m_map[key] = val;
}
create_map<T, U>& operator()(const T& key, const U& val)
{
create_map<T, U>& operator()(const T& key, const U& val) {
m_map[key] = val;
return *this;
}
operator std::map<T, U>()
{
operator std::map<T, U>() {
return m_map;
}
};
// Chained-call builder for initializing an stl vector in one expression.
// Example use: vector<int> v = create_vector<int> (1) (2) (3);
// The constructor stores the first element and each operator() call appends
// one more with push_back, so elements keep the order in which they were
// given. The conversion operator returns a copy of the accumulated vector.
template <typename T>
class create_vector
{
class create_vector {
private:
std::vector<T> m_vector;
public:
create_vector(const T& val)
{
create_vector(const T& val) {
m_vector.push_back(val);
}
create_vector<T>& operator()(const T& val)
{
create_vector<T>& operator()(const T& val) {
m_vector.push_back(val);
return *this;
}
operator std::vector<T>()
{
operator std::vector<T>() {
return m_vector;
}
};
#ifndef MIN
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
#endif
#ifndef MAX
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
#endif
#ifndef deleteZ
#define deleteZ(X) {delete X;X = 0;}
#endif
void smallut_init_mt();
#endif /* _SMALLUT_H_INCLUDED_ */