moved code around to make smallut and pathut less recoll-specific and reusable. No actual changes

This commit is contained in:
Jean-Francois Dockes 2016-03-21 12:55:31 +01:00
parent 7b2a455b80
commit 35de51985b
26 changed files with 1821 additions and 1493 deletions

View file

@ -207,6 +207,7 @@ utils/fileudi.h \
utils/fstreewalk.cpp \
utils/fstreewalk.h \
utils/hldata.h \
utils/hldata.cpp \
utils/idfile.cpp \
utils/idfile.h \
utils/md5.cpp \
@ -224,6 +225,8 @@ utils/pxattr.cpp \
utils/pxattr.h \
utils/rclionice.cpp \
utils/rclionice.h \
utils/rclutil.h \
utils/rclutil.cpp \
utils/readfile.cpp \
utils/readfile.h \
utils/refcntr.h \

View file

@ -39,6 +39,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h"
#include "conftree.h"
#include "debuglog.h"
@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf)
}
// Compute our data dir name, typically /usr/local/share/recoll
m_datadir = path_sharedatadir();
m_datadir = path_pkgdatadir();
// We only do the automatic configuration creation thing for the default
// config dir, not if it was specified through -c or RECOLL_CONFDIR
bool autoconfdir = false;

View file

@ -32,6 +32,7 @@
#include "rclconfig.h"
#include "rclinit.h"
#include "pathut.h"
#include "rclutil.h"
#include "unac.h"
#include "smallut.h"
#include "execmd.h"
@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags,
// Init smallut and pathut static values
pathut_init_mt();
smallut_init_mt();
rclutil_init_mt();
// Init execmd.h static PATH and PATHELT splitting
{string bogus;
ExecCmd::which("nosuchcmd", bogus);
@ -389,4 +392,3 @@ bool recoll_ismainthread()
return pthread_equal(pthread_self(), mainthread_id);
}

View file

@ -18,40 +18,39 @@
#define _RCLINIT_H_INCLUDED_
#include <string>
#ifndef NO_NAMESPACES
using std::string;
#endif
class RclConfig;
/**
* Initialize by reading configuration, opening log file, etc.
*
*
* This must be called from the main thread before starting any others. It sets
* up the global signal handling. other threads must call recoll_threadinit()
* when starting.
*
* @param flags misc modifiers. These are currently only used to customize
* @param flags misc modifiers. These are currently only used to customize
* the log file and verbosity.
* @param cleanup function to call before exiting (atexit)
* @param sigcleanup function to call on terminal signal (INT/HUP...) This
* should typically set a flag which tells the program (recoll,
* recollindex etc.. to exit as soon as possible (after closing the db,
* @param sigcleanup function to call on terminal signal (INT/HUP...) This
* should typically set a flag which tells the program (recoll,
* recollindex etc.. to exit as soon as possible (after closing the db,
* etc.). cleanup will then be called by exit().
* @param reason in case of error: output string explaining things
* @param argcnf Configuration directory name from the command line (overriding
* default and environment)
* @return the parsed configuration.
*/
enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2};
enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2};
extern RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0);
inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0) {
void (*cleanup)(void), void (*sigcleanup)(int),
std::string& reason, const string *argcnf = 0);
inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
std::string& reason,
const std::string *argcnf = 0)
{
return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf);
}
// Threads need to call this to block signals.
// Threads need to call this to block signals.
// The main thread handles all signals.
extern void recoll_threadinit();

View file

@ -23,6 +23,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "debuglog.h"
#include "fstreewalk.h"
#include "beaglequeue.h"

View file

@ -28,6 +28,7 @@
#include "cstr.h"
#include "pathut.h"
#include "rclutil.h"
#include "conftree.h"
#include "rclconfig.h"
#include "fstreewalk.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "smallut.h"
#include "chrono.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclmon.h"
#include "x11mon.h"
#include "cancelcheck.h"

View file

@ -21,6 +21,7 @@
#include <string>
#include "pathut.h"
#include "rclutil.h"
#include "ptmutex.h"
/// Uncompression script interface.

View file

@ -32,6 +32,7 @@ using namespace std;
#include "searchdata.h"
#include "rclquery.h"
#include "pathut.h"
#include "rclutil.h"
#include "wasatorcl.h"
#include "debuglog.h"
#include "pathut.h"

View file

@ -23,6 +23,7 @@
#include "rcldoc.h"
#include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h"
/*

View file

@ -21,6 +21,7 @@
#include "rclconfig.h"
#include "rcldb.h"
#include "rclutil.h"
#include "ptmutex.h"
#include <QString>

View file

@ -34,6 +34,7 @@ using std::list;
#include "debuglog.h"
#include "rclconfig.h"
#include "smallut.h"
#include "rclutil.h"
#include "plaintorich.h"
#include "mimehandler.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "unacpp.h"
#include "conftree.h"
#include "pathut.h"
#include "rclutil.h"
#include "smallut.h"
#include "chrono.h"
#include "utf8iter.h"
@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi)
return pterm;
}
static void utf8truncate(string& s, int maxlen)
{
if (s.size() <= string::size_type(maxlen)) {
return;
}
Utf8Iter iter(s);
string::size_type pos = 0;
while (iter++ != string::npos)
if (iter.getBpos() < string::size_type(maxlen)) {
pos = iter.getBpos();
}
s.erase(pos);
}
Db::Native::Native(Db *db)
: m_rcldb(db), m_isopen(false), m_iswritable(false),
m_noversionwrite(false)

View file

@ -14,9 +14,11 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include "rcldoc.h"
#include "debuglog.h"
#include "rclutil.h"
namespace Rcl {
const string Doc::keyabs("abstract");
@ -70,5 +72,31 @@ namespace Rcl {
if (dotext)
LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str()));
}
// Copy this document into *d while ensuring that no string data is shared
// between the two objects, so that the copy can be handed to another thread.
//
// All string members are copied with assign(begin, end) rather than
// operator=, which forces a character-by-character copy instead of a
// possible reference-counted buffer share.
//
// Note: the metadata is copied by map_ss_cp_noshr(), which only inserts
// entries: keys already present in d->meta keep their previous value.
//
// @param d destination document, must not be null.
void Doc::copyto(Doc *d) const
{
    // String members: deep copies
    d->url.assign(url.begin(), url.end());
    d->idxurl.assign(idxurl.begin(), idxurl.end());
    d->ipath.assign(ipath.begin(), ipath.end());
    d->mimetype.assign(mimetype.begin(), mimetype.end());
    d->fmtime.assign(fmtime.begin(), fmtime.end());
    d->dmtime.assign(dmtime.begin(), dmtime.end());
    d->origcharset.assign(origcharset.begin(), origcharset.end());
    d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
    d->fbytes.assign(fbytes.begin(), fbytes.end());
    d->dbytes.assign(dbytes.begin(), dbytes.end());
    d->sig.assign(sig.begin(), sig.end());
    d->text.assign(text.begin(), text.end());

    // Metadata map: keys and values are deep-copied too
    map_ss_cp_noshr(meta, &d->meta);

    // Plain value members (idxi used to be assigned twice, once is enough)
    d->idxi = idxi;
    d->syntabs = syntabs;
    d->pc = pc;
    d->xdocid = xdocid;
    d->haspages = haspages;
    d->haschildren = haschildren;
    d->onlyxattr = onlyxattr;
}
}

View file

@ -163,33 +163,11 @@ class Doc {
onlyxattr = false;
}
// Copy this document into *d while ensuring that no string data is shared
// between the two objects, for threading issues.
// String members are copied with assign(begin, end) instead of operator=.
// @param d destination document, must not be null.
void copyto(Doc *d) const {
d->url.assign(url.begin(), url.end());
d->idxurl.assign(idxurl.begin(), idxurl.end());
d->idxi = idxi;
d->ipath.assign(ipath.begin(), ipath.end());
d->mimetype.assign(mimetype.begin(), mimetype.end());
d->fmtime.assign(fmtime.begin(), fmtime.end());
d->dmtime.assign(dmtime.begin(), dmtime.end());
d->origcharset.assign(origcharset.begin(), origcharset.end());
// Metadata: keys and values are duplicated by the helper
map_ss_cp_noshr(meta, &d->meta);
d->syntabs = syntabs;
d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
d->fbytes.assign(fbytes.begin(), fbytes.end());
d->dbytes.assign(dbytes.begin(), dbytes.end());
d->sig.assign(sig.begin(), sig.end());
d->text.assign(text.begin(), text.end());
d->pc = pc;
d->xdocid = xdocid;
// NOTE(review): idxi was already assigned above, this repeat is redundant
d->idxi = idxi;
d->haspages = haspages;
d->haschildren = haschildren;
d->onlyxattr = onlyxattr;
}
void copyto(Doc *d) const;
Doc()
: idxi(0), syntabs(false), pc(0), xdocid(0),
haspages(false), haschildren(false), onlyxattr(false)
{
haspages(false), haschildren(false), onlyxattr(false) {
}
/** Get value for named field. If value pointer is 0, just test existence */
bool getmeta(const string& nm, string *value = 0) const

View file

@ -32,10 +32,12 @@
#include <iostream>
#include UNORDERED_MAP_INCLUDE
using std::string;
#include "smallut.h"
using std::string;
using std::vector;
/*
Storage for the exception translations. These are chars which
should not be translated according to what UnicodeData says, but

View file

@ -71,6 +71,28 @@ typedef unsigned char UCHAR;
typedef unsigned int UINT;
typedef unsigned long ULONG;
/** Temp buffer with automatic deallocation.
 *
 * Thin owner of a malloc()ed byte buffer, released by the destructor.
 * The object must not be copied: both copies would free the same buffer.
 */
struct TempBuf {
    /** Create with no buffer, buf() returns 0 until setsize() is called */
    TempBuf()
        : m_buf(0) {
    }
    /** Create with an n bytes buffer. buf() returns 0 if allocation failed */
    TempBuf(int n) {
        m_buf = (char *)malloc(n);
    }
    ~TempBuf() {
        if (m_buf) {
            free(m_buf);
        }
    }
    /** Resize the buffer to n bytes, preserving its contents.
     *
     * @return the (possibly moved) buffer, or 0 on failure or if n <= 0.
     *   When 0 is returned, the previous buffer has been released and
     *   buf() returns 0 too.
     */
    char *setsize(int n) {
        if (n <= 0) {
            // realloc(ptr, 0) is implementation-defined and a negative
            // size can only fail: release explicitly in both cases.
            free(m_buf);
            m_buf = 0;
            return 0;
        }
        char *nbuf = (char *)realloc(m_buf, n);
        if (nbuf == 0) {
            // realloc() left the old block allocated: free it instead of
            // losing the only pointer to it.
            free(m_buf);
        }
        return (m_buf = nbuf);
    }
    /** Current buffer, possibly 0 */
    char *buf() {
        return m_buf;
    }
    char *m_buf;
};
static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);
/*

View file

@ -18,10 +18,16 @@
#ifndef TEST_CPUCONF
#include "autoconfig.h"
#include <stdlib.h>
#include "cpuconf.h"
#include "execmd.h"
#include "smallut.h"
using std::string;
using std::vector;
#if defined(__gnu_linux__)
bool getCpuConf(CpuConf& conf)
{

78
src/utils/hldata.cpp Normal file
View file

@ -0,0 +1,78 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include "hldata.h"
using std::string;
using std::map;
// Append a human-readable dump of the highlight data to 'out': the user
// terms, the user term to query term translations, then the term groups
// with their slack values, each run of groups being preceded by the user
// group it was generated from.
void HighlightData::toString(string& out)
{
    out += "\nUser terms (orthograph): ";
    for (std::set<string>::const_iterator ut = uterms.begin();
         ut != uterms.end(); ++ut) {
        out += " [";
        out += *ut;
        out += "]";
    }

    out += "\nUser terms to Query terms:";
    for (map<string, string>::const_iterator tt = terms.begin();
         tt != terms.end(); ++tt) {
        out += "[";
        out += tt->first;
        out += "]->[";
        out += tt->second;
        out += "] ";
    }

    out += "\nGroups: ";
    char numbuf[200];
    sprintf(numbuf, "Groups size %d grpsugidx size %d ugroups size %d",
            int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
    out += numbuf;

    // Index of the user group last printed. Starts as an impossible value
    // so that the first group always prints its user group.
    size_t curug = (size_t) - 1;
    for (unsigned int gi = 0; gi < groups.size(); gi++) {
        if (grpsugidx[gi] != curug) {
            curug = grpsugidx[gi];
            out += "\n(";
            for (unsigned int k = 0; k < ugroups[curug].size(); k++) {
                out += "[";
                out += ugroups[curug][k];
                out += "] ";
            }
            out += ") ->";
        }
        out += " {";
        for (unsigned int k = 0; k < groups[gi].size(); k++) {
            out += "[";
            out += groups[gi][k];
            out += "]";
        }
        sprintf(numbuf, "%d", slacks[gi]);
        out += "}";
        out += numbuf;
    }
    out += "\n";
}
// Merge the data from 'hl' into this object. Terms are merged into the
// existing sets/maps, groups are appended. The group to user group indices
// copied from 'hl' are shifted by the number of user groups we held before
// the merge, so that they keep designating the same user groups.
void HighlightData::append(const HighlightData& hl)
{
    // Must be read before ugroups grows
    const size_t ugoffset = ugroups.size();

    uterms.insert(hl.uterms.begin(), hl.uterms.end());
    terms.insert(hl.terms.begin(), hl.terms.end());

    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());

    std::vector<size_t>::const_iterator src = hl.grpsugidx.begin();
    while (src != hl.grpsugidx.end()) {
        grpsugidx.push_back(ugoffset + *src);
        ++src;
    }
}

View file

@ -4,6 +4,7 @@
#include <vector>
#include <string>
#include <set>
#include <map>
/** Store data about user search terms and their expansions. This is used
* mostly for highlighting result text and walking the matches, generating

File diff suppressed because it is too large Load diff

View file

@ -16,63 +16,54 @@
*/
#ifndef _PATHUT_H_INCLUDED_
#define _PATHUT_H_INCLUDED_
#include "autoconfig.h"
#include <string>
#include <vector>
#include <set>
#include MEMORY_INCLUDE
// Must be called in main thread before starting other threads
extern void pathut_init_mt();
/// Add a / at the end if none there yet.
extern void path_catslash(std::string &s);
extern void path_catslash(std::string& s);
/// Concatenate 2 paths
extern std::string path_cat(const std::string &s1, const std::string &s2);
extern std::string path_cat(const std::string& s1, const std::string& s2);
/// Get the simple file name (get rid of any directory path prefix
extern std::string path_getsimple(const std::string &s);
extern std::string path_getsimple(const std::string& s);
/// Simple file name + optional suffix stripping
extern std::string path_basename(const std::string &s,
const std::string &suff = std::string());
extern std::string path_basename(const std::string& s,
const std::string& suff = std::string());
/// Component after last '.'
extern std::string path_suffix(const std::string &s);
extern std::string path_suffix(const std::string& s);
/// Get the father directory
extern std::string path_getfather(const std::string &s);
extern std::string path_getfather(const std::string& s);
/// Get the current user's home directory
extern std::string path_home();
/// Expand ~ at the beginning of std::string
extern std::string path_tildexpand(const std::string &s);
/// Expand ~ at the beginning of std::string
extern std::string path_tildexpand(const std::string& s);
/// Use getcwd() to make absolute path if needed. Beware: ***this can fail***
/// we return an empty path in this case.
extern std::string path_absolute(const std::string &s);
extern std::string path_absolute(const std::string& s);
/// Clean up path by removing duplicated / and resolving ../ + make it absolute
extern std::string path_canon(const std::string &s, const std::string *cwd=0);
extern std::string path_canon(const std::string& s, const std::string *cwd = 0);
/// Use glob(3) to return the file names matching pattern inside dir
extern std::vector<std::string> path_dirglob(const std::string &dir,
const std::string pattern);
extern std::vector<std::string> path_dirglob(const std::string& dir,
const std::string pattern);
/// Encode according to rfc 1738
extern std::string url_encode(const std::string& url,
std::string::size_type offs = 0);
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string &fcharset,
const std::string &in, std::string &out);
extern std::string url_encode(const std::string& url,
std::string::size_type offs = 0);
//// Convert to file path if url is like file://. This modifies the
//// input (and returns a copy for convenience)
extern std::string fileurltolocalpath(std::string url);
/// Test for file:/// url
extern bool urlisfileurl(const std::string& url);
///
///
extern std::string url_parentfolder(const std::string& url);
/// Return the host+path part of an url. This is not a general
/// routine, it does the right thing only in the recoll context
extern std::string url_gpath(const std::string& url);
/// Same but, in the case of a Windows local path, also turn "c:/" into
/// "/c/" This should be used only for splitting the path in rcldb, it
/// would better be local in there, but I prefer to keep all the
/// system-specific path stuff in pathut
extern std::string url_gpathS(const std::string& url);
/// Stat parameter and check if it's a directory
extern bool path_isdir(const std::string& path);
@ -92,37 +83,28 @@ extern int path_fileprops(const std::string path, struct stat *stp,
/// Check that path is traversable and last element exists
/// Returns true if last elt could be checked to exist. False may mean that
/// the file/dir does not exist or that an error occurred.
/// the file/dir does not exist or that an error occurred.
extern bool path_exists(const std::string& path);
/// Return separator for PATH environment variable
extern std::string path_PATHsep();
/// Dump directory
extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries);
extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries);
/** A small wrapper around statfs et al, to return percentage of disk
occupation */
bool fsocc(const std::string &path, int *pc, // Percent occupied
long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024
);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
occupation
@param[output] pc percent occupied
@param[output] avmbs Mbs available to non-superuser. Mb=1024*1024
*/
bool fsocc(const std::string& path, int *pc, long long *avmbs = 0);
/// mkdir -p
extern bool makepath(const std::string& path);
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// Where we create the user data subdirs
extern std::string path_homedata();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_sharedatadir();
/// Test if path is absolute
extern bool path_isabsolute(const std::string& s);
@ -137,58 +119,12 @@ extern std::string path_pathtofileurl(const std::string& path);
void path_slashize(std::string& s);
#endif
/// Temporary file class. The constructor creates the file, the destructor
/// removes it unless setnoremove(true) was called. Normally used through
/// the TempFile shared pointer type.
class TempFileInternal {
public:
/// Create a temporary file with a name ending with suffix. Check ok().
TempFileInternal(const std::string& suffix);
~TempFileInternal();
/// Path of the temporary file, empty string if creation failed
const char *filename()
{
return m_filename.c_str();
}
/// Error message set when the creation failed
const std::string &getreason()
{
return m_reason;
}
/// Prevent (true) or allow (false) file removal by the destructor
void setnoremove(bool onoff)
{
m_noremove = onoff;
}
/// True if the file could be created
bool ok()
{
return !m_filename.empty();
}
private:
std::string m_filename;
std::string m_reason;
bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
/// Copying is disabled by making the copy operations private.
class TempDir {
public:
/// Create the directory. Check ok() for success.
TempDir();
~TempDir();
/// Path of the directory, empty string if creation failed
const char *dirname() {return m_dirname.c_str();}
/// Error message from the last failed operation
const std::string &getreason() {return m_reason;}
/// True if the directory could be created
bool ok() {return !m_dirname.empty();}
/// Recursively delete contents but not self.
bool wipe();
private:
std::string m_dirname;
std::string m_reason;
// Private: not meant to be copied
TempDir(const TempDir &) {}
TempDir& operator=(const TempDir &) {return *this;};
};
/// Lock/pid file class. This is quite close to the pidfile_xxx
/// utilities in FreeBSD with a bit more encapsulation. I'd have used
/// the freebsd code if it was available elsewhere
class Pidfile {
public:
Pidfile(const std::string& path) : m_path(path), m_fd(-1) {}
Pidfile(const std::string& path) : m_path(path), m_fd(-1) {}
~Pidfile();
/// Open/create the pid file.
/// @return 0 if ok, > 0 for pid of existing process, -1 for other error.
@ -200,7 +136,9 @@ public:
int close();
/// Delete the pid file
int remove();
const std::string& getreason() {return m_reason;}
const std::string& getreason() {
return m_reason;
}
private:
std::string m_path;
int m_fd;
@ -209,14 +147,4 @@ private:
int flopen();
};
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size, std::string& path);
// Must be called in main thread before starting other threads
extern void pathut_init_mt();
#endif /* _PATHUT_H_INCLUDED_ */

411
src/utils/rclutil.cpp Normal file
View file

@ -0,0 +1,411 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_RCLUTIL
#include "autoconfig.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "safefcntl.h"
#include "safeunistd.h"
#include "dirent.h"
#include "cstr.h"
#ifdef _WIN32
#include "safewindows.h"
#else
#include <sys/param.h>
#include <pwd.h>
#include <sys/file.h>
#endif
#include <math.h>
#include <errno.h>
#include <sys/types.h>
#include "safesysstat.h"
#include "ptmutex.h"
#include "rclutil.h"
#include "pathut.h"
#include "wipedir.h"
#include "transcode.h"
#include "md5ut.h"
using namespace std;
// Copy the entries of 's' into '*d', building each key and value from the
// character range of the source string so that no string data is shared
// with the source map (for passing the copy to another thread).
// Entries are inserted: a key already present in '*d' keeps its value.
void map_ss_cp_noshr(const map<string, string> s, map<string, string> *d)
{
    map<string, string>::const_iterator ent = s.begin();
    while (ent != s.end()) {
        const string key(ent->first.begin(), ent->first.end());
        const string value(ent->second.begin(), ent->second.end());
        d->insert(map<string, string>::value_type(key, value));
        ++ent;
    }
}
// Name of the sub-directory holding the default recoll configuration:
// "Recoll" on Windows, ".recoll" elsewhere.
string path_defaultrecollconfsubdir()
{
#ifdef _WIN32
    static const char subdir[] = "Recoll";
#else
    static const char subdir[] = ".recoll";
#endif
    return string(subdir);
}
// Location for sample config, filters, etc. (e.g. /usr/share/recoll/)
// Windows: the "Share" directory under path_thisexecpath().
// Others: the RECOLL_DATADIR environment variable if it is set, else the
// compiled-in RECOLL_DATADIR constant.
// The value is computed on the first call and cached in a function static:
// rclutil_init_mt() performs this first call.
const string& path_pkgdatadir()
{
    static string datadir;
    if (!datadir.empty()) {
        return datadir;
    }
#ifdef _WIN32
    datadir = path_cat(path_thisexecpath(), "Share");
#else
    const char *fromenv = getenv("RECOLL_DATADIR");
    if (fromenv != 0) {
        datadir = fromenv;
    } else {
        // If not in environment, use the compiled-in constant.
        datadir = RECOLL_DATADIR;
    }
#endif
    return datadir;
}
// Printable url: this is used to transcode from the system charset
// into either utf-8 if transcoding succeeds, or url-encoded
bool printableUrl(const string& fcharset, const string& in, string& out)
{
int ecnt = 0;
if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
out = url_encode(in, 7);
}
return true;
}
// Same as url_gpath(), except that on Windows a leading drive specification
// is turned into a path element: the result is '/', the drive letter, then
// the rest of the path after the first 2 characters (with a '/' inserted
// if the path was not drive-absolute).
// NOTE(review): on Windows, an empty string is returned when the path has
// no drive. Confirm that this is what the callers expect.
string url_gpathS(const string& url)
{
#ifdef _WIN32
    const string gpath = url_gpath(url);
    string converted;
    if (!path_hasdrive(gpath)) {
        return converted;
    }
    converted.append(1, '/');
    converted.append(1, gpath[0]);
    if (!path_isdriveabs(gpath)) {
        // This should be an error really
        converted.append(1, '/');
    }
    converted.append(gpath.substr(2));
    return converted;
#else
    return url_gpath(url);
#endif
}
// Return the directory where temporary files and directories get created.
// The first set variable among RECOLL_TMPDIR, TMPDIR, TMP and TEMP is
// used. If none is set, the default is GetTempPath() on Windows, "/tmp"
// elsewhere. The result goes through path_canon() and is cached in a
// function static: rclutil_init_mt() performs the first call.
const string& tmplocation()
{
    static string stmpdir;
    if (!stmpdir.empty()) {
        return stmpdir;
    }

    static const char *const envnames[] = {
        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
    };
    const char *fromenv = 0;
    for (unsigned int i = 0;
         fromenv == 0 && i < sizeof(envnames) / sizeof(envnames[0]); i++) {
        fromenv = getenv(envnames[i]);
    }

    if (fromenv != 0) {
        stmpdir = fromenv;
    } else {
#ifdef _WIN32
        TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)];
        GetTempPath(MAX_PATH + 1, bufw);
        stmpdir = path_tchartoutf8(bufw);
#else
        stmpdir = "/tmp";
#endif
    }
    stmpdir = path_canon(stmpdir);
    return stmpdir;
}
// Create a uniquely named temporary directory inside tmplocation().
// @param[out] tdir path of the created directory, erased on failure.
// @param[out] reason error message, set on failure.
// @return true if the directory was created.
// Three build variants:
//  - Unix with HAVE_MKDTEMP: mkdtemp() computes the name and creates the
//    directory.
//  - Unix without it: mktemp() computes the name, then mkdir() creates
//    the directory.
//  - Windows: path_wingettempfilename() computes the name, then mkdir().
// In the last two cases, a static mutex is held while this runs.
bool maketmpdir(string& tdir, string& reason)
{
#ifndef _WIN32
tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
// mkdtemp()/mktemp() modify the template in place: work on a writable copy
char *cp = strdup(tdir.c_str());
if (!cp) {
reason = "maketmpdir: out of memory (for file name !)\n";
tdir.erase();
return false;
}
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
#endif
if (!
#ifdef HAVE_MKDTEMP
mkdtemp(cp)
#else
mktemp(cp)
#endif // HAVE_MKDTEMP
) {
free(cp);
// Note: the message says mktemp whichever function was actually called
reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
strerror(errno);
tdir.erase();
return false;
}
tdir = cp;
free(cp);
#else // _WIN32
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
tdir = path_wingettempfilename(TEXT("rcltmp"));
#endif
// At this point the directory does not exist yet except if we used
// mkdtemp
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
if (mkdir(tdir.c_str(), 0700) < 0) {
// Note: errno is not reported here
reason = string("maketmpdir: mkdir ") + tdir + " failed";
tdir.erase();
return false;
}
#endif
return true;
}
// Create an empty temporary file inside tmplocation(), with a name ending
// with 'suffix'. On success, m_filename holds the file path. On failure,
// m_filename is left empty and m_reason holds an error message.
// @param suffix appended to the generated unique name.
TempFileInternal::TempFileInternal(const string& suffix)
: m_noremove(false)
{
// Because we need a specific suffix, can't use mkstemp
// well. There is a race condition between name computation and
// file creation. try to make sure that we at least don't shoot
// our own selves in the foot. maybe we'll use mkstemps one day.
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
#ifndef _WIN32
string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
// mkstemp() modifies the template in place: work on a writable copy
char *cp = strdup(filename.c_str());
if (!cp) {
m_reason = "Out of memory (for file name !)\n";
return;
}
// Using mkstemp this way is awful (both the suffix adding and
// using mkstemp() instead of mktemp just to avoid the warnings)
int fd;
if ((fd = mkstemp(cp)) < 0) {
free(cp);
m_reason = "TempFileInternal: mkstemp failed\n";
return;
}
// Only the unique name is of interest: get rid of the file which
// mkstemp() created.
close(fd);
unlink(cp);
filename = cp;
free(cp);
#else
string filename = path_wingettempfilename(TEXT("recoll"));
#endif
m_filename = filename + suffix;
// Create the actual file. O_EXCL makes open() fail if the name already
// exists. A failed open() yields -1, on which close() fails too.
if (close(open(m_filename.c_str(), O_CREAT | O_EXCL, 0600)) != 0) {
m_reason = string("Could not open/create") + m_filename;
m_filename.erase();
}
}
// Remove the temporary file, except if its creation failed or if
// setnoremove(true) was called.
TempFileInternal::~TempFileInternal()
{
    if (m_noremove || m_filename.empty()) {
        return;
    }
    unlink(m_filename.c_str());
}
// Create the temporary directory with maketmpdir(). On failure, m_dirname
// is left empty (see ok()) and m_reason holds the error message.
TempDir::TempDir()
{
    const bool created = maketmpdir(m_dirname, m_reason);
    if (!created) {
        m_dirname.erase();
    }
}
// Delete the directory with wipedir(), ignoring errors. Does nothing if the
// directory could not be created.
TempDir::~TempDir()
{
    if (m_dirname.empty()) {
        return;
    }
    (void)wipedir(m_dirname, true, true);
    m_dirname.erase();
}
// Empty the directory with wipedir(), keeping the directory itself.
// @return true on success. On failure, m_reason holds the error message.
bool TempDir::wipe()
{
    const char *failure = 0;
    if (m_dirname.empty()) {
        failure = "TempDir::wipe: no directory !\n";
    } else if (wipedir(m_dirname, false, true)) {
        // wipedir() returns non-zero on error
        failure = "TempDir::wipe: wipedir failed\n";
    }

    if (failure == 0) {
        return true;
    }
    m_reason = failure;
    return false;
}
// Freedesktop standard paths for cache directory (thumbnails are now in there)
// Value of XDG_CACHE_HOME if it is set, else ".cache" under path_home().
// Computed on the first call, then cached in a function static.
static const string& xdgcachedir()
{
    static string xdgcache;
    if (!xdgcache.empty()) {
        return xdgcache;
    }
    const char *fromenv = getenv("XDG_CACHE_HOME");
    if (fromenv != 0) {
        xdgcache = string(fromenv);
    } else {
        xdgcache = path_cat(path_home(), ".cache");
    }
    return xdgcache;
}
// Directory holding the thumbnails: "thumbnails" under xdgcachedir() if it
// exists, else ".thumbnails" under path_home(). Computed on the first call,
// then cached in a function static.
static const string& thumbnailsdir()
{
    static string thumbnailsd;
    if (!thumbnailsd.empty()) {
        return thumbnailsd;
    }
    thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
    // Mode 0: only test for existence
    const bool present = access(thumbnailsd.c_str(), 0) == 0;
    if (!present) {
        thumbnailsd = path_cat(path_home(), ".thumbnails");
    }
    return thumbnailsd;
}
// Place for 256x256 files
static const string thmbdirlarge = "large";
// 128x128
static const string thmbdirnormal = "normal";
// Compute the thumbnail file name for 'url': hexadecimal form of the MD5
// digest of the url-encoded url, followed by ".png".
static void thumbname(const string& url, string& name)
{
    string encoded = url_encode(url);
    string md5;
    MD5String(encoded, md5);
    MD5HexPrint(md5, name);
    name.append(".png");
}
// Compute the path of the thumbnail file for 'url'.
// @param url the document url.
// @param size requested thumbnail size in pixels. Up to 128, the "normal"
//   thumbnail is preferred, and the "large" one is used as fallback. Above,
//   only the "large" one is considered.
// @param[out] path the path of the existing thumbnail if there is one,
//   else the path where the thumbnail for the requested size would be,
//   inside the thumbnails directory.
// @return true if a readable thumbnail file exists at 'path'.
bool thumbPathForUrl(const string& url, int size, string& path)
{
    string name;
    thumbname(url, name);
    const bool wantnormal = size <= 128;

    if (wantnormal) {
        path = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
        if (access(path.c_str(), R_OK) == 0) {
            return true;
        }
    }
    path = path_cat(path_cat(thumbnailsdir(), thmbdirlarge), name);
    if (access(path.c_str(), R_OK) == 0) {
        return true;
    }

    // File does not exist. Path corresponds to the large version at this
    // point, fix it if needed. This must use the thumbnails directory like
    // the lookups above: the home directory was used here, which yielded
    // a path outside of the thumbnails tree.
    if (wantnormal) {
        path = path_cat(path_cat(thumbnailsdir(), thmbdirnormal), name);
    }
    return false;
}
// Call the functions which cache their result in a function static, so
// that the values are computed here. Declared as needing to be called
// before other threads are started (see recollinit()).
void rclutil_init_mt()
{
    (void)path_pkgdatadir();
    (void)tmplocation();
    (void)thumbnailsdir();
}
#else // TEST_RCLUTIL
// Test driver helper: print the thumbnail path for the file path '_input'.
// The path is made absolute if it does not begin with '/', canonized and
// turned into a file:// url before the thumbnail path computation.
void path_to_thumb(const string& _input)
{
    string abspath(_input);
    // Make absolute path if needed
    if (abspath[0] != '/') {
        abspath = path_absolute(abspath);
    }
    const string url = string("file://") + path_canon(abspath);

    string thumbpath;
    //thumbpath = url_encode(url, 7);
    thumbPathForUrl(url, 7, thumbpath);
    cout << thumbpath << endl;
}
const char *thisprog;
// Test driver entry point (TEST_RCLUTIL builds only).
// The only test (thumbnail path computation for a file path given as
// argument, or for each line read from stdin) is currently disabled
// with #if 0, so that this does nothing beyond consuming argv[0].
int main(int argc, const char **argv)
{
// Skip the program name
thisprog = *argv++;
argc--;
string s;
vector<string>::const_iterator it;
#if 0
// At most one argument: the file path
if (argc > 1) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
string input;
if (argc == 1) {
input = *argv++;
if (input.empty()) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
path_to_thumb(input);
} else {
// No argument: process the paths read from stdin, one per line
while (getline(cin, input)) {
path_to_thumb(input);
}
}
exit(0);
#endif
}
#endif // TEST_RCLUTIL

112
src/utils/rclutil.h Normal file
View file

@ -0,0 +1,112 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLUTIL_H_INCLUDED_
#define _RCLUTIL_H_INCLUDED_
#include "autoconfig.h"
// Misc stuff not generic enough to get into smallut or pathut
#include <map>
#include <string>
#include MEMORY_INCLUDE
extern void rclutil_init_mt();
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_pkgdatadir();
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string& fcharset,
const std::string& in, std::string& out);
/// Same but, in the case of a Windows local path, also turn "c:/" into
/// "/c/" This should be used only for splitting the path in rcldb.
extern std::string url_gpathS(const std::string& url);
/// Retrieve the temp dir location: $RECOLL_TMPDIR, else $TMPDIR, $TMP or
/// $TEMP, else the system default (GetTempPath() on Windows, /tmp elsewhere)
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
/// Temporary file class. The constructor creates an empty file inside the
/// temp location (see tmplocation()), the destructor removes it unless
/// setnoremove(true) was called. Normally used through the TempFile shared
/// pointer type.
class TempFileInternal {
public:
    /// Create the file, with a name ending with suffix. Check ok().
    TempFileInternal(const std::string& suffix);
    ~TempFileInternal();
    /// Path of the temporary file, empty string if the creation failed.
    const char *filename() const {
        return m_filename.c_str();
    }
    /// Error message set when the creation failed.
    const std::string& getreason() const {
        return m_reason;
    }
    /// Prevent (true) or allow (false) file removal by the destructor.
    void setnoremove(bool onoff) {
        m_noremove = onoff;
    }
    /// True if the file could be created.
    bool ok() const {
        return !m_filename.empty();
    }
private:
    std::string m_filename;
    std::string m_reason;
    bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
/// The directory is created inside the temp location (see tmplocation()).
class TempDir {
public:
    /// Create the directory. Check ok() for success.
    TempDir();
    ~TempDir();
    /// Path of the directory, empty string if the creation failed.
    const char *dirname() const {
        return m_dirname.c_str();
    }
    /// Error message from the last failed operation.
    const std::string& getreason() const {
        return m_reason;
    }
    /// True if the directory could be created.
    bool ok() const {
        return !m_dirname.empty();
    }
    /// Recursively delete contents but not self.
    bool wipe();
private:
    std::string m_dirname;
    std::string m_reason;
    // Not copyable: two objects would delete the same directory. The copy
    // operations are declared but not defined, so that an accidental use
    // from inside the class fails at link time instead of compiling.
    TempDir(const TempDir&);
    TempDir& operator=(const TempDir&);
};
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size,
std::string& path);
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string, std::string> s,
std::map<std::string, std::string> *d);
#endif /* _RCLUTIL_H_INCLUDED_ */

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes
/* Copyright (C) 2004-2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -17,226 +17,20 @@
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
#include <stdlib.h>
#include <sys/types.h>
#include <string>
#include <vector>
#include <map>
#include <set>
using std::string;
using std::vector;
using std::map;
using std::set;
// Miscellaneous mostly string-oriented small utilities
// Note that none of the following code knows about utf-8.
// Note these are all ascii routines

// Case-insensitive compare (ASCII only). The predicate below treats a
// return value of 0 as "equal".
extern int stringicmp(const std::string& s1, const std::string& s2);

// Unary predicate for find_if etc.: true for strings which compare equal to
// the reference string according to stringicmp().
// Only a reference to s1 is stored, so the string passed to the constructor
// must outlive the predicate (do not pass a temporary).
struct StringIcmpPred {
    StringIcmpPred(const std::string& s1)
        : m_s1(s1)
    {}
    // const: the predicate keeps no mutable state, so it stays usable
    // through const references and const copies.
    bool operator()(const std::string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const std::string& m_s1;
};
// Compare s2 against a first argument which the caller has already
// converted to lower case.
// NOTE(review): presumably s2 is case-folded on the fly and the int result
// follows the strcmp() sign convention -- confirm against the definition.
extern int stringlowercmp(const string& alreadylower, const string& s2);
// Same as above with a first argument already converted to upper case.
extern int stringuppercmp(const string& alreadyupper, const string& s2);
// Lower-casing, in place (io is modified)...
extern void stringtolower(string& io);
// ... or returning a new string and leaving the input untouched.
extern string stringtolower(const string& io);
// Is one string the end part of the other ?
// NOTE(review): the meaning of the int result is not visible here; the
// "i" in the name suggests a case-insensitive comparison -- confirm.
extern int stringisuffcmp(const string& s1, const string& s2);
// Divine language from locale
extern std::string localelang();
// Divine 8bit charset from language
extern std::string langtocode(const string& lang);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const string &cs1, const string &cs2);
// Date interval as produced by parsedateinterval(): a pair of y,m,d dates.
//
// The textual interval format is based on a subset of iso 8601, with open
// intervals added and all time indications removed:
//   Dates:   YYYY-MM-DD YYYY-MM YYYY
//   Periods: P[nY][nM][nD] where n is an integer value. 'P' is the Period
//            indicator, followed by a length in years/months/days (or any
//            subset thereof). At least one of YMD must be specified.
//   The separator for the interval is /. Interval examples:
//            YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
struct DateInterval {
    // First y,m,d date
    int y1;
    int m1;
    int d1;
    // Second y,m,d date
    int y2;
    int m2;
    int d2;
};
// Parse a date interval specifier from s into *di.
// NOTE(review): presumably returns false when s cannot be parsed -- confirm
// against the definition.
extern bool parsedateinterval(const string&s, DateInterval *di);
// NOTE(review): presumably the number of days in month mon of the given
// year; whether mon is 0- or 1-based is not visible here -- confirm.
extern int monthdays(int mon, int year);
/**
* Parse input string into list of strings.
*
* Token delimiter is " \t\n" except inside dquotes. dquote inside
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as space tab etc.
* have the ascii values and can't appear as part of a multibyte char. utf-8 ok
* but so are the iso-8859-x and surely others. addseps do have to be
* single-bytes
*
* Only the template declaration is visible here: T is the output container
* type. NOTE(review): which container types are instantiated, and the
* exact meaning of the bool result (presumably false on a syntax error),
* must be checked against the implementation file.
*/
template <class T> bool stringToStrings(const string& s, T &tokens,
const string& addseps = "");
/**
* Inverse operation:
* two forms, one storing the result into s, one returning it.
*/
template <class T> void stringsToString(const T &tokens, string &s);
template <class T> std::string stringsToString(const T &tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
* The separator character defaults to ','.
*/
template <class T> void stringsToCSV(const T &tokens, string &s,
char sep = ',');
/**
* Split input string. No handling of quoting
* delims is the set of delimiter characters (default: space and tab).
* NOTE(review): skipinit presumably controls whether delimiters at the
* start of the input are skipped -- confirm against the definition.
*/
extern void stringToTokens(const string &s, vector<string> &tokens,
const string &delims = " \t", bool skipinit=true);
/** Convert string to boolean
* NOTE(review): the accepted spellings are not visible in this header. */
extern bool stringToBool(const string &s);
/** Remove instances of characters belonging to set (default {space,
tab}) at beginning and end of input string. Works in place on s. */
extern void trimstring(string &s, const char *ws = " \t");
/** Escape things like < or & by turning them into entities */
extern string escapeHtml(const string &in);
/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
* so chars should only contain ascii.
* Two forms: one returns the result, the other stores it into out. */
extern string neutchars(const string &str, const string &chars);
extern void neutchars(const string &str, string& out, const string &chars);
/** Turn string into something that won't be expanded by a shell. In practise
* quote with double-quotes and escape $`\ */
extern string escapeShell(const string &str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern string truncate_to_word(const string &input, string::size_type maxlen);
/** Truncate in place in an utf8-legal way
* NOTE(review): maxlen is presumably a byte count -- confirm. */
extern void utf8truncate(string &s, int maxlen);
/** Integer to string conversions, storing into buf or returning the result.
* NOTE(review): "decstr" presumably means decimal text -- confirm. */
void ulltodecstr(unsigned long long val, string& buf);
void lltodecstr(long long val, string& buf);
string lltodecstr(long long val);
string ulltodecstr(unsigned long long val);
/** Convert byte count into unit (KB/MB...) appropriate for display */
string displayableBytes(off_t size);
/** Break big string into lines
* NOTE(review): ll is presumably the maximum line length and maxlines the
* maximum number of lines produced -- confirm against the definition. */
string breakIntoLines(const string& in, unsigned int ll = 100,
unsigned int maxlines= 50);
/** Small utility to substitute printf-like percents cmds in a string.
* subs maps the character following the % to its replacement text. */
bool pcSubst(const string& in, string& out, const map<char, string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const string& in, string& out, const map<string, string>& subs);
/** Append system error message
* NOTE(review): presumably appends to *reason a text built from what and
* the message for the _errno code -- confirm. */
void catstrerror(string *reason, const char *what, int _errno);
/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
* makes it inaccessible */
struct tm;
time_t portable_timegm(struct tm *tm);
/** Temp buffer with automatic deallocation.
 *
 * Thin owner of a malloc()ed char buffer, released with free() by the
 * destructor.
 *
 * WARNING: no copy constructor or assignment operator is defined, so the
 * compiler-generated ones copy the raw pointer: copying a TempBuf makes
 * both objects free the same buffer. Do not copy instances.
 */
struct TempBuf {
    /** Empty buffer: buf() returns 0 until setsize() is called. */
    TempBuf()
        : m_buf(0)
    {}
    /** Allocate n bytes. buf() returns 0 if the allocation failed. */
    TempBuf(int n)
        : m_buf(static_cast<char *>(malloc(n)))
    {}
    ~TempBuf()
    {
        // free(0) is a no-op, no need to test the pointer first.
        free(m_buf);
    }
    /** Resize the buffer to n bytes, preserving contents like realloc().
     * @return the new buffer address, also available through buf(), or 0
     *   on failure, in which case the object owns no buffer any more.
     */
    char *setsize(int n)
    {
        char *resized = static_cast<char *>(realloc(m_buf, n));
        // When realloc() fails the old block is still allocated: release
        // it instead of leaking it when m_buf is overwritten below. A zero
        // size is excluded because some implementations free the block and
        // return 0 in that case.
        if (resized == 0 && n != 0) {
            free(m_buf);
        }
        m_buf = resized;
        return m_buf;
    }
    /** Current buffer address (0 if none). */
    char *buf() const {return m_buf;}
    char *m_buf;
};
// Left-pad s with '0' characters so that it is at least len characters long.
// Empty strings and strings which are already long enough are left untouched.
inline void leftzeropad(std::string& s, unsigned len)
{
    if (s.empty() || s.length() >= len) {
        return;
    }
    // insert() works in place: no need to assign its result back to s.
    s.insert(0, len - s.length(), '0');
}
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
// Presumably s is the source map and d points to the destination map.
// NOTE(review): s is taken by value, so the whole map is already copied once
// at the call. A const reference would avoid that copy, but the signature
// must stay in sync with the out-of-line definition -- confirm before
// changing.
void map_ss_cp_noshr(const std::map<std::string,std::string> s,
std::map<std::string,std::string> *d);
// Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
//
// The constructor and each chained operator() call store one key/value
// pair. A key given several times keeps the last value. The object
// converts to a copy of the accumulated map.
template <typename T, typename U>
class create_map
{
private:
    std::map<T, U> m_map;
public:
    create_map(const T& key, const U& val)
    {
        m_map[key] = val;
    }
    // Add one more pair, returning *this so that calls can be chained.
    create_map<T, U>& operator()(const T& key, const U& val)
    {
        m_map[key] = val;
        return *this;
    }
    // const: converting does not modify the builder, so const builders
    // can be converted too.
    operator std::map<T, U>() const
    {
        return m_map;
    }
};
// Chained-call builder for std::vector, for static initialization:
//   vector<int> v = create_vector<int> (1) (2) (3);
// The constructor and each operator() call append one value, in order. The
// object converts to a copy of the accumulated vector.
template <typename T>
class create_vector
{
private:
    std::vector<T> m_vector;
public:
    create_vector(const T& val)
    {
        m_vector.push_back(val);
    }
    // Append one more value, returning *this so that calls can be chained.
    create_vector<T>& operator()(const T& val)
    {
        m_vector.push_back(val);
        return *this;
    }
    // const: converting does not modify the builder, so const builders
    // can be converted too.
    operator std::vector<T>() const
    {
        return m_vector;
    }
};
// Call this before going multithread.
// NOTE(review): presumably forces the initialization of static data used
// by the routines in this file while still single-threaded -- confirm.
void smallut_init_mt();
#ifndef SMALLUT_DISABLE_MACROS
#ifndef MIN
// Smaller of A and B (B when they compare equal). Each argument appears
// twice in the expansion, so an argument with side effects may be evaluated
// twice.
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
#endif
@ -246,7 +40,194 @@ public:
#ifndef deleteZ
// Delete X, then set it to 0. The expansion is a bare brace-enclosed block:
// "if (c) deleteZ(p); else ..." does not compile because of the ';' which
// follows the block, so enclose the call in braces in that situation.
#define deleteZ(X) {delete X;X = 0;}
#endif
#endif /* SMALLUT_DISABLE_MACROS */
// Call this before going multithread.
// NOTE(review): presumably forces the initialization of static data used
// by the routines in this file while still single-threaded -- confirm.
void smallut_init_mt();
// Case-insensitive compare. ASCII ONLY !
// The predicate below treats a return value of 0 as "equal".
extern int stringicmp(const std::string& s1, const std::string& s2);

// Unary predicate for find_if etc.: true for strings which compare equal to
// the reference string according to stringicmp().
// Only a reference to s1 is stored, so the string passed to the constructor
// must outlive the predicate (do not pass a temporary).
struct StringIcmpPred {
    StringIcmpPred(const std::string& s1)
        : m_s1(s1) {
    }
    // const: the predicate keeps no mutable state, so it stays usable
    // through const references and const copies.
    bool operator()(const std::string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const std::string& m_s1;
};
// Compare s2 against a first argument which the caller has already
// converted to lower case.
// NOTE(review): presumably s2 is case-folded on the fly and the int result
// follows the strcmp() sign convention -- confirm against the definition.
extern int stringlowercmp(const std::string& alreadylower,
const std::string& s2);
// Same as above with a first argument already converted to upper case.
extern int stringuppercmp(const std::string& alreadyupper,
const std::string& s2);
// Lower-casing, in place (io is modified)...
extern void stringtolower(std::string& io);
// ... or returning a new string and leaving the input untouched.
extern std::string stringtolower(const std::string& io);
// Is one string the end part of the other ?
// NOTE(review): the meaning of the int result is not visible here; the
// "i" in the name suggests a case-insensitive comparison -- confirm.
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
// Divine language from locale
extern std::string localelang();
// Divine 8bit charset from language
extern std::string langtocode(const std::string& lang);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const std::string& cs1, const std::string& cs2);
// Parse date interval specifier into pair of y,m,d dates. The format
// for the time interval is based on a subset of iso 8601 with
// the addition of open intervals, and removal of all time indications.
// 'P' is the Period indicator, it's followed by a length in
// years/months/days (or any subset thereof)
// Dates: YYYY-MM-DD YYYY-MM YYYY
// Periods: P[nY][nM][nD] where n is an integer value.
// At least one of YMD must be specified
// The separator for the interval is /. Interval examples
// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
// This returns a pair of y,m,d dates.
struct DateInterval {
// First y,m,d date
int y1;
int m1;
int d1;
// Second y,m,d date
int y2;
int m2;
int d2;
};
// Parse the interval specifier s into *di.
// NOTE(review): presumably returns false when s cannot be parsed -- confirm
// against the definition.
extern bool parsedateinterval(const std::string& s, DateInterval *di);
// NOTE(review): presumably the number of days in month mon of the given
// year; whether mon is 0- or 1-based is not visible here -- confirm.
extern int monthdays(int mon, int year);
/**
* Parse input string into list of strings.
*
* Token delimiter is " \t\n" except inside dquotes. dquote inside
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as
* space tab etc. have the ascii values and can't appear as part of a
* multibyte char. utf-8 ok but so are the iso-8859-x and surely
* others. addseps do have to be single-bytes
*
* Only the template declaration is visible here: T is the output container
* type. NOTE(review): which container types are instantiated, and the
* exact meaning of the bool result (presumably false on a syntax error),
* must be checked against the implementation file.
*/
template <class T> bool stringToStrings(const std::string& s, T& tokens,
const std::string& addseps = "");
/**
* Inverse operation:
* two forms, one storing the result into s, one returning it.
*/
template <class T> void stringsToString(const T& tokens, std::string& s);
template <class T> std::string stringsToString(const T& tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
* The separator character defaults to ','.
*/
template <class T> void stringsToCSV(const T& tokens, std::string& s,
char sep = ',');
/**
* Split input string. No handling of quoting
* delims is the set of delimiter characters (default: space and tab).
* NOTE(review): skipinit presumably controls whether delimiters at the
* start of the input are skipped -- confirm against the definition.
*/
extern void stringToTokens(const std::string& s,
std::vector<std::string>& tokens,
const std::string& delims = " \t",
bool skipinit = true);
/** Convert string to boolean
* NOTE(review): the accepted spellings are not visible in this header. */
extern bool stringToBool(const std::string& s);
/** Remove instances of characters belonging to set (default {space,
tab}) at beginning and end of input string. Works in place on s. */
extern void trimstring(std::string& s, const char *ws = " \t");
/** Escape things like < or & by turning them into entities */
extern std::string escapeHtml(const std::string& in);
/** Replace some chars with spaces (ie: newline chars).
* Two forms: one returns the result, the other stores it into out. */
extern std::string neutchars(const std::string& str, const std::string& chars);
extern void neutchars(const std::string& str, std::string& out,
const std::string& chars);
/** Turn string into something that won't be expanded by a shell. In practise
* quote with double-quotes and escape $`\ */
extern std::string escapeShell(const std::string& str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern std::string truncate_to_word(const std::string& input,
std::string::size_type maxlen);
/** Integer to string conversions, storing into buf or returning the result.
* NOTE(review): "decstr" presumably means decimal text -- confirm. */
void ulltodecstr(unsigned long long val, std::string& buf);
void lltodecstr(long long val, std::string& buf);
std::string lltodecstr(long long val);
std::string ulltodecstr(unsigned long long val);
/** Convert byte count into unit (KB/MB...) appropriate for display */
std::string displayableBytes(off_t size);
/** Break big string into lines
* NOTE(review): ll is presumably the maximum line length and maxlines the
* maximum number of lines produced -- confirm against the definition. */
std::string breakIntoLines(const std::string& in, unsigned int ll = 100,
unsigned int maxlines = 50);
/** Small utility to substitute printf-like percents cmds in a string.
* subs maps the character following the % to its replacement text. */
bool pcSubst(const std::string& in, std::string& out,
const std::map<char, std::string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const std::string& in, std::string& out,
const std::map<std::string, std::string>& subs);
/** Append system error message
* NOTE(review): presumably appends to *reason a text built from what and
* the message for the _errno code -- confirm. */
void catstrerror(std::string *reason, const char *what, int _errno);
/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
* makes it inaccessible */
struct tm;
time_t portable_timegm(struct tm *tm);
// Left-pad s with '0' characters so that it is at least len characters long.
// Empty strings and strings which are already long enough are left untouched.
inline void leftzeropad(std::string& s, unsigned len)
{
    if (s.empty() || s.length() >= len) {
        return;
    }
    // insert() works in place: no need to assign its result back to s.
    s.insert(0, len - s.length(), '0');
}
// Code for static initialization of an stl map. Somewhat like Boost.assign.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
//
// The constructor and each chained operator() call store one key/value
// pair. A key given several times keeps the last value. The object
// converts to a copy of the accumulated map.
template <typename T, typename U>
class create_map {
private:
    std::map<T, U> m_map;
public:
    create_map(const T& key, const U& val) {
        m_map[key] = val;
    }
    // Add one more pair, returning *this so that calls can be chained.
    create_map<T, U>& operator()(const T& key, const U& val) {
        m_map[key] = val;
        return *this;
    }
    // const: converting does not modify the builder, so const builders
    // can be converted too.
    operator std::map<T, U>() const {
        return m_map;
    }
};
// Chained-call builder for std::vector, for static initialization:
//   vector<int> v = create_vector<int> (1) (2) (3);
// The constructor and each operator() call append one value, in order. The
// object converts to a copy of the accumulated vector.
template <typename T>
class create_vector {
private:
    std::vector<T> m_vector;
public:
    create_vector(const T& val) {
        m_vector.push_back(val);
    }
    // Append one more value, returning *this so that calls can be chained.
    create_vector<T>& operator()(const T& val) {
        m_vector.push_back(val);
        return *this;
    }
    // const: converting does not modify the builder, so const builders
    // can be converted too.
    operator std::vector<T>() const {
        return m_vector;
    }
};
#endif /* _SMALLUT_H_INCLUDED_ */