moved code around to make smallut and pathut less recoll-specific and reusable. No actual changes

This commit is contained in:
Jean-Francois Dockes 2016-03-21 12:55:31 +01:00
parent 7b2a455b80
commit 35de51985b
26 changed files with 1821 additions and 1493 deletions

View file

@ -207,6 +207,7 @@ utils/fileudi.h \
utils/fstreewalk.cpp \ utils/fstreewalk.cpp \
utils/fstreewalk.h \ utils/fstreewalk.h \
utils/hldata.h \ utils/hldata.h \
utils/hldata.cpp \
utils/idfile.cpp \ utils/idfile.cpp \
utils/idfile.h \ utils/idfile.h \
utils/md5.cpp \ utils/md5.cpp \
@ -224,6 +225,8 @@ utils/pxattr.cpp \
utils/pxattr.h \ utils/pxattr.h \
utils/rclionice.cpp \ utils/rclionice.cpp \
utils/rclionice.h \ utils/rclionice.h \
utils/rclutil.h \
utils/rclutil.cpp \
utils/readfile.cpp \ utils/readfile.cpp \
utils/readfile.h \ utils/readfile.h \
utils/refcntr.h \ utils/refcntr.h \

View file

@ -39,6 +39,7 @@
#include "cstr.h" #include "cstr.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h" #include "rclconfig.h"
#include "conftree.h" #include "conftree.h"
#include "debuglog.h" #include "debuglog.h"
@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf)
} }
// Compute our data dir name, typically /usr/local/share/recoll // Compute our data dir name, typically /usr/local/share/recoll
m_datadir = path_sharedatadir(); m_datadir = path_pkgdatadir();
// We only do the automatic configuration creation thing for the default // We only do the automatic configuration creation thing for the default
// config dir, not if it was specified through -c or RECOLL_CONFDIR // config dir, not if it was specified through -c or RECOLL_CONFDIR
bool autoconfdir = false; bool autoconfdir = false;

View file

@ -32,6 +32,7 @@
#include "rclconfig.h" #include "rclconfig.h"
#include "rclinit.h" #include "rclinit.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "unac.h" #include "unac.h"
#include "smallut.h" #include "smallut.h"
#include "execmd.h" #include "execmd.h"
@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags,
// Init smallut and pathut static values // Init smallut and pathut static values
pathut_init_mt(); pathut_init_mt();
smallut_init_mt(); smallut_init_mt();
rclutil_init_mt();
// Init execmd.h static PATH and PATHELT splitting // Init execmd.h static PATH and PATHELT splitting
{string bogus; {string bogus;
ExecCmd::which("nosuchcmd", bogus); ExecCmd::which("nosuchcmd", bogus);
@ -389,4 +392,3 @@ bool recoll_ismainthread()
return pthread_equal(pthread_self(), mainthread_id); return pthread_equal(pthread_self(), mainthread_id);
} }

View file

@ -18,40 +18,39 @@
#define _RCLINIT_H_INCLUDED_ #define _RCLINIT_H_INCLUDED_
#include <string> #include <string>
#ifndef NO_NAMESPACES
using std::string;
#endif
class RclConfig; class RclConfig;
/** /**
* Initialize by reading configuration, opening log file, etc. * Initialize by reading configuration, opening log file, etc.
* *
* This must be called from the main thread before starting any others. It sets * This must be called from the main thread before starting any others. It sets
* up the global signal handling. other threads must call recoll_threadinit() * up the global signal handling. other threads must call recoll_threadinit()
* when starting. * when starting.
* *
* @param flags misc modifiers. These are currently only used to customize * @param flags misc modifiers. These are currently only used to customize
* the log file and verbosity. * the log file and verbosity.
* @param cleanup function to call before exiting (atexit) * @param cleanup function to call before exiting (atexit)
* @param sigcleanup function to call on terminal signal (INT/HUP...) This * @param sigcleanup function to call on terminal signal (INT/HUP...) This
* should typically set a flag which tells the program (recoll, * should typically set a flag which tells the program (recoll,
* recollindex etc.. to exit as soon as possible (after closing the db, * recollindex etc.. to exit as soon as possible (after closing the db,
* etc.). cleanup will then be called by exit(). * etc.). cleanup will then be called by exit().
* @param reason in case of error: output string explaining things * @param reason in case of error: output string explaining things
* @param argcnf Configuration directory name from the command line (overriding * @param argcnf Configuration directory name from the command line (overriding
* default and environment * default and environment
* @return the parsed configuration. * @return the parsed configuration.
*/ */
enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2}; enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2};
extern RclConfig *recollinit(RclInitFlags flags, extern RclConfig *recollinit(RclInitFlags flags,
void (*cleanup)(void), void (*sigcleanup)(int), void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0); std::string& reason, const string *argcnf = 0);
inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int), inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int),
string &reason, const string *argcnf = 0) { std::string& reason,
const std::string *argcnf = 0)
{
return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf); return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf);
} }
// Threads need to call this to block signals. // Threads need to call this to block signals.
// The main thread handles all signals. // The main thread handles all signals.
extern void recoll_threadinit(); extern void recoll_threadinit();

View file

@ -23,6 +23,7 @@
#include "cstr.h" #include "cstr.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "debuglog.h" #include "debuglog.h"
#include "fstreewalk.h" #include "fstreewalk.h"
#include "beaglequeue.h" #include "beaglequeue.h"

View file

@ -28,6 +28,7 @@
#include "cstr.h" #include "cstr.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "conftree.h" #include "conftree.h"
#include "rclconfig.h" #include "rclconfig.h"
#include "fstreewalk.h" #include "fstreewalk.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "smallut.h" #include "smallut.h"
#include "chrono.h" #include "chrono.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "rclmon.h" #include "rclmon.h"
#include "x11mon.h" #include "x11mon.h"
#include "cancelcheck.h" #include "cancelcheck.h"

View file

@ -21,6 +21,7 @@
#include <string> #include <string>
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "ptmutex.h" #include "ptmutex.h"
/// Uncompression script interface. /// Uncompression script interface.

View file

@ -32,6 +32,7 @@ using namespace std;
#include "searchdata.h" #include "searchdata.h"
#include "rclquery.h" #include "rclquery.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "wasatorcl.h" #include "wasatorcl.h"
#include "debuglog.h" #include "debuglog.h"
#include "pathut.h" #include "pathut.h"

View file

@ -23,6 +23,7 @@
#include "rcldoc.h" #include "rcldoc.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "rclconfig.h" #include "rclconfig.h"
/* /*

View file

@ -21,6 +21,7 @@
#include "rclconfig.h" #include "rclconfig.h"
#include "rcldb.h" #include "rcldb.h"
#include "rclutil.h"
#include "ptmutex.h" #include "ptmutex.h"
#include <QString> #include <QString>

View file

@ -34,6 +34,7 @@ using std::list;
#include "debuglog.h" #include "debuglog.h"
#include "rclconfig.h" #include "rclconfig.h"
#include "smallut.h" #include "smallut.h"
#include "rclutil.h"
#include "plaintorich.h" #include "plaintorich.h"
#include "mimehandler.h" #include "mimehandler.h"

View file

@ -42,6 +42,7 @@ using namespace std;
#include "unacpp.h" #include "unacpp.h"
#include "conftree.h" #include "conftree.h"
#include "pathut.h" #include "pathut.h"
#include "rclutil.h"
#include "smallut.h" #include "smallut.h"
#include "chrono.h" #include "chrono.h"
#include "utf8iter.h" #include "utf8iter.h"
@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi)
return pterm; return pterm;
} }
// Shorten s to fewer than maxlen bytes. The cut is made at a byte offset
// reported by Utf8Iter (presumably a character boundary, so that no
// multi-byte sequence gets split -- confirm against utf8iter.h).
// A string of at most maxlen bytes is left untouched. If the iterator
// never reports an offset below maxlen, the string ends up empty.
static void utf8truncate(string& s, int maxlen)
{
    const string::size_type limit = string::size_type(maxlen);
    if (s.size() <= limit) {
        return;
    }

    // Walk the whole string, remembering the last offset below the limit.
    string::size_type cutpos = 0;
    for (Utf8Iter walker(s); walker++ != string::npos;) {
        const string::size_type bpos = walker.getBpos();
        if (bpos < limit) {
            cutpos = bpos;
        }
    }
    s.erase(cutpos);
}
Db::Native::Native(Db *db) Db::Native::Native(Db *db)
: m_rcldb(db), m_isopen(false), m_iswritable(false), : m_rcldb(db), m_isopen(false), m_iswritable(false),
m_noversionwrite(false) m_noversionwrite(false)

View file

@ -14,9 +14,11 @@
* Free Software Foundation, Inc., * Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ */
#include "autoconfig.h"
#include "rcldoc.h" #include "rcldoc.h"
#include "debuglog.h" #include "debuglog.h"
#include "rclutil.h"
namespace Rcl { namespace Rcl {
const string Doc::keyabs("abstract"); const string Doc::keyabs("abstract");
@ -70,5 +72,31 @@ namespace Rcl {
if (dotext) if (dotext)
LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str())); LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str()));
} }
// Copy every field of this document into *d, ensuring that the copy
// shares no string data with the source (for threading issues).
// String members are rebuilt from iterator ranges and the metadata map
// goes through map_ss_cp_noshr(), which rebuilds keys and values the
// same way. Scalar members are plainly assigned.
// @param d destination document, must not be null.
void Doc::copyto(Doc *d) const
{
    // String fields: assign(begin, end) builds fresh character storage
    d->url.assign(url.begin(), url.end());
    d->idxurl.assign(idxurl.begin(), idxurl.end());
    d->ipath.assign(ipath.begin(), ipath.end());
    d->mimetype.assign(mimetype.begin(), mimetype.end());
    d->fmtime.assign(fmtime.begin(), fmtime.end());
    d->dmtime.assign(dmtime.begin(), dmtime.end());
    d->origcharset.assign(origcharset.begin(), origcharset.end());
    d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
    d->fbytes.assign(fbytes.begin(), fbytes.end());
    d->dbytes.assign(dbytes.begin(), dbytes.end());
    d->sig.assign(sig.begin(), sig.end());
    d->text.assign(text.begin(), text.end());

    // Metadata: entries are inserted into d->meta, existing keys are kept
    map_ss_cp_noshr(meta, &d->meta);

    // Plain values (idxi used to be assigned twice, once is enough)
    d->idxi = idxi;
    d->syntabs = syntabs;
    d->pc = pc;
    d->xdocid = xdocid;
    d->haspages = haspages;
    d->haschildren = haschildren;
    d->onlyxattr = onlyxattr;
}
} }

View file

@ -163,33 +163,11 @@ class Doc {
onlyxattr = false; onlyxattr = false;
} }
// Copy ensuring no shared string data, for threading issues. // Copy ensuring no shared string data, for threading issues.
void copyto(Doc *d) const { void copyto(Doc *d) const;
d->url.assign(url.begin(), url.end());
d->idxurl.assign(idxurl.begin(), idxurl.end());
d->idxi = idxi;
d->ipath.assign(ipath.begin(), ipath.end());
d->mimetype.assign(mimetype.begin(), mimetype.end());
d->fmtime.assign(fmtime.begin(), fmtime.end());
d->dmtime.assign(dmtime.begin(), dmtime.end());
d->origcharset.assign(origcharset.begin(), origcharset.end());
map_ss_cp_noshr(meta, &d->meta);
d->syntabs = syntabs;
d->pcbytes.assign(pcbytes.begin(), pcbytes.end());
d->fbytes.assign(fbytes.begin(), fbytes.end());
d->dbytes.assign(dbytes.begin(), dbytes.end());
d->sig.assign(sig.begin(), sig.end());
d->text.assign(text.begin(), text.end());
d->pc = pc;
d->xdocid = xdocid;
d->idxi = idxi;
d->haspages = haspages;
d->haschildren = haschildren;
d->onlyxattr = onlyxattr;
}
Doc() Doc()
: idxi(0), syntabs(false), pc(0), xdocid(0), : idxi(0), syntabs(false), pc(0), xdocid(0),
haspages(false), haschildren(false), onlyxattr(false) haspages(false), haschildren(false), onlyxattr(false) {
{
} }
/** Get value for named field. If value pointer is 0, just test existence */ /** Get value for named field. If value pointer is 0, just test existence */
bool getmeta(const string& nm, string *value = 0) const bool getmeta(const string& nm, string *value = 0) const

View file

@ -32,10 +32,12 @@
#include <iostream> #include <iostream>
#include UNORDERED_MAP_INCLUDE #include UNORDERED_MAP_INCLUDE
using std::string;
#include "smallut.h" #include "smallut.h"
using std::string;
using std::vector;
/* /*
Storage for the exception translations. These are chars which Storage for the exception translations. These are chars which
should not be translated according to what UnicodeData says, but should not be translated according to what UnicodeData says, but

View file

@ -71,6 +71,28 @@ typedef unsigned char UCHAR;
typedef unsigned int UINT; typedef unsigned int UINT;
typedef unsigned long ULONG; typedef unsigned long ULONG;
/** Temp buffer with automatic deallocation.
 *
 * Owns a malloc()ed block which is released by the destructor. The
 * implicitly generated copy operations would make two objects free the
 * same block, so instances must not be copied.
 */
struct TempBuf {
    /** Start with no buffer: buf() returns 0 until setsize() is called */
    TempBuf()
        : m_buf(0) {
    }
    /** Allocate n bytes. buf() returns 0 if the allocation failed */
    TempBuf(int n)
        : m_buf((char *)malloc(n)) {
    }
    ~TempBuf() {
        // free() accepts a null pointer, no need to test
        free(m_buf);
    }
    /** Resize the buffer to n bytes, keeping the existing contents (up to
     *  the smaller of the old and new sizes).
     * @return the new buffer address, or 0 if n <= 0 or if the allocation
     *   failed. In both cases the previous block was released and buf()
     *   returns 0.
     */
    char *setsize(int n) {
        if (n <= 0) {
            // realloc() with a zero size is not portable, and a negative
            // size can't succeed: just release what we hold.
            free(m_buf);
            m_buf = 0;
            return 0;
        }
        char *nbuf = (char *)realloc(m_buf, n);
        if (nbuf == 0) {
            // On failure realloc() leaves the old block allocated. Free it
            // here, else overwriting m_buf would leak it.
            free(m_buf);
        }
        m_buf = nbuf;
        return m_buf;
    }
    /** Current buffer address, possibly 0 */
    char *buf() {
        return m_buf;
    }
    char *m_buf;
};
static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp); static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp);
/* /*

View file

@ -18,10 +18,16 @@
#ifndef TEST_CPUCONF #ifndef TEST_CPUCONF
#include "autoconfig.h" #include "autoconfig.h"
#include <stdlib.h>
#include "cpuconf.h" #include "cpuconf.h"
#include "execmd.h" #include "execmd.h"
#include "smallut.h" #include "smallut.h"
using std::string;
using std::vector;
#if defined(__gnu_linux__) #if defined(__gnu_linux__)
bool getCpuConf(CpuConf& conf) bool getCpuConf(CpuConf& conf)
{ {

78
src/utils/hldata.cpp Normal file
View file

@ -0,0 +1,78 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <stdio.h>
#include "hldata.h"
using std::string;
using std::map;
// Append a printable dump of the highlight data to out: the user terms,
// the user term to query term map, then the term groups. Each group is
// printed with its slack value, and the groups are introduced by the user
// group they reference through grpsugidx (printed once per run of groups
// sharing the same index).
void HighlightData::toString(string& out)
{
    out.append("\nUser terms (orthograph): ");
    std::set<string>::const_iterator ut = uterms.begin();
    while (ut != uterms.end()) {
        out.append(" [").append(*ut).append("]");
        ++ut;
    }

    out.append("\nUser terms to Query terms:");
    map<string, string>::const_iterator tt = terms.begin();
    while (tt != terms.end()) {
        out.append("[").append(tt->first).append("]->[");
        out.append(tt->second).append("] ");
        ++tt;
    }

    out.append("\nGroups: ");
    char cbuf[200];
    sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
            int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
    out.append(cbuf);

    // Index of the user group last printed. Starts with a value which
    // differs from the first real index.
    size_t curug = (size_t) - 1;
    for (unsigned int gi = 0; gi < groups.size(); gi++) {
        if (grpsugidx[gi] != curug) {
            // New user group: print its terms ahead of the expansions
            curug = grpsugidx[gi];
            out.append("\n(");
            for (unsigned int k = 0; k < ugroups[curug].size(); k++) {
                out.append("[");
                out.append(ugroups[curug][k]);
                out.append("] ");
            }
            out.append(") ->");
        }

        out.append(" {");
        for (unsigned int k = 0; k < groups[gi].size(); k++) {
            out.append("[");
            out.append(groups[gi][k]);
            out.append("]");
        }
        out.append("}");
        sprintf(cbuf, "%d", slacks[gi]);
        out.append(cbuf);
    }
    out.append("\n");
}
// Merge the data from hl into this object. Term sets and maps are
// unioned, group lists are concatenated. The user group indices coming
// from hl are shifted by the number of user groups which we held before
// the merge, so that they keep designating the same (now appended)
// entries.
void HighlightData::append(const HighlightData& hl)
{
    // Must be taken before ugroups grows
    const size_t ugoffset = ugroups.size();

    uterms.insert(hl.uterms.begin(), hl.uterms.end());
    terms.insert(hl.terms.begin(), hl.terms.end());

    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());

    const size_t cnt = hl.grpsugidx.size();
    for (size_t i = 0; i < cnt; i++) {
        grpsugidx.push_back(hl.grpsugidx[i] + ugoffset);
    }
}

View file

@ -4,6 +4,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <set> #include <set>
#include <map>
/** Store data about user search terms and their expansions. This is used /** Store data about user search terms and their expansions. This is used
* mostly for highlighting result text and walking the matches, generating * mostly for highlighting result text and walking the matches, generating

File diff suppressed because it is too large Load diff

View file

@ -16,63 +16,54 @@
*/ */
#ifndef _PATHUT_H_INCLUDED_ #ifndef _PATHUT_H_INCLUDED_
#define _PATHUT_H_INCLUDED_ #define _PATHUT_H_INCLUDED_
#include "autoconfig.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include <set> #include <set>
#include MEMORY_INCLUDE // Must be called in main thread before starting other threads
extern void pathut_init_mt();
/// Add a / at the end if none there yet. /// Add a / at the end if none there yet.
extern void path_catslash(std::string &s); extern void path_catslash(std::string& s);
/// Concatenate 2 paths /// Concatenate 2 paths
extern std::string path_cat(const std::string &s1, const std::string &s2); extern std::string path_cat(const std::string& s1, const std::string& s2);
/// Get the simple file name (get rid of any directory path prefix /// Get the simple file name (get rid of any directory path prefix
extern std::string path_getsimple(const std::string &s); extern std::string path_getsimple(const std::string& s);
/// Simple file name + optional suffix stripping /// Simple file name + optional suffix stripping
extern std::string path_basename(const std::string &s, extern std::string path_basename(const std::string& s,
const std::string &suff = std::string()); const std::string& suff = std::string());
/// Component after last '.' /// Component after last '.'
extern std::string path_suffix(const std::string &s); extern std::string path_suffix(const std::string& s);
/// Get the father directory /// Get the father directory
extern std::string path_getfather(const std::string &s); extern std::string path_getfather(const std::string& s);
/// Get the current user's home directory /// Get the current user's home directory
extern std::string path_home(); extern std::string path_home();
/// Expand ~ at the beginning of std::string /// Expand ~ at the beginning of std::string
extern std::string path_tildexpand(const std::string &s); extern std::string path_tildexpand(const std::string& s);
/// Use getcwd() to make absolute path if needed. Beware: ***this can fail*** /// Use getcwd() to make absolute path if needed. Beware: ***this can fail***
/// we return an empty path in this case. /// we return an empty path in this case.
extern std::string path_absolute(const std::string &s); extern std::string path_absolute(const std::string& s);
/// Clean up path by removing duplicated / and resolving ../ + make it absolute /// Clean up path by removing duplicated / and resolving ../ + make it absolute
extern std::string path_canon(const std::string &s, const std::string *cwd=0); extern std::string path_canon(const std::string& s, const std::string *cwd = 0);
/// Use glob(3) to return the file names matching pattern inside dir /// Use glob(3) to return the file names matching pattern inside dir
extern std::vector<std::string> path_dirglob(const std::string &dir, extern std::vector<std::string> path_dirglob(const std::string& dir,
const std::string pattern); const std::string pattern);
/// Encode according to rfc 1738 /// Encode according to rfc 1738
extern std::string url_encode(const std::string& url, extern std::string url_encode(const std::string& url,
std::string::size_type offs = 0); std::string::size_type offs = 0);
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string &fcharset,
const std::string &in, std::string &out);
//// Convert to file path if url is like file://. This modifies the //// Convert to file path if url is like file://. This modifies the
//// input (and returns a copy for convenience) //// input (and returns a copy for convenience)
extern std::string fileurltolocalpath(std::string url); extern std::string fileurltolocalpath(std::string url);
/// Test for file:/// url /// Test for file:/// url
extern bool urlisfileurl(const std::string& url); extern bool urlisfileurl(const std::string& url);
/// ///
extern std::string url_parentfolder(const std::string& url); extern std::string url_parentfolder(const std::string& url);
/// Return the host+path part of an url. This is not a general /// Return the host+path part of an url. This is not a general
/// routine, it does the right thing only in the recoll context /// routine, it does the right thing only in the recoll context
extern std::string url_gpath(const std::string& url); extern std::string url_gpath(const std::string& url);
/// Same but, in the case of a Windows local path, also turn "c:/" into
/// "/c/" This should be used only for splitting the path in rcldb, it
/// would better be local in there, but I prefer to keep all the
/// system-specific path stuff in pathut
extern std::string url_gpathS(const std::string& url);
/// Stat parameter and check if it's a directory /// Stat parameter and check if it's a directory
extern bool path_isdir(const std::string& path); extern bool path_isdir(const std::string& path);
@ -92,37 +83,28 @@ extern int path_fileprops(const std::string path, struct stat *stp,
/// Check that path is traversable and last element exists /// Check that path is traversable and last element exists
/// Returns true if last elt could be checked to exist. False may mean that /// Returns true if last elt could be checked to exist. False may mean that
/// the file/dir does not exist or that an error occurred. /// the file/dir does not exist or that an error occurred.
extern bool path_exists(const std::string& path); extern bool path_exists(const std::string& path);
/// Return separator for PATH environment variable /// Return separator for PATH environment variable
extern std::string path_PATHsep(); extern std::string path_PATHsep();
/// Dump directory /// Dump directory
extern bool readdir(const std::string& dir, std::string& reason, extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries); std::set<std::string>& entries);
/** A small wrapper around statfs et al, to return percentage of disk /** A small wrapper around statfs et al, to return percentage of disk
occupation */ occupation
bool fsocc(const std::string &path, int *pc, // Percent occupied @param[output] pc percent occupied
long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024 @param[output] avmbs Mbs available to non-superuser. Mb=1024*1024
); */
bool fsocc(const std::string& path, int *pc, long long *avmbs = 0);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
/// mkdir -p /// mkdir -p
extern bool makepath(const std::string& path); extern bool makepath(const std::string& path);
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// Where we create the user data subdirs /// Where we create the user data subdirs
extern std::string path_homedata(); extern std::string path_homedata();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_sharedatadir();
/// Test if path is absolute /// Test if path is absolute
extern bool path_isabsolute(const std::string& s); extern bool path_isabsolute(const std::string& s);
@ -137,58 +119,12 @@ extern std::string path_pathtofileurl(const std::string& path);
void path_slashize(std::string& s); void path_slashize(std::string& s);
#endif #endif
/// Temporary file class
class TempFileInternal {
public:
TempFileInternal(const std::string& suffix);
~TempFileInternal();
const char *filename()
{
return m_filename.c_str();
}
const std::string &getreason()
{
return m_reason;
}
void setnoremove(bool onoff)
{
m_noremove = onoff;
}
bool ok()
{
return !m_filename.empty();
}
private:
std::string m_filename;
std::string m_reason;
bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
class TempDir {
public:
TempDir();
~TempDir();
const char *dirname() {return m_dirname.c_str();}
const std::string &getreason() {return m_reason;}
bool ok() {return !m_dirname.empty();}
/// Recursively delete contents but not self.
bool wipe();
private:
std::string m_dirname;
std::string m_reason;
TempDir(const TempDir &) {}
TempDir& operator=(const TempDir &) {return *this;};
};
/// Lock/pid file class. This is quite close to the pidfile_xxx /// Lock/pid file class. This is quite close to the pidfile_xxx
/// utilities in FreeBSD with a bit more encapsulation. I'd have used /// utilities in FreeBSD with a bit more encapsulation. I'd have used
/// the freebsd code if it was available elsewhere /// the freebsd code if it was available elsewhere
class Pidfile { class Pidfile {
public: public:
Pidfile(const std::string& path) : m_path(path), m_fd(-1) {} Pidfile(const std::string& path) : m_path(path), m_fd(-1) {}
~Pidfile(); ~Pidfile();
/// Open/create the pid file. /// Open/create the pid file.
/// @return 0 if ok, > 0 for pid of existing process, -1 for other error. /// @return 0 if ok, > 0 for pid of existing process, -1 for other error.
@ -200,7 +136,9 @@ public:
int close(); int close();
/// Delete the pid file /// Delete the pid file
int remove(); int remove();
const std::string& getreason() {return m_reason;} const std::string& getreason() {
return m_reason;
}
private: private:
std::string m_path; std::string m_path;
int m_fd; int m_fd;
@ -209,14 +147,4 @@ private:
int flopen(); int flopen();
}; };
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size, std::string& path);
// Must be called in main thread before starting other threads
extern void pathut_init_mt();
#endif /* _PATHUT_H_INCLUDED_ */ #endif /* _PATHUT_H_INCLUDED_ */

411
src/utils/rclutil.cpp Normal file
View file

@ -0,0 +1,411 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_RCLUTIL
#include "autoconfig.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "safefcntl.h"
#include "safeunistd.h"
#include "dirent.h"
#include "cstr.h"
#ifdef _WIN32
#include "safewindows.h"
#else
#include <sys/param.h>
#include <pwd.h>
#include <sys/file.h>
#endif
#include <math.h>
#include <errno.h>
#include <sys/types.h>
#include "safesysstat.h"
#include "ptmutex.h"
#include "rclutil.h"
#include "pathut.h"
#include "wipedir.h"
#include "transcode.h"
#include "md5ut.h"
using namespace std;
// Copy the entries of s into *d without sharing any string data: every
// key and value is rebuilt from an iterator range, which yields fresh
// character storage even with a reference-counted string implementation.
// *d is not cleared first, and insert() semantics apply: an entry whose
// key already exists in *d keeps its current value.
void map_ss_cp_noshr(const std::map<std::string, std::string> s,
                     std::map<std::string, std::string> *d)
{
    std::map<std::string, std::string>::const_iterator ent = s.begin();
    while (ent != s.end()) {
        const std::string& key = ent->first;
        const std::string& val = ent->second;
        d->insert(std::pair<std::string, std::string>(
                      std::string(key.begin(), key.end()),
                      std::string(val.begin(), val.end())));
        ++ent;
    }
}
// Name of the sub-directory used for the default recoll configuration:
// "Recoll" on Windows, ".recoll" elsewhere.
std::string path_defaultrecollconfsubdir()
{
#ifdef _WIN32
    static const char subdir[] = "Recoll";
#else
    static const char subdir[] = ".recoll";
#endif
    return std::string(subdir);
}
// Location for the sample configuration, filters, etc. (e.g.
// /usr/share/recoll/).
// On Windows this is the "Share" directory under path_thisexecpath().
// Elsewhere, the RECOLL_DATADIR environment variable is used if it is
// set, else the compiled-in RECOLL_DATADIR constant.
// The value is computed on the first call and kept in a function-local
// static, which is why rclutil_init_mt() calls us once.
const string& path_pkgdatadir()
{
    static string datadir;
    if (!datadir.empty()) {
        // Already computed
        return datadir;
    }

#ifdef _WIN32
    datadir = path_cat(path_thisexecpath(), "Share");
#else
    const char *envdir = getenv("RECOLL_DATADIR");
    if (envdir != 0) {
        datadir = envdir;
    } else {
        // Not in the environment: use the compiled-in constant.
        datadir = RECOLL_DATADIR;
    }
#endif
    return datadir;
}
// Printable url: transcode the input from the given charset to UTF-8.
// If the transcoding fails, or reports any error count, the output is
// the url-encoded input instead.
// The encoding is requested from offset 7 (presumably to leave a
// leading "file://" alone -- confirm against url_encode()).
// Always returns true.
bool printableUrl(const string& fcharset, const string& in, string& out)
{
    int errcount = 0;
    const bool converted = transcode(in, out, fcharset, "UTF-8", &errcount);
    if (!converted || errcount != 0) {
        out = url_encode(in, 7);
    }
    return true;
}
// Same as url_gpath(), except that, on Windows, a path which begins with
// a drive specification has it turned into a leading directory
// ("c:/x" becomes "/c/x"). Still on Windows, an empty string is returned
// when url_gpath() yields a path without a drive.
string url_gpathS(const string& url)
{
#ifdef _WIN32
    string gpath = url_gpath(url);
    string out;
    if (!path_hasdrive(gpath)) {
        return out;
    }

    // "/" + drive letter
    out += '/';
    out += gpath[0];
    if (!path_isdriveabs(gpath)) {
        // Drive-relative path. This should be an error really
        out += '/';
    }
    // What follows the "x:"
    out += gpath.substr(2);
    return out;
#else
    return url_gpath(url);
#endif
}
// Retrieve the location for temporary files. The first one set among
// the RECOLL_TMPDIR, TMPDIR, TMP and TEMP environment variables is used.
// If none is set, we use the GetTempPath() value on Windows, and /tmp
// elsewhere. The result goes through path_canon() and is kept in a
// function-local static for the following calls.
const string& tmplocation()
{
    static string stmpdir;
    if (!stmpdir.empty()) {
        return stmpdir;
    }

    // Candidate environment variables, by decreasing priority
    static const char *const envnames[] = {
        "RECOLL_TMPDIR", "TMPDIR", "TMP", "TEMP"
    };
    const char *tmpdir = 0;
    for (unsigned int i = 0;
         tmpdir == 0 && i < sizeof(envnames) / sizeof(envnames[0]); i++) {
        tmpdir = getenv(envnames[i]);
    }

    if (tmpdir != 0) {
        stmpdir = tmpdir;
    } else {
#ifdef _WIN32
        TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)];
        GetTempPath(MAX_PATH + 1, bufw);
        stmpdir = path_tchartoutf8(bufw);
#else
        stmpdir = "/tmp";
#endif
    }
    stmpdir = path_canon(stmpdir);
    return stmpdir;
}
// Create a temporary directory inside the tmplocation() directory.
// @param[out] tdir set to the path of the created directory, erased
//    if the function fails.
// @param[out] reason set to an explanatory message on failure.
// @return true if the directory was created, else false.
bool maketmpdir(string& tdir, string& reason)
{
#ifndef _WIN32
tdir = path_cat(tmplocation(), "rcltmpXXXXXX");
// The name generation call below gets a writable C string: work on a
// copy of the template, which is freed on all paths.
char *cp = strdup(tdir.c_str());
if (!cp) {
reason = "maketmpdir: out of memory (for file name !)\n";
tdir.erase();
return false;
}
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
#endif
// Use mkdtemp() when the build configuration has it, else mktemp()
// followed by the mkdir() call further down.
if (!
#ifdef HAVE_MKDTEMP
mkdtemp(cp)
#else
mktemp(cp)
#endif // HAVE_MKDTEMP
) {
free(cp);
// The message says "mktemp" in both configurations.
// NOTE(review): errno is read after free(), which could in theory
// have changed it.
reason = "maketmpdir: mktemp failed for [" + tdir + "] : " +
strerror(errno);
tdir.erase();
return false;
}
tdir = cp;
free(cp);
#else // _WIN32
// There is a race condition between name computation and
// mkdir. try to make sure that we at least don't shoot ourselves
// in the foot
static PTMutexInit mlock;
PTMutexLocker lock(mlock);
tdir = path_wingettempfilename(TEXT("rcltmp"));
#endif
// At this point the directory does not exist yet except if we used
// mkdtemp
#if !defined(HAVE_MKDTEMP) || defined(_WIN32)
// Created with mode 0700: access for the owner only
if (mkdir(tdir.c_str(), 0700) < 0) {
reason = string("maketmpdir: mkdir ") + tdir + " failed";
tdir.erase();
return false;
}
#endif
return true;
}
// Create an empty temporary file, with a name ending with the given
// suffix, inside the tmplocation() directory.
// On success, the name is stored in m_filename. On failure, m_filename
// is left empty (ok() tests this) and m_reason holds a message.
// @param suffix appended as is to the generated unique name.
TempFileInternal::TempFileInternal(const string& suffix)
    : m_noremove(false)
{
    // Because we need a specific suffix, we can't really use mkstemp().
    // There is a race condition between the name computation and the
    // file creation. Serializing our own calls at least ensures that we
    // don't shoot ourselves in the foot. Maybe we'll use mkstemps() one
    // day.
    static PTMutexInit mlock;
    PTMutexLocker lock(mlock);

#ifndef _WIN32
    string filename = path_cat(tmplocation(), "rcltmpfXXXXXX");
    // mkstemp() modifies its argument: give it a writable copy
    char *cp = strdup(filename.c_str());
    if (!cp) {
        m_reason = "Out of memory (for file name !)\n";
        return;
    }

    // Using mkstemp() this way is awful (both the suffix adding, and
    // using mkstemp() instead of mktemp() just to avoid the warnings):
    // we only keep the generated name, the file itself is deleted at once.
    int fd;
    if ((fd = mkstemp(cp)) < 0) {
        free(cp);
        m_reason = "TempFileInternal: mkstemp failed\n";
        return;
    }
    close(fd);
    unlink(cp);
    filename = cp;
    free(cp);
#else
    string filename = path_wingettempfilename(TEXT("recoll"));
#endif

    m_filename = filename + suffix;
    // O_EXCL: fail if someone created the file in between. Don't call
    // close() if open() failed, there is no descriptor to close.
    int nfd = open(m_filename.c_str(), O_CREAT | O_EXCL, 0600);
    if (nfd < 0 || close(nfd) != 0) {
        // Keep a space between the message and the file name
        m_reason = string("Could not open/create ") + m_filename;
        m_filename.erase();
    }
}
// Delete the temporary file, except if none was created or if
// setnoremove() was used to request that it be kept.
TempFileInternal::~TempFileInternal()
{
    if (m_noremove || m_filename.empty()) {
        return;
    }
    unlink(m_filename.c_str());
}
// Create the temporary directory through maketmpdir(). On failure,
// m_dirname is left empty and m_reason holds the error message.
TempDir::TempDir()
{
    const bool created = maketmpdir(m_dirname, m_reason);
    if (!created) {
        m_dirname.erase();
    }
}
// Get rid of the directory through wipedir(), if one was created. The
// wipedir() result is ignored: nothing useful could be done about an
// error at this point.
TempDir::~TempDir()
{
    if (m_dirname.empty()) {
        return;
    }
    (void)wipedir(m_dirname, true, true);
    m_dirname.erase();
}
/// Empty the temporary directory.
/// @return true on success; false (with m_reason set) if there is no
///   directory or if wipedir() reports a non-zero result.
bool TempDir::wipe()
{
    if (!m_dirname.empty()) {
        if (!wipedir(m_dirname, false, true)) {
            return true;
        }
        m_reason = "TempDir::wipe: wipedir failed\n";
        return false;
    }
    m_reason = "TempDir::wipe: no directory !\n";
    return false;
}
// Freedesktop standard path for the cache directory (thumbnails are now in
// there): $XDG_CACHE_HOME, or $HOME/.cache when the variable is unset or
// empty (the XDG base directory specification treats both cases alike).
// The value is computed on first use and cached in a function-local static.
static const string& xdgcachedir()
{
    static string xdgcache;
    if (xdgcache.empty()) {
        const char *cp = getenv("XDG_CACHE_HOME");
        if (cp == 0 || *cp == 0) {
            // Unset or empty: use the default location.
            xdgcache = path_cat(path_home(), ".cache");
        } else {
            xdgcache = cp;
        }
    }
    return xdgcache;
}
// Location of the thumbnails directory: <cache dir>/thumbnails if it
// exists, else $HOME/.thumbnails. Computed on first call, then cached.
static const string& thumbnailsdir()
{
    static string thumbnailsd;
    if (!thumbnailsd.empty()) {
        return thumbnailsd;
    }
    thumbnailsd = path_cat(xdgcachedir(), "thumbnails");
    // access() with mode 0 only tests for existence.
    if (access(thumbnailsd.c_str(), 0) != 0) {
        thumbnailsd = path_cat(path_home(), ".thumbnails");
    }
    return thumbnailsd;
}
// Thumbnail sub-directory names.
// "large" holds the 256x256 files
static const string thmbdirlarge("large");
// "normal" holds the 128x128 files
static const string thmbdirnormal("normal");
static void thumbname(const string& url, string& name)
{
string digest;
string l_url = url_encode(url);
MD5String(l_url, digest);
MD5HexPrint(digest, name);
name += ".png";
}
bool thumbPathForUrl(const string& url, int size, string& path)
{
string name;
thumbname(url, name);
if (size <= 128) {
path = path_cat(thumbnailsdir(), thmbdirnormal);
path = path_cat(path, name);
if (access(path.c_str(), R_OK) == 0) {
return true;
}
}
path = path_cat(thumbnailsdir(), thmbdirlarge);
path = path_cat(path, name);
if (access(path.c_str(), R_OK) == 0) {
return true;
}
// File does not exist. Path corresponds to the large version at this point,
// fix it if needed.
if (size <= 128) {
path = path_cat(path_home(), thmbdirnormal);
path = path_cat(path, name);
}
return false;
}
// Call each accessor once so that the values they compute are already
// available afterwards; the returned references are not needed here.
// NOTE(review): presumably meant to be called before starting threads,
// as the _mt suffix suggests -- confirm with callers.
void rclutil_init_mt()
{
    (void)path_pkgdatadir();
    (void)tmplocation();
    (void)thumbnailsdir();
}
#else // TEST_RCLUTIL
void path_to_thumb(const string& _input)
{
string input(_input);
// Make absolute path if needed
if (input[0] != '/') {
input = path_absolute(input);
}
input = string("file://") + path_canon(input);
string path;
//path = url_encode(input, 7);
thumbPathForUrl(input, 7, path);
cout << path << endl;
}
// Name the test program was invoked under; set from argv[0] in main().
const char *thisprog;
// Entry point of the test driver (the branch labelled TEST_RCLUTIL).
// It records the program name and consumes argv[0]. The thumbnail-path
// driver that follows is currently compiled out (#if 0), so nothing else
// is executed.
int main(int argc, const char **argv)
{
thisprog = *argv++;
argc--;
string s;
vector<string>::const_iterator it;
// Disabled driver: with one argument, print the thumbnail path for that
// file; with none, do the same for each line read from standard input.
#if 0
if (argc > 1) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
string input;
if (argc == 1) {
input = *argv++;
if (input.empty()) {
cerr << "Usage: thumbpath <filepath>" << endl;
exit(1);
}
path_to_thumb(input);
} else {
while (getline(cin, input)) {
path_to_thumb(input);
}
}
exit(0);
#endif
}
#endif // TEST_RCLUTIL

112
src/utils/rclutil.h Normal file
View file

@ -0,0 +1,112 @@
/* Copyright (C) 2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _RCLUTIL_H_INCLUDED_
#define _RCLUTIL_H_INCLUDED_
#include "autoconfig.h"
// Misc stuff not generic enough to get into smallut or pathut
#include <map>
#include <string>
#include MEMORY_INCLUDE
extern void rclutil_init_mt();
/// Sub-directory for default recoll config (e.g: .recoll)
extern std::string path_defaultrecollconfsubdir();
/// e.g. /usr/share/recoll. Depends on OS and config
extern const std::string& path_pkgdatadir();
/// Transcode to utf-8 if possible or url encoding, for display.
extern bool printableUrl(const std::string& fcharset,
const std::string& in, std::string& out);
/// Same but, in the case of a Windows local path, also turn "c:/" into
/// "/c/" This should be used only for splitting the path in rcldb.
extern std::string url_gpathS(const std::string& url);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp
extern const std::string& tmplocation();
/// Create temporary directory (inside the temp location)
extern bool maketmpdir(std::string& tdir, std::string& reason);
/// Temporary file class. The constructor creates an empty file whose name
/// ends with the requested suffix; the destructor removes it unless
/// setnoremove(true) was called. Check ok() after construction.
class TempFileInternal {
public:
    /// Create the file. On failure ok() is false and getreason() explains.
    TempFileInternal(const std::string& suffix);
    ~TempFileInternal();
    /// Path of the file (empty string if creation failed). The pointer
    /// is only valid while this object lives.
    const char *filename() const {
        return m_filename.c_str();
    }
    /// Explanation for the last failure, if any.
    const std::string& getreason() const {
        return m_reason;
    }
    /// If set, the destructor leaves the file in place.
    void setnoremove(bool onoff) {
        m_noremove = onoff;
    }
    /// True if the file was successfully created.
    bool ok() const {
        return !m_filename.empty();
    }
private:
    std::string m_filename;
    std::string m_reason;
    bool m_noremove;
};
typedef STD_SHARED_PTR<TempFileInternal> TempFile;
/// Temporary directory class. Recursively deleted by destructor.
/// Check ok() after construction. Instances cannot be copied.
class TempDir {
public:
    /// Create the directory. On failure ok() is false and getreason()
    /// explains.
    TempDir();
    ~TempDir();
    /// Path of the directory (empty string if creation failed). The
    /// pointer is only valid while this object lives.
    const char *dirname() const {
        return m_dirname.c_str();
    }
    /// Explanation for the last failure, if any.
    const std::string& getreason() const {
        return m_reason;
    }
    /// True if the directory was successfully created.
    bool ok() const {
        return !m_dirname.empty();
    }
    /// Recursively delete contents but not self.
    bool wipe();
private:
    std::string m_dirname;
    std::string m_reason;
    // Copying is disabled (private, unused): two objects must never own
    // and delete the same directory.
    TempDir(const TempDir&) {}
    TempDir& operator=(const TempDir&) {
        return *this;
    }
};
// Freedesktop thumbnail standard path routine
// On return, path will have the appropriate value in all cases,
// returns true if the file already exists
extern bool thumbPathForUrl(const std::string& url, int size,
std::string& path);
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string, std::string> s,
std::map<std::string, std::string> *d);
#endif /* _RCLUTIL_H_INCLUDED_ */

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2004 J.F.Dockes /* Copyright (C) 2004-2016 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -17,226 +17,20 @@
#ifndef _SMALLUT_H_INCLUDED_ #ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_
#include <stdlib.h> #include <sys/types.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
#include <set> #include <set>
using std::string; // Miscellaneous mostly string-oriented small utilities
using std::vector; // Note that none of the following code knows about utf-8.
using std::map;
using std::set;
// Note these are all ascii routines // Call this before going multithread.
extern int stringicmp(const string& s1, const string& s2); void smallut_init_mt();
// Case-insensitive equality predicate, for find_if etc.
// The reference string is NOT copied: it must outlive the predicate (do
// not construct this from a temporary).
struct StringIcmpPred {
    StringIcmpPred(const string& s1)
        : m_s1(s1)
    {}
    // const so that the predicate can also be called through const
    // copies/references, as standard algorithms may do.
    bool operator()(const string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const string& m_s1;
};
extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2);
extern void stringtolower(string& io);
extern string stringtolower(const string& io);
// Is one string the end part of the other ?
extern int stringisuffcmp(const string& s1, const string& s2);
// Divine language from locale
extern std::string localelang();
// Divine 8bit charset from language
extern std::string langtocode(const string& lang);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const string &cs1, const string &cs2);
// Parse date interval specifier into pair of y,m,d dates. The format
// for the time interval is based on a subset of iso 8601 with
// the addition of open intervals, and removal of all time indications.
// 'P' is the Period indicator, it's followed by a length in
// years/months/days (or any subset thereof)
// Dates: YYYY-MM-DD YYYY-MM YYYY
// Periods: P[nY][nM][nD] where n is an integer value.
// At least one of YMD must be specified
// The separator for the interval is /. Interval examples
// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
// This returns a pair of y,m,d dates.
struct DateInterval {
    // First date of the pair: year, month, day
    int y1;
    int m1;
    int d1;
    // Second date of the pair: year, month, day
    int y2;
    int m2;
    int d2;
};
extern bool parsedateinterval(const string&s, DateInterval *di);
extern int monthdays(int mon, int year);
/**
* Parse input string into list of strings.
*
* Token delimiter is " \t\n" except inside dquotes. dquote inside
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as space tab etc.
* have the ascii values and can't appear as part of a multibyte char. utf-8 ok
* but so are the iso-8859-x and surely others. addseps do have to be
* single-bytes
*/
template <class T> bool stringToStrings(const string& s, T &tokens,
const string& addseps = "");
/**
* Inverse operation:
*/
template <class T> void stringsToString(const T &tokens, string &s);
template <class T> std::string stringsToString(const T &tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
*/
template <class T> void stringsToCSV(const T &tokens, string &s,
char sep = ',');
/**
* Split input string. No handling of quoting
*/
extern void stringToTokens(const string &s, vector<string> &tokens,
const string &delims = " \t", bool skipinit=true);
/** Convert string to boolean */
extern bool stringToBool(const string &s);
/** Remove instances of characters belonging to set (default {space,
tab}) at beginning and end of input string */
extern void trimstring(string &s, const char *ws = " \t");
/** Escape things like < or & by turning them into entities */
extern string escapeHtml(const string &in);
/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware
* so chars should only contain ascii */
extern string neutchars(const string &str, const string &chars);
extern void neutchars(const string &str, string& out, const string &chars);
/** Turn string into something that won't be expanded by a shell. In practise
* quote with double-quotes and escape $`\ */
extern string escapeShell(const string &str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern string truncate_to_word(const string &input, string::size_type maxlen);
/** Truncate in place in an utf8-legal way */
extern void utf8truncate(string &s, int maxlen);
void ulltodecstr(unsigned long long val, string& buf);
void lltodecstr(long long val, string& buf);
string lltodecstr(long long val);
string ulltodecstr(unsigned long long val);
/** Convert byte count into unit (KB/MB...) appropriate for display */
string displayableBytes(off_t size);
/** Break big string into lines */
string breakIntoLines(const string& in, unsigned int ll = 100,
unsigned int maxlines= 50);
/** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const string& in, string& out, const map<char, string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const string& in, string& out, const map<string, string>& subs);
/** Append system error message */
void catstrerror(string *reason, const char *what, int _errno);
/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
* makes it inaccessible */
struct tm;
time_t portable_timegm(struct tm *tm);
/** Temp buffer with automatic deallocation.
 *
 * The buffer is allocated with malloc()/realloc() and released by the
 * destructor. buf() is null when nothing is allocated or when the last
 * allocation failed. Do not copy instances: both copies would free the
 * same buffer. */
struct TempBuf {
    TempBuf()
        : m_buf(0)
    {}
    /** Allocate n bytes; buf() is null if the allocation fails. */
    TempBuf(int n)
    {
        m_buf = (char *)malloc(n);
    }
    ~TempBuf()
    {
        free(m_buf);
    }
    /** Resize the buffer to n bytes, preserving existing contents.
     * @return the (possibly moved) buffer, or null if n <= 0 or if the
     *   allocation failed. In both null cases the previous buffer has
     *   been released and buf() is null. */
    char *setsize(int n) {
        if (n <= 0) {
            // Don't rely on realloc() for zero/negative sizes: the result
            // of a zero-size realloc() is not portable.
            free(m_buf);
            m_buf = 0;
            return 0;
        }
        char *nbuf = (char *)realloc(m_buf, n);
        if (nbuf == 0) {
            // realloc() leaves the old block allocated on failure:
            // release it instead of losing the only pointer to it.
            free(m_buf);
        }
        m_buf = nbuf;
        return m_buf;
    }
    char *buf() {return m_buf;}
    char *m_buf;
};
// Left-pad a non-empty string with '0' characters up to a length of len.
// Empty strings and strings already at least len long are left unchanged.
inline void leftzeropad(string& s, unsigned len)
{
    const string::size_type cur = s.length();
    if (cur == 0 || cur >= len) {
        return;
    }
    s.insert(0, len - cur, '0');
}
// Duplicate map<string,string> while ensuring no shared string data (to pass
// to other thread):
void map_ss_cp_noshr(const std::map<std::string,std::string> s,
std::map<std::string,std::string> *d);
// Helper for the static initialization of an stl map, somewhat like
// Boost.assign: chain (key, value) calls, then convert to std::map.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
template <typename T, typename U>
class create_map {
public:
    // Start the chain with a first entry.
    create_map(const T& key, const U& val) {
        (*this)(key, val);
    }
    // Add (or overwrite) an entry; returns *this for chaining.
    create_map<T, U>& operator()(const T& key, const U& val) {
        m_map[key] = val;
        return *this;
    }
    // Yield a copy of the accumulated map.
    operator std::map<T, U>() {
        return m_map;
    }
private:
    std::map<T, U> m_map;
};
// Same idea for vectors: chain values, then convert to std::vector.
// Values are kept in call order.
template <typename T>
class create_vector {
public:
    // Start the chain with a first element.
    create_vector(const T& val) {
        (*this)(val);
    }
    // Append an element; returns *this for chaining.
    create_vector<T>& operator()(const T& val) {
        m_vector.push_back(val);
        return *this;
    }
    // Yield a copy of the accumulated vector.
    operator std::vector<T>() {
        return m_vector;
    }
private:
    std::vector<T> m_vector;
};
#ifndef SMALLUT_DISABLE_MACROS
#ifndef MIN #ifndef MIN
#define MIN(A,B) (((A)<(B)) ? (A) : (B)) #define MIN(A,B) (((A)<(B)) ? (A) : (B))
#endif #endif
@ -246,7 +40,194 @@ public:
#ifndef deleteZ #ifndef deleteZ
#define deleteZ(X) {delete X;X = 0;} #define deleteZ(X) {delete X;X = 0;}
#endif #endif
#endif /* SMALLUT_DISABLE_MACROS */
void smallut_init_mt(); // Case-insensitive compare. ASCII ONLY !
extern int stringicmp(const std::string& s1, const std::string& s2);
// Case-insensitive equality predicate, for find_if etc.
// The reference string is NOT copied: it must outlive the predicate (do
// not construct this from a temporary).
struct StringIcmpPred {
    StringIcmpPred(const std::string& s1)
        : m_s1(s1) {
    }
    // const so that the predicate can also be called through const
    // copies/references, as standard algorithms may do.
    bool operator()(const std::string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const std::string& m_s1;
};
extern int stringlowercmp(const std::string& alreadylower,
const std::string& s2);
extern int stringuppercmp(const std::string& alreadyupper,
const std::string& s2);
extern void stringtolower(std::string& io);
extern std::string stringtolower(const std::string& io);
// Is one string the end part of the other ?
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
// Divine language from locale
extern std::string localelang();
// Divine 8bit charset from language
extern std::string langtocode(const std::string& lang);
// Compare charset names, removing the more common spelling variations
extern bool samecharset(const std::string& cs1, const std::string& cs2);
// Parse date interval specifier into pair of y,m,d dates. The format
// for the time interval is based on a subset of iso 8601 with
// the addition of open intervals, and removal of all time indications.
// 'P' is the Period indicator, it's followed by a length in
// years/months/days (or any subset thereof)
// Dates: YYYY-MM-DD YYYY-MM YYYY
// Periods: P[nY][nM][nD] where n is an integer value.
// At least one of YMD must be specified
// The separator for the interval is /. Interval examples
// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc.
// This returns a pair of y,m,d dates.
struct DateInterval {
    // First date of the pair: year, month, day
    int y1, m1, d1;
    // Second date of the pair: year, month, day
    int y2, m2, d2;
};
extern bool parsedateinterval(const std::string& s, DateInterval *di);
extern int monthdays(int mon, int year);
/**
* Parse input string into list of strings.
*
* Token delimiter is " \t\n" except inside dquotes. dquote inside
* dquotes can be escaped with \ etc...
* Input is handled a byte at a time, things will work as long as
* space tab etc. have the ascii values and can't appear as part of a
* multibyte char. utf-8 ok but so are the iso-8859-x and surely
* others. addseps do have to be single-bytes
*/
template <class T> bool stringToStrings(const std::string& s, T& tokens,
const std::string& addseps = "");
/**
* Inverse operation:
*/
template <class T> void stringsToString(const T& tokens, std::string& s);
template <class T> std::string stringsToString(const T& tokens);
/**
* Strings to CSV string. tokens containing the separator are quoted (")
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
*/
template <class T> void stringsToCSV(const T& tokens, std::string& s,
char sep = ',');
/**
* Split input string. No handling of quoting
*/
extern void stringToTokens(const std::string& s,
std::vector<std::string>& tokens,
const std::string& delims = " \t",
bool skipinit = true);
/** Convert string to boolean */
extern bool stringToBool(const std::string& s);
/** Remove instances of characters belonging to set (default {space,
tab}) at beginning and end of input string */
extern void trimstring(std::string& s, const char *ws = " \t");
/** Escape things like < or & by turning them into entities */
extern std::string escapeHtml(const std::string& in);
/** Replace some chars with spaces (ie: newline chars). */
extern std::string neutchars(const std::string& str, const std::string& chars);
extern void neutchars(const std::string& str, std::string& out,
const std::string& chars);
/** Turn string into something that won't be expanded by a shell. In practise
* quote with double-quotes and escape $`\ */
extern std::string escapeShell(const std::string& str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern std::string truncate_to_word(const std::string& input,
std::string::size_type maxlen);
void ulltodecstr(unsigned long long val, std::string& buf);
void lltodecstr(long long val, std::string& buf);
std::string lltodecstr(long long val);
std::string ulltodecstr(unsigned long long val);
/** Convert byte count into unit (KB/MB...) appropriate for display */
std::string displayableBytes(off_t size);
/** Break big string into lines */
std::string breakIntoLines(const std::string& in, unsigned int ll = 100,
unsigned int maxlines = 50);
/** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const std::string& in, std::string& out,
const std::map<char, std::string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(const std::string& in, std::string& out,
const std::map<std::string, std::string>& subs);
/** Append system error message */
void catstrerror(std::string *reason, const char *what, int _errno);
/** Portable timegm. MS C has _mkgmtime, but there is a bug in MinGW which
* makes it inaccessible */
struct tm;
time_t portable_timegm(struct tm *tm);
// Left-pad a non-empty string with '0' characters up to a length of len.
// Empty strings and strings already at least len long are left unchanged.
inline void leftzeropad(std::string& s, unsigned len)
{
    const std::string::size_type cur = s.length();
    if (cur == 0 || cur >= len) {
        return;
    }
    s.insert(0, len - cur, '0');
}
// Helper for the static initialization of an stl map, somewhat like
// Boost.assign: chain (key, value) calls, then convert to std::map.
// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c
// Example use: map<int, int> m = create_map<int, int> (1,2) (3,4) (5,6) (7,8);
template <typename T, typename U>
class create_map {
public:
    // Start the chain with a first entry.
    create_map(const T& key, const U& val)
    {
        (*this)(key, val);
    }
    // Add (or overwrite) an entry; returns *this for chaining.
    create_map<T, U>& operator()(const T& key, const U& val)
    {
        m_map[key] = val;
        return *this;
    }
    // Yield a copy of the accumulated map.
    operator std::map<T, U>()
    {
        return m_map;
    }
private:
    std::map<T, U> m_map;
};
// Same idea for vectors: chain values, then convert to std::vector.
// Values are kept in call order.
template <typename T>
class create_vector {
public:
    // Start the chain with a first element.
    create_vector(const T& val)
    {
        (*this)(val);
    }
    // Append an element; returns *this for chaining.
    create_vector<T>& operator()(const T& val)
    {
        m_vector.push_back(val);
        return *this;
    }
    // Yield a copy of the accumulated vector.
    operator std::vector<T>()
    {
        return m_vector;
    }
private:
    std::vector<T> m_vector;
};
#endif /* _SMALLUT_H_INCLUDED_ */ #endif /* _SMALLUT_H_INCLUDED_ */