Implemented support for posix_spawn() and (main change): always use vfork() for starting external commands

This commit is contained in:
Jean-Francois Dockes 2015-05-27 16:24:18 +02:00
parent 1de1b64c02
commit 5916f1a0e0
10 changed files with 203 additions and 38 deletions

View file

@ -15,6 +15,11 @@
/* Path to the file program */ /* Path to the file program */
#undef FILE_PROG #undef FILE_PROG
/* Define to 1 if you have the <spawn.h> header file. */
#undef HAVE_SPAWN_H
#undef HAVE_POSIX_SPAWN
#undef USE_POSIX_SPAWN
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H #undef HAVE_INTTYPES_H

View file

@ -1 +0,0 @@
autoconfig.h

View file

@ -149,17 +149,18 @@ RclConfig *recollinit(RclInitFlags flags,
ExecCmd::useVfork(true); ExecCmd::useVfork(true);
#else #else
// Keep threads init behind log init, but make sure it's done before // Keep threads init behind log init, but make sure it's done before
// we do the vfork choice ! // we do the vfork choice ! The latter is not used any more actually,
// we always use vfork except if forbidden by config.
config->initThrConf(); config->initThrConf();
bool intern_noThr = config->getThrConf(RclConfig::ThrIntern).first == -1;
bool split_noThr = config->getThrConf(RclConfig::ThrSplit).first == -1; bool novfork;
bool write_noThr = config->getThrConf(RclConfig::ThrDbWrite).first == -1; config->getConfParam("novfork", &novfork);
if (intern_noThr && split_noThr && write_noThr) { if (novfork) {
LOGDEB0(("rclinit: single-threaded execution: use vfork\n")); LOGDEB0(("rclinit: will use fork() for starting commands\n"));
ExecCmd::useVfork(true);
} else {
LOGDEB0(("rclinit: multi-threaded execution: do not use vfork\n"));
ExecCmd::useVfork(false); ExecCmd::useVfork(false);
} else {
LOGDEB0(("rclinit: will use vfork() for starting commands\n"));
ExecCmd::useVfork(true);
} }
#endif #endif

View file

@ -32,7 +32,19 @@ fi
AC_SYS_LARGEFILE AC_SYS_LARGEFILE
# OpenBSD needs sys/param.h for mount.h to compile # OpenBSD needs sys/param.h for mount.h to compile
AC_CHECK_HEADERS([sys/param.h]) AC_CHECK_HEADERS([sys/param.h, spawn.h])
AC_CHECK_FUNCS([posix_spawn])
if test "x$ac_cv_func_posix_spawn" = xyes; then :
AC_ARG_ENABLE(posix_spawn,
AC_HELP_STRING([--enable-posix_spawn],
[Enable the use of posix_spawn().]),
posixSpawnEnabled=$enableval, posixSpawnEnabled=no)
fi
if test X$posixSpawnEnabled = Xyes ; then
AC_DEFINE(USE_POSIX_SPAWN, 1, [Use posix_spawn()])
fi
# Check for where to find unordered_map etc. # Check for where to find unordered_map etc.
AC_LANG_PUSH([C++]) AC_LANG_PUSH([C++])

View file

@ -6,7 +6,7 @@ include $(depth)/mk/sysconf
# are part of the official distrib anyway # are part of the official distrib anyway
LIBRECOLL = ../lib/librecoll.a LIBRECOLL = ../lib/librecoll.a
PROGS = pxattr trecrontab \ PROGS = pxattr trclosefrom trecrontab \
trnetcon trcopyfile trcircache trmd5 trreadfile trfileudi \ trnetcon trcopyfile trcircache trmd5 trreadfile trfileudi \
trconftree wipedir smallut trfstreewalk trpathut transcode trbase64 \ trconftree wipedir smallut trfstreewalk trpathut transcode trbase64 \
trmimeparse trexecmd utf8iter idfile trmimeparse trexecmd utf8iter idfile
@ -27,6 +27,14 @@ trecrontab.o : ecrontab.cpp ecrontab.h
$(CXX) -o trecrontab.o -c $(ALL_CXXFLAGS) \ $(CXX) -o trecrontab.o -c $(ALL_CXXFLAGS) \
-DTEST_ECRONTAB ecrontab.cpp -DTEST_ECRONTAB ecrontab.cpp
CLOSEFROM_OBJS= trclosefrom.o
trclosefrom : $(CLOSEFROM_OBJS)
$(CXX) -o trclosefrom $(CLOSEFROM_OBJS) \
$(LIBRECOLL) $(LIBICONV) $(LIBSYS)
trclosefrom.o : closefrom.cpp closefrom.h
$(CXX) -o trclosefrom.o -c $(ALL_CXXFLAGS) \
-DTEST_CLOSEFROM closefrom.cpp
FSTREEWALK_OBJS= trfstreewalk.o FSTREEWALK_OBJS= trfstreewalk.o
trfstreewalk : $(FSTREEWALK_OBJS) trfstreewalk : $(FSTREEWALK_OBJS)
$(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) \ $(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) \

View file

@ -50,7 +50,18 @@
* descriptors. Only descriptors 0, 1, 2 are shown except if * descriptors. Only descriptors 0, 1, 2 are shown except if
* fdescfs is mounted which it is not by default * fdescfs is mounted which it is not by default
* *
* Solaris:
* - Solaris 10+ has closefrom, and can specify closefrom to posix_spawn()
*
* Linux:
* - Has nothing. The method we used (opening /dev/fd) was very
* unsafe in multithread fork/exec context. We now use a close()
* loop. glibc maintainers think that closefrom() is a bad idea
* *especially* because it is implemented on *BSD and Solaris. Go
* figure...: https://sourceware.org/bugzilla/show_bug.cgi?id=10353
*
* Interface: * Interface:
*
* int libclf_closefrom(fd) * int libclf_closefrom(fd)
* @param fd All open file descriptors with equal or higher numeric * @param fd All open file descriptors with equal or higher numeric
* values will be closed. fd needs not be a valid descriptor. * values will be closed. fd needs not be a valid descriptor.
@ -62,6 +73,10 @@
#include <fcntl.h> #include <fcntl.h>
#include <string.h> #include <string.h>
#include <sys/param.h> #include <sys/param.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "closefrom.h"
/* #define DEBUG_CLOSEFROM */ /* #define DEBUG_CLOSEFROM */
#ifdef DEBUG_CLOSEFROM #ifdef DEBUG_CLOSEFROM
@ -110,7 +125,12 @@ int libclf_closefrom(int fd0)
} }
/*************************************************************************/ /*************************************************************************/
#elif (defined(linux) || defined(__linux)) #elif 0 && (defined(linux) || defined(__linux))
/* We don't do this on linux anymore because opendir() may call
malloc which is unsafe in the [fork-exec] interval for a
multithreaded program. Linux does not have a good solution for
implementing closefrom as far as I know */
/* Use /proc/self/fd directory */ /* Use /proc/self/fd directory */
#include <sys/types.h> #include <sys/types.h>
@ -142,11 +162,16 @@ int libclf_closefrom(int fd0)
/*************************************************************************/ /*************************************************************************/
#else #else
/* System has no native support for this functionality whatsoever.
/* System has no native support for this functionality.
* *
* Close all descriptors up to compiled/configured maximum. * Close all descriptors up to compiled/configured maximum.
* The caller will usually have an idea of a reasonable maximum, else * The caller will usually have an idea of a reasonable maximum, else
* we retrieve a value from the system. * we retrieve a value from the system.
*
* Note that there is actually no real guarantee that no open
* descriptor higher than the reported limit can exist, as noted by
* the Solaris man page for closefrom()
*/ */
static int closefrom_maxfd = -1; static int closefrom_maxfd = -1;
@ -156,21 +181,18 @@ void libclf_setmaxfd(int max)
closefrom_maxfd = max; closefrom_maxfd = max;
} }
#ifdef sun
#include <limits.h> #include <limits.h>
#ifndef OPEN_MAX
#define OPEN_MAX 1024
#endif #endif
int libclf_closefrom(int fd0) int libclf_closefrom(int fd0)
{ {
int i, maxfd = closefrom_maxfd; int i, maxfd = closefrom_maxfd;
if (maxfd < 0) { if (maxfd < 0) {
#ifdef _SC_OPEN_MAX maxfd = libclf_maxfd();
maxfd = sysconf(_SC_OPEN_MAX);
DPRINT((stderr, "Maxfd is %d after sysconf()\n", maxfd));
#else
maxfd = getdtablesize();
DPRINT((stderr, "Maxfd is %d after getdtablesize()\n", maxfd));
#endif
} }
if (maxfd < 0) if (maxfd < 0)
maxfd = OPEN_MAX; maxfd = OPEN_MAX;
@ -184,6 +206,12 @@ int libclf_closefrom(int fd0)
} }
#endif #endif
int libclf_maxfd(int)
{
struct rlimit lim;
getrlimit(RLIMIT_NOFILE, &lim);
return int(lim.rlim_cur);
}
#else /* TEST_CLOSEFROM */ #else /* TEST_CLOSEFROM */

View file

@ -20,4 +20,8 @@
/* Close all descriptors >= fd */ /* Close all descriptors >= fd */
extern int libclf_closefrom(int fd); extern int libclf_closefrom(int fd);
/* Retrieve max open fd. This might be the actual max open one (not
thread-safe) or a system max value. */
extern int libclf_maxfd(int flags=0);
#endif /* _closefrom_h_included_ */ #endif /* _closefrom_h_included_ */

View file

@ -15,7 +15,11 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ */
#ifndef TEST_EXECMD #ifndef TEST_EXECMD
#ifdef RECOLL_DATADIR
#include "autoconfig.h"
#else
#include "config.h" #include "config.h"
#endif
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -31,6 +35,16 @@
#include <vector> #include <vector>
#include <string> #include <string>
#ifdef HAVE_SPAWN_H
#ifndef __USE_GNU
#define __USE_GNU
#define undef__USE_GNU
#endif
#include <spawn.h>
#ifdef undef__USE_GNU
#undef __USE_GNU
#endif
#endif
#include "execmd.h" #include "execmd.h"
@ -150,6 +164,17 @@ bool ExecCmd::which(const string& cmd, string& exepath, const char* path)
return false; return false;
} }
void ExecCmd::useVfork(bool on)
{
// Just in case: there are competent people who believe that the
// dynamic linker can sometimes deadlock if execve() is resolved
// inside the vfork/exec window. Make sure it's done now. If "/" is
// an executable file, we have a problem.
const char *argv[] = {"/", 0};
execve("/", (char *const *)argv, environ);
o_useVfork = on;
}
void ExecCmd::putenv(const string &ea) void ExecCmd::putenv(const string &ea)
{ {
m_env.push_back(ea); m_env.push_back(ea);
@ -256,10 +281,17 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
} }
// Restore SIGTERM to default. Really, signal handling should be // Restore SIGTERM to default. Really, signal handling should be
// specified when creating the execmd. Help Recoll get rid of its // specified when creating the execmd, there might be other
// filter children though. To be fixed one day... Not sure that // signals to reset. Resetting SIGTERM helps Recoll get rid of its
// all of this is needed. But an ignored sigterm and the masks are // filter children for now though. To be fixed one day...
// normally inherited. // Note that resetting to SIG_DFL is a portable use of
// signal(). No need for sigaction() here.
// There is supposedely a risk of problems if another thread was
// calling a signal-affecting function when vfork was called. This
// seems acceptable though as no self-respecting thread is going
// to mess with the global process signal disposition.
if (signal(SIGTERM, SIG_DFL) == SIG_ERR) { if (signal(SIGTERM, SIG_DFL) == SIG_ERR) {
//LOGERR(("ExecCmd::DOCHILD: signal() failed, errno %d\n", errno)); //LOGERR(("ExecCmd::DOCHILD: signal() failed, errno %d\n", errno));
} }
@ -307,9 +339,9 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
libclf_closefrom(3); libclf_closefrom(3);
execve(cmd.c_str(), (char *const*)argv, (char *const*)envv); execve(cmd.c_str(), (char *const*)argv, (char *const*)envv);
// Hu ho. This should never happened as we checked the existence of the // Hu ho. This should never have happened as we checked the
// executable before calling dochild... Until we did this, this was // existence of the executable before calling dochild... Until we
// the chief cause of LOG mutex deadlock // did this check, this was the chief cause of LOG mutex deadlock
LOGERR(("ExecCmd::DOCHILD: execve(%s) failed. errno %d\n", cmd.c_str(), LOGERR(("ExecCmd::DOCHILD: execve(%s) failed. errno %d\n", cmd.c_str(),
errno)); errno));
_exit(127); _exit(127);
@ -392,11 +424,85 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
free(envv); free(envv);
return -1; return -1;
} }
//////////////////////////////// //////////////////////////////// End vfork child prepare section.
#if HAVE_POSIX_SPAWN && USE_POSIX_SPAWN
{
posix_spawnattr_t attrs;
posix_spawnattr_init (&attrs);
short flags;
posix_spawnattr_getflags(&attrs, &flags);
flags |= POSIX_SPAWN_USEVFORK;
posix_spawnattr_setpgroup(&attrs, 0);
flags |= POSIX_SPAWN_SETPGROUP;
sigset_t sset;
sigemptyset(&sset);
posix_spawnattr_setsigmask (&attrs, &sset);
flags |= POSIX_SPAWN_SETSIGMASK;
sigemptyset(&sset);
sigaddset(&sset, SIGTERM);
posix_spawnattr_setsigdefault(&attrs, &sset);
flags |= POSIX_SPAWN_SETSIGDEF;
posix_spawnattr_setflags(&attrs, flags);
posix_spawn_file_actions_t facts;
posix_spawn_file_actions_init(&facts);
if (has_input) {
posix_spawn_file_actions_addclose(&facts, m_pipein[1]);
if (m_pipein[0] != 0) {
posix_spawn_file_actions_adddup2(&facts, m_pipein[0], 0);
posix_spawn_file_actions_addclose(&facts, m_pipein[0]);
}
}
if (has_output) {
posix_spawn_file_actions_addclose(&facts, m_pipeout[0]);
if (m_pipeout[1] != 1) {
posix_spawn_file_actions_adddup2(&facts, m_pipeout[1], 1);
posix_spawn_file_actions_addclose(&facts, m_pipeout[1]);
}
}
// Do we need to redirect stderr ?
if (!m_stderrFile.empty()) {
int oflags = O_WRONLY|O_CREAT;
#ifdef O_APPEND
oflags |= O_APPEND;
#endif
posix_spawn_file_actions_addopen(&facts, 2, m_stderrFile.c_str(),
oflags, 0600);
}
LOGDEB1(("using SPAWN\n"));
// posix_spawn() does not have any standard way to ask for
// calling closefrom(). Afaik there is a solaris extension for this,
// but let's just add all fds
for (int i = 3; i < libclf_maxfd(); i++) {
posix_spawn_file_actions_addclose(&facts, i);
}
int ret = posix_spawn(&m_pid, exe.c_str(), &facts, &attrs,
(char *const *)argv, (char *const *)envv);
posix_spawnattr_destroy(&attrs);
posix_spawn_file_actions_destroy(&facts);
if (ret) {
LOGERR(("ExecCmd::startExec: posix_spawn() failed. errno %d\n",
ret));
return -1;
}
}
#else
if (o_useVfork) { if (o_useVfork) {
LOGDEB1(("using VFORK\n"));
m_pid = vfork(); m_pid = vfork();
} else { } else {
LOGDEB1(("using FORK\n"));
m_pid = fork(); m_pid = fork();
} }
if (m_pid < 0) { if (m_pid < 0) {
@ -411,6 +517,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// dochild does not return. Just in case... // dochild does not return. Just in case...
_exit(1); _exit(1);
} }
#endif
// Father process // Father process

View file

@ -70,12 +70,10 @@ class ExecCmdProvide {
*/ */
class ExecCmd { class ExecCmd {
public: public:
// Use vfork instead of fork. This must not be called in a multithreaded // Use vfork instead of fork. Our vfork usage is multithread-compatible as
// program. // far as I can see, but just in case...
static void useVfork(bool on) static void useVfork(bool on);
{
o_useVfork = on;
}
/** /**
* Add/replace environment variable before executing command. This must * Add/replace environment variable before executing command. This must
* be called before doexec() to have an effect (possibly multiple * be called before doexec() to have an effect (possibly multiple

View file

@ -36,6 +36,9 @@
<li>Bison-based query parser replaces old regexp-based <li>Bison-based query parser replaces old regexp-based
one and allows parenthized sub-expressions and easier one and allows parenthized sub-expressions and easier
future expansions.</li> future expansions.</li>
<li>Avoid retrying to index previously
indexed files if nothing seems to have
changed in the filters.</li>
</ul> </ul>
</dd> </dd>
<dt><a href="release-1.20.html">Release 1.20</a>: small <dt><a href="release-1.20.html">Release 1.20</a>: small