diff --git a/src/common/autoconfig.h.in b/src/common/autoconfig.h.in index ab081ace..27feb403 100644 --- a/src/common/autoconfig.h.in +++ b/src/common/autoconfig.h.in @@ -15,6 +15,11 @@ /* Path to the file program */ #undef FILE_PROG +/* Define to 1 if you have the header file. */ +#undef HAVE_SPAWN_H +#undef HAVE_POSIX_SPAWN +#undef USE_POSIX_SPAWN + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H diff --git a/src/common/config.h b/src/common/config.h deleted file mode 120000 index 6eb68934..00000000 --- a/src/common/config.h +++ /dev/null @@ -1 +0,0 @@ -autoconfig.h \ No newline at end of file diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index 0dff013e..a3963987 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -149,17 +149,18 @@ RclConfig *recollinit(RclInitFlags flags, ExecCmd::useVfork(true); #else // Keep threads init behind log init, but make sure it's done before - // we do the vfork choice ! + // we do the vfork choice ! The latter is not used any more actually, + // we always use vfork except if forbidden by config. config->initThrConf(); - bool intern_noThr = config->getThrConf(RclConfig::ThrIntern).first == -1; - bool split_noThr = config->getThrConf(RclConfig::ThrSplit).first == -1; - bool write_noThr = config->getThrConf(RclConfig::ThrDbWrite).first == -1; - if (intern_noThr && split_noThr && write_noThr) { - LOGDEB0(("rclinit: single-threaded execution: use vfork\n")); - ExecCmd::useVfork(true); + + bool novfork; + config->getConfParam("novfork", &novfork); + if (novfork) { + LOGDEB0(("rclinit: will use fork() for starting commands\n")); + ExecCmd::useVfork(false); } else { - LOGDEB0(("rclinit: multi-threaded execution: do not use vfork\n")); - ExecCmd::useVfork(false); + LOGDEB0(("rclinit: will use vfork() for starting commands\n")); + ExecCmd::useVfork(true); } #endif diff --git a/src/configure.ac b/src/configure.ac index 09f1a210..d4ee1812 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -32,7 +32,19 @@ fi AC_SYS_LARGEFILE # OpenBSD needs sys/param.h for mount.h to compile -AC_CHECK_HEADERS([sys/param.h]) +AC_CHECK_HEADERS([sys/param.h, spawn.h]) + +AC_CHECK_FUNCS([posix_spawn]) + +if test "x$ac_cv_func_posix_spawn" = xyes; then : + AC_ARG_ENABLE(posix_spawn, + AC_HELP_STRING([--enable-posix_spawn], + [Enable the use of posix_spawn().]), + posixSpawnEnabled=$enableval, posixSpawnEnabled=no) +fi +if test X$posixSpawnEnabled = Xyes ; then + AC_DEFINE(USE_POSIX_SPAWN, 1, [Use posix_spawn()]) +fi # Check for where to find unordered_map etc. AC_LANG_PUSH([C++]) diff --git a/src/utils/Makefile b/src/utils/Makefile index fa997667..e26601cf 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -6,7 +6,7 @@ include $(depth)/mk/sysconf # are part of the official distrib anyway LIBRECOLL = ../lib/librecoll.a -PROGS = pxattr trecrontab \ +PROGS = pxattr trclosefrom trecrontab \ trnetcon trcopyfile trcircache trmd5 trreadfile trfileudi \ trconftree wipedir smallut trfstreewalk trpathut transcode trbase64 \ trmimeparse trexecmd utf8iter idfile @@ -27,6 +27,14 @@ trecrontab.o : ecrontab.cpp ecrontab.h $(CXX) -o trecrontab.o -c $(ALL_CXXFLAGS) \ -DTEST_ECRONTAB ecrontab.cpp +CLOSEFROM_OBJS= trclosefrom.o +trclosefrom : $(CLOSEFROM_OBJS) + $(CXX) -o trclosefrom $(CLOSEFROM_OBJS) \ + $(LIBRECOLL) $(LIBICONV) $(LIBSYS) +trclosefrom.o : closefrom.cpp closefrom.h + $(CXX) -o trclosefrom.o -c $(ALL_CXXFLAGS) \ + -DTEST_CLOSEFROM closefrom.cpp + FSTREEWALK_OBJS= trfstreewalk.o trfstreewalk : $(FSTREEWALK_OBJS) $(CXX) -o trfstreewalk $(FSTREEWALK_OBJS) \ diff --git a/src/utils/closefrom.cpp b/src/utils/closefrom.cpp index 2aba4cb2..3a975ae7 100644 --- a/src/utils/closefrom.cpp +++ b/src/utils/closefrom.cpp @@ -50,7 +50,18 @@ * descriptors. Only descriptors 0, 1, 2 are shown except if * fdescfs is mounted which it is not by default * + * Solaris: + * - Solaris 10+ has closefrom, and can specify closefrom to posix_spawn() + * + * Linux: + * - Has nothing. The method we used (opening /dev/fd) was very + * unsafe in multithread fork/exec context. We now use a close() + * loop. glibc maintainers think that closefrom() is a bad idea + * *especially* because it is implemented on *BSD and Solaris. Go + * figure...: https://sourceware.org/bugzilla/show_bug.cgi?id=10353 + * * Interface: + * * int libclf_closefrom(fd) * @param fd All open file descriptors with equal or higher numeric * values will be closed. fd needs not be a valid descriptor. @@ -62,8 +73,12 @@ #include #include #include +#include +#include -/* #define DEBUG_CLOSEFROM*/ +#include "closefrom.h" + +/* #define DEBUG_CLOSEFROM */ #ifdef DEBUG_CLOSEFROM #define DPRINT(X) fprintf X #else @@ -110,7 +125,12 @@ int libclf_closefrom(int fd0) } /*************************************************************************/ -#elif (defined(linux) || defined(__linux)) +#elif 0 && (defined(linux) || defined(__linux)) + +/* We don't do this on linux anymore because opendir() may call + malloc which is unsafe in the [fork-exec] interval for a + multithreaded program. Linux does not have a good solution for + implementing closefrom as far as I know */ /* Use /proc/self/fd directory */ #include @@ -142,11 +162,16 @@ int libclf_closefrom(int fd0) /*************************************************************************/ #else -/* System has no native support for this functionality whatsoever. + +/* System has no native support for this functionality. * * Close all descriptors up to compiled/configured maximum. * The caller will usually have an idea of a reasonable maximum, else * we retrieve a value from the system. + * + * Note that there is actually no real guarantee that no open + * descriptor higher than the reported limit can exist, as noted by + * the Solaris man page for closefrom() */ static int closefrom_maxfd = -1; @@ -156,21 +181,18 @@ void libclf_setmaxfd(int max) closefrom_maxfd = max; } -#ifdef sun #include + +#ifndef OPEN_MAX +#define OPEN_MAX 1024 #endif + int libclf_closefrom(int fd0) { int i, maxfd = closefrom_maxfd; if (maxfd < 0) { -#ifdef _SC_OPEN_MAX - maxfd = sysconf(_SC_OPEN_MAX); - DPRINT((stderr, "Maxfd is %d after sysconf()\n", maxfd)); -#else - maxfd = getdtablesize(); - DPRINT((stderr, "Maxfd is %d after getdtablesize()\n", maxfd)); -#endif + maxfd = libclf_maxfd(); } if (maxfd < 0) maxfd = OPEN_MAX; @@ -184,6 +206,12 @@ int libclf_closefrom(int fd0) } #endif +int libclf_maxfd(int) +{ + struct rlimit lim; + getrlimit(RLIMIT_NOFILE, &lim); + return int(lim.rlim_cur); +} #else /* TEST_CLOSEFROM */ diff --git a/src/utils/closefrom.h b/src/utils/closefrom.h index c74c00ba..59a7ef4c 100644 --- a/src/utils/closefrom.h +++ b/src/utils/closefrom.h @@ -20,4 +20,8 @@ /* Close all descriptors >= fd */ extern int libclf_closefrom(int fd); +/* Retrieve max open fd. This might be the actual max open one (not + thread-safe) or a system max value. */ +extern int libclf_maxfd(int flags=0); + #endif /* _closefrom_h_included_ */ diff --git a/src/utils/execmd.cpp b/src/utils/execmd.cpp index f8ef4654..32da0ee5 100644 --- a/src/utils/execmd.cpp +++ b/src/utils/execmd.cpp @@ -15,7 +15,11 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef TEST_EXECMD +#ifdef RECOLL_DATADIR +#include "autoconfig.h" +#else #include "config.h" +#endif #include #include @@ -31,6 +35,16 @@ #include #include +#ifdef HAVE_SPAWN_H +#ifndef __USE_GNU +#define __USE_GNU +#define undef__USE_GNU +#endif +#include +#ifdef undef__USE_GNU +#undef __USE_GNU +#endif +#endif #include "execmd.h" @@ -150,7 +164,18 @@ bool ExecCmd::which(const string& cmd, string& exepath, const char* path) return false; } -void ExecCmd::putenv(const string &ea) +void ExecCmd::useVfork(bool on) +{ + // Just in case: there are competent people who believe that the + // dynamic linker can sometimes deadlock if execve() is resolved + // inside the vfork/exec window. Make sure it's done now. If "/" is + // an executable file, we have a problem. + const char *argv[] = {"/", 0}; + execve("/", (char *const *)argv, environ); + o_useVfork = on; +} + +void ExecCmd::putenv(const string &ea) { m_env.push_back(ea); } @@ -256,10 +281,17 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv, } // Restore SIGTERM to default. Really, signal handling should be - // specified when creating the execmd. Help Recoll get rid of its - // filter children though. To be fixed one day... Not sure that - // all of this is needed. But an ignored sigterm and the masks are - // normally inherited. + // specified when creating the execmd, there might be other + // signals to reset. Resetting SIGTERM helps Recoll get rid of its + // filter children for now though. To be fixed one day... + // Note that resetting to SIG_DFL is a portable use of + // signal(). No need for sigaction() here. + + // There is supposedely a risk of problems if another thread was + // calling a signal-affecting function when vfork was called. This + // seems acceptable though as no self-respecting thread is going + // to mess with the global process signal disposition. + if (signal(SIGTERM, SIG_DFL) == SIG_ERR) { //LOGERR(("ExecCmd::DOCHILD: signal() failed, errno %d\n", errno)); } @@ -307,9 +339,9 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv, libclf_closefrom(3); execve(cmd.c_str(), (char *const*)argv, (char *const*)envv); - // Hu ho. This should never happened as we checked the existence of the - // executable before calling dochild... Until we did this, this was - // the chief cause of LOG mutex deadlock + // Hu ho. This should never have happened as we checked the + // existence of the executable before calling dochild... Until we + // did this check, this was the chief cause of LOG mutex deadlock LOGERR(("ExecCmd::DOCHILD: execve(%s) failed. errno %d\n", cmd.c_str(), errno)); _exit(127); @@ -392,11 +424,85 @@ int ExecCmd::startExec(const string &cmd, const vector& args, free(envv); return -1; } -//////////////////////////////// +//////////////////////////////// End vfork child prepare section. +#if HAVE_POSIX_SPAWN && USE_POSIX_SPAWN + { + posix_spawnattr_t attrs; + posix_spawnattr_init (&attrs); + short flags; + posix_spawnattr_getflags(&attrs, &flags); + + flags |= POSIX_SPAWN_USEVFORK; + + posix_spawnattr_setpgroup(&attrs, 0); + flags |= POSIX_SPAWN_SETPGROUP; + + sigset_t sset; + sigemptyset(&sset); + posix_spawnattr_setsigmask (&attrs, &sset); + flags |= POSIX_SPAWN_SETSIGMASK; + + sigemptyset(&sset); + sigaddset(&sset, SIGTERM); + posix_spawnattr_setsigdefault(&attrs, &sset); + flags |= POSIX_SPAWN_SETSIGDEF; + + posix_spawnattr_setflags(&attrs, flags); + + posix_spawn_file_actions_t facts; + posix_spawn_file_actions_init(&facts); + + if (has_input) { + posix_spawn_file_actions_addclose(&facts, m_pipein[1]); + if (m_pipein[0] != 0) { + posix_spawn_file_actions_adddup2(&facts, m_pipein[0], 0); + posix_spawn_file_actions_addclose(&facts, m_pipein[0]); + } + } + if (has_output) { + posix_spawn_file_actions_addclose(&facts, m_pipeout[0]); + if (m_pipeout[1] != 1) { + posix_spawn_file_actions_adddup2(&facts, m_pipeout[1], 1); + posix_spawn_file_actions_addclose(&facts, m_pipeout[1]); + } + } + + // Do we need to redirect stderr ? + if (!m_stderrFile.empty()) { + int oflags = O_WRONLY|O_CREAT; +#ifdef O_APPEND + oflags |= O_APPEND; +#endif + posix_spawn_file_actions_addopen(&facts, 2, m_stderrFile.c_str(), + oflags, 0600); + } + LOGDEB1(("using SPAWN\n")); + + // posix_spawn() does not have any standard way to ask for + // calling closefrom(). Afaik there is a solaris extension for this, + // but let's just add all fds + for (int i = 3; i < libclf_maxfd(); i++) { + posix_spawn_file_actions_addclose(&facts, i); + } + + int ret = posix_spawn(&m_pid, exe.c_str(), &facts, &attrs, + (char *const *)argv, (char *const *)envv); + posix_spawnattr_destroy(&attrs); + posix_spawn_file_actions_destroy(&facts); + if (ret) { + LOGERR(("ExecCmd::startExec: posix_spawn() failed. errno %d\n", + ret)); + return -1; + } + } + +#else if (o_useVfork) { + LOGDEB1(("using VFORK\n")); m_pid = vfork(); } else { + LOGDEB1(("using FORK\n")); m_pid = fork(); } if (m_pid < 0) { @@ -411,6 +517,7 @@ int ExecCmd::startExec(const string &cmd, const vector& args, // dochild does not return. Just in case... _exit(1); } +#endif // Father process diff --git a/src/utils/execmd.h b/src/utils/execmd.h index 4bcec778..25c3c307 100644 --- a/src/utils/execmd.h +++ b/src/utils/execmd.h @@ -70,12 +70,10 @@ class ExecCmdProvide { */ class ExecCmd { public: - // Use vfork instead of fork. This must not be called in a multithreaded - // program. - static void useVfork(bool on) - { - o_useVfork = on; - } + // Use vfork instead of fork. Our vfork usage is multithread-compatible as + // far as I can see, but just in case... + static void useVfork(bool on); + /** * Add/replace environment variable before executing command. This must * be called before doexec() to have an effect (possibly multiple diff --git a/website/release-history.html b/website/release-history.html index 7f2ea375..0be5b067 100644 --- a/website/release-history.html +++ b/website/release-history.html @@ -36,6 +36,9 @@
  • Bison-based query parser replaces old regexp-based one and allows parenthized sub-expressions and easier future expansions.
  • +
  • Avoid retrying to index previously + indexed files if nothing seems to have + changed in the filters.
  • Release 1.20: small