Impose memory usage limit on external filters. Fixes issue #259
This commit is contained in:
parent
62a63e8f64
commit
ecd15062d9
8 changed files with 90 additions and 13 deletions
|
@ -20,6 +20,9 @@
|
|||
#undef HAVE_POSIX_SPAWN
|
||||
#undef USE_POSIX_SPAWN
|
||||
|
||||
/* Define to 1 if you have the setrlimit() call. */
|
||||
#undef HAVE_SETRLIMIT
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ AC_SYS_LARGEFILE
|
|||
# OpenBSD needs sys/param.h for mount.h to compile
|
||||
dnl Header names are blank-separated: a comma would become part of the name.
AC_CHECK_HEADERS([sys/param.h spawn.h])
|
||||
|
||||
AC_CHECK_FUNCS([posix_spawn])
|
||||
dnl Function names are blank-separated: a comma would become part of the name.
AC_CHECK_FUNCS([posix_spawn setrlimit])
|
||||
|
||||
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
||||
AC_ARG_ENABLE(posix_spawn,
|
||||
|
|
|
@ -6022,18 +6022,29 @@ mondelaypatterns = *.log:20 "this one has spaces*:10"
|
|||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>monioniceclass, monioniceclassdata
|
||||
</varname></term><listitem><para>These allow defining the
|
||||
<application>ionice</application> class and data used by the
|
||||
indexer (default class 3, no data).</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry><term><varname>monioniceclass, monioniceclassdata
|
||||
</varname></term><listitem><para>These allow defining the
|
||||
<application>ionice</application> class and data used by the
|
||||
indexer (default class 3, no data).</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>filtermaxseconds</varname></term>
|
||||
<listitem><para>Maximum handler execution time, after which it
|
||||
is aborted. Some postscript programs just loop...</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>filtermaxmbytes</varname></term>
|
||||
<listitem><para>&RCL; 1.20.7 and later. Maximum handler memory
|
||||
utilisation. This uses setrlimit(RLIMIT_AS) on most systems
|
||||
(total virtual memory space size limit). Some programs may start
|
||||
with 500 MBytes of mapped shared libraries, so take this into
|
||||
account when choosing a value. The default is a liberal
|
||||
2000MB.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry><term><varname>filtermaxseconds</varname></term>
|
||||
<listitem><para>Maximum handler execution time, after which it
|
||||
is aborted. Some postscript programs just loop...</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry><term><varname>filtersdir</varname></term>
|
||||
<listitem><para>A directory to search for the external
|
||||
input handler scripts used to index some types of files. The
|
||||
|
|
|
@ -77,6 +77,8 @@ bool MimeHandlerExec::next_document()
|
|||
|
||||
int filtermaxseconds = 900;
|
||||
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
||||
int filtermaxmbytes = 0;
|
||||
m_config->getConfParam("filtermaxmbytes", &filtermaxmbytes);
|
||||
|
||||
if (params.empty()) {
|
||||
// Hu ho
|
||||
|
@ -103,6 +105,7 @@ bool MimeHandlerExec::next_document()
|
|||
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
||||
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||
"RECOLL_FILTER_FORPREVIEW=no");
|
||||
mexec.setrlimit_as(filtermaxmbytes);
|
||||
|
||||
int status;
|
||||
try {
|
||||
|
|
|
@ -47,6 +47,9 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||
// Command name
|
||||
string cmd = params.front();
|
||||
|
||||
int filtermaxmbytes = 0;
|
||||
m_config->getConfParam("filtermaxmbytes", &filtermaxmbytes);
|
||||
|
||||
m_maxmemberkb = 50000;
|
||||
m_config->getConfParam("membermaxkbs", &m_maxmemberkb);
|
||||
ostringstream oss;
|
||||
|
@ -57,6 +60,8 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||
"RECOLL_FILTER_FORPREVIEW=no");
|
||||
|
||||
m_cmd.setrlimit_as(filtermaxmbytes);
|
||||
|
||||
// Build parameter list: delete cmd name
|
||||
vector<string>myparams(params.begin() + 1, params.end());
|
||||
|
||||
|
|
|
@ -250,6 +250,11 @@ textfilepagekbs = 1000
|
|||
# Maximum external filter execution time. Default 20 minutes. This is mainly
|
||||
# to avoid infinite loops in postscript files (loop.ps)
|
||||
filtermaxseconds = 1200
|
||||
# Maximum virtual memory space for filter process (setrlimit(RLIMIT_AS)),
|
||||
# in megabytes. Note that this includes any mapped libs (there is no
|
||||
# reliable Linux way to limit the data space only), so we need to be a
|
||||
# bit generous here. Anything over 2000 will be ignored on 32-bit machines.
|
||||
filtermaxmbytes = 2000
|
||||
|
||||
# Length of abstracts we store while indexing. Longer will make for a
|
||||
# bigger db
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
|
@ -300,6 +302,39 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
|
|||
pthread_sigmask(SIG_UNBLOCK, &sset, 0);
|
||||
sigprocmask(SIG_UNBLOCK, &sset, 0);
|
||||
|
||||
#ifdef HAVE_SETRLIMIT
|
||||
#if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
|
||||
if (m_rlimit_as_mbytes > 2000 && sizeof(rlim_t) < 8) {
|
||||
// Impossible limit, don't use it
|
||||
m_rlimit_as_mbytes = 0;
|
||||
}
|
||||
if (m_rlimit_as_mbytes > 0) {
|
||||
struct rlimit ram_limit = {
|
||||
static_cast<rlim_t>(m_rlimit_as_mbytes * 1024 * 1024),
|
||||
RLIM_INFINITY
|
||||
};
|
||||
int resource;
|
||||
|
||||
// RLIMIT_AS and RLIMIT_VMEM are usually synonyms when VMEM is
|
||||
// defined. RLIMIT_AS is POSIX. Neither really does what we
|
||||
// want, because they count e.g. shared lib mappings, which we
|
||||
// don't really care about.
|
||||
// RLIMIT_DATA only limits the data segment. Modern mallocs
|
||||
// use mmap and will not be bound. (Otoh if we only have this,
|
||||
// we're probably not modern).
|
||||
// So we're unsatisfied either way.
|
||||
#ifdef RLIMIT_AS
|
||||
resource = RLIMIT_AS;
|
||||
#elif defined RLIMIT_VMEM
|
||||
resource = RLIMIT_VMEM;
|
||||
#else
|
||||
resource = RLIMIT_DATA;
|
||||
#endif
|
||||
setrlimit(resource, &ram_limit);
|
||||
}
|
||||
#endif
|
||||
#endif // have_setrlimit
|
||||
|
||||
if (has_input) {
|
||||
close(m_pipein[1]);
|
||||
if (m_pipein[0] != 0) {
|
||||
|
@ -347,6 +382,11 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
|
|||
_exit(127);
|
||||
}
|
||||
|
||||
// Record the memory limit, in megabytes, requested for the child process.
// Nothing is applied here: the value is only stored, and the child-side
// setup code converts it to bytes and passes it to setrlimit() when it is
// greater than zero.
void ExecCmd::setrlimit_as(int mbytes)
|
||||
{
|
||||
m_rlimit_as_mbytes = mbytes;
|
||||
}
|
||||
|
||||
int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
||||
bool has_input, bool has_output)
|
||||
{
|
||||
|
@ -427,6 +467,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
|||
//////////////////////////////// End vfork child prepare section.
|
||||
|
||||
#if HAVE_POSIX_SPAWN && USE_POSIX_SPAWN
|
||||
// Note that posix_spawn provides no way to setrlimit() the child.
|
||||
{
|
||||
posix_spawnattr_t attrs;
|
||||
posix_spawnattr_init (&attrs);
|
||||
|
|
|
@ -83,6 +83,14 @@ class ExecCmd {
|
|||
void putenv(const std::string &envassign);
|
||||
void putenv(const std::string &name, const std::string& value);
|
||||
|
||||
/**
|
||||
* Try to set a limit on child process vm size. This will use
|
||||
* setrlimit() and RLIMIT_AS/VMEM if available. Parameter is in
|
||||
* units of 2**20 bytes (megabytes). Must be called before starting the command, default
|
||||
* is to inherit the limit from the parent.
|
||||
*/
|
||||
void setrlimit_as(int mbytes);
|
||||
|
||||
/**
|
||||
* Set function objects to call whenever new data is available or on
|
||||
* select timeout / whenever new data is needed to send. Must be called
|
||||
|
@ -158,7 +166,7 @@ class ExecCmd {
|
|||
void zapChild() {setKill(); (void)wait();}
|
||||
|
||||
ExecCmd()
|
||||
: m_advise(0), m_provide(0), m_timeoutMs(1000)
|
||||
: m_advise(0), m_provide(0), m_timeoutMs(1000), m_rlimit_as_mbytes(0)
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
@ -191,6 +199,7 @@ class ExecCmd {
|
|||
ExecCmdProvide *m_provide;
|
||||
bool m_killRequest;
|
||||
int m_timeoutMs;
|
||||
int m_rlimit_as_mbytes;
|
||||
std::string m_stderrFile;
|
||||
// Pipe for data going to the command
|
||||
int m_pipein[2];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue