Impose memory usage limit on external filters. Fixes issue #259
This commit is contained in:
parent
62a63e8f64
commit
ecd15062d9
8 changed files with 90 additions and 13 deletions
|
@ -20,6 +20,9 @@
|
||||||
#undef HAVE_POSIX_SPAWN
|
#undef HAVE_POSIX_SPAWN
|
||||||
#undef USE_POSIX_SPAWN
|
#undef USE_POSIX_SPAWN
|
||||||
|
|
||||||
|
/* Define to 1 if you have the setrlimit() call. */
|
||||||
|
#undef HAVE_SETRLIMIT
|
||||||
|
|
||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#undef HAVE_INTTYPES_H
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ AC_SYS_LARGEFILE
|
||||||
# OpenBSD needs sys/param.h for mount.h to compile
|
# OpenBSD needs sys/param.h for mount.h to compile
|
||||||
AC_CHECK_HEADERS([sys/param.h, spawn.h])
|
AC_CHECK_HEADERS([sys/param.h, spawn.h])
|
||||||
|
|
||||||
AC_CHECK_FUNCS([posix_spawn])
|
AC_CHECK_FUNCS([posix_spawn, setrlimit])
|
||||||
|
|
||||||
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
||||||
AC_ARG_ENABLE(posix_spawn,
|
AC_ARG_ENABLE(posix_spawn,
|
||||||
|
|
|
@ -6022,18 +6022,29 @@ mondelaypatterns = *.log:20 "this one has spaces*:10"
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry><term><varname>monioniceclass, monioniceclassdata
|
<varlistentry><term><varname>monioniceclass, monioniceclassdata
|
||||||
</varname></term><listitem><para>These allow defining the
|
</varname></term><listitem><para>These allow defining the
|
||||||
<application>ionice</application> class and data used by the
|
<application>ionice</application> class and data used by the
|
||||||
indexer (default class 3, no data).</para>
|
indexer (default class 3, no data).</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry><term><varname>filtermaxseconds</varname></term>
|
||||||
|
<listitem><para>Maximum handler execution time, after which it
|
||||||
|
is aborted. Some postscript programs just loop...</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry><term><varname>filtermaxmbytes</varname></term>
|
||||||
|
<listitem><para>&RCL; 1.20.7 and later. Maximum handler memory
|
||||||
|
utilisation. This uses setrlimit(RLIMIT_AS) on most systems
|
||||||
|
(total virtual memory space size limit). Some programs may start
|
||||||
|
with 500 MBytes of mapped shared libraries, so take this into
|
||||||
|
account when choosing a value. The default is a liberal
|
||||||
|
2000MB.</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry><term><varname>filtermaxseconds</varname></term>
|
|
||||||
<listitem><para>Maximum handler execution time, after which it
|
|
||||||
is aborted. Some postscript programs just loop...</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry><term><varname>filtersdir</varname></term>
|
<varlistentry><term><varname>filtersdir</varname></term>
|
||||||
<listitem><para>A directory to search for the external
|
<listitem><para>A directory to search for the external
|
||||||
input handler scripts used to index some types of files. The
|
input handler scripts used to index some types of files. The
|
||||||
|
|
|
@ -77,6 +77,8 @@ bool MimeHandlerExec::next_document()
|
||||||
|
|
||||||
int filtermaxseconds = 900;
|
int filtermaxseconds = 900;
|
||||||
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
||||||
|
int filtermaxmbytes = 0;
|
||||||
|
m_config->getConfParam("filtermaxmbytes", &filtermaxmbytes);
|
||||||
|
|
||||||
if (params.empty()) {
|
if (params.empty()) {
|
||||||
// Hu ho
|
// Hu ho
|
||||||
|
@ -103,6 +105,7 @@ bool MimeHandlerExec::next_document()
|
||||||
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
mexec.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
||||||
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
mexec.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
|
mexec.setrlimit_as(filtermaxmbytes);
|
||||||
|
|
||||||
int status;
|
int status;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -47,6 +47,9 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||||
// Command name
|
// Command name
|
||||||
string cmd = params.front();
|
string cmd = params.front();
|
||||||
|
|
||||||
|
int filtermaxmbytes = 0;
|
||||||
|
m_config->getConfParam("filtermaxmbytes", &filtermaxmbytes);
|
||||||
|
|
||||||
m_maxmemberkb = 50000;
|
m_maxmemberkb = 50000;
|
||||||
m_config->getConfParam("membermaxkbs", &m_maxmemberkb);
|
m_config->getConfParam("membermaxkbs", &m_maxmemberkb);
|
||||||
ostringstream oss;
|
ostringstream oss;
|
||||||
|
@ -57,6 +60,8 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||||
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
|
|
||||||
|
m_cmd.setrlimit_as(filtermaxmbytes);
|
||||||
|
|
||||||
// Build parameter list: delete cmd name
|
// Build parameter list: delete cmd name
|
||||||
vector<string>myparams(params.begin() + 1, params.end());
|
vector<string>myparams(params.begin() + 1, params.end());
|
||||||
|
|
||||||
|
|
|
@ -250,6 +250,11 @@ textfilepagekbs = 1000
|
||||||
# Maximum external filter execution time. Default 20mn. This is mainly
|
# Maximum external filter execution time. Default 20mn. This is mainly
|
||||||
# to avoid infinite loops in postscript files (loop.ps)
|
# to avoid infinite loops in postscript files (loop.ps)
|
||||||
filtermaxseconds = 1200
|
filtermaxseconds = 1200
|
||||||
|
# Maximum virtual memory space for filter process (setrlimit(RLIMIT_AS)),
|
||||||
|
# in megabytes. Note that this includes any mapped libs (there is no
|
||||||
|
# reliable Linux way to limit the data space only), so we need to be a
|
||||||
|
# bit generous here. Anything over 2000 will be ignored on 32-bit machines.
|
||||||
|
filtermaxmbytes = 2000
|
||||||
|
|
||||||
# Length of abstracts we store while indexing. Longer will make for a
|
# Length of abstracts we store while indexing. Longer will make for a
|
||||||
# bigger db
|
# bigger db
|
||||||
|
|
|
@ -28,6 +28,8 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <sys/select.h>
|
#include <sys/select.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
@ -300,6 +302,39 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
|
||||||
pthread_sigmask(SIG_UNBLOCK, &sset, 0);
|
pthread_sigmask(SIG_UNBLOCK, &sset, 0);
|
||||||
sigprocmask(SIG_UNBLOCK, &sset, 0);
|
sigprocmask(SIG_UNBLOCK, &sset, 0);
|
||||||
|
|
||||||
|
#ifdef HAVE_SETRLIMIT
|
||||||
|
#if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
|
||||||
|
if (m_rlimit_as_mbytes > 2000 && sizeof(rlim_t) < 8) {
|
||||||
|
// Impossible limit, don't use it
|
||||||
|
m_rlimit_as_mbytes = 0;
|
||||||
|
}
|
||||||
|
if (m_rlimit_as_mbytes > 0) {
|
||||||
|
struct rlimit ram_limit = {
|
||||||
|
static_cast<rlim_t>(m_rlimit_as_mbytes * 1024 * 1024),
|
||||||
|
RLIM_INFINITY
|
||||||
|
};
|
||||||
|
int resource;
|
||||||
|
|
||||||
|
// RLIMIT_AS and RLIMIT_VMEM are usually synonyms when VMEM is
|
||||||
|
// defined. RLIMIT_AS is POSIX. Neither really does what we
|
||||||
|
// want, because they count e.g. shared lib mappings, which we
|
||||||
|
// don't really care about.
|
||||||
|
// RLIMIT_DATA only limits the data segment. Modern mallocs
|
||||||
|
// use mmap and will not be bound. (Otoh if we only have this,
|
||||||
|
// we're probably not modern).
|
||||||
|
// So we're unsatisfied either way.
|
||||||
|
#ifdef RLIMIT_AS
|
||||||
|
resource = RLIMIT_AS;
|
||||||
|
#elif defined RLIMIT_VMEM
|
||||||
|
resource = RLIMIT_VMEM;
|
||||||
|
#else
|
||||||
|
resource = RLIMIT_DATA;
|
||||||
|
#endif
|
||||||
|
setrlimit(resource, &ram_limit);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif // have_setrlimit
|
||||||
|
|
||||||
if (has_input) {
|
if (has_input) {
|
||||||
close(m_pipein[1]);
|
close(m_pipein[1]);
|
||||||
if (m_pipein[0] != 0) {
|
if (m_pipein[0] != 0) {
|
||||||
|
@ -347,6 +382,11 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
|
||||||
_exit(127);
|
_exit(127);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ExecCmd::setrlimit_as(int mbytes)
|
||||||
|
{
|
||||||
|
m_rlimit_as_mbytes = mbytes;
|
||||||
|
}
|
||||||
|
|
||||||
int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
||||||
bool has_input, bool has_output)
|
bool has_input, bool has_output)
|
||||||
{
|
{
|
||||||
|
@ -427,6 +467,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
|
||||||
//////////////////////////////// End vfork child prepare section.
|
//////////////////////////////// End vfork child prepare section.
|
||||||
|
|
||||||
#if HAVE_POSIX_SPAWN && USE_POSIX_SPAWN
|
#if HAVE_POSIX_SPAWN && USE_POSIX_SPAWN
|
||||||
|
// Note that posix_spawn provides no way to setrlimit() the child.
|
||||||
{
|
{
|
||||||
posix_spawnattr_t attrs;
|
posix_spawnattr_t attrs;
|
||||||
posix_spawnattr_init (&attrs);
|
posix_spawnattr_init (&attrs);
|
||||||
|
|
|
@ -83,6 +83,14 @@ class ExecCmd {
|
||||||
void putenv(const std::string &envassign);
|
void putenv(const std::string &envassign);
|
||||||
void putenv(const std::string &name, const std::string& value);
|
void putenv(const std::string &name, const std::string& value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to set a limit on child process vm size. This will use
|
||||||
|
* setrlimit() and RLIMIT_AS/VMEM if available. Parameter is in
|
||||||
|
* megabytes (units of 2**20 bytes). Must be called before starting the command, default
|
||||||
|
* is inherit from parent.
|
||||||
|
*/
|
||||||
|
void setrlimit_as(int mbytes);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set function objects to call whenever new data is available or on
|
* Set function objects to call whenever new data is available or on
|
||||||
* select timeout / whenever new data is needed to send. Must be called
|
* select timeout / whenever new data is needed to send. Must be called
|
||||||
|
@ -158,7 +166,7 @@ class ExecCmd {
|
||||||
void zapChild() {setKill(); (void)wait();}
|
void zapChild() {setKill(); (void)wait();}
|
||||||
|
|
||||||
ExecCmd()
|
ExecCmd()
|
||||||
: m_advise(0), m_provide(0), m_timeoutMs(1000)
|
: m_advise(0), m_provide(0), m_timeoutMs(1000), m_rlimit_as_mbytes(0)
|
||||||
{
|
{
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
@ -191,6 +199,7 @@ class ExecCmd {
|
||||||
ExecCmdProvide *m_provide;
|
ExecCmdProvide *m_provide;
|
||||||
bool m_killRequest;
|
bool m_killRequest;
|
||||||
int m_timeoutMs;
|
int m_timeoutMs;
|
||||||
|
int m_rlimit_as_mbytes;
|
||||||
std::string m_stderrFile;
|
std::string m_stderrFile;
|
||||||
// Pipe for data going to the command
|
// Pipe for data going to the command
|
||||||
int m_pipein[2];
|
int m_pipein[2];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue