Perform a minimal check that we have enough space before attempting decompression. Change the config default for the compressed file size limit from no limit to 20 MB

This commit is contained in:
Jean-Francois Dockes 2015-04-22 17:08:06 +02:00
parent 4447b6f147
commit 735f7ad1fd
8 changed files with 50 additions and 10 deletions

View file

@ -17,6 +17,9 @@
#include "autoconfig.h"
#include <errno.h>
#include <sys/stat.h>
#include <string>
#include <vector>
#include <map>
@ -28,6 +31,7 @@ using std::vector;
#include "debuglog.h"
#include "smallut.h"
#include "execmd.h"
#include "pathut.h"
Uncomp::UncompCache Uncomp::o_cache;
@ -56,6 +60,38 @@ bool Uncomp::uncompressfile(const string& ifn,
LOGERR(("uncompressfile: can't clear temp dir %s\n", m_dir->dirname()));
return false;
}
// Check that we have enough available space to have some hope of
// decompressing the file.
int pc;
long long availmbs;
if (!fsocc(m_dir->dirname(), &pc, &availmbs)) {
LOGERR(("uncompressfile: can't retrieve avail space for %s\n",
m_dir->dirname()));
// Hope for the best
} else {
struct stat stb;
if (stat(ifn.c_str(), &stb) < 0) {
LOGERR(("uncompressfile: stat input file %s errno %d\n",
ifn.c_str(), errno));
return false;
}
// We need at least twice the file size for the uncompressed
// and compressed versions. Most compressors don't store the
// uncompressed size, so we have no way to be sure that we
// have enough space before trying. We take a little margin.
// Use the same MB definition as fsocc().
long long filembs = stb.st_size / (1024 * 1024);
if (availmbs < 2 * filembs + 1) {
LOGERR(("uncompressfile. %lld MBs available in %s not enough "
"to uncompress %s of size %lld mbs\n", availmbs,
m_dir->dirname(), ifn.c_str(), filembs));
return false;
}
}
string cmd = cmdv.front();
// Substitute file name and temp dir in command elements

View file

@ -221,8 +221,8 @@ membermaxkbs = 50000
# Size limit for compressed files. We need to decompress these in a
# temporary directory for identification, which can be wasteful in some
# cases. Limit the waste. Negative means no limit. 0 results in no
# processing of any compressed file
compressedfilemaxkbs = -1
# processing of any compressed file. Used to be -1 (no limit) by default.
compressedfilemaxkbs = 20000
# Size limit for text files. This is for skipping monster logs
textfilemaxmbs = 20

View file

@ -59,7 +59,7 @@ using namespace std;
#include "wipedir.h"
#include "md5.h"
bool fsocc(const string &path, int *pc, long *blocks)
bool fsocc(const string &path, int *pc, long long *blocks)
{
#ifdef sun
struct statvfs buf;
@ -88,8 +88,9 @@ bool fsocc(const string &path, int *pc, long *blocks)
int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB :
FSOCC_MB / buf.f_bsize;
*blocks = buf.f_bsize > FSOCC_MB ? long(buf.f_bavail) * ratio :
long(buf.f_bavail) / ratio;
*blocks = buf.f_bsize > FSOCC_MB ?
((long long)buf.f_bavail) * ratio :
((long long)buf.f_bavail) / ratio;
}
}
return true;
@ -890,7 +891,7 @@ int main(int argc, const char **argv)
string path = *argv++;argc--;
int pc;
long blocks;
long long blocks;
if (!fsocc(path, &pc, &blocks)) {
fprintf(stderr, "fsocc failed\n");
return 1;

View file

@ -77,7 +77,7 @@ extern bool readdir(const std::string& dir, std::string& reason,
/** A small wrapper around statfs et al, to return percentage of disk
occupation */
bool fsocc(const std::string &path, int *pc, // Percent occupied
long *avmbs = 0 // Mbs available to non-superuser
long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024
);
/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp

View file

@ -1,7 +1,7 @@
6 results
[file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.log.gz] [badsufffilename.log.gz] 19 bytes
[file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.md5] [badsufffilename.md5] 19 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.bz2] [badsufffilename.tar.bz2] 19 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.gz] [badsufffilename.tar.gz] 19 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.bz2] [badsufffilename.tar.bz2] 57 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.gz] [badsufffilename.tar.gz] 39 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tbz] [badsufffilename.tbz] 19 bytes
application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tgz] [badsufffilename.tgz] 19 bytes

View file

@ -8,6 +8,7 @@ initvariables $0
(
recollq ASPCSPCCONTENT
recollq BSPCSPCCONTENT
recollq CSPCSPCCONTENT
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
diff -w ${myname}.txt $mystdout > $mydiffs 2>&1

View file

@ -2,3 +2,5 @@
text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/a b.txt.gz] [a b.txt.gz] 15 bytes
1 results
text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/b b.txt.bz2] [b b.txt.bz2] 15 bytes
1 results
text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/nosuff] [nosuff] 15 bytes

View file

@ -1,2 +1,2 @@
1 results
text/plain [file:///home/dockes/projets/fulltext/testrecoll/dir/d1/testdirfile.txt] [testdirfile.txt] 0 bytes
text/plain [file:///home/dockes/projets/fulltext/testrecoll/dir/d1/testdirfile.txt] [testdirfile.txt] 1 bytes