From 735f7ad1fdb7776e9426c41a7e5323efc5aec3f8 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 22 Apr 2015 17:08:06 +0200 Subject: [PATCH] Minimal check that we have enough space before attempting decompression. Change the config default for the compressed file size limit from no limit to 20 MB --- src/internfile/uncomp.cpp | 36 +++++++++++++++++++++++++++++++++ src/sampleconf/recoll.conf.in | 4 ++-- src/utils/pathut.cpp | 9 +++++---- src/utils/pathut.h | 2 +- tests/badsuffs1/badsuffs1.txt | 4 ++-- tests/compressed/compressed.sh | 1 + tests/compressed/compressed.txt | 2 ++ tests/dir/dir.txt | 2 +- 8 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/internfile/uncomp.cpp b/src/internfile/uncomp.cpp index 80c6581d..7ef4d694 100644 --- a/src/internfile/uncomp.cpp +++ b/src/internfile/uncomp.cpp @@ -17,6 +17,9 @@ #include "autoconfig.h" +#include +#include + #include #include #include @@ -28,6 +31,7 @@ using std::vector; #include "debuglog.h" #include "smallut.h" #include "execmd.h" +#include "pathut.h" Uncomp::UncompCache Uncomp::o_cache; @@ -56,6 +60,38 @@ bool Uncomp::uncompressfile(const string& ifn, LOGERR(("uncompressfile: can't clear temp dir %s\n", m_dir->dirname())); return false; } + + // Check that we have enough available space to have some hope of + // decompressing the file. + int pc; + long long availmbs; + if (!fsocc(m_dir->dirname(), &pc, &availmbs)) { + LOGERR(("uncompressfile: can't retrieve avail space for %s\n", + m_dir->dirname())); + // Hope for the best + } else { + struct stat stb; + if (stat(ifn.c_str(), &stb) < 0) { + LOGERR(("uncompressfile: stat input file %s errno %d\n", + ifn.c_str(), errno)); + return false; + } + // We need at least twice the file size for the uncompressed + // and compressed versions. Most compressors don't store the + // uncompressed size, so we have no way to be sure that we + // have enough space before trying.
We take a little margin. + + // Use the same MB definition as fsocc() (1 MB = 1024 * 1024 bytes) + long long filembs = stb.st_size / (1024 * 1024); + + if (availmbs < 2 * filembs + 1) { + LOGERR(("uncompressfile. %lld MBs available in %s not enough " + "to uncompress %s of size %lld mbs\n", availmbs, + m_dir->dirname(), ifn.c_str(), filembs)); + return false; + } + } + string cmd = cmdv.front(); // Substitute file name and temp dir in command elements diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in index 307211cd..9d302981 100644 --- a/src/sampleconf/recoll.conf.in +++ b/src/sampleconf/recoll.conf.in @@ -221,8 +221,8 @@ membermaxkbs = 50000 # Size limit for compressed files. We need to decompress these in a # temporary directory for identification, which can be wasteful in some # cases. Limit the waste. Negative means no limit. 0 results in no -# processing of any compressed file -compressedfilemaxkbs = -1 +# processing of any compressed file. Used to be -1 by default. +compressedfilemaxkbs = 20000 # Size limit for text files. This is for skipping monster logs textfilemaxmbs = 20 diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 22fa3c7d..51e4d740 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -59,7 +59,7 @@ using namespace std; #include "wipedir.h" #include "md5.h" -bool fsocc(const string &path, int *pc, long *blocks) +bool fsocc(const string &path, int *pc, long long *blocks) { #ifdef sun struct statvfs buf; @@ -88,8 +88,9 @@ bool fsocc(const string &path, int *pc, long *blocks) int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB : FSOCC_MB / buf.f_bsize; - *blocks = buf.f_bsize > FSOCC_MB ? long(buf.f_bavail) * ratio : - long(buf.f_bavail) / ratio; + *blocks = buf.f_bsize > FSOCC_MB ?
+ ((long long)buf.f_bavail) * ratio : + ((long long)buf.f_bavail) / ratio; } } return true; @@ -890,7 +891,7 @@ int main(int argc, const char **argv) string path = *argv++;argc--; int pc; - long blocks; + long long blocks; if (!fsocc(path, &pc, &blocks)) { fprintf(stderr, "fsocc failed\n"); return 1; diff --git a/src/utils/pathut.h b/src/utils/pathut.h index 0e8b6777..8fe73b59 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -77,7 +77,7 @@ extern bool readdir(const std::string& dir, std::string& reason, /** A small wrapper around statfs et al, to return percentage of disk occupation */ bool fsocc(const std::string &path, int *pc, // Percent occupied - long *avmbs = 0 // Mbs available to non-superuser + long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024 ); /// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp diff --git a/tests/badsuffs1/badsuffs1.txt b/tests/badsuffs1/badsuffs1.txt index 7086c2bb..4ba5d3e9 100644 --- a/tests/badsuffs1/badsuffs1.txt +++ b/tests/badsuffs1/badsuffs1.txt @@ -1,7 +1,7 @@ 6 results [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.log.gz] [badsufffilename.log.gz] 19 bytes [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.md5] [badsufffilename.md5] 19 bytes -application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.bz2] [badsufffilename.tar.bz2] 19 bytes -application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.gz] [badsufffilename.tar.gz] 19 bytes +application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.bz2] [badsufffilename.tar.bz2] 57 bytes +application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tar.gz] [badsufffilename.tar.gz] 39 bytes application/x-tar [file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tbz] [badsufffilename.tbz] 19 bytes application/x-tar 
[file:///home/dockes/projets/fulltext/testrecoll/badsuffs/badsufffilename.tgz] [badsufffilename.tgz] 19 bytes diff --git a/tests/compressed/compressed.sh b/tests/compressed/compressed.sh index 66867658..1d9da2c2 100755 --- a/tests/compressed/compressed.sh +++ b/tests/compressed/compressed.sh @@ -8,6 +8,7 @@ initvariables $0 ( recollq ASPCSPCCONTENT recollq BSPCSPCCONTENT +recollq CSPCSPCCONTENT ) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout diff -w ${myname}.txt $mystdout > $mydiffs 2>&1 diff --git a/tests/compressed/compressed.txt b/tests/compressed/compressed.txt index 5478b47b..02cc2aaa 100644 --- a/tests/compressed/compressed.txt +++ b/tests/compressed/compressed.txt @@ -2,3 +2,5 @@ text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/a b.txt.gz] [a b.txt.gz] 15 bytes 1 results text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/b b.txt.bz2] [b b.txt.bz2] 15 bytes +1 results +text/plain [file:///home/dockes/projets/fulltext/testrecoll/compressed/nosuff] [nosuff] 15 bytes diff --git a/tests/dir/dir.txt b/tests/dir/dir.txt index 6021c364..e78cbffd 100644 --- a/tests/dir/dir.txt +++ b/tests/dir/dir.txt @@ -1,2 +1,2 @@ 1 results -text/plain [file:///home/dockes/projets/fulltext/testrecoll/dir/d1/testdirfile.txt] [testdirfile.txt] 0 bytes +text/plain [file:///home/dockes/projets/fulltext/testrecoll/dir/d1/testdirfile.txt] [testdirfile.txt] 1 bytes