web queue: fix cache resizing utility + bug in indexer which would skip oldest entry

This commit is contained in:
Jean-Francois Dockes 2015-02-15 16:08:36 +01:00
parent 1f2776daf6
commit 9d26f4908a
2 changed files with 210 additions and 191 deletions

View file

@ -287,7 +287,8 @@ bool BeagleQueueIndexer::index()
if (!eof)
return false;
}
while (cc->next(eof)) {
int nentries = 0;
do {
string udi;
if (!cc->getCurrentUdi(udi)) {
LOGERR(("BeagleQueueIndexer:: cache file damaged\n"));
@ -307,7 +308,8 @@ bool BeagleQueueIndexer::index()
return false;
}
}
}
nentries++;
} while (cc->next(eof));
}
// Finally index the queue

View file

@ -156,7 +156,8 @@ public:
////// These are cache persistent state and written to the first block:
// Maximum file size, after which we begin reusing old space
off_t m_maxsize;
// Offset of the oldest header.
// Offset of the oldest header, or max file offset (file size)
// while the file is growing. This is the next write position.
off_t m_oheadoffs;
// Offset of last write (newest header)
off_t m_nheadoffs;
@ -188,15 +189,15 @@ public:
UdiH h(udi);
LOGDEB2(("Circache::khEnter: h %s offs %lu udi [%s]\n",
h.asHexString().c_str(), (ULONG)ofs, udi.c_str()));
h.asHexString().c_str(), (ULONG)ofs, udi.c_str()));
pair<kh_type::iterator, kh_type::iterator> p = m_ofskh.equal_range(h);
if (p.first != m_ofskh.end() && p.first->first == h) {
for (kh_type::iterator it = p.first; it != p.second; it++) {
LOGDEB2(("Circache::khEnter: col h %s, ofs %lu\n",
it->first.asHexString().c_str(),
(ULONG)it->second));
it->first.asHexString().c_str(),
(ULONG)it->second));
if (it->second == ofs) {
// (h,offs) already there. Happens
LOGDEB2(("Circache::khEnter: already there\n"));
@ -227,7 +228,7 @@ public:
UdiH h(udi);
LOGDEB2(("Circache::khFind: h %s udi [%s]\n",
h.asHexString().c_str(), udi.c_str()));
h.asHexString().c_str(), udi.c_str()));
pair<kh_type::iterator, kh_type::iterator> p = m_ofskh.equal_range(h);
@ -318,10 +319,10 @@ public:
bool writefirstblock()
{
if (m_fd < 0) {
m_reason << "writefirstblock: not open ";
return false;
}
if (m_fd < 0) {
m_reason << "writefirstblock: not open ";
return false;
}
ostringstream s;
s <<
@ -347,10 +348,10 @@ public:
bool readfirstblock()
{
if (m_fd < 0) {
m_reason << "readfirstblock: not open ";
return false;
}
if (m_fd < 0) {
m_reason << "readfirstblock: not open ";
return false;
}
char bf[CIRCACHE_FIRSTBLOCK_SIZE];
@ -393,14 +394,14 @@ public:
bool writeEntryHeader(off_t offset, const EntryHeaderData& d)
{
if (m_fd < 0) {
m_reason << "writeEntryHeader: not open ";
return false;
}
if (m_fd < 0) {
m_reason << "writeEntryHeader: not open ";
return false;
}
char bf[CIRCACHE_HEADER_SIZE];
memset(bf, 0, CIRCACHE_HEADER_SIZE);
snprintf(bf, CIRCACHE_HEADER_SIZE,
headerformat, d.dicsize, d.datasize, d.padsize, d.flags);
headerformat, d.dicsize, d.datasize, d.padsize, d.flags);
if (lseek(m_fd, offset, 0) != offset) {
m_reason << "CirCache::weh: lseek(" << offset <<
") failed: errno " << errno;
@ -415,10 +416,10 @@ public:
CCScanHook::status readEntryHeader(off_t offset, EntryHeaderData& d)
{
if (m_fd < 0) {
m_reason << "readEntryHeader: not open ";
if (m_fd < 0) {
m_reason << "readEntryHeader: not open ";
return CCScanHook::Error;
}
}
if (lseek(m_fd, offset, 0) != offset) {
m_reason << "readEntryHeader: lseek(" << offset <<
@ -451,10 +452,10 @@ public:
CCScanHook::status scan(off_t startoffset, CCScanHook *user,
bool fold = false)
{
if (m_fd < 0) {
m_reason << "scan: not open ";
if (m_fd < 0) {
m_reason << "scan: not open ";
return CCScanHook::Error;
}
}
off_t so0 = startoffset;
bool already_folded = false;
@ -622,16 +623,16 @@ public:
off_t headoffs;
off_t padsize;
CCScanHookRecord()
: headoffs(0), padsize(0)
: headoffs(0), padsize(0)
{
}
virtual status takeone(off_t offs, const string& udi,
const EntryHeaderData& d)
const EntryHeaderData& d)
{
headoffs = offs;
padsize = d.padsize;
LOGDEB2(("CCScanHookRecord::takeone: offs %lld padsize %lld\n",
headoffs, padsize));
headoffs = offs;
padsize = d.padsize;
LOGDEB2(("CCScanHookRecord::takeone: offs %lld padsize %lld\n",
headoffs, padsize));
return Continue;
}
};
@ -644,62 +645,62 @@ string CirCache::getpath()
bool CirCache::create(off_t maxsize, int flags)
{
LOGDEB(("CirCache::create: [%s] maxsz %lld flags 0x%x\n",
m_dir.c_str(), maxsize, flags));
m_dir.c_str(), maxsize, flags));
if (m_d == 0) {
LOGERR(("CirCache::create: null data\n"));
return false;
LOGERR(("CirCache::create: null data\n"));
return false;
}
struct stat st;
if (stat(m_dir.c_str(), &st) < 0) {
// Directory does not exist, create it
if (mkdir(m_dir.c_str(), 0777) < 0) {
m_d->m_reason << "CirCache::create: mkdir(" << m_dir <<
") failed" << " errno " << errno;
return false;
}
// Directory does not exist, create it
if (mkdir(m_dir.c_str(), 0777) < 0) {
m_d->m_reason << "CirCache::create: mkdir(" << m_dir <<
") failed" << " errno " << errno;
return false;
}
} else {
// If the file exists too, and truncate is not set, switch
// to open-mode. Still may need to update header params.
if (access(m_d->datafn(m_dir).c_str(), 0) >= 0 &&
!(flags & CC_CRTRUNCATE)) {
if (!open(CC_OPWRITE)) {
return false;
}
if (maxsize == m_d->m_maxsize &&
((flags & CC_CRUNIQUE) != 0) == m_d->m_uniquentries) {
LOGDEB(("Header unchanged, no rewrite\n"));
return true;
}
// If the new maxsize is bigger than current size, we need
// to stop recycling if this is what we are doing.
if (maxsize > m_d->m_maxsize && maxsize > st.st_size) {
// Scan the file to find the last physical record. The
// ohead is set at physical eof, and nhead is the last
// scanned record
CCScanHookRecord rec;
m_d->scan(CIRCACHE_FIRSTBLOCK_SIZE, &rec, false);
m_d->m_oheadoffs = lseek(m_d->m_fd, 0, SEEK_END);
m_d->m_nheadoffs = rec.headoffs;
m_d->m_npadsize = rec.padsize;
}
m_d->m_maxsize = maxsize;
m_d->m_uniquentries = ((flags & CC_CRUNIQUE) != 0);
LOGDEB(("CirCache::create: rewriting header with "
"maxsize %lld oheadoffs %lld nheadoffs %lld "
"npadsize %d unient %d\n",
m_d->m_maxsize, m_d->m_oheadoffs, m_d->m_nheadoffs,
m_d->m_npadsize, int(m_d->m_uniquentries)));
return m_d->writefirstblock();
}
// Else fallthrough to create file
// If the file exists too, and truncate is not set, switch
// to open-mode. Still may need to update header params.
if (access(m_d->datafn(m_dir).c_str(), 0) >= 0 &&
!(flags & CC_CRTRUNCATE)) {
if (!open(CC_OPWRITE)) {
return false;
}
if (maxsize == m_d->m_maxsize &&
((flags & CC_CRUNIQUE) != 0) == m_d->m_uniquentries) {
LOGDEB(("Header unchanged, no rewrite\n"));
return true;
}
// If the new maxsize is bigger than current size, we need
// to stop recycling if this is what we are doing.
if (maxsize > m_d->m_maxsize && maxsize > st.st_size) {
// Scan the file to find the last physical record. The
// ohead is set at physical eof, and nhead is the last
// scanned record
CCScanHookRecord rec;
m_d->scan(CIRCACHE_FIRSTBLOCK_SIZE, &rec, false);
m_d->m_oheadoffs = lseek(m_d->m_fd, 0, SEEK_END);
m_d->m_nheadoffs = rec.headoffs;
m_d->m_npadsize = rec.padsize;
}
m_d->m_maxsize = maxsize;
m_d->m_uniquentries = ((flags & CC_CRUNIQUE) != 0);
LOGDEB(("CirCache::create: rewriting header with "
"maxsize %lld oheadoffs %lld nheadoffs %lld "
"npadsize %d unient %d\n",
m_d->m_maxsize, m_d->m_oheadoffs, m_d->m_nheadoffs,
m_d->m_npadsize, int(m_d->m_uniquentries)));
return m_d->writefirstblock();
}
// Else fallthrough to create file
}
if ((m_d->m_fd = ::open(m_d->datafn(m_dir).c_str(),
O_CREAT | O_RDWR | O_TRUNC, 0666)) < 0) {
O_CREAT | O_RDWR | O_TRUNC, 0666)) < 0) {
m_d->m_reason << "CirCache::create: open/creat(" <<
m_d->datafn(m_dir) << ") failed " << "errno " << errno;
return false;
return false;
}
m_d->m_maxsize = maxsize;
@ -720,15 +721,15 @@ bool CirCache::create(off_t maxsize, int flags)
bool CirCache::open(OpMode mode)
{
if (m_d == 0) {
LOGERR(("CirCache::open: null data\n"));
return false;
LOGERR(("CirCache::open: null data\n"));
return false;
}
if (m_d->m_fd >= 0)
::close(m_d->m_fd);
if ((m_d->m_fd = ::open(m_d->datafn(m_dir).c_str(),
mode == CC_OPREAD ? O_RDONLY : O_RDWR)) < 0) {
mode == CC_OPREAD ? O_RDONLY : O_RDWR)) < 0) {
m_d->m_reason << "CirCache::open: open(" << m_d->datafn(m_dir) <<
") failed " << "errno " << errno;
return false;
@ -937,13 +938,13 @@ public:
UINT sizeseen;
vector<pair<string, off_t> > squashed_udis;
CCScanHookSpacer(int sz)
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
virtual status takeone(off_t offs, const string& udi,
const EntryHeaderData& d)
{
LOGDEB2(("Circache:ScanSpacer:off %u dcsz %u dtsz %u pdsz %u udi[%s]\n",
(UINT)offs, d.dicsize, d.datasize, d.padsize, udi.c_str()));
(UINT)offs, d.dicsize, d.datasize, d.padsize, udi.c_str()));
sizeseen += CIRCACHE_HEADER_SIZE + d.dicsize + d.datasize + d.padsize;
squashed_udis.push_back(make_pair(udi, offs));
if (sizeseen >= sizewanted)
@ -1029,10 +1030,10 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
if (m_d->readEntryHeader(m_d->m_nheadoffs, pd) != CCScanHook::Continue){
return false;
}
if (int(pd.padsize) != m_d->m_npadsize) {
m_d->m_reason << "CirCache::put: logic error: bad padsize ";
return false;
}
if (int(pd.padsize) != m_d->m_npadsize) {
m_d->m_reason << "CirCache::put: logic error: bad padsize ";
return false;
}
if (pd.dicsize == 0) {
// erased entry. Also recover the header space, no need to rewrite
// the header, we're going to write on it.
@ -1092,7 +1093,7 @@ bool CirCache::put(const string& udi, const ConfSimple *iconf,
char head[CIRCACHE_HEADER_SIZE];
memset(head, 0, CIRCACHE_HEADER_SIZE);
snprintf(head, CIRCACHE_HEADER_SIZE,
headerformat, dic.size(), datalen, npadsize, flags);
headerformat, dic.size(), datalen, npadsize, flags);
struct iovec vecs[3];
vecs[0].iov_base = head;
vecs[0].iov_len = CIRCACHE_HEADER_SIZE;
@ -1132,8 +1133,19 @@ bool CirCache::rewind(bool& eof)
eof = false;
// Read oldest header
m_d->m_itoffs = m_d->m_oheadoffs;
off_t fsize = lseek(m_d->m_fd, 0, SEEK_END);
if (fsize == (off_t)-1) {
LOGERR(("CirCache::rewind: seek to EOF failed\n"));
return false;
}
// Read oldest header. This is either at the position pointed to
// by oheadoffs, or after the first block if the file is still
// growing.
if (m_d->m_oheadoffs == fsize) {
m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE;
} else {
m_d->m_itoffs = m_d->m_oheadoffs;
}
CCScanHook::status st = m_d->readEntryHeader(m_d->m_itoffs, m_d->m_ithd);
switch(st) {
@ -1262,7 +1274,7 @@ static bool inflateToDynBuf(void* inp, UINT inlen, void **outpp, UINT *outlenp)
d_stream.avail_out, d_stream.total_out));
if (d_stream.avail_out == 0) {
if ((outp = (char*)allocmem(outp, inlen, &alloc,
imul, mxinc)) == 0) {
imul, mxinc)) == 0) {
LOGERR(("Inflate: out of memory, current alloc %d\n",
alloc*inlen));
inflateEnd(&d_stream);
@ -1347,11 +1359,16 @@ bool resizecc(const string& dir, int newmbs)
CirCache::CC_CRUNIQUE | CirCache::CC_CRTRUNCATE)) {
cerr << "Cant create new file in " << tmpdir << " : " <<
ncc->getReason() << endl;
return false;
return false;
}
bool eof = false;
occ->rewind(eof);
if (!occ->rewind(eof)) {
if (!eof) {
cerr << "Initial rewind failed" << endl;
return false;
}
}
int nentries = 0;
while (!eof) {
string udi, sdic, data;
@ -1402,15 +1419,15 @@ bool resizecc(const string& dir, int newmbs)
static char *thisprog;
static char usage [] =
" -c [-u] <dirname> : create\n"
" -p <dirname> <apath> [apath ...] : put files\n"
" -d <dirname> : dump\n"
" -g [-i instance] [-D] <dirname> <udi>: get\n"
" -D: also dump data\n"
" -e <dirname> <udi> : erase\n"
" -s <dirname> <newmbs> : resize\n"
;
static char usage [] =
" -c [-u] <dirname> : create\n"
" -p <dirname> <apath> [apath ...] : put files\n"
" -d <dirname> : dump\n"
" -g [-i instance] [-D] <dirname> <udi>: get\n"
" -D: also dump data\n"
" -e <dirname> <udi> : erase\n"
" -s <dirname> <newmbs> : resize\n"
;
static void
Usage(FILE *fp = stderr)
{
@ -1438,27 +1455,27 @@ int main(int argc, char **argv)
argc--; argv++;
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
case 'c': op_flags |= OPT_c; break;
case 'D': op_flags |= OPT_D; break;
case 'd': op_flags |= OPT_d; break;
case 'e': op_flags |= OPT_e; break;
case 'g': op_flags |= OPT_g; break;
case 'i': op_flags |= OPT_i; if (argc < 2) Usage();
if ((sscanf(*(++argv), "%d", &instance)) != 1)
Usage();
argc--;
goto b1;
case 'p': op_flags |= OPT_p; break;
case 's': op_flags |= OPT_s; break;
case 'u': op_flags |= OPT_u; break;
default: Usage(); break;
}
(*argv)++;
if (!(**argv))
/* Cas du "adb - core" */
Usage();
while (**argv)
switch (*(*argv)++) {
case 'c': op_flags |= OPT_c; break;
case 'D': op_flags |= OPT_D; break;
case 'd': op_flags |= OPT_d; break;
case 'e': op_flags |= OPT_e; break;
case 'g': op_flags |= OPT_g; break;
case 'i': op_flags |= OPT_i; if (argc < 2) Usage();
if ((sscanf(*(++argv), "%d", &instance)) != 1)
Usage();
argc--;
goto b1;
case 'p': op_flags |= OPT_p; break;
case 's': op_flags |= OPT_s; break;
case 'u': op_flags |= OPT_u; break;
default: Usage(); break;
}
b1: argc--; argv++;
}
@ -1466,19 +1483,19 @@ int main(int argc, char **argv)
DebugLog::setfilename("stderr");
if (argc < 1)
Usage();
Usage();
string dir = *argv++;argc--;
CirCache cc(dir);
if (op_flags & OPT_c) {
int flags = 0;
if (op_flags & OPT_u)
flags |= CirCache::CC_CRUNIQUE;
if (!cc.create(100*1024, flags)) {
cerr << "Create failed:" << cc.getReason() << endl;
exit(1);
}
int flags = 0;
if (op_flags & OPT_u)
flags |= CirCache::CC_CRUNIQUE;
if (!cc.create(100*1024, flags)) {
cerr << "Create failed:" << cc.getReason() << endl;
exit(1);
}
} else if (op_flags & OPT_s) {
if (argc != 1) {
Usage();
@ -1488,72 +1505,72 @@ int main(int argc, char **argv)
exit(1);
}
} else if (op_flags & OPT_p) {
if (argc < 1)
Usage();
if (!cc.open(CirCache::CC_OPWRITE)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string fn = *argv++;argc--;
char dic[1000];
string data, reason;
if (!file_to_string(fn, data, &reason)) {
cerr << "File_to_string: " << reason << endl;
exit(1);
}
string udi;
make_udi(fn, "", udi);
sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n",
udi.c_str());
string sdic;
sdic.assign(dic, strlen(dic));
ConfSimple conf(sdic);
if (argc < 1)
Usage();
if (!cc.open(CirCache::CC_OPWRITE)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string fn = *argv++;argc--;
char dic[1000];
string data, reason;
if (!file_to_string(fn, data, &reason)) {
cerr << "File_to_string: " << reason << endl;
exit(1);
}
string udi;
make_udi(fn, "", udi);
sprintf(dic, "#whatever...\nmimetype = text/plain\nudi=%s\n",
udi.c_str());
string sdic;
sdic.assign(dic, strlen(dic));
ConfSimple conf(sdic);
if (!cc.put(udi, &conf, data, 0)) {
cerr << "Put failed: " << cc.getReason() << endl;
cerr << "conf: ["; conf.write(cerr); cerr << "]" << endl;
exit(1);
}
}
cc.open(CirCache::CC_OPREAD);
if (!cc.put(udi, &conf, data, 0)) {
cerr << "Put failed: " << cc.getReason() << endl;
cerr << "conf: ["; conf.write(cerr); cerr << "]" << endl;
exit(1);
}
}
cc.open(CirCache::CC_OPREAD);
} else if (op_flags & OPT_g) {
if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string udi = *argv++;argc--;
string dic, data;
if (!cc.get(udi, dic, data, instance)) {
cerr << "Get failed: " << cc.getReason() << endl;
exit(1);
}
cout << "Dict: [" << dic << "]" << endl;
if (op_flags & OPT_D)
cout << "Data: [" << data << "]" << endl;
}
if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string udi = *argv++;argc--;
string dic, data;
if (!cc.get(udi, dic, data, instance)) {
cerr << "Get failed: " << cc.getReason() << endl;
exit(1);
}
cout << "Dict: [" << dic << "]" << endl;
if (op_flags & OPT_D)
cout << "Data: [" << data << "]" << endl;
}
} else if (op_flags & OPT_e) {
if (!cc.open(CirCache::CC_OPWRITE)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string udi = *argv++;argc--;
string dic, data;
if (!cc.erase(udi)) {
cerr << "Erase failed: " << cc.getReason() << endl;
exit(1);
}
}
if (!cc.open(CirCache::CC_OPWRITE)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
while (argc) {
string udi = *argv++;argc--;
string dic, data;
if (!cc.erase(udi)) {
cerr << "Erase failed: " << cc.getReason() << endl;
exit(1);
}
}
} else if (op_flags & OPT_d) {
if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
cc.dump();
if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl;
exit(1);
}
cc.dump();
} else
Usage();
Usage();
exit(0);
}