1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-03 09:49:19 +02:00

multi-level ToC + fixed huffdic decompression issue

This commit is contained in:
Nikolay Pultsin 2014-09-17 10:01:06 +01:00
parent 1c7c19e7f1
commit bb49ef1460
9 changed files with 64 additions and 34 deletions

View file

@ -614,3 +614,7 @@ void HtmlBookReader::endDocumentHandler() {
void HtmlBookReader::setFileName(const std::string fileName) {
myFileName = fileName;
}
// Reports how many elements are currently held in myListNumStack.
// Callers in the Mobipocket reader pass this value on as a ToC entry level.
size_t HtmlBookReader::listStackDepth() const {
	const size_t depth = myListNumStack.size();
	return depth;
}

View file

@ -52,6 +52,7 @@ public:
~HtmlBookReader();
void setFileName(const std::string fileName);
shared_ptr<StyleSheetParser> createCSSParser();
size_t listStackDepth() const;
protected:
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);

View file

@ -40,7 +40,7 @@ unsigned long long BitReader::peek(size_t n) {
size_t g = 0;
while (g < n) {
r = (r << 8) | myData[(myOffset + g) >> 3];
g = g + 8 - ((myOffset+g) & 7);
g = g + 8 - ((myOffset + g) & 7);
}
unsigned long long mask = 1;
mask = (mask << n) - 1;

View file

@ -31,9 +31,9 @@ public:
size_t left() const;
private:
unsigned char* myData;
unsigned char *myData;
size_t myOffset;
size_t myLength;
const size_t myLength;
};
#endif //__BITREADER_H__

View file

@ -58,7 +58,7 @@ HuffDecompressor::HuffDecompressor(
myData = new unsigned char[huffDataSize];
stream.seek(huffDataOffset, true);
if (huffDataSize == stream.read((char*)myData, huffDataSize)) {
myDicts = new unsigned char* [huffRecordsNumber - 1];
myDicts = new unsigned char*[huffRecordsNumber - 1];
for(size_t i = 0; i < huffRecordsNumber - 1; ++i) {
size_t shift = *(beginIt + i + 1) - huffDataOffset;
myDicts[i] = myData + shift;
@ -95,7 +95,8 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize);
if (trailSize < compressedSize) {
bitsDecompress(BitReader(sourceBuffer, compressedSize - trailSize));
BitReader reader(sourceBuffer, compressedSize - trailSize);
bitsDecompress(reader);
} else {
myErrorCode = ERROR_CORRUPTED_FILE;
}
@ -110,7 +111,7 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
return myTargetBufferPtr - myTargetBuffer;
}
void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
void HuffDecompressor::bitsDecompress(BitReader &bits, size_t depth) {
if (depth > 32) {
myErrorCode = ERROR_CORRUPTED_FILE;
return;
@ -137,7 +138,7 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
// return false;
//}
if (!bits.eat(codelen)) {
return;
break;
}
const unsigned long dicno = r >> myEntryBits;
const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
@ -151,10 +152,11 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
memcpy(myTargetBufferPtr, slice, sliceSize);
myTargetBufferPtr += sliceSize;
} else {
return;
break;
}
} else {
bitsDecompress(BitReader(slice, sliceSize), depth + 1);
BitReader reader(slice, sliceSize);
bitsDecompress(reader, depth + 1);
}
}
}
@ -170,6 +172,9 @@ size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, size_t size)
}
flags >>= 1;
}
if (myExtraFlags & 1) {
num += (data[size - num - 1] & 0x3) + 1;
}
return num;
}

View file

@ -39,7 +39,7 @@ public:
private:
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
void bitsDecompress(BitReader bits, size_t depth = 0);
void bitsDecompress(BitReader &bits, size_t depth = 0);
private:
unsigned long myEntryBits;

View file

@ -159,12 +159,12 @@ void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
myIsActive = true;
}
void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
void MobipocketHtmlBookReader::TOCReader::endReadEntry(size_t level) {
if (myIsActive && !myCurrentEntryText.empty()) {
std::string converted;
myReader.myConverter->convert(converted, myCurrentEntryText);
myReader.myConverter->reset();
myEntries[myCurrentReference] = converted;
myEntries[myCurrentReference] = Entry(converted, level);
myCurrentEntryText.erase();
}
myIsActive = false;
@ -176,8 +176,14 @@ void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t le
}
}
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) {
myEntries[position] = text;
// Default entry: empty text at level 0.
MobipocketHtmlBookReader::TOCReader::Entry::Entry()
	: Text(),
	  Level(0) {
}
// Builds an entry from its title text and its level.
MobipocketHtmlBookReader::TOCReader::Entry::Entry(
	const std::string &text,
	size_t level
)
	: Text(text),
	  Level(level) {
}
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text, size_t level) {
myEntries[position] = Entry(text, level);
if (rangeContainsPosition(position)) {
setEndOffset(position);
}
@ -185,7 +191,7 @@ void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const st
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
myStartOffset = position;
std::map<size_t,std::string>::const_iterator it = myEntries.lower_bound(position);
std::map<size_t,Entry>::const_iterator it = myEntries.lower_bound(position);
if (it != myEntries.end()) {
++it;
if (it != myEntries.end()) {
@ -198,7 +204,7 @@ void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
myEndOffset = position;
}
const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
const std::map<size_t,MobipocketHtmlBookReader::TOCReader::Entry> &MobipocketHtmlBookReader::TOCReader::entries() const {
return myEntries;
}
@ -224,7 +230,7 @@ void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
}
}
} else {
reader.myTocReader.endReadEntry();
reader.myTocReader.endReadEntry(reader.listStackDepth());
}
HtmlHrefTagAction::run(tag);
}
@ -264,7 +270,7 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
if (!title.empty() && !filepos.empty()) {
const int position = ZLStringUtil::parseDecimal(filepos, -1);
if (position > 0) {
reader.myTocReader.addReference(position, title);
reader.myTocReader.addReference(position, title, reader.listStackDepth());
if (isTocReference) {
reader.myTocReader.setStartOffset(position);
}
@ -348,16 +354,23 @@ void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
}
jt = myPositionToParagraphMap.begin();
const std::map<size_t,std::string> &entries = myTocReader.entries();
for (std::map<size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
const std::map<size_t,TOCReader::Entry> &entries = myTocReader.entries();
int level = 0;
for (std::map<size_t,TOCReader::Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
++jt;
}
if (jt == myPositionToParagraphMap.end()) {
break;
}
for (; level >= (int)it->second.Level; --level) {
myBookReader.endContentsParagraph();
}
myBookReader.beginContentsParagraph(jt->second);
myBookReader.addContentsData(it->second);
myBookReader.addContentsData(it->second.Text);
level = it->second.Level;
}
for (; level >= 0; --level) {
myBookReader.endContentsParagraph();
}
}

View file

@ -39,11 +39,20 @@ private:
public:
class TOCReader {
public:
// A single table-of-contents entry stored in the TOCReader entries map.
struct Entry {
	// Title text of the entry.
	std::string Text;
	// Level of the entry; the default constructor sets it to 0.
	// NOTE(review): presumably the nesting depth within the ToC — confirm against callers.
	size_t Level;
	// Creates an entry with empty text and level 0.
	Entry();
	// Creates an entry with the given text and level.
	Entry(const std::string &text, size_t level);
};
public:
TOCReader(MobipocketHtmlBookReader &reader);
void reset();
void addReference(size_t position, const std::string &text);
void addReference(size_t position, const std::string &text, size_t level);
void setStartOffset(size_t position);
void setEndOffset(size_t position);
@ -51,15 +60,15 @@ public:
bool rangeContainsPosition(size_t position);
void startReadEntry(size_t position);
void endReadEntry();
void endReadEntry(size_t level);
void appendText(const char *text, size_t len);
const std::map<size_t,std::string> &entries() const;
const std::map<size_t,Entry> &entries() const;
private:
MobipocketHtmlBookReader &myReader;
std::map<size_t,std::string> myEntries;
std::map<size_t,Entry> myEntries;
bool myIsActive;
size_t myStartOffset;

View file

@ -119,21 +119,19 @@ bool PalmDocStream::processZeroRecord() {
unsigned long mobiHeaderLength;
unsigned long huffSectionIndex;
unsigned long huffSectionNumber;
unsigned short extraFlags;
unsigned long extraFlags = 0;
unsigned long initialOffset = header().Offsets[0];
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
myBase->seek(0x70 - 24, false); // myBase offset: ^ + 102 (0x70)
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 106 (0x74)
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 110 (0x78)
myBase->seek(initialOffset + 112, true); // myBase offset: ^ + 112
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 116
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 120
if (mobiHeaderLength >= 244) {
myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2)
extraFlags = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 244 (0xF4)
} else {
extraFlags = 0;
if (16 + mobiHeaderLength >= 244) {
myBase->seek(initialOffset + 240, true); // myBase offset: ^ + 240
extraFlags = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 244
}
/*
std::cerr << "mobi header length: " << mobiHeaderLength << "\n";