1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-03 17:59:33 +02:00

multi-level ToC + fixed huffdic decompression issue

This commit is contained in:
Nikolay Pultsin 2014-09-17 10:01:06 +01:00
parent 1c7c19e7f1
commit bb49ef1460
9 changed files with 64 additions and 34 deletions

View file

@ -614,3 +614,7 @@ void HtmlBookReader::endDocumentHandler() {
void HtmlBookReader::setFileName(const std::string fileName) { void HtmlBookReader::setFileName(const std::string fileName) {
myFileName = fileName; myFileName = fileName;
} }
// Reports how many elements are currently held on myListNumStack.
// The Mobipocket TOC code passes this value as the `level` of a
// table-of-contents entry.
size_t HtmlBookReader::listStackDepth() const {
	const size_t depth = myListNumStack.size();
	return depth;
}

View file

@ -52,6 +52,7 @@ public:
~HtmlBookReader(); ~HtmlBookReader();
void setFileName(const std::string fileName); void setFileName(const std::string fileName);
shared_ptr<StyleSheetParser> createCSSParser(); shared_ptr<StyleSheetParser> createCSSParser();
size_t listStackDepth() const;
protected: protected:
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag); virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);

View file

@ -40,7 +40,7 @@ unsigned long long BitReader::peek(size_t n) {
size_t g = 0; size_t g = 0;
while (g < n) { while (g < n) {
r = (r << 8) | myData[(myOffset + g) >> 3]; r = (r << 8) | myData[(myOffset + g) >> 3];
g = g + 8 - ((myOffset+g) & 7); g = g + 8 - ((myOffset + g) & 7);
} }
unsigned long long mask = 1; unsigned long long mask = 1;
mask = (mask << n) - 1; mask = (mask << n) - 1;

View file

@ -31,9 +31,9 @@ public:
size_t left() const; size_t left() const;
private: private:
unsigned char* myData; unsigned char *myData;
size_t myOffset; size_t myOffset;
size_t myLength; const size_t myLength;
}; };
#endif //__BITREADER_H__ #endif //__BITREADER_H__

View file

@ -58,7 +58,7 @@ HuffDecompressor::HuffDecompressor(
myData = new unsigned char[huffDataSize]; myData = new unsigned char[huffDataSize];
stream.seek(huffDataOffset, true); stream.seek(huffDataOffset, true);
if (huffDataSize == stream.read((char*)myData, huffDataSize)) { if (huffDataSize == stream.read((char*)myData, huffDataSize)) {
myDicts = new unsigned char* [huffRecordsNumber - 1]; myDicts = new unsigned char*[huffRecordsNumber - 1];
for(size_t i = 0; i < huffRecordsNumber - 1; ++i) { for(size_t i = 0; i < huffRecordsNumber - 1; ++i) {
size_t shift = *(beginIt + i + 1) - huffDataOffset; size_t shift = *(beginIt + i + 1) - huffDataOffset;
myDicts[i] = myData + shift; myDicts[i] = myData + shift;
@ -95,7 +95,8 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) { if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize); const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize);
if (trailSize < compressedSize) { if (trailSize < compressedSize) {
bitsDecompress(BitReader(sourceBuffer, compressedSize - trailSize)); BitReader reader(sourceBuffer, compressedSize - trailSize);
bitsDecompress(reader);
} else { } else {
myErrorCode = ERROR_CORRUPTED_FILE; myErrorCode = ERROR_CORRUPTED_FILE;
} }
@ -110,7 +111,7 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
return myTargetBufferPtr - myTargetBuffer; return myTargetBufferPtr - myTargetBuffer;
} }
void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) { void HuffDecompressor::bitsDecompress(BitReader &bits, size_t depth) {
if (depth > 32) { if (depth > 32) {
myErrorCode = ERROR_CORRUPTED_FILE; myErrorCode = ERROR_CORRUPTED_FILE;
return; return;
@ -137,7 +138,7 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
// return false; // return false;
//} //}
if (!bits.eat(codelen)) { if (!bits.eat(codelen)) {
return; break;
} }
const unsigned long dicno = r >> myEntryBits; const unsigned long dicno = r >> myEntryBits;
const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2; const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
@ -151,10 +152,11 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
memcpy(myTargetBufferPtr, slice, sliceSize); memcpy(myTargetBufferPtr, slice, sliceSize);
myTargetBufferPtr += sliceSize; myTargetBufferPtr += sliceSize;
} else { } else {
return; break;
} }
} else { } else {
bitsDecompress(BitReader(slice, sliceSize), depth + 1); BitReader reader(slice, sliceSize);
bitsDecompress(reader, depth + 1);
} }
} }
} }
@ -170,6 +172,9 @@ size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, size_t size)
} }
flags >>= 1; flags >>= 1;
} }
if (myExtraFlags & 1) {
num += (data[size - num - 1] & 0x3) + 1;
}
return num; return num;
} }

View file

@ -39,7 +39,7 @@ public:
private: private:
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const; size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const; size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
void bitsDecompress(BitReader bits, size_t depth = 0); void bitsDecompress(BitReader &bits, size_t depth = 0);
private: private:
unsigned long myEntryBits; unsigned long myEntryBits;

View file

@ -159,12 +159,12 @@ void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
myIsActive = true; myIsActive = true;
} }
void MobipocketHtmlBookReader::TOCReader::endReadEntry() { void MobipocketHtmlBookReader::TOCReader::endReadEntry(size_t level) {
if (myIsActive && !myCurrentEntryText.empty()) { if (myIsActive && !myCurrentEntryText.empty()) {
std::string converted; std::string converted;
myReader.myConverter->convert(converted, myCurrentEntryText); myReader.myConverter->convert(converted, myCurrentEntryText);
myReader.myConverter->reset(); myReader.myConverter->reset();
myEntries[myCurrentReference] = converted; myEntries[myCurrentReference] = Entry(converted, level);
myCurrentEntryText.erase(); myCurrentEntryText.erase();
} }
myIsActive = false; myIsActive = false;
@ -176,8 +176,14 @@ void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t le
} }
} }
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) { MobipocketHtmlBookReader::TOCReader::Entry::Entry() : Level(0) {
myEntries[position] = text; }
// Constructs a TOC entry that stores a copy of `text` in Text and `level` in Level.
MobipocketHtmlBookReader::TOCReader::Entry::Entry(const std::string &text, size_t level) : Text(text), Level(level) {
}
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text, size_t level) {
myEntries[position] = Entry(text, level);
if (rangeContainsPosition(position)) { if (rangeContainsPosition(position)) {
setEndOffset(position); setEndOffset(position);
} }
@ -185,7 +191,7 @@ void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const st
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) { void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
myStartOffset = position; myStartOffset = position;
std::map<size_t,std::string>::const_iterator it = myEntries.lower_bound(position); std::map<size_t,Entry>::const_iterator it = myEntries.lower_bound(position);
if (it != myEntries.end()) { if (it != myEntries.end()) {
++it; ++it;
if (it != myEntries.end()) { if (it != myEntries.end()) {
@ -198,7 +204,7 @@ void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
myEndOffset = position; myEndOffset = position;
} }
const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const { const std::map<size_t,MobipocketHtmlBookReader::TOCReader::Entry> &MobipocketHtmlBookReader::TOCReader::entries() const {
return myEntries; return myEntries;
} }
@ -224,7 +230,7 @@ void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
} }
} }
} else { } else {
reader.myTocReader.endReadEntry(); reader.myTocReader.endReadEntry(reader.listStackDepth());
} }
HtmlHrefTagAction::run(tag); HtmlHrefTagAction::run(tag);
} }
@ -264,7 +270,7 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
if (!title.empty() && !filepos.empty()) { if (!title.empty() && !filepos.empty()) {
const int position = ZLStringUtil::parseDecimal(filepos, -1); const int position = ZLStringUtil::parseDecimal(filepos, -1);
if (position > 0) { if (position > 0) {
reader.myTocReader.addReference(position, title); reader.myTocReader.addReference(position, title, reader.listStackDepth());
if (isTocReference) { if (isTocReference) {
reader.myTocReader.setStartOffset(position); reader.myTocReader.setStartOffset(position);
} }
@ -348,16 +354,23 @@ void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
} }
jt = myPositionToParagraphMap.begin(); jt = myPositionToParagraphMap.begin();
const std::map<size_t,std::string> &entries = myTocReader.entries(); const std::map<size_t,TOCReader::Entry> &entries = myTocReader.entries();
for (std::map<size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) { int level = 0;
for (std::map<size_t,TOCReader::Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
while (jt != myPositionToParagraphMap.end() && jt->first < it->first) { while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
++jt; ++jt;
} }
if (jt == myPositionToParagraphMap.end()) { if (jt == myPositionToParagraphMap.end()) {
break; break;
} }
for (; level >= (int)it->second.Level; --level) {
myBookReader.endContentsParagraph();
}
myBookReader.beginContentsParagraph(jt->second); myBookReader.beginContentsParagraph(jt->second);
myBookReader.addContentsData(it->second); myBookReader.addContentsData(it->second.Text);
level = it->second.Level;
}
for (; level >= 0; --level) {
myBookReader.endContentsParagraph(); myBookReader.endContentsParagraph();
} }
} }

View file

@ -39,11 +39,20 @@ private:
public: public:
class TOCReader { class TOCReader {
public:
// One table-of-contents record kept by TOCReader: the entry text together
// with the level that was supplied when the entry was recorded.
struct Entry {
// Text of the entry, as passed to the two-argument constructor.
std::string Text;
// Level of the entry; readDocument compares it with the level of the
// previously emitted entry to decide how many contents paragraphs to close.
size_t Level;
// Default constructor; the out-of-line definition initializes Level to 0.
Entry();
// Constructs an entry from the given text and level.
Entry(const std::string &text, size_t level);
};
public: public:
TOCReader(MobipocketHtmlBookReader &reader); TOCReader(MobipocketHtmlBookReader &reader);
void reset(); void reset();
void addReference(size_t position, const std::string &text); void addReference(size_t position, const std::string &text, size_t level);
void setStartOffset(size_t position); void setStartOffset(size_t position);
void setEndOffset(size_t position); void setEndOffset(size_t position);
@ -51,15 +60,15 @@ public:
bool rangeContainsPosition(size_t position); bool rangeContainsPosition(size_t position);
void startReadEntry(size_t position); void startReadEntry(size_t position);
void endReadEntry(); void endReadEntry(size_t level);
void appendText(const char *text, size_t len); void appendText(const char *text, size_t len);
const std::map<size_t,std::string> &entries() const; const std::map<size_t,Entry> &entries() const;
private: private:
MobipocketHtmlBookReader &myReader; MobipocketHtmlBookReader &myReader;
std::map<size_t,std::string> myEntries; std::map<size_t,Entry> myEntries;
bool myIsActive; bool myIsActive;
size_t myStartOffset; size_t myStartOffset;

View file

@ -119,21 +119,19 @@ bool PalmDocStream::processZeroRecord() {
unsigned long mobiHeaderLength; unsigned long mobiHeaderLength;
unsigned long huffSectionIndex; unsigned long huffSectionIndex;
unsigned long huffSectionNumber; unsigned long huffSectionNumber;
unsigned short extraFlags; unsigned long extraFlags = 0;
unsigned long initialOffset = header().Offsets[0]; unsigned long initialOffset = header().Offsets[0];
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20 myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24 mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
myBase->seek(0x70 - 24, false); // myBase offset: ^ + 102 (0x70) myBase->seek(initialOffset + 112, true); // myBase offset: ^ + 112
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 106 (0x74) huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 116
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 110 (0x78) huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 120
if (mobiHeaderLength >= 244) { if (16 + mobiHeaderLength >= 244) {
myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2) myBase->seek(initialOffset + 240, true); // myBase offset: ^ + 240
extraFlags = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 244 (0xF4) extraFlags = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 244
} else {
extraFlags = 0;
} }
/* /*
std::cerr << "mobi header length: " << mobiHeaderLength << "\n"; std::cerr << "mobi header length: " << mobiHeaderLength << "\n";