mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 09:49:19 +02:00
multi-level ToC + fixed huffdic decompression issue
This commit is contained in:
parent
1c7c19e7f1
commit
bb49ef1460
9 changed files with 64 additions and 34 deletions
|
@ -614,3 +614,7 @@ void HtmlBookReader::endDocumentHandler() {
|
|||
void HtmlBookReader::setFileName(const std::string fileName) {
|
||||
myFileName = fileName;
|
||||
}
|
||||
|
||||
size_t HtmlBookReader::listStackDepth() const {
|
||||
return myListNumStack.size();
|
||||
}
|
||||
|
|
|
@ -52,6 +52,7 @@ public:
|
|||
~HtmlBookReader();
|
||||
void setFileName(const std::string fileName);
|
||||
shared_ptr<StyleSheetParser> createCSSParser();
|
||||
size_t listStackDepth() const;
|
||||
|
||||
protected:
|
||||
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);
|
||||
|
|
|
@ -40,7 +40,7 @@ unsigned long long BitReader::peek(size_t n) {
|
|||
size_t g = 0;
|
||||
while (g < n) {
|
||||
r = (r << 8) | myData[(myOffset + g) >> 3];
|
||||
g = g + 8 - ((myOffset+g) & 7);
|
||||
g = g + 8 - ((myOffset + g) & 7);
|
||||
}
|
||||
unsigned long long mask = 1;
|
||||
mask = (mask << n) - 1;
|
||||
|
|
|
@ -31,9 +31,9 @@ public:
|
|||
size_t left() const;
|
||||
|
||||
private:
|
||||
unsigned char* myData;
|
||||
unsigned char *myData;
|
||||
size_t myOffset;
|
||||
size_t myLength;
|
||||
const size_t myLength;
|
||||
};
|
||||
|
||||
#endif //__BITREADER_H__
|
||||
|
|
|
@ -58,7 +58,7 @@ HuffDecompressor::HuffDecompressor(
|
|||
myData = new unsigned char[huffDataSize];
|
||||
stream.seek(huffDataOffset, true);
|
||||
if (huffDataSize == stream.read((char*)myData, huffDataSize)) {
|
||||
myDicts = new unsigned char* [huffRecordsNumber - 1];
|
||||
myDicts = new unsigned char*[huffRecordsNumber - 1];
|
||||
for(size_t i = 0; i < huffRecordsNumber - 1; ++i) {
|
||||
size_t shift = *(beginIt + i + 1) - huffDataOffset;
|
||||
myDicts[i] = myData + shift;
|
||||
|
@ -95,7 +95,8 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
|
|||
if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
|
||||
const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize);
|
||||
if (trailSize < compressedSize) {
|
||||
bitsDecompress(BitReader(sourceBuffer, compressedSize - trailSize));
|
||||
BitReader reader(sourceBuffer, compressedSize - trailSize);
|
||||
bitsDecompress(reader);
|
||||
} else {
|
||||
myErrorCode = ERROR_CORRUPTED_FILE;
|
||||
}
|
||||
|
@ -110,7 +111,7 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
|
|||
return myTargetBufferPtr - myTargetBuffer;
|
||||
}
|
||||
|
||||
void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
||||
void HuffDecompressor::bitsDecompress(BitReader &bits, size_t depth) {
|
||||
if (depth > 32) {
|
||||
myErrorCode = ERROR_CORRUPTED_FILE;
|
||||
return;
|
||||
|
@ -137,7 +138,7 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
|||
// return false;
|
||||
//}
|
||||
if (!bits.eat(codelen)) {
|
||||
return;
|
||||
break;
|
||||
}
|
||||
const unsigned long dicno = r >> myEntryBits;
|
||||
const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
|
||||
|
@ -151,10 +152,11 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
|||
memcpy(myTargetBufferPtr, slice, sliceSize);
|
||||
myTargetBufferPtr += sliceSize;
|
||||
} else {
|
||||
return;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
bitsDecompress(BitReader(slice, sliceSize), depth + 1);
|
||||
BitReader reader(slice, sliceSize);
|
||||
bitsDecompress(reader, depth + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,6 +172,9 @@ size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, size_t size)
|
|||
}
|
||||
flags >>= 1;
|
||||
}
|
||||
if (myExtraFlags & 1) {
|
||||
num += (data[size - num - 1] & 0x3) + 1;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ public:
|
|||
private:
|
||||
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
|
||||
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
|
||||
void bitsDecompress(BitReader bits, size_t depth = 0);
|
||||
void bitsDecompress(BitReader &bits, size_t depth = 0);
|
||||
|
||||
private:
|
||||
unsigned long myEntryBits;
|
||||
|
|
|
@ -159,12 +159,12 @@ void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
|
|||
myIsActive = true;
|
||||
}
|
||||
|
||||
void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
|
||||
void MobipocketHtmlBookReader::TOCReader::endReadEntry(size_t level) {
|
||||
if (myIsActive && !myCurrentEntryText.empty()) {
|
||||
std::string converted;
|
||||
myReader.myConverter->convert(converted, myCurrentEntryText);
|
||||
myReader.myConverter->reset();
|
||||
myEntries[myCurrentReference] = converted;
|
||||
myEntries[myCurrentReference] = Entry(converted, level);
|
||||
myCurrentEntryText.erase();
|
||||
}
|
||||
myIsActive = false;
|
||||
|
@ -176,8 +176,14 @@ void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t le
|
|||
}
|
||||
}
|
||||
|
||||
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) {
|
||||
myEntries[position] = text;
|
||||
MobipocketHtmlBookReader::TOCReader::Entry::Entry() : Level(0) {
|
||||
}
|
||||
|
||||
MobipocketHtmlBookReader::TOCReader::Entry::Entry(const std::string &text, size_t level) : Text(text), Level(level) {
|
||||
}
|
||||
|
||||
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text, size_t level) {
|
||||
myEntries[position] = Entry(text, level);
|
||||
if (rangeContainsPosition(position)) {
|
||||
setEndOffset(position);
|
||||
}
|
||||
|
@ -185,7 +191,7 @@ void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const st
|
|||
|
||||
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
|
||||
myStartOffset = position;
|
||||
std::map<size_t,std::string>::const_iterator it = myEntries.lower_bound(position);
|
||||
std::map<size_t,Entry>::const_iterator it = myEntries.lower_bound(position);
|
||||
if (it != myEntries.end()) {
|
||||
++it;
|
||||
if (it != myEntries.end()) {
|
||||
|
@ -198,7 +204,7 @@ void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
|
|||
myEndOffset = position;
|
||||
}
|
||||
|
||||
const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
|
||||
const std::map<size_t,MobipocketHtmlBookReader::TOCReader::Entry> &MobipocketHtmlBookReader::TOCReader::entries() const {
|
||||
return myEntries;
|
||||
}
|
||||
|
||||
|
@ -224,7 +230,7 @@ void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
reader.myTocReader.endReadEntry();
|
||||
reader.myTocReader.endReadEntry(reader.listStackDepth());
|
||||
}
|
||||
HtmlHrefTagAction::run(tag);
|
||||
}
|
||||
|
@ -264,7 +270,7 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
|
|||
if (!title.empty() && !filepos.empty()) {
|
||||
const int position = ZLStringUtil::parseDecimal(filepos, -1);
|
||||
if (position > 0) {
|
||||
reader.myTocReader.addReference(position, title);
|
||||
reader.myTocReader.addReference(position, title, reader.listStackDepth());
|
||||
if (isTocReference) {
|
||||
reader.myTocReader.setStartOffset(position);
|
||||
}
|
||||
|
@ -348,16 +354,23 @@ void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
|
|||
}
|
||||
|
||||
jt = myPositionToParagraphMap.begin();
|
||||
const std::map<size_t,std::string> &entries = myTocReader.entries();
|
||||
for (std::map<size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
|
||||
const std::map<size_t,TOCReader::Entry> &entries = myTocReader.entries();
|
||||
int level = 0;
|
||||
for (std::map<size_t,TOCReader::Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
|
||||
while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
|
||||
++jt;
|
||||
}
|
||||
if (jt == myPositionToParagraphMap.end()) {
|
||||
break;
|
||||
}
|
||||
for (; level >= (int)it->second.Level; --level) {
|
||||
myBookReader.endContentsParagraph();
|
||||
}
|
||||
myBookReader.beginContentsParagraph(jt->second);
|
||||
myBookReader.addContentsData(it->second);
|
||||
myBookReader.addContentsData(it->second.Text);
|
||||
level = it->second.Level;
|
||||
}
|
||||
for (; level >= 0; --level) {
|
||||
myBookReader.endContentsParagraph();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,11 +39,20 @@ private:
|
|||
public:
|
||||
class TOCReader {
|
||||
|
||||
public:
|
||||
struct Entry {
|
||||
std::string Text;
|
||||
size_t Level;
|
||||
|
||||
Entry();
|
||||
Entry(const std::string &text, size_t level);
|
||||
};
|
||||
|
||||
public:
|
||||
TOCReader(MobipocketHtmlBookReader &reader);
|
||||
void reset();
|
||||
|
||||
void addReference(size_t position, const std::string &text);
|
||||
void addReference(size_t position, const std::string &text, size_t level);
|
||||
|
||||
void setStartOffset(size_t position);
|
||||
void setEndOffset(size_t position);
|
||||
|
@ -51,15 +60,15 @@ public:
|
|||
bool rangeContainsPosition(size_t position);
|
||||
|
||||
void startReadEntry(size_t position);
|
||||
void endReadEntry();
|
||||
void endReadEntry(size_t level);
|
||||
void appendText(const char *text, size_t len);
|
||||
|
||||
const std::map<size_t,std::string> &entries() const;
|
||||
const std::map<size_t,Entry> &entries() const;
|
||||
|
||||
private:
|
||||
MobipocketHtmlBookReader &myReader;
|
||||
|
||||
std::map<size_t,std::string> myEntries;
|
||||
std::map<size_t,Entry> myEntries;
|
||||
|
||||
bool myIsActive;
|
||||
size_t myStartOffset;
|
||||
|
|
|
@ -119,21 +119,19 @@ bool PalmDocStream::processZeroRecord() {
|
|||
unsigned long mobiHeaderLength;
|
||||
unsigned long huffSectionIndex;
|
||||
unsigned long huffSectionNumber;
|
||||
unsigned short extraFlags;
|
||||
unsigned long extraFlags = 0;
|
||||
unsigned long initialOffset = header().Offsets[0];
|
||||
|
||||
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
|
||||
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
|
||||
|
||||
myBase->seek(0x70 - 24, false); // myBase offset: ^ + 102 (0x70)
|
||||
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 106 (0x74)
|
||||
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 110 (0x78)
|
||||
myBase->seek(initialOffset + 112, true); // myBase offset: ^ + 112
|
||||
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 116
|
||||
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 120
|
||||
|
||||
if (mobiHeaderLength >= 244) {
|
||||
myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2)
|
||||
extraFlags = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 244 (0xF4)
|
||||
} else {
|
||||
extraFlags = 0;
|
||||
if (16 + mobiHeaderLength >= 244) {
|
||||
myBase->seek(initialOffset + 240, true); // myBase offset: ^ + 240
|
||||
extraFlags = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 244
|
||||
}
|
||||
/*
|
||||
std::cerr << "mobi header length: " << mobiHeaderLength << "\n";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue