mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 17:59:33 +02:00
multi-level ToC + fixed huffdic decompression issue
This commit is contained in:
parent
1c7c19e7f1
commit
bb49ef1460
9 changed files with 64 additions and 34 deletions
|
@ -614,3 +614,7 @@ void HtmlBookReader::endDocumentHandler() {
|
||||||
void HtmlBookReader::setFileName(const std::string fileName) {
|
void HtmlBookReader::setFileName(const std::string fileName) {
|
||||||
myFileName = fileName;
|
myFileName = fileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t HtmlBookReader::listStackDepth() const {
|
||||||
|
return myListNumStack.size();
|
||||||
|
}
|
||||||
|
|
|
@ -52,6 +52,7 @@ public:
|
||||||
~HtmlBookReader();
|
~HtmlBookReader();
|
||||||
void setFileName(const std::string fileName);
|
void setFileName(const std::string fileName);
|
||||||
shared_ptr<StyleSheetParser> createCSSParser();
|
shared_ptr<StyleSheetParser> createCSSParser();
|
||||||
|
size_t listStackDepth() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);
|
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);
|
||||||
|
|
|
@ -40,7 +40,7 @@ unsigned long long BitReader::peek(size_t n) {
|
||||||
size_t g = 0;
|
size_t g = 0;
|
||||||
while (g < n) {
|
while (g < n) {
|
||||||
r = (r << 8) | myData[(myOffset + g) >> 3];
|
r = (r << 8) | myData[(myOffset + g) >> 3];
|
||||||
g = g + 8 - ((myOffset+g) & 7);
|
g = g + 8 - ((myOffset + g) & 7);
|
||||||
}
|
}
|
||||||
unsigned long long mask = 1;
|
unsigned long long mask = 1;
|
||||||
mask = (mask << n) - 1;
|
mask = (mask << n) - 1;
|
||||||
|
|
|
@ -31,9 +31,9 @@ public:
|
||||||
size_t left() const;
|
size_t left() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
unsigned char* myData;
|
unsigned char *myData;
|
||||||
size_t myOffset;
|
size_t myOffset;
|
||||||
size_t myLength;
|
const size_t myLength;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__BITREADER_H__
|
#endif //__BITREADER_H__
|
||||||
|
|
|
@ -58,7 +58,7 @@ HuffDecompressor::HuffDecompressor(
|
||||||
myData = new unsigned char[huffDataSize];
|
myData = new unsigned char[huffDataSize];
|
||||||
stream.seek(huffDataOffset, true);
|
stream.seek(huffDataOffset, true);
|
||||||
if (huffDataSize == stream.read((char*)myData, huffDataSize)) {
|
if (huffDataSize == stream.read((char*)myData, huffDataSize)) {
|
||||||
myDicts = new unsigned char* [huffRecordsNumber - 1];
|
myDicts = new unsigned char*[huffRecordsNumber - 1];
|
||||||
for(size_t i = 0; i < huffRecordsNumber - 1; ++i) {
|
for(size_t i = 0; i < huffRecordsNumber - 1; ++i) {
|
||||||
size_t shift = *(beginIt + i + 1) - huffDataOffset;
|
size_t shift = *(beginIt + i + 1) - huffDataOffset;
|
||||||
myDicts[i] = myData + shift;
|
myDicts[i] = myData + shift;
|
||||||
|
@ -95,7 +95,8 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
|
||||||
if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
|
if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
|
||||||
const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize);
|
const size_t trailSize = sizeOfTrailingEntries(sourceBuffer, compressedSize);
|
||||||
if (trailSize < compressedSize) {
|
if (trailSize < compressedSize) {
|
||||||
bitsDecompress(BitReader(sourceBuffer, compressedSize - trailSize));
|
BitReader reader(sourceBuffer, compressedSize - trailSize);
|
||||||
|
bitsDecompress(reader);
|
||||||
} else {
|
} else {
|
||||||
myErrorCode = ERROR_CORRUPTED_FILE;
|
myErrorCode = ERROR_CORRUPTED_FILE;
|
||||||
}
|
}
|
||||||
|
@ -110,7 +111,7 @@ size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, s
|
||||||
return myTargetBufferPtr - myTargetBuffer;
|
return myTargetBufferPtr - myTargetBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
void HuffDecompressor::bitsDecompress(BitReader &bits, size_t depth) {
|
||||||
if (depth > 32) {
|
if (depth > 32) {
|
||||||
myErrorCode = ERROR_CORRUPTED_FILE;
|
myErrorCode = ERROR_CORRUPTED_FILE;
|
||||||
return;
|
return;
|
||||||
|
@ -137,7 +138,7 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
||||||
// return false;
|
// return false;
|
||||||
//}
|
//}
|
||||||
if (!bits.eat(codelen)) {
|
if (!bits.eat(codelen)) {
|
||||||
return;
|
break;
|
||||||
}
|
}
|
||||||
const unsigned long dicno = r >> myEntryBits;
|
const unsigned long dicno = r >> myEntryBits;
|
||||||
const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
|
const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
|
||||||
|
@ -151,10 +152,11 @@ void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
|
||||||
memcpy(myTargetBufferPtr, slice, sliceSize);
|
memcpy(myTargetBufferPtr, slice, sliceSize);
|
||||||
myTargetBufferPtr += sliceSize;
|
myTargetBufferPtr += sliceSize;
|
||||||
} else {
|
} else {
|
||||||
return;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
bitsDecompress(BitReader(slice, sliceSize), depth + 1);
|
BitReader reader(slice, sliceSize);
|
||||||
|
bitsDecompress(reader, depth + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,6 +172,9 @@ size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, size_t size)
|
||||||
}
|
}
|
||||||
flags >>= 1;
|
flags >>= 1;
|
||||||
}
|
}
|
||||||
|
if (myExtraFlags & 1) {
|
||||||
|
num += (data[size - num - 1] & 0x3) + 1;
|
||||||
|
}
|
||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ public:
|
||||||
private:
|
private:
|
||||||
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
|
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
|
||||||
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
|
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
|
||||||
void bitsDecompress(BitReader bits, size_t depth = 0);
|
void bitsDecompress(BitReader &bits, size_t depth = 0);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
unsigned long myEntryBits;
|
unsigned long myEntryBits;
|
||||||
|
|
|
@ -159,12 +159,12 @@ void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
|
||||||
myIsActive = true;
|
myIsActive = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
|
void MobipocketHtmlBookReader::TOCReader::endReadEntry(size_t level) {
|
||||||
if (myIsActive && !myCurrentEntryText.empty()) {
|
if (myIsActive && !myCurrentEntryText.empty()) {
|
||||||
std::string converted;
|
std::string converted;
|
||||||
myReader.myConverter->convert(converted, myCurrentEntryText);
|
myReader.myConverter->convert(converted, myCurrentEntryText);
|
||||||
myReader.myConverter->reset();
|
myReader.myConverter->reset();
|
||||||
myEntries[myCurrentReference] = converted;
|
myEntries[myCurrentReference] = Entry(converted, level);
|
||||||
myCurrentEntryText.erase();
|
myCurrentEntryText.erase();
|
||||||
}
|
}
|
||||||
myIsActive = false;
|
myIsActive = false;
|
||||||
|
@ -176,8 +176,14 @@ void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t le
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) {
|
MobipocketHtmlBookReader::TOCReader::Entry::Entry() : Level(0) {
|
||||||
myEntries[position] = text;
|
}
|
||||||
|
|
||||||
|
MobipocketHtmlBookReader::TOCReader::Entry::Entry(const std::string &text, size_t level) : Text(text), Level(level) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text, size_t level) {
|
||||||
|
myEntries[position] = Entry(text, level);
|
||||||
if (rangeContainsPosition(position)) {
|
if (rangeContainsPosition(position)) {
|
||||||
setEndOffset(position);
|
setEndOffset(position);
|
||||||
}
|
}
|
||||||
|
@ -185,7 +191,7 @@ void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const st
|
||||||
|
|
||||||
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
|
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
|
||||||
myStartOffset = position;
|
myStartOffset = position;
|
||||||
std::map<size_t,std::string>::const_iterator it = myEntries.lower_bound(position);
|
std::map<size_t,Entry>::const_iterator it = myEntries.lower_bound(position);
|
||||||
if (it != myEntries.end()) {
|
if (it != myEntries.end()) {
|
||||||
++it;
|
++it;
|
||||||
if (it != myEntries.end()) {
|
if (it != myEntries.end()) {
|
||||||
|
@ -198,7 +204,7 @@ void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
|
||||||
myEndOffset = position;
|
myEndOffset = position;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
|
const std::map<size_t,MobipocketHtmlBookReader::TOCReader::Entry> &MobipocketHtmlBookReader::TOCReader::entries() const {
|
||||||
return myEntries;
|
return myEntries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,7 +230,7 @@ void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
reader.myTocReader.endReadEntry();
|
reader.myTocReader.endReadEntry(reader.listStackDepth());
|
||||||
}
|
}
|
||||||
HtmlHrefTagAction::run(tag);
|
HtmlHrefTagAction::run(tag);
|
||||||
}
|
}
|
||||||
|
@ -264,7 +270,7 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
|
||||||
if (!title.empty() && !filepos.empty()) {
|
if (!title.empty() && !filepos.empty()) {
|
||||||
const int position = ZLStringUtil::parseDecimal(filepos, -1);
|
const int position = ZLStringUtil::parseDecimal(filepos, -1);
|
||||||
if (position > 0) {
|
if (position > 0) {
|
||||||
reader.myTocReader.addReference(position, title);
|
reader.myTocReader.addReference(position, title, reader.listStackDepth());
|
||||||
if (isTocReference) {
|
if (isTocReference) {
|
||||||
reader.myTocReader.setStartOffset(position);
|
reader.myTocReader.setStartOffset(position);
|
||||||
}
|
}
|
||||||
|
@ -348,16 +354,23 @@ void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
|
||||||
}
|
}
|
||||||
|
|
||||||
jt = myPositionToParagraphMap.begin();
|
jt = myPositionToParagraphMap.begin();
|
||||||
const std::map<size_t,std::string> &entries = myTocReader.entries();
|
const std::map<size_t,TOCReader::Entry> &entries = myTocReader.entries();
|
||||||
for (std::map<size_t,std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
|
int level = 0;
|
||||||
|
for (std::map<size_t,TOCReader::Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
|
||||||
while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
|
while (jt != myPositionToParagraphMap.end() && jt->first < it->first) {
|
||||||
++jt;
|
++jt;
|
||||||
}
|
}
|
||||||
if (jt == myPositionToParagraphMap.end()) {
|
if (jt == myPositionToParagraphMap.end()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
for (; level >= (int)it->second.Level; --level) {
|
||||||
|
myBookReader.endContentsParagraph();
|
||||||
|
}
|
||||||
myBookReader.beginContentsParagraph(jt->second);
|
myBookReader.beginContentsParagraph(jt->second);
|
||||||
myBookReader.addContentsData(it->second);
|
myBookReader.addContentsData(it->second.Text);
|
||||||
|
level = it->second.Level;
|
||||||
|
}
|
||||||
|
for (; level >= 0; --level) {
|
||||||
myBookReader.endContentsParagraph();
|
myBookReader.endContentsParagraph();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,11 +39,20 @@ private:
|
||||||
public:
|
public:
|
||||||
class TOCReader {
|
class TOCReader {
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct Entry {
|
||||||
|
std::string Text;
|
||||||
|
size_t Level;
|
||||||
|
|
||||||
|
Entry();
|
||||||
|
Entry(const std::string &text, size_t level);
|
||||||
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
TOCReader(MobipocketHtmlBookReader &reader);
|
TOCReader(MobipocketHtmlBookReader &reader);
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
void addReference(size_t position, const std::string &text);
|
void addReference(size_t position, const std::string &text, size_t level);
|
||||||
|
|
||||||
void setStartOffset(size_t position);
|
void setStartOffset(size_t position);
|
||||||
void setEndOffset(size_t position);
|
void setEndOffset(size_t position);
|
||||||
|
@ -51,15 +60,15 @@ public:
|
||||||
bool rangeContainsPosition(size_t position);
|
bool rangeContainsPosition(size_t position);
|
||||||
|
|
||||||
void startReadEntry(size_t position);
|
void startReadEntry(size_t position);
|
||||||
void endReadEntry();
|
void endReadEntry(size_t level);
|
||||||
void appendText(const char *text, size_t len);
|
void appendText(const char *text, size_t len);
|
||||||
|
|
||||||
const std::map<size_t,std::string> &entries() const;
|
const std::map<size_t,Entry> &entries() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MobipocketHtmlBookReader &myReader;
|
MobipocketHtmlBookReader &myReader;
|
||||||
|
|
||||||
std::map<size_t,std::string> myEntries;
|
std::map<size_t,Entry> myEntries;
|
||||||
|
|
||||||
bool myIsActive;
|
bool myIsActive;
|
||||||
size_t myStartOffset;
|
size_t myStartOffset;
|
||||||
|
|
|
@ -119,21 +119,19 @@ bool PalmDocStream::processZeroRecord() {
|
||||||
unsigned long mobiHeaderLength;
|
unsigned long mobiHeaderLength;
|
||||||
unsigned long huffSectionIndex;
|
unsigned long huffSectionIndex;
|
||||||
unsigned long huffSectionNumber;
|
unsigned long huffSectionNumber;
|
||||||
unsigned short extraFlags;
|
unsigned long extraFlags = 0;
|
||||||
unsigned long initialOffset = header().Offsets[0];
|
unsigned long initialOffset = header().Offsets[0];
|
||||||
|
|
||||||
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
|
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
|
||||||
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
|
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
|
||||||
|
|
||||||
myBase->seek(0x70 - 24, false); // myBase offset: ^ + 102 (0x70)
|
myBase->seek(initialOffset + 112, true); // myBase offset: ^ + 112
|
||||||
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 106 (0x74)
|
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 116
|
||||||
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 110 (0x78)
|
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 120
|
||||||
|
|
||||||
if (mobiHeaderLength >= 244) {
|
if (16 + mobiHeaderLength >= 244) {
|
||||||
myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2)
|
myBase->seek(initialOffset + 240, true); // myBase offset: ^ + 240
|
||||||
extraFlags = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 244 (0xF4)
|
extraFlags = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 244
|
||||||
} else {
|
|
||||||
extraFlags = 0;
|
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
std::cerr << "mobi header length: " << mobiHeaderLength << "\n";
|
std::cerr << "mobi header length: " << mobiHeaderLength << "\n";
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue