mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 17:59:33 +02:00
ms-word doc plugin: can now read doc files with errors in formatting structures
This commit is contained in:
parent
52607ae0f1
commit
aed70a41ea
3 changed files with 60 additions and 23 deletions
|
@ -83,13 +83,21 @@ bool OleMainStream::open() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return readPieceTable(headerBuffer, tableEntry) &&
|
result = readPieceTable(headerBuffer, tableEntry);
|
||||||
readBookmarks(headerBuffer, tableEntry) &&
|
|
||||||
readStylesheet(headerBuffer, tableEntry) &&
|
|
||||||
//readSectionsInfoTable(headerBuffer, tableEntry) && //it doesn't uses now
|
|
||||||
readParagraphStyleTable(headerBuffer, tableEntry) &&
|
|
||||||
readCharInfoTable(headerBuffer, tableEntry);
|
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
ZLLogger::Instance().println("OleMainStream", "error during reading piece table");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//the results of reading the following structures are not checked, because all these
|
||||||
|
//problems can be ignored, and the document can be shown anyway, maybe with wrong formatting
|
||||||
|
readBookmarks(headerBuffer, tableEntry);
|
||||||
|
readStylesheet(headerBuffer, tableEntry);
|
||||||
|
//readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now
|
||||||
|
readParagraphStyleTable(headerBuffer, tableEntry);
|
||||||
|
readCharInfoTable(headerBuffer, tableEntry);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const OleMainStream::Pieces &OleMainStream::getPieces() const {
|
const OleMainStream::Pieces &OleMainStream::getPieces() const {
|
||||||
|
@ -491,7 +499,7 @@ bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &
|
||||||
}
|
}
|
||||||
|
|
||||||
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
||||||
for (size_t index = 0; index < size; ++index) {
|
for (size_t index = 0; index < charBlocks.size(); ++index) {
|
||||||
seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
||||||
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -540,15 +548,15 @@ bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleE
|
||||||
}
|
}
|
||||||
|
|
||||||
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
||||||
for (size_t index = 0; index < size; ++index) {
|
for (size_t index = 0; index < paragraphBlocks.size(); ++index) {
|
||||||
seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
||||||
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
unsigned int cpara = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with cpara (count of paragraphs)
|
unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs)
|
||||||
for (unsigned int index2 = 0; index2 < cpara; ++index2) {
|
for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) {
|
||||||
unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
|
unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
|
||||||
unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (cpara + 1) * 4 + index2 * 13) * 2;
|
unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2;
|
||||||
if (papxOffset <= 0) {
|
if (papxOffset <= 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -609,7 +617,7 @@ bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEnt
|
||||||
|
|
||||||
//reading the section properties
|
//reading the section properties
|
||||||
char tmpBuffer[2];
|
char tmpBuffer[2];
|
||||||
for (size_t index = 0; index < decriptorsCount; ++index) {
|
for (size_t index = 0; index < sectPage.size(); ++index) {
|
||||||
if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info
|
if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info
|
||||||
SectionInfo sectionInfo;
|
SectionInfo sectionInfo;
|
||||||
sectionInfo.charPos = charPos.at(index);
|
sectionInfo.charPos = charPos.at(index);
|
||||||
|
@ -617,14 +625,18 @@ bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEnt
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
//getting number of bytes to read
|
//getting number of bytes to read
|
||||||
seek(sectPage.at(index), true);
|
if (!seek(sectPage.at(index), true)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (read(tmpBuffer, 2) != 2) {
|
if (read(tmpBuffer, 2) != 2) {
|
||||||
return false;
|
continue;
|
||||||
}
|
}
|
||||||
size_t bytes = 2 + (size_t)OleUtil::getU2Bytes(tmpBuffer, 0);
|
size_t bytes = 2 + (size_t)OleUtil::getU2Bytes(tmpBuffer, 0);
|
||||||
|
|
||||||
|
if (!seek(sectPage.at(index), true)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
char *formatPageBuffer = new char[bytes];
|
char *formatPageBuffer = new char[bytes];
|
||||||
seek(sectPage.at(index), true);
|
|
||||||
if (read(formatPageBuffer, bytes) != bytes) {
|
if (read(formatPageBuffer, bytes) != bytes) {
|
||||||
delete formatPageBuffer;
|
delete formatPageBuffer;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -110,6 +110,12 @@ bool OleStorage::readBBD(char *oleBuf) {
|
||||||
char buffer[mySectorSize];
|
char buffer[mySectorSize];
|
||||||
unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
|
unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
|
||||||
|
|
||||||
|
if (myDIFAT.size() < bbdNumberBlocks) {
|
||||||
|
//TODO maybe add check on myDIFAT == bbdNumberBlocks
|
||||||
|
ZLLogger::Instance().println("OleStorage", "Wrong number of FAT blocks value");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
|
for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
|
||||||
int bbdSector = myDIFAT.at(i);
|
int bbdSector = myDIFAT.at(i);
|
||||||
if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
|
if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
|
||||||
|
@ -118,7 +124,7 @@ bool OleStorage::readBBD(char *oleBuf) {
|
||||||
}
|
}
|
||||||
myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
|
myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
|
||||||
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
||||||
ZLLogger::Instance().println("OleStorage", "Can't read BBD!");
|
ZLLogger::Instance().println("OleStorage", "Error during reading BBD!");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
||||||
|
@ -140,13 +146,20 @@ bool OleStorage::readSBD(char *oleBuf) {
|
||||||
char buffer[mySectorSize];
|
char buffer[mySectorSize];
|
||||||
for (int i = 0; i < sbdCount; ++i) {
|
for (int i = 0; i < sbdCount; ++i) {
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
|
if (sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) {
|
||||||
|
ZLLogger::Instance().println("OleStorage", "error during parsing SBD");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
sbdCur = myBBD.at(sbdCur);
|
sbdCur = myBBD.at(sbdCur);
|
||||||
}
|
}
|
||||||
if (sbdCur <= 0) {
|
if (sbdCur <= 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
|
myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
|
||||||
myInputStream->read(buffer, mySectorSize);
|
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
||||||
|
ZLLogger::Instance().println("OleStorage", "reading error during parsing SBD");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
||||||
mySBD.push_back(OleUtil::get4Bytes(buffer, j));
|
mySBD.push_back(OleUtil::get4Bytes(buffer, j));
|
||||||
}
|
}
|
||||||
|
@ -165,7 +178,10 @@ bool OleStorage::readProperties(char *oleBuf) {
|
||||||
char buffer[mySectorSize];
|
char buffer[mySectorSize];
|
||||||
do {
|
do {
|
||||||
myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
|
myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
|
||||||
myInputStream->read(buffer, mySectorSize);
|
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
||||||
|
ZLLogger::Instance().println("OleStorage", "Error during reading properties");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
for (unsigned int j = 0; j < mySectorSize; j += 128) {
|
for (unsigned int j = 0; j < mySectorSize; j += 128) {
|
||||||
myProperties.push_back(std::string(buffer + j, 128));
|
myProperties.push_back(std::string(buffer + j, 128));
|
||||||
}
|
}
|
||||||
|
@ -212,6 +228,10 @@ bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
|
||||||
int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
|
int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
|
||||||
e.name.clear();
|
e.name.clear();
|
||||||
e.name.reserve(33); //max size of entry name
|
e.name.reserve(33); //max size of entry name
|
||||||
|
|
||||||
|
if ((unsigned int)nameLength >= property.size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
for (int i = 0; i < nameLength; i+=2) {
|
for (int i = 0; i < nameLength; i+=2) {
|
||||||
char c = property.at(i);
|
char c = property.at(i);
|
||||||
if (c != 0) {
|
if (c != 0) {
|
||||||
|
|
|
@ -198,14 +198,19 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!stream.seek(piece.offset, true)) {
|
||||||
|
//TODO maybe in that case we should take next piece?
|
||||||
|
return false;
|
||||||
|
}
|
||||||
char *textBuffer = new char[piece.length];
|
char *textBuffer = new char[piece.length];
|
||||||
|
size_t readedBytes = stream.read(textBuffer, piece.length);
|
||||||
stream.seek(piece.offset, true);
|
if (readedBytes != (unsigned int)piece.length) {
|
||||||
stream.read(textBuffer, piece.length);
|
ZLLogger::Instance().println("OleStreamReader", "not all bytes has been readed from piece");
|
||||||
|
}
|
||||||
|
|
||||||
myBuffer.clear();
|
myBuffer.clear();
|
||||||
if (!piece.isANSI) {
|
if (!piece.isANSI) {
|
||||||
for (int i = 0; i < piece.length; i += 2) {
|
for (unsigned int i = 0; i < readedBytes; i += 2) {
|
||||||
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
||||||
myBuffer.push_back(ch);
|
myBuffer.push_back(ch);
|
||||||
}
|
}
|
||||||
|
@ -219,7 +224,7 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::string utf8String;
|
std::string utf8String;
|
||||||
myConverter->convert(utf8String, std::string(textBuffer, piece.length));
|
myConverter->convert(utf8String, std::string(textBuffer, readedBytes));
|
||||||
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
||||||
}
|
}
|
||||||
myCurBufferPosition = 0;
|
myCurBufferPosition = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue