1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-03 17:59:33 +02:00

ms-word doc plugin: can now read doc files with errors in formatting structures

This commit is contained in:
Alexander Turkin 2012-06-15 17:32:29 +04:00
parent 52607ae0f1
commit aed70a41ea
3 changed files with 60 additions and 23 deletions

View file

@ -83,13 +83,21 @@ bool OleMainStream::open() {
return true; return true;
} }
return readPieceTable(headerBuffer, tableEntry) && result = readPieceTable(headerBuffer, tableEntry);
readBookmarks(headerBuffer, tableEntry) &&
readStylesheet(headerBuffer, tableEntry) &&
//readSectionsInfoTable(headerBuffer, tableEntry) && //it doesn't uses now
readParagraphStyleTable(headerBuffer, tableEntry) &&
readCharInfoTable(headerBuffer, tableEntry);
if (!result) {
ZLLogger::Instance().println("OleMainStream", "error during reading piece table");
return false;
}
//the results of reading the following structures are not checked, because all these
//problems can be ignored and the document can be shown anyway, maybe with wrong formatting
readBookmarks(headerBuffer, tableEntry);
readStylesheet(headerBuffer, tableEntry);
//readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now
readParagraphStyleTable(headerBuffer, tableEntry);
readCharInfoTable(headerBuffer, tableEntry);
return true;
} }
const OleMainStream::Pieces &OleMainStream::getPieces() const { const OleMainStream::Pieces &OleMainStream::getPieces() const {
@ -491,7 +499,7 @@ bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &
} }
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
for (size_t index = 0; index < size; ++index) { for (size_t index = 0; index < charBlocks.size(); ++index) {
seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
return false; return false;
@ -540,15 +548,15 @@ bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleE
} }
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
for (size_t index = 0; index < size; ++index) { for (size_t index = 0; index < paragraphBlocks.size(); ++index) {
seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
return false; return false;
} }
unsigned int cpara = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with cpara (count of paragraphs) unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs)
for (unsigned int index2 = 0; index2 < cpara; ++index2) { for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) {
unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4); unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (cpara + 1) * 4 + index2 * 13) * 2; unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2;
if (papxOffset <= 0) { if (papxOffset <= 0) {
continue; continue;
} }
@ -609,7 +617,7 @@ bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEnt
//reading the section properties //reading the section properties
char tmpBuffer[2]; char tmpBuffer[2];
for (size_t index = 0; index < decriptorsCount; ++index) { for (size_t index = 0; index < sectPage.size(); ++index) {
if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info
SectionInfo sectionInfo; SectionInfo sectionInfo;
sectionInfo.charPos = charPos.at(index); sectionInfo.charPos = charPos.at(index);
@ -617,14 +625,18 @@ bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEnt
continue; continue;
} }
//getting number of bytes to read //getting number of bytes to read
seek(sectPage.at(index), true); if (!seek(sectPage.at(index), true)) {
continue;
}
if (read(tmpBuffer, 2) != 2) { if (read(tmpBuffer, 2) != 2) {
return false; continue;
} }
size_t bytes = 2 + (size_t)OleUtil::getU2Bytes(tmpBuffer, 0); size_t bytes = 2 + (size_t)OleUtil::getU2Bytes(tmpBuffer, 0);
if (!seek(sectPage.at(index), true)) {
continue;
}
char *formatPageBuffer = new char[bytes]; char *formatPageBuffer = new char[bytes];
seek(sectPage.at(index), true);
if (read(formatPageBuffer, bytes) != bytes) { if (read(formatPageBuffer, bytes) != bytes) {
delete formatPageBuffer; delete formatPageBuffer;
continue; continue;

View file

@ -110,6 +110,12 @@ bool OleStorage::readBBD(char *oleBuf) {
char buffer[mySectorSize]; char buffer[mySectorSize];
unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
if (myDIFAT.size() < bbdNumberBlocks) {
//TODO maybe add check on myDIFAT == bbdNumberBlocks
ZLLogger::Instance().println("OleStorage", "Wrong number of FAT blocks value");
return false;
}
for (unsigned int i = 0; i < bbdNumberBlocks; ++i) { for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
int bbdSector = myDIFAT.at(i); int bbdSector = myDIFAT.at(i);
if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) { if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
@ -118,7 +124,7 @@ bool OleStorage::readBBD(char *oleBuf) {
} }
myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true); myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) { if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
ZLLogger::Instance().println("OleStorage", "Can't read BBD!"); ZLLogger::Instance().println("OleStorage", "Error during reading BBD!");
return false; return false;
} }
for (unsigned int j = 0; j < mySectorSize; j += 4) { for (unsigned int j = 0; j < mySectorSize; j += 4) {
@ -140,13 +146,20 @@ bool OleStorage::readSBD(char *oleBuf) {
char buffer[mySectorSize]; char buffer[mySectorSize];
for (int i = 0; i < sbdCount; ++i) { for (int i = 0; i < sbdCount; ++i) {
if (i != 0) { if (i != 0) {
if (sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) {
ZLLogger::Instance().println("OleStorage", "error during parsing SBD");
return false;
}
sbdCur = myBBD.at(sbdCur); sbdCur = myBBD.at(sbdCur);
} }
if (sbdCur <= 0) { if (sbdCur <= 0) {
break; break;
} }
myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true); myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
myInputStream->read(buffer, mySectorSize); if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
ZLLogger::Instance().println("OleStorage", "reading error during parsing SBD");
return false;
}
for (unsigned int j = 0; j < mySectorSize; j += 4) { for (unsigned int j = 0; j < mySectorSize; j += 4) {
mySBD.push_back(OleUtil::get4Bytes(buffer, j)); mySBD.push_back(OleUtil::get4Bytes(buffer, j));
} }
@ -165,7 +178,10 @@ bool OleStorage::readProperties(char *oleBuf) {
char buffer[mySectorSize]; char buffer[mySectorSize];
do { do {
myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true); myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
myInputStream->read(buffer, mySectorSize); if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
ZLLogger::Instance().println("OleStorage", "Error during reading properties");
return false;
}
for (unsigned int j = 0; j < mySectorSize; j += 128) { for (unsigned int j = 0; j < mySectorSize; j += 128) {
myProperties.push_back(std::string(buffer + j, 128)); myProperties.push_back(std::string(buffer + j, 128));
} }
@ -212,6 +228,10 @@ bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
e.name.clear(); e.name.clear();
e.name.reserve(33); //max size of entry name e.name.reserve(33); //max size of entry name
if ((unsigned int)nameLength >= property.size()) {
return false;
}
for (int i = 0; i < nameLength; i+=2) { for (int i = 0; i < nameLength; i+=2) {
char c = property.at(i); char c = property.at(i);
if (c != 0) { if (c != 0) {

View file

@ -198,14 +198,19 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
return false; return false;
} }
if (!stream.seek(piece.offset, true)) {
//TODO maybe in that case we should take next piece?
return false;
}
char *textBuffer = new char[piece.length]; char *textBuffer = new char[piece.length];
size_t readedBytes = stream.read(textBuffer, piece.length);
stream.seek(piece.offset, true); if (readedBytes != (unsigned int)piece.length) {
stream.read(textBuffer, piece.length); ZLLogger::Instance().println("OleStreamReader", "not all bytes has been readed from piece");
}
myBuffer.clear(); myBuffer.clear();
if (!piece.isANSI) { if (!piece.isANSI) {
for (int i = 0; i < piece.length; i += 2) { for (unsigned int i = 0; i < readedBytes; i += 2) {
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i); ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
myBuffer.push_back(ch); myBuffer.push_back(ch);
} }
@ -219,7 +224,7 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
} }
} }
std::string utf8String; std::string utf8String;
myConverter->convert(utf8String, std::string(textBuffer, piece.length)); myConverter->convert(utf8String, std::string(textBuffer, readedBytes));
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
} }
myCurBufferPosition = 0; myCurBufferPosition = 0;