mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-05 02:39:23 +02:00
first version of ms-word doc plugin has been added
This commit is contained in:
parent
899528c2be
commit
52607ae0f1
19 changed files with 2686 additions and 0 deletions
|
@ -125,6 +125,14 @@ LOCAL_SRC_FILES := \
|
|||
NativeFormats/fbreader/src/formats/util/MiscUtil.cpp \
|
||||
NativeFormats/fbreader/src/formats/util/XMLTextStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/xhtml/XHTMLReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \
|
||||
NativeFormats/fbreader/src/library/Author.cpp \
|
||||
NativeFormats/fbreader/src/library/Book.cpp \
|
||||
NativeFormats/fbreader/src/library/Comparators.cpp \
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
//#include "chm/CHMPlugin.h"
|
||||
#include "rtf/RtfPlugin.h"
|
||||
//#include "openreader/OpenReaderPlugin.h"
|
||||
#include "doc/DocPlugin.h"
|
||||
|
||||
PluginCollection *PluginCollection::ourInstance = 0;
|
||||
|
||||
|
@ -54,6 +55,7 @@ PluginCollection &PluginCollection::Instance() {
|
|||
// ourInstance->myPlugins.push_back(new CHMPlugin());
|
||||
ourInstance->myPlugins.push_back(new OEBPlugin());
|
||||
ourInstance->myPlugins.push_back(new RtfPlugin());
|
||||
ourInstance->myPlugins.push_back(new DocPlugin());
|
||||
// ourInstance->myPlugins.push_back(new OpenReaderPlugin());
|
||||
}
|
||||
return *ourInstance;
|
||||
|
|
357
jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp
Normal file
357
jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp
Normal file
|
@ -0,0 +1,357 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ZLInputStream.h>
|
||||
#include <ZLLogger.h>
|
||||
#include <ZLFile.h>
|
||||
#include <ZLStringUtil.h>
|
||||
|
||||
#include "DocBookReader.h"
|
||||
#include "../../bookmodel/BookModel.h"
|
||||
#include "../../library/Book.h"
|
||||
|
||||
#include "OleStorage.h"
|
||||
#include "OleMainStream.h"
|
||||
|
||||
/**
 * Creates a reader that puts the content of a MS Word document into `model`.
 *
 * @param model    book model that receives the text
 * @param encoding encoding name handed over to the OleStreamReader base class
 *
 * All parser state members are initialized here; previously only myReadState
 * was set, leaving myReadFieldState and myHyperlinkTypeState indeterminate
 * until the first field start mark was met.
 */
DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
	OleStreamReader(encoding),
	myModelReader(model),
	myReadState(READ_TEXT),
	myReadFieldState(READ_FIELD_TEXT),
	myHyperlinkTypeState(NO_HYPERLINK) {
}
|
||||
|
||||
bool DocBookReader::readBook() {
|
||||
const ZLFile &file = myModelReader.model().book()->file();
|
||||
shared_ptr<ZLInputStream> stream = file.inputStream();
|
||||
if (stream.isNull()) {
|
||||
return false;
|
||||
}
|
||||
return readDocument(stream, file.size());
|
||||
}
|
||||
|
||||
/**
 * Parses an OLE container and feeds its "WordDocument" stream to readStream().
 *
 * @param inputStream stream with the whole .doc file
 * @param streamSize  size value passed on to OleStorage::init()
 * @return true if the storage was initialized, the "WordDocument" entry was
 *         found and readStream() succeeded
 */
bool DocBookReader::readDocument(shared_ptr<ZLInputStream> inputStream, size_t streamSize) {
	static const std::string WORD_DOCUMENT = "WordDocument";

	if (inputStream.isNull()) {
		return false;
	}
	if (!inputStream->open()) {
		return false;
	}

	// the model is prepared before the storage is examined
	myModelReader.setMainTextModel();
	myModelReader.pushKind(REGULAR);
	myModelReader.beginParagraph();

	shared_ptr<OleStorage> storage = new OleStorage;
	if (!storage->init(inputStream, streamSize)) {
		ZLLogger::Instance().println("DocBookReader", "Broken OLE file!");
		return false;
	}

	OleEntry wordDocumentEntry;
	if (!storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry)) {
		return false;
	}

	OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
	if (!readStream(oleStream)) {
		return false;
	}

	myModelReader.insertEndOfTextParagraph();
	return true;
}
|
||||
|
||||
/**
 * Processes one text character.
 *
 * Inside a field the character is either collected into the field info
 * buffer, dropped, or (for field text) treated as ordinary text; outside of
 * a field it is always ordinary text. Ordinary text is converted to UTF-8
 * and appended to the current paragraph, which is opened on demand.
 */
void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
	if (myReadState == READ_FIELD) {
		switch (myReadFieldState) {
			case READ_FIELD_INFO:
				myFieldInfoBuffer.push_back(ucs2char);
				return;
			case DONT_READ_FIELD_TEXT:
				return;
			case READ_FIELD_TEXT:
				if (ucs2char == WORD_HORIZONTAL_TAB) {
					//to remove pagination from TOC (from doc saved in OpenOffice)
					myReadFieldState = DONT_READ_FIELD_TEXT;
					return;
				}
				break;
		}
	}

	ZLUnicodeUtil::Ucs2String singleChar;
	singleChar.push_back(ucs2char);
	std::string utf8Text;
	ZLUnicodeUtil::ucs2ToUtf8(utf8Text, singleChar);

	if (!myModelReader.paragraphIsOpen()) {
		myModelReader.beginParagraph();
	}
	myModelReader.addData(utf8Text);
}
|
||||
|
||||
void DocBookReader::handleHardLinebreak() {
|
||||
if (myModelReader.paragraphIsOpen()) {
|
||||
myModelReader.endParagraph();
|
||||
}
|
||||
myModelReader.beginParagraph();
|
||||
if (!myCurStyleEntry.isNull()) {
|
||||
myModelReader.addStyleEntry(*myCurStyleEntry);
|
||||
}
|
||||
for (size_t i = 0; i < myKindStack.size(); ++i) {
|
||||
myModelReader.addControl(myKindStack.at(i), true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Closes the current paragraph (if one is open), forgets the current style
 * entry and opens a new paragraph.
 */
void DocBookReader::handleParagraphEnd() {
	const bool hasOpenParagraph = myModelReader.paragraphIsOpen();
	if (hasOpenParagraph) {
		myModelReader.endParagraph();
	}
	myModelReader.beginParagraph();
	myCurStyleEntry = 0;
}
|
||||
|
||||
/**
 * Closes the current paragraph (if one is open), forgets the current style
 * entry, inserts an end-of-section paragraph and opens a new paragraph.
 */
void DocBookReader::handlePageBreak() {
	const bool hasOpenParagraph = myModelReader.paragraphIsOpen();
	if (hasOpenParagraph) {
		myModelReader.endParagraph();
	}
	myCurStyleEntry = 0;
	myModelReader.insertEndOfSectionParagraph();
	myModelReader.beginParagraph();
}
|
||||
|
||||
void DocBookReader::handleTableSeparator() {
|
||||
handleChar(SPACE);
|
||||
handleChar(VERTICAL_LINE);
|
||||
handleChar(SPACE);
|
||||
}
|
||||
|
||||
/**
 * The end of a table row is handled exactly like the end of a paragraph.
 */
void DocBookReader::handleTableEndRow() {
	handleParagraphEnd();
}
|
||||
|
||||
/**
 * Footnote marks are ignored for now.
 */
void DocBookReader::handleFootNoteMark() {
	//TODO implement
}
|
||||
|
||||
void DocBookReader::handleStartField() {
|
||||
if (myReadState == READ_FIELD) { //for nested fields
|
||||
handleEndField();
|
||||
}
|
||||
myReadState = READ_FIELD;
|
||||
myReadFieldState = READ_FIELD_INFO;
|
||||
myHyperlinkTypeState = NO_HYPERLINK;
|
||||
}
|
||||
|
||||
void DocBookReader::handleSeparatorField() {
|
||||
static const std::string HYPERLINK = "HYPERLINK";
|
||||
// static const std::string PAGE = "PAGE";
|
||||
// static const std::string PAGEREF = "PAGEREF";
|
||||
// static const std::string SHAPE = "SHAPE";
|
||||
static const std::string SPACE_DELIMETER = " ";
|
||||
static const std::string LOCAL_LINK = "\\l";
|
||||
static const std::string QUOTE = "\"";
|
||||
myReadFieldState = READ_FIELD_TEXT;
|
||||
myHyperlinkTypeState = NO_HYPERLINK;
|
||||
ZLUnicodeUtil::Ucs2String buffer = myFieldInfoBuffer;
|
||||
myFieldInfoBuffer.clear();
|
||||
std::string utf8String;
|
||||
ZLUnicodeUtil::ucs2ToUtf8(utf8String, buffer);
|
||||
ZLStringUtil::stripWhiteSpaces(utf8String);
|
||||
if (utf8String.empty()) {
|
||||
return;
|
||||
}
|
||||
std::vector<std::string> result = ZLStringUtil::split(utf8String, SPACE_DELIMETER);
|
||||
//TODO split function can returns empty string, maybe fix it
|
||||
std::vector<std::string> splitted;
|
||||
for (size_t i = 0; i < result.size(); ++i) {
|
||||
if (!result.at(i).empty()) {
|
||||
splitted.push_back(result.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (splitted.size() < 2 || splitted.at(0) != HYPERLINK) {
|
||||
myReadFieldState = DONT_READ_FIELD_TEXT;
|
||||
//to remove pagination from TOC and not hyperlink fields
|
||||
return;
|
||||
}
|
||||
|
||||
if (splitted.at(1) == LOCAL_LINK) {
|
||||
std::string link = parseLink(buffer);
|
||||
if (!link.empty()) {
|
||||
myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link);
|
||||
myHyperlinkTypeState = INT_HYPERLINK_INSERTED;
|
||||
}
|
||||
} else {
|
||||
std::string link = parseLink(buffer, true);
|
||||
if (!link.empty()) {
|
||||
myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link);
|
||||
myHyperlinkTypeState = EXT_HYPERLINK_INSERTED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DocBookReader::handleEndField() {
|
||||
myFieldInfoBuffer.clear();
|
||||
if (myReadState == READ_TEXT) {
|
||||
return;
|
||||
}
|
||||
if (myHyperlinkTypeState == EXT_HYPERLINK_INSERTED) {
|
||||
myModelReader.addControl(EXTERNAL_HYPERLINK, false);
|
||||
} else if (myHyperlinkTypeState == INT_HYPERLINK_INSERTED) {
|
||||
myModelReader.addControl(INTERNAL_HYPERLINK, false);
|
||||
}
|
||||
myReadState = READ_TEXT;
|
||||
myHyperlinkTypeState = NO_HYPERLINK;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Start-of-heading marks are ignored for now.
 */
void DocBookReader::handleStartOfHeading() {
	//heading can be, for example, a picture
	//TODO implement
}
|
||||
|
||||
/**
 * Handles control characters that have no dedicated handler:
 * WORD_SHORT_DEFIS is emitted as SHORT_DEFIS, WORD_HORIZONTAL_TAB is emitted
 * as is, WORD_SOFT_HYPHEN and everything else is dropped.
 */
void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
	if (ucs2char == WORD_SHORT_DEFIS) {
		handleChar(SHORT_DEFIS);
		return;
	}
	if (ucs2char == WORD_SOFT_HYPHEN) {
		//skip
		return;
	}
	if (ucs2char == WORD_HORIZONTAL_TAB) {
		handleChar(ucs2char);
	}
	// any other control character is ignored
}
|
||||
|
||||
void DocBookReader::handleFontStyle(unsigned int fontStyle) {
|
||||
if (myReadState == READ_FIELD && myReadFieldState == READ_FIELD_TEXT && myHyperlinkTypeState != NO_HYPERLINK) {
|
||||
//to fix bug with hyperlink, that's only bold and doesn't looks like hyperlink
|
||||
return;
|
||||
}
|
||||
while (!myKindStack.empty()) {
|
||||
myModelReader.addControl(myKindStack.back(), false);
|
||||
myKindStack.pop_back();
|
||||
}
|
||||
if (fontStyle & OleMainStream::CharInfo::BOLD) {
|
||||
myKindStack.push_back(BOLD);
|
||||
}
|
||||
if (fontStyle & OleMainStream::CharInfo::ITALIC) {
|
||||
myKindStack.push_back(ITALIC);
|
||||
}
|
||||
for (size_t i = 0; i < myKindStack.size(); ++i) {
|
||||
myModelReader.addControl(myKindStack.at(i), true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Applies a paragraph style to the current paragraph.
 *
 * - a page break is inserted first if the style asks for one;
 * - a style entry with the alignment (LEFT is forced to justify) and, for
 *   H1/H2/H3, a font size of 140/120/110 percent is added to the model and
 *   remembered in myCurStyleEntry;
 * - if the style has the same valid istd as the previous one, the kinds in
 *   myKindStack are opened again; otherwise the stack is reset and filled
 *   from the font style of the given style.
 */
void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
	if (styleInfo.hasPageBreakBefore) {
		handlePageBreak();
	}

	shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry();

	if (styleInfo.alignment == OleMainStream::Style::LEFT ||
			styleInfo.alignment == OleMainStream::Style::JUSTIFY) {
		//left alignment is forced to justify
		entry->setAlignmentType(ALIGN_JUSTIFY);
	} else if (styleInfo.alignment == OleMainStream::Style::CENTER) {
		entry->setAlignmentType(ALIGN_CENTER);
	} else if (styleInfo.alignment == OleMainStream::Style::RIGHT) {
		entry->setAlignmentType(ALIGN_RIGHT);
	}

	//TODO in case, where style is heading, but size is small it works wrong
	const ZLTextStyleEntry::SizeUnit unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
	short headingFontSize = 0; // 0 means "not a heading, keep the font size"
	if (styleInfo.istd == OleMainStream::H1) {
		headingFontSize = 140;
	} else if (styleInfo.istd == OleMainStream::H2) {
		headingFontSize = 120;
	} else if (styleInfo.istd == OleMainStream::H3) {
		headingFontSize = 110;
	}
	if (headingFontSize != 0) {
		entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, headingFontSize, unit);
	}

	myCurStyleEntry = entry;
	myModelReader.addStyleEntry(*myCurStyleEntry);

	//we should have the same font style, as for the previous paragraph, if it has the same istd
	const bool sameStyleAsBefore =
		myCurStyleInfo.istd != OleMainStream::ISTD_INVALID &&
		myCurStyleInfo.istd == styleInfo.istd;
	if (!sameStyleAsBefore) {
		myKindStack.clear();
		//fill by the fontstyle, that was got from Stylesheet
		handleFontStyle(styleInfo.charInfo.fontStyle);
	} else {
		for (std::vector<FBTextKind>::const_iterator kind = myKindStack.begin(); kind != myKindStack.end(); ++kind) {
			myModelReader.addControl(*kind, true);
		}
	}
	myCurStyleInfo = styleInfo;
}
|
||||
|
||||
/**
 * Registers a bookmark as a hyperlink label with the same name.
 */
void DocBookReader::handleBookmark(const std::string &name) {
	myModelReader.addHyperlinkLabel(name);
}
|
||||
|
||||
/**
 * Extracts a link target from field info text.
 *
 * The target is everything between the first and the LAST quote character.
 *
 * @param s         field info text
 * @param urlencode if true, spaces are replaced by "%20" and inner quotes
 *                  by "%22"
 * @return the target in UTF-8, or an empty string if `s` contains fewer
 *         than two quotes
 */
std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) {
	//TODO add support for HYPERLINK like that:
	// [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15]
	//Current implementation search for last QUOTE, so, it reads \t and _blank as part of link
	//Last quote searching is need to handle link like that:
	// [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' и "some text2"" [0x14] link text [0x15]

	static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22;
	//TODO maybe functions findFirstOf and findLastOf should be in ZLUnicodeUtil class

	const size_t length = s.size();

	size_t first = 0;
	while (first < length && s.at(first) != QUOTE) {
		++first;
	}
	if (first == length) {
		return std::string();
	}

	// a quote exists at `first`, so the backward scan stops there at the latest
	size_t last = length - 1;
	while (s.at(last) != QUOTE) {
		--last;
	}
	if (last == first) {
		return std::string();
	}

	ZLUnicodeUtil::Ucs2String link;
	for (size_t pos = first + 1; pos < last; ++pos) {
		const ZLUnicodeUtil::Ucs2Char ch = s.at(pos);
		if (!urlencode) {
			link.push_back(ch);
		} else if (ZLUnicodeUtil::isSpace(ch)) {
			//TODO maybe implement function for encoding all signs in url, not only spaces and quotes
			//TODO maybe add backslash support
			link.push_back('%');
			link.push_back('2');
			link.push_back('0');
		} else if (ch == QUOTE) {
			link.push_back('%');
			link.push_back('2');
			link.push_back('2');
		} else {
			link.push_back(ch);
		}
	}

	std::string utf8Link;
	ZLUnicodeUtil::ucs2ToUtf8(utf8Link, link);
	return utf8Link;
}
|
||||
|
96
jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h
Normal file
96
jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h
Normal file
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DOCBOOKREADER_H__
|
||||
#define __DOCBOOKREADER_H__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <shared_ptr.h>
|
||||
#include <ZLFile.h>
|
||||
#include <ZLTextStyleEntry.h>
|
||||
|
||||
#include "../../bookmodel/BookReader.h"
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
class DocBookReader : public OleStreamReader {
|
||||
|
||||
public:
|
||||
DocBookReader(BookModel &model, const std::string &encoding);
|
||||
~DocBookReader();
|
||||
bool readBook();
|
||||
|
||||
private:
|
||||
bool readDocument(shared_ptr<ZLInputStream> stream, size_t streamSize);
|
||||
|
||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||
void handleHardLinebreak();
|
||||
void handleParagraphEnd();
|
||||
void handlePageBreak();
|
||||
void handleTableSeparator();
|
||||
void handleTableEndRow();
|
||||
void handleFootNoteMark();
|
||||
void handleStartField();
|
||||
void handleSeparatorField();
|
||||
void handleEndField();
|
||||
void handleStartOfHeading();
|
||||
void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||
|
||||
//formatting:
|
||||
void handleFontStyle(unsigned int fontStyle);
|
||||
void handleParagraphStyle(const OleMainStream::Style &styleInfo);
|
||||
void handleBookmark(const std::string &name);
|
||||
|
||||
private:
|
||||
static std::string parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode = false);
|
||||
|
||||
private:
|
||||
BookReader myModelReader;
|
||||
|
||||
ZLUnicodeUtil::Ucs2String myFieldInfoBuffer;
|
||||
|
||||
enum {
|
||||
READ_FIELD,
|
||||
READ_TEXT
|
||||
} myReadState;
|
||||
|
||||
enum {
|
||||
READ_FIELD_TEXT,
|
||||
DONT_READ_FIELD_TEXT,
|
||||
READ_FIELD_INFO
|
||||
} myReadFieldState;
|
||||
|
||||
//maybe it should be flag?
|
||||
enum {
|
||||
NO_HYPERLINK,
|
||||
EXT_HYPERLINK_INSERTED,
|
||||
INT_HYPERLINK_INSERTED
|
||||
} myHyperlinkTypeState;
|
||||
|
||||
//formatting
|
||||
std::vector<FBTextKind> myKindStack;
|
||||
shared_ptr<ZLTextStyleEntry> myCurStyleEntry;
|
||||
OleMainStream::Style myCurStyleInfo;
|
||||
};
|
||||
|
||||
// The destructor has no work of its own.
inline DocBookReader::~DocBookReader() {
}
|
||||
|
||||
#endif /* __DOCBOOKREADER_H__ */
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <ZLInputStream.h>
|
||||
|
||||
#include "../../library/Book.h"
|
||||
|
||||
#include "DocMetaInfoReader.h"
|
||||
|
||||
/**
 * Binds the reader to `book` and resets the book's authors, title,
 * language and tags.
 */
DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) {
	const std::string emptyValue;
	myBook.removeAllAuthors();
	myBook.setTitle(emptyValue);
	myBook.setLanguage(emptyValue);
	myBook.removeAllTags();
}
|
||||
|
||||
/*
|
||||
void DocMetaInfoReader::characterDataHandler(const char *text, size_t len) {
|
||||
}
|
||||
|
||||
void DocMetaInfoReader::startElementHandler(int tag, const char **) {
|
||||
}
|
||||
|
||||
void DocMetaInfoReader::endElementHandler(int tag) {
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * Fills the book with placeholder meta information: no authors, no tags,
 * the title taken from the file name and a fixed "windows-1251" encoding.
 *
 * @return always true
 */
bool DocMetaInfoReader::readMetaInfo() {
	myBook.removeAllAuthors();

	const std::string titleFromFileName = myBook.file().name(true);
	myBook.setTitle(titleFromFileName);

	//TODO implement encoding retrieving
	myBook.setEncoding("windows-1251");

	myBook.removeAllTags();
	return true;
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DOCMETAINFOREADER_H__
|
||||
#define __DOCMETAINFOREADER_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
class Book;
|
||||
|
||||
class DocMetaInfoReader {
|
||||
|
||||
public:
|
||||
DocMetaInfoReader(Book &book);
|
||||
~DocMetaInfoReader();
|
||||
bool readMetaInfo();
|
||||
|
||||
/*
|
||||
void startElementHandler(int tag, const char **attributes);
|
||||
void endElementHandler(int tag);
|
||||
void characterDataHandler(const char *text, size_t len);
|
||||
*/
|
||||
|
||||
private:
|
||||
Book &myBook;
|
||||
};
|
||||
|
||||
// The destructor has no work of its own.
inline DocMetaInfoReader::~DocMetaInfoReader() {
}
|
||||
|
||||
#endif /* __DOCMETAINFOREADER_H__ */
|
61
jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp
Normal file
61
jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <ZLFile.h>
|
||||
#include <ZLInputStream.h>
|
||||
#include <ZLLogger.h>
|
||||
#include <ZLImage.h>
|
||||
|
||||
#include "DocPlugin.h"
|
||||
#include "DocMetaInfoReader.h"
|
||||
#include "DocBookReader.h"
|
||||
#include "../../bookmodel/BookModel.h"
|
||||
#include "../../library/Book.h"
|
||||
|
||||
// The plugin keeps no state, so there is nothing to set up.
DocPlugin::DocPlugin() {
}
|
||||
|
||||
// The plugin keeps no state, so there is nothing to release.
DocPlugin::~DocPlugin() {
}
|
||||
|
||||
// This plugin reports that it provides meta information.
bool DocPlugin::providesMetaInfo() const {
	const bool metaInfoAvailable = true;
	return metaInfoAvailable;
}
|
||||
|
||||
// Name of the file type handled by this plugin.
const std::string DocPlugin::supportedFileType() const {
	return std::string("doc");
}
|
||||
|
||||
bool DocPlugin::acceptsFile(const ZLFile &file) const {
|
||||
return file.extension() == "doc";
|
||||
}
|
||||
|
||||
// Delegates reading of the meta information to DocMetaInfoReader.
bool DocPlugin::readMetaInfo(Book &book) const {
	DocMetaInfoReader metaInfoReader(book);
	return metaInfoReader.readMetaInfo();
}
|
||||
|
||||
// Nothing is detected here; the book is left untouched and success is reported.
bool DocPlugin::readLanguageAndEncoding(Book & /*book*/) const {
	return true;
}
|
||||
|
||||
// Reads the text of the book into `model` using the encoding stored in the book.
bool DocPlugin::readModel(BookModel &model) const {
	DocBookReader bookReader(model, model.book()->encoding());
	return bookReader.readBook();
}
|
39
jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.h
Normal file
39
jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DOCPLUGIN_H__
|
||||
#define __DOCPLUGIN_H__
|
||||
|
||||
#include "../FormatPlugin.h"
|
||||
|
||||
class DocPlugin : public FormatPlugin {
|
||||
|
||||
public:
|
||||
DocPlugin();
|
||||
~DocPlugin();
|
||||
bool providesMetaInfo() const;
|
||||
|
||||
const std::string supportedFileType() const;
|
||||
bool acceptsFile(const ZLFile &file) const;
|
||||
bool readMetaInfo(Book &book) const;
|
||||
bool readLanguageAndEncoding(Book &book) const;
|
||||
bool readModel(BookModel &model) const;
|
||||
};
|
||||
|
||||
#endif /* __DOCPLUGIN_H__ */
|
889
jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp
Normal file
889
jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp
Normal file
|
@ -0,0 +1,889 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <cstring> //for memset
|
||||
#include <string>
|
||||
|
||||
#include <ZLLogger.h>
|
||||
#include <ZLUnicodeUtil.h>
|
||||
|
||||
#include "OleUtil.h"
|
||||
#include "OleStorage.h"
|
||||
|
||||
#include "OleMainStream.h"
|
||||
|
||||
/**
 * Creates an "empty" style: the whole object is zero-filled first, then the
 * fields whose default is not zero are set explicitly.
 * NOTE(review): the zero-fill also overwrites whatever the constructors of
 * the members (e.g. charInfo) have set, and is only valid while Style holds
 * plain data members only -- confirm against the struct declaration.
 */
OleMainStream::Style::Style() {
	(void)memset(this, 0, sizeof(Style));

	istd = ISTD_INVALID;
	istdNext = ISTD_INVALID;
	hasPageBreakBefore = false;
	// restored here because the zero-fill above has wiped it
	charInfo.fontSize = 20;
}
|
||||
|
||||
// Default character info: no font style bits set, font size 20.
OleMainStream::CharInfo::CharInfo() : fontStyle(0), fontSize(20) {
}
|
||||
|
||||
|
||||
// Default section info: character position 0, newPage flag set.
OleMainStream::SectionInfo::SectionInfo() : charPos(0), newPage(true) {
}
|
||||
|
||||
// All arguments are simply forwarded to the OleStream base class.
OleMainStream::OleMainStream(
	shared_ptr<OleStorage> storage,
	OleEntry oleEntry,
	shared_ptr<ZLInputStream> stream
) : OleStream(storage, oleEntry, stream) {
}
|
||||
|
||||
bool OleMainStream::open() {
|
||||
if (OleStream::open() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static const size_t HEADER_SIZE = 768; //size of data in header of main stream
|
||||
char headerBuffer[HEADER_SIZE];
|
||||
seek(0, true);
|
||||
|
||||
if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool result = readFIB(headerBuffer);
|
||||
if (!result) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//determining table stream number
|
||||
unsigned int tableNumber = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) ? 1 : 0;
|
||||
std::string tableName = tableNumber == 0 ? "0" : "1";
|
||||
tableName += "Table";
|
||||
OleEntry tableEntry;
|
||||
result = myStorage->getEntryByName(tableName, tableEntry);
|
||||
|
||||
if (!result) {
|
||||
//cant't find table stream (that can be only in case if file format is below Word 7/8), so building simple table stream
|
||||
Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::TEXT, 0}; //CHECK may be not all old documents have ANSI
|
||||
myPieces.push_back(piece);
|
||||
return true;
|
||||
}
|
||||
|
||||
return readPieceTable(headerBuffer, tableEntry) &&
|
||||
readBookmarks(headerBuffer, tableEntry) &&
|
||||
readStylesheet(headerBuffer, tableEntry) &&
|
||||
//readSectionsInfoTable(headerBuffer, tableEntry) && //it doesn't uses now
|
||||
readParagraphStyleTable(headerBuffer, tableEntry) &&
|
||||
readCharInfoTable(headerBuffer, tableEntry);
|
||||
|
||||
}
|
||||
|
||||
// Read-only access to the stored list of pieces.
const OleMainStream::Pieces &OleMainStream::getPieces() const {
	return myPieces;
}
|
||||
|
||||
// Read-only access to the stored character info list.
const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const {
	return myCharInfoList;
}
|
||||
|
||||
// Read-only access to the stored style info list.
const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const {
	return myStyleInfoList;
}
|
||||
|
||||
// Read-only access to the stored bookmarks.
const OleMainStream::Bookmarks &OleMainStream::getBookmarks() const {
	return myBookmarks;
}
|
||||
|
||||
/**
 * Examines the header of the main stream.
 *
 * Logs what the flags word (offset 0xA) and the charset word (offset 0x14)
 * say about the document, and stores the start and end of the text (offsets
 * 0x18 and 0x1c) in myStartOfText / myEndOfText.
 *
 * @param headerBuffer header bytes of the main stream; offsets up to 0x1f
 *                     are read
 * @return false if the document is encrypted (flag 0x100), true otherwise
 */
bool OleMainStream::readFIB(const char *headerBuffer) {
	const int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags

	if (flags & 0x0004) { //flag for complex format
		ZLLogger::Instance().println("OleMainStream", "This was fast-saved. Some information is lost");
		//lostInfo = (flags & 0xF0) >> 4);
	}

	if (flags & 0x1000) { //flag for using extending charset
		ZLLogger::Instance().println("OleMainStream", "File uses extended character set (get_word8_char)");
	} else {
		ZLLogger::Instance().println("OleMainStream", "File uses get_8bit_char character set");
	}

	if (flags & 0x100) { //flag for encrypted files
		ZLLogger::Instance().println("OleMainStream", "File is encrypted");
		// Encryption key = %08lx ; NumUtil::get4Bytes(header, 14)
		return false;
	}

	const unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number
	if (charset && charset != 0x100) { //0x100 = default charset
		// The logger prints the message verbatim (the old "%d" was never
		// substituted), so the number is appended by hand. charset is not
		// zero in this branch, hence at least one digit is produced.
		std::string charsetNumber;
		for (unsigned int rest = charset; rest != 0; rest /= 10) {
			charsetNumber.insert(charsetNumber.begin(), (char)('0' + rest % 10));
		}
		ZLLogger::Instance().println("OleMainStream", "Using not default character set " + charsetNumber);
	} else {
		ZLLogger::Instance().println("OleMainStream", "Using default character set");
	}

	myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value
	myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value
	return true;
}
|
||||
|
||||
void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) {
|
||||
Pieces source = s;
|
||||
dest1.clear();
|
||||
dest2.clear();
|
||||
|
||||
int sumLength = 0;
|
||||
size_t i = 0;
|
||||
for (i = 0; i < source.size(); ++i) {
|
||||
Piece piece = source.at(i);
|
||||
if (piece.length + sumLength >= boundary) {
|
||||
Piece piece2 = piece;
|
||||
|
||||
piece.length = boundary - sumLength;
|
||||
piece.type = type1;
|
||||
|
||||
piece2.type = type2;
|
||||
piece2.offset += piece.length * 2;
|
||||
piece2.length -= piece.length;
|
||||
|
||||
if (piece.length > 0) {
|
||||
dest1.push_back(piece);
|
||||
}
|
||||
if (piece2.length > 0) {
|
||||
dest2.push_back(piece2);
|
||||
}
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
sumLength += piece.length;
|
||||
piece.type = type1;
|
||||
dest1.push_back(piece);
|
||||
}
|
||||
for (; i < source.size(); ++i) {
|
||||
Piece piece = source.at(i);
|
||||
piece.type = type2;
|
||||
dest2.push_back(piece);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) {
|
||||
unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure
|
||||
unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length
|
||||
|
||||
//1 step : loading CLX table from table stream
|
||||
char *clxBuffer = new char[clxLength];
|
||||
tableStream.seek(clxOffset, true);
|
||||
tableStream.read(clxBuffer, clxLength);
|
||||
std::string clx(clxBuffer, clxLength);
|
||||
delete clxBuffer;
|
||||
|
||||
//2 step: searching for pieces table buffer at CLX
|
||||
//(determines it by 0x02 as start symbol)
|
||||
size_t from = 0;
|
||||
size_t i;
|
||||
std::string pieceTableBuffer;
|
||||
while ((i = clx.find_first_of(0x02, from)) != std::string::npos) {
|
||||
unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1);
|
||||
pieceTableBuffer = std::string(clx, i + 1 + 4);
|
||||
if (pieceTableBuffer.length() != pieceTableLength) {
|
||||
from = i + 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return pieceTableBuffer;
|
||||
}
|
||||
|
||||
|
||||
bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream);
|
||||
|
||||
//getting count of Character Positions for different types of subdocuments in Main Stream
|
||||
int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text
|
||||
int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument
|
||||
int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument
|
||||
int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument
|
||||
int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument
|
||||
int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument
|
||||
int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument
|
||||
int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header
|
||||
int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx;
|
||||
if (lastCP != 0) {
|
||||
++lastCP;
|
||||
}
|
||||
lastCP += ccpText;
|
||||
|
||||
//getting the CP (character positions) and CP descriptors
|
||||
std::vector<int> cp; //array of character positions for pieces
|
||||
unsigned int j = 0;
|
||||
for (j = 0; ; j += 4) {
|
||||
int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j);
|
||||
cp.push_back(curCP);
|
||||
if (curCP == lastCP) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> descriptors;
|
||||
for (size_t k = 0; k < cp.size() - 1; ++k) {
|
||||
//j + 4, because it should be taken after CP in PiecesTable Buffer
|
||||
//k * 8, because it should be taken 8 byte for each descriptor
|
||||
descriptors.push_back(piecesTableBuffer.substr(j + 4 + k * 8, 8));
|
||||
}
|
||||
|
||||
//filling the Pieces vector
|
||||
for (size_t i = 0; i < descriptors.size(); ++i) {
|
||||
//4byte integer with offset and ANSI flag
|
||||
int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure
|
||||
Piece piece;
|
||||
piece.isANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag
|
||||
piece.offset = fcValue & 0x3FFFFFFF; //gettting offset for current piece
|
||||
piece.length = cp.at(i + 1) - cp.at(i);
|
||||
myPieces.push_back(piece);
|
||||
}
|
||||
|
||||
//split pieces into different types
|
||||
Pieces piecesText, piecesFootnote, piecesOther;
|
||||
splitPieces(myPieces, piecesText, piecesFootnote, Piece::TEXT, Piece::FOOTNOTE, ccpText);
|
||||
splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::FOOTNOTE, Piece::OTHER, ccpFtn);
|
||||
|
||||
myPieces.clear();
|
||||
for (size_t i = 0; i < piecesText.size(); ++i) {
|
||||
myPieces.push_back(piecesText.at(i));
|
||||
}
|
||||
for (size_t i = 0; i < piecesFootnote.size(); ++i) {
|
||||
myPieces.push_back(piecesFootnote.at(i));
|
||||
}
|
||||
for (size_t i = 0; i < piecesOther.size(); ++i) {
|
||||
myPieces.push_back(piecesOther.at(i));
|
||||
}
|
||||
|
||||
//converting length and offset depending on isANSI
|
||||
for (size_t i = 0; i < myPieces.size(); ++i) {
|
||||
Piece &piece = myPieces.at(i);
|
||||
if (!piece.isANSI) {
|
||||
piece.length *= 2;
|
||||
} else {
|
||||
piece.offset /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
//filling startCP field
|
||||
unsigned int curStartCP = 0;
|
||||
for (size_t i = 0; i < myPieces.size(); ++i) {
|
||||
Piece &piece = myPieces.at(i);
|
||||
piece.startCP = curStartCP;
|
||||
if (piece.isANSI) {
|
||||
curStartCP += piece.length;
|
||||
} else {
|
||||
curStartCP += piece.length / 2;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
//SttbfBkmk structure is a table of bookmark name strings
|
||||
unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure
|
||||
size_t namesInfoLength = (size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure
|
||||
|
||||
if (namesInfoLength == 0) {
|
||||
return true; //there's no bookmarks
|
||||
}
|
||||
|
||||
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
std::string buffer;
|
||||
if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2); //count of records
|
||||
|
||||
std::vector<std::string> names;
|
||||
unsigned int offset = 0x6; //initial offset
|
||||
for (unsigned int i = 0; i < recordsNumber; ++i) {
|
||||
unsigned int length = OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //legnth of string in bytes
|
||||
ZLUnicodeUtil::Ucs2String name;
|
||||
for (unsigned int j = 0; j < length; j+=2) {
|
||||
char ch1 = buffer.at(offset + 2 + j);
|
||||
char ch2 = buffer.at(offset + 2 + j + 1);
|
||||
ZLUnicodeUtil::Ucs2Char ucs2Char = (unsigned int)ch1 | ((unsigned int)ch2 << 8);
|
||||
name.push_back(ucs2Char);
|
||||
}
|
||||
std::string utf8Name;
|
||||
ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name);
|
||||
names.push_back(utf8Name);
|
||||
offset += length + 2;
|
||||
}
|
||||
|
||||
//plcfBkmkf structure is table recording beginning CPs of bookmarks
|
||||
unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure
|
||||
size_t charPosInfoLen = (size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure
|
||||
|
||||
if (charPosInfoLen == 0) {
|
||||
return true; //there's no bookmarks
|
||||
}
|
||||
|
||||
if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = (charPosInfoLen / 4 - 1) / 2;
|
||||
std::vector<unsigned int> charPage;
|
||||
for (size_t index = 0, offset = 0; index < size; ++index, offset += 4) {
|
||||
charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < names.size(); ++i) {
|
||||
if (i >= charPage.size()) {
|
||||
break; //for the case if something in these structures goes wrong, to not to lose all bookmarks
|
||||
}
|
||||
Bookmark bookmark;
|
||||
bookmark.charPos = charPage.at(i);
|
||||
bookmark.name = names.at(i);
|
||||
myBookmarks.push_back(bookmark);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
//STSH structure is a stylesheet
|
||||
unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure
|
||||
size_t stshInfoLength = (size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure
|
||||
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
char *buffer = new char[stshInfoLength];
|
||||
tableStream.seek(beginStshInfo, true);
|
||||
if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t stdCount = (size_t)OleUtil::getU2Bytes(buffer, 2);
|
||||
size_t stdBaseInFile = (size_t)OleUtil::getU2Bytes(buffer, 4);
|
||||
myStyleSheet.resize(stdCount);
|
||||
|
||||
std::vector<bool> isFilled;
|
||||
isFilled.resize(stdCount, false);
|
||||
|
||||
size_t stdLen = 0;
|
||||
bool styleSheetWasChanged = false;
|
||||
do { //make it in while loop, because some base style can be after their successors
|
||||
styleSheetWasChanged = false;
|
||||
for (size_t index = 0, offset = 2 + (size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) {
|
||||
stdLen = (size_t)OleUtil::getU2Bytes(buffer, offset);
|
||||
if (isFilled.at(index)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (stdLen == 0) {
|
||||
//if record is empty, left it default
|
||||
isFilled[index] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
Style styleInfo = myStyleSheet.at(index);
|
||||
|
||||
unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4);
|
||||
unsigned int styleType = styleAndBaseType % 16;
|
||||
unsigned int baseStyle = styleAndBaseType / 16;
|
||||
if (baseStyle == STI_NIL || baseStyle == STI_USER) {
|
||||
//if based on nil or user style, left defaukt
|
||||
} else {
|
||||
int baseStyleIndex = getStyleIndex(baseStyle, isFilled, myStyleSheet);
|
||||
if (baseStyleIndex < 0) {
|
||||
//this base style is not filled yet, sp pass it at some time
|
||||
continue;
|
||||
}
|
||||
styleInfo = myStyleSheet.at(baseStyleIndex);
|
||||
styleInfo.istd = ISTD_INVALID;
|
||||
}
|
||||
|
||||
// parse STD structure
|
||||
unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6);
|
||||
unsigned int upxCount = tmp % 16;
|
||||
styleInfo.istdNext = tmp / 16;
|
||||
|
||||
//adding current style
|
||||
myStyleSheet[index] = styleInfo;
|
||||
isFilled[index] = true;
|
||||
styleSheetWasChanged = true;
|
||||
|
||||
size_t pos = 2 + stdBaseInFile;
|
||||
size_t nameLen = (size_t)OleUtil::getU2Bytes(buffer, offset + pos);
|
||||
nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length
|
||||
pos += 2 + nameLen;
|
||||
if (pos % 2 != 0) {
|
||||
++pos;
|
||||
}
|
||||
if (pos >= stdLen) {
|
||||
continue;
|
||||
}
|
||||
size_t upxLen = (size_t)OleUtil::getU2Bytes(buffer, offset + pos);
|
||||
if (pos + upxLen > stdLen) {
|
||||
//UPX length too large
|
||||
continue;
|
||||
}
|
||||
//for style info styleType must be equal 1
|
||||
if (styleType == 1 && upxCount >= 1) {
|
||||
if (upxLen >= 2) {
|
||||
styleInfo.istd = OleUtil::getU2Bytes(buffer, offset + pos + 2);
|
||||
getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo);
|
||||
myStyleSheet[index] = styleInfo;
|
||||
}
|
||||
pos += 2 + upxLen;
|
||||
if (pos % 2 != 0) {
|
||||
++pos;
|
||||
}
|
||||
upxLen = (size_t)OleUtil::getU2Bytes(buffer, offset + pos);
|
||||
}
|
||||
if (upxLen == 0 || pos + upxLen > stdLen) {
|
||||
//too small/too large
|
||||
continue;
|
||||
}
|
||||
//for char info styleType can be equal 1 or 2
|
||||
if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) {
|
||||
CharInfo charInfo;
|
||||
getCharInfo(0, ISTD_INVALID, buffer + offset + pos + 2, upxLen, charInfo);
|
||||
styleInfo.charInfo = charInfo;
|
||||
myStyleSheet[index] = styleInfo;
|
||||
}
|
||||
}
|
||||
} while (styleSheetWasChanged);
|
||||
delete buffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
//fcPlcfbteChpx structure is table with formatting for particular run of text
|
||||
unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of fcPlcfbteChpx structure
|
||||
size_t charInfoLength = (size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of fcPlcfbteChpx structure
|
||||
if (charInfoLength < 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
std::string buffer;
|
||||
if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = (charInfoLength / 4 - 1) / 2;
|
||||
std::vector<unsigned int> charBlocks;
|
||||
for (size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += 4) {
|
||||
charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
|
||||
}
|
||||
|
||||
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
||||
for (size_t index = 0; index < size; ++index) {
|
||||
seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
||||
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
||||
return false;
|
||||
}
|
||||
unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text')
|
||||
for (unsigned int index2 = 0; index2 < crun; ++index2) {
|
||||
unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
|
||||
unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2);
|
||||
unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset);
|
||||
unsigned int charPos = 0;
|
||||
if (!offsetToCharPos(offset, charPos, myPieces)) {
|
||||
continue;
|
||||
}
|
||||
unsigned int istd = getIstdByCharPos(charPos, myStyleInfoList);
|
||||
|
||||
CharInfo charInfo = getStyleFromStylesheet(istd, myStyleSheet).charInfo;
|
||||
if (chpxOffset != 0) {
|
||||
getCharInfo(chpxOffset, istd, formatPageBuffer + 1, len - 1, charInfo);
|
||||
}
|
||||
myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo));
|
||||
}
|
||||
}
|
||||
delete formatPageBuffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
//PlcBtePapx structure is table with formatting for all paragraphs
|
||||
unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure
|
||||
size_t paragraphInfoLength = (size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure
|
||||
if (paragraphInfoLength < 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
std::string buffer;
|
||||
if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = (paragraphInfoLength / 4 - 1) / 2;
|
||||
|
||||
std::vector<unsigned int> paragraphBlocks;
|
||||
for (size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += 4) {
|
||||
paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset));
|
||||
}
|
||||
|
||||
char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE];
|
||||
for (size_t index = 0; index < size; ++index) {
|
||||
seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
|
||||
if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) {
|
||||
return false;
|
||||
}
|
||||
unsigned int cpara = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with cpara (count of paragraphs)
|
||||
for (unsigned int index2 = 0; index2 < cpara; ++index2) {
|
||||
unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
|
||||
unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (cpara + 1) * 4 + index2 * 13) * 2;
|
||||
if (papxOffset <= 0) {
|
||||
continue;
|
||||
}
|
||||
unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
|
||||
if (len == 0) {
|
||||
++papxOffset;
|
||||
len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
|
||||
}
|
||||
|
||||
unsigned int istd = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1);
|
||||
Style styleInfo = getStyleFromStylesheet(istd, myStyleSheet);
|
||||
|
||||
if (len >= 3) {
|
||||
getStyleInfo(papxOffset, formatPageBuffer + 3, len - 3, styleInfo);
|
||||
}
|
||||
|
||||
unsigned int charPos = 0;
|
||||
if (!offsetToCharPos(offset, charPos, myPieces)) {
|
||||
continue;
|
||||
}
|
||||
myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo));
|
||||
}
|
||||
}
|
||||
delete formatPageBuffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
|
||||
//PlcfSed structure is a section table
|
||||
unsigned int beginOfText = OleUtil::getU4Bytes(headerBuffer, 0x18); //address of text's begin in main stream
|
||||
unsigned int beginSectInfo = OleUtil::getU4Bytes(headerBuffer, 0xca); //address if PlcfSed structure
|
||||
|
||||
size_t sectInfoLen = (size_t)OleUtil::getU4Bytes(headerBuffer, 0xce); //length of PlcfSed structure
|
||||
if (sectInfoLen < 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OleStream tableStream(myStorage, tableEntry, myBaseStream);
|
||||
std::string buffer;
|
||||
if (!readToBuffer(buffer, beginSectInfo, sectInfoLen, tableStream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t decriptorsCount = (sectInfoLen - 4) / 16;
|
||||
|
||||
//saving the section offsets (in character positions)
|
||||
std::vector<unsigned int> charPos;
|
||||
for (size_t index = 0, tOffset = 0; index < decriptorsCount; ++index, tOffset += 4) {
|
||||
unsigned int ulTextOffset = OleUtil::getU4Bytes(buffer.c_str(), tOffset);
|
||||
charPos.push_back(beginOfText + ulTextOffset);
|
||||
}
|
||||
|
||||
//saving sepx offsets
|
||||
std::vector<unsigned int> sectPage;
|
||||
for (size_t index = 0, tOffset = (decriptorsCount + 1) * 4; index < decriptorsCount; ++index, tOffset += 12) {
|
||||
sectPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset + 2));
|
||||
}
|
||||
|
||||
//reading the section properties
|
||||
char tmpBuffer[2];
|
||||
for (size_t index = 0; index < decriptorsCount; ++index) {
|
||||
if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info
|
||||
SectionInfo sectionInfo;
|
||||
sectionInfo.charPos = charPos.at(index);
|
||||
mySectionInfoList.push_back(sectionInfo);
|
||||
continue;
|
||||
}
|
||||
//getting number of bytes to read
|
||||
seek(sectPage.at(index), true);
|
||||
if (read(tmpBuffer, 2) != 2) {
|
||||
return false;
|
||||
}
|
||||
size_t bytes = 2 + (size_t)OleUtil::getU2Bytes(tmpBuffer, 0);
|
||||
|
||||
char *formatPageBuffer = new char[bytes];
|
||||
seek(sectPage.at(index), true);
|
||||
if (read(formatPageBuffer, bytes) != bytes) {
|
||||
delete formatPageBuffer;
|
||||
continue;
|
||||
}
|
||||
SectionInfo sectionInfo;
|
||||
sectionInfo.charPos = charPos.at(index);
|
||||
getSectionInfo(formatPageBuffer + 2, bytes - 2, sectionInfo);
|
||||
mySectionInfoList.push_back(sectionInfo);
|
||||
delete formatPageBuffer;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) {
|
||||
int tmp, toDelete, toAdd;
|
||||
unsigned int offset = 0;
|
||||
while (bytes >= offset + 2) {
|
||||
unsigned int curPrlLength = 0;
|
||||
switch (OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset)) {
|
||||
case 0x2403:
|
||||
styleInfo.alignment = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0x4610:
|
||||
styleInfo.leftIndent += OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
if (styleInfo.leftIndent < 0) {
|
||||
styleInfo.leftIndent = 0;
|
||||
}
|
||||
break;
|
||||
case 0xc60d: // ChgTabsPapx
|
||||
case 0xc615: // ChgTabs
|
||||
tmp = OleUtil::get1Byte(grpprlBuffer, papxOffset + offset + 2);
|
||||
if (tmp < 2) {
|
||||
curPrlLength = 1;
|
||||
break;
|
||||
}
|
||||
toDelete = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 3);
|
||||
if (tmp < 2 + 2 * toDelete) {
|
||||
curPrlLength = 1;
|
||||
break;
|
||||
}
|
||||
toAdd = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 4 + 2 * toDelete);
|
||||
if (tmp < 2 + 2 * toDelete + 2 * toAdd) {
|
||||
curPrlLength = 1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x840e:
|
||||
styleInfo.rightIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0x840f:
|
||||
styleInfo.leftIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0x8411:
|
||||
styleInfo.firstLineIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0xa413:
|
||||
styleInfo.beforeIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0xa414:
|
||||
styleInfo.afterIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
|
||||
break;
|
||||
case 0x2407:
|
||||
styleInfo.hasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2) == 0x01;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (curPrlLength == 0) {
|
||||
curPrlLength = getPrlLength(grpprlBuffer, papxOffset + offset);
|
||||
}
|
||||
offset += curPrlLength;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*istd*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) {
|
||||
unsigned int sprm = 0; //single propery modifier
|
||||
unsigned int offset = 0;
|
||||
while (bytes >= offset + 2) {
|
||||
switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) {
|
||||
case 0x0835: //bold
|
||||
sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2);
|
||||
switch (sprm) {
|
||||
case UNSET:
|
||||
charInfo.fontStyle &= ~CharInfo::BOLD;
|
||||
break;
|
||||
case SET:
|
||||
charInfo.fontStyle |= CharInfo::BOLD;
|
||||
break;
|
||||
case UNCHANGED:
|
||||
break;
|
||||
case NEGATION:
|
||||
charInfo.fontStyle ^= CharInfo::BOLD;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x0836: //italic
|
||||
sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2);
|
||||
switch (sprm) {
|
||||
case UNSET:
|
||||
charInfo.fontStyle &= ~CharInfo::ITALIC;
|
||||
break;
|
||||
case SET:
|
||||
charInfo.fontStyle |= CharInfo::ITALIC;
|
||||
break;
|
||||
case UNCHANGED:
|
||||
break;
|
||||
case NEGATION:
|
||||
charInfo.fontStyle ^= CharInfo::ITALIC;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x4a43: //size of font
|
||||
charInfo.fontSize = OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset + 2);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
offset += getPrlLength(grpprlBuffer, chpxOffset + offset);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void OleMainStream::getSectionInfo(const char *grpprlBuffer, size_t bytes, SectionInfo §ionInfo) {
|
||||
unsigned int tmp;
|
||||
size_t offset = 0;
|
||||
while (bytes >= offset + 2) {
|
||||
switch (OleUtil::getU2Bytes(grpprlBuffer, offset)) {
|
||||
case 0x3009: //new page
|
||||
tmp = OleUtil::getU1Byte(grpprlBuffer, offset + 2);
|
||||
sectionInfo.newPage = (tmp != 0 && tmp != 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
offset += getPrlLength(grpprlBuffer, offset);
|
||||
}
|
||||
}
|
||||
|
||||
//Looks a style up in the stylesheet by its identifier.
//Returns a copy of the first style whose istd equals the given one. If there is
//no such style, or istd is ISTD_INVALID, STI_NIL or STI_USER, a default-constructed
//style carrying the given istd is returned.
OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int istd, const StyleSheet &stylesheet) {
	//TODO optimize it: StyleSheet can be map structure with istd key
	const bool isSpecial = istd == ISTD_INVALID || istd == STI_NIL || istd == STI_USER;
	if (!isSpecial) {
		for (StyleSheet::const_iterator it = stylesheet.begin(); it != stylesheet.end(); ++it) {
			if (it->istd == istd) {
				return *it;
			}
		}
	}
	Style fallback;
	fallback.istd = istd;
	return fallback;
}
|
||||
|
||||
int OleMainStream::getStyleIndex(unsigned int istd, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) {
|
||||
//TODO optimize it: StyleSheet can be map structure with istd key
|
||||
//in that case, this method will be excess
|
||||
if (istd == ISTD_INVALID) {
|
||||
return -1;
|
||||
}
|
||||
for (int index = 0; index < (int)stylesheet.size(); ++index) {
|
||||
if (isFilled.at(index) && stylesheet.at(index).istd == istd) {
|
||||
return index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned int OleMainStream::getIstdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) {
|
||||
unsigned int istd = ISTD_INVALID;
|
||||
for (size_t i = 0; i < styleInfoList.size(); ++i) {
|
||||
const Style &info = styleInfoList.at(i).second;
|
||||
if (i == styleInfoList.size() - 1) { //if last
|
||||
istd = info.istd;
|
||||
break;
|
||||
}
|
||||
unsigned int curOffset = styleInfoList.at(i).first;
|
||||
unsigned int nextOffset = styleInfoList.at(i + 1).first;
|
||||
if (charPos >= curOffset && charPos < nextOffset) {
|
||||
istd = info.istd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return istd;
|
||||
}
|
||||
|
||||
bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) {
|
||||
if (pieces.empty()) {
|
||||
return false;
|
||||
}
|
||||
if ((unsigned int)pieces.front().offset > offset) {
|
||||
return false;
|
||||
}
|
||||
if ((unsigned int)(pieces.back().offset + pieces.back().length) <= offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t pieceNumber = 0;
|
||||
for (size_t i = 0; i < pieces.size(); ++i) {
|
||||
if (i == pieces.size() - 1) { //if last
|
||||
pieceNumber = i;
|
||||
break;
|
||||
}
|
||||
unsigned int curOffset = pieces.at(i).offset;
|
||||
unsigned int nextOffset = pieces.at(i + 1).offset;
|
||||
if (offset >= curOffset && offset < nextOffset) {
|
||||
pieceNumber = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const Piece &piece = pieces.at(pieceNumber);
|
||||
unsigned int diffOffset = offset - piece.offset;
|
||||
if (!piece.isANSI) {
|
||||
diffOffset /= 2;
|
||||
}
|
||||
charPos = piece.startCP + diffOffset;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, size_t length, OleStream &stream) {
|
||||
char *buffer = new char[length];
|
||||
stream.seek(offset, true);
|
||||
if (stream.read(buffer, length) != length) {
|
||||
return false;
|
||||
}
|
||||
result = std::string(buffer, length);
|
||||
delete buffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) {
|
||||
unsigned int tmp;
|
||||
unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber);
|
||||
switch (opCode & 0xe000) {
|
||||
case 0x0000:
|
||||
case 0x2000:
|
||||
return 3;
|
||||
case 0x4000:
|
||||
case 0x8000:
|
||||
case 0xA000:
|
||||
return 4;
|
||||
case 0xE000:
|
||||
return 5;
|
||||
case 0x6000:
|
||||
return 6;
|
||||
case 0xC000:
|
||||
//counting of info length
|
||||
tmp = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2);
|
||||
if (opCode == 0xc615 && tmp == 255) {
|
||||
unsigned int del = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3);
|
||||
unsigned int add = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + del * 4);
|
||||
tmp = 2 + del * 4 + add * 3;
|
||||
}
|
||||
return 3 + tmp;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
178
jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h
Normal file
178
jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h
Normal file
|
@ -0,0 +1,178 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLEMAINSTREAM_H__
|
||||
#define __OLEMAINSTREAM_H__
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "OleStream.h"
|
||||
|
||||
class OleMainStream : public OleStream {
|
||||
public:
|
||||
struct Piece {
|
||||
enum PieceType {
|
||||
TEXT,
|
||||
FOOTNOTE,
|
||||
OTHER
|
||||
};
|
||||
|
||||
int offset; //maybe make it unsigned int
|
||||
int length; //maybe make it unsigned int
|
||||
bool isANSI;
|
||||
PieceType type;
|
||||
unsigned int startCP;
|
||||
};
|
||||
typedef std::vector<Piece> Pieces;
|
||||
|
||||
struct CharInfo {
|
||||
|
||||
enum Font {
|
||||
REGULAR = 0x0000,
|
||||
BOLD = 0x0001,
|
||||
ITALIC = 0x0002,
|
||||
UNDERLINE = 0x0004,
|
||||
CAPITALS = 0x0008,
|
||||
SMALL_CAPITALS = 0x0010,
|
||||
STRIKE = 0x0020,
|
||||
HIDDEN = 0x0040,
|
||||
MARKDEL = 0x0080,
|
||||
SUPERSCRIPT = 0x0100,
|
||||
SUBSCRIPT = 0x0200
|
||||
};
|
||||
|
||||
unsigned int fontStyle;
|
||||
unsigned int fontSize;
|
||||
|
||||
CharInfo();
|
||||
};
|
||||
typedef std::pair<unsigned int, CharInfo> CharPosToCharInfo;
|
||||
typedef std::vector<CharPosToCharInfo > CharInfoList;
|
||||
|
||||
struct Style {
|
||||
|
||||
enum Alignment {
|
||||
LEFT = 0x00,
|
||||
CENTER = 0x01,
|
||||
RIGHT = 0x02,
|
||||
JUSTIFY = 0x03
|
||||
};
|
||||
|
||||
unsigned int istd; //Current style
|
||||
unsigned int istdNext; //Next style unless overruled
|
||||
bool hasPageBreakBefore;
|
||||
unsigned int beforeIndent; //Vertical indent before paragraph
|
||||
unsigned int afterIndent; //Vertical indent after paragraph
|
||||
int leftIndent; //Left indent
|
||||
int firstLineIndent; //First line left indent
|
||||
int rightIndent; //Right indent
|
||||
unsigned int alignment;
|
||||
|
||||
CharInfo charInfo;
|
||||
Style();
|
||||
};
|
||||
typedef std::pair<unsigned int, Style> CharPosToStyle;
|
||||
typedef std::vector<CharPosToStyle> StyleInfoList;
|
||||
typedef std::vector<Style> StyleSheet;
|
||||
|
||||
enum StyleID {
|
||||
H1 = 0x1,
|
||||
H2 = 0x2,
|
||||
H3 = 0x3,
|
||||
STI_USER = 0xFFE,
|
||||
STI_NIL = 0xFFF,
|
||||
ISTD_INVALID = 0xFFFF
|
||||
};
|
||||
|
||||
struct SectionInfo {
|
||||
unsigned int charPos;
|
||||
bool newPage;
|
||||
SectionInfo();
|
||||
};
|
||||
|
||||
typedef std::vector<SectionInfo> SectionInfoList;
|
||||
|
||||
struct Bookmark {
|
||||
unsigned int charPos;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
typedef std::vector<Bookmark> Bookmarks;
|
||||
|
||||
public:
|
||||
OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
|
||||
|
||||
public:
|
||||
bool open();
|
||||
const Pieces &getPieces() const;
|
||||
const CharInfoList &getCharInfoList() const;
|
||||
const StyleInfoList &getStyleInfoList() const;
|
||||
const Bookmarks &getBookmarks() const;
|
||||
|
||||
private:
|
||||
bool readFIB(const char *headerBuffer);
|
||||
bool readPieceTable(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
bool readBookmarks(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
bool readStylesheet(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
bool readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
bool readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
bool readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
|
||||
|
||||
private: //readPieceTable helpers methods
|
||||
static std::string getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream);
|
||||
static void splitPieces(const Pieces &source, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary);
|
||||
|
||||
private: //formatting reader helpers methods
|
||||
static unsigned int getPrlLength(const char *grpprlBuffer, unsigned int byteNumber);
|
||||
static void getCharInfo(unsigned int chpxOffset, unsigned int istd, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo);
|
||||
static void getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo);
|
||||
// Fills sectionInfo from `bytes` bytes of property data at grpprlBuffer.
// (The parameter name had been mangled to "§ionInfo" by an HTML-entity
// conversion of "&sect"; restored to a valid reference declarator.)
static void getSectionInfo(const char *grpprlBuffer, size_t bytes, SectionInfo &sectionInfo);
|
||||
|
||||
static Style getStyleFromStylesheet(unsigned int istd, const StyleSheet &stylesheet);
|
||||
static int getStyleIndex(unsigned int istd, const std::vector<bool> &isFilled, const StyleSheet &stylesheet);
|
||||
static unsigned int getIstdByCharPos(unsigned int offset, const StyleInfoList &styleInfoList);
|
||||
|
||||
static bool offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces);
|
||||
static bool readToBuffer(std::string &result, unsigned int offset, size_t length, OleStream &stream);
|
||||
|
||||
private:
|
||||
// Values of a toggle-style property operand.
// NOTE(review): 0/1/128/129 look like the ToggleOperand values of the Word
// binary format (off / on / same as style / opposite of style) -- confirm.
enum PrlFlag {
	UNSET = 0,
	SET = 1,
	UNCHANGED = 128,
	NEGATION = 129
};
|
||||
|
||||
private:
|
||||
int myStartOfText;
|
||||
int myEndOfText;
|
||||
|
||||
Pieces myPieces;
|
||||
|
||||
StyleSheet myStyleSheet;
|
||||
|
||||
CharInfoList myCharInfoList;
|
||||
StyleInfoList myStyleInfoList;
|
||||
SectionInfoList mySectionInfoList;
|
||||
|
||||
Bookmarks myBookmarks;
|
||||
};
|
||||
|
||||
#endif /* __OLEMAINSTREAM_H__ */
|
268
jni/NativeFormats/fbreader/src/formats/doc/OleStorage.cpp
Normal file
268
jni/NativeFormats/fbreader/src/formats/doc/OleStorage.cpp
Normal file
|
@ -0,0 +1,268 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <ZLLogger.h>
|
||||
|
||||
#include "OleStorage.h"
|
||||
#include "OleUtil.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
// Size of the file header in bytes; it is also the base added to
// "sector index * sector size" when a sector is located in the file.
const size_t OleStorage::BBD_BLOCK_SIZE(512);
|
||||
|
||||
// Creates an empty storage; all state is put into its "nothing loaded"
// values by clear().
OleStorage::OleStorage() {
	clear();
}
|
||||
|
||||
// Drops everything learned from a previously opened file: releases the
// input stream, empties all tables and marks the root entry as unknown (-1).
void OleStorage::clear() {
	// parsed tables
	myEntries.clear();
	myProperties.clear();
	mySBD.clear();
	myBBD.clear();
	myDIFAT.clear();

	// scalar state
	myRootEntryIndex = -1;
	myStreamSize = 0;
	myShortSectorSize = 0;
	mySectorSize = 0;
	myInputStream = 0;
}
|
||||
|
||||
|
||||
|
||||
// Opens an OLE2 compound file.
//
// Reads the BBD_BLOCK_SIZE-byte header from the start of `stream`, checks the
// 8-byte signature, takes the big/short sector sizes from the header and then
// loads DIFAT, BBD, SBD, the directory sectors and all directory entries.
//
// stream     -- input stream positioned anywhere; it is rewound to offset 0
// streamSize -- total size of the stream in bytes (used for range checks)
//
// Returns true on success. On any failure the storage is reset with clear()
// and false is returned.
bool OleStorage::init(shared_ptr<ZLInputStream> stream, size_t streamSize) {
	clear();

	if (stream.isNull()) {
		return false;
	}

	myInputStream = stream;
	myStreamSize = streamSize;
	myInputStream->seek(0, true);

	char oleBuf[BBD_BLOCK_SIZE];
	if (myInputStream->read(oleBuf, BBD_BLOCK_SIZE) != BBD_BLOCK_SIZE) {
		clear();
		return false;
	}

	// The signature is binary data: keep it as unsigned bytes (values above
	// 0x7F do not fit into a signed char) and compare with memcmp.
	static const unsigned char OLE_SIGN[8] = {0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1};
	if (std::memcmp(oleBuf, OLE_SIGN, sizeof(OLE_SIGN)) != 0) {
		clear();
		return false;
	}

	// Sector sizes are stored as powers of two. The exponents come from the
	// file, so they are validated before shifting:
	//  * a big sector must hold at least one 128-byte directory record and is
	//    capped at 64 KiB, which keeps the shift well defined;
	//  * a short sector must not be larger than a big one, otherwise the
	//    "short sectors per big sector" ratio would be zero.
	const unsigned int sectorShift = OleUtil::getU2Bytes(oleBuf, 0x1e); //offset for value of big sector size
	const unsigned int shortSectorShift = OleUtil::getU2Bytes(oleBuf, 0x20); //offset for value of small sector size
	if (sectorShift < 7 || sectorShift > 16 || shortSectorShift > sectorShift) {
		ZLLogger::Instance().println("OleStorage", "Wrong sector size in header");
		clear();
		return false;
	}
	mySectorSize = 1u << sectorShift;
	myShortSectorSize = 1u << shortSectorShift;

	if (readDIFAT(oleBuf) && readBBD(oleBuf) && readSBD(oleBuf) && readProperties(oleBuf) && readAllEntries()) {
		return true;
	}
	clear();
	return false;
}
|
||||
|
||||
bool OleStorage::readDIFAT(char *oleBuf) {
|
||||
int difatBlock = OleUtil::get4Bytes(oleBuf, 0x44); //address for first difat sector
|
||||
int difatSectorNumbers = OleUtil::get4Bytes(oleBuf, 0x48); //numbers of additional difat records
|
||||
|
||||
//436 of difat records are stored in header, by offset 0x4c
|
||||
for (unsigned int i = 0; i < 436; i += 4) {
|
||||
myDIFAT.push_back(OleUtil::get4Bytes(oleBuf + 0x4c, i));
|
||||
}
|
||||
|
||||
//for files > 6.78 mb we need read additional DIFAT fields
|
||||
for (int i = 0; difatBlock > 0 && i < difatSectorNumbers; ++i) {
|
||||
ZLLogger::Instance().println("OleStorage", "Read additional data for DIFAT");
|
||||
char buffer[mySectorSize];
|
||||
myInputStream->seek(BBD_BLOCK_SIZE + difatBlock * mySectorSize, true);
|
||||
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
||||
ZLLogger::Instance().println("OleStorage", "Error read DIFAT!");
|
||||
return false;
|
||||
}
|
||||
for (unsigned int j = 0; j < (mySectorSize - 4); j += 4) {
|
||||
myDIFAT.push_back(OleUtil::get4Bytes(buffer, j));
|
||||
}
|
||||
difatBlock = OleUtil::get4Bytes(buffer, mySectorSize - 4); //next DIFAT block is pointed at the end of the sector
|
||||
}
|
||||
|
||||
//removing unusable DIFAT links
|
||||
//0xFFFFFFFF means "free section"
|
||||
while (!myDIFAT.empty() && myDIFAT.back() == (int)0xFFFFFFFF) {
|
||||
myDIFAT.pop_back();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleStorage::readBBD(char *oleBuf) {
|
||||
char buffer[mySectorSize];
|
||||
unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
|
||||
|
||||
for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
|
||||
int bbdSector = myDIFAT.at(i);
|
||||
if (bbdSector >= (int)(myStreamSize / mySectorSize) || bbdSector < 0) {
|
||||
ZLLogger::Instance().println("OleStorage", "Bad BBD entry!");
|
||||
return false;
|
||||
}
|
||||
myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
|
||||
if (myInputStream->read(buffer, mySectorSize) != mySectorSize) {
|
||||
ZLLogger::Instance().println("OleStorage", "Can't read BBD!");
|
||||
return false;
|
||||
}
|
||||
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
||||
myBBD.push_back(OleUtil::get4Bytes(buffer, j));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleStorage::readSBD(char *oleBuf) {
|
||||
int sbdCur = OleUtil::get4Bytes(oleBuf, 0x3c); //address of first small sector
|
||||
int sbdCount = OleUtil::get4Bytes(oleBuf, 0x40); //count of small sectors
|
||||
|
||||
if (sbdCur <= 0) {
|
||||
ZLLogger::Instance().println("OleStorage", "There's no SBD, don't read it");
|
||||
return true;
|
||||
}
|
||||
|
||||
char buffer[mySectorSize];
|
||||
for (int i = 0; i < sbdCount; ++i) {
|
||||
if (i != 0) {
|
||||
sbdCur = myBBD.at(sbdCur);
|
||||
}
|
||||
if (sbdCur <= 0) {
|
||||
break;
|
||||
}
|
||||
myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
|
||||
myInputStream->read(buffer, mySectorSize);
|
||||
for (unsigned int j = 0; j < mySectorSize; j += 4) {
|
||||
mySBD.push_back(OleUtil::get4Bytes(buffer, j));
|
||||
}
|
||||
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleStorage::readProperties(char *oleBuf) {
|
||||
int propCur = OleUtil::get4Bytes(oleBuf, 0x30); //offset for address of sector with first property
|
||||
if (propCur < 0) {
|
||||
ZLLogger::Instance().println("OleStorage", "Wrong first directory sector location");
|
||||
return false;
|
||||
}
|
||||
|
||||
char buffer[mySectorSize];
|
||||
do {
|
||||
myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
|
||||
myInputStream->read(buffer, mySectorSize);
|
||||
for (unsigned int j = 0; j < mySectorSize; j += 128) {
|
||||
myProperties.push_back(std::string(buffer + j, 128));
|
||||
}
|
||||
if (propCur < 0 || (size_t)propCur >= myBBD.size()) {
|
||||
break;
|
||||
}
|
||||
propCur = myBBD.at(propCur);
|
||||
} while (propCur >= 0 && propCur < (int)(myStreamSize / mySectorSize));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleStorage::readAllEntries() {
|
||||
int propCount = myProperties.size();
|
||||
for (int i = 0; i < propCount; ++i) {
|
||||
OleEntry entry;
|
||||
bool result = readOleEntry(i, entry);
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
if (entry.type == OleEntry::ROOT_DIR) {
|
||||
myRootEntryIndex = i;
|
||||
}
|
||||
myEntries.push_back(entry);
|
||||
}
|
||||
if (myRootEntryIndex < 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
|
||||
static const std::string ROOT_ENTRY = "Root Entry";
|
||||
|
||||
std::string property = myProperties.at(propNumber);
|
||||
|
||||
char oleType = property.at(0x42); //offset for Ole Type
|
||||
if (oleType != 1 && oleType != 2 && oleType != 3 && oleType != 5) {
|
||||
ZLLogger::Instance().println("OleStorage", "entry -- not right ole type");
|
||||
return false;
|
||||
}
|
||||
|
||||
e.type = (OleEntry::Type)oleType;
|
||||
|
||||
int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
|
||||
e.name.clear();
|
||||
e.name.reserve(33); //max size of entry name
|
||||
for (int i = 0; i < nameLength; i+=2) {
|
||||
char c = property.at(i);
|
||||
if (c != 0) {
|
||||
e.name += c;
|
||||
}
|
||||
}
|
||||
|
||||
e.length = OleUtil::getU4Bytes(property.c_str(), 0x78); //offset for entry's length value
|
||||
e.isBigBlock = e.length >= 0x1000 || e.name == ROOT_ENTRY;
|
||||
|
||||
// Read sector chain
|
||||
int chainCur = OleUtil::get4Bytes(property.c_str(), 0x74); //offset for start block of entry
|
||||
if (chainCur >= 0 && (chainCur <= (int)(myStreamSize / (e.isBigBlock ? mySectorSize : myShortSectorSize)))) {
|
||||
//filling blocks with chains
|
||||
do {
|
||||
e.blocks.push_back((unsigned int)chainCur);
|
||||
if (e.isBigBlock && (size_t)chainCur < myBBD.size()) {
|
||||
chainCur = myBBD.at(chainCur);
|
||||
} else if (!mySBD.empty() && (size_t)chainCur < mySBD.size()) {
|
||||
chainCur = mySBD.at(chainCur);
|
||||
} else {
|
||||
chainCur = -1;
|
||||
}
|
||||
} while (chainCur > 0 &&
|
||||
chainCur < (int)(e.isBigBlock ? myBBD.size() : mySBD.size()) &&
|
||||
e.blocks.size() <= e.length / (e.isBigBlock ? mySectorSize : myShortSectorSize));
|
||||
}
|
||||
e.length = std::min(e.length, (unsigned int)(e.isBigBlock ? mySectorSize : myShortSectorSize) * e.blocks.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translates block number `blockNumber` of entry `e` into an absolute offset
// in the underlying file.
//
// Big-block entries map directly to a sector of the file. Small-block
// entries live inside the blocks of the root entry: the short block index is
// split into "which root-entry block" and "which slot inside that block".
// Out-of-range indices are reported by vector::at().
unsigned int OleStorage::getFileOffsetOfBlock(OleEntry &e, unsigned int blockNumber) {
	if (e.isBigBlock) {
		return BBD_BLOCK_SIZE + e.blocks.at(blockNumber) * mySectorSize;
	}

	const unsigned int shortPerBig = mySectorSize / myShortSectorSize;
	const unsigned int rootBlockIndex = e.blocks.at(blockNumber) / shortPerBig;
	const unsigned int slotInBlock = e.blocks.at(blockNumber) % shortPerBig;
	const OleEntry &root = myEntries.at(myRootEntryIndex);
	return BBD_BLOCK_SIZE + root.blocks.at(rootBlockIndex) * mySectorSize + slotInBlock * myShortSectorSize;
}
|
||||
|
||||
bool OleStorage::getEntryByName(std::string name, OleEntry &returnEntry) const {
|
||||
for (size_t i = 0; i < myEntries.size(); ++i) {
|
||||
const OleEntry &entry = myEntries.at(i);
|
||||
if (entry.name == name) {
|
||||
returnEntry = entry;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
92
jni/NativeFormats/fbreader/src/formats/doc/OleStorage.h
Normal file
92
jni/NativeFormats/fbreader/src/formats/doc/OleStorage.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLESTORAGE_H__
|
||||
#define __OLESTORAGE_H__
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ZLInputStream.h>
|
||||
|
||||
// One entry of the compound file directory.
struct OleEntry {
	// Entry kind; the numeric values are the ones accepted for the type byte
	// of a directory record.
	enum Type {
		DIR = 1,
		STREAM = 2,
		LOCK_BYTES = 3,
		ROOT_DIR = 5
	};

	// Indices of the blocks that make up the entry, in reading order.
	typedef std::vector<unsigned int> Blocks;

	std::string name;    // entry name
	unsigned int length; // entry length in bytes
	Type type;
	Blocks blocks;
	bool isBigBlock;     // true: blocks are big sectors; false: short sectors
};
|
||||
|
||||
class OleStorage {
|
||||
|
||||
public:
|
||||
static const size_t BBD_BLOCK_SIZE;
|
||||
|
||||
public:
|
||||
OleStorage();
|
||||
bool init(shared_ptr<ZLInputStream>, size_t streamSize);
|
||||
void clear();
|
||||
const std::vector<OleEntry> &getEntries() const;
|
||||
bool getEntryByName(std::string name, OleEntry &entry) const;
|
||||
|
||||
unsigned int getSectorSize();
|
||||
unsigned int getShortSectorSize();
|
||||
|
||||
public: //TODO make private
|
||||
unsigned int getFileOffsetOfBlock(OleEntry &e, unsigned int blockNumber);
|
||||
|
||||
private:
|
||||
bool readDIFAT(char *oleBuf);
|
||||
bool readBBD(char *oleBuf);
|
||||
bool readSBD(char *oleBuf);
|
||||
bool readProperties(char *oleBuf);
|
||||
|
||||
bool readAllEntries();
|
||||
bool readOleEntry(int propNumber, OleEntry &entry);
|
||||
|
||||
private:
|
||||
|
||||
shared_ptr<ZLInputStream> myInputStream;
|
||||
unsigned int mySectorSize, myShortSectorSize;
|
||||
|
||||
size_t myStreamSize;
|
||||
std::vector<int> myDIFAT; //double-indirect file allocation table
|
||||
std::vector<int> myBBD; //Big Block Depot
|
||||
std::vector<int> mySBD; //Small Block Depot
|
||||
std::vector<std::string> myProperties;
|
||||
std::vector<OleEntry> myEntries;
|
||||
int myRootEntryIndex;
|
||||
|
||||
};
|
||||
|
||||
inline const std::vector<OleEntry> &OleStorage::getEntries() const { return myEntries; }
|
||||
inline unsigned int OleStorage::getSectorSize() { return mySectorSize; }
|
||||
inline unsigned int OleStorage::getShortSectorSize() { return myShortSectorSize; }
|
||||
|
||||
#endif /* __OLESTORAGE_H__ */
|
127
jni/NativeFormats/fbreader/src/formats/doc/OleStream.cpp
Normal file
127
jni/NativeFormats/fbreader/src/formats/doc/OleStream.cpp
Normal file
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <ZLLogger.h>
|
||||
|
||||
#include "OleStream.h"
|
||||
#include "OleUtil.h"
|
||||
|
||||
// Creates a reader for the directory entry `oleEntry` of `storage`; the bytes
// are fetched from `stream`. Reading starts at offset 0 of the entry.
OleStream::OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) :
	myStorage(storage),
	myOleEntry(oleEntry),
	myBaseStream(stream),
	myOleOffset(0) {
}
|
||||
|
||||
|
||||
bool OleStream::open() {
|
||||
if (myOleEntry.type != OleEntry::STREAM) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t OleStream::read(char *buffer, size_t maxSize) {
|
||||
size_t length = maxSize;
|
||||
size_t readedBytes = 0;
|
||||
size_t bytesLeftInCurBlock;
|
||||
unsigned int newFileOffset;
|
||||
|
||||
unsigned int curBlockNumber, modBlock;
|
||||
size_t toReadBlocks, toReadBytes;
|
||||
|
||||
if (myOleOffset + length > myOleEntry.length) {
|
||||
length = myOleEntry.length - myOleOffset;
|
||||
}
|
||||
|
||||
size_t sectorSize = (size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
|
||||
|
||||
curBlockNumber = myOleOffset / sectorSize;
|
||||
if (curBlockNumber >= myOleEntry.blocks.size()) {
|
||||
return 0;
|
||||
}
|
||||
modBlock = myOleOffset % sectorSize;
|
||||
bytesLeftInCurBlock = sectorSize - modBlock;
|
||||
if (bytesLeftInCurBlock < length) {
|
||||
toReadBlocks = (length - bytesLeftInCurBlock) / sectorSize;
|
||||
toReadBytes = (length - bytesLeftInCurBlock) % sectorSize;
|
||||
} else {
|
||||
toReadBlocks = toReadBytes = 0;
|
||||
}
|
||||
|
||||
newFileOffset = myStorage->getFileOffsetOfBlock(myOleEntry, curBlockNumber) + modBlock;
|
||||
myBaseStream->seek(newFileOffset, true);
|
||||
|
||||
readedBytes = myBaseStream->read(buffer, std::min(length, bytesLeftInCurBlock));
|
||||
for (size_t i = 0; i < toReadBlocks; ++i) {
|
||||
size_t readbytes;
|
||||
++curBlockNumber;
|
||||
newFileOffset = myStorage->getFileOffsetOfBlock(myOleEntry, curBlockNumber);
|
||||
myBaseStream->seek(newFileOffset, true);
|
||||
readbytes = myBaseStream->read(buffer + readedBytes, std::min(length - readedBytes, sectorSize));
|
||||
readedBytes += readbytes;
|
||||
}
|
||||
if (toReadBytes > 0) {
|
||||
size_t readbytes;
|
||||
++curBlockNumber;
|
||||
newFileOffset = myStorage->getFileOffsetOfBlock(myOleEntry, curBlockNumber);
|
||||
myBaseStream->seek(newFileOffset, true);
|
||||
readbytes = myBaseStream->read(buffer + readedBytes, toReadBytes);
|
||||
readedBytes += readbytes;
|
||||
}
|
||||
myOleOffset += readedBytes;
|
||||
return readedBytes;
|
||||
}
|
||||
|
||||
bool OleStream::eof() const {
|
||||
return (myOleOffset >= myOleEntry.length);
|
||||
}
|
||||
|
||||
|
||||
// Nothing to release here.
void OleStream::close() {
	// intentionally empty
}
|
||||
|
||||
bool OleStream::seek(unsigned int offset, bool absoluteOffset) {
|
||||
unsigned int newOleOffset = 0;
|
||||
unsigned int newFileOffset;
|
||||
|
||||
if (absoluteOffset) {
|
||||
newOleOffset = offset;
|
||||
} else {
|
||||
newOleOffset = myOleOffset + offset;
|
||||
}
|
||||
|
||||
newOleOffset = std::min(newOleOffset, myOleEntry.length);
|
||||
|
||||
unsigned int sectorSize = (myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
|
||||
unsigned int blockNumber = newOleOffset / sectorSize;
|
||||
if (blockNumber >= myOleEntry.blocks.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int modBlock = newOleOffset % sectorSize;
|
||||
newFileOffset = myStorage->getFileOffsetOfBlock(myOleEntry, blockNumber) + modBlock;
|
||||
myBaseStream->seek(newFileOffset, true);
|
||||
myOleOffset = newOleOffset;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t OleStream::offset() {
|
||||
return myOleOffset;
|
||||
}
|
53
jni/NativeFormats/fbreader/src/formats/doc/OleStream.h
Normal file
53
jni/NativeFormats/fbreader/src/formats/doc/OleStream.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLESTREAM_H__
|
||||
#define __OLESTREAM_H__
|
||||
|
||||
#include "OleStorage.h"
|
||||
|
||||
class OleStream {
|
||||
|
||||
public:
|
||||
OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
|
||||
|
||||
public:
|
||||
bool open();
|
||||
size_t read(char *buffer, size_t maxSize);
|
||||
void close();
|
||||
|
||||
public:
|
||||
bool seek(unsigned int offset, bool absoluteOffset);
|
||||
size_t offset();
|
||||
|
||||
public:
|
||||
bool eof() const;
|
||||
|
||||
protected:
|
||||
shared_ptr<OleStorage> myStorage;
|
||||
|
||||
OleEntry myOleEntry;
|
||||
shared_ptr<ZLInputStream> myBaseStream;
|
||||
|
||||
unsigned int myOleOffset;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* __OLESTREAM_H__ */
|
230
jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp
Normal file
230
jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp
Normal file
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
|
||||
#include <ZLLogger.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "DocBookReader.h"
|
||||
#include "OleUtil.h"
|
||||
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
//word's control chars:
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_FOOTNOTE_MARK = 0x0002;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_TABLE_SEPARATOR = 0x0007;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SHORT_DEFIS = 0x001e;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_FIELD = 0x0015;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
|
||||
|
||||
//unicode values:
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::NULL_SYMBOL = 0x0;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::START_OF_HEADING = 0x0001;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SHORT_DEFIS = 0x2D;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
|
||||
|
||||
// encoding -- name of the encoding used to decode non-Unicode (ANSI) text
//             pieces; the converter itself is created on first use.
OleStreamReader::OleStreamReader(const std::string &encoding) : myEncoding(encoding) {
	clear();
}
|
||||
|
||||
// Resets the reading state: empties the character buffer and rewinds all
// cursors (piece, character position, style/char-info/bookmark indices).
void OleStreamReader::clear() {
	// cursors into the tables of the main stream
	myNextBookmarkIndex = 0;
	myNextCharInfoIndex = 0;
	myNextStyleInfoIndex = 0;
	myCurCharPos = 0;

	// decoded text of the current piece
	myNextPieceNumber = 0;
	myCurBufferPosition = 0;
	myBuffer.clear();
}
|
||||
|
||||
// Reads the whole text of `oleMainStream` character by character and
// dispatches every character to the matching handle*() callback.
//
// A table separator is only reported once the next character is known:
// two separators in a row end a table row (the second one is consumed),
// a separator followed by anything else is a cell separator.
//
// Returns false if the stream cannot be opened, true otherwise.
bool OleStreamReader::readStream(OleMainStream &oleMainStream) {
	clear();

	if (!oleMainStream.open()) {
		ZLLogger::Instance().println("OleStreamReader", "doesn't open correct");
		return false;
	}

	bool separatorPending = false;
	ZLUnicodeUtil::Ucs2Char current;
	while (getUcs2Char(oleMainStream, current)) {
		if (separatorPending) {
			separatorPending = false;
			if (current == WORD_TABLE_SEPARATOR) {
				handleTableEndRow();
				continue;
			}
			handleTableSeparator();
		}

		// printable range: everything except the zero-width no-break space
		if (current >= 32) {
			if (current != WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
				handleChar(current);
			}
			continue;
		}

		// control characters (< 32)
		switch (current) {
			case NULL_SYMBOL:
				break;
			case WORD_HARD_LINEBREAK:
				handleHardLinebreak();
				break;
			case WORD_END_OF_PARAGRAPH:
			case WORD_PAGE_BREAK:
				handleParagraphEnd();
				break;
			case WORD_TABLE_SEPARATOR:
				separatorPending = true;
				break;
			case WORD_FOOTNOTE_MARK:
				handleFootNoteMark();
				break;
			case WORD_START_FIELD:
				handleStartField();
				break;
			case WORD_SEPARATOR_FIELD:
				handleSeparatorField();
				break;
			case WORD_END_FIELD:
				handleEndField();
				break;
			case START_OF_HEADING:
				handleStartOfHeading();
				break;
			default:
				handleOtherControlChar(current);
				break;
		}
	}

	return true;
}
|
||||
|
||||
// Delivers the next character of the text in ucs2char.
//
// Before the character is returned, every paragraph style, font style and
// bookmark registered for the current character position is reported through
// handleParagraphStyle(), handleFontStyle() and handleBookmark().
//
// Returns false when no more text is available.
bool OleStreamReader::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
	// A piece may decode to no characters at all, so keep loading pieces
	// until the buffer holds something or fillBuffer() reports the end.
	while (myCurBufferPosition >= myBuffer.size()) {
		if (!fillBuffer(stream)) {
			return false;
		}
	}

	const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList();
	while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList[myNextStyleInfoIndex].first == myCurCharPos) {
		handleParagraphStyle(styleInfoList[myNextStyleInfoIndex].second);
		++myNextStyleInfoIndex;
	}

	const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList();
	while (myNextCharInfoIndex < charInfoList.size() && charInfoList[myNextCharInfoIndex].first == myCurCharPos) {
		handleFontStyle(charInfoList[myNextCharInfoIndex].second.fontStyle);
		++myNextCharInfoIndex;
	}

	const OleMainStream::Bookmarks &bookmarksList = stream.getBookmarks();
	while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList[myNextBookmarkIndex].charPos == myCurCharPos) {
		handleBookmark(bookmarksList[myNextBookmarkIndex].name);
		++myNextBookmarkIndex;
	}

	ucs2char = myBuffer[myCurBufferPosition++];
	++myCurCharPos;
	return true;
}
|
||||
|
||||
// Loads the next text piece of `stream` into myBuffer as UCS-2 characters.
//
// A FOOTNOTE piece is preceded by a page break; an OTHER piece ends reading.
// Unicode pieces are taken as 2-byte characters, ANSI pieces are converted
// with the converter for myEncoding (created on first use, falling back to
// the default converter).
//
// Returns false when there is no further piece to read.
bool OleStreamReader::fillBuffer(OleMainStream &stream) {
	const OleMainStream::Pieces &pieces = stream.getPieces();
	if (myNextPieceNumber >= pieces.size()) {
		return false; //end of reading
	}
	const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);

	if (piece.type == OleMainStream::Piece::FOOTNOTE) {
		handlePageBreak();
	} else if (piece.type == OleMainStream::Piece::OTHER) {
		return false;
	}

	const size_t length = piece.length > 0 ? (size_t)piece.length : 0;

	// The buffer is owned by a vector (released on every path) and starts
	// zero-filled: if the stream delivers fewer bytes than requested, the
	// missing part decodes to NUL characters instead of arbitrary memory and
	// the character positions of the following text stay unchanged.
	std::vector<char> textBuffer(length);
	if (length > 0) {
		stream.seek(piece.offset, true);
		stream.read(&textBuffer[0], length);
	}

	myBuffer.clear();
	if (!piece.isANSI) {
		// i + 1 < length: a trailing odd byte is not a complete character
		for (unsigned int i = 0; (size_t)i + 1 < length; i += 2) {
			ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(&textBuffer[0], i);
			myBuffer.push_back(ch);
		}
	} else {
		if (myConverter.isNull()) {
			//lazy convertor loading, because documents can be in Unicode only and don't need to be converted
			ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
			myConverter = collection.converter(myEncoding);
			if (myConverter.isNull()) {
				myConverter = collection.defaultConverter();
			}
		}
		if (length > 0) {
			std::string utf8String;
			myConverter->convert(utf8String, std::string(&textBuffer[0], length));
			ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
		}
	}
	myCurBufferPosition = 0;
	++myNextPieceNumber;

	return true;
}
|
99
jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h
Normal file
99
jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLESTREAMREADER_H__
|
||||
#define __OLESTREAMREADER_H__
|
||||
|
||||
#include <ZLUnicodeUtil.h>
|
||||
#include <ZLEncodingConverter.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
|
||||
class OleStreamReader {
|
||||
|
||||
public:
|
||||
//word's control chars:
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SHORT_DEFIS;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
|
||||
|
||||
//unicode values:
|
||||
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
|
||||
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
||||
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char START_OF_HEADING;
|
||||
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
||||
static const ZLUnicodeUtil::Ucs2Char SHORT_DEFIS;
|
||||
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
||||
|
||||
public:
|
||||
OleStreamReader(const std::string &encoding);
|
||||
|
||||
bool readStream(OleMainStream &stream);
|
||||
void clear();
|
||||
|
||||
protected:
|
||||
//virtual void parapgraphHandler(std::string paragraph) = 0;
|
||||
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
virtual void handleHardLinebreak() = 0;
|
||||
virtual void handleParagraphEnd() = 0;
|
||||
virtual void handlePageBreak() = 0;
|
||||
virtual void handleTableSeparator() = 0;
|
||||
virtual void handleTableEndRow() = 0;
|
||||
virtual void handleFootNoteMark() = 0;
|
||||
virtual void handleStartField() = 0;
|
||||
virtual void handleSeparatorField() = 0;
|
||||
virtual void handleEndField() = 0;
|
||||
virtual void handleStartOfHeading() = 0;
|
||||
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
|
||||
virtual void handleFontStyle(unsigned int fontStyle) = 0;
|
||||
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
|
||||
virtual void handleBookmark(const std::string &name) = 0;
|
||||
|
||||
private:
|
||||
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
|
||||
bool fillBuffer(OleMainStream &stream);
|
||||
|
||||
private:
|
||||
ZLUnicodeUtil::Ucs2String myBuffer;
|
||||
size_t myCurBufferPosition;
|
||||
size_t myNextPieceNumber;
|
||||
|
||||
shared_ptr<ZLEncodingConverter> myConverter;
|
||||
const std::string myEncoding;
|
||||
|
||||
unsigned int myCurCharPos;
|
||||
|
||||
size_t myNextStyleInfoIndex;
|
||||
size_t myNextCharInfoIndex;
|
||||
size_t myNextBookmarkIndex;
|
||||
};
|
||||
|
||||
#endif /* __OLESTREAMREADER_H__ */
|
58
jni/NativeFormats/fbreader/src/formats/doc/OleUtil.cpp
Normal file
58
jni/NativeFormats/fbreader/src/formats/doc/OleUtil.cpp
Normal file
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "OleUtil.h"
|
||||
|
||||
/*
 * Reads four consecutive bytes starting at buffer[offset], combines them in
 * little-endian order (lowest byte first) and returns the result as an int.
 *
 * The bytes are assembled in unsigned arithmetic and converted to int only
 * once at the end. Shifting a signed int left by 24 would move a set bit
 * into the sign position whenever the top byte is >= 0x80, which is not a
 * well-defined operation on a signed type in older C++ standards.
 *
 * No bounds checking is done: the caller must make sure that
 * buffer[offset] .. buffer[offset + 3] are readable.
 */
int OleUtil::get4Bytes(const char *buffer, unsigned int offset) {
	const unsigned char *bytes = (const unsigned char*)buffer + offset;
	const unsigned int value =
		  (unsigned int)bytes[0]
		| ((unsigned int)bytes[1] << 8)
		| ((unsigned int)bytes[2] << 16)
		| ((unsigned int)bytes[3] << 24);
	return (int)value;
}
|
||||
|
||||
/*
 * Reads four consecutive bytes starting at buffer[offset] and combines them
 * in little-endian order (lowest byte first) into an unsigned int.
 *
 * No bounds checking is done: the caller must make sure that
 * buffer[offset] .. buffer[offset + 3] are readable.
 */
unsigned int OleUtil::getU4Bytes(const char *buffer, unsigned int offset) {
	const unsigned char *bytes = (const unsigned char*)buffer + offset;
	unsigned int value = 0;
	// Walk from the most significant byte down to the least significant one,
	// making room for each next byte by shifting the accumulator.
	for (int index = 3; index >= 0; --index) {
		value = (value << 8) | (unsigned int)bytes[index];
	}
	return value;
}
|
||||
|
||||
/*
 * Reads two consecutive bytes starting at buffer[offset] and combines them
 * in little-endian order (lowest byte first) into an unsigned int.
 *
 * No bounds checking is done: the caller must make sure that
 * buffer[offset] and buffer[offset + 1] are readable.
 */
unsigned int OleUtil::getU2Bytes(const char *buffer, unsigned int offset) {
	const unsigned char *bytes = (const unsigned char*)buffer + offset;
	const unsigned int lowByte = bytes[0];
	const unsigned int highByte = bytes[1];
	return (highByte << 8) | lowByte;
}
|
||||
|
||||
/*
 * Returns the byte at buffer[offset], read as an unsigned char, so the
 * result is always in the range 0..255.
 *
 * No bounds checking is done: buffer[offset] must be readable.
 */
unsigned int OleUtil::getU1Byte(const char *buffer, unsigned int offset) {
	const unsigned char *bytes = (const unsigned char*)buffer;
	const unsigned char byte = bytes[offset];
	return byte;
}
|
||||
|
||||
/*
 * Returns the byte at buffer[offset] as an int.
 *
 * The byte is read as an unsigned char, so the result is in the range
 * 0..255; it is not sign-extended. The value is therefore the same as the
 * one getU1Byte() yields, only the return type differs.
 *
 * No bounds checking is done: buffer[offset] must be readable.
 */
int OleUtil::get1Byte(const char *buffer, unsigned int offset) {
	const unsigned char *bytes = (const unsigned char*)buffer;
	const unsigned char byte = bytes[offset];
	return byte;
}
|
||||
|
||||
|
||||
|
32
jni/NativeFormats/fbreader/src/formats/doc/OleUtil.h
Normal file
32
jni/NativeFormats/fbreader/src/formats/doc/OleUtil.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (C) 2009-2010 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLEUTIL_H__
|
||||
#define __OLEUTIL_H__
|
||||
|
||||
/*
 * Static helpers that read small integer values out of a raw byte buffer.
 *
 * Every function takes the buffer and the offset of the first byte to read.
 * None of them receives the buffer length, so the caller is responsible for
 * keeping the offset in range.
 */
class OleUtil {

public:
	// Results returned as unsigned int: 1, 2 or 4 bytes starting at 'offset'.
	static unsigned int getU1Byte(const char *buffer, unsigned int offset);
	static unsigned int getU2Bytes(const char *buffer, unsigned int offset);
	static unsigned int getU4Bytes(const char *buffer, unsigned int offset);

	// Results returned as int: 1 or 4 bytes starting at 'offset'.
	static int get1Byte(const char *buffer, unsigned int offset);
	static int get4Bytes(const char *buffer, unsigned int offset);
};
|
||||
|
||||
#endif /* __OLEUTIL_H__ */
|
|
@ -39,6 +39,7 @@ public class FileTypeCollection {
|
|||
addType(new SimpleFileType("PDF", "pdf", MimeType.TYPES_PDF));
|
||||
addType(new FileTypeDjVu());
|
||||
addType(new SimpleFileType("ZIP archive", "zip", Collections.singletonList(MimeType.APP_ZIP)));
|
||||
addType(new SimpleFileType("DOC", "doc", MimeType.TYPES_DOC));
|
||||
}
|
||||
|
||||
private void addType(FileType type) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue