1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-03 17:59:33 +02:00

synchronization with C++ version

This commit is contained in:
Nikolay Pultsin 2012-10-27 02:30:03 +04:00
parent e790aeb68a
commit 8ac2815d94
19 changed files with 640 additions and 518 deletions

View file

@ -128,10 +128,11 @@ LOCAL_SRC_FILES := \
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \ NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \ NativeFormats/fbreader/src/formats/doc/DocStreams.cpp \
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \ NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \ NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \ NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp \
NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \ NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \
NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \ NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \
NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \

View file

@ -22,22 +22,24 @@
#include <ZLInputStream.h> #include <ZLInputStream.h>
#include <ZLLanguageDetector.h> #include <ZLLanguageDetector.h>
#include <ZLImage.h> #include <ZLImage.h>
#include <ZLEncodingConverter.h>
#include "FormatPlugin.h" #include "FormatPlugin.h"
#include "../library/Book.h" #include "../library/Book.h"
void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) { bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
std::string language = book.language(); std::string language = book.language();
std::string encoding = book.encoding(); std::string encoding = book.encoding();
if (!encoding.empty()) { if (!force && !encoding.empty()) {
return; return true;
} }
bool detected = false;
PluginCollection &collection = PluginCollection::Instance(); PluginCollection &collection = PluginCollection::Instance();
if (encoding.empty()) { if (encoding.empty()) {
encoding = "utf-8"; encoding = ZLEncodingConverter::UTF8;
} }
if (collection.isLanguageAutoDetectEnabled() && stream.open()) { if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
static const int BUFSIZE = 65536; static const int BUFSIZE = 65536;
@ -47,25 +49,30 @@ void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream)
shared_ptr<ZLLanguageDetector::LanguageInfo> info = ZLLanguageDetector().findInfo(buffer, size); shared_ptr<ZLLanguageDetector::LanguageInfo> info = ZLLanguageDetector().findInfo(buffer, size);
delete[] buffer; delete[] buffer;
if (!info.isNull()) { if (!info.isNull()) {
detected = true;
if (!info->Language.empty()) { if (!info->Language.empty()) {
language = info->Language; language = info->Language;
} }
encoding = info->Encoding; encoding = info->Encoding;
if ((encoding == "us-ascii") || (encoding == "iso-8859-1")) { if (encoding == ZLEncodingConverter::ASCII || encoding == "iso-8859-1") {
encoding = "windows-1252"; encoding = "windows-1252";
} }
} }
} }
book.setEncoding(encoding); book.setEncoding(encoding);
book.setLanguage(language); book.setLanguage(language);
return detected;
} }
void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) { bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
std::string language = book.language(); std::string language = book.language();
if (!language.empty()) { if (!force && !language.empty()) {
return; return true;
} }
bool detected = false;
PluginCollection &collection = PluginCollection::Instance(); PluginCollection &collection = PluginCollection::Instance();
if (collection.isLanguageAutoDetectEnabled() && stream.open()) { if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
static const int BUFSIZE = 65536; static const int BUFSIZE = 65536;
@ -73,15 +80,18 @@ void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
const size_t size = stream.read(buffer, BUFSIZE); const size_t size = stream.read(buffer, BUFSIZE);
stream.close(); stream.close();
shared_ptr<ZLLanguageDetector::LanguageInfo> info = shared_ptr<ZLLanguageDetector::LanguageInfo> info =
ZLLanguageDetector().findInfoForEncoding(book.encoding(), buffer, size, -20000); ZLLanguageDetector().findInfoForEncoding(encoding, buffer, size, -20000);
delete[] buffer; delete[] buffer;
if (!info.isNull()) { if (!info.isNull()) {
detected = true;
if (!info->Language.empty()) { if (!info->Language.empty()) {
language = info->Language; language = info->Language;
} }
} }
} }
book.setLanguage(language); book.setLanguage(language);
return detected;
} }
const std::string &FormatPlugin::tryOpen(const ZLFile&) const { const std::string &FormatPlugin::tryOpen(const ZLFile&) const {

View file

@ -63,8 +63,8 @@ public:
virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const; virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
protected: protected:
static void detectEncodingAndLanguage(Book &book, ZLInputStream &stream); static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false);
static void detectLanguage(Book &book, ZLInputStream &stream); static bool detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force = false);
}; };
class PluginCollection { class PluginCollection {

View file

@ -34,9 +34,9 @@
#include "OleMainStream.h" #include "OleMainStream.h"
DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) : DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
OleStreamReader(encoding),
myModelReader(model), myModelReader(model),
myPictureCounter(0) { myPictureCounter(0),
myEncoding(encoding) {
myReadState = READ_TEXT; myReadState = READ_TEXT;
} }
@ -355,3 +355,25 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link); ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
return utf8String; return utf8String;
} }
// Footnote callback: simply forwards to handlePageBreak().
void DocBookReader::footnoteHandler() {
handlePageBreak();
}
void DocBookReader::dataHandler(const char *buffer, size_t len) {
if (myConverter.isNull()) {
// lazy converter initialization
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
myConverter = collection.converter(myEncoding);
if (myConverter.isNull()) {
myConverter = collection.defaultConverter();
}
}
std::string utf8String;
myConverter->convert(utf8String, buffer, buffer + len);
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
}
// Single-symbol callback: appends the given UCS-2 symbol to myBuffer.
void DocBookReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
myBuffer.push_back(symbol);
}

View file

@ -25,13 +25,14 @@
#include <shared_ptr.h> #include <shared_ptr.h>
#include <ZLFile.h> #include <ZLFile.h>
#include <ZLTextStyleEntry.h> #include <ZLTextStyleEntry.h>
#include <ZLEncodingConverter.h>
#include "../../bookmodel/BookReader.h" #include "../../bookmodel/BookReader.h"
#include "OleMainStream.h" #include "OleMainStream.h"
#include "OleStreamReader.h" #include "OleStreamParser.h"
class DocBookReader : public OleStreamReader { class DocBookReader : public OleStreamParser {
public: public:
DocBookReader(BookModel &model, const std::string &encoding); DocBookReader(BookModel &model, const std::string &encoding);
@ -39,6 +40,10 @@ public:
bool readBook(); bool readBook();
private: private:
void dataHandler(const char *buffer, size_t len);
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
void footnoteHandler();
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
void handleHardLinebreak(); void handleHardLinebreak();
void handleParagraphEnd(); void handleParagraphEnd();
@ -88,6 +93,9 @@ private:
shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry; shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
OleMainStream::Style myCurrentStyleInfo; OleMainStream::Style myCurrentStyleInfo;
unsigned int myPictureCounter; unsigned int myPictureCounter;
const std::string myEncoding;
shared_ptr<ZLEncodingConverter> myConverter;
}; };
inline DocBookReader::~DocBookReader() {} inline DocBookReader::~DocBookReader() {}

View file

@ -21,11 +21,12 @@
#include <ZLInputStream.h> #include <ZLInputStream.h>
#include <ZLLogger.h> #include <ZLLogger.h>
#include <ZLImage.h> #include <ZLImage.h>
#include <ZLEncodingConverter.h>
#include "DocPlugin.h" #include "DocPlugin.h"
#include "DocMetaInfoReader.h" #include "DocMetaInfoReader.h"
#include "DocBookReader.h" #include "DocBookReader.h"
#include "DocReaderStream.h" #include "DocStreams.h"
#include "../../bookmodel/BookModel.h" #include "../../bookmodel/BookModel.h"
#include "../../library/Book.h" #include "../../library/Book.h"
@ -52,9 +53,10 @@ bool DocPlugin::readMetaInfo(Book &book) const {
return false; return false;
} }
shared_ptr<ZLInputStream> stream = new DocReaderStream(book.file(), 50000); shared_ptr<ZLInputStream> stream = new DocCharStream(book.file(), 50000);
if (!stream.isNull()) { if (!detectEncodingAndLanguage(book, *stream)) {
detectEncodingAndLanguage(book, *stream); stream = new DocAnsiStream(book.file(), 50000);
detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true);
} }
return true; return true;

View file

@ -1,178 +0,0 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <cstdlib>
#include <string>
#include "DocReaderStream.h"
#include "OleStreamReader.h"
// OleStreamReader subclass used to capture document bytes: dataHandler()
// copies incoming chunks into a caller-supplied buffer (up to maxSize bytes),
// while every handle*() callback is implemented as a no-op.
class DocTextOnlyReader : public OleStreamReader {
public:
// buffer  - destination for captured bytes; not freed by this class
// maxSize - capacity of buffer in bytes
DocTextOnlyReader(char *buffer, size_t maxSize);
~DocTextOnlyReader();
// Number of bytes captured so far.
size_t readSize() const;
private:
void dataHandler(const char *buffer, size_t len);
// The callbacks below are all empty in this class.
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
void handleHardLinebreak();
void handleParagraphEnd();
void handlePageBreak();
void handleTableSeparator();
void handleTableEndRow();
void handleFootNoteMark();
void handleStartField();
void handleSeparatorField();
void handleEndField();
void handleImage(const ZLFileImage::Blocks &blocks);
void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
void handleFontStyle(unsigned int fontStyle);
void handleParagraphStyle(const OleMainStream::Style &styleInfo);
void handleBookmark(const std::string &name);
private:
char *myBuffer;          // capture destination (caller-supplied)
const size_t myMaxSize;  // capacity of myBuffer
size_t myActualSize;     // bytes written to myBuffer so far
};
// Stores the capture buffer and its capacity; starts with zero captured bytes.
// The base reader is constructed with an empty encoding string.
DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) : OleStreamReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) {
}
// Nothing to release: the buffer is not freed here.
DocTextOnlyReader::~DocTextOnlyReader() {
}
// Copies as much of the incoming chunk as still fits into the capture buffer
// and then forwards the complete chunk to OleStreamReader::dataHandler().
//   buffer     - start of the chunk (raw bytes, not a C string)
//   dataLength - number of bytes in the chunk
void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) {
	// Once the buffer is full nothing more is captured; the chunk is still
	// forwarded to the base class below.
	if (myActualSize < myMaxSize) {
		const size_t len = std::min(dataLength, myMaxSize - myActualSize);
		// memcpy instead of strncpy: strncpy stops at the first NUL byte and
		// zero-fills the remainder, which would drop any data after a NUL.
		memcpy(myBuffer + myActualSize, buffer, len);
		myActualSize += len;
	}
	OleStreamReader::dataHandler(buffer, dataLength);
}
// All structural callbacks are intentionally empty in this reader: only the
// bytes delivered to dataHandler() are captured.
void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
}
void DocTextOnlyReader::handleHardLinebreak() {
}
void DocTextOnlyReader::handleParagraphEnd() {
}
void DocTextOnlyReader::handlePageBreak() {
}
void DocTextOnlyReader::handleTableSeparator() {
}
void DocTextOnlyReader::handleTableEndRow() {
}
void DocTextOnlyReader::handleFootNoteMark() {
}
void DocTextOnlyReader::handleStartField() {
}
void DocTextOnlyReader::handleSeparatorField() {
}
void DocTextOnlyReader::handleEndField() {
}
void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) {
}
void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
}
void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) {
}
void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
}
void DocTextOnlyReader::handleBookmark(const std::string &name) {
}
// Returns the number of bytes copied into the capture buffer so far.
size_t DocTextOnlyReader::readSize() const {
return myActualSize;
}
// Remembers the file and the maximum number of bytes to extract. No buffer is
// allocated until open(). myOffset is initialized here so that read(), seek()
// and offset() never observe an indeterminate value before open() succeeds.
DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
}
// Releases the buffer (if any) through close().
DocReaderStream::~DocReaderStream() {
	close();
}
bool DocReaderStream::open() {
if (mySize != 0) {
myBuffer = new char[mySize];
}
DocTextOnlyReader reader(myBuffer, mySize);
shared_ptr<ZLInputStream> stream = myFile.inputStream();
if (stream.isNull() || !stream->open()) {
return false;
}
if (!reader.readDocument(stream)) {
return false;
}
mySize = reader.readSize();
myOffset = 0;
return true;
}
// Reads up to maxSize bytes from the current position.
// The count is limited to the bytes remaining (mySize - myOffset). When either
// the destination or the internal buffer is null, nothing is copied but the
// position still advances. Returns the number of bytes consumed.
size_t DocReaderStream::read(char *buffer, size_t maxSize) {
	const size_t remaining = mySize - myOffset;
	const size_t count = (remaining < maxSize) ? remaining : maxSize;
	const bool canCopy = (buffer != 0) && (myBuffer != 0);
	if (canCopy) {
		memcpy(buffer, myBuffer + myOffset, count);
	}
	myOffset += count;
	return count;
}
// Frees the extracted data and marks the stream as having no buffer.
void DocReaderStream::close() {
	// delete[] on a null pointer is a no-op, so no explicit check is required.
	delete[] myBuffer;
	myBuffer = 0;
}
// Moves the read position.
//   offset         - target position, or a delta when absoluteOffset is false
//   absoluteOffset - false: offset is added to the current position
// The resulting position is clamped to the range [0, mySize].
void DocReaderStream::seek(int offset, bool absoluteOffset) {
if (!absoluteOffset) {
offset += myOffset;
}
myOffset = std::min(mySize, (size_t)std::max(0, offset));
}
// Returns the current read position.
size_t DocReaderStream::offset() const {
return myOffset;
}
// Returns mySize: the requested maximum before open(), and the number of
// captured bytes after a successful open().
size_t DocReaderStream::sizeOfOpened() {
return mySize;
}

View file

@ -0,0 +1,197 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <cstdlib>
#include <string>
#include "DocStreams.h"
#include "OleStreamReader.h"
// Base class for readers that capture document content into a caller-supplied
// byte buffer. Its own callbacks are all no-ops; subclasses override the one
// they want to capture from and update myActualSize.
class DocReader : public OleStreamReader {
public:
// buffer  - destination for captured bytes; not freed by this class
// maxSize - capacity of buffer in bytes
DocReader(char *buffer, size_t maxSize);
~DocReader();
// Number of bytes captured so far.
size_t readSize() const;
private:
// Reads pieces until the buffer is full or readNextPiece() fails.
bool readStream(OleMainStream &stream);
void dataHandler(const char *buffer, size_t len);
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
void footnoteHandler();
protected:
char *myBuffer;          // capture destination (caller-supplied)
const size_t myMaxSize;  // capacity of myBuffer
size_t myActualSize;     // bytes written to myBuffer so far
};
// DocReader that captures the byte chunks delivered to dataHandler().
class DocCharReader : public DocReader {
public:
DocCharReader(char *buffer, size_t maxSize);
~DocCharReader();
private:
void dataHandler(const char *buffer, size_t len);
};
// DocReader that captures the symbols delivered to ansiSymbolHandler(),
// storing each one as UTF-8.
class DocAnsiReader : public DocReader {
public:
DocAnsiReader(char *buffer, size_t maxSize);
~DocAnsiReader();
private:
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
};
// Stores the capture buffer and its capacity; starts with zero captured bytes.
DocReader::DocReader(char *buffer, size_t maxSize) : myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) {
}
// Nothing to release: the buffer is not freed here.
DocReader::~DocReader() {
}
// Keeps requesting pieces from the stream until either the capture buffer is
// full or readNextPiece() reports failure. Always returns true.
bool DocReader::readStream(OleMainStream &stream) {
	while (myActualSize < myMaxSize && readNextPiece(stream)) {
		// All work happens in the callbacks triggered by readNextPiece().
	}
	return true;
}
// Default callbacks: all empty. Subclasses override the one they capture from.
void DocReader::dataHandler(const char*, size_t) {
}
void DocReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char) {
}
void DocReader::footnoteHandler() {
}
// Returns the number of bytes written to the capture buffer so far.
size_t DocReader::readSize() const {
return myActualSize;
}
// Passes the capture buffer and its capacity on to DocReader.
DocCharReader::DocCharReader(char *buffer, size_t maxSize) : DocReader(buffer, maxSize) {
}
DocCharReader::~DocCharReader() {
}
// Appends as much of the incoming chunk as still fits into the capture buffer.
//   buffer     - start of the chunk (raw bytes, not a C string)
//   dataLength - number of bytes in the chunk
void DocCharReader::dataHandler(const char *buffer, size_t dataLength) {
	if (myActualSize >= myMaxSize) {
		// Buffer already full: ignore further data.
		return;
	}
	const size_t len = std::min(dataLength, myMaxSize - myActualSize);
	// memcpy instead of strncpy: strncpy stops at the first NUL byte and
	// zero-fills the remainder, which would drop any data after a NUL.
	memcpy(myBuffer + myActualSize, buffer, len);
	myActualSize += len;
}
// Passes the capture buffer and its capacity on to DocReader.
DocAnsiReader::DocAnsiReader(char *buffer, size_t maxSize) : DocReader(buffer, maxSize) {
}
DocAnsiReader::~DocAnsiReader() {
}
// Encodes the symbol as UTF-8 and appends the bytes to the capture buffer,
// limited to the space that is still free.
void DocAnsiReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
	if (myActualSize >= myMaxSize) {
		// Buffer already full: ignore further symbols.
		return;
	}
	char utf8[4];
	const size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(utf8, symbol);
	const size_t len = std::min(dataLength, myMaxSize - myActualSize);
	// memcpy instead of strncpy: utf8 holds raw bytes without a terminating
	// NUL, so it must not be treated as a C string.
	memcpy(myBuffer + myActualSize, utf8, len);
	myActualSize += len;
}
// Remembers the file and the maximum number of bytes to extract. No buffer is
// allocated until open(). myOffset is initialized here so that read(), seek()
// and offset() never observe an indeterminate value before open() succeeds.
DocStream::DocStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
}
// Releases the buffer (if any) through close().
DocStream::~DocStream() {
	close();
}
bool DocStream::open() {
if (mySize != 0) {
myBuffer = new char[mySize];
}
shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
shared_ptr<ZLInputStream> stream = myFile.inputStream();
if (stream.isNull() || !stream->open()) {
return false;
}
if (!reader->readDocument(stream)) {
return false;
}
mySize = reader->readSize();
myOffset = 0;
return true;
}
// Reads up to maxSize bytes from the current position.
// The count is limited to the bytes remaining (mySize - myOffset). When either
// the destination or the internal buffer is null, nothing is copied but the
// position still advances. Returns the number of bytes consumed.
size_t DocStream::read(char *buffer, size_t maxSize) {
	const size_t remaining = mySize - myOffset;
	const size_t count = (remaining < maxSize) ? remaining : maxSize;
	const bool canCopy = (buffer != 0) && (myBuffer != 0);
	if (canCopy) {
		memcpy(buffer, myBuffer + myOffset, count);
	}
	myOffset += count;
	return count;
}
// Frees the extracted data and marks the stream as having no buffer.
void DocStream::close() {
	// delete[] on a null pointer is a no-op, so no explicit check is required.
	delete[] myBuffer;
	myBuffer = 0;
}
// Moves the read position.
//   offset         - target position, or a delta when absoluteOffset is false
//   absoluteOffset - false: offset is added to the current position
// The resulting position is clamped to the range [0, mySize].
void DocStream::seek(int offset, bool absoluteOffset) {
if (!absoluteOffset) {
offset += myOffset;
}
myOffset = std::min(mySize, (size_t)std::max(0, offset));
}
// Returns the current read position.
size_t DocStream::offset() const {
return myOffset;
}
// Returns mySize: the requested maximum before open(), and the number of
// captured bytes after a successful open().
size_t DocStream::sizeOfOpened() {
return mySize;
}
// DocStream variant whose content is produced by a DocCharReader.
DocCharStream::DocCharStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
}
DocCharStream::~DocCharStream() {
}
// Creates the reader that fills the given buffer (capacity maxSize).
shared_ptr<DocReader> DocCharStream::createReader(char *buffer, size_t maxSize) {
return new DocCharReader(buffer, maxSize);
}
// DocStream variant whose content is produced by a DocAnsiReader.
DocAnsiStream::DocAnsiStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
}
DocAnsiStream::~DocAnsiStream() {
}
// Creates the reader that fills the given buffer (capacity maxSize).
shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, size_t maxSize) {
return new DocAnsiReader(buffer, maxSize);
}

View file

@ -17,19 +17,19 @@
* 02110-1301, USA. * 02110-1301, USA.
*/ */
#ifndef __DOCREADERSTREAM_H__ #ifndef __DOCSTREAMS_H__
#define __DOCREADERSTREAM_H__ #define __DOCSTREAMS_H__
#include <string>
#include <ZLFile.h> #include <ZLFile.h>
#include <ZLInputStream.h> #include <ZLInputStream.h>
class DocReaderStream : public ZLInputStream { class DocReader;
class DocStream : public ZLInputStream {
public: public:
DocReaderStream(const ZLFile& file, size_t maxSize); DocStream(const ZLFile& file, size_t maxSize);
~DocReaderStream(); ~DocStream();
private: private:
bool open(); bool open();
@ -40,6 +40,9 @@ private:
size_t offset() const; size_t offset() const;
size_t sizeOfOpened(); size_t sizeOfOpened();
protected:
virtual shared_ptr<DocReader> createReader(char *buffer, size_t maxSize) = 0;
private: private:
const ZLFile myFile; const ZLFile myFile;
char *myBuffer; char *myBuffer;
@ -47,4 +50,24 @@ private:
size_t myOffset; size_t myOffset;
}; };
#endif /* __DOCREADERSTREAM_H__ */ class DocCharStream : public DocStream {
public:
DocCharStream(const ZLFile& file, size_t maxSize);
~DocCharStream();
private:
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
};
class DocAnsiStream : public DocStream {
public:
DocAnsiStream(const ZLFile& file, size_t maxSize);
~DocAnsiStream();
private:
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
};
#endif /* __DOCSTREAMS_H__ */

View file

@ -0,0 +1,210 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
//#include <cctype>
//#include <cstring>
#include <ZLLogger.h>
#include "OleMainStream.h"
#include "OleUtil.h"
#include "OleStreamParser.h"
// Definitions of the character constants declared in OleStreamParser.
// Control characters used by Word documents:
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
// Placeholder characters that mark an image position in the text:
const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
// Plain Unicode values:
const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C;
// Starts parsing state from scratch: buffer position, character position and
// every "next entry" index begin at zero.
OleStreamParser::OleStreamParser() :
	myCurBufferPosition(0),
	myCurCharPos(0),
	myNextStyleInfoIndex(0),
	myNextCharInfoIndex(0),
	myNextBookmarkIndex(0),
	myNextInlineImageInfoIndex(0),
	myNextFloatImageInfoIndex(0) {
}
// Main parsing loop: pulls UCS-2 characters one at a time via getUcs2Char()
// and dispatches each to the matching handle*() callback. Runs until
// getUcs2Char() returns false; always returns true.
bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
ZLUnicodeUtil::Ucs2Char ucs2char;
// tabMode is set after a WORD_TABLE_SEPARATOR; the following character
// decides whether it was a cell separator or the end of a row.
bool tabMode = false;
while (getUcs2Char(oleMainStream, ucs2char)) {
if (tabMode) {
tabMode = false;
if (ucs2char == WORD_TABLE_SEPARATOR) {
// Two separators in a row: end of table row; the second one is consumed.
handleTableEndRow();
continue;
} else {
// Single separator: report it, then process the current character normally.
handleTableSeparator();
}
}
if (ucs2char < 32) {
// Control characters
switch (ucs2char) {
case NULL_SYMBOL:
break;
case WORD_HARD_LINEBREAK:
handleHardLinebreak();
break;
// NOTE(review): a page break is routed to handleParagraphEnd();
// handlePageBreak() is not called from this loop - confirm this is intended.
case WORD_END_OF_PARAGRAPH:
case WORD_PAGE_BREAK:
handleParagraphEnd();
break;
case WORD_TABLE_SEPARATOR:
tabMode = true;
break;
case WORD_FOOTNOTE_MARK:
handleFootNoteMark();
break;
case WORD_START_FIELD:
handleStartField();
break;
case WORD_SEPARATOR_FIELD:
handleSeparatorField();
break;
case WORD_END_FIELD:
handleEndField();
break;
// Image placeholders are already processed inside getUcs2Char().
case INLINE_IMAGE:
case FLOAT_IMAGE:
break;
default:
handleOtherControlChar(ucs2char);
break;
}
} else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
continue; //skip
} else {
handleChar(ucs2char);
}
}
return true;
}
// Fetches the next character into ucs2char.
// Refills myBuffer through readNextPiece() whenever it is exhausted, fires the
// style/bookmark callbacks registered for the current character position, and
// processes image placeholders. Returns false when readNextPiece() fails.
bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
// Loop rather than a single check: a piece may leave myBuffer empty.
while (myCurBufferPosition >= myBuffer.size()) {
myBuffer.clear();
myCurBufferPosition = 0;
if (!readNextPiece(stream)) {
return false;
}
}
ucs2char = myBuffer.at(myCurBufferPosition++);
// Styles are processed before myCurCharPos is advanced, so they are matched
// against the position of the character being returned.
processStyles(stream);
switch (ucs2char) {
case INLINE_IMAGE:
processInlineImage(stream);
break;
case FLOAT_IMAGE:
processFloatImage(stream);
break;
}
++myCurCharPos;
return true;
}
// Reports every inline image registered for the current character position
// through handleImage(). Entries whose image data comes back empty are
// skipped silently.
void OleStreamParser::processInlineImage(OleMainStream &stream) {
	const OleMainStream::InlineImageInfoList &entries = stream.getInlineImageInfoList();

	// Catch up with the current position first: not every placeholder
	// character corresponds to a real picture entry.
	for (; myNextInlineImageInfoIndex < entries.size(); ++myNextInlineImageInfoIndex) {
		if (!(entries.at(myNextInlineImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Emit all entries located exactly at the current position.
	for (; myNextInlineImageInfoIndex < entries.size(); ++myNextInlineImageInfoIndex) {
		if (!(entries.at(myNextInlineImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::InlineImageInfo imageInfo = entries.at(myNextInlineImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getInlineImage(imageInfo.DataPosition);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
// Reports every floating image registered for the current character position
// through handleImage(). Entries whose image data comes back empty are
// skipped silently.
void OleStreamParser::processFloatImage(OleMainStream &stream) {
	const OleMainStream::FloatImageInfoList &entries = stream.getFloatImageInfoList();

	// Catch up with the current position first: not every placeholder
	// character corresponds to a real picture entry.
	for (; myNextFloatImageInfoIndex < entries.size(); ++myNextFloatImageInfoIndex) {
		if (!(entries.at(myNextFloatImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Emit all entries located exactly at the current position.
	for (; myNextFloatImageInfoIndex < entries.size(); ++myNextFloatImageInfoIndex) {
		if (!(entries.at(myNextFloatImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::FloatImageInfo imageInfo = entries.at(myNextFloatImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getFloatImage(imageInfo.ShapeId);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
// Fires the callbacks for all paragraph styles, character styles and bookmarks
// registered at the current character position, in that order. Each list is
// walked with its own "next entry" index, which only advances past entries
// that match the current position.
void OleStreamParser::processStyles(OleMainStream &stream) {
	// Paragraph styles
	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
	for (; myNextStyleInfoIndex < paragraphStyles.size(); ++myNextStyleInfoIndex) {
		if (!(paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
		handleParagraphStyle(style);
	}

	// Character (font) styles
	const OleMainStream::CharInfoList &charStyles = stream.getCharInfoList();
	for (; myNextCharInfoIndex < charStyles.size(); ++myNextCharInfoIndex) {
		if (!(charStyles.at(myNextCharInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::CharInfo charInfo = charStyles.at(myNextCharInfoIndex).second;
		handleFontStyle(charInfo.FontStyle);
	}

	// Bookmarks
	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
	for (; myNextBookmarkIndex < bookmarks.size(); ++myNextBookmarkIndex) {
		if (!(bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos)) {
			break;
		}
		OleMainStream::Bookmark mark = bookmarks.at(myNextBookmarkIndex);
		handleBookmark(mark.Name);
	}
}

View file

@ -0,0 +1,101 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OLESTREAMPARSER_H__
#define __OLESTREAMPARSER_H__
#include <ZLUnicodeUtil.h>
#include "OleMainStream.h"
#include "OleStreamReader.h"
// OleStreamReader that turns the character stream of a document into a
// sequence of handle*() callbacks. Subclasses implement the pure virtual
// handlers below; readStream() drives the parsing.
class OleStreamParser : public OleStreamReader {
public:
//word's control chars:
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
//unicode values:
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
static const ZLUnicodeUtil::Ucs2Char SPACE;
static const ZLUnicodeUtil::Ucs2Char MINUS;
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
public:
OleStreamParser();
private:
// Parsing loop; dispatches every character to the handlers below.
bool readStream(OleMainStream &stream);
protected:
// Callbacks invoked while parsing; all must be provided by subclasses.
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
virtual void handleHardLinebreak() = 0;
virtual void handleParagraphEnd() = 0;
virtual void handlePageBreak() = 0;
virtual void handleTableSeparator() = 0;
virtual void handleTableEndRow() = 0;
virtual void handleFootNoteMark() = 0;
virtual void handleStartField() = 0;
virtual void handleSeparatorField() = 0;
virtual void handleEndField() = 0;
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
virtual void handleFontStyle(unsigned int fontStyle) = 0;
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
virtual void handleBookmark(const std::string &name) = 0;
private:
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
void processInlineImage(OleMainStream &stream);
void processFloatImage(OleMainStream &stream);
void processStyles(OleMainStream &stream);
private:
protected:
// Characters of the piece currently being consumed; protected so that
// subclasses can fill it from their data callbacks.
ZLUnicodeUtil::Ucs2String myBuffer;
private:
size_t myCurBufferPosition;        // index of the next character in myBuffer
unsigned int myCurCharPos;         // count of characters returned so far
size_t myNextStyleInfoIndex;       // next unprocessed paragraph style entry
size_t myNextCharInfoIndex;        // next unprocessed character style entry
size_t myNextBookmarkIndex;        // next unprocessed bookmark
size_t myNextInlineImageInfoIndex; // next unprocessed inline image entry
size_t myNextFloatImageInfoIndex;  // next unprocessed float image entry
};
#endif /* __OLESTREAMPARSER_H__ */

View file

@ -17,59 +17,13 @@
* 02110-1301, USA. * 02110-1301, USA.
*/ */
#include <cctype>
#include <cstring>
#include <ZLLogger.h> #include <ZLLogger.h>
#include "OleMainStream.h" #include "OleMainStream.h"
#include "DocBookReader.h"
#include "OleUtil.h" #include "OleUtil.h"
#include "DocInlineImageReader.h"
#include "OleStreamReader.h" #include "OleStreamReader.h"
//word's control chars: OleStreamReader::OleStreamReader() : myNextPieceNumber(0) {
// Definitions of the Ucs2Char constants that OleStreamReader declares as
// static members. The WORD_* values are Word's control characters as they
// appear in the decoded text.
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_FOOTNOTE_MARK = 0x0002;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_TABLE_SEPARATOR = 0x0007;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
// Field delimiters: start, separator and end of a field.
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_FIELD = 0x0015;
// Not a control character (>= 32); readStream() skips it without reporting it.
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
// Placeholder characters at which getUcs2Char() triggers inline / float
// image processing.
const ZLUnicodeUtil::Ucs2Char OleStreamReader::INLINE_IMAGE = 0x0001;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FLOAT_IMAGE = 0x0008;
// Plain Unicode code points:
const ZLUnicodeUtil::Ucs2Char OleStreamReader::NULL_SYMBOL = 0x0;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D;
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
// Constructs a reader for text in the given encoding.
// The encoding name is only stored here; the converter for it is created
// lazily on first use. All reading state is reset through clear().
OleStreamReader::OleStreamReader(const std::string &encoding) :
myEncoding(encoding) {
clear();
}
void OleStreamReader::clear() {
myBuffer.clear();
myCurBufferPosition = 0;
myNextPieceNumber = 0;
myCurCharPos = 0;
myNextStyleInfoIndex = 0;
myNextCharInfoIndex = 0;
myNextBookmarkIndex = 0;
myNextInlineImageInfoIndex = 0;
myNextFloatImageInfoIndex = 0;
} }
bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) { bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
@ -78,7 +32,7 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
shared_ptr<OleStorage> storage = new OleStorage; shared_ptr<OleStorage> storage = new OleStorage;
if (!storage->init(inputStream, inputStream->sizeOfOpened())) { if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
ZLLogger::Instance().println("DocBookReader", "Broken OLE file!"); ZLLogger::Instance().println("OleStreamReader", "Broken OLE file");
return false; return false;
} }
@ -88,176 +42,22 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
} }
OleMainStream oleStream(storage, wordDocumentEntry, inputStream); OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
if (!oleStream.open()) {
ZLLogger::Instance().println("OleStreamReader", "Cannot open OleMainStream");
return false;
}
return readStream(oleStream); return readStream(oleStream);
} }
bool OleStreamReader::readStream(OleMainStream &oleMainStream) { bool OleStreamReader::readNextPiece(OleMainStream &stream) {
clear();
if (!oleMainStream.open()) {
ZLLogger::Instance().println("OleStreamReader", "doesn't open correct");
return false;
}
ZLUnicodeUtil::Ucs2Char ucs2char;
bool tabMode = false;
while (getUcs2Char(oleMainStream, ucs2char)) {
if (ucs2char < 32) { //< 32 are control symbols
//printf("[0x%x]", ucs2char); //debug output
}
if (tabMode) {
tabMode = false;
if (ucs2char == WORD_TABLE_SEPARATOR) {
handleTableEndRow();
continue;
} else {
handleTableSeparator();
}
}
if (ucs2char < 32) {
switch (ucs2char) {
case NULL_SYMBOL:
break;
case WORD_HARD_LINEBREAK:
//printf("\n");
handleHardLinebreak();
break;
case WORD_END_OF_PARAGRAPH:
case WORD_PAGE_BREAK:
//printf("\n");
handleParagraphEnd();
break;
case WORD_TABLE_SEPARATOR:
tabMode = true;
break;
case WORD_FOOTNOTE_MARK:
handleFootNoteMark();
break;
case WORD_START_FIELD:
handleStartField();
break;
case WORD_SEPARATOR_FIELD:
handleSeparatorField();
break;
case WORD_END_FIELD:
handleEndField();
break;
case INLINE_IMAGE: case FLOAT_IMAGE:
break;
default:
handleOtherControlChar(ucs2char);
break;
}
} else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
continue; //skip
} else {
//debug output
// std::string utf8String;
// ZLUnicodeUtil::Ucs2String ucs2String;
// ucs2String.push_back(ucs2char);
// ZLUnicodeUtil::ucs2ToUtf8(utf8String, ucs2String);
// printf("%s", utf8String.c_str());
handleChar(ucs2char);
}
}
return true;
}
// Fetches the next UCS-2 character of the document into `ucs2char`.
//
// When the internal buffer is exhausted it is refilled from the stream's
// pieces. For every character returned, the style / char-info / bookmark
// records registered for the current character position are dispatched,
// image processing is triggered on the INLINE_IMAGE / FLOAT_IMAGE markers,
// and the character position counter is advanced by one.
//
// Returns false once fillBuffer() reports that nothing more can be read;
// `ucs2char` is left unmodified in that case.
bool OleStreamReader::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
	// fillBuffer() reports success even when the piece it loaded produced no
	// characters (e.g. zero bytes were read). A single refill attempt would
	// then index an empty buffer, so keep loading pieces until one yields
	// data. The loop terminates because every successful fillBuffer() call
	// consumes one piece and it returns false when no pieces are left.
	while (myCurBufferPosition >= myBuffer.size()) {
		if (!fillBuffer(stream)) {
			return false;
		}
	}
	ucs2char = myBuffer.at(myCurBufferPosition++);

	// Style records are matched against the position of the character that
	// was just fetched, so they are processed before the counter is advanced.
	processStyles(stream);
	if (ucs2char == INLINE_IMAGE) {
		processInlineImage(stream);
	} else if (ucs2char == FLOAT_IMAGE) {
		processFloatImage(stream);
	}
	++myCurCharPos;
	return true;
}
// Reports every inline image whose record is anchored at the current
// character position via handleImage(). Records that resolve to an empty
// block list are consumed silently.
void OleStreamReader::processInlineImage(OleMainStream &stream) {
	const OleMainStream::InlineImageInfoList &records = stream.getInlineImageInfoList();

	// Fast-forward past records located before the current position:
	// not every record corresponds to a real picture.
	for (; myNextInlineImageInfoIndex < records.size(); ++myNextInlineImageInfoIndex) {
		if (!(records.at(myNextInlineImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Consume all records that sit exactly at the current position.
	for (; myNextInlineImageInfoIndex < records.size(); ++myNextInlineImageInfoIndex) {
		if (records.at(myNextInlineImageInfoIndex).first != myCurCharPos) {
			break;
		}
		OleMainStream::InlineImageInfo record = records.at(myNextInlineImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getInlineImage(record.DataPosition);
		if (blocks.empty()) {
			continue;
		}
		handleImage(blocks);
	}
}
// Reports every floating image whose record is anchored at the current
// character position via handleImage(). Records that resolve to an empty
// block list are consumed silently.
void OleStreamReader::processFloatImage(OleMainStream &stream) {
	const OleMainStream::FloatImageInfoList &records = stream.getFloatImageInfoList();

	// Fast-forward past records located before the current position:
	// not every record corresponds to a real picture.
	for (; myNextFloatImageInfoIndex < records.size(); ++myNextFloatImageInfoIndex) {
		if (!(records.at(myNextFloatImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Consume all records that sit exactly at the current position.
	for (; myNextFloatImageInfoIndex < records.size(); ++myNextFloatImageInfoIndex) {
		if (records.at(myNextFloatImageInfoIndex).first != myCurCharPos) {
			break;
		}
		OleMainStream::FloatImageInfo record = records.at(myNextFloatImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getFloatImage(record.ShapeId);
		if (blocks.empty()) {
			continue;
		}
		handleImage(blocks);
	}
}
// Dispatches the records attached to the current character position, in this
// order: paragraph styles, character (font) styles, bookmarks. Each list has
// its own cursor, which only advances over records matching the position.
void OleStreamReader::processStyles(OleMainStream &stream) {
	// Paragraph styles.
	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
	for (; myNextStyleInfoIndex < paragraphStyles.size(); ++myNextStyleInfoIndex) {
		if (paragraphStyles.at(myNextStyleInfoIndex).first != myCurCharPos) {
			break;
		}
		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
		handleParagraphStyle(style);
	}

	// Character (font) styles.
	const OleMainStream::CharInfoList &charInfos = stream.getCharInfoList();
	for (; myNextCharInfoIndex < charInfos.size(); ++myNextCharInfoIndex) {
		if (charInfos.at(myNextCharInfoIndex).first != myCurCharPos) {
			break;
		}
		OleMainStream::CharInfo charInfo = charInfos.at(myNextCharInfoIndex).second;
		handleFontStyle(charInfo.FontStyle);
	}

	// Bookmarks.
	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
	for (; myNextBookmarkIndex < bookmarks.size(); ++myNextBookmarkIndex) {
		if (bookmarks.at(myNextBookmarkIndex).CharPosition != myCurCharPos) {
			break;
		}
		OleMainStream::Bookmark mark = bookmarks.at(myNextBookmarkIndex);
		handleBookmark(mark.Name);
	}
}
bool OleStreamReader::fillBuffer(OleMainStream &stream) {
const OleMainStream::Pieces &pieces = stream.getPieces(); const OleMainStream::Pieces &pieces = stream.getPieces();
if (myNextPieceNumber >= pieces.size()) { if (myNextPieceNumber >= pieces.size()) {
return false; //end of reading return false;
} }
const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber); const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) { if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
handlePageBreak(); footnoteHandler();
} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) { } else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
return false; return false;
} }
@ -272,32 +72,15 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece"); ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece");
} }
myBuffer.clear();
if (!piece.IsANSI) { if (!piece.IsANSI) {
for (size_t i = 0; i < readBytes; i += 2) { for (size_t i = 0; i < readBytes; i += 2) {
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i); ansiSymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
myBuffer.push_back(ch);
} }
} else { } else {
dataHandler(textBuffer, readBytes); dataHandler(textBuffer, readBytes);
} }
myCurBufferPosition = 0;
++myNextPieceNumber; ++myNextPieceNumber;
delete[] textBuffer; delete[] textBuffer;
return true; return true;
} }
void OleStreamReader::dataHandler(const char *buffer, size_t len) {
if (myConverter.isNull()) {
// lazy converter initialization
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
myConverter = collection.converter(myEncoding);
if (myConverter.isNull()) {
myConverter = collection.defaultConverter();
}
}
std::string utf8String;
myConverter->convert(utf8String, buffer, buffer + len);
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
}

View file

@ -21,89 +21,26 @@
#define __OLESTREAMREADER_H__ #define __OLESTREAMREADER_H__
#include <ZLUnicodeUtil.h> #include <ZLUnicodeUtil.h>
#include <ZLEncodingConverter.h>
#include "OleMainStream.h" #include "OleMainStream.h"
class OleStreamReader { class OleStreamReader {
public: public:
//word's control chars: OleStreamReader();
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
//unicode values:
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
static const ZLUnicodeUtil::Ucs2Char SPACE;
static const ZLUnicodeUtil::Ucs2Char MINUS;
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
public:
OleStreamReader(const std::string &encoding);
bool readDocument(shared_ptr<ZLInputStream> stream); bool readDocument(shared_ptr<ZLInputStream> stream);
void clear();
private:
bool readStream(OleMainStream &stream);
protected: protected:
virtual void dataHandler(const char *buffer, size_t len); virtual bool readStream(OleMainStream &stream) = 0;
//virtual void parapgraphHandler(std::string paragraph) = 0; bool readNextPiece(OleMainStream &stream);
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
virtual void handleHardLinebreak() = 0;
virtual void handleParagraphEnd() = 0;
virtual void handlePageBreak() = 0;
virtual void handleTableSeparator() = 0;
virtual void handleTableEndRow() = 0;
virtual void handleFootNoteMark() = 0;
virtual void handleStartField() = 0;
virtual void handleSeparatorField() = 0;
virtual void handleEndField() = 0;
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
virtual void handleFontStyle(unsigned int fontStyle) = 0; virtual void dataHandler(const char *buffer, size_t len) = 0;
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0; virtual void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
virtual void handleBookmark(const std::string &name) = 0; virtual void footnoteHandler() = 0;
private: private:
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
void processInlineImage(OleMainStream &stream);
void processFloatImage(OleMainStream &stream);
void processStyles(OleMainStream &stream);
bool fillBuffer(OleMainStream &stream);
private:
ZLUnicodeUtil::Ucs2String myBuffer;
size_t myCurBufferPosition;
size_t myNextPieceNumber; size_t myNextPieceNumber;
shared_ptr<ZLEncodingConverter> myConverter;
const std::string myEncoding;
unsigned int myCurCharPos;
size_t myNextStyleInfoIndex;
size_t myNextCharInfoIndex;
size_t myNextBookmarkIndex;
size_t myNextInlineImageInfoIndex;
size_t myNextFloatImageInfoIndex;
}; };
#endif /* __OLESTREAMREADER_H__ */ #endif /* __OLESTREAMREADER_H__ */

View file

@ -135,7 +135,7 @@ shared_ptr<const ZLImage> OEBPlugin::coverImage(const ZLFile &file) const {
bool OEBPlugin::readLanguageAndEncoding(Book &book) const { bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
if (book.language().empty()) { if (book.language().empty()) {
shared_ptr<ZLInputStream> oebStream = new OEBTextStream(opfFile(book.file())); shared_ptr<ZLInputStream> oebStream = new OEBTextStream(opfFile(book.file()));
detectLanguage(book, *oebStream); detectLanguage(book, *oebStream, book.encoding());
} }
return true; return true;
} }

View file

@ -48,7 +48,7 @@ void RtfBookReader::addCharData(const char *data, size_t len, bool convert) {
void RtfBookReader::flushBuffer() { void RtfBookReader::flushBuffer() {
if (!myOutputBuffer.empty()) { if (!myOutputBuffer.empty()) {
if (myCurrentState.ReadText) { if (myCurrentState.ReadText) {
if (!myConverter.isNull()) { if (!myConverter.isNull()) {
static std::string newString; static std::string newString;
myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length()); myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length());
@ -87,27 +87,27 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) {
if (on) { if (on) {
std::string id; std::string id;
ZLStringUtil::appendNumber(id, myFootnoteIndex++); ZLStringUtil::appendNumber(id, myFootnoteIndex++);
myStateStack.push(myCurrentState); myStateStack.push(myCurrentState);
myCurrentState.Id = id; myCurrentState.Id = id;
myCurrentState.ReadText = true; myCurrentState.ReadText = true;
myBookReader.addHyperlinkControl(FOOTNOTE, id); myBookReader.addHyperlinkControl(FOOTNOTE, id);
myBookReader.addData(id); myBookReader.addData(id);
myBookReader.addControl(FOOTNOTE, false); myBookReader.addControl(FOOTNOTE, false);
myBookReader.setFootnoteTextModel(id); myBookReader.setFootnoteTextModel(id);
myBookReader.pushKind(REGULAR); myBookReader.pushKind(REGULAR);
myBookReader.beginParagraph(); myBookReader.beginParagraph();
} else { } else {
myBookReader.endParagraph(); myBookReader.endParagraph();
myBookReader.popKind(); myBookReader.popKind();
if (!myStateStack.empty()) { if (!myStateStack.empty()) {
myCurrentState = myStateStack.top(); myCurrentState = myStateStack.top();
myStateStack.pop(); myStateStack.pop();
} }
if (myStateStack.empty()) { if (myStateStack.empty()) {
myBookReader.setMainTextModel(); myBookReader.setMainTextModel();
} else { } else {
@ -121,7 +121,7 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) {
void RtfBookReader::insertImage(const std::string &mimeType, const std::string &fileName, size_t startOffset, size_t size) { void RtfBookReader::insertImage(const std::string &mimeType, const std::string &fileName, size_t startOffset, size_t size) {
std::string id; std::string id;
ZLStringUtil::appendNumber(id, myImageIndex++); ZLStringUtil::appendNumber(id, myImageIndex++);
myBookReader.addImageReference(id, 0, false); myBookReader.addImageReference(id, 0, false);
const ZLFile file(fileName, mimeType); const ZLFile file(fileName, mimeType);
myBookReader.addImage(id, new ZLFileImage(file, "hex", startOffset, size)); myBookReader.addImage(id, new ZLFileImage(file, "hex", startOffset, size));
} }
@ -163,7 +163,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
return; return;
} }
flushBuffer(); flushBuffer();
switch (property) { switch (property) {
case FONT_BOLD: case FONT_BOLD:
if (myState.Bold) { if (myState.Bold) {
@ -175,7 +175,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
break; break;
case FONT_ITALIC: case FONT_ITALIC:
if (myState.Italic) { if (myState.Italic) {
if (!myState.Bold) { if (!myState.Bold) {
//DPRINT("add style emphasis.\n"); //DPRINT("add style emphasis.\n");
myBookReader.pushKind(EMPHASIS); myBookReader.pushKind(EMPHASIS);
myBookReader.addControl(EMPHASIS, true); myBookReader.addControl(EMPHASIS, true);
@ -183,14 +183,14 @@ void RtfBookReader::setFontProperty(FontProperty property) {
//DPRINT("add style emphasis and strong.\n"); //DPRINT("add style emphasis and strong.\n");
myBookReader.popKind(); myBookReader.popKind();
myBookReader.addControl(STRONG, false); myBookReader.addControl(STRONG, false);
myBookReader.pushKind(EMPHASIS); myBookReader.pushKind(EMPHASIS);
myBookReader.addControl(EMPHASIS, true); myBookReader.addControl(EMPHASIS, true);
myBookReader.pushKind(STRONG); myBookReader.pushKind(STRONG);
myBookReader.addControl(STRONG, true); myBookReader.addControl(STRONG, true);
} }
} else { } else {
if (!myState.Bold) { if (!myState.Bold) {
//DPRINT("remove style emphasis.\n"); //DPRINT("remove style emphasis.\n");
myBookReader.addControl(EMPHASIS, false); myBookReader.addControl(EMPHASIS, false);
myBookReader.popKind(); myBookReader.popKind();
@ -200,7 +200,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
myBookReader.popKind(); myBookReader.popKind();
myBookReader.addControl(EMPHASIS, false); myBookReader.addControl(EMPHASIS, false);
myBookReader.popKind(); myBookReader.popKind();
myBookReader.pushKind(STRONG); myBookReader.pushKind(STRONG);
myBookReader.addControl(STRONG, true); myBookReader.addControl(STRONG, true);
} }

View file

@ -46,7 +46,7 @@ bool RtfPlugin::readMetaInfo(Book &book) const {
} else if (book.language().empty()) { } else if (book.language().empty()) {
shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000); shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000);
if (!stream.isNull()) { if (!stream.isNull()) {
detectLanguage(book, *stream); detectLanguage(book, *stream, book.encoding());
} }
} }

View file

@ -41,7 +41,9 @@ friend class DummyEncodingConverterProvider;
bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) { bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) {
const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding); const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding);
return (lowerCasedEncoding == "utf-8") || (lowerCasedEncoding == "us-ascii"); return
lowerCasedEncoding == ZLEncodingConverter::UTF8 ||
lowerCasedEncoding == ZLEncodingConverter::ASCII;
} }
shared_ptr<ZLEncodingConverter> DummyEncodingConverterProvider::createConverter(const std::string &name) { shared_ptr<ZLEncodingConverter> DummyEncodingConverterProvider::createConverter(const std::string &name) {

View file

@ -20,6 +20,8 @@
#include "ZLEncodingConverter.h" #include "ZLEncodingConverter.h"
#include "ZLEncodingConverterProvider.h" #include "ZLEncodingConverterProvider.h"
const std::string ZLEncodingConverter::ASCII = "us-ascii";
const std::string ZLEncodingConverter::UTF8 = "utf-8";
const std::string ZLEncodingConverter::UTF16 = "utf-16"; const std::string ZLEncodingConverter::UTF16 = "utf-16";
const std::string ZLEncodingConverter::UTF16BE = "utf-16be"; const std::string ZLEncodingConverter::UTF16BE = "utf-16be";

View file

@ -29,6 +29,8 @@
class ZLEncodingConverter { class ZLEncodingConverter {
public: public:
static const std::string ASCII;
static const std::string UTF8;
static const std::string UTF16; static const std::string UTF16;
static const std::string UTF16BE; static const std::string UTF16BE;