From 8ac2815d94eade6ab4d4c08c99ab24649d7b39dc Mon Sep 17 00:00:00 2001 From: Nikolay Pultsin Date: Sat, 27 Oct 2012 02:30:03 +0400 Subject: [PATCH] synchronization with C++ version --- jni/Android.mk | 3 +- .../fbreader/src/formats/FormatPlugin.cpp | 28 ++- .../fbreader/src/formats/FormatPlugin.h | 4 +- .../src/formats/doc/DocBookReader.cpp | 26 +- .../fbreader/src/formats/doc/DocBookReader.h | 12 +- .../fbreader/src/formats/doc/DocPlugin.cpp | 10 +- .../src/formats/doc/DocReaderStream.cpp | 178 ------------- .../fbreader/src/formats/doc/DocStreams.cpp | 197 +++++++++++++++ .../doc/{DocReaderStream.h => DocStreams.h} | 39 ++- .../src/formats/doc/OleStreamParser.cpp | 210 ++++++++++++++++ .../src/formats/doc/OleStreamParser.h | 101 ++++++++ .../src/formats/doc/OleStreamReader.cpp | 237 +----------------- .../src/formats/doc/OleStreamReader.h | 75 +----- .../fbreader/src/formats/oeb/OEBPlugin.cpp | 2 +- .../src/formats/rtf/RtfBookReader.cpp | 26 +- .../fbreader/src/formats/rtf/RtfPlugin.cpp | 2 +- .../src/encoding/DummyEncodingConverter.cpp | 4 +- .../core/src/encoding/ZLEncodingConverter.cpp | 2 + .../core/src/encoding/ZLEncodingConverter.h | 2 + 19 files changed, 640 insertions(+), 518 deletions(-) delete mode 100644 jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp create mode 100644 jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp rename jni/NativeFormats/fbreader/src/formats/doc/{DocReaderStream.h => DocStreams.h} (61%) create mode 100644 jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp create mode 100644 jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h diff --git a/jni/Android.mk b/jni/Android.mk index b968141dd..7521ba9a9 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -128,10 +128,11 @@ LOCAL_SRC_FILES := \ NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \ - NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \ + NativeFormats/fbreader/src/formats/doc/DocStreams.cpp \ NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \ NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \ NativeFormats/fbreader/src/formats/doc/OleStream.cpp \ + NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp \ NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \ NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \ NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \ diff --git a/jni/NativeFormats/fbreader/src/formats/FormatPlugin.cpp b/jni/NativeFormats/fbreader/src/formats/FormatPlugin.cpp index 9e69af1f3..3c8cba3cd 100644 --- a/jni/NativeFormats/fbreader/src/formats/FormatPlugin.cpp +++ b/jni/NativeFormats/fbreader/src/formats/FormatPlugin.cpp @@ -22,22 +22,24 @@ #include #include #include +#include #include "FormatPlugin.h" #include "../library/Book.h" -void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) { +bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) { std::string language = book.language(); std::string encoding = book.encoding(); - if (!encoding.empty()) { - return; + if (!force && !encoding.empty()) { + return true; } + bool detected = false; PluginCollection &collection = PluginCollection::Instance(); if (encoding.empty()) { - encoding = "utf-8"; + encoding = ZLEncodingConverter::UTF8; } if (collection.isLanguageAutoDetectEnabled() && stream.open()) { static const int BUFSIZE = 65536; @@ -47,25 +49,30 @@ void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) shared_ptr info = ZLLanguageDetector().findInfo(buffer, size); delete[] buffer; if (!info.isNull()) { + detected = true; if (!info->Language.empty()) { language = info->Language; } encoding = info->Encoding; - if ((encoding == "us-ascii") || (encoding == "iso-8859-1")) { + if (encoding == ZLEncodingConverter::ASCII || encoding == "iso-8859-1") { encoding = "windows-1252"; } } } book.setEncoding(encoding); book.setLanguage(language); + + return detected; } -void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) { +bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) { std::string language = book.language(); - if (!language.empty()) { - return; + if (!force && !language.empty()) { + return true; } + bool detected = false; + PluginCollection &collection = PluginCollection::Instance(); if (collection.isLanguageAutoDetectEnabled() && stream.open()) { static const int BUFSIZE = 65536; @@ -73,15 +80,18 @@ void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) { const size_t size = stream.read(buffer, BUFSIZE); stream.close(); shared_ptr info = - ZLLanguageDetector().findInfoForEncoding(book.encoding(), buffer, size, -20000); + ZLLanguageDetector().findInfoForEncoding(encoding, buffer, size, -20000); delete[] buffer; if (!info.isNull()) { + detected = true; if (!info->Language.empty()) { language = info->Language; } } } book.setLanguage(language); + + return detected; } const std::string &FormatPlugin::tryOpen(const ZLFile&) const { diff --git a/jni/NativeFormats/fbreader/src/formats/FormatPlugin.h b/jni/NativeFormats/fbreader/src/formats/FormatPlugin.h index 269137f34..63bde642e 100644 --- a/jni/NativeFormats/fbreader/src/formats/FormatPlugin.h +++ b/jni/NativeFormats/fbreader/src/formats/FormatPlugin.h @@ -63,8 +63,8 @@ public: virtual shared_ptr coverImage(const ZLFile &file) const; protected: - static void detectEncodingAndLanguage(Book &book, ZLInputStream &stream); - static void detectLanguage(Book &book, ZLInputStream &stream); + static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false); + static bool detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force = false); }; class PluginCollection { diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp index 96a13f5b6..7234ff9f1 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp @@ -34,9 +34,9 @@ #include "OleMainStream.h" DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) : - OleStreamReader(encoding), myModelReader(model), - myPictureCounter(0) { + myPictureCounter(0), + myEncoding(encoding) { myReadState = READ_TEXT; } @@ -355,3 +355,25 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode ZLUnicodeUtil::ucs2ToUtf8(utf8String, link); return utf8String; } + +void DocBookReader::footnoteHandler() { + handlePageBreak(); +} + +void DocBookReader::dataHandler(const char *buffer, size_t len) { + if (myConverter.isNull()) { + // lazy converter initialization + const ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); + myConverter = collection.converter(myEncoding); + if (myConverter.isNull()) { + myConverter = collection.defaultConverter(); + } + } + std::string utf8String; + myConverter->convert(utf8String, buffer, buffer + len); + ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); +} + +void DocBookReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) { + myBuffer.push_back(symbol); +} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h index a8e21c921..b020e4fab 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h @@ -25,13 +25,14 @@ #include #include #include +#include #include "../../bookmodel/BookReader.h" #include "OleMainStream.h" -#include "OleStreamReader.h" +#include "OleStreamParser.h" -class DocBookReader : public OleStreamReader { +class DocBookReader : public OleStreamParser { public: DocBookReader(BookModel &model, const std::string &encoding); @@ -39,6 +40,10 @@ public: bool readBook(); private: + void dataHandler(const char *buffer, size_t len); + void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); + void footnoteHandler(); + void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); void handleHardLinebreak(); void handleParagraphEnd(); @@ -88,6 +93,9 @@ private: shared_ptr myCurrentStyleEntry; OleMainStream::Style myCurrentStyleInfo; unsigned int myPictureCounter; + + const std::string myEncoding; + shared_ptr myConverter; }; inline DocBookReader::~DocBookReader() {} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp index 32d21d33f..0cfe02ea7 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp @@ -21,11 +21,12 @@ #include #include #include +#include #include "DocPlugin.h" #include "DocMetaInfoReader.h" #include "DocBookReader.h" -#include "DocReaderStream.h" +#include "DocStreams.h" #include "../../bookmodel/BookModel.h" #include "../../library/Book.h" @@ -52,9 +53,10 @@ bool DocPlugin::readMetaInfo(Book &book) const { return false; } - shared_ptr stream = new DocReaderStream(book.file(), 50000); - if (!stream.isNull()) { - detectEncodingAndLanguage(book, *stream); + shared_ptr stream = new DocCharStream(book.file(), 50000); + if (!detectEncodingAndLanguage(book, *stream)) { + stream = new DocAnsiStream(book.file(), 50000); + detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true); } return true; diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp deleted file mode 100644 index e6a3cd236..000000000 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include -#include -#include - -#include "DocReaderStream.h" -#include "OleStreamReader.h" - -class DocTextOnlyReader : public OleStreamReader { - -public: - DocTextOnlyReader(char *buffer, size_t maxSize); - ~DocTextOnlyReader(); - size_t readSize() const; - -private: - void dataHandler(const char *buffer, size_t len); - - void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); - void handleHardLinebreak(); - void handleParagraphEnd(); - void handlePageBreak(); - void handleTableSeparator(); - void handleTableEndRow(); - void handleFootNoteMark(); - void handleStartField(); - void handleSeparatorField(); - void handleEndField(); - void handleImage(const ZLFileImage::Blocks &blocks); - void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char); - void handleFontStyle(unsigned int fontStyle); - void handleParagraphStyle(const OleMainStream::Style &styleInfo); - void handleBookmark(const std::string &name); - -private: - char *myBuffer; - const size_t myMaxSize; - size_t myActualSize; -}; - -DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) : OleStreamReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) { -} - -DocTextOnlyReader::~DocTextOnlyReader() { -} - -void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) { - if (myActualSize >= myMaxSize) { - // break stream reading - } else { - const size_t len = std::min(dataLength, myMaxSize - myActualSize); - strncpy(myBuffer + myActualSize, buffer, len); - myActualSize += len; - } - OleStreamReader::dataHandler(buffer, dataLength); -} - -void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) { -} - -void DocTextOnlyReader::handleHardLinebreak() { -} - -void DocTextOnlyReader::handleParagraphEnd() { -} - -void DocTextOnlyReader::handlePageBreak() { -} - -void DocTextOnlyReader::handleTableSeparator() { -} - -void DocTextOnlyReader::handleTableEndRow() { -} - -void DocTextOnlyReader::handleFootNoteMark() { -} - -void DocTextOnlyReader::handleStartField() { -} - -void DocTextOnlyReader::handleSeparatorField() { -} - -void DocTextOnlyReader::handleEndField() { -} - -void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) { -} - -void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) { -} - -void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) { -} - -void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) { -} - -void DocTextOnlyReader::handleBookmark(const std::string &name) { -} - -size_t DocTextOnlyReader::readSize() const { - return myActualSize; -} - -DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) { -} - -DocReaderStream::~DocReaderStream() { - close(); -} - -bool DocReaderStream::open() { - if (mySize != 0) { - myBuffer = new char[mySize]; - } - DocTextOnlyReader reader(myBuffer, mySize); - shared_ptr stream = myFile.inputStream(); - if (stream.isNull() || !stream->open()) { - return false; - } - if (!reader.readDocument(stream)) { - return false; - } - mySize = reader.readSize(); - myOffset = 0; - return true; -} - -size_t DocReaderStream::read(char *buffer, size_t maxSize) { - maxSize = std::min(maxSize, mySize - myOffset); - if ((buffer != 0) && (myBuffer !=0)) { - memcpy(buffer, myBuffer + myOffset, maxSize); - } - myOffset += maxSize; - return maxSize; -} - -void DocReaderStream::close() { - if (myBuffer != 0) { - delete[] myBuffer; - myBuffer = 0; - } -} - -void DocReaderStream::seek(int offset, bool absoluteOffset) { - if (!absoluteOffset) { - offset += myOffset; - } - myOffset = std::min(mySize, (size_t)std::max(0, offset)); -} - -size_t DocReaderStream::offset() const { - return myOffset; -} - -size_t DocReaderStream::sizeOfOpened() { - return mySize; -} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp new file mode 100644 index 000000000..b4ae346c2 --- /dev/null +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include +#include +#include + +#include "DocStreams.h" +#include "OleStreamReader.h" + +class DocReader : public OleStreamReader { + +public: + DocReader(char *buffer, size_t maxSize); + ~DocReader(); + size_t readSize() const; + +private: + bool readStream(OleMainStream &stream); + void dataHandler(const char *buffer, size_t len); + void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); + void footnoteHandler(); + +protected: + char *myBuffer; + const size_t myMaxSize; + size_t myActualSize; +}; + +class DocCharReader : public DocReader { + +public: + DocCharReader(char *buffer, size_t maxSize); + ~DocCharReader(); + +private: + void dataHandler(const char *buffer, size_t len); +}; + +class DocAnsiReader : public DocReader { + +public: + DocAnsiReader(char *buffer, size_t maxSize); + ~DocAnsiReader(); + +private: + void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol); +}; + +DocReader::DocReader(char *buffer, size_t maxSize) : myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) { +} + +DocReader::~DocReader() { +} + +bool DocReader::readStream(OleMainStream &stream) { + while (myActualSize < myMaxSize) { + if (!readNextPiece(stream)) { + break; + } + } + return true; +} + +void DocReader::dataHandler(const char*, size_t) { +} + +void DocReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char) { +} + +void DocReader::footnoteHandler() { +} + +size_t DocReader::readSize() const { + return myActualSize; +} + +DocCharReader::DocCharReader(char *buffer, size_t maxSize) : DocReader(buffer, maxSize) { +} + +DocCharReader::~DocCharReader() { +} + +void DocCharReader::dataHandler(const char *buffer, size_t dataLength) { + if (myActualSize < myMaxSize) { + const size_t len = std::min(dataLength, myMaxSize - myActualSize); + strncpy(myBuffer + myActualSize, buffer, len); + myActualSize += len; + } +} + +DocAnsiReader::DocAnsiReader(char *buffer, size_t maxSize) : DocReader(buffer, maxSize) { +} + +DocAnsiReader::~DocAnsiReader() { +} + +void DocAnsiReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) { + if (myActualSize < myMaxSize) { + char buffer[4]; + const size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(buffer, symbol); + const size_t len = std::min(dataLength, myMaxSize - myActualSize); + strncpy(myBuffer + myActualSize, buffer, len); + myActualSize += len; + } +} + +DocStream::DocStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) { +} + +DocStream::~DocStream() { + close(); +} + +bool DocStream::open() { + if (mySize != 0) { + myBuffer = new char[mySize]; + } + shared_ptr reader = createReader(myBuffer, mySize); + shared_ptr stream = myFile.inputStream(); + if (stream.isNull() || !stream->open()) { + return false; + } + if (!reader->readDocument(stream)) { + return false; + } + mySize = reader->readSize(); + myOffset = 0; + return true; +} + +size_t DocStream::read(char *buffer, size_t maxSize) { + maxSize = std::min(maxSize, mySize - myOffset); + if ((buffer != 0) && (myBuffer !=0)) { + memcpy(buffer, myBuffer + myOffset, maxSize); + } + myOffset += maxSize; + return maxSize; +} + +void DocStream::close() { + if (myBuffer != 0) { + delete[] myBuffer; + myBuffer = 0; + } +} + +void DocStream::seek(int offset, bool absoluteOffset) { + if (!absoluteOffset) { + offset += myOffset; + } + myOffset = std::min(mySize, (size_t)std::max(0, offset)); +} + +size_t DocStream::offset() const { + return myOffset; +} + +size_t DocStream::sizeOfOpened() { + return mySize; +} + +DocCharStream::DocCharStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) { +} + +DocCharStream::~DocCharStream() { +} + +shared_ptr DocCharStream::createReader(char *buffer, size_t maxSize) { + return new DocCharReader(buffer, maxSize); +} + +DocAnsiStream::DocAnsiStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) { +} + +DocAnsiStream::~DocAnsiStream() { +} + +shared_ptr DocAnsiStream::createReader(char *buffer, size_t maxSize) { + return new DocAnsiReader(buffer, maxSize); +} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h b/jni/NativeFormats/fbreader/src/formats/doc/DocStreams.h similarity index 61% rename from jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h rename to jni/NativeFormats/fbreader/src/formats/doc/DocStreams.h index 65b5ae2a7..d9a08f27f 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocStreams.h @@ -17,19 +17,19 @@ * 02110-1301, USA. */ -#ifndef __DOCREADERSTREAM_H__ -#define __DOCREADERSTREAM_H__ - -#include +#ifndef __DOCSTREAMS_H__ +#define __DOCSTREAMS_H__ #include #include -class DocReaderStream : public ZLInputStream { +class DocReader; + +class DocStream : public ZLInputStream { public: - DocReaderStream(const ZLFile& file, size_t maxSize); - ~DocReaderStream(); + DocStream(const ZLFile& file, size_t maxSize); + ~DocStream(); private: bool open(); @@ -40,6 +40,9 @@ private: size_t offset() const; size_t sizeOfOpened(); +protected: + virtual shared_ptr createReader(char *buffer, size_t maxSize) = 0; + private: const ZLFile myFile; char *myBuffer; @@ -47,4 +50,24 @@ private: size_t myOffset; }; -#endif /* __DOCREADERSTREAM_H__ */ +class DocCharStream : public DocStream { + +public: + DocCharStream(const ZLFile& file, size_t maxSize); + ~DocCharStream(); + +private: + shared_ptr createReader(char *buffer, size_t maxSize); +}; + +class DocAnsiStream : public DocStream { + +public: + DocAnsiStream(const ZLFile& file, size_t maxSize); + ~DocAnsiStream(); + +private: + shared_ptr createReader(char *buffer, size_t maxSize); +}; + +#endif /* __DOCSTREAMS_H__ */ diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp new file mode 100644 index 000000000..0a9c62d38 --- /dev/null +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +//#include +//#include + +#include + +#include "OleMainStream.h" +#include "OleUtil.h" +#include "OleStreamParser.h" + +//word's control chars: +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008; + +//unicode values: +const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D; +const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C; + +OleStreamParser::OleStreamParser() { + myCurBufferPosition = 0; + + myCurCharPos = 0; + myNextStyleInfoIndex = 0; + myNextCharInfoIndex = 0; + myNextBookmarkIndex = 0; + myNextInlineImageInfoIndex = 0; + myNextFloatImageInfoIndex = 0; +} + +bool OleStreamParser::readStream(OleMainStream &oleMainStream) { + ZLUnicodeUtil::Ucs2Char ucs2char; + bool tabMode = false; + while (getUcs2Char(oleMainStream, ucs2char)) { + if (tabMode) { + tabMode = false; + if (ucs2char == WORD_TABLE_SEPARATOR) { + handleTableEndRow(); + continue; + } else { + handleTableSeparator(); + } + } + + if (ucs2char < 32) { + switch (ucs2char) { + case NULL_SYMBOL: + break; + case WORD_HARD_LINEBREAK: + handleHardLinebreak(); + break; + case WORD_END_OF_PARAGRAPH: + case WORD_PAGE_BREAK: + handleParagraphEnd(); + break; + case WORD_TABLE_SEPARATOR: + tabMode = true; + break; + case WORD_FOOTNOTE_MARK: + handleFootNoteMark(); + break; + case WORD_START_FIELD: + handleStartField(); + break; + case WORD_SEPARATOR_FIELD: + handleSeparatorField(); + break; + case WORD_END_FIELD: + handleEndField(); + break; + case INLINE_IMAGE: + case FLOAT_IMAGE: + break; + default: + handleOtherControlChar(ucs2char); + break; + } + } else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) { + continue; //skip + } else { + handleChar(ucs2char); + } + } + + return true; +} + +bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) { + while (myCurBufferPosition >= myBuffer.size()) { + myBuffer.clear(); + myCurBufferPosition = 0; + if (!readNextPiece(stream)) { + return false; + } + } + ucs2char = myBuffer.at(myCurBufferPosition++); + processStyles(stream); + + switch (ucs2char) { + case INLINE_IMAGE: + processInlineImage(stream); + break; + case FLOAT_IMAGE: + processFloatImage(stream); + break; + } + ++myCurCharPos; + return true; +} + +void OleStreamParser::processInlineImage(OleMainStream &stream) { + const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList(); + if (imageInfoList.empty()) { + return; + } + //seek to curCharPos, because not all entries are real pictures + while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) { + ++myNextInlineImageInfoIndex; + } + while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) { + OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second; + ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition); + if (!list.empty()) { + handleImage(list); + } + ++myNextInlineImageInfoIndex; + } +} + +void OleStreamParser::processFloatImage(OleMainStream &stream) { + const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList(); + if (imageInfoList.empty()) { + return; + } + //seek to curCharPos, because not all entries are real pictures + while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) { + ++myNextFloatImageInfoIndex; + } + while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) { + OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second; + ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId); + if (!list.empty()) { + handleImage(list); + } + ++myNextFloatImageInfoIndex; + } +} + +void OleStreamParser::processStyles(OleMainStream &stream) { + const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList(); + if (!styleInfoList.empty()) { + while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) { + OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second; + handleParagraphStyle(info); + ++myNextStyleInfoIndex; + } + } + + const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList(); + if (!charInfoList.empty()) { + while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) { + OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second; + handleFontStyle(info.FontStyle); + ++myNextCharInfoIndex; + } + } + + const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks(); + if (!bookmarksList.empty()) { + while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) { + OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex); + handleBookmark(bookmark.Name); + ++myNextBookmarkIndex; + } + } +} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h new file mode 100644 index 000000000..373e19a15 --- /dev/null +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __OLESTREAMPARSER_H__ +#define __OLESTREAMPARSER_H__ + +#include + +#include "OleMainStream.h" +#include "OleStreamReader.h" + +class OleStreamParser : public OleStreamReader { + +public: + //word's control chars: + static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK; + static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR; + static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB; + static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK; + static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK; + static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH; + static const ZLUnicodeUtil::Ucs2Char WORD_MINUS; + static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN; + static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD; + static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD; + static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD; + static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE; + static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE; + static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE; + + //unicode values: + static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL; + static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR; + static const ZLUnicodeUtil::Ucs2Char LINE_FEED; + static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN; + static const ZLUnicodeUtil::Ucs2Char SPACE; + static const ZLUnicodeUtil::Ucs2Char MINUS; + static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE; + +public: + OleStreamParser(); + +private: + bool readStream(OleMainStream &stream); + +protected: + virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; + virtual void handleHardLinebreak() = 0; + virtual void handleParagraphEnd() = 0; + virtual void handlePageBreak() = 0; + virtual void handleTableSeparator() = 0; + virtual void handleTableEndRow() = 0; + virtual void handleFootNoteMark() = 0; + virtual void handleStartField() = 0; + virtual void handleSeparatorField() = 0; + virtual void handleEndField() = 0; + virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0; + virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; + + virtual void handleFontStyle(unsigned int fontStyle) = 0; + virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0; + virtual void handleBookmark(const std::string &name) = 0; + +private: + bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char); + void processInlineImage(OleMainStream &stream); + void processFloatImage(OleMainStream &stream); + void processStyles(OleMainStream &stream); + +private: +protected: + ZLUnicodeUtil::Ucs2String myBuffer; +private: + size_t myCurBufferPosition; + + unsigned int myCurCharPos; + + size_t myNextStyleInfoIndex; + size_t myNextCharInfoIndex; + size_t myNextBookmarkIndex; + size_t myNextInlineImageInfoIndex; + size_t myNextFloatImageInfoIndex; +}; + +#endif /* __OLESTREAMPARSER_H__ */ diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp index 790325936..ee5d10b28 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp @@ -17,59 +17,13 @@ * 02110-1301, USA. */ -#include -#include - #include #include "OleMainStream.h" -#include "DocBookReader.h" #include "OleUtil.h" -#include "DocInlineImageReader.h" - #include "OleStreamReader.h" -//word's control chars: -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_FOOTNOTE_MARK = 0x0002; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_TABLE_SEPARATOR = 0x0007; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_FIELD = 0x0015; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::INLINE_IMAGE = 0x0001; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::FLOAT_IMAGE = 0x0008; - -//unicode values: -const ZLUnicodeUtil::Ucs2Char OleStreamReader::NULL_SYMBOL = 0x0; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C; - -OleStreamReader::OleStreamReader(const std::string &encoding) : - myEncoding(encoding) { - clear(); -} - -void OleStreamReader::clear() { - myBuffer.clear(); - myCurBufferPosition = 0; - myNextPieceNumber = 0; - - myCurCharPos = 0; - myNextStyleInfoIndex = 0; - myNextCharInfoIndex = 0; - myNextBookmarkIndex = 0; - myNextInlineImageInfoIndex = 0; - myNextFloatImageInfoIndex = 0; +OleStreamReader::OleStreamReader() : myNextPieceNumber(0) { } bool OleStreamReader::readDocument(shared_ptr inputStream) { @@ -78,7 +32,7 @@ bool OleStreamReader::readDocument(shared_ptr inputStream) { shared_ptr storage = new OleStorage; if (!storage->init(inputStream, inputStream->sizeOfOpened())) { - ZLLogger::Instance().println("DocBookReader", "Broken OLE file!"); + ZLLogger::Instance().println("OleStreamReader", "Broken OLE file"); return false; } @@ -88,176 +42,22 @@ bool OleStreamReader::readDocument(shared_ptr inputStream) { } OleMainStream oleStream(storage, wordDocumentEntry, inputStream); + if (!oleStream.open()) { + ZLLogger::Instance().println("OleStreamReader", "Cannot open OleMainStream"); + return false; + } return readStream(oleStream); } -bool OleStreamReader::readStream(OleMainStream &oleMainStream) { - clear(); - - if (!oleMainStream.open()) { - ZLLogger::Instance().println("OleStreamReader", "doesn't open correct"); - return false; - } - ZLUnicodeUtil::Ucs2Char ucs2char; - bool tabMode = false; - while (getUcs2Char(oleMainStream, ucs2char)) { - if (ucs2char < 32) { //< 32 are control symbols - //printf("[0x%x]", ucs2char); //debug output - } - - if (tabMode) { - tabMode = false; - if (ucs2char == WORD_TABLE_SEPARATOR) { - handleTableEndRow(); - continue; - } else { - handleTableSeparator(); - } - } - - if (ucs2char < 32) { - switch (ucs2char) { - case NULL_SYMBOL: - break; - case WORD_HARD_LINEBREAK: - //printf("\n"); - handleHardLinebreak(); - break; - case WORD_END_OF_PARAGRAPH: - case WORD_PAGE_BREAK: - //printf("\n"); - handleParagraphEnd(); - break; - case WORD_TABLE_SEPARATOR: - tabMode = true; - break; - case WORD_FOOTNOTE_MARK: - handleFootNoteMark(); - break; - case WORD_START_FIELD: - handleStartField(); - break; - case WORD_SEPARATOR_FIELD: - handleSeparatorField(); - break; - case WORD_END_FIELD: - handleEndField(); - break; - case INLINE_IMAGE: case FLOAT_IMAGE: - break; - default: - handleOtherControlChar(ucs2char); - break; - } - } else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) { - continue; //skip - } else { - //debug output -// std::string utf8String; -// ZLUnicodeUtil::Ucs2String ucs2String; -// ucs2String.push_back(ucs2char); -// ZLUnicodeUtil::ucs2ToUtf8(utf8String, ucs2String); -// printf("%s", utf8String.c_str()); - - handleChar(ucs2char); - } - } - - return true; -} - -bool OleStreamReader::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) { - if (myCurBufferPosition >= myBuffer.size() && !fillBuffer(stream)) { - return false; - } - ucs2char = myBuffer.at(myCurBufferPosition++); - processStyles(stream); - - if (ucs2char == INLINE_IMAGE) { - processInlineImage(stream); - } else if (ucs2char == FLOAT_IMAGE) { - processFloatImage(stream); - } - ++myCurCharPos; - return true; -} - -void OleStreamReader::processInlineImage(OleMainStream &stream) { - const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList(); - if (imageInfoList.empty()) { - return; - } - //seek to curCharPos, because not all entries are real pictures - while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) { - ++myNextInlineImageInfoIndex; - } - while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) { - OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second; - ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition); - if (!list.empty()) { - handleImage(list); - } - ++myNextInlineImageInfoIndex; - } -} - -void OleStreamReader::processFloatImage(OleMainStream &stream) { - const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList(); - if (imageInfoList.empty()) { - return; - } - //seek to curCharPos, because not all entries are real pictures - while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) { - ++myNextFloatImageInfoIndex; - } - while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) { - OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second; - ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId); - if (!list.empty()) { - handleImage(list); - } - ++myNextFloatImageInfoIndex; - } -} - -void OleStreamReader::processStyles(OleMainStream &stream) { - const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList(); - if (!styleInfoList.empty()) { - while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) { - OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second; - handleParagraphStyle(info); - ++myNextStyleInfoIndex; - } - } - - const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList(); - if (!charInfoList.empty()) { - while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) { - OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second; - handleFontStyle(info.FontStyle); - ++myNextCharInfoIndex; - } - } - - const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks(); - if (!bookmarksList.empty()) { - while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) { - OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex); - handleBookmark(bookmark.Name); - ++myNextBookmarkIndex; - } - } -} - -bool OleStreamReader::fillBuffer(OleMainStream &stream) { +bool OleStreamReader::readNextPiece(OleMainStream &stream) { const OleMainStream::Pieces &pieces = stream.getPieces(); if (myNextPieceNumber >= pieces.size()) { - return false; //end of reading + return false; } const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber); if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) { - handlePageBreak(); + footnoteHandler(); } else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) { return false; } @@ -272,32 +72,15 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) { ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece"); } - myBuffer.clear(); if (!piece.IsANSI) { for (size_t i = 0; i < readBytes; i += 2) { - ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i); - myBuffer.push_back(ch); + ansiSymbolHandler(OleUtil::getU2Bytes(textBuffer, i)); } } else { dataHandler(textBuffer, readBytes); } - myCurBufferPosition = 0; ++myNextPieceNumber; delete[] textBuffer; return true; } - -void OleStreamReader::dataHandler(const char *buffer, size_t len) { - if (myConverter.isNull()) { - // lazy converter initialization - const ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); - myConverter = collection.converter(myEncoding); - if (myConverter.isNull()) { - myConverter = collection.defaultConverter(); - } - } - std::string utf8String; - myConverter->convert(utf8String, buffer, buffer + len); - ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); -} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h index a2d6ad381..7959b519a 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h @@ -21,89 +21,26 @@ #define __OLESTREAMREADER_H__ #include -#include #include "OleMainStream.h" class OleStreamReader { public: - //word's control chars: - static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK; - static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR; - static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB; - static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK; - static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK; - static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH; - static const ZLUnicodeUtil::Ucs2Char WORD_MINUS; - static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN; - static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD; - static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE; - static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE; - static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE; - - //unicode values: - static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL; - static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR; - static const ZLUnicodeUtil::Ucs2Char LINE_FEED; - static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN; - static const ZLUnicodeUtil::Ucs2Char SPACE; - static const ZLUnicodeUtil::Ucs2Char MINUS; - static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE; - -public: - OleStreamReader(const std::string &encoding); + OleStreamReader(); bool readDocument(shared_ptr stream); - void clear(); - -private: - bool readStream(OleMainStream &stream); protected: - virtual void dataHandler(const char *buffer, size_t len); + virtual bool readStream(OleMainStream &stream) = 0; - //virtual void parapgraphHandler(std::string paragraph) = 0; - virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; - virtual void handleHardLinebreak() = 0; - virtual void handleParagraphEnd() = 0; - virtual void handlePageBreak() = 0; - virtual void handleTableSeparator() = 0; - virtual void handleTableEndRow() = 0; - virtual void handleFootNoteMark() = 0; - virtual void handleStartField() = 0; - virtual void handleSeparatorField() = 0; - virtual void handleEndField() = 0; - virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0; - virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; + bool readNextPiece(OleMainStream &stream); - virtual void handleFontStyle(unsigned int fontStyle) = 0; - virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0; - virtual void handleBookmark(const std::string &name) = 0; + virtual void dataHandler(const char *buffer, size_t len) = 0; + virtual void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0; + virtual void footnoteHandler() = 0; private: - bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char); - void processInlineImage(OleMainStream &stream); - void processFloatImage(OleMainStream &stream); - void processStyles(OleMainStream &stream); - bool fillBuffer(OleMainStream &stream); - -private: - ZLUnicodeUtil::Ucs2String myBuffer; - size_t myCurBufferPosition; size_t myNextPieceNumber; - - shared_ptr myConverter; - const std::string myEncoding; - - unsigned int myCurCharPos; - - size_t myNextStyleInfoIndex; - size_t myNextCharInfoIndex; - size_t myNextBookmarkIndex; - size_t myNextInlineImageInfoIndex; - size_t myNextFloatImageInfoIndex; }; #endif /* __OLESTREAMREADER_H__ */ diff --git a/jni/NativeFormats/fbreader/src/formats/oeb/OEBPlugin.cpp b/jni/NativeFormats/fbreader/src/formats/oeb/OEBPlugin.cpp index 9be0ff180..07ee2c0c2 100644 --- a/jni/NativeFormats/fbreader/src/formats/oeb/OEBPlugin.cpp +++ b/jni/NativeFormats/fbreader/src/formats/oeb/OEBPlugin.cpp @@ -135,7 +135,7 @@ shared_ptr OEBPlugin::coverImage(const ZLFile &file) const { bool OEBPlugin::readLanguageAndEncoding(Book &book) const { if (book.language().empty()) { shared_ptr oebStream = new OEBTextStream(opfFile(book.file())); - detectLanguage(book, *oebStream); + detectLanguage(book, *oebStream, book.encoding()); } return true; } diff --git a/jni/NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp b/jni/NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp index 38cdc0bd9..ad36ef369 100644 --- a/jni/NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp @@ -48,7 +48,7 @@ void RtfBookReader::addCharData(const char *data, size_t len, bool convert) { void RtfBookReader::flushBuffer() { if (!myOutputBuffer.empty()) { - if (myCurrentState.ReadText) { + if (myCurrentState.ReadText) { if (!myConverter.isNull()) { static std::string newString; myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length()); @@ -87,27 +87,27 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) { if (on) { std::string id; ZLStringUtil::appendNumber(id, myFootnoteIndex++); - + myStateStack.push(myCurrentState); myCurrentState.Id = id; myCurrentState.ReadText = true; - - myBookReader.addHyperlinkControl(FOOTNOTE, id); + + myBookReader.addHyperlinkControl(FOOTNOTE, id); myBookReader.addData(id); myBookReader.addControl(FOOTNOTE, false); - + myBookReader.setFootnoteTextModel(id); myBookReader.pushKind(REGULAR); myBookReader.beginParagraph(); } else { myBookReader.endParagraph(); myBookReader.popKind(); - + if (!myStateStack.empty()) { myCurrentState = myStateStack.top(); myStateStack.pop(); } - + if (myStateStack.empty()) { myBookReader.setMainTextModel(); } else { @@ -121,7 +121,7 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) { void RtfBookReader::insertImage(const std::string &mimeType, const std::string &fileName, size_t startOffset, size_t size) { std::string id; ZLStringUtil::appendNumber(id, myImageIndex++); - myBookReader.addImageReference(id, 0, false); + myBookReader.addImageReference(id, 0, false); const ZLFile file(fileName, mimeType); myBookReader.addImage(id, new ZLFileImage(file, "hex", startOffset, size)); } @@ -163,7 +163,7 @@ void RtfBookReader::setFontProperty(FontProperty property) { return; } flushBuffer(); - + switch (property) { case FONT_BOLD: if (myState.Bold) { @@ -175,7 +175,7 @@ void RtfBookReader::setFontProperty(FontProperty property) { break; case FONT_ITALIC: if (myState.Italic) { - if (!myState.Bold) { + if (!myState.Bold) { //DPRINT("add style emphasis.\n"); myBookReader.pushKind(EMPHASIS); myBookReader.addControl(EMPHASIS, true); @@ -183,14 +183,14 @@ void RtfBookReader::setFontProperty(FontProperty property) { //DPRINT("add style emphasis and strong.\n"); myBookReader.popKind(); myBookReader.addControl(STRONG, false); - + myBookReader.pushKind(EMPHASIS); myBookReader.addControl(EMPHASIS, true); myBookReader.pushKind(STRONG); myBookReader.addControl(STRONG, true); } } else { - if (!myState.Bold) { + if (!myState.Bold) { //DPRINT("remove style emphasis.\n"); myBookReader.addControl(EMPHASIS, false); myBookReader.popKind(); @@ -200,7 +200,7 @@ void RtfBookReader::setFontProperty(FontProperty property) { myBookReader.popKind(); myBookReader.addControl(EMPHASIS, false); myBookReader.popKind(); - + myBookReader.pushKind(STRONG); myBookReader.addControl(STRONG, true); } diff --git a/jni/NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp b/jni/NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp index c42cc9a05..9b348d33c 100644 --- a/jni/NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp +++ b/jni/NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp @@ -46,7 +46,7 @@ bool RtfPlugin::readMetaInfo(Book &book) const { } else if (book.language().empty()) { shared_ptr stream = new RtfReaderStream(book.file(), 50000); if (!stream.isNull()) { - detectLanguage(book, *stream); + detectLanguage(book, *stream, book.encoding()); } } diff --git a/jni/NativeFormats/zlibrary/core/src/encoding/DummyEncodingConverter.cpp b/jni/NativeFormats/zlibrary/core/src/encoding/DummyEncodingConverter.cpp index 60ea76f2a..e206bf334 100644 --- a/jni/NativeFormats/zlibrary/core/src/encoding/DummyEncodingConverter.cpp +++ b/jni/NativeFormats/zlibrary/core/src/encoding/DummyEncodingConverter.cpp @@ -41,7 +41,9 @@ friend class DummyEncodingConverterProvider; bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) { const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding); - return (lowerCasedEncoding == "utf-8") || (lowerCasedEncoding == "us-ascii"); + return + lowerCasedEncoding == ZLEncodingConverter::UTF8 || + lowerCasedEncoding == ZLEncodingConverter::ASCII; } shared_ptr DummyEncodingConverterProvider::createConverter(const std::string &name) { diff --git a/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.cpp b/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.cpp index c40b5cbe2..1b9f26ef2 100644 --- a/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.cpp +++ b/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.cpp @@ -20,6 +20,8 @@ #include "ZLEncodingConverter.h" #include "ZLEncodingConverterProvider.h" +const std::string ZLEncodingConverter::ASCII = "us-ascii"; +const std::string ZLEncodingConverter::UTF8 = "utf-8"; const std::string ZLEncodingConverter::UTF16 = "utf-16"; const std::string ZLEncodingConverter::UTF16BE = "utf-16be"; diff --git a/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.h b/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.h index f40d830fd..9fa1d042f 100644 --- a/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.h +++ b/jni/NativeFormats/zlibrary/core/src/encoding/ZLEncodingConverter.h @@ -29,6 +29,8 @@ class ZLEncodingConverter { public: + static const std::string ASCII; + static const std::string UTF8; static const std::string UTF16; static const std::string UTF16BE;