mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 17:59:33 +02:00
synchronization with C++ version
This commit is contained in:
parent
e790aeb68a
commit
8ac2815d94
19 changed files with 640 additions and 518 deletions
|
@ -128,10 +128,11 @@ LOCAL_SRC_FILES := \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocStreams.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
||||||
|
NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \
|
||||||
|
|
|
@ -22,22 +22,24 @@
|
||||||
#include <ZLInputStream.h>
|
#include <ZLInputStream.h>
|
||||||
#include <ZLLanguageDetector.h>
|
#include <ZLLanguageDetector.h>
|
||||||
#include <ZLImage.h>
|
#include <ZLImage.h>
|
||||||
|
#include <ZLEncodingConverter.h>
|
||||||
|
|
||||||
#include "FormatPlugin.h"
|
#include "FormatPlugin.h"
|
||||||
|
|
||||||
#include "../library/Book.h"
|
#include "../library/Book.h"
|
||||||
|
|
||||||
void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) {
|
bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
|
||||||
std::string language = book.language();
|
std::string language = book.language();
|
||||||
std::string encoding = book.encoding();
|
std::string encoding = book.encoding();
|
||||||
|
|
||||||
if (!encoding.empty()) {
|
if (!force && !encoding.empty()) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool detected = false;
|
||||||
PluginCollection &collection = PluginCollection::Instance();
|
PluginCollection &collection = PluginCollection::Instance();
|
||||||
if (encoding.empty()) {
|
if (encoding.empty()) {
|
||||||
encoding = "utf-8";
|
encoding = ZLEncodingConverter::UTF8;
|
||||||
}
|
}
|
||||||
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
||||||
static const int BUFSIZE = 65536;
|
static const int BUFSIZE = 65536;
|
||||||
|
@ -47,25 +49,30 @@ void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream)
|
||||||
shared_ptr<ZLLanguageDetector::LanguageInfo> info = ZLLanguageDetector().findInfo(buffer, size);
|
shared_ptr<ZLLanguageDetector::LanguageInfo> info = ZLLanguageDetector().findInfo(buffer, size);
|
||||||
delete[] buffer;
|
delete[] buffer;
|
||||||
if (!info.isNull()) {
|
if (!info.isNull()) {
|
||||||
|
detected = true;
|
||||||
if (!info->Language.empty()) {
|
if (!info->Language.empty()) {
|
||||||
language = info->Language;
|
language = info->Language;
|
||||||
}
|
}
|
||||||
encoding = info->Encoding;
|
encoding = info->Encoding;
|
||||||
if ((encoding == "us-ascii") || (encoding == "iso-8859-1")) {
|
if (encoding == ZLEncodingConverter::ASCII || encoding == "iso-8859-1") {
|
||||||
encoding = "windows-1252";
|
encoding = "windows-1252";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
book.setEncoding(encoding);
|
book.setEncoding(encoding);
|
||||||
book.setLanguage(language);
|
book.setLanguage(language);
|
||||||
|
|
||||||
|
return detected;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
|
bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
|
||||||
std::string language = book.language();
|
std::string language = book.language();
|
||||||
if (!language.empty()) {
|
if (!force && !language.empty()) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool detected = false;
|
||||||
|
|
||||||
PluginCollection &collection = PluginCollection::Instance();
|
PluginCollection &collection = PluginCollection::Instance();
|
||||||
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
||||||
static const int BUFSIZE = 65536;
|
static const int BUFSIZE = 65536;
|
||||||
|
@ -73,15 +80,18 @@ void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
|
||||||
const size_t size = stream.read(buffer, BUFSIZE);
|
const size_t size = stream.read(buffer, BUFSIZE);
|
||||||
stream.close();
|
stream.close();
|
||||||
shared_ptr<ZLLanguageDetector::LanguageInfo> info =
|
shared_ptr<ZLLanguageDetector::LanguageInfo> info =
|
||||||
ZLLanguageDetector().findInfoForEncoding(book.encoding(), buffer, size, -20000);
|
ZLLanguageDetector().findInfoForEncoding(encoding, buffer, size, -20000);
|
||||||
delete[] buffer;
|
delete[] buffer;
|
||||||
if (!info.isNull()) {
|
if (!info.isNull()) {
|
||||||
|
detected = true;
|
||||||
if (!info->Language.empty()) {
|
if (!info->Language.empty()) {
|
||||||
language = info->Language;
|
language = info->Language;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
book.setLanguage(language);
|
book.setLanguage(language);
|
||||||
|
|
||||||
|
return detected;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string &FormatPlugin::tryOpen(const ZLFile&) const {
|
const std::string &FormatPlugin::tryOpen(const ZLFile&) const {
|
||||||
|
|
|
@ -63,8 +63,8 @@ public:
|
||||||
virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
|
virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static void detectEncodingAndLanguage(Book &book, ZLInputStream &stream);
|
static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false);
|
||||||
static void detectLanguage(Book &book, ZLInputStream &stream);
|
static bool detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force = false);
|
||||||
};
|
};
|
||||||
|
|
||||||
class PluginCollection {
|
class PluginCollection {
|
||||||
|
|
|
@ -34,9 +34,9 @@
|
||||||
#include "OleMainStream.h"
|
#include "OleMainStream.h"
|
||||||
|
|
||||||
DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
|
DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
|
||||||
OleStreamReader(encoding),
|
|
||||||
myModelReader(model),
|
myModelReader(model),
|
||||||
myPictureCounter(0) {
|
myPictureCounter(0),
|
||||||
|
myEncoding(encoding) {
|
||||||
myReadState = READ_TEXT;
|
myReadState = READ_TEXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,3 +355,25 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode
|
||||||
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
|
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
|
||||||
return utf8String;
|
return utf8String;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Footnote hook of the book reader: it does nothing of its own and simply
// forwards to handlePageBreak().
void DocBookReader::footnoteHandler() {
	handlePageBreak();
}
|
||||||
|
|
||||||
|
void DocBookReader::dataHandler(const char *buffer, size_t len) {
|
||||||
|
if (myConverter.isNull()) {
|
||||||
|
// lazy converter initialization
|
||||||
|
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
||||||
|
myConverter = collection.converter(myEncoding);
|
||||||
|
if (myConverter.isNull()) {
|
||||||
|
myConverter = collection.defaultConverter();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::string utf8String;
|
||||||
|
myConverter->convert(utf8String, buffer, buffer + len);
|
||||||
|
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends one already-decoded UCS-2 symbol to myBuffer unchanged.
void DocBookReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
	myBuffer.push_back(symbol);
}
|
||||||
|
|
|
@ -25,13 +25,14 @@
|
||||||
#include <shared_ptr.h>
|
#include <shared_ptr.h>
|
||||||
#include <ZLFile.h>
|
#include <ZLFile.h>
|
||||||
#include <ZLTextStyleEntry.h>
|
#include <ZLTextStyleEntry.h>
|
||||||
|
#include <ZLEncodingConverter.h>
|
||||||
|
|
||||||
#include "../../bookmodel/BookReader.h"
|
#include "../../bookmodel/BookReader.h"
|
||||||
|
|
||||||
#include "OleMainStream.h"
|
#include "OleMainStream.h"
|
||||||
#include "OleStreamReader.h"
|
#include "OleStreamParser.h"
|
||||||
|
|
||||||
class DocBookReader : public OleStreamReader {
|
class DocBookReader : public OleStreamParser {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DocBookReader(BookModel &model, const std::string &encoding);
|
DocBookReader(BookModel &model, const std::string &encoding);
|
||||||
|
@ -39,6 +40,10 @@ public:
|
||||||
bool readBook();
|
bool readBook();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void dataHandler(const char *buffer, size_t len);
|
||||||
|
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||||
|
void footnoteHandler();
|
||||||
|
|
||||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||||
void handleHardLinebreak();
|
void handleHardLinebreak();
|
||||||
void handleParagraphEnd();
|
void handleParagraphEnd();
|
||||||
|
@ -88,6 +93,9 @@ private:
|
||||||
shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
|
shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
|
||||||
OleMainStream::Style myCurrentStyleInfo;
|
OleMainStream::Style myCurrentStyleInfo;
|
||||||
unsigned int myPictureCounter;
|
unsigned int myPictureCounter;
|
||||||
|
|
||||||
|
const std::string myEncoding;
|
||||||
|
shared_ptr<ZLEncodingConverter> myConverter;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline DocBookReader::~DocBookReader() {}
|
inline DocBookReader::~DocBookReader() {}
|
||||||
|
|
|
@ -21,11 +21,12 @@
|
||||||
#include <ZLInputStream.h>
|
#include <ZLInputStream.h>
|
||||||
#include <ZLLogger.h>
|
#include <ZLLogger.h>
|
||||||
#include <ZLImage.h>
|
#include <ZLImage.h>
|
||||||
|
#include <ZLEncodingConverter.h>
|
||||||
|
|
||||||
#include "DocPlugin.h"
|
#include "DocPlugin.h"
|
||||||
#include "DocMetaInfoReader.h"
|
#include "DocMetaInfoReader.h"
|
||||||
#include "DocBookReader.h"
|
#include "DocBookReader.h"
|
||||||
#include "DocReaderStream.h"
|
#include "DocStreams.h"
|
||||||
#include "../../bookmodel/BookModel.h"
|
#include "../../bookmodel/BookModel.h"
|
||||||
#include "../../library/Book.h"
|
#include "../../library/Book.h"
|
||||||
|
|
||||||
|
@ -52,9 +53,10 @@ bool DocPlugin::readMetaInfo(Book &book) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<ZLInputStream> stream = new DocReaderStream(book.file(), 50000);
|
shared_ptr<ZLInputStream> stream = new DocCharStream(book.file(), 50000);
|
||||||
if (!stream.isNull()) {
|
if (!detectEncodingAndLanguage(book, *stream)) {
|
||||||
detectEncodingAndLanguage(book, *stream);
|
stream = new DocAnsiStream(book.file(), 50000);
|
||||||
|
detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1,178 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
||||||
* 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "DocReaderStream.h"
|
|
||||||
#include "OleStreamReader.h"
|
|
||||||
|
|
||||||
class DocTextOnlyReader : public OleStreamReader {
|
|
||||||
|
|
||||||
public:
|
|
||||||
DocTextOnlyReader(char *buffer, size_t maxSize);
|
|
||||||
~DocTextOnlyReader();
|
|
||||||
size_t readSize() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void dataHandler(const char *buffer, size_t len);
|
|
||||||
|
|
||||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
|
||||||
void handleHardLinebreak();
|
|
||||||
void handleParagraphEnd();
|
|
||||||
void handlePageBreak();
|
|
||||||
void handleTableSeparator();
|
|
||||||
void handleTableEndRow();
|
|
||||||
void handleFootNoteMark();
|
|
||||||
void handleStartField();
|
|
||||||
void handleSeparatorField();
|
|
||||||
void handleEndField();
|
|
||||||
void handleImage(const ZLFileImage::Blocks &blocks);
|
|
||||||
void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
|
||||||
void handleFontStyle(unsigned int fontStyle);
|
|
||||||
void handleParagraphStyle(const OleMainStream::Style &styleInfo);
|
|
||||||
void handleBookmark(const std::string &name);
|
|
||||||
|
|
||||||
private:
|
|
||||||
char *myBuffer;
|
|
||||||
const size_t myMaxSize;
|
|
||||||
size_t myActualSize;
|
|
||||||
};
|
|
||||||
|
|
||||||
DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) : OleStreamReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) {
|
|
||||||
}
|
|
||||||
|
|
||||||
DocTextOnlyReader::~DocTextOnlyReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) {
|
|
||||||
if (myActualSize >= myMaxSize) {
|
|
||||||
// break stream reading
|
|
||||||
} else {
|
|
||||||
const size_t len = std::min(dataLength, myMaxSize - myActualSize);
|
|
||||||
strncpy(myBuffer + myActualSize, buffer, len);
|
|
||||||
myActualSize += len;
|
|
||||||
}
|
|
||||||
OleStreamReader::dataHandler(buffer, dataLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleHardLinebreak() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleParagraphEnd() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handlePageBreak() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleTableSeparator() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleTableEndRow() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleFootNoteMark() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleStartField() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleSeparatorField() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleEndField() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocTextOnlyReader::handleBookmark(const std::string &name) {
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t DocTextOnlyReader::readSize() const {
|
|
||||||
return myActualSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) {
|
|
||||||
}
|
|
||||||
|
|
||||||
DocReaderStream::~DocReaderStream() {
|
|
||||||
close();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DocReaderStream::open() {
|
|
||||||
if (mySize != 0) {
|
|
||||||
myBuffer = new char[mySize];
|
|
||||||
}
|
|
||||||
DocTextOnlyReader reader(myBuffer, mySize);
|
|
||||||
shared_ptr<ZLInputStream> stream = myFile.inputStream();
|
|
||||||
if (stream.isNull() || !stream->open()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!reader.readDocument(stream)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
mySize = reader.readSize();
|
|
||||||
myOffset = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t DocReaderStream::read(char *buffer, size_t maxSize) {
|
|
||||||
maxSize = std::min(maxSize, mySize - myOffset);
|
|
||||||
if ((buffer != 0) && (myBuffer !=0)) {
|
|
||||||
memcpy(buffer, myBuffer + myOffset, maxSize);
|
|
||||||
}
|
|
||||||
myOffset += maxSize;
|
|
||||||
return maxSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocReaderStream::close() {
|
|
||||||
if (myBuffer != 0) {
|
|
||||||
delete[] myBuffer;
|
|
||||||
myBuffer = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocReaderStream::seek(int offset, bool absoluteOffset) {
|
|
||||||
if (!absoluteOffset) {
|
|
||||||
offset += myOffset;
|
|
||||||
}
|
|
||||||
myOffset = std::min(mySize, (size_t)std::max(0, offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t DocReaderStream::offset() const {
|
|
||||||
return myOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t DocReaderStream::sizeOfOpened() {
|
|
||||||
return mySize;
|
|
||||||
}
|
|
197
jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp
Normal file
197
jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "DocStreams.h"
|
||||||
|
#include "OleStreamReader.h"
|
||||||
|
|
||||||
|
class DocReader : public OleStreamReader {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocReader(char *buffer, size_t maxSize);
|
||||||
|
~DocReader();
|
||||||
|
size_t readSize() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool readStream(OleMainStream &stream);
|
||||||
|
void dataHandler(const char *buffer, size_t len);
|
||||||
|
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||||
|
void footnoteHandler();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
char *myBuffer;
|
||||||
|
const size_t myMaxSize;
|
||||||
|
size_t myActualSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
// DocReader variant that stores the byte fragments passed to dataHandler();
// the other handlers keep the no-op behaviour of DocReader.
class DocCharReader : public DocReader {

public:
	DocCharReader(char *buffer, size_t maxSize);
	~DocCharReader();

private:
	// Copies the fragment into the output buffer, up to the size limit.
	void dataHandler(const char *buffer, size_t len);
};
|
||||||
|
|
||||||
|
class DocAnsiReader : public DocReader {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocAnsiReader(char *buffer, size_t maxSize);
|
||||||
|
~DocAnsiReader();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Remembers the output buffer and its capacity; nothing has been written yet.
DocReader::DocReader(char *buffer, size_t maxSize) :
	myBuffer(buffer),
	myMaxSize(maxSize),
	myActualSize(0) {
}

// The buffer belongs to the caller, so there is nothing to release here.
DocReader::~DocReader() {
}
|
||||||
|
|
||||||
|
// Keeps requesting pieces from the stream while the buffer still has room
// and readNextPiece() reports success. Always returns true: running out of
// pieces before the buffer is full is not treated as an error.
bool DocReader::readStream(OleMainStream &stream) {
	while (myActualSize < myMaxSize && readNextPiece(stream)) {
		// all the work happens in the handlers called by readNextPiece()
	}
	return true;
}
|
||||||
|
|
||||||
|
void DocReader::dataHandler(const char*, size_t) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocReader::footnoteHandler() {
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocReader::readSize() const {
|
||||||
|
return myActualSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Passes the buffer and its capacity straight to DocReader.
DocCharReader::DocCharReader(char *buffer, size_t maxSize) :
	DocReader(buffer, maxSize) {
}

// Nothing to release.
DocCharReader::~DocCharReader() {
}
|
||||||
|
|
||||||
|
void DocCharReader::dataHandler(const char *buffer, size_t dataLength) {
|
||||||
|
if (myActualSize < myMaxSize) {
|
||||||
|
const size_t len = std::min(dataLength, myMaxSize - myActualSize);
|
||||||
|
strncpy(myBuffer + myActualSize, buffer, len);
|
||||||
|
myActualSize += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Passes the buffer and its capacity straight to DocReader.
DocAnsiReader::DocAnsiReader(char *buffer, size_t maxSize) :
	DocReader(buffer, maxSize) {
}

// Nothing to release.
DocAnsiReader::~DocAnsiReader() {
}
|
||||||
|
|
||||||
|
// Appends the UTF-8 encoding of symbol to the output buffer.
// If fewer bytes remain than the encoding needs, only the bytes that fit are
// written (the last sequence in a full buffer may therefore be cut short);
// nothing is done once the buffer is full.
void DocAnsiReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
	if (myActualSize >= myMaxSize) {
		return;
	}
	// Scratch space for one encoded symbol; it is not zero-terminated.
	char utf8[4];
	const size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(utf8, symbol);
	const size_t len = std::min(dataLength, myMaxSize - myActualSize);
	// memcpy rather than strncpy: utf8 is a byte array, not a C string.
	memcpy(myBuffer + myActualSize, utf8, len);
	myActualSize += len;
}
|
||||||
|
|
||||||
|
// Creates a closed stream over the text of file. maxSize is the upper bound
// on the number of bytes that open() will extract. No buffer is allocated
// until open() is called.
// myOffset is initialised here as well, so that read(), seek() and offset()
// never see an indeterminate value on a stream that has not been opened.
// NOTE(review): the initializer order assumes mySize is declared before
// myOffset in the class - confirm against DocStreams.h.
DocStream::DocStream(const ZLFile& file, size_t maxSize) :
	myFile(file),
	myBuffer(0),
	mySize(maxSize),
	myOffset(0) {
}

// Releases the text buffer if the stream is still open.
DocStream::~DocStream() {
	close();
}
|
||||||
|
|
||||||
|
bool DocStream::open() {
|
||||||
|
if (mySize != 0) {
|
||||||
|
myBuffer = new char[mySize];
|
||||||
|
}
|
||||||
|
shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
|
||||||
|
shared_ptr<ZLInputStream> stream = myFile.inputStream();
|
||||||
|
if (stream.isNull() || !stream->open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!reader->readDocument(stream)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mySize = reader->readSize();
|
||||||
|
myOffset = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocStream::read(char *buffer, size_t maxSize) {
|
||||||
|
maxSize = std::min(maxSize, mySize - myOffset);
|
||||||
|
if ((buffer != 0) && (myBuffer !=0)) {
|
||||||
|
memcpy(buffer, myBuffer + myOffset, maxSize);
|
||||||
|
}
|
||||||
|
myOffset += maxSize;
|
||||||
|
return maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocStream::close() {
|
||||||
|
if (myBuffer != 0) {
|
||||||
|
delete[] myBuffer;
|
||||||
|
myBuffer = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moves the read position. With absoluteOffset the new position is offset
// itself, otherwise offset is added to the current position. The result is
// clamped to the range [0, mySize].
void DocStream::seek(int offset, bool absoluteOffset) {
	int target = offset;
	if (!absoluteOffset) {
		target += myOffset;
	}

	if (target <= 0) {
		myOffset = 0;
		return;
	}
	const size_t position = (size_t)target;
	myOffset = (position < mySize) ? position : mySize;
}
|
||||||
|
|
||||||
|
size_t DocStream::offset() const {
|
||||||
|
return myOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocStream::sizeOfOpened() {
|
||||||
|
return mySize;
|
||||||
|
}
|
||||||
|
|
||||||
|
DocCharStream::DocCharStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
|
||||||
|
}
|
||||||
|
|
||||||
|
DocCharStream::~DocCharStream() {
|
||||||
|
}
|
||||||
|
|
||||||
|
shared_ptr<DocReader> DocCharStream::createReader(char *buffer, size_t maxSize) {
|
||||||
|
return new DocCharReader(buffer, maxSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
DocAnsiStream::DocAnsiStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
|
||||||
|
}
|
||||||
|
|
||||||
|
DocAnsiStream::~DocAnsiStream() {
|
||||||
|
}
|
||||||
|
|
||||||
|
shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, size_t maxSize) {
|
||||||
|
return new DocAnsiReader(buffer, maxSize);
|
||||||
|
}
|
|
@ -17,19 +17,19 @@
|
||||||
* 02110-1301, USA.
|
* 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __DOCREADERSTREAM_H__
|
#ifndef __DOCSTREAMS_H__
|
||||||
#define __DOCREADERSTREAM_H__
|
#define __DOCSTREAMS_H__
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include <ZLFile.h>
|
#include <ZLFile.h>
|
||||||
#include <ZLInputStream.h>
|
#include <ZLInputStream.h>
|
||||||
|
|
||||||
class DocReaderStream : public ZLInputStream {
|
class DocReader;
|
||||||
|
|
||||||
|
class DocStream : public ZLInputStream {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DocReaderStream(const ZLFile& file, size_t maxSize);
|
DocStream(const ZLFile& file, size_t maxSize);
|
||||||
~DocReaderStream();
|
~DocStream();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool open();
|
bool open();
|
||||||
|
@ -40,6 +40,9 @@ private:
|
||||||
size_t offset() const;
|
size_t offset() const;
|
||||||
size_t sizeOfOpened();
|
size_t sizeOfOpened();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual shared_ptr<DocReader> createReader(char *buffer, size_t maxSize) = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const ZLFile myFile;
|
const ZLFile myFile;
|
||||||
char *myBuffer;
|
char *myBuffer;
|
||||||
|
@ -47,4 +50,24 @@ private:
|
||||||
size_t myOffset;
|
size_t myOffset;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* __DOCREADERSTREAM_H__ */
|
class DocCharStream : public DocStream {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocCharStream(const ZLFile& file, size_t maxSize);
|
||||||
|
~DocCharStream();
|
||||||
|
|
||||||
|
private:
|
||||||
|
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
|
||||||
|
};
|
||||||
|
|
||||||
|
class DocAnsiStream : public DocStream {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocAnsiStream(const ZLFile& file, size_t maxSize);
|
||||||
|
~DocAnsiStream();
|
||||||
|
|
||||||
|
private:
|
||||||
|
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __DOCSTREAMS_H__ */
|
210
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp
Normal file
210
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp
Normal file
|
@ -0,0 +1,210 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//#include <cctype>
|
||||||
|
//#include <cstring>
|
||||||
|
|
||||||
|
#include <ZLLogger.h>
|
||||||
|
|
||||||
|
#include "OleMainStream.h"
|
||||||
|
#include "OleUtil.h"
|
||||||
|
#include "OleStreamParser.h"
|
||||||
|
|
||||||
|
//word's control chars:
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
|
||||||
|
|
||||||
|
//unicode values:
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D;
|
||||||
|
const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C;
|
||||||
|
|
||||||
|
// Starts parsing from the very beginning: every position and cursor is zero.
OleStreamParser::OleStreamParser() {
	// cursors into the per-document info lists
	// NOTE(review): only the inline-image cursor is used in the code visible
	// here; the others are presumably advanced by processStyles() and
	// processFloatImage() - confirm.
	myNextFloatImageInfoIndex = 0;
	myNextInlineImageInfoIndex = 0;
	myNextBookmarkIndex = 0;
	myNextCharInfoIndex = 0;
	myNextStyleInfoIndex = 0;

	// number of characters handed out so far by getUcs2Char()
	myCurCharPos = 0;

	// index of the next unread character in myBuffer
	myCurBufferPosition = 0;
}
|
||||||
|
|
||||||
|
// Main parsing loop: takes characters one by one from getUcs2Char() and
// dispatches each to the matching handle*() callback. Always returns true
// once getUcs2Char() has no more characters.
bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
	// true when the previous character was a table separator whose meaning
	// (cell end or row end) is decided by the character that follows it
	bool pendingTableSeparator = false;
	ZLUnicodeUtil::Ucs2Char symbol;

	while (getUcs2Char(oleMainStream, symbol)) {
		if (pendingTableSeparator) {
			pendingTableSeparator = false;
			if (symbol == WORD_TABLE_SEPARATOR) {
				// two separators in a row: end of the table row
				handleTableEndRow();
				continue;
			}
			// a single separator; the current symbol is still processed below
			handleTableSeparator();
		}

		if (symbol == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
			continue; //skip
		}
		if (symbol >= 32) {
			handleChar(symbol);
			continue;
		}

		// everything below 32 is a control character
		switch (symbol) {
			case NULL_SYMBOL:
			case INLINE_IMAGE:
			case FLOAT_IMAGE:
				// nothing to do here (images are dealt with in getUcs2Char())
				break;
			case WORD_TABLE_SEPARATOR:
				pendingTableSeparator = true;
				break;
			case WORD_HARD_LINEBREAK:
				handleHardLinebreak();
				break;
			case WORD_PAGE_BREAK:
			case WORD_END_OF_PARAGRAPH:
				handleParagraphEnd();
				break;
			case WORD_FOOTNOTE_MARK:
				handleFootNoteMark();
				break;
			case WORD_START_FIELD:
				handleStartField();
				break;
			case WORD_SEPARATOR_FIELD:
				handleSeparatorField();
				break;
			case WORD_END_FIELD:
				handleEndField();
				break;
			default:
				handleOtherControlChar(symbol);
				break;
		}
	}

	return true;
}
|
||||||
|
|
||||||
|
// Fetches the next character of the text into ucs2char.
// Returns false when myBuffer is exhausted and readNextPiece() cannot supply
// another piece; returns true otherwise.
// Before returning, fires the style/char-info/bookmark callbacks registered for
// the current character position and, for image marks, the image callbacks.
bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
	// A loop rather than a single check: if a piece leaves myBuffer empty,
	// the next piece is requested straight away.
	while (myBuffer.size() <= myCurBufferPosition) {
		myBuffer.clear();
		myCurBufferPosition = 0;
		if (!readNextPiece(stream)) {
			return false;
		}
	}

	ucs2char = myBuffer.at(myCurBufferPosition);
	++myCurBufferPosition;

	// myCurCharPos still refers to the character just fetched at this point
	processStyles(stream);
	if (ucs2char == INLINE_IMAGE) {
		processInlineImage(stream);
	} else if (ucs2char == FLOAT_IMAGE) {
		processFloatImage(stream);
	}

	++myCurCharPos;
	return true;
}
|
||||||
|
|
||||||
|
// Reports (via handleImage) every inline image registered exactly at the
// current character position. Entries with non-empty image data only.
void OleStreamParser::processInlineImage(OleMainStream &stream) {
	const OleMainStream::InlineImageInfoList &infos = stream.getInlineImageInfoList();
	if (infos.empty()) {
		return;
	}

	//seek to curCharPos, because not all entries are real pictures
	for (; myNextInlineImageInfoIndex < infos.size(); ++myNextInlineImageInfoIndex) {
		if (!(infos.at(myNextInlineImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// consume all entries that sit exactly at the current position
	for (; myNextInlineImageInfoIndex < infos.size(); ++myNextInlineImageInfoIndex) {
		if (!(infos.at(myNextInlineImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::InlineImageInfo imageInfo = infos.at(myNextInlineImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getInlineImage(imageInfo.DataPosition);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
|
||||||
|
|
||||||
|
// Reports (via handleImage) every floating image registered exactly at the
// current character position. Entries with non-empty image data only.
void OleStreamParser::processFloatImage(OleMainStream &stream) {
	const OleMainStream::FloatImageInfoList &infos = stream.getFloatImageInfoList();
	if (infos.empty()) {
		return;
	}

	//seek to curCharPos, because not all entries are real pictures
	for (; myNextFloatImageInfoIndex < infos.size(); ++myNextFloatImageInfoIndex) {
		if (!(infos.at(myNextFloatImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// consume all entries that sit exactly at the current position
	for (; myNextFloatImageInfoIndex < infos.size(); ++myNextFloatImageInfoIndex) {
		if (!(infos.at(myNextFloatImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::FloatImageInfo imageInfo = infos.at(myNextFloatImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getFloatImage(imageInfo.ShapeId);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
|
||||||
|
|
||||||
|
// Fires the callbacks for everything attached to the current character
// position, in this order: paragraph styles, font styles, bookmarks.
// Each list is walked with its own persistent index, and only entries whose
// position equals myCurCharPos are consumed.
void OleStreamParser::processStyles(OleMainStream &stream) {
	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
	for (; myNextStyleInfoIndex < paragraphStyles.size(); ++myNextStyleInfoIndex) {
		if (!(paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
		handleParagraphStyle(style);
	}

	const OleMainStream::CharInfoList &charInfos = stream.getCharInfoList();
	for (; myNextCharInfoIndex < charInfos.size(); ++myNextCharInfoIndex) {
		if (!(charInfos.at(myNextCharInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::CharInfo charInfo = charInfos.at(myNextCharInfoIndex).second;
		handleFontStyle(charInfo.FontStyle);
	}

	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
	for (; myNextBookmarkIndex < bookmarks.size(); ++myNextBookmarkIndex) {
		if (!(bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos)) {
			break;
		}
		OleMainStream::Bookmark bookmark = bookmarks.at(myNextBookmarkIndex);
		handleBookmark(bookmark.Name);
	}
}
|
101
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h
Normal file
101
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __OLESTREAMPARSER_H__
|
||||||
|
#define __OLESTREAMPARSER_H__
|
||||||
|
|
||||||
|
#include <ZLUnicodeUtil.h>
|
||||||
|
|
||||||
|
#include "OleMainStream.h"
|
||||||
|
#include "OleStreamReader.h"
|
||||||
|
|
||||||
|
class OleStreamParser : public OleStreamReader {
|
||||||
|
|
||||||
|
public:
|
||||||
|
//word's control chars:
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
|
||||||
|
|
||||||
|
//unicode values:
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char MINUS;
|
||||||
|
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
||||||
|
|
||||||
|
public:
|
||||||
|
OleStreamParser();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool readStream(OleMainStream &stream);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||||
|
virtual void handleHardLinebreak() = 0;
|
||||||
|
virtual void handleParagraphEnd() = 0;
|
||||||
|
virtual void handlePageBreak() = 0;
|
||||||
|
virtual void handleTableSeparator() = 0;
|
||||||
|
virtual void handleTableEndRow() = 0;
|
||||||
|
virtual void handleFootNoteMark() = 0;
|
||||||
|
virtual void handleStartField() = 0;
|
||||||
|
virtual void handleSeparatorField() = 0;
|
||||||
|
virtual void handleEndField() = 0;
|
||||||
|
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
|
||||||
|
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||||
|
|
||||||
|
virtual void handleFontStyle(unsigned int fontStyle) = 0;
|
||||||
|
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
|
||||||
|
virtual void handleBookmark(const std::string &name) = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
|
||||||
|
void processInlineImage(OleMainStream &stream);
|
||||||
|
void processFloatImage(OleMainStream &stream);
|
||||||
|
void processStyles(OleMainStream &stream);
|
||||||
|
|
||||||
|
private:
|
||||||
|
protected:
|
||||||
|
ZLUnicodeUtil::Ucs2String myBuffer;
|
||||||
|
private:
|
||||||
|
size_t myCurBufferPosition;
|
||||||
|
|
||||||
|
unsigned int myCurCharPos;
|
||||||
|
|
||||||
|
size_t myNextStyleInfoIndex;
|
||||||
|
size_t myNextCharInfoIndex;
|
||||||
|
size_t myNextBookmarkIndex;
|
||||||
|
size_t myNextInlineImageInfoIndex;
|
||||||
|
size_t myNextFloatImageInfoIndex;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __OLESTREAMPARSER_H__ */
|
|
@ -17,59 +17,13 @@
|
||||||
* 02110-1301, USA.
|
* 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cctype>
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
#include <ZLLogger.h>
|
#include <ZLLogger.h>
|
||||||
|
|
||||||
#include "OleMainStream.h"
|
#include "OleMainStream.h"
|
||||||
#include "DocBookReader.h"
|
|
||||||
#include "OleUtil.h"
|
#include "OleUtil.h"
|
||||||
#include "DocInlineImageReader.h"
|
|
||||||
|
|
||||||
#include "OleStreamReader.h"
|
#include "OleStreamReader.h"
|
||||||
|
|
||||||
//word's control chars:
|
OleStreamReader::OleStreamReader() : myNextPieceNumber(0) {
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_FOOTNOTE_MARK = 0x0002;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_TABLE_SEPARATOR = 0x0007;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_FIELD = 0x0015;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::INLINE_IMAGE = 0x0001;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FLOAT_IMAGE = 0x0008;
|
|
||||||
|
|
||||||
//unicode values:
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::NULL_SYMBOL = 0x0;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D;
|
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
|
|
||||||
|
|
||||||
OleStreamReader::OleStreamReader(const std::string &encoding) :
|
|
||||||
myEncoding(encoding) {
|
|
||||||
clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void OleStreamReader::clear() {
|
|
||||||
myBuffer.clear();
|
|
||||||
myCurBufferPosition = 0;
|
|
||||||
myNextPieceNumber = 0;
|
|
||||||
|
|
||||||
myCurCharPos = 0;
|
|
||||||
myNextStyleInfoIndex = 0;
|
|
||||||
myNextCharInfoIndex = 0;
|
|
||||||
myNextBookmarkIndex = 0;
|
|
||||||
myNextInlineImageInfoIndex = 0;
|
|
||||||
myNextFloatImageInfoIndex = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
||||||
|
@ -78,7 +32,7 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
||||||
shared_ptr<OleStorage> storage = new OleStorage;
|
shared_ptr<OleStorage> storage = new OleStorage;
|
||||||
|
|
||||||
if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
|
if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
|
||||||
ZLLogger::Instance().println("DocBookReader", "Broken OLE file!");
|
ZLLogger::Instance().println("OleStreamReader", "Broken OLE file");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,176 +42,22 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
||||||
}
|
}
|
||||||
|
|
||||||
OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
|
OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
|
||||||
|
if (!oleStream.open()) {
|
||||||
|
ZLLogger::Instance().println("OleStreamReader", "Cannot open OleMainStream");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return readStream(oleStream);
|
return readStream(oleStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OleStreamReader::readStream(OleMainStream &oleMainStream) {
|
bool OleStreamReader::readNextPiece(OleMainStream &stream) {
|
||||||
clear();
|
|
||||||
|
|
||||||
if (!oleMainStream.open()) {
|
|
||||||
ZLLogger::Instance().println("OleStreamReader", "doesn't open correct");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ZLUnicodeUtil::Ucs2Char ucs2char;
|
|
||||||
bool tabMode = false;
|
|
||||||
while (getUcs2Char(oleMainStream, ucs2char)) {
|
|
||||||
if (ucs2char < 32) { //< 32 are control symbols
|
|
||||||
//printf("[0x%x]", ucs2char); //debug output
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tabMode) {
|
|
||||||
tabMode = false;
|
|
||||||
if (ucs2char == WORD_TABLE_SEPARATOR) {
|
|
||||||
handleTableEndRow();
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
handleTableSeparator();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ucs2char < 32) {
|
|
||||||
switch (ucs2char) {
|
|
||||||
case NULL_SYMBOL:
|
|
||||||
break;
|
|
||||||
case WORD_HARD_LINEBREAK:
|
|
||||||
//printf("\n");
|
|
||||||
handleHardLinebreak();
|
|
||||||
break;
|
|
||||||
case WORD_END_OF_PARAGRAPH:
|
|
||||||
case WORD_PAGE_BREAK:
|
|
||||||
//printf("\n");
|
|
||||||
handleParagraphEnd();
|
|
||||||
break;
|
|
||||||
case WORD_TABLE_SEPARATOR:
|
|
||||||
tabMode = true;
|
|
||||||
break;
|
|
||||||
case WORD_FOOTNOTE_MARK:
|
|
||||||
handleFootNoteMark();
|
|
||||||
break;
|
|
||||||
case WORD_START_FIELD:
|
|
||||||
handleStartField();
|
|
||||||
break;
|
|
||||||
case WORD_SEPARATOR_FIELD:
|
|
||||||
handleSeparatorField();
|
|
||||||
break;
|
|
||||||
case WORD_END_FIELD:
|
|
||||||
handleEndField();
|
|
||||||
break;
|
|
||||||
case INLINE_IMAGE: case FLOAT_IMAGE:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
handleOtherControlChar(ucs2char);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
|
|
||||||
continue; //skip
|
|
||||||
} else {
|
|
||||||
//debug output
|
|
||||||
// std::string utf8String;
|
|
||||||
// ZLUnicodeUtil::Ucs2String ucs2String;
|
|
||||||
// ucs2String.push_back(ucs2char);
|
|
||||||
// ZLUnicodeUtil::ucs2ToUtf8(utf8String, ucs2String);
|
|
||||||
// printf("%s", utf8String.c_str());
|
|
||||||
|
|
||||||
handleChar(ucs2char);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool OleStreamReader::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
|
|
||||||
if (myCurBufferPosition >= myBuffer.size() && !fillBuffer(stream)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ucs2char = myBuffer.at(myCurBufferPosition++);
|
|
||||||
processStyles(stream);
|
|
||||||
|
|
||||||
if (ucs2char == INLINE_IMAGE) {
|
|
||||||
processInlineImage(stream);
|
|
||||||
} else if (ucs2char == FLOAT_IMAGE) {
|
|
||||||
processFloatImage(stream);
|
|
||||||
}
|
|
||||||
++myCurCharPos;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OleStreamReader::processInlineImage(OleMainStream &stream) {
|
|
||||||
const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList();
|
|
||||||
if (imageInfoList.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
//seek to curCharPos, because not all entries are real pictures
|
|
||||||
while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) {
|
|
||||||
++myNextInlineImageInfoIndex;
|
|
||||||
}
|
|
||||||
while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) {
|
|
||||||
OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second;
|
|
||||||
ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition);
|
|
||||||
if (!list.empty()) {
|
|
||||||
handleImage(list);
|
|
||||||
}
|
|
||||||
++myNextInlineImageInfoIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OleStreamReader::processFloatImage(OleMainStream &stream) {
|
|
||||||
const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList();
|
|
||||||
if (imageInfoList.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
//seek to curCharPos, because not all entries are real pictures
|
|
||||||
while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) {
|
|
||||||
++myNextFloatImageInfoIndex;
|
|
||||||
}
|
|
||||||
while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) {
|
|
||||||
OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second;
|
|
||||||
ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId);
|
|
||||||
if (!list.empty()) {
|
|
||||||
handleImage(list);
|
|
||||||
}
|
|
||||||
++myNextFloatImageInfoIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OleStreamReader::processStyles(OleMainStream &stream) {
|
|
||||||
const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList();
|
|
||||||
if (!styleInfoList.empty()) {
|
|
||||||
while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) {
|
|
||||||
OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second;
|
|
||||||
handleParagraphStyle(info);
|
|
||||||
++myNextStyleInfoIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList();
|
|
||||||
if (!charInfoList.empty()) {
|
|
||||||
while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) {
|
|
||||||
OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second;
|
|
||||||
handleFontStyle(info.FontStyle);
|
|
||||||
++myNextCharInfoIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks();
|
|
||||||
if (!bookmarksList.empty()) {
|
|
||||||
while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) {
|
|
||||||
OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex);
|
|
||||||
handleBookmark(bookmark.Name);
|
|
||||||
++myNextBookmarkIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
|
||||||
const OleMainStream::Pieces &pieces = stream.getPieces();
|
const OleMainStream::Pieces &pieces = stream.getPieces();
|
||||||
if (myNextPieceNumber >= pieces.size()) {
|
if (myNextPieceNumber >= pieces.size()) {
|
||||||
return false; //end of reading
|
return false;
|
||||||
}
|
}
|
||||||
const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
|
const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
|
||||||
|
|
||||||
if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
|
if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
|
||||||
handlePageBreak();
|
footnoteHandler();
|
||||||
} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
|
} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -272,32 +72,15 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||||
ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece");
|
ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece");
|
||||||
}
|
}
|
||||||
|
|
||||||
myBuffer.clear();
|
|
||||||
if (!piece.IsANSI) {
|
if (!piece.IsANSI) {
|
||||||
for (size_t i = 0; i < readBytes; i += 2) {
|
for (size_t i = 0; i < readBytes; i += 2) {
|
||||||
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
ansiSymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
|
||||||
myBuffer.push_back(ch);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dataHandler(textBuffer, readBytes);
|
dataHandler(textBuffer, readBytes);
|
||||||
}
|
}
|
||||||
myCurBufferPosition = 0;
|
|
||||||
++myNextPieceNumber;
|
++myNextPieceNumber;
|
||||||
delete[] textBuffer;
|
delete[] textBuffer;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OleStreamReader::dataHandler(const char *buffer, size_t len) {
|
|
||||||
if (myConverter.isNull()) {
|
|
||||||
// lazy converter initialization
|
|
||||||
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
|
||||||
myConverter = collection.converter(myEncoding);
|
|
||||||
if (myConverter.isNull()) {
|
|
||||||
myConverter = collection.defaultConverter();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::string utf8String;
|
|
||||||
myConverter->convert(utf8String, buffer, buffer + len);
|
|
||||||
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
|
||||||
}
|
|
||||||
|
|
|
@ -21,89 +21,26 @@
|
||||||
#define __OLESTREAMREADER_H__
|
#define __OLESTREAMREADER_H__
|
||||||
|
|
||||||
#include <ZLUnicodeUtil.h>
|
#include <ZLUnicodeUtil.h>
|
||||||
#include <ZLEncodingConverter.h>
|
|
||||||
|
|
||||||
#include "OleMainStream.h"
|
#include "OleMainStream.h"
|
||||||
|
|
||||||
class OleStreamReader {
|
class OleStreamReader {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
//word's control chars:
|
OleStreamReader();
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
|
|
||||||
|
|
||||||
//unicode values:
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char MINUS;
|
|
||||||
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
|
||||||
|
|
||||||
public:
|
|
||||||
OleStreamReader(const std::string &encoding);
|
|
||||||
bool readDocument(shared_ptr<ZLInputStream> stream);
|
bool readDocument(shared_ptr<ZLInputStream> stream);
|
||||||
void clear();
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool readStream(OleMainStream &stream);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void dataHandler(const char *buffer, size_t len);
|
virtual bool readStream(OleMainStream &stream) = 0;
|
||||||
|
|
||||||
//virtual void parapgraphHandler(std::string paragraph) = 0;
|
bool readNextPiece(OleMainStream &stream);
|
||||||
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
|
||||||
virtual void handleHardLinebreak() = 0;
|
|
||||||
virtual void handleParagraphEnd() = 0;
|
|
||||||
virtual void handlePageBreak() = 0;
|
|
||||||
virtual void handleTableSeparator() = 0;
|
|
||||||
virtual void handleTableEndRow() = 0;
|
|
||||||
virtual void handleFootNoteMark() = 0;
|
|
||||||
virtual void handleStartField() = 0;
|
|
||||||
virtual void handleSeparatorField() = 0;
|
|
||||||
virtual void handleEndField() = 0;
|
|
||||||
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
|
|
||||||
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
|
||||||
|
|
||||||
virtual void handleFontStyle(unsigned int fontStyle) = 0;
|
virtual void dataHandler(const char *buffer, size_t len) = 0;
|
||||||
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
|
virtual void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
|
||||||
virtual void handleBookmark(const std::string &name) = 0;
|
virtual void footnoteHandler() = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
|
|
||||||
void processInlineImage(OleMainStream &stream);
|
|
||||||
void processFloatImage(OleMainStream &stream);
|
|
||||||
void processStyles(OleMainStream &stream);
|
|
||||||
bool fillBuffer(OleMainStream &stream);
|
|
||||||
|
|
||||||
private:
|
|
||||||
ZLUnicodeUtil::Ucs2String myBuffer;
|
|
||||||
size_t myCurBufferPosition;
|
|
||||||
size_t myNextPieceNumber;
|
size_t myNextPieceNumber;
|
||||||
|
|
||||||
shared_ptr<ZLEncodingConverter> myConverter;
|
|
||||||
const std::string myEncoding;
|
|
||||||
|
|
||||||
unsigned int myCurCharPos;
|
|
||||||
|
|
||||||
size_t myNextStyleInfoIndex;
|
|
||||||
size_t myNextCharInfoIndex;
|
|
||||||
size_t myNextBookmarkIndex;
|
|
||||||
size_t myNextInlineImageInfoIndex;
|
|
||||||
size_t myNextFloatImageInfoIndex;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* __OLESTREAMREADER_H__ */
|
#endif /* __OLESTREAMREADER_H__ */
|
||||||
|
|
|
@ -135,7 +135,7 @@ shared_ptr<const ZLImage> OEBPlugin::coverImage(const ZLFile &file) const {
|
||||||
bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
|
bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
|
||||||
if (book.language().empty()) {
|
if (book.language().empty()) {
|
||||||
shared_ptr<ZLInputStream> oebStream = new OEBTextStream(opfFile(book.file()));
|
shared_ptr<ZLInputStream> oebStream = new OEBTextStream(opfFile(book.file()));
|
||||||
detectLanguage(book, *oebStream);
|
detectLanguage(book, *oebStream, book.encoding());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,7 +48,7 @@ void RtfBookReader::addCharData(const char *data, size_t len, bool convert) {
|
||||||
|
|
||||||
void RtfBookReader::flushBuffer() {
|
void RtfBookReader::flushBuffer() {
|
||||||
if (!myOutputBuffer.empty()) {
|
if (!myOutputBuffer.empty()) {
|
||||||
if (myCurrentState.ReadText) {
|
if (myCurrentState.ReadText) {
|
||||||
if (!myConverter.isNull()) {
|
if (!myConverter.isNull()) {
|
||||||
static std::string newString;
|
static std::string newString;
|
||||||
myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length());
|
myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length());
|
||||||
|
@ -87,27 +87,27 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) {
|
||||||
if (on) {
|
if (on) {
|
||||||
std::string id;
|
std::string id;
|
||||||
ZLStringUtil::appendNumber(id, myFootnoteIndex++);
|
ZLStringUtil::appendNumber(id, myFootnoteIndex++);
|
||||||
|
|
||||||
myStateStack.push(myCurrentState);
|
myStateStack.push(myCurrentState);
|
||||||
myCurrentState.Id = id;
|
myCurrentState.Id = id;
|
||||||
myCurrentState.ReadText = true;
|
myCurrentState.ReadText = true;
|
||||||
|
|
||||||
myBookReader.addHyperlinkControl(FOOTNOTE, id);
|
myBookReader.addHyperlinkControl(FOOTNOTE, id);
|
||||||
myBookReader.addData(id);
|
myBookReader.addData(id);
|
||||||
myBookReader.addControl(FOOTNOTE, false);
|
myBookReader.addControl(FOOTNOTE, false);
|
||||||
|
|
||||||
myBookReader.setFootnoteTextModel(id);
|
myBookReader.setFootnoteTextModel(id);
|
||||||
myBookReader.pushKind(REGULAR);
|
myBookReader.pushKind(REGULAR);
|
||||||
myBookReader.beginParagraph();
|
myBookReader.beginParagraph();
|
||||||
} else {
|
} else {
|
||||||
myBookReader.endParagraph();
|
myBookReader.endParagraph();
|
||||||
myBookReader.popKind();
|
myBookReader.popKind();
|
||||||
|
|
||||||
if (!myStateStack.empty()) {
|
if (!myStateStack.empty()) {
|
||||||
myCurrentState = myStateStack.top();
|
myCurrentState = myStateStack.top();
|
||||||
myStateStack.pop();
|
myStateStack.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (myStateStack.empty()) {
|
if (myStateStack.empty()) {
|
||||||
myBookReader.setMainTextModel();
|
myBookReader.setMainTextModel();
|
||||||
} else {
|
} else {
|
||||||
|
@ -121,7 +121,7 @@ void RtfBookReader::switchDestination(DestinationType destination, bool on) {
|
||||||
void RtfBookReader::insertImage(const std::string &mimeType, const std::string &fileName, size_t startOffset, size_t size) {
|
void RtfBookReader::insertImage(const std::string &mimeType, const std::string &fileName, size_t startOffset, size_t size) {
|
||||||
std::string id;
|
std::string id;
|
||||||
ZLStringUtil::appendNumber(id, myImageIndex++);
|
ZLStringUtil::appendNumber(id, myImageIndex++);
|
||||||
myBookReader.addImageReference(id, 0, false);
|
myBookReader.addImageReference(id, 0, false);
|
||||||
const ZLFile file(fileName, mimeType);
|
const ZLFile file(fileName, mimeType);
|
||||||
myBookReader.addImage(id, new ZLFileImage(file, "hex", startOffset, size));
|
myBookReader.addImage(id, new ZLFileImage(file, "hex", startOffset, size));
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
flushBuffer();
|
flushBuffer();
|
||||||
|
|
||||||
switch (property) {
|
switch (property) {
|
||||||
case FONT_BOLD:
|
case FONT_BOLD:
|
||||||
if (myState.Bold) {
|
if (myState.Bold) {
|
||||||
|
@ -175,7 +175,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
|
||||||
break;
|
break;
|
||||||
case FONT_ITALIC:
|
case FONT_ITALIC:
|
||||||
if (myState.Italic) {
|
if (myState.Italic) {
|
||||||
if (!myState.Bold) {
|
if (!myState.Bold) {
|
||||||
//DPRINT("add style emphasis.\n");
|
//DPRINT("add style emphasis.\n");
|
||||||
myBookReader.pushKind(EMPHASIS);
|
myBookReader.pushKind(EMPHASIS);
|
||||||
myBookReader.addControl(EMPHASIS, true);
|
myBookReader.addControl(EMPHASIS, true);
|
||||||
|
@ -183,14 +183,14 @@ void RtfBookReader::setFontProperty(FontProperty property) {
|
||||||
//DPRINT("add style emphasis and strong.\n");
|
//DPRINT("add style emphasis and strong.\n");
|
||||||
myBookReader.popKind();
|
myBookReader.popKind();
|
||||||
myBookReader.addControl(STRONG, false);
|
myBookReader.addControl(STRONG, false);
|
||||||
|
|
||||||
myBookReader.pushKind(EMPHASIS);
|
myBookReader.pushKind(EMPHASIS);
|
||||||
myBookReader.addControl(EMPHASIS, true);
|
myBookReader.addControl(EMPHASIS, true);
|
||||||
myBookReader.pushKind(STRONG);
|
myBookReader.pushKind(STRONG);
|
||||||
myBookReader.addControl(STRONG, true);
|
myBookReader.addControl(STRONG, true);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!myState.Bold) {
|
if (!myState.Bold) {
|
||||||
//DPRINT("remove style emphasis.\n");
|
//DPRINT("remove style emphasis.\n");
|
||||||
myBookReader.addControl(EMPHASIS, false);
|
myBookReader.addControl(EMPHASIS, false);
|
||||||
myBookReader.popKind();
|
myBookReader.popKind();
|
||||||
|
@ -200,7 +200,7 @@ void RtfBookReader::setFontProperty(FontProperty property) {
|
||||||
myBookReader.popKind();
|
myBookReader.popKind();
|
||||||
myBookReader.addControl(EMPHASIS, false);
|
myBookReader.addControl(EMPHASIS, false);
|
||||||
myBookReader.popKind();
|
myBookReader.popKind();
|
||||||
|
|
||||||
myBookReader.pushKind(STRONG);
|
myBookReader.pushKind(STRONG);
|
||||||
myBookReader.addControl(STRONG, true);
|
myBookReader.addControl(STRONG, true);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@ bool RtfPlugin::readMetaInfo(Book &book) const {
|
||||||
} else if (book.language().empty()) {
|
} else if (book.language().empty()) {
|
||||||
shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000);
|
shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000);
|
||||||
if (!stream.isNull()) {
|
if (!stream.isNull()) {
|
||||||
detectLanguage(book, *stream);
|
detectLanguage(book, *stream, book.encoding());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,9 @@ friend class DummyEncodingConverterProvider;
|
||||||
|
|
||||||
bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) {
|
bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) {
|
||||||
const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding);
|
const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding);
|
||||||
return (lowerCasedEncoding == "utf-8") || (lowerCasedEncoding == "us-ascii");
|
return
|
||||||
|
lowerCasedEncoding == ZLEncodingConverter::UTF8 ||
|
||||||
|
lowerCasedEncoding == ZLEncodingConverter::ASCII;
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<ZLEncodingConverter> DummyEncodingConverterProvider::createConverter(const std::string &name) {
|
shared_ptr<ZLEncodingConverter> DummyEncodingConverterProvider::createConverter(const std::string &name) {
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
#include "ZLEncodingConverter.h"
|
#include "ZLEncodingConverter.h"
|
||||||
#include "ZLEncodingConverterProvider.h"
|
#include "ZLEncodingConverterProvider.h"
|
||||||
|
|
||||||
|
const std::string ZLEncodingConverter::ASCII = "us-ascii";
|
||||||
|
const std::string ZLEncodingConverter::UTF8 = "utf-8";
|
||||||
const std::string ZLEncodingConverter::UTF16 = "utf-16";
|
const std::string ZLEncodingConverter::UTF16 = "utf-16";
|
||||||
const std::string ZLEncodingConverter::UTF16BE = "utf-16be";
|
const std::string ZLEncodingConverter::UTF16BE = "utf-16be";
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,8 @@
|
||||||
class ZLEncodingConverter {
|
class ZLEncodingConverter {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static const std::string ASCII;
|
||||||
|
static const std::string UTF8;
|
||||||
static const std::string UTF16;
|
static const std::string UTF16;
|
||||||
static const std::string UTF16BE;
|
static const std::string UTF16BE;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue