mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 17:59:33 +02:00
synchronization with C++ version
This commit is contained in:
parent
e790aeb68a
commit
8ac2815d94
19 changed files with 640 additions and 518 deletions
|
@ -128,10 +128,11 @@ LOCAL_SRC_FILES := \
|
|||
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocStreams.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/OleUtil.cpp \
|
||||
NativeFormats/fbreader/src/formats/doc/DocInlineImageReader.cpp \
|
||||
|
|
|
@ -22,22 +22,24 @@
|
|||
#include <ZLInputStream.h>
|
||||
#include <ZLLanguageDetector.h>
|
||||
#include <ZLImage.h>
|
||||
#include <ZLEncodingConverter.h>
|
||||
|
||||
#include "FormatPlugin.h"
|
||||
|
||||
#include "../library/Book.h"
|
||||
|
||||
void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) {
|
||||
bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
|
||||
std::string language = book.language();
|
||||
std::string encoding = book.encoding();
|
||||
|
||||
if (!encoding.empty()) {
|
||||
return;
|
||||
if (!force && !encoding.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool detected = false;
|
||||
PluginCollection &collection = PluginCollection::Instance();
|
||||
if (encoding.empty()) {
|
||||
encoding = "utf-8";
|
||||
encoding = ZLEncodingConverter::UTF8;
|
||||
}
|
||||
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
||||
static const int BUFSIZE = 65536;
|
||||
|
@ -47,25 +49,30 @@ void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream)
|
|||
shared_ptr<ZLLanguageDetector::LanguageInfo> info = ZLLanguageDetector().findInfo(buffer, size);
|
||||
delete[] buffer;
|
||||
if (!info.isNull()) {
|
||||
detected = true;
|
||||
if (!info->Language.empty()) {
|
||||
language = info->Language;
|
||||
}
|
||||
encoding = info->Encoding;
|
||||
if ((encoding == "us-ascii") || (encoding == "iso-8859-1")) {
|
||||
if (encoding == ZLEncodingConverter::ASCII || encoding == "iso-8859-1") {
|
||||
encoding = "windows-1252";
|
||||
}
|
||||
}
|
||||
}
|
||||
book.setEncoding(encoding);
|
||||
book.setLanguage(language);
|
||||
|
||||
return detected;
|
||||
}
|
||||
|
||||
void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
|
||||
bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
|
||||
std::string language = book.language();
|
||||
if (!language.empty()) {
|
||||
return;
|
||||
if (!force && !language.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool detected = false;
|
||||
|
||||
PluginCollection &collection = PluginCollection::Instance();
|
||||
if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
|
||||
static const int BUFSIZE = 65536;
|
||||
|
@ -73,15 +80,18 @@ void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
|
|||
const size_t size = stream.read(buffer, BUFSIZE);
|
||||
stream.close();
|
||||
shared_ptr<ZLLanguageDetector::LanguageInfo> info =
|
||||
ZLLanguageDetector().findInfoForEncoding(book.encoding(), buffer, size, -20000);
|
||||
ZLLanguageDetector().findInfoForEncoding(encoding, buffer, size, -20000);
|
||||
delete[] buffer;
|
||||
if (!info.isNull()) {
|
||||
detected = true;
|
||||
if (!info->Language.empty()) {
|
||||
language = info->Language;
|
||||
}
|
||||
}
|
||||
}
|
||||
book.setLanguage(language);
|
||||
|
||||
return detected;
|
||||
}
|
||||
|
||||
const std::string &FormatPlugin::tryOpen(const ZLFile&) const {
|
||||
|
|
|
@ -63,8 +63,8 @@ public:
|
|||
virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
|
||||
|
||||
protected:
|
||||
static void detectEncodingAndLanguage(Book &book, ZLInputStream &stream);
|
||||
static void detectLanguage(Book &book, ZLInputStream &stream);
|
||||
static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false);
|
||||
static bool detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force = false);
|
||||
};
|
||||
|
||||
class PluginCollection {
|
||||
|
|
|
@ -34,9 +34,9 @@
|
|||
#include "OleMainStream.h"
|
||||
|
||||
DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
|
||||
OleStreamReader(encoding),
|
||||
myModelReader(model),
|
||||
myPictureCounter(0) {
|
||||
myPictureCounter(0),
|
||||
myEncoding(encoding) {
|
||||
myReadState = READ_TEXT;
|
||||
}
|
||||
|
||||
|
@ -355,3 +355,25 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode
|
|||
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
|
||||
return utf8String;
|
||||
}
|
||||
|
||||
void DocBookReader::footnoteHandler() {
|
||||
handlePageBreak();
|
||||
}
|
||||
|
||||
void DocBookReader::dataHandler(const char *buffer, size_t len) {
|
||||
if (myConverter.isNull()) {
|
||||
// lazy converter initialization
|
||||
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
||||
myConverter = collection.converter(myEncoding);
|
||||
if (myConverter.isNull()) {
|
||||
myConverter = collection.defaultConverter();
|
||||
}
|
||||
}
|
||||
std::string utf8String;
|
||||
myConverter->convert(utf8String, buffer, buffer + len);
|
||||
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
||||
}
|
||||
|
||||
// Appends a single, already decoded UCS-2 symbol to myBuffer.
void DocBookReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
	const ZLUnicodeUtil::Ucs2Char decoded = symbol;
	myBuffer.push_back(decoded);
}
|
||||
|
|
|
@ -25,13 +25,14 @@
|
|||
#include <shared_ptr.h>
|
||||
#include <ZLFile.h>
|
||||
#include <ZLTextStyleEntry.h>
|
||||
#include <ZLEncodingConverter.h>
|
||||
|
||||
#include "../../bookmodel/BookReader.h"
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "OleStreamReader.h"
|
||||
#include "OleStreamParser.h"
|
||||
|
||||
class DocBookReader : public OleStreamReader {
|
||||
class DocBookReader : public OleStreamParser {
|
||||
|
||||
public:
|
||||
DocBookReader(BookModel &model, const std::string &encoding);
|
||||
|
@ -39,6 +40,10 @@ public:
|
|||
bool readBook();
|
||||
|
||||
private:
|
||||
void dataHandler(const char *buffer, size_t len);
|
||||
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||
void footnoteHandler();
|
||||
|
||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||
void handleHardLinebreak();
|
||||
void handleParagraphEnd();
|
||||
|
@ -88,6 +93,9 @@ private:
|
|||
shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
|
||||
OleMainStream::Style myCurrentStyleInfo;
|
||||
unsigned int myPictureCounter;
|
||||
|
||||
const std::string myEncoding;
|
||||
shared_ptr<ZLEncodingConverter> myConverter;
|
||||
};
|
||||
|
||||
inline DocBookReader::~DocBookReader() {}
|
||||
|
|
|
@ -21,11 +21,12 @@
|
|||
#include <ZLInputStream.h>
|
||||
#include <ZLLogger.h>
|
||||
#include <ZLImage.h>
|
||||
#include <ZLEncodingConverter.h>
|
||||
|
||||
#include "DocPlugin.h"
|
||||
#include "DocMetaInfoReader.h"
|
||||
#include "DocBookReader.h"
|
||||
#include "DocReaderStream.h"
|
||||
#include "DocStreams.h"
|
||||
#include "../../bookmodel/BookModel.h"
|
||||
#include "../../library/Book.h"
|
||||
|
||||
|
@ -52,9 +53,10 @@ bool DocPlugin::readMetaInfo(Book &book) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
shared_ptr<ZLInputStream> stream = new DocReaderStream(book.file(), 50000);
|
||||
if (!stream.isNull()) {
|
||||
detectEncodingAndLanguage(book, *stream);
|
||||
shared_ptr<ZLInputStream> stream = new DocCharStream(book.file(), 50000);
|
||||
if (!detectEncodingAndLanguage(book, *stream)) {
|
||||
stream = new DocAnsiStream(book.file(), 50000);
|
||||
detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1,178 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "DocReaderStream.h"
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
class DocTextOnlyReader : public OleStreamReader {
|
||||
|
||||
public:
|
||||
DocTextOnlyReader(char *buffer, size_t maxSize);
|
||||
~DocTextOnlyReader();
|
||||
size_t readSize() const;
|
||||
|
||||
private:
|
||||
void dataHandler(const char *buffer, size_t len);
|
||||
|
||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||
void handleHardLinebreak();
|
||||
void handleParagraphEnd();
|
||||
void handlePageBreak();
|
||||
void handleTableSeparator();
|
||||
void handleTableEndRow();
|
||||
void handleFootNoteMark();
|
||||
void handleStartField();
|
||||
void handleSeparatorField();
|
||||
void handleEndField();
|
||||
void handleImage(const ZLFileImage::Blocks &blocks);
|
||||
void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||
void handleFontStyle(unsigned int fontStyle);
|
||||
void handleParagraphStyle(const OleMainStream::Style &styleInfo);
|
||||
void handleBookmark(const std::string &name);
|
||||
|
||||
private:
|
||||
char *myBuffer;
|
||||
const size_t myMaxSize;
|
||||
size_t myActualSize;
|
||||
};
|
||||
|
||||
// Starts with an empty capture buffer; the base reader is given an empty
// encoding name.
DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) :
	OleStreamReader(std::string()),
	myBuffer(buffer),
	myMaxSize(maxSize),
	myActualSize(0) {
}

// Nothing to release: the buffer is not freed here.
DocTextOnlyReader::~DocTextOnlyReader() {
}
|
||||
|
||||
void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) {
|
||||
if (myActualSize >= myMaxSize) {
|
||||
// break stream reading
|
||||
} else {
|
||||
const size_t len = std::min(dataLength, myMaxSize - myActualSize);
|
||||
strncpy(myBuffer + myActualSize, buffer, len);
|
||||
myActualSize += len;
|
||||
}
|
||||
OleStreamReader::dataHandler(buffer, dataLength);
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleHardLinebreak() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleParagraphEnd() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handlePageBreak() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleTableSeparator() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleTableEndRow() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleFootNoteMark() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleStartField() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleSeparatorField() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleEndField() {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
|
||||
}
|
||||
|
||||
void DocTextOnlyReader::handleBookmark(const std::string &name) {
|
||||
}
|
||||
|
||||
size_t DocTextOnlyReader::readSize() const {
|
||||
return myActualSize;
|
||||
}
|
||||
|
||||
// Creates a closed stream over the text of `file`.
//
// file    - document to read
// maxSize - maximum number of text bytes to buffer on open()
//
// myOffset is initialised here so that offset()/seek()/read() never observe
// an indeterminate value when called before a successful open().
DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) :
	myFile(file),
	myBuffer(0),
	mySize(maxSize),
	myOffset(0) {
}

// Releases the text buffer, if one is still allocated.
DocReaderStream::~DocReaderStream() {
	close();
}
|
||||
|
||||
bool DocReaderStream::open() {
|
||||
if (mySize != 0) {
|
||||
myBuffer = new char[mySize];
|
||||
}
|
||||
DocTextOnlyReader reader(myBuffer, mySize);
|
||||
shared_ptr<ZLInputStream> stream = myFile.inputStream();
|
||||
if (stream.isNull() || !stream->open()) {
|
||||
return false;
|
||||
}
|
||||
if (!reader.readDocument(stream)) {
|
||||
return false;
|
||||
}
|
||||
mySize = reader.readSize();
|
||||
myOffset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t DocReaderStream::read(char *buffer, size_t maxSize) {
|
||||
maxSize = std::min(maxSize, mySize - myOffset);
|
||||
if ((buffer != 0) && (myBuffer !=0)) {
|
||||
memcpy(buffer, myBuffer + myOffset, maxSize);
|
||||
}
|
||||
myOffset += maxSize;
|
||||
return maxSize;
|
||||
}
|
||||
|
||||
void DocReaderStream::close() {
|
||||
if (myBuffer != 0) {
|
||||
delete[] myBuffer;
|
||||
myBuffer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Moves the read position.
//
// offset         - target position, or a delta when absoluteOffset is false
// absoluteOffset - true: offset counts from the start; false: from the
//                  current position
//
// The resulting position is clamped to the range [0, mySize].
void DocReaderStream::seek(int offset, bool absoluteOffset) {
	int target = offset;
	if (!absoluteOffset) {
		target += myOffset;
	}
	const size_t nonNegative = (0 < target) ? (size_t)target : (size_t)0;
	myOffset = (nonNegative < mySize) ? nonNegative : mySize;
}
|
||||
|
||||
size_t DocReaderStream::offset() const {
|
||||
return myOffset;
|
||||
}
|
||||
|
||||
size_t DocReaderStream::sizeOfOpened() {
|
||||
return mySize;
|
||||
}
|
197
jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp
Normal file
197
jni/NativeFormats/fbreader/src/formats/doc/DocStreams.cpp
Normal file
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "DocStreams.h"
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
class DocReader : public OleStreamReader {
|
||||
|
||||
public:
|
||||
DocReader(char *buffer, size_t maxSize);
|
||||
~DocReader();
|
||||
size_t readSize() const;
|
||||
|
||||
private:
|
||||
bool readStream(OleMainStream &stream);
|
||||
void dataHandler(const char *buffer, size_t len);
|
||||
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||
void footnoteHandler();
|
||||
|
||||
protected:
|
||||
char *myBuffer;
|
||||
const size_t myMaxSize;
|
||||
size_t myActualSize;
|
||||
};
|
||||
|
||||
// DocReader variant that stores the raw bytes passed to dataHandler().
class DocCharReader : public DocReader {

public:
	DocCharReader(char *buffer, size_t maxSize);
	~DocCharReader();

private:
	// Appends the incoming bytes to the buffer while space remains.
	void dataHandler(const char *buffer, size_t len);
};
|
||||
|
||||
class DocAnsiReader : public DocReader {
|
||||
|
||||
public:
|
||||
DocAnsiReader(char *buffer, size_t maxSize);
|
||||
~DocAnsiReader();
|
||||
|
||||
private:
|
||||
void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
|
||||
};
|
||||
|
||||
// Remembers the destination buffer and its capacity; nothing is stored yet.
DocReader::DocReader(char *buffer, size_t maxSize) :
	myBuffer(buffer),
	myMaxSize(maxSize),
	myActualSize(0) {
}

// Nothing to release: the buffer is not freed here.
DocReader::~DocReader() {
}
|
||||
|
||||
// Pulls pieces from the stream until the buffer is full or readNextPiece()
// reports that nothing more can be read. Always returns true.
bool DocReader::readStream(OleMainStream &stream) {
	for (;;) {
		if (myActualSize >= myMaxSize) {
			break;
		}
		if (!readNextPiece(stream)) {
			break;
		}
	}
	return true;
}
|
||||
|
||||
void DocReader::dataHandler(const char*, size_t) {
|
||||
}
|
||||
|
||||
void DocReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char) {
|
||||
}
|
||||
|
||||
void DocReader::footnoteHandler() {
|
||||
}
|
||||
|
||||
size_t DocReader::readSize() const {
|
||||
return myActualSize;
|
||||
}
|
||||
|
||||
// Forwards the buffer and its capacity to DocReader.
DocCharReader::DocCharReader(char *buffer, size_t maxSize) :
	DocReader(buffer, maxSize) {
}

// No resources of its own to release.
DocCharReader::~DocCharReader() {
}
|
||||
|
||||
void DocCharReader::dataHandler(const char *buffer, size_t dataLength) {
|
||||
if (myActualSize < myMaxSize) {
|
||||
const size_t len = std::min(dataLength, myMaxSize - myActualSize);
|
||||
strncpy(myBuffer + myActualSize, buffer, len);
|
||||
myActualSize += len;
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards the buffer and its capacity to DocReader.
DocAnsiReader::DocAnsiReader(char *buffer, size_t maxSize) :
	DocReader(buffer, maxSize) {
}

// No resources of its own to release.
DocAnsiReader::~DocAnsiReader() {
}
|
||||
|
||||
// Encodes `symbol` as UTF-8 and appends the bytes to the buffer while space
// remains.
//
// NOTE(review): when fewer bytes remain than the encoded length, only the
// leading bytes are stored, so the buffer can end in a truncated sequence.
void DocAnsiReader::ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
	if (myActualSize >= myMaxSize) {
		return;
	}
	char encoded[4];
	const size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(encoded, symbol);
	const size_t len = std::min(dataLength, myMaxSize - myActualSize);
	// `encoded` is a plain byte array without a terminator, so it is copied
	// with memcpy rather than the C-string function strncpy.
	memcpy(myBuffer + myActualSize, encoded, len);
	myActualSize += len;
}
|
||||
|
||||
// Creates a closed stream over the text of `file`.
//
// file    - document to read
// maxSize - maximum number of text bytes to buffer on open()
//
// myOffset is initialised here so that offset()/seek()/read() never observe
// an indeterminate value when called before a successful open().
DocStream::DocStream(const ZLFile& file, size_t maxSize) :
	myFile(file),
	myBuffer(0),
	mySize(maxSize),
	myOffset(0) {
}

// Releases the text buffer, if one is still allocated.
DocStream::~DocStream() {
	close();
}
|
||||
|
||||
// Extracts up to mySize bytes of document text into a newly allocated buffer,
// using the reader supplied by createReader().
//
// Returns true when the document was read; mySize then holds the number of
// bytes actually captured and the read position is 0. Returns false when the
// underlying file stream cannot be opened or the document cannot be read.
bool DocStream::open() {
	// Drop a buffer left over from a previous open() so it is not leaked,
	// and make the read position well defined even if opening fails.
	delete[] myBuffer;
	myBuffer = 0;
	myOffset = 0;

	if (mySize != 0) {
		myBuffer = new char[mySize];
	}
	shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
	shared_ptr<ZLInputStream> stream = myFile.inputStream();
	if (stream.isNull() || !stream->open()) {
		return false;
	}
	if (!reader->readDocument(stream)) {
		return false;
	}
	// From now on mySize is the amount of text available, not the capacity.
	mySize = reader->readSize();
	return true;
}
|
||||
|
||||
size_t DocStream::read(char *buffer, size_t maxSize) {
|
||||
maxSize = std::min(maxSize, mySize - myOffset);
|
||||
if ((buffer != 0) && (myBuffer !=0)) {
|
||||
memcpy(buffer, myBuffer + myOffset, maxSize);
|
||||
}
|
||||
myOffset += maxSize;
|
||||
return maxSize;
|
||||
}
|
||||
|
||||
void DocStream::close() {
|
||||
if (myBuffer != 0) {
|
||||
delete[] myBuffer;
|
||||
myBuffer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Moves the read position.
//
// offset         - target position, or a delta when absoluteOffset is false
// absoluteOffset - true: offset counts from the start; false: from the
//                  current position
//
// The resulting position is clamped to the range [0, mySize].
void DocStream::seek(int offset, bool absoluteOffset) {
	int target = offset;
	if (!absoluteOffset) {
		target += myOffset;
	}
	const size_t nonNegative = (0 < target) ? (size_t)target : (size_t)0;
	myOffset = (nonNegative < mySize) ? nonNegative : mySize;
}
|
||||
|
||||
size_t DocStream::offset() const {
|
||||
return myOffset;
|
||||
}
|
||||
|
||||
size_t DocStream::sizeOfOpened() {
|
||||
return mySize;
|
||||
}
|
||||
|
||||
DocCharStream::DocCharStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
|
||||
}
|
||||
|
||||
DocCharStream::~DocCharStream() {
|
||||
}
|
||||
|
||||
shared_ptr<DocReader> DocCharStream::createReader(char *buffer, size_t maxSize) {
|
||||
return new DocCharReader(buffer, maxSize);
|
||||
}
|
||||
|
||||
DocAnsiStream::DocAnsiStream(const ZLFile& file, size_t maxSize) : DocStream(file, maxSize) {
|
||||
}
|
||||
|
||||
DocAnsiStream::~DocAnsiStream() {
|
||||
}
|
||||
|
||||
shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, size_t maxSize) {
|
||||
return new DocAnsiReader(buffer, maxSize);
|
||||
}
|
|
@ -17,19 +17,19 @@
|
|||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __DOCREADERSTREAM_H__
|
||||
#define __DOCREADERSTREAM_H__
|
||||
|
||||
#include <string>
|
||||
#ifndef __DOCSTREAMS_H__
|
||||
#define __DOCSTREAMS_H__
|
||||
|
||||
#include <ZLFile.h>
|
||||
#include <ZLInputStream.h>
|
||||
|
||||
class DocReaderStream : public ZLInputStream {
|
||||
class DocReader;
|
||||
|
||||
class DocStream : public ZLInputStream {
|
||||
|
||||
public:
|
||||
DocReaderStream(const ZLFile& file, size_t maxSize);
|
||||
~DocReaderStream();
|
||||
DocStream(const ZLFile& file, size_t maxSize);
|
||||
~DocStream();
|
||||
|
||||
private:
|
||||
bool open();
|
||||
|
@ -40,6 +40,9 @@ private:
|
|||
size_t offset() const;
|
||||
size_t sizeOfOpened();
|
||||
|
||||
protected:
|
||||
virtual shared_ptr<DocReader> createReader(char *buffer, size_t maxSize) = 0;
|
||||
|
||||
private:
|
||||
const ZLFile myFile;
|
||||
char *myBuffer;
|
||||
|
@ -47,4 +50,24 @@ private:
|
|||
size_t myOffset;
|
||||
};
|
||||
|
||||
#endif /* __DOCREADERSTREAM_H__ */
|
||||
class DocCharStream : public DocStream {
|
||||
|
||||
public:
|
||||
DocCharStream(const ZLFile& file, size_t maxSize);
|
||||
~DocCharStream();
|
||||
|
||||
private:
|
||||
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
|
||||
};
|
||||
|
||||
class DocAnsiStream : public DocStream {
|
||||
|
||||
public:
|
||||
DocAnsiStream(const ZLFile& file, size_t maxSize);
|
||||
~DocAnsiStream();
|
||||
|
||||
private:
|
||||
shared_ptr<DocReader> createReader(char *buffer, size_t maxSize);
|
||||
};
|
||||
|
||||
#endif /* __DOCSTREAMS_H__ */
|
210
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp
Normal file
210
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.cpp
Normal file
|
@ -0,0 +1,210 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
//#include <cctype>
|
||||
//#include <cstring>
|
||||
|
||||
#include <ZLLogger.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "OleUtil.h"
|
||||
#include "OleStreamParser.h"
|
||||
|
||||
//word's control chars:
|
||||
// Control characters used inside Word text streams, listed by code.
const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;

// Plain Unicode code points.
const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0000;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x001c;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x0020;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x002d;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x007c;
const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0x00ad;
|
||||
|
||||
// Starts parsing from the very beginning: every position and every
// "next entry" index is reset to zero.
OleStreamParser::OleStreamParser() {
	// positions in the piece buffer and in the document text
	myCurBufferPosition = 0;
	myCurCharPos = 0;

	// cursors into the per-document info lists
	myNextStyleInfoIndex = 0;
	myNextCharInfoIndex = 0;
	myNextBookmarkIndex = 0;
	myNextInlineImageInfoIndex = 0;
	myNextFloatImageInfoIndex = 0;
}
|
||||
|
||||
// Reads the stream one UCS-2 character at a time and dispatches each one to
// the matching handle*() callback. Always returns true.
bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
	// Set when a table separator was just read. The next character decides
	// which callback fires: a second separator triggers handleTableEndRow(),
	// anything else triggers handleTableSeparator() before it is processed.
	bool separatorPending = false;
	ZLUnicodeUtil::Ucs2Char symbol;

	while (getUcs2Char(oleMainStream, symbol)) {
		if (separatorPending) {
			separatorPending = false;
			if (symbol == WORD_TABLE_SEPARATOR) {
				handleTableEndRow();
				continue;
			}
			handleTableSeparator();
		}

		if (symbol >= 32) {
			// ordinary text, except for U+FEFF which is skipped
			if (symbol != WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
				handleChar(symbol);
			}
			continue;
		}

		// control characters (code below 32)
		if (symbol == NULL_SYMBOL || symbol == INLINE_IMAGE || symbol == FLOAT_IMAGE) {
			// nothing to dispatch here
		} else if (symbol == WORD_HARD_LINEBREAK) {
			handleHardLinebreak();
		} else if (symbol == WORD_END_OF_PARAGRAPH || symbol == WORD_PAGE_BREAK) {
			handleParagraphEnd();
		} else if (symbol == WORD_TABLE_SEPARATOR) {
			separatorPending = true;
		} else if (symbol == WORD_FOOTNOTE_MARK) {
			handleFootNoteMark();
		} else if (symbol == WORD_START_FIELD) {
			handleStartField();
		} else if (symbol == WORD_SEPARATOR_FIELD) {
			handleSeparatorField();
		} else if (symbol == WORD_END_FIELD) {
			handleEndField();
		} else {
			handleOtherControlChar(symbol);
		}
	}

	return true;
}
|
||||
|
||||
// Fetches the next character of the document into `ucs2char`.
//
// Returns false when the buffer is exhausted and readNextPiece() cannot
// deliver more data; returns true otherwise. Before returning true it runs
// processStyles() for the current character position, processes an image if
// the character is an image marker, and then advances myCurCharPos.
bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
	// Refill the buffer; a piece may be empty, hence the loop.
	while (myCurBufferPosition >= myBuffer.size()) {
		myBuffer.clear();
		myCurBufferPosition = 0;
		if (!readNextPiece(stream)) {
			return false;
		}
	}

	ucs2char = myBuffer.at(myCurBufferPosition);
	++myCurBufferPosition;

	processStyles(stream);

	if (ucs2char == INLINE_IMAGE) {
		processInlineImage(stream);
	} else if (ucs2char == FLOAT_IMAGE) {
		processFloatImage(stream);
	}

	++myCurCharPos;
	return true;
}
|
||||
|
||||
// Passes every inline image registered for the current character position to
// handleImage(). Entries registered for earlier positions are skipped, and
// entries whose image data is empty are ignored.
void OleStreamParser::processInlineImage(OleMainStream &stream) {
	const OleMainStream::InlineImageInfoList &images = stream.getInlineImageInfoList();
	if (images.empty()) {
		return;
	}

	// Skip entries that lie before the current position: not every entry
	// corresponds to a real picture.
	for (; myNextInlineImageInfoIndex < images.size(); ++myNextInlineImageInfoIndex) {
		if (!(images.at(myNextInlineImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Handle all entries located exactly at the current position.
	for (; myNextInlineImageInfoIndex < images.size(); ++myNextInlineImageInfoIndex) {
		if (!(images.at(myNextInlineImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::InlineImageInfo imageInfo = images.at(myNextInlineImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getInlineImage(imageInfo.DataPosition);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
|
||||
|
||||
// Passes every floating image registered for the current character position
// to handleImage(). Entries registered for earlier positions are skipped, and
// entries whose image data is empty are ignored.
void OleStreamParser::processFloatImage(OleMainStream &stream) {
	const OleMainStream::FloatImageInfoList &images = stream.getFloatImageInfoList();
	if (images.empty()) {
		return;
	}

	// Skip entries that lie before the current position: not every entry
	// corresponds to a real picture.
	for (; myNextFloatImageInfoIndex < images.size(); ++myNextFloatImageInfoIndex) {
		if (!(images.at(myNextFloatImageInfoIndex).first < myCurCharPos)) {
			break;
		}
	}

	// Handle all entries located exactly at the current position.
	for (; myNextFloatImageInfoIndex < images.size(); ++myNextFloatImageInfoIndex) {
		if (!(images.at(myNextFloatImageInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::FloatImageInfo imageInfo = images.at(myNextFloatImageInfoIndex).second;
		ZLFileImage::Blocks blocks = stream.getFloatImage(imageInfo.ShapeId);
		if (!blocks.empty()) {
			handleImage(blocks);
		}
	}
}
|
||||
|
||||
// Fires the style callbacks for the current character position: first
// paragraph styles, then font styles, then bookmarks. Each list is consumed
// through its own cursor, and only entries located exactly at myCurCharPos
// are reported.
void OleStreamParser::processStyles(OleMainStream &stream) {
	// paragraph styles
	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
	for (; myNextStyleInfoIndex < paragraphStyles.size(); ++myNextStyleInfoIndex) {
		if (!(paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
		handleParagraphStyle(style);
	}

	// character (font) styles
	const OleMainStream::CharInfoList &charStyles = stream.getCharInfoList();
	for (; myNextCharInfoIndex < charStyles.size(); ++myNextCharInfoIndex) {
		if (!(charStyles.at(myNextCharInfoIndex).first == myCurCharPos)) {
			break;
		}
		OleMainStream::CharInfo charInfo = charStyles.at(myNextCharInfoIndex).second;
		handleFontStyle(charInfo.FontStyle);
	}

	// bookmarks
	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
	for (; myNextBookmarkIndex < bookmarks.size(); ++myNextBookmarkIndex) {
		if (!(bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos)) {
			break;
		}
		OleMainStream::Bookmark bookmark = bookmarks.at(myNextBookmarkIndex);
		handleBookmark(bookmark.Name);
	}
}
|
101
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h
Normal file
101
jni/NativeFormats/fbreader/src/formats/doc/OleStreamParser.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __OLESTREAMPARSER_H__
|
||||
#define __OLESTREAMPARSER_H__
|
||||
|
||||
#include <ZLUnicodeUtil.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
// Character-level parser for the main stream of a Word document stored in an
// OLE container. It derives from OleStreamReader and declares a set of pure
// virtual handle*() callbacks, so it is abstract: a concrete subclass must
// implement every callback to receive text, control characters, images,
// styles and bookmarks.
class OleStreamParser : public OleStreamReader {
|
||||
|
||||
public:
|
||||
// Special character codes used by Word inside the text stream.
// Only declared here; the values are defined out of class
// (presumably in OleStreamParser.cpp -- confirm).
//word's control chars:
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
|
||||
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
|
||||
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
|
||||
|
||||
// Ordinary Unicode code points, named for readability.
//unicode values:
|
||||
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
|
||||
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
||||
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
||||
static const ZLUnicodeUtil::Ucs2Char MINUS;
|
||||
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
||||
|
||||
public:
|
||||
OleStreamParser();
|
||||
|
||||
private:
|
||||
// NOTE(review): has the same signature as the pure virtual
// OleStreamReader::readStream(), so it appears to be the implementation of
// that hook; being private, it is reachable only through the base class.
bool readStream(OleMainStream &stream);
|
||||
|
||||
protected:
|
||||
// Callbacks for plain characters and for Word control characters.
// All are pure virtual; presumably invoked from readStream() -- confirm
// against OleStreamParser.cpp.
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
virtual void handleHardLinebreak() = 0;
|
||||
virtual void handleParagraphEnd() = 0;
|
||||
virtual void handlePageBreak() = 0;
|
||||
virtual void handleTableSeparator() = 0;
|
||||
virtual void handleTableEndRow() = 0;
|
||||
virtual void handleFootNoteMark() = 0;
|
||||
virtual void handleStartField() = 0;
|
||||
virtual void handleSeparatorField() = 0;
|
||||
virtual void handleEndField() = 0;
|
||||
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
|
||||
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
|
||||
// Callbacks for formatting information and bookmarks.
virtual void handleFontStyle(unsigned int fontStyle) = 0;
|
||||
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
|
||||
virtual void handleBookmark(const std::string &name) = 0;
|
||||
|
||||
private:
|
||||
// Internal helpers; not accessible to subclasses.
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
|
||||
void processInlineImage(OleMainStream &stream);
|
||||
void processFloatImage(OleMainStream &stream);
|
||||
void processStyles(OleMainStream &stream);
|
||||
|
||||
// NOTE(review): this 'private:' is immediately superseded by the
// 'protected:' that follows and therefore has no effect.
private:
|
||||
protected:
|
||||
// Buffer of UCS-2 characters; protected, so subclasses can access it directly.
ZLUnicodeUtil::Ucs2String myBuffer;
|
||||
private:
|
||||
// Presumably the index of the next unread character in myBuffer -- confirm.
size_t myCurBufferPosition;
|
||||
|
||||
// Presumably the position of the current character within the document text -- confirm.
unsigned int myCurCharPos;
|
||||
|
||||
// Presumably cursors into the style, character-info, bookmark and image
// lists provided by OleMainStream (next entry not yet reported) -- confirm.
size_t myNextStyleInfoIndex;
|
||||
size_t myNextCharInfoIndex;
|
||||
size_t myNextBookmarkIndex;
|
||||
size_t myNextInlineImageInfoIndex;
|
||||
size_t myNextFloatImageInfoIndex;
|
||||
};
|
||||
|
||||
#endif /* __OLESTREAMPARSER_H__ */
|
|
@ -17,59 +17,13 @@
|
|||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
|
||||
#include <ZLLogger.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
#include "DocBookReader.h"
|
||||
#include "OleUtil.h"
|
||||
#include "DocInlineImageReader.h"
|
||||
|
||||
#include "OleStreamReader.h"
|
||||
|
||||
// Out-of-class definitions of the static Ucs2Char constants of OleStreamReader.
// Special character codes that Word uses inside the text stream:
//word's control chars:
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_FOOTNOTE_MARK = 0x0002;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_TABLE_SEPARATOR = 0x0007;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_FIELD = 0x0015;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::INLINE_IMAGE = 0x0001;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FLOAT_IMAGE = 0x0008;
|
||||
|
||||
// Ordinary Unicode code points, named for readability:
//unicode values:
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::NULL_SYMBOL = 0x0;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D;
|
||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
|
||||
|
||||
OleStreamReader::OleStreamReader(const std::string &encoding) :
|
||||
myEncoding(encoding) {
|
||||
clear();
|
||||
}
|
||||
|
||||
void OleStreamReader::clear() {
|
||||
myBuffer.clear();
|
||||
myCurBufferPosition = 0;
|
||||
myNextPieceNumber = 0;
|
||||
|
||||
myCurCharPos = 0;
|
||||
myNextStyleInfoIndex = 0;
|
||||
myNextCharInfoIndex = 0;
|
||||
myNextBookmarkIndex = 0;
|
||||
myNextInlineImageInfoIndex = 0;
|
||||
myNextFloatImageInfoIndex = 0;
|
||||
OleStreamReader::OleStreamReader() : myNextPieceNumber(0) {
|
||||
}
|
||||
|
||||
bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
||||
|
@ -78,7 +32,7 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
|||
shared_ptr<OleStorage> storage = new OleStorage;
|
||||
|
||||
if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
|
||||
ZLLogger::Instance().println("DocBookReader", "Broken OLE file!");
|
||||
ZLLogger::Instance().println("OleStreamReader", "Broken OLE file");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -88,176 +42,22 @@ bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
|||
}
|
||||
|
||||
OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
|
||||
if (!oleStream.open()) {
|
||||
ZLLogger::Instance().println("OleStreamReader", "Cannot open OleMainStream");
|
||||
return false;
|
||||
}
|
||||
return readStream(oleStream);
|
||||
}
|
||||
|
||||
// Reads the whole main stream one UCS-2 character at a time and dispatches
// each character to the matching handle*() callback.
// Resets the reader state via clear() first. Returns false (after logging)
// if the stream cannot be opened; otherwise returns true once getUcs2Char()
// reports that no more characters are available.
bool OleStreamReader::readStream(OleMainStream &oleMainStream) {
|
||||
clear();
|
||||
|
||||
if (!oleMainStream.open()) {
|
||||
ZLLogger::Instance().println("OleStreamReader", "doesn't open correct");
|
||||
return false;
|
||||
}
|
||||
ZLUnicodeUtil::Ucs2Char ucs2char;
|
||||
// Set when the previous character was WORD_TABLE_SEPARATOR; the decision
// between "cell separator" and "end of row" is deferred to the next character.
bool tabMode = false;
|
||||
while (getUcs2Char(oleMainStream, ucs2char)) {
|
||||
// Leftover debug hook: the body contains only a commented-out printf.
if (ucs2char < 32) { //< 32 are control symbols
|
||||
//printf("[0x%x]", ucs2char); //debug output
|
||||
}
|
||||
|
||||
if (tabMode) {
|
||||
tabMode = false;
|
||||
// Two consecutive separators are reported as the end of a table row and
// the second one is consumed; a single separator is reported as a cell
// separator and the current character is then processed normally below.
if (ucs2char == WORD_TABLE_SEPARATOR) {
|
||||
handleTableEndRow();
|
||||
continue;
|
||||
} else {
|
||||
handleTableSeparator();
|
||||
}
|
||||
}
|
||||
|
||||
if (ucs2char < 32) {
|
||||
switch (ucs2char) {
|
||||
case NULL_SYMBOL:
|
||||
break;
|
||||
case WORD_HARD_LINEBREAK:
|
||||
//printf("\n");
|
||||
handleHardLinebreak();
|
||||
break;
|
||||
// A page break is reported as a paragraph end here; handlePageBreak()
// is not called from this function.
case WORD_END_OF_PARAGRAPH:
|
||||
case WORD_PAGE_BREAK:
|
||||
//printf("\n");
|
||||
handleParagraphEnd();
|
||||
break;
|
||||
case WORD_TABLE_SEPARATOR:
|
||||
tabMode = true;
|
||||
break;
|
||||
case WORD_FOOTNOTE_MARK:
|
||||
handleFootNoteMark();
|
||||
break;
|
||||
case WORD_START_FIELD:
|
||||
handleStartField();
|
||||
break;
|
||||
case WORD_SEPARATOR_FIELD:
|
||||
handleSeparatorField();
|
||||
break;
|
||||
case WORD_END_FIELD:
|
||||
handleEndField();
|
||||
break;
|
||||
// Nothing to do here: getUcs2Char() already called processInlineImage() /
// processFloatImage() for these characters.
case INLINE_IMAGE: case FLOAT_IMAGE:
|
||||
break;
|
||||
default:
|
||||
handleOtherControlChar(ucs2char);
|
||||
break;
|
||||
}
|
||||
} else if (ucs2char == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
|
||||
continue; //skip
|
||||
} else {
|
||||
//debug output
|
||||
// std::string utf8String;
|
||||
// ZLUnicodeUtil::Ucs2String ucs2String;
|
||||
// ucs2String.push_back(ucs2char);
|
||||
// ZLUnicodeUtil::ucs2ToUtf8(utf8String, ucs2String);
|
||||
// printf("%s", utf8String.c_str());
|
||||
|
||||
handleChar(ucs2char);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): if the stream ends while tabMode is still set, the pending
// table separator is never reported to any handler -- confirm this is intended.
return true;
|
||||
}
|
||||
|
||||
// Fetches the next character from myBuffer into ucs2char, refilling the
// buffer through fillBuffer() when it is exhausted.
// Returns false when the buffer is exhausted and fillBuffer() fails;
// otherwise returns true.
bool OleStreamReader::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
|
||||
if (myCurBufferPosition >= myBuffer.size() && !fillBuffer(stream)) {
|
||||
return false;
|
||||
}
|
||||
// NOTE(review): if fillBuffer() succeeds but leaves myBuffer empty, this
// at() call would be out of range -- confirm that cannot happen.
ucs2char = myBuffer.at(myCurBufferPosition++);
|
||||
// Style, font and bookmark callbacks fire before the character itself is
// returned, while myCurCharPos still refers to this character.
processStyles(stream);
|
||||
|
||||
if (ucs2char == INLINE_IMAGE) {
|
||||
processInlineImage(stream);
|
||||
} else if (ucs2char == FLOAT_IMAGE) {
|
||||
processFloatImage(stream);
|
||||
}
|
||||
// Advance the position only after all position-based lookups above.
++myCurCharPos;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reports every inline image registered at the current character position.
// Each list entry pairs a character position (first) with image info
// (second). handleImage() is called only when the stream returns a non-empty
// block list for the entry.
void OleStreamReader::processInlineImage(OleMainStream &stream) {
|
||||
const OleMainStream::InlineImageInfoList &imageInfoList = stream.getInlineImageInfoList();
|
||||
if (imageInfoList.empty()) {
|
||||
return;
|
||||
}
|
||||
// Skip entries whose position is already behind the current character.
//seek to curCharPos, because not all entries are real pictures
|
||||
while(myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first < myCurCharPos) {
|
||||
++myNextInlineImageInfoIndex;
|
||||
}
|
||||
// Consume all entries located exactly at the current character.
while (myNextInlineImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextInlineImageInfoIndex).first == myCurCharPos) {
|
||||
OleMainStream::InlineImageInfo info = imageInfoList.at(myNextInlineImageInfoIndex).second;
|
||||
ZLFileImage::Blocks list = stream.getInlineImage(info.DataPosition);
|
||||
if (!list.empty()) {
|
||||
handleImage(list);
|
||||
}
|
||||
++myNextInlineImageInfoIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Reports every floating image registered at the current character position.
// Mirrors processInlineImage(), but looks the image up by ShapeId via
// getFloatImage(). handleImage() is called only for non-empty block lists.
void OleStreamReader::processFloatImage(OleMainStream &stream) {
|
||||
const OleMainStream::FloatImageInfoList &imageInfoList = stream.getFloatImageInfoList();
|
||||
if (imageInfoList.empty()) {
|
||||
return;
|
||||
}
|
||||
// Skip entries whose position is already behind the current character.
//seek to curCharPos, because not all entries are real pictures
|
||||
while(myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first < myCurCharPos) {
|
||||
++myNextFloatImageInfoIndex;
|
||||
}
|
||||
// Consume all entries located exactly at the current character.
while (myNextFloatImageInfoIndex < imageInfoList.size() && imageInfoList.at(myNextFloatImageInfoIndex).first == myCurCharPos) {
|
||||
OleMainStream::FloatImageInfo info = imageInfoList.at(myNextFloatImageInfoIndex).second;
|
||||
ZLFileImage::Blocks list = stream.getFloatImage(info.ShapeId);
|
||||
if (!list.empty()) {
|
||||
handleImage(list);
|
||||
}
|
||||
++myNextFloatImageInfoIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Fires the paragraph-style, font-style and bookmark callbacks for all
// entries registered at the current character position (myCurCharPos).
// NOTE(review): unlike the image helpers, the loops below match with '=='
// only and never skip entries with a smaller position; this presumably relies
// on the lists being ordered by position and on this function being called
// for every character -- confirm.
void OleStreamReader::processStyles(OleMainStream &stream) {
|
||||
// Paragraph styles.
const OleMainStream::StyleInfoList &styleInfoList = stream.getStyleInfoList();
|
||||
if (!styleInfoList.empty()) {
|
||||
while (myNextStyleInfoIndex < styleInfoList.size() && styleInfoList.at(myNextStyleInfoIndex).first == myCurCharPos) {
|
||||
OleMainStream::Style info = styleInfoList.at(myNextStyleInfoIndex).second;
|
||||
handleParagraphStyle(info);
|
||||
++myNextStyleInfoIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Character (font) styles; only the FontStyle field is passed on.
const OleMainStream::CharInfoList &charInfoList = stream.getCharInfoList();
|
||||
if (!charInfoList.empty()) {
|
||||
while (myNextCharInfoIndex < charInfoList.size() && charInfoList.at(myNextCharInfoIndex).first == myCurCharPos) {
|
||||
OleMainStream::CharInfo info = charInfoList.at(myNextCharInfoIndex).second;
|
||||
handleFontStyle(info.FontStyle);
|
||||
++myNextCharInfoIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Bookmarks; only the bookmark name is passed on.
const OleMainStream::BookmarksList &bookmarksList = stream.getBookmarks();
|
||||
if (!bookmarksList.empty()) {
|
||||
while (myNextBookmarkIndex < bookmarksList.size() && bookmarksList.at(myNextBookmarkIndex).CharPosition == myCurCharPos) {
|
||||
OleMainStream::Bookmark bookmark = bookmarksList.at(myNextBookmarkIndex);
|
||||
handleBookmark(bookmark.Name);
|
||||
++myNextBookmarkIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||
bool OleStreamReader::readNextPiece(OleMainStream &stream) {
|
||||
const OleMainStream::Pieces &pieces = stream.getPieces();
|
||||
if (myNextPieceNumber >= pieces.size()) {
|
||||
return false; //end of reading
|
||||
return false;
|
||||
}
|
||||
const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
|
||||
|
||||
if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
|
||||
handlePageBreak();
|
||||
footnoteHandler();
|
||||
} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
|
||||
return false;
|
||||
}
|
||||
|
@ -272,32 +72,15 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
|||
ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece");
|
||||
}
|
||||
|
||||
myBuffer.clear();
|
||||
if (!piece.IsANSI) {
|
||||
for (size_t i = 0; i < readBytes; i += 2) {
|
||||
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
||||
myBuffer.push_back(ch);
|
||||
ansiSymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
|
||||
}
|
||||
} else {
|
||||
dataHandler(textBuffer, readBytes);
|
||||
}
|
||||
myCurBufferPosition = 0;
|
||||
++myNextPieceNumber;
|
||||
delete[] textBuffer;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts 'len' bytes starting at 'buffer' to UTF-8 with the converter for
// myEncoding, then converts that UTF-8 text to UCS-2 into myBuffer.
// NOTE(review): whether utf8ToUcs2() appends to or replaces the contents of
// myBuffer is not visible here -- confirm.
void OleStreamReader::dataHandler(const char *buffer, size_t len) {
|
||||
if (myConverter.isNull()) {
|
||||
// The converter is created on first use and then kept in myConverter.
// lazy converter initialization
|
||||
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
||||
myConverter = collection.converter(myEncoding);
|
||||
// Fall back to the collection's default converter when none is
// available for myEncoding.
if (myConverter.isNull()) {
|
||||
myConverter = collection.defaultConverter();
|
||||
}
|
||||
}
|
||||
std::string utf8String;
|
||||
myConverter->convert(utf8String, buffer, buffer + len);
|
||||
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
||||
}
|
||||
|
|
|
@ -21,89 +21,26 @@
|
|||
#define __OLESTREAMREADER_H__
|
||||
|
||||
#include <ZLUnicodeUtil.h>
|
||||
#include <ZLEncodingConverter.h>
|
||||
|
||||
#include "OleMainStream.h"
|
||||
|
||||
class OleStreamReader {
|
||||
|
||||
public:
|
||||
//word's control chars:
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
|
||||
static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
|
||||
static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
|
||||
static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
|
||||
|
||||
//unicode values:
|
||||
static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
|
||||
static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
|
||||
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
||||
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
||||
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
||||
static const ZLUnicodeUtil::Ucs2Char MINUS;
|
||||
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
||||
|
||||
public:
|
||||
OleStreamReader(const std::string &encoding);
|
||||
OleStreamReader();
|
||||
bool readDocument(shared_ptr<ZLInputStream> stream);
|
||||
void clear();
|
||||
|
||||
private:
|
||||
bool readStream(OleMainStream &stream);
|
||||
|
||||
protected:
|
||||
virtual void dataHandler(const char *buffer, size_t len);
|
||||
virtual bool readStream(OleMainStream &stream) = 0;
|
||||
|
||||
//virtual void parapgraphHandler(std::string paragraph) = 0;
|
||||
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
virtual void handleHardLinebreak() = 0;
|
||||
virtual void handleParagraphEnd() = 0;
|
||||
virtual void handlePageBreak() = 0;
|
||||
virtual void handleTableSeparator() = 0;
|
||||
virtual void handleTableEndRow() = 0;
|
||||
virtual void handleFootNoteMark() = 0;
|
||||
virtual void handleStartField() = 0;
|
||||
virtual void handleSeparatorField() = 0;
|
||||
virtual void handleEndField() = 0;
|
||||
virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
|
||||
virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||
bool readNextPiece(OleMainStream &stream);
|
||||
|
||||
virtual void handleFontStyle(unsigned int fontStyle) = 0;
|
||||
virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
|
||||
virtual void handleBookmark(const std::string &name) = 0;
|
||||
virtual void dataHandler(const char *buffer, size_t len) = 0;
|
||||
virtual void ansiSymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
|
||||
virtual void footnoteHandler() = 0;
|
||||
|
||||
private:
|
||||
bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
|
||||
void processInlineImage(OleMainStream &stream);
|
||||
void processFloatImage(OleMainStream &stream);
|
||||
void processStyles(OleMainStream &stream);
|
||||
bool fillBuffer(OleMainStream &stream);
|
||||
|
||||
private:
|
||||
ZLUnicodeUtil::Ucs2String myBuffer;
|
||||
size_t myCurBufferPosition;
|
||||
size_t myNextPieceNumber;
|
||||
|
||||
shared_ptr<ZLEncodingConverter> myConverter;
|
||||
const std::string myEncoding;
|
||||
|
||||
unsigned int myCurCharPos;
|
||||
|
||||
size_t myNextStyleInfoIndex;
|
||||
size_t myNextCharInfoIndex;
|
||||
size_t myNextBookmarkIndex;
|
||||
size_t myNextInlineImageInfoIndex;
|
||||
size_t myNextFloatImageInfoIndex;
|
||||
};
|
||||
|
||||
#endif /* __OLESTREAMREADER_H__ */
|
||||
|
|
|
@ -135,7 +135,7 @@ shared_ptr<const ZLImage> OEBPlugin::coverImage(const ZLFile &file) const {
|
|||
bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
|
||||
if (book.language().empty()) {
|
||||
shared_ptr<ZLInputStream> oebStream = new OEBTextStream(opfFile(book.file()));
|
||||
detectLanguage(book, *oebStream);
|
||||
detectLanguage(book, *oebStream, book.encoding());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ bool RtfPlugin::readMetaInfo(Book &book) const {
|
|||
} else if (book.language().empty()) {
|
||||
shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000);
|
||||
if (!stream.isNull()) {
|
||||
detectLanguage(book, *stream);
|
||||
detectLanguage(book, *stream, book.encoding());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,9 @@ friend class DummyEncodingConverterProvider;
|
|||
|
||||
bool DummyEncodingConverterProvider::providesConverter(const std::string &encoding) {
|
||||
const std::string lowerCasedEncoding = ZLUnicodeUtil::toLower(encoding);
|
||||
return (lowerCasedEncoding == "utf-8") || (lowerCasedEncoding == "us-ascii");
|
||||
return
|
||||
lowerCasedEncoding == ZLEncodingConverter::UTF8 ||
|
||||
lowerCasedEncoding == ZLEncodingConverter::ASCII;
|
||||
}
|
||||
|
||||
shared_ptr<ZLEncodingConverter> DummyEncodingConverterProvider::createConverter(const std::string &name) {
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "ZLEncodingConverter.h"
|
||||
#include "ZLEncodingConverterProvider.h"
|
||||
|
||||
// Encoding names, spelled in lower case, exposed as static members of
// ZLEncodingConverter.
const std::string ZLEncodingConverter::ASCII = "us-ascii";
|
||||
const std::string ZLEncodingConverter::UTF8 = "utf-8";
|
||||
const std::string ZLEncodingConverter::UTF16 = "utf-16";
|
||||
const std::string ZLEncodingConverter::UTF16BE = "utf-16be";
|
||||
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
class ZLEncodingConverter {
|
||||
|
||||
public:
|
||||
static const std::string ASCII;
|
||||
static const std::string UTF8;
|
||||
static const std::string UTF16;
|
||||
static const std::string UTF16BE;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue