mirror of
https://github.com/geometer/FBReaderJ.git
synced 2025-10-03 17:59:33 +02:00
doc (ms word) files encoding auto detection
This commit is contained in:
parent
e2e72cc046
commit
7f30c5fa73
13 changed files with 298 additions and 72 deletions
|
@ -128,6 +128,7 @@ LOCAL_SRC_FILES := \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \
|
||||||
|
NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \
|
||||||
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
NativeFormats/fbreader/src/formats/doc/OleStream.cpp \
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
* 02110-1301, USA.
|
* 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -44,39 +43,14 @@ DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
|
||||||
bool DocBookReader::readBook() {
|
bool DocBookReader::readBook() {
|
||||||
const ZLFile &file = myModelReader.model().book()->file();
|
const ZLFile &file = myModelReader.model().book()->file();
|
||||||
shared_ptr<ZLInputStream> stream = file.inputStream();
|
shared_ptr<ZLInputStream> stream = file.inputStream();
|
||||||
if (stream.isNull()) {
|
if (stream.isNull() || !stream->open()) {
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return readDocument(stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DocBookReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
|
|
||||||
static const std::string WORD_DOCUMENT = "WordDocument";
|
|
||||||
|
|
||||||
if (inputStream.isNull() || !inputStream->open()) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
myModelReader.setMainTextModel();
|
myModelReader.setMainTextModel();
|
||||||
myModelReader.pushKind(REGULAR);
|
myModelReader.pushKind(REGULAR);
|
||||||
myModelReader.beginParagraph();
|
myModelReader.beginParagraph();
|
||||||
|
|
||||||
shared_ptr<OleStorage> storage = new OleStorage;
|
if (!readDocument(stream)) {
|
||||||
|
|
||||||
if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
|
|
||||||
ZLLogger::Instance().println("DocBookReader", "Broken OLE file!");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
OleEntry wordDocumentEntry;
|
|
||||||
bool result = storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry);
|
|
||||||
if (!result) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
|
|
||||||
result = readStream(oleStream);
|
|
||||||
if (!result) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,8 +213,8 @@ void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
||||||
if (ucs2char == WORD_SHORT_DEFIS) {
|
if (ucs2char == WORD_MINUS) {
|
||||||
handleChar(SHORT_DEFIS);
|
handleChar(MINUS);
|
||||||
} else if (ucs2char == WORD_SOFT_HYPHEN) {
|
} else if (ucs2char == WORD_SOFT_HYPHEN) {
|
||||||
//skip
|
//skip
|
||||||
} else if (ucs2char == WORD_HORIZONTAL_TAB) {
|
} else if (ucs2char == WORD_HORIZONTAL_TAB) {
|
||||||
|
@ -381,4 +355,3 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode
|
||||||
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
|
ZLUnicodeUtil::ucs2ToUtf8(utf8String, link);
|
||||||
return utf8String;
|
return utf8String;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,8 +39,6 @@ public:
|
||||||
bool readBook();
|
bool readBook();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool readDocument(shared_ptr<ZLInputStream> stream);
|
|
||||||
|
|
||||||
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||||
void handleHardLinebreak();
|
void handleHardLinebreak();
|
||||||
void handleParagraphEnd();
|
void handleParagraphEnd();
|
||||||
|
|
|
@ -30,21 +30,9 @@ DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) {
|
||||||
myBook.removeAllTags();
|
myBook.removeAllTags();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
void DocMetaInfoReader::characterDataHandler(const char *text, size_t len) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocMetaInfoReader::startElementHandler(int tag, const char **) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void DocMetaInfoReader::endElementHandler(int tag) {
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool DocMetaInfoReader::readMetaInfo() {
|
bool DocMetaInfoReader::readMetaInfo() {
|
||||||
myBook.removeAllAuthors();
|
myBook.removeAllAuthors();
|
||||||
myBook.setTitle(myBook.file().name(true));
|
myBook.setTitle(myBook.file().name(true));
|
||||||
myBook.setEncoding("windows-1251"); //TODO implement encoding retrieving
|
|
||||||
myBook.removeAllTags();
|
myBook.removeAllTags();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "DocPlugin.h"
|
#include "DocPlugin.h"
|
||||||
#include "DocMetaInfoReader.h"
|
#include "DocMetaInfoReader.h"
|
||||||
#include "DocBookReader.h"
|
#include "DocBookReader.h"
|
||||||
|
#include "DocReaderStream.h"
|
||||||
#include "../../bookmodel/BookModel.h"
|
#include "../../bookmodel/BookModel.h"
|
||||||
#include "../../library/Book.h"
|
#include "../../library/Book.h"
|
||||||
|
|
||||||
|
@ -47,7 +48,16 @@ bool DocPlugin::acceptsFile(const ZLFile &file) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DocPlugin::readMetaInfo(Book &book) const {
|
bool DocPlugin::readMetaInfo(Book &book) const {
|
||||||
return DocMetaInfoReader(book).readMetaInfo();
|
if (!DocMetaInfoReader(book).readMetaInfo()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
shared_ptr<ZLInputStream> stream = new DocReaderStream(book.file(), 50000);
|
||||||
|
if (!stream.isNull()) {
|
||||||
|
detectEncodingAndLanguage(book, *stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const {
|
bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const {
|
||||||
|
|
178
jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp
Normal file
178
jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "DocReaderStream.h"
|
||||||
|
#include "OleStreamReader.h"
|
||||||
|
|
||||||
|
class DocTextOnlyReader : public OleStreamReader {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocTextOnlyReader(char *buffer, size_t maxSize);
|
||||||
|
~DocTextOnlyReader();
|
||||||
|
size_t readSize() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void dataHandler(const char *buffer, size_t len);
|
||||||
|
|
||||||
|
void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||||
|
void handleHardLinebreak();
|
||||||
|
void handleParagraphEnd();
|
||||||
|
void handlePageBreak();
|
||||||
|
void handleTableSeparator();
|
||||||
|
void handleTableEndRow();
|
||||||
|
void handleFootNoteMark();
|
||||||
|
void handleStartField();
|
||||||
|
void handleSeparatorField();
|
||||||
|
void handleEndField();
|
||||||
|
void handleImage(const ZLFileImage::Blocks &blocks);
|
||||||
|
void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
|
||||||
|
void handleFontStyle(unsigned int fontStyle);
|
||||||
|
void handleParagraphStyle(const OleMainStream::Style &styleInfo);
|
||||||
|
void handleBookmark(const std::string &name);
|
||||||
|
|
||||||
|
private:
|
||||||
|
char *myBuffer;
|
||||||
|
const size_t myMaxSize;
|
||||||
|
size_t myActualSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) : OleStreamReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) {
|
||||||
|
}
|
||||||
|
|
||||||
|
DocTextOnlyReader::~DocTextOnlyReader() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) {
|
||||||
|
if (myActualSize >= myMaxSize) {
|
||||||
|
// break stream reading
|
||||||
|
} else {
|
||||||
|
const size_t len = std::min(dataLength, myMaxSize - myActualSize);
|
||||||
|
strncpy(myBuffer + myActualSize, buffer, len);
|
||||||
|
myActualSize += len;
|
||||||
|
}
|
||||||
|
OleStreamReader::dataHandler(buffer, dataLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleHardLinebreak() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleParagraphEnd() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handlePageBreak() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleTableSeparator() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleTableEndRow() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleFootNoteMark() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleStartField() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleSeparatorField() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleEndField() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocTextOnlyReader::handleBookmark(const std::string &name) {
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocTextOnlyReader::readSize() const {
|
||||||
|
return myActualSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Creates a stream over the text of the given .doc file.
 *
 * file    - the document to read
 * maxSize - upper bound, in bytes, of the text sample that open() collects
 *
 * No buffer is allocated here; that happens in open(). myOffset starts at 0
 * so that read(), seek() and offset() never see an indeterminate position,
 * even if they are called before open().
 */
DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
}
|
||||||
|
|
||||||
|
DocReaderStream::~DocReaderStream() {
|
||||||
|
close();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DocReaderStream::open() {
|
||||||
|
if (mySize != 0) {
|
||||||
|
myBuffer = new char[mySize];
|
||||||
|
}
|
||||||
|
DocTextOnlyReader reader(myBuffer, mySize);
|
||||||
|
shared_ptr<ZLInputStream> stream = myFile.inputStream();
|
||||||
|
if (stream.isNull() || !stream->open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!reader.readDocument(stream)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mySize = reader.readSize();
|
||||||
|
myOffset = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocReaderStream::read(char *buffer, size_t maxSize) {
|
||||||
|
maxSize = std::min(maxSize, mySize - myOffset);
|
||||||
|
if ((buffer != 0) && (myBuffer !=0)) {
|
||||||
|
memcpy(buffer, myBuffer + myOffset, maxSize);
|
||||||
|
}
|
||||||
|
myOffset += maxSize;
|
||||||
|
return maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DocReaderStream::close() {
|
||||||
|
if (myBuffer != 0) {
|
||||||
|
delete[] myBuffer;
|
||||||
|
myBuffer = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Moves the read position.
 *
 * offset         - target position, or a delta when absoluteOffset is false
 * absoluteOffset - true: offset counts from the start of the data;
 *                  false: offset is relative to the current position
 *
 * The resulting position is clamped to the range [0, mySize].
 */
void DocReaderStream::seek(int offset, bool absoluteOffset) {
	int target = offset;
	if (!absoluteOffset) {
		target += myOffset;
	}
	const size_t nonNegative = (size_t)std::max(0, target);
	myOffset = std::min(mySize, nonNegative);
}
|
||||||
|
|
||||||
|
size_t DocReaderStream::offset() const {
|
||||||
|
return myOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DocReaderStream::sizeOfOpened() {
|
||||||
|
return mySize;
|
||||||
|
}
|
50
jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h
Normal file
50
jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __DOCREADERSTREAM_H__
|
||||||
|
#define __DOCREADERSTREAM_H__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <ZLFile.h>
|
||||||
|
#include <ZLInputStream.h>
|
||||||
|
|
||||||
|
class DocReaderStream : public ZLInputStream {
|
||||||
|
|
||||||
|
public:
|
||||||
|
DocReaderStream(const ZLFile& file, size_t maxSize);
|
||||||
|
~DocReaderStream();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool open();
|
||||||
|
size_t read(char *buffer, size_t maxSize);
|
||||||
|
void close();
|
||||||
|
|
||||||
|
void seek(int offset, bool absoluteOffset);
|
||||||
|
size_t offset() const;
|
||||||
|
size_t sizeOfOpened();
|
||||||
|
|
||||||
|
private:
|
||||||
|
const ZLFile myFile;
|
||||||
|
char *myBuffer;
|
||||||
|
size_t mySize;
|
||||||
|
size_t myOffset;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __DOCREADERSTREAM_H__ */
|
|
@ -27,6 +27,7 @@
|
||||||
#include "DocFloatImageReader.h"
|
#include "DocFloatImageReader.h"
|
||||||
|
|
||||||
class OleMainStream : public OleStream {
|
class OleMainStream : public OleStream {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
struct Piece {
|
struct Piece {
|
||||||
enum PieceType {
|
enum PieceType {
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
* 02110-1301, USA.
|
* 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
|
@ -37,7 +36,7 @@ const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SHORT_DEFIS = 0x001e;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014;
|
||||||
|
@ -52,7 +51,7 @@ const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::SHORT_DEFIS = 0x2D;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D;
|
||||||
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
|
const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C;
|
||||||
|
|
||||||
OleStreamReader::OleStreamReader(const std::string &encoding) :
|
OleStreamReader::OleStreamReader(const std::string &encoding) :
|
||||||
|
@ -73,6 +72,26 @@ void OleStreamReader::clear() {
|
||||||
myNextFloatImageInfoIndex = 0;
|
myNextFloatImageInfoIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Parses an OLE compound file from inputStream, locates its "WordDocument"
 * entry and feeds that entry to readStream().
 *
 * Returns false if the stream is null, the OLE storage cannot be initialised,
 * the "WordDocument" entry is missing, or readStream() fails.
 */
bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream) {
	static const std::string WORD_DOCUMENT = "WordDocument";

	// This is a public entry point, so guard against a null stream before it is
	// dereferenced below.
	if (inputStream.isNull()) {
		return false;
	}

	shared_ptr<OleStorage> storage = new OleStorage;
	if (!storage->init(inputStream, inputStream->sizeOfOpened())) {
		// NOTE(review): the log tag still names DocBookReader, where this code
		// used to live; consider switching it to "OleStreamReader".
		ZLLogger::Instance().println("DocBookReader", "Broken OLE file!");
		return false;
	}

	OleEntry wordDocumentEntry;
	if (!storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry)) {
		return false;
	}

	OleMainStream oleStream(storage, wordDocumentEntry, inputStream);
	return readStream(oleStream);
}
|
||||||
|
|
||||||
bool OleStreamReader::readStream(OleMainStream &oleMainStream) {
|
bool OleStreamReader::readStream(OleMainStream &oleMainStream) {
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
|
@ -252,29 +271,19 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
char *textBuffer = new char[piece.Length];
|
char *textBuffer = new char[piece.Length];
|
||||||
size_t readedBytes = stream.read(textBuffer, piece.Length);
|
size_t readBytes = stream.read(textBuffer, piece.Length);
|
||||||
if (readedBytes != (unsigned int)piece.Length) {
|
if (readBytes != (size_t)piece.Length) {
|
||||||
ZLLogger::Instance().println("OleStreamReader", "not all bytes has been readed from piece");
|
ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece");
|
||||||
}
|
}
|
||||||
|
|
||||||
myBuffer.clear();
|
myBuffer.clear();
|
||||||
if (!piece.IsANSI) {
|
if (!piece.IsANSI) {
|
||||||
for (unsigned int i = 0; i < readedBytes; i += 2) {
|
for (size_t i = 0; i < readBytes; i += 2) {
|
||||||
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i);
|
||||||
myBuffer.push_back(ch);
|
myBuffer.push_back(ch);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (myConverter.isNull()) {
|
dataHandler(textBuffer, readBytes);
|
||||||
//lazy convertor loading, because documents can be in Unicode only and don't need to be converted
|
|
||||||
ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
|
||||||
myConverter = collection.converter(myEncoding);
|
|
||||||
if (myConverter.isNull()) {
|
|
||||||
myConverter = collection.defaultConverter();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::string utf8String;
|
|
||||||
myConverter->convert(utf8String, std::string(textBuffer, readedBytes));
|
|
||||||
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
|
||||||
}
|
}
|
||||||
myCurBufferPosition = 0;
|
myCurBufferPosition = 0;
|
||||||
++myNextPieceNumber;
|
++myNextPieceNumber;
|
||||||
|
@ -282,3 +291,17 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) {
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OleStreamReader::dataHandler(const char *buffer, size_t len) {
|
||||||
|
if (myConverter.isNull()) {
|
||||||
|
// lazy converter initialization
|
||||||
|
const ZLEncodingCollection &collection = ZLEncodingCollection::Instance();
|
||||||
|
myConverter = collection.converter(myEncoding);
|
||||||
|
if (myConverter.isNull()) {
|
||||||
|
myConverter = collection.defaultConverter();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::string utf8String;
|
||||||
|
myConverter->convert(utf8String, buffer, buffer + len);
|
||||||
|
ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String);
|
||||||
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@ public:
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_SHORT_DEFIS;
|
static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
|
||||||
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
|
||||||
|
@ -50,16 +50,20 @@ public:
|
||||||
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
|
||||||
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
|
||||||
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
static const ZLUnicodeUtil::Ucs2Char SPACE;
|
||||||
static const ZLUnicodeUtil::Ucs2Char SHORT_DEFIS;
|
static const ZLUnicodeUtil::Ucs2Char MINUS;
|
||||||
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
OleStreamReader(const std::string &encoding);
|
OleStreamReader(const std::string &encoding);
|
||||||
|
bool readDocument(shared_ptr<ZLInputStream> stream);
|
||||||
bool readStream(OleMainStream &stream);
|
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool readStream(OleMainStream &stream);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
virtual void dataHandler(const char *buffer, size_t len);
|
||||||
|
|
||||||
//virtual void parapgraphHandler(std::string paragraph) = 0;
|
//virtual void parapgraphHandler(std::string paragraph) = 0;
|
||||||
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
|
||||||
virtual void handleHardLinebreak() = 0;
|
virtual void handleHardLinebreak() = 0;
|
||||||
|
|
|
@ -172,4 +172,3 @@ size_t RtfReaderStream::offset() const {
|
||||||
size_t RtfReaderStream::sizeOfOpened() {
|
size_t RtfReaderStream::sizeOfOpened() {
|
||||||
return mySize;
|
return mySize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -143,7 +143,8 @@ const shared_ptr<ZLTextParagraphEntry> ZLTextParagraph::Iterator::entry() const
|
||||||
case ZLTextParagraphEntry::IMAGE_ENTRY:
|
case ZLTextParagraphEntry::IMAGE_ENTRY:
|
||||||
myEntry = new ImageEntry(myPointer + 2);
|
myEntry = new ImageEntry(myPointer + 2);
|
||||||
break;
|
break;
|
||||||
case ZLTextParagraphEntry::STYLE_ENTRY:
|
case ZLTextParagraphEntry::STYLE_CSS_ENTRY:
|
||||||
|
case ZLTextParagraphEntry::STYLE_OTHER_ENTRY:
|
||||||
myEntry = new ZLTextStyleEntry(myPointer + 2);
|
myEntry = new ZLTextStyleEntry(myPointer + 2);
|
||||||
break;
|
break;
|
||||||
case ZLTextParagraphEntry::FIXED_HSPACE_ENTRY:
|
case ZLTextParagraphEntry::FIXED_HSPACE_ENTRY:
|
||||||
|
|
|
@ -78,7 +78,7 @@ private:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ZLTextStyleEntry(unsigned char entryKind);
|
ZLTextStyleEntry(unsigned char entryKind);
|
||||||
//ZLTextStyleEntry(char *address);
|
//ZLTextStyleEntry(unsigned char entryKind, char *address);
|
||||||
~ZLTextStyleEntry();
|
~ZLTextStyleEntry();
|
||||||
|
|
||||||
unsigned char entryKind() const;
|
unsigned char entryKind() const;
|
||||||
|
@ -99,7 +99,7 @@ public:
|
||||||
void setFontFamily(const std::string &fontFamily);
|
void setFontFamily(const std::string &fontFamily);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
unsigned char myEntryKind;
|
const unsigned char myEntryKind;
|
||||||
unsigned short myFeatureMask;
|
unsigned short myFeatureMask;
|
||||||
|
|
||||||
LengthType myLengths[NUMBER_OF_LENGTHS];
|
LengthType myLengths[NUMBER_OF_LENGTHS];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue