diff --git a/jni/Android.mk b/jni/Android.mk index 2a0c623ae..b968141dd 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -128,6 +128,7 @@ LOCAL_SRC_FILES := \ NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp \ NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp \ + NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp \ NativeFormats/fbreader/src/formats/doc/OleMainStream.cpp \ NativeFormats/fbreader/src/formats/doc/OleStorage.cpp \ NativeFormats/fbreader/src/formats/doc/OleStream.cpp \ diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp index faa7a0497..96a13f5b6 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.cpp @@ -17,7 +17,6 @@ * 02110-1301, USA. */ -#include #include #include @@ -44,39 +43,14 @@ DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) : bool DocBookReader::readBook() { const ZLFile &file = myModelReader.model().book()->file(); shared_ptr stream = file.inputStream(); - if (stream.isNull()) { - return false; - } - return readDocument(stream); -} - -bool DocBookReader::readDocument(shared_ptr inputStream) { - static const std::string WORD_DOCUMENT = "WordDocument"; - - if (inputStream.isNull() || !inputStream->open()) { + if (stream.isNull() || !stream->open()) { return false; } myModelReader.setMainTextModel(); myModelReader.pushKind(REGULAR); myModelReader.beginParagraph(); - shared_ptr storage = new OleStorage; - - if (!storage->init(inputStream, inputStream->sizeOfOpened())) { - ZLLogger::Instance().println("DocBookReader", "Broken OLE file!"); - return false; - } - - - OleEntry wordDocumentEntry; - bool result = storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry); - if (!result) { - return false; - } - - OleMainStream oleStream(storage, wordDocumentEntry, 
inputStream); - result = readStream(oleStream); - if (!result) { + if (!readDocument(stream)) { return false; } @@ -239,8 +213,8 @@ void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) { } void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) { - if (ucs2char == WORD_SHORT_DEFIS) { - handleChar(SHORT_DEFIS); + if (ucs2char == WORD_MINUS) { + handleChar(MINUS); } else if (ucs2char == WORD_SOFT_HYPHEN) { //skip } else if (ucs2char == WORD_HORIZONTAL_TAB) { @@ -381,4 +355,3 @@ std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode ZLUnicodeUtil::ucs2ToUtf8(utf8String, link); return utf8String; } - diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h index bb50fb387..a8e21c921 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocBookReader.h @@ -39,8 +39,6 @@ public: bool readBook(); private: - bool readDocument(shared_ptr stream); - void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); void handleHardLinebreak(); void handleParagraphEnd(); diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp index 0ddcb4230..37b39c293 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocMetaInfoReader.cpp @@ -30,21 +30,9 @@ DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) { myBook.removeAllTags(); } -/* -void DocMetaInfoReader::characterDataHandler(const char *text, size_t len) { -} - -void DocMetaInfoReader::startElementHandler(int tag, const char **) { -} - -void DocMetaInfoReader::endElementHandler(int tag) { -} -*/ - bool DocMetaInfoReader::readMetaInfo() { myBook.removeAllAuthors(); myBook.setTitle(myBook.file().name(true)); - myBook.setEncoding("windows-1251"); //TODO implement encoding 
retrieving myBook.removeAllTags(); return true; } diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp index 21f29fdc7..32d21d33f 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocPlugin.cpp @@ -25,6 +25,7 @@ #include "DocPlugin.h" #include "DocMetaInfoReader.h" #include "DocBookReader.h" +#include "DocReaderStream.h" #include "../../bookmodel/BookModel.h" #include "../../library/Book.h" @@ -47,7 +48,16 @@ bool DocPlugin::acceptsFile(const ZLFile &file) const { } bool DocPlugin::readMetaInfo(Book &book) const { - return DocMetaInfoReader(book).readMetaInfo(); + if (!DocMetaInfoReader(book).readMetaInfo()) { + return false; + } + + shared_ptr stream = new DocReaderStream(book.file(), 50000); /* in-memory stream that keeps at most 50000 bytes of the document's non-Unicode text pieces; it is the sample handed to the detector below */ + if (!stream.isNull()) { + detectEncodingAndLanguage(book, *stream); + } + + return true; } bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const { diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp b/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp new file mode 100644 index 000000000..e6a3cd236 --- /dev/null +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include +#include +#include + +#include "DocReaderStream.h" +#include "OleStreamReader.h" + +class DocTextOnlyReader : public OleStreamReader { /* Reader that ignores every document event (all handle* overrides are empty) and only copies the raw bytes of non-Unicode text pieces into a caller-owned buffer, up to maxSize bytes. */ + +public: + DocTextOnlyReader(char *buffer, size_t maxSize); + ~DocTextOnlyReader(); + size_t readSize() const; + +private: + void dataHandler(const char *buffer, size_t len); + + void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char); + void handleHardLinebreak(); + void handleParagraphEnd(); + void handlePageBreak(); + void handleTableSeparator(); + void handleTableEndRow(); + void handleFootNoteMark(); + void handleStartField(); + void handleSeparatorField(); + void handleEndField(); + void handleImage(const ZLFileImage::Blocks &blocks); + void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char); + void handleFontStyle(unsigned int fontStyle); + void handleParagraphStyle(const OleMainStream::Style &styleInfo); + void handleBookmark(const std::string &name); + +private: + char *myBuffer; + const size_t myMaxSize; + size_t myActualSize; +}; + +DocTextOnlyReader::DocTextOnlyReader(char *buffer, size_t maxSize) : OleStreamReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myActualSize(0) { +} + +DocTextOnlyReader::~DocTextOnlyReader() { +} + +void DocTextOnlyReader::dataHandler(const char *buffer, size_t dataLength) { /* Stores as much of the incoming piece as still fits in myBuffer, then forwards the whole piece to the base class so parsing continues. */ + if (myActualSize >= myMaxSize) { + /* buffer already full: nothing more is stored, but reading is not interrupted */ + } else { + const size_t len = std::min(dataLength, myMaxSize - myActualSize); + memcpy(myBuffer + myActualSize, buffer, len); /* raw encoded bytes, not a C string: strncpy would stop at the first NUL byte and zero-fill the remainder */ + myActualSize += len; + } + OleStreamReader::dataHandler(buffer, dataLength); +} + +void DocTextOnlyReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) { +} + +void DocTextOnlyReader::handleHardLinebreak() { +} + +void DocTextOnlyReader::handleParagraphEnd() { +} + +void
DocTextOnlyReader::handlePageBreak() { +} + +void DocTextOnlyReader::handleTableSeparator() { +} + +void DocTextOnlyReader::handleTableEndRow() { +} + +void DocTextOnlyReader::handleFootNoteMark() { +} + +void DocTextOnlyReader::handleStartField() { +} + +void DocTextOnlyReader::handleSeparatorField() { +} + +void DocTextOnlyReader::handleEndField() { +} + +void DocTextOnlyReader::handleImage(const ZLFileImage::Blocks &blocks) { +} + +void DocTextOnlyReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) { +} + +void DocTextOnlyReader::handleFontStyle(unsigned int fontStyle) { +} + +void DocTextOnlyReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) { +} + +void DocTextOnlyReader::handleBookmark(const std::string &name) { +} + +size_t DocTextOnlyReader::readSize() const { + return myActualSize; +} + +DocReaderStream::DocReaderStream(const ZLFile& file, size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) { /* myOffset starts at 0 so read()/seek()/offset() are well defined even before open() */ +} + +DocReaderStream::~DocReaderStream() { + close(); +} + +bool DocReaderStream::open() { /* Parses the .doc file and captures up to mySize bytes of its text into myBuffer; on success mySize becomes the number of bytes captured and the read position is reset. Returns false if the file cannot be opened or parsed. */ + if ((myBuffer == 0) && (mySize != 0)) { /* reuse a buffer left by an earlier open(): it is at least mySize bytes, and allocating again would leak it */ + myBuffer = new char[mySize]; + } + DocTextOnlyReader reader(myBuffer, mySize); + shared_ptr stream = myFile.inputStream(); + if (stream.isNull() || !stream->open()) { + return false; + } + if (!reader.readDocument(stream)) { + return false; + } + mySize = reader.readSize(); + myOffset = 0; + return true; +} + +size_t DocReaderStream::read(char *buffer, size_t maxSize) { /* Copies up to maxSize bytes from the current position and advances it; a null buffer only advances the position. Returns the number of bytes consumed. */ + maxSize = std::min(maxSize, mySize - myOffset); + if ((buffer != 0) && (myBuffer !=0)) { + memcpy(buffer, myBuffer + myOffset, maxSize); + } + myOffset += maxSize; + return maxSize; +} + +void DocReaderStream::close() { + if (myBuffer != 0) { + delete[] myBuffer; + myBuffer = 0; + } +} + +void DocReaderStream::seek(int offset, bool absoluteOffset) { /* Moves the read position (relative to the current one unless absoluteOffset), clamped to [0, mySize]. */ + if (!absoluteOffset) { + offset += myOffset; + } + myOffset = std::min(mySize, (size_t)std::max(0, offset)); +} + +size_t DocReaderStream::offset() const { + return myOffset; +} + +size_t
DocReaderStream::sizeOfOpened() { + return mySize; +} diff --git a/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h b/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h new file mode 100644 index 000000000..65b5ae2a7 --- /dev/null +++ b/jni/NativeFormats/fbreader/src/formats/doc/DocReaderStream.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#ifndef __DOCREADERSTREAM_H__ +#define __DOCREADERSTREAM_H__ + +#include + +#include +#include + +class DocReaderStream : public ZLInputStream { /* ZLInputStream that serves bytes from an in-memory buffer which open() fills by parsing a .doc file; at most maxSize bytes are kept. */ + +public: + DocReaderStream(const ZLFile& file, size_t maxSize); + ~DocReaderStream(); + +private: + bool open(); + size_t read(char *buffer, size_t maxSize); + void close(); + + void seek(int offset, bool absoluteOffset); + size_t offset() const; + size_t sizeOfOpened(); + +private: + const ZLFile myFile; /* the .doc file to extract from, stored by value */ + char *myBuffer; /* heap buffer allocated by open() and released by close() */ + size_t mySize; /* capacity requested at construction; open() replaces it with the number of bytes actually captured */ + size_t myOffset; /* current read position within myBuffer; reset to 0 by open() */ +}; + +#endif /* __DOCREADERSTREAM_H__ */ diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h b/jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h index 8c45addaa..79c3d33eb 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleMainStream.h @@ -27,6 +27,7 @@ #include "DocFloatImageReader.h" class OleMainStream : public OleStream { + public: struct Piece { enum PieceType { diff --git a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp index bcf7eb616..bc3cdb3c5 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.cpp @@ -17,7 +17,6 @@ * 02110-1301, USA.
*/ - #include #include @@ -37,7 +36,7 @@ const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HORIZONTAL_TAB = 0x0009; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_HARD_LINEBREAK = 0x000b; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_PAGE_BREAK = 0x000c; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_END_OF_PARAGRAPH = 0x000d; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SHORT_DEFIS = 0x001e; +const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_MINUS = 0x001e; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SOFT_HYPHEN = 0x001f; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_START_FIELD = 0x0013; const ZLUnicodeUtil::Ucs2Char OleStreamReader::WORD_SEPARATOR_FIELD = 0x0014; @@ -52,7 +51,7 @@ const ZLUnicodeUtil::Ucs2Char OleStreamReader::FILE_SEPARATOR = 0x1c; const ZLUnicodeUtil::Ucs2Char OleStreamReader::LINE_FEED = 0x000a; const ZLUnicodeUtil::Ucs2Char OleStreamReader::SOFT_HYPHEN = 0xad; const ZLUnicodeUtil::Ucs2Char OleStreamReader::SPACE = 0x20; -const ZLUnicodeUtil::Ucs2Char OleStreamReader::SHORT_DEFIS = 0x2D; +const ZLUnicodeUtil::Ucs2Char OleStreamReader::MINUS = 0x2D; const ZLUnicodeUtil::Ucs2Char OleStreamReader::VERTICAL_LINE = 0x7C; OleStreamReader::OleStreamReader(const std::string &encoding) : @@ -73,6 +72,26 @@ void OleStreamReader::clear() { myNextFloatImageInfoIndex = 0; } +bool OleStreamReader::readDocument(shared_ptr inputStream) { /* Initializes an OleStorage over inputStream, looks up the "WordDocument" entry and parses it with readStream(). Returns false if the storage cannot be initialized or the entry is missing. inputStream is dereferenced without a null check, so callers must pass a valid stream (the visible callers open it first). */ + static const std::string WORD_DOCUMENT = "WordDocument"; + + shared_ptr storage = new OleStorage; + + if (!storage->init(inputStream, inputStream->sizeOfOpened())) { + ZLLogger::Instance().println("DocBookReader", "Broken OLE file!"); /* NOTE(review): log tag still names DocBookReader although this code now lives in OleStreamReader - confirm whether that is intended */ + return false; + } + + OleEntry wordDocumentEntry; + bool result = storage->getEntryByName(WORD_DOCUMENT, wordDocumentEntry); + if (!result) { + return false; + } + + OleMainStream oleStream(storage, wordDocumentEntry, inputStream); + return readStream(oleStream); +} + bool OleStreamReader::readStream(OleMainStream &oleMainStream) { clear(); @@
-252,29 +271,19 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) { return false; } char *textBuffer = new char[piece.Length]; - size_t readedBytes = stream.read(textBuffer, piece.Length); - if (readedBytes != (unsigned int)piece.Length) { - ZLLogger::Instance().println("OleStreamReader", "not all bytes has been readed from piece"); + size_t readBytes = stream.read(textBuffer, piece.Length); + if (readBytes != (size_t)piece.Length) { + ZLLogger::Instance().println("OleStreamReader", "not all bytes have been read from piece"); } myBuffer.clear(); if (!piece.IsANSI) { - for (unsigned int i = 0; i < readedBytes; i += 2) { + for (size_t i = 0; i < readBytes; i += 2) { ZLUnicodeUtil::Ucs2Char ch = OleUtil::getU2Bytes(textBuffer, i); myBuffer.push_back(ch); } } else { - if (myConverter.isNull()) { - //lazy convertor loading, because documents can be in Unicode only and don't need to be converted - ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); - myConverter = collection.converter(myEncoding); - if (myConverter.isNull()) { - myConverter = collection.defaultConverter(); - } - } - std::string utf8String; - myConverter->convert(utf8String, std::string(textBuffer, readedBytes)); - ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); + dataHandler(textBuffer, readBytes); } myCurBufferPosition = 0; ++myNextPieceNumber; @@ -282,3 +291,17 @@ bool OleStreamReader::fillBuffer(OleMainStream &stream) { return true; } + +void OleStreamReader::dataHandler(const char *buffer, size_t len) { /* Decodes the len raw bytes of a non-Unicode (ANSI) piece: converts them to UTF-8 with the converter for myEncoding, then to UCS-2 into myBuffer. fillBuffer() calls this only for ANSI pieces. */ + if (myConverter.isNull()) { + /* created on first use: Unicode-only documents never reach this path and so never need a converter; falls back to the default converter when myEncoding is unknown */ + const ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); + myConverter = collection.converter(myEncoding); + if (myConverter.isNull()) { + myConverter = collection.defaultConverter(); + } + } + std::string utf8String; + myConverter->convert(utf8String, buffer, buffer + len); + ZLUnicodeUtil::utf8ToUcs2(myBuffer, utf8String); +} diff --git
a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h index 89ccf998c..a2d6ad381 100644 --- a/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h +++ b/jni/NativeFormats/fbreader/src/formats/doc/OleStreamReader.h @@ -35,7 +35,7 @@ public: static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK; static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK; static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH; - static const ZLUnicodeUtil::Ucs2Char WORD_SHORT_DEFIS; + static const ZLUnicodeUtil::Ucs2Char WORD_MINUS; static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN; static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD; static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD; @@ -50,16 +50,20 @@ public: static const ZLUnicodeUtil::Ucs2Char LINE_FEED; static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN; static const ZLUnicodeUtil::Ucs2Char SPACE; - static const ZLUnicodeUtil::Ucs2Char SHORT_DEFIS; + static const ZLUnicodeUtil::Ucs2Char MINUS; static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE; public: OleStreamReader(const std::string &encoding); - - bool readStream(OleMainStream &stream); + bool readDocument(shared_ptr stream); void clear(); +private: + bool readStream(OleMainStream &stream); + protected: + virtual void dataHandler(const char *buffer, size_t len); + //virtual void parapgraphHandler(std::string paragraph) = 0; virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0; virtual void handleHardLinebreak() = 0; diff --git a/jni/NativeFormats/fbreader/src/formats/rtf/RtfReaderStream.cpp b/jni/NativeFormats/fbreader/src/formats/rtf/RtfReaderStream.cpp index ef88f6e7d..2eb8dcdff 100644 --- a/jni/NativeFormats/fbreader/src/formats/rtf/RtfReaderStream.cpp +++ b/jni/NativeFormats/fbreader/src/formats/rtf/RtfReaderStream.cpp @@ -172,4 +172,3 @@ size_t RtfReaderStream::offset() const { size_t RtfReaderStream::sizeOfOpened() { return mySize; } - diff --git 
a/jni/NativeFormats/zlibrary/text/src/model/ZLTextParagraph.cpp b/jni/NativeFormats/zlibrary/text/src/model/ZLTextParagraph.cpp index a5c850e34..44788914c 100644 --- a/jni/NativeFormats/zlibrary/text/src/model/ZLTextParagraph.cpp +++ b/jni/NativeFormats/zlibrary/text/src/model/ZLTextParagraph.cpp @@ -143,7 +143,8 @@ const shared_ptr ZLTextParagraph::Iterator::entry() const case ZLTextParagraphEntry::IMAGE_ENTRY: myEntry = new ImageEntry(myPointer + 2); break; - case ZLTextParagraphEntry::STYLE_ENTRY: + case ZLTextParagraphEntry::STYLE_CSS_ENTRY: + case ZLTextParagraphEntry::STYLE_OTHER_ENTRY: myEntry = new ZLTextStyleEntry(myPointer + 2); break; case ZLTextParagraphEntry::FIXED_HSPACE_ENTRY: diff --git a/jni/NativeFormats/zlibrary/text/src/model/ZLTextStyleEntry.h b/jni/NativeFormats/zlibrary/text/src/model/ZLTextStyleEntry.h index b3651f41b..ecfe8dc05 100644 --- a/jni/NativeFormats/zlibrary/text/src/model/ZLTextStyleEntry.h +++ b/jni/NativeFormats/zlibrary/text/src/model/ZLTextStyleEntry.h @@ -78,7 +78,7 @@ private: public: ZLTextStyleEntry(unsigned char entryKind); - //ZLTextStyleEntry(char *address); + //ZLTextStyleEntry(unsigned char entryKind, char *address); ~ZLTextStyleEntry(); unsigned char entryKind() const; @@ -99,7 +99,7 @@ public: void setFontFamily(const std::string &fontFamily); private: - unsigned char myEntryKind; + const unsigned char myEntryKind; unsigned short myFeatureMask; LengthType myLengths[NUMBER_OF_LENGTHS];