1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-04 02:09:35 +02:00

new engine for mobi files

This commit is contained in:
Nikolay Pultsin 2014-09-08 07:41:08 +01:00
parent a25c23f5f9
commit aa8ab30807
64 changed files with 2652 additions and 2806 deletions

View file

@ -1,4 +1,5 @@
===== 2.1 (Sep ??, 2014) ===== ===== 2.1 (Sep ??, 2014) =====
* New engine for mobipocket (a.k.a. Kindle) files
* Better background settings * Better background settings
* DjVu plugin support * DjVu plugin support
* (planned) Fixed authors list/tags list editing * (planned) Fixed authors list/tags list editing

View file

@ -69,6 +69,7 @@ LOCAL_SRC_FILES := \
NativeFormats/zlibrary/core/src/library/ZLibrary.cpp \ NativeFormats/zlibrary/core/src/library/ZLibrary.cpp \
NativeFormats/zlibrary/core/src/logger/ZLLogger.cpp \ NativeFormats/zlibrary/core/src/logger/ZLLogger.cpp \
NativeFormats/zlibrary/core/src/util/ZLFileUtil.cpp \ NativeFormats/zlibrary/core/src/util/ZLFileUtil.cpp \
NativeFormats/zlibrary/core/src/util/ZLLanguageUtil.cpp \
NativeFormats/zlibrary/core/src/util/ZLStringUtil.cpp \ NativeFormats/zlibrary/core/src/util/ZLStringUtil.cpp \
NativeFormats/zlibrary/core/src/util/ZLUnicodeUtil.cpp \ NativeFormats/zlibrary/core/src/util/ZLUnicodeUtil.cpp \
NativeFormats/zlibrary/core/src/xml/ZLAsynchronousInputStream.cpp \ NativeFormats/zlibrary/core/src/xml/ZLAsynchronousInputStream.cpp \
@ -125,6 +126,19 @@ LOCAL_SRC_FILES := \
NativeFormats/fbreader/src/formats/oeb/OEBUidReader.cpp \ NativeFormats/fbreader/src/formats/oeb/OEBUidReader.cpp \
NativeFormats/fbreader/src/formats/oeb/OPFReader.cpp \ NativeFormats/fbreader/src/formats/oeb/OPFReader.cpp \
NativeFormats/fbreader/src/formats/oeb/XHTMLImageFinder.cpp \ NativeFormats/fbreader/src/formats/oeb/XHTMLImageFinder.cpp \
NativeFormats/fbreader/src/formats/pdb/BitReader.cpp \
NativeFormats/fbreader/src/formats/pdb/DocDecompressor.cpp \
NativeFormats/fbreader/src/formats/pdb/HtmlMetainfoReader.cpp \
NativeFormats/fbreader/src/formats/pdb/HuffDecompressor.cpp \
NativeFormats/fbreader/src/formats/pdb/MobipocketHtmlBookReader.cpp \
NativeFormats/fbreader/src/formats/pdb/MobipocketPlugin.cpp \
NativeFormats/fbreader/src/formats/pdb/PalmDocLikePlugin.cpp \
NativeFormats/fbreader/src/formats/pdb/PalmDocLikeStream.cpp \
NativeFormats/fbreader/src/formats/pdb/PalmDocStream.cpp \
NativeFormats/fbreader/src/formats/pdb/PdbPlugin.cpp \
NativeFormats/fbreader/src/formats/pdb/PdbReader.cpp \
NativeFormats/fbreader/src/formats/pdb/PdbStream.cpp \
NativeFormats/fbreader/src/formats/pdb/SimplePdbPlugin.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfDescriptionReader.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfDescriptionReader.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp \

View file

@ -353,6 +353,17 @@ JNIEXPORT jint JNICALL Java_org_geometerplus_fbreader_formats_NativeFormatPlugin
return 0; return 0;
} }
// JNI entry point behind NativeFormatPlugin.readAnnotationInternal().
// Looks up the C++ FormatPlugin via findCppPlugin(thiz), reads the path of
// the Java ZLFile object and returns the plugin's annotation for that file
// as a Java string. Returns 0 (a null jstring) when no plugin is found.
extern "C"
JNIEXPORT jstring JNICALL Java_org_geometerplus_fbreader_formats_NativeFormatPlugin_readAnnotationInternal(JNIEnv* env, jobject thiz, jobject file) {
	shared_ptr<FormatPlugin> plugin = findCppPlugin(thiz);
	if (!plugin.isNull()) {
		const std::string path = AndroidUtil::Method_ZLFile_getPath->callForCppString(file);
		const std::string annotation = plugin->readAnnotation(ZLFile(path));
		return AndroidUtil::createJavaString(env, annotation);
	}
	return 0;
}
extern "C" extern "C"
JNIEXPORT void JNICALL Java_org_geometerplus_fbreader_formats_NativeFormatPlugin_readCoverInternal(JNIEnv* env, jobject thiz, jobject file, jobjectArray box) { JNIEXPORT void JNICALL Java_org_geometerplus_fbreader_formats_NativeFormatPlugin_readCoverInternal(JNIEnv* env, jobject thiz, jobject file, jobjectArray box) {
shared_ptr<FormatPlugin> plugin = findCppPlugin(thiz); shared_ptr<FormatPlugin> plugin = findCppPlugin(thiz);

View file

@ -94,10 +94,12 @@ bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::
return detected; return detected;
} }
/*
const std::string &FormatPlugin::tryOpen(const ZLFile&) const { const std::string &FormatPlugin::tryOpen(const ZLFile&) const {
static const std::string EMPTY = ""; static const std::string EMPTY = "";
return EMPTY; return EMPTY;
} }
*/
std::vector<shared_ptr<FileEncryptionInfo> > FormatPlugin::readEncryptionInfos(Book &book) const { std::vector<shared_ptr<FileEncryptionInfo> > FormatPlugin::readEncryptionInfos(Book &book) const {
return std::vector<shared_ptr<FileEncryptionInfo> >(); return std::vector<shared_ptr<FileEncryptionInfo> >();
@ -106,3 +108,7 @@ std::vector<shared_ptr<FileEncryptionInfo> > FormatPlugin::readEncryptionInfos(B
shared_ptr<const ZLImage> FormatPlugin::coverImage(const ZLFile &file) const { shared_ptr<const ZLImage> FormatPlugin::coverImage(const ZLFile &file) const {
return 0; return 0;
} }
// Default annotation reader: ignores the file and reports an empty
// annotation.
std::string FormatPlugin::readAnnotation(const ZLFile &file) const {
	return std::string();
}

View file

@ -53,17 +53,18 @@ protected:
public: public:
virtual ~FormatPlugin(); virtual ~FormatPlugin();
virtual bool providesMetaInfo() const = 0; virtual bool providesMetainfo() const = 0;
virtual const std::string supportedFileType() const = 0; virtual const std::string supportedFileType() const = 0;
//virtual FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file); //virtual FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
virtual const std::string &tryOpen(const ZLFile &file) const; //virtual const std::string &tryOpen(const ZLFile &file) const;
virtual bool readMetainfo(Book &book) const = 0; virtual bool readMetainfo(Book &book) const = 0;
virtual std::vector<shared_ptr<FileEncryptionInfo> > readEncryptionInfos(Book &book) const; virtual std::vector<shared_ptr<FileEncryptionInfo> > readEncryptionInfos(Book &book) const;
virtual bool readUids(Book &book) const = 0; virtual bool readUids(Book &book) const = 0;
virtual bool readLanguageAndEncoding(Book &book) const = 0; virtual bool readLanguageAndEncoding(Book &book) const = 0;
virtual bool readModel(BookModel &model) const = 0; virtual bool readModel(BookModel &model) const = 0;
virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const; virtual shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
virtual std::string readAnnotation(const ZLFile &file) const;
protected: protected:
static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false); static bool detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force = false);

View file

@ -30,7 +30,7 @@
#include "fb2/FB2Plugin.h" #include "fb2/FB2Plugin.h"
#include "html/HtmlPlugin.h" #include "html/HtmlPlugin.h"
#include "txt/TxtPlugin.h" #include "txt/TxtPlugin.h"
//#include "pdb/PdbPlugin.h" #include "pdb/PdbPlugin.h"
//#include "tcr/TcrPlugin.h" //#include "tcr/TcrPlugin.h"
#include "oeb/OEBPlugin.h" #include "oeb/OEBPlugin.h"
//#include "chm/CHMPlugin.h" //#include "chm/CHMPlugin.h"
@ -48,7 +48,7 @@ PluginCollection &PluginCollection::Instance() {
ourInstance->myPlugins.push_back(new TxtPlugin()); ourInstance->myPlugins.push_back(new TxtPlugin());
// ourInstance->myPlugins.push_back(new PluckerPlugin()); // ourInstance->myPlugins.push_back(new PluckerPlugin());
// ourInstance->myPlugins.push_back(new PalmDocPlugin()); // ourInstance->myPlugins.push_back(new PalmDocPlugin());
// ourInstance->myPlugins.push_back(new MobipocketPlugin()); ourInstance->myPlugins.push_back(new MobipocketPlugin());
// ourInstance->myPlugins.push_back(new EReaderPlugin()); // ourInstance->myPlugins.push_back(new EReaderPlugin());
// ourInstance->myPlugins.push_back(new ZTXTPlugin()); // ourInstance->myPlugins.push_back(new ZTXTPlugin());
// ourInstance->myPlugins.push_back(new TcrPlugin()); // ourInstance->myPlugins.push_back(new TcrPlugin());

View file

@ -175,7 +175,7 @@ shared_ptr<ZLTextStyleEntry> StyleSheetTable::createControl(const AttributeMap &
} else if (bold == "lighter") { } else if (bold == "lighter") {
// TODO: implement // TODO: implement
} else { } else {
num = ZLStringUtil::stringToInteger(bold, -1); num = ZLStringUtil::parseDecimal(bold, -1);
} }
if (num != -1) { if (num != -1) {
entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_BOLD, num >= 600); entry->setFontModifier(ZLTextStyleEntry::FONT_MODIFIER_BOLD, num >= 600);

View file

@ -36,7 +36,7 @@ DocPlugin::DocPlugin() {
DocPlugin::~DocPlugin() { DocPlugin::~DocPlugin() {
} }
bool DocPlugin::providesMetaInfo() const { bool DocPlugin::providesMetainfo() const {
return true; return true;
} }

View file

@ -27,7 +27,7 @@ class DocPlugin : public FormatPlugin {
public: public:
DocPlugin(); DocPlugin();
~DocPlugin(); ~DocPlugin();
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool acceptsFile(const ZLFile &file) const; bool acceptsFile(const ZLFile &file) const;

View file

@ -27,7 +27,7 @@ class FB2Plugin : public FormatPlugin {
public: public:
FB2Plugin(); FB2Plugin();
~FB2Plugin(); ~FB2Plugin();
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool readMetainfo(Book &book) const; bool readMetainfo(Book &book) const;
bool readUids(Book &book) const; bool readUids(Book &book) const;
@ -38,6 +38,6 @@ public:
inline FB2Plugin::FB2Plugin() {} inline FB2Plugin::FB2Plugin() {}
inline FB2Plugin::~FB2Plugin() {} inline FB2Plugin::~FB2Plugin() {}
inline bool FB2Plugin::providesMetaInfo() const { return true; } inline bool FB2Plugin::providesMetainfo() const { return true; }
#endif /* __FB2PLUGIN_H__ */ #endif /* __FB2PLUGIN_H__ */

View file

@ -27,7 +27,7 @@ class HtmlPlugin : public FormatPlugin {
public: public:
HtmlPlugin(); HtmlPlugin();
~HtmlPlugin(); ~HtmlPlugin();
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool readMetainfo(Book &book) const; bool readMetainfo(Book &book) const;
bool readUids(Book &book) const; bool readUids(Book &book) const;
@ -38,6 +38,6 @@ public:
inline HtmlPlugin::HtmlPlugin() {} inline HtmlPlugin::HtmlPlugin() {}
inline HtmlPlugin::~HtmlPlugin() {} inline HtmlPlugin::~HtmlPlugin() {}
inline bool HtmlPlugin::providesMetaInfo() const { return false; } inline bool HtmlPlugin::providesMetainfo() const { return false; }
#endif /* __HTMLPLUGIN_H__ */ #endif /* __HTMLPLUGIN_H__ */

View file

@ -70,7 +70,7 @@ void ContainerFileReader::startElementHandler(const char *tag, const char **attr
OEBPlugin::~OEBPlugin() { OEBPlugin::~OEBPlugin() {
} }
bool OEBPlugin::providesMetaInfo() const { bool OEBPlugin::providesMetainfo() const {
return true; return true;
} }

View file

@ -30,7 +30,7 @@ public:
public: public:
~OEBPlugin(); ~OEBPlugin();
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool readMetainfo(Book &book) const; bool readMetainfo(Book &book) const;
virtual std::vector<shared_ptr<FileEncryptionInfo> > readEncryptionInfos(Book &book) const; virtual std::vector<shared_ptr<FileEncryptionInfo> > readEncryptionInfos(Book &book) const;

View file

@ -0,0 +1,57 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <string>
#include "BitReader.h"
// Builds a reader over a private copy of the `size` bytes at `data`.
// The copy is followed by 4 zero bytes so that peek() of up to 32 bits
// taken near the end of the data still reads from owned memory.
BitReader::BitReader(const unsigned char* data, size_t size) : myOffset(0), myLength(8 * size) {
	unsigned char *copy = new unsigned char[size + 4];
	memcpy(copy, data, size);
	for (size_t i = 0; i < 4; ++i) {
		copy[size + i] = 0x00;
	}
	myData = copy;
}
// Frees the private data copy allocated by the constructor.
BitReader::~BitReader() {
delete[] myData;
}
unsigned long long BitReader::peek(size_t n) {
if (n > 32) {
return 0;
}
unsigned long long r = 0;
size_t g = 0;
while (g < n) {
r = (r << 8) | myData[(myOffset + g) >> 3];
g = g + 8 - ((myOffset+g) & 7);
}
unsigned long long mask = 1;
mask = (mask << n) - 1;
return (r >> (g - n)) & mask;
}
// Advances the bit offset by `n` bits. The offset is moved even when the
// result lies past the end of the data; the return value tells whether the
// new offset is still within the data (false means it ran past the end).
bool BitReader::eat(size_t n) {
	const size_t advanced = myOffset + n;
	myOffset = advanced;
	return !(advanced > myLength);
}
// Returns the number of unread bits. eat() may move the offset past the end
// of the data; in that case 0 is returned instead of letting the unsigned
// subtraction wrap around to a huge value.
size_t BitReader::left() const {
	return (myOffset < myLength) ? (myLength - myOffset) : 0;
}

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com> * Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -17,18 +17,23 @@
* 02110-1301, USA. * 02110-1301, USA.
*/ */
package org.geometerplus.fbreader.formats.xhtml; #ifndef __BITREADER_H__
#define __BITREADER_H__
import java.util.ArrayList; class BitReader {
import java.util.List;
public class XHTMLReader { public:
private static ArrayList<String> ourExternalDTDs = new ArrayList<String>(); BitReader(const unsigned char* data, size_t size);
~BitReader();
public static List<String> xhtmlDTDs() { unsigned long long peek(size_t n);
if (ourExternalDTDs.isEmpty()) { bool eat(size_t n);
ourExternalDTDs.add("formats/xhtml/xhtml-merged.ent"); size_t left() const;
}
return ourExternalDTDs; private:
} unsigned char* myData;
} size_t myOffset;
size_t myLength;
};
#endif //__BITREADER_H__

View file

@ -0,0 +1,103 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <ZLInputStream.h>
#include "DocDecompressor.h"
// Classifies every possible input byte for DocDecompressor::decompress():
//   0 - bytes 0x00 and 0x09..0x7F: copied to the output unchanged
//   1 - bytes 0x01..0x08: the byte is a count of following bytes to copy
//   3 - bytes 0x80..0xBF: first byte of a two-byte back-reference
//   2 - bytes 0xC0..0xFF: expands to a space followed by (byte ^ 0x80)
static unsigned char TOKEN_CODE[256] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
// Reads `compressedSize` bytes from `stream` and expands them into
// `targetBuffer`, writing at most `maxUncompressedSize` bytes.
//
// Returns the number of bytes written. Returns 0 if the stream yields fewer
// than `compressedSize` bytes. Decoding stops early (keeping what has been
// written so far) as soon as a token would read past the input or write
// past the output limit.
//
// All bounds are checked on offsets / remaining sizes rather than by forming
// pointers outside the buffers, which is undefined behaviour.
size_t DocDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, size_t compressedSize, size_t maxUncompressedSize) {
	unsigned char *sourceBuffer = new unsigned char[compressedSize];
	unsigned char *const target = (unsigned char*)targetBuffer;
	size_t sourcePos = 0;
	size_t targetPos = 0;

	if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
		bool finished = false;
		while (!finished && (sourcePos < compressedSize) && (targetPos < maxUncompressedSize)) {
			const unsigned char token = sourceBuffer[sourcePos++];
			switch (TOKEN_CODE[token]) {
				case 0:
					// literal byte
					target[targetPos++] = token;
					break;
				case 1:
					// token is the number of raw bytes that follow
					if ((token > compressedSize - sourcePos) || (token > maxUncompressedSize - targetPos)) {
						finished = true;
						break;
					}
					memcpy(target + targetPos, sourceBuffer + sourcePos, token);
					sourcePos += token;
					targetPos += token;
					break;
				case 2:
					// space followed by the token with its high bit flipped
					if (maxUncompressedSize - targetPos < 2) {
						finished = true;
						break;
					}
					target[targetPos++] = ' ';
					target[targetPos++] = token ^ 0x80;
					break;
				case 3:
				{
					// two-byte back-reference: 3 low bits encode the length,
					// the next 11 bits the distance back in the output
					if (sourcePos >= compressedSize) {
						finished = true;
						break;
					}
					const unsigned int N = 256u * token + sourceBuffer[sourcePos++];
					const size_t copyLength = (N & 7) + 3;
					if (copyLength > maxUncompressedSize - targetPos) {
						finished = true;
						break;
					}
					const size_t shift = (N & 0x3fff) / 8;
					// References reaching before the start of the output are
					// silently ignored, as before.
					// NOTE(review): shift == 0 copies each byte onto itself,
					// i.e. it only advances the output position.
					if (shift <= targetPos) {
						// Byte-by-byte on purpose: source and destination may
						// overlap, repeating just-written bytes.
						for (size_t i = 0; i < copyLength; ++i) {
							target[targetPos] = target[targetPos - shift];
							++targetPos;
						}
					}
					break;
				}
			}
		}
	}

	delete[] sourceBuffer;
	return targetPos;
}

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com> * Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -17,14 +17,20 @@
* 02110-1301, USA. * 02110-1301, USA.
*/ */
package org.geometerplus.zlibrary.core.html; #ifndef __DOCDECOMPRESSOR_H__
#define __DOCDECOMPRESSOR_H__
public interface ZLHtmlReader { #include <string>
public void startDocumentHandler();
public void endDocumentHandler();
public void startElementHandler(String tag, int offset, ZLHtmlAttributeMap attributes); class ZLInputStream;
public void endElementHandler(String tag);
public void byteDataHandler(byte[] ch, int start, int length); class DocDecompressor {
public void entityDataHandler(String entity);
} public:
DocDecompressor() {}
~DocDecompressor() {}
size_t decompress(ZLInputStream &stream, char *buffer, size_t compressedSize, size_t maxUncompressedSize);
};
#endif /* __DOCDECOMPRESSOR_H__ */

View file

@ -0,0 +1,89 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLUnicodeUtil.h>
#include "HtmlMetainfoReader.h"
#include "../../library/Book.h"
// Creates a metainfo reader that parses with the book's encoding and stores
// the fields selected by `readType` into `book`.
// The three "currently reading" flags start out false so that
// characterDataHandler() is well defined even if it is reached before
// startDocumentHandler() resets them.
HtmlMetainfoReader::HtmlMetainfoReader(Book &book, ReadType readType) :
	HtmlReader(book.encoding()), myBook(book), myReadType(readType),
	myReadTitle(false), myReadAuthor(false), myReadTags(false) {
}
// Reacts to one parsed tag.
// Returns false on a BODY tag and true for every other tag.
// DC:SUBJECT / DC:TITLE / DC:CREATOR tags (each only if selected by the read
// type) switch text collection on at the opening tag and, at the closing
// tag, store the collected text as a tag / the title / an author.
// For DC:CREATOR, collection only starts when the first ROLE attribute
// equals "AUT" (case-insensitively).
bool HtmlMetainfoReader::tagHandler(const HtmlReader::HtmlTag &tag) {
	if (tag.Name == "BODY") {
		return false;
	}

	const bool wantTags = (myReadType & TAGS) == TAGS;
	const bool wantTitle = (myReadType & TITLE) == TITLE;
	const bool wantAuthor = (myReadType & AUTHOR) == AUTHOR;

	if (wantTags && (tag.Name == "DC:SUBJECT")) {
		myReadTags = tag.Start;
		if (!tag.Start && !myBuffer.empty()) {
			myBook.addTag(myBuffer);
			myBuffer.erase();
		}
		return true;
	}

	if (wantTitle && (tag.Name == "DC:TITLE")) {
		myReadTitle = tag.Start;
		if (!tag.Start && !myBuffer.empty()) {
			myBook.setTitle(myBuffer);
			myBuffer.erase();
		}
		return true;
	}

	if (!wantAuthor || !(tag.Name == "DC:CREATOR")) {
		return true;
	}

	if (!tag.Start) {
		// closing tag: flush whatever was collected
		myReadAuthor = false;
		if (!myBuffer.empty()) {
			myBook.addAuthor(myBuffer);
		}
		myBuffer.erase();
		return true;
	}

	// opening tag: only the first ROLE attribute is consulted
	bool isAuthorRole = false;
	for (size_t pos = 0; pos < tag.Attributes.size(); ++pos) {
		if (tag.Attributes[pos].Name == "ROLE") {
			isAuthorRole = ZLUnicodeUtil::toUpper(tag.Attributes[pos].Value) == "AUT";
			break;
		}
	}
	if (isAuthorRole) {
		if (!myBuffer.empty()) {
			myBuffer += ", ";
		}
		myReadAuthor = true;
	}
	return true;
}
// Resets the "currently collecting" state: nothing is being read yet.
void HtmlMetainfoReader::startDocumentHandler() {
	myReadTags = false;
	myReadTitle = false;
	myReadAuthor = false;
}
// Nothing to finalize at the end of the document.
void HtmlMetainfoReader::endDocumentHandler() {
}
// Appends character data to the collection buffer while a title, author or
// tag element is being read; other text is ignored. When `convert` is set
// the text goes through the encoding converter, otherwise it is appended
// verbatim. Always returns true.
bool HtmlMetainfoReader::characterDataHandler(const char *text, size_t len, bool convert) {
	const bool collecting = myReadTitle || myReadAuthor || myReadTags;
	if (!collecting) {
		return true;
	}
	if (!convert) {
		myBuffer.append(text, len);
		return true;
	}
	myConverter->convert(myBuffer, text, text + len);
	return true;
}

View file

@ -0,0 +1,60 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __HTMLMETAINFOREADER_H__
#define __HTMLMETAINFOREADER_H__
#include "../html/HtmlReader.h"
class Book;
// HtmlReader that extracts book metadata (title, authors, tags) from
// DC:TITLE / DC:CREATOR / DC:SUBJECT elements and stores it in a Book.
// Which of the fields are extracted is selected by a ReadType bit mask.
class HtmlMetainfoReader : public HtmlReader {
public:
// Bit mask selecting the metadata fields to read.
enum ReadType {
NONE = 0,
TITLE = 1,
AUTHOR = 2,
TITLE_AND_AUTHOR = TITLE | AUTHOR,
TAGS = 4,
ALL = TITLE | AUTHOR | TAGS
};
public:
// `book` receives the extracted values; `readType` selects the fields.
HtmlMetainfoReader(Book &book, ReadType readType);
private:
void startDocumentHandler();
void endDocumentHandler();
bool tagHandler(const HtmlTag &tag);
bool characterDataHandler(const char *text, size_t len, bool convert);
private:
// Book being filled in.
Book &myBook;
// Fields requested by the creator of this reader.
const ReadType myReadType;
// True while inside the corresponding element, i.e. while character data
// should be collected into myBuffer.
bool myReadTitle;
bool myReadAuthor;
bool myReadTags;
// Text collected for the element currently being read.
std::string myBuffer;
};
#endif /* __HTMLMETAINFOREADER_H__ */

View file

@ -0,0 +1,191 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <ZLInputStream.h>
#include "PdbReader.h"
#include "BitReader.h"
#include "HuffDecompressor.h"
// Loads the Huffman tables and dictionaries needed by decompress().
//
// [beginIt, endIt) are stream offsets of the Huffman records: the first is
// the header record (holding the cache and base tables), the following ones
// are the dictionary records. `endHuffDataOffset` is the offset just past
// the last dictionary record; `extraFlags` is used later to strip trailing
// entries from compressed records.
//
// On any inconsistency (fewer than two records, offsets out of order, short
// read) the object is put into the ERROR_CORRUPTED_FILE state, in which
// decompress() returns 0. All owned pointers are initialised to 0 first so
// that the destructor is safe on every path.
HuffDecompressor::HuffDecompressor(ZLInputStream& stream,
		const std::vector<unsigned long>::const_iterator beginIt,
		const std::vector<unsigned long>::const_iterator endIt,
		const unsigned long endHuffDataOffset, const unsigned long extraFlags) :
	myEntryBits(0), myExtraFlags(extraFlags),
	myCacheTable(0), myBaseTable(0), myData(0), myDicts(0),
	myTargetBuffer(0), myTargetBufferEnd(0), myTargetBufferPtr(0),
	myErrorCode(ERROR_NONE) {
	// A header record plus at least one dictionary record is required;
	// otherwise *(beginIt + 1) below would read past the range.
	if (endIt - beginIt < 2) {
		myErrorCode = ERROR_CORRUPTED_FILE;
		return;
	}

	const unsigned long huffHeaderOffset = *beginIt;
	const unsigned long huffRecordsNumber = endIt - beginIt;
	const unsigned long huffDataOffset = *(beginIt + 1);

	// Guards the unsigned subtraction that computes the data size below.
	if (endHuffDataOffset < huffDataOffset) {
		myErrorCode = ERROR_CORRUPTED_FILE;
		return;
	}

	stream.seek(huffHeaderOffset, true);
	stream.seek(16, false);
	const unsigned long cacheTableOffset = PdbUtil::readUnsignedLongBE(stream);
	const unsigned long baseTableOffset = PdbUtil::readUnsignedLongBE(stream);

	myCacheTable = new unsigned long[256];
	stream.seek(huffHeaderOffset + cacheTableOffset, true);
	for (size_t i = 0; i < 256; ++i) {
		myCacheTable[i] = PdbUtil::readUnsignedLongLE(stream); //LE
	}

	myBaseTable = new unsigned long[64];
	stream.seek(huffHeaderOffset + baseTableOffset, true);
	for (size_t i = 0; i < 64; ++i) {
		myBaseTable[i] = PdbUtil::readUnsignedLongLE(stream); //LE
	}

	stream.seek(huffDataOffset + 12, true);
	myEntryBits = PdbUtil::readUnsignedLongBE(stream);

	const size_t huffDataSize = endHuffDataOffset - huffDataOffset;
	myData = new unsigned char[huffDataSize];
	stream.seek(huffDataOffset, true);
	if (huffDataSize != stream.read((char*)myData, huffDataSize)) {
		myErrorCode = ERROR_CORRUPTED_FILE;
		return;
	}

	// Each dictionary is addressed as a pointer into the single data block.
	myDicts = new unsigned char* [huffRecordsNumber - 1];
	for (size_t i = 0; i < huffRecordsNumber - 1; ++i) {
		const unsigned long dictOffset = *(beginIt + i + 1);
		if ((dictOffset < huffDataOffset) || (dictOffset - huffDataOffset > huffDataSize)) {
			// Keep the slot defined, but never decode with this table.
			myDicts[i] = myData;
			myErrorCode = ERROR_CORRUPTED_FILE;
			continue;
		}
		myDicts[i] = myData + (dictOffset - huffDataOffset);
	}
}
// Releases the tables and dictionary storage owned by this object.
// myDicts holds pointers into myData, so only the two arrays themselves
// are freed.
HuffDecompressor::~HuffDecompressor() {
	delete[] myDicts;
	delete[] myData;
	delete[] myBaseTable;
	delete[] myCacheTable;
}
bool HuffDecompressor::error() const {
return myErrorCode == ERROR_CORRUPTED_FILE;
}
// Reads one compressed record of `compressedSize` bytes from `stream` and
// expands it into `targetBuffer` (capacity `maxUncompressedSize`).
// Returns the number of bytes written; 0 when the record is empty, the
// decompressor is already in the corrupted state, `targetBuffer` is null or
// the stream yields fewer bytes than requested.
// A record whose trailing entries cover the whole record marks the file as
// corrupted.
size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, size_t compressedSize, size_t maxUncompressedSize) {
	if ((myErrorCode == ERROR_CORRUPTED_FILE) || (compressedSize == 0)) {
		return 0;
	}

	if (targetBuffer == 0) {
		myTargetBuffer = 0;
		myTargetBufferEnd = 0;
		myTargetBufferPtr = 0;
		return 0;
	}

	myTargetBuffer = targetBuffer;
	myTargetBufferPtr = targetBuffer;
	myTargetBufferEnd = targetBuffer + maxUncompressedSize;

	unsigned char *compressed = new unsigned char[compressedSize];
	const bool readOk = stream.read((char*)compressed, compressedSize) == compressedSize;
	if (readOk) {
		const size_t trailSize = sizeOfTrailingEntries(compressed, compressedSize);
		if (trailSize >= compressedSize) {
			myErrorCode = ERROR_CORRUPTED_FILE;
		} else {
			bitsDecompress(BitReader(compressed, compressedSize - trailSize));
		}
	}
	delete[] compressed;

	return myTargetBufferPtr - myTargetBuffer;
}
// Decodes the bit stream `bits` and appends the result to the target buffer
// set up by decompress(). Each decoded symbol selects a dictionary slice;
// a slice is either copied verbatim (flag 0x8000 set in its length word) or
// is itself a bit stream that is decoded recursively.
//
// `depth` is the recursion level; more than 32 nested levels are treated as
// a corrupted file. Malformed code lengths (0, or growing past 32 bits) are
// treated the same way instead of causing out-of-range shifts and table
// accesses. Decoding stops silently when the target buffer is full.
void HuffDecompressor::bitsDecompress(BitReader bits, size_t depth) {
	if (depth > 32) {
		myErrorCode = ERROR_CORRUPTED_FILE;
		return;
	}

	while (bits.left()) {
		const unsigned long dw = (unsigned long)bits.peek(32);
		const unsigned long v = myCacheTable[dw >> 24];
		unsigned long codelen = v & 0x1F;
		// A zero length would shift by 32, index the base table at -1 and
		// never consume input.
		if (codelen == 0) {
			myErrorCode = ERROR_CORRUPTED_FILE;
			return;
		}
		unsigned long code = dw >> (32 - codelen);
		unsigned long r = (v >> 8);
		if (!(v & 0x80)) {
			// Code is longer than the cache entry says: extend it bit by bit
			// using the base table (64 entries, i.e. lengths 1..32).
			while (code < myBaseTable[(codelen - 1) * 2]) {
				if (codelen >= 32) {
					myErrorCode = ERROR_CORRUPTED_FILE;
					return;
				}
				codelen += 1;
				code = dw >> (32 - codelen);
			}
			r = myBaseTable[(codelen - 1) * 2 + 1];
		}
		r -= code;
		if (!bits.eat(codelen)) {
			return;
		}
		const unsigned long dicno = r >> myEntryBits;
		const unsigned long off1 = 16 + (r - (dicno << myEntryBits)) * 2;
		const unsigned char* dict = myDicts[dicno]; //TODO need index check
		const unsigned long off2 = 16 + dict[off1] * 256 + dict[off1 + 1]; //TODO need index check
		const unsigned long blen = dict[off2] * 256 + dict[off2 + 1]; //TODO need index check
		const unsigned char* slice = dict + off2 + 2;
		const unsigned long sliceSize = blen & 0x7fff;
		if (blen & 0x8000) {
			// Compare against the remaining capacity; a slice that exactly
			// fills the buffer is accepted, as in DocDecompressor.
			const size_t room = myTargetBufferEnd - myTargetBufferPtr;
			if (sliceSize <= room) {
				memcpy(myTargetBufferPtr, slice, sliceSize);
				myTargetBufferPtr += sliceSize;
			} else {
				return;
			}
		} else {
			bitsDecompress(BitReader(slice, sliceSize), depth + 1);
		}
	}
}
// Sums the sizes of the trailing entries at the end of a record of `size`
// bytes. Bit 0 of the extra flags is skipped; every further set bit stands
// for one entry whose size is read from the end of the part of the record
// that is not yet accounted for. Entries are no longer read once the sum
// reaches `size`.
size_t HuffDecompressor::sizeOfTrailingEntries(unsigned char* data, size_t size) const {
	size_t total = 0;
	for (size_t flags = myExtraFlags >> 1; flags != 0; flags >>= 1) {
		if (((flags & 1) != 0) && (total < size)) {
			total += readVariableWidthIntegerBE(data, size - total);
		}
	}
	return total;
}
// Decodes an integer stored backwards at the end of the `psize` bytes at
// `ptr`: bytes are consumed from the last one towards the front, each
// contributing its low 7 bits (the last byte holds the least significant
// bits). Reading stops at a byte with the high bit set, after 4 bytes, or
// when the start of the buffer is reached.
size_t HuffDecompressor::readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const {
	size_t value = 0;
	for (unsigned char shift = 0; ; ) {
		const unsigned char lastByte = ptr[psize - 1];
		--psize;
		value |= (lastByte & 0x7F) << shift;
		shift += 7;
		const bool stop = ((lastByte & 0x80) != 0) || (shift >= 28) || (psize == 0);
		if (stop) {
			return value;
		}
	}
}

View file

@ -0,0 +1,63 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __HUFFDECOMPRESSOR_H__
#define __HUFFDECOMPRESSOR_H__
#include <string>
class ZLInputStream;
class BitReader;
// Decompressor for records compressed with Huffman tables plus
// dictionaries. The tables are loaded once by the constructor; decompress()
// is then called per compressed record.
// NOTE(review): the declaration uses std::vector but this header only
// includes <string>; it presumably relies on includers pulling in <vector>.
// NOTE(review): the class owns raw arrays and declares no copy constructor
// or assignment operator, so copying an instance would double-free; confirm
// it is never copied.
class HuffDecompressor {
public:
// Loads the tables from `stream`; the iterator range holds the stream
// offsets of the Huffman records, `endHuffDataOffset` the offset just past
// the last of them.
HuffDecompressor(ZLInputStream& stream,
const std::vector<unsigned long>::const_iterator beginHuffRecordOffsetIt,
const std::vector<unsigned long>::const_iterator endHuffRecordOffsetIt,
const unsigned long endHuffDataOffset, const unsigned long extraFlags);
~HuffDecompressor();
// Reads `compressedSize` bytes from `stream`, writes at most
// `maxUncompressedSize` bytes to `buffer`, returns the number written.
size_t decompress(ZLInputStream &stream, char *buffer, size_t compressedSize, size_t maxUncompressedSize);
// True once the file has been detected as corrupted.
bool error() const;
private:
size_t sizeOfTrailingEntries(unsigned char* data, size_t size) const;
size_t readVariableWidthIntegerBE(unsigned char* ptr, size_t psize) const;
// Takes the BitReader by value; `depth` counts recursion levels.
void bitsDecompress(BitReader bits, size_t depth = 0);
private:
// Number of low bits of a decoded value that index within a dictionary.
unsigned long myEntryBits;
// Flags describing trailing entries to strip from each record.
unsigned long myExtraFlags;
// 256-entry table indexed by the top 8 bits of the bit stream.
unsigned long* myCacheTable;
// 64-entry table consulted for codes not resolved by the cache table.
unsigned long* myBaseTable;
// Raw bytes of all dictionary records.
unsigned char* myData;
// Per-dictionary pointers into myData.
unsigned char** myDicts;
// Output window of the decompress() call in progress.
char* myTargetBuffer;
char* myTargetBufferEnd;
char* myTargetBufferPtr;
enum {
ERROR_NONE,
ERROR_CORRUPTED_FILE
} myErrorCode;
};
#endif /* __HUFFDECOMPRESSOR_H__ */

View file

@ -0,0 +1,362 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstdlib>
#include <algorithm>
#include <ZLFile.h>
#include <ZLFileImage.h>
#include <ZLStringUtil.h>
#include <ZLUnicodeUtil.h>
#include "MobipocketHtmlBookReader.h"
#include "PalmDocStream.h"
#include "../html/HtmlTagActions.h"
#include "../../bookmodel/BookModel.h"
// Tag action that turns an image tag carrying a RECINDEX attribute or a
// "kindle:embed:" SRC attribute into an image reference in the book model.
class MobipocketHtmlImageTagAction : public HtmlTagAction {
public:
MobipocketHtmlImageTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Tag action that inserts an end-of-section paragraph at an opening tag,
// closing any open contents paragraph first.
class MobipocketHtmlHrTagAction : public HtmlTagAction {
public:
MobipocketHtmlHrTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Mobipocket-specific variant of HtmlHrefTagAction with its own run().
// (run() is defined outside this excerpt.)
class MobipocketHtmlHrefTagAction : public HtmlHrefTagAction {
public:
MobipocketHtmlHrefTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Tag action for Mobipocket guide tags, judging by the name.
// (Its members are defined outside this excerpt.)
class MobipocketHtmlGuideTagAction : public HtmlTagAction {
public:
MobipocketHtmlGuideTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Tag action for Mobipocket reference tags, judging by the name.
// (Its members are defined outside this excerpt.)
class MobipocketHtmlReferenceTagAction : public HtmlTagAction {
public:
MobipocketHtmlReferenceTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Tag action registered for "MBP:PAGEBREAK": closes a pending contents
// paragraph (if any) and inserts an end-of-section paragraph.
class MobipocketHtmlPagebreakTagAction : public HtmlTagAction {
public:
MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader);
void run(const HtmlReader::HtmlTag &tag);
};
// Passes the owning reader on to the HtmlTagAction base; the action keeps no
// state of its own.
MobipocketHtmlImageTagAction::MobipocketHtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Handles an opening IMG tag.
//
// The image index is taken from the first attribute that yields a non-negative
// value: RECINDEX (parsed as decimal) or SRC of the form
// "kindle:embed:<hex>[?...]" (parsed as hex, anything from '?' on is dropped).
// When an index is found it is remembered in the reader's myImageIndexes set
// and an image reference named after the index is added to the text; an open
// paragraph is closed before the reference and reopened after it.
void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
	if (!tag.Start) {
		return;
	}

	static const std::string KINDLE_EMBED_PREFIX = "kindle:embed:";

	int recordIndex = -1;
	// Scanning stops as soon as one attribute produced a usable index.
	for (unsigned int n = 0; n < tag.Attributes.size() && recordIndex < 0; ++n) {
		const std::string &attributeName = tag.Attributes[n].Name;
		if (attributeName == "RECINDEX") {
			recordIndex = ZLStringUtil::parseDecimal(tag.Attributes[n].Value, -1);
			continue;
		}
		if (attributeName != "SRC") {
			continue;
		}
		std::string source = tag.Attributes[n].Value;
		if (!ZLStringUtil::stringStartsWith(source, KINDLE_EMBED_PREFIX)) {
			continue;
		}
		source = source.substr(KINDLE_EMBED_PREFIX.length());
		const size_t queryStart = source.find('?');
		if (queryStart != std::string::npos) {
			source = source.substr(0, queryStart);
		}
		recordIndex = ZLStringUtil::parseHex(source, -1);
	}
	if (recordIndex < 0) {
		return;
	}

	static_cast<MobipocketHtmlBookReader&>(myReader).myImageIndexes.insert(recordIndex);

	const bool reopenParagraph = bookReader().paragraphIsOpen();
	if (reopenParagraph) {
		bookReader().endParagraph();
	}
	bookReader().addImageReference(ZLStringUtil::numberToString(recordIndex), 0, false);
	if (reopenParagraph) {
		bookReader().beginParagraph();
	}
}
// Passes the owning reader on to the HtmlTagAction base; the action keeps no
// state of its own.
MobipocketHtmlHrTagAction::MobipocketHtmlHrTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Handles an opening HR tag: finishes a contents paragraph that is still open
// (leaving title mode as well) and then inserts an end-of-section paragraph.
// Closing tags are ignored.
void MobipocketHtmlHrTagAction::run(const HtmlReader::HtmlTag &tag) {
	if (!tag.Start) {
		return;
	}

	const bool contentsEntryPending = bookReader().contentsParagraphIsOpen();
	if (contentsEntryPending) {
		bookReader().endContentsParagraph();
		bookReader().exitTitle();
	}
	bookReader().insertEndOfSectionParagraph();
}
// Passes the owning reader on to the HtmlHrefTagAction base; the action keeps
// no state of its own.
MobipocketHtmlHrefTagAction::MobipocketHtmlHrefTagAction(HtmlBookReader &reader) : HtmlHrefTagAction(reader) {
}
// Passes the owning reader on to the HtmlTagAction base; the action keeps no
// state of its own.
MobipocketHtmlPagebreakTagAction::MobipocketHtmlPagebreakTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Handles an opening MBP:PAGEBREAK tag exactly like a horizontal rule: a
// contents paragraph that is still open is finished (and title mode left),
// then an end-of-section paragraph is inserted. Closing tags are ignored.
void MobipocketHtmlPagebreakTagAction::run(const HtmlReader::HtmlTag &tag) {
	if (!tag.Start) {
		return;
	}

	const bool contentsEntryPending = bookReader().contentsParagraphIsOpen();
	if (contentsEntryPending) {
		bookReader().endContentsParagraph();
		bookReader().exitTitle();
	}
	bookReader().insertEndOfSectionParagraph();
}
// Binds the TOC reader to its owning book reader and puts it into the empty
// initial state via reset().
MobipocketHtmlBookReader::TOCReader::TOCReader(MobipocketHtmlBookReader &reader) : myReader(reader) {
reset();
}
// Returns the TOC reader to its initial state: no entries, no entry being
// collected, no pending text, and both range bounds set to the largest size_t
// value (which makes rangeContainsPosition() false for every position).
void MobipocketHtmlBookReader::TOCReader::reset() {
	const size_t unsetOffset = static_cast<size_t>(-1);

	myStartOffset = unsetOffset;
	myEndOffset = unsetOffset;
	myIsActive = false;
	myCurrentEntryText.clear();
	myEntries.clear();
}
// Tells whether position lies in the half-open range
// [myStartOffset, myEndOffset).
bool MobipocketHtmlBookReader::TOCReader::rangeContainsPosition(size_t position) {
	if (position < myStartOffset) {
		return false;
	}
	return position < myEndOffset;
}
// Starts collecting the text of a TOC entry whose link target is position.
// Text passed to appendText() is accumulated until endReadEntry() is called.
void MobipocketHtmlBookReader::TOCReader::startReadEntry(size_t position) {
	myIsActive = true;
	myCurrentReference = position;
}
// Finishes the entry started by startReadEntry(). If any text was collected it
// is run through the owning reader's converter, stored as the entry for
// myCurrentReference (replacing an existing entry at that position), and the
// pending text is discarded. Collection is switched off in every case.
void MobipocketHtmlBookReader::TOCReader::endReadEntry() {
	if (!myIsActive) {
		return;
	}
	myIsActive = false;

	if (myCurrentEntryText.empty()) {
		return;
	}

	std::string entryTitle;
	myReader.myConverter->convert(entryTitle, myCurrentEntryText);
	myReader.myConverter->reset();
	myEntries[myCurrentReference] = entryTitle;
	myCurrentEntryText.erase();
}
// Appends len bytes of raw character data to the entry currently being
// collected; does nothing when no entry is being collected.
void MobipocketHtmlBookReader::TOCReader::appendText(const char *text, size_t len) {
	if (!myIsActive) {
		return;
	}
	myCurrentEntryText.append(text, len);
}
void MobipocketHtmlBookReader::TOCReader::addReference(size_t position, const std::string &text) {
myEntries[position] = text;
if (rangeContainsPosition(position)) {
setEndOffset(position);
}
}
// Sets the start of the range to position. The end of the range is moved to
// the key of the entry following the first entry at or after position; when
// there is no such following entry the end offset is left unchanged.
void MobipocketHtmlBookReader::TOCReader::setStartOffset(size_t position) {
	myStartOffset = position;

	std::map<size_t,std::string>::const_iterator entry = myEntries.lower_bound(position);
	if (entry == myEntries.end()) {
		return;
	}
	++entry;
	if (entry == myEntries.end()) {
		return;
	}
	myEndOffset = entry->first;
}
// Sets the (exclusive) upper bound of the range tested by
// rangeContainsPosition().
void MobipocketHtmlBookReader::TOCReader::setEndOffset(size_t position) {
myEndOffset = position;
}
// Read-only access to the collected entries, keyed by target position; the
// std::map keeps them ordered by position.
const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries() const {
return myEntries;
}
// Handles A tags.
//
// Opening tag: the first FILEPOS attribute with a non-empty value that atoi()
// converts to a positive number turns the link into an internal hyperlink
// labelled "&<filepos>". The position is remembered in myFileposReferences so
// that readDocument() can attach the label later. If the tag itself lies in
// the TOC range, collection of a TOC entry for the target starts, and the
// range is shortened when the target falls inside it.
// Closing tag: finishes a TOC entry that is being collected.
// Every tag not consumed by the FILEPOS path is passed to HtmlHrefTagAction.
void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
	MobipocketHtmlBookReader &mobiReader = static_cast<MobipocketHtmlBookReader&>(myReader);

	if (!tag.Start) {
		mobiReader.myTocReader.endReadEntry();
		HtmlHrefTagAction::run(tag);
		return;
	}

	for (unsigned int n = 0; n < tag.Attributes.size(); ++n) {
		if (tag.Attributes[n].Name != "FILEPOS") {
			continue;
		}
		const std::string &rawPosition = tag.Attributes[n].Value;
		if (rawPosition.empty()) {
			continue;
		}
		const int filepos = atoi(rawPosition.c_str());
		if (filepos <= 0) {
			continue;
		}

		MobipocketHtmlBookReader::TOCReader &toc = mobiReader.myTocReader;
		if (toc.rangeContainsPosition(tag.Offset)) {
			toc.startReadEntry(filepos);
			if (toc.rangeContainsPosition(filepos)) {
				toc.setEndOffset(filepos);
			}
		}

		mobiReader.myFileposReferences.insert(filepos);

		std::string label = "&";
		ZLStringUtil::appendNumber(label, filepos);
		setHyperlinkType(INTERNAL_HYPERLINK);
		bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, label);
		return;
	}

	HtmlHrefTagAction::run(tag);
}
// Passes the owning reader on to the HtmlTagAction base; the action keeps no
// state of its own.
MobipocketHtmlGuideTagAction::MobipocketHtmlGuideTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Tracks guide nesting on the reader: the flag is set by the opening GUIDE tag
// and cleared by the closing one.
void MobipocketHtmlGuideTagAction::run(const HtmlReader::HtmlTag &tag) {
	static_cast<MobipocketHtmlBookReader&>(myReader).myInsideGuide = tag.Start;
}
// Passes the owning reader on to the HtmlTagAction base; the action keeps no
// state of its own.
MobipocketHtmlReferenceTagAction::MobipocketHtmlReferenceTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Handles REFERENCE tags, but only while inside a GUIDE element.
//
// A reference with a non-empty TITLE and a FILEPOS that atoi() converts to a
// positive number becomes a TOC entry at that position. If the reference also
// carries TYPE="toc" (compared case-insensitively via toUpper), its position
// becomes the start of the range in which links are read as TOC entries.
void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
	MobipocketHtmlBookReader &mobiReader = static_cast<MobipocketHtmlBookReader&>(myReader);
	if (!mobiReader.myInsideGuide) {
		return;
	}

	std::string entryTitle;
	std::string rawPosition;
	bool pointsToToc = false;
	for (size_t n = 0; n < tag.Attributes.size(); ++n) {
		const std::string &attributeName = tag.Attributes[n].Name;
		const std::string &attributeValue = tag.Attributes[n].Value;
		if (attributeName == "TITLE") {
			entryTitle = attributeValue;
		} else if (attributeName == "FILEPOS") {
			rawPosition = attributeValue;
		} else if (attributeName == "TYPE") {
			if (ZLUnicodeUtil::toUpper(attributeValue) == "TOC") {
				pointsToToc = true;
			}
		}
	}

	if (entryTitle.empty() || rawPosition.empty()) {
		return;
	}
	const int position = atoi(rawPosition.c_str());
	if (position <= 0) {
		return;
	}

	mobiReader.myTocReader.addReference(position, entryTitle);
	if (pointsToToc) {
		mobiReader.myTocReader.setStartOffset(position);
	}
}
// Factory for tag actions: returns the Mobipocket-specific action for IMG, HR,
// A, GUIDE, REFERENCE and MBP:PAGEBREAK, and falls back to the generic
// HtmlBookReader action for every other tag name.
shared_ptr<HtmlTagAction> MobipocketHtmlBookReader::createAction(const std::string &tag) {
	HtmlTagAction *action = 0;
	if (tag == "IMG") {
		action = new MobipocketHtmlImageTagAction(*this);
	} else if (tag == "HR") {
		action = new MobipocketHtmlHrTagAction(*this);
	} else if (tag == "A") {
		action = new MobipocketHtmlHrefTagAction(*this);
	} else if (tag == "GUIDE") {
		action = new MobipocketHtmlGuideTagAction(*this);
	} else if (tag == "REFERENCE") {
		action = new MobipocketHtmlReferenceTagAction(*this);
	} else if (tag == "MBP:PAGEBREAK") {
		action = new MobipocketHtmlPagebreakTagAction(*this);
	}

	if (action != 0) {
		return action;
	}
	return HtmlBookReader::createAction(tag);
}
void MobipocketHtmlBookReader::startDocumentHandler() {
HtmlBookReader::startDocumentHandler();
myInsideGuide = false;
myFileposReferences.clear();
myPositionToParagraphMap.clear();
myTocReader.reset();
}
// Records, for every tag, the pair (tag offset, paragraph index) before the
// tag is processed by the base class. The paragraph index is the index of the
// currently open paragraph, or of the next paragraph to be created when none
// is open. readDocument() uses this table to resolve file positions.
bool MobipocketHtmlBookReader::tagHandler(const HtmlTag &tag) {
	const size_t paragraphCount = myBookReader.model().bookTextModel()->paragraphsNumber();
	const size_t paragraphIndex =
		myBookReader.paragraphIsOpen() ? paragraphCount - 1 : paragraphCount;

	myPositionToParagraphMap.push_back(std::pair<size_t,size_t>(tag.Offset, paragraphIndex));
	return HtmlBookReader::tagHandler(tag);
}
// Creates a reader for the given file. The base HtmlBookReader gets an empty
// string as its first argument; the file path is kept in myFileName and used
// by readDocument() when creating file images. The generic HTML
// table-of-contents building and PRE tag processing are switched off.
MobipocketHtmlBookReader::MobipocketHtmlBookReader(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlBookReader("", model, format, encoding), myFileName(file.path()), myTocReader(*this) {
setBuildTableOfContent(false);
setProcessPreTag(false);
}
// Forwards character data to the TOC reader (which keeps it only while a TOC
// entry is being collected) and then lets the base class process it as usual.
bool MobipocketHtmlBookReader::characterDataHandler(const char *text, size_t len, bool convert) {
	myTocReader.appendText(text, len);

	const bool handled = HtmlBookReader::characterDataHandler(text, len, convert);
	return handled;
}
// Parses the document with the base class and then resolves everything that
// could only be collected while parsing:
//  1. every referenced image index is mapped to a byte range of the file and
//     registered as a ZLFileImage (indexes in the markup are 1-based, hence
//     the "- 1");
//  2. every FILEPOS link target gets a hyperlink label "&<filepos>" attached
//     to the paragraph of the first tag at or after that position;
//  3. every collected TOC entry becomes a contents paragraph pointing to the
//     paragraph found the same way.
// Steps 2 and 3 walk the (position-ordered) tag table once each and stop as
// soon as a target lies behind the last recorded tag.
//
// NOTE(review): stream is cast to PalmDocStream without a check — callers are
// presumably required to pass one; confirm.
void MobipocketHtmlBookReader::readDocument(ZLInputStream &stream) {
	typedef std::vector<std::pair<size_t,size_t> > PositionTable;

	HtmlBookReader::readDocument(stream);

	PalmDocStream &palmStream = (PalmDocStream&)stream;
	for (std::set<int>::const_iterator image = myImageIndexes.begin(); image != myImageIndexes.end(); ++image) {
		const std::pair<int,int> location = palmStream.imageLocation(palmStream.header(), *image - 1);
		if (location.first <= 0 || location.second <= 0) {
			continue;
		}
		myBookReader.addImage(
			ZLStringUtil::numberToString(*image),
			new ZLFileImage(ZLFile(myFileName), "", location.first, location.second)
		);
	}

	const PositionTable::const_iterator tableEnd = myPositionToParagraphMap.end();

	PositionTable::const_iterator tagPosition = myPositionToParagraphMap.begin();
	for (std::set<size_t>::const_iterator target = myFileposReferences.begin(); target != myFileposReferences.end(); ++target) {
		for (; tagPosition != tableEnd && tagPosition->first < *target; ++tagPosition) {
		}
		if (tagPosition == tableEnd) {
			break;
		}
		std::string label("&");
		ZLStringUtil::appendNumber(label, *target);
		myBookReader.addHyperlinkLabel(label, tagPosition->second);
	}

	tagPosition = myPositionToParagraphMap.begin();
	const std::map<size_t,std::string> &tocEntries = myTocReader.entries();
	for (std::map<size_t,std::string>::const_iterator entry = tocEntries.begin(); entry != tocEntries.end(); ++entry) {
		for (; tagPosition != tableEnd && tagPosition->first < entry->first; ++tagPosition) {
		}
		if (tagPosition == tableEnd) {
			break;
		}
		myBookReader.beginContentsParagraph(tagPosition->second);
		myBookReader.addContentsData(entry->second);
		myBookReader.endContentsParagraph();
	}
}

View file

@ -0,0 +1,89 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __MOBIPOCKETHTMLBOOKREADER_H__
#define __MOBIPOCKETHTMLBOOKREADER_H__
#include <set>
#include "../html/HtmlBookReader.h"
// HTML book reader specialised for Mobipocket content. On top of the generic
// HtmlBookReader it resolves image record references, FILEPOS-based internal
// links and the table of contents described by GUIDE/REFERENCE tags.
class MobipocketHtmlBookReader : public HtmlBookReader {
public:
MobipocketHtmlBookReader(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding);
// Parses the stream and afterwards registers images, hyperlink labels and
// contents paragraphs collected during parsing.
void readDocument(ZLInputStream &stream);
private:
void startDocumentHandler();
bool tagHandler(const HtmlTag &tag);
bool characterDataHandler(const char *text, size_t len, bool convert);
shared_ptr<HtmlTagAction> createAction(const std::string &tag);
public:
// Collects table-of-contents entries: a map from target file position to
// entry title, plus a [start, end) position range in which links are read
// as TOC entries.
class TOCReader {
public:
TOCReader(MobipocketHtmlBookReader &reader);
// Drops all entries and returns to the initial (inactive) state.
void reset();
// Stores text as the entry for position.
void addReference(size_t position, const std::string &text);
void setStartOffset(size_t position);
void setEndOffset(size_t position);
// True when position lies in [start offset, end offset).
bool rangeContainsPosition(size_t position);
// startReadEntry()/appendText()/endReadEntry() collect the text of one
// entry whose target is the position given to startReadEntry().
void startReadEntry(size_t position);
void endReadEntry();
void appendText(const char *text, size_t len);
const std::map<size_t,std::string> &entries() const;
private:
MobipocketHtmlBookReader &myReader;
std::map<size_t,std::string> myEntries;
// True while the text of an entry is being collected.
bool myIsActive;
size_t myStartOffset;
size_t myEndOffset;
// Target position of the entry being collected.
size_t myCurrentReference;
// Raw (unconverted) text of the entry being collected.
std::string myCurrentEntryText;
};
private:
// Image indexes referenced by IMG tags.
std::set<int> myImageIndexes;
// Path of the book file, used when creating file images.
const std::string myFileName;
// (tag offset, paragraph index) for every tag seen, in parsing order.
std::vector<std::pair<size_t,size_t> > myPositionToParagraphMap;
// Targets of all FILEPOS links.
std::set<size_t> myFileposReferences;
// True between the opening and closing GUIDE tag.
bool myInsideGuide;
TOCReader myTocReader;
// The tag actions and the TOC reader access the private members above.
friend class MobipocketHtmlImageTagAction;
friend class MobipocketHtmlHrefTagAction;
friend class MobipocketHtmlGuideTagAction;
friend class MobipocketHtmlReferenceTagAction;
friend class MobipocketHtmlPagebreakTagAction;
friend class TOCReader;
};
#endif /* __MOBIPOCKETHTMLBOOKREADER_H__ */

View file

@ -0,0 +1,311 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include <ZLInputStream.h>
#include <ZLEncodingConverter.h>
#include <ZLStringUtil.h>
#include <ZLLanguageUtil.h>
#include <ZLImage.h>
#include <ZLFileImage.h>
#include <ZLLogger.h>
#include "PdbPlugin.h"
#include "PalmDocStream.h"
#include "MobipocketHtmlBookReader.h"
#include "../../library/Book.h"
// Name of the file type handled by this plugin.
const std::string MobipocketPlugin::supportedFileType() const {
	return std::string("Mobipocket");
}
//bool MobipocketPlugin::acceptsFile(const ZLFile &file) const {
// return PdbPlugin::fileType(file) == "BOOKMOBI";
//}
void MobipocketPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
MobipocketHtmlBookReader(file, model, format, encoding).readDocument(stream);
}
/*
 * Fills book with the metadata stored in the MOBI and EXTH headers:
 * encoding (only when the book has none yet), language, authors (EXTH record
 * 100), tags (EXTH record 105) and the full title. Afterwards, and also when
 * record 0 carries no "MOBI" signature, the PalmDocLikePlugin implementation
 * is run.
 *
 * Returns false when the file cannot be opened or the PDB header cannot be
 * read; otherwise the result of PalmDocLikePlugin::readMetainfo().
 *
 * All relative seeks below are counted from the start of record 0; the
 * running offset is given in the comments.
 *
 * NOTE(review): lengths read from the file (EXTH record sizes, title length)
 * are used as allocation sizes without an upper bound; a corrupted file can
 * request a very large buffer. Bounding them needs the stream size, which is
 * not available from the code visible here.
 */
bool MobipocketPlugin::readMetainfo(Book &book) const {
	shared_ptr<ZLInputStream> stream = book.file().inputStream();
	if (stream.isNull() || ! stream->open()) {
		return false;
	}
	PdbHeader header;
	if (!header.read(stream)) {
		return false;
	}

	stream->seek(header.Offsets[0] + 16, true);                // +16
	// Zero-initialised so that a short read never leaves garbage to compare.
	char test[5] = { 0 };
	stream->read(test, 4);                                     // +20
	static const std::string MOBI = "MOBI";
	if (MOBI != test) {
		return PalmDocLikePlugin::readMetainfo(book);
	}

	const unsigned long length = PdbUtil::readUnsignedLongBE(*stream);        // +24
	stream->seek(4, false);                                                   // +28
	const unsigned long encodingCode = PdbUtil::readUnsignedLongBE(*stream);  // +32
	if (book.encoding().empty()) {
		shared_ptr<ZLEncodingConverter> converter =
			ZLEncodingCollection::Instance().converter(encodingCode);
		if (!converter.isNull()) {
			book.setEncoding(converter->name());
		}
	}

	stream->seek(52, false);                                                    // +84
	const unsigned long fullNameOffset = PdbUtil::readUnsignedLongBE(*stream);  // +88
	const unsigned long fullNameLength = PdbUtil::readUnsignedLongBE(*stream);  // +92
	const unsigned long languageCode = PdbUtil::readUnsignedLongBE(*stream);    // +96
	const std::string lang =
		ZLLanguageUtil::languageByIntCode(languageCode & 0xFF, (languageCode >> 8) & 0xFF);
	if (lang != "") {
		book.setLanguage(lang);
	}

	stream->seek(32, false);                                                 // +128
	const unsigned long exthFlags = PdbUtil::readUnsignedLongBE(*stream);    // +132
	if (exthFlags & 0x40) {
		// The EXTH block follows the MOBI header, whose length is counted
		// from the "MOBI" signature at +16.
		stream->seek(header.Offsets[0] + 16 + length, true);
		stream->read(test, 4);
		static const std::string EXTH = "EXTH";
		if (EXTH == test) {
			stream->seek(4, false);
			const unsigned long recordsNum = PdbUtil::readUnsignedLongBE(*stream);
			for (unsigned long i = 0; i < recordsNum; ++i) {
				const unsigned long type = PdbUtil::readUnsignedLongBE(*stream);
				// size includes the 8 bytes of type and size themselves.
				const unsigned long size = PdbUtil::readUnsignedLongBE(*stream);
				if (size > 8) {
					std::string value(size - 8, '\0');
					stream->read(&value[0], size - 8);
					switch (type) {
						case 100: // author
						{
							// "Last, First" is turned into "First Last".
							const std::string::size_type comma = value.find(',');
							if (comma != std::string::npos) {
								std::string part0 = value.substr(0, comma);
								std::string part1 = value.substr(comma + 1);
								ZLStringUtil::stripWhiteSpaces(part0);
								ZLStringUtil::stripWhiteSpaces(part1);
								value = part1 + ' ' + part0;
							} else {
								ZLStringUtil::stripWhiteSpaces(value);
							}
							book.addAuthor(value);
							break;
						}
						case 105: // subject
							book.addTag(value);
							break;
					}
				}
			}
		}
	}

	// The full title is stored at fullNameOffset from the start of record 0.
	stream->seek(header.Offsets[0] + fullNameOffset, true);
	std::string title(fullNameLength, '\0');
	if (!title.empty()) {
		stream->read(&title[0], fullNameLength);
	}
	book.setTitle(title);

	stream->close();
	return PalmDocLikePlugin::readMetainfo(book);
}
/*
 * Returns the cover image of a Mobipocket file, or a null pointer when the
 * file has none or cannot be read.
 *
 * The image index is taken from EXTH record 201 (cover offset); when that is
 * missing, EXTH record 202 (thumbnail offset) is used instead. The index is
 * then resolved to a byte range of the file through PalmDocStream.
 *
 * An EXTH record whose size field is smaller than the 8-byte record header is
 * treated as the end of usable EXTH data: skipping "size - 8" bytes would
 * otherwise wrap around in unsigned arithmetic and move the stream to an
 * unrelated position.
 */
shared_ptr<const ZLImage> MobipocketPlugin::coverImage(const ZLFile &file) const {
	shared_ptr<ZLInputStream> stream = file.inputStream();
	if (stream.isNull() || ! stream->open()) {
		return 0;
	}
	PdbHeader header;
	if (!header.read(stream)) {
		return 0;
	}

	// "MOBI" signature at +16 from the start of record 0.
	stream->seek(header.Offsets[0] + 16, true);
	char test[5];
	test[4] = '\0';
	stream->read(test, 4);
	static const std::string MOBI = "MOBI";
	if (MOBI != test) {
		return 0;
	}

	const unsigned long length = PdbUtil::readUnsignedLongBE(*stream);      // +24
	stream->seek(104, false);                                               // +128
	const unsigned long exthFlags = PdbUtil::readUnsignedLongBE(*stream);

	const unsigned long NO_INDEX = (unsigned long)-1;
	unsigned long coverIndex = NO_INDEX;
	unsigned long thumbIndex = NO_INDEX;

	if (exthFlags & 0x40) {
		stream->seek(header.Offsets[0] + 16 + length, true);
		stream->read(test, 4);
		static const std::string EXTH = "EXTH";
		if (EXTH != test) {
			return 0;
		}
		stream->seek(4, false);
		const unsigned long recordsNum = PdbUtil::readUnsignedLongBE(*stream);
		for (unsigned long i = 0; i < recordsNum; ++i) {
			const unsigned long type = PdbUtil::readUnsignedLongBE(*stream);
			// size includes the 8 bytes of type and size themselves.
			const unsigned long size = PdbUtil::readUnsignedLongBE(*stream);
			if (size < 8) {
				// Malformed record; nothing after it can be trusted.
				break;
			}

			unsigned long *target = 0;
			switch (type) {
				case 201: // coveroffset
					target = &coverIndex;
					break;
				case 202: // thumboffset
					target = &thumbIndex;
					break;
				default:
					break;
			}

			// Only a 4-byte payload is a valid index; anything else is skipped.
			if (target != 0 && size == 12) {
				*target = PdbUtil::readUnsignedLongBE(*stream);
			} else {
				stream->seek(size - 8, false);
			}
		}
	}
	stream->close();

	if (coverIndex == NO_INDEX) {
		if (thumbIndex == NO_INDEX) {
			return 0;
		}
		coverIndex = thumbIndex;
	}

	PalmDocStream pbStream(file);
	if (!pbStream.open()) {
		return 0;
	}
	std::pair<int,int> imageLocation = pbStream.imageLocation(pbStream.header(), coverIndex);
	if (imageLocation.first > 0 && imageLocation.second > 0) {
		return new ZLFileImage(
			file,
			"",
			imageLocation.first,
			imageLocation.second
		);
	}
	return 0;
}
// Reads the text encoding and the language from the MOBI header and stores
// them in book. The encoding falls back to "utf-8" when no converter is known
// for the stored code; the language is only set when the stored code maps to
// a non-empty language name.
// Returns false when the file cannot be opened, the PDB header cannot be read
// or record 0 does not carry the "MOBI" signature at offset 16.
bool MobipocketPlugin::readLanguageAndEncoding(Book &book) const {
	shared_ptr<ZLInputStream> input = book.file().inputStream();
	if (input.isNull()) {
		return false;
	}
	if (!input->open()) {
		return false;
	}

	PdbHeader pdbHeader;
	if (!pdbHeader.read(input)) {
		return false;
	}

	input->seek(pdbHeader.Offsets[0] + 16, true);
	const unsigned long signature = PdbUtil::readUnsignedLongBE(*input);
	if (signature != 0x4D4F4249) /* "MOBI" */ {
		return false;
	}

	// Encoding code: 8 bytes after the signature.
	input->seek(8, false);
	const unsigned long encodingCode = PdbUtil::readUnsignedLongBE(*input);
	shared_ptr<ZLEncodingConverter> converter =
		ZLEncodingCollection::Instance().converter(encodingCode);
	if (converter.isNull()) {
		book.setEncoding("utf-8");
	} else {
		book.setEncoding(converter->name());
	}

	// Language code: 60 bytes after the encoding code.
	input->seek(60, false);
	const unsigned long languageCode = PdbUtil::readUnsignedLongBE(*input);
	const unsigned long languageId = languageCode & 0xFF;
	const unsigned long subLanguageId = (languageCode >> 8) & 0xFF;
	const std::string language = ZLLanguageUtil::languageByIntCode(languageId, subLanguageId);
	if (!language.empty()) {
		book.setLanguage(language);
	}
	return true;
}
/*
 * Returns the book description stored in EXTH record 103, or an empty string
 * when the file cannot be read, is not a MOBI file, or has no such record.
 * When several description records exist, the last one wins.
 *
 * Every failure path returns an empty std::string. (Returning the literal 0
 * here would construct a std::string from a null pointer, which is undefined
 * behaviour.)
 *
 * An EXTH record whose size field is smaller than the 8-byte record header is
 * treated as the end of usable EXTH data: skipping "size - 8" bytes would
 * otherwise wrap around in unsigned arithmetic and move the stream to an
 * unrelated position.
 */
std::string MobipocketPlugin::readAnnotation(const ZLFile &file) const {
	shared_ptr<ZLInputStream> stream = file.inputStream();
	if (stream.isNull() || ! stream->open()) {
		return std::string();
	}
	PdbHeader header;
	if (!header.read(stream)) {
		return std::string();
	}

	// "MOBI" signature at +16 from the start of record 0.
	stream->seek(header.Offsets[0] + 16, true);
	char test[5];
	test[4] = '\0';
	stream->read(test, 4);
	static const std::string MOBI = "MOBI";
	if (MOBI != test) {
		return std::string();
	}

	std::string annotation;
	const unsigned long length = PdbUtil::readUnsignedLongBE(*stream);      // +24
	stream->seek(104, false);                                               // +128
	const unsigned long exthFlags = PdbUtil::readUnsignedLongBE(*stream);
	if (exthFlags & 0x40) {
		stream->seek(header.Offsets[0] + 16 + length, true);
		stream->read(test, 4);
		static const std::string EXTH = "EXTH";
		if (EXTH != test) {
			return std::string();
		}
		stream->seek(4, false);
		const unsigned long recordsNum = PdbUtil::readUnsignedLongBE(*stream);
		for (unsigned long i = 0; i < recordsNum; ++i) {
			const unsigned long type = PdbUtil::readUnsignedLongBE(*stream);
			// size includes the 8 bytes of type and size themselves.
			const unsigned long size = PdbUtil::readUnsignedLongBE(*stream);
			if (size < 8) {
				// Malformed record; nothing after it can be trusted.
				break;
			}
			const unsigned long payloadSize = size - 8;
			if (type == 103 && payloadSize > 0) { // description
				std::string value(payloadSize, '\0');
				stream->read(&value[0], payloadSize);
				annotation = value;
			} else {
				stream->seek(payloadSize, false);
			}
		}
	}
	stream->close();
	return annotation;
}
// No unique identifiers are read for Mobipocket books; the book is left
// untouched and success is reported.
bool MobipocketPlugin::readUids(Book &/*book*/) const {
return true;
}

View file

@ -0,0 +1,42 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include "PdbPlugin.h"
#include "PalmDocStream.h"
#include "PalmDocLikeStream.h"
#include "../../library/Book.h"
// PalmDoc-like plugins always report that they provide metainfo.
bool PalmDocLikePlugin::providesMetainfo() const {
return true;
}
// Creates the text stream for the given file: a new, not yet opened
// PalmDocStream.
shared_ptr<ZLInputStream> PalmDocLikePlugin::createStream(const ZLFile &file) const {
return new PalmDocStream(file);
}
/*
const std::string &PalmDocLikePlugin::tryOpen(const ZLFile &file) const {
PalmDocStream stream(file);
stream.open();
return stream.error();
}
*/

View file

@ -0,0 +1,80 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
//#include <ZLResource.h>
#include "PalmDocLikeStream.h"
// Only forwards the file to the PdbStream base; the record-related members
// are set up by open().
PalmDocLikeStream::PalmDocLikeStream(const ZLFile &file) : PdbStream(file) {
}
// Closes the stream on destruction.
PalmDocLikeStream::~PalmDocLikeStream() {
close();
}
// Opens the underlying PDB stream, lets the subclass parse record 0 and
// allocates the record buffer (myMaxRecordSize bytes, as determined by
// processZeroRecord()).
// Returns false when the base stream cannot be opened (error code
// ERROR_UNKNOWN) or when processZeroRecord() rejects the file (error code as
// set by the subclass).
bool PalmDocLikeStream::open() {
	myErrorCode = ERROR_NONE;

	const bool baseOpened = PdbStream::open();
	if (!baseOpened) {
		myErrorCode = ERROR_UNKNOWN;
		return false;
	}

	if (processZeroRecord()) {
		myBuffer = new char[myMaxRecordSize];
		myRecordIndex = 0;
		return true;
	}
	return false;
}
// Makes sure the buffer holds unread data. While the buffer is exhausted the
// next record is processed; records that produce no data are skipped.
// Returns false when the last text record has already been consumed or when
// processRecord() fails.
bool PalmDocLikeStream::fillBuffer() {
	for (;;) {
		if (myBufferOffset != myBufferLength) {
			return true;
		}
		if (myRecordIndex + 1 > myMaxRecordIndex) {
			return false;
		}
		++myRecordIndex;
		if (!processRecord()) {
			return false;
		}
	}
}
/*
const std::string &PalmDocLikeStream::error() const {
static const ZLResource &resource = ZLResource::resource("mobipocketPlugin");
switch (myErrorCode) {
default:
{
static const std::string EMPTY;
return EMPTY;
}
case ERROR_UNKNOWN:
return resource["unknown"].value();
case ERROR_COMPRESSION:
return resource["unsupportedCompressionMethod"].value();
case ERROR_ENCRYPTION:
return resource["encryptedFile"].value();
}
}
*/

View file

@ -0,0 +1,58 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __PALMDOCLIKESTREAM_H__
#define __PALMDOCLIKESTREAM_H__
#include "PdbStream.h"
class ZLFile;
/*
 * Base class for record-based PalmDoc-style text streams.
 *
 * open() opens the underlying PDB stream and asks the subclass to parse
 * record 0 (processZeroRecord()); fillBuffer() then pulls text records one at
 * a time through processRecord() until the buffer holds unread data.
 */
class PalmDocLikeStream : public PdbStream {

public:
	PalmDocLikeStream(const ZLFile &file);
	~PalmDocLikeStream();
	bool open();

	//const std::string &error() const;
	//std::pair<int,int> imageLocation(int index);
	//bool hasExtraSections() const;

protected:
	// Refills the buffer from the following record(s); false when no more
	// data is available.
	bool fillBuffer();

private:
	// Loads record number myRecordIndex into the buffer.
	virtual bool processRecord() = 0;
	// Parses record 0; expected to set myMaxRecordSize and myMaxRecordIndex
	// (open() and fillBuffer() rely on them).
	virtual bool processZeroRecord() = 0;

protected:
	// Size of the record buffer allocated by open().
	unsigned short myMaxRecordSize;
	// Index of the record currently held in the buffer.
	size_t myRecordIndex;
	// Index of the last record fillBuffer() will read.
	size_t myMaxRecordIndex;

	// No trailing comma after the last enumerator: it is not valid C++03.
	enum {
		ERROR_NONE,
		ERROR_UNKNOWN,
		ERROR_COMPRESSION,
		ERROR_ENCRYPTION
	} myErrorCode;
};
#endif /* __PALMDOCLIKESTREAM_H__ */

View file

@ -0,0 +1,174 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <algorithm>
#include <ZLFile.h>
#include "PalmDocStream.h"
#include "DocDecompressor.h"
#include "HuffDecompressor.h"
// Only forwards the file to the PalmDocLikeStream base; header fields are read
// by processZeroRecord() when the stream is opened.
PalmDocStream::PalmDocStream(const ZLFile &file) : PalmDocLikeStream(file) {
}
// Closes the stream on destruction.
PalmDocStream::~PalmDocStream() {
close();
}
bool PalmDocStream::processRecord() {
const size_t currentOffset = recordOffset(myRecordIndex);
if (currentOffset < myBase->offset()) {
return false;
}
myBase->seek(currentOffset, true);
const size_t nextOffset = recordOffset(myRecordIndex + 1);
if (nextOffset < currentOffset) {
return false;
}
const unsigned short recordSize = nextOffset - currentOffset;
switch (myCompressionVersion) {
case 17480://'DH' // HuffCDic compression
myBufferLength = myHuffDecompressorPtr->decompress(*myBase, myBuffer, recordSize, myMaxRecordSize);
//if (myHuffDecompressorPtr->error()) {
// myErrorCode = ERROR_UNKNOWN;
//}
break;
case 2: // PalmDoc compression
myBufferLength = DocDecompressor().decompress(*myBase, myBuffer, recordSize, myMaxRecordSize);
break;
case 1: // No compression
myBufferLength = myBase->read(myBuffer, std::min(recordSize, myMaxRecordSize));
break;
}
myBufferOffset = 0;
return true;
}
// Parses record 0 (PalmDoc header, optionally followed by a MOBI header).
//
// Reads the compression version, text length, number of text records and the
// maximum record size, checks the encryption flag for "BOOKMOBI" files, reads
// the index of the first image record and, for HuffCDic-compressed files,
// creates the Huffman decompressor from the sections named in the MOBI header.
//
// Returns false and sets myErrorCode when the compression method is not one of
// 1, 2 or 17480 (ERROR_COMPRESSION), the maximum record size is 0
// (ERROR_UNKNOWN), the file is encrypted (ERROR_ENCRYPTION) or the Huffman
// section range is invalid (ERROR_COMPRESSION).
//
// In the trailing comments "^" stands for the start of record 0.
bool PalmDocStream::processZeroRecord() {
// Presumably the caller has already positioned myBase at the start of record 0
// (all offsets below are counted from there) -- TODO confirm against PdbStream::open().
myCompressionVersion = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 2
switch (myCompressionVersion) {
case 1:
case 2:
case 17480:
break;
default:
myErrorCode = ERROR_COMPRESSION;
return false;
}
myBase->seek(2, false); // myBase offset: ^ + 4
myTextLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 8
myTextRecordNumber = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 10
unsigned short endSectionIndex = header().Offsets.size();
// Never read past the last section listed in the PDB header.
myMaxRecordIndex = std::min(myTextRecordNumber, (unsigned short)(endSectionIndex - 1));
//TODO: report an error here when the record count in record 0 is incompatible with the number of records in the PDB header
myMaxRecordSize = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 12
if (myMaxRecordSize == 0) {
myErrorCode = ERROR_UNKNOWN;
return false;
}
/*
std::cerr << "PalmDocStream::processRecord0():\n";
std::cerr << "PDB header indentificator : " << header().Id << "\n";
std::cerr << "PDB file system: sizeof opened : " << myBaseSize << "\n";
std::cerr << "PDB header/record[0] max index : " << myMaxRecordIndex << "\n";
std::cerr << "PDB record[0][0..2] compression : " << myCompressionVersion << "\n";
std::cerr << "PDB record[0][2..4] spare : " << mySpare << "\n";
std::cerr << "PDB record[0][4..8] text length : " << myTextLength << "\n";
std::cerr << "PDB record[0][8..10] text records : " << myTextRecords << "\n";
std::cerr << "PDB record[0][10..12] max record size: " << myMaxRecordSize << "\n";
*/
if (header().Id == "BOOKMOBI") {
const unsigned short encrypted = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 14
if (encrypted) { //Always = 2, if encrypted
myErrorCode = ERROR_ENCRYPTION;
return false;
}
} else {
myBase->seek(2, false); // myBase offset: ^ + 14
}
myBase->seek(94, false); // myBase offset: ^ + 108
// A 4-byte value is read here but myImageStartIndex is declared unsigned short,
// so only the low 16 bits are kept.
myImageStartIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 112
if (myCompressionVersion == 17480) {
unsigned long mobiHeaderLength;
unsigned long huffSectionIndex;
unsigned long huffSectionNumber;
unsigned short extraFlags;
unsigned long initialOffset = header().Offsets[0];
myBase->seek(initialOffset + 20, true); // myBase offset: ^ + 20
mobiHeaderLength = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 24
myBase->seek(0x70 - 24, false); // myBase offset: ^ + 112 (0x70)
huffSectionIndex = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 116 (0x74)
huffSectionNumber = PdbUtil::readUnsignedLongBE(*myBase); // myBase offset: ^ + 120 (0x78)
// The extra flags are only read when the MOBI header is long enough to contain them.
if (mobiHeaderLength >= 244) {
myBase->seek(0xF2 - 0x78, false); // myBase offset: ^ + 242 (0xF2)
extraFlags = PdbUtil::readUnsignedShort(*myBase); // myBase offset: ^ + 244 (0xF4)
} else {
extraFlags = 0;
}
/*
std::cerr << "mobi header length: " << mobiHeaderLength << "\n";
std::cerr << "Huff's start record : " << huffSectionIndex << " from " << endSectionIndex - 1 << "\n";
std::cerr << "Huff's records number: " << huffSectionNumber << "\n";
std::cerr << "Huff's extraFlags : " << extraFlags << "\n";
*/
const unsigned long endHuffSectionIndex = huffSectionIndex + huffSectionNumber;
// The Huffman sections must lie inside the file and there must be at least two of them.
if (endHuffSectionIndex > endSectionIndex || huffSectionNumber <= 1) {
myErrorCode = ERROR_COMPRESSION;
return false;
}
const unsigned long endHuffDataOffset = recordOffset(endHuffSectionIndex);
std::vector<unsigned long>::const_iterator beginHuffSectionOffsetIt = header().Offsets.begin() + huffSectionIndex;
// point to first Huff section
std::vector<unsigned long>::const_iterator endHuffSectionOffsetIt = header().Offsets.begin() + endHuffSectionIndex;
// point behind last Huff section
myHuffDecompressorPtr = new HuffDecompressor(*myBase, beginHuffSectionOffsetIt, endHuffSectionOffsetIt, endHuffDataOffset, extraFlags);
// Building the decompressor moved the stream; go back into record 0.
myBase->seek(initialOffset + 14, true); // myBase offset: ^ + 14
}
return true;
}
// True when the PDB header lists sections beyond the last text record.
bool PalmDocStream::hasExtraSections() const {
	const size_t lastSectionIndex = header().Offsets.size() - 1;
	return lastSectionIndex > myMaxRecordIndex;
}
/*
 * Maps a zero-based image index to the byte range of the PDB record holding
 * the image.
 *
 * index is relative to the first image record (myImageStartIndex is added to
 * it). Returns (start offset, length) of the record, or (-1, -1) when the
 * resulting record index lies outside the record table. Negative record
 * indexes are rejected as well: callers pass "recindex - 1", so a RECINDEX of
 * 0 in the markup would otherwise read in front of the offset table.
 *
 * NOTE(review): for the last record the end of the range is taken from the
 * current stream position (myBase->offset()), not from the file size —
 * confirm this is intended.
 */
std::pair<int,int> PalmDocStream::imageLocation(const PdbHeader &header, int index) const {
	index += myImageStartIndex;
	const int recordNumber = header.Offsets.size();
	if (index < 0 || index >= recordNumber) {
		return std::make_pair(-1, -1);
	}

	const int start = header.Offsets[index];
	const int end = (index < recordNumber - 1) ?
		header.Offsets[index + 1] : myBase->offset();
	return std::make_pair(start, end - start);
}

View file

@ -0,0 +1,50 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __PALMDOCSTREAM_H__
#define __PALMDOCSTREAM_H__
#include "PalmDocLikeStream.h"
class ZLFile;
class HuffDecompressor;
// Stream over a PalmDoc-like PDB container, built on PalmDocLikeStream.
// Holds an optional HuffDecompressor and exposes helpers for locating image
// records inside the container.
class PalmDocStream : public PalmDocLikeStream {
public:
PalmDocStream(const ZLFile &file);
~PalmDocStream();
// Returns (start offset, length) of the image record with the given number
// (relative to myImageStartIndex), or (-1, -1) if it is out of range.
std::pair<int,int> imageLocation(const PdbHeader &header, int index) const;
// True if the record table has entries after the last record index tracked
// by the stream.
bool hasExtraSections() const;
private:
bool processRecord();
bool processZeroRecord();
private:
unsigned short myCompressionVersion;
unsigned long myTextLength; //TODO: Warning: isn't used
unsigned short myTextRecordNumber;
// Added to the caller-supplied image number in imageLocation().
unsigned short myImageStartIndex;
shared_ptr<HuffDecompressor> myHuffDecompressorPtr;
};
#endif /* __PALMDOCSTREAM_H__ */

View file

@ -0,0 +1,66 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include <ZLInputStream.h>
//#include <ZLOptions.h>
#include "PdbPlugin.h"
//#include "../../options/FBCategoryKey.h"
//#include "../../database/booksdb/BooksDBUtil.h"
//#include "../../database/booksdb/BooksDB.h"
// Out-of-line definition of the virtual destructor; there is nothing to release.
PdbPlugin::~PdbPlugin() {}
/*
std::string PdbPlugin::fileType(const ZLFile &file) {
const std::string &extension = file.extension();
if ((extension != "prc") && (extension != "pdb") && (extension != "mobi")) {
return "";
}
const std::string &fileName = file.path();
//int index = fileName.find(':');
//ZLFile baseFile = (index == -1) ? file : ZLFile(fileName.substr(0, index));
ZLFile baseFile(file.physicalFilePath());
bool upToDate = BooksDBUtil::checkInfo(baseFile);
//ZLStringOption palmTypeOption(FBCategoryKey::BOOKS, file.path(), "PalmType", "");
std::string palmType = BooksDB::Instance().getPalmType(fileName);
if ((palmType.length() != 8) || !upToDate) {
shared_ptr<ZLInputStream> stream = file.inputStream();
if (stream.isNull() || !stream->open()) {
return "";
}
stream->seek(60, false);
char id[8];
stream->read(id, 8);
stream->close();
palmType = std::string(id, 8);
if (!upToDate) {
BooksDBUtil::saveInfo(baseFile);
}
//palmTypeOption.setValue(palmType);
BooksDB::Instance().setPalmType(fileName, palmType);
}
return palmType;
}
*/

View file

@ -0,0 +1,129 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __PDBPLUGIN_H__
#define __PDBPLUGIN_H__
#include <shared_ptr.h>
#include "../FormatPlugin.h"
// Common base of the PDB-container format plugins. It cannot be instantiated
// directly: the constructor is protected.
class PdbPlugin : public FormatPlugin {
public:
// NOTE(review): the only definition of fileType() visible in PdbPlugin.cpp is
// commented out, so calling it would fail at link time -- confirm before use.
static std::string fileType(const ZLFile &file);
protected:
PdbPlugin();
public:
virtual ~PdbPlugin();
};
/*
class PluckerPlugin : public PdbPlugin {
public:
bool providesMetainfo() const;
bool acceptsFile(const ZLFile &file) const;
bool readMetainfo(Book &book) const;
bool readModel(BookModel &model) const;
};
*/
// PDB plugin that reads metainfo and the book model from a single input
// stream, which concrete subclasses supply through createStream().
class SimplePdbPlugin : public PdbPlugin {
public:
bool readMetainfo(Book &book) const;
bool readModel(BookModel &model) const;
protected:
// Factory for the stream that readMetainfo()/readModel() parse.
virtual shared_ptr<ZLInputStream> createStream(const ZLFile &file) const = 0;
// Parses `stream` into `model`; the base implementation treats the content
// as HTML.
virtual void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
};
// Shared base of the PalmDoc and Mobipocket plugins; provides the
// createStream() implementation required by SimplePdbPlugin.
class PalmDocLikePlugin : public SimplePdbPlugin {
public:
bool providesMetainfo() const;
const std::string &tryOpen(const ZLFile &file) const;
protected:
shared_ptr<ZLInputStream> createStream(const ZLFile &file) const;
};
// Plugin for PalmDoc files; only overrides the document-reading step.
// The commented-out members are kept from an earlier interface and are
// currently disabled.
class PalmDocPlugin : public PalmDocLikePlugin {
public:
//bool acceptsFile(const ZLFile &file) const;
void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
//private:
//FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
};
// Plugin for Mobipocket files. Every member is declared in the private
// section, so the class is only usable through its base-class interface.
// The commented-out members are currently disabled.
class MobipocketPlugin : public PalmDocLikePlugin {
private:
//bool acceptsFile(const ZLFile &file) const;
const std::string supportedFileType() const;
//virtual FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
//virtual const std::string &tryOpen(const ZLFile &file) const;
bool readUids(Book &book) const;
bool readLanguageAndEncoding(Book &book) const;
bool readMetainfo(Book &book) const;
void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
shared_ptr<const ZLImage> coverImage(const ZLFile &file) const;
std::string readAnnotation(const ZLFile &file) const;
};
/*
class EReaderPlugin : public SimplePdbPlugin {
public:
bool providesMetainfo() const;
bool acceptsFile(const ZLFile &file) const;
bool readMetainfo(Book &book) const;
const std::string &tryOpen(const ZLFile &file) const;
void readDocumentInternal(const ZLFile &file, BookModel &model, const class PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const;
protected:
shared_ptr<ZLInputStream> createStream(const ZLFile &file) const;
};
class ZTXTPlugin : public SimplePdbPlugin {
public:
bool providesMetainfo() const;
bool acceptsFile(const ZLFile &file) const;
protected:
shared_ptr<ZLInputStream> createStream(const ZLFile &file) const;
private:
FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
};
*/
// Protected default constructor; performs no initialization of its own.
inline PdbPlugin::PdbPlugin() {
}
#endif /* __PDBPLUGIN_H__ */

View file

@ -0,0 +1,106 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include "PdbReader.h"
/**
 * Reads two bytes from `stream` and decodes them as a big-endian 16-bit
 * unsigned value (first byte is the most significant).
 *
 * The buffer is zero-filled first so that a short read yields a value built
 * from zeros instead of indeterminate stack contents.
 */
unsigned short PdbUtil::readUnsignedShort(ZLInputStream &stream) {
	unsigned char data[2] = { 0, 0 };
	stream.read(reinterpret_cast<char*>(data), 2);
	return static_cast<unsigned short>((data[0] << 8) + data[1]);
}
/**
 * Reads four bytes from `stream` and decodes them as a big-endian 32-bit
 * unsigned value (first byte is the most significant).
 *
 * The buffer is zero-filled first so that a short read yields a value built
 * from zeros instead of indeterminate stack contents.
 */
unsigned long PdbUtil::readUnsignedLongBE(ZLInputStream &stream) {
	unsigned char data[4] = { 0, 0, 0, 0 };
	stream.read(reinterpret_cast<char*>(data), 4);
	return
		(static_cast<unsigned long>(data[0]) << 24) +
		(static_cast<unsigned long>(data[1]) << 16) +
		(static_cast<unsigned long>(data[2]) << 8) +
		static_cast<unsigned long>(data[3]);
}
/**
 * Reads four bytes from `stream` and decodes them as a little-endian 32-bit
 * unsigned value (first byte is the least significant).
 *
 * The buffer is zero-filled first so that a short read yields a value built
 * from zeros instead of indeterminate stack contents.
 */
unsigned long PdbUtil::readUnsignedLongLE(ZLInputStream &stream) {
	unsigned char data[4] = { 0, 0, 0, 0 };
	stream.read(reinterpret_cast<char*>(data), 4);
	return
		(static_cast<unsigned long>(data[3]) << 24) +
		(static_cast<unsigned long>(data[2]) << 16) +
		(static_cast<unsigned long>(data[1]) << 8) +
		static_cast<unsigned long>(data[0]);
}
/**
 * Parses the PDB header starting at the stream's current position.
 *
 * Fills DocName (32 raw bytes), Flags, Id (8 raw bytes) and Offsets (one
 * big-endian 32-bit offset per record; the 4 bytes following each offset are
 * skipped).
 *
 * @return true if the stream advanced by exactly 78 + 8 * <record count>
 *         bytes, i.e. the whole header could be consumed.
 */
bool PdbHeader::read(shared_ptr<ZLInputStream> stream) {
	const size_t startOffset = stream->offset();

	// Read into local buffers rather than writing through std::string::data(),
	// which only exposes a const pointer; zero-filling keeps the fields
	// well-defined after a short read.
	char name[32] = { 0 };
	stream->read(name, sizeof(name)); // stream offset: +32
	DocName.assign(name, sizeof(name));

	Flags = PdbUtil::readUnsignedShort(*stream); // stream offset: +34

	stream->seek(26, false); // stream offset: +60

	char id[8] = { 0 };
	stream->read(id, sizeof(id)); // stream offset: +68
	Id.assign(id, sizeof(id));

	stream->seek(8, false); // stream offset: +76

	Offsets.clear();
	const unsigned short numRecords = PdbUtil::readUnsignedShort(*stream); // stream offset: +78
	Offsets.reserve(numRecords);

	for (int i = 0; i < numRecords; ++i) { // stream offset: +78 + 8 * records number
		Offsets.push_back(PdbUtil::readUnsignedLongBE(*stream));
		stream->seek(4, false);
	}

	return stream->offset() == startOffset + 78 + 8 * numRecords;
}
/*bool PdbRecord0::read(shared_ptr<ZLInputStream> stream) {
size_t startOffset = stream->offset();
CompressionType = PdbUtil::readUnsignedShort(*stream);
Spare = PdbUtil::readUnsignedShort(*stream);
TextLength = PdbUtil::readUnsignedLongBE(*stream);
TextRecords = PdbUtil::readUnsignedShort(*stream);
MaxRecordSize = PdbUtil::readUnsignedShort(*stream);
NontextOffset = PdbUtil::readUnsignedShort(*stream);
NontextOffset2 = PdbUtil::readUnsignedShort(*stream);
MobipocketID = PdbUtil::readUnsignedLongBE(*stream);
MobipocketHeaderSize = PdbUtil::readUnsignedLongBE(*stream);
Unknown24 = PdbUtil::readUnsignedLongBE(*stream);
FootnoteRecs = PdbUtil::readUnsignedShort(*stream);
SidebarRecs = PdbUtil::readUnsignedShort(*stream);
BookmarkOffset = PdbUtil::readUnsignedShort(*stream);
Unknown34 = PdbUtil::readUnsignedShort(*stream);
NontextOffset3 = PdbUtil::readUnsignedShort(*stream);
Unknown38 = PdbUtil::readUnsignedShort(*stream);
ImagedataOffset = PdbUtil::readUnsignedShort(*stream);
ImagedataOffset2 = PdbUtil::readUnsignedShort(*stream);
MetadataOffset = PdbUtil::readUnsignedShort(*stream);
MetadataOffset2 = PdbUtil::readUnsignedShort(*stream);
FootnoteOffset = PdbUtil::readUnsignedShort(*stream);
SidebarOffset = PdbUtil::readUnsignedShort(*stream);
LastDataOffset = PdbUtil::readUnsignedShort(*stream);
Unknown54 = PdbUtil::readUnsignedShort(*stream);
return stream->offset() == startOffset + 56;
}*/

View file

@ -0,0 +1,82 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __PDBREADER_H__
#define __PDBREADER_H__
#include <vector>
#include <shared_ptr.h>
#include <ZLInputStream.h>
//class BookModel;
// Helpers for decoding fixed-width unsigned integers from a ZLInputStream.
class PdbUtil {
public:
// Two bytes, most significant byte first (big-endian).
static unsigned short readUnsignedShort(ZLInputStream &stream);
// Four bytes, most significant byte first (big-endian).
static unsigned long readUnsignedLongBE(ZLInputStream &stream);
// Four bytes, least significant byte first (little-endian).
static unsigned long readUnsignedLongLE(ZLInputStream &stream);
};
// In-memory copy of the fields read from a PDB file header.
struct PdbHeader {
// 32 raw bytes read from the start of the header (may contain NUL bytes).
std::string DocName;
// 16-bit value stored right after the name.
unsigned short Flags;
// 8 raw bytes read at header offset 60.
std::string Id;
// Offset of each record, in the order listed in the header.
std::vector<unsigned long> Offsets;
// Fills the fields from `stream`; returns true if the whole header was consumed.
bool read(shared_ptr<ZLInputStream> stream);
};
// Field layout of PDB record 0. The bracketed ranges in the comments are the
// byte positions of each field inside the record; the names after them say
// which formats use the field.
// NOTE(review): the only definition of read() visible in PdbReader.cpp is
// commented out, so the method is declared but not implemented there.
struct PdbRecord0 {
unsigned short CompressionType; //[0..2] PalmDoc, Mobipocket, Ereader:version
unsigned short Spare; //[2..4] PalmDoc, Mobipocket
unsigned long TextLength; //[4..8] PalmDoc, Mobipocket
unsigned short TextRecords; //[8..10] PalmDoc, Mobipocket
unsigned short MaxRecordSize; //[10..12] PalmDoc, Mobipocket
unsigned short NontextOffset; //[12..14] Ereader
unsigned short NontextOffset2; //[14..16] Ereader //PalmDoc, Mobipocket: encrypted - there is conflict !!!!
unsigned long MobipocketID; //[16..20] Mobipocket
unsigned long MobipocketHeaderSize;//[20..24] Mobipocket
unsigned long Unknown24; //[24..28]
unsigned short FootnoteRecs; //[28..30] Ereader
unsigned short SidebarRecs; //[30..32] Ereader
// Following fields are specific for EReader pdb document specification
unsigned short BookmarkOffset; //[32..34]
unsigned short Unknown34; //[34..36]
unsigned short NontextOffset3; //[36..38]
unsigned short Unknown38; //[38..40]
unsigned short ImagedataOffset; //[40..42]
unsigned short ImagedataOffset2; //[42..44]
unsigned short MetadataOffset; //[44..46]
unsigned short MetadataOffset2; //[46..48]
unsigned short FootnoteOffset; //[48..50]
unsigned short SidebarOffset; //[50..52]
unsigned short LastDataOffset; //[52..54]
unsigned short Unknown54; //[54..56]
bool read(shared_ptr<ZLInputStream> stream);
//private:
// static bool readNumberBE(unsigned char* buffer, size_t offset, size_t size);
};
#endif /* __PDBREADER_H__ */

View file

@ -0,0 +1,109 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <ZLFile.h>
#include "PdbStream.h"
/**
 * Creates a stream over the given file; nothing is read until open().
 *
 * All bookkeeping members are initialized here (not just myBuffer), so that
 * offset() and the buffer counters hold defined values even if they are
 * inspected before the first successful open().
 */
PdbStream::PdbStream(const ZLFile &file) :
	myBase(file.inputStream()),
	myOffset(0),
	myBuffer(0),
	myBufferLength(0),
	myBufferOffset(0) {
}
// Nothing is released here: myBuffer is only freed by close().
// NOTE(review): verify that every derived class calls close() before
// destruction, otherwise the buffer would leak.
PdbStream::~PdbStream() {}
/**
 * (Re)opens the underlying stream, parses the PDB header and positions the
 * base stream at the start of record 0.
 *
 * @return false if the base stream is missing or cannot be opened, if the
 *         header cannot be read, or if the header lists no records.
 */
bool PdbStream::open() {
	close();
	if (myBase.isNull() || !myBase->open() || !myHeader.read(myBase)) {
		return false;
	}
	// A header with zero records parses successfully but has no record 0 to
	// seek to; subscripting the empty table would be out of bounds.
	if (myHeader.Offsets.empty()) {
		return false;
	}
	// myBase offset: startOffset + 78 + 8 * records number ( myHeader.Offsets.size() )

	myBase->seek(myHeader.Offsets[0], true);
	// myBase offset: Offset[0] - zero record

	myBufferLength = 0;
	myBufferOffset = 0;
	myOffset = 0;

	return true;
}
/**
 * Copies up to `maxSize` bytes of decoded data into `buffer`.
 *
 * A null `buffer` is allowed: the data is then consumed without being
 * copied, which is how seek() skips forward.
 *
 * @return number of bytes consumed; less than `maxSize` once fillBuffer()
 *         reports that no more data is available.
 */
size_t PdbStream::read(char *buffer, size_t maxSize) {
	size_t consumed = 0;
	while (consumed < maxSize && fillBuffer()) {
		const size_t wanted = (size_t)(maxSize - consumed);
		const size_t available = (size_t)(myBufferLength - myBufferOffset);
		const size_t chunk = std::min(wanted, available);
		if (chunk == 0) {
			continue;
		}
		if (buffer != 0) {
			memcpy(buffer + consumed, myBuffer + myBufferOffset, chunk);
		}
		consumed += chunk;
		myBufferOffset += chunk;
	}

	myOffset += consumed;
	return consumed;
}
/**
 * Closes the base stream (if there is one) and frees the record buffer.
 * myBuffer is reset to 0 so that a repeated close() is harmless.
 */
void PdbStream::close() {
	if (!myBase.isNull()) {
		myBase->close();
	}
	if (myBuffer == 0) {
		return;
	}
	delete[] myBuffer;
	myBuffer = 0;
}
/**
 * Moves the read position.
 *
 * Forward moves are done by reading and discarding data. Backward moves
 * reopen the stream and skip forward from the beginning to the target.
 *
 * @param offset          target position, or distance from the current one
 * @param absoluteOffset  true if `offset` is measured from the stream start
 */
void PdbStream::seek(int offset, bool absoluteOffset) {
	// Normalize to a distance relative to the current position.
	if (absoluteOffset) {
		offset -= this->offset();
	}
	if (offset == 0) {
		return;
	}
	if (offset > 0) {
		read(0, offset);
		return;
	}
	// Going backwards: turn the distance back into an absolute target before
	// open() resets the position to zero.
	offset += this->offset();
	open();
	if (offset >= 0) {
		read(0, offset);
	}
}
// Number of decoded bytes consumed since the last open().
size_t PdbStream::offset() const { return myOffset; }
// Not implemented yet: always reports a size of 0.
// TODO: implement
size_t PdbStream::sizeOfOpened() { return 0; }
/**
 * Returns the offset of record `index` from the header's record table.
 * For an index past the end of the table the size reported by the base
 * stream is returned instead.
 */
size_t PdbStream::recordOffset(size_t index) const {
	if (index >= myHeader.Offsets.size()) {
		return myBase->sizeOfOpened();
	}
	return myHeader.Offsets[index];
}

View file

@ -0,0 +1,72 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __PDBSTREAM_H__
#define __PDBSTREAM_H__
#include <ZLInputStream.h>
#include "PdbReader.h"
class ZLFile;
// Base class for streams that expose the content of a PDB container as a
// ZLInputStream. It parses the PDB header in open() and serves read()/seek()
// from a record buffer that subclasses refill through fillBuffer().
class PdbStream : public ZLInputStream {
public:
PdbStream(const ZLFile &file);
virtual ~PdbStream();
protected:
// Reopens the base stream, reads the header and seeks to record 0.
virtual bool open();
// Closes the base stream and frees myBuffer.
virtual void close();
private:
size_t read(char *buffer, size_t maxSize);
void seek(int offset, bool absoluteOffset);
size_t offset() const;
// Currently a stub that returns 0.
size_t sizeOfOpened();
protected:
// Called by read() before each copy; returning false ends the read.
virtual bool fillBuffer() = 0;
protected:
// Offset of the given record, or the base stream size past the last record.
size_t recordOffset(size_t index) const;
public:
const PdbHeader &header() const;
protected:
// Underlying file stream.
shared_ptr<ZLInputStream> myBase;
// Bytes consumed through read() since the last open().
size_t myOffset;
private:
PdbHeader myHeader;
protected:
// Record buffer; released with delete[] in close().
char *myBuffer;
// Number of valid bytes in myBuffer and the read position within it.
unsigned short myBufferLength;
unsigned short myBufferOffset;
};
// Read-only access to the header parsed by open().
inline const PdbHeader &PdbStream::header() const { return myHeader; }
#endif /* __PDBSTREAM_H__ */

View file

@ -0,0 +1,78 @@
/*
* Copyright (C) 2004-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include <ZLInputStream.h>
#include "PdbPlugin.h"
#include "../txt/TxtBookReader.h"
#include "../html/HtmlBookReader.h"
#include "HtmlMetainfoReader.h"
//#include "../util/TextFormatDetector.h"
#include "../../bookmodel/BookModel.h"
#include "../../library/Book.h"
/**
 * Detects encoding/language for the book and, if the title or the author
 * list is still empty, fills them (plus tags) from the HTML metainfo found
 * in the book's content stream.
 *
 * @return false if no stream could be created or no encoding was detected;
 *         true otherwise.
 */
bool SimplePdbPlugin::readMetainfo(Book &book) const {
	const ZLFile &file = book.file();
	shared_ptr<ZLInputStream> stream = createStream(file);
	// createStream() is a virtual factory; do not dereference a null result.
	if (stream.isNull()) {
		return false;
	}
	detectEncodingAndLanguage(book, *stream);
	if (book.encoding().empty()) {
		return false;
	}
	int readType = HtmlMetainfoReader::NONE;
	if (book.title().empty()) {
		readType |= HtmlMetainfoReader::TITLE;
	}
	if (book.authors().empty()) {
		readType |= HtmlMetainfoReader::AUTHOR;
	}
	if (readType != HtmlMetainfoReader::NONE) {
	//if ((readType != HtmlMetainfoReader::NONE) && TextFormatDetector().isHtml(*stream)) {
		// Tags are only collected when the document is parsed anyway.
		readType |= HtmlMetainfoReader::TAGS;
		HtmlMetainfoReader metainfoReader(book, (HtmlMetainfoReader::ReadType)readType);
		metainfoReader.readDocument(*stream);
	}

	return true;
}
/**
 * Builds the book model by parsing the stream returned by createStream().
 *
 * @return false if no stream could be created; true otherwise (parsing
 *         problems are not reported through the return value).
 */
bool SimplePdbPlugin::readModel(BookModel &model) const {
	const Book &book = *model.book();
	const ZLFile &file = book.file();
	shared_ptr<ZLInputStream> stream = createStream(file);
	// createStream() is a virtual factory; do not dereference a null result.
	if (stream.isNull()) {
		return false;
	}

	PlainTextFormat format(file);
	/*
	if (!format.initialized()) {
		PlainTextFormatDetector detector;
		detector.detect(*stream, format);
	}
	*/
	readDocumentInternal(file, model, format, book.encoding(), *stream);
	return true;
}
void SimplePdbPlugin::readDocumentInternal(const ZLFile&, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
//if (TextFormatDetector().isHtml(stream)) {
HtmlBookReader("", model, format, encoding).readDocument(stream);
//} else {
//TxtBookReader(model, format, encoding).readDocument(stream);
//}
}

View file

@ -29,7 +29,7 @@
#include "../../bookmodel/BookModel.h" #include "../../bookmodel/BookModel.h"
#include "../../library/Book.h" #include "../../library/Book.h"
bool RtfPlugin::providesMetaInfo() const { bool RtfPlugin::providesMetainfo() const {
return false; return false;
} }

View file

@ -25,7 +25,7 @@
class RtfPlugin : public FormatPlugin { class RtfPlugin : public FormatPlugin {
public: public:
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool readMetainfo(Book &book) const; bool readMetainfo(Book &book) const;
bool readUids(Book &book) const; bool readUids(Book &book) const;

View file

@ -30,7 +30,7 @@
TxtPlugin::~TxtPlugin() { TxtPlugin::~TxtPlugin() {
} }
bool TxtPlugin::providesMetaInfo() const { bool TxtPlugin::providesMetainfo() const {
return false; return false;
} }

View file

@ -26,7 +26,7 @@ class TxtPlugin : public FormatPlugin {
public: public:
~TxtPlugin(); ~TxtPlugin();
bool providesMetaInfo() const; bool providesMetainfo() const;
const std::string supportedFileType() const; const std::string supportedFileType() const;
bool readMetainfo(Book &book) const; bool readMetainfo(Book &book) const;
bool readUids(Book &book) const; bool readUids(Book &book) const;

View file

@ -0,0 +1,161 @@
/*
* Copyright (C) 2008-2010 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include "ZLLanguageUtil.h"
/**
 * Maps a numeric language identifier to a language code string.
 *
 * @param languageCode     primary language identifier
 * @param subLanguageCode  sub-language identifier; only consulted for the
 *                         primary codes 0x1A and 0x2E
 * @return the language code, or an empty string for an unknown identifier
 */
std::string ZLLanguageUtil::languageByIntCode(unsigned char languageCode, unsigned char subLanguageCode) {
	switch (languageCode) {
		case 0x01: return "ar"; // Arabic
		case 0x02: return "bg"; // Bulgarian
		case 0x03: return "ca"; // Catalan
		case 0x04: return "zh"; // Chinese
		case 0x05: return "cs"; // Czech
		case 0x06: return "da"; // Danish
		case 0x07: return "de"; // German
		case 0x08: return "el"; // Greek
		case 0x09: return "en"; // English
		case 0x0A: return "es"; // Spanish
		case 0x0B: return "fi"; // Finnish
		case 0x0C: return "fr"; // French
		case 0x0D: return "he"; // Hebrew
		case 0x0E: return "hu"; // Hungarian
		case 0x0F: return "is"; // Icelandic
		case 0x10: return "it"; // Italian
		case 0x11: return "ja"; // Japanese
		case 0x12: return "ko"; // Korean
		case 0x13: return "nl"; // Dutch
		case 0x14: return "no"; // Norwegian
		case 0x15: return "pl"; // Polish
		case 0x16: return "pt"; // Portuguese
		case 0x17: return "rm"; // Romansh
		case 0x18: return "ro"; // Romanian
		case 0x19: return "ru"; // Russian
		case 0x1A:
			// One primary code shared by three languages; the sub-language
			// code selects between them.
			switch (subLanguageCode) {
				case 0x04:
				case 0x10: return "hr"; // Croatian
				case 0x14:
				case 0x20:
				case 0x78: return "bs"; // Bosnian
				default:   return "sr"; // Serbian
			}
		case 0x1B: return "sk"; // Slovak
		case 0x1C: return "sq"; // Albanian
		case 0x1D: return "sv"; // Swedish
		case 0x1E: return "th"; // Thai
		case 0x1F: return "tr"; // Turkish
		case 0x20: return "ur"; // Urdu
		case 0x21: return "id"; // Indonesian
		case 0x22: return "uk"; // Ukrainian
		case 0x23: return "be"; // Belarusian
		case 0x24: return "sl"; // Slovenian
		case 0x25: return "et"; // Estonian
		case 0x26: return "lv"; // Latvian
		case 0x27: return "lt"; // Lithuanian
		case 0x28: return "tg"; // Tajik
		case 0x29: return "fa"; // Persian (Farsi)
		case 0x2A: return "vi"; // Vietnamese
		case 0x2B: return "hy"; // Armenian
		case 0x2C: return "az"; // Azeri
		case 0x2D: return "eu"; // Basque
		case 0x2E:
			// NOTE(review): "wen" is the collective code for the Sorbian
			// languages; Upper Sorbian proper is "hsb" -- confirm before changing.
			return (subLanguageCode == 0x08)
				? "dsb"  // Lower Sorbian
				: "wen"; // Upper Sorbian
		case 0x2F: return "mk"; // Macedonian
		case 0x32: return "tn"; // Setswana/Tswana
		case 0x34: return "xh"; // Xhosa/isiXhosa
		case 0x35: return "zu"; // Zulu/isiZulu
		case 0x36: return "af"; // Afrikaans
		case 0x37: return "ka"; // Georgian
		case 0x38: return "fo"; // Faeroese
		case 0x39: return "hi"; // Hindi
		case 0x3A: return "mt"; // Maltese
		case 0x3B: return "se"; // Sami
		case 0x3C: return "ga"; // Irish
		case 0x3E: return "ms"; // Malay
		case 0x3F: return "kk"; // Kazakh
		case 0x40: return "ky"; // Kyrgyz
		case 0x41: return "sw"; // Swahili
		case 0x42: return "tk"; // Turkmen
		case 0x43: return "uz"; // Uzbek
		case 0x44: return "tt"; // Tatar
		case 0x45: return "bn"; // Bengali
		case 0x46: return "pa"; // Punjabi
		case 0x47: return "gu"; // Gujarati
		case 0x48: return "or"; // Oriya
		case 0x49: return "ta"; // Tamil
		case 0x4A: return "te"; // Telugu
		case 0x4B: return "kn"; // Kannada
		case 0x4C: return "ml"; // Malayalam
		case 0x4D: return "as"; // Assamese
		case 0x4E: return "mr"; // Marathi
		case 0x4F: return "sa"; // Sanskrit
		case 0x50: return "mn"; // Mongolian
		case 0x51: return "bo"; // Tibetan
		case 0x52: return "cy"; // Welsh
		case 0x53: return "km"; // Khmer ("kh" is not an ISO 639-1 code)
		case 0x54: return "lo"; // Lao
		case 0x56: return "gl"; // Galician
		case 0x57: return "kok"; // Konkani
		case 0x58: return "mni"; // Manipuri
		case 0x59: return "sd"; // Sindhi
		case 0x5A: return "syr"; // Syriac
		case 0x5B: return "si"; // Sinhala
		case 0x5D: return "iu"; // Inuktitut
		case 0x5E: return "am"; // Amharic
		case 0x5F: return "tzm"; // Tamazight
		case 0x60: return "ks"; // Kashmiri
		case 0x61: return "ne"; // Nepali
		case 0x62: return "fy"; // Frisian
		case 0x63: return "ps"; // Pashto
		case 0x64: return "fil"; // Filipino
		case 0x65: return "dv"; // Divehi
		case 0x68: return "ha"; // Hausa
		case 0x6A: return "yo"; // Yoruba
		case 0x6B: return "quz"; // Quechua
		// NOTE(review): the ISO 639 code for Northern Sotho is "nso" -- confirm
		// whether consumers expect the legacy "ns" before changing.
		case 0x6C: return "ns"; // Northern Sotho
		case 0x6D: return "ba"; // Bashkir
		case 0x6E: return "lb"; // Luxembourgish
		case 0x6F: return "kl"; // Greenlandic
		case 0x70: return "ig"; // Igbo
		case 0x73: return "ti"; // Tigrinya
		case 0x78: return "ii"; // Yi ("yi" would mean Yiddish)
		case 0x7A: return "arn"; // Mapudungun
		case 0x7C: return "moh"; // Mohawk
		case 0x7E: return "br"; // Breton ("be" is Belarusian, see 0x23)
		case 0x80: return "ug"; // Uighur
		case 0x81: return "mi"; // Maori
		case 0x82: return "oc"; // Occitan
		case 0x83: return "co"; // Corsican
		case 0x84: return "gsw"; // Alsatian
		case 0x85: return "sah"; // Yakut
		case 0x86: return "qut"; // K'iche
		case 0x87: return "rw"; // Kinyarwanda
		case 0x88: return "wo"; // Wolof
		case 0x8C: return "prs"; // Dari
		case 0x8D: return "mg"; // Malagasy
		default: return "";
	}
}
/**
 * Tells whether the given language code is treated as right-to-left.
 * Only "ar" and "he" are recognized.
 */
bool ZLLanguageUtil::isRTLLanguage(const std::string &languageCode) {
	if (languageCode == "ar") {
		return true;
	}
	return languageCode == "he";
}

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com> * Copyright (C) 2008-2010 Geometer Plus <contact@geometerplus.com>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -17,21 +17,19 @@
* 02110-1301, USA. * 02110-1301, USA.
*/ */
package org.geometerplus.zlibrary.core.html; #ifndef __ZLLANGUAGEUTIL_H__
#define __ZLLANGUAGEUTIL_H__
import java.io.InputStream; #include <string>
import java.io.IOException;
public abstract class ZLHtmlProcessor { class ZLLanguageUtil {
public static boolean read(ZLHtmlReader reader, InputStream stream) {
try { public:
ZLHtmlParser parser = new ZLHtmlParser(reader, stream); static std::string languageByIntCode(unsigned char languageCode, unsigned char subLanguageCode);
reader.startDocumentHandler(); static bool isRTLLanguage(const std::string &languageCode);
parser.doIt();
reader.endDocumentHandler(); private:
} catch (IOException e) { ZLLanguageUtil();
return false; };
}
return true; #endif /* __ZLLANGUAGEUTIL_H__ */
}
}

View file

@ -146,7 +146,7 @@ double ZLStringUtil::stringToDouble(const std::string &str, double defaultValue)
} }
} }
int ZLStringUtil::stringToInteger(const std::string &str, int defaultValue) { int ZLStringUtil::parseDecimal(const std::string &str, int defaultValue) {
if (str.empty()) { if (str.empty()) {
return defaultValue; return defaultValue;
} }
@ -162,3 +162,18 @@ int ZLStringUtil::stringToInteger(const std::string &str, int defaultValue) {
return std::atoi(str.c_str()); return std::atoi(str.c_str());
} }
/**
 * Parses a string consisting solely of hexadecimal digits.
 *
 * @param str           text to parse (no sign, prefix or whitespace allowed)
 * @param defaultValue  returned when `str` is empty or contains any
 *                      character that is not a hexadecimal digit
 * @return the parsed value, or `defaultValue` as described above
 */
unsigned long ZLStringUtil::parseHex(const std::string &str, int defaultValue) {
	if (str.empty()) {
		return defaultValue;
	}

	for (std::size_t i = 0; i < str.length(); ++i) {
		// isxdigit() requires a value representable as unsigned char; a plain
		// (possibly negative) char must be converted first.
		if (!std::isxdigit(static_cast<unsigned char>(str[i]))) {
			return defaultValue;
		}
	}

	// strtoul covers the full unsigned long range; strtol would clamp at
	// LONG_MAX.
	return std::strtoul(str.c_str(), 0, 16);
}

View file

@ -44,7 +44,8 @@ public:
static std::string doubleToString(double value); static std::string doubleToString(double value);
static double stringToDouble(const std::string &value, double defaultValue); static double stringToDouble(const std::string &value, double defaultValue);
static int stringToInteger(const std::string &str, int defaultValue); static int parseDecimal(const std::string &str, int defaultValue);
static unsigned long parseHex(const std::string &str, int defaultValue);
}; };
#endif /* __ZLSTRINGUTIL_H__ */ #endif /* __ZLSTRINGUTIL_H__ */

View file

@ -128,10 +128,11 @@ public class NativeFormatPlugin extends BuiltinFormatPlugin {
@Override @Override
public String readAnnotation(ZLFile file) { public String readAnnotation(ZLFile file) {
// TODO: implement in native code (?) return readAnnotationInternal(file);
return null;
} }
protected native String readAnnotationInternal(ZLFile file);
@Override @Override
public Type type() { public Type type() {
return Type.NATIVE; return Type.NATIVE;

View file

@ -26,7 +26,6 @@ import android.os.Build;
import org.geometerplus.zlibrary.core.filesystem.ZLFile; import org.geometerplus.zlibrary.core.filesystem.ZLFile;
import org.geometerplus.zlibrary.core.filetypes.*; import org.geometerplus.zlibrary.core.filetypes.*;
import org.geometerplus.fbreader.formats.pdb.MobipocketPlugin;
import org.geometerplus.fbreader.formats.external.DjVuPlugin; import org.geometerplus.fbreader.formats.external.DjVuPlugin;
import org.geometerplus.fbreader.formats.external.PDFPlugin; import org.geometerplus.fbreader.formats.external.PDFPlugin;
@ -60,7 +59,6 @@ public class PluginCollection {
} }
private PluginCollection() { private PluginCollection() {
addPlugin(new MobipocketPlugin());
if (Build.VERSION.SDK_INT >= 8) { if (Build.VERSION.SDK_INT >= 8) {
addPlugin(new DjVuPlugin()); addPlugin(new DjVuPlugin());
addPlugin(new PDFPlugin()); addPlugin(new PDFPlugin());

View file

@ -1,362 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.html;
import java.util.HashMap;
import java.io.*;
import java.nio.charset.*;
import org.geometerplus.fbreader.bookmodel.BookModel;
import org.geometerplus.fbreader.bookmodel.BookReader;
import org.geometerplus.fbreader.bookmodel.FBTextKind;
import org.geometerplus.zlibrary.core.html.*;
import org.geometerplus.zlibrary.core.util.ZLArrayUtils;
import org.geometerplus.zlibrary.text.model.ZLTextParagraph;
import org.geometerplus.zlibrary.core.xml.ZLXMLProcessor;
import org.geometerplus.fbreader.formats.xhtml.XHTMLReader;
/**
 * Converts a stream of HTML parser events (delivered through the
 * {@link ZLHtmlReader} callbacks) into calls on the inherited {@link BookReader}.
 *
 * The reader keeps a small stack of currently open inline style controls
 * ({@link #myControls}); when one of them is closed, every open control is
 * switched off and all remaining ones are switched on again, so that
 * improperly nested tags do not leave a style "stuck".
 */
public class HtmlReader extends BookReader implements ZLHtmlReader {
	/** FBTextKind for each HtmlTag id; entries that are not assigned below stay 0. */
	private final byte[] myStyleTable = new byte[HtmlTag.TAG_NUMBER];
	{
		myStyleTable[HtmlTag.H1] = FBTextKind.H1;
		myStyleTable[HtmlTag.H2] = FBTextKind.H2;
		myStyleTable[HtmlTag.H3] = FBTextKind.H3;
		myStyleTable[HtmlTag.H4] = FBTextKind.H4;
		myStyleTable[HtmlTag.H5] = FBTextKind.H5;
		myStyleTable[HtmlTag.H6] = FBTextKind.H6;
		myStyleTable[HtmlTag.B] = FBTextKind.BOLD;
		myStyleTable[HtmlTag.SUB] = FBTextKind.SUB;
		myStyleTable[HtmlTag.SUP] = FBTextKind.SUP;
		myStyleTable[HtmlTag.S] = FBTextKind.STRIKETHROUGH;
		myStyleTable[HtmlTag.PRE] = FBTextKind.PREFORMATTED;
		myStyleTable[HtmlTag.EM] = FBTextKind.EMPHASIS;
		myStyleTable[HtmlTag.DFN] = FBTextKind.DEFINITION;
		myStyleTable[HtmlTag.CITE] = FBTextKind.CITE;
		myStyleTable[HtmlTag.CODE] = FBTextKind.CODE;
		myStyleTable[HtmlTag.STRONG] = FBTextKind.STRONG;
		myStyleTable[HtmlTag.I] = FBTextKind.ITALIC;
	}

	/** Decoder used to turn attribute bytes into strings (book encoding). */
	protected final CharsetDecoder myAttributeDecoder;

	// Neither flag is ever set to true inside this class; they only guard the
	// paragraph-opening branch in startElementHandler.
	private boolean myInsideTitle = false;
	private boolean mySectionStarted = false;

	/** Kind of the most recently opened hyperlink; reused when </a> arrives. */
	private byte myHyperlinkType;

	private final char[] SPACE = { ' ' };
	private String myHrefAttribute = "href";

	private boolean myOrderedListIsStarted = false;
	private int myOLCounter = 0;

	/** Stack of currently open style controls, in opening order. */
	private byte[] myControls = new byte[10];
	// An int (not a byte) so that deep nesting cannot wrap the counter negative.
	private int myControlsNumber = 0;

	/** Lazily created entity cache: DTD entities plus every entity already resolved. */
	private HashMap<String,char[]> myEntityMap;

	/**
	 * @param model the book model to fill; its book encoding selects the decoders
	 * @throws UnsupportedEncodingException if the book encoding name is unknown
	 *         or syntactically illegal
	 */
	public HtmlReader(BookModel model) throws UnsupportedEncodingException {
		super(model);
		try {
			myAttributeDecoder = createDecoder();
			setByteDecoder(createDecoder());
		} catch (UnsupportedCharsetException e) {
			throw new UnsupportedEncodingException(e.getMessage());
		} catch (IllegalCharsetNameException e) {
			// Charset.forName reports malformed names with this unchecked
			// exception; surface it through the declared checked one as well.
			throw new UnsupportedEncodingException(e.getMessage());
		}
	}

	/**
	 * Creates a fresh decoder for the book encoding that replaces (rather than
	 * fails on) malformed or unmappable input.
	 */
	protected final CharsetDecoder createDecoder() throws UnsupportedEncodingException {
		return Charset.forName(Model.Book.getEncoding()).newDecoder()
			.onMalformedInput(CodingErrorAction.REPLACE)
			.onUnmappableCharacter(CodingErrorAction.REPLACE);
	}

	/** Parses the stream returned by {@link #getInputStream()}; returns the parser's result. */
	public boolean readBook() throws IOException {
		return ZLHtmlProcessor.read(this, getInputStream());
	}

	/** Returns the stream to parse; by default the book file itself. */
	public InputStream getInputStream() throws IOException {
		return Model.Book.File.getInputStream();
	}

	public void startDocumentHandler() {
	}

	public void endDocumentHandler() {
		unsetCurrentTextModel();
	}

	public void byteDataHandler(byte[] data, int start, int length) {
		addByteData(data, start, length);
	}

	/**
	 * Resolves an entity name (without the surrounding '&amp;' and ';') and
	 * appends its text. Unknown or malformed entities contribute no text.
	 */
	public void entityDataHandler(String entity) {
		if (myEntityMap == null) {
			myEntityMap = new HashMap<String,char[]>(ZLXMLProcessor.getEntityMap(XHTMLReader.xhtmlDTDs()));
		}
		char[] data = myEntityMap.get(entity);
		if (data == null) {
			data = decodeNumericEntity(entity);
			myEntityMap.put(entity, data);
		}
		addData(data);
	}

	/**
	 * Decodes "#NNN" (decimal) or "#xHHH" / "#XHHH" (hexadecimal) references.
	 *
	 * @return the UTF-16 form of the code point, or an empty array when the
	 *         text is not a numeric reference or does not name a valid code point
	 */
	private static char[] decodeNumericEntity(String entity) {
		// At least "#" plus one more character is required; a lone "#"
		// previously triggered StringIndexOutOfBoundsException.
		if (entity.length() < 2 || entity.charAt(0) != '#') {
			return new char[0];
		}
		try {
			final char marker = entity.charAt(1);
			final int codePoint = (marker == 'x' || marker == 'X')
				? Integer.parseInt(entity.substring(2), 16)
				: Integer.parseInt(entity.substring(1));
			if (Character.isValidCodePoint(codePoint)) {
				// toChars yields a surrogate pair above U+FFFF instead of
				// truncating the value to 16 bits.
				return Character.toChars(codePoint);
			}
		} catch (NumberFormatException e) {
			// not a number: treated as an unknown entity
		}
		return new char[0];
	}

	/** Records a control as open, growing the stack when it is full. */
	private void pushControl(byte control) {
		if (myControlsNumber == myControls.length) {
			myControls = ZLArrayUtils.createCopy(myControls, myControlsNumber, 2 * myControlsNumber);
		}
		myControls[myControlsNumber++] = control;
	}

	private void openControl(byte control) {
		addControl(control, true);
		pushControl(control);
	}

	/**
	 * Switches every open control off, then switches all of them back on
	 * except the first occurrence of {@code control}, which is removed from
	 * the stack. If {@code control} is not open the stack is left unchanged.
	 */
	private void closeControl(byte control) {
		for (int i = 0; i < myControlsNumber; i++) {
			addControl(myControls[i], false);
		}

		// myControlsNumber doubles as the "not found" marker.
		int removedControl = myControlsNumber;
		for (int i = 0; i < myControlsNumber; i++) {
			if (removedControl == myControlsNumber && myControls[i] == control) {
				removedControl = i;
				continue;
			}
			addControl(myControls[i], true);
		}
		if (removedControl == myControlsNumber) {
			return;
		}

		--myControlsNumber;
		System.arraycopy(
			myControls, removedControl + 1,
			myControls, removedControl,
			myControlsNumber - removedControl
		);
	}

	private void startNewParagraph() {
		endParagraph();
		beginParagraph(ZLTextParagraph.Kind.TEXT_PARAGRAPH);
	}

	public final void endElementHandler(String tagName) {
		endElementHandler(HtmlTag.getTagByName(tagName));
	}

	/** Handles a closing tag identified by its {@link HtmlTag} id. */
	public void endElementHandler(byte tag) {
		switch (tag) {
			case HtmlTag.SCRIPT:
			case HtmlTag.SELECT:
			case HtmlTag.STYLE:
			case HtmlTag.P:
			case HtmlTag.DIV:
			case HtmlTag.BLOCKQUOTE:
				startNewParagraph();
				break;

			case HtmlTag.H1:
			case HtmlTag.H2:
			case HtmlTag.H3:
			case HtmlTag.H4:
			case HtmlTag.H5:
			case HtmlTag.H6:
			case HtmlTag.PRE:
				closeControl(myStyleTable[tag]);
				startNewParagraph();
				break;

			case HtmlTag.A:
				closeControl(myHyperlinkType);
				break;

			case HtmlTag.B:
			case HtmlTag.S:
			case HtmlTag.SUB:
			case HtmlTag.SUP:
			case HtmlTag.EM:
			case HtmlTag.DFN:
			case HtmlTag.CITE:
			case HtmlTag.CODE:
			case HtmlTag.STRONG:
			case HtmlTag.I:
				closeControl(myStyleTable[tag]);
				break;

			case HtmlTag.OL:
				myOrderedListIsStarted = false;
				myOLCounter = 0;
				break;

			case HtmlTag.FONT:
				// TODO: implement
			case HtmlTag.BODY:
			case HtmlTag.HTML:
			case HtmlTag.UL:
			default:
				break;
		}
	}

	public final void startElementHandler(String tagName, int offset, ZLHtmlAttributeMap attributes) {
		startElementHandler(HtmlTag.getTagByName(tagName), offset, attributes);
	}

	/**
	 * Handles an opening tag.
	 *
	 * @param tag        {@link HtmlTag} id of the element
	 * @param offset     position reported by the parser (unused here; available to subclasses)
	 * @param attributes raw attributes of the element
	 */
	public void startElementHandler(byte tag, int offset, ZLHtmlAttributeMap attributes) {
		switch (tag) {
			case HtmlTag.BODY:
				setMainTextModel();
				pushKind(FBTextKind.REGULAR);
				beginParagraph(ZLTextParagraph.Kind.TEXT_PARAGRAPH);
				break;

			case HtmlTag.P:
			case HtmlTag.DIV:
			case HtmlTag.BLOCKQUOTE:
				if (mySectionStarted) {
					mySectionStarted = false;
				} else if (myInsideTitle) {
					addContentsData(SPACE);
				}
				beginParagraph(ZLTextParagraph.Kind.TEXT_PARAGRAPH);
				break;

			case HtmlTag.A:
			{
				String ref = attributes.getStringValue(myHrefAttribute, myAttributeDecoder);
				if (ref != null && ref.length() != 0) {
					// '#' marks a footnote target and '&' an internal link;
					// the marker character itself is not part of the id.
					final char marker = ref.charAt(0);
					if (marker == '#') {
						myHyperlinkType = FBTextKind.FOOTNOTE;
						ref = ref.substring(1);
					} else if (marker == '&') {
						myHyperlinkType = FBTextKind.INTERNAL_HYPERLINK;
						ref = ref.substring(1);
					} else {
						myHyperlinkType = FBTextKind.EXTERNAL_HYPERLINK;
					}
					addHyperlinkControl(myHyperlinkType, ref);
					// Go through pushControl so the stack grows when needed;
					// a direct array write overflowed once 10 controls were open.
					pushControl(myHyperlinkType);
				}
				break;
			}

			case HtmlTag.B:
			case HtmlTag.S:
			case HtmlTag.SUB:
			case HtmlTag.SUP:
			case HtmlTag.PRE:
			case HtmlTag.STRONG:
			case HtmlTag.CODE:
			case HtmlTag.EM:
			case HtmlTag.CITE:
			case HtmlTag.DFN:
			case HtmlTag.I:
				openControl(myStyleTable[tag]);
				break;

			case HtmlTag.H1:
			case HtmlTag.H2:
			case HtmlTag.H3:
			case HtmlTag.H4:
			case HtmlTag.H5:
			case HtmlTag.H6:
				startNewParagraph();
				openControl(myStyleTable[tag]);
				break;

			case HtmlTag.OL:
				myOrderedListIsStarted = true;
				break;

			case HtmlTag.LI:
				startNewParagraph();
				if (myOrderedListIsStarted) {
					addData(Integer.toString(++myOLCounter).toCharArray());
					addData(new char[] {'.', ' '});
				} else {
					addData(new char[] {'*', ' '});
				}
				break;

			case HtmlTag.SCRIPT:
			case HtmlTag.SELECT:
			case HtmlTag.STYLE:
				endParagraph();
				break;

			case HtmlTag.TR:
			case HtmlTag.BR:
				startNewParagraph();
				break;

			case HtmlTag.IMG:
				// Generic <img src=...> handling is intentionally not done here.
			case HtmlTag.FONT:
				// TODO: implement
			case HtmlTag.HTML:
			case HtmlTag.UL:
			default:
				break;
		}
	}
}

View file

@ -1,133 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.html;
import java.util.*;
public final class HtmlTag {
public static final byte UNKNOWN = 0;
public static final byte HTML = 1;
public static final byte HEAD = 2;
public static final byte BODY = 3;
public static final byte TITLE = 4;
public static final byte P = 5;
public static final byte H1 = 6;
public static final byte H2 = 7;
public static final byte H3 = 8;
public static final byte H4 = 9;
public static final byte H5 = 10;
public static final byte H6 = 11;
public static final byte A = 12;
public static final byte B = 13;
public static final byte I = 14;
public static final byte BR = 15;
public static final byte STRONG = 16;
public static final byte IMG = 17;
public static final byte SCRIPT = 18;
public static final byte OL = 19;
public static final byte UL = 20;
public static final byte LI = 21;
public static final byte SELECT = 22;
public static final byte DIV = 23;
public static final byte TR = 24;
public static final byte STYLE = 25;
public static final byte BLOCKQUOTE = 26;
public static final byte S = 27;
public static final byte SUB = 28;
public static final byte SUP = 29;
public static final byte PRE = 30;
public static final byte CODE = 31;
public static final byte EM = 32;
public static final byte DFN = 33;
public static final byte CITE = 34;
public static final byte FONT = 35;
public static final byte HR = 36;
// mobipocket specific tags
public static final byte REFERENCE = 37;
public static final byte GUIDE = 38;
public static final byte MBP_PAGEBREAK = 39;
public static final byte TAG_NUMBER = 40;
private static final HashMap<String,Byte> ourTagByName = new HashMap<String,Byte>(256, 0.2f);
private static final Byte ourUnknownTag;
static {
ourTagByName.put("unknown", UNKNOWN);
ourUnknownTag = ourTagByName.get("unknown");
ourTagByName.put("html", HTML);
ourTagByName.put("head", HEAD);
ourTagByName.put("body", BODY);
ourTagByName.put("title", TITLE);
ourTagByName.put("div", DIV);
ourTagByName.put("p", P);
ourTagByName.put("h1", H1);
ourTagByName.put("h2", H2);
ourTagByName.put("h3", H3);
ourTagByName.put("h4", H4);
ourTagByName.put("h5", H5);
ourTagByName.put("h6", H6);
ourTagByName.put("a", A);
ourTagByName.put("b", B);
ourTagByName.put("i", I);
ourTagByName.put("br", BR);
ourTagByName.put("strong", STRONG);
ourTagByName.put("img", IMG);
ourTagByName.put("script", SCRIPT);
ourTagByName.put("ol", OL);
ourTagByName.put("ul", UL);
ourTagByName.put("li", LI);
ourTagByName.put("select", SELECT);
ourTagByName.put("tr", TR);
ourTagByName.put("style", STYLE);
ourTagByName.put("blockquote", BLOCKQUOTE);
ourTagByName.put("s", S);
ourTagByName.put("sub", SUB);
ourTagByName.put("sup", SUP);
ourTagByName.put("pre", PRE);
ourTagByName.put("code", CODE);
ourTagByName.put("em", EM);
ourTagByName.put("def", DFN);
ourTagByName.put("cite", CITE);
ourTagByName.put("font", FONT);
ourTagByName.put("hr", HR);
ourTagByName.put("guide", GUIDE);
ourTagByName.put("reference", REFERENCE);
ourTagByName.put("mbp:pagebreak", MBP_PAGEBREAK);
}
public static byte getTagByName(String name) {
final HashMap<String,Byte> tagByName = ourTagByName;
Byte num = tagByName.get(name);
if (num == null) {
final String lowerCaseName = name.toLowerCase().intern();
num = tagByName.get(lowerCaseName);
if (num == null) {
num = ourUnknownTag;
tagByName.put(lowerCaseName, num);
}
tagByName.put(name, num);
}
return num.byteValue();
}
}

View file

@ -1,183 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.oeb;
import java.util.*;
import java.io.IOException;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
import org.geometerplus.zlibrary.core.filesystem.ZLArchiveEntryFile;
import org.geometerplus.zlibrary.core.xml.*;
import org.geometerplus.fbreader.bookmodel.*;
import org.geometerplus.fbreader.formats.util.MiscUtil;
/**
 * Reads the navigation map of an NCX file into a map of {@link NavPoint}s
 * keyed by play order.
 */
class NCXReader extends ZLXMLReaderAdapter {
	/** One navPoint element: its order key, nesting level, label and target. */
	static class NavPoint {
		final int Order;
		final int Level;
		String Text = "";
		String ContentHRef = "";

		NavPoint(int order, int level) {
			Order = order;
			Level = level;
		}
	}

	private final TreeMap<Integer,NavPoint> myNavigationMap = new TreeMap<Integer,NavPoint>();
	/** navPoint elements that are currently open, outermost first. */
	private final ArrayList<NavPoint> myPointStack = new ArrayList<NavPoint>();

	private static final int READ_NONE = 0;
	private static final int READ_MAP = 1;
	private static final int READ_POINT = 2;
	private static final int READ_LABEL = 3;
	private static final int READ_TEXT = 4;

	int myReadState = READ_NONE;
	// Order key handed to navPoints that carry no playOrder attribute;
	// incremented after each use.
	int myPlayIndex = -65535;
	private String myLocalPathPrefix;

	NCXReader(BookReader modelReader) {
	}

	/**
	 * Parses the given NCX file.
	 *
	 * @throws BookReadingException wrapping any IOException raised while reading
	 */
	void readFile(ZLFile file) throws BookReadingException {
		myLocalPathPrefix = MiscUtil.archiveEntryName(MiscUtil.htmlDirectoryPrefix(file));
		try {
			read(file);
		} catch (IOException e) {
			throw new BookReadingException(e, file);
		}
	}

	/** Returns the collected nav points, sorted by order key. */
	Map<Integer,NavPoint> navigationMap() {
		return myNavigationMap;
	}

	private static final String TAG_NAVMAP = "navmap";
	private static final String TAG_NAVPOINT = "navpoint";
	private static final String TAG_NAVLABEL = "navlabel";
	private static final String TAG_CONTENT = "content";
	private static final String TAG_TEXT = "text";
	private static final String ATTRIBUTE_PLAYORDER = "playOrder";

	/** Parses a decimal integer, returning 0 for malformed input. */
	private static int atoi(String number) {
		try {
			return Integer.parseInt(number);
		} catch (NumberFormatException e) {
			return 0;
		}
	}

	/** Lower-cases a tag name independently of the default locale. */
	private static String normalize(String tag) {
		return tag.toLowerCase(Locale.ENGLISH);
	}

	/** Opens a new nav point whose level is the current nesting depth. */
	private void pushNavPoint(ZLStringMap attributes) {
		final String order = attributes.getValue(ATTRIBUTE_PLAYORDER);
		final int index = (order != null) ? atoi(order) : myPlayIndex++;
		myPointStack.add(new NavPoint(index, myPointStack.size()));
	}

	@Override
	public boolean startElementHandler(String tag, ZLStringMap attributes) {
		tag = normalize(tag);
		switch (myReadState) {
			case READ_NONE:
				if (TAG_NAVMAP.equals(tag)) {
					myReadState = READ_MAP;
				}
				break;
			case READ_MAP:
				if (TAG_NAVPOINT.equals(tag)) {
					pushNavPoint(attributes);
					myReadState = READ_POINT;
				}
				break;
			case READ_POINT:
				if (TAG_NAVPOINT.equals(tag)) {
					pushNavPoint(attributes);
				} else if (TAG_NAVLABEL.equals(tag)) {
					myReadState = READ_LABEL;
				} else if (TAG_CONTENT.equals(tag)) {
					final int size = myPointStack.size();
					if (size > 0) {
						final String src = attributes.getValue("src");
						// A <content> element without src leaves the
						// reference empty instead of decoding null.
						if (src != null) {
							myPointStack.get(size - 1).ContentHRef =
								ZLArchiveEntryFile.normalizeEntryName(
									myLocalPathPrefix + MiscUtil.decodeHtmlReference(src)
								);
						}
					}
				}
				break;
			case READ_LABEL:
				if (TAG_TEXT.equals(tag)) {
					myReadState = READ_TEXT;
				}
				break;
			case READ_TEXT:
				break;
		}
		return false;
	}

	@Override
	public boolean endElementHandler(String tag) {
		tag = normalize(tag);
		switch (myReadState) {
			case READ_NONE:
				break;
			case READ_MAP:
				if (TAG_NAVMAP.equals(tag)) {
					myReadState = READ_NONE;
				}
				break;
			case READ_POINT:
				if (TAG_NAVPOINT.equals(tag)) {
					final NavPoint last = myPointStack.remove(myPointStack.size() - 1);
					if (last.Text.length() == 0) {
						last.Text = "...";
					}
					myNavigationMap.put(last.Order, last);
					myReadState = myPointStack.isEmpty() ? READ_MAP : READ_POINT;
				}
				// The original fell through into READ_LABEL here; that was
				// harmless but unintended, so the case now ends explicitly.
				break;
			case READ_LABEL:
				if (TAG_NAVLABEL.equals(tag)) {
					myReadState = READ_POINT;
				}
				break;
			case READ_TEXT:
				if (TAG_TEXT.equals(tag)) {
					myReadState = READ_LABEL;
				}
				break;
		}
		return false;
	}

	/** Appends label text to the innermost open nav point. */
	@Override
	public void characterDataHandler(char[] ch, int start, int length) {
		if (myReadState == READ_TEXT) {
			final NavPoint last = myPointStack.get(myPointStack.size() - 1);
			last.Text += new String(ch, start, length);
		}
	}

	@Override
	public boolean dontCacheAttributeValues() {
		return true;
	}
}

View file

@ -1,78 +0,0 @@
/*
* Copyright (C) 2010-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.oeb;
import org.geometerplus.zlibrary.core.constants.XMLNamespaces;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
import org.geometerplus.zlibrary.core.image.ZLFileImage;
import org.geometerplus.zlibrary.core.util.MimeType;
import org.geometerplus.zlibrary.core.xml.*;
import org.geometerplus.fbreader.formats.util.MiscUtil;
/**
 * Locates a cover image: either the cover file itself (when it is an image
 * file) or the first image referenced from an XHTML cover page.
 */
class XHTMLImageFinder extends ZLXMLReaderAdapter {
	/**
	 * @param coverFile cover candidate, may be null
	 * @return an image for the cover, or null when {@code coverFile} is null
	 *         or no image reference is found in it
	 */
	static ZLFileImage getCoverImage(ZLFile coverFile) {
		if (coverFile == null) {
			return null;
		}

		final String ext = coverFile.getExtension();
		if ("gif".equals(ext) || "jpg".equals(ext) || "jpeg".equals(ext)) {
			// The cover file is itself an image.
			return new ZLFileImage(coverFile);
		}
		// Otherwise treat it as markup and look for an image reference inside.
		return new XHTMLImageFinder().readImage(coverFile);
	}

	private String myXHTMLPathPrefix;
	private ZLFileImage myImage;

	/** Parses {@code file} and returns the first referenced image, or null. */
	ZLFileImage readImage(ZLFile file) {
		myXHTMLPathPrefix = MiscUtil.htmlDirectoryPrefix(file);
		myImage = null;
		readQuietly(file);
		return myImage;
	}

	@Override
	public boolean processNamespaces() {
		return true;
	}

	/**
	 * Records the target of the first {@code <img src>} or
	 * {@code <image xlink:href>} element and returns true for it;
	 * returns false for every other element.
	 */
	@Override
	public boolean startElementHandler(String tag, ZLStringMap attributes) {
		// equalsIgnoreCase is locale-independent, unlike toLowerCase(), which
		// maps "IMG" to "\u0131mg" under a Turkish default locale.
		String href = null;
		if ("img".equalsIgnoreCase(tag)) {
			href = attributes.getValue("src");
		} else if ("image".equalsIgnoreCase(tag)) {
			href = getAttributeValue(attributes, XMLNamespaces.XLink, "href");
		}

		if (href == null) {
			return false;
		}
		myImage = new ZLFileImage(
			ZLFile.createFileByPath(myXHTMLPathPrefix + MiscUtil.decodeHtmlReference(href))
		);
		return true;
	}
}

View file

@ -1,113 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.*;
public abstract class DocDecompressor {
public static int decompress(InputStream stream, byte[] targetBuffer, int compressedSize) throws IOException {
final byte[] sourceBuffer = new byte[compressedSize];
if (stream.read(sourceBuffer) != compressedSize) {
return 0;
}
int sourceIndex = 0;
int targetIndex = 0;
int count0 = 0;
int count1 = 0;
int count2 = 0;
int count3 = 0;
try {
while (true) {
final byte token = sourceBuffer[sourceIndex++];
switch (token) {
default:
++count0;
targetBuffer[targetIndex++] = token;
break;
case 1: case 2: case 3: case 4:
case 5: case 6: case 7: case 8:
++count1;
System.arraycopy(sourceBuffer, sourceIndex, targetBuffer, targetIndex, token);
sourceIndex += token;
targetIndex += token;
break;
case -64: case -63: case -62: case -61:
case -60: case -59: case -58: case -57:
case -56: case -55: case -54: case -53:
case -52: case -51: case -50: case -49:
case -48: case -47: case -46: case -45:
case -44: case -43: case -42: case -41:
case -40: case -39: case -38: case -37:
case -36: case -35: case -34: case -33:
case -32: case -31: case -30: case -29:
case -28: case -27: case -26: case -25:
case -24: case -23: case -22: case -21:
case -20: case -19: case -18: case -17:
case -16: case -15: case -14: case -13:
case -12: case -11: case -10: case -9:
case -8: case -7: case -6: case -5:
case -4: case -3: case -2: case -1:
++count2;
targetBuffer[targetIndex++] = ' ';
targetBuffer[targetIndex++] = (byte)(token ^ 0x80);
break;
case -128: case -127: case -126: case -125:
case -124: case -123: case -122: case -121:
case -120: case -119: case -118: case -117:
case -116: case -115: case -114: case -113:
case -112: case -111: case -110: case -109:
case -108: case -107: case -106: case -105:
case -104: case -103: case -102: case -101:
case -100: case -99: case -98: case -97:
case -96: case -95: case -94: case -93:
case -92: case -91: case -90: case -89:
case -88: case -87: case -86: case -85:
case -84: case -83: case -82: case -81:
case -80: case -79: case -78: case -77:
case -76: case -75: case -74: case -73:
case -72: case -71: case -70: case -69:
case -68: case -67: case -66: case -65:
++count3;
final int N = ((token & 0x3F) << 8) + (sourceBuffer[sourceIndex++] & 0xFF);
int copyLength = (N & 7) + 3;
int srcIndex = targetIndex - (N >> 3);
if (targetIndex >= srcIndex + copyLength) {
System.arraycopy(targetBuffer, srcIndex, targetBuffer, targetIndex, copyLength);
targetIndex += copyLength;
} else {
while (copyLength-- > 0) {
targetBuffer[targetIndex++] = targetBuffer[srcIndex++];
}
}
break;
}
}
} catch (Exception e) {
if (targetIndex > targetBuffer.length) {
targetIndex = targetBuffer.length;
}
}
return targetIndex;
}
}

View file

@ -1,113 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.*;
public abstract class HuffdicDecompressor {
public static int decompress(InputStream stream, byte[] targetBuffer, int compressedSize) throws IOException {
final byte[] sourceBuffer = new byte[compressedSize];
if (stream.read(sourceBuffer) != compressedSize) {
return 0;
}
int sourceIndex = 0;
int targetIndex = 0;
int count0 = 0;
int count1 = 0;
int count2 = 0;
int count3 = 0;
try {
while (true) {
final byte token = sourceBuffer[sourceIndex++];
switch (token) {
default:
++count0;
targetBuffer[targetIndex++] = token;
break;
case 1: case 2: case 3: case 4:
case 5: case 6: case 7: case 8:
++count1;
System.arraycopy(sourceBuffer, sourceIndex, targetBuffer, targetIndex, token);
sourceIndex += token;
targetIndex += token;
break;
case -64: case -63: case -62: case -61:
case -60: case -59: case -58: case -57:
case -56: case -55: case -54: case -53:
case -52: case -51: case -50: case -49:
case -48: case -47: case -46: case -45:
case -44: case -43: case -42: case -41:
case -40: case -39: case -38: case -37:
case -36: case -35: case -34: case -33:
case -32: case -31: case -30: case -29:
case -28: case -27: case -26: case -25:
case -24: case -23: case -22: case -21:
case -20: case -19: case -18: case -17:
case -16: case -15: case -14: case -13:
case -12: case -11: case -10: case -9:
case -8: case -7: case -6: case -5:
case -4: case -3: case -2: case -1:
++count2;
targetBuffer[targetIndex++] = ' ';
targetBuffer[targetIndex++] = (byte)(token ^ 0x80);
break;
case -128: case -127: case -126: case -125:
case -124: case -123: case -122: case -121:
case -120: case -119: case -118: case -117:
case -116: case -115: case -114: case -113:
case -112: case -111: case -110: case -109:
case -108: case -107: case -106: case -105:
case -104: case -103: case -102: case -101:
case -100: case -99: case -98: case -97:
case -96: case -95: case -94: case -93:
case -92: case -91: case -90: case -89:
case -88: case -87: case -86: case -85:
case -84: case -83: case -82: case -81:
case -80: case -79: case -78: case -77:
case -76: case -75: case -74: case -73:
case -72: case -71: case -70: case -69:
case -68: case -67: case -66: case -65:
++count3;
final int N = ((token & 0x3F) << 8) + (sourceBuffer[sourceIndex++] & 0xFF);
int copyLength = (N & 7) + 3;
int srcIndex = targetIndex - (N >> 3);
if (targetIndex >= srcIndex + copyLength) {
System.arraycopy(targetBuffer, srcIndex, targetBuffer, targetIndex, copyLength);
targetIndex += copyLength;
} else {
while (copyLength-- > 0) {
targetBuffer[targetIndex++] = targetBuffer[srcIndex++];
}
}
break;
}
}
} catch (Exception e) {
if (targetIndex > targetBuffer.length) {
targetIndex = targetBuffer.length;
}
}
return targetIndex;
}
}

View file

@ -1,215 +0,0 @@
/*
* Copyright (C) 2009-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.util.*;
import java.io.*;
import java.nio.charset.CharsetDecoder;
import org.geometerplus.zlibrary.core.html.ZLByteBuffer;
import org.geometerplus.zlibrary.core.html.ZLHtmlAttributeMap;
import org.geometerplus.zlibrary.core.image.ZLFileImage;
import org.geometerplus.zlibrary.core.util.MimeType;
import org.geometerplus.fbreader.formats.html.HtmlReader;
import org.geometerplus.fbreader.formats.html.HtmlTag;
import org.geometerplus.fbreader.bookmodel.BookModel;
/**
 * HTML reader specialised for Mobipocket content: it resolves
 * {@code filepos}-based links, record-indexed images, page breaks, and
 * builds the table of contents from the guide and from links found inside
 * the table-of-contents byte range.
 */
public class MobipocketHtmlBookReader extends HtmlReader {
	/** Decoder for link text collected as table-of-contents titles. */
	private final CharsetDecoder myTocDecoder;
	/** Stream created by {@link #getInputStream()}; also queried for image records. */
	private MobipocketStream myMobipocketStream;

	MobipocketHtmlBookReader(BookModel model) throws UnsupportedEncodingException {
		super(model);
		myTocDecoder = createDecoder();
	}

	@Override
	public InputStream getInputStream() throws IOException {
		myMobipocketStream = new MobipocketStream(Model.Book.File);
		return myMobipocketStream;
	}

	/** True while inside a guide element. */
	private boolean myReadGuide;
	// [myTocStartOffset, myTocEndOffset) is the byte range taken to hold the
	// table of contents; both stay at MAX_VALUE (empty range) until a guide
	// reference of type "toc" is seen.
	private int myTocStartOffset = Integer.MAX_VALUE;
	private int myTocEndOffset = Integer.MAX_VALUE;
	/** Target file position to contents title. */
	private final TreeMap<Integer,String> myTocEntries = new TreeMap<Integer,String>();
	/** Element offset to index of the paragraph that element belongs to. */
	private final TreeMap<Integer,Integer> myPositionToParagraph = new TreeMap<Integer,Integer>();
	/** Every file position that some link points at. */
	private final TreeSet<Integer> myFileposReferences = new TreeSet<Integer>();
	/** Target of the contents link currently being read, or -1 when there is none. */
	private int myCurrentTocPosition = -1;
	private final ZLByteBuffer myTocBuffer = new ZLByteBuffer();

	private boolean tocRangeContainsPosition(int position) {
		return (myTocStartOffset <= position) && (position < myTocEndOffset);
	}

	@Override
	public void startElementHandler(byte tag, int offset, ZLHtmlAttributeMap attributes) {
		// Remember which paragraph each element offset falls into, so that
		// filepos targets can be mapped to paragraphs at the end.
		final int paragraphIndex = Model.BookTextModel.getParagraphsNumber();
		myPositionToParagraph.put(offset, paragraphIsOpen() ? paragraphIndex - 1 : paragraphIndex);

		switch (tag) {
			case HtmlTag.IMG:
			{
				final ZLByteBuffer recIndex = attributes.getValue("recindex");
				if (recIndex != null) {
					try {
						final int index = Integer.parseInt(recIndex.toString());
						// The image reference is added outside of any
						// paragraph; an open paragraph is split around it.
						final boolean reopen = paragraphIsOpen();
						if (reopen) {
							endParagraph();
						}
						addImageReference("" + index, false);
						if (reopen) {
							beginParagraph();
						}
					} catch (NumberFormatException e) {
						// malformed recindex: the image is ignored
					}
				}
				break;
			}
			case HtmlTag.GUIDE:
				myReadGuide = true;
				break;
			case HtmlTag.REFERENCE:
				if (myReadGuide) {
					final ZLByteBuffer fp = attributes.getValue("filepos");
					final ZLByteBuffer title = attributes.getValue("title");
					if ((fp != null) && (title != null)) {
						try {
							final int filePosition = Integer.parseInt(fp.toString());
							myTocEntries.put(filePosition, title.toString(myAttributeDecoder));
							if (tocRangeContainsPosition(filePosition)) {
								myTocEndOffset = filePosition;
							}
							// "type" is optional; it used to be dereferenced
							// unconditionally, which threw a
							// NullPointerException for references without it.
							final ZLByteBuffer type = attributes.getValue("type");
							if ((type != null) && type.equalsToLCString("toc")) {
								myTocStartOffset = filePosition;
								final SortedMap<Integer,String> subMap =
									myTocEntries.tailMap(filePosition + 1);
								if (!subMap.isEmpty()) {
									myTocEndOffset = subMap.firstKey();
								}
							}
						} catch (NumberFormatException e) {
							// malformed filepos: the reference is ignored
						}
					}
				}
				break;
			case HtmlTag.A:
			{
				final ZLByteBuffer fp = attributes.getValue("filepos");
				if (fp != null) {
					try {
						final int filePosition = Integer.parseInt(fp.toString());
						if (tocRangeContainsPosition(offset)) {
							// A link located inside the contents range: its
							// text becomes the title for its target.
							myCurrentTocPosition = filePosition;
							if (tocRangeContainsPosition(filePosition)) {
								myTocEndOffset = filePosition;
							}
						}
						myFileposReferences.add(filePosition);
						// Rewritten as an href that the base class treats as
						// an internal link (leading '&').
						attributes.put(new ZLByteBuffer("href"), new ZLByteBuffer("&filepos" + filePosition));
					} catch (NumberFormatException e) {
						// malformed filepos: handled as a plain anchor
					}
				}
				super.startElementHandler(tag, offset, attributes);
				break;
			}
			case HtmlTag.MBP_PAGEBREAK:
				insertEndOfSectionParagraph();
				break;
			default:
				super.startElementHandler(tag, offset, attributes);
				break;
		}
	}

	@Override
	public void endElementHandler(byte tag) {
		switch (tag) {
			case HtmlTag.IMG:
			case HtmlTag.REFERENCE:
				break;
			case HtmlTag.GUIDE:
				myReadGuide = false;
				break;
			case HtmlTag.A:
				if (myCurrentTocPosition != -1) {
					if (!myTocBuffer.isEmpty()) {
						myTocEntries.put(myCurrentTocPosition, myTocBuffer.toString(myTocDecoder));
						myTocBuffer.clear();
					}
					myCurrentTocPosition = -1;
				}
				super.endElementHandler(tag);
				break;
			default:
				super.endElementHandler(tag);
				break;
		}
	}

	@Override
	public void byteDataHandler(byte[] data, int start, int length) {
		// Text inside a contents link is additionally collected as its title.
		if (myCurrentTocPosition != -1) {
			myTocBuffer.append(data, start, length);
		}
		super.byteDataHandler(data, start, length);
	}

	/** Registers image records under the ids "1", "2", ... until the stream reports no more. */
	@Override
	public void startDocumentHandler() {
		super.startDocumentHandler();

		for (int index = 0; ; ++index) {
			final int offset = myMobipocketStream.getImageOffset(index);
			if (offset < 0) {
				break;
			}
			final int length = myMobipocketStream.getImageLength(index);
			if (length <= 0) {
				break;
			}
			addImage(String.valueOf(index + 1), new ZLFileImage(Model.Book.File, ZLFileImage.ENCODING_NONE, offset, length));
		}
	}

	/**
	 * Maps every referenced file position and every contents entry to the
	 * paragraph of the first element at or after that position.
	 */
	@Override
	public void endDocumentHandler() {
		// Both collections iterate in ascending order, so once a position lies
		// beyond the last recorded element all later ones do too.
		for (Integer entry : myFileposReferences) {
			final SortedMap<Integer,Integer> subMap =
				myPositionToParagraph.tailMap(entry);
			if (subMap.isEmpty()) {
				break;
			}
			addHyperlinkLabel("filepos" + entry, subMap.get(subMap.firstKey()));
		}

		for (Map.Entry<Integer,String> entry : myTocEntries.entrySet()) {
			final SortedMap<Integer,Integer> subMap =
				myPositionToParagraph.tailMap(entry.getKey());
			if (subMap.isEmpty()) {
				break;
			}
			beginContentsParagraph(subMap.get(subMap.firstKey()));
			addContentsData(entry.getValue().toCharArray());
			endContentsParagraph();
		}
		super.endDocumentHandler();
	}
}

View file

@ -1,284 +0,0 @@
/*
* Copyright (C) 2009-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.*;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
import org.geometerplus.zlibrary.core.image.*;
import org.geometerplus.zlibrary.core.encodings.Encoding;
import org.geometerplus.zlibrary.core.encodings.JavaEncodingCollection;
import org.geometerplus.zlibrary.core.language.ZLLanguageUtil;
import org.geometerplus.zlibrary.core.util.MimeType;
import org.geometerplus.fbreader.book.Book;
import org.geometerplus.fbreader.book.BookUtil;
import org.geometerplus.fbreader.bookmodel.BookModel;
import org.geometerplus.fbreader.bookmodel.BookReadingException;
import org.geometerplus.fbreader.formats.JavaFormatPlugin;
/**
 * Java-side format plugin for Mobipocket books.
 *
 * Metadata, language/encoding and the cover are taken from the MOBI header
 * and the optional EXTH block stored in record 0 of the PDB container; the
 * book text is read by MobipocketHtmlBookReader.
 */
public class MobipocketPlugin extends JavaFormatPlugin {
	/** Big-endian ASCII "MOBI". */
	private static final int MOBI_SIGNATURE = 0x4D4F4249;
	/** Big-endian ASCII "EXTH". */
	private static final int EXTH_SIGNATURE = 0x45585448;
	/** Bit of the flags word (read at offset 128 of record 0) that announces an EXTH block. */
	private static final int EXTH_PRESENT_FLAG = 0x40;

	// EXTH record types this plugin understands
	private static final int EXTH_AUTHOR = 100;
	private static final int EXTH_TAG = 105;
	private static final int EXTH_COVER = 201;
	private static final int EXTH_THUMBNAIL = 202;

	/** Values from the leading part of the MOBI header that later parsing steps need. */
	private static final class HeaderFields {
		final int Length;
		final String EncodingName;
		final int FullNameOffset;
		final int FullNameLength;

		HeaderFields(int length, String encodingName, int fullNameOffset, int fullNameLength) {
			Length = length;
			EncodingName = encodingName;
			FullNameOffset = fullNameOffset;
			FullNameLength = fullNameLength;
		}
	}

	public MobipocketPlugin() {
		super("Mobipocket");
	}

	/**
	 * Fills encoding, language, authors, tags and title of the book.
	 *
	 * @throws BookReadingException if the file is not a MOBI file or cannot be read
	 */
	@Override
	public void readMetainfo(Book book) throws BookReadingException {
		InputStream stream = null;
		try {
			stream = book.File.getInputStream();
			final HeaderFields header = readHeaderStart(book, stream);

			PdbUtil.skip(stream, 32);
			// number of bytes consumed so far, counted from the start of record 0
			int offset = 132;
			if ((PdbUtil.readInt(stream) & EXTH_PRESENT_FLAG) != 0) {
				PdbUtil.skip(stream, header.Length - 116);
				offset = header.Length + 20;
				if (PdbUtil.readInt(stream) == EXTH_SIGNATURE) {
					offset += readExthRecords(book, stream, header.EncodingName);
				}
			}

			PdbUtil.skip(stream, header.FullNameOffset - offset);
			final byte[] titleBuffer = readFully(stream, header.FullNameLength);
			book.setTitle(new String(titleBuffer, header.EncodingName));
		} catch (IOException e) {
			throw new BookReadingException(e, book.File);
		} finally {
			closeQuietly(stream);
		}
	}

	@Override
	public void readUids(Book book) throws BookReadingException {
		if (book.uids().isEmpty()) {
			book.addUid(BookUtil.createUid(book.File, "SHA-256"));
		}
	}

	@Override
	public void readModel(BookModel model) throws BookReadingException {
		try {
			new MobipocketHtmlBookReader(model).readBook();
		} catch (IOException e) {
			throw new BookReadingException(e, model.Book.File);
		}
	}

	/** Returns a proxy image; the file is only parsed when the real image is requested. */
	@Override
	public ZLImage readCover(ZLFile file) {
		return new ZLImageFileProxy(file) {
			@Override
			protected ZLImage retrieveRealImage() {
				return readCoverInternal(File);
			}
		};
	}

	/**
	 * Locates the cover (or, failing that, the thumbnail) image inside the file.
	 *
	 * @return the image, or null if the file has no usable cover or cannot be read
	 */
	private ZLImage readCoverInternal(ZLFile file) {
		final int imageIndex;
		try {
			imageIndex = findCoverImageIndex(file);
		} catch (IOException e) {
			return null;
		}
		if (imageIndex == -1) {
			return null;
		}

		MobipocketStream mobipocketStream = null;
		try {
			mobipocketStream = new MobipocketStream(file);
			final int start = mobipocketStream.getImageOffset(imageIndex);
			if (start >= 0) {
				final int len = mobipocketStream.getImageLength(imageIndex);
				if (len > 0) {
					return new ZLFileImage(file, ZLFileImage.ENCODING_NONE, start, len);
				}
			}
			return null;
		} catch (IOException e) {
			return null;
		} finally {
			// the stream is only needed for its record table; the returned
			// image is built from the file and plain offsets, so close it here
			closeQuietly(mobipocketStream);
		}
	}

	@Override
	public String readAnnotation(ZLFile file) {
		return null;
	}

	@Override
	public JavaEncodingCollection supportedEncodings() {
		return JavaEncodingCollection.Instance();
	}

	/**
	 * Sets encoding and language of the book from the MOBI header.
	 *
	 * @throws BookReadingException if the file is not a MOBI file or cannot be read
	 */
	@Override
	public void detectLanguageAndEncoding(Book book) throws BookReadingException {
		InputStream stream = null;
		try {
			stream = book.File.getInputStream();
			readHeaderStart(book, stream);
		} catch (IOException e) {
			throw new BookReadingException(e, book.File);
		} finally {
			closeQuietly(stream);
		}
	}

	/**
	 * Parses the PDB header and the MOBI header up to and including the
	 * language code (96 bytes of record 0), storing encoding and language
	 * in the book. An unknown encoding code falls back to "utf-8".
	 */
	private HeaderFields readHeaderStart(Book book, InputStream stream) throws IOException, BookReadingException {
		final PdbHeader pdbHeader = new PdbHeader(stream);
		PdbUtil.skip(stream, pdbHeader.Offsets[0] + 16 - pdbHeader.length());
		if (PdbUtil.readInt(stream) != MOBI_SIGNATURE) {
			throw new BookReadingException("unsupportedFileFormat", book.File);
		}
		final int length = (int)PdbUtil.readInt(stream);
		PdbUtil.skip(stream, 4);
		final int encodingCode = (int)PdbUtil.readInt(stream);
		final Encoding encoding = supportedEncodings().getEncoding(encodingCode);
		final String encodingName = encoding != null ? encoding.Name : "utf-8";
		book.setEncoding(encodingName);
		PdbUtil.skip(stream, 52);
		final int fullNameOffset = (int)PdbUtil.readInt(stream);
		final int fullNameLength = (int)PdbUtil.readInt(stream);
		final int languageCode = (int)PdbUtil.readInt(stream);
		book.setLanguage(ZLLanguageUtil.languageByIntCode(languageCode & 0xFF, (languageCode >> 8) & 0xFF));
		return new HeaderFields(length, encodingName, fullNameOffset, fullNameLength);
	}

	/**
	 * Reads the EXTH records that follow the "EXTH" signature, adding
	 * authors and tags to the book.
	 *
	 * @return the number of bytes consumed from the stream
	 */
	private int readExthRecords(Book book, InputStream stream, String encodingName) throws IOException {
		PdbUtil.skip(stream, 4);
		final int recordsNumber = (int)PdbUtil.readInt(stream);
		int consumed = 8;
		for (int i = 0; i < recordsNumber; ++i) {
			final int type = (int)PdbUtil.readInt(stream);
			final int size = (int)PdbUtil.readInt(stream);
			// type and size are always read, even if the declared size is bogus
			consumed += 8;
			if (size <= 8) {
				continue;
			}
			final int payloadSize = size - 8;
			consumed += payloadSize;
			switch (type) {
				case EXTH_AUTHOR:
				{
					String author = new String(readFully(stream, payloadSize), encodingName);
					// "Last, First" is turned into "First Last"
					final int index = author.indexOf(',');
					if (index != -1) {
						author = author.substring(index + 1).trim() +
							' ' +
							author.substring(0, index).trim();
					} else {
						author = author.trim();
					}
					book.addAuthor(author);
					break;
				}
				case EXTH_TAG:
					book.addTag(new String(readFully(stream, payloadSize), encodingName));
					break;
				default:
					PdbUtil.skip(stream, payloadSize);
					break;
			}
		}
		return consumed;
	}

	/**
	 * Scans the EXTH block for the cover and thumbnail image indices.
	 *
	 * @return the cover index, else the thumbnail index, else -1 (also when
	 *         the file has no MOBI signature or no EXTH block)
	 */
	private int findCoverImageIndex(ZLFile file) throws IOException {
		InputStream stream = null;
		try {
			stream = file.getInputStream();
			final PdbHeader header = new PdbHeader(stream);
			PdbUtil.skip(stream, header.Offsets[0] + 16 - header.length());
			if (PdbUtil.readInt(stream) != MOBI_SIGNATURE) {
				return -1;
			}
			final int length = (int)PdbUtil.readInt(stream);
			PdbUtil.skip(stream, 104);
			final int exthFlags = (int)PdbUtil.readInt(stream);
			if ((exthFlags & EXTH_PRESENT_FLAG) == 0) {
				return -1;
			}
			PdbUtil.skip(stream, length - 116);
			if (PdbUtil.readInt(stream) != EXTH_SIGNATURE) {
				return -1;
			}
			PdbUtil.skip(stream, 4);
			final int recordsNumber = (int)PdbUtil.readInt(stream);

			int coverIndex = -1;
			int thumbIndex = -1;
			for (int i = 0; i < recordsNumber; ++i) {
				final int type = (int)PdbUtil.readInt(stream);
				final int size = (int)PdbUtil.readInt(stream);
				if (size <= 8) {
					continue;
				}
				// an index record carries exactly one 4-byte integer
				if (type == EXTH_COVER && size == 12) {
					coverIndex = (int)PdbUtil.readInt(stream);
				} else if (type == EXTH_THUMBNAIL && size == 12) {
					thumbIndex = (int)PdbUtil.readInt(stream);
				} else {
					PdbUtil.skip(stream, size - 8);
				}
			}
			return coverIndex != -1 ? coverIndex : thumbIndex;
		} finally {
			closeQuietly(stream);
		}
	}

	/**
	 * Reads exactly count bytes; InputStream.read alone may return fewer.
	 *
	 * @throws IOException if count is negative or the stream ends early
	 */
	private static byte[] readFully(InputStream stream, int count) throws IOException {
		if (count < 0) {
			throw new IOException("Negative block size: " + count);
		}
		final byte[] buffer = new byte[count];
		int done = 0;
		while (done < count) {
			final int portion = stream.read(buffer, done, count - done);
			if (portion <= 0) {
				throw new IOException("Unexpected end of stream");
			}
			done += portion;
		}
		return buffer;
	}

	/** Closes the stream if it is not null, ignoring any failure to do so. */
	private static void closeQuietly(InputStream stream) {
		if (stream != null) {
			try {
				stream.close();
			} catch (IOException e) {
				// nothing useful can be done at this point
			}
		}
	}
}

View file

@ -1,66 +0,0 @@
/*
* Copyright (C) 2009-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.IOException;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
/**
 * Text stream of a Mobipocket file that additionally knows where the
 * image records are located.
 */
class MobipocketStream extends PalmDocLikeStream {
	private final int myFileSize;
	// index (in the PDB record table) of the record holding image #0
	private final int myImageStartIndex;

	/**
	 * Opens the file and reads compression type, record count, maximal
	 * record size and the first image record index from record 0.
	 *
	 * @throws IOException if the header cannot be read or declares a zero record size
	 */
	MobipocketStream(ZLFile file) throws IOException {
		super(file);
		myFileSize = (int)file.size();

		int imageStartIndex = 0;
		boolean initialized = false;
		try {
			myCompressionType = PdbUtil.readShort(myBase);
			PdbUtil.skip(myBase, 6);
			myMaxRecordIndex = Math.min(PdbUtil.readShort(myBase), myHeader.Offsets.length - 1);
			final int maxRecordSize = PdbUtil.readShort(myBase);
			if (maxRecordSize == 0) {
				throw new IOException("The records are too short");
			}
			myBuffer = new byte[maxRecordSize];
			myRecordIndex = 0;
			PdbUtil.skip(myBase, 96);
			imageStartIndex = (int)PdbUtil.readInt(myBase);
			initialized = true;
		} finally {
			if (!initialized) {
				// the superclass has already opened the file; do not leak it
				try {
					close();
				} catch (IOException e) {
				}
			}
		}
		myImageStartIndex = imageStartIndex;
	}

	/**
	 * @return file offset of the image with the given 0-based index,
	 *         or -1 if there is no such record
	 */
	int getImageOffset(int index) {
		final int recordIndex = index + myImageStartIndex;
		final int[] offsets = myHeader.Offsets;
		if (recordIndex < 0 || recordIndex >= offsets.length) {
			return -1;
		}
		return offsets[recordIndex];
	}

	/**
	 * @return byte length of the image with the given 0-based index,
	 *         or -1 if there is no such record; the last record of the
	 *         file extends to the end of the file
	 */
	int getImageLength(int index) {
		final int recordIndex = index + myImageStartIndex;
		final int[] offsets = myHeader.Offsets;
		if (recordIndex < 0 || recordIndex >= offsets.length) {
			return -1;
		}
		final int start = offsets[recordIndex];
		final int end = (recordIndex + 1 < offsets.length) ? offsets[recordIndex + 1] : myFileSize;
		return end - start;
	}
}

View file

@ -1,85 +0,0 @@
/*
* Copyright (C) 2009-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.IOException;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
/**
 * PDB stream whose text is stored record by record, each record being
 * either plain or DOC-compressed. Subclasses set myCompressionType,
 * myMaxRecordIndex, myRecordIndex and allocate myBuffer.
 */
abstract class PalmDocLikeStream extends PdbStream {
	// index of the last record that belongs to the text
	protected int myMaxRecordIndex;
	// index of the record most recently loaded into the buffer
	protected int myRecordIndex;

	protected interface CompressionType {
		int NONE = 1;
		int DOC = 2;
		int HUFFDIC = 17480;
	}
	protected int myCompressionType;

	private final long myFileSize;

	PalmDocLikeStream(ZLFile file) throws IOException {
		super(file);
		myFileSize = file.size();
	}

	/**
	 * Makes sure the buffer holds unread data, loading the following
	 * record(s) when the current one is exhausted.
	 *
	 * @return false when no more records are available, the record table
	 *         is inconsistent, the compression type is unsupported, or an
	 *         I/O error occurs
	 */
	protected final boolean fillBuffer() {
		while (myBufferOffset == myBufferLength) {
			final int nextIndex = myRecordIndex + 1;
			if (nextIndex > myMaxRecordIndex || nextIndex >= myHeader.Offsets.length) {
				return false;
			}
			++myRecordIndex;
			final int currentOffset = myHeader.Offsets[myRecordIndex];

			try {
				myBase.skip(currentOffset - myBase.offset());
				// the last record of the file extends to the end of the file
				final int nextOffset =
					(myRecordIndex + 1 < myHeader.Offsets.length) ?
						myHeader.Offsets[myRecordIndex + 1] :
						(int)myFileSize;
				if (nextOffset < currentOffset) {
					return false;
				}
				// NOTE(review): the buffer counters inherited from PdbStream are
				// shorts, so sizes above 32767 overflow in this cast — confirm
				// whether such records can occur.
				final short recordSize = (short)Math.min(nextOffset - currentOffset, myBuffer.length);

				switch (myCompressionType) {
					case CompressionType.NONE:
					{
						// a single read() may deliver fewer bytes than requested,
						// so keep reading and count only what really arrived
						int received = 0;
						while (received < recordSize) {
							final int portion = myBase.read(myBuffer, received, recordSize - received);
							if (portion <= 0) {
								break;
							}
							received += portion;
						}
						myBufferLength = (short)received;
						break;
					}
					case CompressionType.DOC:
						myBufferLength = (short)DocDecompressor.decompress(myBase, myBuffer, recordSize);
						break;
					//case CompressionType.HUFFDIC:
					//	myBufferLength = (short)HuffdicDecompressor.decompress(myBase, myBuffer, recordSize);
					//	break;
					default:
						// Unsupported compression type
						return false;
				}
			} catch (IOException e) {
				return false;
			}

			myBufferOffset = 0;
		}
		return true;
	}
}

View file

@ -1,63 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.IOException;
import java.io.InputStream;
/**
 * Header of a PDB container: document name, flags, type/creator id and
 * the table of record offsets. Reading one consumes exactly
 * {@link #length()} bytes of the stream.
 */
public class PdbHeader {
	public final String DocName;
	public final int Flags;
	public final String Id;
	public final int[] Offsets;

	/**
	 * Parses the header from the current position of the stream.
	 *
	 * @throws IOException if the stream ends prematurely or declares no records
	 */
	public PdbHeader(InputStream stream) throws IOException {
		final byte[] buffer = new byte[32];
		if (!readFully(stream, buffer, 32)) {
			throw new IOException("PdbHeader: cannot read document name");
		}
		// NOTE(review): decoded with the platform default charset — confirm
		// that this is intended.
		DocName = new String(buffer);
		Flags = PdbUtil.readShort(stream);
		PdbUtil.skip(stream, 26);

		if (!readFully(stream, buffer, 8)) {
			throw new IOException("PdbHeader: cannot read palm id");
		}
		Id = new String(buffer, 0, 8);
		PdbUtil.skip(stream, 8);

		final int numRecords = PdbUtil.readShort(stream);
		if (numRecords <= 0) {
			throw new IOException("PdbHeader: record number = " + numRecords);
		}
		Offsets = new int[numRecords];
		for (int i = 0; i < numRecords; ++i) {
			// each table entry is 8 bytes: the 4-byte offset plus 4 bytes
			// that are not used here
			Offsets[i] = (int)PdbUtil.readInt(stream);
			PdbUtil.skip(stream, 4);
		}
	}

	/** @return number of bytes the header occupies: 78 fixed bytes plus 8 per record */
	public final int length() {
		return 78 + Offsets.length * 8;
	}

	/**
	 * Reads exactly count bytes into the start of buffer. A single
	 * InputStream.read call may return fewer bytes than asked for even
	 * when more data follows, so the read is repeated until done.
	 *
	 * @return false if the stream ended before count bytes were read
	 */
	private static boolean readFully(InputStream stream, byte[] buffer, int count) throws IOException {
		int done = 0;
		while (done < count) {
			final int portion = stream.read(buffer, done, count - done);
			if (portion <= 0) {
				return false;
			}
			done += portion;
		}
		return true;
	}
}

View file

@ -1,90 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.IOException;
import java.io.InputStream;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
import org.geometerplus.zlibrary.core.util.InputStreamWithOffset;
/**
 * Base class for streams that expose the content of a PDB file.
 *
 * The constructor parses the PDB header and positions the underlying
 * stream at the first record. Subclasses implement {@link #fillBuffer()},
 * which loads the next chunk of content into myBuffer.
 */
public abstract class PdbStream extends InputStream {
	protected final InputStreamWithOffset myBase;
	public PdbHeader myHeader;
	protected byte[] myBuffer;

	// number of valid bytes in myBuffer and the read position within them
	protected short myBufferLength;
	protected short myBufferOffset;

	/**
	 * @throws IOException if the file cannot be opened or its header is
	 *         invalid; the underlying stream is closed in that case
	 */
	public PdbStream(ZLFile file) throws IOException {
		myBase = new InputStreamWithOffset(file.getInputStream());

		boolean initialized = false;
		try {
			myHeader = new PdbHeader(myBase);
			myBase.skip(myHeader.Offsets[0] - myHeader.length());
			initialized = true;
		} finally {
			if (!initialized) {
				// no object is returned to the caller, so nobody else could close it
				try {
					myBase.close();
				} catch (IOException e) {
				}
			}
		}

		myBufferLength = 0;
		myBufferOffset = 0;
	}

	/** @return the next byte as a value in 0..255, or -1 at the end of the content */
	public int read() {
		if (!fillBuffer()) {
			return -1;
		}
		// mask the sign: a raw 0xFF byte must not be mistaken for end of stream
		return myBuffer[myBufferOffset++] & 0xFF;
	}

	/**
	 * Reads up to maxSize bytes. A null buffer is allowed: the bytes are
	 * then consumed without being stored.
	 *
	 * @return the number of bytes consumed, or -1 if none were
	 */
	public int read(byte[] buffer, int offset, int maxSize) {
		int realSize = 0;
		while (realSize < maxSize) {
			if (!fillBuffer()) {
				break;
			}
			int size = Math.min(maxSize - realSize, myBufferLength - myBufferOffset);

			if (size > 0) {
				if (buffer != null) {
					System.arraycopy(myBuffer, myBufferOffset, buffer, offset + realSize, size);
				}
				realSize += size;
				myBufferOffset += size;
			}
		}
		return (realSize > 0) ? realSize : -1;
	}

	public void close() throws IOException {
		if (myBase != null) {
			myBase.close();
		}
		myBuffer = null;
	}

	/**
	 * Consumes the given number of content bytes; skipping zero bytes does nothing.
	 *
	 * @throws IOException if offset is negative
	 */
	public void skip(int offset) throws IOException {
		if (offset < 0) {
			throw new IOException("Cannot skip: " + offset + " bytes");
		}
		if (offset > 0) {
			read(null, 0, offset);
		}
	}

	/**
	 * Ensures that myBuffer contains at least one unread byte.
	 *
	 * @return false if no more content is available
	 */
	protected abstract boolean fillBuffer();
}

View file

@ -1,48 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.pdb;
import java.io.*;
public abstract class PdbUtil {
public static void skip(InputStream stream, int numBytes) throws IOException {
numBytes -= stream.skip(numBytes);
for (; numBytes > 0; --numBytes) {
if (stream.read() == -1) {
throw new IOException("Unexpected end of stream");
}
}
}
public static int readShort(InputStream stream) throws IOException {
final byte[] tmp = new byte[2];
stream.read(tmp, 0, 2);
return (tmp[1] & 0xFF) + ((tmp[0] & 0xFF) << 8);
}
public static long readInt(InputStream stream) throws IOException {
final byte[] tmp = new byte[4];
stream.read(tmp, 0, 4);
return (((long)(tmp[0] & 0xFF)) << 24) +
+ ((tmp[1] & 0xFF) << 16) +
+ ((tmp[2] & 0xFF) << 8) +
+ (tmp[3] & 0xFF);
}
}

View file

@ -1,67 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.formats.util;
import org.geometerplus.zlibrary.core.filesystem.ZLFile;
/** Small path and reference helpers shared by the format readers. */
public class MiscUtil {
	/** @return the path of the file with its short name cut off the end */
	public static String htmlDirectoryPrefix(ZLFile file) {
		final String shortName = file.getShortName();
		final String path = file.getPath();
		final int prefixLength = path.length() - shortName.length();
		return path.substring(0, prefixLength);
	}

	/**
	 * @return the part after the last ':' of fullPath; the path is returned
	 *         unchanged if it has no ':' at index 2 or later
	 */
	public static String archiveEntryName(String fullPath) {
		final int colon = fullPath.lastIndexOf(':');
		if (colon < 2) {
			return fullPath;
		}
		return fullPath.substring(colon + 1);
	}

	private static boolean isHexDigit(char ch) {
		return "0123456789abcdefABCDEF".indexOf(ch) >= 0;
	}

	/**
	 * Replaces every "%XY" sequence (X and Y being hex digits) with the
	 * character whose code is 0xXY. A '%' that is not followed by two hex
	 * digits is left as it is; null yields null.
	 */
	public static String decodeHtmlReference(String name) {
		if (name == null) {
			return null;
		}

		String decoded = name;
		int from = 0;
		for (;;) {
			final int percent = decoded.indexOf('%', from);
			// stop when no '%' is left or fewer than two characters follow it
			if (percent == -1 || percent >= decoded.length() - 2) {
				return decoded;
			}
			final char high = decoded.charAt(percent + 1);
			final char low = decoded.charAt(percent + 2);
			if (isHexDigit(high) && isHexDigit(low)) {
				final char c = (char)((Character.digit(high, 16) << 4) | Character.digit(low, 16));
				decoded = decoded.substring(0, percent) + c + decoded.substring(percent + 3);
			}
			// continue right after the (possibly replaced) character
			from = percent + 1;
		}
	}
}

View file

@ -1,135 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.zlibrary.core.html;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import org.geometerplus.zlibrary.core.util.ZLArrayUtils;
/**
 * Growable byte sequence used by the HTML parser for tag names, attribute
 * names and values. Only the first myLength bytes of myData are meaningful.
 * The string form is cached until the content changes.
 */
public final class ZLByteBuffer {
	byte[] myData;
	int myLength;

	ZLByteBuffer(int len) {
		myData = new byte[len];
	}

	public ZLByteBuffer() {
		this(20);
	}

	/** Creates a buffer holding the bytes of value in the platform default charset. */
	public ZLByteBuffer(String value) {
		myData = value.getBytes();
		// the number of bytes, not of chars: the two differ for non-ASCII text
		myLength = myData.length;
	}

	ZLByteBuffer(ZLByteBuffer container) {
		final int len = container.myLength;
		myData = ZLArrayUtils.createCopy(container.myData, len, len);
		myLength = len;
	}

	public boolean isEmpty() {
		return myLength == 0;
	}

	/** Appends count bytes of buffer, starting at offset, growing the storage if needed. */
	public void append(byte[] buffer, int offset, int count) {
		final int len = myLength;
		byte[] data = myData;
		final int newLength = len + count;
		if (data.length < newLength) {
			data = ZLArrayUtils.createCopy(data, len, newLength);
			myData = data;
		}
		System.arraycopy(buffer, offset, data, len, count);
		myLength = newLength;
		myStringValue = null;
	}

	/** Empties the buffer; the allocated storage is kept. */
	public void clear() {
		myLength = 0;
		myStringValue = null;
	}

	/** Two buffers are equal if they hold the same sequence of meaningful bytes. */
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}
		// also rejects null, as the equals contract requires
		if (!(o instanceof ZLByteBuffer)) {
			return false;
		}
		final ZLByteBuffer container = (ZLByteBuffer)o;
		final int len = myLength;
		if (len != container.myLength) {
			return false;
		}
		final byte[] data0 = myData;
		final byte[] data1 = container.myData;
		for (int i = len; --i >= 0; ) {
			if (data0[i] != data1[i]) {
				return false;
			}
		}
		return true;
	}

	/** Hash built from the length and at most the first three bytes. */
	public int hashCode() {
		final int len = myLength;
		final byte[] data = myData;
		int code = len * 31;
		if (len > 1) {
			code += data[0];
			code *= 31;
			code += data[1];
			if (len > 2) {
				code *= 31;
				code += data[2];
			}
		} else if (len > 0) {
			code += data[0];
		}
		return code;
	}

	/**
	 * @param lcPattern pattern to compare with; expected to be lower case already
	 * @return true if the lower-cased content equals lcPattern
	 */
	public boolean equalsToLCString(String lcPattern) {
		return (myLength == lcPattern.length()) &&
			lcPattern.equals(new String(myData, 0, myLength).toLowerCase());
	}

	// scratch char buffer shared by all instances, guarded by myConverterLock
	private static final Object myConverterLock = new Object();
	private static char[] myConverterBuffer = new char[20];
	private String myStringValue;

	/**
	 * Decodes the content with the given decoder. The result is cached, so
	 * a later call returns the cached string whatever decoder is passed,
	 * until the content changes.
	 */
	public String toString(CharsetDecoder decoder) {
		if (myStringValue == null) {
			synchronized (myConverterLock) {
				if (myConverterBuffer.length < myLength) {
					myConverterBuffer = new char[myLength];
				}
				ByteBuffer byteBuffer = ByteBuffer.wrap(myData, 0, myLength);
				CharBuffer charBuffer = CharBuffer.wrap(myConverterBuffer);
				decoder.decode(byteBuffer, charBuffer, true);
				myStringValue = new String(myConverterBuffer, 0, charBuffer.position());
			}
		}
		return myStringValue;
	}

	/** Decodes the content with the platform default charset (cached as above). */
	public String toString() {
		if (myStringValue == null) {
			myStringValue = new String(myData, 0, myLength);
		}
		return myStringValue;
	}
}

View file

@ -1,83 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.zlibrary.core.html;
import java.nio.charset.CharsetDecoder;
// optimized partially implemented map ZLByteBuffer -> ZLByteBuffer
// there is no remove() in this implementation
// put with the same key does not remove old entry
public final class ZLHtmlAttributeMap {
	// parallel arrays: myValues[i] belongs to myKeys[i]; only the first
	// mySize slots are in use
	private ZLByteBuffer[] myKeys;
	private ZLByteBuffer[] myValues;
	private int mySize;

	public ZLHtmlAttributeMap() {
		myKeys = new ZLByteBuffer[8];
		myValues = new ZLByteBuffer[8];
	}

	/** Appends the pair; both arrays double in size when they are full. */
	public void put(ZLByteBuffer key, ZLByteBuffer value) {
		final int slot = mySize++;
		if (slot == myKeys.length) {
			final int capacity = slot << 1;

			final ZLByteBuffer[] grownKeys = new ZLByteBuffer[capacity];
			System.arraycopy(myKeys, 0, grownKeys, 0, slot);
			myKeys = grownKeys;

			final ZLByteBuffer[] grownValues = new ZLByteBuffer[capacity];
			System.arraycopy(myValues, 0, grownValues, 0, slot);
			myValues = grownValues;
		}
		myKeys[slot] = key;
		myValues[slot] = value;
	}

	/**
	 * @param lcPattern attribute name to look for, in lower case
	 * @return value of the most recently added matching key, or null
	 */
	public ZLByteBuffer getValue(String lcPattern) {
		// newest entries are checked first
		for (int i = mySize - 1; i >= 0; --i) {
			if (myKeys[i].equalsToLCString(lcPattern)) {
				return myValues[i];
			}
		}
		return null;
	}

	/** @return the decoded value for the given lower-case name, or null if absent */
	public String getStringValue(String lcPattern, CharsetDecoder decoder) {
		final ZLByteBuffer value = getValue(lcPattern);
		if (value == null) {
			return null;
		}
		return value.toString(decoder);
	}

	public int getSize() {
		return mySize;
	}

	public ZLByteBuffer getKey(int index) {
		return myKeys[index];
	}

	/** Forgets all pairs; the arrays keep their current capacity. */
	public void clear() {
		mySize = 0;
	}
}

View file

@ -1,490 +0,0 @@
/*
* Copyright (C) 2007-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.zlibrary.core.html;
import java.io.*;
import java.util.*;
import org.geometerplus.zlibrary.core.util.ZLArrayUtils;
import org.geometerplus.zlibrary.core.html.ZLHtmlReader;
final class ZLHtmlParser {
// Values of the `state` variable that drives the switch inside doIt().
// The numbering has gaps: 4 and 5 are commented out and 16 is not defined.
private static final byte START_DOCUMENT = 0;
private static final byte START_TAG = 1;
private static final byte END_TAG = 2;
private static final byte TEXT = 3;
//private static final byte IGNORABLE_WHITESPACE = 4;
//private static final byte PROCESSING_INSTRUCTION = 5;
private static final byte COMMENT = 6;
private static final byte LANGLE = 7;
private static final byte WS_AFTER_START_TAG_NAME = 8;
private static final byte WS_AFTER_END_TAG_NAME = 9;
private static final byte WAIT_EQUALS = 10;
private static final byte WAIT_ATTRIBUTE_VALUE = 11;
private static final byte SLASH = 12;
private static final byte ATTRIBUTE_NAME = 13;
private static final byte S_ATTRIBUTE_VALUE = 14;
private static final byte DEFAULT_ATTRIBUTE_VALUE = 15;
private static final byte COMMENT_MINUS = 17;
private static final byte D_ATTRIBUTE_VALUE = 18;
private static final byte SCRIPT = 19;
private static final byte ENTITY_REF = 20;
/**
 * Returns the canonical buffer equal to container, creating and
 * registering a copy of container in strings if none is known yet.
 * The container is cleared in either case so that it can be reused.
 */
private static ZLByteBuffer unique(HashMap<ZLByteBuffer,ZLByteBuffer> strings, ZLByteBuffer container) {
	final ZLByteBuffer known = strings.get(container);
	final ZLByteBuffer canonical;
	if (known != null) {
		canonical = known;
	} else {
		canonical = new ZLByteBuffer(container);
		strings.put(canonical, canonical);
	}
	container.clear();
	return canonical;
}
private final ZLHtmlReader myReader;
private final InputStream myStream;
/**
 * Remembers the stream to parse and the reader that receives the events.
 * Nothing is read from the stream here.
 */
public ZLHtmlParser(ZLHtmlReader htmlReader, InputStream stream) throws IOException {
	myStream = stream;
	myReader = htmlReader;
}
public void doIt() throws IOException {
final InputStream stream = myStream;
final ZLHtmlReader htmlReader = myReader;
byte[] buffer = new byte[8192];
final ZLByteBuffer tagName = new ZLByteBuffer();
final ZLByteBuffer attributeName = new ZLByteBuffer();
final ZLByteBuffer attributeValue = new ZLByteBuffer();
final ZLByteBuffer entityName = new ZLByteBuffer();
final HashMap<ZLByteBuffer,ZLByteBuffer> strings = new HashMap<ZLByteBuffer,ZLByteBuffer>();
final ZLHtmlAttributeMap attributes = new ZLHtmlAttributeMap();
boolean scriptOpened = false;
//boolean html = false;
int bufferOffset = 0;
int offset = 0;
byte state = START_DOCUMENT;
while (true) {
final int count = stream.read(buffer);
if (count <= 0) {
return;
}
if (count < buffer.length) {
buffer = ZLArrayUtils.createCopy(buffer, count, count);
}
int startPosition = 0;
try {
for (int i = -1;;) {
mainSwitchLabel:
switch (state) {
case START_DOCUMENT:
while (buffer[++i] != '<') {}
state = LANGLE;
break;
case LANGLE:
offset = bufferOffset + i;
switch (buffer[++i]) {
case '/':
state = END_TAG;
startPosition = i + 1;
break;
case '!':
switch (buffer[++i]) {
case '-':
state = COMMENT_MINUS;
i--;
break;
default:
state = COMMENT;
break;
}
case '?':
state = COMMENT;
break;
default:
state = START_TAG;
startPosition = i;
break;
}
break;
case SCRIPT:
while (true) {
if (buffer[++i] == '<') {
if (buffer[++i] == '/') {
state = END_TAG;
startPosition = i + 1;
break mainSwitchLabel;
}
}
}
case COMMENT_MINUS:
{
int minusCounter = 0;
while (minusCounter != 2) {
switch (buffer[++i]) {
case '-':
minusCounter++;
break;
default:
minusCounter = 0;
break;
}
}
switch (buffer[++i]) {
case '>':
state = TEXT;
startPosition = i + 1;
break mainSwitchLabel;
}
}
case COMMENT:
while (true) {
switch (buffer[++i]) {
case '>':
state = TEXT;
startPosition = i + 1;
break mainSwitchLabel;
}
}
case START_TAG:
while (true) {
switch (buffer[++i]) {
case 0x0008:
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case ' ':
state = WS_AFTER_START_TAG_NAME;
tagName.append(buffer, startPosition, i - startPosition);
break mainSwitchLabel;
case '>':
state = TEXT;
tagName.append(buffer, startPosition, i - startPosition);
{
final ZLByteBuffer stringTagName = unique(strings, tagName);
processStartTag(htmlReader, stringTagName, offset, attributes);
if (stringTagName.equalsToLCString("script")) {
scriptOpened = true;
state = SCRIPT;
break mainSwitchLabel;
}
/*if (stringTagName.equalsToLCString("html")) {
html = true;
}*/
}
startPosition = i + 1;
break mainSwitchLabel;
case '/':
state = SLASH;
tagName.append(buffer, startPosition, i - startPosition);
//processFullTag(htmlReader, unique(strings, tagName), attributes);
break mainSwitchLabel;
}
}
case END_TAG:
while (true) {
switch (buffer[++i]) {
case 0x0008:
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case ' ':
state = WS_AFTER_END_TAG_NAME;
tagName.append(buffer, startPosition, i - startPosition);
break mainSwitchLabel;
case '>':
tagName.append(buffer, startPosition, i - startPosition);
{
final ZLByteBuffer stringTagName = unique(strings, tagName);
processEndTag(htmlReader, stringTagName);
if (scriptOpened) {
}
if (stringTagName.equalsToLCString("script")) {
scriptOpened = false;
}
}
if (scriptOpened) {
state = SCRIPT;
} else {
state = TEXT;
startPosition = i + 1;
}
break mainSwitchLabel;
}
}
case WS_AFTER_START_TAG_NAME:
switch (buffer[++i]) {
case '>':
{
final ZLByteBuffer stringTagName = unique(strings, tagName);
processStartTag(htmlReader, stringTagName, offset, attributes);
if (stringTagName.equalsToLCString("script")) {
scriptOpened = true;
state = SCRIPT;
break mainSwitchLabel;
}
state = TEXT;
startPosition = i + 1;
break;
}
case '/':
state = SLASH;
break;
case 0x0008:
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case ' ':
break;
default:
state = ATTRIBUTE_NAME;
startPosition = i;
break;
}
break;
case WS_AFTER_END_TAG_NAME:
switch (buffer[++i]) {
case '>':
{
ZLByteBuffer stringTagName = unique(strings, tagName);
processEndTag(htmlReader, stringTagName);
if (stringTagName.equalsToLCString("script")) {
scriptOpened = false;
}
if (scriptOpened) {
state = SCRIPT;
} else {
state = TEXT;
startPosition = i + 1;
}
break;
}
}
break;
case ATTRIBUTE_NAME:
while (true) {
switch (buffer[++i]) {
case '=':
attributeName.append(buffer, startPosition, i - startPosition);
state = WAIT_ATTRIBUTE_VALUE;
break mainSwitchLabel;
case 0x0008:
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000C:
case 0x000D:
case ' ':
attributeName.append(buffer, startPosition, i - startPosition);
state = WAIT_EQUALS;
break mainSwitchLabel;
}
}
case WAIT_EQUALS:
while (true) {
switch (buffer[++i]) {
case '=':
state = WAIT_ATTRIBUTE_VALUE;
break mainSwitchLabel;
}
}
case WAIT_ATTRIBUTE_VALUE:
while (true) {
switch (buffer[++i]) {
case ' ':
break;
case '\t':
break;
case '\n':
break;
case '\'':
state = S_ATTRIBUTE_VALUE;
startPosition = i + 1;
break mainSwitchLabel;
case '"':
state = D_ATTRIBUTE_VALUE;
startPosition = i + 1;
break mainSwitchLabel;
default:
state = DEFAULT_ATTRIBUTE_VALUE;
startPosition = i;
break mainSwitchLabel;
}
}
case DEFAULT_ATTRIBUTE_VALUE:
while (true) {
i++;
if ((buffer[i] == ' ') || (buffer[i] == '\'')
|| (buffer[i] == '"') || (buffer[i] == '>')) {
attributeValue.append(buffer, startPosition, i - startPosition);
attributes.put(unique(strings, attributeName), new ZLByteBuffer(attributeValue));
attributeValue.clear();
}
switch (buffer[i]) {
case ' ':
case '\'':
case '"':
state = WS_AFTER_START_TAG_NAME;
break mainSwitchLabel;
case '/':
state = SLASH;
break mainSwitchLabel;
case '>':
ZLByteBuffer stringTagName = unique(strings, tagName);
processStartTag(htmlReader, stringTagName, offset, attributes);
if (stringTagName.equalsToLCString("script")) {
scriptOpened = true;
state = SCRIPT;
break mainSwitchLabel;
}
state = TEXT;
startPosition = i + 1;
break mainSwitchLabel;
}
}
case D_ATTRIBUTE_VALUE:
while (true) {
switch (buffer[++i]) {
case '"':
attributeValue.append(buffer, startPosition, i - startPosition);
state = WS_AFTER_START_TAG_NAME;
attributes.put(unique(strings, attributeName), new ZLByteBuffer(attributeValue));
attributeValue.clear();
break mainSwitchLabel;
}
}
case S_ATTRIBUTE_VALUE:
while (true) {
switch (buffer[++i]) {
case '\'':
attributeValue.append(buffer, startPosition, i - startPosition);
state = WS_AFTER_START_TAG_NAME;
attributes.put(unique(strings, attributeName), new ZLByteBuffer(attributeValue));
attributeValue.clear();
break mainSwitchLabel;
}
}
case SLASH:
while (true) {
switch (buffer[++i]) {
case ' ':
break;
case '>':
processFullTag(htmlReader, unique(strings, tagName), offset, attributes);
state = TEXT;
startPosition = i + 1;
break mainSwitchLabel;
default:
state = DEFAULT_ATTRIBUTE_VALUE;
break mainSwitchLabel;
}
}
case TEXT:
while (true) {
switch (buffer[++i]) {
case '<':
if (i > startPosition) {
htmlReader.byteDataHandler(buffer, startPosition, i - startPosition);
}
state = LANGLE;
break mainSwitchLabel;
case '&':
if (i > startPosition) {
htmlReader.byteDataHandler(buffer, startPosition, i - startPosition);
}
state = ENTITY_REF;
startPosition = i + 1;
break mainSwitchLabel;
}
}
case ENTITY_REF:
while (true) {
byte sym = buffer[++i];
if (sym == ';') {
entityName.append(buffer, startPosition, i - startPosition);
state = TEXT;
startPosition = i + 1;
htmlReader.entityDataHandler(unique(strings, entityName).toString());
entityName.clear();
break mainSwitchLabel;
} else if ((sym != '#') && !Character.isLetterOrDigit(sym)) {
entityName.append(buffer, startPosition, i - startPosition);
state = TEXT;
startPosition = i;
htmlReader.byteDataHandler(new byte[] { '&' }, 0, 1);
htmlReader.byteDataHandler(entityName.myData, 0, entityName.myLength);
entityName.clear();
break mainSwitchLabel;
}
}
}
}
} catch (ArrayIndexOutOfBoundsException e) {
switch (state) {
case START_TAG:
case END_TAG:
tagName.append(buffer, startPosition, count - startPosition);
break;
case ATTRIBUTE_NAME:
attributeName.append(buffer, startPosition, count - startPosition);
break;
case S_ATTRIBUTE_VALUE:
case D_ATTRIBUTE_VALUE:
attributeValue.append(buffer, startPosition, count - startPosition);
break;
case TEXT:
htmlReader.byteDataHandler(buffer, startPosition, count - startPosition);
break;
case ENTITY_REF:
entityName.append(buffer, startPosition, count - startPosition);
break;
}
}
bufferOffset += count;
}
}
/**
 * Reports a self-closing tag to the reader as a start-element event
 * immediately followed by an end-element event, then empties the
 * attribute map.
 *
 * @param htmlReader receiver of the element events
 * @param tagName    tag name buffer; converted to a String once and used for both events
 * @param offset     offset value passed through to the start-element handler
 * @param attributes attributes collected for this tag; cleared before returning
 */
private static void processFullTag(ZLHtmlReader htmlReader, ZLByteBuffer tagName, int offset, ZLHtmlAttributeMap attributes) {
	final String name = tagName.toString();

	// Open and close the element back to back: a full tag has no content.
	htmlReader.startElementHandler(name, offset, attributes);
	htmlReader.endElementHandler(name);

	// Drop the attributes only after both handlers have run.
	attributes.clear();
}
/**
 * Reports an opening tag to the reader and then empties the attribute map.
 *
 * @param htmlReader receiver of the start-element event
 * @param tagName    tag name buffer, converted to a String for the handler
 * @param offset     offset value passed through to the handler
 * @param attributes attributes collected for this tag; cleared after the handler returns
 */
private static void processStartTag(ZLHtmlReader htmlReader, ZLByteBuffer tagName, int offset, ZLHtmlAttributeMap attributes) {
	final String name = tagName.toString();
	htmlReader.startElementHandler(name, offset, attributes);

	// The handler has seen the attributes; empty the map.
	attributes.clear();
}
/**
 * Reports a closing tag to the reader.
 *
 * @param htmlReader receiver of the end-element event
 * @param tagName    tag name buffer, converted to a String for the handler
 */
private static void processEndTag(ZLHtmlReader htmlReader, ZLByteBuffer tagName) {
	final String name = tagName.toString();
	htmlReader.endElementHandler(name);
}
}

View file

@ -58,139 +58,4 @@ public abstract class ZLLanguageUtil {
/**
 * Returns the resource file named "languagePatterns", obtained through
 * ZLResourceFile.createResourceFile.
 *
 * @return the ZLFile produced by ZLResourceFile.createResourceFile("languagePatterns")
 */
public static ZLFile patternsFile() { public static ZLFile patternsFile() {
return ZLResourceFile.createResourceFile("languagePatterns"); return ZLResourceFile.createResourceFile("languagePatterns");
} }
/**
 * Maps a numeric language code, refined by a sub-language code where
 * needed, to a short language code string.
 *
 * The sub-language code is consulted only for 0x1A (Serbian / Croatian /
 * Bosnian) and 0x2E (Lower / Upper Sorbian); it is ignored for every
 * other language.
 *
 * NOTE(review): the numeric values look like Windows primary language
 * identifiers, with subLanguageCode being the upper byte of the LCID --
 * confirm against the caller that extracts them.
 *
 * @param languageCode    numeric primary language code
 * @param subLanguageCode numeric sub-language code
 * @return the language code string, or null when languageCode is not in the table
 */
public static String languageByIntCode(int languageCode, int subLanguageCode) {
	switch (languageCode) {
		case 0x01: return "ar"; // Arabic
		case 0x02: return "bg"; // Bulgarian
		case 0x03: return "ca"; // Catalan
		case 0x04: return "zh"; // Chinese
		case 0x05: return "cs"; // Czech
		case 0x06: return "da"; // Danish
		case 0x07: return "de"; // German
		case 0x08: return "el"; // Greek
		case 0x09: return "en"; // English
		case 0x0A: return "es"; // Spanish
		case 0x0B: return "fi"; // Finnish
		case 0x0C: return "fr"; // French
		case 0x0D: return "he"; // Hebrew
		case 0x0E: return "hu"; // Hungarian
		case 0x0F: return "is"; // Icelandic
		case 0x10: return "it"; // Italian
		case 0x11: return "ja"; // Japanese
		case 0x12: return "ko"; // Korean
		case 0x13: return "nl"; // Dutch
		case 0x14: return "nb"; // Norwegian
		case 0x15: return "pl"; // Polish
		case 0x16: return "pt"; // Portuguese
		case 0x17: return "rm"; // Romansh
		case 0x18: return "ro"; // Romanian
		case 0x19: return "ru"; // Russian
		case 0x1A:
			// One primary code is shared by three languages; the
			// sub-language code tells them apart.
			switch (subLanguageCode) {
				case 0x04:
				case 0x10: return "hr"; // Croatian
				case 0x14:
				case 0x20:
				case 0x78: return "bs"; // Bosnian
				default: return "sr"; // Serbian
			}
		case 0x1B: return "sk"; // Slovak
		case 0x1C: return "sq"; // Albanian
		case 0x1D: return "sv"; // Swedish
		case 0x1E: return "th"; // Thai
		case 0x1F: return "tr"; // Turkish
		case 0x20: return "ur"; // Urdu
		case 0x21: return "id"; // Indonesian
		case 0x22: return "uk"; // Ukrainian
		case 0x23: return "be"; // Belarusian
		case 0x24: return "sl"; // Slovenian
		case 0x25: return "et"; // Estonian
		case 0x26: return "lv"; // Latvian
		case 0x27: return "lt"; // Lithuanian
		case 0x28: return "tg"; // Tajik
		case 0x29: return "fa"; // Persian (Farsi)
		case 0x2A: return "vi"; // Vietnamese
		case 0x2B: return "hy"; // Armenian
		case 0x2C: return "az"; // Azeri
		case 0x2D: return "eu"; // Basque
		case 0x2E:
			// NOTE(review): "wen" is the collective code for Sorbian
			// languages; "hsb" is the specific Upper Sorbian code. Kept
			// as-is because consumers may depend on the current value.
			return (subLanguageCode == 0x08)
				? "dsb" // Lower Sorbian
				: "wen"; // Upper Sorbian
		case 0x2F: return "mk"; // Macedonian
		case 0x32: return "tn"; // Setswana/Tswana
		case 0x34: return "xh"; // Xhosa/isiXhosa
		case 0x35: return "zu"; // Zulu/isiZulu
		case 0x36: return "af"; // Afrikaans
		case 0x37: return "ka"; // Georgian
		case 0x38: return "fo"; // Faeroese
		case 0x39: return "hi"; // Hindi
		case 0x3A: return "mt"; // Maltese
		case 0x3B: return "se"; // Sami
		case 0x3C: return "ga"; // Irish
		case 0x3E: return "ms"; // Malay
		case 0x3F: return "kk"; // Kazakh
		case 0x40: return "ky"; // Kyrgyz
		case 0x41: return "sw"; // Swahili
		case 0x42: return "tk"; // Turkmen
		case 0x43: return "uz"; // Uzbek
		case 0x44: return "tt"; // Tatar
		case 0x45: return "bn"; // Bengali
		case 0x46: return "pa"; // Punjabi
		case 0x47: return "gu"; // Gujarati
		case 0x48: return "or"; // Oriya
		case 0x49: return "ta"; // Tamil
		case 0x4A: return "te"; // Telugu
		case 0x4B: return "kn"; // Kannada
		case 0x4C: return "ml"; // Malayalam
		case 0x4D: return "as"; // Assamese
		case 0x4E: return "mr"; // Marathi
		case 0x4F: return "sa"; // Sanskrit
		case 0x50: return "mn"; // Mongolian
		case 0x51: return "bo"; // Tibetan
		case 0x52: return "cy"; // Welsh
		case 0x53: return "km"; // Khmer (was "kh", which is not an ISO 639-1 code)
		case 0x54: return "lo"; // Lao
		case 0x56: return "gl"; // Galician
		case 0x57: return "kok"; // Konkani
		case 0x58: return "mni"; // Manipuri
		case 0x59: return "sd"; // Sindhi
		case 0x5A: return "syr"; // Syriac
		case 0x5B: return "si"; // Sinhala
		case 0x5D: return "iu"; // Inuktitut
		case 0x5E: return "am"; // Amharic
		case 0x5F: return "tzm"; // Tamazight
		case 0x60: return "ks"; // Kashmiri
		case 0x61: return "ne"; // Nepali
		case 0x62: return "fy"; // Frisian
		case 0x63: return "ps"; // Pashto
		case 0x64: return "fil"; // Filipino
		case 0x65: return "dv"; // Divehi
		case 0x68: return "ha"; // Hausa
		case 0x6A: return "yo"; // Yoruba
		case 0x6B: return "quz"; // Quechua
		// NOTE(review): "ns" is not an ISO 639 code ("nso" is Northern
		// Sotho); left unchanged pending confirmation of what consumers expect.
		case 0x6C: return "ns"; // Northern Sotho
		case 0x6D: return "ba"; // Bashkir
		case 0x6E: return "lb"; // Luxembourgish
		case 0x6F: return "kl"; // Greenlandic
		case 0x70: return "ig"; // Igbo
		case 0x73: return "ti"; // Tigrinya
		case 0x78: return "yi"; // Yi
		case 0x7A: return "arn"; // Mapudungun
		case 0x7C: return "moh"; // Mohawk
		case 0x7E: return "br"; // Breton (was "be", which is Belarusian -- see 0x23)
		case 0x80: return "ug"; // Uighur
		case 0x81: return "mi"; // Maori
		case 0x82: return "oc"; // Occitan
		case 0x83: return "co"; // Corsican
		case 0x84: return "gsw"; // Alsatian
		case 0x85: return "sah"; // Yakut
		case 0x86: return "qut"; // K'iche
		case 0x87: return "rw"; // Kinyarwanda
		case 0x88: return "wo"; // Wolof
		case 0x8C: return "prs"; // Dari
		case 0x8D: return "mg"; // Malagasy
		default: return null; // unknown language code
	}
}
} }