1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-04 18:29:23 +02:00

epub code is included into native library

This commit is contained in:
Nikolay Pultsin 2012-03-29 18:03:23 +01:00
parent 0b598680c9
commit 0927986772
20 changed files with 2546 additions and 2 deletions

View file

@ -96,6 +96,14 @@ LOCAL_SRC_FILES := \
NativeFormats/fbreader/src/formats/fb2/FB2Plugin.cpp \ NativeFormats/fbreader/src/formats/fb2/FB2Plugin.cpp \
NativeFormats/fbreader/src/formats/fb2/FB2Reader.cpp \ NativeFormats/fbreader/src/formats/fb2/FB2Reader.cpp \
NativeFormats/fbreader/src/formats/fb2/FB2TagManager.cpp \ NativeFormats/fbreader/src/formats/fb2/FB2TagManager.cpp \
NativeFormats/fbreader/src/formats/css/StyleSheetParser.cpp \
NativeFormats/fbreader/src/formats/css/StyleSheetTable.cpp \
NativeFormats/fbreader/src/formats/oeb/NCXReader.cpp \
NativeFormats/fbreader/src/formats/oeb/OEBBookReader.cpp \
NativeFormats/fbreader/src/formats/oeb/OEBCoverReader.cpp \
NativeFormats/fbreader/src/formats/oeb/OEBMetaInfoReader.cpp \
NativeFormats/fbreader/src/formats/oeb/OEBPlugin.cpp \
NativeFormats/fbreader/src/formats/oeb/OEBTextStream.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfBookReader.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfDescriptionReader.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfDescriptionReader.cpp \
NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp \ NativeFormats/fbreader/src/formats/rtf/RtfPlugin.cpp \
@ -105,6 +113,7 @@ LOCAL_SRC_FILES := \
NativeFormats/fbreader/src/formats/txt/TxtBookReader.cpp \ NativeFormats/fbreader/src/formats/txt/TxtBookReader.cpp \
NativeFormats/fbreader/src/formats/txt/TxtPlugin.cpp \ NativeFormats/fbreader/src/formats/txt/TxtPlugin.cpp \
NativeFormats/fbreader/src/formats/txt/TxtReader.cpp \ NativeFormats/fbreader/src/formats/txt/TxtReader.cpp \
NativeFormats/fbreader/src/formats/xhtml/XHTMLReader.cpp \
NativeFormats/fbreader/src/library/Author.cpp \ NativeFormats/fbreader/src/library/Author.cpp \
NativeFormats/fbreader/src/library/Book.cpp \ NativeFormats/fbreader/src/library/Book.cpp \
NativeFormats/fbreader/src/library/Comparators.cpp \ NativeFormats/fbreader/src/library/Comparators.cpp \

View file

@ -32,7 +32,7 @@
#include "txt/TxtPlugin.h" #include "txt/TxtPlugin.h"
//#include "pdb/PdbPlugin.h" //#include "pdb/PdbPlugin.h"
//#include "tcr/TcrPlugin.h" //#include "tcr/TcrPlugin.h"
//#include "oeb/OEBPlugin.h" #include "oeb/OEBPlugin.h"
//#include "chm/CHMPlugin.h" //#include "chm/CHMPlugin.h"
#include "rtf/RtfPlugin.h" #include "rtf/RtfPlugin.h"
//#include "openreader/OpenReaderPlugin.h" //#include "openreader/OpenReaderPlugin.h"
@ -54,7 +54,7 @@ PluginCollection &PluginCollection::Instance() {
// ourInstance->myPlugins.push_back(new ZTXTPlugin()); // ourInstance->myPlugins.push_back(new ZTXTPlugin());
// ourInstance->myPlugins.push_back(new TcrPlugin()); // ourInstance->myPlugins.push_back(new TcrPlugin());
// ourInstance->myPlugins.push_back(new CHMPlugin()); // ourInstance->myPlugins.push_back(new CHMPlugin());
//ourInstance->myPlugins.push_back(new OEBPlugin()); ourInstance->myPlugins.push_back(new OEBPlugin());
ourInstance->myPlugins.push_back(new RtfPlugin()); ourInstance->myPlugins.push_back(new RtfPlugin());
// ourInstance->myPlugins.push_back(new OpenReaderPlugin()); // ourInstance->myPlugins.push_back(new OpenReaderPlugin());
// //ourInstance->myPlugins.push_back(new PdfPlugin()); // //ourInstance->myPlugins.push_back(new PdfPlugin());

View file

@ -0,0 +1,195 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cctype>
#include <cstring>
#include <ZLStringUtil.h>
#include <ZLInputStream.h>
#include "StyleSheetParser.h"
// Binds the parser to the table that will receive every completed rule.
StyleSheetTableParser::StyleSheetTableParser(StyleSheetTable &table)
	: myTable(table) {
}
// Called once per finished rule; registers the selector and its
// attributes in the bound StyleSheetTable.
void StyleSheetTableParser::storeData(
	const std::string &tagName,
	const std::string &className,
	const StyleSheetTable::AttributeMap &map
) {
	myTable.addMap(tagName, className, map);
}
// Parses a bare declaration list (no selector, no braces) and returns
// the style entry built from it. The parser is reset afterwards so the
// same object can be reused for the next string.
shared_ptr<ZLTextStyleEntry> StyleSheetSingleStyleParser::parseString(const char *text) {
	// There is no selector to read, so start directly at the attribute name.
	myReadState = ATTRIBUTE_NAME;

	const size_t length = strlen(text);
	parse(text, length, true);

	shared_ptr<ZLTextStyleEntry> result = StyleSheetTable::createControl(myMap);
	reset();
	return result;
}
// A fresh parser expects a selector first and is not inside a comment.
StyleSheetParser::StyleSheetParser()
	: myReadState(TAG_NAME),
	  myInsideComment(false) {
}
// Nothing to release explicitly; all members clean up after themselves.
StyleSheetParser::~StyleSheetParser() {
}
// Returns the parser to its initial state: no pending word, no current
// selector or attribute, empty attribute map, outside any comment.
void StyleSheetParser::reset() {
	// Scanner state.
	myReadState = TAG_NAME;
	myInsideComment = false;
	myWord.erase();

	// Data collected for the rule in progress.
	myTagName.erase();
	myClassName.erase();
	myAttributeName.erase();
	myMap.clear();
}
// Reads the whole stream in fixed-size chunks and feeds each chunk to the
// text parser. Does nothing if the stream cannot be opened.
void StyleSheetParser::parse(ZLInputStream &stream) {
	if (!stream.open()) {
		return;
	}
	// Automatic storage instead of new[]/delete[]: nothing can leak if
	// parsing a chunk throws.
	char buffer[1024];
	for (;;) {
		const int len = stream.read(buffer, sizeof(buffer));
		// A non-positive count ends the loop, so a negative value can never
		// reach the pointer arithmetic in parse().
		if (len <= 0) {
			break;
		}
		parse(buffer, len);
	}
	stream.close();
}
// Splits a chunk of text into words and control symbols and feeds them to
// the state machine.
//
// text  - chunk to scan (need not be NUL-terminated)
// len   - number of bytes in the chunk
// final - if true, a trailing word is processed immediately; otherwise it
//         is kept in myWord so it can be continued by the next chunk
void StyleSheetParser::parse(const char *text, int len, bool final) {
	const char *start = text;
	const char *end = text + len;
	for (const char *ptr = start; ptr != end; ++ptr) {
		// isspace() is undefined for negative arguments, and plain char is
		// negative for non-ASCII bytes where char is signed.
		const bool space = isspace(static_cast<unsigned char>(*ptr)) != 0;
		const bool control = !space && isControlSymbol(*ptr);
		if (!space && !control) {
			continue;
		}

		// A delimiter ends the current word: flush what has been collected.
		if (start != ptr) {
			myWord.append(start, ptr - start);
		}
		processWord(myWord);
		myWord.erase();
		if (control) {
			processControl(*ptr);
		}
		start = ptr + 1;
	}

	if (start < end) {
		myWord.append(start, end - start);
		if (final) {
			processWord(myWord);
			myWord.erase();
		}
	}
}
// True for the four characters that drive the state machine:
// '{', '}', ';' and ':'.
bool StyleSheetParser::isControlSymbol(const char symbol) {
	return
		(symbol == '{') ||
		(symbol == '}') ||
		(symbol == ';') ||
		(symbol == ':');
}
// Default sink for finished rules: discards them. Subclasses override
// this to keep the data.
void StyleSheetParser::storeData(
	const std::string&,
	const std::string&,
	const StyleSheetTable::AttributeMap&
) {
}
// Advances the state machine on a control symbol. Any symbol that is not
// legal in the current state puts the parser into BROKEN, which lasts
// until the next '}'.
void StyleSheetParser::processControl(const char control) {
	if (control == '{') {
		// A block may only open right after a selector.
		if (myReadState == TAG_NAME) {
			myReadState = ATTRIBUTE_NAME;
		} else {
			myReadState = BROKEN;
		}
	} else if (control == '}') {
		// End of rule: hand it over unless it was malformed, then start afresh.
		if (myReadState != BROKEN) {
			storeData(myTagName, myClassName, myMap);
		}
		myReadState = TAG_NAME;
		myTagName.erase();
		myClassName.erase();
		myMap.clear();
	} else if (control == ';') {
		const bool insideDeclaration =
			(myReadState == ATTRIBUTE_VALUE) || (myReadState == ATTRIBUTE_NAME);
		if (insideDeclaration) {
			myReadState = ATTRIBUTE_NAME;
		} else {
			myReadState = BROKEN;
		}
	} else if (control == ':') {
		if (myReadState == ATTRIBUTE_NAME) {
			myReadState = ATTRIBUTE_VALUE;
		} else {
			myReadState = BROKEN;
		}
	}
}
// Strips comment delimiters from a word and passes the parts that lie
// outside comments to processWordWithoutComments(). The comment state is
// kept in myInsideComment, so a comment may span several words.
// The word is consumed (modified) in the process.
void StyleSheetParser::processWord(std::string &word) {
	while (!word.empty()) {
		// Look for the delimiter that would flip the comment state. The
		// result stays a size_type and is compared with npos instead of
		// being narrowed to int and compared with -1.
		const std::string::size_type index = word.find(myInsideComment ? "*/" : "/*");
		if (index == std::string::npos) {
			// No delimiter left: the rest is either all text or all comment.
			if (!myInsideComment) {
				processWordWithoutComments(word);
			}
			break;
		}
		if (!myInsideComment && index > 0) {
			// Text preceding the opening "/*".
			processWordWithoutComments(word.substr(0, index));
		}
		myInsideComment = !myInsideComment;
		word.erase(0, index + 2);
	}
}
// Consumes one comment-free word according to the current state:
//  - TAG_NAME:        extends the selector; text after the first '.' goes
//                     to the class name, and the attribute map is cleared
//  - ATTRIBUTE_NAME:  starts a new attribute with an empty value list
//  - ATTRIBUTE_VALUE: appends the word to the current attribute's values
//  - BROKEN:          ignored
void StyleSheetParser::processWordWithoutComments(const std::string &word) {
	switch (myReadState) {
		case TAG_NAME:
		{
			// Compare against npos rather than narrowing find()'s result to int.
			const std::string::size_type index = word.find('.');
			if (index == std::string::npos) {
				if (myTagName.empty()) {
					myTagName = word;
				} else {
					myTagName += ' ' + word;
				}
			} else {
				if (myTagName.empty()) {
					myTagName = word.substr(0, index);
					myClassName = word.substr(index + 1);
				} else {
					myTagName += ' ' + word.substr(0, index);
					myClassName += ' ' + word.substr(index + 1);
				}
			}
			myMap.clear();
			break;
		}
		case ATTRIBUTE_NAME:
			myAttributeName = word;
			myMap[myAttributeName].clear();
			break;
		case ATTRIBUTE_VALUE:
			myMap[myAttributeName].push_back(word);
			break;
		case BROKEN:
			break;
	}
}

View file

@ -0,0 +1,82 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __STYLESHEETPARSER_H__
#define __STYLESHEETPARSER_H__
#include "StyleSheetTable.h"
class ZLInputStream;
// Incremental parser for a small subset of CSS: rules of the form
// "selector { name: value ...; ... }" with /* ... */ comments.
// Input is split into whitespace-separated words and the control symbols
// '{', '}', ';' and ':'; each finished rule is passed to storeData().
class StyleSheetParser {
protected:
// Only subclasses may be instantiated.
StyleSheetParser();
public:
virtual ~StyleSheetParser();
// Clears all collected data and returns to the initial state.
void reset();
// Opens the stream, parses its whole content in chunks and closes it.
void parse(ZLInputStream &stream);
// Parses one chunk of text. Unless 'final' is true, a trailing word is
// kept and continued by the next call.
void parse(const char *text, int len, bool final = false);
protected:
// Called when '}' closes a well-formed rule. The base implementation
// does nothing.
virtual void storeData(const std::string &tagName, const std::string &className, const StyleSheetTable::AttributeMap &map);
private:
bool isControlSymbol(const char symbol);
// Removes comments from a word, then processes what is left.
void processWord(std::string &word);
void processWordWithoutComments(const std::string &word);
void processControl(const char control);
private:
// Word being accumulated, possibly across parse() calls.
std::string myWord;
// Name of the attribute whose values are currently being read.
std::string myAttributeName;
// What the next word is expected to be; BROKEN means the current rule is
// malformed and is skipped until the closing '}'.
enum {
TAG_NAME,
ATTRIBUTE_NAME,
ATTRIBUTE_VALUE,
BROKEN,
} myReadState;
// True while the scanner is between "/*" and "*/".
bool myInsideComment;
// Selector of the rule in progress, split at '.' into tag and class.
std::string myTagName;
std::string myClassName;
// Attributes collected for the rule in progress.
StyleSheetTable::AttributeMap myMap;
friend class StyleSheetSingleStyleParser;
};
// Parser that stores every finished rule in a StyleSheetTable.
class StyleSheetTableParser : public StyleSheetParser {
public:
// The table is held by reference and must outlive the parser.
StyleSheetTableParser(StyleSheetTable &table);
private:
// Forwards the rule to StyleSheetTable::addMap().
void storeData(const std::string &tagName, const std::string &className, const StyleSheetTable::AttributeMap &map);
private:
StyleSheetTable &myTable;
};
// Parser for a single declaration list without selector or braces.
class StyleSheetSingleStyleParser : public StyleSheetParser {
public:
// Parses the NUL-terminated text and returns the resulting style entry;
// the parser is reset afterwards.
shared_ptr<ZLTextStyleEntry> parseString(const char *text);
};
#endif /* __STYLESHEETPARSER_H__ */

View file

@ -0,0 +1,220 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstdlib>
#include <ZLStringUtil.h>
#include "StyleSheetTable.h"
// True when no style entry and no page-break rule has been registered.
bool StyleSheetTable::isEmpty() const {
	if (!myControlMap.empty()) {
		return false;
	}
	if (!myPageBreakBeforeMap.empty()) {
		return false;
	}
	return myPageBreakAfterMap.empty();
}
void StyleSheetTable::addMap(const std::string &tag, const std::string &aClass, const AttributeMap &map) {
if ((!tag.empty() || !aClass.empty()) && !map.empty()) {
Key key(tag, aClass);
myControlMap[key] = createControl(map);
const std::vector<std::string> &pbb = values(map, "page-break-before");
if (!pbb.empty()) {
if ((pbb[0] == "always") ||
(pbb[0] == "left") ||
(pbb[0] == "right")) {
myPageBreakBeforeMap[key] = true;
} else if (pbb[0] == "avoid") {
myPageBreakBeforeMap[key] = false;
}
}
const std::vector<std::string> &pba = values(map, "page-break-after");
if (!pba.empty()) {
if ((pba[0] == "always") ||
(pba[0] == "left") ||
(pba[0] == "right")) {
myPageBreakAfterMap[key] = true;
} else if (pba[0] == "avoid") {
myPageBreakAfterMap[key] = false;
}
}
}
}
// Converts a CSS length string into a number and a unit, chosen by suffix:
//   "%"  -> percent, integer part of the string
//   "em" -> hundredths of an em
//   "ex" -> hundredths of an ex
//   anything else -> pixels, integer part of the string
static void parseLength(const std::string &toParse, short &size, ZLTextStyleEntry::SizeUnit &unit) {
	if (ZLStringUtil::stringEndsWith(toParse, "%")) {
		size = atoi(toParse.c_str());
		unit = ZLTextStyleEntry::SIZE_UNIT_PERCENT;
		return;
	}
	if (ZLStringUtil::stringEndsWith(toParse, "em")) {
		const double ems = ZLStringUtil::stringToDouble(toParse, 0);
		size = static_cast<short>(100 * ems);
		unit = ZLTextStyleEntry::SIZE_UNIT_EM_100;
		return;
	}
	if (ZLStringUtil::stringEndsWith(toParse, "ex")) {
		const double exs = ZLStringUtil::stringToDouble(toParse, 0);
		size = static_cast<short>(100 * exs);
		unit = ZLTextStyleEntry::SIZE_UNIT_EX_100;
		return;
	}
	size = atoi(toParse.c_str());
	unit = ZLTextStyleEntry::SIZE_UNIT_PIXEL;
}
// If the attribute is present in the map with a non-empty first value,
// parses that value as a length and stores it in the entry under 'name'.
// Otherwise the entry is left unchanged.
void StyleSheetTable::setLength(ZLTextStyleEntry &entry, ZLTextStyleEntry::Length name, const AttributeMap &map, const std::string &attributeName) {
	const StyleSheetTable::AttributeMap::const_iterator found = map.find(attributeName);
	if (found == map.end()) {
		return;
	}
	const std::vector<std::string> &list = found->second;
	if (list.empty() || list[0].empty()) {
		return;
	}

	short size;
	ZLTextStyleEntry::SizeUnit unit;
	parseLength(list[0], size, unit);
	entry.setLength(name, size, unit);
}
bool StyleSheetTable::doBreakBefore(const std::string &tag, const std::string &aClass) const {
std::map<Key,bool>::const_iterator it = myPageBreakBeforeMap.find(Key(tag, aClass));
if (it != myPageBreakBeforeMap.end()) {
return it->second;
}
it = myPageBreakBeforeMap.find(Key("", aClass));
if (it != myPageBreakBeforeMap.end()) {
return it->second;
}
it = myPageBreakBeforeMap.find(Key(tag, ""));
if (it != myPageBreakBeforeMap.end()) {
return it->second;
}
return false;
}
bool StyleSheetTable::doBreakAfter(const std::string &tag, const std::string &aClass) const {
std::map<Key,bool>::const_iterator it = myPageBreakAfterMap.find(Key(tag, aClass));
if (it != myPageBreakAfterMap.end()) {
return it->second;
}
it = myPageBreakAfterMap.find(Key("", aClass));
if (it != myPageBreakAfterMap.end()) {
return it->second;
}
it = myPageBreakAfterMap.find(Key(tag, ""));
if (it != myPageBreakAfterMap.end()) {
return it->second;
}
return false;
}
// Returns the style entry registered for exactly this (tag, class) pair,
// or a null pointer if there is none.
shared_ptr<ZLTextStyleEntry> StyleSheetTable::control(const std::string &tag, const std::string &aClass) const {
	const std::map<Key,shared_ptr<ZLTextStyleEntry> >::const_iterator found =
		myControlMap.find(Key(tag, aClass));
	if (found == myControlMap.end()) {
		return 0;
	}
	return found->second;
}
const std::vector<std::string> &StyleSheetTable::values(const AttributeMap &map, const std::string &name) {
const AttributeMap::const_iterator it = map.find(name);
if (it != map.end()) {
return it->second;
}
static const std::vector<std::string> emptyVector;
return emptyVector;
}
// Builds a style entry from a parsed attribute map. Only the first value
// of each attribute is looked at. Handled attributes: text-align,
// font-weight, font-style, font-variant, font-family, font-size (keyword
// values only) and the margin/padding/text-indent lengths.
// Attributes that are absent or carry unrecognized values are skipped.
shared_ptr<ZLTextStyleEntry> StyleSheetTable::createControl(const AttributeMap &styles) {
	shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry();

	// text-align
	const std::vector<std::string> &alignment = values(styles, "text-align");
	if (!alignment.empty()) {
		const std::string &align = alignment[0];
		if (align == "justify") {
			entry->setAlignmentType(ALIGN_JUSTIFY);
		} else if (align == "left") {
			entry->setAlignmentType(ALIGN_LEFT);
		} else if (align == "right") {
			entry->setAlignmentType(ALIGN_RIGHT);
		} else if (align == "center") {
			entry->setAlignmentType(ALIGN_CENTER);
		}
	}

	// font-weight: mapped to a numeric weight; 600 and above count as bold.
	const std::vector<std::string> &weight = values(styles, "font-weight");
	if (!weight.empty()) {
		const std::string &value = weight[0];
		int numeric = -1;
		if (value == "bold") {
			numeric = 700;
		} else if (value == "normal") {
			numeric = 400;
		} else if ((value.length() == 3) &&
							 (value[0] >= '1') && (value[0] <= '9') &&
							 (value[1] == '0') && (value[2] == '0')) {
			// "100" ... "900"
			numeric = 100 * (value[0] - '0');
		}
		// The relative keywords "bolder" and "lighter" are recognized by
		// CSS but not handled: like any other value they change nothing.
		if (numeric != -1) {
			entry->setFontModifier(FONT_MODIFIER_BOLD, numeric >= 600);
		}
	}

	// font-style
	const std::vector<std::string> &style = values(styles, "font-style");
	if (!style.empty()) {
		const bool isItalic = (style[0] == "italic");
		entry->setFontModifier(FONT_MODIFIER_ITALIC, isItalic);
	}

	// font-variant
	const std::vector<std::string> &variant = values(styles, "font-variant");
	if (!variant.empty()) {
		const bool isSmallCaps = (variant[0] == "small-caps");
		entry->setFontModifier(FONT_MODIFIER_SMALLCAPS, isSmallCaps);
	}

	// font-family
	const std::vector<std::string> &family = values(styles, "font-family");
	if (!family.empty() && !family[0].empty()) {
		entry->setFontFamily(family[0]);
	}

	// font-size: keywords map to magnitudes -3 (xx-small) ... 3 (xx-large).
	const std::vector<std::string> &fontSize = values(styles, "font-size");
	if (!fontSize.empty()) {
		static const char *const SIZE_KEYWORDS[] = {
			"xx-small", "x-small", "small", "medium", "large", "x-large", "xx-large"
		};
		const int keywordCount = sizeof(SIZE_KEYWORDS) / sizeof(SIZE_KEYWORDS[0]);
		for (int i = 0; i < keywordCount; ++i) {
			if (fontSize[0] == SIZE_KEYWORDS[i]) {
				entry->setFontSizeMag(i - 3);
				break;
			}
		}
	}

	// Lengths. Order matters: when both are given, padding-top/bottom is
	// applied after margin-top/bottom and therefore overrides it.
	static const struct {
		ZLTextStyleEntry::Length Kind;
		const char *Attribute;
	} LENGTHS[] = {
		{ ZLTextStyleEntry::LENGTH_LEFT_INDENT, "margin-left" },
		{ ZLTextStyleEntry::LENGTH_RIGHT_INDENT, "margin-right" },
		{ ZLTextStyleEntry::LENGTH_FIRST_LINE_INDENT_DELTA, "text-indent" },
		{ ZLTextStyleEntry::LENGTH_SPACE_BEFORE, "margin-top" },
		{ ZLTextStyleEntry::LENGTH_SPACE_BEFORE, "padding-top" },
		{ ZLTextStyleEntry::LENGTH_SPACE_AFTER, "margin-bottom" },
		{ ZLTextStyleEntry::LENGTH_SPACE_AFTER, "padding-bottom" }
	};
	for (size_t i = 0; i < sizeof(LENGTHS) / sizeof(LENGTHS[0]); ++i) {
		setLength(*entry, LENGTHS[i].Kind, styles, LENGTHS[i].Attribute);
	}

	return entry;
}

View file

@ -0,0 +1,73 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __STYLESHEETTABLE_H__
#define __STYLESHEETTABLE_H__
#include <string>
#include <map>
#include <vector>
#include <shared_ptr.h>
#include <ZLTextParagraph.h>
// Collection of style rules keyed by (tag name, class name).
// For each key it stores a style entry and, optionally, whether a page
// break is requested before/after the element. The table is filled by
// StyleSheetTableParser through the private addMap().
class StyleSheetTable {
public:
// Attribute name -> list of values, as produced by the parser.
typedef std::map<std::string,std::vector<std::string> > AttributeMap;
// Builds a style entry from an attribute map.
static shared_ptr<ZLTextStyleEntry> createControl(const AttributeMap &map);
private:
// Registers one rule; ignored if the selector or the map is empty.
void addMap(const std::string &tag, const std::string &aClass, const AttributeMap &map);
// Sets one length of the entry from the named attribute, if present.
static void setLength(ZLTextStyleEntry &entry, ZLTextStyleEntry::Length name, const AttributeMap &map, const std::string &attributeName);
// Value list for 'name', or an empty vector if the attribute is absent.
static const std::vector<std::string> &values(const AttributeMap &map, const std::string &name);
public:
bool isEmpty() const;
// Page-break queries try (tag, class), then ("", class), then (tag, "");
// false is returned when no rule matches.
bool doBreakBefore(const std::string &tag, const std::string &aClass) const;
bool doBreakAfter(const std::string &tag, const std::string &aClass) const;
// Exact-match lookup only; returns a null pointer if nothing is registered.
shared_ptr<ZLTextStyleEntry> control(const std::string &tag, const std::string &aClass) const;
private:
// Map key: a (tag, class) pair ordered by tag name, then class name.
struct Key {
Key(const std::string &tag, const std::string &aClass);
const std::string TagName;
const std::string ClassName;
bool operator < (const Key &key) const;
};
std::map<Key,shared_ptr<ZLTextStyleEntry> > myControlMap;
std::map<Key,bool> myPageBreakBeforeMap;
std::map<Key,bool> myPageBreakAfterMap;
friend class StyleSheetTableParser;
};
// Stores copies of the tag and class names that make up the key.
inline StyleSheetTable::Key::Key(const std::string &tag, const std::string &aClass)
	: TagName(tag),
	  ClassName(aClass) {
}
// Lexicographic order: tag name first, class name as the tie-breaker.
inline bool StyleSheetTable::Key::operator < (const StyleSheetTable::Key &key) const {
	if (TagName != key.TagName) {
		return TagName < key.TagName;
	}
	return ClassName < key.ClassName;
}
#endif /* __STYLESHEETTABLE_H__ */

View file

@ -0,0 +1,131 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstdlib>
#include "NCXReader.h"
#include "../util/MiscUtil.h"
#include "../util/EntityFilesCollector.h"
// Starts outside the navigation map. myPlayIndex supplies the order for
// navPoints without a "playOrder" attribute; it starts at -65535 and is
// incremented each time it is used.
NCXReader::NCXReader(BookReader &modelReader)
	: myModelReader(modelReader),
	  myReadState(READ_NONE),
	  myPlayIndex(-65535) {
}
// Element names (namespace prefix already stripped) that the NCX handlers
// react to.
static const std::string TAG_NAVMAP = "navMap";
static const std::string TAG_NAVPOINT = "navPoint";
static const std::string TAG_NAVLABEL = "navLabel";
static const std::string TAG_CONTENT = "content";
static const std::string TAG_TEXT = "text";
// Handles an opening tag. The namespace prefix (everything up to the last
// ':') is dropped before the name is compared.
//  - navMap outside the map enters the map
//  - navPoint inside the map or inside another point pushes a new point
//    whose level is the current stack depth
//  - navLabel / content inside a point start the label or set the target
//  - text inside a label starts collecting the label text
void NCXReader::startElementHandler(const char *fullTag, const char **attributes) {
	std::string tag(fullTag);
	const size_t colon = tag.rfind(':');
	if (colon != std::string::npos) {
		tag.erase(0, colon + 1);
	}

	if (myReadState == READ_NONE) {
		if (tag == TAG_NAVMAP) {
			myReadState = READ_MAP;
		}
		return;
	}
	if (myReadState == READ_LABEL) {
		if (tag == TAG_TEXT) {
			myReadState = READ_TEXT;
		}
		return;
	}
	if (myReadState == READ_TEXT) {
		return;
	}

	// From here on the state is READ_MAP or READ_POINT.
	if (tag == TAG_NAVPOINT) {
		const char *order = attributeValue(attributes, "playOrder");
		const int playOrder = (order != 0) ? atoi(order) : myPlayIndex++;
		myPointStack.push_back(NavPoint(playOrder, myPointStack.size()));
		myReadState = READ_POINT;
		return;
	}
	if (myReadState != READ_POINT) {
		return;
	}

	if (tag == TAG_NAVLABEL) {
		myReadState = READ_LABEL;
	} else if (tag == TAG_CONTENT) {
		const char *src = attributeValue(attributes, "src");
		if (src != 0) {
			myPointStack.back().ContentHRef = MiscUtil::decodeHtmlURL(src);
		}
	}
}
// Handles a closing tag. The namespace prefix (everything up to the last
// ':') is dropped before the name is compared.
// Closing a navPoint stores it in the navigation map under its order
// (an empty label becomes "...") and pops it from the stack.
void NCXReader::endElementHandler(const char *fullTag) {
	std::string tag = fullTag;
	const size_t index = tag.rfind(':');
	if (index != std::string::npos) {
		tag = tag.substr(index + 1);
	}
	switch (myReadState) {
		case READ_NONE:
			break;
		case READ_MAP:
			if (TAG_NAVMAP == tag) {
				myReadState = READ_NONE;
			}
			break;
		case READ_POINT:
			if (TAG_NAVPOINT == tag) {
				if (myPointStack.back().Text.empty()) {
					myPointStack.back().Text = "...";
				}
				myNavigationMap[myPointStack.back().Order] = myPointStack.back();
				myPointStack.pop_back();
				// Back to the map level once the outermost point is closed.
				myReadState = myPointStack.empty() ? READ_MAP : READ_POINT;
			}
			// Do not fall through into READ_LABEL: a closing navLabel seen in
			// this state would only re-assign READ_POINT, so leaving the
			// switch here keeps the behaviour and makes the intent explicit.
			break;
		case READ_LABEL:
			if (TAG_NAVLABEL == tag) {
				myReadState = READ_POINT;
			}
			break;
		case READ_TEXT:
			if (TAG_TEXT == tag) {
				myReadState = READ_LABEL;
			}
			break;
	}
}
void NCXReader::characterDataHandler(const char *text, size_t len) {
if (myReadState == READ_TEXT) {
myPointStack.back().Text.append(text, len);
}
}
// Uses the DTD list registered under "xhtml" in the entity files collector.
const std::vector<std::string> &NCXReader::externalDTDs() const {
	return EntityFilesCollector::Instance()
		.externalDTDs("xhtml");
}
const std::map<int,NCXReader::NavPoint> &NCXReader::navigationMap() const {
return myNavigationMap;
}
// Default constructor, needed by std::map::operator[]. Order and Level are
// zero-initialized so a default-constructed point never carries
// indeterminate values; Text and ContentHRef start empty.
NCXReader::NavPoint::NavPoint() : Order(0), Level(0) {
}
// Creates a point with the given order and nesting level; the label and
// the target reference start empty.
NCXReader::NavPoint::NavPoint(int order, size_t level)
	: Order(order),
	  Level(level) {
}

View file

@ -0,0 +1,69 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __NCXREADER_H__
#define __NCXREADER_H__
#include <map>
#include <vector>
#include <ZLXMLReader.h>
#include "../../bookmodel/BookReader.h"
// XML reader that collects the navPoint entries of an NCX document into a
// map ordered by play order.
class NCXReader : public ZLXMLReader {
public:
// One navigation point.
struct NavPoint {
NavPoint();
NavPoint(int order, size_t level);
// Value of the "playOrder" attribute, or a generated value if absent.
int Order;
// Nesting depth: number of enclosing navPoints when this one was opened.
size_t Level;
// Label text; "..." is substituted if none was found.
std::string Text;
// Decoded "src" attribute of the point's content element.
std::string ContentHRef;
};
public:
NCXReader(BookReader &modelReader);
// Points collected so far, keyed by Order.
const std::map<int,NavPoint> &navigationMap() const;
private:
void startElementHandler(const char *tag, const char **attributes);
void endElementHandler(const char *tag);
void characterDataHandler(const char *text, size_t len);
const std::vector<std::string> &externalDTDs() const;
private:
// Held by reference; must outlive this reader.
BookReader &myModelReader;
std::map<int,NavPoint> myNavigationMap;
// Currently open navPoints, innermost last.
std::vector<NavPoint> myPointStack;
// Position in the document: outside navMap, in navMap, in a navPoint,
// in a navLabel, or in a label's text element.
enum {
READ_NONE,
READ_MAP,
READ_POINT,
READ_LABEL,
READ_TEXT
} myReadState;
// Next order value for navPoints that have no "playOrder" attribute.
int myPlayIndex;
};
#endif /* __NCXREADER_H__ */

View file

@ -0,0 +1,211 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <algorithm>
#include <ZLStringUtil.h>
#include <ZLUnicodeUtil.h>
#include <ZLFile.h>
#include <ZLFileImage.h>
#include <ZLXMLNamespace.h>
#include "OEBBookReader.h"
#include "NCXReader.h"
#include "../xhtml/XHTMLReader.h"
#include "../util/MiscUtil.h"
#include "../util/EntityFilesCollector.h"
#include "../../bookmodel/BookModel.h"
// Creates a reader that fills the given book model.
// myState is initialized here so the element handlers never read an
// indeterminate value, even if they run before readBook() resets it.
OEBBookReader::OEBBookReader(BookModel &model) : myModelReader(model), myState(READ_NONE) {
}
// Lower-case element names (OPF prefix already stripped) that the package
// handlers react to.
static const std::string MANIFEST = "manifest";
static const std::string SPINE = "spine";
static const std::string GUIDE = "guide";
static const std::string TOUR = "tour";
static const std::string SITE = "site";
static const std::string ITEM = "item";
static const std::string ITEMREF = "itemref";
static const std::string REFERENCE = "reference";
// Value of a guide reference's "type" attribute that marks the cover image.
static const std::string COVER_IMAGE = "other.ms-coverimage-standard";
// Handles an opening tag of the package document. The tag is lower-cased
// and the OPF namespace prefix, if any, is removed before comparison.
//  - manifest / spine / guide / tour switch the section state; spine also
//    resolves its "toc" attribute to the NCX file name
//  - item (in manifest) records the id -> href mapping
//  - itemref (in spine) appends the referenced file to the reading order
//  - reference (in guide) adds a guide TOC entry and, for the cover image
//    type, inserts the image into the main text model
//  - site (in tour) adds a tour TOC entry
void OEBBookReader::startElementHandler(const char *tag, const char **xmlattributes) {
	std::string tagString = ZLUnicodeUtil::toLower(tag);
	if (!myOPFSchemePrefix.empty() &&
			ZLStringUtil::stringStartsWith(tagString, myOPFSchemePrefix)) {
		tagString = tagString.substr(myOPFSchemePrefix.length());
	}

	if (MANIFEST == tagString) {
		myState = READ_MANIFEST;
	} else if (SPINE == tagString) {
		const char *toc = attributeValue(xmlattributes, "toc");
		if (toc != 0) {
			// find() instead of operator[]: an unknown id must not insert an
			// empty entry into the manifest map.
			const std::map<std::string,std::string>::const_iterator it = myIdToHref.find(toc);
			if (it != myIdToHref.end()) {
				myNCXTOCFileName = it->second;
			} else {
				myNCXTOCFileName.erase();
			}
		}
		myState = READ_SPINE;
	} else if (GUIDE == tagString) {
		myState = READ_GUIDE;
	} else if (TOUR == tagString) {
		myState = READ_TOUR;
	} else if ((myState == READ_MANIFEST) && (ITEM == tagString)) {
		const char *id = attributeValue(xmlattributes, "id");
		const char *href = attributeValue(xmlattributes, "href");
		if ((id != 0) && (href != 0)) {
			myIdToHref[id] = MiscUtil::decodeHtmlURL(href);
		}
	} else if ((myState == READ_SPINE) && (ITEMREF == tagString)) {
		const char *id = attributeValue(xmlattributes, "idref");
		if (id != 0) {
			// Same as above: look up without inserting.
			const std::map<std::string,std::string>::const_iterator it = myIdToHref.find(id);
			if ((it != myIdToHref.end()) && !it->second.empty()) {
				myHtmlFileNames.push_back(it->second);
			}
		}
	} else if ((myState == READ_GUIDE) && (REFERENCE == tagString)) {
		const char *type = attributeValue(xmlattributes, "type");
		const char *title = attributeValue(xmlattributes, "title");
		const char *href = attributeValue(xmlattributes, "href");
		if (href != 0) {
			const std::string reference = MiscUtil::decodeHtmlURL(href);
			if (title != 0) {
				myGuideTOC.push_back(std::make_pair(std::string(title), reference));
			}
			if ((type != 0) && (COVER_IMAGE == type)) {
				myModelReader.setMainTextModel();
				ZLFile imageFile(myFilePrefix + reference);
				const std::string imageName = imageFile.name(false);
				myModelReader.addImageReference(imageName, 0, false);
				myModelReader.addImage(imageName, new ZLFileImage(imageFile, "", 0));
			}
		}
	} else if ((myState == READ_TOUR) && (SITE == tagString)) {
		const char *title = attributeValue(xmlattributes, "title");
		const char *href = attributeValue(xmlattributes, "href");
		if ((title != 0) && (href != 0)) {
			myTourTOC.push_back(std::make_pair(title, MiscUtil::decodeHtmlURL(href)));
		}
	}
}
// Handles a closing tag: leaving manifest, spine, guide or tour returns
// the reader to the neutral state. The tag is lower-cased and stripped of
// the OPF namespace prefix first.
void OEBBookReader::endElementHandler(const char *tag) {
	std::string name = ZLUnicodeUtil::toLower(tag);
	const bool hasPrefix =
		!myOPFSchemePrefix.empty() &&
		ZLStringUtil::stringStartsWith(name, myOPFSchemePrefix);
	if (hasPrefix) {
		name = name.substr(myOPFSchemePrefix.length());
	}

	const bool closesSection =
		(MANIFEST == name) ||
		(SPINE == name) ||
		(GUIDE == name) ||
		(TOUR == name);
	if (closesSection) {
		myState = READ_NONE;
	}
}
// Reads a book from its package file: parses the package document, reads
// every file of the reading order into the main text model (separated by
// end-of-section paragraphs) and finally builds the table of contents.
// Returns false if the package document cannot be read.
bool OEBBookReader::readBook(const ZLFile &file) {
	// Forget everything from a previous run.
	myState = READ_NONE;
	myIdToHref.clear();
	myHtmlFileNames.clear();
	myNCXTOCFileName.erase();
	myTourTOC.clear();
	myGuideTOC.clear();
	myFilePrefix = MiscUtil::htmlDirectoryPrefix(file.path());

	if (!readDocument(file)) {
		return false;
	}

	myModelReader.setMainTextModel();
	myModelReader.pushKind(REGULAR);

	bool firstSection = true;
	for (std::vector<std::string>::const_iterator name = myHtmlFileNames.begin(); name != myHtmlFileNames.end(); ++name) {
		if (!firstSection) {
			myModelReader.insertEndOfSectionParagraph();
		}
		firstSection = false;

		XHTMLReader xhtmlReader(myModelReader);
		xhtmlReader.readFile(ZLFile(myFilePrefix + *name), *name);
	}

	generateTOC();
	return true;
}
// Builds the table of contents.
// Preferred source is the NCX file named by the spine: its navigation
// points are emitted in order, with "..." placeholder entries inserted
// where nesting levels are skipped. If there is no NCX file, it cannot be
// read, or it yields no points, the tour entries (or, if there are none,
// the guide entries) whose targets resolve to a paragraph are used.
void OEBBookReader::generateTOC() {
	if (!myNCXTOCFileName.empty()) {
		NCXReader ncxReader(myModelReader);
		if (ncxReader.readDocument(ZLFile(myFilePrefix + myNCXTOCFileName))) {
			// Bind the returned reference instead of copying the whole map;
			// ncxReader stays alive for the rest of this block.
			const std::map<int,NCXReader::NavPoint> &navigationMap = ncxReader.navigationMap();
			if (!navigationMap.empty()) {
				size_t level = 0;
				for (std::map<int,NCXReader::NavPoint>::const_iterator it = navigationMap.begin(); it != navigationMap.end(); ++it) {
					const NCXReader::NavPoint &point = it->second;
					int index = myModelReader.model().label(point.ContentHRef).ParagraphNumber;
					// Close entries that are deeper than the new point.
					while (level > point.Level) {
						myModelReader.endContentsParagraph();
						--level;
					}
					// Open placeholder entries for any skipped levels.
					while (++level <= point.Level) {
						myModelReader.beginContentsParagraph(-2);
						myModelReader.addContentsData("...");
					}
					myModelReader.beginContentsParagraph(index);
					myModelReader.addContentsData(point.Text);
				}
				while (level > 0) {
					myModelReader.endContentsParagraph();
					--level;
				}
				return;
			}
		}
	}

	const std::vector<std::pair<std::string,std::string> > &toc = myTourTOC.empty() ? myGuideTOC : myTourTOC;
	for (std::vector<std::pair<std::string,std::string> >::const_iterator it = toc.begin(); it != toc.end(); ++it) {
		int index = myModelReader.model().label(it->second).ParagraphNumber;
		if (index != -1) {
			myModelReader.beginContentsParagraph(index);
			myModelReader.addContentsData(it->first);
			myModelReader.endContentsParagraph();
		}
	}
}
// This reader always asks for namespace processing.
bool OEBBookReader::processNamespaces() const {
	const bool enabled = true;
	return enabled;
}
void OEBBookReader::namespaceListChangedHandler() {
const std::map<std::string,std::string> &namespaceMap = namespaces();
std::map<std::string,std::string>::const_iterator iter = namespaceMap.begin();
for (; iter != namespaceMap.end(); ++iter) {
if (iter->second == ZLXMLNamespace::OpenPackagingFormat) {
break;
}
}
if (iter != namespaceMap.end()) {
myOPFSchemePrefix = iter->first + ":";
} else {
myOPFSchemePrefix.erase();
}
}
// Uses the DTD list registered under "xhtml" in the entity files collector.
const std::vector<std::string> &OEBBookReader::externalDTDs() const {
	return EntityFilesCollector::Instance()
		.externalDTDs("xhtml");
}

View file

@ -0,0 +1,68 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OEBBOOKREADER_H__
#define __OEBBOOKREADER_H__
#include <map>
#include <vector>
#include <string>
#include <ZLXMLReader.h>
#include "../../bookmodel/BookReader.h"
// XML reader for an OPF package document that fills a BookModel
// (through the BookReader member) and generates its table of contents.
class OEBBookReader : public ZLXMLReader {
public:
OEBBookReader(BookModel &model);
// Reads the book described by the given file; returns true on success.
bool readBook(const ZLFile &file);
private:
// ZLXMLReader callbacks.
void startElementHandler(const char *tag, const char **attributes);
void endElementHandler(const char *tag);
bool processNamespaces() const;
void namespaceListChangedHandler();
const std::vector<std::string> &externalDTDs() const;
// Builds the TOC from the NCX file if available, otherwise from
// myTourTOC / myGuideTOC.
void generateTOC();
private:
// NOTE(review): presumably the section of the OPF document currently being read.
enum ReaderState {
READ_NONE,
READ_MANIFEST,
READ_SPINE,
READ_GUIDE,
READ_TOUR
};
BookReader myModelReader;
ReaderState myState;
// "<prefix>:" of the OPF namespace, or empty (see namespaceListChangedHandler).
std::string myOPFSchemePrefix;
// Prepended to file names taken from the document (e.g. the NCX file name).
std::string myFilePrefix;
std::map<std::string,std::string> myIdToHref;
std::vector<std::string> myHtmlFileNames;
std::string myNCXTOCFileName;
// Fallback TOC sources: pairs of (entry title, reference looked up as a model label).
std::vector<std::pair<std::string,std::string> > myTourTOC;
std::vector<std::pair<std::string,std::string> > myGuideTOC;
};
#endif /* __OEBBOOKREADER_H__ */

View file

@ -0,0 +1,110 @@
/*
* Copyright (C) 2009-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include <ZLFileImage.h>
#include "OEBCoverReader.h"
#include "../util/MiscUtil.h"
// XML reader that looks for the first <img> element with a "src" attribute
// and stores the corresponding image in the given OEBCoverReader.
class XHTMLImageFinder : public ZLXMLReader {
public:
XHTMLImageFinder(OEBCoverReader &coverReader);
private:
void startElementHandler(const char *tag, const char **attributes);
private:
// Receives the result (myImage) and supplies the path prefix.
OEBCoverReader &myCoverReader;
};
// Stores a reference to the cover reader whose myImage will be set.
XHTMLImageFinder::XHTMLImageFinder(OEBCoverReader &coverReader) : myCoverReader(coverReader) {
}
static const std::string IMG = "img";

// On the first <img> element that has a "src" attribute, stores the
// referenced file as the cover image of the owning OEBCoverReader and stops
// parsing. Other elements are ignored.
void XHTMLImageFinder::startElementHandler(const char *tag, const char **attributes) {
	if (IMG != tag) {
		return;
	}
	const char *src = attributeValue(attributes, "src");
	if (src == 0) {
		return;
	}
	// The attribute is a URL reference, so it is decoded before being used
	// as a file path, in the same way the hrefs in
	// OEBCoverReader::startElementHandler are.
	myCoverReader.myImage =
		new ZLFileImage(ZLFile(myCoverReader.myPathPrefix + MiscUtil::decodeHtmlURL(src)), "", 0);
	interrupt();
}
// Does nothing; the reader state is (re)initialized at the start of readCover().
OEBCoverReader::OEBCoverReader() {
}
// Extracts the cover image referenced by the given OPF file.
//
// The OPF document is parsed first (see startElementHandler); that pass
// either sets myImage directly or records the path of a cover document in
// myCoverXHTML. If a cover document was recorded and its extension names an
// image format, the file itself is the cover; otherwise it is parsed with
// XHTMLImageFinder, which takes the first <img> it finds.
//
// Returns the image that was found, or a null pointer.
shared_ptr<ZLImage> OEBCoverReader::readCover(const ZLFile &file) {
	myPathPrefix = MiscUtil::htmlDirectoryPrefix(file.path());
	myReadGuide = false;
	myImage = 0;
	myCoverXHTML.erase();
	readDocument(file);

	// From here on the prefix refers to the cover document's location,
	// because XHTMLImageFinder resolves <img src> against myPathPrefix.
	myPathPrefix = MiscUtil::htmlDirectoryPrefix(myCoverXHTML);
	if (!myCoverXHTML.empty()) {
		ZLFile coverFile(myCoverXHTML);
		const std::string ext = coverFile.extension();
		// "png" is included so that a cover reference pointing directly at
		// a PNG file is not handed to the XML parser.
		if (ext == "gif" || ext == "jpeg" || ext == "jpg" || ext == "png") {
			myImage = new ZLFileImage(coverFile, "", 0);
		} else {
			XHTMLImageFinder(*this).readDocument(coverFile);
		}
	}
	return myImage;
}
// Element names and <reference type="..."> values recognized by
// OEBCoverReader::startElementHandler / endElementHandler.
static const std::string GUIDE = "guide";
static const std::string REFERENCE = "reference";
// type value: href is stored as the cover document path (myCoverXHTML).
static const std::string COVER = "cover";
// type value: href is used directly as the cover image file.
static const std::string COVER_IMAGE = "other.ms-coverimage-standard";
// Handles <guide> and the <reference> elements inside it.
// A reference of type "cover" records its decoded href (prefixed with
// myPathPrefix) in myCoverXHTML; a reference of type
// "other.ms-coverimage-standard" turns that path into the cover image.
// In both cases parsing is interrupted.
void OEBCoverReader::startElementHandler(const char *tag, const char **attributes) {
	if (GUIDE == tag) {
		myReadGuide = true;
		return;
	}
	if (!myReadGuide || REFERENCE != tag) {
		return;
	}

	const char *type = attributeValue(attributes, "type");
	if (type == 0) {
		return;
	}
	const bool refersToCoverPage = (COVER == type);
	if (!refersToCoverPage && COVER_IMAGE != type) {
		return;
	}

	const char *href = attributeValue(attributes, "href");
	if (href == 0) {
		return;
	}
	const std::string target = myPathPrefix + MiscUtil::decodeHtmlURL(href);
	if (refersToCoverPage) {
		myCoverXHTML = target;
	} else {
		myImage = new ZLFileImage(ZLFile(target), "", 0);
	}
	interrupt();
}
// Stops parsing once the <guide> element is closed; other end tags are ignored.
void OEBCoverReader::endElementHandler(const char *tag) {
	if (GUIDE != tag) {
		return;
	}
	myReadGuide = false;
	interrupt();
}

View file

@ -0,0 +1,48 @@
/*
* Copyright (C) 2009-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OEBCOVERREADER_H__
#define __OEBCOVERREADER_H__
#include <vector>
#include <shared_ptr.h>
#include <ZLXMLReader.h>
class ZLImage;
// XML reader that locates the cover image of a book described by an OPF file.
class OEBCoverReader : public ZLXMLReader {
public:
OEBCoverReader();
// Parses the given OPF file and returns the cover image, or a null pointer.
shared_ptr<ZLImage> readCover(const ZLFile &file);
// ZLXMLReader callbacks.
void startElementHandler(const char *tag, const char **attributes);
void endElementHandler(const char *tag);
private:
// Result of the last readCover() call.
shared_ptr<ZLImage> myImage;
// Prefix prepended to relative hrefs.
std::string myPathPrefix;
// Path of the cover document found in the guide, if any.
std::string myCoverXHTML;
// True while inside the <guide> element.
bool myReadGuide;
// XHTMLImageFinder writes myImage and reads myPathPrefix.
friend class XHTMLImageFinder;
};
#endif /* __OEBCOVERREADER_H__ */

View file

@ -0,0 +1,189 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstdlib>
#include <ZLStringUtil.h>
#include <ZLUnicodeUtil.h>
#include <ZLLogger.h>
#include <ZLXMLNamespace.h>
#include "OEBMetaInfoReader.h"
#include "../util/EntityFilesCollector.h"
#include "../../library/Book.h"
// Binds the reader to the book and clears the book's authors, title and tags,
// which are filled in again while reading.
OEBMetaInfoReader::OEBMetaInfoReader(Book &book) : myBook(book) {
myBook.removeAllAuthors();
myBook.setTitle("");
myBook.removeAllTags();
}
// Names of the elements that open the metadata section.
static const std::string METADATA = "metadata";
static const std::string DC_METADATA = "dc-metadata";
// Tag name suffixes; the part before the suffix is checked as a Dublin Core
// namespace prefix.
static const std::string TITLE_SUFFIX = ":title";
static const std::string AUTHOR_SUFFIX = ":creator";
static const std::string SUBJECT_SUFFIX = ":subject";
static const std::string LANGUAGE_SUFFIX = ":language";
// Short names of <meta name="..."> values checked against the Calibre
// metadata namespace.
static const std::string SERIES = "series";
static const std::string SERIES_INDEX = "series_index";
static const std::string META = "meta";
// Value of the "role" attribute that marks a creator as an author.
static const std::string AUTHOR_ROLE = "aut";
// Accumulates character data in myBuffer while one of the metadata fields
// (author, subject, language, title) is being read; text seen in the
// READ_NONE state is dropped.
void OEBMetaInfoReader::characterDataHandler(const char *text, size_t len) {
	const bool collecting =
		myReadState == READ_AUTHOR ||
		myReadState == READ_AUTHOR2 ||
		myReadState == READ_SUBJECT ||
		myReadState == READ_LANGUAGE ||
		myReadState == READ_TITLE;
	if (collecting) {
		myBuffer.append(text, len);
	}
}
bool OEBMetaInfoReader::isDublinCoreNamespace(const std::string &nsId) const {
const std::map<std::string,std::string> &namespaceMap = namespaces();
std::map<std::string,std::string>::const_iterator iter = namespaces().find(nsId);
return
((iter != namespaceMap.end()) &&
(ZLStringUtil::stringStartsWith(iter->second, ZLXMLNamespace::DublinCorePrefix) ||
ZLStringUtil::stringStartsWith(iter->second, ZLXMLNamespace::DublinCoreLegacyPrefix)));
}
bool OEBMetaInfoReader::isNSName(const std::string &fullName, const std::string &shortName, const std::string &fullNSId) const {
const int prefixLength = fullName.length() - shortName.length() - 1;
if (prefixLength <= 0 ||
fullName[prefixLength] != ':' ||
!ZLStringUtil::stringEndsWith(fullName, shortName)) {
return false;
}
const std::map<std::string,std::string> &namespaceMap = namespaces();
std::map<std::string,std::string>::const_iterator iter =
namespaceMap.find(fullName.substr(0, prefixLength));
return iter != namespaceMap.end() && iter->second == fullNSId;
}
// Start-tag callback. The tag name is lower-cased before matching.
// - "metadata", "dc-metadata" or an OPF-namespaced "metadata" opens the
//   metadata section and is remembered so that endElementHandler can stop
//   parsing when it closes.
// - Inside the metadata section, Dublin Core title/creator/subject/language
//   elements select the read state used by characterDataHandler and
//   endElementHandler; <meta name=... content=...> with a Calibre-namespaced
//   name sets the series title or the index in the series.
void OEBMetaInfoReader::startElementHandler(const char *tag, const char **attributes) {
const std::string tagString = ZLUnicodeUtil::toLower(tag);
if (METADATA == tagString || DC_METADATA == tagString ||
isNSName(tagString, METADATA, ZLXMLNamespace::OpenPackagingFormat)) {
myDCMetadataTag = tagString;
myReadMetaData = true;
} else if (myReadMetaData) {
if (ZLStringUtil::stringEndsWith(tagString, TITLE_SUFFIX)) {
if (isDublinCoreNamespace(tagString.substr(0, tagString.length() - TITLE_SUFFIX.length()))) {
myReadState = READ_TITLE;
}
} else if (ZLStringUtil::stringEndsWith(tagString, AUTHOR_SUFFIX)) {
if (isDublinCoreNamespace(tagString.substr(0, tagString.length() - AUTHOR_SUFFIX.length()))) {
// Creators with role "aut" go to the primary author list, creators
// without a role to the secondary one; any other role is ignored.
const char *role = attributeValue(attributes, "role");
if (role == 0) {
myReadState = READ_AUTHOR2;
} else if (AUTHOR_ROLE == role) {
myReadState = READ_AUTHOR;
}
}
} else if (ZLStringUtil::stringEndsWith(tagString, SUBJECT_SUFFIX)) {
if (isDublinCoreNamespace(tagString.substr(0, tagString.length() - SUBJECT_SUFFIX.length()))) {
myReadState = READ_SUBJECT;
}
} else if (ZLStringUtil::stringEndsWith(tagString, LANGUAGE_SUFFIX)) {
if (isDublinCoreNamespace(tagString.substr(0, tagString.length() - LANGUAGE_SUFFIX.length()))) {
myReadState = READ_LANGUAGE;
}
} else if (tagString == META) {
const char *name = attributeValue(attributes, "name");
const char *content = attributeValue(attributes, "content");
if (name != 0 && content != 0) {
std::string sName = name;
// Each setSeries call keeps the other component as it currently is.
if (isNSName(sName, SERIES, ZLXMLNamespace::CalibreMetadata)) {
myBook.setSeries(content, myBook.indexInSeries());
} else if (isNSName(sName, SERIES_INDEX, ZLXMLNamespace::CalibreMetadata)) {
myBook.setSeries(myBook.seriesTitle(), atoi(content));
}
}
}
}
}
// End-tag callback.
// Closing the metadata element (the tag remembered in myDCMetadataTag) stops
// parsing. Any other end tag commits the text collected in myBuffer, after
// whitespace stripping, to the field selected by myReadState, then resets
// the buffer and the state.
void OEBMetaInfoReader::endElementHandler(const char *tag) {
	const std::string tagString = ZLUnicodeUtil::toLower(tag);
	if (myDCMetadataTag == tagString) {
		interrupt();
		return;
	}

	ZLStringUtil::stripWhiteSpaces(myBuffer);
	if (!myBuffer.empty()) {
		switch (myReadState) {
			case READ_NONE:
				break;
			case READ_AUTHOR:
				myAuthorList.push_back(myBuffer);
				break;
			case READ_AUTHOR2:
				myAuthorList2.push_back(myBuffer);
				break;
			case READ_SUBJECT:
				myBook.addTag(myBuffer);
				break;
			case READ_TITLE:
				myBook.setTitle(myBuffer);
				break;
			case READ_LANGUAGE:
			{
				// Keep only the part before the first '-' or '_'
				// (e.g. "en-US" or "en_US" becomes "en"). The position is
				// kept as size_type and compared with npos instead of being
				// narrowed to int.
				const std::string::size_type separator = myBuffer.find_first_of("-_");
				if (separator != std::string::npos) {
					myBuffer.erase(separator);
				}
				// "cz" is replaced with "cs" before it is stored.
				if (myBuffer == "cz") {
					myBuffer = "cs";
				}
				myBook.setLanguage(myBuffer);
				break;
			}
		}
		myBuffer.erase();
	}
	myReadState = READ_NONE;
}
// Always returns true.
// NOTE(review): presumably enables namespace tracking in ZLXMLReader
// (namespaces() is used by isDublinCoreNamespace / isNSName) - confirm.
bool OEBMetaInfoReader::processNamespaces() const {
return true;
}
// Reads the metadata of the given OPF file into the book.
// Returns false (after logging) if the document could not be read.
// Authors come from the list of creators with role "aut"; only when that
// list is empty are the creators without a role used instead.
bool OEBMetaInfoReader::readMetaInfo(const ZLFile &file) {
	myReadMetaData = false;
	myReadState = READ_NONE;
	if (!readDocument(file)) {
		ZLLogger::Instance().println("epub", "Failure while reading info from " + file.path());
		return false;
	}

	const std::vector<std::string> &authors = myAuthorList.empty() ? myAuthorList2 : myAuthorList;
	for (std::vector<std::string>::const_iterator author = authors.begin(); author != authors.end(); ++author) {
		myBook.addAuthor(*author);
	}
	return true;
}
// Returns the external DTD list that EntityFilesCollector provides for "xhtml".
const std::vector<std::string> &OEBMetaInfoReader::externalDTDs() const {
return EntityFilesCollector::Instance().externalDTDs("xhtml");
}

View file

@ -0,0 +1,64 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OEBMETAINFOREADER_H__
#define __OEBMETAINFOREADER_H__
#include <vector>
#include <ZLXMLReader.h>
class Book;
// XML reader that extracts title, authors, tags, language and series
// information from an OPF file into a Book.
class OEBMetaInfoReader : public ZLXMLReader {

public:
	// Clears the authors, title and tags of the given book.
	OEBMetaInfoReader(Book &book);
	// Parses the file and fills the book; returns false if parsing fails.
	bool readMetaInfo(const ZLFile &file);

	// ZLXMLReader callbacks.
	void startElementHandler(const char *tag, const char **attributes);
	void endElementHandler(const char *tag);
	void characterDataHandler(const char *text, size_t len);
	bool processNamespaces() const;
	const std::vector<std::string> &externalDTDs() const;

private:
	// True if nsId is a declared prefix of a Dublin Core namespace.
	bool isDublinCoreNamespace(const std::string &nsId) const;
	// True if fullName is "<prefix>:<shortName>" and the prefix maps to fullNSId.
	bool isNSName(const std::string &fullName, const std::string &shortName, const std::string &fullNSId) const;

private:
	Book &myBook;

	// True once the metadata element has been opened.
	bool myReadMetaData;
	// Field that the character data currently being read belongs to.
	// (No trailing comma after the last enumerator: that is only valid
	// since C++11.)
	enum {
		READ_NONE,
		READ_AUTHOR,   // creator with role "aut"
		READ_AUTHOR2,  // creator without a role attribute
		READ_TITLE,
		READ_SUBJECT,
		READ_LANGUAGE
	} myReadState;

	// Lower-cased name of the element that opened the metadata section.
	std::string myDCMetadataTag;
	// Text collected for the current field.
	std::string myBuffer;
	// Creators with role "aut"; used as the book authors if not empty.
	std::vector<std::string> myAuthorList;
	// Creators without a role; used only if myAuthorList is empty.
	std::vector<std::string> myAuthorList2;
};
#endif /* __OEBMETAINFOREADER_H__ */

View file

@ -0,0 +1,112 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <ZLFile.h>
#include <ZLImage.h>
#include <ZLStringUtil.h>
#include <ZLDir.h>
#include <ZLInputStream.h>
#include <ZLLogger.h>
//#include <ZLMimeType.h>
#include "OEBPlugin.h"
#include "OEBMetaInfoReader.h"
#include "OEBBookReader.h"
#include "OEBCoverReader.h"
#include "OEBTextStream.h"
#include "../../bookmodel/BookModel.h"
#include "../../library/Book.h"
// File extensions. Only OPF is referenced in the part of this file visible
// here (in OEBPlugin::opfFile).
static const std::string OPF = "opf";
static const std::string OEBZIP = "oebzip";
static const std::string EPUB = "epub";
// Nothing to release.
OEBPlugin::~OEBPlugin() {
}
// Always returns true (meta info is read by readMetaInfo).
bool OEBPlugin::providesMetaInfo() const {
return true;
}
// Returns the file type identifier of this plugin, "ePub".
const std::string OEBPlugin::supportedFileType() const {
return "ePub";
}
// Returns the OPF file for the given book file.
// A file whose extension is "opf" is returned as is. Any other file is
// treated as a zip archive and the first collected item whose name ends with
// ".opf" is returned. ZLFile::NO_FILE is returned when the archive cannot be
// opened or contains no such item. Progress is written to the "epub" log.
ZLFile OEBPlugin::opfFile(const ZLFile &oebFile) {
	if (oebFile.extension() == OPF) {
		return oebFile;
	}

	ZLLogger::Instance().println("epub", "Looking for opf file in " + oebFile.path());

	oebFile.forceArchiveType(ZLFile::ZIP);
	shared_ptr<ZLDir> archive = oebFile.directory(false);
	if (archive.isNull()) {
		ZLLogger::Instance().println("epub", "Couldn't open zip archive");
		return ZLFile::NO_FILE;
	}

	std::vector<std::string> items;
	archive->collectFiles(items, false);
	for (size_t i = 0; i < items.size(); ++i) {
		const std::string &item = items[i];
		ZLLogger::Instance().println("epub", "Item: " + item);
		if (ZLStringUtil::stringEndsWith(item, ".opf")) {
			return ZLFile(archive->itemPath(item));
		}
	}

	ZLLogger::Instance().println("epub", "Opf file not found");
	return ZLFile::NO_FILE;
}
// Reads the book's metadata from its OPF file.
bool OEBPlugin::readMetaInfo(Book &book) const {
const ZLFile &file = book.file();
// Not used directly. NOTE(review): presumably held so the underlying
// archive stream stays alive while the OPF is parsed - confirm.
shared_ptr<ZLInputStream> lock = file.inputStream();
return OEBMetaInfoReader(book).readMetaInfo(opfFile(file));
}
// User data object whose only content is a shared pointer to an input
// stream, so the stream is kept referenced for as long as this object exists.
class InputStreamLock : public ZLUserData {
public:
InputStreamLock(shared_ptr<ZLInputStream> stream);
private:
shared_ptr<ZLInputStream> myStream;
};
// Stores the stream pointer.
InputStreamLock::InputStreamLock(shared_ptr<ZLInputStream> stream) : myStream(stream) {
}
// Reads the book content into the model from the book's OPF file.
bool OEBPlugin::readModel(BookModel &model) const {
const ZLFile &file = model.book()->file();
// Attach the book file's input stream to the model as user data, so the
// stream stays referenced as long as the model keeps that entry.
model.addUserData(
"inputStreamLock",
new InputStreamLock(file.inputStream())
);
return OEBBookReader(model).readBook(opfFile(file));
}
// Returns the cover image found through the book's OPF file (may be null).
shared_ptr<ZLImage> OEBPlugin::coverImage(const ZLFile &file) const {
return OEBCoverReader().readCover(opfFile(file));
}
// If the book has no language yet, runs language detection over the text of
// the book (an OEBTextStream built from its OPF file). Always returns true.
bool OEBPlugin::readLanguageAndEncoding(Book &book) const {
	if (!book.language().empty()) {
		return true;
	}
	shared_ptr<ZLInputStream> textStream = new OEBTextStream(opfFile(book.file()));
	detectLanguage(book, *textStream);
	return true;
}

View file

@ -0,0 +1,40 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OEBPLUGIN_H__
#define __OEBPLUGIN_H__
#include "../FormatPlugin.h"
// Format plugin for OEB/ePub books.
class OEBPlugin : public FormatPlugin {
public:
// Returns the OPF file for the given book file (the file itself if it is an
// .opf, otherwise an .opf item found inside it as a zip archive), or
// ZLFile::NO_FILE.
static ZLFile opfFile(const ZLFile &oebFile);
public:
~OEBPlugin();
bool providesMetaInfo() const;
const std::string supportedFileType() const;
bool readMetaInfo(Book &book) const;
bool readLanguageAndEncoding(Book &book) const;
bool readModel(BookModel &model) const;
shared_ptr<ZLImage> coverImage(const ZLFile &file) const;
};
#endif /* __OEBPLUGIN_H__ */

View file

@ -0,0 +1,101 @@
/*
* Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <map>
#include <ZLFile.h>
#include <ZLXMLReader.h>
#include <ZLUnicodeUtil.h>
#include "OEBTextStream.h"
#include "../util/MiscUtil.h"
#include "../util/XMLTextStream.h"
// XML reader for an OPF file that collects, in spine order, the hrefs of the
// manifest items referenced by the spine.
class XHTMLFilesCollector : public ZLXMLReader {
public:
XHTMLFilesCollector(std::vector<std::string> &xhtmlFileNames);
private:
void startElementHandler(const char *tag, const char **attributes);
void endElementHandler(const char *tag);
private:
// Output list, owned by the caller.
std::vector<std::string> &myXHTMLFileNames;
// Manifest: item id -> href.
std::map<std::string,std::string> myIdToHref;
// Section of the document currently being read.
enum {
READ_NONE,
READ_MANIFEST,
READ_SPINE
} myState;
};
// Binds the output list and starts in the READ_NONE state.
XHTMLFilesCollector::XHTMLFilesCollector(std::vector<std::string> &xhtmlFileNames) : myXHTMLFileNames(xhtmlFileNames), myState(READ_NONE) {
}
// Lower-case element names recognized by XHTMLFilesCollector.
static const std::string MANIFEST = "manifest";
static const std::string SPINE = "spine";
static const std::string ITEM = "item";
static const std::string ITEMREF = "itemref";
// Start-tag callback (tag names are matched case-insensitively).
// <manifest> and <spine> switch the state; <item id=... href=...> inside the
// manifest is recorded in myIdToHref; <itemref idref=...> inside the spine
// appends the href recorded for that id, if there is a non-empty one, to the
// output list.
void XHTMLFilesCollector::startElementHandler(const char *tag, const char **xmlattributes) {
	const std::string tagString = ZLUnicodeUtil::toLower(tag);
	if (MANIFEST == tagString) {
		myState = READ_MANIFEST;
	} else if (SPINE == tagString) {
		myState = READ_SPINE;
	} else if ((myState == READ_MANIFEST) && (ITEM == tagString)) {
		const char *id = attributeValue(xmlattributes, "id");
		const char *href = attributeValue(xmlattributes, "href");
		if ((id != 0) && (href != 0)) {
			myIdToHref[id] = href;
		}
	} else if ((myState == READ_SPINE) && (ITEMREF == tagString)) {
		const char *id = attributeValue(xmlattributes, "idref");
		if (id != 0) {
			// find() rather than operator[]: an unknown idref must not
			// insert an empty entry into the manifest map.
			const std::map<std::string,std::string>::const_iterator it = myIdToHref.find(id);
			if (it != myIdToHref.end() && !it->second.empty()) {
				myXHTMLFileNames.push_back(it->second);
			}
		}
	}
}
// Stops parsing as soon as the <spine> element is closed.
void XHTMLFilesCollector::endElementHandler(const char *tag) {
	const std::string closedTag = ZLUnicodeUtil::toLower(tag);
	if (SPINE != closedTag) {
		return;
	}
	interrupt();
}
// Collects the names of the spine documents of the given OPF file and
// remembers the directory prefix used to resolve them.
// myIndex is initialized here so that nextStream() never reads an
// indeterminate value, even if it is reached before resetToStart().
OEBTextStream::OEBTextStream(const ZLFile &opfFile) : myIndex(0) {
	myFilePrefix = MiscUtil::htmlDirectoryPrefix(opfFile.path());
	XHTMLFilesCollector(myXHTMLFileNames).readDocument(opfFile);
}
// Rewinds to the first collected document.
void OEBTextStream::resetToStart() {
myIndex = 0;
}
// Returns a text stream over the next collected document (an XMLTextStream
// constructed with "body") and advances the position; returns a null
// pointer when all documents have been consumed.
shared_ptr<ZLInputStream> OEBTextStream::nextStream() {
	if (myIndex < myXHTMLFileNames.size()) {
		const std::string &documentName = myXHTMLFileNames[myIndex];
		++myIndex;
		ZLFile xhtmlFile(myFilePrefix + documentName);
		return new XMLTextStream(xhtmlFile.inputStream(), "body");
	}
	return 0;
}

View file

@ -0,0 +1,43 @@
/*
* Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __OEBTEXTSTREAM_H__
#define __OEBTEXTSTREAM_H__
#include <vector>
#include <string>
#include "../util/MergedStream.h"
// MergedStream over the documents listed in the spine of an OPF file.
class OEBTextStream : public MergedStream {
public:
OEBTextStream(const ZLFile &opfFile);
private:
// MergedStream hooks.
void resetToStart();
shared_ptr<ZLInputStream> nextStream();
private:
// Directory prefix prepended to each document name.
std::string myFilePrefix;
// Document names in spine order.
std::vector<std::string> myXHTMLFileNames;
// Index of the next document returned by nextStream().
size_t myIndex;
};
#endif /* __OEBTEXTSTREAM_H__ */

View file

@ -0,0 +1,670 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <cctype>
#include <ZLFile.h>
#include <ZLFileUtil.h>
#include <ZLFileImage.h>
#include <ZLUnicodeUtil.h>
#include <ZLStringUtil.h>
#include <ZLXMLNamespace.h>
#include "XHTMLReader.h"
#include "../util/EntityFilesCollector.h"
#include "../util/MiscUtil.h"
#include "../css/StyleSheetParser.h"
#include "../../bookmodel/BookReader.h"
#include "../../bookmodel/BookModel.h"
// Compile-time switch; not referenced in the part of this file visible here.
static const bool USE_CSS = false;
// Definition of the static map from tag name to the action object handling it.
std::map<std::string,XHTMLTagAction*> XHTMLReader::ourTagActions;
// Nothing to release in the base class.
XHTMLTagAction::~XHTMLTagAction() {
}
// Gives tag actions access to the reader's BookReader (myModelReader).
BookReader &XHTMLTagAction::bookReader(XHTMLReader &reader) {
return reader.myModelReader;
}
// Gives tag actions access to the reader's path prefix (myPathPrefix).
const std::string &XHTMLTagAction::pathPrefix(XHTMLReader &reader) {
return reader.myPathPrefix;
}
// Forwards to XHTMLReader::beginParagraph() on behalf of a tag action.
void XHTMLTagAction::beginParagraph(XHTMLReader &reader) {
reader.beginParagraph();
}
// Forwards to XHTMLReader::endParagraph() on behalf of a tag action.
void XHTMLTagAction::endParagraph(XHTMLReader &reader) {
reader.endParagraph();
}
// Action for an element carrying inline "text/css" content: switches the
// reader into the READ_STYLE state and installs a style sheet parser.
class XHTMLTagStyleAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action for an element with rel="stylesheet" type="text/css": parses the
// referenced CSS file into the reader's style sheet table.
class XHTMLTagLinkAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action that begins a paragraph at the start tag (unless a new paragraph is
// already in progress) and ends it at the end tag.
class XHTMLTagParagraphAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action that switches the reader into the READ_BODY state at the start tag
// and back to READ_NOTHING (after ending the paragraph) at the end tag.
class XHTMLTagBodyAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action that ends the current paragraph and immediately begins a new one at
// the start tag; does nothing at the end tag.
class XHTMLTagRestartParagraphAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action that inserts an image into the model. The attribute holding the
// image file name is selected by a predicate, given either directly or as a
// fixed attribute name.
class XHTMLTagImageAction : public XHTMLTagAction {
public:
XHTMLTagImageAction(shared_ptr<ZLXMLReader::AttributeNamePredicate> predicate);
XHTMLTagImageAction(const std::string &attributeName);
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
private:
shared_ptr<ZLXMLReader::AttributeNamePredicate> myPredicate;
};
// Predicate matching the XLink "href" attribute, but only while it is
// enabled; XHTMLTagSvgAction toggles the flag.
class XHTMLSvgImageAttributeNamePredicate : public ZLXMLReader::NamespaceAttributeNamePredicate {
public:
XHTMLSvgImageAttributeNamePredicate();
bool accepts(const ZLXMLReader &reader, const char *name) const;
private:
bool myIsEnabled;
friend class XHTMLTagSvgAction;
};
// Action that enables the given predicate at the start tag and disables it
// again at the end tag.
class XHTMLTagSvgAction : public XHTMLTagAction {
public:
XHTMLTagSvgAction(XHTMLSvgImageAttributeNamePredicate &predicate);
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
private:
XHTMLSvgImageAttributeNamePredicate &myPredicate;
};
// Action for list items: starts a new paragraph that begins with a bullet
// and ends the paragraph at the end tag.
class XHTMLTagItemAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Action for elements with "href" and/or "name" attributes: adds hyperlink
// controls and hyperlink labels to the model.
class XHTMLTagHyperlinkAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
private:
// One entry per open element: the hyperlink kind that was started, or
// REGULAR if the element started no hyperlink.
std::stack<FBTextKind> myHyperlinkStack;
};
// Action that switches the given text kind on at the start tag and off at
// the end tag.
class XHTMLTagControlAction : public XHTMLTagAction {
public:
XHTMLTagControlAction(FBTextKind control);
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
private:
FBTextKind myControl;
};
// Action that opens a paragraph of the given text kind at the start tag and
// closes it at the end tag.
class XHTMLTagParagraphWithControlAction : public XHTMLTagAction {
public:
XHTMLTagParagraphWithControlAction(FBTextKind control);
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
private:
FBTextKind myControl;
};
// Action for preformatted text: sets the reader's myPreformatted flag and
// starts a paragraph with the CODE control at the start tag.
class XHTMLTagPreAction : public XHTMLTagAction {
public:
void doAtStart(XHTMLReader &reader, const char **xmlattributes);
void doAtEnd(XHTMLReader &reader);
};
// Starts reading inline CSS: if the element has type="text/css" and the
// reader is in the READ_NOTHING state, switches to READ_STYLE and installs
// a parser that writes into the reader's style sheet table.
void XHTMLTagStyleAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	static const std::string TYPE = "text/css";

	const char *type = reader.attributeValue(xmlattributes, "type");
	const bool isCss = (type != 0) && (TYPE == type);
	if (isCss && reader.myReadState == XHTMLReader::READ_NOTHING) {
		reader.myReadState = XHTMLReader::READ_STYLE;
		reader.myTableParser = new StyleSheetTableParser(reader.myStyleSheetTable);
	}
}
// Finishes reading inline CSS: leaves the READ_STYLE state and drops the parser.
void XHTMLTagStyleAction::doAtEnd(XHTMLReader &reader) {
	if (reader.myReadState != XHTMLReader::READ_STYLE) {
		return;
	}
	reader.myReadState = XHTMLReader::READ_NOTHING;
	reader.myTableParser.reset();
}
// Loads an external style sheet: for an element with rel="stylesheet",
// type="text/css" and an href, opens the referenced file (resolved against
// the reader's path prefix) and parses it into the reader's style sheet
// table. Does nothing if any attribute is missing or has another value, or
// if the file has no input stream.
void XHTMLTagLinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	static const std::string REL = "stylesheet";
	const char *rel = reader.attributeValue(xmlattributes, "rel");
	const bool isStyleSheet = (rel != 0) && (REL == rel);
	if (!isStyleSheet) {
		return;
	}

	static const std::string TYPE = "text/css";
	const char *type = reader.attributeValue(xmlattributes, "type");
	const bool isCss = (type != 0) && (TYPE == type);
	if (!isCss) {
		return;
	}

	const char *href = reader.attributeValue(xmlattributes, "href");
	if (href == 0) {
		return;
	}

	const ZLFile cssFile(reader.myPathPrefix + MiscUtil::decodeHtmlURL(href));
	shared_ptr<ZLInputStream> cssStream = cssFile.inputStream();
	if (!cssStream.isNull()) {
		StyleSheetTableParser parser(reader.myStyleSheetTable);
		parser.parse(*cssStream);
		//reader.myStyleSheetTable.dump();
	}
}
// Nothing to do at the end tag.
void XHTMLTagLinkAction::doAtEnd(XHTMLReader&) {
}
// Begins a paragraph unless a new paragraph is already in progress, and
// marks it as in progress.
void XHTMLTagParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
	if (reader.myNewParagraphInProgress) {
		return;
	}
	beginParagraph(reader);
	reader.myNewParagraphInProgress = true;
}
// Ends the current paragraph.
void XHTMLTagParagraphAction::doAtEnd(XHTMLReader &reader) {
endParagraph(reader);
}
// Switches the reader into the READ_BODY state.
void XHTMLTagBodyAction::doAtStart(XHTMLReader &reader, const char**) {
reader.myReadState = XHTMLReader::READ_BODY;
}
// Ends the current paragraph and returns the reader to the READ_NOTHING state.
void XHTMLTagBodyAction::doAtEnd(XHTMLReader &reader) {
endParagraph(reader);
reader.myReadState = XHTMLReader::READ_NOTHING;
}
// Ends the current paragraph and begins a new one.
void XHTMLTagRestartParagraphAction::doAtStart(XHTMLReader &reader, const char**) {
// A single space is added to a paragraph that is still empty before it is ended.
// NOTE(review): presumably so that the empty paragraph is not dropped - confirm.
if (reader.myCurrentParagraphIsEmpty) {
bookReader(reader).addData(" ");
}
endParagraph(reader);
beginParagraph(reader);
}
// Nothing to do at the end tag.
void XHTMLTagRestartParagraphAction::doAtEnd(XHTMLReader&) {
}
// Starts a list item: ends the current paragraph and begins a new one whose
// text starts with a bullet sign.
void XHTMLTagItemAction::doAtStart(XHTMLReader &reader, const char**) {
	endParagraph(reader);
	// TODO: increase left indent
	beginParagraph(reader);
	// TODO: replace bullet sign by number inside OL tag
	// U+2022 BULLET followed by U+00A0 NO-BREAK SPACE, in UTF-8.
	// The no-break space is "\xC2\xA0"; the previous "\xC0\xA0" was an
	// overlong byte sequence, which is not valid UTF-8.
	const std::string bullet = "\xE2\x80\xA2\xC2\xA0";
	bookReader(reader).addData(bullet);
}
// Ends the list item's paragraph.
void XHTMLTagItemAction::doAtEnd(XHTMLReader &reader) {
endParagraph(reader);
}
// Creates an image action that takes the file name from the attribute
// accepted by the given predicate.
XHTMLTagImageAction::XHTMLTagImageAction(shared_ptr<ZLXMLReader::AttributeNamePredicate> predicate) : myPredicate(predicate) {
}
// Creates an image action that takes the file name from the attribute with
// the given fixed name.
XHTMLTagImageAction::XHTMLTagImageAction(const std::string &attributeName) {
myPredicate = new ZLXMLReader::FixedAttributeNamePredicate(attributeName);
}
// Inserts the image referenced by the element into the model.
// The file name comes from the attribute selected by myPredicate and is
// resolved against the reader's path prefix; nothing happens if the
// attribute is missing or the file does not exist. An open paragraph is
// closed before the image is added and reopened afterwards.
void XHTMLTagImageAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	const char *fileName = reader.attributeValue(xmlattributes, *myPredicate);
	if (fileName == 0) {
		return;
	}

	const std::string imagePath = pathPrefix(reader) + MiscUtil::decodeHtmlURL(fileName);
	ZLFile imageFile(imagePath);
	if (!imageFile.exists()) {
		return;
	}

	const bool paragraphWasOpen = bookReader(reader).paragraphIsOpen();
	if (paragraphWasOpen) {
		endParagraph(reader);
	}

	// The file name without its path serves as the image id in the model.
	const std::string imageId = imageFile.name(false);
	bookReader(reader).addImageReference(imageId, 0, false);
	bookReader(reader).addImage(imageId, new ZLFileImage(imageFile, "", 0));

	if (paragraphWasOpen) {
		beginParagraph(reader);
	}
}
// Stores a reference to the predicate that this action enables and disables.
XHTMLTagSvgAction::XHTMLTagSvgAction(XHTMLSvgImageAttributeNamePredicate &predicate) : myPredicate(predicate) {
}
// Enables the predicate while inside the element.
void XHTMLTagSvgAction::doAtStart(XHTMLReader&, const char**) {
myPredicate.myIsEnabled = true;
}
// Disables the predicate when the element is closed.
void XHTMLTagSvgAction::doAtEnd(XHTMLReader&) {
myPredicate.myIsEnabled = false;
}
// Matches the "href" attribute in the XLink namespace; starts disabled.
XHTMLSvgImageAttributeNamePredicate::XHTMLSvgImageAttributeNamePredicate() : ZLXMLReader::NamespaceAttributeNamePredicate(ZLXMLNamespace::XLink, "href"), myIsEnabled(false) {
}
// Accepts nothing while disabled; otherwise defers to the namespace-aware
// base predicate.
bool XHTMLSvgImageAttributeNamePredicate::accepts(const ZLXMLReader &reader, const char *name) const {
	if (!myIsEnabled) {
		return false;
	}
	return NamespaceAttributeNamePredicate::accepts(reader, name);
}
// Nothing to do at the end tag.
void XHTMLTagImageAction::doAtEnd(XHTMLReader&) {
}
// Stores the text kind that this action switches on and off.
XHTMLTagControlAction::XHTMLTagControlAction(FBTextKind control) : myControl(control) {
}
// Pushes this action's kind onto the book reader's kind stack and emits the
// matching "start" control.
void XHTMLTagControlAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &modelReader = bookReader(reader);
	modelReader.pushKind(myControl);
	modelReader.addControl(myControl, true);
}
// Emits the matching "end" control and pops the kind pushed by doAtStart().
void XHTMLTagControlAction::doAtEnd(XHTMLReader &reader) {
	BookReader &modelReader = bookReader(reader);
	modelReader.addControl(myControl, false);
	modelReader.popKind();
}
// Handles the opening of a hyperlink element.
//
// - A non-empty "href" starts a hyperlink control. Internal links are made
//   absolute: a fragment-only link ("#...") is prefixed with the current
//   reference name, anything else with the current reference directory, and
//   the result is normalized as a Unix path.
// - A missing or empty "href" pushes REGULAR so that doAtEnd() still finds
//   one stack entry per opened element.
// - A "name" attribute additionally registers a hyperlink label
//   "<reference name>#<decoded name>".
void XHTMLTagHyperlinkAction::doAtStart(XHTMLReader &reader, const char **xmlattributes) {
	const char *href = reader.attributeValue(xmlattributes, "href");
	const bool hasTarget = (href != 0) && (href[0] != '\0');

	if (!hasTarget) {
		myHyperlinkStack.push(REGULAR);
	} else {
		const FBTextKind hyperlinkType = MiscUtil::referenceType(href);
		std::string link = MiscUtil::decodeHtmlURL(href);
		if (hyperlinkType == INTERNAL_HYPERLINK) {
			const std::string &base =
				(link[0] == '#') ? reader.myReferenceName : reader.myReferenceDirName;
			link = ZLFileUtil::normalizeUnixPath(base + link);
		}
		myHyperlinkStack.push(hyperlinkType);
		bookReader(reader).addHyperlinkControl(hyperlinkType, link);
	}

	const char *anchorName = reader.attributeValue(xmlattributes, "name");
	if (anchorName != 0) {
		const std::string label =
			reader.myReferenceName + "#" + MiscUtil::decodeHtmlURL(anchorName);
		bookReader(reader).addHyperlinkLabel(label);
	}
}
// Pops the kind recorded by the matching doAtStart() and, unless it is the
// REGULAR placeholder (element without a usable href), closes the hyperlink
// control.
void XHTMLTagHyperlinkAction::doAtEnd(XHTMLReader &reader) {
	const FBTextKind openedKind = myHyperlinkStack.top();
	myHyperlinkStack.pop();
	if (openedKind != REGULAR) {
		bookReader(reader).addControl(openedKind, false);
	}
}
// Stores the text kind that is pushed around the paragraph opened by this
// action.
XHTMLTagParagraphWithControlAction::XHTMLTagParagraphWithControlAction(FBTextKind control) : myControl(control) {
}
// Pushes this action's kind and begins a new paragraph.
// For the TITLE kind, an end-of-section paragraph is inserted first when the
// book text model already holds more than one paragraph.
void XHTMLTagParagraphWithControlAction::doAtStart(XHTMLReader &reader, const char**) {
	BookReader &modelReader = bookReader(reader);
	if (myControl == TITLE) {
		if (modelReader.model().bookTextModel()->paragraphsNumber() > 1) {
			modelReader.insertEndOfSectionParagraph();
		}
	}
	modelReader.pushKind(myControl);
	beginParagraph(reader);
}
// Ends the paragraph begun in doAtStart() and pops the kind pushed there.
void XHTMLTagParagraphWithControlAction::doAtEnd(XHTMLReader &reader) {
endParagraph(reader);
bookReader(reader).popKind();
}
// Switches the reader into preformatted mode, begins a paragraph and opens a
// CODE control inside it.
void XHTMLTagPreAction::doAtStart(XHTMLReader &reader, const char**) {
// Must be set before any character data arrives: the reader's
// characterDataHandler() treats text differently while this flag is on.
reader.myPreformatted = true;
beginParagraph(reader);
bookReader(reader).addControl(CODE, true);
}
// Undoes doAtStart() in reverse order: closes the CODE control, ends the
// paragraph and leaves preformatted mode.
void XHTMLTagPreAction::doAtEnd(XHTMLReader &reader) {
bookReader(reader).addControl(CODE, false);
endParagraph(reader);
reader.myPreformatted = false;
}
// Registers `action` as the handler for `tag` in the shared action table.
// Returns the action previously registered for that tag, or 0 if there was
// none. The previous action is not deleted here.
XHTMLTagAction *XHTMLReader::addAction(const std::string &tag, XHTMLTagAction *action) {
	XHTMLTagAction *&slot = ourTagActions[tag];
	XHTMLTagAction *previous = slot;
	slot = action;
	return previous;
}
void XHTMLReader::fillTagTable() {
if (ourTagActions.empty()) {
//addAction("html", new XHTMLTagAction());
addAction("body", new XHTMLTagBodyAction());
//addAction("title", new XHTMLTagAction());
//addAction("meta", new XHTMLTagAction());
//addAction("script", new XHTMLTagAction());
//addAction("font", new XHTMLTagAction());
addAction("style", new XHTMLTagStyleAction());
addAction("p", new XHTMLTagParagraphAction());
addAction("h1", new XHTMLTagParagraphWithControlAction(H1));
addAction("h2", new XHTMLTagParagraphWithControlAction(H2));
addAction("h3", new XHTMLTagParagraphWithControlAction(H3));
addAction("h4", new XHTMLTagParagraphWithControlAction(H4));
addAction("h5", new XHTMLTagParagraphWithControlAction(H5));
addAction("h6", new XHTMLTagParagraphWithControlAction(H6));
//addAction("ol", new XHTMLTagAction());
//addAction("ul", new XHTMLTagAction());
//addAction("dl", new XHTMLTagAction());
addAction("li", new XHTMLTagItemAction());
addAction("strong", new XHTMLTagControlAction(STRONG));
addAction("b", new XHTMLTagControlAction(BOLD));
addAction("em", new XHTMLTagControlAction(EMPHASIS));
addAction("i", new XHTMLTagControlAction(ITALIC));
addAction("code", new XHTMLTagControlAction(CODE));
addAction("tt", new XHTMLTagControlAction(CODE));
addAction("kbd", new XHTMLTagControlAction(CODE));
addAction("var", new XHTMLTagControlAction(CODE));
addAction("samp", new XHTMLTagControlAction(CODE));
addAction("cite", new XHTMLTagControlAction(CITE));
addAction("sub", new XHTMLTagControlAction(SUB));
addAction("sup", new XHTMLTagControlAction(SUP));
addAction("dd", new XHTMLTagControlAction(DEFINITION_DESCRIPTION));
addAction("dfn", new XHTMLTagControlAction(DEFINITION));
addAction("strike", new XHTMLTagControlAction(STRIKETHROUGH));
addAction("a", new XHTMLTagHyperlinkAction());
addAction("img", new XHTMLTagImageAction("src"));
addAction("object", new XHTMLTagImageAction("data"));
XHTMLSvgImageAttributeNamePredicate *predicate = new XHTMLSvgImageAttributeNamePredicate();
addAction("image", new XHTMLTagImageAction(predicate));
addAction("svg", new XHTMLTagSvgAction(*predicate));
//addAction("area", new XHTMLTagAction());
//addAction("map", new XHTMLTagAction());
//addAction("base", new XHTMLTagAction());
//addAction("blockquote", new XHTMLTagAction());
addAction("br", new XHTMLTagRestartParagraphAction());
//addAction("center", new XHTMLTagAction());
addAction("div", new XHTMLTagParagraphAction());
addAction("dt", new XHTMLTagParagraphAction());
//addAction("head", new XHTMLTagAction());
//addAction("hr", new XHTMLTagAction());
addAction("link", new XHTMLTagLinkAction());
//addAction("param", new XHTMLTagAction());
//addAction("q", new XHTMLTagAction());
//addAction("s", new XHTMLTagAction());
addAction("pre", new XHTMLTagPreAction());
//addAction("big", new XHTMLTagAction());
//addAction("small", new XHTMLTagAction());
//addAction("u", new XHTMLTagAction());
//addAction("table", new XHTMLTagAction());
addAction("td", new XHTMLTagParagraphAction());
addAction("th", new XHTMLTagParagraphAction());
//addAction("tr", new XHTMLTagAction());
//addAction("caption", new XHTMLTagAction());
//addAction("span", new XHTMLTagAction());
}
}
// Binds the reader to the BookReader that receives all parsed content.
// Only a reference is stored. The remaining per-document state is set up in
// readFile().
XHTMLReader::XHTMLReader(BookReader &modelReader) : myModelReader(modelReader) {
}
bool XHTMLReader::readFile(const ZLFile &file, const std::string &referenceName) {
myModelReader.addHyperlinkLabel(referenceName);
fillTagTable();
myPathPrefix = MiscUtil::htmlDirectoryPrefix(file.path());
myReferenceName = referenceName;
const int index = referenceName.rfind('/', referenceName.length() - 1);
myReferenceDirName = referenceName.substr(0, index + 1);
myPreformatted = false;
myNewParagraphInProgress = false;
myReadState = READ_NOTHING;
myCSSStack.clear();
myStyleEntryStack.clear();
myStylesToRemove = 0;
return readDocument(file);
}
// Looks up the style entry for the given tag/class pair in the style sheet
// table. If one exists, it is emitted as a control and remembered on the
// style entry stack; otherwise nothing happens.
void XHTMLReader::addStyleEntry(const std::string tag, const std::string aClass) {
	shared_ptr<ZLTextStyleEntry> entry = myStyleSheetTable.control(tag, aClass);
	if (entry.isNull()) {
		return;
	}
	myModelReader.addControl(*entry);
	myStyleEntryStack.push_back(entry);
}
void XHTMLReader::startElementHandler(const char *tag, const char **attributes) {
static const std::string HASH = "#";
const char *id = attributeValue(attributes, "id");
if (id != 0) {
myModelReader.addHyperlinkLabel(myReferenceName + HASH + id);
}
const std::string sTag = ZLUnicodeUtil::toLower(tag);
const char *aClass = attributeValue(attributes, "class");
const std::string sClass = (aClass != 0) ? aClass : "";
if (myStyleSheetTable.doBreakBefore(sTag, sClass)) {
myModelReader.insertEndOfSectionParagraph();
}
myDoPageBreakAfterStack.push_back(myStyleSheetTable.doBreakAfter(sTag, sClass));
XHTMLTagAction *action = ourTagActions[sTag];
if (action != 0) {
action->doAtStart(*this, attributes);
}
const int sizeBefore = myStyleEntryStack.size();
addStyleEntry(sTag, "");
addStyleEntry("", sClass);
addStyleEntry(sTag, sClass);
const char *style = attributeValue(attributes, "style");
if (style != 0) {
shared_ptr<ZLTextStyleEntry> entry = myStyleParser.parseString(style);
myModelReader.addControl(*entry);
myStyleEntryStack.push_back(entry);
}
myCSSStack.push_back(myStyleEntryStack.size() - sizeBefore);
}
void XHTMLReader::endElementHandler(const char *tag) {
for (int i = myCSSStack.back(); i > 0; --i) {
myModelReader.addControl(REGULAR, false);
}
myStylesToRemove = myCSSStack.back();
myCSSStack.pop_back();
XHTMLTagAction *action = ourTagActions[ZLUnicodeUtil::toLower(tag)];
if (action != 0) {
action->doAtEnd(*this);
myNewParagraphInProgress = false;
}
for (; myStylesToRemove > 0; --myStylesToRemove) {
myStyleEntryStack.pop_back();
}
if (myDoPageBreakAfterStack.back()) {
myModelReader.insertEndOfSectionParagraph();
}
myDoPageBreakAfterStack.pop_back();
}
void XHTMLReader::beginParagraph() {
myCurrentParagraphIsEmpty = true;
myModelReader.beginParagraph();
bool doBlockSpaceBefore = false;
for (std::vector<shared_ptr<ZLTextStyleEntry> >::const_iterator it = myStyleEntryStack.begin(); it != myStyleEntryStack.end(); ++it) {
myModelReader.addControl(**it);
doBlockSpaceBefore =
doBlockSpaceBefore ||
(*it)->lengthSupported(ZLTextStyleEntry::LENGTH_SPACE_BEFORE);
}
if (doBlockSpaceBefore) {
ZLTextStyleEntry blockingEntry;
blockingEntry.setLength(
ZLTextStyleEntry::LENGTH_SPACE_BEFORE,
0,
ZLTextStyleEntry::SIZE_UNIT_PIXEL
);
myModelReader.addControl(blockingEntry);
}
}
void XHTMLReader::endParagraph() {
bool doBlockSpaceAfter = false;
for (std::vector<shared_ptr<ZLTextStyleEntry> >::const_iterator it = myStyleEntryStack.begin(); it != myStyleEntryStack.end() - myStylesToRemove; ++it) {
doBlockSpaceAfter =
doBlockSpaceAfter ||
(*it)->lengthSupported(ZLTextStyleEntry::LENGTH_SPACE_AFTER);
}
if (doBlockSpaceAfter) {
ZLTextStyleEntry blockingEntry;
blockingEntry.setLength(
ZLTextStyleEntry::LENGTH_SPACE_AFTER,
0,
ZLTextStyleEntry::SIZE_UNIT_PIXEL
);
myModelReader.addControl(blockingEntry);
}
for (; myStylesToRemove > 0; --myStylesToRemove) {
myModelReader.addControl(*myStyleEntryStack.back());
myStyleEntryStack.pop_back();
}
myModelReader.endParagraph();
}
// Receives a chunk of character data (`len` bytes starting at `text`; the
// buffer is not assumed to be NUL-terminated) and routes it by read state:
//
//  - READ_NOTHING: the text is ignored;
//  - READ_STYLE:   the text is fed to the style sheet table parser, if any;
//  - READ_BODY:    the text is added to the book model.
//
// Body text handling:
//  - In preformatted mode, a chunk that starts with CR or LF restarts the
//    paragraph (re-opening the CODE control), and the leading whitespace of
//    the chunk is replaced by a fixed horizontal space of the same length.
//  - Otherwise, leading whitespace is dropped when a new paragraph is in
//    progress or no paragraph is open.
//  - Whatever remains is appended, opening a paragraph first if needed.
//
// Every read of `text` is bounded by `len`, so an empty chunk is handled
// safely.
void XHTMLReader::characterDataHandler(const char *text, size_t len) {
	switch (myReadState) {
		case READ_NOTHING:
			break;
		case READ_STYLE:
			if (!myTableParser.isNull()) {
				myTableParser->parse(text, len);
			}
			break;
		case READ_BODY:
			if (myPreformatted) {
				// Check len before looking at the first character.
				if ((len > 0) && ((*text == '\r') || (*text == '\n'))) {
					myModelReader.addControl(CODE, false);
					endParagraph();
					beginParagraph();
					myModelReader.addControl(CODE, true);
				}
				size_t spaceCounter = 0;
				while ((spaceCounter < len) && isspace((unsigned char)text[spaceCounter])) {
					++spaceCounter;
				}
				myModelReader.addFixedHSpace(spaceCounter);
				text += spaceCounter;
				len -= spaceCounter;
			} else if (myNewParagraphInProgress || !myModelReader.paragraphIsOpen()) {
				// Test len first: this avoids reading past the chunk and keeps
				// the unsigned counter from wrapping when len is already 0.
				while ((len > 0) && isspace((unsigned char)*text)) {
					++text;
					--len;
				}
			}
			if (len > 0) {
				myCurrentParagraphIsEmpty = false;
				if (!myModelReader.paragraphIsOpen()) {
					myModelReader.beginParagraph();
				}
				myModelReader.addData(std::string(text, len));
				myNewParagraphInProgress = false;
			}
			break;
	}
}
// Returns the external DTD list that EntityFilesCollector provides for the
// "xhtml" format.
const std::vector<std::string> &XHTMLReader::externalDTDs() const {
return EntityFilesCollector::Instance().externalDTDs("xhtml");
}
// Always returns true.
// NOTE(review): presumably this tells the ZLXMLReader base to resolve XML
// namespaces, which the XLink "href" predicate relies on - confirm against
// ZLXMLReader.
bool XHTMLReader::processNamespaces() const {
return true;
}

View file

@ -0,0 +1,109 @@
/*
* Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __XHTMLREADER_H__
#define __XHTMLREADER_H__
#include <string>
#include <map>
#include <vector>
#include <ZLXMLReader.h>
#include "../css/StyleSheetTable.h"
#include "../css/StyleSheetParser.h"
class ZLFile;
class BookReader;
class XHTMLReader;
// Abstract handler for one XHTML tag. XHTMLReader keeps a table of these
// keyed by tag name and calls doAtStart() when a matching element opens and
// doAtEnd() when it closes.
class XHTMLTagAction {
public:
virtual ~XHTMLTagAction();
// Called when the element opens; `xmlattributes` is the attribute array
// handed over by the XML reader.
virtual void doAtStart(XHTMLReader &reader, const char **xmlattributes) = 0;
// Called when the element closes.
virtual void doAtEnd(XHTMLReader &reader) = 0;
protected:
// Helpers for subclasses. This class is a friend of XHTMLReader, so these
// presumably forward to the reader's private state and paragraph methods
// (their definitions are not in this header).
static BookReader &bookReader(XHTMLReader &reader);
static const std::string &pathPrefix(XHTMLReader &reader);
static void beginParagraph(XHTMLReader &reader);
static void endParagraph(XHTMLReader &reader);
};
// XML reader that converts an XHTML document into book model content through
// a BookReader, dispatching each element to the XHTMLTagAction registered for
// its tag name.
class XHTMLReader : public ZLXMLReader {
public:
// Registers `action` for `tag` and returns the previously registered action.
static XHTMLTagAction *addAction(const std::string &tag, XHTMLTagAction *action);
// Registers the built-in actions if the table is still empty.
static void fillTagTable();
private:
// Tag name -> action table shared by all readers.
static std::map<std::string,XHTMLTagAction*> ourTagActions;
public:
XHTMLReader(BookReader &modelReader);
// Parses `file`; `referenceName` is the base for hyperlink labels and
// internal links created while reading it.
bool readFile(const ZLFile &file, const std::string &referenceName);
private:
// ZLXMLReader callbacks.
void startElementHandler(const char *tag, const char **attributes);
void endElementHandler(const char *tag);
void characterDataHandler(const char *text, size_t len);
const std::vector<std::string> &externalDTDs() const;
bool processNamespaces() const;
// Paragraph handling that also re-applies / removes active style entries.
void beginParagraph();
void endParagraph();
// Applies the style sheet entry for tag/class, if there is one.
void addStyleEntry(const std::string tag, const std::string aClass);
private:
// Receiver of all parsed content.
BookReader &myModelReader;
// Set in readFile() from the file path; image paths are built from it.
std::string myPathPrefix;
// Reference name of the current file and its directory part (up to and
// including the last '/').
std::string myReferenceName;
std::string myReferenceDirName;
// True while inside a preformatted element.
bool myPreformatted;
bool myNewParagraphInProgress;
StyleSheetTable myStyleSheetTable;
// For each open element: how many entries it pushed onto myStyleEntryStack.
std::vector<int> myCSSStack;
// Style entries currently in effect, innermost last.
std::vector<shared_ptr<ZLTextStyleEntry> > myStyleEntryStack;
// Number of trailing myStyleEntryStack entries that belong to the element
// being closed and still have to be removed.
int myStylesToRemove;
// For each open element: whether to insert an end-of-section paragraph after
// it closes.
std::vector<bool> myDoPageBreakAfterStack;
bool myCurrentParagraphIsEmpty;
// Parser for inline "style" attributes.
StyleSheetSingleStyleParser myStyleParser;
// Receives character data while myReadState is READ_STYLE.
shared_ptr<StyleSheetTableParser> myTableParser;
// Selects how characterDataHandler() treats incoming text.
enum {
READ_NOTHING,
READ_STYLE,
READ_BODY
} myReadState;
// Tag actions that access the private state above directly.
friend class XHTMLTagAction;
friend class XHTMLTagStyleAction;
friend class XHTMLTagLinkAction;
friend class XHTMLTagHyperlinkAction;
friend class XHTMLTagPreAction;
friend class XHTMLTagParagraphAction;
friend class XHTMLTagBodyAction;
friend class XHTMLTagRestartParagraphAction;
};
#endif /* __XHTMLREADER_H__ */