1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-05 02:39:23 +02:00

CSS in HTMLs

This commit is contained in:
Nikolay Pultsin 2014-09-09 21:50:25 +01:00
parent b21e516d6a
commit b4fefc8c04
10 changed files with 170 additions and 135 deletions

View file

@ -124,9 +124,9 @@ HtmlHrefTagAction::HtmlHrefTagAction(HtmlBookReader &reader) : HtmlTagAction(rea
void HtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
if (tag.Start) {
for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
if (tag.Attributes[i].Name == "NAME") {
if (tag.Attributes[i].Name == "name") {
bookReader().addHyperlinkLabel(tag.Attributes[i].Value);
} else if ((hyperlinkType() == REGULAR) && (tag.Attributes[i].Name == "HREF")) {
} else if ((hyperlinkType() == REGULAR) && (tag.Attributes[i].Name == "href")) {
std::string value = tag.Attributes[i].Value;
if (!myReader.myFileName.empty() &&
(value.length() > myReader.myFileName.length()) &&
@ -171,16 +171,14 @@ HtmlImageTagAction::HtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(r
void HtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
if (tag.Start) {
bookReader().endParagraph();
for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
if (tag.Attributes[i].Name == "SRC") {
const std::string fileName = MiscUtil::decodeHtmlURL(tag.Attributes[i].Value);
const std::string *ptr = tag.find("src");
if (ptr != 0) {
const std::string fileName = MiscUtil::decodeHtmlURL(*ptr);
const ZLFile file(myReader.myBaseDirPath + fileName);
if (file.exists()) {
bookReader().addImageReference(fileName, 0, false);
bookReader().addImage(fileName, new ZLFileImage(file, "", 0));
}
break;
}
}
bookReader().beginParagraph();
}
@ -273,8 +271,12 @@ void HtmlTableTagAction::run(const HtmlReader::HtmlTag &tag) {
// Constructs the action that createAction() registers for the "style" tag.
// The reader is forwarded to the HtmlTagAction base; the body performs no
// further initialisation.
HtmlStyleTagAction::HtmlStyleTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) {
}
// Builds a StyleSheetTableParser bound to this reader's base directory path,
// style sheet table and font map, and hands ownership to the caller through
// a shared_ptr. A fresh parser is created on every call.
// NOTE(review): the meaning of the trailing 0 argument is not visible here;
// confirm against the StyleSheetTableParser constructor.
shared_ptr<StyleSheetParser> HtmlBookReader::createCSSParser() {
	shared_ptr<StyleSheetParser> parser = new StyleSheetTableParser(myBaseDirPath, myStyleSheetTable, myFontMap, 0);
	return parser;
}
void HtmlStyleTagAction::run(const HtmlReader::HtmlTag &tag) {
myReader.myStyleSheetParser = tag.Start ? new StyleSheetTableParser(myReader.myBaseDirPath, myReader.myStyleSheetTable, myReader.myFontMap, 0) : 0;
myReader.myStyleSheetParser = tag.Start ? myReader.createCSSParser() : 0;
/*
if (!tag.Start) {
myReader.myStyleSheetTable.dump();
@ -283,105 +285,105 @@ void HtmlStyleTagAction::run(const HtmlReader::HtmlTag &tag) {
}
shared_ptr<HtmlTagAction> HtmlBookReader::createAction(const std::string &tag) {
if (tag == "EM") {
if (tag == "em") {
return new HtmlControlTagAction(*this, EMPHASIS);
} else if (tag == "STRONG") {
} else if (tag == "strong") {
return new HtmlControlTagAction(*this, STRONG);
} else if (tag == "B") {
} else if (tag == "b") {
return new HtmlControlTagAction(*this, BOLD);
} else if (tag == "I") {
} else if (tag == "i") {
return new HtmlControlTagAction(*this, ITALIC);
} else if (tag == "TT") {
} else if (tag == "tt") {
return new HtmlControlTagAction(*this, CODE);
} else if (tag == "CODE") {
} else if (tag == "code") {
return new HtmlControlTagAction(*this, CODE);
} else if (tag == "CITE") {
} else if (tag == "cite") {
return new HtmlControlTagAction(*this, CITE);
} else if (tag == "SUB") {
} else if (tag == "sub") {
return new HtmlControlTagAction(*this, SUB);
} else if (tag == "SUP") {
} else if (tag == "sup") {
return new HtmlControlTagAction(*this, SUP);
} else if (tag == "H1") {
} else if (tag == "h1") {
return new HtmlHeaderTagAction(*this, H1);
} else if (tag == "H2") {
} else if (tag == "h2") {
return new HtmlHeaderTagAction(*this, H2);
} else if (tag == "H3") {
} else if (tag == "h3") {
return new HtmlHeaderTagAction(*this, H3);
} else if (tag == "H4") {
} else if (tag == "h4") {
return new HtmlHeaderTagAction(*this, H4);
} else if (tag == "H5") {
} else if (tag == "h5") {
return new HtmlHeaderTagAction(*this, H5);
} else if (tag == "H6") {
} else if (tag == "h6") {
return new HtmlHeaderTagAction(*this, H6);
} else if (tag == "HEAD") {
} else if (tag == "head") {
return new HtmlIgnoreTagAction(*this);
} else if (tag == "TITLE") {
} else if (tag == "title") {
return new HtmlIgnoreTagAction(*this);
} else if (tag == "STYLE") {
} else if (tag == "style") {
return new HtmlStyleTagAction(*this);
} else if (tag == "SELECT") {
} else if (tag == "select") {
return new HtmlIgnoreTagAction(*this);
} else if (tag == "SCRIPT") {
} else if (tag == "script") {
return new HtmlIgnoreTagAction(*this);
} else if (tag == "A") {
} else if (tag == "a") {
return new HtmlHrefTagAction(*this);
} else if (tag == "TD") {
} else if (tag == "td") {
//return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
} else if (tag == "TR") {
} else if (tag == "tr") {
return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
} else if (tag == "DIV") {
} else if (tag == "div") {
return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END);
} else if (tag == "DT") {
} else if (tag == "dt") {
return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START);
} else if (tag == "P") {
} else if (tag == "p") {
return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END);
} else if (tag == "BR") {
} else if (tag == "br") {
return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END);
} else if (tag == "IMG") {
} else if (tag == "img") {
return new HtmlImageTagAction(*this);
} else if (tag == "UL") {
} else if (tag == "ul") {
return new HtmlListTagAction(*this, 0);
} else if (tag == "MENU") {
} else if (tag == "menu") {
return new HtmlListTagAction(*this, 0);
} else if (tag == "DIR") {
} else if (tag == "dir") {
return new HtmlListTagAction(*this, 0);
} else if (tag == "OL") {
} else if (tag == "ol") {
return new HtmlListTagAction(*this, 1);
} else if (tag == "LI") {
} else if (tag == "li") {
return new HtmlListItemTagAction(*this);
} else if (tag == "PRE") {
} else if (tag == "pre") {
if (myProcessPreTag) {
return new HtmlPreTagAction(*this);
}
} else if (tag == "TABLE") {
} else if (tag == "table") {
return new HtmlTableTagAction(*this);
}
/*
} else if (tag == "DD") {
} else if (tag == "dd") {
return 0;
} else if (tag == "DL") {
} else if (tag == "dl") {
return 0;
} else if (tag == "DFN") {
} else if (tag == "dfn") {
return 0;
} else if (tag == "SAMP") {
} else if (tag == "samp") {
return 0;
} else if (tag == "KBD") {
} else if (tag == "kbd") {
return 0;
} else if (tag == "VAR") {
} else if (tag == "var") {
return 0;
} else if (tag == "ABBR") {
} else if (tag == "abbr") {
return 0;
} else if (tag == "ACRONYM") {
} else if (tag == "acronym") {
return 0;
} else if (tag == "BLOCKQUOTE") {
} else if (tag == "blockquote") {
return 0;
} else if (tag == "Q") {
} else if (tag == "q") {
return 0;
} else if (tag == "INS") {
} else if (tag == "ins") {
return 0;
} else if (tag == "DEL") {
} else if (tag == "del") {
return 0;
} else if (tag == "BODY") {
} else if (tag == "body") {
return 0;
*/
return new DummyHtmlTagAction(*this);
@ -427,14 +429,44 @@ void HtmlBookReader::addConvertedDataToBuffer(const char *text, std::size_t len,
}
}
// Records a style entry for this tag.
// A null entry is silently dropped, so callers can pass a lookup result
// straight in without checking it first; any other entry is appended to
// the end of StyleEntries.
void HtmlBookReader::TagData::addEntry(shared_ptr<ZLTextStyleEntry> entry) {
	if (entry.isNull()) {
		return;
	}
	StyleEntries.push_back(entry);
}
bool HtmlBookReader::tagHandler(const HtmlTag &tag) {
myConverter->reset();
for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
if (tag.Attributes[i].Name == "ID") {
myBookReader.addHyperlinkLabel(tag.Attributes[i].Value);
break;
if (tag.Start) {
shared_ptr<TagData> tagData = new TagData();
tagData->addEntry(myStyleSheetTable.control(tag.Name, ""));
const std::string *cls = tag.find("class");
if (cls != 0) {
tagData->addEntry(myStyleSheetTable.control("", *cls));
tagData->addEntry(myStyleSheetTable.control(tag.Name, *cls));
}
myTagDataStack.push_back(tagData);
for (std::vector<shared_ptr<TagData> >::const_iterator it = myTagDataStack.begin(); it != myTagDataStack.end(); ++it) {
const std::vector<shared_ptr<ZLTextStyleEntry> > &entries = (*it)->StyleEntries;
const bool inheritedOnly = it + 1 != myTagDataStack.end();
for (std::vector<shared_ptr<ZLTextStyleEntry> >::const_iterator jt = entries.begin(); jt != entries.end(); ++jt) {
shared_ptr<ZLTextStyleEntry> entry = inheritedOnly ? (*jt)->inherited() : *jt;
myBookReader.addStyleEntry(*entry);
}
}
} else {
if (!myTagDataStack.empty()) {
for (int i = myTagDataStack.back()->StyleEntries.size(); i > 0; --i) {
myBookReader.addStyleCloseEntry();
}
myTagDataStack.pop_back();
}
}
const std::string *id = tag.find("id");
if (id != 0) {
myBookReader.addHyperlinkLabel(*id);
}
shared_ptr<HtmlTagAction> action = myActionMap[tag.Name];
if (action.isNull()) {
@ -553,6 +585,7 @@ void HtmlBookReader::startDocumentHandler() {
while (!myListNumStack.empty()) {
myListNumStack.pop();
}
myTagDataStack.clear();
myConverterBuffer.erase();
myKindList.clear();

View file

@ -21,6 +21,7 @@
#define __HTMLBOOKREADER_H__
#include <stack>
#include <vector>
#include <shared_ptr.h>
@ -33,15 +34,24 @@
class BookModel;
class PlainTextFormat;
class StyleSheetParser;
class ZLTextStyleEntry;
class HtmlTagAction;
class HtmlBookReader : public HtmlReader {
public:
// Style information collected for a single tag while it is open.
struct TagData {
// Entries added through addEntry(), kept in insertion order.
std::vector<shared_ptr<ZLTextStyleEntry> > StyleEntries;
// Appends entry to StyleEntries; a null entry is ignored.
void addEntry(shared_ptr<ZLTextStyleEntry> entry);
};
public:
HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding);
~HtmlBookReader();
void setFileName(const std::string fileName);
shared_ptr<StyleSheetParser> createCSSParser();
protected:
virtual shared_ptr<HtmlTagAction> createAction(const std::string &tag);
@ -73,6 +83,7 @@ private:
bool myProcessPreTag;
bool myIgnoreTitles;
std::stack<int> myListNumStack;
std::vector<shared_ptr<TagData> > myTagDataStack;
StyleSheetTable myStyleSheetTable;
shared_ptr<StyleSheetParser> myStyleSheetParser;

View file

@ -40,17 +40,17 @@ void HtmlDescriptionReader::endDocumentHandler() {
}
bool HtmlDescriptionReader::tagHandler(const HtmlTag &tag) {
if (tag.Name == "TITLE") {
if (tag.Name == "title") {
if (myReadTitle && !tag.Start) {
myBook.setTitle(myBuffer);
myBuffer.erase();
}
myReadTitle = tag.Start && myBook.title().empty();
return true;
} else if (tag.Start && tag.Name == "META") {
} else if (tag.Start && tag.Name == "meta") {
std::vector<HtmlAttribute>::const_iterator it = tag.Attributes.begin();
for (; it != tag.Attributes.end(); ++it) {
if (it->Name == "CONTENT") {
if (it->Name == "content") {
break;
}
}
@ -71,7 +71,7 @@ bool HtmlDescriptionReader::tagHandler(const HtmlTag &tag) {
}
}
}
return tag.Name != "BODY";
return tag.Name != "body";
}
bool HtmlDescriptionReader::characterDataHandler(const char *text, std::size_t len, bool) {

View file

@ -52,7 +52,7 @@ void HtmlReader::setTag(HtmlTag &tag, const std::string &name) {
const std::size_t len = tag.Name.length();
for (std::size_t i = 0; i < len; ++i) {
tag.Name[i] = std::toupper(tag.Name[i]);
tag.Name[i] = std::tolower(tag.Name[i]);
}
}
@ -264,7 +264,7 @@ void HtmlReader::readDocument(ZLInputStream &stream) {
if (ptr != start || !currentString.empty()) {
currentString.append(start, ptr - start);
for (unsigned int i = 0; i < currentString.length(); ++i) {
currentString[i] = std::toupper(currentString[i]);
currentString[i] = std::tolower(currentString[i]);
}
currentTag.addAttribute(currentString);
currentString.erase();
@ -371,3 +371,12 @@ endOfProcessing:
stream.close();
}
// Looks up an attribute of this tag by name.
//
// name is compared with each attribute's Name using exact string equality.
// Returns a pointer to the Value of the first matching attribute, or 0 when
// no attribute has that name. The pointer refers to storage inside
// Attributes, so it is only usable while the attribute list is unchanged.
const std::string *HtmlReader::HtmlTag::find(const std::string &name) const {
	const std::size_t count = Attributes.size();
	std::size_t index = 0;
	// Advance to the first attribute carrying the requested name.
	while (index < count && !(Attributes[index].Name == name)) {
		++index;
	}
	return index < count ? &Attributes[index].Value : 0;
}

View file

@ -51,6 +51,7 @@ public:
~HtmlTag();
void addAttribute(const std::string &name);
void setLastAttributeValue(const std::string &value);
const std::string *find(const std::string &name) const;
private:
HtmlTag(const HtmlTag&);

View file

@ -58,7 +58,7 @@ void HtmlTextOnlyReader::endDocumentHandler() {
}
bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) {
if (tag.Name == "SCRIPT") {
if (tag.Name == "script") {
myIgnoreText = tag.Start;
}
if ((myFilledSize < myMaxSize) && (myFilledSize > 0) && (myBuffer[myFilledSize - 1] != '\n')) {

View file

@ -28,30 +28,24 @@ HtmlMetainfoReader::HtmlMetainfoReader(Book &book, ReadType readType) :
}
bool HtmlMetainfoReader::tagHandler(const HtmlReader::HtmlTag &tag) {
if (tag.Name == "BODY") {
if (tag.Name == "body") {
return false;
} else if (((myReadType & TAGS) == TAGS) && (tag.Name == "DC:SUBJECT")) {
} else if ((myReadType & TAGS) == TAGS && tag.Name == "dc:subject") {
myReadTags = tag.Start;
if (!tag.Start && !myBuffer.empty()) {
myBook.addTag(myBuffer);
myBuffer.erase();
}
} else if (((myReadType & TITLE) == TITLE) && (tag.Name == "DC:TITLE")) {
} else if ((myReadType & TITLE) == TITLE && tag.Name == "dc:title") {
myReadTitle = tag.Start;
if (!tag.Start && !myBuffer.empty()) {
myBook.setTitle(myBuffer);
myBuffer.erase();
}
} else if (((myReadType & AUTHOR) == AUTHOR) && (tag.Name == "DC:CREATOR")) {
} else if ((myReadType & AUTHOR) == AUTHOR && tag.Name == "dc:creator") {
if (tag.Start) {
bool flag = false;
for (size_t i = 0; i < tag.Attributes.size(); ++i) {
if (tag.Attributes[i].Name == "ROLE") {
flag = ZLUnicodeUtil::toUpper(tag.Attributes[i].Value) == "AUT";
break;
}
}
if (flag) {
const std::string *role = tag.find("role");
if (role != 0 && ZLUnicodeUtil::toLower(*role) == "aut") {
if (!myBuffer.empty()) {
myBuffer += ", ";
}

View file

@ -80,9 +80,9 @@ void MobipocketHtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) {
int index = -1;
for (unsigned int i = 0; i < tag.Attributes.size() && index < 0; ++i) {
const std::string aName = tag.Attributes[i].Name;
if (aName == "RECINDEX") {
if (aName == "recindex") {
index = ZLStringUtil::parseDecimal(tag.Attributes[i].Value, -1);
} else if (aName == "SRC") {
} else if (aName == "src") {
static const std::string KINDLE_EMBED_PREFIX = "kindle:embed:";
std::string aValue = tag.Attributes[i].Value;
if (ZLStringUtil::stringStartsWith(aValue, KINDLE_EMBED_PREFIX)) {
@ -206,12 +206,10 @@ const std::map<size_t,std::string> &MobipocketHtmlBookReader::TOCReader::entries
void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
MobipocketHtmlBookReader &reader = (MobipocketHtmlBookReader&)myReader;
if (tag.Start) {
for (unsigned int i = 0; i < tag.Attributes.size(); ++i) {
if (tag.Attributes[i].Name == "FILEPOS") {
const std::string &value = tag.Attributes[i].Value;
if (!value.empty()) {
const std::string *filepos = tag.find("filepos");
if (filepos != 0 && !filepos->empty()) {
std::string label = "&";
int intValue = atoi(value.c_str());
int intValue = atoi(filepos->c_str());
if (intValue > 0) {
if (reader.myTocReader.rangeContainsPosition(tag.Offset)) {
reader.myTocReader.startReadEntry(intValue);
@ -226,8 +224,6 @@ void MobipocketHtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) {
return;
}
}
}
}
} else {
reader.myTocReader.endReadEntry();
}
@ -254,11 +250,11 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
for (size_t i = 0; i < tag.Attributes.size(); ++i) {
const std::string &name = tag.Attributes[i].Name;
const std::string &value = tag.Attributes[i].Value;
if (name == "TITLE") {
if (name == "title") {
title = value;
} else if (name == "FILEPOS") {
} else if (name == "filepos") {
filepos = value;
} else if ((name == "TYPE") && (ZLUnicodeUtil::toUpper(value) == "TOC")) {
} else if ((name == "type") && (ZLUnicodeUtil::toLower(value) == "toc")) {
isTocReference = true;
}
}
@ -275,17 +271,17 @@ void MobipocketHtmlReferenceTagAction::run(const HtmlReader::HtmlTag &tag) {
}
shared_ptr<HtmlTagAction> MobipocketHtmlBookReader::createAction(const std::string &tag) {
if (tag == "IMG") {
if (tag == "img") {
return new MobipocketHtmlImageTagAction(*this);
} else if (tag == "HR") {
} else if (tag == "hr") {
return new MobipocketHtmlHrTagAction(*this);
} else if (tag == "A") {
} else if (tag == "a") {
return new MobipocketHtmlHrefTagAction(*this);
} else if (tag == "GUIDE") {
} else if (tag == "guide") {
return new MobipocketHtmlGuideTagAction(*this);
} else if (tag == "REFERENCE") {
} else if (tag == "reference") {
return new MobipocketHtmlReferenceTagAction(*this);
} else if (tag == "MBP:PAGEBREAK") {
} else if (tag == "mbp:pagebreak") {
return new MobipocketHtmlPagebreakTagAction(*this);
}
return HtmlBookReader::createAction(tag);

View file

@ -24,12 +24,12 @@
#include <ZLLanguageUtil.h>
#include <ZLImage.h>
#include <ZLFileImage.h>
#include <ZLLogger.h>
#include "PdbPlugin.h"
#include "PalmDocStream.h"
#include "MobipocketHtmlBookReader.h"
#include "../css/StyleSheetParser.h"
#include "../txt/PlainTextFormat.h"
#include "../../library/Book.h"
#include "../../bookmodel/BookModel.h"
@ -42,28 +42,18 @@ const std::string MobipocketPlugin::supportedFileType() const {
//}
void MobipocketPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const {
MobipocketHtmlBookReader(file, model, format, encoding).readDocument(stream);
MobipocketHtmlBookReader reader(file, model, format, encoding);
shared_ptr<StyleSheetParser> cssParser = reader.createCSSParser();
cssParser->parseStream(new PalmDocCssStream(file));
reader.readDocument(stream);
}
bool MobipocketPlugin::readModel(BookModel &model) const {
const Book &book = *model.book();
const ZLFile &file = book.file();
ZLLogger::Instance().registerClass("MobiCSS");
shared_ptr<ZLInputStream> cssStream = new PalmDocCssStream(file);
if (cssStream->open()) {
char *buffer = new char[1024];
while (true) {
const int len = cssStream->read(buffer, 1024);
if (len <= 0) {
break;
}
ZLLogger::Instance().println("MobiCSS", std::string(buffer, len));
}
delete[] buffer;
cssStream->close();
}
shared_ptr<ZLInputStream> stream = createStream(file);
PlainTextFormat format(file);

View file

@ -62,14 +62,15 @@ protected:
static void endParagraph(XHTMLReader &reader);
};
class XHTMLReader : public ZLXMLReader {
public:
// NOTE(review): presumably the per-element state XHTMLReader keeps while a
// tag is open — confirm against the XHTMLReader implementation.
struct TagData {
std::vector<FBTextKind> TextKinds;
std::vector<shared_ptr<ZLTextStyleEntry> > StyleEntries;
// NOTE(review): the name suggests a page break is requested once the tag
// closes — verify where this flag is read.
bool PageBreakAfter;
};
class XHTMLReader : public ZLXMLReader {
public:
static XHTMLTagAction *addAction(const std::string &tag, XHTMLTagAction *action);
static XHTMLTagAction *addAction(const std::string &ns, const std::string &name, XHTMLTagAction *action);