1
0
Fork 0
mirror of https://github.com/geometer/FBReaderJ.git synced 2025-10-04 18:29:23 +02:00

filter CSS comments in CSSInputStream

This commit is contained in:
Nikolay Pultsin 2014-03-30 02:28:46 +03:00
parent 54eecb9a3e
commit 5cea231a6a
9 changed files with 394 additions and 48 deletions

View file

@ -101,6 +101,8 @@ LOCAL_SRC_FILES := \
NativeFormats/fbreader/src/formats/fb2/FB2TagManager.cpp \ NativeFormats/fbreader/src/formats/fb2/FB2TagManager.cpp \
NativeFormats/fbreader/src/formats/fb2/FB2UidReader.cpp \ NativeFormats/fbreader/src/formats/fb2/FB2UidReader.cpp \
NativeFormats/fbreader/src/formats/css/FontMap.cpp \ NativeFormats/fbreader/src/formats/css/FontMap.cpp \
NativeFormats/fbreader/src/formats/css/CSSInputStream.cpp \
NativeFormats/fbreader/src/formats/css/StringInputStream.cpp \
NativeFormats/fbreader/src/formats/css/StyleSheetParser.cpp \ NativeFormats/fbreader/src/formats/css/StyleSheetParser.cpp \
NativeFormats/fbreader/src/formats/css/StyleSheetTable.cpp \ NativeFormats/fbreader/src/formats/css/StyleSheetTable.cpp \
NativeFormats/fbreader/src/formats/html/HtmlBookReader.cpp \ NativeFormats/fbreader/src/formats/html/HtmlBookReader.cpp \

View file

@ -0,0 +1,170 @@
/*
* Copyright (C) 2004-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <ZLLogger.h>
#include "CSSInputStream.h"
// Allocates `capacity` bytes of storage and starts with an empty buffer.
// The advertised Capacity is one byte less than the allocation, so a buffer
// that is not yet "full" always has room for at least two more characters
// (fillBufferNoComments() may emit two characters for one input character).
CSSInputStream::Buffer::Buffer(std::size_t capacity) :
	Capacity(capacity - 1),
	Offset(0),
	Length(0),
	Content(new char[capacity]) {
}
// Releases the character storage allocated by the constructor.
CSSInputStream::Buffer::~Buffer() {
delete[] Content;
}
// Wraps `base` in a filter that strips CSS comments from the data it reads.
// Both the raw input buffer and the comment-free output buffer are allocated
// with 8192 bytes. The scanner state is initialised here as well, so that a
// read() issued before open() never switches on an indeterminate value.
CSSInputStream::CSSInputStream(shared_ptr<ZLInputStream> base) : myBaseStream(base), myBuffer(8192), myBufferNoComments(8192), myState(PLAIN_TEXT) {
	//ZLLogger::Instance().registerClass("CSSInputStream");
}
// Closes the stream on destruction; close() forwards to the wrapped base stream.
CSSInputStream::~CSSInputStream() {
close();
}
bool CSSInputStream::open() {
myState = PLAIN_TEXT;
return myBaseStream->open();
}
// Copies up to `maxSize` bytes of comment-free CSS into `buffer`.
// When `buffer` is null nothing is copied, but the data is still consumed,
// so the call skips over the bytes instead.
// Returns the number of bytes delivered (or skipped); a value smaller than
// `maxSize` means the filtered data ran out.
std::size_t CSSInputStream::read(char *buffer, std::size_t maxSize) {
	std::size_t delivered = 0;
	for (; delivered < maxSize; ) {
		// Refill the filtered buffer if it has been drained completely.
		fillBufferNoComments();
		if (myBufferNoComments.isEmpty()) {
			break;
		}

		const std::size_t wanted = maxSize - delivered;
		const std::size_t available = myBufferNoComments.Length - myBufferNoComments.Offset;
		const std::size_t chunk = available < wanted ? available : wanted;

		if (buffer != 0) {
			const char *from = myBufferNoComments.Content + myBufferNoComments.Offset;
			std::memcpy(buffer + delivered, from, chunk);
		}
		myBufferNoComments.Offset += chunk;
		delivered += chunk;
	}
	//ZLLogger::Instance().println("CSSInputStream", std::string(buffer, ready));
	return delivered;
}
// Closes the wrapped base stream; the filter keeps no resources of its own
// that need releasing here.
void CSSInputStream::close() {
	myBaseStream->close();
}
// Not implemented yet: the call is ignored and the read position does not change.
void CSSInputStream::seek(int offset, bool absoluteOffset) {
// TODO: implement
}
// Not implemented yet: always reports 0, regardless of how much has been read.
std::size_t CSSInputStream::offset() const {
// TODO: implement
return 0;
}
// Reports the size of the wrapped base stream. This is the size BEFORE
// comments are removed, so it can exceed the number of bytes read() delivers.
std::size_t CSSInputStream::sizeOfOpened() {
// TODO: not a correct computation
return myBaseStream->sizeOfOpened();
}
void CSSInputStream::fillBufferNoComments() {
if (!myBufferNoComments.isEmpty()) {
return;
}
myBufferNoComments.Length = 0;
myBufferNoComments.Offset = 0;
while (!myBufferNoComments.isFull()) {
if (myBuffer.isEmpty()) {
myBuffer.Offset = 0;
myBuffer.Length = myBaseStream->read(myBuffer.Content, myBuffer.Capacity);
}
if (myBuffer.isEmpty()) {
break;
}
while (!myBuffer.isEmpty() && !myBufferNoComments.isFull()) {
const char ch = myBuffer.Content[myBuffer.Offset++];
switch (myState) {
case PLAIN_TEXT:
switch (ch) {
case '\'':
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
myState = S_QUOTED_TEXT;
break;
case '"':
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
myState = D_QUOTED_TEXT;
break;
case '/':
myState = COMMENT_START_SLASH;
break;
default:
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
break;
}
break;
case S_QUOTED_TEXT:
if (ch == '\'') {
myState = PLAIN_TEXT;
}
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
break;
case D_QUOTED_TEXT:
if (ch == '"') {
myState = PLAIN_TEXT;
}
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
break;
case COMMENT_START_SLASH:
switch (ch) {
case '/':
myBufferNoComments.Content[myBufferNoComments.Length++] = '/';
break;
case '*':
myState = COMMENT;
break;
default:
myState = PLAIN_TEXT;
myBufferNoComments.Content[myBufferNoComments.Length++] = '/';
myBufferNoComments.Content[myBufferNoComments.Length++] = ch;
break;
}
break;
case COMMENT:
if (ch == '*') {
myState = COMMENT_END_ASTERISK;
}
break;
case COMMENT_END_ASTERISK:
switch (ch) {
case '/':
myState = PLAIN_TEXT;
break;
case '*':
break;
default:
myState = COMMENT;
break;
}
break;
}
}
}
}

View file

@ -0,0 +1,81 @@
/*
* Copyright (C) 2004-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __CSSINPUTSTREAM_H__
#define __CSSINPUTSTREAM_H__
#include <shared_ptr.h>
#include <ZLInputStream.h>
// Input stream filter that wraps another ZLInputStream and removes CSS
// comments (/* ... */) from the data on the fly. Text inside single- or
// double-quoted strings is passed through untouched.
class CSSInputStream : public ZLInputStream {

public:
	// `base` is the stream that supplies the raw CSS text.
	CSSInputStream(shared_ptr<ZLInputStream> base);
	~CSSInputStream();

private:
	// Stream operations. They are private in this class, so callers reach
	// them through the ZLInputStream interface (presumably virtual there;
	// verify against ZLInputStream.h).
	bool open();
	std::size_t read(char *buffer, std::size_t maxSize);
	void close();

	void seek(int offset, bool absoluteOffset);
	std::size_t offset() const;
	std::size_t sizeOfOpened();

private:
	// Refills myBufferNoComments from the base stream, dropping comments.
	void fillBufferNoComments();

private:
	shared_ptr<ZLInputStream> myBaseStream;

	// Fixed-size character buffer with a read position.
	// Valid data occupies Content[Offset .. Length).
	struct Buffer {
		explicit Buffer(std::size_t capacity);
		~Buffer();
		// True when every stored byte has been consumed.
		bool isEmpty() const;
		// True when Length has reached Capacity.
		bool isFull() const;

		const std::size_t Capacity;
		std::size_t Offset;
		std::size_t Length;
		char *Content;

	private:
		// Content is an owning raw pointer, so a copy would lead to a double
		// delete[]. Copying is therefore disabled (declared, never defined).
		Buffer(const Buffer&);
		Buffer &operator = (const Buffer&);
	};

	// Raw bytes as read from the base stream.
	Buffer myBuffer;
	// The same data with comments removed; read() serves from this buffer.
	Buffer myBufferNoComments;

	// Scanner state; kept between fillBufferNoComments() calls.
	enum {
		PLAIN_TEXT,
		S_QUOTED_TEXT,
		D_QUOTED_TEXT,
		COMMENT_START_SLASH,
		COMMENT,
		COMMENT_END_ASTERISK
	} myState;
};
// True when the read position has caught up with the stored data,
// i.e. there is nothing left to consume.
inline bool CSSInputStream::Buffer::isEmpty() const {
return Offset == Length;
}
// True when the amount of stored data has reached (or passed) Capacity.
inline bool CSSInputStream::Buffer::isFull() const {
return Length >= Capacity;
}
#endif /* __CSSINPUTSTREAM_H__ */

View file

@ -0,0 +1,58 @@
/*
* Copyright (C) 2004-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstring>
#include <ZLLogger.h>
#include "StringInputStream.h"
// Creates a stream over the `len` bytes starting at `cString`.
// Only the pointer is stored, the data is not copied; the read position
// starts at 0.
StringInputStream::StringInputStream(const char *cString, std::size_t len) : myCString(cString), myLength(len), myOffset(0) {
//ZLLogger::Instance().registerClass("StringInputStream");
}
// Nothing to open for an in-memory string; always succeeds.
// Note that the read position is not reset here.
bool StringInputStream::open() {
return true;
}
// Copies up to `maxSize` bytes from the current position into `buffer` and
// advances the position by the same amount.
// A null `buffer` is accepted and means "skip": the position advances but
// nothing is copied (same convention as CSSInputStream::read). The copy is
// also skipped when no bytes are left, so memcpy never sees a null pointer.
// Returns the number of bytes consumed; 0 once the end has been reached.
std::size_t StringInputStream::read(char *buffer, std::size_t maxSize) {
	const std::size_t len = std::min(maxSize, myLength - myOffset);
	if (buffer != 0 && len > 0) {
		std::memcpy(buffer, myCString + myOffset, len);
	}
	myOffset += len;
	//ZLLogger::Instance().println("StringInputStream", std::string(buffer, len));
	return len;
}
// Nothing to release: the stream does not own the string it reads from.
void StringInputStream::close() {
}
// Moves the read position. With `absoluteOffset` the new position is
// `offset` itself, otherwise `offset` is added to the current position.
// The result is clamped to the range [0, length of the string].
void StringInputStream::seek(int offset, bool absoluteOffset) {
	if (!absoluteOffset) {
		offset += myOffset;
	}

	const int upperBound = static_cast<int>(myLength);
	int target = offset;
	if (upperBound < target) {
		target = upperBound;
	}
	if (target < 0) {
		target = 0;
	}
	myOffset = target;
}
// Current read position, counted in bytes from the start of the string.
std::size_t StringInputStream::offset() const {
return myOffset;
}
// Total length of the underlying string, as passed to the constructor.
std::size_t StringInputStream::sizeOfOpened() {
return myLength;
}

View file

@ -0,0 +1,46 @@
/*
* Copyright (C) 2004-2014 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#ifndef __STRINGINPUTSTREAM_H__
#define __STRINGINPUTSTREAM_H__
#include <shared_ptr.h>
#include <ZLInputStream.h>
// Read-only ZLInputStream over a block of characters held in memory.
// The stream stores only the pointer it is given (no copy is made), so the
// characters must stay valid for as long as the stream is in use.
class StringInputStream : public ZLInputStream {
public:
// `cstring` points to the data, `len` is its length in bytes.
StringInputStream(const char *cstring, std::size_t len);
private:
// Stream operations; private here, so callers go through ZLInputStream.
bool open();
std::size_t read(char *buffer, std::size_t maxSize);
void close();
void seek(int offset, bool absoluteOffset);
std::size_t offset() const;
std::size_t sizeOfOpened();
private:
// Start of the data (not owned).
const char *myCString;
// Number of bytes available at myCString.
const std::size_t myLength;
// Current read position, 0 <= myOffset <= myLength.
std::size_t myOffset;
};
#endif /* __STRINGINPUTSTREAM_H__ */

View file

@ -26,6 +26,8 @@
#include <ZLLogger.h> #include <ZLLogger.h>
#include "StyleSheetParser.h" #include "StyleSheetParser.h"
#include "StringInputStream.h"
#include "CSSInputStream.h"
#include "../util/MiscUtil.h" #include "../util/MiscUtil.h"
StyleSheetParser::StyleSheetParser(const std::string &pathPrefix) : myPathPrefix(pathPrefix) { StyleSheetParser::StyleSheetParser(const std::string &pathPrefix) : myPathPrefix(pathPrefix) {
@ -40,13 +42,32 @@ void StyleSheetParser::reset() {
myWord.erase(); myWord.erase();
myAttributeName.erase(); myAttributeName.erase();
myReadState = WAITING_FOR_SELECTOR; myReadState = WAITING_FOR_SELECTOR;
myInsideComment = false;
mySelectorString.erase(); mySelectorString.erase();
myMap.clear(); myMap.clear();
myImportVector.clear(); myImportVector.clear();
myFirstRuleProcessed = false; myFirstRuleProcessed = false;
} }
void StyleSheetParser::parseString(const char *data, std::size_t len) {
parseStream(new StringInputStream(data, len));
}
// Parses CSS read from `stream`. The stream is first wrapped in a
// CSSInputStream, so the parser itself never sees comments.
// The data is pulled in chunks of at most 1024 bytes and each chunk is fed
// to parse(). Nothing happens if the stream cannot be opened.
//
// The chunk buffer lives on the stack: this function runs once per
// parseString() call, and an automatic array needs neither a heap allocation
// nor a matching delete[] on every exit path.
//
// NOTE(review): a new CSSInputStream is created on every call, so comment
// state is not carried over between successive parseString() calls - a
// comment split across two calls would not be recognised. Confirm whether
// callers can deliver CSS text in pieces.
void StyleSheetParser::parseStream(shared_ptr<ZLInputStream> stream) {
	stream = new CSSInputStream(stream);
	if (stream->open()) {
		char buffer[1024];
		while (true) {
			const std::size_t len = stream->read(buffer, sizeof(buffer));
			if (len == 0) {
				break;
			}
			// len never exceeds sizeof(buffer), so the narrowing is safe.
			parse(buffer, static_cast<int>(len));
		}
		stream->close();
	}
}
void StyleSheetParser::parse(const char *text, int len, bool final) { void StyleSheetParser::parse(const char *text, int len, bool final) {
const char *start = text; const char *start = text;
const char *end = text + len; const char *end = text + len;
@ -172,25 +193,11 @@ void StyleSheetParser::processControl(const char control) {
} }
} }
void StyleSheetParser::processWord(std::string &word) { void StyleSheetParser::processWord(const std::string &word) {
while (!word.empty()) { if (word.empty()) {
int index = word.find(myInsideComment ? "*/" : "/*"); return;
if (!myInsideComment) {
if (index == -1) {
processWordWithoutComments(word);
} else if (index > 0) {
processWordWithoutComments(word.substr(0, index));
}
}
if (index == -1) {
break;
}
myInsideComment = !myInsideComment;
word.erase(0, index + 2);
}
} }
void StyleSheetParser::processWordWithoutComments(const std::string &word) {
switch (myReadState) { switch (myReadState) {
case WAITING_FOR_SELECTOR: case WAITING_FOR_SELECTOR:
mySelectorString = word; mySelectorString = word;
@ -229,7 +236,7 @@ void StyleSheetParser::processWordWithoutComments(const std::string &word) {
StyleSheetSingleStyleParser::StyleSheetSingleStyleParser(const std::string &pathPrefix) : StyleSheetParser(pathPrefix) { StyleSheetSingleStyleParser::StyleSheetSingleStyleParser(const std::string &pathPrefix) : StyleSheetParser(pathPrefix) {
} }
shared_ptr<ZLTextStyleEntry> StyleSheetSingleStyleParser::parseString(const char *text) { shared_ptr<ZLTextStyleEntry> StyleSheetSingleStyleParser::parseSingleEntry(const char *text) {
myReadState = WAITING_FOR_ATTRIBUTE; myReadState = WAITING_FOR_ATTRIBUTE;
parse(text, std::strlen(text), true); parse(text, std::strlen(text), true);
shared_ptr<ZLTextStyleEntry> control = StyleSheetTable::createControl(myMap); shared_ptr<ZLTextStyleEntry> control = StyleSheetTable::createControl(myMap);
@ -308,21 +315,6 @@ void StyleSheetMultiStyleParser::processAtRule(const std::string &name, const St
} }
} }
// Previous implementation (removed by this commit): opens `stream`, reads it
// in chunks of up to 1024 bytes into a heap buffer, passes every chunk to
// parse(), then frees the buffer and closes the stream. Does nothing if the
// stream cannot be opened.
void StyleSheetMultiStyleParser::parseStream(ZLInputStream &stream) {
if (stream.open()) {
char *buffer = new char[1024];
while (true) {
int len = stream.read(buffer, 1024);
if (len == 0) {
break;
}
parse(buffer, len);
}
delete[] buffer;
stream.close();
}
}
StyleSheetTableParser::StyleSheetTableParser(const std::string &pathPrefix, StyleSheetTable &styleTable, FontMap &fontMap) : StyleSheetMultiStyleParser(pathPrefix, fontMap), myStyleTable(styleTable) { StyleSheetTableParser::StyleSheetTableParser(const std::string &pathPrefix, StyleSheetTable &styleTable, FontMap &fontMap) : StyleSheetMultiStyleParser(pathPrefix, fontMap), myStyleTable(styleTable) {
} }
@ -351,7 +343,7 @@ void StyleSheetParserWithCache::importCSS(const std::string &path) {
if (!stream.isNull()) { if (!stream.isNull()) {
StyleSheetParserWithCache importParser(fileToImport, myPathPrefix, myFontMap, myEncryptionMap); StyleSheetParserWithCache importParser(fileToImport, myPathPrefix, myFontMap, myEncryptionMap);
importParser.myProcessedFiles.insert(myProcessedFiles.begin(), myProcessedFiles.end()); importParser.myProcessedFiles.insert(myProcessedFiles.begin(), myProcessedFiles.end());
importParser.parseStream(*stream); importParser.parseStream(stream);
myEntries.insert(myEntries.end(), importParser.myEntries.begin(), importParser.myEntries.end()); myEntries.insert(myEntries.end(), importParser.myEntries.begin(), importParser.myEntries.end());
} }
myProcessedFiles.insert(fileToImport.path()); myProcessedFiles.insert(fileToImport.path());

View file

@ -40,7 +40,8 @@ protected:
public: public:
virtual ~StyleSheetParser(); virtual ~StyleSheetParser();
void reset(); void reset();
void parse(const char *text, int len, bool final = false); void parseStream(shared_ptr<ZLInputStream> stream);
void parseString(const char *data, std::size_t len);
protected: protected:
virtual void storeData(const std::string &selector, const StyleSheetTable::AttributeMap &map); virtual void storeData(const std::string &selector, const StyleSheetTable::AttributeMap &map);
@ -48,9 +49,9 @@ protected:
virtual void importCSS(const std::string &path); virtual void importCSS(const std::string &path);
private: private:
void parse(const char *text, int len, bool final = false);
bool isControlSymbol(const char symbol); bool isControlSymbol(const char symbol);
void processWord(std::string &word); void processWord(const std::string &word);
void processWordWithoutComments(const std::string &word);
void processControl(const char control); void processControl(const char control);
protected: protected:
@ -67,7 +68,6 @@ private:
ATTRIBUTE_NAME, ATTRIBUTE_NAME,
ATTRIBUTE_VALUE, ATTRIBUTE_VALUE,
} myReadState; } myReadState;
bool myInsideComment;
std::string mySelectorString; std::string mySelectorString;
StyleSheetTable::AttributeMap myMap; StyleSheetTable::AttributeMap myMap;
std::vector<std::string> myImportVector; std::vector<std::string> myImportVector;
@ -80,7 +80,7 @@ class StyleSheetSingleStyleParser : public StyleSheetParser {
public: public:
StyleSheetSingleStyleParser(const std::string &pathPrefix); StyleSheetSingleStyleParser(const std::string &pathPrefix);
shared_ptr<ZLTextStyleEntry> parseString(const char *text); shared_ptr<ZLTextStyleEntry> parseSingleEntry(const char *text);
}; };
class StyleSheetMultiStyleParser : public StyleSheetParser { class StyleSheetMultiStyleParser : public StyleSheetParser {
@ -88,9 +88,6 @@ class StyleSheetMultiStyleParser : public StyleSheetParser {
protected: protected:
StyleSheetMultiStyleParser(const std::string &pathPrefix, FontMap &map); StyleSheetMultiStyleParser(const std::string &pathPrefix, FontMap &map);
public:
void parseStream(ZLInputStream &stream);
protected: protected:
virtual void store(const std::string &tag, const std::string &aClass, const StyleSheetTable::AttributeMap &map) = 0; virtual void store(const std::string &tag, const std::string &aClass, const StyleSheetTable::AttributeMap &map) = 0;

View file

@ -519,7 +519,7 @@ void HtmlBookReader::preformattedCharacterDataHandler(const char *text, std::siz
bool HtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) { bool HtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) {
if (!myStyleSheetParser.isNull()) { if (!myStyleSheetParser.isNull()) {
myStyleSheetParser->parse(text, len); myStyleSheetParser->parseString(text, len);
return true; return true;
} }

View file

@ -277,7 +277,7 @@ void XHTMLTagLinkAction::doAtStart(XHTMLReader &reader, const char **xmlattribut
shared_ptr<ZLInputStream> cssStream = cssFile.inputStream(reader.myEncryptionMap); shared_ptr<ZLInputStream> cssStream = cssFile.inputStream(reader.myEncryptionMap);
if (!cssStream.isNull()) { if (!cssStream.isNull()) {
ZLLogger::Instance().println("CSS", "parsing file"); ZLLogger::Instance().println("CSS", "parsing file");
parser->parseStream(*cssStream); parser->parseStream(cssStream);
} }
} }
parser->applyToTable(reader.myStyleSheetTable); parser->applyToTable(reader.myStyleSheetTable);
@ -729,8 +729,8 @@ void XHTMLReader::startElementHandler(const char *tag, const char **attributes)
addTextStyleEntry(sTag, *it); addTextStyleEntry(sTag, *it);
const char *style = attributeValue(attributes, "style"); const char *style = attributeValue(attributes, "style");
if (style != 0) { if (style != 0) {
ZLLogger::Instance().println("CSS", std::string("parsing style attribute: ") + style); //ZLLogger::Instance().println("CSS", std::string("parsing style attribute: ") + style);
shared_ptr<ZLTextStyleEntry> entry = myStyleParser->parseString(style); shared_ptr<ZLTextStyleEntry> entry = myStyleParser->parseSingleEntry(style);
addTextStyleEntry(*entry); addTextStyleEntry(*entry);
myStyleEntryStack.push_back(entry); myStyleEntryStack.push_back(entry);
} }
@ -813,7 +813,7 @@ void XHTMLReader::characterDataHandler(const char *text, std::size_t len) {
break; break;
case XHTML_READ_STYLE: case XHTML_READ_STYLE:
if (!myTableParser.isNull()) { if (!myTableParser.isNull()) {
myTableParser->parse(text, len); myTableParser->parseString(text, len);
} }
break; break;
case XHTML_READ_BODY: case XHTML_READ_BODY: