diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 1b2dada6..cfceff4d 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -350,6 +350,9 @@ bool TextSplit::text_to_words(const string &in) m_flags & TXTS_KEEPWILD ? " keepwild" : "", in.substr(0,50).c_str())); + if (in.empty()) + return true; + m_span.erase(); m_inNumber = false; m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0; diff --git a/src/doc/user/00README.txt b/src/doc/user/00README.txt index 99c22639..3a4e38c0 100644 --- a/src/doc/user/00README.txt +++ b/src/doc/user/00README.txt @@ -31,3 +31,7 @@ making progress. The current conclusion would seem to be that the SGML version should stay operational to give an easy way to make the PDF one on FreeBSD. + +But see also notes about dblatex on the asciidoc page. Actually asciidoc would +be a candidate replacement for the source format. +http://www.methods.co.nz/asciidoc/userguide.html diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index a98f8732..6f963ec2 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -540,8 +540,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth) !stringlowercmp("x-user-defined", charset) || !stringlowercmp("x-unknown", charset) || !stringlowercmp("unknown", charset) ) { - m_config->getConfParam("maildefcharset", charset); - if (charset.empty()) + if (!m_config->getConfParam("maildefcharset", charset)) charset = "CP1252"; } diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index b2a8d4fc..8fb63d50 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -462,7 +462,7 @@ string url_encode(const string& url, string::size_type offs) string out = url.substr(0, offs); const char *cp = url.c_str(); for (string::size_type i = offs; i < url.size(); i++) { - int c; + unsigned int c; const char *h = "0123456789ABCDEF"; c = cp[i]; if (c <= 0x20 || diff --git a/src/utils/strmatcher.cpp b/src/utils/strmatcher.cpp index 44f04369..dcd3aa1b 100644 --- a/src/utils/strmatcher.cpp +++ b/src/utils/strmatcher.cpp @@ -28,6 +28,7 @@ using std::string; #include "cstr.h" #include "debuglog.h" #include "strmatcher.h" +#include "pathut.h" bool StrWildMatcher::match(const string& val) const { @@ -38,8 +39,9 @@ bool StrWildMatcher::match(const string& val) const case 0: return true; case FNM_NOMATCH: return false; default: - LOGDEB0(("StrWildMatcher::match error: [%s] against [%s]\n", - m_sexp.c_str(), val.c_str())); + LOGINFO(("StrWildMatcher::match:err: e [%s] s [%s] (%s) ret %d\n", + m_sexp.c_str(), val.c_str(), + url_encode(val).c_str(), ret)); return false; } } diff --git a/src/utils/utf8iter.h b/src/utils/utf8iter.h index ceb7bc47..4e8894a1 100644 --- a/src/utils/utf8iter.h +++ b/src/utils/utf8iter.h @@ -32,7 +32,7 @@ class Utf8Iter { public: Utf8Iter(const std::string &in) - : m_s(in), m_cl(0), m_pos(0), m_charpos(0), m_error(false) + : m_s(in), m_cl(0), m_pos(0), m_charpos(0) { update_cl(); } @@ -44,7 +44,6 @@ public: m_cl = 0; m_pos = 0; m_charpos = 0; - m_error = false; update_cl(); } @@ -62,15 +61,15 @@ public: int l; while (mypos < m_s.length() && mycp != charpos) { l = get_cl(mypos); - if (l <= 0) + if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l)) return (unsigned int)-1; mypos += l; ++mycp; } if (mypos < m_s.length() && mycp == charpos) { l = get_cl(mypos); - if (poslok(mypos, l)) - return getvalueat(mypos, get_cl(mypos)); + if (poslok(mypos, l) && checkvalidat(mypos, l)) + return getvalueat(mypos, l); } return (unsigned int)-1; } @@ -83,7 +82,7 @@ public: #ifdef UTF8ITER_CHECK assert(m_cl != 0); #endif - if (m_cl <= 0) + if (m_cl == 0) return std::string::npos; m_pos += m_cl; @@ -96,9 +95,9 @@ public: unsigned int operator*() { #ifdef UTF8ITER_CHECK - assert(m_cl != 0); + assert(m_cl > 0); #endif - return getvalueat(m_pos, m_cl); + return m_cl == 0 ? (unsigned int)-1 : getvalueat(m_pos, m_cl); } /** Append current utf-8 possibly multi-byte character to string param. @@ -116,15 +115,15 @@ public: #ifdef UTF8ITER_CHECK assert(m_cl != 0); #endif - return m_s.substr(m_pos, m_cl); + return m_cl > 0 ? m_s.substr(m_pos, m_cl) : std::string(); } - bool eof() { + bool eof() const { return m_pos == m_s.length(); } - bool error() { - return m_error; + bool error() const { + return m_cl == 0; } /** Return current byte offset in input string */ @@ -147,13 +146,11 @@ private: const std::string& m_s; // Character length at current position. A value of zero indicates // an error. - unsigned int m_cl; + unsigned int m_cl; // Current byte offset in string. std::string::size_type m_pos; // Current character position unsigned int m_charpos; - // Am I ok ? - mutable bool m_error; // Check position and cl against string length bool poslok(std::string::size_type p, int l) const { @@ -163,7 +160,7 @@ private: return p != std::string::npos && l > 0 && p + l <= m_s.length(); } - // Update current char length in object state, minimum checking + // Update current char length in object state, check // for errors inline void update_cl() { @@ -176,7 +173,34 @@ private: // basically prevents the caller to discriminate error and eof. // m_pos = m_s.length(); m_cl = 0; - m_error = true; + return; + } + if (!checkvalidat(m_pos, m_cl)) { + m_cl = 0; + } + } + + inline bool checkvalidat(std::string::size_type p, int l) const + { + switch (l) { + case 1: + return (unsigned char)m_s[p] < 128; + case 2: + return (((unsigned char)m_s[p]) & 224) == 192 + && (((unsigned char)m_s[p+1]) & 192) == 128; + case 3: + return (((unsigned char)m_s[p]) & 240) == 224 + && (((unsigned char)m_s[p+1]) & 192) == 128 + && (((unsigned char)m_s[p+2]) & 192) == 128 + ; + case 4: + return (((unsigned char)m_s[p]) & 248) == 240 + && (((unsigned char)m_s[p+1]) & 192) == 128 + && (((unsigned char)m_s[p+2]) & 192) == 128 + && (((unsigned char)m_s[p+3]) & 192) == 128 + ; + default: + return false; } } @@ -249,7 +273,6 @@ private: #ifdef UTF8ITER_CHECK assert(l <= 4); #endif - m_error = true; return (unsigned int)-1; } }