40 #include "mrmailbox.h" 42 #include "mrsaxparser.h" 50 static const char* s_ent[] = {
58 "lt;",
"<",
"gt;",
">",
"quot;",
"\"",
"apos;",
"'",
59 "amp;",
"&",
"nbsp;",
" ",
62 "iexcl;",
"¡",
"cent;",
"¢",
"pound;",
"£",
"curren;",
"¤",
63 "yen;",
"¥",
"brvbar;",
"¦",
"sect;",
"§",
"uml;",
"¨",
64 "copy;",
"©",
"ordf;",
"ª",
"laquo;",
"«",
"not;",
"¬",
65 "shy;",
"-",
"reg;",
"®",
"macr;",
"¯",
"deg;",
"°",
66 "plusmn;",
"±",
"sup2;",
"²",
"sup3;",
"³",
"acute;",
"´",
67 "micro;",
"µ",
"para;",
"¶",
"middot;",
"·",
"cedil;",
"¸",
68 "sup1;",
"¹",
"ordm;",
"º",
"raquo;",
"»",
"frac14;",
"¼",
69 "frac12;",
"½",
"frac34;",
"¾",
"iquest;",
"¿",
"Agrave;",
"À",
70 "Aacute;",
"Á",
"Acirc;",
"Â",
"Atilde;",
"Ã",
"Auml;",
"Ä",
71 "Aring;",
"Å",
"AElig;",
"Æ",
"Ccedil;",
"Ç",
"Egrave;",
"È",
72 "Eacute;",
"É",
"Ecirc;",
"Ê",
"Euml;",
"Ë",
"Igrave;",
"Ì",
73 "Iacute;",
"Í",
"Icirc;",
"Î",
"Iuml;",
"Ï",
"ETH;",
"Ð",
74 "Ntilde;",
"Ñ",
"Ograve;",
"Ò",
"Oacute;",
"Ó",
"Ocirc;",
"Ô",
75 "Otilde;",
"Õ",
"Ouml;",
"Ö",
"times;",
"×",
"Oslash;",
"Ø",
76 "Ugrave;",
"Ù",
"Uacute;",
"Ú",
"Ucirc;",
"Û",
"Uuml;",
"Ü",
77 "Yacute;",
"Ý",
"THORN;",
"Þ",
"szlig;",
"ß",
"agrave;",
"à",
78 "aacute;",
"á",
"acirc;",
"â",
"atilde;",
"ã",
"auml;",
"ä",
79 "aring;",
"å",
"aelig;",
"æ",
"ccedil;",
"ç",
"egrave;",
"è",
80 "eacute;",
"é",
"ecirc;",
"ê",
"euml;",
"ë",
"igrave;",
"ì",
81 "iacute;",
"í",
"icirc;",
"î",
"iuml;",
"ï",
"eth;",
"ð",
82 "ntilde;",
"ñ",
"ograve;",
"ò",
"oacute;",
"ó",
"ocirc;",
"ô",
83 "otilde;",
"õ",
"ouml;",
"ö",
"divide;",
"÷",
"oslash;",
"ø",
84 "ugrave;",
"ù",
"uacute;",
"ú",
"ucirc;",
"û",
"uuml;",
"ü",
85 "yacute;",
"ý",
"thorn;",
"þ",
"yuml;",
"ÿ",
"OElig;",
"Œ",
86 "oelig;",
"œ",
"Scaron;",
"Š",
"scaron;",
"š",
"Yuml;",
"Ÿ",
87 "fnof;",
"ƒ",
"circ;",
"ˆ",
"tilde;",
"˜",
"Alpha;",
"Α",
88 "Beta;",
"Β",
"Gamma;",
"Γ",
"Delta;",
"Δ",
"Epsilon;",
"Ε",
89 "Zeta;",
"Ζ",
"Eta;",
"Η",
"Theta;",
"Θ",
"Iota;",
"Ι",
90 "Kappa;",
"Κ",
"Lambda;",
"Λ",
"Mu;",
"Μ",
"Nu;",
"Ν",
91 "Xi;",
"Ξ",
"Omicron;",
"Ο",
"Pi;",
"Π",
"Rho;",
"Ρ",
92 "Sigma;",
"Σ",
"Tau;",
"Τ",
"Upsilon;",
"Υ",
"Phi;",
"Φ",
93 "Chi;",
"Χ",
"Psi;",
"Ψ",
"Omega;",
"Ω",
"alpha;",
"α",
94 "beta;",
"β",
"gamma;",
"γ",
"delta;",
"δ",
"epsilon;",
"ε",
95 "zeta;",
"ζ",
"eta;",
"η",
"theta;",
"θ",
"iota;",
"ι",
96 "kappa;",
"κ",
"lambda;",
"λ",
"mu;",
"μ",
"nu;",
"ν",
97 "xi;",
"ξ",
"omicron;",
"ο",
"pi;",
"π",
"rho;",
"ρ",
98 "sigmaf;",
"ς",
"sigma;",
"σ",
"tau;",
"τ",
"upsilon;",
"υ",
99 "phi;",
"φ",
"chi;",
"χ",
"psi;",
"ψ",
"omega;",
"ω",
100 "thetasym;",
"ϑ",
"upsih;",
"ϒ",
"piv;",
"ϖ",
"ensp;",
" ",
101 "emsp;",
" ",
"thinsp;",
" ",
"zwnj;",
"" ,
"zwj;",
"" ,
102 "lrm;",
"" ,
"rlm;",
"" ,
"ndash;",
"–",
"mdash;",
"—",
103 "lsquo;",
"‘",
"rsquo;",
"’",
"sbquo;",
"‚",
"ldquo;",
"“",
104 "rdquo;",
"”",
"bdquo;",
"„",
"dagger;",
"†",
"Dagger;",
"‡",
105 "bull;",
"•",
"hellip;",
"…",
"permil;",
"‰",
"prime;",
"′",
106 "Prime;",
"″",
"lsaquo;",
"‹",
"rsaquo;",
"›",
"oline;",
"‾",
107 "frasl;",
"⁄",
"euro;",
"€",
"image;",
"ℑ",
"weierp;",
"℘",
108 "real;",
"ℜ",
"trade;",
"™",
"alefsym;",
"ℵ",
"larr;",
"←",
109 "uarr;",
"↑",
"rarr;",
"→",
"darr;",
"↓",
"harr;",
"↔",
110 "crarr;",
"↵",
"lArr;",
"⇐",
"uArr;",
"⇑",
"rArr;",
"⇒",
111 "dArr;",
"⇓",
"hArr;",
"⇔",
"forall;",
"∀",
"part;",
"∂",
112 "exist;",
"∃",
"empty;",
"∅",
"nabla;",
"∇",
"isin;",
"∈",
113 "notin;",
"∉",
"ni;",
"∋",
"prod;",
"∏",
"sum;",
"∑",
114 "minus;",
"−",
"lowast;",
"∗",
"radic;",
"√",
"prop;",
"∝",
115 "infin;",
"∞",
"ang;",
"∠",
"and;",
"∧",
"or;",
"∨",
116 "cap;",
"∩",
"cup;",
"∪",
"int;",
"∫",
"there4;",
"∴",
117 "sim;",
"∼",
"cong;",
"≅",
"asymp;",
"≈",
"ne;",
"≠",
118 "equiv;",
"≡",
"le;",
"≤",
"ge;",
"≥",
"sub;",
"⊂",
119 "sup;",
"⊃",
"nsub;",
"⊄",
"sube;",
"⊆",
"supe;",
"⊇",
120 "oplus;",
"⊕",
"otimes;",
"⊗",
"perp;",
"⊥",
"sdot;",
"⋅",
121 "lceil;",
"⌈",
"rceil;",
"⌉",
"lfloor;",
"⌊",
"rfloor;",
"⌋",
122 "lang;",
"<",
"rang;",
">",
"loz;",
"◊",
"spades;",
"♠",
123 "clubs;",
"♣",
"hearts;",
"♥",
"diams;",
"♦",
/*
 * xml_decode() - decode character references and named entities inside `s`.
 *
 * NOTE(review): only part of this function is visible in this listing (the
 * leading numerals are listing line numbers, not code); the comments below
 * describe the visible statements only.
 *
 * s     writable, NUL-terminated string; it is modified in place.
 * type  selects what is decoded:
 *         'c'         numeric references ("&#...;") are NOT decoded,
 *         '&' or ' '  named entities from s_ent are decoded,
 *         ' '         additionally enters a whitespace branch whose body is
 *                     not visible here.
 *
 * When a replacement is longer than the entity text it replaces, the string
 * is moved to heap memory (malloc() the first time, realloc() afterwards).
 * NOTE(review): the return statement is not visible; callers in this file
 * compare the result with the pointer they passed in and free() it when it
 * differs, so presumably `r` is returned - confirm.
 */
141 static char* xml_decode(
char* s,
char type)
143 char *e, *r = s, *m = s;
/* a '\n' at *s is removed by moving the rest of the string, terminator included, one byte down */
149 if (*s ==
'\n') memmove(s, (s + 1), strlen(s));
/* advance to the next '&', the next whitespace character or the end of the string */
154 while( *s && *s !=
'&' && !isspace(*s)) s++;
/* numeric character reference; skipped entirely when type is 'c' */
160 else if( type !=
'c' && ! strncmp(s,
"&#", 2) )
/* "&#x" introduces a hexadecimal value, otherwise the value is decimal; e ends up on the first unparsed character */
163 if (s[2] ==
'x') c = strtol(s + 3, &e, 16);
164 else c = strtol(s + 2, &e, 10);
/* a zero value or a missing ';' is not a usable reference: step over the '&' and go on */
165 if (! c || *e !=
';') { s++;
continue; }
/* values below 0x80 are written as one byte */
167 if (c < 0x80) *(s++) = c;
/* count the significant bits of c in b */
169 for (b = 0, d = c; d; d /= 2) b++;
/* lead byte, then one continuation byte (0x80 | 6 payload bits) per remaining b -
   presumably UTF-8; NOTE(review): the statement between these lines that adjusts b is not visible */
171 *(s++) = (0xFF << (7 - b)) | (c >> (6 * b));
172 while (b) *(s++) = 0x80 | ((c >> (6 * --b)) & 0x3F);
/* drop what is left of the reference: everything after the ';' (terminator included) moves down to s */
175 memmove(s, strchr(s,
';') + 1, strlen(strchr(s,
';')));
/* named entity; only looked up when type is '&' or ' ' */
177 else if( (*s ==
'&' && (type ==
'&' || type ==
' ' ))
/* linear search through s_ent in steps of two (name, replacement); ends on a match or on a NULL name */
181 for (b = 0; s_ent[b] && strncmp(s + 1, s_ent[b], strlen(s_ent[b])); b += 2)
/* c = length of s_ent[b], e = position of the ';'; this branch grows the buffer when the entity text is too short for it */
185 if ((c = strlen(s_ent[b])) - 1 > (e = strchr(s,
';')) - s) {
/* d remembers the offset of s inside r so that s can be re-derived after the buffer moves */
186 l = (d = (s - r)) + c + strlen(e);
/* first growth (r still equals the original pointer m) copies into fresh memory, later ones realloc;
   NOTE(review): neither malloc() nor realloc() is checked for failure */
187 r = (r == m) ? strcpy(malloc(l), r) : realloc(r, l);
188 e = strchr((s = r + d),
';');
/* move the tail behind the replacement, then copy exactly c replacement bytes (strncpy adds no terminator here) */
191 memmove(s + c, e + 1, strlen(e));
192 strncpy(s, s_ent[b], c);
/* whitespace handling for type ' ' - the branch body is not visible in this listing */
196 else if ((type ==
' ' ) && isspace(*s))
/* Characters the parser skips as whitespace (used with strspn()/strcspn()). */
#define XML_WS "\t\r\n "


/**
 * Default start-tag handler: intentionally does nothing.
 *
 * Installed by mrsaxparser_init() and used by mrsaxparser_set_tag_handler()
 * whenever no callback is supplied, so the parser can invoke m_starttag_cb
 * without testing it for NULL.
 *
 * @param userdata  ignored
 * @param tag       ignored
 * @param attr      ignored
 */
static void def_starttag_cb(void* userdata, const char* tag, char** attr)
{
	/* explicit casts document that the parameters are unused on purpose */
	(void)userdata;
	(void)tag;
	(void)attr;
}
/**
 * Default end-tag handler: intentionally does nothing.
 *
 * Installed by mrsaxparser_init() and used by mrsaxparser_set_tag_handler()
 * whenever no callback is supplied, so the parser can invoke m_endtag_cb
 * without testing it for NULL.
 *
 * @param userdata  ignored
 * @param tag       ignored
 */
static void def_endtag_cb(void* userdata, const char* tag)
{
	/* explicit casts document that the parameters are unused on purpose */
	(void)userdata;
	(void)tag;
}
/**
 * Default text handler: intentionally does nothing.
 *
 * Installed by mrsaxparser_init() and used by mrsaxparser_set_text_handler()
 * whenever no callback is supplied, so the parser can invoke m_text_cb
 * without testing it for NULL.
 *
 * @param userdata  ignored
 * @param text      ignored
 * @param len       ignored
 */
static void def_text_cb(void* userdata, const char* text, int len)
{
	/* explicit casts document that the parameters are unused on purpose */
	(void)userdata;
	(void)text;
	(void)len;
}
/*
 * call_text_cb() - decode a run of text and hand it to the text callback.
 *
 * NOTE(review): only part of this function is visible in this listing (the
 * leading numerals are listing line numbers, not code).
 *
 * ths   parser whose m_text_cb / m_userdata are used
 * text  start of the text inside the parse buffer (writable)
 * len   number of bytes of `text` that belong to this run
 * type  passed through to xml_decode() unchanged
 */
229 static void call_text_cb(mrsaxparser_t* ths,
char* text,
size_t len,
char type)
/* bak keeps the byte just behind the run - presumably so the run can be NUL-terminated
   for decoding and restored afterwards; those statements are not visible here */
233 char bak = text[len], *text_new;
236 text_new = xml_decode(text, type);
/* NOTE(review): the callback is given the caller's `len`, not the length of the decoded string */
237 ths->m_text_cb(ths->m_userdata, text_new, len);
/* xml_decode() handed back a different buffer: it is owned here and released */
238 if( text != text_new ) { free(text_new); }
/*
 * do_free_attr() - release the attribute strings that the parser allocated.
 *
 * attr       flat list in which attr[i] is a key and attr[i+1] its value
 * free_attr  one flag word per key/value pair (index i>>1): FREE_KEY means
 *            attr[i] must be freed, FREE_VALUE means attr[i+1] must be freed;
 *            strings without the flag are not freed here
 *
 * NOTE(review): the loop that steps `i` through the list is not visible in
 * this listing (the leading numerals are listing line numbers, not code).
 */
245 static void do_free_attr(
char** attr,
int* free_attr)
249 #define FREE_KEY 0x01 250 #define FREE_VALUE 0x02 253 if( free_attr[i>>1]&FREE_KEY && attr[i] ) { free(attr[i]); }
254 if( free_attr[i>>1]&FREE_VALUE && attr[i+1] ) { free(attr[i+1]); }
/*
 * mrattr_find() - search an attribute list for `key`.
 *
 * attr  list of strings terminated by a NULL entry
 * key   name to look for; compared with strcmp(), i.e. case-sensitively
 *
 * The visible loop keeps going while attr[i] is non-NULL and differs from
 * `key`.
 * NOTE(review): the loop step and the return statement are not visible in
 * this listing; presumably i advances by two (key, value) and the value
 * attr[i+1] of the matching key is returned, NULL otherwise - confirm.
 */
266 const char* mrattr_find(
char** attr,
const char* key)
270 while( attr[i] && strcmp(key, attr[i]) ) {
282 void mrsaxparser_init(mrsaxparser_t* ths,
void* userdata)
284 ths->m_userdata = userdata;
285 ths->m_starttag_cb = def_starttag_cb;
286 ths->m_endtag_cb = def_endtag_cb;
287 ths->m_text_cb = def_text_cb;
291 void mrsaxparser_set_tag_handler(mrsaxparser_t* ths, mrsaxparser_starttag_cb_t starttag_cb, mrsaxparser_endtag_cb_t endtag_cb)
297 ths->m_starttag_cb = starttag_cb? starttag_cb : def_starttag_cb;
298 ths->m_endtag_cb = endtag_cb? endtag_cb : def_endtag_cb;
302 void mrsaxparser_set_text_handler (mrsaxparser_t* ths, mrsaxparser_text_cb_t text_cb)
308 ths->m_text_cb = text_cb? text_cb : def_text_cb;
/*
 * mrsaxparser_parse() - scan a document and report its parts to the
 * callbacks of `ths` (text, start tags with attributes, end tags).
 *
 * NOTE(review): only part of this function is visible in this listing (the
 * leading numerals are listing line numbers, not code); the comments below
 * describe the visible statements only.
 *
 * ths          parser object holding the callbacks and the userdata
 * buf_start__  NUL-terminated input; it is duplicated and not modified
 *
 * Visible pieces: comments ("!--" ... "-->") are skipped, CDATA sections are
 * reported as text with decode type 'c', "!DOCTYPE" is scanned over, tag and
 * attribute names are lowercased before they are reported, attribute values
 * are run through xml_decode() with type ' '.
 */
312 void mrsaxparser_parse(mrsaxparser_t* ths,
const char* buf_start__)
314 char bak, *buf_start, *last_text_start, *p;
/* flat key/value list handed to the start-tag callback, plus one flag word per pair for do_free_attr() */
317 char* attr[(MAX_ATTR+1)*2];
318 int free_attr[MAX_ATTR];
/* work on a private copy: names are lowercased and NUL-terminated in place further down */
326 buf_start = safe_strdup(buf_start__);
327 last_text_start = buf_start;
/* report the text collected since last_text_start; type '&' asks xml_decode() for entity decoding */
333 call_text_cb(ths, last_text_start, p - last_text_start,
'&');
/* comment: continue at the closing "-->", give up if there is none */
336 if( strncmp(p,
"!--", 3) == 0 )
341 p = strstr(p,
"-->");
342 if( p == NULL ) {
goto cleanup; }
/* CDATA section: its content starts 8 bytes in and is reported with type 'c' */
345 else if( strncmp(p,
"![CDATA[", 8) == 0 )
350 char* text_beg = p + 8;
/* terminated section: report the enclosed text */
351 if( (p = strstr(p,
"]]>"))!=NULL ) {
352 call_text_cb(ths, text_beg, p-text_beg,
'c');
/* this call reports everything up to the end of the buffer */
356 call_text_cb(ths, text_beg, strlen(text_beg),
'c');
/* document type declaration: advance to '[', '>' or the end of the buffer */
360 else if( strncmp(p,
"!DOCTYPE", 8) == 0 )
365 while( *p && *p !=
'[' && *p !=
'>' ) p++;
369 else if( *p ==
'[' ) {
388 if( p == NULL ) {
goto cleanup; }
393 p += strspn(p, XML_WS);
/* tag name reported to the end-tag callback: it runs up to whitespace, '/' or '>' and is lowercased in place */
400 p += strspn(p, XML_WS);
401 char* beg_tag_name = p;
402 p += strcspn(p, XML_WS
"/>");
403 if( p != beg_tag_name )
407 mr_strlower_in_place(beg_tag_name);
408 ths->m_endtag_cb(ths->m_userdata, beg_tag_name);
417 do_free_attr(attr, free_attr);
/* tag name reported to the start-tag callback: it runs up to whitespace, '/' or '>' */
419 char* beg_tag_name = p;
420 p += strcspn(p, XML_WS
"/>");
421 if( p != beg_tag_name )
423 char* after_tag_name = p;
427 while( isspace(*p) ) { p++; }
/* one attribute per pass until '/', '>' or the end of the buffer; attr_index moves by two (key, value) */
428 for( ; *p && *p !=
'/' && *p !=
'>'; attr_index += 2 )
430 char *beg_attr_name = p, *beg_attr_value = NULL, *beg_attr_value_new = NULL;
/* attribute name runs up to whitespace, '=', '/' or '>' */
432 p += strcspn(p, XML_WS
"=/>");
433 if( p != beg_attr_name )
436 char* after_attr_name = p;
437 p += strspn(p, XML_WS);
440 p += strspn(p, XML_WS
"=");
/* quoted value: scan to the matching quote */
442 if( quote ==
'"' || quote ==
'\'' )
448 while( *p && *p != quote ) { p++; }
/* decoded directly on the value inside the parse buffer */
454 beg_attr_value_new = xml_decode(beg_attr_value,
' ');
/* value scanned up to whitespace, '/' or '>' */
460 p += strcspn(p, XML_WS
"/>");
/* decoded on a duplicate; the duplicate is released if xml_decode() returned yet another buffer */
463 char* temp = safe_strdup(beg_attr_value);
464 beg_attr_value_new = xml_decode(temp,
' ');
465 if( beg_attr_value_new!=temp ) { free(temp); }
/* NOTE(review): presumably safe_strdup(NULL) yields an empty string for an attribute without value - confirm */
471 beg_attr_value_new = safe_strdup(NULL);
/* NOTE(review): attr_index counts in steps of two, so this bound admits about MAX_ATTR/2 attributes - confirm this is intended */
475 if( attr_index < MAX_ATTR )
477 char* beg_attr_name_new = beg_attr_name;
/* the value must be freed later when it does not point at beg_attr_value */
478 int free_bits = (beg_attr_value_new != beg_attr_value)? FREE_VALUE : 0;
/* the byte behind the name is the one p points at, so it is only zeroed temporarily: the name is duplicated and the byte restored */
479 if( after_attr_name == p ) {
481 bak = *after_attr_name;
482 *after_attr_name =
'\0';
483 beg_attr_name_new = safe_strdup(beg_attr_name);
484 *after_attr_name = bak;
485 free_bits |= FREE_KEY;
/* name terminated in place, without a duplicate */
488 *after_attr_name =
'\0';
/* store the lowercased key and its value, keep the list NULL-terminated, remember what must be freed */
491 mr_strlower_in_place(beg_attr_name_new);
492 attr[attr_index] = beg_attr_name_new;
493 attr[attr_index+1] = beg_attr_value_new;
494 attr[attr_index+2] = NULL;
495 free_attr[attr_index>>1] = free_bits;
499 while( isspace(*p) ) { p++; }
/* report the start tag; the byte behind the tag name is saved here and restored after the callback */
502 char bak = *after_tag_name;
504 mr_strlower_in_place(beg_tag_name);
505 ths->m_starttag_cb(ths->m_userdata, beg_tag_name, attr);
506 *after_tag_name = bak;
509 p += strspn(p, XML_WS);
/* NOTE(review): end tag reported for the same name - presumably the self-closing form; the condition is not visible */
514 ths->m_endtag_cb(ths->m_userdata, beg_tag_name);
521 if( p == NULL ) {
goto cleanup; }
/* text from last_text_start up to p is reported with entity decoding (type '&') */
534 call_text_cb(ths, last_text_start, p - last_text_start,
'&');
537 do_free_attr(attr, free_attr);