Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Contents of /trunk/src/xmlparse.l
Parent Directory
|
Revision Log
Revision 1044 -
(show annotations)
(download)
Mon Aug 6 18:34:52 2012 UTC (12 years, 8 months ago) by amb
File size: 28849 byte(s)
Mon Aug 6 18:34:52 2012 UTC (12 years, 8 months ago) by amb
File size: 28849 byte(s)
Allow an unlimited number of attributes per tag without crashing.
1 | %{ |
2 | /*************************************** |
3 | A simple generic XML parser where the structure comes from the function parameters. |
4 | Not intended to be fully conforming to XML standard or a validating parser but |
5 | sufficient to parse OSM XML and simple program configuration files. |
6 | |
7 | Part of the Routino routing software. |
8 | ******************/ /****************** |
9 | This file Copyright 2010-2012 Andrew M. Bishop |
10 | |
11 | This program is free software: you can redistribute it and/or modify |
12 | it under the terms of the GNU Affero General Public License as published by |
13 | the Free Software Foundation, either version 3 of the License, or |
14 | (at your option) any later version. |
15 | |
16 | This program is distributed in the hope that it will be useful, |
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | GNU Affero General Public License for more details. |
20 | |
21 | You should have received a copy of the GNU Affero General Public License |
22 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
23 | ***************************************/ |
24 | |
25 | |
26 | #include <stdio.h> |
27 | #include <stdlib.h> |
28 | #include <ctype.h> |
29 | #include <string.h> |
30 | #include <strings.h> |
31 | |
32 | #include "xmlparse.h" |
33 | |
34 | |
/* Parser outputs */

/*+ The values that yylex() returns and that ParseXML() uses as its status.
    The parsing loop in ParseXML() keeps running only while the value is
    greater than LEX_EOF and less than LEX_ERROR. +*/
enum
{
 /* End of input */

 LEX_EOF                  =   0,

 /* Tokens seen while parsing is going well */

 LEX_TAG_BEGIN            =   1,
 LEX_XML_DECL_BEGIN       =   2,
 LEX_TAG_POP              =   3,
 LEX_TAG_PUSH             =   4,
 LEX_XML_DECL_FINISH      =   6,
 LEX_TAG_FINISH           =   7,
 LEX_ATTR_KEY             =   8,
 LEX_ATTR_VAL             =   9,

 /* The threshold at and above which the value is an error */

 LEX_ERROR                = 100,

 /* Errors returned by the lexer rules */

 LEX_ERROR_TAG_START      = 101,
 LEX_ERROR_XML_DECL_START = 102,
 LEX_ERROR_TAG            = 103,
 LEX_ERROR_XML_DECL       = 104,
 LEX_ERROR_ATTR           = 105,
 LEX_ERROR_END_TAG        = 106,
 LEX_ERROR_COMMENT        = 107,
 LEX_ERROR_CLOSE          = 108,
 LEX_ERROR_ATTR_VAL       = 109,
 LEX_ERROR_ENTITY_REF     = 110,
 LEX_ERROR_CHAR_REF       = 111,

 /* Errors relating to the structure of the document */

 LEX_ERROR_UNEXP_TAG      = 201,
 LEX_ERROR_UNBALANCED     = 202,
 LEX_ERROR_NO_START       = 203,
 LEX_ERROR_UNEXP_ATT      = 204,
 LEX_ERROR_UNEXP_EOF      = 205,
 LEX_ERROR_XML_NOT_FIRST  = 206,

 /* Error reported when a tag callback function returns non-zero */

 LEX_ERROR_CALLBACK       = 255
};
70 | |
71 | |
/* Lexer definitions */

/* These macros operate on variables that must be in scope where they are used:
   stringnum, numstrings, string, stringlen, stringused and (for append_string)
   newlen.  Each one expands to a single statement so that it can be used
   safely as the body of an unbraced if/else; write a ';' after each use. */

/*+ Reset the current string. +*/
#define reset_string \
 do \
   { \
    stringnum=-1; \
   } \
 while(0)

/*+ Prepare for the next string (the arrays grow by 32 entries at a time and
    each string buffer starts with 256 bytes and is reused once allocated). +*/
#define next_string \
 do \
   { \
    stringnum++; \
    if(stringnum>=numstrings) \
      { \
       int i; \
       numstrings+=32; \
       string    =(char**)        realloc((void*)string    ,numstrings*sizeof(char*)); \
       stringlen =(unsigned long*)realloc((void*)stringlen ,numstrings*sizeof(unsigned long)); \
       stringused=(unsigned long*)realloc((void*)stringused,numstrings*sizeof(unsigned long)); \
       for(i=stringnum;i<numstrings;i++) \
         {string[i]=NULL;stringlen[i]=0;stringused[i]=0;} \
      } \
    if(!string[stringnum]) string[stringnum]=(char*)malloc(stringlen[stringnum]=256); \
    *string[stringnum]=0; \
    stringused[stringnum]=0; \
   } \
 while(0)

/*+ Append information to the current string (the argument is evaluated only once). +*/
#define append_string(xx) \
 do \
   { \
    const char *append_src=(xx); \
    newlen=strlen(append_src); \
    if((stringused[stringnum]+newlen)>=stringlen[stringnum]) \
       string[stringnum]=(char*)realloc((void*)string[stringnum],stringlen[stringnum]=(stringused[stringnum]+newlen+256)); \
    strcpy(string[stringnum]+stringused[stringnum],append_src); \
    stringused[stringnum]+=newlen; \
   } \
 while(0)
102 | |
103 | |
/* Lexer functions and variables */

/*+ The lexer function that flex generates from the rules in this file. +*/
extern int yylex(void);

/*+ The text that goes with the token most recently returned by the lexer. +*/
static char *yylval=NULL;

/*+ A copy of the options passed to ParseXML() for use within the lexer rules. +*/
static int xmlparse_options=0;

/*+ The current line number within the file being parsed. +*/
static unsigned long long lineno=0;
113 | |
114 | %} |
115 | |
116 | %option 8bit |
117 | %option pointer |
118 | %option batch |
119 | %option never-interactive |
120 | |
121 | %option perf-report perf-report |
122 | %option warn |
123 | %option verbose |
124 | |
125 | %option nodefault |
126 | %option fast |
127 | %option noread |
128 | |
129 | %option noreject |
130 | %option nounput |
131 | %option noinput |
132 | %option noyywrap |
133 | %option noyymore |
134 | %option noyylineno |
135 | |
136 | |
137 | /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII tags not Unicode. */ |
138 | |
139 | S [ \t] |
140 | |
141 | U1 [\x09\x0A\x0D\x20-\x7F] |
142 | U2 [\xC2-\xDF][\x80-\xBF] |
143 | U3a \xE0[\xA0-\xBF][\x80-\xBF] |
144 | U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF] |
145 | U3c \xED[\x80-\x9F][\x80-\xBF] |
146 | U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF] |
147 | U3 {U3a}|{U3b}|{U3c}|{U3d} |
148 | U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF] |
149 | U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
150 | U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF] |
151 | U4 {U4a}|{U4b}|{U4c} |
152 | |
153 | U ({U1}|{U2}|{U3}|{U4}) |
154 | UquotedS ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4}) |
155 | UquotedD ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4}) |
156 | |
157 | N (\n|\r\n) |
158 | |
159 | letter [a-zA-Z] |
160 | digit [0-9] |
161 | xdigit [a-fA-F0-9] |
162 | |
163 | namechar ({letter}|{digit}|[-._:]) |
164 | name (({letter}|[_:]){namechar}*) |
165 | |
166 | entityref (&{name};) |
167 | charref (&#({digit}+|x{xdigit}+);) |
168 | |
169 | |
170 | %x BANGTAG |
171 | %x COMMENT |
172 | %x CDATA |
173 | %x DOCTYPE |
174 | %x XML_DECL_START XML_DECL |
175 | %x TAG_START TAG |
176 | %x ATTR_KEY ATTR_VAL |
177 | %x END_TAG1 END_TAG2 |
178 | %x DQUOTED SQUOTED |
179 | |
180 | %% |
181 | /* Must use static variables since the parser returns often. */ |
182 | static int numstrings=0,stringnum=0; |
183 | static char **string=NULL; |
184 | static unsigned long *stringlen=NULL,*stringused=NULL; |
185 | static int after_attr=0; |
186 | int newlen; |
187 | int doctype_depth=0; |
188 | |
189 | /* Handle top level entities */ |
190 | |
191 | "<!" { BEGIN(BANGTAG); } |
192 | "</" { BEGIN(END_TAG1); } |
193 | "<?" { BEGIN(XML_DECL_START); } |
194 | "<" { BEGIN(TAG_START); } |
195 | |
196 | ">" { return(LEX_ERROR_CLOSE); } |
197 | |
198 | {N} { lineno++; } |
199 | [^<>] { } |
200 | |
201 | /* Tags beginning with '!' */ |
202 | |
203 | <BANGTAG>"--" { BEGIN(COMMENT); } |
204 | <BANGTAG>"[CDATA[" { BEGIN(CDATA); } |
205 | <BANGTAG>"DOCTYPE" { BEGIN(DOCTYPE); doctype_depth=0; } |
206 | <BANGTAG>{N} { /* lineno++; */ return(LEX_ERROR_TAG_START); } |
207 | <BANGTAG>. { return(LEX_ERROR_TAG_START); } |
208 | |
209 | /* Comments */ |
210 | |
211 | <COMMENT>"-->" { BEGIN(INITIAL); } |
212 | <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); } |
213 | <COMMENT>{N} { lineno++; } |
214 | <COMMENT>[^-] { } |
215 | <COMMENT>"-" { } |
216 | |
217 | /* CDATA */ |
218 | |
219 | <CDATA>"]]>" { BEGIN(INITIAL); } |
220 | <CDATA>"]" { } |
221 | <CDATA>{N} { lineno++; } |
222 | <CDATA>[^]] { } |
223 | |
224 | /* DOCTYPE */ |
225 | |
226 | <DOCTYPE>"<" { doctype_depth++; } |
227 | <DOCTYPE>">" { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; } |
228 | <DOCTYPE>{N} { lineno++; } |
229 | <DOCTYPE>[^<>] { } |
230 | |
231 | /* XML declaration start */ |
232 | |
233 | <XML_DECL_START>xml { BEGIN(XML_DECL); reset_string; yylval=yytext; return(LEX_XML_DECL_BEGIN); } |
234 | <XML_DECL_START>{N} { /* lineno++; */ return(LEX_ERROR_XML_DECL_START); } |
235 | <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); } |
236 | |
237 | /* XML declaration middle */ |
238 | |
239 | <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); } |
240 | <XML_DECL>{S}+ { } |
241 | <XML_DECL>{N} { lineno++; } |
242 | <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); } |
243 | <XML_DECL>. { return(LEX_ERROR_XML_DECL); } |
244 | |
245 | /* Any tag start */ |
246 | |
247 | <TAG_START>{name} { BEGIN(TAG); reset_string; yylval=yytext; return(LEX_TAG_BEGIN); } |
248 | <TAG_START>{N} { /* lineno++; */ return(LEX_ERROR_TAG_START); } |
249 | <TAG_START>. { return(LEX_ERROR_TAG_START); } |
250 | |
251 | /* End-tag start */ |
252 | |
253 | <END_TAG1>{name} { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); } |
254 | <END_TAG1>{N} { /* lineno++; */ return(LEX_ERROR_END_TAG); } |
255 | <END_TAG1>. { return(LEX_ERROR_END_TAG); } |
256 | |
257 | <END_TAG2>">" { BEGIN(INITIAL); } |
258 | <END_TAG2>{N} { /* lineno++; */ return(LEX_ERROR_END_TAG); } |
259 | <END_TAG2>. { return(LEX_ERROR_END_TAG); } |
260 | |
261 | /* Any tag middle */ |
262 | |
263 | <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); } |
264 | <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); } |
265 | <TAG>{S}+ { } |
266 | <TAG>{N} { lineno++; } |
267 | <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); } |
268 | <TAG>. { return(LEX_ERROR_TAG); } |
269 | |
270 | /* Attributes */ |
271 | |
272 | <ATTR_KEY>= { BEGIN(ATTR_VAL); } |
273 | <ATTR_KEY>{N} { /* lineno++; */ return(LEX_ERROR_ATTR); } |
274 | <ATTR_KEY>. { return(LEX_ERROR_ATTR); } |
275 | |
276 | <ATTR_VAL>\" { BEGIN(DQUOTED); next_string; } |
277 | <ATTR_VAL>\' { BEGIN(SQUOTED); next_string; } |
278 | <ATTR_VAL>{N} { /* lineno++; */ return(LEX_ERROR_ATTR); } |
279 | <ATTR_VAL>. { return(LEX_ERROR_ATTR); } |
280 | |
281 | /* Quoted strings */ |
282 | |
283 | <DQUOTED>\" { BEGIN(after_attr); yylval=string[stringnum]; return(LEX_ATTR_VAL); } |
284 | <DQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
285 | else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } } |
286 | <DQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
287 | else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } } |
288 | <DQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
289 | <DQUOTED>{UquotedD}+ { append_string(yytext); } |
290 | <DQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
291 | |
292 | <SQUOTED>\' { BEGIN(after_attr); yylval=string[stringnum]; return(LEX_ATTR_VAL); } |
293 | <SQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
294 | else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } } |
295 | <SQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
296 | else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } } |
297 | <SQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
298 | <SQUOTED>{UquotedS}+ { append_string(yytext); } |
299 | <SQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
300 | |
301 | /* End of file */ |
302 | |
303 | <<EOF>> { for(stringnum=0;stringnum<numstrings;stringnum++) if(string[stringnum]) free(string[stringnum]); |
304 | if(string) free(string); string=NULL; |
305 | if(stringlen) free(stringlen); stringlen=NULL; |
306 | if(stringlen) free(stringlen); stringlen=NULL; |
307 | if(stringused) free(stringused); stringused=NULL; |
308 | numstrings=0; |
309 | BEGIN(INITIAL); return(LEX_EOF); } |
310 | |
311 | %% |
312 | |
313 | |
314 | /*++++++++++++++++++++++++++++++++++++++ |
315 | A function to call the callback function with the parameters needed. |
316 | |
317 | int call_callback Returns 1 if the callback returned with an error. |
318 | |
319 | const char *name The name of the tag. |
320 | |
321 | int (*callback)() The callback function. |
322 | |
323 | int type The type of tag (start and/or end). |
324 | |
325 | int nattributes The number of attributes collected. |
326 | |
327 | char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes. |
328 | ++++++++++++++++++++++++++++++++++++++*/ |
329 | |
330 | static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS]) |
331 | { |
332 | switch(nattributes) |
333 | { |
334 | case 0: return (*callback)(name,type); |
335 | case 1: return (*callback)(name,type,attributes[0]); |
336 | case 2: return (*callback)(name,type,attributes[0],attributes[1]); |
337 | case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]); |
338 | case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]); |
339 | case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); |
340 | case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); |
341 | case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); |
342 | case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); |
343 | case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); |
344 | case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); |
345 | case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); |
346 | case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); |
347 | case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); |
348 | case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); |
349 | case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); |
350 | case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); |
351 | |
352 | default: |
353 | fprintf(stderr,"XML Parser: Error on line %llu: too many attributes for tag '%s' source code needs changing.\n",lineno,name); |
354 | exit(1); |
355 | } |
356 | } |
357 | |
358 | |
359 | /*++++++++++++++++++++++++++++++++++++++ |
360 | Parse the XML and call the functions for each tag as seen. |
361 | |
362 | int ParseXML Returns 0 if OK or something else in case of an error. |
363 | |
364 | FILE *file The file to parse. |
365 | |
366 | xmltag **tags The array of pointers to tags for the top level. |
367 | |
368 | int options A list of XML Parser options OR-ed together. |
369 | ++++++++++++++++++++++++++++++++++++++*/ |
370 | |
371 | int ParseXML(FILE *file,xmltag **tags,int options) |
372 | { |
373 | int yychar,i; |
374 | |
375 | char *attributes[XMLPARSE_MAX_ATTRS]={NULL}; |
376 | int attribute=0; |
377 | |
378 | int stackdepth=0,stackused=0; |
379 | xmltag ***tags_stack=NULL; |
380 | xmltag **tag_stack=NULL; |
381 | xmltag *tag=NULL; |
382 | |
383 | /* The actual parser. */ |
384 | |
385 | xmlparse_options=options; |
386 | |
387 | yyin=file; |
388 | |
389 | yyrestart(yyin); |
390 | |
391 | lineno=1; |
392 | |
393 | BEGIN(INITIAL); |
394 | |
395 | do |
396 | { |
397 | yychar=yylex(); |
398 | |
399 | switch(yychar) |
400 | { |
401 | /* The start of a tag for an XML declaration */ |
402 | |
403 | case LEX_XML_DECL_BEGIN: |
404 | |
405 | if(tag_stack) |
406 | { |
407 | fprintf(stderr,"XML Parser: Error on line %llu: XML declaration not before all other tags.\n",lineno); |
408 | yychar=LEX_ERROR_XML_NOT_FIRST; |
409 | break; |
410 | } |
411 | |
412 | /* The start of a tag for an element */ |
413 | |
414 | case LEX_TAG_BEGIN: |
415 | |
416 | tag=NULL; |
417 | |
418 | for(i=0;tags[i];i++) |
419 | if(!strcasecmp(yylval,tags[i]->name)) |
420 | { |
421 | tag=tags[i]; |
422 | |
423 | for(i=0;i<tag->nattributes;i++) |
424 | attributes[i]=NULL; |
425 | |
426 | break; |
427 | } |
428 | |
429 | if(tag==NULL) |
430 | { |
431 | fprintf(stderr,"XML Parser: Error on line %llu: unexpected tag '%s'.\n",lineno,yylval); |
432 | yychar=LEX_ERROR_UNEXP_TAG; |
433 | } |
434 | |
435 | break; |
436 | |
437 | /* The end of the start-tag for an element */ |
438 | |
439 | case LEX_TAG_PUSH: |
440 | |
441 | if(stackused==stackdepth) |
442 | { |
443 | tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*)); |
444 | tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**)); |
445 | } |
446 | |
447 | tag_stack [stackused]=tag; |
448 | tags_stack[stackused]=tags; |
449 | stackused++; |
450 | |
451 | if(tag->callback) |
452 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes)) |
453 | yychar=LEX_ERROR_CALLBACK; |
454 | |
455 | tags=tag->subtags; |
456 | |
457 | break; |
458 | |
459 | /* The end of the empty-element-tag for an XML declaration */ |
460 | |
461 | case LEX_XML_DECL_FINISH: |
462 | |
463 | /* The end of the empty-element-tag for an element */ |
464 | |
465 | case LEX_TAG_FINISH: |
466 | |
467 | if(tag->callback) |
468 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes)) |
469 | yychar=LEX_ERROR_CALLBACK; |
470 | |
471 | if(stackused>0) |
472 | tag=tag_stack[stackused-1]; |
473 | else |
474 | tag=NULL; |
475 | |
476 | break; |
477 | |
478 | /* The end of the end-tag for an element */ |
479 | |
480 | case LEX_TAG_POP: |
481 | |
482 | stackused--; |
483 | tags=tags_stack[stackused]; |
484 | tag =tag_stack [stackused]; |
485 | |
486 | if(strcmp(tag->name,yylval)) |
487 | { |
488 | fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,yylval,tag->name); |
489 | yychar=LEX_ERROR_UNBALANCED; |
490 | } |
491 | |
492 | if(stackused<0) |
493 | { |
494 | fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,yylval,yylval); |
495 | yychar=LEX_ERROR_NO_START; |
496 | } |
497 | |
498 | for(i=0;i<tag->nattributes;i++) |
499 | attributes[i]=NULL; |
500 | |
501 | if(tag->callback) |
502 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes)) |
503 | yychar=LEX_ERROR_CALLBACK; |
504 | |
505 | if(stackused>0) |
506 | tag=tag_stack[stackused-1]; |
507 | else |
508 | tag=NULL; |
509 | |
510 | break; |
511 | |
512 | /* An attribute key */ |
513 | |
514 | case LEX_ATTR_KEY: |
515 | |
516 | attribute=-1; |
517 | |
518 | for(i=0;i<tag->nattributes;i++) |
519 | if(!strcasecmp(yylval,tag->attributes[i])) |
520 | { |
521 | attribute=i; |
522 | |
523 | break; |
524 | } |
525 | |
526 | if(attribute==-1) |
527 | { |
528 | if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR || |
529 | ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':'))) |
530 | { |
531 | fprintf(stderr,"XML Parser: Error on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,yylval,tag->name); |
532 | yychar=LEX_ERROR_UNEXP_ATT; |
533 | } |
534 | else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN) |
535 | fprintf(stderr,"XML Parser: Warning on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,yylval,tag->name); |
536 | } |
537 | |
538 | break; |
539 | |
540 | /* An attribute value */ |
541 | |
542 | case LEX_ATTR_VAL: |
543 | |
544 | if(tag->callback && attribute!=-1 && yylval) |
545 | attributes[attribute]=yylval; |
546 | |
547 | break; |
548 | |
549 | /* End of file */ |
550 | |
551 | case LEX_EOF: |
552 | |
553 | if(tag) |
554 | { |
555 | fprintf(stderr,"XML Parser: Error on line %llu: end of file seen without end tag '</%s>'.\n",lineno,tag->name); |
556 | yychar=LEX_ERROR_UNEXP_EOF; |
557 | } |
558 | |
559 | break; |
560 | |
561 | case LEX_ERROR_TAG_START: |
562 | fprintf(stderr,"XML Parser: Error on line %llu: character '<' seen not at start of tag.\n",lineno); |
563 | break; |
564 | |
565 | case LEX_ERROR_XML_DECL_START: |
566 | fprintf(stderr,"XML Parser: Error on line %llu: characters '<?' seen not at start of XML declaration.\n",lineno); |
567 | break; |
568 | |
569 | case LEX_ERROR_TAG: |
570 | fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside tag '<%s...>'.\n",lineno,tag->name); |
571 | break; |
572 | |
573 | case LEX_ERROR_XML_DECL: |
574 | fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside XML declaration '<?%s...>'.\n",lineno,tag->name); |
575 | break; |
576 | |
577 | case LEX_ERROR_ATTR: |
578 | fprintf(stderr,"XML Parser: Error on line %llu: invalid attribute definition seen in tag.\n",lineno); |
579 | break; |
580 | |
581 | case LEX_ERROR_END_TAG: |
582 | fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen in end-tag.\n",lineno); |
583 | break; |
584 | |
585 | case LEX_ERROR_COMMENT: |
586 | fprintf(stderr,"XML Parser: Error on line %llu: invalid comment seen.\n",lineno); |
587 | break; |
588 | |
589 | case LEX_ERROR_CLOSE: |
590 | fprintf(stderr,"XML Parser: Error on line %llu: character '>' seen not at end of tag.\n",lineno); |
591 | break; |
592 | |
593 | case LEX_ERROR_ATTR_VAL: |
594 | fprintf(stderr,"XML Parser: Error on line %llu: invalid character '%s' seen in attribute value.\n",lineno,yylval); |
595 | break; |
596 | |
597 | case LEX_ERROR_ENTITY_REF: |
598 | fprintf(stderr,"XML Parser: Error on line %llu: invalid entity reference '%s' seen in attribute value.\n",lineno,yylval); |
599 | break; |
600 | |
601 | case LEX_ERROR_CHAR_REF: |
602 | fprintf(stderr,"XML Parser: Error on line %llu: invalid character reference '%s' seen in attribute value.\n",lineno,yylval); |
603 | break; |
604 | } |
605 | } |
606 | while(yychar>LEX_EOF && yychar<LEX_ERROR); |
607 | |
608 | /* Delete the tagdata */ |
609 | |
610 | if(stackdepth) |
611 | { |
612 | free(tag_stack); |
613 | free(tags_stack); |
614 | } |
615 | |
616 | return(yychar); |
617 | } |
618 | |
619 | |
620 | /*++++++++++++++++++++++++++++++++++++++ |
621 | Return the current parser line number. |
622 | |
623 | unsigned long long ParseXML_LineNumber Returns the line number. |
624 | ++++++++++++++++++++++++++++++++++++++*/ |
625 | |
626 | unsigned long long ParseXML_LineNumber(void) |
627 | { |
628 | return(lineno); |
629 | } |
630 | |
631 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML entity reference into an ASCII string.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the replacement decoded string (a constant
                                   string that must not be modified or freed) or NULL if the entity
                                   is not one of the five predefined XML entities.

  const char *string The entity reference string (including the leading '&' and the trailing ';').
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison is against the complete, case-sensitive, entity reference. */

 if(!strcmp(string,"&amp;"))  return("&");
 if(!strcmp(string,"&lt;"))   return("<");
 if(!strcmp(string,"&gt;"))   return(">");
 if(!strcmp(string,"&apos;")) return("'");
 if(!strcmp(string,"&quot;")) return("\"");

 return(NULL);
}
649 | |
650 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string.  This is a
                                 static buffer that is overwritten by the next call and must not
                                 be freed.

  const char *string The character reference string in the form '&#NNN;' or '&#xHHH;'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* Skip the '&#' (or '&#x') prefix, strtol() stops at the trailing ';'. */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Values that cannot be encoded as valid UTF-8 (negative, beyond U+10FFFF or
    within the surrogate range) become U+FFFD, the replacement character. */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0+((unicode&0x07C0)>>6));
    result[1]=(char)(0x80+ (unicode&0x003F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0+((unicode&0xF000)>>12));
    result[1]=(char)(0x80+((unicode&0x0FC0)>>6));
    result[2]=(char)(0x80+ (unicode&0x003F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0+((unicode&0x1C0000)>>18));
    result[1]=(char)(0x80+((unicode&0x03F000)>>12));
    result[2]=(char)(0x80+((unicode&0x000FC0)>>6));
    result[3]=(char)(0x80+ (unicode&0x00003F));
    result[4]=0;
   }

 return(result);
}
706 | |
707 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded string (or the original if no change needed).
                                 A replacement string is allocated with malloc() but the original is not, so the caller
                                 must compare the result with the argument before freeing it.  Returns NULL if memory
                                 could not be allocated.

  const char *string The string to convert (the five XML special characters become entity references, control
                     characters and UTF-8 encoded characters become hexadecimal character references).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 const unsigned char *in=(const unsigned char*)string; /* unsigned so that bytes above 127 compare as expected */
 const size_t chunk=256;        /* the amount that the buffer grows by */
 const size_t longest=10;       /* the longest replacement for one character: '&#xHHHHHH;' */
 size_t i,j,size;
 char *result;

 /* Find the first character that needs to be changed (if any) */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the part that needs no changes */

 size=i+chunk;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 /* Convert the remainder one character at a time */

 for(;in[i];i++)
   {
    /* Make sure that there is always space for the longest replacement and the terminating zero */

    if(size-j<=longest)
      {
       char *bigger=(char*)realloc((void*)result,size+chunk);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
       size+=chunk;
      }

    if(in[i]=='<')
      {
       memcpy(result+j,"&lt;",4);
       j+=4;
      }
    else if(in[i]=='>')
      {
       memcpy(result+j,"&gt;",4);
       j+=4;
      }
    else if(in[i]=='&')
      {
       memcpy(result+j,"&amp;",5);
       j+=5;
      }
    else if(in[i]=='\'')
      {
       memcpy(result+j,"&apos;",6);
       j+=6;
      }
    else if(in[i]=='"')
      {
       memcpy(result+j,"&quot;",6);
       j+=6;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (each test stops at the first byte that does not fit
          so the terminating zero is never stepped over) */

       if((in[i]&0x80)==0)
         {
          /* 0000 0000-0000 007F  =>  0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;       /* an invalid byte is replaced by the replacement character */

       /* Output the character entity (2, 4 or 6 hexadecimal digits) */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
848 | |
849 | |
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is an integer.

  int ParseXML_IsInteger Returns 1 if the whole string is an optional sign followed by decimal digits or 0 otherwise
                         (an empty string or a lone sign is accepted because no minimum number of digits is required).

  const char *string The string to be parsed.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsInteger(const char *string)
{
 /* isdigit() is only defined for values that fit in an unsigned char (or EOF)
    so the string is examined as unsigned bytes. */

 const unsigned char *p=(const unsigned char*)string;

 if(*p=='-' || *p=='+')
    p++;

 while(isdigit(*p))
    p++;

 /* Anything left over means that it was not just an integer */

 if(*p)
    return(0);
 else
    return(1);
}
873 | |
874 | |
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is a floating point number.

  int ParseXML_IsFloating Returns 1 if the whole string is an optional sign, digits and decimal points and
                          an optional exponent ('e' or 'E', optional sign, digits) or 0 otherwise.  This is
                          only a character check, the number of digits and decimal points is not verified.

  const char *string The string to be parsed.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsFloating(const char *string)
{
 /* isdigit() is only defined for values that fit in an unsigned char (or EOF)
    so the string is examined as unsigned bytes. */

 const unsigned char *p=(const unsigned char*)string;

 /* The mantissa */

 if(*p=='-' || *p=='+')
    p++;

 while(isdigit(*p) || *p=='.')
    p++;

 /* The optional exponent */

 if(*p=='e' || *p=='E')
   {
    p++;

    if(*p=='-' || *p=='+')
       p++;

    while(isdigit(*p))
       p++;
   }

 /* Anything left over means that it was not just a number */

 if(*p)
    return(0);
 else
    return(1);
}
Properties
Name | Value |
---|---|
cvs:description | A simple generic XML parser. |