Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Contents of /trunk/src/xmlparse.l
Parent Directory
|
Revision Log
Revision 509 -
(show annotations)
(download)
Sat Oct 9 11:05:28 2010 UTC (14 years, 5 months ago) by amb
File size: 26247 byte(s)
Sat Oct 9 11:05:28 2010 UTC (14 years, 5 months ago) by amb
File size: 26247 byte(s)
Ensure that comparisons are made with unsigned chars.
1 | %{ |
2 | /*************************************** |
3 | $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.20 2010-10-09 11:05:28 amb Exp $ |
4 | |
5 | A simple generic XML parser where the structure comes from the function parameters. |
6 | Not intended to be fully conforming to the XML standard or a validating parser but |
7 | sufficient to parse OSM XML and simple program configuration files. |
8 | |
9 | Part of the Routino routing software. |
10 | ******************/ /****************** |
11 | This file Copyright 2010 Andrew M. Bishop |
12 | |
13 | This program is free software: you can redistribute it and/or modify |
14 | it under the terms of the GNU Affero General Public License as published by |
15 | the Free Software Foundation, either version 3 of the License, or |
16 | (at your option) any later version. |
17 | |
18 | This program is distributed in the hope that it will be useful, |
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | GNU Affero General Public License for more details. |
22 | |
23 | You should have received a copy of the GNU Affero General Public License |
24 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
25 | ***************************************/ |
26 | |
27 | |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <ctype.h> |
31 | #include <string.h> |
32 | |
33 | #include "xmlparse.h" |
34 | |
35 | |
36 | /* Parser outputs */ |
37 | |
38 | #define LEX_EOF 0 |
39 | |
40 | #define LEX_TAG_BEGIN 1 |
41 | #define LEX_XML_DECL_BEGIN 2 |
42 | #define LEX_TAG_POP 3 |
43 | #define LEX_TAG_PUSH 4 |
44 | #define LEX_XML_DECL_FINISH 6 |
45 | #define LEX_TAG_FINISH 7 |
46 | #define LEX_ATTR_KEY 8 |
47 | #define LEX_ATTR_VAL 9 |
48 | |
49 | #define LEX_ERROR 100 |
50 | |
51 | #define LEX_ERROR_TAG_START 101 |
52 | #define LEX_ERROR_XML_DECL_START 102 |
53 | #define LEX_ERROR_TAG 103 |
54 | #define LEX_ERROR_XML_DECL 104 |
55 | #define LEX_ERROR_ATTR 105 |
56 | #define LEX_ERROR_END_TAG 106 |
57 | #define LEX_ERROR_COMMENT 107 |
58 | #define LEX_ERROR_CLOSE 108 |
59 | #define LEX_ERROR_ATTR_VAL 109 |
60 | #define LEX_ERROR_ENTITY_REF 110 |
61 | #define LEX_ERROR_CHAR_REF 111 |
62 | |
63 | #define LEX_ERROR_UNEXP_TAG 201 |
64 | #define LEX_ERROR_UNBALANCED 202 |
65 | #define LEX_ERROR_NO_START 203 |
66 | #define LEX_ERROR_UNEXP_ATT 204 |
67 | #define LEX_ERROR_UNEXP_EOF 205 |
68 | #define LEX_ERROR_XML_NOT_FIRST 206 |
69 | |
70 | #define LEX_ERROR_CALLBACK 255 |
71 | |
72 | |
73 | /* Lexer definitions */ |
74 | |
75 | #define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */ |
76 | #ifndef yywrap |
77 | /*+ Needed in lex but does nothing. +*/ |
78 | #define yywrap() 1 |
79 | #endif |
80 | |
/*+ Reset the current string to empty, allocating the buffer on first use. +*/
/* Expects 'string', 'stringlen' and 'stringused' to be in scope at the point of use. */
/* Wrapped in do/while(0) so that it behaves as a single statement after if/else. */
#define reset_string \
 do \
   { \
    if(!string) \
      { \
       string=(char*)malloc(16); \
       stringlen=16; /* record the capacity so the first append need not reallocate */ \
      } \
    *string=0; \
    stringused=0; \
   } \
 while(0)
86 | |
/*+ Append a NUL-terminated string to the current string, growing the buffer when needed. +*/
/* Expects 'string', 'stringlen', 'stringused' and 'newlen' to be in scope at the point of use. */
/* The argument is evaluated twice so it must not have side effects. */
/* Wrapped in do/while(0) so that it behaves as a single statement after if/else. */
#define append_string(xx) \
 do \
   { \
    newlen=strlen(xx); \
    if((stringused+newlen)>=stringlen) \
       string=(char*)realloc((void*)string,stringlen=(stringused+newlen+16)); \
    strcpy(string+stringused,(xx)); \
    stringused+=newlen; \
   } \
 while(0)
94 | |
95 | #define YY_NO_INPUT |
96 | |
97 | |
98 | /* Lexer functions and variables */ |
99 | |
100 | extern int yylex(void); |
101 | |
102 | static char *yylval=NULL; |
103 | |
104 | static int xmlparse_options; |
105 | |
106 | %} |
107 | |
108 | %option 8bit |
109 | %option pointer |
110 | %option batch |
111 | %option yylineno |
112 | |
113 | %option nodefault |
114 | %option perf-report |
115 | %option fast |
116 | %option nounput |
117 | |
118 | |
119 | /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII tags not Unicode. */ |
120 | |
121 | S [ \t\r\n] |
122 | |
123 | U1 [\x09\x0A\x0D\x20-\x7F] |
124 | U2 [\xC2-\xDF][\x80-\xBF] |
125 | U3a \xE0[\xA0-\xBF][\x80-\xBF] |
126 | U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF] |
127 | U3c \xED[\x80-\x9F][\x80-\xBF] |
128 | U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF] |
129 | U3 {U3a}|{U3b}|{U3c}|{U3d} |
130 | U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF] |
131 | U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
132 | U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF] |
133 | U4 {U4a}|{U4b}|{U4c} |
134 | |
135 | U ({U1}|{U2}|{U3}|{U4}) |
136 | UquotedS ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4}) |
137 | UquotedD ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4}) |
138 | |
139 | N (\n|\r\n) |
140 | |
141 | letter [a-zA-Z] |
142 | digit [0-9] |
143 | xdigit [a-fA-F0-9] |
144 | |
145 | namechar ({letter}|{digit}|[-._:]) |
146 | name ({letter}|[_:]){namechar}* |
147 | |
148 | entityref &{name}; |
149 | charref &#({digit}+|x{xdigit}+); |
150 | |
151 | |
152 | %x COMMENT |
153 | %x CDATA |
154 | %x DOCTYPE |
155 | %x XML_DECL_START XML_DECL |
156 | %x TAG_START TAG |
157 | %x ATTR_KEY ATTR_VAL |
158 | %x END_TAG1 END_TAG2 |
159 | %x DQUOTED SQUOTED |
160 | |
161 | %% |
162 | /* Must use static variables since the parser returns often. */ |
163 | static char *string=NULL; |
164 | static int stringlen=0,stringused=0; |
165 | static int after_attr=0; |
166 | int newlen; |
167 | int doctype_depth=0; |
168 | |
169 | /* Handle top level entities */ |
170 | |
171 | "<!--" { BEGIN(COMMENT); } |
172 | "<![CDATA[" { BEGIN(CDATA); } |
173 | "<!DOCTYPE" { BEGIN(DOCTYPE); doctype_depth=0; } |
174 | "</" { BEGIN(END_TAG1); } |
175 | "<?" { BEGIN(XML_DECL_START); } |
176 | "<" { BEGIN(TAG_START); } |
177 | ">" { return(LEX_ERROR_CLOSE); } |
178 | [^<>]+ { } |
179 | |
180 | /* Comments */ |
181 | |
182 | <COMMENT>"--->" { return(LEX_ERROR_COMMENT); } |
183 | <COMMENT>"-->" { BEGIN(INITIAL); } |
184 | <COMMENT>"--"[^->]+ { } |
185 | <COMMENT>[^-]+ { } |
186 | <COMMENT>"-" { } |
187 | |
188 | /* CDATA */ |
189 | |
190 | <CDATA>"]]>" { BEGIN(INITIAL); } |
191 | <CDATA>"]" { } |
192 | <CDATA>[^]]+ { } |
193 | |
194 | /* DOCTYPE */ |
195 | |
196 | <DOCTYPE>"<" { doctype_depth++; } |
197 | <DOCTYPE>">" { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; } |
198 | <DOCTYPE>[^<>]+ { } |
199 | |
200 | /* XML Declaration start */ |
201 | |
202 | <XML_DECL_START>xml { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); } |
203 | <XML_DECL_START>.|{N} { return(LEX_ERROR_XML_DECL_START); } |
204 | |
205 | /* XML Declaration middle */ |
206 | |
207 | <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); } |
208 | <XML_DECL>{S}+ { } |
209 | <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); } |
210 | <XML_DECL>.|{N} { return(LEX_ERROR_XML_DECL); } |
211 | |
212 | /* Any tag start */ |
213 | |
214 | <TAG_START>{name} { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); } |
215 | <TAG_START>.|{N} { return(LEX_ERROR_TAG_START); } |
216 | |
217 | /* End-tag start */ |
218 | |
219 | <END_TAG1>{name} { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); } |
220 | <END_TAG1>.|{N} { return(LEX_ERROR_END_TAG); } |
221 | |
222 | <END_TAG2>">" { BEGIN(INITIAL); } |
223 | <END_TAG2>.|{N} { return(LEX_ERROR_END_TAG); } |
224 | |
225 | /* Any tag middle */ |
226 | |
227 | <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); } |
228 | <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); } |
229 | <TAG>{S}+ { } |
230 | <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); } |
231 | <TAG>.|{N} { return(LEX_ERROR_TAG); } |
232 | |
233 | /* Attributes */ |
234 | |
235 | <ATTR_KEY>= { BEGIN(ATTR_VAL); } |
236 | <ATTR_KEY>.|{N} { return(LEX_ERROR_ATTR); } |
237 | |
238 | <ATTR_VAL>\" { BEGIN(DQUOTED); reset_string; } |
239 | <ATTR_VAL>\' { BEGIN(SQUOTED); reset_string; } |
240 | <ATTR_VAL>.|{N} { return(LEX_ERROR_ATTR); } |
241 | |
242 | /* Quoted strings */ |
243 | |
244 | <DQUOTED>\" { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); } |
245 | <DQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
246 | else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } } |
247 | <DQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
248 | else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } } |
249 | <DQUOTED>[<>&\"] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
250 | <DQUOTED>{UquotedD}+ { append_string(yytext); } |
251 | <DQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
252 | |
253 | <SQUOTED>\' { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); } |
254 | <SQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
255 | else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } } |
256 | <SQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);} |
257 | else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } } |
258 | <SQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
259 | <SQUOTED>{UquotedS}+ { append_string(yytext); } |
260 | <SQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); } |
261 | |
262 | /* End of file */ |
263 | |
264 | <<EOF>> { free(string); string=NULL; stringlen=stringused=0; BEGIN(INITIAL); return(LEX_EOF); } |
265 | |
266 | %% |
267 | |
268 | |
269 | /*++++++++++++++++++++++++++++++++++++++ |
270 | A function to call the callback function with the parameters needed. |
271 | |
272 | int call_callback Returns 1 if the callback returned with an error. |
273 | |
274 | const char *name The name of the tag. |
275 | |
276 | int (*callback)() The callback function. |
277 | |
278 | int type The type of tag (start and/or end). |
279 | |
280 | int nattributes The number of attributes collected. |
281 | |
282 | char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes. |
283 | ++++++++++++++++++++++++++++++++++++++*/ |
284 | |
285 | static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS]) |
286 | { |
287 | switch(nattributes) |
288 | { |
289 | case 0: return (*callback)(name,type); |
290 | case 1: return (*callback)(name,type,attributes[0]); |
291 | case 2: return (*callback)(name,type,attributes[0],attributes[1]); |
292 | case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]); |
293 | case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]); |
294 | case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); |
295 | case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); |
296 | case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); |
297 | case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); |
298 | case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); |
299 | case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); |
300 | case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); |
301 | case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); |
302 | case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); |
303 | case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); |
304 | case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); |
305 | case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); |
306 | |
307 | default: |
308 | fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s' source code needs changing.\n",yylineno,name); |
309 | exit(1); |
310 | } |
311 | } |
312 | |
313 | |
314 | /*++++++++++++++++++++++++++++++++++++++ |
315 | Parse the XML and call the functions for each tag as seen. |
316 | |
317 | int ParseXML Returns 0 if OK or something else in case of an error. |
318 | |
319 | FILE *file The file to parse. |
320 | |
321 | xmltag **tags The array of pointers to tags for the top level. |
322 | |
323 | int options A list of XML Parser options OR-ed together. |
324 | ++++++++++++++++++++++++++++++++++++++*/ |
325 | |
326 | int ParseXML(FILE *file,xmltag **tags,int options) |
327 | { |
328 | int yychar,i; |
329 | |
330 | char *attributes[XMLPARSE_MAX_ATTRS]={NULL}; |
331 | int attribute=0; |
332 | |
333 | int stackdepth=0,stackused=0; |
334 | xmltag ***tags_stack=NULL; |
335 | xmltag **tag_stack=NULL; |
336 | xmltag *tag=NULL; |
337 | |
338 | /* The actual parser. */ |
339 | |
340 | xmlparse_options=options; |
341 | |
342 | yyin=file; |
343 | |
344 | yyrestart(yyin); |
345 | |
346 | yylineno=1; |
347 | |
348 | BEGIN(INITIAL); |
349 | |
350 | do |
351 | { |
352 | yychar=yylex(); |
353 | |
354 | switch(yychar) |
355 | { |
356 | /* The start of a tag for an XML declaration */ |
357 | |
358 | case LEX_XML_DECL_BEGIN: |
359 | |
360 | if(tag_stack) |
361 | { |
362 | fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno); |
363 | yychar=LEX_ERROR_XML_NOT_FIRST; |
364 | break; |
365 | } |
366 | |
367 | /* The start of a tag for an element */ |
368 | |
369 | case LEX_TAG_BEGIN: |
370 | |
371 | tag=NULL; |
372 | |
373 | for(i=0;tags[i];i++) |
374 | if(!strcasecmp(yylval,tags[i]->name)) |
375 | { |
376 | tag=tags[i]; |
377 | |
378 | for(i=0;i<tag->nattributes;i++) |
379 | if(attributes[i]) |
380 | { |
381 | free(attributes[i]); |
382 | attributes[i]=NULL; |
383 | } |
384 | |
385 | break; |
386 | } |
387 | |
388 | if(tag==NULL) |
389 | { |
390 | fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval); |
391 | yychar=LEX_ERROR_UNEXP_TAG; |
392 | } |
393 | |
394 | break; |
395 | |
396 | /* The end of the start-tag for an element */ |
397 | |
398 | case LEX_TAG_PUSH: |
399 | |
400 | if(stackused==stackdepth) |
401 | { |
402 | tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*)); |
403 | tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**)); |
404 | } |
405 | |
406 | tag_stack [stackused]=tag; |
407 | tags_stack[stackused]=tags; |
408 | stackused++; |
409 | |
410 | if(tag->callback) |
411 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes)) |
412 | yychar=LEX_ERROR_CALLBACK; |
413 | |
414 | tags=tag->subtags; |
415 | |
416 | break; |
417 | |
418 | /* The end of the empty-element-tag for an XML declaration */ |
419 | |
420 | case LEX_XML_DECL_FINISH: |
421 | |
422 | /* The end of the empty-element-tag for an element */ |
423 | |
424 | case LEX_TAG_FINISH: |
425 | |
426 | if(tag->callback) |
427 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes)) |
428 | yychar=LEX_ERROR_CALLBACK; |
429 | |
430 | if(stackused>0) |
431 | tag=tag_stack[stackused-1]; |
432 | else |
433 | tag=NULL; |
434 | |
435 | break; |
436 | |
437 | /* The end of the end-tag for an element */ |
438 | |
439 | case LEX_TAG_POP: |
440 | |
441 | stackused--; |
442 | tags=tags_stack[stackused]; |
443 | tag =tag_stack [stackused]; |
444 | |
445 | if(strcmp(tag->name,yylval)) |
446 | { |
447 | fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name); |
448 | yychar=LEX_ERROR_UNBALANCED; |
449 | } |
450 | |
451 | if(stackused<0) |
452 | { |
453 | fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval); |
454 | yychar=LEX_ERROR_NO_START; |
455 | } |
456 | |
457 | for(i=0;i<tag->nattributes;i++) |
458 | if(attributes[i]) |
459 | { |
460 | free(attributes[i]); |
461 | attributes[i]=NULL; |
462 | } |
463 | |
464 | if(tag->callback) |
465 | if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes)) |
466 | yychar=LEX_ERROR_CALLBACK; |
467 | |
468 | if(stackused>0) |
469 | tag=tag_stack[stackused-1]; |
470 | else |
471 | tag=NULL; |
472 | |
473 | break; |
474 | |
475 | /* An attribute key */ |
476 | |
477 | case LEX_ATTR_KEY: |
478 | |
479 | attribute=-1; |
480 | |
481 | for(i=0;i<tag->nattributes;i++) |
482 | if(!strcasecmp(yylval,tag->attributes[i])) |
483 | { |
484 | attribute=i; |
485 | |
486 | break; |
487 | } |
488 | |
489 | if(attribute==-1) |
490 | { |
491 | if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR || |
492 | ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':'))) |
493 | { |
494 | fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name); |
495 | yychar=LEX_ERROR_UNEXP_ATT; |
496 | } |
497 | else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN) |
498 | fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name); |
499 | } |
500 | |
501 | break; |
502 | |
503 | /* An attribute value */ |
504 | |
505 | case LEX_ATTR_VAL: |
506 | |
507 | if(tag->callback && attribute!=-1 && yylval) |
508 | attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval); |
509 | |
510 | break; |
511 | |
512 | /* End of file */ |
513 | |
514 | case LEX_EOF: |
515 | |
516 | if(tag) |
517 | { |
518 | fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name); |
519 | yychar=LEX_ERROR_UNEXP_EOF; |
520 | } |
521 | |
522 | break; |
523 | |
524 | case LEX_ERROR_TAG_START: |
525 | fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno); |
526 | break; |
527 | |
528 | case LEX_ERROR_XML_DECL_START: |
529 | fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno); |
530 | break; |
531 | |
532 | case LEX_ERROR_TAG: |
533 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name); |
534 | break; |
535 | |
536 | case LEX_ERROR_XML_DECL: |
537 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name); |
538 | break; |
539 | |
540 | case LEX_ERROR_ATTR: |
541 | fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno); |
542 | break; |
543 | |
544 | case LEX_ERROR_END_TAG: |
545 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno); |
546 | break; |
547 | |
548 | case LEX_ERROR_COMMENT: |
549 | fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno); |
550 | break; |
551 | |
552 | case LEX_ERROR_CLOSE: |
553 | fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno); |
554 | break; |
555 | |
556 | case LEX_ERROR_ATTR_VAL: |
557 | fprintf(stderr,"XML Parser: Error on line %d: invalid character '%s' seen in attribute value.\n",yylineno,yylval); |
558 | break; |
559 | |
560 | case LEX_ERROR_ENTITY_REF: |
561 | fprintf(stderr,"XML Parser: Error on line %d: invalid entity reference '%s' seen in attribute value.\n",yylineno,yylval); |
562 | break; |
563 | |
564 | case LEX_ERROR_CHAR_REF: |
565 | fprintf(stderr,"XML Parser: Error on line %d: invalid character reference '%s' seen in attribute value.\n",yylineno,yylval); |
566 | break; |
567 | } |
568 | } |
569 | while(yychar>LEX_EOF && yychar<LEX_ERROR); |
570 | |
571 | /* Delete the tagdata */ |
572 | |
573 | for(i=0;i<XMLPARSE_MAX_ATTRS;i++) |
574 | if(attributes[i]) |
575 | free(attributes[i]); |
576 | |
577 | if(stackdepth) |
578 | { |
579 | free(tag_stack); |
580 | free(tags_stack); |
581 | } |
582 | |
583 | return(yychar); |
584 | } |
585 | |
586 | |
587 | /*++++++++++++++++++++++++++++++++++++++ |
588 | Return the current parser line number. |
589 | |
590 | unsigned long ParseXML_LineNumber Returns the line number. |
591 | ++++++++++++++++++++++++++++++++++++++*/ |
592 | |
593 | unsigned long ParseXML_LineNumber(void) |
594 | { |
595 | return(yylineno); |
596 | } |
597 | |
598 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML entity reference into an ASCII string.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the replacement decoded string
                                   (a constant that must not be modified or freed) or
                                   NULL if the entity is not one of the five predefined ones.

  const char *string The entity reference string, including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison must be against the full reference text as matched by the lexer. */

 if(!strcmp(string,"&amp;"))  return("&");
 if(!strcmp(string,"&lt;"))   return("<");
 if(!strcmp(string,"&gt;"))   return(">");
 if(!strcmp(string,"&apos;")) return("'");
 if(!strcmp(string,"&quot;")) return("\"");

 return(NULL);
}
616 | |
617 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a one character string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to a static buffer holding the decoded
                                 character (overwritten by the next call) or NULL if the
                                 value is outside the range 0 to 255.

  const char *string The character reference string in the form '&#NNN;' or '&#xHH;'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[2]=" ";
 const char *digits;
 int base;
 long int code;

 /* Skip the '&#' (and the 'x' if present) to reach the digits. */

 if(string[2]=='x')
   {
    digits=string+3;
    base=16;
   }
 else
   {
    digits=string+2;
    base=10;
   }

 code=strtol(digits,NULL,base);

 if(code<0 || code>255)
    return(NULL);

 result[0]=code&0xff;

 return(result);
}
641 | |
642 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if no change is needed, otherwise
                                 a newly allocated encoded string (or NULL if memory could not
                                 be allocated).

  const char *string The string to convert (UTF-8 is decoded for bytes above 127).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 const unsigned char *in=(const unsigned char*)string; /* unsigned so that bytes above 127 compare correctly */
 size_t i,j,len;
 char *result;

 /* Find the first character that needs to be encoded. */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the unchanged prefix into a new buffer. */

 len=i+256;

 result=(char*)malloc(len);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    const char *entity=NULL;

    /* The longest output for one input character is "&#xHHHHHH;" (10 bytes) plus the terminator. */

    if(len-j<11)
      {
       char *bigger;

       len+=256;

       bigger=(char*)realloc((void*)result,len);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
      }

    if(in[i]=='<')
       entity="&lt;";
    else if(in[i]=='>')
       entity="&gt;";
    else if(in[i]=='&')
       entity="&amp;";
    else if(in[i]=='\'')
       entity="&apos;";
    else if(in[i]=='"')
       entity="&quot;";

    if(entity)
      {
       size_t n=strlen(entity);

       memcpy(result+j,entity,n);
       j+=n;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (anything else, including control characters, becomes U+FFFD) */

       if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */
          unicode =(in[i++]&0x1F)<<6;
          unicode|= in[i  ]&0x3F;
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(in[i++]&0x0F)<<12;
          unicode|=(in[i++]&0x3F)<<6;
          unicode|= in[i  ]&0x3F;
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(in[i++]&0x07)<<18;
          unicode|=(in[i++]&0x3F)<<12;
          unicode|=(in[i++]&0x3F)<<6;
          unicode|= in[i  ]&0x3F;
         }
       else
          unicode=0xFFFD;

       /* Output the character entity, two hex digits per significant byte */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
778 | |
779 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to a integer (checking that it really is a integer).

  int ParseXML_GetInteger Returns 1 if the whole string is an optional sign followed only by
                          decimal digits or 0 otherwise (in which case the number is not set).

  const char *string The string to be parsed.

  int *number Returns the number.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetInteger(const char *string,int *number)
{
 /* Scan as unsigned char because isdigit() is undefined for negative values. */

 const unsigned char *p=(const unsigned char*)string;

 if(*p=='-' || *p=='+')
    p++;

 while(isdigit(*p))
    p++;

 if(*p)
    return(0);

 *number=atoi(string);

 return(1);
}
807 | |
808 | |
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to a floating point number (checking that it really is a number).

  int ParseXML_GetFloating Returns 1 if the whole string has the form of a number or 0
                           otherwise (in which case the number is not set).

  const char *string The string to be parsed.

  double *number Returns the number.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetFloating(const char *string,double *number)
{
 /* Scan as unsigned char because isdigit() is undefined for negative values. */

 const unsigned char *p=(const unsigned char*)string;

 /* Optional sign */

 if(*p=='-' || *p=='+')
    p++;

 /* Mantissa digits and decimal points */

 while(isdigit(*p) || *p=='.')
    p++;

 /* Optional exponent with its own optional sign */

 if(*p=='e' || *p=='E')
   {
    p++;

    if(*p=='-' || *p=='+')
       p++;

    while(isdigit(*p))
       p++;
   }

 if(*p)
    return(0);

 *number=atof(string);

 return(1);
}
Properties
Name | Value |
---|---|
cvs:description | A simple generic XML parser. |