Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Contents of /trunk/src/xmlparse.l
Parent Directory
|
Revision Log
Revision 348 -
(show annotations)
(download)
Sun Apr 4 14:29:34 2010 UTC (14 years, 11 months ago) by amb
File size: 16937 byte(s)
Sun Apr 4 14:29:34 2010 UTC (14 years, 11 months ago) by amb
File size: 16937 byte(s)
Added error checking.
1 | %{ |
2 | /*************************************** |
3 | $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.4 2010-04-04 14:29:34 amb Exp $ |
4 | |
5 | A simple generic XML parser where the structure comes from the function parameters. |
6 | Not intended to be fully conforming to the XML standard or a validating parser but |
7 | sufficient to parse OSM XML and simple program configuration files. |
8 | |
9 | Part of the Routino routing software. |
10 | ******************/ /****************** |
11 | This file Copyright 2010 Andrew M. Bishop |
12 | |
13 | This program is free software: you can redistribute it and/or modify |
14 | it under the terms of the GNU Affero General Public License as published by |
15 | the Free Software Foundation, either version 3 of the License, or |
16 | (at your option) any later version. |
17 | |
18 | This program is distributed in the hope that it will be useful, |
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | GNU Affero General Public License for more details. |
22 | |
23 | You should have received a copy of the GNU Affero General Public License |
24 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
25 | ***************************************/ |
26 | |
27 | |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <string.h> |
31 | |
/* Parser outputs */

/* Values returned by yylex() and, for the 2xx group, assigned by ParseXML(). */
/* ParseXML() keeps looping only while the value is above LEX_EOF and below LEX_ERROR. */

enum
{
 /* End of input */

 LEX_EOF                  =   0,

 /* Normal tokens */

 LEX_TAG_BEGIN            =   1,
 LEX_XML_DECL_BEGIN       =   2,
 LEX_TAG_POP              =   3,
 LEX_TAG_PUSH             =   4,
 LEX_XML_DECL_FINISH      =   6,
 LEX_TAG_FINISH           =   7,
 LEX_ATTR_KEY             =   8,
 LEX_ATTR_VAL             =   9,

 /* Threshold: this value and everything above it is an error */

 LEX_ERROR                = 100,

 /* Errors returned by the lexer rules */

 LEX_ERROR_TAG_START      = 101,
 LEX_ERROR_XML_DECL_START = 102,
 LEX_ERROR_TAG            = 103,
 LEX_ERROR_XML_DECL       = 104,
 LEX_ERROR_ATTR           = 105,
 LEX_ERROR_END_TAG        = 106,
 LEX_ERROR_COMMENT        = 107,
 LEX_ERROR_CLOSE          = 108,

 /* Errors detected by the parser function */

 LEX_ERROR_UNEXP_TAG      = 201,
 LEX_ERROR_UNBALANCED     = 202,
 LEX_ERROR_NO_START       = 203,
 LEX_ERROR_UNEXP_ATT      = 204,
 LEX_ERROR_UNEXP_EOF      = 205,
 LEX_ERROR_XML_NOT_FIRST  = 206
};
62 | |
63 | |
64 | /* Lexer definitions */ |
65 | |
66 | #define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */ |
67 | #ifndef yywrap |
68 | /*+ Needed in lex but does nothing. +*/ |
69 | #define yywrap() 1 |
70 | #endif |
71 | |
/*+ Reset the current string (truncate it to empty without releasing the buffer). +*/
/*  Uses the variables 'string' and 'stringused' that are in scope where the macro is expanded. */
#define reset_string \
 do \
   { \
    if(string) *string=0; \
    stringused=0; \
   } \
 while(0)

/*+ Append information to the current string, growing the buffer when needed. +*/
/*  Uses the variables 'string', 'stringlen', 'stringused' and 'newlen' that are in scope */
/*  where the macro is expanded.  The buffer is also (re)allocated when 'string' is NULL  */
/*  so that a stale 'stringlen' left over after the buffer was freed cannot cause a write */
/*  through a NULL pointer.  Wrapped in do/while(0) so it is safe in an unbraced if/else. */
#define append_string(xx) \
 do \
   { \
    newlen=strlen(xx); \
    if(!string || (stringused+newlen)>=stringlen) \
      { \
       stringlen=stringused+newlen+16; \
       string=(char*)realloc((void*)string,stringlen); \
       if(!string) \
         { \
          fprintf(stderr,"XML Parser: Error on line %d: out of memory.\n",yylineno); \
          exit(1); \
         } \
      } \
    strcpy(string+stringused,xx); \
    stringused+=newlen; \
   } \
 while(0)
84 | |
85 | #define YY_NO_INPUT |
86 | |
87 | |
88 | /* Lexer functions and variables */ |
89 | |
90 | extern int yylex(void); |
91 | |
92 | static char *yylval=NULL; |
93 | |
94 | %} |
95 | |
96 | %option 8bit |
97 | %option pointer |
98 | %option batch |
99 | %option yylineno |
100 | |
101 | %option nodefault |
102 | %option perf-report |
103 | %option fast |
104 | %option nounput |
105 | |
106 | |
107 | /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII not Unicode. */ |
108 | |
109 | S [ \t\r\n] |
110 | |
111 | letter [a-zA-Z] |
112 | digit [0-9] |
113 | xdigit [a-fA-F0-9] |
114 | |
115 | namechar ({letter}|{digit}|[-._:]) |
116 | name ({letter}|[_:]){namechar}* |
117 | |
118 | entityref &{name}; |
119 | charref &#({digit}+|x{xdigit}+); |
120 | |
121 | |
122 | %x COMMENT |
123 | %x CDATA |
124 | %x DOCTYPE |
125 | %x XML_DECL_START XML_DECL |
126 | %x TAG_START TAG |
127 | %x ATTR_KEY ATTR_VAL |
128 | %x END_TAG1 END_TAG2 |
129 | %x DQUOTED SQUOTED |
130 | |
%%
 /* Must use static variables since the parser returns often. */
 /* 'string' is the buffer in which a quoted attribute value is accumulated, */
 /* 'stringlen' is its allocated size and 'stringused' the length in use.    */
 /* 'after_attr' is the start condition to return to after an attribute.     */
 static char *string=NULL;
 static int stringlen=0,stringused=0;
 static int after_attr=0;
 int newlen;
 int doctype_depth=0;

 /* Handle top level entities */

"<!--"                      { BEGIN(COMMENT); }
"<![CDATA["                 { BEGIN(CDATA); }
"<!DOCTYPE"                 { BEGIN(DOCTYPE); doctype_depth=0; }
"</"                        { BEGIN(END_TAG1); }
"<?"                        { BEGIN(XML_DECL_START); }
"<"                         { BEGIN(TAG_START); }
">"                         { return(LEX_ERROR_CLOSE); }
[^<>]+                      { }

 /* Comments */

<COMMENT>"--->"             { return(LEX_ERROR_COMMENT); }
<COMMENT>"-->"              { BEGIN(INITIAL); }
<COMMENT>"--"[^->]+         { }
<COMMENT>[^-]+              { }
<COMMENT>"-"                { }

 /* CDATA */

<CDATA>"]]>"                { BEGIN(INITIAL); }
<CDATA>"]"                  { }
<CDATA>[^]]+                { }

 /* DOCTYPE (nested '<' ... '>' pairs are counted and skipped) */

<DOCTYPE>"<"                { doctype_depth++; }
<DOCTYPE>">"                { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
<DOCTYPE>[^<>]+             { }

 /* XML Declaration start */

<XML_DECL_START>{name}      { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); }
<XML_DECL_START>.|\n        { return(LEX_ERROR_XML_DECL_START); }

 /* XML Declaration middle */

<XML_DECL>"?>"              { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
<XML_DECL>{S}+              { }
<XML_DECL>{name}            { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<XML_DECL>.|\n              { return(LEX_ERROR_XML_DECL); }

 /* Any tag start */

<TAG_START>{name}           { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
<TAG_START>.|\n             { return(LEX_ERROR_TAG_START); }

 /* End-tag start */

<END_TAG1>{name}            { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
<END_TAG1>.|\n              { return(LEX_ERROR_END_TAG); }

<END_TAG2>">"               { BEGIN(INITIAL); }
<END_TAG2>.|\n              { return(LEX_ERROR_END_TAG); }

 /* Any tag middle */

<TAG>"/>"                   { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
<TAG>">"                    { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
<TAG>{S}+                   { }
<TAG>{name}                 { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<TAG>.|\n                   { return(LEX_ERROR_TAG); }

 /* Attributes */

<ATTR_KEY>=                 { BEGIN(ATTR_VAL); }
<ATTR_KEY>.|\n              { return(LEX_ERROR_ATTR); }

<ATTR_VAL>\"                { BEGIN(DQUOTED); reset_string; }
<ATTR_VAL>\'                { BEGIN(SQUOTED); reset_string; }
<ATTR_VAL>.|\n              { return(LEX_ERROR_ATTR); }

 /* Quoted strings (entity and character references are copied through unmodified) */

<DQUOTED>\"                 { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
<DQUOTED>{entityref}        { append_string(yytext); }
<DQUOTED>{charref}          { append_string(yytext); }
<DQUOTED>[<>&]              { return(LEX_ERROR_TAG); }
<DQUOTED>[^<>&\"]+          { append_string(yytext); }

<SQUOTED>\'                 { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
<SQUOTED>{entityref}        { append_string(yytext); }
<SQUOTED>{charref}          { append_string(yytext); }
<SQUOTED>[<>&]              { return(LEX_ERROR_TAG); }
<SQUOTED>[^<>&\']+          { append_string(yytext); }

 /* End of file */
 /* The buffer is released so its recorded size and length must be cleared too, */
 /* otherwise the next file parsed would skip the allocation and write to NULL. */

<<EOF>>                     { free(string); string=NULL; stringlen=0; stringused=0; BEGIN(INITIAL); return(LEX_EOF); }

%%
231 | |
232 | #include "xmlparse.h" |
233 | |
234 | |
235 | /*++++++++++++++++++++++++++++++++++++++ |
236 | A function to call the callback function with the parameters needed. |
237 | |
238 | char *name The name of the tag. |
239 | |
240 | void (*callback)() The callback function. |
241 | |
242 | int type The type of tag (start and/or end). |
243 | |
244 | int nattributes The number of attributes collected. |
245 | |
246 | char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes. |
247 | ++++++++++++++++++++++++++++++++++++++*/ |
248 | |
249 | static inline void call_callback(char *name,void (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS]) |
250 | { |
251 | switch(nattributes) |
252 | { |
253 | case 0: (*callback)(type); break; |
254 | case 1: (*callback)(type,attributes[0]); break; |
255 | case 2: (*callback)(type,attributes[0],attributes[1]); break; |
256 | case 3: (*callback)(type,attributes[0],attributes[1],attributes[2]); break; |
257 | case 4: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3]); break; |
258 | case 5: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); break; |
259 | case 6: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); break; |
260 | case 7: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); break; |
261 | case 8: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); break; |
262 | case 9: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); break; |
263 | case 10: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); break; |
264 | case 11: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); break; |
265 | case 12: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); break; |
266 | case 13: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); break; |
267 | case 14: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); break; |
268 | case 15: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); break; |
269 | case 16: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); break; |
270 | |
271 | default: |
272 | fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s'.\n",yylineno,name); |
273 | exit(1); |
274 | } |
275 | } |
276 | |
277 | |
278 | /*++++++++++++++++++++++++++++++++++++++ |
279 | Parse the XML and call the functions for each tag as seen. |
280 | |
281 | int ParseXML Returns 0 if OK or something else in case of an error. |
282 | |
283 | FILE *file The file to parse. |
284 | |
285 | xmltag **tags The array of pointers to tags for the top level. |
286 | |
287 | int ignore_unknown_attributes If set to 0 then exit if unknown attribute is seen, if sete to 1 then warn, if set to 2 then ignore. |
288 | ++++++++++++++++++++++++++++++++++++++*/ |
289 | |
290 | int ParseXML(FILE *file,xmltag **tags,int ignore_unknown_attributes) |
291 | { |
292 | int yychar,i; |
293 | |
294 | int nattributes=0; |
295 | char *attributes[XMLPARSE_MAX_ATTRS]; |
296 | int attribute=0; |
297 | |
298 | int stackdepth=0,stackused=0; |
299 | xmltag ***tags_stack=NULL; |
300 | xmltag **tag_stack=NULL; |
301 | xmltag *tag=NULL; |
302 | |
303 | static int first=1; |
304 | |
305 | /* Parser (re)-initialisation */ |
306 | |
307 | yyin=file; |
308 | |
309 | if(!first) |
310 | yyrestart(NULL); |
311 | |
312 | first=0; |
313 | |
314 | /* The actual parser. */ |
315 | |
316 | do |
317 | { |
318 | yychar=yylex(); |
319 | |
320 | switch(yychar) |
321 | { |
322 | /* The start of a tag for an XML declaration */ |
323 | |
324 | case LEX_XML_DECL_BEGIN: |
325 | |
326 | if(tag_stack) |
327 | { |
328 | fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno); |
329 | yychar=LEX_ERROR_XML_NOT_FIRST; |
330 | break; |
331 | } |
332 | |
333 | /* The start of a tag for an element */ |
334 | |
335 | case LEX_TAG_BEGIN: |
336 | |
337 | tag=NULL; |
338 | |
339 | for(i=0;tags[i];i++) |
340 | if(!strcasecmp(yylval,tags[i]->name)) |
341 | { |
342 | tag=tags[i]; |
343 | |
344 | for(i=0;i<nattributes;i++) |
345 | free(attributes[i]); |
346 | |
347 | for(i=0;i<XMLPARSE_MAX_ATTRS;i++) |
348 | if(!tag->attributes[i]) |
349 | break; |
350 | |
351 | nattributes=i; |
352 | |
353 | for(i=0;i<nattributes;i++) |
354 | attributes[i]=NULL; |
355 | |
356 | break; |
357 | } |
358 | |
359 | if(tag==NULL) |
360 | { |
361 | fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval); |
362 | yychar=LEX_ERROR_UNEXP_TAG; |
363 | } |
364 | |
365 | break; |
366 | |
367 | /* The end of the start-tag for an element */ |
368 | |
369 | case LEX_TAG_PUSH: |
370 | |
371 | if(stackused==stackdepth) |
372 | { |
373 | tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*)); |
374 | tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**)); |
375 | } |
376 | |
377 | tag_stack [stackused]=tag; |
378 | tags_stack[stackused]=tags; |
379 | stackused++; |
380 | |
381 | if(tag->callback) |
382 | call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,nattributes,attributes); |
383 | |
384 | tags=tag->subtags; |
385 | |
386 | break; |
387 | |
388 | /* The end of the empty-element-tag for an XML declaration */ |
389 | |
390 | case LEX_XML_DECL_FINISH: |
391 | |
392 | /* The end of the empty-element-tag for an element */ |
393 | |
394 | case LEX_TAG_FINISH: |
395 | |
396 | if(tag->callback) |
397 | call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,nattributes,attributes); |
398 | |
399 | if(stackused>0) |
400 | tag=tag_stack[stackused-1]; |
401 | else |
402 | tag=NULL; |
403 | |
404 | break; |
405 | |
406 | /* The end of the end-tag for an element */ |
407 | |
408 | case LEX_TAG_POP: |
409 | |
410 | stackused--; |
411 | tags=tags_stack[stackused]; |
412 | tag =tag_stack [stackused]; |
413 | |
414 | if(strcmp(tag->name,yylval)) |
415 | { |
416 | fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name); |
417 | yychar=LEX_ERROR_UNBALANCED; |
418 | } |
419 | |
420 | if(stackused<0) |
421 | { |
422 | fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval); |
423 | yychar=LEX_ERROR_NO_START; |
424 | } |
425 | |
426 | if(tag->callback) |
427 | call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,nattributes,attributes); |
428 | |
429 | if(stackused>0) |
430 | tag=tag_stack[stackused-1]; |
431 | else |
432 | tag=NULL; |
433 | |
434 | break; |
435 | |
436 | /* An attribute key */ |
437 | |
438 | case LEX_ATTR_KEY: |
439 | |
440 | attribute=-1; |
441 | |
442 | for(i=0;i<nattributes;i++) |
443 | if(!strcasecmp(yylval,tag->attributes[i])) |
444 | { |
445 | attribute=i; |
446 | |
447 | break; |
448 | } |
449 | |
450 | if(attribute==-1) |
451 | { |
452 | if(ignore_unknown_attributes==0) |
453 | { |
454 | fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name); |
455 | yychar=LEX_ERROR_UNEXP_ATT; |
456 | } |
457 | else if(ignore_unknown_attributes==1) |
458 | fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name); |
459 | } |
460 | |
461 | break; |
462 | |
463 | /* An attribute value */ |
464 | |
465 | case LEX_ATTR_VAL: |
466 | |
467 | if(yylval && attribute!=-1) |
468 | attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval); |
469 | |
470 | break; |
471 | |
472 | /* End of file */ |
473 | |
474 | case LEX_EOF: |
475 | |
476 | if(tag) |
477 | { |
478 | fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name); |
479 | yychar=LEX_ERROR_UNEXP_EOF; |
480 | } |
481 | |
482 | break; |
483 | |
484 | case LEX_ERROR_TAG_START: |
485 | fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno); |
486 | break; |
487 | |
488 | case LEX_ERROR_XML_DECL_START: |
489 | fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno); |
490 | break; |
491 | |
492 | case LEX_ERROR_TAG: |
493 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name); |
494 | break; |
495 | |
496 | case LEX_ERROR_XML_DECL: |
497 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name); |
498 | break; |
499 | |
500 | case LEX_ERROR_ATTR: |
501 | fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno); |
502 | break; |
503 | |
504 | case LEX_ERROR_END_TAG: |
505 | fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno); |
506 | break; |
507 | |
508 | case LEX_ERROR_COMMENT: |
509 | fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno); |
510 | break; |
511 | |
512 | case LEX_ERROR_CLOSE: |
513 | fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno); |
514 | break; |
515 | } |
516 | } |
517 | while(yychar>LEX_EOF && yychar<LEX_ERROR); |
518 | |
519 | /* Delete the tagdata */ |
520 | |
521 | for(i=0;i<nattributes;i++) |
522 | if(attributes[i]) |
523 | free(attributes[i]); |
524 | |
525 | if(stackdepth) |
526 | free(tags_stack); |
527 | |
528 | return(yychar); |
529 | } |
530 | |
531 | |
532 | /*++++++++++++++++++++++++++++++++++++++ |
533 | Return the current parser line number. |
534 | |
535 | int ParseXML_LineNumber Returns the line number. |
536 | ++++++++++++++++++++++++++++++++++++++*/ |
537 | |
538 | int ParseXML_LineNumber(void) |
539 | { |
540 | return(yylineno); |
541 | } |
Properties
Name | Value |
---|---|
cvs:description | A simple generic XML parser. |