Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.l

Parent Directory Parent Directory | Revision Log Revision Log


Revision 348 - (show annotations) (download)
Sun Apr 4 14:29:34 2010 UTC (14 years, 11 months ago) by amb
File size: 16937 byte(s)
Added error checking.

1 %{
2 /***************************************
3 $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.4 2010-04-04 14:29:34 amb Exp $
4
5 A simple generic XML parser where the structure comes from the function parameters.
6 Not intended to be fully conforming to the XML standard or a validating parser but
7 sufficient to parse OSM XML and simple program configuration files.
8
9 Part of the Routino routing software.
10 ******************/ /******************
11 This file Copyright 2010 Andrew M. Bishop
12
13 This program is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
22
23 You should have received a copy of the GNU Affero General Public License
24 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 ***************************************/
26
27
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
31
/* Parser outputs */

/* Token codes returned by yylex().  ParseXML() keeps looping only while the code is
   greater than LEX_EOF and less than LEX_ERROR; anything else ends the parse. */

#define LEX_EOF                    0

#define LEX_TAG_BEGIN              1
#define LEX_XML_DECL_BEGIN         2
#define LEX_TAG_POP                3
#define LEX_TAG_PUSH               4
#define LEX_XML_DECL_FINISH        6
#define LEX_TAG_FINISH             7
#define LEX_ATTR_KEY               8
#define LEX_ATTR_VAL               9

#define LEX_ERROR                100

/* Errors returned directly by the lexer rules. */

#define LEX_ERROR_TAG_START      101
#define LEX_ERROR_XML_DECL_START 102
#define LEX_ERROR_TAG            103
#define LEX_ERROR_XML_DECL       104
#define LEX_ERROR_ATTR           105
#define LEX_ERROR_END_TAG        106
#define LEX_ERROR_COMMENT        107
#define LEX_ERROR_CLOSE          108

/* Errors assigned by ParseXML() while interpreting the token stream. */

#define LEX_ERROR_UNEXP_TAG      201
#define LEX_ERROR_UNBALANCED     202
#define LEX_ERROR_NO_START       203
#define LEX_ERROR_UNEXP_ATT      204
#define LEX_ERROR_UNEXP_EOF      205
#define LEX_ERROR_XML_NOT_FIRST  206


/* Lexer definitions */

#define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
#ifndef yywrap
/*+ Needed in lex but does nothing. +*/
#define yywrap() 1
#endif

/*+ Reset the current string.  Expects 'string' and 'stringused' to be in scope;
    wrapped in do/while(0) so that it behaves as a single statement. +*/
#define reset_string \
 do \
   { \
    if(string) *string=0; \
    stringused=0; \
   } \
 while(0)

/*+ Append information to the current string.  Expects 'string', 'stringlen',
    'stringused' and 'newlen' to be in scope.  The buffer is grown through a
    temporary pointer so that an allocation failure is reported instead of
    being followed by a copy to a NULL pointer. +*/
#define append_string(xx) \
 do \
   { \
    newlen=(int)strlen(xx); \
    if((stringused+newlen)>=stringlen) \
      { \
       char *newstring=(char*)realloc((void*)string,(size_t)(stringused+newlen+16)); \
       if(!newstring) \
         { \
          fprintf(stderr,"XML Parser: Error: out of memory while storing a string.\n"); \
          exit(1); \
         } \
       string=newstring; \
       stringlen=stringused+newlen+16; \
      } \
    strcpy(string+stringused,(xx)); \
    stringused+=newlen; \
   } \
 while(0)

#define YY_NO_INPUT


/* Lexer functions and variables */

extern int yylex(void);

/*+ The text associated with the token most recently returned by yylex(). +*/
static char *yylval=NULL;
93
94 %}
95
%option 8bit
%option pointer
%option batch
%option yylineno

%option nodefault
%option perf-report
%option fast
%option nounput


/* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII not Unicode. */

S               [ \t\r\n]

letter          [a-zA-Z]
digit           [0-9]
xdigit          [a-fA-F0-9]

namechar        ({letter}|{digit}|[-._:])
name            ({letter}|[_:]){namechar}*

entityref       &{name};
charref         &#({digit}+|x{xdigit}+);


%x COMMENT
%x CDATA
%x DOCTYPE
%x XML_DECL_START XML_DECL
%x TAG_START TAG
%x ATTR_KEY ATTR_VAL
%x END_TAG1 END_TAG2
%x DQUOTED SQUOTED

%%
 /* Must use static variables since the parser returns often. */
 static char *string=NULL;
 static int stringlen=0,stringused=0;
 static int after_attr=0;
 int newlen;
 int doctype_depth=0;

 /* Handle top level entities (text outside of tags is skipped) */

"<!--"                      { BEGIN(COMMENT); }
"<![CDATA["                 { BEGIN(CDATA); }
"<!DOCTYPE"                 { BEGIN(DOCTYPE); doctype_depth=0; }
"</"                        { BEGIN(END_TAG1); }
"<?"                        { BEGIN(XML_DECL_START); }
"<"                         { BEGIN(TAG_START); }
">"                         { return(LEX_ERROR_CLOSE); }
[^<>]+                      { }

 /* Comments */

<COMMENT>"--->"             { return(LEX_ERROR_COMMENT); }
<COMMENT>"-->"              { BEGIN(INITIAL); }
<COMMENT>"--"[^->]+         { }
<COMMENT>[^-]+              { }
<COMMENT>"-"                { }

 /* CDATA */

<CDATA>"]]>"                { BEGIN(INITIAL); }
<CDATA>"]"                  { }
<CDATA>[^]]+                { }

 /* DOCTYPE (nested '<' ... '>' pairs are counted so that only the outermost '>' ends it) */

<DOCTYPE>"<"                { doctype_depth++; }
<DOCTYPE>">"                { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
<DOCTYPE>[^<>]+             { }

 /* XML Declaration start */

<XML_DECL_START>{name}      { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); }
<XML_DECL_START>.|\n        { return(LEX_ERROR_XML_DECL_START); }

 /* XML Declaration middle */

<XML_DECL>"?>"              { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
<XML_DECL>{S}+              { }
<XML_DECL>{name}            { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<XML_DECL>.|\n              { return(LEX_ERROR_XML_DECL); }

 /* Any tag start */

<TAG_START>{name}           { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
<TAG_START>.|\n             { return(LEX_ERROR_TAG_START); }

 /* End-tag start */

<END_TAG1>{name}            { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
<END_TAG1>.|\n              { return(LEX_ERROR_END_TAG); }

<END_TAG2>">"               { BEGIN(INITIAL); }
<END_TAG2>.|\n              { return(LEX_ERROR_END_TAG); }

 /* Any tag middle */

<TAG>"/>"                   { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
<TAG>">"                    { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
<TAG>{S}+                   { }
<TAG>{name}                 { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<TAG>.|\n                   { return(LEX_ERROR_TAG); }

 /* Attributes (after_attr remembers which state to resume once the value is complete) */

<ATTR_KEY>=                 { BEGIN(ATTR_VAL); }
<ATTR_KEY>.|\n              { return(LEX_ERROR_ATTR); }

<ATTR_VAL>\"                { BEGIN(DQUOTED); reset_string; }
<ATTR_VAL>\'                { BEGIN(SQUOTED); reset_string; }
<ATTR_VAL>.|\n              { return(LEX_ERROR_ATTR); }

 /* Quoted strings (entity and character references are appended exactly as written) */

<DQUOTED>\"                 { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
<DQUOTED>{entityref}        { append_string(yytext); }
<DQUOTED>{charref}          { append_string(yytext); }
<DQUOTED>[<>&]              { return(LEX_ERROR_TAG); }
<DQUOTED>[^<>&\"]+          { append_string(yytext); }

<SQUOTED>\'                 { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
<SQUOTED>{entityref}        { append_string(yytext); }
<SQUOTED>{charref}          { append_string(yytext); }
<SQUOTED>[<>&]              { return(LEX_ERROR_TAG); }
<SQUOTED>[^<>&\']+          { append_string(yytext); }

 /* End of file (the size and usage counters are cleared together with the buffer so that
    the next parse cannot copy into the freed, now NULL, string) */

<<EOF>>                     { free(string); string=NULL; stringlen=0; stringused=0; BEGIN(INITIAL); return(LEX_EOF); }
229
230 %%
231
232 #include "xmlparse.h"
233
234
235 /*++++++++++++++++++++++++++++++++++++++
236 A function to call the callback function with the parameters needed.
237
238 char *name The name of the tag.
239
240 void (*callback)() The callback function.
241
242 int type The type of tag (start and/or end).
243
244 int nattributes The number of attributes collected.
245
246 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
247 ++++++++++++++++++++++++++++++++++++++*/
248
249 static inline void call_callback(char *name,void (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
250 {
251 switch(nattributes)
252 {
253 case 0: (*callback)(type); break;
254 case 1: (*callback)(type,attributes[0]); break;
255 case 2: (*callback)(type,attributes[0],attributes[1]); break;
256 case 3: (*callback)(type,attributes[0],attributes[1],attributes[2]); break;
257 case 4: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3]); break;
258 case 5: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); break;
259 case 6: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); break;
260 case 7: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); break;
261 case 8: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); break;
262 case 9: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); break;
263 case 10: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); break;
264 case 11: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); break;
265 case 12: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); break;
266 case 13: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); break;
267 case 14: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); break;
268 case 15: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); break;
269 case 16: (*callback)(type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); break;
270
271 default:
272 fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s'.\n",yylineno,name);
273 exit(1);
274 }
275 }
276
277
278 /*++++++++++++++++++++++++++++++++++++++
279 Parse the XML and call the functions for each tag as seen.
280
281 int ParseXML Returns 0 if OK or something else in case of an error.
282
283 FILE *file The file to parse.
284
285 xmltag **tags The array of pointers to tags for the top level.
286
287 int ignore_unknown_attributes If set to 0 then exit if unknown attribute is seen, if sete to 1 then warn, if set to 2 then ignore.
288 ++++++++++++++++++++++++++++++++++++++*/
289
290 int ParseXML(FILE *file,xmltag **tags,int ignore_unknown_attributes)
291 {
292 int yychar,i;
293
294 int nattributes=0;
295 char *attributes[XMLPARSE_MAX_ATTRS];
296 int attribute=0;
297
298 int stackdepth=0,stackused=0;
299 xmltag ***tags_stack=NULL;
300 xmltag **tag_stack=NULL;
301 xmltag *tag=NULL;
302
303 static int first=1;
304
305 /* Parser (re)-initialisation */
306
307 yyin=file;
308
309 if(!first)
310 yyrestart(NULL);
311
312 first=0;
313
314 /* The actual parser. */
315
316 do
317 {
318 yychar=yylex();
319
320 switch(yychar)
321 {
322 /* The start of a tag for an XML declaration */
323
324 case LEX_XML_DECL_BEGIN:
325
326 if(tag_stack)
327 {
328 fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno);
329 yychar=LEX_ERROR_XML_NOT_FIRST;
330 break;
331 }
332
333 /* The start of a tag for an element */
334
335 case LEX_TAG_BEGIN:
336
337 tag=NULL;
338
339 for(i=0;tags[i];i++)
340 if(!strcasecmp(yylval,tags[i]->name))
341 {
342 tag=tags[i];
343
344 for(i=0;i<nattributes;i++)
345 free(attributes[i]);
346
347 for(i=0;i<XMLPARSE_MAX_ATTRS;i++)
348 if(!tag->attributes[i])
349 break;
350
351 nattributes=i;
352
353 for(i=0;i<nattributes;i++)
354 attributes[i]=NULL;
355
356 break;
357 }
358
359 if(tag==NULL)
360 {
361 fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval);
362 yychar=LEX_ERROR_UNEXP_TAG;
363 }
364
365 break;
366
367 /* The end of the start-tag for an element */
368
369 case LEX_TAG_PUSH:
370
371 if(stackused==stackdepth)
372 {
373 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
374 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
375 }
376
377 tag_stack [stackused]=tag;
378 tags_stack[stackused]=tags;
379 stackused++;
380
381 if(tag->callback)
382 call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,nattributes,attributes);
383
384 tags=tag->subtags;
385
386 break;
387
388 /* The end of the empty-element-tag for an XML declaration */
389
390 case LEX_XML_DECL_FINISH:
391
392 /* The end of the empty-element-tag for an element */
393
394 case LEX_TAG_FINISH:
395
396 if(tag->callback)
397 call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,nattributes,attributes);
398
399 if(stackused>0)
400 tag=tag_stack[stackused-1];
401 else
402 tag=NULL;
403
404 break;
405
406 /* The end of the end-tag for an element */
407
408 case LEX_TAG_POP:
409
410 stackused--;
411 tags=tags_stack[stackused];
412 tag =tag_stack [stackused];
413
414 if(strcmp(tag->name,yylval))
415 {
416 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name);
417 yychar=LEX_ERROR_UNBALANCED;
418 }
419
420 if(stackused<0)
421 {
422 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval);
423 yychar=LEX_ERROR_NO_START;
424 }
425
426 if(tag->callback)
427 call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,nattributes,attributes);
428
429 if(stackused>0)
430 tag=tag_stack[stackused-1];
431 else
432 tag=NULL;
433
434 break;
435
436 /* An attribute key */
437
438 case LEX_ATTR_KEY:
439
440 attribute=-1;
441
442 for(i=0;i<nattributes;i++)
443 if(!strcasecmp(yylval,tag->attributes[i]))
444 {
445 attribute=i;
446
447 break;
448 }
449
450 if(attribute==-1)
451 {
452 if(ignore_unknown_attributes==0)
453 {
454 fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
455 yychar=LEX_ERROR_UNEXP_ATT;
456 }
457 else if(ignore_unknown_attributes==1)
458 fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
459 }
460
461 break;
462
463 /* An attribute value */
464
465 case LEX_ATTR_VAL:
466
467 if(yylval && attribute!=-1)
468 attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval);
469
470 break;
471
472 /* End of file */
473
474 case LEX_EOF:
475
476 if(tag)
477 {
478 fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name);
479 yychar=LEX_ERROR_UNEXP_EOF;
480 }
481
482 break;
483
484 case LEX_ERROR_TAG_START:
485 fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno);
486 break;
487
488 case LEX_ERROR_XML_DECL_START:
489 fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno);
490 break;
491
492 case LEX_ERROR_TAG:
493 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name);
494 break;
495
496 case LEX_ERROR_XML_DECL:
497 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name);
498 break;
499
500 case LEX_ERROR_ATTR:
501 fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno);
502 break;
503
504 case LEX_ERROR_END_TAG:
505 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno);
506 break;
507
508 case LEX_ERROR_COMMENT:
509 fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno);
510 break;
511
512 case LEX_ERROR_CLOSE:
513 fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno);
514 break;
515 }
516 }
517 while(yychar>LEX_EOF && yychar<LEX_ERROR);
518
519 /* Delete the tagdata */
520
521 for(i=0;i<nattributes;i++)
522 if(attributes[i])
523 free(attributes[i]);
524
525 if(stackdepth)
526 free(tags_stack);
527
528 return(yychar);
529 }
530
531
532 /*++++++++++++++++++++++++++++++++++++++
533 Return the current parser line number.
534
535 int ParseXML_LineNumber Returns the line number.
536 ++++++++++++++++++++++++++++++++++++++*/
537
538 int ParseXML_LineNumber(void)
539 {
540 return(yylineno);
541 }

Properties

Name Value
cvs:description A simple generic XML parser.