Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.l

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1044 - (show annotations) (download)
Mon Aug 6 18:34:52 2012 UTC (12 years, 8 months ago) by amb
File size: 28849 byte(s)
Allow an unlimited number of attributes per tag without crashing.

1 %{
2 /***************************************
3 A simple generic XML parser where the structure comes from the function parameters.
4 Not intended to be fully conforming to XML standard or a validating parser but
5 sufficient to parse OSM XML and simple program configuration files.
6
7 Part of the Routino routing software.
8 ******************/ /******************
9 This file Copyright 2010-2012 Andrew M. Bishop
10
11 This program is free software: you can redistribute it and/or modify
12 it under the terms of the GNU Affero General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU Affero General Public License for more details.
20
21 You should have received a copy of the GNU Affero General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 ***************************************/
24
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <strings.h>
31
32 #include "xmlparse.h"
33
34
35 /* Parser outputs */
36
37 #define LEX_EOF 0
38
39 #define LEX_TAG_BEGIN 1
40 #define LEX_XML_DECL_BEGIN 2
41 #define LEX_TAG_POP 3
42 #define LEX_TAG_PUSH 4
43 #define LEX_XML_DECL_FINISH 6
44 #define LEX_TAG_FINISH 7
45 #define LEX_ATTR_KEY 8
46 #define LEX_ATTR_VAL 9
47
48 #define LEX_ERROR 100
49
50 #define LEX_ERROR_TAG_START 101
51 #define LEX_ERROR_XML_DECL_START 102
52 #define LEX_ERROR_TAG 103
53 #define LEX_ERROR_XML_DECL 104
54 #define LEX_ERROR_ATTR 105
55 #define LEX_ERROR_END_TAG 106
56 #define LEX_ERROR_COMMENT 107
57 #define LEX_ERROR_CLOSE 108
58 #define LEX_ERROR_ATTR_VAL 109
59 #define LEX_ERROR_ENTITY_REF 110
60 #define LEX_ERROR_CHAR_REF 111
61
62 #define LEX_ERROR_UNEXP_TAG 201
63 #define LEX_ERROR_UNBALANCED 202
64 #define LEX_ERROR_NO_START 203
65 #define LEX_ERROR_UNEXP_ATT 204
66 #define LEX_ERROR_UNEXP_EOF 205
67 #define LEX_ERROR_XML_NOT_FIRST 206
68
69 #define LEX_ERROR_CALLBACK 255
70
71
72 /* Lexer definitions */
73
/*+ Reset the current string. +*/
/* Sets stringnum to -1 so that the next use of next_string selects slot 0. */
/* Relies on the variable 'stringnum' declared at the top of the rules section. */
#define reset_string \
 stringnum=-1;

/*+ Prepare for the next string. +*/
/* Advances stringnum to the next slot.  When the slot index reaches numstrings the three */
/* parallel arrays (string, stringlen, stringused) are grown by 32 entries and the entries */
/* from stringnum onwards are cleared.  A slot with no buffer yet gets a 256 byte buffer; */
/* an existing buffer is reused.  The selected slot is left holding an empty string. */
/* NOTE(review): the results of realloc()/malloc() are not checked here. */
/* The trailing ';' is part of the macro, and the macro declares 'int i' in its own block. */
#define next_string \
 stringnum++; \
 if(stringnum>=numstrings) \
 { \
 int i; \
 numstrings+=32; \
 string=(char**)realloc((void*)string,numstrings*sizeof(char*)); \
 stringlen=(unsigned long*)realloc((void*)stringlen,numstrings*sizeof(unsigned long)); \
 stringused=(unsigned long*)realloc((void*)stringused,numstrings*sizeof(unsigned long)); \
 for(i=stringnum;i<numstrings;i++) \
 {string[i]=NULL;stringlen[i]=0;stringused[i]=0;} \
 } \
 if(!string[stringnum]) string[stringnum]=(char*)malloc(stringlen[stringnum]=256); \
 *string[stringnum]=0; \
 stringused[stringnum]=0;

/*+ Append information to the current string. +*/
/* Appends the NUL terminated text 'xx' to slot stringnum.  If the used length plus the new */
/* length would reach the buffer size the buffer is reallocated to used+new+256 bytes first. */
/* 'xx' is evaluated twice (strlen and strcpy) and the length is stored in the variable */
/* 'newlen' declared at the top of the rules section.  This expands to several statements, */
/* so every use in the rules wraps it in braces. */
/* NOTE(review): the result of realloc() is not checked here. */
#define append_string(xx) \
 newlen=strlen(xx); \
 if((stringused[stringnum]+newlen)>=stringlen[stringnum]) \
 string[stringnum]=(char*)realloc((void*)string[stringnum],stringlen[stringnum]=(stringused[stringnum]+newlen+256)); \
 strcpy(string[stringnum]+stringused[stringnum],xx); \
 stringused[stringnum]+=newlen;
102
103
104 /* Lexer functions and variables */
105
106 extern int yylex(void);
107
108 static char *yylval=NULL;
109
110 static int xmlparse_options;
111
112 static unsigned long long lineno;
113
114 %}
115
116 %option 8bit
117 %option pointer
118 %option batch
119 %option never-interactive
120
121 %option perf-report perf-report
122 %option warn
123 %option verbose
124
125 %option nodefault
126 %option fast
127 %option noread
128
129 %option noreject
130 %option nounput
131 %option noinput
132 %option noyywrap
133 %option noyymore
134 %option noyylineno
135
136
137 /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII tags not Unicode. */
138
139 S [ \t]
140
141 U1 [\x09\x0A\x0D\x20-\x7F]
142 U2 [\xC2-\xDF][\x80-\xBF]
143 U3a \xE0[\xA0-\xBF][\x80-\xBF]
144 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
145 U3c \xED[\x80-\x9F][\x80-\xBF]
146 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
147 U3 {U3a}|{U3b}|{U3c}|{U3d}
148 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
149 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
150 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
151 U4 {U4a}|{U4b}|{U4c}
152
153 U ({U1}|{U2}|{U3}|{U4})
154 UquotedS ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4})
155 UquotedD ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4})
156
157 N (\n|\r\n)
158
159 letter [a-zA-Z]
160 digit [0-9]
161 xdigit [a-fA-F0-9]
162
163 namechar ({letter}|{digit}|[-._:])
164 name (({letter}|[_:]){namechar}*)
165
166 entityref (&{name};)
167 charref (&#({digit}+|x{xdigit}+);)
168
169
170 %x BANGTAG
171 %x COMMENT
172 %x CDATA
173 %x DOCTYPE
174 %x XML_DECL_START XML_DECL
175 %x TAG_START TAG
176 %x ATTR_KEY ATTR_VAL
177 %x END_TAG1 END_TAG2
178 %x DQUOTED SQUOTED
179
%%
 /* Must use static variables since the parser returns often. */
 /* string[], stringlen[] and stringused[] are parallel arrays with numstrings entries; */
 /* stringnum is the slot currently being filled by next_string/append_string. */
 static int numstrings=0,stringnum=0;
 static char **string=NULL;
 static unsigned long *stringlen=NULL,*stringused=NULL;
 /* The start condition (TAG or XML_DECL) to go back to once an attribute value is complete. */
 static int after_attr=0;
 /* Scratch length used by the append_string macro. */
 int newlen;
 /* Count of unmatched '<' seen inside a DOCTYPE; not static, it is set again when DOCTYPE is entered. */
 int doctype_depth=0;

 /* Handle top level entities */
 /* The two character openers are longer matches than "<" so they are chosen in preference to it. */

"<!"                    { BEGIN(BANGTAG); }
"</"                    { BEGIN(END_TAG1); }
"<?"                    { BEGIN(XML_DECL_START); }
"<"                     { BEGIN(TAG_START); }

">"                     { return(LEX_ERROR_CLOSE); }

 /* Text between tags is discarded; only the line count is maintained. */

{N}                     { lineno++; }
[^<>]                   { }

 /* Tags beginning with '!' */
 /* Anything other than a comment, CDATA section or DOCTYPE is an error. */
 /* In the error rules the lineno increment is commented out, so the reported line does not advance. */

<BANGTAG>"--"           { BEGIN(COMMENT); }
<BANGTAG>"[CDATA["      { BEGIN(CDATA); }
<BANGTAG>"DOCTYPE"      { BEGIN(DOCTYPE); doctype_depth=0; }
<BANGTAG>{N}            { /* lineno++; */ return(LEX_ERROR_TAG_START); }
<BANGTAG>.              { return(LEX_ERROR_TAG_START); }

 /* Comments */
 /* The contents are discarded; "--" not followed by '>' inside a comment is reported as an error. */

<COMMENT>"-->"          { BEGIN(INITIAL); }
<COMMENT>"--"[^>]       { return(LEX_ERROR_COMMENT); }
<COMMENT>{N}            { lineno++; }
<COMMENT>[^-]           { }
<COMMENT>"-"            { }

 /* CDATA */
 /* The contents are discarded up to the closing "]]>". */

<CDATA>"]]>"            { BEGIN(INITIAL); }
<CDATA>"]"              { }
<CDATA>{N}              { lineno++; }
<CDATA>[^]]             { }

 /* DOCTYPE */
 /* The contents are discarded; nested '<' ... '>' pairs are counted so that only the outermost '>' ends it. */

<DOCTYPE>"<"            { doctype_depth++; }
<DOCTYPE>">"            { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
<DOCTYPE>{N}            { lineno++; }
<DOCTYPE>[^<>]          { }

 /* XML declaration start */
 /* Only the literal name "xml" is accepted after "<?"; reset_string restarts the attribute value slots. */

<XML_DECL_START>xml     { BEGIN(XML_DECL); reset_string; yylval=yytext; return(LEX_XML_DECL_BEGIN); }
<XML_DECL_START>{N}     { /* lineno++; */ return(LEX_ERROR_XML_DECL_START); }
<XML_DECL_START>.       { return(LEX_ERROR_XML_DECL_START); }

 /* XML declaration middle */
 /* A name starts an attribute; after_attr records that the value must return to XML_DECL. */

<XML_DECL>"?>"          { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
<XML_DECL>{S}+          { }
<XML_DECL>{N}           { lineno++; }
<XML_DECL>{name}        { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<XML_DECL>.             { return(LEX_ERROR_XML_DECL); }

 /* Any tag start */
 /* The tag name is returned in yylval (pointing into yytext); reset_string restarts the attribute value slots. */

<TAG_START>{name}       { BEGIN(TAG); reset_string; yylval=yytext; return(LEX_TAG_BEGIN); }
<TAG_START>{N}          { /* lineno++; */ return(LEX_ERROR_TAG_START); }
<TAG_START>.            { return(LEX_ERROR_TAG_START); }

 /* End-tag start */
 /* LEX_TAG_POP is returned as soon as the name is seen; END_TAG2 then requires '>' immediately (no whitespace). */

<END_TAG1>{name}        { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
<END_TAG1>{N}           { /* lineno++; */ return(LEX_ERROR_END_TAG); }
<END_TAG1>.             { return(LEX_ERROR_END_TAG); }

<END_TAG2>">"           { BEGIN(INITIAL); }
<END_TAG2>{N}           { /* lineno++; */ return(LEX_ERROR_END_TAG); }
<END_TAG2>.             { return(LEX_ERROR_END_TAG); }

 /* Any tag middle */
 /* "/>" finishes an empty element, ">" finishes a start-tag, a name starts an attribute. */

<TAG>"/>"               { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
<TAG>">"                { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
<TAG>{S}+               { }
<TAG>{N}                { lineno++; }
<TAG>{name}             { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<TAG>.                  { return(LEX_ERROR_TAG); }

 /* Attributes */
 /* The key must be followed immediately by '=' and then by a quote character (no whitespace is accepted). */
 /* next_string selects a fresh slot to collect the value in. */

<ATTR_KEY>=             { BEGIN(ATTR_VAL); }
<ATTR_KEY>{N}           { /* lineno++; */ return(LEX_ERROR_ATTR); }
<ATTR_KEY>.             { return(LEX_ERROR_ATTR); }

<ATTR_VAL>\"            { BEGIN(DQUOTED); next_string; }
<ATTR_VAL>\'            { BEGIN(SQUOTED); next_string; }
<ATTR_VAL>{N}           { /* lineno++; */ return(LEX_ERROR_ATTR); }
<ATTR_VAL>.             { return(LEX_ERROR_ATTR); }

 /* Quoted strings */
 /* The value is accumulated in string[stringnum] and returned through yylval at the closing quote. */
 /* Entity and character references are appended verbatim when XMLPARSE_RETURN_ATTR_ENCODED is set, */
 /* otherwise they are decoded first; an entity reference that cannot be decoded is an error. */
 /* UquotedD/UquotedS include \x0A, so newlines inside a value are consumed without incrementing lineno. */

<DQUOTED>\"             { BEGIN(after_attr); yylval=string[stringnum]; return(LEX_ATTR_VAL); }
<DQUOTED>{entityref}    { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
                          else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
<DQUOTED>{charref}      { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
                          else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
<DQUOTED>[<>&]          { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
<DQUOTED>{UquotedD}+    { append_string(yytext); }
<DQUOTED>.              { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }

<SQUOTED>\'             { BEGIN(after_attr); yylval=string[stringnum]; return(LEX_ATTR_VAL); }
<SQUOTED>{entityref}    { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
                          else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
<SQUOTED>{charref}      { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
                          else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
<SQUOTED>[<>&]          { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
<SQUOTED>{UquotedS}+    { append_string(yytext); }
<SQUOTED>.              { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
300
301 /* End of file */
302
<<EOF>>                 { /* Release every attribute value buffer and the three bookkeeping arrays so */
                          /* that the next parse starts from an empty state.  free(NULL) is a no-op, */
                          /* so no guards are needed; each array is released exactly once. */
                          for(stringnum=0;stringnum<numstrings;stringnum++)
                             free(string[stringnum]);
                          free(string);     string=NULL;
                          free(stringlen);  stringlen=NULL;
                          free(stringused); stringused=NULL;
                          numstrings=0;
                          BEGIN(INITIAL); return(LEX_EOF); }
310
311 %%
312
313
314 /*++++++++++++++++++++++++++++++++++++++
315 A function to call the callback function with the parameters needed.
316
317 int call_callback Returns 1 if the callback returned with an error.
318
319 const char *name The name of the tag.
320
321 int (*callback)() The callback function.
322
323 int type The type of tag (start and/or end).
324
325 int nattributes The number of attributes collected.
326
327 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
328 ++++++++++++++++++++++++++++++++++++++*/
329
330 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
331 {
332 switch(nattributes)
333 {
334 case 0: return (*callback)(name,type);
335 case 1: return (*callback)(name,type,attributes[0]);
336 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
337 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
338 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
339 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
340 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
341 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
342 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
343 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
344 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
345 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
346 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
347 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
348 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
349 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
350 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
351
352 default:
353 fprintf(stderr,"XML Parser: Error on line %llu: too many attributes for tag '%s' source code needs changing.\n",lineno,name);
354 exit(1);
355 }
356 }
357
358
359 /*++++++++++++++++++++++++++++++++++++++
360 Parse the XML and call the functions for each tag as seen.
361
362 int ParseXML Returns 0 if OK or something else in case of an error.
363
364 FILE *file The file to parse.
365
366 xmltag **tags The array of pointers to tags for the top level.
367
368 int options A list of XML Parser options OR-ed together.
369 ++++++++++++++++++++++++++++++++++++++*/
370
371 int ParseXML(FILE *file,xmltag **tags,int options)
372 {
373 int yychar,i;
374
375 char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
376 int attribute=0;
377
378 int stackdepth=0,stackused=0;
379 xmltag ***tags_stack=NULL;
380 xmltag **tag_stack=NULL;
381 xmltag *tag=NULL;
382
383 /* The actual parser. */
384
385 xmlparse_options=options;
386
387 yyin=file;
388
389 yyrestart(yyin);
390
391 lineno=1;
392
393 BEGIN(INITIAL);
394
395 do
396 {
397 yychar=yylex();
398
399 switch(yychar)
400 {
401 /* The start of a tag for an XML declaration */
402
403 case LEX_XML_DECL_BEGIN:
404
405 if(tag_stack)
406 {
407 fprintf(stderr,"XML Parser: Error on line %llu: XML declaration not before all other tags.\n",lineno);
408 yychar=LEX_ERROR_XML_NOT_FIRST;
409 break;
410 }
411
412 /* The start of a tag for an element */
413
414 case LEX_TAG_BEGIN:
415
416 tag=NULL;
417
418 for(i=0;tags[i];i++)
419 if(!strcasecmp(yylval,tags[i]->name))
420 {
421 tag=tags[i];
422
423 for(i=0;i<tag->nattributes;i++)
424 attributes[i]=NULL;
425
426 break;
427 }
428
429 if(tag==NULL)
430 {
431 fprintf(stderr,"XML Parser: Error on line %llu: unexpected tag '%s'.\n",lineno,yylval);
432 yychar=LEX_ERROR_UNEXP_TAG;
433 }
434
435 break;
436
437 /* The end of the start-tag for an element */
438
439 case LEX_TAG_PUSH:
440
441 if(stackused==stackdepth)
442 {
443 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
444 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
445 }
446
447 tag_stack [stackused]=tag;
448 tags_stack[stackused]=tags;
449 stackused++;
450
451 if(tag->callback)
452 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
453 yychar=LEX_ERROR_CALLBACK;
454
455 tags=tag->subtags;
456
457 break;
458
459 /* The end of the empty-element-tag for an XML declaration */
460
461 case LEX_XML_DECL_FINISH:
462
463 /* The end of the empty-element-tag for an element */
464
465 case LEX_TAG_FINISH:
466
467 if(tag->callback)
468 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
469 yychar=LEX_ERROR_CALLBACK;
470
471 if(stackused>0)
472 tag=tag_stack[stackused-1];
473 else
474 tag=NULL;
475
476 break;
477
478 /* The end of the end-tag for an element */
479
480 case LEX_TAG_POP:
481
482 stackused--;
483 tags=tags_stack[stackused];
484 tag =tag_stack [stackused];
485
486 if(strcmp(tag->name,yylval))
487 {
488 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,yylval,tag->name);
489 yychar=LEX_ERROR_UNBALANCED;
490 }
491
492 if(stackused<0)
493 {
494 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,yylval,yylval);
495 yychar=LEX_ERROR_NO_START;
496 }
497
498 for(i=0;i<tag->nattributes;i++)
499 attributes[i]=NULL;
500
501 if(tag->callback)
502 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
503 yychar=LEX_ERROR_CALLBACK;
504
505 if(stackused>0)
506 tag=tag_stack[stackused-1];
507 else
508 tag=NULL;
509
510 break;
511
512 /* An attribute key */
513
514 case LEX_ATTR_KEY:
515
516 attribute=-1;
517
518 for(i=0;i<tag->nattributes;i++)
519 if(!strcasecmp(yylval,tag->attributes[i]))
520 {
521 attribute=i;
522
523 break;
524 }
525
526 if(attribute==-1)
527 {
528 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
529 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':')))
530 {
531 fprintf(stderr,"XML Parser: Error on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,yylval,tag->name);
532 yychar=LEX_ERROR_UNEXP_ATT;
533 }
534 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
535 fprintf(stderr,"XML Parser: Warning on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,yylval,tag->name);
536 }
537
538 break;
539
540 /* An attribute value */
541
542 case LEX_ATTR_VAL:
543
544 if(tag->callback && attribute!=-1 && yylval)
545 attributes[attribute]=yylval;
546
547 break;
548
549 /* End of file */
550
551 case LEX_EOF:
552
553 if(tag)
554 {
555 fprintf(stderr,"XML Parser: Error on line %llu: end of file seen without end tag '</%s>'.\n",lineno,tag->name);
556 yychar=LEX_ERROR_UNEXP_EOF;
557 }
558
559 break;
560
561 case LEX_ERROR_TAG_START:
562 fprintf(stderr,"XML Parser: Error on line %llu: character '<' seen not at start of tag.\n",lineno);
563 break;
564
565 case LEX_ERROR_XML_DECL_START:
566 fprintf(stderr,"XML Parser: Error on line %llu: characters '<?' seen not at start of XML declaration.\n",lineno);
567 break;
568
569 case LEX_ERROR_TAG:
570 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
571 break;
572
573 case LEX_ERROR_XML_DECL:
574 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside XML declaration '<?%s...>'.\n",lineno,tag->name);
575 break;
576
577 case LEX_ERROR_ATTR:
578 fprintf(stderr,"XML Parser: Error on line %llu: invalid attribute definition seen in tag.\n",lineno);
579 break;
580
581 case LEX_ERROR_END_TAG:
582 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen in end-tag.\n",lineno);
583 break;
584
585 case LEX_ERROR_COMMENT:
586 fprintf(stderr,"XML Parser: Error on line %llu: invalid comment seen.\n",lineno);
587 break;
588
589 case LEX_ERROR_CLOSE:
590 fprintf(stderr,"XML Parser: Error on line %llu: character '>' seen not at end of tag.\n",lineno);
591 break;
592
593 case LEX_ERROR_ATTR_VAL:
594 fprintf(stderr,"XML Parser: Error on line %llu: invalid character '%s' seen in attribute value.\n",lineno,yylval);
595 break;
596
597 case LEX_ERROR_ENTITY_REF:
598 fprintf(stderr,"XML Parser: Error on line %llu: invalid entity reference '%s' seen in attribute value.\n",lineno,yylval);
599 break;
600
601 case LEX_ERROR_CHAR_REF:
602 fprintf(stderr,"XML Parser: Error on line %llu: invalid character reference '%s' seen in attribute value.\n",lineno,yylval);
603 break;
604 }
605 }
606 while(yychar>LEX_EOF && yychar<LEX_ERROR);
607
608 /* Delete the tagdata */
609
610 if(stackdepth)
611 {
612 free(tag_stack);
613 free(tags_stack);
614 }
615
616 return(yychar);
617 }
618
619
620 /*++++++++++++++++++++++++++++++++++++++
621 Return the current parser line number.
622
623 unsigned long long ParseXML_LineNumber Returns the line number.
624 ++++++++++++++++++++++++++++++++++++++*/
625
626 unsigned long long ParseXML_LineNumber(void)
627 {
628 return(lineno);
629 }
630
631
/*++++++++++++++++++++++++++++++++++++++
  Look up the replacement text for one of the five predefined XML entity references.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the constant replacement string
                                   or NULL if the reference is not recognised.

  const char *string The entity reference string (including the '&' and the ';').
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison is exact and case sensitive. */
 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 known[]={{"&amp;" ,"&" },
          {"&lt;"  ,"<" },
          {"&gt;"  ,">" },
          {"&apos;","'" },
          {"&quot;","\""}};
 size_t k;

 for(k=0;k<sizeof(known)/sizeof(known[0]);k++)
    if(strcmp(string,known[k].reference)==0)
       return known[k].replacement;

 return NULL;
}
649
650
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string.  The
                                 string is held in a static buffer that is overwritten by
                                 the next call.

  const char *string The character reference string, either "&#<decimal>;" or "&#x<hex>;".

  A reference to something that is not a Unicode scalar value (negative, above U+10FFFF or
  a surrogate U+D800-U+DFFF) is replaced by U+FFFD (the replacement character).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* Skip the "&#" (or "&#x") prefix; strtol() stops at the trailing ';'. */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Out of range values (including LONG_MAX from an overflow) and surrogates
    cannot be encoded as valid UTF-8. */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0+((unicode&0x07C0)>>6));
    result[1]=(char)(0x80+ (unicode&0x003F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0+((unicode&0xF000)>>12));
    result[1]=(char)(0x80+((unicode&0x0FC0)>>6));
    result[2]=(char)(0x80+ (unicode&0x003F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0+((unicode&0x1C0000)>>18));
    result[1]=(char)(0x80+((unicode&0x03F000)>>12));
    result[2]=(char)(0x80+((unicode&0x000FC0)>>6));
    result[3]=(char)(0x80+ (unicode&0x00003F));
    result[4]=0;
   }

 return(result);
}
706
707
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded string (or the
                                 original if no change needed).  A replacement string is
                                 allocated with malloc(); NULL is returned if that fails.

  const char *string The string to convert.

  The characters < > & ' " are replaced by the predefined entities.  Bytes below 32 or above
  127 are replaced by hexadecimal character references; multi-byte UTF-8 sequences are decoded
  first and a byte that does not start a well formed sequence becomes "&#xFFFD;".
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";

 /* LONGEST_ENTITY is the length of "&#xHHHHHH;", the most that one step can write. */
 enum {LONGEST_ENTITY=10,CHUNK=256};

 /* Unsigned bytes so that the comparisons do not depend on the signedness of char. */
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first character that needs to be changed (if any). */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the part that needs no change. */

 size=i+CHUNK;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    /* Make sure that the longest possible output and the terminator will fit. */

    if(j+LONGEST_ENTITY+1>size)
      {
       char *bigger;

       size+=CHUNK;

       bigger=(char*)realloc((void*)result,size);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
      }

    switch(in[i])
      {
      case '<':  memcpy(result+j,"&lt;"  ,4); j+=4; continue;
      case '>':  memcpy(result+j,"&gt;"  ,4); j+=4; continue;
      case '&':  memcpy(result+j,"&amp;" ,5); j+=5; continue;
      case '\'': memcpy(result+j,"&apos;",6); j+=6; continue;
      case '"':  memcpy(result+j,"&quot;",6); j+=6; continue;
      default:   break;
      }

    if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (the && short-circuit stops the tests at the end of the string) */

       if((in[i]&0x80)==0)
         {
          /* 0000 0000-0000 007F  =>  0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;

       /* Output the character entity (2, 4 or 6 hexadecimal digits) */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
848
849
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is an integer.

  int ParseXML_IsInteger Returns 1 if the whole string is an optional sign followed by at
                         least one decimal digit or 0 otherwise.

  const char *string The string to be parsed.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsInteger(const char *string)
{
 /* isdigit() is only defined for unsigned char values (and EOF). */
 const unsigned char *p=(const unsigned char*)string;
 const unsigned char *digits;

 if(*p=='-' || *p=='+')
    p++;

 digits=p;

 while(isdigit(*p))
    p++;

 /* There must be at least one digit and nothing after the digits. */

 if(p==digits || *p)
    return(0);
 else
    return(1);
}
873
874
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is a floating point number.

  int ParseXML_IsFloating Returns 1 if the whole string is a floating point number or 0 otherwise.

  const char *string The string to be parsed.

  The accepted form is an optional sign, digits with at most one decimal point (at least one
  digit is required) and an optional exponent ('e' or 'E', optional sign, at least one digit).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsFloating(const char *string)
{
 /* isdigit() is only defined for unsigned char values (and EOF). */
 const unsigned char *p=(const unsigned char*)string;
 int ndigits=0,npoints=0;

 if(*p=='-' || *p=='+')
    p++;

 /* The mantissa */

 for(;isdigit(*p) || *p=='.';p++)
    if(*p=='.')
       npoints++;
    else
       ndigits++;

 if(ndigits==0 || npoints>1)
    return(0);

 /* The optional exponent */

 if(*p=='e' || *p=='E')
   {
    p++;

    if(*p=='-' || *p=='+')
       p++;

    if(!isdigit(*p))
       return(0);

    while(isdigit(*p))
       p++;
   }

 if(*p)
    return(0);
 else
    return(1);
}

Properties

Name Value
cvs:description A simple generic XML parser.