Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.l

Parent Directory Parent Directory | Revision Log Revision Log


Revision 509 - (show annotations) (download)
Sat Oct 9 11:05:28 2010 UTC (14 years, 5 months ago) by amb
File size: 26247 byte(s)
Ensure that comparisons are made with unsigned chars.

1 %{
2 /***************************************
3 $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.20 2010-10-09 11:05:28 amb Exp $
4
5 A simple generic XML parser where the structure comes from the function parameters.
6 Not intended to be fully conforming to XML staandard or a validating parser but
7 sufficient to parse OSM XML and simple program configuration files.
8
9 Part of the Routino routing software.
10 ******************/ /******************
11 This file Copyright 2010 Andrew M. Bishop
12
13 This program is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
22
23 You should have received a copy of the GNU Affero General Public License
24 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 ***************************************/
26
27
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <limits.h>

#include "xmlparse.h"
34
35
36 /* Parser outputs */
37
38 #define LEX_EOF 0
39
40 #define LEX_TAG_BEGIN 1
41 #define LEX_XML_DECL_BEGIN 2
42 #define LEX_TAG_POP 3
43 #define LEX_TAG_PUSH 4
44 #define LEX_XML_DECL_FINISH 6
45 #define LEX_TAG_FINISH 7
46 #define LEX_ATTR_KEY 8
47 #define LEX_ATTR_VAL 9
48
49 #define LEX_ERROR 100
50
51 #define LEX_ERROR_TAG_START 101
52 #define LEX_ERROR_XML_DECL_START 102
53 #define LEX_ERROR_TAG 103
54 #define LEX_ERROR_XML_DECL 104
55 #define LEX_ERROR_ATTR 105
56 #define LEX_ERROR_END_TAG 106
57 #define LEX_ERROR_COMMENT 107
58 #define LEX_ERROR_CLOSE 108
59 #define LEX_ERROR_ATTR_VAL 109
60 #define LEX_ERROR_ENTITY_REF 110
61 #define LEX_ERROR_CHAR_REF 111
62
63 #define LEX_ERROR_UNEXP_TAG 201
64 #define LEX_ERROR_UNBALANCED 202
65 #define LEX_ERROR_NO_START 203
66 #define LEX_ERROR_UNEXP_ATT 204
67 #define LEX_ERROR_UNEXP_EOF 205
68 #define LEX_ERROR_XML_NOT_FIRST 206
69
70 #define LEX_ERROR_CALLBACK 255
71
72
73 /* Lexer definitions */
74
75 #define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
76 #ifndef yywrap
77 /*+ Needed in lex but does nothing. +*/
78 #define yywrap() 1
79 #endif
80
/*+ Reset the current string. +*/
/* Allocates the buffer on first use and truncates it to the empty string.
   Expects the variables 'string' and 'stringused' to be in scope where it is
   used.  Wrapped in do/while(0) so that it expands to exactly one statement
   and stays correct under an unbraced if/else. */
#define reset_string \
    do \
    { \
        if(!string) string=(char*)malloc(16); \
        *string=0; \
        stringused=0; \
    } \
    while(0)

/*+ append information to the current string. +*/
/* Appends the NUL-terminated text 'xx', growing the buffer when the text plus
   its terminator would not fit.  Expects 'string', 'stringlen', 'stringused'
   and 'newlen' to be in scope where it is used.  The argument is evaluated
   twice, so it must not have side effects. */
#define append_string(xx) \
    do \
    { \
        newlen=strlen((xx)); \
        if((stringused+newlen)>=stringlen) \
            string=(char*)realloc((void*)string,stringlen=(stringused+newlen+16)); \
        strcpy(string+stringused,(xx)); \
        stringused+=newlen; \
    } \
    while(0)
94
95 #define YY_NO_INPUT
96
97
98 /* Lexer functions and variables */
99
100 extern int yylex(void);
101
102 static char *yylval=NULL;
103
104 static int xmlparse_options;
105
106 %}
107
108 %option 8bit
109 %option pointer
110 %option batch
111 %option yylineno
112
113 %option nodefault
114 %option perf-report
115 %option fast
116 %option nounput
117
118
119 /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII tags not Unicode. */
120
121 S [ \t\r\n]
122
123 U1 [\x09\x0A\x0D\x20-\x7F]
124 U2 [\xC2-\xDF][\x80-\xBF]
125 U3a \xE0[\xA0-\xBF][\x80-\xBF]
126 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
127 U3c \xED[\x80-\x9F][\x80-\xBF]
128 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
129 U3 {U3a}|{U3b}|{U3c}|{U3d}
130 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
131 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
132 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
133 U4 {U4a}|{U4b}|{U4c}
134
135 U ({U1}|{U2}|{U3}|{U4})
136 UquotedS ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4})
137 UquotedD ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4})
138
139 N (\n|\r\n)
140
141 letter [a-zA-Z]
142 digit [0-9]
143 xdigit [a-fA-F0-9]
144
145 namechar ({letter}|{digit}|[-._:])
146 name ({letter}|[_:]){namechar}*
147
148 entityref &{name};
149 charref &#({digit}+|x{xdigit}+);
150
151
152 %x COMMENT
153 %x CDATA
154 %x DOCTYPE
155 %x XML_DECL_START XML_DECL
156 %x TAG_START TAG
157 %x ATTR_KEY ATTR_VAL
158 %x END_TAG1 END_TAG2
159 %x DQUOTED SQUOTED
160
161 %%
162 /* Must use static variables since the parser returns often. */
163 static char *string=NULL;
164 static int stringlen=0,stringused=0;
165 static int after_attr=0;
166 int newlen;
167 int doctype_depth=0;
168
169 /* Handle top level entities */
170
171 "<!--" { BEGIN(COMMENT); }
172 "<![CDATA[" { BEGIN(CDATA); }
173 "<!DOCTYPE" { BEGIN(DOCTYPE); doctype_depth=0; }
174 "</" { BEGIN(END_TAG1); }
175 "<?" { BEGIN(XML_DECL_START); }
176 "<" { BEGIN(TAG_START); }
177 ">" { return(LEX_ERROR_CLOSE); }
178 [^<>]+ { }
179
180 /* Comments */
181
182 <COMMENT>"--->" { return(LEX_ERROR_COMMENT); }
183 <COMMENT>"-->" { BEGIN(INITIAL); }
184 <COMMENT>"--"[^->]+ { }
185 <COMMENT>[^-]+ { }
186 <COMMENT>"-" { }
187
188 /* CDATA */
189
190 <CDATA>"]]>" { BEGIN(INITIAL); }
191 <CDATA>"]" { }
192 <CDATA>[^]]+ { }
193
194 /* CDATA */
195
196 <DOCTYPE>"<" { doctype_depth++; }
197 <DOCTYPE>">" { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
198 <DOCTYPE>[^<>]+ { }
199
200 /* XML Declaration start */
201
202 <XML_DECL_START>xml { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); }
203 <XML_DECL_START>.|{N} { return(LEX_ERROR_XML_DECL_START); }
204
205 /* Tag middle */
206
207 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
208 <XML_DECL>{S}+ { }
209 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
210 <XML_DECL>.|{N} { return(LEX_ERROR_XML_DECL); }
211
212 /* Any tag start */
213
214 <TAG_START>{name} { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
215 <TAG_START>.|{N} { return(LEX_ERROR_TAG_START); }
216
217 /* End-tag start */
218
219 <END_TAG1>{name} { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
220 <END_TAG1>.|{N} { return(LEX_ERROR_END_TAG); }
221
222 <END_TAG2>">" { BEGIN(INITIAL); }
223 <END_TAG2>.|{N} { return(LEX_ERROR_END_TAG); }
224
225 /* Any tag middle */
226
227 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
228 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
229 <TAG>{S}+ { }
230 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
231 <TAG>.|{N} { return(LEX_ERROR_TAG); }
232
233 /* Attributes */
234
235 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
236 <ATTR_KEY>.|{N} { return(LEX_ERROR_ATTR); }
237
238 <ATTR_VAL>\" { BEGIN(DQUOTED); reset_string; }
239 <ATTR_VAL>\' { BEGIN(SQUOTED); reset_string; }
240 <ATTR_VAL>.|{N} { return(LEX_ERROR_ATTR); }
241
242 /* Quoted strings */
243
244 <DQUOTED>\" { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
245 <DQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
246 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
247 <DQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
248 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
249 <DQUOTED>[<>&\"] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
250 <DQUOTED>{UquotedD}+ { append_string(yytext); }
251 <DQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
252
253 <SQUOTED>\' { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
254 <SQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
255 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
256 <SQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
257 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
258 <SQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
259 <SQUOTED>{UquotedS}+ { append_string(yytext); }
260 <SQUOTED>. { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
261
262 /* End of file */
263
264 <<EOF>> { free(string); string=NULL; stringlen=stringused=0; BEGIN(INITIAL); return(LEX_EOF); }
265
266 %%
267
268
269 /*++++++++++++++++++++++++++++++++++++++
270 A function to call the callback function with the parameters needed.
271
272 int call_callback Returns 1 if the callback returned with an error.
273
274 const char *name The name of the tag.
275
276 int (*callback)() The callback function.
277
278 int type The type of tag (start and/or end).
279
280 int nattributes The number of attributes collected.
281
282 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
283 ++++++++++++++++++++++++++++++++++++++*/
284
285 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
286 {
287 switch(nattributes)
288 {
289 case 0: return (*callback)(name,type);
290 case 1: return (*callback)(name,type,attributes[0]);
291 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
292 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
293 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
294 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
295 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
296 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
297 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
298 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
299 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
300 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
301 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
302 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
303 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
304 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
305 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
306
307 default:
308 fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s' source code needs changing.\n",yylineno,name);
309 exit(1);
310 }
311 }
312
313
314 /*++++++++++++++++++++++++++++++++++++++
315 Parse the XML and call the functions for each tag as seen.
316
317 int ParseXML Returns 0 if OK or something else in case of an error.
318
319 FILE *file The file to parse.
320
321 xmltag **tags The array of pointers to tags for the top level.
322
323 int options A list of XML Parser options OR-ed together.
324 ++++++++++++++++++++++++++++++++++++++*/
325
326 int ParseXML(FILE *file,xmltag **tags,int options)
327 {
328 int yychar,i;
329
330 char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
331 int attribute=0;
332
333 int stackdepth=0,stackused=0;
334 xmltag ***tags_stack=NULL;
335 xmltag **tag_stack=NULL;
336 xmltag *tag=NULL;
337
338 /* The actual parser. */
339
340 xmlparse_options=options;
341
342 yyin=file;
343
344 yyrestart(yyin);
345
346 yylineno=1;
347
348 BEGIN(INITIAL);
349
350 do
351 {
352 yychar=yylex();
353
354 switch(yychar)
355 {
356 /* The start of a tag for an XML declaration */
357
358 case LEX_XML_DECL_BEGIN:
359
360 if(tag_stack)
361 {
362 fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno);
363 yychar=LEX_ERROR_XML_NOT_FIRST;
364 break;
365 }
366
367 /* The start of a tag for an element */
368
369 case LEX_TAG_BEGIN:
370
371 tag=NULL;
372
373 for(i=0;tags[i];i++)
374 if(!strcasecmp(yylval,tags[i]->name))
375 {
376 tag=tags[i];
377
378 for(i=0;i<tag->nattributes;i++)
379 if(attributes[i])
380 {
381 free(attributes[i]);
382 attributes[i]=NULL;
383 }
384
385 break;
386 }
387
388 if(tag==NULL)
389 {
390 fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval);
391 yychar=LEX_ERROR_UNEXP_TAG;
392 }
393
394 break;
395
396 /* The end of the start-tag for an element */
397
398 case LEX_TAG_PUSH:
399
400 if(stackused==stackdepth)
401 {
402 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
403 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
404 }
405
406 tag_stack [stackused]=tag;
407 tags_stack[stackused]=tags;
408 stackused++;
409
410 if(tag->callback)
411 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
412 yychar=LEX_ERROR_CALLBACK;
413
414 tags=tag->subtags;
415
416 break;
417
418 /* The end of the empty-element-tag for an XML declaration */
419
420 case LEX_XML_DECL_FINISH:
421
422 /* The end of the empty-element-tag for an element */
423
424 case LEX_TAG_FINISH:
425
426 if(tag->callback)
427 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
428 yychar=LEX_ERROR_CALLBACK;
429
430 if(stackused>0)
431 tag=tag_stack[stackused-1];
432 else
433 tag=NULL;
434
435 break;
436
437 /* The end of the end-tag for an element */
438
439 case LEX_TAG_POP:
440
441 stackused--;
442 tags=tags_stack[stackused];
443 tag =tag_stack [stackused];
444
445 if(strcmp(tag->name,yylval))
446 {
447 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name);
448 yychar=LEX_ERROR_UNBALANCED;
449 }
450
451 if(stackused<0)
452 {
453 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval);
454 yychar=LEX_ERROR_NO_START;
455 }
456
457 for(i=0;i<tag->nattributes;i++)
458 if(attributes[i])
459 {
460 free(attributes[i]);
461 attributes[i]=NULL;
462 }
463
464 if(tag->callback)
465 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
466 yychar=LEX_ERROR_CALLBACK;
467
468 if(stackused>0)
469 tag=tag_stack[stackused-1];
470 else
471 tag=NULL;
472
473 break;
474
475 /* An attribute key */
476
477 case LEX_ATTR_KEY:
478
479 attribute=-1;
480
481 for(i=0;i<tag->nattributes;i++)
482 if(!strcasecmp(yylval,tag->attributes[i]))
483 {
484 attribute=i;
485
486 break;
487 }
488
489 if(attribute==-1)
490 {
491 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
492 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':')))
493 {
494 fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
495 yychar=LEX_ERROR_UNEXP_ATT;
496 }
497 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
498 fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
499 }
500
501 break;
502
503 /* An attribute value */
504
505 case LEX_ATTR_VAL:
506
507 if(tag->callback && attribute!=-1 && yylval)
508 attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval);
509
510 break;
511
512 /* End of file */
513
514 case LEX_EOF:
515
516 if(tag)
517 {
518 fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name);
519 yychar=LEX_ERROR_UNEXP_EOF;
520 }
521
522 break;
523
524 case LEX_ERROR_TAG_START:
525 fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno);
526 break;
527
528 case LEX_ERROR_XML_DECL_START:
529 fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno);
530 break;
531
532 case LEX_ERROR_TAG:
533 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name);
534 break;
535
536 case LEX_ERROR_XML_DECL:
537 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name);
538 break;
539
540 case LEX_ERROR_ATTR:
541 fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno);
542 break;
543
544 case LEX_ERROR_END_TAG:
545 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno);
546 break;
547
548 case LEX_ERROR_COMMENT:
549 fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno);
550 break;
551
552 case LEX_ERROR_CLOSE:
553 fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno);
554 break;
555
556 case LEX_ERROR_ATTR_VAL:
557 fprintf(stderr,"XML Parser: Error on line %d: invalid character '%s' seen in attribute value.\n",yylineno,yylval);
558 break;
559
560 case LEX_ERROR_ENTITY_REF:
561 fprintf(stderr,"XML Parser: Error on line %d: invalid entity reference '%s' seen in attribute value.\n",yylineno,yylval);
562 break;
563
564 case LEX_ERROR_CHAR_REF:
565 fprintf(stderr,"XML Parser: Error on line %d: invalid character reference '%s' seen in attribute value.\n",yylineno,yylval);
566 break;
567 }
568 }
569 while(yychar>LEX_EOF && yychar<LEX_ERROR);
570
571 /* Delete the tagdata */
572
573 for(i=0;i<XMLPARSE_MAX_ATTRS;i++)
574 if(attributes[i])
575 free(attributes[i]);
576
577 if(stackdepth)
578 {
579 free(tag_stack);
580 free(tags_stack);
581 }
582
583 return(yychar);
584 }
585
586
587 /*++++++++++++++++++++++++++++++++++++++
588 Return the current parser line number.
589
590 unsigned long ParseXML_LineNumber Returns the line number.
591 ++++++++++++++++++++++++++++++++++++++*/
592
593 unsigned long ParseXML_LineNumber(void)
594 {
595 return(yylineno);
596 }
597
598
/*++++++++++++++++++++++++++++++++++++++
  Translate one of the five predefined XML entity references into the
  character that it stands for.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character
                                   string (which must not be modified or freed)
                                   or NULL if the reference is not recognised.

  const char *string The complete entity reference including the leading '&'
                     and the trailing ';' (the comparison is case-sensitive).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
    static const struct
    {
        const char *reference;      /* the text as it appears in the XML */
        char       *replacement;    /* the character that it represents */
    }
    predefined[]=
    {
        {"&amp;" ,"&" },
        {"&lt;"  ,"<" },
        {"&gt;"  ,">" },
        {"&apos;","'" },
        {"&quot;","\""}
    };
    size_t k;

    for(k=0;k<sizeof(predefined)/sizeof(predefined[0]);k++)
        if(strcmp(string,predefined[k].reference)==0)
            return(predefined[k].replacement);

    return(NULL);
}
616
617
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded
                                 string or NULL if the reference does not name
                                 a Unicode scalar value (negative, a surrogate
                                 or above U+10FFFF).

  const char *string The character reference string, "&#<decimal>;" or
                     "&#x<hex>;" (the leading "&#" is assumed to be present
                     and is not checked).

  The result is held in a static buffer that is overwritten by the next call,
  so it must be used or copied first and must not be freed.

  Code points up to 127 give the same single byte as before.  Larger values
  are now encoded as UTF-8, which is the encoding that the lexer requires for
  attribute values and that ParseXML_Encode_Safe_XML decodes; previously 128
  to 255 gave one raw byte (not valid UTF-8) and anything larger was rejected.
  A value of zero still gives an empty string.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
    static unsigned char result[5];     /* up to 4 bytes of UTF-8 plus NUL */
    long int unicode;

    if(string[2]=='x') unicode=strtol(string+3,NULL,16);
    else               unicode=strtol(string+2,NULL,10);

    if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
        return(NULL);

    if(unicode<0x80)
    {
        /* 0000 0000-0000 007F  =>  0xxxxxxx */
        result[0]=(unsigned char)unicode;
        result[1]=0;
    }
    else if(unicode<0x800)
    {
        /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
        result[0]=(unsigned char)(0xC0|(unicode>>6));
        result[1]=(unsigned char)(0x80|(unicode&0x3F));
        result[2]=0;
    }
    else if(unicode<0x10000)
    {
        /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
        result[0]=(unsigned char)(0xE0|(unicode>>12));
        result[1]=(unsigned char)(0x80|((unicode>>6)&0x3F));
        result[2]=(unsigned char)(0x80|(unicode&0x3F));
        result[3]=0;
    }
    else
    {
        /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        result[0]=(unsigned char)(0xF0|(unicode>>18));
        result[1]=(unsigned char)(0x80|((unicode>>12)&0x3F));
        result[2]=(unsigned char)(0x80|((unicode>>6)&0x3F));
        result[3]=(unsigned char)(0x80|(unicode&0x3F));
        result[4]=0;
    }

    return((char*)result);
}
641
642
/*++++++++++++++++++++++++++++++++++++++
  Append a short piece of text to the output buffer.

  size_t encode_append Returns the new write position.

  char *buffer The output buffer (the caller guarantees that there is room).

  size_t position The position to write at.

  const char *text The NUL-terminated text to append (the NUL is not copied).
  ++++++++++++++++++++++++++++++++++++++*/

static size_t encode_append(char *buffer,size_t position,const char *text)
{
    size_t length=strlen(text);

    memcpy(buffer+position,text,length);

    return(position+length);
}


/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded
                                 string, or the original pointer if no change
                                 was needed, or NULL if memory could not be
                                 allocated.  A result that differs from the
                                 argument was allocated with malloc().

  const char *string The NUL-terminated string to convert.

  The characters < > & ' " are replaced by the predefined entities.  Printable
  ASCII (32 to 127) is copied unchanged.  Everything else is written as a
  hexadecimal character reference "&#x...;": ASCII control characters use
  their own code, well-formed UTF-8 sequences of 2 to 4 bytes are decoded to
  their code point and any other byte becomes U+FFFD.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
    static const char hexstring[17]="0123456789ABCDEF";

    /* The longest output for one input character is "&#xHHHHHH;" */
    enum { LONGEST_OUTPUT=10, GROWTH=256 };

    const unsigned char *s=(const unsigned char*)string;
    size_t i,j,size;
    char *result;

    /* Find the first character that needs to be changed */

    for(i=0;s[i];i++)
        if(s[i]=='<' || s[i]=='>' || s[i]=='&' || s[i]=='\'' || s[i]=='"' || s[i]<32 || s[i]>127)
            break;

    if(!s[i])
        return((char*)string);

    /* Copy the unchanged start of the string */

    size=i+GROWTH;

    result=(char*)malloc(size);

    if(!result)
        return(NULL);

    memcpy(result,string,i);
    j=i;

    for(;s[i];i++)
    {
        unsigned char c=s[i];

        /* Always keep room for the longest possible output plus the final
           NUL.  (The margin used to cover only the 6 bytes of "&apos;" so a
           longer character reference near the end of the buffer overran it.) */

        if(j+LONGEST_OUTPUT+1>size)
        {
            char *bigger;

            size+=GROWTH;

            bigger=(char*)realloc((void*)result,size);

            if(!bigger)
            {
                free(result);
                return(NULL);
            }

            result=bigger;
        }

        if(c=='<')
            j=encode_append(result,j,"&lt;");
        else if(c=='>')
            j=encode_append(result,j,"&gt;");
        else if(c=='&')
            j=encode_append(result,j,"&amp;");
        else if(c=='\'')
            j=encode_append(result,j,"&apos;");
        else if(c=='"')
            j=encode_append(result,j,"&quot;");
        else if(c>=32 && c<=127)
            result[j++]=(char)c;
        else
        {
            unsigned int unicode;

            /* Decode the UTF-8 (each test stops at the first byte that does
               not match so the terminating NUL is never passed) */

            if(c<0x80)
            {
                /* 0000 0000-0000 001F  0xxxxxxx  (a control character; these
                   used to be replaced by U+FFFD, losing tabs and newlines) */
                unicode=c;
            }
            else if((c&0xE0)==0xC0 && (c&0x1F)>=2 && (s[i+1]&0xC0)==0x80)
            {
                /* 0000 0080-0000 07FF  110xxxxx 10xxxxxx */
                unicode =(unsigned int)(c&0x1F)<<6;
                unicode|= s[++i]&0x3F;
            }
            else if((c&0xF0)==0xE0 && (s[i+1]&0xC0)==0x80 && (s[i+2]&0xC0)==0x80)
            {
                /* 0000 0800-0000 FFFF  1110xxxx 10xxxxxx 10xxxxxx */
                unicode =(unsigned int)(c&0x0F)<<12;
                unicode|=(unsigned int)(s[++i]&0x3F)<<6;
                unicode|= s[++i]&0x3F;
            }
            else if((c&0xF8)==0xF0 && (s[i+1]&0xC0)==0x80 && (s[i+2]&0xC0)==0x80 && (s[i+3]&0xC0)==0x80)
            {
                /* 0001 0000-001F FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
                unicode =(unsigned int)(c&0x07)<<18;
                unicode|=(unsigned int)(s[++i]&0x3F)<<12;
                unicode|=(unsigned int)(s[++i]&0x3F)<<6;
                unicode|= s[++i]&0x3F;
            }
            else
                unicode=0xFFFD;

            /* Output the character reference using 2, 4 or 6 hex digits */

            j=encode_append(result,j,"&#x");

            if(unicode&0x00FF0000)
            {
                result[j++]=hexstring[(unicode>>20)&0x0f];
                result[j++]=hexstring[(unicode>>16)&0x0f];
            }
            if(unicode&0x00FFFF00)
            {
                result[j++]=hexstring[(unicode>>12)&0x0f];
                result[j++]=hexstring[(unicode>>8)&0x0f];
            }
            result[j++]=hexstring[(unicode>>4)&0x0f];
            result[j++]=hexstring[unicode&0x0f];

            result[j++]=';';
        }
    }

    result[j]=0;

    return(result);
}
778
779
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to an integer (checking that it really is an integer).

  int ParseXML_GetInteger Returns 1 if an integer could be found or 0 otherwise.

  const char *string The string to be parsed; it must consist of an optional
                     sign followed by one or more decimal digits and nothing
                     else (no surrounding whitespace).

  int *number Returns the number; it is not modified when 0 is returned.

  An empty string, a sign with no digits and a value that does not fit in an
  int are all rejected.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetInteger(const char *string,int *number)
{
    const char *p=string;
    long value;

    if(*p=='-' || *p=='+')
        p++;

    /* isdigit() must be given an unsigned char value; a plain char may be
       negative which is undefined behaviour. */

    if(!isdigit((unsigned char)*p))
        return(0);

    while(isdigit((unsigned char)*p))
        p++;

    if(*p)
        return(0);

    /* strtol() reports overflow (unlike atoi() where it is undefined) */

    errno=0;
    value=strtol(string,NULL,10);

    if(errno==ERANGE || value<INT_MIN || value>INT_MAX)
        return(0);

    *number=(int)value;

    return(1);
}
807
808
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to a floating point number (checking that it really is a number).

  int ParseXML_GetFloating Returns 1 if a number could be found or 0 otherwise.

  const char *string The string to be parsed; it must consist of an optional
                     sign, digits with at most one decimal point (at least one
                     digit in total), then optionally 'e' or 'E', an optional
                     sign and one or more digits, and nothing else.

  double *number Returns the number; it is not modified when 0 is returned.

  An empty string, a lone sign or point, more than one decimal point and an
  exponent with no digits are all rejected.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetFloating(const char *string,double *number)
{
    const char *p=string;
    int mantissa_digits=0;

    if(*p=='-' || *p=='+')
        p++;

    /* isdigit() must be given an unsigned char value; a plain char may be
       negative which is undefined behaviour. */

    while(isdigit((unsigned char)*p))
    {
        p++;
        mantissa_digits++;
    }

    if(*p=='.')
    {
        p++;

        while(isdigit((unsigned char)*p))
        {
            p++;
            mantissa_digits++;
        }
    }

    if(mantissa_digits==0)
        return(0);

    if(*p=='e' || *p=='E')
    {
        p++;

        if(*p=='-' || *p=='+')
            p++;

        if(!isdigit((unsigned char)*p))
            return(0);

        while(isdigit((unsigned char)*p))
            p++;
    }

    if(*p)
        return(0);

    *number=strtod(string,NULL);

    return(1);
}

Properties

Name Value
cvs:description A simple generic XML parser.