Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.l

Parent Directory Parent Directory | Revision Log Revision Log


Revision 483 - (show annotations) (download)
Tue Sep 14 17:50:23 2010 UTC (14 years, 6 months ago) by amb
File size: 24268 byte(s)
Stricter checking on XML data (Unicode).

1 %{
2 /***************************************
3 $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.18 2010-09-14 17:50:23 amb Exp $
4
5 A simple generic XML parser where the structure comes from the function parameters.
6 Not intended to be fully conforming to the XML standard or a validating parser but
7 sufficient to parse OSM XML and simple program configuration files.
8
9 Part of the Routino routing software.
10 ******************/ /******************
11 This file Copyright 2010 Andrew M. Bishop
12
13 This program is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
22
23 You should have received a copy of the GNU Affero General Public License
24 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 ***************************************/
26
27
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
32
33 #include "xmlparse.h"
34
35
36 /* Parser outputs: token codes are below LEX_ERROR (100); any value of LEX_ERROR or above stops the ParseXML() loop and is returned to the caller. */
37
38 #define LEX_EOF 0
39
40 #define LEX_TAG_BEGIN 1
41 #define LEX_XML_DECL_BEGIN 2
42 #define LEX_TAG_POP 3
43 #define LEX_TAG_PUSH 4
44 #define LEX_XML_DECL_FINISH 6
45 #define LEX_TAG_FINISH 7
46 #define LEX_ATTR_KEY 8
47 #define LEX_ATTR_VAL 9
48
49 #define LEX_ERROR 100
50
51 #define LEX_ERROR_TAG_START 101
52 #define LEX_ERROR_XML_DECL_START 102
53 #define LEX_ERROR_TAG 103
54 #define LEX_ERROR_XML_DECL 104
55 #define LEX_ERROR_ATTR 105
56 #define LEX_ERROR_END_TAG 106
57 #define LEX_ERROR_COMMENT 107
58 #define LEX_ERROR_CLOSE 108
59 #define LEX_ERROR_ATTR_VAL 109
60 #define LEX_ERROR_ENTITY_REF 110
61 #define LEX_ERROR_CHAR_REF 111
62
63 #define LEX_ERROR_UNEXP_TAG 201
64 #define LEX_ERROR_UNBALANCED 202
65 #define LEX_ERROR_NO_START 203
66 #define LEX_ERROR_UNEXP_ATT 204
67 #define LEX_ERROR_UNEXP_EOF 205
68 #define LEX_ERROR_XML_NOT_FIRST 206
69
70 #define LEX_ERROR_CALLBACK 255
71
72
73 /* Lexer definitions */
74
75 #define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
76 #ifndef yywrap
77 /*+ Needed in lex but does nothing. +*/
78 #define yywrap() 1
79 #endif
80
81 /*+ Reset the current string to empty, allocating an initial 16 byte buffer if there is none (stringlen is not updated here). +*/
82 #define reset_string \
83 if(!string) string=(char*)malloc(16); \
84 *string=0; \
85 stringused=0;
86
87 /*+ Append the string xx to the current string, growing the buffer with realloc first if it would not fit (expands to several statements and uses the variable newlen). +*/
88 #define append_string(xx) \
89 newlen=strlen(xx); \
90 if((stringused+newlen)>=stringlen) \
91 string=(char*)realloc((void*)string,stringlen=(stringused+newlen+16)); \
92 strcpy(string+stringused,xx); \
93 stringused+=newlen;
94
95 #define YY_NO_INPUT
96
97
98 /* Lexer functions and variables: yylval carries the text of the current token from the lexer rules to ParseXML(); xmlparse_options holds the options passed to ParseXML(). */
99
100 extern int yylex(void);
101
102 static char *yylval=NULL;
103
104 static int xmlparse_options;
105
106 %}
107
108 %option 8bit
109 %option pointer
110 %option batch
111 %option yylineno
112
113 %option nodefault
114 %option perf-report
115 %option fast
116 %option nounput
117
118
119 /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ with names restricted to ASCII; attribute values may also contain the multi-byte sequences matched by U2, U3 and U4. */
120
121 S [ \t\r\n]
122
123 U1 [\x20-\x7F]
124 U2 [\xC2-\xDF][\x80-\xBF]
125 U3 [\xE0-\xEF][\x80-\xBF][\x80-\xBF]
126 U4 [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF]
127
128 U ({U1}|{U2}|{U3}|{U4})
129 Uattrval ([\x20-\x21\x23-\x25\x28-\x3B\x3D\x3F-\x7F]|{U2}|{U3}|{U4})
130
131 N (\n|\r\n)
132
133 letter [a-zA-Z]
134 digit [0-9]
135 xdigit [a-fA-F0-9]
136
137 namechar ({letter}|{digit}|[-._:])
138 name ({letter}|[_:]){namechar}*
139
140 entityref &{name};
141 charref &#({digit}+|x{xdigit}+);
142
143
144 %x COMMENT
145 %x CDATA
146 %x DOCTYPE
147 %x XML_DECL_START XML_DECL
148 %x TAG_START TAG
149 %x ATTR_KEY ATTR_VAL
150 %x END_TAG1 END_TAG2
151 %x DQUOTED SQUOTED
152
153 %%
154 /* Must use static variables since yylex() returns to the parser often and these values must survive between calls. */
155 static char *string=NULL;
156 static int stringlen=0,stringused=0;
157 static int after_attr=0;
158 int newlen;
159 int doctype_depth=0;
160
161 /* Handle top level entities (text between tags is matched and discarded) */
162
163 "<!--" { BEGIN(COMMENT); }
164 "<![CDATA[" { BEGIN(CDATA); }
165 "<!DOCTYPE" { BEGIN(DOCTYPE); doctype_depth=0; }
166 "</" { BEGIN(END_TAG1); }
167 "<?" { BEGIN(XML_DECL_START); }
168 "<" { BEGIN(TAG_START); }
169 ">" { return(LEX_ERROR_CLOSE); }
170 [^<>]+ { }
171
172 /* Comments */
173
174 <COMMENT>"--->" { return(LEX_ERROR_COMMENT); }
175 <COMMENT>"-->" { BEGIN(INITIAL); }
176 <COMMENT>"--"[^->]+ { }
177 <COMMENT>[^-]+ { }
178 <COMMENT>"-" { }
179
180 /* CDATA */
181
182 <CDATA>"]]>" { BEGIN(INITIAL); }
183 <CDATA>"]" { }
184 <CDATA>[^]]+ { }
185
186 /* DOCTYPE (nested '<' ... '>' pairs are counted so that only the outermost '>' ends it) */
187
188 <DOCTYPE>"<" { doctype_depth++; }
189 <DOCTYPE>">" { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
190 <DOCTYPE>[^<>]+ { }
191
192 /* XML Declaration start */
193
194 <XML_DECL_START>xml { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); }
195 <XML_DECL_START>.|{N} { return(LEX_ERROR_XML_DECL_START); }
196
197 /* XML Declaration middle */
198
199 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
200 <XML_DECL>{S}+ { }
201 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
202 <XML_DECL>.|{N} { return(LEX_ERROR_XML_DECL); }
203
204 /* Any tag start */
205
206 <TAG_START>{name} { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
207 <TAG_START>.|{N} { return(LEX_ERROR_TAG_START); }
208
209 /* End-tag start */
210
211 <END_TAG1>{name} { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
212 <END_TAG1>.|{N} { return(LEX_ERROR_END_TAG); }
213
214 <END_TAG2>">" { BEGIN(INITIAL); }
215 <END_TAG2>.|{N} { return(LEX_ERROR_END_TAG); }
216
217 /* Any tag middle */
218
219 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
220 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
221 <TAG>{S}+ { }
222 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
223 <TAG>.|{N} { return(LEX_ERROR_TAG); }
224
225 /* Attributes (after_attr remembers which state to return to once the value has been read) */
226
227 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
228 <ATTR_KEY>.|{N} { return(LEX_ERROR_ATTR); }
229
230 <ATTR_VAL>\" { BEGIN(DQUOTED); reset_string; }
231 <ATTR_VAL>\' { BEGIN(SQUOTED); reset_string; }
232 <ATTR_VAL>.|{N} { return(LEX_ERROR_ATTR); }
233
234 /* Quoted strings (references are decoded unless XMLPARSE_RETURN_ATTR_ENCODED is set) */
235
236 <DQUOTED>\" { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
237 <DQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
238 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
239 <DQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
240 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
241 <DQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
242 <DQUOTED>{Uattrval}+ { append_string(yytext); }
243 <DQUOTED>.|{N} { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
244
245 <SQUOTED>\' { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
246 <SQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
247 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
248 <SQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
249 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
250 <SQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
251 <SQUOTED>{Uattrval}+ { append_string(yytext); }
252 <SQUOTED>.|{N} { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
253
254 /* End of file (releases the string buffer and resets the start state) */
255
256 <<EOF>> { free(string); string=NULL; stringlen=stringused=0; BEGIN(INITIAL); return(LEX_EOF); }
257
258 %%
259
260
261 /*++++++++++++++++++++++++++++++++++++++
262 A function to call the callback function with the parameters needed.
263
264 int call_callback Returns 1 if the callback returned with an error.
265
266 const char *name The name of the tag.
267
268 int (*callback)() The callback function.
269
270 int type The type of tag (start and/or end).
271
272 int nattributes The number of attributes collected.
273
274 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
275 ++++++++++++++++++++++++++++++++++++++*/
276
277 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
278 {
279 switch(nattributes)
280 {
281 case 0: return (*callback)(name,type);
282 case 1: return (*callback)(name,type,attributes[0]);
283 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
284 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
285 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
286 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
287 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
288 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
289 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
290 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
291 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
292 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
293 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
294 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
295 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
296 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
297 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
298
299 default:
300 fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s' source code needs changing.\n",yylineno,name);
301 exit(1);
302 }
303 }
304
305
306 /*++++++++++++++++++++++++++++++++++++++
307 Parse the XML and call the functions for each tag as seen.
308
309 int ParseXML Returns 0 if OK or something else in case of an error.
310
311 FILE *file The file to parse.
312
313 xmltag **tags The array of pointers to tags for the top level.
314
315 int options A list of XML Parser options OR-ed together.
316 ++++++++++++++++++++++++++++++++++++++*/
317
318 int ParseXML(FILE *file,xmltag **tags,int options)
319 {
320 int yychar,i;
321
322 char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
323 int attribute=0;
324
325 int stackdepth=0,stackused=0;
326 xmltag ***tags_stack=NULL;
327 xmltag **tag_stack=NULL;
328 xmltag *tag=NULL;
329
330 /* The actual parser. */
331
332 xmlparse_options=options;
333
334 yyin=file;
335
336 yyrestart(yyin);
337
338 yylineno=1;
339
340 BEGIN(INITIAL);
341
342 do
343 {
344 yychar=yylex();
345
346 switch(yychar)
347 {
348 /* The start of a tag for an XML declaration */
349
350 case LEX_XML_DECL_BEGIN:
351
352 if(tag_stack)
353 {
354 fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno);
355 yychar=LEX_ERROR_XML_NOT_FIRST;
356 break;
357 }
358
359 /* The start of a tag for an element */
360
361 case LEX_TAG_BEGIN:
362
363 tag=NULL;
364
365 for(i=0;tags[i];i++)
366 if(!strcasecmp(yylval,tags[i]->name))
367 {
368 tag=tags[i];
369
370 for(i=0;i<tag->nattributes;i++)
371 if(attributes[i])
372 {
373 free(attributes[i]);
374 attributes[i]=NULL;
375 }
376
377 break;
378 }
379
380 if(tag==NULL)
381 {
382 fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval);
383 yychar=LEX_ERROR_UNEXP_TAG;
384 }
385
386 break;
387
388 /* The end of the start-tag for an element */
389
390 case LEX_TAG_PUSH:
391
392 if(stackused==stackdepth)
393 {
394 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
395 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
396 }
397
398 tag_stack [stackused]=tag;
399 tags_stack[stackused]=tags;
400 stackused++;
401
402 if(tag->callback)
403 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
404 yychar=LEX_ERROR_CALLBACK;
405
406 tags=tag->subtags;
407
408 break;
409
410 /* The end of the empty-element-tag for an XML declaration */
411
412 case LEX_XML_DECL_FINISH:
413
414 /* The end of the empty-element-tag for an element */
415
416 case LEX_TAG_FINISH:
417
418 if(tag->callback)
419 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
420 yychar=LEX_ERROR_CALLBACK;
421
422 if(stackused>0)
423 tag=tag_stack[stackused-1];
424 else
425 tag=NULL;
426
427 break;
428
429 /* The end of the end-tag for an element */
430
431 case LEX_TAG_POP:
432
433 stackused--;
434 tags=tags_stack[stackused];
435 tag =tag_stack [stackused];
436
437 if(strcmp(tag->name,yylval))
438 {
439 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name);
440 yychar=LEX_ERROR_UNBALANCED;
441 }
442
443 if(stackused<0)
444 {
445 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval);
446 yychar=LEX_ERROR_NO_START;
447 }
448
449 for(i=0;i<tag->nattributes;i++)
450 if(attributes[i])
451 {
452 free(attributes[i]);
453 attributes[i]=NULL;
454 }
455
456 if(tag->callback)
457 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
458 yychar=LEX_ERROR_CALLBACK;
459
460 if(stackused>0)
461 tag=tag_stack[stackused-1];
462 else
463 tag=NULL;
464
465 break;
466
467 /* An attribute key */
468
469 case LEX_ATTR_KEY:
470
471 attribute=-1;
472
473 for(i=0;i<tag->nattributes;i++)
474 if(!strcasecmp(yylval,tag->attributes[i]))
475 {
476 attribute=i;
477
478 break;
479 }
480
481 if(attribute==-1)
482 {
483 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
484 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':')))
485 {
486 fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
487 yychar=LEX_ERROR_UNEXP_ATT;
488 }
489 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
490 fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
491 }
492
493 break;
494
495 /* An attribute value */
496
497 case LEX_ATTR_VAL:
498
499 if(tag->callback && attribute!=-1 && yylval)
500 attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval);
501
502 break;
503
504 /* End of file */
505
506 case LEX_EOF:
507
508 if(tag)
509 {
510 fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name);
511 yychar=LEX_ERROR_UNEXP_EOF;
512 }
513
514 break;
515
516 case LEX_ERROR_TAG_START:
517 fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno);
518 break;
519
520 case LEX_ERROR_XML_DECL_START:
521 fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno);
522 break;
523
524 case LEX_ERROR_TAG:
525 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name);
526 break;
527
528 case LEX_ERROR_XML_DECL:
529 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name);
530 break;
531
532 case LEX_ERROR_ATTR:
533 fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno);
534 break;
535
536 case LEX_ERROR_END_TAG:
537 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno);
538 break;
539
540 case LEX_ERROR_COMMENT:
541 fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno);
542 break;
543
544 case LEX_ERROR_CLOSE:
545 fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno);
546 break;
547
548 case LEX_ERROR_ATTR_VAL:
549 fprintf(stderr,"XML Parser: Error on line %d: invalid character '%s' seen in attribute value.\n",yylineno,yylval);
550 break;
551
552 case LEX_ERROR_ENTITY_REF:
553 fprintf(stderr,"XML Parser: Error on line %d: invalid entity reference '%s' seen in attribute value.\n",yylineno,yylval);
554 break;
555
556 case LEX_ERROR_CHAR_REF:
557 fprintf(stderr,"XML Parser: Error on line %d: invalid character reference '%s' seen in attribute value.\n",yylineno,yylval);
558 break;
559 }
560 }
561 while(yychar>LEX_EOF && yychar<LEX_ERROR);
562
563 /* Delete the tagdata */
564
565 for(i=0;i<XMLPARSE_MAX_ATTRS;i++)
566 if(attributes[i])
567 free(attributes[i]);
568
569 if(stackdepth)
570 {
571 free(tag_stack);
572 free(tags_stack);
573 }
574
575 return(yychar);
576 }
577
578
579 /*++++++++++++++++++++++++++++++++++++++
580 Return the current parser line number.
581
582 unsigned long ParseXML_LineNumber Returns the line number.
583 ++++++++++++++++++++++++++++++++++++++*/
584
585 unsigned long ParseXML_LineNumber(void)
586 {
587 return(yylineno);
588 }
589
590
/*++++++++++++++++++++++++++++++++++++++
  Look up one of the five predefined XML entity references and give the text that it stands for.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the replacement decoded string or NULL if the reference is not recognised.

  const char *string The entity reference string (including the leading '&' and trailing ';').
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison is exact, so the reference must be in lower case. */
 static const struct
   {
    const char *reference;
    char       *replacement;
   }
 entities[]=
   {
    {"&amp;" ,"&" },
    {"&lt;"  ,"<" },
    {"&gt;"  ,">" },
    {"&apos;","'" },
    {"&quot;","\""}
   };

 size_t e;

 for(e=0;e<sizeof(entities)/sizeof(entities[0]);e++)
    if(strcmp(string,entities[e].reference)==0)
       return(entities[e].replacement);

 return(NULL);
}
608
609
/*++++++++++++++++++++++++++++++++++++++
  Decode an XML character reference ("&#NNN;" or "&#xHH;") into a single byte string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string, held in a
  static buffer that is overwritten by the next call, or NULL if the value is outside 0 to 255.

  const char *string The character reference string (including the leading "&#").
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[2]=" ";
 const char *digits;
 int base;
 long int code;

 /* Skip the "&#" prefix, and the 'x' that selects hexadecimal. */

 if(string[2]=='x')
   {
    digits=string+3;
    base=16;
   }
 else
   {
    digits=string+2;
    base=10;
   }

 code=strtol(digits,NULL,base);

 if(code>=0 && code<=255)
   {
    result[0]=(char)(code&0xff);

    return(result);
   }

 return(NULL);
}
633
634
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  The characters '<', '>', '&', ''' and '"' are replaced by their entity
  references and every byte below 32 or above 126 is replaced by a two digit
  hexadecimal character reference ("&#xHH;").

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded string (or the
  original if no change needed).  A replacement string is allocated with malloc and is the
  caller's to free; NULL is returned if that allocation fails.

  const char *string The string to convert.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 size_t i,length=0;
 int unsafe=0;
 char *result,*out;

 /* First pass: work out the exact length of the encoded string so that one allocation is enough. */

 for(i=0;string[i];i++)
   {
    /* Bytes are examined as unsigned so the result does not depend on the signedness of char. */

    unsigned char c=(unsigned char)string[i];

    if(c=='<' || c=='>')
       length+=4;
    else if(c=='&')
       length+=5;
    else if(c=='\'' || c=='"' || c<32 || c>126)
       length+=6;
    else
      {
       length+=1;
       continue;
      }

    unsafe=1;
   }

 if(!unsafe)
    return((char*)string);

 result=(char*)malloc(length+1);

 if(!result)
    return(NULL);

 /* Second pass: write the encoded string. */

 out=result;

 for(i=0;string[i];i++)
   {
    unsigned char c=(unsigned char)string[i];

    if(c=='<')
      {
       memcpy(out,"&lt;",4);
       out+=4;
      }
    else if(c=='>')
      {
       memcpy(out,"&gt;",4);
       out+=4;
      }
    else if(c=='&')
      {
       memcpy(out,"&amp;",5);
       out+=5;
      }
    else if(c=='\'')
      {
       memcpy(out,"&apos;",6);
       out+=6;
      }
    else if(c=='"')
      {
       memcpy(out,"&quot;",6);
       out+=6;
      }
    else if(c<32 || c>126)
      {
       *out++='&';
       *out++='#';
       *out++='x';
       *out++=hexstring[c>>4];
       *out++=hexstring[c&0x0f];
       *out++=';';
      }
    else
       *out++=(char)c;
   }

 *out=0;

 return(result);
}
728
729
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to an integer (checking that it really is an integer).

  The string must be an optional sign followed by one or more decimal digits
  and nothing else, and the value must fit in an int.

  int ParseXML_GetInteger Returns 1 if an integer could be found or 0 otherwise.

  const char *string The string to be parsed.

  int *number Returns the number (left unchanged if 0 is returned).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetInteger(const char *string,int *number)
{
 const char *p=string;
 long value;

 if(*p=='-' || *p=='+')
    p++;

 /* At least one digit is needed; an empty string or a lone sign is not a number. */

 if(!isdigit((unsigned char)*p))
    return(0);

 while(isdigit((unsigned char)*p))
    p++;

 if(*p)
    return(0);

 /* strtol reports overflow through errno; the extra range check covers platforms where long is wider than int. */

 errno=0;
 value=strtol(string,NULL,10);

 if(errno==ERANGE || value<INT_MIN || value>INT_MAX)
    return(0);

 *number=(int)value;

 return(1);
}
757
758
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to a floating point number (checking that it really is a number).

  The string must be an optional sign, digits with at most one decimal point
  (and at least one digit), then an optional exponent made of 'e' or 'E', an
  optional sign and one or more digits.

  int ParseXML_GetFloating Returns 1 if a number could be found or 0 otherwise.

  const char *string The string to be parsed.

  double *number Returns the number (left unchanged if 0 is returned).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetFloating(const char *string,double *number)
{
 const char *p=string;
 int ndigits=0;

 if(*p=='-' || *p=='+')
    p++;

 /* Integer part */

 while(isdigit((unsigned char)*p))
   {
    p++;
    ndigits++;
   }

 /* Fractional part - only one decimal point is allowed */

 if(*p=='.')
   {
    p++;

    while(isdigit((unsigned char)*p))
      {
       p++;
       ndigits++;
      }
   }

 /* Strings such as "", "+" or "." contain no digits and are not numbers. */

 if(ndigits==0)
    return(0);

 /* Exponent - must contain at least one digit */

 if(*p=='e' || *p=='E')
   {
    p++;

    if(*p=='-' || *p=='+')
       p++;

    if(!isdigit((unsigned char)*p))
       return(0);

    while(isdigit((unsigned char)*p))
       p++;
   }

 if(*p)
    return(0);

 *number=strtod(string,NULL);

 return(1);
}

Properties

Name Value
cvs:description A simple generic XML parser.