Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.l

Parent Directory Parent Directory | Revision Log Revision Log


Revision 404 - (show annotations) (download)
Tue May 25 18:24:20 2010 UTC (14 years, 9 months ago) by amb
File size: 23797 byte(s)
Fix bug with encoding XML strings.

1 %{
2 /***************************************
3 $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.17 2010-05-25 18:24:20 amb Exp $
4
5 A simple generic XML parser where the structure comes from the function parameters.
6 Not intended to be fully conforming to the XML standard or a validating parser but
7 sufficient to parse OSM XML and simple program configuration files.
8
9 Part of the Routino routing software.
10 ******************/ /******************
11 This file Copyright 2010 Andrew M. Bishop
12
13 This program is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
22
23 You should have received a copy of the GNU Affero General Public License
24 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 ***************************************/
26
27
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
32
33 #include "xmlparse.h"
34
35
/* Parser outputs */

/*+ The codes returned by the lexer and the codes that ParseXML() substitutes for them when it detects an error. +*/
enum
{
 /* Returned by the lexer at the end of the input. */

 LEX_EOF                  =   0,

 /* Tokens returned by the lexer (the value 5 is not used). */

 LEX_TAG_BEGIN            =   1,
 LEX_XML_DECL_BEGIN       =   2,
 LEX_TAG_POP              =   3,
 LEX_TAG_PUSH             =   4,
 LEX_XML_DECL_FINISH      =   6,
 LEX_TAG_FINISH           =   7,
 LEX_ATTR_KEY             =   8,
 LEX_ATTR_VAL             =   9,

 /* Any code at or above this value ends the parsing loop in ParseXML(). */

 LEX_ERROR                = 100,

 /* Errors returned by the lexer rules. */

 LEX_ERROR_TAG_START      = 101,
 LEX_ERROR_XML_DECL_START = 102,
 LEX_ERROR_TAG            = 103,
 LEX_ERROR_XML_DECL       = 104,
 LEX_ERROR_ATTR           = 105,
 LEX_ERROR_END_TAG        = 106,
 LEX_ERROR_COMMENT        = 107,
 LEX_ERROR_CLOSE          = 108,
 LEX_ERROR_ATTR_VAL       = 109,
 LEX_ERROR_ENTITY_REF     = 110,
 LEX_ERROR_CHAR_REF       = 111,

 /* Errors detected by ParseXML() itself. */

 LEX_ERROR_UNEXP_TAG      = 201,
 LEX_ERROR_UNBALANCED     = 202,
 LEX_ERROR_NO_START       = 203,
 LEX_ERROR_UNEXP_ATT      = 204,
 LEX_ERROR_UNEXP_EOF      = 205,
 LEX_ERROR_XML_NOT_FIRST  = 206,

 /* Set by ParseXML() when a tag callback returns non-zero. */

 LEX_ERROR_CALLBACK       = 255
};
71
72
/* Lexer definitions */

#define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
#ifndef yywrap
/*+ Needed in lex but does nothing. +*/
#define yywrap() 1
#endif

/*+ Reset the current string to empty, allocating the initial buffer if there is none.
    Uses the variables 'string', 'stringlen' and 'stringused' from the calling scope.
    Wrapped in do/while(0) so that it is a single statement wherever it is used. +*/
#define reset_string \
 do \
   { \
    if(!string) \
      { \
       string=(char*)malloc(16); \
       stringlen=16; /* keep the recorded size in step with the allocation */ \
      } \
    *string=0; \
    stringused=0; \
   } \
 while(0)

/*+ Append the string xx to the current string, growing the buffer when it would not fit.
    Uses the variables 'string', 'stringlen', 'stringused' and 'newlen' from the calling scope.
    Wrapped in do/while(0) so that it is a single statement wherever it is used. +*/
#define append_string(xx) \
 do \
   { \
    newlen=strlen(xx); \
    if((stringused+newlen)>=stringlen) /* '>=' leaves room for the terminating NUL */ \
       string=(char*)realloc((void*)string,stringlen=(stringused+newlen+16)); \
    strcpy(string+stringused,xx); \
    stringused+=newlen; \
   } \
 while(0)

#define YY_NO_INPUT


/* Lexer functions and variables */

extern int yylex(void);

/*+ The text associated with the token most recently returned by yylex(). +*/
static char *yylval=NULL;

/*+ The options passed to ParseXML(), consulted by the lexer rules. +*/
static int xmlparse_options;
105
106 %}
107
/* Scanner options: 8-bit input, yytext as a pointer, batch (non-interactive) scanning and line counting in yylineno. */
108 %option 8bit
109 %option pointer
110 %option batch
111 %option yylineno
112
/* No default rule (the rules must match all input in every state), performance report, fast tables and no unput() function. */
113 %option nodefault
114 %option perf-report
115 %option fast
116 %option nounput
117
118
119 /* Grammar based on http://www.w3.org/TR/2004/REC-xml-20040204/ but for ASCII not Unicode. */
120
/* White space. */
121 S [ \t\r\n]
122
/* Basic character classes. */
123 letter [a-zA-Z]
124 digit [0-9]
125 xdigit [a-fA-F0-9]
126
/* A name starts with a letter, '_' or ':' and continues with letters, digits, '-', '.', '_' or ':'. */
127 namechar ({letter}|{digit}|[-._:])
128 name ({letter}|[_:]){namechar}*
129
/* References: '&name;' for an entity and '&#digits;' or '&#xhexdigits;' for a character. */
130 entityref &{name};
131 charref &#({digit}+|x{xdigit}+);
132
133
/* Exclusive start conditions, one for each syntactic context that has its own group of rules. */
134 %x COMMENT
135 %x CDATA
136 %x DOCTYPE
137 %x XML_DECL_START XML_DECL
138 %x TAG_START TAG
139 %x ATTR_KEY ATTR_VAL
140 %x END_TAG1 END_TAG2
141 %x DQUOTED SQUOTED
142
143 %%
144 /* Must use static variables since the parser returns often. */
 /* 'string', 'stringlen' and 'stringused' describe the buffer in which an attribute value is accumulated; */
 /* 'after_attr' is the start condition to go back to once an attribute value is complete. */
 /* 'newlen' and 'doctype_depth' are not static: they are only used by actions that do not return between setting and using them. */
145 static char *string=NULL;
146 static int stringlen=0,stringused=0;
147 static int after_attr=0;
148 int newlen;
149 int doctype_depth=0;
150
151 /* Handle top level entities */
 /* Text between tags is matched and discarded; a '>' on its own is an error. */
152
153 "<!--" { BEGIN(COMMENT); }
154 "<![CDATA[" { BEGIN(CDATA); }
155 "<!DOCTYPE" { BEGIN(DOCTYPE); doctype_depth=0; }
156 "</" { BEGIN(END_TAG1); }
157 "<?" { BEGIN(XML_DECL_START); }
158 "<" { BEGIN(TAG_START); }
159 ">" { return(LEX_ERROR_CLOSE); }
160 [^<>]+ { }
161
162 /* Comments */
 /* The contents are discarded; a comment that ends with three dashes is reported as an error. */
163
164 <COMMENT>"--->" { return(LEX_ERROR_COMMENT); }
165 <COMMENT>"-->" { BEGIN(INITIAL); }
166 <COMMENT>"--"[^->]+ { }
167 <COMMENT>[^-]+ { }
168 <COMMENT>"-" { }
169
170 /* CDATA */
 /* The contents are discarded up to the closing sequence. */
171
172 <CDATA>"]]>" { BEGIN(INITIAL); }
173 <CDATA>"]" { }
174 <CDATA>[^]]+ { }
175
176 /* DOCTYPE */
 /* The contents are discarded; doctype_depth counts nested '<' so that only the matching '>' ends the declaration. */
177
178 <DOCTYPE>"<" { doctype_depth++; }
179 <DOCTYPE>">" { if(doctype_depth==0) BEGIN(INITIAL); else doctype_depth--; }
180 <DOCTYPE>[^<>]+ { }
181
182 /* XML Declaration start */
 /* A name must follow immediately; it is returned in yylval. */
183
184 <XML_DECL_START>{name} { BEGIN(XML_DECL); yylval=yytext; return(LEX_XML_DECL_BEGIN); }
185 <XML_DECL_START>.|\n { return(LEX_ERROR_XML_DECL_START); }
186
187 /* XML declaration middle */
 /* Attribute keys are returned one at a time; after_attr records that XML_DECL is the state to resume after the value. */
188
189 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
190 <XML_DECL>{S}+ { }
191 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
192 <XML_DECL>.|\n { return(LEX_ERROR_XML_DECL); }
193
194 /* Any tag start */
 /* A name must follow immediately; it is returned in yylval. */
195
196 <TAG_START>{name} { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
197 <TAG_START>.|\n { return(LEX_ERROR_TAG_START); }
198
199 /* End-tag start */
 /* The name is returned in yylval and must be followed immediately by '>'. */
200
201 <END_TAG1>{name} { BEGIN(END_TAG2); yylval=yytext; return(LEX_TAG_POP); }
202 <END_TAG1>.|\n { return(LEX_ERROR_END_TAG); }
203
204 <END_TAG2>">" { BEGIN(INITIAL); }
205 <END_TAG2>.|\n { return(LEX_ERROR_END_TAG); }
206
207 /* Any tag middle */
 /* Attribute keys are returned one at a time; after_attr records that TAG is the state to resume after the value. */
208
209 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
210 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
211 <TAG>{S}+ { }
212 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
213 <TAG>.|\n { return(LEX_ERROR_TAG); }
214
215 /* Attributes */
 /* The key must be followed immediately by '=' and then by a quote character; anything else is an error. */
216
217 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
218 <ATTR_KEY>.|\n { return(LEX_ERROR_ATTR); }
219
220 <ATTR_VAL>\" { BEGIN(DQUOTED); reset_string; }
221 <ATTR_VAL>\' { BEGIN(SQUOTED); reset_string; }
222 <ATTR_VAL>.|\n { return(LEX_ERROR_ATTR); }
223
224 /* Quoted strings */
 /* The value is accumulated in 'string' and returned in yylval when the closing quote is seen. */
 /* References are appended unchanged if XMLPARSE_RETURN_ATTR_ENCODED is set, otherwise they are decoded */
 /* and a reference that cannot be decoded is returned as an error with its text in yylval. */
 /* The characters '<', '>' and '&' (other than as part of a reference) are errors inside a value. */
225
226 <DQUOTED>\" { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
227 <DQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
228 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
229 <DQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
230 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
231 <DQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
232 <DQUOTED>[^<>&\"]+ { append_string(yytext); }
233
234 <SQUOTED>\' { BEGIN(after_attr); yylval=string; return(LEX_ATTR_VAL); }
235 <SQUOTED>{entityref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
236 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_ENTITY_REF);} } }
237 <SQUOTED>{charref} { if(xmlparse_options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
238 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {yylval=yytext; return(LEX_ERROR_CHAR_REF);} } }
239 <SQUOTED>[<>&] { yylval=yytext; return(LEX_ERROR_ATTR_VAL); }
240 <SQUOTED>[^<>&\']+ { append_string(yytext); }
241
242 /* End of file */
 /* The attribute value buffer is released and the lexer is put back into the INITIAL state. */
243
244 <<EOF>> { free(string); string=NULL; stringlen=stringused=0; BEGIN(INITIAL); return(LEX_EOF); }
245
246 %%
247
248
249 /*++++++++++++++++++++++++++++++++++++++
250 A function to call the callback function with the parameters needed.
251
252 int call_callback Returns 1 if the callback returned with an error.
253
254 const char *name The name of the tag.
255
256 int (*callback)() The callback function.
257
258 int type The type of tag (start and/or end).
259
260 int nattributes The number of attributes collected.
261
262 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
263 ++++++++++++++++++++++++++++++++++++++*/
264
265 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
266 {
267 switch(nattributes)
268 {
269 case 0: return (*callback)(name,type);
270 case 1: return (*callback)(name,type,attributes[0]);
271 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
272 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
273 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
274 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
275 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
276 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
277 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
278 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
279 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
280 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
281 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
282 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
283 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
284 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
285 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
286
287 default:
288 fprintf(stderr,"XML Parser: Error on line %d: too many attributes for tag '%s' source code needs changing.\n",yylineno,name);
289 exit(1);
290 }
291 }
292
293
294 /*++++++++++++++++++++++++++++++++++++++
295 Parse the XML and call the functions for each tag as seen.
296
297 int ParseXML Returns 0 if OK or something else in case of an error.
298
299 FILE *file The file to parse.
300
301 xmltag **tags The array of pointers to tags for the top level.
302
303 int options A list of XML Parser options OR-ed together.
304 ++++++++++++++++++++++++++++++++++++++*/
305
306 int ParseXML(FILE *file,xmltag **tags,int options)
307 {
308 int yychar,i;
309
310 char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
311 int attribute=0;
312
313 int stackdepth=0,stackused=0;
314 xmltag ***tags_stack=NULL;
315 xmltag **tag_stack=NULL;
316 xmltag *tag=NULL;
317
318 /* The actual parser. */
319
320 xmlparse_options=options;
321
322 yyin=file;
323
324 yyrestart(yyin);
325
326 yylineno=1;
327
328 BEGIN(INITIAL);
329
330 do
331 {
332 yychar=yylex();
333
334 switch(yychar)
335 {
336 /* The start of a tag for an XML declaration */
337
338 case LEX_XML_DECL_BEGIN:
339
340 if(tag_stack)
341 {
342 fprintf(stderr,"XML Parser: Error on line %d: XML declaration not before all other tags.\n",yylineno);
343 yychar=LEX_ERROR_XML_NOT_FIRST;
344 break;
345 }
346
347 /* The start of a tag for an element */
348
349 case LEX_TAG_BEGIN:
350
351 tag=NULL;
352
353 for(i=0;tags[i];i++)
354 if(!strcasecmp(yylval,tags[i]->name))
355 {
356 tag=tags[i];
357
358 for(i=0;i<tag->nattributes;i++)
359 if(attributes[i])
360 {
361 free(attributes[i]);
362 attributes[i]=NULL;
363 }
364
365 break;
366 }
367
368 if(tag==NULL)
369 {
370 fprintf(stderr,"XML Parser: Error on line %d: unexpected tag '%s'.\n",yylineno,yylval);
371 yychar=LEX_ERROR_UNEXP_TAG;
372 }
373
374 break;
375
376 /* The end of the start-tag for an element */
377
378 case LEX_TAG_PUSH:
379
380 if(stackused==stackdepth)
381 {
382 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
383 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
384 }
385
386 tag_stack [stackused]=tag;
387 tags_stack[stackused]=tags;
388 stackused++;
389
390 if(tag->callback)
391 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
392 yychar=LEX_ERROR_CALLBACK;
393
394 tags=tag->subtags;
395
396 break;
397
398 /* The end of the empty-element-tag for an XML declaration */
399
400 case LEX_XML_DECL_FINISH:
401
402 /* The end of the empty-element-tag for an element */
403
404 case LEX_TAG_FINISH:
405
406 if(tag->callback)
407 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
408 yychar=LEX_ERROR_CALLBACK;
409
410 if(stackused>0)
411 tag=tag_stack[stackused-1];
412 else
413 tag=NULL;
414
415 break;
416
417 /* The end of the end-tag for an element */
418
419 case LEX_TAG_POP:
420
421 stackused--;
422 tags=tags_stack[stackused];
423 tag =tag_stack [stackused];
424
425 if(strcmp(tag->name,yylval))
426 {
427 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",yylineno,yylval,tag->name);
428 yychar=LEX_ERROR_UNBALANCED;
429 }
430
431 if(stackused<0)
432 {
433 fprintf(stderr,"XML Parser: Error on line %d: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",yylineno,yylval,yylval);
434 yychar=LEX_ERROR_NO_START;
435 }
436
437 for(i=0;i<tag->nattributes;i++)
438 if(attributes[i])
439 {
440 free(attributes[i]);
441 attributes[i]=NULL;
442 }
443
444 if(tag->callback)
445 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
446 yychar=LEX_ERROR_CALLBACK;
447
448 if(stackused>0)
449 tag=tag_stack[stackused-1];
450 else
451 tag=NULL;
452
453 break;
454
455 /* An attribute key */
456
457 case LEX_ATTR_KEY:
458
459 attribute=-1;
460
461 for(i=0;i<tag->nattributes;i++)
462 if(!strcasecmp(yylval,tag->attributes[i]))
463 {
464 attribute=i;
465
466 break;
467 }
468
469 if(attribute==-1)
470 {
471 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
472 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(yylval,':')))
473 {
474 fprintf(stderr,"XML Parser: Error on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
475 yychar=LEX_ERROR_UNEXP_ATT;
476 }
477 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
478 fprintf(stderr,"XML Parser: Warning on line %d: unexpected attribute '%s' for tag '%s'.\n",yylineno,yylval,tag->name);
479 }
480
481 break;
482
483 /* An attribute value */
484
485 case LEX_ATTR_VAL:
486
487 if(tag->callback && attribute!=-1 && yylval)
488 attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval);
489
490 break;
491
492 /* End of file */
493
494 case LEX_EOF:
495
496 if(tag)
497 {
498 fprintf(stderr,"XML Parser: Error on line %d: end of file seen without end tag '</%s>'.\n",yylineno,tag->name);
499 yychar=LEX_ERROR_UNEXP_EOF;
500 }
501
502 break;
503
504 case LEX_ERROR_TAG_START:
505 fprintf(stderr,"XML Parser: Error on line %d: character '<' seen not at start of tag.\n",yylineno);
506 break;
507
508 case LEX_ERROR_XML_DECL_START:
509 fprintf(stderr,"XML Parser: Error on line %d: characters '<?' seen not at start of XML declaration.\n",yylineno);
510 break;
511
512 case LEX_ERROR_TAG:
513 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside tag '<%s...>'.\n",yylineno,tag->name);
514 break;
515
516 case LEX_ERROR_XML_DECL:
517 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen inside XML declaration '<?%s...>'.\n",yylineno,tag->name);
518 break;
519
520 case LEX_ERROR_ATTR:
521 fprintf(stderr,"XML Parser: Error on line %d: invalid attribute definition seen in tag.\n",yylineno);
522 break;
523
524 case LEX_ERROR_END_TAG:
525 fprintf(stderr,"XML Parser: Error on line %d: invalid character seen in end-tag.\n",yylineno);
526 break;
527
528 case LEX_ERROR_COMMENT:
529 fprintf(stderr,"XML Parser: Error on line %d: invalid comment seen.\n",yylineno);
530 break;
531
532 case LEX_ERROR_CLOSE:
533 fprintf(stderr,"XML Parser: Error on line %d: character '>' seen not at end of tag.\n",yylineno);
534 break;
535
536 case LEX_ERROR_ATTR_VAL:
537 fprintf(stderr,"XML Parser: Error on line %d: invalid character '%s' seen in attribute value.\n",yylineno,yylval);
538 break;
539
540 case LEX_ERROR_ENTITY_REF:
541 fprintf(stderr,"XML Parser: Error on line %d: invalid entity reference '%s' seen in attribute value.\n",yylineno,yylval);
542 break;
543
544 case LEX_ERROR_CHAR_REF:
545 fprintf(stderr,"XML Parser: Error on line %d: invalid character reference '%s' seen in attribute value.\n",yylineno,yylval);
546 break;
547 }
548 }
549 while(yychar>LEX_EOF && yychar<LEX_ERROR);
550
551 /* Delete the tagdata */
552
553 for(i=0;i<XMLPARSE_MAX_ATTRS;i++)
554 if(attributes[i])
555 free(attributes[i]);
556
557 if(stackdepth)
558 {
559 free(tag_stack);
560 free(tags_stack);
561 }
562
563 return(yychar);
564 }
565
566
567 /*++++++++++++++++++++++++++++++++++++++
568 Return the current parser line number.
569
570 unsigned long ParseXML_LineNumber Returns the line number.
571 ++++++++++++++++++++++++++++++++++++++*/
572
573 unsigned long ParseXML_LineNumber(void)
574 {
575 return(yylineno);
576 }
577
578
/*++++++++++++++++++++++++++++++++++++++
  Look up the replacement text for one of the five predefined XML entity references.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the (constant) replacement string or NULL if the reference is not recognised.

  const char *string The complete entity reference including the '&' and ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 static const struct
 {
  const char *reference;
  const char *replacement;
 }
 known[]={{"&amp;" ,"&" },
          {"&lt;"  ,"<" },
          {"&gt;"  ,">" },
          {"&apos;","'" },
          {"&quot;","\""}};
 size_t k;

 /* The comparison is exact and case sensitive. */

 for(k=0;k<sizeof(known)/sizeof(known[0]);k++)
    if(strcmp(string,known[k].reference)==0)
       return((char*)known[k].replacement);

 return(NULL);
}
596
597
/*++++++++++++++++++++++++++++++++++++++
  Decode an XML character reference into a one character string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the decoded string (a static buffer that the next call overwrites) or NULL if the value is not between 0 and 255.

  const char *string The complete character reference, '&#' followed by decimal digits or by 'x' and hexadecimal digits.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[2]=" ";
 const char *digits=string+2;       /* skip the leading '&#' */
 int base=10;
 long int value;

 if(*digits=='x')
   {
    digits++;
    base=16;
   }

 value=strtol(digits,NULL,base);

 if(value>=0 && value<=255)
   {
    result[0]=value&0xff;

    return(result);
   }

 return(NULL);
}
621
622
/*++++++++++++++++++++++++++++++++++++++
  Make a copy of a string in which every character that is not safe in XML is replaced by a reference.

  char *ParseXML_Encode_Safe_XML Returns a pointer to a newly allocated encoded string (or the original pointer if no character needed replacing).

  const char *string The string to convert.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexdigits[17]="0123456789ABCDEF";
 int in=0,out,limit;
 char *encoded;

 /* Find the first character that needs to be replaced. */

 while(string[in])
   {
    char c=string[in];

    if(c=='<' || c=='>' || c=='&' || c=='\'' || c=='"' || c<32 || c>126)
       break;

    in++;
   }

 if(string[in]=='\0')
    return((char*)string);

 /* The buffer always has room for the longest replacement (6 characters) plus the terminator beyond 'limit'. */

 limit=in+256-6;

 encoded=(char*)malloc(limit+7);
 strncpy(encoded,string,in);
 out=in;

 for(;;)
   {
    /* Fill the buffer up to the limit or to the end of the input. */

    while(out<limit && string[in])
      {
       const char *entity=NULL;
       char c=string[in++];

       switch(c)
         {
         case '<':  entity="&lt;";   break;
         case '>':  entity="&gt;";   break;
         case '&':  entity="&amp;";  break;
         case '\'': entity="&apos;"; break;
         case '"':  entity="&quot;"; break;
         default:   break;
         }

       if(entity)
         {
          int entitylen=(int)strlen(entity);

          memcpy(encoded+out,entity,entitylen);
          out+=entitylen;
         }
       else if(c<32 || c>126)
         {
          /* Anything outside printable ASCII becomes a two digit hexadecimal character reference. */

          encoded[out++]='&';
          encoded[out++]='#';
          encoded[out++]='x';
          encoded[out++]=hexdigits[(c&0xf0)>>4];
          encoded[out++]=hexdigits[ c&0x0f ];
          encoded[out++]=';';
         }
       else
          encoded[out++]=c;
      }

    if(!string[in])
       break;

    /* Not finished, make the buffer larger and carry on. */

    limit+=256;
    encoded=(char*)realloc((void*)encoded,limit+7);
   }

 encoded[out]=0;

 return(encoded);
}
716
717
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to an integer (checking that it really is an integer).

  int ParseXML_GetInteger Returns 1 if an integer could be found or 0 otherwise (including when the value does not fit in an int).

  const char *string The string to be parsed, an optional sign followed only by decimal digits.

  int *number Returns the number (not modified if the function returns 0).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetInteger(const char *string,int *number)
{
 const char *p=string;
 long value;

 if(*p=='-' || *p=='+')
    p++;

 /* The cast is needed because isdigit() is not defined for negative char values. */

 while(isdigit((unsigned char)*p))
    p++;

 if(*p)
    return(0);

 /* strtol() reports overflow through errno whereas the result of atoi() is undefined in that case.
    A string with no digits at all is still accepted and gives zero, as it always has done. */

 errno=0;
 value=strtol(string,NULL,10);

 if(errno==ERANGE || value<INT_MIN || value>INT_MAX)
    return(0);

 *number=(int)value;

 return(1);
}
745
746
/*++++++++++++++++++++++++++++++++++++++
  Convert a string to a floating point number (checking that it really is a number).

  int ParseXML_GetFloating Returns 1 if a number could be found or 0 otherwise.

  const char *string The string to be parsed, an optional sign, digits with at most one decimal point and an optional exponent.

  double *number Returns the number (not modified if the function returns 0).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_GetFloating(const char *string,double *number)
{
 const char *p=string;
 int seen_point=0;

 if(*p=='-' || *p=='+')
    p++;

 /* The mantissa may contain only one decimal point, a second one ends it and is then rejected below.
    The casts are needed because isdigit() is not defined for negative char values. */

 while(isdigit((unsigned char)*p) || (*p=='.' && !seen_point))
   {
    if(*p=='.')
       seen_point=1;

    p++;
   }

 if(*p=='e' || *p=='E')
   {
    p++;

    if(*p=='-' || *p=='+')
       p++;

    while(isdigit((unsigned char)*p))
       p++;
   }

 if(*p)
    return(0);

 /* strtod() gives the same result as atof() but has defined behaviour if the value is out of range. */

 *number=strtod(string,NULL);

 return(1);
}

Properties

Name Value
cvs:description A simple generic XML parser.