Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /branches/2.4.1-dev/src/xmlparse.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1215 - (show annotations) (download) (as text)
Mon Dec 17 10:56:11 2012 UTC (12 years, 3 months ago) by amb
File MIME type: text/x-csrc
File size: 67729 byte(s)
Merge the remaining trunk changes into the 2.4.1 dev branch.

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2012 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <strings.h>
30
31 #include "xmlparse.h"
32
33
/* Parser states */

/* Every value below is a possible value of the 'state' variable in ParseXML().
   ParseXML() returns the state it stopped in, so LEX_EOF (0) doubles as the
   "parsed OK" return value and the LEX_ERROR_* values double as error codes. */

#define LEX_EOF 0

/* Functional states: perform an action for a recognised token and then
   continue in the state stored by NEXT().  (The value 5 is not used.) */

#define LEX_FUNC_TAG_BEGIN 1
#define LEX_FUNC_XML_DECL_BEGIN 2
#define LEX_FUNC_TAG_POP 3
#define LEX_FUNC_TAG_PUSH 4
#define LEX_FUNC_XML_DECL_FINISH 6
#define LEX_FUNC_TAG_FINISH 7
#define LEX_FUNC_ATTR_KEY 8
#define LEX_FUNC_ATTR_VAL 9

/* Parsing states: consume characters from the input buffer. */

#define LEX_STATE_INITIAL 10
#define LEX_STATE_BANGTAG 11
#define LEX_STATE_COMMENT 12
#define LEX_STATE_XML_DECL_START 13
#define LEX_STATE_XML_DECL 14
#define LEX_STATE_TAG_START 15
#define LEX_STATE_TAG 16
#define LEX_STATE_ATTR_KEY 17
#define LEX_STATE_ATTR_VAL 18
#define LEX_STATE_END_TAG1 19
#define LEX_STATE_END_TAG2 20
#define LEX_STATE_DQUOTED 21
#define LEX_STATE_SQUOTED 22

/* Error states raised by the parsing states (problems with the characters seen). */

#define LEX_ERROR_TAG_START 101
#define LEX_ERROR_XML_DECL_START 102
#define LEX_ERROR_TAG 103
#define LEX_ERROR_XML_DECL 104
#define LEX_ERROR_ATTR 105
#define LEX_ERROR_END_TAG 106
#define LEX_ERROR_COMMENT 107
#define LEX_ERROR_CLOSE 108
#define LEX_ERROR_ATTR_VAL 109
#define LEX_ERROR_ENTITY_REF 110
#define LEX_ERROR_CHAR_REF 111
#define LEX_ERROR_TEXT_OUTSIDE 112

/* Error states raised by the functional states (problems with the tag structure). */

#define LEX_ERROR_UNEXP_TAG 201
#define LEX_ERROR_UNBALANCED 202
#define LEX_ERROR_NO_START 203
#define LEX_ERROR_UNEXP_ATT 204
#define LEX_ERROR_UNEXP_EOF 205
#define LEX_ERROR_XML_NOT_FIRST 206

/* Other error states. */

#define LEX_ERROR_OUT_OF_MEMORY 254
#define LEX_ERROR_CALLBACK 255
83
84
/* Parsing variables and functions */

/* The line number of the input currently being parsed (reset to 1 by ParseXML). */
static unsigned long long lineno;

/* Two input buffers; buffer_refill() switches between them so that an
   unfinished token can be copied to the start of the other one. */
static char buffer[2][16384];

/* buffer_token - the start of the token being collected (NULL if none).
   buffer_end   - the last valid character read into the active buffer.
   buffer_ptr   - the character currently being examined by the parser. */
static char *buffer_token,*buffer_end,*buffer_ptr;

/* The index (0 or 1) of the buffer currently in use. */
static int buffer_active=0;
92
93
94 /*++++++++++++++++++++++++++++++++++++++
95 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
96
97 int buffer_refill Return 0 if everything is OK or 1 for EOF.
98
99 int fd The file descriptor to read from.
100 ++++++++++++++++++++++++++++++++++++++*/
101
102 static inline int buffer_refill(int fd)
103 {
104 ssize_t n,m=0;
105
106 m=(buffer_end-buffer[buffer_active])+1;
107
108 if(m>(sizeof(buffer[0])/2)) /* more than half full */
109 {
110 m=0;
111
112 buffer_active=!buffer_active;
113
114 if(buffer_token)
115 {
116 m=(buffer_end-buffer_token)+1;
117
118 memcpy(buffer[buffer_active],buffer_token,m);
119
120 buffer_token=buffer[buffer_active];
121 }
122 }
123
124 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
125
126 buffer_ptr=buffer[buffer_active]+m;
127 buffer_end=buffer[buffer_active]+m+n-1;
128
129 if(n<=0)
130 return(1);
131 else
132 return(0);
133 }
134
/* Macros to simplify the parser (and make it look more like lex) */

/* These macros are only usable inside ParseXML(): they refer to its local
   variables 'state', 'next_state' and 'fd' and to its 'new_state' label. */

/* Change to state xx immediately by jumping back to the state dispatch. */
#define BEGIN(xx) do{ state=(xx); goto new_state; } while(0)

/* Remember state xx as the one to continue in after a functional state has run. */
#define NEXT(xx) next_state=(xx)

/* Mark the current character as the start of a token / forget the current token. */
#define START_TOKEN buffer_token=buffer_ptr
#define END_TOKEN buffer_token=NULL

/* Advance to the next input character, refilling the buffer when the last
   valid character has been reached; changes to LEX_EOF if no more data. */
#define NEXT_CHAR \
 do{ \
    if(buffer_ptr==buffer_end) \
      { if(buffer_refill(fd)) BEGIN(LEX_EOF); } \
    else \
       buffer_ptr++; \
   } while(0)
150
151
152 /* -------- equivalent flex definition --------
153
154 S [ \t\r]
155 N (\n)
156
157 U1 [\x09\x0A\x0D\x20-\x7F]
158 U2 [\xC2-\xDF][\x80-\xBF]
159 U3a \xE0[\xA0-\xBF][\x80-\xBF]
160 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
161 U3c \xED[\x80-\x9F][\x80-\xBF]
162 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
163 U3 {U3a}|{U3b}|{U3c}|{U3d}
164 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
165 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
166 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
167 U4 {U4a}|{U4b}|{U4c}
168
169 U ({U1}|{U2}|{U3}|{U4})
170
171 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
172
173 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
174 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
175
176 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
177 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
178
179 letter [a-zA-Z]
180 digit [0-9]
181 xdigit [a-fA-F0-9]
182
183 namechar ({letter}|{digit}|[-._:])
184 namestart ({letter}|[_:])
185 name ({namestart}{namechar}*)
186
187 entityref (&{name};)
188 charref (&#({digit}+|x{xdigit}+);)
189
190 -------- equivalent flex definition -------- */
191
/* Tables containing character class definitions (advance declaration for data at end of file). */

/* quotedD/quotedS are indexed by a byte seen inside a double/single quoted
   attribute value; the result selects the case handled in ParseXML(). */
static const unsigned char quotedD[256],quotedS[256];

/* One table pointer per continuation byte of each multi-byte UTF-8 sequence type. */
static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];

/* Tables indexed by a byte; non-zero if the byte belongs to the named character class. */
static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
196
197
198 /*++++++++++++++++++++++++++++++++++++++
199 A function to call the callback function with the parameters needed.
200
201 int call_callback Returns 1 if the callback returned with an error.
202
203 const char *name The name of the tag.
204
205 int (*callback)() The callback function.
206
207 int type The type of tag (start and/or end).
208
209 int nattributes The number of attributes collected.
210
211 char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
212 ++++++++++++++++++++++++++++++++++++++*/
213
214 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,char *attributes[XMLPARSE_MAX_ATTRS])
215 {
216 switch(nattributes)
217 {
218 case 0: return (*callback)(name,type);
219 case 1: return (*callback)(name,type,attributes[0]);
220 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
221 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
222 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
223 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
224 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
225 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
226 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
227 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
228 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
229 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
230 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
231 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
232 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
233 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
234 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
235
236 default:
237 fprintf(stderr,"XML Parser: Error on line %llu: too many attributes for tag '%s' source code needs changing.\n",lineno,name);
238 exit(1);
239 }
240 }
241
242
243 /*++++++++++++++++++++++++++++++++++++++
244 Parse the XML and call the functions for each tag as seen.
245
246 int ParseXML Returns 0 if OK or something else in case of an error.
247
248 in fd The file descriptor of the file to parse.
249
250 xmltag **tags The array of pointers to tags for the top level.
251
252 int options A list of XML Parser options OR-ed together.
253 ++++++++++++++++++++++++++++++++++++++*/
254
255 int ParseXML(int fd,xmltag **tags,int options)
256 {
257 int i;
258 int state,next_state,after_attr;
259 unsigned char saved_buffer_ptr=0;
260 const unsigned char *quoted;
261
262 char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
263 int attribute=0;
264
265 int stackdepth=0,stackused=0;
266 xmltag ***tags_stack=NULL;
267 xmltag **tag_stack=NULL;
268 xmltag *tag=NULL;
269
270 /* The actual parser. */
271
272 lineno=1;
273
274 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
275 buffer_token=NULL;
276
277 buffer_refill(fd);
278
279 BEGIN(LEX_STATE_INITIAL);
280
281 new_state:
282
283 switch(state)
284 {
285 /* ================ Parsing states ================ */
286
287
288 /* -------- equivalent flex definition --------
289
290 <INITIAL>"<!" { BEGIN(BANGTAG); }
291 <INITIAL>"</" { BEGIN(END_TAG1); }
292 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
293 <INITIAL>"<" { BEGIN(TAG_START); }
294
295 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
296
297 <INITIAL>{N} { lineno++; }
298 <INITIAL>{S}+ { }
299 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
300
301 -------- equivalent flex definition -------- */
302
303 case LEX_STATE_INITIAL:
304
305 while(1)
306 {
307 while(whitespace[(int)*(unsigned char*)buffer_ptr])
308 NEXT_CHAR;
309
310 if(*buffer_ptr=='\n')
311 {
312 NEXT_CHAR;
313
314 lineno++;
315 }
316 else if(*buffer_ptr=='<')
317 {
318 NEXT_CHAR;
319
320 if(*buffer_ptr=='/')
321 {
322 NEXT_CHAR;
323 BEGIN(LEX_STATE_END_TAG1);
324 }
325 else if(*buffer_ptr=='!')
326 {
327 NEXT_CHAR;
328 BEGIN(LEX_STATE_BANGTAG);
329 }
330 else if(*buffer_ptr=='?')
331 {
332 NEXT_CHAR;
333 BEGIN(LEX_STATE_XML_DECL_START);
334 }
335 else
336 BEGIN(LEX_STATE_TAG_START);
337 }
338 else if(*buffer_ptr=='>')
339 BEGIN(LEX_ERROR_CLOSE);
340 else
341 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
342 }
343
344 break;
345
346 /* -------- equivalent flex definition --------
347
348 <BANGTAG>"--" { BEGIN(COMMENT); }
349 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
350 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
351
352 -------- equivalent flex definition -------- */
353
354 case LEX_STATE_BANGTAG:
355
356 if(*buffer_ptr!='-')
357 BEGIN(LEX_ERROR_TAG_START);
358
359 NEXT_CHAR;
360
361 if(*buffer_ptr!='-')
362 BEGIN(LEX_ERROR_TAG_START);
363
364 NEXT_CHAR;
365 BEGIN(LEX_STATE_COMMENT);
366
367 break;
368
369 /* -------- equivalent flex definition --------
370
371 <COMMENT>"-->" { BEGIN(INITIAL); }
372 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
373 <COMMENT>"-" { }
374 <COMMENT>{N} { lineno++; }
375 <COMMENT>[^-\n]+ { }
376
377 -------- equivalent flex definition -------- */
378
379 case LEX_STATE_COMMENT:
380
381 while(1)
382 {
383 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
384 NEXT_CHAR;
385
386 if(*buffer_ptr=='-')
387 {
388 NEXT_CHAR;
389
390 if(*buffer_ptr!='-')
391 continue;
392
393 NEXT_CHAR;
394 if(*buffer_ptr=='>')
395 {
396 NEXT_CHAR;
397 BEGIN(LEX_STATE_INITIAL);
398 }
399
400 BEGIN(LEX_ERROR_COMMENT);
401 }
402 else /* if(*buffer_ptr=='\n') */
403 {
404 NEXT_CHAR;
405
406 lineno++;
407 }
408 }
409
410 break;
411
412 /* -------- equivalent flex definition --------
413
414 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
415 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
416 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
417
418 -------- equivalent flex definition -------- */
419
420 case LEX_STATE_XML_DECL_START:
421
422 START_TOKEN;
423
424 if(*buffer_ptr=='x')
425 {
426 NEXT_CHAR;
427 if(*buffer_ptr=='m')
428 {
429 NEXT_CHAR;
430 if(*buffer_ptr=='l')
431 {
432 NEXT_CHAR;
433
434 saved_buffer_ptr=*buffer_ptr;
435 *buffer_ptr=0;
436
437 NEXT(LEX_STATE_XML_DECL);
438 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
439 }
440 }
441 }
442
443 BEGIN(LEX_ERROR_XML_DECL_START);
444
445 /* -------- equivalent flex definition --------
446
447 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
448 <XML_DECL>{S}+ { }
449 <XML_DECL>{N} { lineno++; }
450 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
451 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
452
453 -------- equivalent flex definition -------- */
454
455 case LEX_STATE_XML_DECL:
456
457 while(1)
458 {
459 while(whitespace[(int)*(unsigned char*)buffer_ptr])
460 NEXT_CHAR;
461
462 if(namestart[(int)*(unsigned char*)buffer_ptr])
463 {
464 START_TOKEN;
465
466 NEXT_CHAR;
467 while(namechar[(int)*(unsigned char*)buffer_ptr])
468 NEXT_CHAR;
469
470 saved_buffer_ptr=*buffer_ptr;
471 *buffer_ptr=0;
472
473 after_attr=LEX_STATE_XML_DECL;
474 NEXT(LEX_STATE_ATTR_KEY);
475 BEGIN(LEX_FUNC_ATTR_KEY);
476 }
477 else if(*buffer_ptr=='?')
478 {
479 NEXT_CHAR;
480 if(*buffer_ptr=='>')
481 {
482 NEXT_CHAR;
483 NEXT(LEX_STATE_INITIAL);
484 BEGIN(LEX_FUNC_XML_DECL_FINISH);
485 }
486
487 BEGIN(LEX_ERROR_XML_DECL);
488 }
489 else if(*buffer_ptr=='\n')
490 {
491 NEXT_CHAR;
492 lineno++;
493 }
494 else
495 BEGIN(LEX_ERROR_XML_DECL);
496 }
497
498 break;
499
500 /* -------- equivalent flex definition --------
501
502 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
503 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
504 <TAG_START>. { return(LEX_ERROR_TAG_START); }
505
506 -------- equivalent flex definition -------- */
507
508 case LEX_STATE_TAG_START:
509
510 if(namestart[(int)*(unsigned char*)buffer_ptr])
511 {
512 START_TOKEN;
513
514 NEXT_CHAR;
515 while(namechar[(int)*(unsigned char*)buffer_ptr])
516 NEXT_CHAR;
517
518 saved_buffer_ptr=*buffer_ptr;
519 *buffer_ptr=0;
520
521 NEXT(LEX_STATE_TAG);
522 BEGIN(LEX_FUNC_TAG_BEGIN);
523 }
524
525 BEGIN(LEX_ERROR_TAG_START);
526
527 /* -------- equivalent flex definition --------
528
529 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
530 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
531 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
532
533 -------- equivalent flex definition -------- */
534
535 case LEX_STATE_END_TAG1:
536
537 if(namestart[(int)*(unsigned char*)buffer_ptr])
538 {
539 START_TOKEN;
540
541 NEXT_CHAR;
542 while(namechar[(int)*(unsigned char*)buffer_ptr])
543 NEXT_CHAR;
544
545 saved_buffer_ptr=*buffer_ptr;
546 *buffer_ptr=0;
547
548 NEXT(LEX_STATE_END_TAG2);
549 BEGIN(LEX_FUNC_TAG_POP);
550 }
551
552 BEGIN(LEX_ERROR_END_TAG);
553
554 /* -------- equivalent flex definition --------
555
556 <END_TAG2>">" { BEGIN(INITIAL); }
557 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
558 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
559
560 -------- equivalent flex definition -------- */
561
562 case LEX_STATE_END_TAG2:
563
564 if(*buffer_ptr=='>')
565 {
566 NEXT_CHAR;
567
568 BEGIN(LEX_STATE_INITIAL);
569 }
570
571 BEGIN(LEX_ERROR_END_TAG);
572
573 /* -------- equivalent flex definition --------
574
575 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
576 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
577 <TAG>{S}+ { }
578 <TAG>{N} { lineno++; }
579 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
580 <TAG>. { return(LEX_ERROR_TAG); }
581
582 -------- equivalent flex definition -------- */
583
584 case LEX_STATE_TAG:
585
586 while(1)
587 {
588 while(whitespace[(int)*(unsigned char*)buffer_ptr])
589 NEXT_CHAR;
590
591 if(namestart[(int)*(unsigned char*)buffer_ptr])
592 {
593 START_TOKEN;
594
595 NEXT_CHAR;
596 while(namechar[(int)*(unsigned char*)buffer_ptr])
597 NEXT_CHAR;
598
599 saved_buffer_ptr=*buffer_ptr;
600 *buffer_ptr=0;
601
602 after_attr=LEX_STATE_TAG;
603 NEXT(LEX_STATE_ATTR_KEY);
604 BEGIN(LEX_FUNC_ATTR_KEY);
605 }
606 else if(*buffer_ptr=='/')
607 {
608 NEXT_CHAR;
609 if(*buffer_ptr=='>')
610 {
611 NEXT_CHAR;
612 NEXT(LEX_STATE_INITIAL);
613 BEGIN(LEX_FUNC_TAG_FINISH);
614 }
615
616 BEGIN(LEX_ERROR_TAG);
617 }
618 else if(*buffer_ptr=='>')
619 {
620 NEXT_CHAR;
621 NEXT(LEX_STATE_INITIAL);
622 BEGIN(LEX_FUNC_TAG_PUSH);
623 }
624 else if(*buffer_ptr=='\n')
625 {
626 NEXT_CHAR;
627 lineno++;
628 }
629 else
630 BEGIN(LEX_ERROR_TAG);
631 }
632
633 break;
634
635 /* -------- equivalent flex definition --------
636
637 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
638 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
639 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
640
641 -------- equivalent flex definition -------- */
642
643 case LEX_STATE_ATTR_KEY:
644
645 if(*buffer_ptr=='=')
646 {
647 NEXT_CHAR;
648 BEGIN(LEX_STATE_ATTR_VAL);
649 }
650
651 BEGIN(LEX_ERROR_ATTR);
652
653 /* -------- equivalent flex definition --------
654
655 <ATTR_VAL>\" { BEGIN(DQUOTED); }
656 <ATTR_VAL>\' { BEGIN(SQUOTED); }
657 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
658 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
659
660 -------- equivalent flex definition -------- */
661
662 case LEX_STATE_ATTR_VAL:
663
664 if(*buffer_ptr=='"')
665 {
666 NEXT_CHAR;
667 BEGIN(LEX_STATE_DQUOTED);
668 }
669 else if(*buffer_ptr=='\'')
670 {
671 NEXT_CHAR;
672 BEGIN(LEX_STATE_SQUOTED);
673 }
674
675 BEGIN(LEX_ERROR_ATTR);
676
677 /* -------- equivalent flex definition --------
678
679 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
680 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
681 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
682 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
683 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
684 <DQUOTED>{UquotedD} { }
685 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
686 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
687
688 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
689 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
690 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
691 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
692 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
693 <SQUOTED>{UquotedS} { append_string(yytext); }
694 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
695 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
696
697 -------- equivalent flex definition -------- */
698
699 case LEX_STATE_DQUOTED:
700 case LEX_STATE_SQUOTED:
701
702 if(state==LEX_STATE_DQUOTED)
703 quoted=quotedD;
704 else
705 quoted=quotedS;
706
707 START_TOKEN;
708
709 while(1)
710 {
711 switch(quoted[(int)*(unsigned char*)buffer_ptr])
712 {
713 case 10: /* U1 - used by all tag keys and many values */
714 do
715 {
716 NEXT_CHAR;
717 }
718 while(quoted[(int)*(unsigned char*)buffer_ptr]==10);
719 break;
720
721 case 20: /* U2 */
722 NEXT_CHAR;
723 if(!U2[0][(int)*(unsigned char*)buffer_ptr])
724 BEGIN(LEX_ERROR_ATTR_VAL);
725 NEXT_CHAR;
726 break;
727
728 case 31: /* U3a */
729 NEXT_CHAR;
730 if(!U3a[0][(int)*(unsigned char*)buffer_ptr])
731 BEGIN(LEX_ERROR_ATTR_VAL);
732 NEXT_CHAR;
733 if(!U3a[1][(int)*(unsigned char*)buffer_ptr])
734 BEGIN(LEX_ERROR_ATTR_VAL);
735 NEXT_CHAR;
736 break;
737
738 case 32: /* U3b */
739 NEXT_CHAR;
740 if(!U3b[0][(int)*(unsigned char*)buffer_ptr])
741 BEGIN(LEX_ERROR_ATTR_VAL);
742 NEXT_CHAR;
743 if(!U3b[1][(int)*(unsigned char*)buffer_ptr])
744 BEGIN(LEX_ERROR_ATTR_VAL);
745 NEXT_CHAR;
746 break;
747
748 case 33: /* U3c */
749 NEXT_CHAR;
750 if(!U3c[0][(int)*(unsigned char*)buffer_ptr])
751 BEGIN(LEX_ERROR_ATTR_VAL);
752 NEXT_CHAR;
753 if(!U3c[1][(int)*(unsigned char*)buffer_ptr])
754 BEGIN(LEX_ERROR_ATTR_VAL);
755 NEXT_CHAR;
756 break;
757
758 case 34: /* U3d */
759 NEXT_CHAR;
760 if(!U3d[0][(int)*(unsigned char*)buffer_ptr])
761 BEGIN(LEX_ERROR_ATTR_VAL);
762 NEXT_CHAR;
763 if(!U3d[1][(int)*(unsigned char*)buffer_ptr])
764 BEGIN(LEX_ERROR_ATTR_VAL);
765 NEXT_CHAR;
766 break;
767
768 case 41: /* U4a */
769 NEXT_CHAR;
770 if(!U4a[0][(int)*(unsigned char*)buffer_ptr])
771 BEGIN(LEX_ERROR_ATTR_VAL);
772 NEXT_CHAR;
773 if(!U4a[1][(int)*(unsigned char*)buffer_ptr])
774 BEGIN(LEX_ERROR_ATTR_VAL);
775 NEXT_CHAR;
776 break;
777
778 case 42: /* U4b */
779 NEXT_CHAR;
780 if(!U4b[0][(int)*(unsigned char*)buffer_ptr])
781 BEGIN(LEX_ERROR_ATTR_VAL);
782 NEXT_CHAR;
783 if(!U4b[1][(int)*(unsigned char*)buffer_ptr])
784 BEGIN(LEX_ERROR_ATTR_VAL);
785 NEXT_CHAR;
786 break;
787
788 case 43: /* U4c */
789 NEXT_CHAR;
790 if(!U4c[0][(int)*(unsigned char*)buffer_ptr])
791 BEGIN(LEX_ERROR_ATTR_VAL);
792 NEXT_CHAR;
793 if(!U4c[1][(int)*(unsigned char*)buffer_ptr])
794 BEGIN(LEX_ERROR_ATTR_VAL);
795 NEXT_CHAR;
796 break;
797
798 case 50: /* entityref or charref */
799 NEXT_CHAR;
800
801 if(*buffer_ptr=='#') /* charref */
802 {
803 int charref_len=3;
804
805 NEXT_CHAR;
806 if(digit[(int)*(unsigned char*)buffer_ptr]) /* decimal */
807 {
808 NEXT_CHAR;
809 charref_len++;
810
811 while(digit[(int)*(unsigned char*)buffer_ptr])
812 {
813 NEXT_CHAR;
814 charref_len++;
815 }
816
817 if(*buffer_ptr!=';')
818 BEGIN(LEX_ERROR_ATTR_VAL);
819 }
820 else if(*buffer_ptr=='x') /* hex */
821 {
822 NEXT_CHAR;
823 charref_len++;
824
825 while(xdigit[(int)*(unsigned char*)buffer_ptr])
826 {
827 NEXT_CHAR;
828 charref_len++;
829 }
830
831 if(*buffer_ptr!=';')
832 BEGIN(LEX_ERROR_ATTR_VAL);
833 }
834 else /* other */
835 BEGIN(LEX_ERROR_ATTR_VAL);
836
837 NEXT_CHAR;
838
839 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
840 {
841 const char *str;
842
843 saved_buffer_ptr=*buffer_ptr;
844 *buffer_ptr=0;
845
846 str=ParseXML_Decode_Char_Ref(buffer_ptr-charref_len);
847
848 if(!str)
849 {
850 buffer_ptr-=charref_len;
851 BEGIN(LEX_ERROR_CHAR_REF);
852 }
853
854 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
855 memcpy(buffer_ptr-strlen(str),str,strlen(str));
856
857 *buffer_ptr=saved_buffer_ptr;
858 }
859 }
860 else if(namestart[(int)*(unsigned char*)buffer_ptr]) /* entityref */
861 {
862 int entityref_len=3;
863
864 NEXT_CHAR;
865 while(namechar[(int)*(unsigned char*)buffer_ptr])
866 {
867 NEXT_CHAR;
868 entityref_len++;
869 }
870
871 if(*buffer_ptr!=';')
872 BEGIN(LEX_ERROR_ATTR_VAL);
873
874 NEXT_CHAR;
875
876 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
877 {
878 const char *str;
879
880 saved_buffer_ptr=*buffer_ptr;
881 *buffer_ptr=0;
882
883 str=ParseXML_Decode_Entity_Ref(buffer_ptr-entityref_len);
884
885 if(!str)
886 {
887 buffer_ptr-=entityref_len;
888 BEGIN(LEX_ERROR_ENTITY_REF);
889 }
890
891 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
892 memcpy(buffer_ptr-strlen(str),str,strlen(str));
893
894 *buffer_ptr=saved_buffer_ptr;
895 }
896 }
897 else /* other */
898 BEGIN(LEX_ERROR_ATTR_VAL);
899
900 break;
901
902 case 99: /* quote */
903 *buffer_ptr=0;
904 NEXT_CHAR;
905
906 NEXT(after_attr);
907 BEGIN(LEX_FUNC_ATTR_VAL);
908
909 default: /* other */
910 BEGIN(LEX_ERROR_ATTR_VAL);
911 }
912 }
913
914 break;
915
916
917 /* ================ Functional states ================ */
918
919
920 /* The start of a tag for an XML declaration */
921
922 case LEX_FUNC_XML_DECL_BEGIN:
923
924 if(tag_stack)
925 BEGIN(LEX_ERROR_XML_NOT_FIRST);
926
927 /* The start of a tag for an element */
928
929 case LEX_FUNC_TAG_BEGIN:
930
931 tag=NULL;
932
933 for(i=0;tags[i];i++)
934 if(!strcasecmp(buffer_token,tags[i]->name))
935 {
936 tag=tags[i];
937
938 for(i=0;i<tag->nattributes;i++)
939 attributes[i]=NULL;
940
941 break;
942 }
943
944 if(tag==NULL)
945 BEGIN(LEX_ERROR_UNEXP_TAG);
946
947 END_TOKEN;
948
949 *buffer_ptr=saved_buffer_ptr;
950 BEGIN(next_state);
951
952 /* The end of the start-tag for an element */
953
954 case LEX_FUNC_TAG_PUSH:
955
956 if(stackused==stackdepth)
957 {
958 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
959 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
960 }
961
962 tag_stack [stackused]=tag;
963 tags_stack[stackused]=tags;
964 stackused++;
965
966 if(tag->callback)
967 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
968 BEGIN(LEX_ERROR_CALLBACK);
969
970 tags=tag->subtags;
971
972 BEGIN(next_state);
973
974 /* The end of the empty-element-tag for an XML declaration */
975
976 case LEX_FUNC_XML_DECL_FINISH:
977
978 /* The end of the empty-element-tag for an element */
979
980 case LEX_FUNC_TAG_FINISH:
981
982 if(tag->callback)
983 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
984 BEGIN(LEX_ERROR_CALLBACK);
985
986 if(stackused>0)
987 tag=tag_stack[stackused-1];
988 else
989 tag=NULL;
990
991 BEGIN(next_state);
992
993 /* The end of the end-tag for an element */
994
995 case LEX_FUNC_TAG_POP:
996
997 stackused--;
998 tags=tags_stack[stackused];
999 tag =tag_stack [stackused];
1000
1001 if(strcmp(tag->name,buffer_token))
1002 BEGIN(LEX_ERROR_UNBALANCED);
1003
1004 if(stackused<0)
1005 BEGIN(LEX_ERROR_NO_START);
1006
1007 for(i=0;i<tag->nattributes;i++)
1008 attributes[i]=NULL;
1009
1010 if(tag->callback)
1011 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1012 BEGIN(LEX_ERROR_CALLBACK);
1013
1014 if(stackused>0)
1015 tag=tag_stack[stackused-1];
1016 else
1017 tag=NULL;
1018
1019 END_TOKEN;
1020
1021 *buffer_ptr=saved_buffer_ptr;
1022 BEGIN(next_state);
1023
1024 /* An attribute key */
1025
1026 case LEX_FUNC_ATTR_KEY:
1027
1028 attribute=-1;
1029
1030 for(i=0;i<tag->nattributes;i++)
1031 if(!strcasecmp(buffer_token,tag->attributes[i]))
1032 {
1033 attribute=i;
1034
1035 break;
1036 }
1037
1038 if(attribute==-1)
1039 {
1040 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1041 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr(buffer_token,':')))
1042 BEGIN(LEX_ERROR_UNEXP_ATT);
1043 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1044 fprintf(stderr,"XML Parser: Warning on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1045 }
1046
1047 END_TOKEN;
1048
1049 *buffer_ptr=saved_buffer_ptr;
1050 BEGIN(next_state);
1051
1052 /* An attribute value */
1053
1054 case LEX_FUNC_ATTR_VAL:
1055
1056 if(tag->callback && attribute!=-1)
1057 attributes[attribute]=buffer_token;
1058
1059 END_TOKEN;
1060
1061 BEGIN(next_state);
1062
1063 /* End of file */
1064
1065 case LEX_EOF:
1066
1067 if(tag)
1068 BEGIN(LEX_ERROR_UNEXP_EOF);
1069
1070 break;
1071
1072
1073 /* ================ Error states ================ */
1074
1075
1076 case LEX_ERROR_TAG_START:
1077 fprintf(stderr,"XML Parser: Error on line %llu: character '<' seen not at start of tag.\n",lineno);
1078 break;
1079
1080 case LEX_ERROR_XML_DECL_START:
1081 fprintf(stderr,"XML Parser: Error on line %llu: characters '<?' seen not at start of XML declaration.\n",lineno);
1082 break;
1083
1084 case LEX_ERROR_TAG:
1085 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1086 break;
1087
1088 case LEX_ERROR_XML_DECL:
1089 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1090 break;
1091
1092 case LEX_ERROR_ATTR:
1093 fprintf(stderr,"XML Parser: Error on line %llu: invalid attribute definition seen in tag.\n",lineno);
1094 break;
1095
1096 case LEX_ERROR_END_TAG:
1097 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen in end-tag.\n",lineno);
1098 break;
1099
1100 case LEX_ERROR_COMMENT:
1101 fprintf(stderr,"XML Parser: Error on line %llu: invalid comment seen.\n",lineno);
1102 break;
1103
1104 case LEX_ERROR_CLOSE:
1105 fprintf(stderr,"XML Parser: Error on line %llu: character '>' seen not at end of tag.\n",lineno);
1106 break;
1107
1108 case LEX_ERROR_ATTR_VAL:
1109 fprintf(stderr,"XML Parser: Error on line %llu: invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1110 break;
1111
1112 case LEX_ERROR_ENTITY_REF:
1113 fprintf(stderr,"XML Parser: Error on line %llu: invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1114 break;
1115
1116 case LEX_ERROR_CHAR_REF:
1117 fprintf(stderr,"XML Parser: Error on line %llu: invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1118 break;
1119
1120 case LEX_ERROR_TEXT_OUTSIDE:
1121 fprintf(stderr,"XML Parser: Error on line %llu: non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1122 break;
1123
1124 case LEX_ERROR_UNEXP_TAG:
1125 fprintf(stderr,"XML Parser: Error on line %llu: unexpected tag '%s'.\n",lineno,buffer_token);
1126 break;
1127
1128 case LEX_ERROR_UNBALANCED:
1129 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1130 break;
1131
1132 case LEX_ERROR_NO_START:
1133 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1134 break;
1135
1136 case LEX_ERROR_UNEXP_ATT:
1137 fprintf(stderr,"XML Parser: Error on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1138 break;
1139
1140 case LEX_ERROR_UNEXP_EOF:
1141 fprintf(stderr,"XML Parser: Error on line %llu: end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1142 break;
1143
1144 case LEX_ERROR_XML_NOT_FIRST:
1145 fprintf(stderr,"XML Parser: Error on line %llu: XML declaration '<?xml...>' not before all other tags.\n",lineno);
1146 break;
1147 }
1148
1149 /* Delete the tagdata */
1150
1151 if(stackdepth)
1152 {
1153 free(tag_stack);
1154 free(tags_stack);
1155 }
1156
1157 return(state);
1158 }
1159
1160
1161 /*++++++++++++++++++++++++++++++++++++++
1162 Return the current parser line number.
1163
1164 unsigned long long ParseXML_LineNumber Returns the line number.
1165 ++++++++++++++++++++++++++++++++++++++*/
1166
1167 unsigned long long ParseXML_LineNumber(void)
1168 {
1169 return(lineno);
1170 }
1171
1172
/*++++++++++++++++++++++++++++++++++++++
  Translate one of the five predefined XML entity references into the character it stands for.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character string, or NULL if
                                   the reference is not one of the five that are recognised.

  const char *string The complete entity reference including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison is exact and case-sensitive; the order matches the original tests. */
 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 known[]={{"&amp;" ,"&" },
          {"&lt;"  ,"<" },
          {"&gt;"  ,">" },
          {"&apos;","'" },
          {"&quot;","\""}};
 size_t n;

 for(n=0;n<sizeof(known)/sizeof(known[0]);n++)
    if(strcmp(string,known[n].reference)==0)
       return(known[n].replacement);

 return(NULL);
}
1190
1191
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into the UTF-8 encoding of the character.

  char *ParseXML_Decode_Char_Ref Returns a pointer to a static buffer holding the NUL-terminated
                                 UTF-8 sequence; it is overwritten by the next call.

  const char *string The character reference string, either "&#NNN;" (decimal) or "&#xHHH;"
                     (hexadecimal); the digits are read starting after the "&#" or "&#x".

  Values that cannot be encoded as valid UTF-8 (negative, a surrogate 0xD800-0xDFFF, or above
  0x10FFFF) are replaced by U+FFFD, the Unicode replacement character.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 char *out=result;
 long int unicode;

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Reject anything that is not a Unicode scalar value (strtol overflow also lands here). */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx */
    *out++=(char)unicode;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    *out++=(char)(0xC0|( unicode>>6       ));
    *out++=(char)(0x80|( unicode     &0x3F));
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    *out++=(char)(0xE0|( unicode>>12      ));
    *out++=(char)(0x80|((unicode>>6 )&0x3F));
    *out++=(char)(0x80|( unicode     &0x3F));
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    *out++=(char)(0xF0|( unicode>>18      ));
    *out++=(char)(0x80|((unicode>>12)&0x3F));
    *out++=(char)(0x80|((unicode>>6 )&0x3F));
    *out++=(char)(0x80|( unicode     &0x3F));
   }

 *out=0;

 return(result);
}
1247
1248
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if nothing needed to be escaped,
                                 otherwise a newly malloc'd string (the caller can tell the two
                                 apart by comparing with the argument), or NULL if memory could
                                 not be allocated.

  const char *string The NUL-terminated string to convert (treated as UTF-8).

  The characters ' & " < > become the named entities &apos; &amp; &quot; &lt; &gt;.  Bytes below
  0x20 and UTF-8 multi-byte sequences become hexadecimal character references "&#xHH..;".  A byte
  that does not start a well-formed sequence becomes "&#xFFFD;".
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 /* The longest output for one input character is "&#x10FFFF;" style: 3 + 6 hex digits + ';'. */
 enum { LONGEST_ESCAPE=10, GROWTH=256 };
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first character that needs escaping; if there is none return the input itself. */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 size=i+GROWTH;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    const char *entity=NULL;

    /* Always keep room for the longest escape plus the terminating NUL. */

    if(size-j<(size_t)LONGEST_ESCAPE+1)
      {
       char *bigger=(char*)realloc(result,size+GROWTH);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
       size+=GROWTH;
      }

    /* The named entities must be tested before the plain-ASCII copy since they are ASCII too. */

    switch(in[i])
      {
      case '\'': entity="&apos;"; break;
      case '&':  entity="&amp;";  break;
      case '"':  entity="&quot;"; break;
      case '<':  entity="&lt;";   break;
      case '>':  entity="&gt;";   break;
      default:                    break;
      }

    if(entity)
      {
       size_t n=strlen(entity);

       memcpy(result+j,entity,n);
       j+=n;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (the continuation tests stop at the NUL so never read past the end) */

       if((in[i]&0x80)==0)
         {
          /* 0000 0000-0000 007F  =>  0xxxxxxx (only control characters reach here) */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;       /* not a well-formed sequence; only this one byte is consumed */

       /* Output the character reference using 2, 4 or 6 hexadecimal digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[(unicode>>20)&0x0f];
          result[j++]=hexstring[(unicode>>16)&0x0f];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[(unicode>>12)&0x0f];
          result[j++]=hexstring[(unicode>>8 )&0x0f];
         }
       result[j++]=hexstring[(unicode>>4)&0x0f];
       result[j++]=hexstring[ unicode    &0x0f];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1389
1390
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is an integer.

  int ParseXML_IsInteger Returns 1 if the whole string is an optionally signed run of one or more
                         decimal digits, or 0 otherwise (including for "", "-" and "+").

  const char *string The NUL-terminated string to be checked.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsInteger(const char *string)
{
 const unsigned char *p=(const unsigned char*)string;
 const unsigned char *first_digit;

 if(*p=='-' || *p=='+')
    p++;

 first_digit=p;

 while(*p>='0' && *p<='9')
    p++;

 /* At least one digit must have been consumed and nothing may follow the digits. */

 if(p==first_digit || *p)
    return(0);
 else
    return(1);
}
1414
1415
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is a floating point number.

  int ParseXML_IsFloating Returns 1 if the whole string is a floating point number or 0 otherwise.

  const char *string The NUL-terminated string to be checked.

  The accepted form is an optional sign, digits with at most one decimal point (at least one digit
  is required on either side of the point), then optionally 'e' or 'E', an optional sign and at
  least one exponent digit.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsFloating(const char *string)
{
 const unsigned char *p=(const unsigned char*)string;
 int seen_digit=0,seen_point=0;

 if(*p=='-' || *p=='+')
    p++;

 /* Mantissa: digits with no more than one '.'; a second '.' ends the scan and fails below. */

 for(;;p++)
    if(*p>='0' && *p<='9')
       seen_digit=1;
    else if(*p=='.' && !seen_point)
       seen_point=1;
    else
       break;

 if(!seen_digit)
    return(0);

 if(*p=='e' || *p=='E')
   {
    const unsigned char *first_digit;

    p++;

    if(*p=='-' || *p=='+')
       p++;

    first_digit=p;

    while(*p>='0' && *p<='9')
       p++;

    /* An exponent marker with no digits is not a number. */

    if(p==first_digit)
       return(0);
   }

 if(*p)
    return(0);
 else
    return(1);
}
1450
1451
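1452 /* Table giving a class code for each byte in a double-quoted string: 99 for the '"' character, 50 for '&', 10 for the other single-byte characters with a non-zero entry, 20, 31-34 and 41-43 for the UTF-8 lead bytes 0xc2-0xf4, and 0 for every other byte (including '<' and '>'). */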
1453 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1455 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1456 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1457 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1458 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1459 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1460 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1465 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1466 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1467 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1468 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1469
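1470 /* Table giving a class code for each byte in a single-quoted string: 99 for the ''' character, 50 for '&', 10 for the other single-byte characters with a non-zero entry, 20, 31-34 and 41-43 for the UTF-8 lead bytes 0xc2-0xf4, and 0 for every other byte (including '<' and '>'). */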
1471 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1473 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1474 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1475 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1476 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1477 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1478 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1483 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1484 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1485 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1486 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1487
1488 /* Table for checking for characters between 0x80 and 0x8f. */
1489 static const unsigned char U_80_8F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1497 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1505
1506 /* Table for checking for characters between 0x80 and 0x9f. */
1507 static const unsigned char U_80_9F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1515 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1516 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1523
1524 /* Table for checking for characters between 0x80 and 0xbf. */
1525 static const unsigned char U_80_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1539 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1540 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1541
1542 /* Table for checking for characters between 0x90 and 0xbf. */
1543 static const unsigned char U_90_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1548 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1552 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1555 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1559
1560 /* Table for checking for characters between 0xa0 and 0xbf. */
1561 static const unsigned char U_A0_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1571 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1577
1578 /* U2: the range table for the one byte after a 2-byte UTF-8 lead byte (0x80-0xbf). NOTE(review): presumably selected by class 20 of the quoted tables - confirm against the lexer. */
1579 static const unsigned char *U2[1]={ U_80_BF };
1580
1581 /* U3a: range tables for the two bytes after a 3-byte lead, first 0xa0-0xbf then 0x80-0xbf. NOTE(review): presumably for lead byte 0xe0 (class 31) - confirm. */
1582 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1583
1584 /* U3b: range tables for the two bytes after a 3-byte lead, both 0x80-0xbf. NOTE(review): presumably for lead bytes 0xe1-0xec (class 32) - confirm. */
1585 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1586
1587 /* U3c: range tables for the two bytes after a 3-byte lead, first 0x80-0x9f then 0x80-0xbf. NOTE(review): presumably for lead byte 0xed (class 33) - confirm. */
1588 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1589
1590 /* U3d: range tables for the two bytes after a 3-byte lead, both 0x80-0xbf (same contents as U3b). NOTE(review): presumably for lead bytes 0xee-0xef (class 34) - confirm. */
1591 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1592
1593 /* U4a: range tables for the three bytes after a 4-byte lead, first 0x90-0xbf then 0x80-0xbf twice. NOTE(review): presumably for lead byte 0xf0 (class 41) - confirm. */
1594 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1595
1596 /* U4b: range tables for the three bytes after a 4-byte lead, all 0x80-0xbf. NOTE(review): presumably for lead bytes 0xf1-0xf3 (class 42) - confirm. */
1597 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1598
1599 /* U4c: range tables for the three bytes after a 4-byte lead, first 0x80-0x8f then 0x80-0xbf twice. NOTE(review): presumably for lead byte 0xf4 (class 43) - confirm. */
1600 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1601
1602 /* Table for checking for characters that can start a name: ':', '_', 'A'-'Z' and 'a'-'z'. */
1603 static const unsigned char namestart[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1605 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1606 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1607 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1608 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1609 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1610 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1619
1620 /* Table for checking for characters that can continue a name: '-', '.', '0'-'9', ':', '_', 'A'-'Z' and 'a'-'z'. */
1621 static const unsigned char namechar[256] ={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1622 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1624 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1625 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1627 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1628 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1637
1638 /* Table for checking for whitespace characters: tab (0x09), carriage return (0x0d) and space (0x20); linefeed (0x0a) is not flagged in this table. */
1639 static const unsigned char whitespace[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, /* 0x00-0x0f " " */
1640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1641 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1655
1656 /* Table for checking for digit characters. */
1657 static const unsigned char digit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1660 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1673
1674 /* Table for checking for xdigit characters. */
1675 static const unsigned char xdigit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1678 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1679 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1681 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */