Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1272 - (show annotations) (download) (as text)
Sat Apr 13 10:34:35 2013 UTC (12 years ago) by amb
File MIME type: text/x-csrc
File size: 67283 byte(s)
Fix XML character quoting for characters within the 7-bit printable ASCII range
(bug reported by Dirk Eversmann).

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2013 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <stdint.h>
29 #include <string.h>
30 #include <strings.h>
31
32 #include "xmlparse.h"
33
34
/* Parser states */

enum
{
 /* End of input. */

 LEX_EOF                  =   0,

 /* Functional states: perform an action on the token just recognised. */

 LEX_FUNC_TAG_BEGIN       =   1,
 LEX_FUNC_XML_DECL_BEGIN  =   2,
 LEX_FUNC_TAG_POP         =   3,
 LEX_FUNC_TAG_PUSH        =   4,
 LEX_FUNC_XML_DECL_FINISH =   5,
 LEX_FUNC_TAG_FINISH      =   6,
 LEX_FUNC_ATTR_KEY        =   7,
 LEX_FUNC_ATTR_VAL        =   8,

 /* Parsing states: consume characters from the input. */

 LEX_STATE_INITIAL        =  10,
 LEX_STATE_BANGTAG        =  11,
 LEX_STATE_COMMENT        =  12,
 LEX_STATE_XML_DECL_START =  13,
 LEX_STATE_XML_DECL       =  14,
 LEX_STATE_TAG_START      =  15,
 LEX_STATE_TAG            =  16,
 LEX_STATE_ATTR_KEY       =  17,
 LEX_STATE_ATTR_VAL       =  18,
 LEX_STATE_END_TAG1       =  19,
 LEX_STATE_END_TAG2       =  20,
 LEX_STATE_DQUOTED        =  21,
 LEX_STATE_SQUOTED        =  22,

 /* Error states raised while reading characters. */

 LEX_ERROR_TAG_START      = 101,
 LEX_ERROR_XML_DECL_START = 102,
 LEX_ERROR_TAG            = 103,
 LEX_ERROR_XML_DECL       = 104,
 LEX_ERROR_ATTR           = 105,
 LEX_ERROR_END_TAG        = 106,
 LEX_ERROR_COMMENT        = 107,
 LEX_ERROR_CLOSE          = 108,
 LEX_ERROR_ATTR_VAL       = 109,
 LEX_ERROR_ENTITY_REF     = 110,
 LEX_ERROR_CHAR_REF       = 111,
 LEX_ERROR_TEXT_OUTSIDE   = 112,

 /* Error states raised by the functional states. */

 LEX_ERROR_UNEXP_TAG      = 201,
 LEX_ERROR_UNBALANCED     = 202,
 LEX_ERROR_NO_START       = 203,
 LEX_ERROR_UNEXP_ATT      = 204,
 LEX_ERROR_UNEXP_EOF      = 205,
 LEX_ERROR_XML_NOT_FIRST  = 206,

 /* Other errors. */

 LEX_ERROR_OUT_OF_MEMORY  = 254,
 LEX_ERROR_CALLBACK       = 255
};
84
85
/* Parsing variables and functions */

/* The current line number in the input, reported in messages. */
static uint64_t lineno;

/* A pair of input buffers; buffer_active selects the one currently in use. */
static unsigned char buffer[2][16384];
static int buffer_active=0;

/* The start of the token being collected (or NULL if none). */
static unsigned char *buffer_token;

/* The last valid byte in the active buffer. */
static unsigned char *buffer_end;

/* The current read position in the active buffer. */
static unsigned char *buffer_ptr;
93
94
95 /*++++++++++++++++++++++++++++++++++++++
96 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
97
98 int buffer_refill Return 0 if everything is OK or 1 for EOF.
99
100 int fd The file descriptor to read from.
101 ++++++++++++++++++++++++++++++++++++++*/
102
103 static inline int buffer_refill(int fd)
104 {
105 ssize_t n,m=0;
106
107 m=(buffer_end-buffer[buffer_active])+1;
108
109 if(m>(sizeof(buffer[0])/2)) /* more than half full */
110 {
111 m=0;
112
113 buffer_active=!buffer_active;
114
115 if(buffer_token)
116 {
117 m=(buffer_end-buffer_token)+1;
118
119 memcpy(buffer[buffer_active],buffer_token,m);
120
121 buffer_token=buffer[buffer_active];
122 }
123 }
124
125 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
126
127 buffer_ptr=buffer[buffer_active]+m;
128 buffer_end=buffer[buffer_active]+m+n-1;
129
130 if(n<=0)
131 return(1);
132 else
133 return(0);
134 }
135
136
/* Macros to simplify the parser (and make it look more like lex) */

/* Switch to state xx immediately by jumping back to the state dispatcher. */
#define BEGIN(xx)          \
 do                        \
   {                       \
    state=(xx);            \
    goto new_state;        \
   }                       \
 while(0)

/* Remember the state to continue with after a functional state has run. */
#define NEXT(xx)     (next_state=(xx))

/* Mark the current position as the start of a token / forget the token. */
#define START_TOKEN  (buffer_token=buffer_ptr)
#define END_TOKEN    (buffer_token=NULL)

/* Advance one character, refilling the buffer (or going to EOF) at its end. */
#define NEXT_CHAR                      \
 do                                    \
   {                                   \
    if(buffer_ptr!=buffer_end)         \
       buffer_ptr++;                   \
    else if(buffer_refill(fd))         \
       BEGIN(LEX_EOF);                 \
   }                                   \
 while(0)
152
153
154 /* -------- equivalent flex definition --------
155
156 S [ \t\r]
157 N (\n)
158
159 U1 [\x09\x0A\x0D\x20-\x7F]
160 U2 [\xC2-\xDF][\x80-\xBF]
161 U3a \xE0[\xA0-\xBF][\x80-\xBF]
162 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
163 U3c \xED[\x80-\x9F][\x80-\xBF]
164 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
165 U3 {U3a}|{U3b}|{U3c}|{U3d}
166 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
167 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
168 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
169 U4 {U4a}|{U4b}|{U4c}
170
171 U ({U1}|{U2}|{U3}|{U4})
172
173 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
174
175 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
176 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
177
178 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
179 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
180
181 letter [a-zA-Z]
182 digit [0-9]
183 xdigit [a-fA-F0-9]
184
185 namechar ({letter}|{digit}|[-._:])
186 namestart ({letter}|[_:])
187 name ({namestart}{namechar}*)
188
189 entityref (&{name};)
190 charref (&#({digit}+|x{xdigit}+);)
191
192 -------- equivalent flex definition -------- */
193
/* Tables containing character class definitions (advance declaration for data at end of file). */

/* Classification of each byte inside a double / single quoted attribute value. */
static const unsigned char quotedD[256];
static const unsigned char quotedS[256];

/* Tables for checking the continuation bytes of multi-byte UTF-8 sequences. */
static const unsigned char *U2[1];
static const unsigned char *U3a[2];
static const unsigned char *U3b[2];
static const unsigned char *U3c[2];
static const unsigned char *U3d[2];
static const unsigned char *U4a[3];
static const unsigned char *U4b[3];
static const unsigned char *U4c[3];

/* Single byte character classes. */
static const unsigned char namestart[256];
static const unsigned char namechar[256];
static const unsigned char whitespace[256];
static const unsigned char digit[256];
static const unsigned char xdigit[256];
198
199
200 /*++++++++++++++++++++++++++++++++++++++
201 A function to call the callback function with the parameters needed.
202
203 int call_callback Returns 1 if the callback returned with an error.
204
205 const char *name The name of the tag.
206
207 int (*callback)() The callback function.
208
209 int type The type of tag (start and/or end).
210
211 int nattributes The number of attributes collected.
212
213 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
214 ++++++++++++++++++++++++++++++++++++++*/
215
216 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
217 {
218 switch(nattributes)
219 {
220 case 0: return (*callback)(name,type);
221 case 1: return (*callback)(name,type,attributes[0]);
222 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
223 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
224 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
225 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
226 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
227 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
228 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
229 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
230 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
231 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
232 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
233 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
234 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
235 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
236 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
237
238 default:
239 fprintf(stderr,"XML Parser: Error on line %llu: too many attributes for tag '%s' source code needs changing.\n",lineno,name);
240 exit(1);
241 }
242 }
243
244
245 /*++++++++++++++++++++++++++++++++++++++
246 Parse the XML and call the functions for each tag as seen.
247
248 int ParseXML Returns 0 if OK or something else in case of an error.
249
250 in fd The file descriptor of the file to parse.
251
252 xmltag **tags The array of pointers to tags for the top level.
253
254 int options A list of XML Parser options OR-ed together.
255 ++++++++++++++++++++++++++++++++++++++*/
256
257 int ParseXML(int fd,xmltag **tags,int options)
258 {
259 int i;
260 int state,next_state,after_attr;
261 unsigned char saved_buffer_ptr=0;
262 const unsigned char *quoted;
263
264 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
265 int attribute=0;
266
267 int stackdepth=0,stackused=0;
268 xmltag ***tags_stack=NULL;
269 xmltag **tag_stack=NULL;
270 xmltag *tag=NULL;
271
272 /* The actual parser. */
273
274 lineno=1;
275
276 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
277 buffer_token=NULL;
278
279 buffer_refill(fd);
280
281 BEGIN(LEX_STATE_INITIAL);
282
283 new_state:
284
285 switch(state)
286 {
287 /* ================ Parsing states ================ */
288
289
290 /* -------- equivalent flex definition --------
291
292 <INITIAL>"<!" { BEGIN(BANGTAG); }
293 <INITIAL>"</" { BEGIN(END_TAG1); }
294 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
295 <INITIAL>"<" { BEGIN(TAG_START); }
296
297 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
298
299 <INITIAL>{N} { lineno++; }
300 <INITIAL>{S}+ { }
301 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
302
303 -------- equivalent flex definition -------- */
304
305 case LEX_STATE_INITIAL:
306
307 while(1)
308 {
309 while(whitespace[(int)*buffer_ptr])
310 NEXT_CHAR;
311
312 if(*buffer_ptr=='\n')
313 {
314 NEXT_CHAR;
315
316 lineno++;
317 }
318 else if(*buffer_ptr=='<')
319 {
320 NEXT_CHAR;
321
322 if(*buffer_ptr=='/')
323 {
324 NEXT_CHAR;
325 BEGIN(LEX_STATE_END_TAG1);
326 }
327 else if(*buffer_ptr=='!')
328 {
329 NEXT_CHAR;
330 BEGIN(LEX_STATE_BANGTAG);
331 }
332 else if(*buffer_ptr=='?')
333 {
334 NEXT_CHAR;
335 BEGIN(LEX_STATE_XML_DECL_START);
336 }
337 else
338 BEGIN(LEX_STATE_TAG_START);
339 }
340 else if(*buffer_ptr=='>')
341 BEGIN(LEX_ERROR_CLOSE);
342 else
343 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
344 }
345
346 break;
347
348 /* -------- equivalent flex definition --------
349
350 <BANGTAG>"--" { BEGIN(COMMENT); }
351 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
352 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
353
354 -------- equivalent flex definition -------- */
355
356 case LEX_STATE_BANGTAG:
357
358 if(*buffer_ptr!='-')
359 BEGIN(LEX_ERROR_TAG_START);
360
361 NEXT_CHAR;
362
363 if(*buffer_ptr!='-')
364 BEGIN(LEX_ERROR_TAG_START);
365
366 NEXT_CHAR;
367 BEGIN(LEX_STATE_COMMENT);
368
369 break;
370
371 /* -------- equivalent flex definition --------
372
373 <COMMENT>"-->" { BEGIN(INITIAL); }
374 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
375 <COMMENT>"-" { }
376 <COMMENT>{N} { lineno++; }
377 <COMMENT>[^-\n]+ { }
378
379 -------- equivalent flex definition -------- */
380
381 case LEX_STATE_COMMENT:
382
383 while(1)
384 {
385 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
386 NEXT_CHAR;
387
388 if(*buffer_ptr=='-')
389 {
390 NEXT_CHAR;
391
392 if(*buffer_ptr!='-')
393 continue;
394
395 NEXT_CHAR;
396 if(*buffer_ptr=='>')
397 {
398 NEXT_CHAR;
399 BEGIN(LEX_STATE_INITIAL);
400 }
401
402 BEGIN(LEX_ERROR_COMMENT);
403 }
404 else /* if(*buffer_ptr=='\n') */
405 {
406 NEXT_CHAR;
407
408 lineno++;
409 }
410 }
411
412 break;
413
414 /* -------- equivalent flex definition --------
415
416 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
417 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
418 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
419
420 -------- equivalent flex definition -------- */
421
422 case LEX_STATE_XML_DECL_START:
423
424 START_TOKEN;
425
426 if(*buffer_ptr=='x')
427 {
428 NEXT_CHAR;
429 if(*buffer_ptr=='m')
430 {
431 NEXT_CHAR;
432 if(*buffer_ptr=='l')
433 {
434 NEXT_CHAR;
435
436 saved_buffer_ptr=*buffer_ptr;
437 *buffer_ptr=0;
438
439 NEXT(LEX_STATE_XML_DECL);
440 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
441 }
442 }
443 }
444
445 BEGIN(LEX_ERROR_XML_DECL_START);
446
447 /* -------- equivalent flex definition --------
448
449 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
450 <XML_DECL>{S}+ { }
451 <XML_DECL>{N} { lineno++; }
452 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
453 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
454
455 -------- equivalent flex definition -------- */
456
457 case LEX_STATE_XML_DECL:
458
459 while(1)
460 {
461 while(whitespace[(int)*buffer_ptr])
462 NEXT_CHAR;
463
464 if(namestart[(int)*buffer_ptr])
465 {
466 START_TOKEN;
467
468 NEXT_CHAR;
469 while(namechar[(int)*buffer_ptr])
470 NEXT_CHAR;
471
472 saved_buffer_ptr=*buffer_ptr;
473 *buffer_ptr=0;
474
475 after_attr=LEX_STATE_XML_DECL;
476 NEXT(LEX_STATE_ATTR_KEY);
477 BEGIN(LEX_FUNC_ATTR_KEY);
478 }
479 else if(*buffer_ptr=='?')
480 {
481 NEXT_CHAR;
482 if(*buffer_ptr=='>')
483 {
484 NEXT_CHAR;
485 NEXT(LEX_STATE_INITIAL);
486 BEGIN(LEX_FUNC_XML_DECL_FINISH);
487 }
488
489 BEGIN(LEX_ERROR_XML_DECL);
490 }
491 else if(*buffer_ptr=='\n')
492 {
493 NEXT_CHAR;
494 lineno++;
495 }
496 else
497 BEGIN(LEX_ERROR_XML_DECL);
498 }
499
500 break;
501
502 /* -------- equivalent flex definition --------
503
504 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
505 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
506 <TAG_START>. { return(LEX_ERROR_TAG_START); }
507
508 -------- equivalent flex definition -------- */
509
510 case LEX_STATE_TAG_START:
511
512 if(namestart[(int)*buffer_ptr])
513 {
514 START_TOKEN;
515
516 NEXT_CHAR;
517 while(namechar[(int)*buffer_ptr])
518 NEXT_CHAR;
519
520 saved_buffer_ptr=*buffer_ptr;
521 *buffer_ptr=0;
522
523 NEXT(LEX_STATE_TAG);
524 BEGIN(LEX_FUNC_TAG_BEGIN);
525 }
526
527 BEGIN(LEX_ERROR_TAG_START);
528
529 /* -------- equivalent flex definition --------
530
531 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
532 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
533 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
534
535 -------- equivalent flex definition -------- */
536
537 case LEX_STATE_END_TAG1:
538
539 if(namestart[(int)*buffer_ptr])
540 {
541 START_TOKEN;
542
543 NEXT_CHAR;
544 while(namechar[(int)*buffer_ptr])
545 NEXT_CHAR;
546
547 saved_buffer_ptr=*buffer_ptr;
548 *buffer_ptr=0;
549
550 NEXT(LEX_STATE_END_TAG2);
551 BEGIN(LEX_FUNC_TAG_POP);
552 }
553
554 BEGIN(LEX_ERROR_END_TAG);
555
556 /* -------- equivalent flex definition --------
557
558 <END_TAG2>">" { BEGIN(INITIAL); }
559 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
560 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
561
562 -------- equivalent flex definition -------- */
563
564 case LEX_STATE_END_TAG2:
565
566 if(*buffer_ptr=='>')
567 {
568 NEXT_CHAR;
569
570 BEGIN(LEX_STATE_INITIAL);
571 }
572
573 BEGIN(LEX_ERROR_END_TAG);
574
575 /* -------- equivalent flex definition --------
576
577 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
578 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
579 <TAG>{S}+ { }
580 <TAG>{N} { lineno++; }
581 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
582 <TAG>. { return(LEX_ERROR_TAG); }
583
584 -------- equivalent flex definition -------- */
585
586 case LEX_STATE_TAG:
587
588 while(1)
589 {
590 while(whitespace[(int)*buffer_ptr])
591 NEXT_CHAR;
592
593 if(namestart[(int)*buffer_ptr])
594 {
595 START_TOKEN;
596
597 NEXT_CHAR;
598 while(namechar[(int)*buffer_ptr])
599 NEXT_CHAR;
600
601 saved_buffer_ptr=*buffer_ptr;
602 *buffer_ptr=0;
603
604 after_attr=LEX_STATE_TAG;
605 NEXT(LEX_STATE_ATTR_KEY);
606 BEGIN(LEX_FUNC_ATTR_KEY);
607 }
608 else if(*buffer_ptr=='/')
609 {
610 NEXT_CHAR;
611 if(*buffer_ptr=='>')
612 {
613 NEXT_CHAR;
614 NEXT(LEX_STATE_INITIAL);
615 BEGIN(LEX_FUNC_TAG_FINISH);
616 }
617
618 BEGIN(LEX_ERROR_TAG);
619 }
620 else if(*buffer_ptr=='>')
621 {
622 NEXT_CHAR;
623 NEXT(LEX_STATE_INITIAL);
624 BEGIN(LEX_FUNC_TAG_PUSH);
625 }
626 else if(*buffer_ptr=='\n')
627 {
628 NEXT_CHAR;
629 lineno++;
630 }
631 else
632 BEGIN(LEX_ERROR_TAG);
633 }
634
635 break;
636
637 /* -------- equivalent flex definition --------
638
639 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
640 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
641 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
642
643 -------- equivalent flex definition -------- */
644
645 case LEX_STATE_ATTR_KEY:
646
647 if(*buffer_ptr=='=')
648 {
649 NEXT_CHAR;
650 BEGIN(LEX_STATE_ATTR_VAL);
651 }
652
653 BEGIN(LEX_ERROR_ATTR);
654
655 /* -------- equivalent flex definition --------
656
657 <ATTR_VAL>\" { BEGIN(DQUOTED); }
658 <ATTR_VAL>\' { BEGIN(SQUOTED); }
659 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
660 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
661
662 -------- equivalent flex definition -------- */
663
664 case LEX_STATE_ATTR_VAL:
665
666 if(*buffer_ptr=='"')
667 {
668 NEXT_CHAR;
669 BEGIN(LEX_STATE_DQUOTED);
670 }
671 else if(*buffer_ptr=='\'')
672 {
673 NEXT_CHAR;
674 BEGIN(LEX_STATE_SQUOTED);
675 }
676
677 BEGIN(LEX_ERROR_ATTR);
678
679 /* -------- equivalent flex definition --------
680
681 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
682 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
683 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
684 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
685 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
686 <DQUOTED>{UquotedD} { }
687 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
688 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
689
690 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
691 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
692 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
693 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
694 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
695 <SQUOTED>{UquotedS} { append_string(yytext); }
696 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
697 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
698
699 -------- equivalent flex definition -------- */
700
701 case LEX_STATE_DQUOTED:
702 case LEX_STATE_SQUOTED:
703
704 if(state==LEX_STATE_DQUOTED)
705 quoted=quotedD;
706 else
707 quoted=quotedS;
708
709 START_TOKEN;
710
711 while(1)
712 {
713 switch(quoted[(int)*buffer_ptr])
714 {
715 case 10: /* U1 - used by all tag keys and many values */
716 do
717 {
718 NEXT_CHAR;
719 }
720 while(quoted[(int)*buffer_ptr]==10);
721 break;
722
723 case 20: /* U2 */
724 NEXT_CHAR;
725 if(!U2[0][(int)*buffer_ptr])
726 BEGIN(LEX_ERROR_ATTR_VAL);
727 NEXT_CHAR;
728 break;
729
730 case 31: /* U3a */
731 NEXT_CHAR;
732 if(!U3a[0][(int)*buffer_ptr])
733 BEGIN(LEX_ERROR_ATTR_VAL);
734 NEXT_CHAR;
735 if(!U3a[1][(int)*buffer_ptr])
736 BEGIN(LEX_ERROR_ATTR_VAL);
737 NEXT_CHAR;
738 break;
739
740 case 32: /* U3b */
741 NEXT_CHAR;
742 if(!U3b[0][(int)*buffer_ptr])
743 BEGIN(LEX_ERROR_ATTR_VAL);
744 NEXT_CHAR;
745 if(!U3b[1][(int)*buffer_ptr])
746 BEGIN(LEX_ERROR_ATTR_VAL);
747 NEXT_CHAR;
748 break;
749
750 case 33: /* U3c */
751 NEXT_CHAR;
752 if(!U3c[0][(int)*buffer_ptr])
753 BEGIN(LEX_ERROR_ATTR_VAL);
754 NEXT_CHAR;
755 if(!U3c[1][(int)*buffer_ptr])
756 BEGIN(LEX_ERROR_ATTR_VAL);
757 NEXT_CHAR;
758 break;
759
760 case 34: /* U3d */
761 NEXT_CHAR;
762 if(!U3d[0][(int)*buffer_ptr])
763 BEGIN(LEX_ERROR_ATTR_VAL);
764 NEXT_CHAR;
765 if(!U3d[1][(int)*buffer_ptr])
766 BEGIN(LEX_ERROR_ATTR_VAL);
767 NEXT_CHAR;
768 break;
769
770 case 41: /* U4a */
771 NEXT_CHAR;
772 if(!U4a[0][(int)*buffer_ptr])
773 BEGIN(LEX_ERROR_ATTR_VAL);
774 NEXT_CHAR;
775 if(!U4a[1][(int)*buffer_ptr])
776 BEGIN(LEX_ERROR_ATTR_VAL);
777 NEXT_CHAR;
778 break;
779
780 case 42: /* U4b */
781 NEXT_CHAR;
782 if(!U4b[0][(int)*buffer_ptr])
783 BEGIN(LEX_ERROR_ATTR_VAL);
784 NEXT_CHAR;
785 if(!U4b[1][(int)*buffer_ptr])
786 BEGIN(LEX_ERROR_ATTR_VAL);
787 NEXT_CHAR;
788 break;
789
790 case 43: /* U4c */
791 NEXT_CHAR;
792 if(!U4c[0][(int)*buffer_ptr])
793 BEGIN(LEX_ERROR_ATTR_VAL);
794 NEXT_CHAR;
795 if(!U4c[1][(int)*buffer_ptr])
796 BEGIN(LEX_ERROR_ATTR_VAL);
797 NEXT_CHAR;
798 break;
799
800 case 50: /* entityref or charref */
801 NEXT_CHAR;
802
803 if(*buffer_ptr=='#') /* charref */
804 {
805 int charref_len=3;
806
807 NEXT_CHAR;
808 if(digit[(int)*buffer_ptr]) /* decimal */
809 {
810 NEXT_CHAR;
811 charref_len++;
812
813 while(digit[(int)*buffer_ptr])
814 {
815 NEXT_CHAR;
816 charref_len++;
817 }
818
819 if(*buffer_ptr!=';')
820 BEGIN(LEX_ERROR_ATTR_VAL);
821 }
822 else if(*buffer_ptr=='x') /* hex */
823 {
824 NEXT_CHAR;
825 charref_len++;
826
827 while(xdigit[(int)*buffer_ptr])
828 {
829 NEXT_CHAR;
830 charref_len++;
831 }
832
833 if(*buffer_ptr!=';')
834 BEGIN(LEX_ERROR_ATTR_VAL);
835 }
836 else /* other */
837 BEGIN(LEX_ERROR_ATTR_VAL);
838
839 NEXT_CHAR;
840
841 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
842 {
843 const char *str;
844
845 saved_buffer_ptr=*buffer_ptr;
846 *buffer_ptr=0;
847
848 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
849
850 if(!str)
851 {
852 buffer_ptr-=charref_len;
853 BEGIN(LEX_ERROR_CHAR_REF);
854 }
855
856 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
857 memcpy(buffer_ptr-strlen(str),str,strlen(str));
858
859 *buffer_ptr=saved_buffer_ptr;
860 }
861 }
862 else if(namestart[(int)*buffer_ptr]) /* entityref */
863 {
864 int entityref_len=3;
865
866 NEXT_CHAR;
867 while(namechar[(int)*buffer_ptr])
868 {
869 NEXT_CHAR;
870 entityref_len++;
871 }
872
873 if(*buffer_ptr!=';')
874 BEGIN(LEX_ERROR_ATTR_VAL);
875
876 NEXT_CHAR;
877
878 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
879 {
880 const char *str;
881
882 saved_buffer_ptr=*buffer_ptr;
883 *buffer_ptr=0;
884
885 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
886
887 if(!str)
888 {
889 buffer_ptr-=entityref_len;
890 BEGIN(LEX_ERROR_ENTITY_REF);
891 }
892
893 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
894 memcpy(buffer_ptr-strlen(str),str,strlen(str));
895
896 *buffer_ptr=saved_buffer_ptr;
897 }
898 }
899 else /* other */
900 BEGIN(LEX_ERROR_ATTR_VAL);
901
902 break;
903
904 case 99: /* quote */
905 *buffer_ptr=0;
906 NEXT_CHAR;
907
908 NEXT(after_attr);
909 BEGIN(LEX_FUNC_ATTR_VAL);
910
911 default: /* other */
912 BEGIN(LEX_ERROR_ATTR_VAL);
913 }
914 }
915
916 break;
917
918
919 /* ================ Functional states ================ */
920
921
922 /* The start of a tag for an XML declaration */
923
924 case LEX_FUNC_XML_DECL_BEGIN:
925
926 if(tag_stack)
927 BEGIN(LEX_ERROR_XML_NOT_FIRST);
928
929 /* The start of a tag for an element */
930
931 case LEX_FUNC_TAG_BEGIN:
932
933 tag=NULL;
934
935 for(i=0;tags[i];i++)
936 if(!strcasecmp((char*)buffer_token,tags[i]->name))
937 {
938 tag=tags[i];
939
940 for(i=0;i<tag->nattributes;i++)
941 attributes[i]=NULL;
942
943 break;
944 }
945
946 if(tag==NULL)
947 BEGIN(LEX_ERROR_UNEXP_TAG);
948
949 END_TOKEN;
950
951 *buffer_ptr=saved_buffer_ptr;
952 BEGIN(next_state);
953
954 /* The end of the start-tag for an element */
955
956 case LEX_FUNC_TAG_PUSH:
957
958 if(stackused==stackdepth)
959 {
960 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
961 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
962 }
963
964 tag_stack [stackused]=tag;
965 tags_stack[stackused]=tags;
966 stackused++;
967
968 if(tag->callback)
969 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
970 BEGIN(LEX_ERROR_CALLBACK);
971
972 tags=tag->subtags;
973
974 BEGIN(next_state);
975
976 /* The end of the empty-element-tag for an XML declaration */
977
978 case LEX_FUNC_XML_DECL_FINISH:
979
980 /* The end of the empty-element-tag for an element */
981
982 case LEX_FUNC_TAG_FINISH:
983
984 if(tag->callback)
985 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
986 BEGIN(LEX_ERROR_CALLBACK);
987
988 if(stackused>0)
989 tag=tag_stack[stackused-1];
990 else
991 tag=NULL;
992
993 BEGIN(next_state);
994
995 /* The end of the end-tag for an element */
996
997 case LEX_FUNC_TAG_POP:
998
999 stackused--;
1000 tags=tags_stack[stackused];
1001 tag =tag_stack [stackused];
1002
1003 if(strcmp((char*)buffer_token,tag->name))
1004 BEGIN(LEX_ERROR_UNBALANCED);
1005
1006 if(stackused<0)
1007 BEGIN(LEX_ERROR_NO_START);
1008
1009 for(i=0;i<tag->nattributes;i++)
1010 attributes[i]=NULL;
1011
1012 if(tag->callback)
1013 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1014 BEGIN(LEX_ERROR_CALLBACK);
1015
1016 if(stackused>0)
1017 tag=tag_stack[stackused-1];
1018 else
1019 tag=NULL;
1020
1021 END_TOKEN;
1022
1023 *buffer_ptr=saved_buffer_ptr;
1024 BEGIN(next_state);
1025
1026 /* An attribute key */
1027
1028 case LEX_FUNC_ATTR_KEY:
1029
1030 attribute=-1;
1031
1032 for(i=0;i<tag->nattributes;i++)
1033 if(!strcasecmp((char*)buffer_token,tag->attributes[i]))
1034 {
1035 attribute=i;
1036
1037 break;
1038 }
1039
1040 if(attribute==-1)
1041 {
1042 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1043 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1044 BEGIN(LEX_ERROR_UNEXP_ATT);
1045 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1046 fprintf(stderr,"XML Parser: Warning on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1047 }
1048
1049 END_TOKEN;
1050
1051 *buffer_ptr=saved_buffer_ptr;
1052 BEGIN(next_state);
1053
1054 /* An attribute value */
1055
1056 case LEX_FUNC_ATTR_VAL:
1057
1058 if(tag->callback && attribute!=-1)
1059 attributes[attribute]=buffer_token;
1060
1061 END_TOKEN;
1062
1063 BEGIN(next_state);
1064
1065 /* End of file */
1066
1067 case LEX_EOF:
1068
1069 if(tag)
1070 BEGIN(LEX_ERROR_UNEXP_EOF);
1071
1072 break;
1073
1074
1075 /* ================ Error states ================ */
1076
1077
1078 case LEX_ERROR_TAG_START:
1079 fprintf(stderr,"XML Parser: Error on line %llu: character '<' seen not at start of tag.\n",lineno);
1080 break;
1081
1082 case LEX_ERROR_XML_DECL_START:
1083 fprintf(stderr,"XML Parser: Error on line %llu: characters '<?' seen not at start of XML declaration.\n",lineno);
1084 break;
1085
1086 case LEX_ERROR_TAG:
1087 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1088 break;
1089
1090 case LEX_ERROR_XML_DECL:
1091 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1092 break;
1093
1094 case LEX_ERROR_ATTR:
1095 fprintf(stderr,"XML Parser: Error on line %llu: invalid attribute definition seen in tag.\n",lineno);
1096 break;
1097
1098 case LEX_ERROR_END_TAG:
1099 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen in end-tag.\n",lineno);
1100 break;
1101
1102 case LEX_ERROR_COMMENT:
1103 fprintf(stderr,"XML Parser: Error on line %llu: invalid comment seen.\n",lineno);
1104 break;
1105
1106 case LEX_ERROR_CLOSE:
1107 fprintf(stderr,"XML Parser: Error on line %llu: character '>' seen not at end of tag.\n",lineno);
1108 break;
1109
1110 case LEX_ERROR_ATTR_VAL:
1111 fprintf(stderr,"XML Parser: Error on line %llu: invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1112 break;
1113
1114 case LEX_ERROR_ENTITY_REF:
1115 fprintf(stderr,"XML Parser: Error on line %llu: invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1116 break;
1117
1118 case LEX_ERROR_CHAR_REF:
1119 fprintf(stderr,"XML Parser: Error on line %llu: invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1120 break;
1121
1122 case LEX_ERROR_TEXT_OUTSIDE:
1123 fprintf(stderr,"XML Parser: Error on line %llu: non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1124 break;
1125
1126 case LEX_ERROR_UNEXP_TAG:
1127 fprintf(stderr,"XML Parser: Error on line %llu: unexpected tag '%s'.\n",lineno,buffer_token);
1128 break;
1129
1130 case LEX_ERROR_UNBALANCED:
1131 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1132 break;
1133
1134 case LEX_ERROR_NO_START:
1135 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1136 break;
1137
1138 case LEX_ERROR_UNEXP_ATT:
1139 fprintf(stderr,"XML Parser: Error on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1140 break;
1141
1142 case LEX_ERROR_UNEXP_EOF:
1143 fprintf(stderr,"XML Parser: Error on line %llu: end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1144 break;
1145
1146 case LEX_ERROR_XML_NOT_FIRST:
1147 fprintf(stderr,"XML Parser: Error on line %llu: XML declaration '<?xml...>' not before all other tags.\n",lineno);
1148 break;
1149 }
1150
1151 /* Delete the tagdata */
1152
1153 if(stackdepth)
1154 {
1155 free(tag_stack);
1156 free(tags_stack);
1157 }
1158
1159 return(state);
1160 }
1161
1162
1163 /*++++++++++++++++++++++++++++++++++++++
1164 Return the current parser line number.
1165
1166 uint64_t ParseXML_LineNumber Returns the line number.
1167 ++++++++++++++++++++++++++++++++++++++*/
1168
1169 uint64_t ParseXML_LineNumber(void)
1170 {
1171 return(lineno);
1172 }
1173
1174
/*++++++++++++++++++++++++++++++++++++++
  Look up the replacement text for one of the predefined XML entity references.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the replacement decoded string or NULL if the reference is not recognised.

  const char *string The entity reference string (including the '&' and the ';').
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The five entity references that are recognised (compared case-sensitively). */

 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 entities[]={{"&amp;" ,"&" },
             {"&lt;"  ,"<" },
             {"&gt;"  ,">" },
             {"&apos;","'" },
             {"&quot;","\""}};

 size_t e;

 for(e=0;e<sizeof(entities)/sizeof(entities[0]);e++)
    if(strcmp(string,entities[e].reference)==0)
       return(entities[e].replacement);

 return(NULL);
}
1192
1193
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string.  The string
                                 lives in a static buffer that is overwritten by the next call.

  const char *string The character reference string, "&#NNN;" (decimal) or "&#xHHH;" (hexadecimal).
                     It must be at least three characters long because string[2] is examined.

  Values that are not Unicode scalar values (negative, above U+10FFFF or in the surrogate range
  U+D800-U+DFFF) are replaced by U+FFFD, the replacement character.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* Skip the leading "&#x" or "&#" and let strtol() stop at the ';' */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Anything that cannot be represented in well-formed UTF-8 becomes U+FFFD */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0|((unicode>>6)&0x1F));
    result[1]=(char)(0x80|( unicode    &0x3F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0|((unicode>>12)&0x0F));
    result[1]=(char)(0x80|((unicode>>6 )&0x3F));
    result[2]=(char)(0x80|( unicode     &0x3F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0|((unicode>>18)&0x07));
    result[1]=(char)(0x80|((unicode>>12)&0x3F));
    result[2]=(char)(0x80|((unicode>>6 )&0x3F));
    result[3]=(char)(0x80|( unicode     &0x3F));
    result[4]=0;
   }

 return(result);
}
1249
1250
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if nothing needs quoting; otherwise
                                 a newly malloc()ed string holding the quoted text, or NULL if
                                 the memory could not be allocated.

  const char *string The string to convert (treated as UTF-8).

  The characters < > & ' " become named entities.  Bytes below 32 and every byte above 127 are
  written as hexadecimal character references; a byte that does not start an acceptable UTF-8
  sequence is written as &#xFFFD;.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 /* LONGEST_ESCAPE is "&#x" + six hex digits + ";", the most one input position can produce. */
 enum {LONGEST_ESCAPE=10,CHUNK=256};
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first byte that needs quoting, if there is one */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the clean prefix and quote the remainder */

 size=i+CHUNK;

 result=malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    const char *entity=NULL;

    /* Guarantee room for the longest escape plus the terminating NUL before writing anything */

    if(size-j<(size_t)(LONGEST_ESCAPE+1))
      {
       char *bigger;

       size+=CHUNK;
       bigger=realloc(result,size);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
      }

    switch(in[i])
      {
       case '\'': entity="&apos;"; break;
       case '&':  entity="&amp;";  break;
       case '"':  entity="&quot;"; break;
       case '<':  entity="&lt;";   break;
       case '>':  entity="&gt;";   break;
       default:                    break;
      }

    if(entity)
      {
       size_t length=strlen(entity);

       memcpy(result+j,entity,length);
       j+=length;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (the && chains stop at a NUL so nothing past the end is read) */

       if(in[i]<0x80)
         {
          /* 0000 0000-0000 007F  =>  0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD; /* not a usable sequence; only this one byte is consumed */

       /* Output the character entity using two, four or six hex digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[(unicode>>20)&0x0f];
          result[j++]=hexstring[(unicode>>16)&0x0f];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[(unicode>>12)&0x0f];
          result[j++]=hexstring[(unicode>>8 )&0x0f];
         }
       result[j++]=hexstring[(unicode>>4)&0x0f];
       result[j++]=hexstring[ unicode    &0x0f];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1391
1392
1393 /*++++++++++++++++++++++++++++++++++++++
1394 Check that a string really is an integer.
1395
1396 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1397
1398 const char *string The string to be parsed.
1399 ++++++++++++++++++++++++++++++++++++++*/
1400
1401 int ParseXML_IsInteger(const char *string)
1402 {
1403 const unsigned char *p=(unsigned char*)string;
1404
1405 if(*p=='-' || *p=='+')
1406 p++;
1407
1408 while(digit[(int)*p])
1409 p++;
1410
1411 if(*p)
1412 return(0);
1413 else
1414 return(1);
1415 }
1416
1417
1418 /*++++++++++++++++++++++++++++++++++++++
1419 Check that a string really is a floating point number.
1420
1421 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1422
1423 const char *string The string to be parsed.
1424 ++++++++++++++++++++++++++++++++++++++*/
1425
1426 int ParseXML_IsFloating(const char *string)
1427 {
1428 const unsigned char *p=(unsigned char*)string;
1429
1430 if(*p=='-' || *p=='+')
1431 p++;
1432
1433 while(digit[(int)*p] || *p=='.')
1434 p++;
1435
1436 if(*p=='e' || *p=='E')
1437 {
1438 p++;
1439
1440 if(*p=='-' || *p=='+')
1441 p++;
1442
1443 while(digit[*p])
1444 p++;
1445 }
1446
1447 if(*p)
1448 return(0);
1449 else
1450 return(1);
1451 }
1452
1453
1454 /* Table for checking for double-quoted characters.
        One entry per byte value (256 entries).  Values present in the table:
          0  - control bytes other than TAB, LF and CR; '<' and '>'; 0x80-0xc1; 0xf5-0xff
         10  - TAB, LF, CR and every other byte in 0x20-0x7f
         50  - '&'
         99  - '"'
         20  - 0xc2-0xdf
         31, 32, 33, 34 - 0xe0, 0xe1-0xec, 0xed, 0xee-0xef
         41, 42, 43     - 0xf0, 0xf1-0xf3, 0xf4
        NOTE(review): presumably these are lexer dispatch codes (0 = invalid, 10 = ordinary
        character, 50 = start of a reference, 99 = closing quote, 20/3x/4x = lead byte of a
        2/3/4-byte UTF-8 sequence) - confirm against the code that reads this table. */
1455 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1457 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1458 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1459 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1460 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1461 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1462 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1467 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1468 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1469 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1470 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1471
1472 /* Table for checking for single-quoted characters.
        One entry per byte value (256 entries).  Values present in the table:
          0  - control bytes other than TAB, LF and CR; '<' and '>'; 0x80-0xc1; 0xf5-0xff
         10  - TAB, LF, CR and every other byte in 0x20-0x7f (this includes '"')
         50  - '&'
         99  - the apostrophe (0x27)
         20  - 0xc2-0xdf
         31, 32, 33, 34 - 0xe0, 0xe1-0xec, 0xed, 0xee-0xef
         41, 42, 43     - 0xf0, 0xf1-0xf3, 0xf4
        NOTE(review): presumably these are lexer dispatch codes (0 = invalid, 10 = ordinary
        character, 50 = start of a reference, 99 = closing quote, 20/3x/4x = lead byte of a
        2/3/4-byte UTF-8 sequence) - confirm against the code that reads this table. */
1473 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1474 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1475 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1476 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1477 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1478 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1479 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1480 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1485 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1486 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1487 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1488 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1489
1490 /* Table for checking for characters between 0x80 and 0x8f.
        One entry per byte value: 1 for bytes 0x80-0x8f, 0 for every other byte.
        It is the first table of the U4c set.
        NOTE(review): presumably the allowed range for the byte after a 0xf4 UTF-8 lead byte - confirm. */
1491 static const unsigned char U_80_8F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1499 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1507
1508 /* Table for checking for characters between 0x80 and 0x9f.
        One entry per byte value: 1 for bytes 0x80-0x9f, 0 for every other byte.
        It is the first table of the U3c set.
        NOTE(review): presumably the allowed range for the byte after a 0xed UTF-8 lead byte
        (which keeps the surrogate range out) - confirm. */
1509 static const unsigned char U_80_9F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1517 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1518 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1525
1526 /* Table for checking for characters between 0x80 and 0xbf.
        One entry per byte value: 1 for bytes 0x80-0xbf, 0 for every other byte.
        It appears in every one of the U2, U3a-U3d and U4a-U4c sets.
        NOTE(review): 0x80-0xbf is the general UTF-8 continuation byte range, so this is
        presumably the unrestricted continuation byte check - confirm. */
1527 static const unsigned char U_80_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1539 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1540 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1543
1544 /* Table for checking for characters between 0x90 and 0xbf.
        One entry per byte value: 1 for bytes 0x90-0xbf, 0 for every other byte.
        It is the first table of the U4a set.
        NOTE(review): presumably the allowed range for the byte after a 0xf0 UTF-8 lead byte
        (which rules out overlong four-byte forms) - confirm. */
1545 static const unsigned char U_90_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1548 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1561
1562 /* Table for checking for characters between 0xa0 and 0xbf.
        One entry per byte value: 1 for bytes 0xa0-0xbf, 0 for every other byte.
        It is the first table of the U3a set.
        NOTE(review): presumably the allowed range for the byte after a 0xe0 UTF-8 lead byte
        (which rules out overlong three-byte forms) - confirm. */
1563 static const unsigned char U_A0_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1573 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1579
1580 /* Table for checking for U2 characters: one range table, 0x80-0xbf.
        NOTE(review): in U2, U3a-U3d and U4a-U4c each element is presumably the range table for
        one successive byte following a UTF-8 lead byte, the set being chosen by the codes
        20, 31-34 and 41-43 in quotedD/quotedS - confirm against the lexer. */
1581 static const unsigned char *U2[1]={ U_80_BF };
1582
1583 /* Table for checking for U3a characters: 0xa0-0xbf then 0x80-0xbf. */
1584 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1585
1586 /* Table for checking for U3b characters: 0x80-0xbf then 0x80-0xbf. */
1587 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1588
1589 /* Table for checking for U3c characters: 0x80-0x9f then 0x80-0xbf. */
1590 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1591
1592 /* Table for checking for U3d characters: 0x80-0xbf then 0x80-0xbf (same contents as U3b). */
1593 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1594
1595 /* Table for checking for U4a characters: 0x90-0xbf then 0x80-0xbf twice. */
1596 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1597
1598 /* Table for checking for U4b characters: 0x80-0xbf three times. */
1599 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1600
1601 /* Table for checking for U4c characters: 0x80-0x8f then 0x80-0xbf twice. */
1602 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1603
1604 /* Table for checking for namestart characters.
        One entry per byte value: 1 for ':', 'A'-'Z', '_' and 'a'-'z', 0 for every other byte
        (including all bytes from 0x80 upwards).
        NOTE(review): presumably the bytes allowed as the first character of a tag or attribute name - confirm. */
1605 static const unsigned char namestart[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1606 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1609 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1610 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1611 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1612 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1621
1622 /* Table for checking for namechar characters.
        One entry per byte value: 1 for '-', '.', '0'-'9', ':', 'A'-'Z', '_' and 'a'-'z', 0 for
        every other byte (including all bytes from 0x80 upwards).
        NOTE(review): presumably the bytes allowed after the first character of a tag or attribute name - confirm. */
1623 static const unsigned char namechar[256] ={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1627 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1628 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1629 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1630 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1639
1640 /* Table for checking for whitespace characters.
        One entry per byte value: 1 for TAB (0x09), CR (0x0d) and space (0x20), 0 for every
        other byte.  Note that LF (0x0a) is 0 in this table.
        NOTE(review): presumably newlines are handled separately by the lexer so that lines can be counted - confirm. */
1641 static const unsigned char whitespace[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, /* 0x00-0x0f " " */
1642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1643 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1657
1658 /* Table for checking for digit characters.
        One entry per byte value: 1 for '0'-'9', 0 for every other byte.
        ParseXML_IsInteger() and ParseXML_IsFloating() index it with unsigned byte values. */
1659 static const unsigned char digit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1662 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1675
1676 /* Table for checking for xdigit characters.
        One entry per byte value: 1 for '0'-'9', 'A'-'F' and 'a'-'f', 0 for every other byte.
        NOTE(review): presumably used when lexing hexadecimal character references - confirm. */
1677 static const unsigned char xdigit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1680 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1681 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1683 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */