Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1277 - (show annotations) (download) (as text)
Thu Apr 18 17:45:30 2013 UTC (11 years, 11 months ago) by amb
File MIME type: text/x-csrc
File size: 68001 byte(s)
Fix bug with handling UTF-8 characters that are four bytes long (it didn't since
v2.5).

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2013 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <stdint.h>
29 #include <string.h>
30 #include <strings.h>
31
32 #include "xmlparse.h"
33
34
/* Parser states.

   The state variable of ParseXML() takes one of these values; the value that
   the parser finishes in is also what ParseXML() returns, so every number is
   spelled out explicitly rather than left to the compiler. */

/* End of input. */

enum
{
 LEX_EOF                  =   0
};

/* Functional states: a complete token has been recognised and is acted upon. */

enum
{
 LEX_FUNC_TAG_BEGIN       =   1,
 LEX_FUNC_XML_DECL_BEGIN  =   2,
 LEX_FUNC_TAG_POP         =   3,
 LEX_FUNC_TAG_PUSH        =   4,
 LEX_FUNC_XML_DECL_FINISH =   5,
 LEX_FUNC_TAG_FINISH      =   6,
 LEX_FUNC_ATTR_KEY        =   7,
 LEX_FUNC_ATTR_VAL        =   8
};

/* Lexing states: characters are being consumed. */

enum
{
 LEX_STATE_INITIAL        =  10,
 LEX_STATE_BANGTAG        =  11,
 LEX_STATE_COMMENT        =  12,
 LEX_STATE_XML_DECL_START =  13,
 LEX_STATE_XML_DECL       =  14,
 LEX_STATE_TAG_START      =  15,
 LEX_STATE_TAG            =  16,
 LEX_STATE_ATTR_KEY       =  17,
 LEX_STATE_ATTR_VAL       =  18,
 LEX_STATE_END_TAG1       =  19,
 LEX_STATE_END_TAG2       =  20,
 LEX_STATE_DQUOTED        =  21,
 LEX_STATE_SQUOTED        =  22
};

/* Errors detected while reading characters. */

enum
{
 LEX_ERROR_TAG_START      = 101,
 LEX_ERROR_XML_DECL_START = 102,
 LEX_ERROR_TAG            = 103,
 LEX_ERROR_XML_DECL       = 104,
 LEX_ERROR_ATTR           = 105,
 LEX_ERROR_END_TAG        = 106,
 LEX_ERROR_COMMENT        = 107,
 LEX_ERROR_CLOSE          = 108,
 LEX_ERROR_ATTR_VAL       = 109,
 LEX_ERROR_ENTITY_REF     = 110,
 LEX_ERROR_CHAR_REF       = 111,
 LEX_ERROR_TEXT_OUTSIDE   = 112
};

/* Errors detected while acting on a recognised token. */

enum
{
 LEX_ERROR_UNEXP_TAG      = 201,
 LEX_ERROR_UNBALANCED     = 202,
 LEX_ERROR_NO_START       = 203,
 LEX_ERROR_UNEXP_ATT      = 204,
 LEX_ERROR_UNEXP_EOF      = 205,
 LEX_ERROR_XML_NOT_FIRST  = 206
};

/* Other errors. */

enum
{
 LEX_ERROR_OUT_OF_MEMORY  = 254,
 LEX_ERROR_CALLBACK       = 255
};
84
85
/* Parsing variables and functions */

/* The line number of the input currently being parsed (used in messages). */
static uint64_t lineno;

/* A pair of input buffers; buffer_active is the index of the one in use. */
static unsigned char buffer[2][16384];

/* The start of the token being collected (NULL when no token is open). */
static unsigned char *buffer_token;

/* The last valid byte in the active buffer. */
static unsigned char *buffer_end;

/* The byte currently being examined by the parser. */
static unsigned char *buffer_ptr;

static int buffer_active=0;
93
94
95 /*++++++++++++++++++++++++++++++++++++++
96 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
97
98 int buffer_refill Return 0 if everything is OK or 1 for EOF.
99
100 int fd The file descriptor to read from.
101 ++++++++++++++++++++++++++++++++++++++*/
102
103 static inline int buffer_refill(int fd)
104 {
105 ssize_t n,m=0;
106
107 m=(buffer_end-buffer[buffer_active])+1;
108
109 if(m>(sizeof(buffer[0])/2)) /* more than half full */
110 {
111 m=0;
112
113 buffer_active=!buffer_active;
114
115 if(buffer_token)
116 {
117 m=(buffer_end-buffer_token)+1;
118
119 memcpy(buffer[buffer_active],buffer_token,m);
120
121 buffer_token=buffer[buffer_active];
122 }
123 }
124
125 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
126
127 buffer_ptr=buffer[buffer_active]+m;
128 buffer_end=buffer[buffer_active]+m+n-1;
129
130 if(n<=0)
131 return(1);
132 else
133 return(0);
134 }
135
136
/* Macros to simplify the parser (and make it look more like lex).

   They rely on names that exist inside ParseXML(): the variables 'state',
   'next_state' and 'fd' and the label 'new_state'. */

/* Change to state xx immediately by jumping back to the state switch. */
#define BEGIN(xx)   do{ state=(xx); goto new_state; } while(0)

/* Remember xx as the state to continue in (see the uses of next_state). */
#define NEXT(xx)    (next_state=(xx))

/* Mark the current position as the start of a token / close the token. */
#define START_TOKEN (buffer_token=buffer_ptr)
#define END_TOKEN   (buffer_token=NULL)

/* Step to the next input byte, refilling the buffer when the last valid byte
   has been used and changing to the LEX_EOF state if no more can be read. */
#define NEXT_CHAR                       \
 do{                                    \
    if(buffer_ptr!=buffer_end)          \
       buffer_ptr++;                    \
    else if(buffer_refill(fd))          \
       BEGIN(LEX_EOF);                  \
   } while(0)
152
153
154 /* -------- equivalent flex definition --------
155
156 S [ \t\r]
157 N (\n)
158
159 U1 [\x09\x0A\x0D\x20-\x7F]
160 U2 [\xC2-\xDF][\x80-\xBF]
161 U3a \xE0[\xA0-\xBF][\x80-\xBF]
162 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
163 U3c \xED[\x80-\x9F][\x80-\xBF]
164 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
165 U3 {U3a}|{U3b}|{U3c}|{U3d}
166 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
167 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
168 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
169 U4 {U4a}|{U4b}|{U4c}
170
171 U ({U1}|{U2}|{U3}|{U4})
172
173 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
174
175 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
176 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
177
178 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
179 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
180
181 letter [a-zA-Z]
182 digit [0-9]
183 xdigit [a-fA-F0-9]
184
185 namechar ({letter}|{digit}|[-._:])
186 namestart ({letter}|[_:])
187 name ({namestart}{namechar}*)
188
189 entityref (&{name};)
190 charref (&#({digit}+|x{xdigit}+);)
191
192 -------- equivalent flex definition -------- */
193
/* Tables containing character class definitions (advance declaration for data at end of file). */

/* Classification of each byte inside double / single quoted attribute values. */
static const unsigned char quotedD[256];
static const unsigned char quotedS[256];

/* One table per continuation byte of each multi-byte UTF-8 sequence type. */
static const unsigned char *U2[1];
static const unsigned char *U3a[2];
static const unsigned char *U3b[2];
static const unsigned char *U3c[2];
static const unsigned char *U3d[2];
static const unsigned char *U4a[3];
static const unsigned char *U4b[3];
static const unsigned char *U4c[3];

/* Membership tables indexed by byte value. */
static const unsigned char namestart[256];
static const unsigned char namechar[256];
static const unsigned char whitespace[256];
static const unsigned char digit[256];
static const unsigned char xdigit[256];
198
199
200 /*++++++++++++++++++++++++++++++++++++++
201 A function to call the callback function with the parameters needed.
202
203 int call_callback Returns 1 if the callback returned with an error.
204
205 const char *name The name of the tag.
206
207 int (*callback)() The callback function.
208
209 int type The type of tag (start and/or end).
210
211 int nattributes The number of attributes collected.
212
213 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
214 ++++++++++++++++++++++++++++++++++++++*/
215
216 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
217 {
218 switch(nattributes)
219 {
220 case 0: return (*callback)(name,type);
221 case 1: return (*callback)(name,type,attributes[0]);
222 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
223 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
224 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
225 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
226 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
227 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
228 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
229 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
230 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
231 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
232 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
233 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
234 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
235 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
236 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
237
238 default:
239 fprintf(stderr,"XML Parser: Error on line %llu: too many attributes for tag '%s' source code needs changing.\n",lineno,name);
240 exit(1);
241 }
242 }
243
244
245 /*++++++++++++++++++++++++++++++++++++++
246 Parse the XML and call the functions for each tag as seen.
247
248 int ParseXML Returns 0 if OK or something else in case of an error.
249
250 in fd The file descriptor of the file to parse.
251
252 xmltag **tags The array of pointers to tags for the top level.
253
254 int options A list of XML Parser options OR-ed together.
255 ++++++++++++++++++++++++++++++++++++++*/
256
257 int ParseXML(int fd,xmltag **tags,int options)
258 {
259 int i;
260 int state,next_state,after_attr;
261 unsigned char saved_buffer_ptr=0;
262 const unsigned char *quoted;
263
264 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
265 int attribute=0;
266
267 int stackdepth=0,stackused=0;
268 xmltag ***tags_stack=NULL;
269 xmltag **tag_stack=NULL;
270 xmltag *tag=NULL;
271
272 /* The actual parser. */
273
274 lineno=1;
275
276 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
277 buffer_token=NULL;
278
279 buffer_refill(fd);
280
281 BEGIN(LEX_STATE_INITIAL);
282
283 new_state:
284
285 switch(state)
286 {
287 /* ================ Parsing states ================ */
288
289
290 /* -------- equivalent flex definition --------
291
292 <INITIAL>"<!" { BEGIN(BANGTAG); }
293 <INITIAL>"</" { BEGIN(END_TAG1); }
294 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
295 <INITIAL>"<" { BEGIN(TAG_START); }
296
297 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
298
299 <INITIAL>{N} { lineno++; }
300 <INITIAL>{S}+ { }
301 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
302
303 -------- equivalent flex definition -------- */
304
305 case LEX_STATE_INITIAL:
306
307 while(1)
308 {
309 while(whitespace[(int)*buffer_ptr])
310 NEXT_CHAR;
311
312 if(*buffer_ptr=='\n')
313 {
314 NEXT_CHAR;
315
316 lineno++;
317 }
318 else if(*buffer_ptr=='<')
319 {
320 NEXT_CHAR;
321
322 if(*buffer_ptr=='/')
323 {
324 NEXT_CHAR;
325 BEGIN(LEX_STATE_END_TAG1);
326 }
327 else if(*buffer_ptr=='!')
328 {
329 NEXT_CHAR;
330 BEGIN(LEX_STATE_BANGTAG);
331 }
332 else if(*buffer_ptr=='?')
333 {
334 NEXT_CHAR;
335 BEGIN(LEX_STATE_XML_DECL_START);
336 }
337 else
338 BEGIN(LEX_STATE_TAG_START);
339 }
340 else if(*buffer_ptr=='>')
341 BEGIN(LEX_ERROR_CLOSE);
342 else
343 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
344 }
345
346 break;
347
348 /* -------- equivalent flex definition --------
349
350 <BANGTAG>"--" { BEGIN(COMMENT); }
351 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
352 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
353
354 -------- equivalent flex definition -------- */
355
356 case LEX_STATE_BANGTAG:
357
358 if(*buffer_ptr!='-')
359 BEGIN(LEX_ERROR_TAG_START);
360
361 NEXT_CHAR;
362
363 if(*buffer_ptr!='-')
364 BEGIN(LEX_ERROR_TAG_START);
365
366 NEXT_CHAR;
367 BEGIN(LEX_STATE_COMMENT);
368
369 break;
370
371 /* -------- equivalent flex definition --------
372
373 <COMMENT>"-->" { BEGIN(INITIAL); }
374 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
375 <COMMENT>"-" { }
376 <COMMENT>{N} { lineno++; }
377 <COMMENT>[^-\n]+ { }
378
379 -------- equivalent flex definition -------- */
380
381 case LEX_STATE_COMMENT:
382
383 while(1)
384 {
385 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
386 NEXT_CHAR;
387
388 if(*buffer_ptr=='-')
389 {
390 NEXT_CHAR;
391
392 if(*buffer_ptr!='-')
393 continue;
394
395 NEXT_CHAR;
396 if(*buffer_ptr=='>')
397 {
398 NEXT_CHAR;
399 BEGIN(LEX_STATE_INITIAL);
400 }
401
402 BEGIN(LEX_ERROR_COMMENT);
403 }
404 else /* if(*buffer_ptr=='\n') */
405 {
406 NEXT_CHAR;
407
408 lineno++;
409 }
410 }
411
412 break;
413
414 /* -------- equivalent flex definition --------
415
416 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
417 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
418 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
419
420 -------- equivalent flex definition -------- */
421
422 case LEX_STATE_XML_DECL_START:
423
424 START_TOKEN;
425
426 if(*buffer_ptr=='x')
427 {
428 NEXT_CHAR;
429 if(*buffer_ptr=='m')
430 {
431 NEXT_CHAR;
432 if(*buffer_ptr=='l')
433 {
434 NEXT_CHAR;
435
436 saved_buffer_ptr=*buffer_ptr;
437 *buffer_ptr=0;
438
439 NEXT(LEX_STATE_XML_DECL);
440 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
441 }
442 }
443 }
444
445 BEGIN(LEX_ERROR_XML_DECL_START);
446
447 /* -------- equivalent flex definition --------
448
449 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
450 <XML_DECL>{S}+ { }
451 <XML_DECL>{N} { lineno++; }
452 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
453 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
454
455 -------- equivalent flex definition -------- */
456
457 case LEX_STATE_XML_DECL:
458
459 while(1)
460 {
461 while(whitespace[(int)*buffer_ptr])
462 NEXT_CHAR;
463
464 if(namestart[(int)*buffer_ptr])
465 {
466 START_TOKEN;
467
468 NEXT_CHAR;
469 while(namechar[(int)*buffer_ptr])
470 NEXT_CHAR;
471
472 saved_buffer_ptr=*buffer_ptr;
473 *buffer_ptr=0;
474
475 after_attr=LEX_STATE_XML_DECL;
476 NEXT(LEX_STATE_ATTR_KEY);
477 BEGIN(LEX_FUNC_ATTR_KEY);
478 }
479 else if(*buffer_ptr=='?')
480 {
481 NEXT_CHAR;
482 if(*buffer_ptr=='>')
483 {
484 NEXT_CHAR;
485 NEXT(LEX_STATE_INITIAL);
486 BEGIN(LEX_FUNC_XML_DECL_FINISH);
487 }
488
489 BEGIN(LEX_ERROR_XML_DECL);
490 }
491 else if(*buffer_ptr=='\n')
492 {
493 NEXT_CHAR;
494 lineno++;
495 }
496 else
497 BEGIN(LEX_ERROR_XML_DECL);
498 }
499
500 break;
501
502 /* -------- equivalent flex definition --------
503
504 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
505 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
506 <TAG_START>. { return(LEX_ERROR_TAG_START); }
507
508 -------- equivalent flex definition -------- */
509
510 case LEX_STATE_TAG_START:
511
512 if(namestart[(int)*buffer_ptr])
513 {
514 START_TOKEN;
515
516 NEXT_CHAR;
517 while(namechar[(int)*buffer_ptr])
518 NEXT_CHAR;
519
520 saved_buffer_ptr=*buffer_ptr;
521 *buffer_ptr=0;
522
523 NEXT(LEX_STATE_TAG);
524 BEGIN(LEX_FUNC_TAG_BEGIN);
525 }
526
527 BEGIN(LEX_ERROR_TAG_START);
528
529 /* -------- equivalent flex definition --------
530
531 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
532 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
533 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
534
535 -------- equivalent flex definition -------- */
536
537 case LEX_STATE_END_TAG1:
538
539 if(namestart[(int)*buffer_ptr])
540 {
541 START_TOKEN;
542
543 NEXT_CHAR;
544 while(namechar[(int)*buffer_ptr])
545 NEXT_CHAR;
546
547 saved_buffer_ptr=*buffer_ptr;
548 *buffer_ptr=0;
549
550 NEXT(LEX_STATE_END_TAG2);
551 BEGIN(LEX_FUNC_TAG_POP);
552 }
553
554 BEGIN(LEX_ERROR_END_TAG);
555
556 /* -------- equivalent flex definition --------
557
558 <END_TAG2>">" { BEGIN(INITIAL); }
559 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
560 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
561
562 -------- equivalent flex definition -------- */
563
564 case LEX_STATE_END_TAG2:
565
566 if(*buffer_ptr=='>')
567 {
568 NEXT_CHAR;
569
570 BEGIN(LEX_STATE_INITIAL);
571 }
572
573 BEGIN(LEX_ERROR_END_TAG);
574
575 /* -------- equivalent flex definition --------
576
577 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
578 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
579 <TAG>{S}+ { }
580 <TAG>{N} { lineno++; }
581 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
582 <TAG>. { return(LEX_ERROR_TAG); }
583
584 -------- equivalent flex definition -------- */
585
586 case LEX_STATE_TAG:
587
588 while(1)
589 {
590 while(whitespace[(int)*buffer_ptr])
591 NEXT_CHAR;
592
593 if(namestart[(int)*buffer_ptr])
594 {
595 START_TOKEN;
596
597 NEXT_CHAR;
598 while(namechar[(int)*buffer_ptr])
599 NEXT_CHAR;
600
601 saved_buffer_ptr=*buffer_ptr;
602 *buffer_ptr=0;
603
604 after_attr=LEX_STATE_TAG;
605 NEXT(LEX_STATE_ATTR_KEY);
606 BEGIN(LEX_FUNC_ATTR_KEY);
607 }
608 else if(*buffer_ptr=='/')
609 {
610 NEXT_CHAR;
611 if(*buffer_ptr=='>')
612 {
613 NEXT_CHAR;
614 NEXT(LEX_STATE_INITIAL);
615 BEGIN(LEX_FUNC_TAG_FINISH);
616 }
617
618 BEGIN(LEX_ERROR_TAG);
619 }
620 else if(*buffer_ptr=='>')
621 {
622 NEXT_CHAR;
623 NEXT(LEX_STATE_INITIAL);
624 BEGIN(LEX_FUNC_TAG_PUSH);
625 }
626 else if(*buffer_ptr=='\n')
627 {
628 NEXT_CHAR;
629 lineno++;
630 }
631 else
632 BEGIN(LEX_ERROR_TAG);
633 }
634
635 break;
636
637 /* -------- equivalent flex definition --------
638
639 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
640 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
641 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
642
643 -------- equivalent flex definition -------- */
644
645 case LEX_STATE_ATTR_KEY:
646
647 if(*buffer_ptr=='=')
648 {
649 NEXT_CHAR;
650 BEGIN(LEX_STATE_ATTR_VAL);
651 }
652
653 BEGIN(LEX_ERROR_ATTR);
654
655 /* -------- equivalent flex definition --------
656
657 <ATTR_VAL>\" { BEGIN(DQUOTED); }
658 <ATTR_VAL>\' { BEGIN(SQUOTED); }
659 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
660 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
661
662 -------- equivalent flex definition -------- */
663
664 case LEX_STATE_ATTR_VAL:
665
666 if(*buffer_ptr=='"')
667 {
668 NEXT_CHAR;
669 BEGIN(LEX_STATE_DQUOTED);
670 }
671 else if(*buffer_ptr=='\'')
672 {
673 NEXT_CHAR;
674 BEGIN(LEX_STATE_SQUOTED);
675 }
676
677 BEGIN(LEX_ERROR_ATTR);
678
679 /* -------- equivalent flex definition --------
680
681 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
682 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
683 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
684 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
685 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
686 <DQUOTED>{UquotedD} { }
687 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
688 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
689
690 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
691 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
692 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
693 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
694 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
695 <SQUOTED>{UquotedS} { append_string(yytext); }
696 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
697 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
698
699 -------- equivalent flex definition -------- */
700
701 case LEX_STATE_DQUOTED:
702 case LEX_STATE_SQUOTED:
703
704 if(state==LEX_STATE_DQUOTED)
705 quoted=quotedD;
706 else
707 quoted=quotedS;
708
709 START_TOKEN;
710
711 while(1)
712 {
713 switch(quoted[(int)*buffer_ptr])
714 {
715 case 10: /* U1 - used by all tag keys and many values */
716 do
717 {
718 NEXT_CHAR;
719 }
720 while(quoted[(int)*buffer_ptr]==10);
721 break;
722
723 case 20: /* U2 */
724 NEXT_CHAR;
725 if(!U2[0][(int)*buffer_ptr])
726 BEGIN(LEX_ERROR_ATTR_VAL);
727 NEXT_CHAR;
728 break;
729
730 case 31: /* U3a */
731 NEXT_CHAR;
732 if(!U3a[0][(int)*buffer_ptr])
733 BEGIN(LEX_ERROR_ATTR_VAL);
734 NEXT_CHAR;
735 if(!U3a[1][(int)*buffer_ptr])
736 BEGIN(LEX_ERROR_ATTR_VAL);
737 NEXT_CHAR;
738 break;
739
740 case 32: /* U3b */
741 NEXT_CHAR;
742 if(!U3b[0][(int)*buffer_ptr])
743 BEGIN(LEX_ERROR_ATTR_VAL);
744 NEXT_CHAR;
745 if(!U3b[1][(int)*buffer_ptr])
746 BEGIN(LEX_ERROR_ATTR_VAL);
747 NEXT_CHAR;
748 break;
749
750 case 33: /* U3c */
751 NEXT_CHAR;
752 if(!U3c[0][(int)*buffer_ptr])
753 BEGIN(LEX_ERROR_ATTR_VAL);
754 NEXT_CHAR;
755 if(!U3c[1][(int)*buffer_ptr])
756 BEGIN(LEX_ERROR_ATTR_VAL);
757 NEXT_CHAR;
758 break;
759
760 case 34: /* U3d */
761 NEXT_CHAR;
762 if(!U3d[0][(int)*buffer_ptr])
763 BEGIN(LEX_ERROR_ATTR_VAL);
764 NEXT_CHAR;
765 if(!U3d[1][(int)*buffer_ptr])
766 BEGIN(LEX_ERROR_ATTR_VAL);
767 NEXT_CHAR;
768 break;
769
770 case 41: /* U4a */
771 NEXT_CHAR;
772 if(!U4a[0][(int)*buffer_ptr])
773 BEGIN(LEX_ERROR_ATTR_VAL);
774 NEXT_CHAR;
775 if(!U4a[1][(int)*buffer_ptr])
776 BEGIN(LEX_ERROR_ATTR_VAL);
777 NEXT_CHAR;
778 if(!U4a[2][(int)*buffer_ptr])
779 BEGIN(LEX_ERROR_ATTR_VAL);
780 NEXT_CHAR;
781 break;
782
783 case 42: /* U4b */
784 NEXT_CHAR;
785 if(!U4b[0][(int)*buffer_ptr])
786 BEGIN(LEX_ERROR_ATTR_VAL);
787 NEXT_CHAR;
788 if(!U4b[1][(int)*buffer_ptr])
789 BEGIN(LEX_ERROR_ATTR_VAL);
790 NEXT_CHAR;
791 if(!U4b[2][(int)*buffer_ptr])
792 BEGIN(LEX_ERROR_ATTR_VAL);
793 NEXT_CHAR;
794 break;
795
796 case 43: /* U4c */
797 NEXT_CHAR;
798 if(!U4c[0][(int)*buffer_ptr])
799 BEGIN(LEX_ERROR_ATTR_VAL);
800 NEXT_CHAR;
801 if(!U4c[1][(int)*buffer_ptr])
802 BEGIN(LEX_ERROR_ATTR_VAL);
803 NEXT_CHAR;
804 if(!U4c[2][(int)*buffer_ptr])
805 BEGIN(LEX_ERROR_ATTR_VAL);
806 NEXT_CHAR;
807 break;
808
809 case 50: /* entityref or charref */
810 NEXT_CHAR;
811
812 if(*buffer_ptr=='#') /* charref */
813 {
814 int charref_len=3;
815
816 NEXT_CHAR;
817 if(digit[(int)*buffer_ptr]) /* decimal */
818 {
819 NEXT_CHAR;
820 charref_len++;
821
822 while(digit[(int)*buffer_ptr])
823 {
824 NEXT_CHAR;
825 charref_len++;
826 }
827
828 if(*buffer_ptr!=';')
829 BEGIN(LEX_ERROR_ATTR_VAL);
830 }
831 else if(*buffer_ptr=='x') /* hex */
832 {
833 NEXT_CHAR;
834 charref_len++;
835
836 while(xdigit[(int)*buffer_ptr])
837 {
838 NEXT_CHAR;
839 charref_len++;
840 }
841
842 if(*buffer_ptr!=';')
843 BEGIN(LEX_ERROR_ATTR_VAL);
844 }
845 else /* other */
846 BEGIN(LEX_ERROR_ATTR_VAL);
847
848 NEXT_CHAR;
849
850 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
851 {
852 const char *str;
853
854 saved_buffer_ptr=*buffer_ptr;
855 *buffer_ptr=0;
856
857 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
858
859 if(!str)
860 {
861 buffer_ptr-=charref_len;
862 BEGIN(LEX_ERROR_CHAR_REF);
863 }
864
865 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
866 memcpy(buffer_ptr-strlen(str),str,strlen(str));
867
868 *buffer_ptr=saved_buffer_ptr;
869 }
870 }
871 else if(namestart[(int)*buffer_ptr]) /* entityref */
872 {
873 int entityref_len=3;
874
875 NEXT_CHAR;
876 while(namechar[(int)*buffer_ptr])
877 {
878 NEXT_CHAR;
879 entityref_len++;
880 }
881
882 if(*buffer_ptr!=';')
883 BEGIN(LEX_ERROR_ATTR_VAL);
884
885 NEXT_CHAR;
886
887 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
888 {
889 const char *str;
890
891 saved_buffer_ptr=*buffer_ptr;
892 *buffer_ptr=0;
893
894 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
895
896 if(!str)
897 {
898 buffer_ptr-=entityref_len;
899 BEGIN(LEX_ERROR_ENTITY_REF);
900 }
901
902 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
903 memcpy(buffer_ptr-strlen(str),str,strlen(str));
904
905 *buffer_ptr=saved_buffer_ptr;
906 }
907 }
908 else /* other */
909 BEGIN(LEX_ERROR_ATTR_VAL);
910
911 break;
912
913 case 99: /* quote */
914 *buffer_ptr=0;
915 NEXT_CHAR;
916
917 NEXT(after_attr);
918 BEGIN(LEX_FUNC_ATTR_VAL);
919
920 default: /* other */
921 BEGIN(LEX_ERROR_ATTR_VAL);
922 }
923 }
924
925 break;
926
927
928 /* ================ Functional states ================ */
929
930
931 /* The start of a tag for an XML declaration */
932
933 case LEX_FUNC_XML_DECL_BEGIN:
934
935 if(tag_stack)
936 BEGIN(LEX_ERROR_XML_NOT_FIRST);
937
938 /* The start of a tag for an element */
939
940 case LEX_FUNC_TAG_BEGIN:
941
942 tag=NULL;
943
944 for(i=0;tags[i];i++)
945 if(!strcasecmp((char*)buffer_token,tags[i]->name))
946 {
947 tag=tags[i];
948
949 for(i=0;i<tag->nattributes;i++)
950 attributes[i]=NULL;
951
952 break;
953 }
954
955 if(tag==NULL)
956 BEGIN(LEX_ERROR_UNEXP_TAG);
957
958 END_TOKEN;
959
960 *buffer_ptr=saved_buffer_ptr;
961 BEGIN(next_state);
962
963 /* The end of the start-tag for an element */
964
965 case LEX_FUNC_TAG_PUSH:
966
967 if(stackused==stackdepth)
968 {
969 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
970 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
971 }
972
973 tag_stack [stackused]=tag;
974 tags_stack[stackused]=tags;
975 stackused++;
976
977 if(tag->callback)
978 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
979 BEGIN(LEX_ERROR_CALLBACK);
980
981 tags=tag->subtags;
982
983 BEGIN(next_state);
984
985 /* The end of the empty-element-tag for an XML declaration */
986
987 case LEX_FUNC_XML_DECL_FINISH:
988
989 /* The end of the empty-element-tag for an element */
990
991 case LEX_FUNC_TAG_FINISH:
992
993 if(tag->callback)
994 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
995 BEGIN(LEX_ERROR_CALLBACK);
996
997 if(stackused>0)
998 tag=tag_stack[stackused-1];
999 else
1000 tag=NULL;
1001
1002 BEGIN(next_state);
1003
1004 /* The end of the end-tag for an element */
1005
1006 case LEX_FUNC_TAG_POP:
1007
1008 stackused--;
1009 tags=tags_stack[stackused];
1010 tag =tag_stack [stackused];
1011
1012 if(strcmp((char*)buffer_token,tag->name))
1013 BEGIN(LEX_ERROR_UNBALANCED);
1014
1015 if(stackused<0)
1016 BEGIN(LEX_ERROR_NO_START);
1017
1018 for(i=0;i<tag->nattributes;i++)
1019 attributes[i]=NULL;
1020
1021 if(tag->callback)
1022 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1023 BEGIN(LEX_ERROR_CALLBACK);
1024
1025 if(stackused>0)
1026 tag=tag_stack[stackused-1];
1027 else
1028 tag=NULL;
1029
1030 END_TOKEN;
1031
1032 *buffer_ptr=saved_buffer_ptr;
1033 BEGIN(next_state);
1034
1035 /* An attribute key */
1036
1037 case LEX_FUNC_ATTR_KEY:
1038
1039 attribute=-1;
1040
1041 for(i=0;i<tag->nattributes;i++)
1042 if(!strcasecmp((char*)buffer_token,tag->attributes[i]))
1043 {
1044 attribute=i;
1045
1046 break;
1047 }
1048
1049 if(attribute==-1)
1050 {
1051 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1052 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1053 BEGIN(LEX_ERROR_UNEXP_ATT);
1054 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1055 fprintf(stderr,"XML Parser: Warning on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1056 }
1057
1058 END_TOKEN;
1059
1060 *buffer_ptr=saved_buffer_ptr;
1061 BEGIN(next_state);
1062
1063 /* An attribute value */
1064
1065 case LEX_FUNC_ATTR_VAL:
1066
1067 if(tag->callback && attribute!=-1)
1068 attributes[attribute]=buffer_token;
1069
1070 END_TOKEN;
1071
1072 BEGIN(next_state);
1073
1074 /* End of file */
1075
1076 case LEX_EOF:
1077
1078 if(tag)
1079 BEGIN(LEX_ERROR_UNEXP_EOF);
1080
1081 break;
1082
1083
1084 /* ================ Error states ================ */
1085
1086
1087 case LEX_ERROR_TAG_START:
1088 fprintf(stderr,"XML Parser: Error on line %llu: character '<' seen not at start of tag.\n",lineno);
1089 break;
1090
1091 case LEX_ERROR_XML_DECL_START:
1092 fprintf(stderr,"XML Parser: Error on line %llu: characters '<?' seen not at start of XML declaration.\n",lineno);
1093 break;
1094
1095 case LEX_ERROR_TAG:
1096 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1097 break;
1098
1099 case LEX_ERROR_XML_DECL:
1100 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1101 break;
1102
1103 case LEX_ERROR_ATTR:
1104 fprintf(stderr,"XML Parser: Error on line %llu: invalid attribute definition seen in tag.\n",lineno);
1105 break;
1106
1107 case LEX_ERROR_END_TAG:
1108 fprintf(stderr,"XML Parser: Error on line %llu: invalid character seen in end-tag.\n",lineno);
1109 break;
1110
1111 case LEX_ERROR_COMMENT:
1112 fprintf(stderr,"XML Parser: Error on line %llu: invalid comment seen.\n",lineno);
1113 break;
1114
1115 case LEX_ERROR_CLOSE:
1116 fprintf(stderr,"XML Parser: Error on line %llu: character '>' seen not at end of tag.\n",lineno);
1117 break;
1118
1119 case LEX_ERROR_ATTR_VAL:
1120 fprintf(stderr,"XML Parser: Error on line %llu: invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1121 break;
1122
1123 case LEX_ERROR_ENTITY_REF:
1124 fprintf(stderr,"XML Parser: Error on line %llu: invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1125 break;
1126
1127 case LEX_ERROR_CHAR_REF:
1128 fprintf(stderr,"XML Parser: Error on line %llu: invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1129 break;
1130
1131 case LEX_ERROR_TEXT_OUTSIDE:
1132 fprintf(stderr,"XML Parser: Error on line %llu: non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1133 break;
1134
1135 case LEX_ERROR_UNEXP_TAG:
1136 fprintf(stderr,"XML Parser: Error on line %llu: unexpected tag '%s'.\n",lineno,buffer_token);
1137 break;
1138
1139 case LEX_ERROR_UNBALANCED:
1140 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1141 break;
1142
1143 case LEX_ERROR_NO_START:
1144 fprintf(stderr,"XML Parser: Error on line %llu: end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1145 break;
1146
1147 case LEX_ERROR_UNEXP_ATT:
1148 fprintf(stderr,"XML Parser: Error on line %llu: unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1149 break;
1150
1151 case LEX_ERROR_UNEXP_EOF:
1152 fprintf(stderr,"XML Parser: Error on line %llu: end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1153 break;
1154
1155 case LEX_ERROR_XML_NOT_FIRST:
1156 fprintf(stderr,"XML Parser: Error on line %llu: XML declaration '<?xml...>' not before all other tags.\n",lineno);
1157 break;
1158 }
1159
1160 /* Delete the tagdata */
1161
1162 if(stackdepth)
1163 {
1164 free(tag_stack);
1165 free(tags_stack);
1166 }
1167
1168 return(state);
1169 }
1170
1171
1172 /*++++++++++++++++++++++++++++++++++++++
1173 Return the current parser line number.
1174
1175 uint64_t ParseXML_LineNumber Returns the line number.
1176 ++++++++++++++++++++++++++++++++++++++*/
1177
1178 uint64_t ParseXML_LineNumber(void)
1179 {
1180 return(lineno);
1181 }
1182
1183
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML entity reference into an ASCII string.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to the replacement decoded
                                   string or NULL if the reference is not one of
                                   the five that are recognised.

  const char *string The entity reference string (complete with the leading '&'
                     and the trailing ';'); the comparison is case sensitive.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The recognised references and the text that each one stands for. */

 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 entities[]={{"&amp;" ,"&" },
             {"&lt;"  ,"<" },
             {"&gt;"  ,">" },
             {"&apos;","'" },
             {"&quot;","\""}};

 size_t e;

 for(e=0;e<sizeof(entities)/sizeof(entities[0]);e++)
    if(strcmp(string,entities[e].reference)==0)
       return entities[e].replacement;

 return NULL;
}
1201
1202
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into the UTF-8 string for that character.

  char *ParseXML_Decode_Char_Ref Returns a pointer to a static buffer holding the NUL terminated
                                 UTF-8 encoding; it is overwritten by the next call.

  const char *string The character reference string; the number is read from offset 2 (decimal,
                     "&#NNN;") or from offset 3 when the character at offset 2 is 'x'
                     (hexadecimal, "&#xHHH;").

  Values that are not Unicode scalar values (negative, surrogates U+D800-U+DFFF, or above
  U+10FFFF) are replaced by U+FFFD (REPLACEMENT CHARACTER).  A value of zero gives an empty string.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Anything that cannot be encoded as well-formed UTF-8 becomes U+FFFD */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 /* The range limits are exclusive upper bounds so that U+07FF and U+FFFF use the
    shortest encoding and not an overlong one. */

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F => 0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0|((unicode>>6)&0x1F));
    result[1]=(char)(0x80|( unicode    &0x3F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0|((unicode>>12)&0x0F));
    result[1]=(char)(0x80|((unicode>>6 )&0x3F));
    result[2]=(char)(0x80|( unicode     &0x3F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0|((unicode>>18)&0x07));
    result[1]=(char)(0x80|((unicode>>12)&0x3F));
    result[2]=(char)(0x80|((unicode>>6 )&0x3F));
    result[3]=(char)(0x80|( unicode     &0x3F));
    result[4]=0;
   }

 return(result);
}
1258
1259
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if no change is needed, otherwise
                                 a newly allocated encoded string, or NULL if memory could not be
                                 allocated.  A result that differs from the argument was obtained
                                 from malloc()/realloc().

  const char *string The NUL terminated string to convert (treated as UTF-8).

  The characters < > & ' " are replaced by the predefined entity references.  Bytes below 32 and
  all multi-byte UTF-8 sequences are replaced by hexadecimal character references; bytes that do
  not start a recognised UTF-8 sequence are replaced by "&#xFFFD;".
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first byte that cannot be copied unchanged */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the unchanged prefix */

 size=i+256;

 result=malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 /* Encode the remainder */

 for(;in[i];i++)
   {
    unsigned char c=in[i];

    /* One pass writes at most 10 bytes ("&#x" + 6 hex digits + ";") and one more byte
       is needed for the terminator, so make sure that 11 bytes are always free. */

    if(size-j<11)
      {
       char *bigger;

       size+=256;

       bigger=realloc(result,size);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
      }

    if(c=='\'')
      {
       memcpy(result+j,"&apos;",6); j+=6;
      }
    else if(c=='&')
      {
       memcpy(result+j,"&amp;",5); j+=5;
      }
    else if(c=='"')
      {
       memcpy(result+j,"&quot;",6); j+=6;
      }
    else if(c=='<')
      {
       memcpy(result+j,"&lt;",4); j+=4;
      }
    else if(c=='>')
      {
       memcpy(result+j,"&gt;",4); j+=4;
      }
    else if(c>=32 && c<=127)
       result[j++]=(char)c;
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8; 'i' is left on the last byte of the sequence.  The continuation
          byte tests stop at the terminating NUL so there is no read past the end. */

       if((c&0x80)==0)
         {
          /* 0000 0000-0000 007F => 0xxxxxxx (control characters) */
          unicode=c;
         }
       else if((c&0xE0)==0xC0 && (c&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x1F)<<6;
          unicode|=(unsigned int)(in[i+1]&0x3F);
          i+=1;
         }
       else if((c&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x0F)<<12;
          unicode|=(unsigned int)(in[i+1]&0x3F)<<6;
          unicode|=(unsigned int)(in[i+2]&0x3F);
          i+=2;
         }
       else if((c&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x07)<<18;
          unicode|=(unsigned int)(in[i+1]&0x3F)<<12;
          unicode|=(unsigned int)(in[i+2]&0x3F)<<6;
          unicode|=(unsigned int)(in[i+3]&0x3F);
          i+=3;
         }
       else
          unicode=0xFFFD;

       /* Output the character reference using 2, 4 or 6 hex digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1400
1401
1402 /*++++++++++++++++++++++++++++++++++++++
1403 Check that a string really is an integer.
1404
1405 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1406
1407 const char *string The string to be parsed.
1408 ++++++++++++++++++++++++++++++++++++++*/
1409
1410 int ParseXML_IsInteger(const char *string)
1411 {
1412 const unsigned char *p=(unsigned char*)string;
1413
1414 if(*p=='-' || *p=='+')
1415 p++;
1416
1417 while(digit[(int)*p])
1418 p++;
1419
1420 if(*p)
1421 return(0);
1422 else
1423 return(1);
1424 }
1425
1426
1427 /*++++++++++++++++++++++++++++++++++++++
1428 Check that a string really is a floating point number.
1429
1430 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1431
1432 const char *string The string to be parsed.
1433 ++++++++++++++++++++++++++++++++++++++*/
1434
1435 int ParseXML_IsFloating(const char *string)
1436 {
1437 const unsigned char *p=(unsigned char*)string;
1438
1439 if(*p=='-' || *p=='+')
1440 p++;
1441
1442 while(digit[(int)*p] || *p=='.')
1443 p++;
1444
1445 if(*p=='e' || *p=='E')
1446 {
1447 p++;
1448
1449 if(*p=='-' || *p=='+')
1450 p++;
1451
1452 while(digit[*p])
1453 p++;
1454 }
1455
1456 if(*p)
1457 return(0);
1458 else
1459 return(1);
1460 }
1461
1462
/* Byte classification table for the contents of a double-quoted value.
   Entries that are not listed are 0.  The values stored are:
     10 = tab, LF, CR and the ASCII bytes 0x20-0x7f other than the ones below,
     99 = '"' (0x22),
     50 = '&' (0x26),
      0 = '<', '>', the other control bytes, 0x80-0xc1 and 0xf5-0xff,
     20, 31-34, 41-43 = the UTF-8 lead byte ranges C2-DF, E0, E1-EC, ED, EE-EF, F0, F1-F3, F4.
   NOTE(review): how the lexer acts on each value is not visible in this part of the file. */
static const unsigned char quotedD[256]={
 [0x09]=10, [0x0a]=10, [0x0d]=10,

 [0x20]=10,10,99,10, 10,10,50,10, 10,10,10,10, 10,10,10,10,   /* 0x22 is the double quote, 0x26 is '&' */
 [0x30]=10,10,10,10, 10,10,10,10, 10,10,10,10,  0,10, 0,10,   /* 0x3c '<' and 0x3e '>' are zero */
 [0x40]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x50]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x60]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x70]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,

 [0xc2]=20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,         /* 0xc2-0xcf */
 [0xd0]=20,20,20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,   /* 0xd0-0xdf */
 [0xe0]=31,32,32,32, 32,32,32,32, 32,32,32,32, 32,33,34,34,   /* 0xe0-0xef */
 [0xf0]=41,42,42,42, 43                                       /* 0xf0-0xf4 */
};
1480
/* Byte classification table for the contents of a single-quoted value.
   Entries that are not listed are 0.  The values stored are:
     10 = tab, LF, CR and the ASCII bytes 0x20-0x7f other than the ones below,
     99 = '\'' (0x27),
     50 = '&' (0x26),
      0 = '<', '>', the other control bytes, 0x80-0xc1 and 0xf5-0xff,
     20, 31-34, 41-43 = the UTF-8 lead byte ranges C2-DF, E0, E1-EC, ED, EE-EF, F0, F1-F3, F4.
   NOTE(review): how the lexer acts on each value is not visible in this part of the file. */
static const unsigned char quotedS[256]={
 [0x09]=10, [0x0a]=10, [0x0d]=10,

 [0x20]=10,10,10,10, 10,10,50,99, 10,10,10,10, 10,10,10,10,   /* 0x26 is '&', 0x27 is the single quote */
 [0x30]=10,10,10,10, 10,10,10,10, 10,10,10,10,  0,10, 0,10,   /* 0x3c '<' and 0x3e '>' are zero */
 [0x40]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x50]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x60]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,
 [0x70]=10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,

 [0xc2]=20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,         /* 0xc2-0xcf */
 [0xd0]=20,20,20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,   /* 0xd0-0xdf */
 [0xe0]=31,32,32,32, 32,32,32,32, 32,32,32,32, 32,33,34,34,   /* 0xe0-0xef */
 [0xf0]=41,42,42,42, 43                                       /* 0xf0-0xf4 */
};
1498
/* Membership table: 1 for bytes 0x80-0x8f, 0 for every other byte. */
static const unsigned char U_80_8F[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1    /* 0x80-0x8f */
};
1516
/* Membership table: 1 for bytes 0x80-0x9f, 0 for every other byte. */
static const unsigned char U_80_9F[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0x80-0x8f */
 [0x90]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1    /* 0x90-0x9f */
};
1534
/* Membership table: 1 for bytes 0x80-0xbf, 0 for every other byte. */
static const unsigned char U_80_BF[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0x80-0x8f */
 [0x90]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0x90-0x9f */
 [0xa0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0xa0-0xaf */
 [0xb0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1    /* 0xb0-0xbf */
};
1552
/* Membership table: 1 for bytes 0x90-0xbf, 0 for every other byte. */
static const unsigned char U_90_BF[256]={
 [0x90]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0x90-0x9f */
 [0xa0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0xa0-0xaf */
 [0xb0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1    /* 0xb0-0xbf */
};
1570
/* Membership table: 1 for bytes 0xa0-0xbf, 0 for every other byte. */
static const unsigned char U_A0_BF[256]={
 [0xa0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,   /* 0xa0-0xaf */
 [0xb0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1    /* 0xb0-0xbf */
};
1588
1589 /* Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF. */
1590 static const unsigned char *U2[1]={ U_80_BF };
1591
1592 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF. */
1593 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1594
1595 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF. */
1596 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1597
1598 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML). */
1599 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1600
1601 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled). */
1602 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1603
1604 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF. */
1605 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1606
1607 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF. */
1608 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1609
1610 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML). */
1611 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1612
/* Membership table for bytes that may start a name: 1 for ':', '_', 'A'-'Z' and 'a'-'z',
   0 for every other byte (including all bytes of 0x80 and above). */
static const unsigned char namestart[256]={
 [0x3a]=1,                                                          /* ':' */
 [0x41]=1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 'A'-'Z' */
 [0x5f]=1,                                                          /* '_' */
 [0x61]=1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1        /* 'a'-'z' */
};
1630
/* Membership table for bytes that may continue a name: 1 for '-', '.', '0'-'9', ':', '_',
   'A'-'Z' and 'a'-'z', 0 for every other byte (including all bytes of 0x80 and above). */
static const unsigned char namechar[256]={
 [0x2d]=1,1,                                                        /* '-' and '.' */
 [0x30]=1,1,1,1,1,1,1,1,1,1, 1,                                     /* '0'-'9' and ':' */
 [0x41]=1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 'A'-'Z' */
 [0x5f]=1,                                                          /* '_' */
 [0x61]=1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1        /* 'a'-'z' */
};
1648
/* Membership table for whitespace: 1 for tab (0x09), carriage return (0x0d) and space (0x20),
   0 for every other byte.
   NOTE(review): line feed (0x0a) is 0 in this table - presumably the lexer handles newlines
   separately; confirm before changing. */
static const unsigned char whitespace[256]={
 [0x09]=1,    /* tab */
 [0x0d]=1,    /* carriage return */
 [0x20]=1     /* space */
};
1666
/* Membership table for decimal digits: 1 for '0'-'9', 0 for every other byte. */
static const unsigned char digit[256]={
 [0x30]=1,1,1,1,1, 1,1,1,1,1    /* '0'-'9' */
};
1684
/* Membership table for hexadecimal digits: 1 for '0'-'9', 'A'-'F' and 'a'-'f',
   0 for every other byte. */
static const unsigned char xdigit[256]={
 [0x30]=1,1,1,1,1, 1,1,1,1,1,   /* '0'-'9' */
 [0x41]=1,1,1,1,1,1,            /* 'A'-'F' */
 [0x61]=1,1,1,1,1,1             /* 'a'-'f' */
};