Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/xmlparse.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1505 - (show annotations) (download) (as text)
Fri Jan 31 15:31:42 2014 UTC (11 years, 1 month ago) by amb
File MIME type: text/x-csrc
File size: 68431 byte(s)
Output HTML4 strict DTD compliant HTML (fix bug with using '&apos;' instead of
'&#39;').

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2014 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <inttypes.h>
29 #include <stdint.h>
30 #include <string.h>
31 #include <strings.h>
32 #include <ctype.h>
33
34 #include "xmlparse.h"
35
36
/* Parser states */

/* All of the values below are stored in the 'state' variable of ParseXML(),
   which returns the final value of 'state' to its caller. */

/* State entered by NEXT_CHAR when buffer_refill() reports that no more data is available. */
#define LEX_EOF 0

/* Functional states: handled in the "Functional states" section of ParseXML(),
   they act on a recognised token and then continue with BEGIN(next_state). */
#define LEX_FUNC_TAG_BEGIN 1
#define LEX_FUNC_XML_DECL_BEGIN 2
#define LEX_FUNC_TAG_POP 3
#define LEX_FUNC_TAG_PUSH 4
#define LEX_FUNC_XML_DECL_FINISH 5
#define LEX_FUNC_TAG_FINISH 6
#define LEX_FUNC_ATTR_KEY 7
#define LEX_FUNC_ATTR_VAL 8

/* Parsing states: handled in the "Parsing states" section of ParseXML(),
   they consume characters from the input buffer. */
#define LEX_STATE_INITIAL 10
#define LEX_STATE_BANGTAG 11
#define LEX_STATE_COMMENT 12
#define LEX_STATE_XML_DECL_START 13
#define LEX_STATE_XML_DECL 14
#define LEX_STATE_TAG_START 15
#define LEX_STATE_TAG 16
#define LEX_STATE_ATTR_KEY 17
#define LEX_STATE_ATTR_VAL 18
#define LEX_STATE_END_TAG1 19
#define LEX_STATE_END_TAG2 20
#define LEX_STATE_DQUOTED 21
#define LEX_STATE_SQUOTED 22

/* Error states raised directly by the parsing states (unexpected characters in the input). */
#define LEX_ERROR_TAG_START 101
#define LEX_ERROR_XML_DECL_START 102
#define LEX_ERROR_TAG 103
#define LEX_ERROR_XML_DECL 104
#define LEX_ERROR_ATTR 105
#define LEX_ERROR_END_TAG 106
#define LEX_ERROR_COMMENT 107
#define LEX_ERROR_CLOSE 108
#define LEX_ERROR_ATTR_VAL 109
#define LEX_ERROR_ENTITY_REF 110
#define LEX_ERROR_CHAR_REF 111
#define LEX_ERROR_TEXT_OUTSIDE 112

/* Error states raised by the functional states or at end of file (tags or attributes that do not fit the expected structure). */
#define LEX_ERROR_UNEXP_TAG 201
#define LEX_ERROR_UNBALANCED 202
#define LEX_ERROR_NO_START 203
#define LEX_ERROR_UNEXP_ATT 204
#define LEX_ERROR_UNEXP_EOF 205
#define LEX_ERROR_XML_NOT_FIRST 206

/* LEX_ERROR_CALLBACK is raised when a tag callback returns non-zero; no message is printed for it by the error states.
   NOTE(review): LEX_ERROR_OUT_OF_MEMORY is not referenced in the visible part of this file - confirm whether it is used elsewhere. */
#define LEX_ERROR_OUT_OF_MEMORY 254
#define LEX_ERROR_CALLBACK 255
86
87
/* Parsing variables and functions */

/* Line number of the input currently being parsed. */
static uint64_t lineno;

/* A pair of input buffers; buffer_active selects the one currently in use. */
static unsigned char buffer[2][16384];

/* buffer_token: start of the token being collected (NULL when there is none).
   buffer_end:   last valid byte of data in the active buffer.
   buffer_ptr:   the byte currently being examined by the parser. */
static unsigned char *buffer_token,*buffer_end,*buffer_ptr;
static int buffer_active=0;


/*++++++++++++++++++++++++++++++++++++++
  Read more data into the active buffer, keeping the token that starts at
  buffer_token (if any) contiguous in memory.

  When the active buffer is more than half full the other buffer becomes the
  active one and the bytes of the unfinished token are copied to its start;
  otherwise the new data is appended after the existing data.

  On return buffer_ptr addresses the first newly read byte and buffer_end the
  last one.

  int buffer_refill Return 0 if data was read or 1 if read() returned no data
                    (end of file or a read error).

  int fd The file descriptor to read from.
  ++++++++++++++++++++++++++++++++++++++*/

static inline int buffer_refill(int fd)
{
  size_t keep;
  ssize_t nread;
  unsigned char *dest;

  /* Number of bytes that the active buffer holds at the moment. */
  keep=(buffer_end-buffer[buffer_active])+1;

  if(keep>(sizeof(buffer[0])/2))
  {
    /* More than half full: continue in the other buffer. */
    buffer_active=!buffer_active;

    if(buffer_token)
    {
      /* Carry the unfinished token over so that it stays contiguous. */
      keep=(buffer_end-buffer_token)+1;

      memcpy(buffer[buffer_active],buffer_token,keep);

      buffer_token=buffer[buffer_active];
    }
    else
      keep=0;
  }

  dest=buffer[buffer_active]+keep;

  nread=read(fd,dest,sizeof(buffer[0])-keep);

  buffer_ptr=dest;
  buffer_end=dest+nread-1;

  return (nread<=0)?1:0;
}
138
139
/* Macros to simplify the parser (and make it look more like lex) */

/* These macros expand to code that uses the local variables 'state',
   'next_state' and 'fd' and the label 'new_state', so they can only be used
   inside a function that provides them (ParseXML() in this file). */

/* BEGIN(xx): switch to state xx now - stores it in 'state' and jumps to 'new_state'.
   NEXT(xx):  only records xx in 'next_state'; the functional states end with BEGIN(next_state). */
#define BEGIN(xx) do{ state=(xx); goto new_state; } while(0)
#define NEXT(xx) next_state=(xx)

/* START_TOKEN marks the current read position as the start of a token, which
   buffer_refill() then keeps contiguous; END_TOKEN clears the mark. */
#define START_TOKEN buffer_token=buffer_ptr
#define END_TOKEN buffer_token=NULL

/* NEXT_CHAR advances to the next input byte.  When the current byte is the
   last one in the buffer it refills the buffer instead, and changes to the
   LEX_EOF state if no more data could be read. */
#define NEXT_CHAR \
 do{ \
 if(buffer_ptr==buffer_end) \
 { if(buffer_refill(fd)) BEGIN(LEX_EOF); } \
 else \
 buffer_ptr++; \
 } while(0)
155
156
157 /* -------- equivalent flex definition --------
158
159 S [ \t\r]
160 N (\n)
161
162 U1 [\x09\x0A\x0D\x20-\x7F]
163 U2 [\xC2-\xDF][\x80-\xBF]
164 U3a \xE0[\xA0-\xBF][\x80-\xBF]
165 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
166 U3c \xED[\x80-\x9F][\x80-\xBF]
167 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
168 U3 {U3a}|{U3b}|{U3c}|{U3d}
169 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
170 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
171 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
172 U4 {U4a}|{U4b}|{U4c}
173
174 U ({U1}|{U2}|{U3}|{U4})
175
176 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
177
178 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
179 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
180
181 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
182 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
183
184 letter [a-zA-Z]
185 digit [0-9]
186 xdigit [a-fA-F0-9]
187
188 namechar ({letter}|{digit}|[-._:])
189 namestart ({letter}|[_:])
190 name ({namestart}{namechar}*)
191
192 entityref (&{name};)
193 charref (&#({digit}+|x{xdigit}+);)
194
195 -------- equivalent flex definition -------- */
196
/* Tables containing character class definitions (advance declaration for data at end of file). */

/* quotedD / quotedS: indexed by a byte value, give the class code that the
   double-quoted / single-quoted attribute value state of ParseXML() switches on. */
static const unsigned char quotedD[256],quotedS[256];

/* U2 ... U4c: for each multi-byte class, one byte-indexed table per following
   byte; ParseXML() rejects the attribute value when the entry for a following byte is zero. */
static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];

/* Byte-indexed tables that ParseXML() tests for a non-zero entry to classify single characters. */
static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
201
202
203 /*++++++++++++++++++++++++++++++++++++++
204 A function to call the callback function with the parameters needed.
205
206 int call_callback Returns 1 if the callback returned with an error.
207
208 const char *name The name of the tag.
209
210 int (*callback)() The callback function.
211
212 int type The type of tag (start and/or end).
213
214 int nattributes The number of attributes collected.
215
216 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
217 ++++++++++++++++++++++++++++++++++++++*/
218
219 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
220 {
221 switch(nattributes)
222 {
223 case 0: return (*callback)(name,type);
224 case 1: return (*callback)(name,type,attributes[0]);
225 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
226 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
227 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
228 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
229 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
230 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
231 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
232 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
233 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
234 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
235 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
236 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
237 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
238 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
239 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
240
241 default:
242 fprintf(stderr,"XML Parser: Error on line %"PRIu64": too many attributes for tag '%s' source code needs changing.\n",lineno,name);
243 exit(1);
244 }
245 }
246
247
248 /*++++++++++++++++++++++++++++++++++++++
249 Parse the XML and call the functions for each tag as seen.
250
251 int ParseXML Returns 0 if OK or something else in case of an error.
252
253 int fd The file descriptor of the file to parse.
254
255 xmltag **tags The array of pointers to tags for the top level.
256
257 int options A list of XML Parser options OR-ed together.
258 ++++++++++++++++++++++++++++++++++++++*/
259
260 int ParseXML(int fd,xmltag **tags,int options)
261 {
262 int i;
263 int state,next_state,after_attr;
264 unsigned char saved_buffer_ptr=0;
265 const unsigned char *quoted;
266
267 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
268 int attribute=0;
269
270 int stackdepth=0,stackused=0;
271 xmltag ***tags_stack=NULL;
272 xmltag **tag_stack=NULL;
273 xmltag *tag=NULL;
274
275 /* The actual parser. */
276
277 lineno=1;
278
279 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
280 buffer_token=NULL;
281
282 buffer_refill(fd);
283
284 BEGIN(LEX_STATE_INITIAL);
285
286 new_state:
287
288 switch(state)
289 {
290 /* ================ Parsing states ================ */
291
292
293 /* -------- equivalent flex definition --------
294
295 <INITIAL>"<!" { BEGIN(BANGTAG); }
296 <INITIAL>"</" { BEGIN(END_TAG1); }
297 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
298 <INITIAL>"<" { BEGIN(TAG_START); }
299
300 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
301
302 <INITIAL>{N} { lineno++; }
303 <INITIAL>{S}+ { }
304 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
305
306 -------- equivalent flex definition -------- */
307
308 case LEX_STATE_INITIAL:
309
310 while(1)
311 {
312 while(whitespace[(int)*buffer_ptr])
313 NEXT_CHAR;
314
315 if(*buffer_ptr=='\n')
316 {
317 NEXT_CHAR;
318
319 lineno++;
320 }
321 else if(*buffer_ptr=='<')
322 {
323 NEXT_CHAR;
324
325 if(*buffer_ptr=='/')
326 {
327 NEXT_CHAR;
328 BEGIN(LEX_STATE_END_TAG1);
329 }
330 else if(*buffer_ptr=='!')
331 {
332 NEXT_CHAR;
333 BEGIN(LEX_STATE_BANGTAG);
334 }
335 else if(*buffer_ptr=='?')
336 {
337 NEXT_CHAR;
338 BEGIN(LEX_STATE_XML_DECL_START);
339 }
340 else
341 BEGIN(LEX_STATE_TAG_START);
342 }
343 else if(*buffer_ptr=='>')
344 BEGIN(LEX_ERROR_CLOSE);
345 else
346 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
347 }
348
349 break;
350
351 /* -------- equivalent flex definition --------
352
353 <BANGTAG>"--" { BEGIN(COMMENT); }
354 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
355 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
356
357 -------- equivalent flex definition -------- */
358
359 case LEX_STATE_BANGTAG:
360
361 if(*buffer_ptr!='-')
362 BEGIN(LEX_ERROR_TAG_START);
363
364 NEXT_CHAR;
365
366 if(*buffer_ptr!='-')
367 BEGIN(LEX_ERROR_TAG_START);
368
369 NEXT_CHAR;
370 BEGIN(LEX_STATE_COMMENT);
371
372 break;
373
374 /* -------- equivalent flex definition --------
375
376 <COMMENT>"-->" { BEGIN(INITIAL); }
377 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
378 <COMMENT>"-" { }
379 <COMMENT>{N} { lineno++; }
380 <COMMENT>[^-\n]+ { }
381
382 -------- equivalent flex definition -------- */
383
384 case LEX_STATE_COMMENT:
385
386 while(1)
387 {
388 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
389 NEXT_CHAR;
390
391 if(*buffer_ptr=='-')
392 {
393 NEXT_CHAR;
394
395 if(*buffer_ptr!='-')
396 continue;
397
398 NEXT_CHAR;
399 if(*buffer_ptr=='>')
400 {
401 NEXT_CHAR;
402 BEGIN(LEX_STATE_INITIAL);
403 }
404
405 BEGIN(LEX_ERROR_COMMENT);
406 }
407 else /* if(*buffer_ptr=='\n') */
408 {
409 NEXT_CHAR;
410
411 lineno++;
412 }
413 }
414
415 break;
416
417 /* -------- equivalent flex definition --------
418
419 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
420 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
421 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
422
423 -------- equivalent flex definition -------- */
424
425 case LEX_STATE_XML_DECL_START:
426
427 START_TOKEN;
428
429 if(*buffer_ptr=='x')
430 {
431 NEXT_CHAR;
432 if(*buffer_ptr=='m')
433 {
434 NEXT_CHAR;
435 if(*buffer_ptr=='l')
436 {
437 NEXT_CHAR;
438
439 saved_buffer_ptr=*buffer_ptr;
440 *buffer_ptr=0;
441
442 NEXT(LEX_STATE_XML_DECL);
443 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
444 }
445 }
446 }
447
448 BEGIN(LEX_ERROR_XML_DECL_START);
449
450 /* -------- equivalent flex definition --------
451
452 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
453 <XML_DECL>{S}+ { }
454 <XML_DECL>{N} { lineno++; }
455 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
456 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
457
458 -------- equivalent flex definition -------- */
459
460 case LEX_STATE_XML_DECL:
461
462 while(1)
463 {
464 while(whitespace[(int)*buffer_ptr])
465 NEXT_CHAR;
466
467 if(namestart[(int)*buffer_ptr])
468 {
469 START_TOKEN;
470
471 NEXT_CHAR;
472 while(namechar[(int)*buffer_ptr])
473 NEXT_CHAR;
474
475 saved_buffer_ptr=*buffer_ptr;
476 *buffer_ptr=0;
477
478 after_attr=LEX_STATE_XML_DECL;
479 NEXT(LEX_STATE_ATTR_KEY);
480 BEGIN(LEX_FUNC_ATTR_KEY);
481 }
482 else if(*buffer_ptr=='?')
483 {
484 NEXT_CHAR;
485 if(*buffer_ptr=='>')
486 {
487 NEXT_CHAR;
488 NEXT(LEX_STATE_INITIAL);
489 BEGIN(LEX_FUNC_XML_DECL_FINISH);
490 }
491
492 BEGIN(LEX_ERROR_XML_DECL);
493 }
494 else if(*buffer_ptr=='\n')
495 {
496 NEXT_CHAR;
497 lineno++;
498 }
499 else
500 BEGIN(LEX_ERROR_XML_DECL);
501 }
502
503 break;
504
505 /* -------- equivalent flex definition --------
506
507 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
508 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
509 <TAG_START>. { return(LEX_ERROR_TAG_START); }
510
511 -------- equivalent flex definition -------- */
512
513 case LEX_STATE_TAG_START:
514
515 if(namestart[(int)*buffer_ptr])
516 {
517 START_TOKEN;
518
519 NEXT_CHAR;
520 while(namechar[(int)*buffer_ptr])
521 NEXT_CHAR;
522
523 saved_buffer_ptr=*buffer_ptr;
524 *buffer_ptr=0;
525
526 NEXT(LEX_STATE_TAG);
527 BEGIN(LEX_FUNC_TAG_BEGIN);
528 }
529
530 BEGIN(LEX_ERROR_TAG_START);
531
532 /* -------- equivalent flex definition --------
533
534 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
535 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
536 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
537
538 -------- equivalent flex definition -------- */
539
540 case LEX_STATE_END_TAG1:
541
542 if(namestart[(int)*buffer_ptr])
543 {
544 START_TOKEN;
545
546 NEXT_CHAR;
547 while(namechar[(int)*buffer_ptr])
548 NEXT_CHAR;
549
550 saved_buffer_ptr=*buffer_ptr;
551 *buffer_ptr=0;
552
553 NEXT(LEX_STATE_END_TAG2);
554 BEGIN(LEX_FUNC_TAG_POP);
555 }
556
557 BEGIN(LEX_ERROR_END_TAG);
558
559 /* -------- equivalent flex definition --------
560
561 <END_TAG2>">" { BEGIN(INITIAL); }
562 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
563 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
564
565 -------- equivalent flex definition -------- */
566
567 case LEX_STATE_END_TAG2:
568
569 if(*buffer_ptr=='>')
570 {
571 NEXT_CHAR;
572
573 BEGIN(LEX_STATE_INITIAL);
574 }
575
576 BEGIN(LEX_ERROR_END_TAG);
577
578 /* -------- equivalent flex definition --------
579
580 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
581 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
582 <TAG>{S}+ { }
583 <TAG>{N} { lineno++; }
584 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
585 <TAG>. { return(LEX_ERROR_TAG); }
586
587 -------- equivalent flex definition -------- */
588
589 case LEX_STATE_TAG:
590
591 while(1)
592 {
593 while(whitespace[(int)*buffer_ptr])
594 NEXT_CHAR;
595
596 if(namestart[(int)*buffer_ptr])
597 {
598 START_TOKEN;
599
600 NEXT_CHAR;
601 while(namechar[(int)*buffer_ptr])
602 NEXT_CHAR;
603
604 saved_buffer_ptr=*buffer_ptr;
605 *buffer_ptr=0;
606
607 after_attr=LEX_STATE_TAG;
608 NEXT(LEX_STATE_ATTR_KEY);
609 BEGIN(LEX_FUNC_ATTR_KEY);
610 }
611 else if(*buffer_ptr=='/')
612 {
613 NEXT_CHAR;
614 if(*buffer_ptr=='>')
615 {
616 NEXT_CHAR;
617 NEXT(LEX_STATE_INITIAL);
618 BEGIN(LEX_FUNC_TAG_FINISH);
619 }
620
621 BEGIN(LEX_ERROR_TAG);
622 }
623 else if(*buffer_ptr=='>')
624 {
625 NEXT_CHAR;
626 NEXT(LEX_STATE_INITIAL);
627 BEGIN(LEX_FUNC_TAG_PUSH);
628 }
629 else if(*buffer_ptr=='\n')
630 {
631 NEXT_CHAR;
632 lineno++;
633 }
634 else
635 BEGIN(LEX_ERROR_TAG);
636 }
637
638 break;
639
640 /* -------- equivalent flex definition --------
641
642 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
643 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
644 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
645
646 -------- equivalent flex definition -------- */
647
648 case LEX_STATE_ATTR_KEY:
649
650 if(*buffer_ptr=='=')
651 {
652 NEXT_CHAR;
653 BEGIN(LEX_STATE_ATTR_VAL);
654 }
655
656 BEGIN(LEX_ERROR_ATTR);
657
658 /* -------- equivalent flex definition --------
659
660 <ATTR_VAL>\" { BEGIN(DQUOTED); }
661 <ATTR_VAL>\' { BEGIN(SQUOTED); }
662 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
663 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
664
665 -------- equivalent flex definition -------- */
666
667 case LEX_STATE_ATTR_VAL:
668
669 if(*buffer_ptr=='"')
670 {
671 NEXT_CHAR;
672 BEGIN(LEX_STATE_DQUOTED);
673 }
674 else if(*buffer_ptr=='\'')
675 {
676 NEXT_CHAR;
677 BEGIN(LEX_STATE_SQUOTED);
678 }
679
680 BEGIN(LEX_ERROR_ATTR);
681
682 /* -------- equivalent flex definition --------
683
684 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
685 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
686 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
687 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
688 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
689 <DQUOTED>{UquotedD} { }
690 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
691 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
692
693 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
694 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
695 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
696 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
697 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
698 <SQUOTED>{UquotedS} { append_string(yytext); }
699 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
700 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
701
702 -------- equivalent flex definition -------- */
703
704 case LEX_STATE_DQUOTED:
705 case LEX_STATE_SQUOTED:
706
707 if(state==LEX_STATE_DQUOTED)
708 quoted=quotedD;
709 else
710 quoted=quotedS;
711
712 START_TOKEN;
713
714 while(1)
715 {
716 switch(quoted[(int)*buffer_ptr])
717 {
718 case 10: /* U1 - used by all tag keys and many values */
719 do
720 {
721 NEXT_CHAR;
722 }
723 while(quoted[(int)*buffer_ptr]==10);
724 break;
725
726 case 20: /* U2 */
727 NEXT_CHAR;
728 if(!U2[0][(int)*buffer_ptr])
729 BEGIN(LEX_ERROR_ATTR_VAL);
730 NEXT_CHAR;
731 break;
732
733 case 31: /* U3a */
734 NEXT_CHAR;
735 if(!U3a[0][(int)*buffer_ptr])
736 BEGIN(LEX_ERROR_ATTR_VAL);
737 NEXT_CHAR;
738 if(!U3a[1][(int)*buffer_ptr])
739 BEGIN(LEX_ERROR_ATTR_VAL);
740 NEXT_CHAR;
741 break;
742
743 case 32: /* U3b */
744 NEXT_CHAR;
745 if(!U3b[0][(int)*buffer_ptr])
746 BEGIN(LEX_ERROR_ATTR_VAL);
747 NEXT_CHAR;
748 if(!U3b[1][(int)*buffer_ptr])
749 BEGIN(LEX_ERROR_ATTR_VAL);
750 NEXT_CHAR;
751 break;
752
753 case 33: /* U3c */
754 NEXT_CHAR;
755 if(!U3c[0][(int)*buffer_ptr])
756 BEGIN(LEX_ERROR_ATTR_VAL);
757 NEXT_CHAR;
758 if(!U3c[1][(int)*buffer_ptr])
759 BEGIN(LEX_ERROR_ATTR_VAL);
760 NEXT_CHAR;
761 break;
762
763 case 34: /* U3d */
764 NEXT_CHAR;
765 if(!U3d[0][(int)*buffer_ptr])
766 BEGIN(LEX_ERROR_ATTR_VAL);
767 NEXT_CHAR;
768 if(!U3d[1][(int)*buffer_ptr])
769 BEGIN(LEX_ERROR_ATTR_VAL);
770 NEXT_CHAR;
771 break;
772
773 case 41: /* U4a */
774 NEXT_CHAR;
775 if(!U4a[0][(int)*buffer_ptr])
776 BEGIN(LEX_ERROR_ATTR_VAL);
777 NEXT_CHAR;
778 if(!U4a[1][(int)*buffer_ptr])
779 BEGIN(LEX_ERROR_ATTR_VAL);
780 NEXT_CHAR;
781 if(!U4a[2][(int)*buffer_ptr])
782 BEGIN(LEX_ERROR_ATTR_VAL);
783 NEXT_CHAR;
784 break;
785
786 case 42: /* U4b */
787 NEXT_CHAR;
788 if(!U4b[0][(int)*buffer_ptr])
789 BEGIN(LEX_ERROR_ATTR_VAL);
790 NEXT_CHAR;
791 if(!U4b[1][(int)*buffer_ptr])
792 BEGIN(LEX_ERROR_ATTR_VAL);
793 NEXT_CHAR;
794 if(!U4b[2][(int)*buffer_ptr])
795 BEGIN(LEX_ERROR_ATTR_VAL);
796 NEXT_CHAR;
797 break;
798
799 case 43: /* U4c */
800 NEXT_CHAR;
801 if(!U4c[0][(int)*buffer_ptr])
802 BEGIN(LEX_ERROR_ATTR_VAL);
803 NEXT_CHAR;
804 if(!U4c[1][(int)*buffer_ptr])
805 BEGIN(LEX_ERROR_ATTR_VAL);
806 NEXT_CHAR;
807 if(!U4c[2][(int)*buffer_ptr])
808 BEGIN(LEX_ERROR_ATTR_VAL);
809 NEXT_CHAR;
810 break;
811
812 case 50: /* entityref or charref */
813 NEXT_CHAR;
814
815 if(*buffer_ptr=='#') /* charref */
816 {
817 int charref_len=3;
818
819 NEXT_CHAR;
820 if(digit[(int)*buffer_ptr]) /* decimal */
821 {
822 NEXT_CHAR;
823 charref_len++;
824
825 while(digit[(int)*buffer_ptr])
826 {
827 NEXT_CHAR;
828 charref_len++;
829 }
830
831 if(*buffer_ptr!=';')
832 BEGIN(LEX_ERROR_ATTR_VAL);
833 }
834 else if(*buffer_ptr=='x') /* hex */
835 {
836 NEXT_CHAR;
837 charref_len++;
838
839 while(xdigit[(int)*buffer_ptr])
840 {
841 NEXT_CHAR;
842 charref_len++;
843 }
844
845 if(*buffer_ptr!=';')
846 BEGIN(LEX_ERROR_ATTR_VAL);
847 }
848 else /* other */
849 BEGIN(LEX_ERROR_ATTR_VAL);
850
851 NEXT_CHAR;
852
853 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
854 {
855 const char *str;
856
857 saved_buffer_ptr=*buffer_ptr;
858 *buffer_ptr=0;
859
860 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
861
862 if(!str)
863 {
864 buffer_ptr-=charref_len;
865 BEGIN(LEX_ERROR_CHAR_REF);
866 }
867
868 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
869 memcpy(buffer_ptr-strlen(str),str,strlen(str));
870
871 *buffer_ptr=saved_buffer_ptr;
872 }
873 }
874 else if(namestart[(int)*buffer_ptr]) /* entityref */
875 {
876 int entityref_len=3;
877
878 NEXT_CHAR;
879 while(namechar[(int)*buffer_ptr])
880 {
881 NEXT_CHAR;
882 entityref_len++;
883 }
884
885 if(*buffer_ptr!=';')
886 BEGIN(LEX_ERROR_ATTR_VAL);
887
888 NEXT_CHAR;
889
890 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
891 {
892 const char *str;
893
894 saved_buffer_ptr=*buffer_ptr;
895 *buffer_ptr=0;
896
897 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
898
899 if(!str)
900 {
901 buffer_ptr-=entityref_len;
902 BEGIN(LEX_ERROR_ENTITY_REF);
903 }
904
905 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
906 memcpy(buffer_ptr-strlen(str),str,strlen(str));
907
908 *buffer_ptr=saved_buffer_ptr;
909 }
910 }
911 else /* other */
912 BEGIN(LEX_ERROR_ATTR_VAL);
913
914 break;
915
916 case 99: /* quote */
917 *buffer_ptr=0;
918 NEXT_CHAR;
919
920 NEXT(after_attr);
921 BEGIN(LEX_FUNC_ATTR_VAL);
922
923 default: /* other */
924 BEGIN(LEX_ERROR_ATTR_VAL);
925 }
926 }
927
928 break;
929
930
931 /* ================ Functional states ================ */
932
933
934 /* The start of a tag for an XML declaration */
935
936 case LEX_FUNC_XML_DECL_BEGIN:
937
938 if(tag_stack)
939 BEGIN(LEX_ERROR_XML_NOT_FIRST);
940
941 /* The start of a tag for an element */
942
943 case LEX_FUNC_TAG_BEGIN:
944
945 tag=NULL;
946
947 for(i=0;tags[i];i++)
948 if(buffer_token[0]==tags[i]->name[0] || tolower(buffer_token[0])==tags[i]->name[0])
949 if(!strcasecmp((char*)buffer_token+1,tags[i]->name+1))
950 {
951 tag=tags[i];
952
953 for(i=0;i<tag->nattributes;i++)
954 attributes[i]=NULL;
955
956 break;
957 }
958
959 if(tag==NULL)
960 BEGIN(LEX_ERROR_UNEXP_TAG);
961
962 END_TOKEN;
963
964 *buffer_ptr=saved_buffer_ptr;
965 BEGIN(next_state);
966
967 /* The end of the start-tag for an element */
968
969 case LEX_FUNC_TAG_PUSH:
970
971 if(stackused==stackdepth)
972 {
973 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
974 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
975 }
976
977 tag_stack [stackused]=tag;
978 tags_stack[stackused]=tags;
979 stackused++;
980
981 if(tag->callback)
982 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
983 BEGIN(LEX_ERROR_CALLBACK);
984
985 tags=tag->subtags;
986
987 BEGIN(next_state);
988
989 /* The end of the empty-element-tag for an XML declaration */
990
991 case LEX_FUNC_XML_DECL_FINISH:
992
993 /* The end of the empty-element-tag for an element */
994
995 case LEX_FUNC_TAG_FINISH:
996
997 if(tag->callback)
998 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
999 BEGIN(LEX_ERROR_CALLBACK);
1000
1001 if(stackused>0)
1002 tag=tag_stack[stackused-1];
1003 else
1004 tag=NULL;
1005
1006 BEGIN(next_state);
1007
1008 /* The end of the end-tag for an element */
1009
1010 case LEX_FUNC_TAG_POP:
1011
1012 stackused--;
1013 tags=tags_stack[stackused];
1014 tag =tag_stack [stackused];
1015
1016 if(strcmp((char*)buffer_token,tag->name))
1017 BEGIN(LEX_ERROR_UNBALANCED);
1018
1019 if(stackused<0)
1020 BEGIN(LEX_ERROR_NO_START);
1021
1022 for(i=0;i<tag->nattributes;i++)
1023 attributes[i]=NULL;
1024
1025 if(tag->callback)
1026 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1027 BEGIN(LEX_ERROR_CALLBACK);
1028
1029 if(stackused>0)
1030 tag=tag_stack[stackused-1];
1031 else
1032 tag=NULL;
1033
1034 END_TOKEN;
1035
1036 *buffer_ptr=saved_buffer_ptr;
1037 BEGIN(next_state);
1038
1039 /* An attribute key */
1040
1041 case LEX_FUNC_ATTR_KEY:
1042
1043 attribute=-1;
1044
1045 for(i=0;i<tag->nattributes;i++)
1046 if(buffer_token[0]==tag->attributes[i][0] || tolower(buffer_token[0])==tag->attributes[i][0])
1047 if(!strcasecmp((char*)buffer_token+1,tag->attributes[i]+1))
1048 {
1049 attribute=i;
1050
1051 break;
1052 }
1053
1054 if(attribute==-1)
1055 {
1056 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1057 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1058 BEGIN(LEX_ERROR_UNEXP_ATT);
1059 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1060 fprintf(stderr,"XML Parser: Warning on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1061 }
1062
1063 END_TOKEN;
1064
1065 *buffer_ptr=saved_buffer_ptr;
1066 BEGIN(next_state);
1067
1068 /* An attribute value */
1069
1070 case LEX_FUNC_ATTR_VAL:
1071
1072 if(tag->callback && attribute!=-1)
1073 attributes[attribute]=buffer_token;
1074
1075 END_TOKEN;
1076
1077 BEGIN(next_state);
1078
1079 /* End of file */
1080
1081 case LEX_EOF:
1082
1083 if(tag)
1084 BEGIN(LEX_ERROR_UNEXP_EOF);
1085
1086 break;
1087
1088
1089 /* ================ Error states ================ */
1090
1091
1092 case LEX_ERROR_TAG_START:
1093 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '<' seen not at start of tag.\n",lineno);
1094 break;
1095
1096 case LEX_ERROR_XML_DECL_START:
1097 fprintf(stderr,"XML Parser: Error on line %"PRIu64": characters '<?' seen not at start of XML declaration.\n",lineno);
1098 break;
1099
1100 case LEX_ERROR_TAG:
1101 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1102 break;
1103
1104 case LEX_ERROR_XML_DECL:
1105 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1106 break;
1107
1108 case LEX_ERROR_ATTR:
1109 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid attribute definition seen in tag.\n",lineno);
1110 break;
1111
1112 case LEX_ERROR_END_TAG:
1113 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen in end-tag.\n",lineno);
1114 break;
1115
1116 case LEX_ERROR_COMMENT:
1117 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid comment seen.\n",lineno);
1118 break;
1119
1120 case LEX_ERROR_CLOSE:
1121 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '>' seen not at end of tag.\n",lineno);
1122 break;
1123
1124 case LEX_ERROR_ATTR_VAL:
1125 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1126 break;
1127
1128 case LEX_ERROR_ENTITY_REF:
1129 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1130 break;
1131
1132 case LEX_ERROR_CHAR_REF:
1133 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1134 break;
1135
1136 case LEX_ERROR_TEXT_OUTSIDE:
1137 fprintf(stderr,"XML Parser: Error on line %"PRIu64": non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1138 break;
1139
1140 case LEX_ERROR_UNEXP_TAG:
1141 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected tag '%s'.\n",lineno,buffer_token);
1142 break;
1143
1144 case LEX_ERROR_UNBALANCED:
1145 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1146 break;
1147
1148 case LEX_ERROR_NO_START:
1149 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1150 break;
1151
1152 case LEX_ERROR_UNEXP_ATT:
1153 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1154 break;
1155
1156 case LEX_ERROR_UNEXP_EOF:
1157 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1158 break;
1159
1160 case LEX_ERROR_XML_NOT_FIRST:
1161 fprintf(stderr,"XML Parser: Error on line %"PRIu64": XML declaration '<?xml...>' not before all other tags.\n",lineno);
1162 break;
1163 }
1164
1165 /* Delete the tagdata */
1166
1167 if(stackdepth)
1168 {
1169 free(tag_stack);
1170 free(tags_stack);
1171 }
1172
1173 return(state);
1174 }
1175
1176
1177 /*++++++++++++++++++++++++++++++++++++++
1178 Return the current parser line number.
1179
1180 uint64_t ParseXML_LineNumber Returns the line number.
1181 ++++++++++++++++++++++++++++++++++++++*/
1182
1183 uint64_t ParseXML_LineNumber(void)
1184 {
1185 return(lineno);
1186 }
1187
1188
/*++++++++++++++++++++++++++++++++++++++
  Look up the replacement text for one of the five predefined XML entity references.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character string, or NULL if
  the entity reference is not one of those recognised (the result must not be modified or freed).

  const char *string The complete entity reference including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* The comparison is exact and case sensitive; the order matches the original chain of tests. */
 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 predefined[]={
  { "&amp;"  , "&"  },
  { "&lt;"   , "<"  },
  { "&gt;"   , ">"  },
  { "&apos;" , "'"  },
  { "&quot;" , "\"" }
 };
 size_t k;

 for(k=0;k<sizeof(predefined)/sizeof(predefined[0]);k++)
   {
    if(strcmp(string,predefined[k].reference)==0)
       return predefined[k].replacement;
   }

 return NULL;
}
1206
1207
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into the UTF-8 encoding of the character.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the NUL terminated UTF-8 bytes.  The pointer
  refers to a static buffer that is overwritten by the next call (so the function is not re-entrant
  and the result must not be freed).

  const char *string The character reference string, read as "&#NNN;" (decimal) or "&#xHHH;"
  (hexadecimal): the number is parsed starting after the first two or three characters.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* strtol() stops at the first non-digit so the trailing ';' needs no special handling. */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Values that cannot be encoded as well-formed UTF-8 (negative, beyond U+10FFFF or in the UTF-16
    surrogate range) are replaced by U+FFFD, the Unicode replacement character. */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F => 0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0+((unicode&0x07C0)>>6));
    result[1]=(char)(0x80+ (unicode&0x003F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0+((unicode&0xF000)>>12));
    result[1]=(char)(0x80+((unicode&0x0FC0)>>6));
    result[2]=(char)(0x80+ (unicode&0x003F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0+((unicode&0x1C0000)>>18));
    result[1]=(char)(0x80+((unicode&0x03F000)>>12));
    result[2]=(char)(0x80+((unicode&0x000FC0)>>6));
    result[3]=(char)(0x80+ (unicode&0x00003F));
    result[4]=0;
   }

 return(result);
}
1263
1264
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if no character needed replacing,
  otherwise a newly malloc()ed NUL terminated string (the caller can tell the two cases apart by
  comparing the result with the argument), or NULL if memory could not be allocated.

  const char *string The NUL terminated string to convert; bytes above 127 are decoded as UTF-8 and
  anything that does not decode is written as the replacement character U+FFFD.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 const unsigned char *in=(const unsigned char*)string; /* unsigned so that byte tests do not depend on the signedness of char */
 size_t i,j,size;
 char *result;

 /* Find the first character that needs to be replaced (if any). */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the unchanged prefix into a new buffer. */

 size=i+256;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    unsigned char c=in[i];

    /* The longest replacement for one input character is "&#xHHHHHH;" (10 bytes), so always keep
       room for that plus the terminating NUL before writing anything. */

    if(size-j<11)
      {
       char *bigger=(char*)realloc((void*)result,size+256);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
       size+=256;
      }

    if(c=='\'')
      {
       /* XML, HTML5 and XHTML1 allow &apos; but HTML4 doesn't. */
       memcpy(result+j,"&#39;",5);
       j+=5;
      }
    else if(c=='&')
      {
       memcpy(result+j,"&amp;",5);
       j+=5;
      }
    else if(c=='"')
      {
       memcpy(result+j,"&quot;",6);
       j+=6;
      }
    else if(c=='<')
      {
       memcpy(result+j,"&lt;",4);
       j+=4;
      }
    else if(c=='>')
      {
       memcpy(result+j,"&gt;",4);
       j+=4;
      }
    else if(c>=32 && c<=127)
       result[j++]=(char)c;
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (the tests on the following bytes stop at the terminating NUL because
          a NUL byte never matches the 10xxxxxx pattern). */

       if((c&0x80)==0)
         {
          /* 0000 0000-0000 007F => 0xxxxxxx (only control characters reach here) */
          unicode=c;
         }
       else if((c&0xE0)==0xC0 && (c&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x1F)<<6;
          unicode|=(unsigned int)(in[i+1]&0x3F);
          i+=1;
         }
       else if((c&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x0F)<<12;
          unicode|=(unsigned int)(in[i+1]&0x3F)<<6;
          unicode|=(unsigned int)(in[i+2]&0x3F);
          i+=2;
         }
       else if((c&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(c      &0x07)<<18;
          unicode|=(unsigned int)(in[i+1]&0x3F)<<12;
          unicode|=(unsigned int)(in[i+2]&0x3F)<<6;
          unicode|=(unsigned int)(in[i+3]&0x3F);
          i+=3;
         }
       else
          unicode=0xFFFD;

       /* Output the character entity as 2, 4 or 6 hexadecimal digits. */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1405
1406
1407 /*++++++++++++++++++++++++++++++++++++++
1408 Check that a string really is an integer.
1409
1410 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1411
1412 const char *string The string to be parsed.
1413 ++++++++++++++++++++++++++++++++++++++*/
1414
1415 int ParseXML_IsInteger(const char *string)
1416 {
1417 const unsigned char *p=(unsigned char*)string;
1418
1419 if(*p=='-' || *p=='+')
1420 p++;
1421
1422 while(digit[(int)*p])
1423 p++;
1424
1425 if(*p)
1426 return(0);
1427 else
1428 return(1);
1429 }
1430
1431
1432 /*++++++++++++++++++++++++++++++++++++++
1433 Check that a string really is a floating point number.
1434
1435 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1436
1437 const char *string The string to be parsed.
1438 ++++++++++++++++++++++++++++++++++++++*/
1439
1440 int ParseXML_IsFloating(const char *string)
1441 {
1442 const unsigned char *p=(unsigned char*)string;
1443
1444 if(*p=='-' || *p=='+')
1445 p++;
1446
1447 while(digit[(int)*p] || *p=='.')
1448 p++;
1449
1450 if(*p=='e' || *p=='E')
1451 {
1452 p++;
1453
1454 if(*p=='-' || *p=='+')
1455 p++;
1456
1457 while(digit[*p])
1458 p++;
1459 }
1460
1461 if(*p)
1462 return(0);
1463 else
1464 return(1);
1465 }
1466
1467
1468 /* Table classifying each byte value for text inside a double-quoted string (indexed by the byte, 0-255).
   The codes stored in the table are:
     0  = no code assigned: control characters other than tab (0x09), LF (0x0a) and CR (0x0d),
          the characters '<' and '>', the bytes 0x80-0xbf, and the bytes 0xc0, 0xc1 and 0xf5-0xff;
     10 = any other single-byte (ASCII) character, including DEL (0x7f);
     50 = '&';
     99 = '"' (the double quote itself);
     20 = bytes 0xc2-0xdf;
     31, 32, 33, 34 = bytes 0xe0, 0xe1-0xec, 0xed and 0xee-0xef respectively;
     41, 42, 43     = bytes 0xf0, 0xf1-0xf3 and 0xf4 respectively.
   The groupings for 20, 31-34 and 41-43 are the same UTF-8 lead-byte ranges that the comments on the
   U2, U3a-U3d and U4a-U4c tables in this file describe.
   NOTE(review): the action taken for each code is decided by the lexer and is not visible here;
   presumably 0 is an error, 50 starts an entity/character reference and 99 ends the value - confirm. */
1469 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1471 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1472 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1473 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1474 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1475 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1476 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1481 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1482 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1483 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1484 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1485
1486 /* Table classifying each byte value for text inside a single-quoted string (indexed by the byte, 0-255).
   The codes stored in the table are:
     0  = no code assigned: control characters other than tab (0x09), LF (0x0a) and CR (0x0d),
          the characters '<' and '>', the bytes 0x80-0xbf, and the bytes 0xc0, 0xc1 and 0xf5-0xff;
     10 = any other single-byte (ASCII) character, including '"' and DEL (0x7f);
     50 = '&';
     99 = ''' (the single quote itself, byte 0x27);
     20 = bytes 0xc2-0xdf;
     31, 32, 33, 34 = bytes 0xe0, 0xe1-0xec, 0xed and 0xee-0xef respectively;
     41, 42, 43     = bytes 0xf0, 0xf1-0xf3 and 0xf4 respectively.
   The groupings for 20, 31-34 and 41-43 are the same UTF-8 lead-byte ranges that the comments on the
   U2, U3a-U3d and U4a-U4c tables in this file describe.
   NOTE(review): the action taken for each code is decided by the lexer and is not visible here;
   presumably 0 is an error, 50 starts an entity/character reference and 99 ends the value - confirm. */
1487 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1489 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1490 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1491 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1492 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1493 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1494 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1499 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1500 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1501 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1502 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1503
/* Byte-membership table for the range 0x80-0x8f: entry is 1 for a byte in the range, 0 for any other.
   Entries that are not listed are zero-initialised. */
static const unsigned char U_80_8F[256]={
 [0x80]=1,[0x81]=1,[0x82]=1,[0x83]=1,[0x84]=1,[0x85]=1,[0x86]=1,[0x87]=1,
 [0x88]=1,[0x89]=1,[0x8a]=1,[0x8b]=1,[0x8c]=1,[0x8d]=1,[0x8e]=1,[0x8f]=1
};
1521
/* Byte-membership table for the range 0x80-0x9f: entry is 1 for a byte in the range, 0 for any other.
   Entries that are not listed are zero-initialised. */
static const unsigned char U_80_9F[256]={
 [0x80]=1,[0x81]=1,[0x82]=1,[0x83]=1,[0x84]=1,[0x85]=1,[0x86]=1,[0x87]=1,
 [0x88]=1,[0x89]=1,[0x8a]=1,[0x8b]=1,[0x8c]=1,[0x8d]=1,[0x8e]=1,[0x8f]=1,
 [0x90]=1,[0x91]=1,[0x92]=1,[0x93]=1,[0x94]=1,[0x95]=1,[0x96]=1,[0x97]=1,
 [0x98]=1,[0x99]=1,[0x9a]=1,[0x9b]=1,[0x9c]=1,[0x9d]=1,[0x9e]=1,[0x9f]=1
};
1539
/* Byte-membership table for the range 0x80-0xbf: entry is 1 for a byte in the range, 0 for any other.
   Entries that are not listed are zero-initialised. */
static const unsigned char U_80_BF[256]={
 [0x80]=1,[0x81]=1,[0x82]=1,[0x83]=1,[0x84]=1,[0x85]=1,[0x86]=1,[0x87]=1,
 [0x88]=1,[0x89]=1,[0x8a]=1,[0x8b]=1,[0x8c]=1,[0x8d]=1,[0x8e]=1,[0x8f]=1,
 [0x90]=1,[0x91]=1,[0x92]=1,[0x93]=1,[0x94]=1,[0x95]=1,[0x96]=1,[0x97]=1,
 [0x98]=1,[0x99]=1,[0x9a]=1,[0x9b]=1,[0x9c]=1,[0x9d]=1,[0x9e]=1,[0x9f]=1,
 [0xa0]=1,[0xa1]=1,[0xa2]=1,[0xa3]=1,[0xa4]=1,[0xa5]=1,[0xa6]=1,[0xa7]=1,
 [0xa8]=1,[0xa9]=1,[0xaa]=1,[0xab]=1,[0xac]=1,[0xad]=1,[0xae]=1,[0xaf]=1,
 [0xb0]=1,[0xb1]=1,[0xb2]=1,[0xb3]=1,[0xb4]=1,[0xb5]=1,[0xb6]=1,[0xb7]=1,
 [0xb8]=1,[0xb9]=1,[0xba]=1,[0xbb]=1,[0xbc]=1,[0xbd]=1,[0xbe]=1,[0xbf]=1
};
1557
/* Byte-membership table for the range 0x90-0xbf: entry is 1 for a byte in the range, 0 for any other.
   Entries that are not listed are zero-initialised. */
static const unsigned char U_90_BF[256]={
 [0x90]=1,[0x91]=1,[0x92]=1,[0x93]=1,[0x94]=1,[0x95]=1,[0x96]=1,[0x97]=1,
 [0x98]=1,[0x99]=1,[0x9a]=1,[0x9b]=1,[0x9c]=1,[0x9d]=1,[0x9e]=1,[0x9f]=1,
 [0xa0]=1,[0xa1]=1,[0xa2]=1,[0xa3]=1,[0xa4]=1,[0xa5]=1,[0xa6]=1,[0xa7]=1,
 [0xa8]=1,[0xa9]=1,[0xaa]=1,[0xab]=1,[0xac]=1,[0xad]=1,[0xae]=1,[0xaf]=1,
 [0xb0]=1,[0xb1]=1,[0xb2]=1,[0xb3]=1,[0xb4]=1,[0xb5]=1,[0xb6]=1,[0xb7]=1,
 [0xb8]=1,[0xb9]=1,[0xba]=1,[0xbb]=1,[0xbc]=1,[0xbd]=1,[0xbe]=1,[0xbf]=1
};
1575
/* Byte-membership table for the range 0xa0-0xbf: entry is 1 for a byte in the range, 0 for any other.
   Entries that are not listed are zero-initialised. */
static const unsigned char U_A0_BF[256]={
 [0xa0]=1,[0xa1]=1,[0xa2]=1,[0xa3]=1,[0xa4]=1,[0xa5]=1,[0xa6]=1,[0xa7]=1,
 [0xa8]=1,[0xa9]=1,[0xaa]=1,[0xab]=1,[0xac]=1,[0xad]=1,[0xae]=1,[0xaf]=1,
 [0xb0]=1,[0xb1]=1,[0xb2]=1,[0xb3]=1,[0xb4]=1,[0xb5]=1,[0xb6]=1,[0xb7]=1,
 [0xb8]=1,[0xb9]=1,[0xba]=1,[0xbb]=1,[0xbc]=1,[0xbd]=1,[0xbe]=1,[0xbf]=1
};
1593
1594 /* Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF. */
1595 static const unsigned char *U2[1]={ U_80_BF };
1596
1597 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF. */
1598 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1599
1600 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF. */
1601 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1602
1603 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML). */
1604 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1605
1606 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled). */
1607 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1608
1609 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF. */
1610 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1611
1612 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF. */
1613 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1614
1615 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML). */
1616 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1617
/* Lookup table (indexed by byte value) of the characters flagged as able to start a name:
   ':', '_', 'A'-'Z' and 'a'-'z'.  Entry is 1 for those characters, 0 for any other byte.
   The character constants assume an ASCII-compatible execution character set. */
static const unsigned char namestart[256]={
 [':']=1,['_']=1,

 ['A']=1,['B']=1,['C']=1,['D']=1,['E']=1,['F']=1,['G']=1,['H']=1,['I']=1,
 ['J']=1,['K']=1,['L']=1,['M']=1,['N']=1,['O']=1,['P']=1,['Q']=1,['R']=1,
 ['S']=1,['T']=1,['U']=1,['V']=1,['W']=1,['X']=1,['Y']=1,['Z']=1,

 ['a']=1,['b']=1,['c']=1,['d']=1,['e']=1,['f']=1,['g']=1,['h']=1,['i']=1,
 ['j']=1,['k']=1,['l']=1,['m']=1,['n']=1,['o']=1,['p']=1,['q']=1,['r']=1,
 ['s']=1,['t']=1,['u']=1,['v']=1,['w']=1,['x']=1,['y']=1,['z']=1
};
1635
/* Lookup table (indexed by byte value) of the characters flagged as name characters:
   '-', '.', ':', '_', '0'-'9', 'A'-'Z' and 'a'-'z'.  Entry is 1 for those characters, 0 for any
   other byte.  The character constants assume an ASCII-compatible execution character set. */
static const unsigned char namechar[256]={
 ['-']=1,['.']=1,[':']=1,['_']=1,

 ['0']=1,['1']=1,['2']=1,['3']=1,['4']=1,['5']=1,['6']=1,['7']=1,['8']=1,['9']=1,

 ['A']=1,['B']=1,['C']=1,['D']=1,['E']=1,['F']=1,['G']=1,['H']=1,['I']=1,
 ['J']=1,['K']=1,['L']=1,['M']=1,['N']=1,['O']=1,['P']=1,['Q']=1,['R']=1,
 ['S']=1,['T']=1,['U']=1,['V']=1,['W']=1,['X']=1,['Y']=1,['Z']=1,

 ['a']=1,['b']=1,['c']=1,['d']=1,['e']=1,['f']=1,['g']=1,['h']=1,['i']=1,
 ['j']=1,['k']=1,['l']=1,['m']=1,['n']=1,['o']=1,['p']=1,['q']=1,['r']=1,
 ['s']=1,['t']=1,['u']=1,['v']=1,['w']=1,['x']=1,['y']=1,['z']=1
};
1653
/* Lookup table (indexed by byte value) of the characters flagged as whitespace: tab (0x09),
   carriage return (0x0d) and space (0x20).  Entry is 1 for those characters, 0 for any other byte.
   Note that linefeed (0x0a) is NOT flagged by this table.
   NOTE(review): presumably linefeed is handled separately so that lines can be counted - confirm. */
static const unsigned char whitespace[256]={
 ['\t']=1,
 ['\r']=1,
 [' ' ]=1
};
1671
/* Lookup table (indexed by byte value) of the decimal digit characters '0'-'9'.
   Entry is 1 for a digit, 0 for any other byte (including the NUL terminator). */
static const unsigned char digit[256]={
 ['0']=1,['1']=1,['2']=1,['3']=1,['4']=1,
 ['5']=1,['6']=1,['7']=1,['8']=1,['9']=1
};
1689
/* Lookup table (indexed by byte value) of the hexadecimal digit characters '0'-'9', 'A'-'F' and
   'a'-'f'.  Entry is 1 for a hexadecimal digit, 0 for any other byte. */
static const unsigned char xdigit[256]={
 ['0']=1,['1']=1,['2']=1,['3']=1,['4']=1,
 ['5']=1,['6']=1,['7']=1,['8']=1,['9']=1,

 ['A']=1,['B']=1,['C']=1,['D']=1,['E']=1,['F']=1,

 ['a']=1,['b']=1,['c']=1,['d']=1,['e']=1,['f']=1
};