Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /branches/MSVC/src/xmlparse.c

Parent Directory | Revision Log


Revision 1669 - (show annotations) (download) (as text)
Tue May 19 18:46:02 2015 UTC (9 years, 10 months ago) by amb
File MIME type: text/x-csrc
File size: 68712 byte(s)
Add some explicit casts for some assignments between different integer
types [patch from Oliver Eichler for compiling with Microsoft C].

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2015 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26
27 #if defined(_MSC_VER)
28 #include <io.h>
29 #define read(fd,address,length) _read(fd,address,(unsigned int)(length))
30 #else
31 #include <unistd.h>
32 #endif
33
34 #include <stdlib.h>
35 #include <inttypes.h>
36 #include <stdint.h>
37 #include <string.h>
38 #if !defined(_MSC_VER)
39 #include <strings.h>
40 #else
41 #define strcasecmp _stricmp
42 #endif
43 #include <ctype.h>
44
45 #include "xmlparse.h"
46
47
48 /* Parser states */
/* These are the values taken by the 'state' variable of ParseXML().  ParseXML()
   returns the state it finished in, so LEX_EOF (0) is the successful result and
   every LEX_ERROR_* value is an error code seen by the caller. */
49
50 #define LEX_EOF 0
51
/* Functional states: entered from a parsing state when a complete item has been
   recognised; they act on it and then continue with the state saved by NEXT(). */
52 #define LEX_FUNC_TAG_BEGIN 1
53 #define LEX_FUNC_XML_DECL_BEGIN 2
54 #define LEX_FUNC_TAG_POP 3
55 #define LEX_FUNC_TAG_PUSH 4
56 #define LEX_FUNC_XML_DECL_FINISH 5
57 #define LEX_FUNC_TAG_FINISH 6
58 #define LEX_FUNC_ATTR_KEY 7
59 #define LEX_FUNC_ATTR_VAL 8
60
/* Parsing states: these consume characters from the input buffer and correspond
   to the start conditions of the equivalent flex definition. */
61 #define LEX_STATE_INITIAL 10
62 #define LEX_STATE_BANGTAG 11
63 #define LEX_STATE_COMMENT 12
64 #define LEX_STATE_XML_DECL_START 13
65 #define LEX_STATE_XML_DECL 14
66 #define LEX_STATE_TAG_START 15
67 #define LEX_STATE_TAG 16
68 #define LEX_STATE_ATTR_KEY 17
69 #define LEX_STATE_ATTR_VAL 18
70 #define LEX_STATE_END_TAG1 19
71 #define LEX_STATE_END_TAG2 20
72 #define LEX_STATE_DQUOTED 21
73 #define LEX_STATE_SQUOTED 22
74
/* Error states entered from the parsing states (malformed input characters). */
75 #define LEX_ERROR_TAG_START 101
76 #define LEX_ERROR_XML_DECL_START 102
77 #define LEX_ERROR_TAG 103
78 #define LEX_ERROR_XML_DECL 104
79 #define LEX_ERROR_ATTR 105
80 #define LEX_ERROR_END_TAG 106
81 #define LEX_ERROR_COMMENT 107
82 #define LEX_ERROR_CLOSE 108
83 #define LEX_ERROR_ATTR_VAL 109
84 #define LEX_ERROR_ENTITY_REF 110
85 #define LEX_ERROR_CHAR_REF 111
86 #define LEX_ERROR_TEXT_OUTSIDE 112
87
/* Error states entered from the functional states or at end of file (the input
   does not match the expected tag structure). */
88 #define LEX_ERROR_UNEXP_TAG 201
89 #define LEX_ERROR_UNBALANCED 202
90 #define LEX_ERROR_NO_START 203
91 #define LEX_ERROR_UNEXP_ATT 204
92 #define LEX_ERROR_UNEXP_EOF 205
93 #define LEX_ERROR_XML_NOT_FIRST 206
94
/* Other errors; LEX_ERROR_CALLBACK is entered when a tag callback returns non-zero. */
95 #define LEX_ERROR_OUT_OF_MEMORY 254
96 #define LEX_ERROR_CALLBACK 255
97
98
99 /* Parsing variables and functions */
100
/* The current input line number; reset to 1 by ParseXML() and returned by ParseXML_LineNumber(). */
101 static uint64_t lineno;
102
/* Two input buffers; buffer_refill() swaps between them (copying the unfinished
   token across) when the active one is more than half full. */
103 static unsigned char buffer[2][16384];
/* buffer_token: start of the token being collected, or NULL when there is none.
   buffer_end:   the last valid byte read into the active buffer.
   buffer_ptr:   the character currently being examined by the parser. */
104 static unsigned char *buffer_token,*buffer_end,*buffer_ptr;
/* Index (0 or 1) of the buffer currently in use. */
105 static int buffer_active=0;
106
107
108 /*++++++++++++++++++++++++++++++++++++++
109 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
110
111 int buffer_refill Return 0 if everything is OK or 1 for EOF.
112
113 int fd The file descriptor to read from.
114 ++++++++++++++++++++++++++++++++++++++*/
115
116 static inline int buffer_refill(int fd)
117 {
118 ssize_t n;
119 size_t m=0;
120
121 m=(buffer_end-buffer[buffer_active])+1;
122
123 if(m>(sizeof(buffer[0])/2)) /* more than half full */
124 {
125 m=0;
126
127 buffer_active=!buffer_active;
128
129 if(buffer_token)
130 {
131 m=(buffer_end-buffer_token)+1;
132
133 memcpy(buffer[buffer_active],buffer_token,m);
134
135 buffer_token=buffer[buffer_active];
136 }
137 }
138
139 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
140
141 buffer_ptr=buffer[buffer_active]+m;
142 buffer_end=buffer[buffer_active]+m+n-1;
143
144 if(n<=0)
145 return(1);
146 else
147 return(0);
148 }
149
150
151 /* Macros to simplify the parser (and make it look more like lex) */
/* All of these are only usable inside ParseXML(), which provides the variables
   'state', 'next_state' and 'fd' and the label 'new_state'.

   BEGIN(xx)   - switch to state xx immediately (jumps to the 'new_state' label).
   NEXT(xx)    - remember xx in 'next_state', the state to continue with after a functional state.
   START_TOKEN - mark the current character as the start of a token.
   END_TOKEN   - forget the token (buffer_token becomes NULL).
   NEXT_CHAR   - advance to the next character; when the last buffered byte has been
                 reached call buffer_refill() and switch to LEX_EOF if no more data arrives. */
152
153 #define BEGIN(xx) do{ state=(xx); goto new_state; } while(0)
154 #define NEXT(xx) next_state=(xx)
155
156 #define START_TOKEN buffer_token=buffer_ptr
157 #define END_TOKEN buffer_token=NULL
158
159 #define NEXT_CHAR \
160 do{ \
161 if(buffer_ptr==buffer_end) \
162 { if(buffer_refill(fd)) BEGIN(LEX_EOF); } \
163 else \
164 buffer_ptr++; \
165 } while(0)
166
167
168 /* -------- equivalent flex definition --------
169
170 S [ \t\r]
171 N (\n)
172
173 U1 [\x09\x0A\x0D\x20-\x7F]
174 U2 [\xC2-\xDF][\x80-\xBF]
175 U3a \xE0[\xA0-\xBF][\x80-\xBF]
176 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
177 U3c \xED[\x80-\x9F][\x80-\xBF]
178 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
179 U3 {U3a}|{U3b}|{U3c}|{U3d}
180 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
181 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
182 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
183 U4 {U4a}|{U4b}|{U4c}
184
185 U ({U1}|{U2}|{U3}|{U4})
186
187 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
188
189 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
190 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
191
192 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
193 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
194
195 letter [a-zA-Z]
196 digit [0-9]
197 xdigit [a-fA-F0-9]
198
199 namechar ({letter}|{digit}|[-._:])
200 namestart ({letter}|[_:])
201 name ({namestart}{namechar}*)
202
203 entityref (&{name};)
204 charref (&#({digit}+|x{xdigit}+);)
205
206 -------- equivalent flex definition -------- */
207
208 /* Tables containing character class definitions (advance declaration for data at end of file). */
/* quotedD/quotedS: indexed by a byte of a double/single quoted attribute value, giving the
   class code that the quoted-string state of ParseXML() switches on. */
209 static const unsigned char quotedD[256],quotedS[256];
/* U2..U4c: for each multi-byte UTF-8 class, one 256-entry table per byte that follows the
   first byte; a zero entry makes ParseXML() reject that byte. */
210 static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];
/* Single-byte membership tables indexed by character (non-zero means the character is in the class). */
211 static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
212
213
214 /*++++++++++++++++++++++++++++++++++++++
215 A function to call the callback function with the parameters needed.
216
217 int call_callback Returns 1 if the callback returned with an error.
218
219 const char *name The name of the tag.
220
221 int (*callback)() The callback function.
222
223 int type The type of tag (start and/or end).
224
225 int nattributes The number of attributes collected.
226
227 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
228 ++++++++++++++++++++++++++++++++++++++*/
229
230 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
231 {
232 switch(nattributes)
233 {
234 case 0: return (*callback)(name,type);
235 case 1: return (*callback)(name,type,attributes[0]);
236 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
237 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
238 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
239 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
240 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
241 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
242 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
243 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
244 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
245 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
246 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
247 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
248 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
249 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
250 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
251
252 default:
253 fprintf(stderr,"XML Parser: Error on line %"PRIu64": too many attributes for tag '%s' source code needs changing.\n",lineno,name);
254 exit(1);
255 }
256 }
257
258
259 /*++++++++++++++++++++++++++++++++++++++
260 Parse the XML and call the functions for each tag as seen.
261
262 int ParseXML Returns 0 if OK or something else in case of an error.
263
264 int fd The file descriptor of the file to parse.
265
266 xmltag **tags The array of pointers to tags for the top level.
267
268 int options A list of XML Parser options OR-ed together.
269 ++++++++++++++++++++++++++++++++++++++*/
270
271 int ParseXML(int fd,xmltag **tags,int options)
272 {
273 int i;
274 int state,next_state,after_attr;
275 unsigned char saved_buffer_ptr=0;
276 const unsigned char *quoted;
277
278 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
279 int attribute=0;
280
281 int stackdepth=0,stackused=0;
282 xmltag ***tags_stack=NULL;
283 xmltag **tag_stack=NULL;
284 xmltag *tag=NULL;
285
286 /* The actual parser. */
287
288 lineno=1;
289
290 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
291 buffer_token=NULL;
292
293 buffer_refill(fd);
294
295 BEGIN(LEX_STATE_INITIAL);
296
297 new_state:
298
299 switch(state)
300 {
301 /* ================ Parsing states ================ */
302
303
304 /* -------- equivalent flex definition --------
305
306 <INITIAL>"<!" { BEGIN(BANGTAG); }
307 <INITIAL>"</" { BEGIN(END_TAG1); }
308 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
309 <INITIAL>"<" { BEGIN(TAG_START); }
310
311 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
312
313 <INITIAL>{N} { lineno++; }
314 <INITIAL>{S}+ { }
315 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
316
317 -------- equivalent flex definition -------- */
318
319 case LEX_STATE_INITIAL:
320
321 while(1)
322 {
323 while(whitespace[(int)*buffer_ptr])
324 NEXT_CHAR;
325
326 if(*buffer_ptr=='\n')
327 {
328 NEXT_CHAR;
329
330 lineno++;
331 }
332 else if(*buffer_ptr=='<')
333 {
334 NEXT_CHAR;
335
336 if(*buffer_ptr=='/')
337 {
338 NEXT_CHAR;
339 BEGIN(LEX_STATE_END_TAG1);
340 }
341 else if(*buffer_ptr=='!')
342 {
343 NEXT_CHAR;
344 BEGIN(LEX_STATE_BANGTAG);
345 }
346 else if(*buffer_ptr=='?')
347 {
348 NEXT_CHAR;
349 BEGIN(LEX_STATE_XML_DECL_START);
350 }
351 else
352 BEGIN(LEX_STATE_TAG_START);
353 }
354 else if(*buffer_ptr=='>')
355 BEGIN(LEX_ERROR_CLOSE);
356 else
357 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
358 }
359
360 break;
361
362 /* -------- equivalent flex definition --------
363
364 <BANGTAG>"--" { BEGIN(COMMENT); }
365 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
366 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
367
368 -------- equivalent flex definition -------- */
369
370 case LEX_STATE_BANGTAG:
371
372 if(*buffer_ptr!='-')
373 BEGIN(LEX_ERROR_TAG_START);
374
375 NEXT_CHAR;
376
377 if(*buffer_ptr!='-')
378 BEGIN(LEX_ERROR_TAG_START);
379
380 NEXT_CHAR;
381 BEGIN(LEX_STATE_COMMENT);
382
383 break;
384
385 /* -------- equivalent flex definition --------
386
387 <COMMENT>"-->" { BEGIN(INITIAL); }
388 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
389 <COMMENT>"-" { }
390 <COMMENT>{N} { lineno++; }
391 <COMMENT>[^-\n]+ { }
392
393 -------- equivalent flex definition -------- */
394
395 case LEX_STATE_COMMENT:
396
397 while(1)
398 {
399 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
400 NEXT_CHAR;
401
402 if(*buffer_ptr=='-')
403 {
404 NEXT_CHAR;
405
406 if(*buffer_ptr!='-')
407 continue;
408
409 NEXT_CHAR;
410 if(*buffer_ptr=='>')
411 {
412 NEXT_CHAR;
413 BEGIN(LEX_STATE_INITIAL);
414 }
415
416 BEGIN(LEX_ERROR_COMMENT);
417 }
418 else /* if(*buffer_ptr=='\n') */
419 {
420 NEXT_CHAR;
421
422 lineno++;
423 }
424 }
425
426 break;
427
428 /* -------- equivalent flex definition --------
429
430 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
431 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
432 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
433
434 -------- equivalent flex definition -------- */
435
436 case LEX_STATE_XML_DECL_START:
437
438 START_TOKEN;
439
440 if(*buffer_ptr=='x')
441 {
442 NEXT_CHAR;
443 if(*buffer_ptr=='m')
444 {
445 NEXT_CHAR;
446 if(*buffer_ptr=='l')
447 {
448 NEXT_CHAR;
449
450 saved_buffer_ptr=*buffer_ptr;
451 *buffer_ptr=0;
452
453 NEXT(LEX_STATE_XML_DECL);
454 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
455 }
456 }
457 }
458
459 BEGIN(LEX_ERROR_XML_DECL_START);
460
461 /* -------- equivalent flex definition --------
462
463 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
464 <XML_DECL>{S}+ { }
465 <XML_DECL>{N} { lineno++; }
466 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
467 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
468
469 -------- equivalent flex definition -------- */
470
471 case LEX_STATE_XML_DECL:
472
473 while(1)
474 {
475 while(whitespace[(int)*buffer_ptr])
476 NEXT_CHAR;
477
478 if(namestart[(int)*buffer_ptr])
479 {
480 START_TOKEN;
481
482 NEXT_CHAR;
483 while(namechar[(int)*buffer_ptr])
484 NEXT_CHAR;
485
486 saved_buffer_ptr=*buffer_ptr;
487 *buffer_ptr=0;
488
489 after_attr=LEX_STATE_XML_DECL;
490 NEXT(LEX_STATE_ATTR_KEY);
491 BEGIN(LEX_FUNC_ATTR_KEY);
492 }
493 else if(*buffer_ptr=='?')
494 {
495 NEXT_CHAR;
496 if(*buffer_ptr=='>')
497 {
498 NEXT_CHAR;
499 NEXT(LEX_STATE_INITIAL);
500 BEGIN(LEX_FUNC_XML_DECL_FINISH);
501 }
502
503 BEGIN(LEX_ERROR_XML_DECL);
504 }
505 else if(*buffer_ptr=='\n')
506 {
507 NEXT_CHAR;
508 lineno++;
509 }
510 else
511 BEGIN(LEX_ERROR_XML_DECL);
512 }
513
514 break;
515
516 /* -------- equivalent flex definition --------
517
518 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
519 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
520 <TAG_START>. { return(LEX_ERROR_TAG_START); }
521
522 -------- equivalent flex definition -------- */
523
524 case LEX_STATE_TAG_START:
525
526 if(namestart[(int)*buffer_ptr])
527 {
528 START_TOKEN;
529
530 NEXT_CHAR;
531 while(namechar[(int)*buffer_ptr])
532 NEXT_CHAR;
533
534 saved_buffer_ptr=*buffer_ptr;
535 *buffer_ptr=0;
536
537 NEXT(LEX_STATE_TAG);
538 BEGIN(LEX_FUNC_TAG_BEGIN);
539 }
540
541 BEGIN(LEX_ERROR_TAG_START);
542
543 /* -------- equivalent flex definition --------
544
545 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
546 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
547 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
548
549 -------- equivalent flex definition -------- */
550
551 case LEX_STATE_END_TAG1:
552
553 if(namestart[(int)*buffer_ptr])
554 {
555 START_TOKEN;
556
557 NEXT_CHAR;
558 while(namechar[(int)*buffer_ptr])
559 NEXT_CHAR;
560
561 saved_buffer_ptr=*buffer_ptr;
562 *buffer_ptr=0;
563
564 NEXT(LEX_STATE_END_TAG2);
565 BEGIN(LEX_FUNC_TAG_POP);
566 }
567
568 BEGIN(LEX_ERROR_END_TAG);
569
570 /* -------- equivalent flex definition --------
571
572 <END_TAG2>">" { BEGIN(INITIAL); }
573 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
574 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
575
576 -------- equivalent flex definition -------- */
577
578 case LEX_STATE_END_TAG2:
579
580 if(*buffer_ptr=='>')
581 {
582 NEXT_CHAR;
583
584 BEGIN(LEX_STATE_INITIAL);
585 }
586
587 BEGIN(LEX_ERROR_END_TAG);
588
589 /* -------- equivalent flex definition --------
590
591 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
592 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
593 <TAG>{S}+ { }
594 <TAG>{N} { lineno++; }
595 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
596 <TAG>. { return(LEX_ERROR_TAG); }
597
598 -------- equivalent flex definition -------- */
599
600 case LEX_STATE_TAG:
601
602 while(1)
603 {
604 while(whitespace[(int)*buffer_ptr])
605 NEXT_CHAR;
606
607 if(namestart[(int)*buffer_ptr])
608 {
609 START_TOKEN;
610
611 NEXT_CHAR;
612 while(namechar[(int)*buffer_ptr])
613 NEXT_CHAR;
614
615 saved_buffer_ptr=*buffer_ptr;
616 *buffer_ptr=0;
617
618 after_attr=LEX_STATE_TAG;
619 NEXT(LEX_STATE_ATTR_KEY);
620 BEGIN(LEX_FUNC_ATTR_KEY);
621 }
622 else if(*buffer_ptr=='/')
623 {
624 NEXT_CHAR;
625 if(*buffer_ptr=='>')
626 {
627 NEXT_CHAR;
628 NEXT(LEX_STATE_INITIAL);
629 BEGIN(LEX_FUNC_TAG_FINISH);
630 }
631
632 BEGIN(LEX_ERROR_TAG);
633 }
634 else if(*buffer_ptr=='>')
635 {
636 NEXT_CHAR;
637 NEXT(LEX_STATE_INITIAL);
638 BEGIN(LEX_FUNC_TAG_PUSH);
639 }
640 else if(*buffer_ptr=='\n')
641 {
642 NEXT_CHAR;
643 lineno++;
644 }
645 else
646 BEGIN(LEX_ERROR_TAG);
647 }
648
649 break;
650
651 /* -------- equivalent flex definition --------
652
653 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
654 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
655 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
656
657 -------- equivalent flex definition -------- */
658
659 case LEX_STATE_ATTR_KEY:
660
661 if(*buffer_ptr=='=')
662 {
663 NEXT_CHAR;
664 BEGIN(LEX_STATE_ATTR_VAL);
665 }
666
667 BEGIN(LEX_ERROR_ATTR);
668
669 /* -------- equivalent flex definition --------
670
671 <ATTR_VAL>\" { BEGIN(DQUOTED); }
672 <ATTR_VAL>\' { BEGIN(SQUOTED); }
673 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
674 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
675
676 -------- equivalent flex definition -------- */
677
678 case LEX_STATE_ATTR_VAL:
679
680 if(*buffer_ptr=='"')
681 {
682 NEXT_CHAR;
683 BEGIN(LEX_STATE_DQUOTED);
684 }
685 else if(*buffer_ptr=='\'')
686 {
687 NEXT_CHAR;
688 BEGIN(LEX_STATE_SQUOTED);
689 }
690
691 BEGIN(LEX_ERROR_ATTR);
692
693 /* -------- equivalent flex definition --------
694
695 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
696 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
697 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
698 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
699 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
700 <DQUOTED>{UquotedD} { }
701 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
702 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
703
704 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
705 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
706 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
707 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
708 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
709 <SQUOTED>{UquotedS} { append_string(yytext); }
710 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
711 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
712
713 -------- equivalent flex definition -------- */
714
715 case LEX_STATE_DQUOTED:
716 case LEX_STATE_SQUOTED:
717
718 if(state==LEX_STATE_DQUOTED)
719 quoted=quotedD;
720 else
721 quoted=quotedS;
722
723 START_TOKEN;
724
725 while(1)
726 {
727 switch(quoted[(int)*buffer_ptr])
728 {
729 case 10: /* U1 - used by all tag keys and many values */
730 do
731 {
732 NEXT_CHAR;
733 }
734 while(quoted[(int)*buffer_ptr]==10);
735 break;
736
737 case 20: /* U2 */
738 NEXT_CHAR;
739 if(!U2[0][(int)*buffer_ptr])
740 BEGIN(LEX_ERROR_ATTR_VAL);
741 NEXT_CHAR;
742 break;
743
744 case 31: /* U3a */
745 NEXT_CHAR;
746 if(!U3a[0][(int)*buffer_ptr])
747 BEGIN(LEX_ERROR_ATTR_VAL);
748 NEXT_CHAR;
749 if(!U3a[1][(int)*buffer_ptr])
750 BEGIN(LEX_ERROR_ATTR_VAL);
751 NEXT_CHAR;
752 break;
753
754 case 32: /* U3b */
755 NEXT_CHAR;
756 if(!U3b[0][(int)*buffer_ptr])
757 BEGIN(LEX_ERROR_ATTR_VAL);
758 NEXT_CHAR;
759 if(!U3b[1][(int)*buffer_ptr])
760 BEGIN(LEX_ERROR_ATTR_VAL);
761 NEXT_CHAR;
762 break;
763
764 case 33: /* U3c */
765 NEXT_CHAR;
766 if(!U3c[0][(int)*buffer_ptr])
767 BEGIN(LEX_ERROR_ATTR_VAL);
768 NEXT_CHAR;
769 if(!U3c[1][(int)*buffer_ptr])
770 BEGIN(LEX_ERROR_ATTR_VAL);
771 NEXT_CHAR;
772 break;
773
774 case 34: /* U3d */
775 NEXT_CHAR;
776 if(!U3d[0][(int)*buffer_ptr])
777 BEGIN(LEX_ERROR_ATTR_VAL);
778 NEXT_CHAR;
779 if(!U3d[1][(int)*buffer_ptr])
780 BEGIN(LEX_ERROR_ATTR_VAL);
781 NEXT_CHAR;
782 break;
783
784 case 41: /* U4a */
785 NEXT_CHAR;
786 if(!U4a[0][(int)*buffer_ptr])
787 BEGIN(LEX_ERROR_ATTR_VAL);
788 NEXT_CHAR;
789 if(!U4a[1][(int)*buffer_ptr])
790 BEGIN(LEX_ERROR_ATTR_VAL);
791 NEXT_CHAR;
792 if(!U4a[2][(int)*buffer_ptr])
793 BEGIN(LEX_ERROR_ATTR_VAL);
794 NEXT_CHAR;
795 break;
796
797 case 42: /* U4b */
798 NEXT_CHAR;
799 if(!U4b[0][(int)*buffer_ptr])
800 BEGIN(LEX_ERROR_ATTR_VAL);
801 NEXT_CHAR;
802 if(!U4b[1][(int)*buffer_ptr])
803 BEGIN(LEX_ERROR_ATTR_VAL);
804 NEXT_CHAR;
805 if(!U4b[2][(int)*buffer_ptr])
806 BEGIN(LEX_ERROR_ATTR_VAL);
807 NEXT_CHAR;
808 break;
809
810 case 43: /* U4c */
811 NEXT_CHAR;
812 if(!U4c[0][(int)*buffer_ptr])
813 BEGIN(LEX_ERROR_ATTR_VAL);
814 NEXT_CHAR;
815 if(!U4c[1][(int)*buffer_ptr])
816 BEGIN(LEX_ERROR_ATTR_VAL);
817 NEXT_CHAR;
818 if(!U4c[2][(int)*buffer_ptr])
819 BEGIN(LEX_ERROR_ATTR_VAL);
820 NEXT_CHAR;
821 break;
822
823 case 50: /* entityref or charref */
824 NEXT_CHAR;
825
826 if(*buffer_ptr=='#') /* charref */
827 {
828 int charref_len=3;
829
830 NEXT_CHAR;
831 if(digit[(int)*buffer_ptr]) /* decimal */
832 {
833 NEXT_CHAR;
834 charref_len++;
835
836 while(digit[(int)*buffer_ptr])
837 {
838 NEXT_CHAR;
839 charref_len++;
840 }
841
842 if(*buffer_ptr!=';')
843 BEGIN(LEX_ERROR_ATTR_VAL);
844 }
845 else if(*buffer_ptr=='x') /* hex */
846 {
847 NEXT_CHAR;
848 charref_len++;
849
850 while(xdigit[(int)*buffer_ptr])
851 {
852 NEXT_CHAR;
853 charref_len++;
854 }
855
856 if(*buffer_ptr!=';')
857 BEGIN(LEX_ERROR_ATTR_VAL);
858 }
859 else /* other */
860 BEGIN(LEX_ERROR_ATTR_VAL);
861
862 NEXT_CHAR;
863
864 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
865 {
866 const char *str;
867
868 saved_buffer_ptr=*buffer_ptr;
869 *buffer_ptr=0;
870
871 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
872
873 if(!str)
874 {
875 buffer_ptr-=charref_len;
876 BEGIN(LEX_ERROR_CHAR_REF);
877 }
878
879 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
880 memcpy(buffer_ptr-strlen(str),str,strlen(str));
881
882 *buffer_ptr=saved_buffer_ptr;
883 }
884 }
885 else if(namestart[(int)*buffer_ptr]) /* entityref */
886 {
887 int entityref_len=3;
888
889 NEXT_CHAR;
890 while(namechar[(int)*buffer_ptr])
891 {
892 NEXT_CHAR;
893 entityref_len++;
894 }
895
896 if(*buffer_ptr!=';')
897 BEGIN(LEX_ERROR_ATTR_VAL);
898
899 NEXT_CHAR;
900
901 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
902 {
903 const char *str;
904
905 saved_buffer_ptr=*buffer_ptr;
906 *buffer_ptr=0;
907
908 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
909
910 if(!str)
911 {
912 buffer_ptr-=entityref_len;
913 BEGIN(LEX_ERROR_ENTITY_REF);
914 }
915
916 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
917 memcpy(buffer_ptr-strlen(str),str,strlen(str));
918
919 *buffer_ptr=saved_buffer_ptr;
920 }
921 }
922 else /* other */
923 BEGIN(LEX_ERROR_ATTR_VAL);
924
925 break;
926
927 case 99: /* quote */
928 *buffer_ptr=0;
929 NEXT_CHAR;
930
931 NEXT(after_attr);
932 BEGIN(LEX_FUNC_ATTR_VAL);
933
934 default: /* other */
935 BEGIN(LEX_ERROR_ATTR_VAL);
936 }
937 }
938
939 break;
940
941
942 /* ================ Functional states ================ */
943
944
945 /* The start of a tag for an XML declaration */
946
947 case LEX_FUNC_XML_DECL_BEGIN:
948
949 if(tag_stack)
950 BEGIN(LEX_ERROR_XML_NOT_FIRST);
951
952 /* The start of a tag for an element */
953
954 case LEX_FUNC_TAG_BEGIN:
955
956 tag=NULL;
957
958 for(i=0;tags[i];i++)
959 if(buffer_token[0]==tags[i]->name[0] || tolower(buffer_token[0])==tags[i]->name[0])
960 if(!strcasecmp((char*)buffer_token+1,tags[i]->name+1))
961 {
962 tag=tags[i];
963
964 for(i=0;i<tag->nattributes;i++)
965 attributes[i]=NULL;
966
967 break;
968 }
969
970 if(tag==NULL)
971 BEGIN(LEX_ERROR_UNEXP_TAG);
972
973 END_TOKEN;
974
975 *buffer_ptr=saved_buffer_ptr;
976 BEGIN(next_state);
977
978 /* The end of the start-tag for an element */
979
980 case LEX_FUNC_TAG_PUSH:
981
982 if(stackused==stackdepth)
983 {
984 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
985 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
986 }
987
988 tag_stack [stackused]=tag;
989 tags_stack[stackused]=tags;
990 stackused++;
991
992 if(tag->callback)
993 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
994 BEGIN(LEX_ERROR_CALLBACK);
995
996 tags=tag->subtags;
997
998 BEGIN(next_state);
999
1000 /* The end of the empty-element-tag for an XML declaration */
1001
1002 case LEX_FUNC_XML_DECL_FINISH:
1003
1004 /* The end of the empty-element-tag for an element */
1005
1006 case LEX_FUNC_TAG_FINISH:
1007
1008 if(tag->callback)
1009 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
1010 BEGIN(LEX_ERROR_CALLBACK);
1011
1012 if(stackused>0)
1013 tag=tag_stack[stackused-1];
1014 else
1015 tag=NULL;
1016
1017 BEGIN(next_state);
1018
1019 /* The end of the end-tag for an element */
1020
1021 case LEX_FUNC_TAG_POP:
1022
1023 stackused--;
1024 tags=tags_stack[stackused];
1025 tag =tag_stack [stackused];
1026
1027 if(strcmp((char*)buffer_token,tag->name))
1028 BEGIN(LEX_ERROR_UNBALANCED);
1029
1030 if(stackused<0)
1031 BEGIN(LEX_ERROR_NO_START);
1032
1033 for(i=0;i<tag->nattributes;i++)
1034 attributes[i]=NULL;
1035
1036 if(tag->callback)
1037 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1038 BEGIN(LEX_ERROR_CALLBACK);
1039
1040 if(stackused>0)
1041 tag=tag_stack[stackused-1];
1042 else
1043 tag=NULL;
1044
1045 END_TOKEN;
1046
1047 *buffer_ptr=saved_buffer_ptr;
1048 BEGIN(next_state);
1049
1050 /* An attribute key */
1051
1052 case LEX_FUNC_ATTR_KEY:
1053
1054 attribute=-1;
1055
1056 for(i=0;i<tag->nattributes;i++)
1057 if(buffer_token[0]==tag->attributes[i][0] || tolower(buffer_token[0])==tag->attributes[i][0])
1058 if(!strcasecmp((char*)buffer_token+1,tag->attributes[i]+1))
1059 {
1060 attribute=i;
1061
1062 break;
1063 }
1064
1065 if(attribute==-1)
1066 {
1067 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1068 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1069 BEGIN(LEX_ERROR_UNEXP_ATT);
1070 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1071 fprintf(stderr,"XML Parser: Warning on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1072 }
1073
1074 END_TOKEN;
1075
1076 *buffer_ptr=saved_buffer_ptr;
1077 BEGIN(next_state);
1078
1079 /* An attribute value */
1080
1081 case LEX_FUNC_ATTR_VAL:
1082
1083 if(tag->callback && attribute!=-1)
1084 attributes[attribute]=buffer_token;
1085
1086 END_TOKEN;
1087
1088 BEGIN(next_state);
1089
1090 /* End of file */
1091
1092 case LEX_EOF:
1093
1094 if(tag)
1095 BEGIN(LEX_ERROR_UNEXP_EOF);
1096
1097 break;
1098
1099
1100 /* ================ Error states ================ */
1101
1102
1103 case LEX_ERROR_TAG_START:
1104 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '<' seen not at start of tag.\n",lineno);
1105 break;
1106
1107 case LEX_ERROR_XML_DECL_START:
1108 fprintf(stderr,"XML Parser: Error on line %"PRIu64": characters '<?' seen not at start of XML declaration.\n",lineno);
1109 break;
1110
1111 case LEX_ERROR_TAG:
1112 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1113 break;
1114
1115 case LEX_ERROR_XML_DECL:
1116 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1117 break;
1118
1119 case LEX_ERROR_ATTR:
1120 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid attribute definition seen in tag.\n",lineno);
1121 break;
1122
1123 case LEX_ERROR_END_TAG:
1124 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen in end-tag.\n",lineno);
1125 break;
1126
1127 case LEX_ERROR_COMMENT:
1128 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid comment seen.\n",lineno);
1129 break;
1130
1131 case LEX_ERROR_CLOSE:
1132 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '>' seen not at end of tag.\n",lineno);
1133 break;
1134
1135 case LEX_ERROR_ATTR_VAL:
1136 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1137 break;
1138
1139 case LEX_ERROR_ENTITY_REF:
1140 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1141 break;
1142
1143 case LEX_ERROR_CHAR_REF:
1144 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1145 break;
1146
1147 case LEX_ERROR_TEXT_OUTSIDE:
1148 fprintf(stderr,"XML Parser: Error on line %"PRIu64": non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1149 break;
1150
1151 case LEX_ERROR_UNEXP_TAG:
1152 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected tag '%s'.\n",lineno,buffer_token);
1153 break;
1154
1155 case LEX_ERROR_UNBALANCED:
1156 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1157 break;
1158
1159 case LEX_ERROR_NO_START:
1160 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1161 break;
1162
1163 case LEX_ERROR_UNEXP_ATT:
1164 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1165 break;
1166
1167 case LEX_ERROR_UNEXP_EOF:
1168 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1169 break;
1170
1171 case LEX_ERROR_XML_NOT_FIRST:
1172 fprintf(stderr,"XML Parser: Error on line %"PRIu64": XML declaration '<?xml...>' not before all other tags.\n",lineno);
1173 break;
1174 }
1175
1176 /* Delete the tagdata */
1177
1178 if(stackdepth)
1179 {
1180 free(tag_stack);
1181 free(tags_stack);
1182 }
1183
1184 return(state);
1185 }
1186
1187
1188 /*++++++++++++++++++++++++++++++++++++++
1189 Return the current parser line number.
1190
1191 uint64_t ParseXML_LineNumber Returns the line number.
1192 ++++++++++++++++++++++++++++++++++++++*/
1193
1194 uint64_t ParseXML_LineNumber(void)
1195 {
1196 return(lineno);
1197 }
1198
1199
/*++++++++++++++++++++++++++++++++++++++
  Translate one of the five predefined XML entity references into the character it stands for.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character string or NULL if
                                   the reference is not one of the recognised ones.

  const char *string The complete entity reference, including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* Each row is { entity reference, replacement text }. */
 static const char *const known[][2]={ { "&amp;" , "&"  },
                                       { "&lt;"  , "<"  },
                                       { "&gt;"  , ">"  },
                                       { "&apos;", "'"  },
                                       { "&quot;", "\"" } };
 size_t n;

 for(n=0;n<sizeof(known)/sizeof(known[0]);n++)
    if(strcmp(string,known[n][0])==0)
       return (char*)known[n][1];

 return NULL;
}
1217
1218
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string; this is a
                                 static buffer that is overwritten by the next call.

  const char *string The character reference string, "&#NNN;" (decimal) or "&#xHHH;" (hexadecimal).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* Skip the "&#x" or "&#" prefix; strtol() stops at the first non-digit (the ';'). */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Values that are not Unicode scalar values (negative, above U+10FFFF or a UTF-16 surrogate)
    cannot be represented in UTF-8 so U+FFFD, the replacement character, is substituted. */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx (note that U+0000 gives an empty string) */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0|((unicode>>6)&0x1F));
    result[1]=(char)(0x80|( unicode    &0x3F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0|((unicode>>12)&0x0F));
    result[1]=(char)(0x80|((unicode>>6 )&0x3F));
    result[2]=(char)(0x80|( unicode     &0x3F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0|((unicode>>18)&0x07));
    result[1]=(char)(0x80|((unicode>>12)&0x3F));
    result[2]=(char)(0x80|((unicode>>6 )&0x3F));
    result[3]=(char)(0x80|( unicode     &0x3F));
    result[4]=0;
   }

 return(result);
}
1274
1275
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded string (or the original
                                 if no change needed).  A replacement string is allocated with
                                 malloc(); NULL is returned if that allocation fails.

  const char *string The string to convert (expected to be UTF-8; undecodable bytes become U+FFFD).
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 /* The longest replacement for one input character is "&#xHHHHHH;" which is 10 bytes. */
 enum { LONGEST_REPLACEMENT=10, GROWTH=256 };

 static const char hexstring[17]="0123456789ABCDEF";
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first character that needs to be encoded (if any). */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the part that needs no encoding. */

 size=i+GROWTH;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 /* Encode the remainder one input character at a time. */

 for(;in[i];i++)
   {
    unsigned int unicode;

    /* Guarantee room for the longest replacement plus the terminating NUL before writing. */

    if(size-j<(size_t)LONGEST_REPLACEMENT+1)
      {
       char *bigger=(char*)realloc(result,size+GROWTH);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
       size+=GROWTH;
      }

    switch(in[i])
      {
      case '\'':
       /* XML, HTML5 and XHTML1 allow &apos; but HTML4 doesn't. */
       memcpy(result+j,"&#39;",5);  j+=5;
       continue;

      case '&':
       memcpy(result+j,"&amp;",5);  j+=5;
       continue;

      case '"':
       memcpy(result+j,"&quot;",6); j+=6;
       continue;

      case '<':
       memcpy(result+j,"&lt;",4);   j+=4;
       continue;

      case '>':
       memcpy(result+j,"&gt;",4);   j+=4;
       continue;

      default:
       break;
      }

    if(in[i]>=32 && in[i]<=127)
      {
       result[j++]=(char)in[i];
       continue;
      }

    /* Decode the UTF-8 (the checks on the following bytes stop at a NUL so never read past the end) */

    if((in[i]&0x80)==0)
      {
       /* 0000 0000-0000 007F  =>  0xxxxxxx (only control characters get here) */
       unicode=in[i];
      }
    else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
      {
       /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
       unicode =(unsigned int)(in[i++]&0x1F)<<6;
       unicode|=(unsigned int)(in[i  ]&0x3F);
      }
    else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
      {
       /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
       unicode =(unsigned int)(in[i++]&0x0F)<<12;
       unicode|=(unsigned int)(in[i++]&0x3F)<<6;
       unicode|=(unsigned int)(in[i  ]&0x3F);
      }
    else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
      {
       /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
       unicode =(unsigned int)(in[i++]&0x07)<<18;
       unicode|=(unsigned int)(in[i++]&0x3F)<<12;
       unicode|=(unsigned int)(in[i++]&0x3F)<<6;
       unicode|=(unsigned int)(in[i  ]&0x3F);
      }
    else
       unicode=0xFFFD;          /* not decodable, use the replacement character */

    /* Output the character entity as 2, 4 or 6 hexadecimal digits */

    result[j++]='&';
    result[j++]='#';
    result[j++]='x';

    if(unicode&0x00FF0000)
      {
       result[j++]=hexstring[(unicode>>20)&0x0f];
       result[j++]=hexstring[(unicode>>16)&0x0f];
      }
    if(unicode&0x00FFFF00)
      {
       result[j++]=hexstring[(unicode>>12)&0x0f];
       result[j++]=hexstring[(unicode>>8 )&0x0f];
      }
    result[j++]=hexstring[(unicode>>4)&0x0f];
    result[j++]=hexstring[ unicode    &0x0f];

    result[j++]=';';
   }

 result[j]=0;

 return(result);
}
1416
1417
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is an integer.

  int ParseXML_IsInteger Returns 1 if the whole string is an optionally signed sequence of at least
                         one decimal digit or 0 otherwise (including for an empty string or a lone sign).

  const char *string The string to be parsed.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsInteger(const char *string)
{
 const unsigned char *p=(const unsigned char*)string;
 const unsigned char *first_digit;

 if(*p=='-' || *p=='+')
    p++;

 first_digit=p;

 /* The characters '0' to '9' are the ones flagged by the digit lookup table. */

 while(*p>='0' && *p<='9')
    p++;

 /* There must be at least one digit and nothing after the digits. */

 if(p==first_digit || *p)
    return(0);
 else
    return(1);
}
1441
1442
/*++++++++++++++++++++++++++++++++++++++
  Check that a string really is a floating point number.

  int ParseXML_IsFloating Returns 1 if the whole string is a floating point number or 0 otherwise.
                          The accepted form is an optional sign, decimal digits with at most one
                          '.' (at least one digit in total) and an optional exponent ('e' or 'E',
                          optional sign, at least one digit).

  const char *string The string to be parsed.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseXML_IsFloating(const char *string)
{
 const unsigned char *p=(const unsigned char*)string;
 int seen_digit=0;

 if(*p=='-' || *p=='+')
    p++;

 /* Integer part */

 while(*p>='0' && *p<='9')
   {
    seen_digit=1;
    p++;
   }

 /* Fractional part (only a single decimal point is allowed) */

 if(*p=='.')
   {
    p++;

    while(*p>='0' && *p<='9')
      {
       seen_digit=1;
       p++;
      }
   }

 /* Strings like "", "-" or "." have no digits so are not numbers. */

 if(!seen_digit)
    return(0);

 /* Exponent part */

 if(*p=='e' || *p=='E')
   {
    const unsigned char *first_digit;

    p++;

    if(*p=='-' || *p=='+')
       p++;

    first_digit=p;

    while(*p>='0' && *p<='9')
       p++;

    if(p==first_digit)
       return(0);
   }

 if(*p)
    return(0);
 else
    return(1);
}
1477
1478
1479 /* Table for checking for double-quoted characters.
   Indexed by byte value (0x00-0xff), sixteen entries per row.  The entries are:
      0 = '<', '>', control characters other than tab/LF/CR, and bytes 0x80-0xc1 and 0xf5-0xff,
     10 = tab, LF, CR and the other bytes in 0x20-0x7f not listed here,
     50 = '&',
     99 = the double-quote character itself,
     20 = 0xc2-0xdf, 31 = 0xe0, 32 = 0xe1-0xec, 33 = 0xed, 34 = 0xee-0xef,
     41 = 0xf0, 42 = 0xf1-0xf3, 43 = 0xf4.
   NOTE(review): the byte ranges with codes 20-43 match the first-byte ranges given for the
   U2, U3a-U3d and U4a-U4c tables in this file, so the codes presumably select which of those
   is used for the following bytes; the code that interprets the values is not in this part
   of the file - confirm there. */
1480 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1482 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1483 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1484 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1485 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1486 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1487 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1492 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1493 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1494 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1495 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1496
1497 /* Table for checking for single-quoted characters.
   Indexed by byte value (0x00-0xff), sixteen entries per row.  The entries are:
      0 = '<', '>', control characters other than tab/LF/CR, and bytes 0x80-0xc1 and 0xf5-0xff,
     10 = tab, LF, CR and the other bytes in 0x20-0x7f not listed here (including '"'),
     50 = '&',
     99 = the single-quote character itself,
     20 = 0xc2-0xdf, 31 = 0xe0, 32 = 0xe1-0xec, 33 = 0xed, 34 = 0xee-0xef,
     41 = 0xf0, 42 = 0xf1-0xf3, 43 = 0xf4.
   NOTE(review): the byte ranges with codes 20-43 match the first-byte ranges given for the
   U2, U3a-U3d and U4a-U4c tables in this file, so the codes presumably select which of those
   is used for the following bytes; the code that interprets the values is not in this part
   of the file - confirm there. */
1498 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1500 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1501 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1502 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1503 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1504 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1505 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1510 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1511 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1512 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1513 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1514
1515 /* Table for checking for characters between 0x80 and 0x8f.
   Indexed by byte value; the entry is 1 for bytes 0x80-0x8f and 0 for every other byte. */
1516 static const unsigned char U_80_8F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1524 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1532
1533 /* Table for checking for characters between 0x80 and 0x9f.
   Indexed by byte value; the entry is 1 for bytes 0x80-0x9f and 0 for every other byte. */
1534 static const unsigned char U_80_9F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1539 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1540 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1548 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1550
1551 /* Table for checking for characters between 0x80 and 0xbf.
   Indexed by byte value; the entry is 1 for bytes 0x80-0xbf and 0 for every other byte. */
1552 static const unsigned char U_80_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1555 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1561 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1568
1569 /* Table for checking for characters between 0x90 and 0xbf.
   Indexed by byte value; the entry is 1 for bytes 0x90-0xbf and 0 for every other byte. */
1570 static const unsigned char U_90_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1586
1587 /* Table for checking for characters between 0xa0 and 0xbf.
   Indexed by byte value; the entry is 1 for bytes 0xa0-0xbf and 0 for every other byte. */
1588 static const unsigned char U_A0_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1589 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1590 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1598 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1599 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1600 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1601 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1604
1605 /* Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF.
   This array and the U3a-U4c arrays that follow each hold one pointer to a 256-entry byte
   range table for every byte listed after the first one in their description, in the same
   order (so 1, 2 or 3 pointers); the first byte itself is not covered by these arrays. */
1606 static const unsigned char *U2[1]={ U_80_BF };
1607
1608 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF. */
1609 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1610
1611 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF. */
1612 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1613
1614 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML). */
1615 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1616
1617 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled). */
1618 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1619
1620 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF. */
1621 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1622
1623 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF. */
1624 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1625
1626 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML). */
1627 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1628
1629 /* Table for checking for namestart characters.
   Indexed by byte value; the entry is 1 for ':', 'A'-'Z', '_' and 'a'-'z' and 0 for every
   other byte (including all bytes from 0x80 upwards). */
1630 static const unsigned char namestart[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1634 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1635 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1636 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1637 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1646
1647 /* Table for checking for namechar characters.
   Indexed by byte value; the entry is 1 for '-', '.', '0'-'9', ':', 'A'-'Z', '_' and 'a'-'z'
   and 0 for every other byte (including all bytes from 0x80 upwards). */
1648 static const unsigned char namechar[256] ={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1652 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1653 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1654 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1655 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1664
1665 /* Table for checking for whitespace characters.
   Indexed by byte value; the entry is 1 for tab (0x09), carriage return (0x0d) and space (0x20)
   and 0 for every other byte.
   NOTE(review): line feed (0x0a) is 0 in this table; presumably newlines are handled separately
   by the code that uses it (e.g. so that lines can be counted) - confirm against the users. */
1666 static const unsigned char whitespace[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, /* 0x00-0x0f " " */
1667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1668 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1682
1683 /* Table for checking for digit characters.
   Indexed by byte value; the entry is 1 for '0'-'9' and 0 for every other byte. */
1684 static const unsigned char digit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1687 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1698 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1700
1701 /* Table for checking for xdigit characters.
   Indexed by byte value; the entry is 1 for the hexadecimal digits '0'-'9', 'A'-'F' and 'a'-'f'
   and 0 for every other byte. */
1702 static const unsigned char xdigit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1704 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1705 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1706 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1707 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1708 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1709 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1710 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1711 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1712 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */