Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /branches/MS-Windows/src/xmlparse.c

Parent Directory | Revision Log


Revision 1686 - (show annotations) (download) (as text)
Tue May 26 19:03:41 2015 UTC (9 years, 9 months ago) by amb
File MIME type: text/x-csrc
File size: 68783 byte(s)
When compiling with MINGW there is no strcasecmp() function so
_stricmp() must be used (the same as with MSVC).

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2015 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26
#if defined(_MSC_VER)
#include <io.h>
#include <stdint.h>
#define read(fd,address,length) _read(fd,address,(unsigned int)(length))
/* ssize_t must be a signed type: read() reports errors by returning -1 and
   callers test the result with 'n<=0', which can never be true for -1 if the
   type is unsigned. */
typedef int64_t ssize_t;
#else
#include <unistd.h>
#endif
35
36 #include <stdlib.h>
37 #include <inttypes.h>
38 #include <stdint.h>
39 #include <string.h>
40
41 #if defined(_MSC_VER) || defined(__MINGW32__)
42 #define strcasecmp _stricmp
43 #else
44 #include <strings.h>
45 #endif
46
47 #include <ctype.h>
48
49 #include "xmlparse.h"
50
51
52 /* Parser states */
53
54 #define LEX_EOF 0
55
56 #define LEX_FUNC_TAG_BEGIN 1
57 #define LEX_FUNC_XML_DECL_BEGIN 2
58 #define LEX_FUNC_TAG_POP 3
59 #define LEX_FUNC_TAG_PUSH 4
60 #define LEX_FUNC_XML_DECL_FINISH 5
61 #define LEX_FUNC_TAG_FINISH 6
62 #define LEX_FUNC_ATTR_KEY 7
63 #define LEX_FUNC_ATTR_VAL 8
64
65 #define LEX_STATE_INITIAL 10
66 #define LEX_STATE_BANGTAG 11
67 #define LEX_STATE_COMMENT 12
68 #define LEX_STATE_XML_DECL_START 13
69 #define LEX_STATE_XML_DECL 14
70 #define LEX_STATE_TAG_START 15
71 #define LEX_STATE_TAG 16
72 #define LEX_STATE_ATTR_KEY 17
73 #define LEX_STATE_ATTR_VAL 18
74 #define LEX_STATE_END_TAG1 19
75 #define LEX_STATE_END_TAG2 20
76 #define LEX_STATE_DQUOTED 21
77 #define LEX_STATE_SQUOTED 22
78
79 #define LEX_ERROR_TAG_START 101
80 #define LEX_ERROR_XML_DECL_START 102
81 #define LEX_ERROR_TAG 103
82 #define LEX_ERROR_XML_DECL 104
83 #define LEX_ERROR_ATTR 105
84 #define LEX_ERROR_END_TAG 106
85 #define LEX_ERROR_COMMENT 107
86 #define LEX_ERROR_CLOSE 108
87 #define LEX_ERROR_ATTR_VAL 109
88 #define LEX_ERROR_ENTITY_REF 110
89 #define LEX_ERROR_CHAR_REF 111
90 #define LEX_ERROR_TEXT_OUTSIDE 112
91
92 #define LEX_ERROR_UNEXP_TAG 201
93 #define LEX_ERROR_UNBALANCED 202
94 #define LEX_ERROR_NO_START 203
95 #define LEX_ERROR_UNEXP_ATT 204
96 #define LEX_ERROR_UNEXP_EOF 205
97 #define LEX_ERROR_XML_NOT_FIRST 206
98
99 #define LEX_ERROR_OUT_OF_MEMORY 254
100 #define LEX_ERROR_CALLBACK 255
101
102
103 /* Parsing variables and functions */
104
105 static uint64_t lineno;
106
107 static unsigned char buffer[2][16384];
108 static unsigned char *buffer_token,*buffer_end,*buffer_ptr;
109 static int buffer_active=0;
110
111
112 /*++++++++++++++++++++++++++++++++++++++
113 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
114
115 int buffer_refill Return 0 if everything is OK or 1 for EOF.
116
117 int fd The file descriptor to read from.
118 ++++++++++++++++++++++++++++++++++++++*/
119
120 static inline int buffer_refill(int fd)
121 {
122 ssize_t n;
123 size_t m=0;
124
125 m=(buffer_end-buffer[buffer_active])+1;
126
127 if(m>(sizeof(buffer[0])/2)) /* more than half full */
128 {
129 m=0;
130
131 buffer_active=!buffer_active;
132
133 if(buffer_token)
134 {
135 m=(buffer_end-buffer_token)+1;
136
137 memcpy(buffer[buffer_active],buffer_token,m);
138
139 buffer_token=buffer[buffer_active];
140 }
141 }
142
143 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
144
145 buffer_ptr=buffer[buffer_active]+m;
146 buffer_end=buffer[buffer_active]+m+n-1;
147
148 if(n<=0)
149 return(1);
150 else
151 return(0);
152 }
153
154
/* Macros to simplify the parser (and make it look more like lex) */

/* Switch to state 'xx' immediately by jumping back to the state dispatch
   (uses the 'state' variable and the 'new_state' label of the parser function). */
#define BEGIN(xx)                               \
 do{                                            \
     state=(xx);                                \
     goto new_state;                            \
   } while(0)

/* Remember the state that a functional state will continue with. */
#define NEXT(xx) next_state=(xx)

/* Mark the start of a token that must stay contiguous across buffer refills. */
#define START_TOKEN buffer_token=buffer_ptr

/* Forget the current token. */
#define END_TOKEN buffer_token=NULL

/* Advance to the next input character, refilling the buffer from 'fd' when the
   last buffered character has been consumed and switching to LEX_EOF when the
   refill reports that there is no more data. */
#define NEXT_CHAR                               \
 do{                                            \
     if(buffer_ptr!=buffer_end)                 \
        buffer_ptr++;                           \
     else if(buffer_refill(fd))                 \
        BEGIN(LEX_EOF);                         \
   } while(0)
170
171
172 /* -------- equivalent flex definition --------
173
174 S [ \t\r]
175 N (\n)
176
177 U1 [\x09\x0A\x0D\x20-\x7F]
178 U2 [\xC2-\xDF][\x80-\xBF]
179 U3a \xE0[\xA0-\xBF][\x80-\xBF]
180 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
181 U3c \xED[\x80-\x9F][\x80-\xBF]
182 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
183 U3 {U3a}|{U3b}|{U3c}|{U3d}
184 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
185 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
186 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
187 U4 {U4a}|{U4b}|{U4c}
188
189 U ({U1}|{U2}|{U3}|{U4})
190
191 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
192
193 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
194 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
195
196 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
197 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
198
199 letter [a-zA-Z]
200 digit [0-9]
201 xdigit [a-fA-F0-9]
202
203 namechar ({letter}|{digit}|[-._:])
204 namestart ({letter}|[_:])
205 name ({namestart}{namechar}*)
206
207 entityref (&{name};)
208 charref (&#({digit}+|x{xdigit}+);)
209
210 -------- equivalent flex definition -------- */
211
212 /* Tables containing character class definitions (advance declaration for data at end of file). */
213 static const unsigned char quotedD[256],quotedS[256];
214 static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];
215 static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
216
217
218 /*++++++++++++++++++++++++++++++++++++++
219 A function to call the callback function with the parameters needed.
220
221 int call_callback Returns 1 if the callback returned with an error.
222
223 const char *name The name of the tag.
224
225 int (*callback)() The callback function.
226
227 int type The type of tag (start and/or end).
228
229 int nattributes The number of attributes collected.
230
231 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
232 ++++++++++++++++++++++++++++++++++++++*/
233
234 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
235 {
236 switch(nattributes)
237 {
238 case 0: return (*callback)(name,type);
239 case 1: return (*callback)(name,type,attributes[0]);
240 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
241 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
242 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
243 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
244 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
245 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
246 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
247 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
248 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
249 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
250 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
251 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
252 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
253 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
254 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
255
256 default:
257 fprintf(stderr,"XML Parser: Error on line %"PRIu64": too many attributes for tag '%s' source code needs changing.\n",lineno,name);
258 exit(1);
259 }
260 }
261
262
263 /*++++++++++++++++++++++++++++++++++++++
264 Parse the XML and call the functions for each tag as seen.
265
266 int ParseXML Returns 0 if OK or something else in case of an error.
267
268 int fd The file descriptor of the file to parse.
269
270 xmltag **tags The array of pointers to tags for the top level.
271
272 int options A list of XML Parser options OR-ed together.
273 ++++++++++++++++++++++++++++++++++++++*/
274
275 int ParseXML(int fd,xmltag **tags,int options)
276 {
277 int i;
278 int state,next_state,after_attr;
279 unsigned char saved_buffer_ptr=0;
280 const unsigned char *quoted;
281
282 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
283 int attribute=0;
284
285 int stackdepth=0,stackused=0;
286 xmltag ***tags_stack=NULL;
287 xmltag **tag_stack=NULL;
288 xmltag *tag=NULL;
289
290 /* The actual parser. */
291
292 lineno=1;
293
294 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
295 buffer_token=NULL;
296
297 buffer_refill(fd);
298
299 BEGIN(LEX_STATE_INITIAL);
300
301 new_state:
302
303 switch(state)
304 {
305 /* ================ Parsing states ================ */
306
307
308 /* -------- equivalent flex definition --------
309
310 <INITIAL>"<!" { BEGIN(BANGTAG); }
311 <INITIAL>"</" { BEGIN(END_TAG1); }
312 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
313 <INITIAL>"<" { BEGIN(TAG_START); }
314
315 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
316
317 <INITIAL>{N} { lineno++; }
318 <INITIAL>{S}+ { }
319 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
320
321 -------- equivalent flex definition -------- */
322
323 case LEX_STATE_INITIAL:
324
325 while(1)
326 {
327 while(whitespace[(int)*buffer_ptr])
328 NEXT_CHAR;
329
330 if(*buffer_ptr=='\n')
331 {
332 NEXT_CHAR;
333
334 lineno++;
335 }
336 else if(*buffer_ptr=='<')
337 {
338 NEXT_CHAR;
339
340 if(*buffer_ptr=='/')
341 {
342 NEXT_CHAR;
343 BEGIN(LEX_STATE_END_TAG1);
344 }
345 else if(*buffer_ptr=='!')
346 {
347 NEXT_CHAR;
348 BEGIN(LEX_STATE_BANGTAG);
349 }
350 else if(*buffer_ptr=='?')
351 {
352 NEXT_CHAR;
353 BEGIN(LEX_STATE_XML_DECL_START);
354 }
355 else
356 BEGIN(LEX_STATE_TAG_START);
357 }
358 else if(*buffer_ptr=='>')
359 BEGIN(LEX_ERROR_CLOSE);
360 else
361 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
362 }
363
364 break;
365
366 /* -------- equivalent flex definition --------
367
368 <BANGTAG>"--" { BEGIN(COMMENT); }
369 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
370 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
371
372 -------- equivalent flex definition -------- */
373
374 case LEX_STATE_BANGTAG:
375
376 if(*buffer_ptr!='-')
377 BEGIN(LEX_ERROR_TAG_START);
378
379 NEXT_CHAR;
380
381 if(*buffer_ptr!='-')
382 BEGIN(LEX_ERROR_TAG_START);
383
384 NEXT_CHAR;
385 BEGIN(LEX_STATE_COMMENT);
386
387 break;
388
389 /* -------- equivalent flex definition --------
390
391 <COMMENT>"-->" { BEGIN(INITIAL); }
392 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
393 <COMMENT>"-" { }
394 <COMMENT>{N} { lineno++; }
395 <COMMENT>[^-\n]+ { }
396
397 -------- equivalent flex definition -------- */
398
399 case LEX_STATE_COMMENT:
400
401 while(1)
402 {
403 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
404 NEXT_CHAR;
405
406 if(*buffer_ptr=='-')
407 {
408 NEXT_CHAR;
409
410 if(*buffer_ptr!='-')
411 continue;
412
413 NEXT_CHAR;
414 if(*buffer_ptr=='>')
415 {
416 NEXT_CHAR;
417 BEGIN(LEX_STATE_INITIAL);
418 }
419
420 BEGIN(LEX_ERROR_COMMENT);
421 }
422 else /* if(*buffer_ptr=='\n') */
423 {
424 NEXT_CHAR;
425
426 lineno++;
427 }
428 }
429
430 break;
431
432 /* -------- equivalent flex definition --------
433
434 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
435 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
436 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
437
438 -------- equivalent flex definition -------- */
439
440 case LEX_STATE_XML_DECL_START:
441
442 START_TOKEN;
443
444 if(*buffer_ptr=='x')
445 {
446 NEXT_CHAR;
447 if(*buffer_ptr=='m')
448 {
449 NEXT_CHAR;
450 if(*buffer_ptr=='l')
451 {
452 NEXT_CHAR;
453
454 saved_buffer_ptr=*buffer_ptr;
455 *buffer_ptr=0;
456
457 NEXT(LEX_STATE_XML_DECL);
458 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
459 }
460 }
461 }
462
463 BEGIN(LEX_ERROR_XML_DECL_START);
464
465 /* -------- equivalent flex definition --------
466
467 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
468 <XML_DECL>{S}+ { }
469 <XML_DECL>{N} { lineno++; }
470 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
471 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
472
473 -------- equivalent flex definition -------- */
474
475 case LEX_STATE_XML_DECL:
476
477 while(1)
478 {
479 while(whitespace[(int)*buffer_ptr])
480 NEXT_CHAR;
481
482 if(namestart[(int)*buffer_ptr])
483 {
484 START_TOKEN;
485
486 NEXT_CHAR;
487 while(namechar[(int)*buffer_ptr])
488 NEXT_CHAR;
489
490 saved_buffer_ptr=*buffer_ptr;
491 *buffer_ptr=0;
492
493 after_attr=LEX_STATE_XML_DECL;
494 NEXT(LEX_STATE_ATTR_KEY);
495 BEGIN(LEX_FUNC_ATTR_KEY);
496 }
497 else if(*buffer_ptr=='?')
498 {
499 NEXT_CHAR;
500 if(*buffer_ptr=='>')
501 {
502 NEXT_CHAR;
503 NEXT(LEX_STATE_INITIAL);
504 BEGIN(LEX_FUNC_XML_DECL_FINISH);
505 }
506
507 BEGIN(LEX_ERROR_XML_DECL);
508 }
509 else if(*buffer_ptr=='\n')
510 {
511 NEXT_CHAR;
512 lineno++;
513 }
514 else
515 BEGIN(LEX_ERROR_XML_DECL);
516 }
517
518 break;
519
520 /* -------- equivalent flex definition --------
521
522 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
523 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
524 <TAG_START>. { return(LEX_ERROR_TAG_START); }
525
526 -------- equivalent flex definition -------- */
527
528 case LEX_STATE_TAG_START:
529
530 if(namestart[(int)*buffer_ptr])
531 {
532 START_TOKEN;
533
534 NEXT_CHAR;
535 while(namechar[(int)*buffer_ptr])
536 NEXT_CHAR;
537
538 saved_buffer_ptr=*buffer_ptr;
539 *buffer_ptr=0;
540
541 NEXT(LEX_STATE_TAG);
542 BEGIN(LEX_FUNC_TAG_BEGIN);
543 }
544
545 BEGIN(LEX_ERROR_TAG_START);
546
547 /* -------- equivalent flex definition --------
548
549 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
550 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
551 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
552
553 -------- equivalent flex definition -------- */
554
555 case LEX_STATE_END_TAG1:
556
557 if(namestart[(int)*buffer_ptr])
558 {
559 START_TOKEN;
560
561 NEXT_CHAR;
562 while(namechar[(int)*buffer_ptr])
563 NEXT_CHAR;
564
565 saved_buffer_ptr=*buffer_ptr;
566 *buffer_ptr=0;
567
568 NEXT(LEX_STATE_END_TAG2);
569 BEGIN(LEX_FUNC_TAG_POP);
570 }
571
572 BEGIN(LEX_ERROR_END_TAG);
573
574 /* -------- equivalent flex definition --------
575
576 <END_TAG2>">" { BEGIN(INITIAL); }
577 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
578 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
579
580 -------- equivalent flex definition -------- */
581
582 case LEX_STATE_END_TAG2:
583
584 if(*buffer_ptr=='>')
585 {
586 NEXT_CHAR;
587
588 BEGIN(LEX_STATE_INITIAL);
589 }
590
591 BEGIN(LEX_ERROR_END_TAG);
592
593 /* -------- equivalent flex definition --------
594
595 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
596 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
597 <TAG>{S}+ { }
598 <TAG>{N} { lineno++; }
599 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
600 <TAG>. { return(LEX_ERROR_TAG); }
601
602 -------- equivalent flex definition -------- */
603
604 case LEX_STATE_TAG:
605
606 while(1)
607 {
608 while(whitespace[(int)*buffer_ptr])
609 NEXT_CHAR;
610
611 if(namestart[(int)*buffer_ptr])
612 {
613 START_TOKEN;
614
615 NEXT_CHAR;
616 while(namechar[(int)*buffer_ptr])
617 NEXT_CHAR;
618
619 saved_buffer_ptr=*buffer_ptr;
620 *buffer_ptr=0;
621
622 after_attr=LEX_STATE_TAG;
623 NEXT(LEX_STATE_ATTR_KEY);
624 BEGIN(LEX_FUNC_ATTR_KEY);
625 }
626 else if(*buffer_ptr=='/')
627 {
628 NEXT_CHAR;
629 if(*buffer_ptr=='>')
630 {
631 NEXT_CHAR;
632 NEXT(LEX_STATE_INITIAL);
633 BEGIN(LEX_FUNC_TAG_FINISH);
634 }
635
636 BEGIN(LEX_ERROR_TAG);
637 }
638 else if(*buffer_ptr=='>')
639 {
640 NEXT_CHAR;
641 NEXT(LEX_STATE_INITIAL);
642 BEGIN(LEX_FUNC_TAG_PUSH);
643 }
644 else if(*buffer_ptr=='\n')
645 {
646 NEXT_CHAR;
647 lineno++;
648 }
649 else
650 BEGIN(LEX_ERROR_TAG);
651 }
652
653 break;
654
655 /* -------- equivalent flex definition --------
656
657 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
658 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
659 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
660
661 -------- equivalent flex definition -------- */
662
663 case LEX_STATE_ATTR_KEY:
664
665 if(*buffer_ptr=='=')
666 {
667 NEXT_CHAR;
668 BEGIN(LEX_STATE_ATTR_VAL);
669 }
670
671 BEGIN(LEX_ERROR_ATTR);
672
673 /* -------- equivalent flex definition --------
674
675 <ATTR_VAL>\" { BEGIN(DQUOTED); }
676 <ATTR_VAL>\' { BEGIN(SQUOTED); }
677 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
678 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
679
680 -------- equivalent flex definition -------- */
681
682 case LEX_STATE_ATTR_VAL:
683
684 if(*buffer_ptr=='"')
685 {
686 NEXT_CHAR;
687 BEGIN(LEX_STATE_DQUOTED);
688 }
689 else if(*buffer_ptr=='\'')
690 {
691 NEXT_CHAR;
692 BEGIN(LEX_STATE_SQUOTED);
693 }
694
695 BEGIN(LEX_ERROR_ATTR);
696
697 /* -------- equivalent flex definition --------
698
699 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
700 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
701 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
702 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
703 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
704 <DQUOTED>{UquotedD} { }
705 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
706 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
707
708 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
709 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
710 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
711 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
712 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
713 <SQUOTED>{UquotedS} { append_string(yytext); }
714 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
715 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
716
717 -------- equivalent flex definition -------- */
718
719 case LEX_STATE_DQUOTED:
720 case LEX_STATE_SQUOTED:
721
722 if(state==LEX_STATE_DQUOTED)
723 quoted=quotedD;
724 else
725 quoted=quotedS;
726
727 START_TOKEN;
728
729 while(1)
730 {
731 switch(quoted[(int)*buffer_ptr])
732 {
733 case 10: /* U1 - used by all tag keys and many values */
734 do
735 {
736 NEXT_CHAR;
737 }
738 while(quoted[(int)*buffer_ptr]==10);
739 break;
740
741 case 20: /* U2 */
742 NEXT_CHAR;
743 if(!U2[0][(int)*buffer_ptr])
744 BEGIN(LEX_ERROR_ATTR_VAL);
745 NEXT_CHAR;
746 break;
747
748 case 31: /* U3a */
749 NEXT_CHAR;
750 if(!U3a[0][(int)*buffer_ptr])
751 BEGIN(LEX_ERROR_ATTR_VAL);
752 NEXT_CHAR;
753 if(!U3a[1][(int)*buffer_ptr])
754 BEGIN(LEX_ERROR_ATTR_VAL);
755 NEXT_CHAR;
756 break;
757
758 case 32: /* U3b */
759 NEXT_CHAR;
760 if(!U3b[0][(int)*buffer_ptr])
761 BEGIN(LEX_ERROR_ATTR_VAL);
762 NEXT_CHAR;
763 if(!U3b[1][(int)*buffer_ptr])
764 BEGIN(LEX_ERROR_ATTR_VAL);
765 NEXT_CHAR;
766 break;
767
768 case 33: /* U3c */
769 NEXT_CHAR;
770 if(!U3c[0][(int)*buffer_ptr])
771 BEGIN(LEX_ERROR_ATTR_VAL);
772 NEXT_CHAR;
773 if(!U3c[1][(int)*buffer_ptr])
774 BEGIN(LEX_ERROR_ATTR_VAL);
775 NEXT_CHAR;
776 break;
777
778 case 34: /* U3d */
779 NEXT_CHAR;
780 if(!U3d[0][(int)*buffer_ptr])
781 BEGIN(LEX_ERROR_ATTR_VAL);
782 NEXT_CHAR;
783 if(!U3d[1][(int)*buffer_ptr])
784 BEGIN(LEX_ERROR_ATTR_VAL);
785 NEXT_CHAR;
786 break;
787
788 case 41: /* U4a */
789 NEXT_CHAR;
790 if(!U4a[0][(int)*buffer_ptr])
791 BEGIN(LEX_ERROR_ATTR_VAL);
792 NEXT_CHAR;
793 if(!U4a[1][(int)*buffer_ptr])
794 BEGIN(LEX_ERROR_ATTR_VAL);
795 NEXT_CHAR;
796 if(!U4a[2][(int)*buffer_ptr])
797 BEGIN(LEX_ERROR_ATTR_VAL);
798 NEXT_CHAR;
799 break;
800
801 case 42: /* U4b */
802 NEXT_CHAR;
803 if(!U4b[0][(int)*buffer_ptr])
804 BEGIN(LEX_ERROR_ATTR_VAL);
805 NEXT_CHAR;
806 if(!U4b[1][(int)*buffer_ptr])
807 BEGIN(LEX_ERROR_ATTR_VAL);
808 NEXT_CHAR;
809 if(!U4b[2][(int)*buffer_ptr])
810 BEGIN(LEX_ERROR_ATTR_VAL);
811 NEXT_CHAR;
812 break;
813
814 case 43: /* U4c */
815 NEXT_CHAR;
816 if(!U4c[0][(int)*buffer_ptr])
817 BEGIN(LEX_ERROR_ATTR_VAL);
818 NEXT_CHAR;
819 if(!U4c[1][(int)*buffer_ptr])
820 BEGIN(LEX_ERROR_ATTR_VAL);
821 NEXT_CHAR;
822 if(!U4c[2][(int)*buffer_ptr])
823 BEGIN(LEX_ERROR_ATTR_VAL);
824 NEXT_CHAR;
825 break;
826
827 case 50: /* entityref or charref */
828 NEXT_CHAR;
829
830 if(*buffer_ptr=='#') /* charref */
831 {
832 int charref_len=3;
833
834 NEXT_CHAR;
835 if(digit[(int)*buffer_ptr]) /* decimal */
836 {
837 NEXT_CHAR;
838 charref_len++;
839
840 while(digit[(int)*buffer_ptr])
841 {
842 NEXT_CHAR;
843 charref_len++;
844 }
845
846 if(*buffer_ptr!=';')
847 BEGIN(LEX_ERROR_ATTR_VAL);
848 }
849 else if(*buffer_ptr=='x') /* hex */
850 {
851 NEXT_CHAR;
852 charref_len++;
853
854 while(xdigit[(int)*buffer_ptr])
855 {
856 NEXT_CHAR;
857 charref_len++;
858 }
859
860 if(*buffer_ptr!=';')
861 BEGIN(LEX_ERROR_ATTR_VAL);
862 }
863 else /* other */
864 BEGIN(LEX_ERROR_ATTR_VAL);
865
866 NEXT_CHAR;
867
868 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
869 {
870 const char *str;
871
872 saved_buffer_ptr=*buffer_ptr;
873 *buffer_ptr=0;
874
875 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
876
877 if(!str)
878 {
879 buffer_ptr-=charref_len;
880 BEGIN(LEX_ERROR_CHAR_REF);
881 }
882
883 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
884 memcpy(buffer_ptr-strlen(str),str,strlen(str));
885
886 *buffer_ptr=saved_buffer_ptr;
887 }
888 }
889 else if(namestart[(int)*buffer_ptr]) /* entityref */
890 {
891 int entityref_len=3;
892
893 NEXT_CHAR;
894 while(namechar[(int)*buffer_ptr])
895 {
896 NEXT_CHAR;
897 entityref_len++;
898 }
899
900 if(*buffer_ptr!=';')
901 BEGIN(LEX_ERROR_ATTR_VAL);
902
903 NEXT_CHAR;
904
905 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
906 {
907 const char *str;
908
909 saved_buffer_ptr=*buffer_ptr;
910 *buffer_ptr=0;
911
912 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
913
914 if(!str)
915 {
916 buffer_ptr-=entityref_len;
917 BEGIN(LEX_ERROR_ENTITY_REF);
918 }
919
920 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
921 memcpy(buffer_ptr-strlen(str),str,strlen(str));
922
923 *buffer_ptr=saved_buffer_ptr;
924 }
925 }
926 else /* other */
927 BEGIN(LEX_ERROR_ATTR_VAL);
928
929 break;
930
931 case 99: /* quote */
932 *buffer_ptr=0;
933 NEXT_CHAR;
934
935 NEXT(after_attr);
936 BEGIN(LEX_FUNC_ATTR_VAL);
937
938 default: /* other */
939 BEGIN(LEX_ERROR_ATTR_VAL);
940 }
941 }
942
943 break;
944
945
946 /* ================ Functional states ================ */
947
948
949 /* The start of a tag for an XML declaration */
950
951 case LEX_FUNC_XML_DECL_BEGIN:
952
953 if(tag_stack)
954 BEGIN(LEX_ERROR_XML_NOT_FIRST);
955
956 /* The start of a tag for an element */
957
958 case LEX_FUNC_TAG_BEGIN:
959
960 tag=NULL;
961
962 for(i=0;tags[i];i++)
963 if(buffer_token[0]==tags[i]->name[0] || tolower(buffer_token[0])==tags[i]->name[0])
964 if(!strcasecmp((char*)buffer_token+1,tags[i]->name+1))
965 {
966 tag=tags[i];
967
968 for(i=0;i<tag->nattributes;i++)
969 attributes[i]=NULL;
970
971 break;
972 }
973
974 if(tag==NULL)
975 BEGIN(LEX_ERROR_UNEXP_TAG);
976
977 END_TOKEN;
978
979 *buffer_ptr=saved_buffer_ptr;
980 BEGIN(next_state);
981
982 /* The end of the start-tag for an element */
983
984 case LEX_FUNC_TAG_PUSH:
985
986 if(stackused==stackdepth)
987 {
988 tag_stack =(xmltag**) realloc((void*)tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
989 tags_stack=(xmltag***)realloc((void*)tags_stack,(stackdepth+=8)*sizeof(xmltag**));
990 }
991
992 tag_stack [stackused]=tag;
993 tags_stack[stackused]=tags;
994 stackused++;
995
996 if(tag->callback)
997 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
998 BEGIN(LEX_ERROR_CALLBACK);
999
1000 tags=tag->subtags;
1001
1002 BEGIN(next_state);
1003
1004 /* The end of the empty-element-tag for an XML declaration */
1005
1006 case LEX_FUNC_XML_DECL_FINISH:
1007
1008 /* The end of the empty-element-tag for an element */
1009
1010 case LEX_FUNC_TAG_FINISH:
1011
1012 if(tag->callback)
1013 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
1014 BEGIN(LEX_ERROR_CALLBACK);
1015
1016 if(stackused>0)
1017 tag=tag_stack[stackused-1];
1018 else
1019 tag=NULL;
1020
1021 BEGIN(next_state);
1022
1023 /* The end of the end-tag for an element */
1024
1025 case LEX_FUNC_TAG_POP:
1026
1027 stackused--;
1028 tags=tags_stack[stackused];
1029 tag =tag_stack [stackused];
1030
1031 if(strcmp((char*)buffer_token,tag->name))
1032 BEGIN(LEX_ERROR_UNBALANCED);
1033
1034 if(stackused<0)
1035 BEGIN(LEX_ERROR_NO_START);
1036
1037 for(i=0;i<tag->nattributes;i++)
1038 attributes[i]=NULL;
1039
1040 if(tag->callback)
1041 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1042 BEGIN(LEX_ERROR_CALLBACK);
1043
1044 if(stackused>0)
1045 tag=tag_stack[stackused-1];
1046 else
1047 tag=NULL;
1048
1049 END_TOKEN;
1050
1051 *buffer_ptr=saved_buffer_ptr;
1052 BEGIN(next_state);
1053
1054 /* An attribute key */
1055
1056 case LEX_FUNC_ATTR_KEY:
1057
1058 attribute=-1;
1059
1060 for(i=0;i<tag->nattributes;i++)
1061 if(buffer_token[0]==tag->attributes[i][0] || tolower(buffer_token[0])==tag->attributes[i][0])
1062 if(!strcasecmp((char*)buffer_token+1,tag->attributes[i]+1))
1063 {
1064 attribute=i;
1065
1066 break;
1067 }
1068
1069 if(attribute==-1)
1070 {
1071 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1072 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1073 BEGIN(LEX_ERROR_UNEXP_ATT);
1074 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1075 fprintf(stderr,"XML Parser: Warning on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1076 }
1077
1078 END_TOKEN;
1079
1080 *buffer_ptr=saved_buffer_ptr;
1081 BEGIN(next_state);
1082
1083 /* An attribute value */
1084
1085 case LEX_FUNC_ATTR_VAL:
1086
1087 if(tag->callback && attribute!=-1)
1088 attributes[attribute]=buffer_token;
1089
1090 END_TOKEN;
1091
1092 BEGIN(next_state);
1093
1094 /* End of file */
1095
1096 case LEX_EOF:
1097
1098 if(tag)
1099 BEGIN(LEX_ERROR_UNEXP_EOF);
1100
1101 break;
1102
1103
1104 /* ================ Error states ================ */
1105
1106
1107 case LEX_ERROR_TAG_START:
1108 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '<' seen not at start of tag.\n",lineno);
1109 break;
1110
1111 case LEX_ERROR_XML_DECL_START:
1112 fprintf(stderr,"XML Parser: Error on line %"PRIu64": characters '<?' seen not at start of XML declaration.\n",lineno);
1113 break;
1114
1115 case LEX_ERROR_TAG:
1116 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside tag '<%s...>'.\n",lineno,tag->name);
1117 break;
1118
1119 case LEX_ERROR_XML_DECL:
1120 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen inside XML declaration '<?xml...>'.\n",lineno);
1121 break;
1122
1123 case LEX_ERROR_ATTR:
1124 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid attribute definition seen in tag.\n",lineno);
1125 break;
1126
1127 case LEX_ERROR_END_TAG:
1128 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character seen in end-tag.\n",lineno);
1129 break;
1130
1131 case LEX_ERROR_COMMENT:
1132 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid comment seen.\n",lineno);
1133 break;
1134
1135 case LEX_ERROR_CLOSE:
1136 fprintf(stderr,"XML Parser: Error on line %"PRIu64": character '>' seen not at end of tag.\n",lineno);
1137 break;
1138
1139 case LEX_ERROR_ATTR_VAL:
1140 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character '%c' seen in attribute value.\n",lineno,*buffer_ptr);
1141 break;
1142
1143 case LEX_ERROR_ENTITY_REF:
1144 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid entity reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1145 break;
1146
1147 case LEX_ERROR_CHAR_REF:
1148 fprintf(stderr,"XML Parser: Error on line %"PRIu64": invalid character reference '%s' seen in attribute value.\n",lineno,buffer_ptr);
1149 break;
1150
1151 case LEX_ERROR_TEXT_OUTSIDE:
1152 fprintf(stderr,"XML Parser: Error on line %"PRIu64": non-whitespace '%c' seen outside tag.\n",lineno,*buffer_ptr);
1153 break;
1154
1155 case LEX_ERROR_UNEXP_TAG:
1156 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected tag '%s'.\n",lineno,buffer_token);
1157 break;
1158
1159 case LEX_ERROR_UNBALANCED:
1160 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' doesn't match start tag '<%s ...>'.\n",lineno,buffer_token,tag->name);
1161 break;
1162
1163 case LEX_ERROR_NO_START:
1164 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end tag '</%s>' seen but there was no start tag '<%s ...>'.\n",lineno,buffer_token,buffer_token);
1165 break;
1166
1167 case LEX_ERROR_UNEXP_ATT:
1168 fprintf(stderr,"XML Parser: Error on line %"PRIu64": unexpected attribute '%s' for tag '%s'.\n",lineno,buffer_token,tag->name);
1169 break;
1170
1171 case LEX_ERROR_UNEXP_EOF:
1172 fprintf(stderr,"XML Parser: Error on line %"PRIu64": end of file seen without end tag '</%s>'.\n",lineno,tag->name);
1173 break;
1174
1175 case LEX_ERROR_XML_NOT_FIRST:
1176 fprintf(stderr,"XML Parser: Error on line %"PRIu64": XML declaration '<?xml...>' not before all other tags.\n",lineno);
1177 break;
1178 }
1179
1180 /* Delete the tagdata */
1181
1182 if(stackdepth)
1183 {
1184 free(tag_stack);
1185 free(tags_stack);
1186 }
1187
1188 return(state);
1189 }
1190
1191
1192 /*++++++++++++++++++++++++++++++++++++++
1193 Return the current parser line number.
1194
1195 uint64_t ParseXML_LineNumber Returns the line number.
1196 ++++++++++++++++++++++++++++++++++++++*/
1197
1198 uint64_t ParseXML_LineNumber(void)
1199 {
1200 return(lineno);
1201 }
1202
1203
/*++++++++++++++++++++++++++++++++++++++
  Look up one of the five predefined XML entity references and return its replacement text.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant replacement string, or NULL if the
                                   reference is not one of the predefined ones.  The result must not
                                   be modified or freed.

  const char *string The complete entity reference, including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 static const struct
 {
  const char *reference;
  const char *replacement;
 }
 entities[]={ {"&amp;" ,"&" },
              {"&lt;"  ,"<" },
              {"&gt;"  ,">" },
              {"&apos;","'" },
              {"&quot;","\""} };
 size_t k;

 for(k=0;k<sizeof(entities)/sizeof(entities[0]);k++)
   {
    if(strcmp(string,entities[k].reference)==0)
       return((char*)entities[k].replacement);
   }

 return(NULL);
}
1221
1222
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference ("&#NNN;" or "&#xHHH;") into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to the replacement decoded string.  The string is
                                 held in a static buffer that is overwritten by the next call, so it
                                 must be copied if it is needed for longer (and must not be freed).

  const char *string The character reference string, starting with "&#".

  Values that cannot be encoded as valid UTF-8 (negative, a surrogate U+D800-U+DFFF or above U+10FFFF)
  are replaced by U+FFFD, the Unicode replacement character.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]="";
 long int unicode;

 /* Skip "&#x" (hexadecimal) or "&#" (decimal); strtol() stops at the terminating ';'. */

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Anything that is not a Unicode scalar value is replaced rather than mis-encoded. */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F => 0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0+((unicode&0x07C0)>>6));
    result[1]=(char)(0x80+ (unicode&0x003F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0+((unicode&0xF000)>>12));
    result[1]=(char)(0x80+((unicode&0x0FC0)>>6));
    result[2]=(char)(0x80+ (unicode&0x003F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0+((unicode&0x1C0000)>>18));
    result[1]=(char)(0x80+((unicode&0x03F000)>>12));
    result[2]=(char)(0x80+((unicode&0x000FC0)>>6));
    result[3]=(char)(0x80+ (unicode&0x00003F));
    result[4]=0;
   }

 return(result);
}
1278
1279
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns a pointer to the replacement encoded string (or the original if
                                 no change needed).  When the returned pointer differs from the input
                                 it was allocated with malloc() and the caller may free() it.  Returns
                                 NULL if memory could not be allocated.

  const char *string The string to convert (expected to be UTF-8).

  The characters < > & ' " are replaced by entity or character references; control characters (below
  0x20) and non-ASCII UTF-8 sequences are replaced by hexadecimal character references; bytes that do
  not start a recognised UTF-8 sequence are replaced by a reference to U+FFFD.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";
 /* The longest replacement written for one input character is "&#xHHHHHH;" = 10 bytes. */
 enum { LONGEST_ENTITY=10, CHUNK=256 };
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *result;

 /* Find the first byte that needs to be replaced, if there is none return the original. */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Copy the unchanged prefix */

 size=i+CHUNK;

 result=(char*)malloc(size);

 if(!result)
    return(NULL);

 memcpy(result,string,i);
 j=i;

 /* Encode the rest of the string one input character at a time */

 for(;in[i];i++)
   {
    /* Guarantee room for the longest replacement plus the terminator before writing anything. */

    if(size-j<(size_t)LONGEST_ENTITY+1)
      {
       char *bigger=(char*)realloc((void*)result,size+CHUNK);

       if(!bigger)
         {
          free(result);
          return(NULL);
         }

       result=bigger;
       size+=CHUNK;
      }

    if(in[i]=='\'')
      {
       /* XML, HTML5 and XHTML1 allow &apos; but HTML4 doesn't. */
       memcpy(result+j,"&#39;",5);
       j+=5;
      }
    else if(in[i]=='&')
      {
       memcpy(result+j,"&amp;",5);
       j+=5;
      }
    else if(in[i]=='"')
      {
       memcpy(result+j,"&quot;",6);
       j+=6;
      }
    else if(in[i]=='<')
      {
       memcpy(result+j,"&lt;",4);
       j+=4;
      }
    else if(in[i]=='>')
      {
       memcpy(result+j,"&gt;",4);
       j+=4;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (the continuation byte tests stop at the terminating NUL) */

       if((in[i]&0x80)==0)
         {
          /* 0000 0000-0000 007F => 0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;

       /* Output the character entity using 2, 4 or 6 hexadecimal digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1420
1421
1422 /*++++++++++++++++++++++++++++++++++++++
1423 Check that a string really is an integer.
1424
1425 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1426
1427 const char *string The string to be parsed.
1428 ++++++++++++++++++++++++++++++++++++++*/
1429
1430 int ParseXML_IsInteger(const char *string)
1431 {
1432 const unsigned char *p=(unsigned char*)string;
1433
1434 if(*p=='-' || *p=='+')
1435 p++;
1436
1437 while(digit[(int)*p])
1438 p++;
1439
1440 if(*p)
1441 return(0);
1442 else
1443 return(1);
1444 }
1445
1446
1447 /*++++++++++++++++++++++++++++++++++++++
1448 Check that a string really is a floating point number.
1449
1450 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1451
1452 const char *string The string to be parsed.
1453 ++++++++++++++++++++++++++++++++++++++*/
1454
1455 int ParseXML_IsFloating(const char *string)
1456 {
1457 const unsigned char *p=(unsigned char*)string;
1458
1459 if(*p=='-' || *p=='+')
1460 p++;
1461
1462 while(digit[(int)*p] || *p=='.')
1463 p++;
1464
1465 if(*p=='e' || *p=='E')
1466 {
1467 p++;
1468
1469 if(*p=='-' || *p=='+')
1470 p++;
1471
1472 while(digit[*p])
1473 p++;
1474 }
1475
1476 if(*p)
1477 return(0);
1478 else
1479 return(1);
1480 }
1481
1482
1483 /* Table for checking for double-quoted characters, indexed by byte value (256 entries).
        Values present in the table:
          0  - tab/LF/CR excepted, all bytes 0x00-0x1f; '<' and '>'; bytes 0x80-0xc1 and 0xf5-0xff.
          10 - tab, LF, CR and every other ASCII byte 0x20-0x7f not listed below.
          99 - the double quote character (0x22).
          50 - the ampersand character (0x26).
          20 - bytes 0xc2-0xdf.
          31, 32, 33, 34 - bytes 0xe0, 0xe1-0xec, 0xed and 0xee-0xef respectively.
          41, 42, 43 - bytes 0xf0, 0xf1-0xf3 and 0xf4 respectively.
        NOTE(review): the byte ranges for 20/31-34/41-43 match the UTF-8 lead bytes named in the
        comments of the U2, U3a-U3d and U4a-U4c tables; presumably 0 means "not allowed", 99 ends
        the value and 50 starts a reference - confirm against the lexer that reads this table. */
1484 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1486 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1487 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1488 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1489 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1490 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1491 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1496 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1497 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1498 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1499 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1500
1501 /* Table for checking for single-quoted characters, indexed by byte value (256 entries).
        Values present in the table:
          0  - tab/LF/CR excepted, all bytes 0x00-0x1f; '<' and '>'; bytes 0x80-0xc1 and 0xf5-0xff.
          10 - tab, LF, CR and every other ASCII byte 0x20-0x7f not listed below (including '"').
          99 - the single quote character (0x27).
          50 - the ampersand character (0x26).
          20 - bytes 0xc2-0xdf.
          31, 32, 33, 34 - bytes 0xe0, 0xe1-0xec, 0xed and 0xee-0xef respectively.
          41, 42, 43 - bytes 0xf0, 0xf1-0xf3 and 0xf4 respectively.
        NOTE(review): the byte ranges for 20/31-34/41-43 match the UTF-8 lead bytes named in the
        comments of the U2, U3a-U3d and U4a-U4c tables; presumably 0 means "not allowed", 99 ends
        the value and 50 starts a reference - confirm against the lexer that reads this table. */
1502 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1504 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1505 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1506 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1507 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1508 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1509 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1514 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1515 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1516 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1517 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1518
1519 /* Table for checking for characters between 0x80 and 0x8f.
        Indexed by byte value: the entry is 1 for bytes 0x80-0x8f and 0 for every other byte.
        It is referenced as the first entry of the U4c table (the byte following an 0xf4 lead byte). */
1520 static const unsigned char U_80_8F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1528 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1536
1537 /* Table for checking for characters between 0x80 and 0x9f.
        Indexed by byte value: the entry is 1 for bytes 0x80-0x9f and 0 for every other byte.
        It is referenced as the first entry of the U3c table (the byte following an 0xed lead byte). */
1538 static const unsigned char U_80_9F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1539 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1540 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1548 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1549 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1554
1555 /* Table for checking for characters between 0x80 and 0xbf.
        Indexed by byte value: the entry is 1 for bytes 0x80-0xbf and 0 for every other byte.
        It is referenced by all of the U2, U3a-U3d and U4a-U4c tables. */
1556 static const unsigned char U_80_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1564 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1566 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1572
1573 /* Table for checking for characters between 0x90 and 0xbf.
        Indexed by byte value: the entry is 1 for bytes 0x90-0xbf and 0 for every other byte.
        It is referenced as the first entry of the U4a table (the byte following an 0xf0 lead byte). */
1574 static const unsigned char U_90_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1583 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1584 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1589 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1590
1591 /* Table for checking for characters between 0xa0 and 0xbf.
        Indexed by byte value: the entry is 1 for bytes 0xa0-0xbf and 0 for every other byte.
        It is referenced as the first entry of the U3a table (the byte following an 0xe0 lead byte). */
1592 static const unsigned char U_A0_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1600 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1601 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1602 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1603 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1605 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1606 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1608
1609 /* The following arrays each list, in order, the byte-range tables for the bytes that follow a
        UTF-8 lead byte: one entry per following byte, so 1 entry for 2-byte sequences, 2 entries
        for 3-byte sequences and 3 entries for 4-byte sequences.  In each comment the first range is
        the lead byte itself and the remaining ranges are the following bytes.
        Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF. */
1610 static const unsigned char *U2[1]={ U_80_BF };
1611
1612 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF. */
1613 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1614
1615 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF. */
1616 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1617
1618 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML). */
1619 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1620
1621 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled). */
1622 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1623
1624 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF. */
1625 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1626
1627 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF. */
1628 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1629
1630 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML). */
1631 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1632
1633 /* Table for checking for namestart characters.
        Indexed by byte value: the entry is 1 for ':', 'A'-'Z', '_' and 'a'-'z' and 0 for every
        other byte (including all bytes 0x80-0xff, so only ASCII name start characters are marked). */
1634 static const unsigned char namestart[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1638 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1639 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1640 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1641 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1650
1651 /* Table for checking for namechar characters.
        Indexed by byte value: the entry is 1 for '-', '.', '0'-'9', ':', 'A'-'Z', '_' and 'a'-'z'
        and 0 for every other byte (including all bytes 0x80-0xff). */
1652 static const unsigned char namechar[256] ={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1655 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1656 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1657 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1658 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1659 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1660 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1668
1669 /* Table for checking for whitespace characters.
        Indexed by byte value: the entry is 1 for tab (0x09), carriage return (0x0d) and space (0x20)
        and 0 for every other byte.  Note that line feed (0x0a) is 0 in this table.
        NOTE(review): presumably line feeds are handled separately so that lines can be counted -
        confirm against the lexer before changing this entry. */
1670 static const unsigned char whitespace[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, /* 0x00-0x0f " " */
1671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1672 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1686
1687 /* Table for checking for digit characters.
        Indexed by byte value: the entry is 1 for '0'-'9' and 0 for every other byte.
        ParseXML_IsInteger() and ParseXML_IsFloating() in this file index it with an unsigned char. */
1688 static const unsigned char digit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1691 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1698 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1700 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1701 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1702 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1704
1705 /* Table for checking for xdigit characters.
        Indexed by byte value: the entry is 1 for the hexadecimal digits '0'-'9', 'A'-'F' and
        'a'-'f' and 0 for every other byte. */
1706 static const unsigned char xdigit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1707 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1708 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1709 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1710 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1711 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1712 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */