Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /branches/destination-access/src/xmlparse.c

Parent Directory | Revision Log


Revision 1867 - (show annotations) (download) (as text)
Sat Mar 5 14:34:32 2016 UTC (9 years, 1 month ago) by amb
File MIME type: text/x-csrc
File size: 69640 byte(s)
Update for version 3.1 release.

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2016 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26
27 #if defined(_MSC_VER)
28 #include <io.h>
29 #include <basetsd.h>
30 #define read(fd,address,length) _read(fd,address,(unsigned int)(length))
31 #define snprintf _snprintf
32 #define ssize_t SSIZE_T
33 #else
34 #include <unistd.h>
35 #endif
36
37 #include <stdlib.h>
38 #include <inttypes.h>
39 #include <stdarg.h>
40 #include <stdint.h>
41 #include <string.h>
42
43 #if defined(_MSC_VER) || defined(__MINGW32__)
44 #define strcasecmp _stricmp
45 #else
46 #include <strings.h>
47 #endif
48
49 #include <ctype.h>
50
51 #include "xmlparse.h"
52
53
54 /* Parser states */
/* Every value below is a possible value of the 'state' variable in ParseXML(). */
/* ParseXML() returns the final state, so LEX_EOF (0) is the success result and */
/* the LEX_ERROR_* values are the error results. */
55
/* Reached when the input is exhausted. */
56 #define LEX_EOF 0
57
/* Functional states: reached when a complete token has been recognised and */
/* the tag/attribute bookkeeping or the user callback has to run. */
58 #define LEX_FUNC_TAG_BEGIN 1
59 #define LEX_FUNC_XML_DECL_BEGIN 2
60 #define LEX_FUNC_TAG_POP 3
61 #define LEX_FUNC_TAG_PUSH 4
62 #define LEX_FUNC_XML_DECL_FINISH 5
63 #define LEX_FUNC_TAG_FINISH 6
64 #define LEX_FUNC_ATTR_KEY 7
65 #define LEX_FUNC_ATTR_VAL 8
66
/* Parsing states: the lexical context that the scanner is currently in. */
67 #define LEX_STATE_INITIAL 10
68 #define LEX_STATE_BANGTAG 11
69 #define LEX_STATE_COMMENT 12
70 #define LEX_STATE_XML_DECL_START 13
71 #define LEX_STATE_XML_DECL 14
72 #define LEX_STATE_TAG_START 15
73 #define LEX_STATE_TAG 16
74 #define LEX_STATE_ATTR_KEY 17
75 #define LEX_STATE_ATTR_VAL 18
76 #define LEX_STATE_END_TAG1 19
77 #define LEX_STATE_END_TAG2 20
78 #define LEX_STATE_DQUOTED 21
79 #define LEX_STATE_SQUOTED 22
80
/* Error states raised by the scanner itself when an unexpected character is seen. */
81 #define LEX_ERROR_TAG_START 101
82 #define LEX_ERROR_XML_DECL_START 102
83 #define LEX_ERROR_TAG 103
84 #define LEX_ERROR_XML_DECL 104
85 #define LEX_ERROR_ATTR 105
86 #define LEX_ERROR_END_TAG 106
87 #define LEX_ERROR_COMMENT 107
88 #define LEX_ERROR_CLOSE 108
89 #define LEX_ERROR_ATTR_VAL 109
90 #define LEX_ERROR_ENTITY_REF 110
91 #define LEX_ERROR_CHAR_REF 111
92 #define LEX_ERROR_TEXT_OUTSIDE 112
93
/* Error states raised by the functional states (unknown or unbalanced tags, */
/* unknown attributes, premature end of file, misplaced XML declaration). */
94 #define LEX_ERROR_UNEXP_TAG 201
95 #define LEX_ERROR_UNBALANCED 202
96 #define LEX_ERROR_NO_START 203
97 #define LEX_ERROR_UNEXP_ATT 204
98 #define LEX_ERROR_UNEXP_EOF 205
99 #define LEX_ERROR_XML_NOT_FIRST 206
100
/* Error state entered when a tag callback function returns non-zero. */
101 #define LEX_ERROR_CALLBACK 255
102
103
104 /* Parsing variables and functions (re-initialised for each file) */
105
/* The current input line number; set to 1 at the start of ParseXML() and */
/* incremented for each newline that the scanner consumes. */
106 static uint64_t lineno;
107
/* Two input buffers; buffer_refill() switches between them and copies the */
/* in-progress token across so that it always stays contiguous in memory. */
108 static unsigned char buffer[2][16384];
/* buffer_token is the start of the token being collected (NULL if none), */
/* buffer_end is the last valid byte that was read and buffer_ptr is the */
/* byte currently being examined. */
109 static unsigned char *buffer_token,*buffer_end,*buffer_ptr;
/* The index (0 or 1) of the buffer currently being filled and scanned. */
110 static int buffer_active=0;
111
/* The most recent message from ParseXML_SetError(), allocated with malloc() (NULL if none). */
112 static char *stored_message=NULL;
113
114
115 /*++++++++++++++++++++++++++++++++++++++
116 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
117
118 int buffer_refill Return 0 if everything is OK or 1 for EOF.
119
120 int fd The file descriptor to read from.
121 ++++++++++++++++++++++++++++++++++++++*/
122
123 static inline int buffer_refill(int fd)
124 {
125 ssize_t n;
126 size_t m=0;
127
128 m=(buffer_end-buffer[buffer_active])+1;
129
130 if(m>(sizeof(buffer[0])/2)) /* more than half full */
131 {
132 m=0;
133
134 buffer_active=!buffer_active;
135
136 if(buffer_token)
137 {
138 m=(buffer_end-buffer_token)+1;
139
140 memcpy(buffer[buffer_active],buffer_token,m);
141
142 buffer_token=buffer[buffer_active];
143 }
144 }
145
146 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
147
148 buffer_ptr=buffer[buffer_active]+m;
149 buffer_end=buffer[buffer_active]+m+n-1;
150
151 if(n<=0)
152 return(1);
153 else
154 return(0);
155 }
156
157
158 /* Macros to simplify the parser (and make it look more like lex) */
/* These are only usable inside ParseXML() because they refer to its local */
/* variables 'state', 'next_state' and 'fd' and to its 'new_state' label. */
159
/* Switch to state xx immediately by jumping back to the state dispatch switch. */
160 #define BEGIN(xx) do{ state=(xx); goto new_state; } while(0)
/* Remember the state that a functional state should continue with when it has finished. */
161 #define NEXT(xx) next_state=(xx)
162
/* Mark the current position as the start of a token; buffer_refill() will then keep it contiguous. */
163 #define START_TOKEN buffer_token=buffer_ptr
/* Forget the current token so that buffer_refill() no longer needs to preserve it. */
164 #define END_TOKEN buffer_token=NULL
165
/* Advance to the next input byte, refilling the buffer when the last byte */
/* that was read has been reached and jumping to LEX_EOF when no more data is available. */
166 #define NEXT_CHAR \
167 do{ \
168 if(buffer_ptr==buffer_end) \
169 { if(buffer_refill(fd)) BEGIN(LEX_EOF); } \
170 else \
171 buffer_ptr++; \
172 } while(0)
173
174
175 /* -------- equivalent flex definition --------
176
177 S [ \t\r]
178 N (\n)
179
180 U1 [\x09\x0A\x0D\x20-\x7F]
181 U2 [\xC2-\xDF][\x80-\xBF]
182 U3a \xE0[\xA0-\xBF][\x80-\xBF]
183 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
184 U3c \xED[\x80-\x9F][\x80-\xBF]
185 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
186 U3 {U3a}|{U3b}|{U3c}|{U3d}
187 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
188 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
189 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
190 U4 {U4a}|{U4b}|{U4c}
191
192 U ({U1}|{U2}|{U3}|{U4})
193
194 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
195
196 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
197 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
198
199 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
200 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
201
202 letter [a-zA-Z]
203 digit [0-9]
204 xdigit [a-fA-F0-9]
205
206 namechar ({letter}|{digit}|[-._:])
207 namestart ({letter}|[_:])
208 name ({namestart}{namechar}*)
209
210 entityref (&{name};)
211 charref (&#({digit}+|x{xdigit}+);)
212
213 -------- equivalent flex definition -------- */
214
215 /* Tables containing character class definitions (advance declaration for data at end of file). */
/* Classification of each possible first byte inside a double or single quoted */
/* attribute value; the value selects the case handled in the quoted states of ParseXML(). */
216 static const unsigned char quotedD[256],quotedS[256];
/* Tables indexed by byte value used in ParseXML() to validate the second, third */
/* and fourth bytes of the multi-byte UTF-8 sequence classes of the same names. */
217 static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];
/* Lookup tables indexed by byte value; non-zero if the byte belongs to the named character class. */
218 static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
219
220
221 /*++++++++++++++++++++++++++++++++++++++
222 A function to call the callback function with the parameters needed.
223
224 int call_callback Returns 1 if the callback returned with an error.
225
226 const char *name The name of the tag.
227
228 int (*callback)() The callback function.
229
230 int type The type of tag (start and/or end).
231
232 int nattributes The number of attributes collected.
233
234 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
235 ++++++++++++++++++++++++++++++++++++++*/
236
237 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
238 {
239 switch(nattributes)
240 {
241 case 0: return (*callback)(name,type);
242 case 1: return (*callback)(name,type,attributes[0]);
243 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
244 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
245 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
246 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
247 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
248 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
249 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
250 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
251 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
252 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
253 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
254 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
255 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
256 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
257 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
258
259 default:
260 ParseXML_SetError("Too many attributes for tag '%s' source code needs changing.",name);
261 return(1);
262 }
263 }
264
265
266 /*++++++++++++++++++++++++++++++++++++++
267 Parse the XML and call the functions for each tag as seen.
268
269 int ParseXML Returns 0 if OK or something else in case of an error.
270
271 int fd The file descriptor of the file to parse.
272
273 const xmltag *const *tags The array of pointers to tags for the top level.
274
275 int options A list of XML Parser options OR-ed together.
276 ++++++++++++++++++++++++++++++++++++++*/
277
278 int ParseXML(int fd,const xmltag *const *tags,int options)
279 {
280 int i;
281 int state,next_state,after_attr;
282 unsigned char saved_buffer_ptr=0;
283 const unsigned char *quoted;
284
285 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
286 int attribute=0;
287
288 int stackdepth=0,stackused=0;
289 const xmltag * const **tags_stack=NULL;
290 const xmltag **tag_stack=NULL;
291 const xmltag *tag=NULL;
292
293 /* The actual parser. */
294
295 lineno=1;
296
297 if(stored_message)
298 free(stored_message);
299 stored_message=NULL;
300
301 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
302 buffer_token=NULL;
303
304 buffer_refill(fd);
305
306 BEGIN(LEX_STATE_INITIAL);
307
308 new_state:
309
310 switch(state)
311 {
312 /* ================ Parsing states ================ */
313
314
315 /* -------- equivalent flex definition --------
316
317 <INITIAL>"<!" { BEGIN(BANGTAG); }
318 <INITIAL>"</" { BEGIN(END_TAG1); }
319 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
320 <INITIAL>"<" { BEGIN(TAG_START); }
321
322 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
323
324 <INITIAL>{N} { lineno++; }
325 <INITIAL>{S}+ { }
326 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
327
328 -------- equivalent flex definition -------- */
329
330 case LEX_STATE_INITIAL:
331
332 while(1)
333 {
334 while(whitespace[(int)*buffer_ptr])
335 NEXT_CHAR;
336
337 if(*buffer_ptr=='\n')
338 {
339 NEXT_CHAR;
340
341 lineno++;
342 }
343 else if(*buffer_ptr=='<')
344 {
345 NEXT_CHAR;
346
347 if(*buffer_ptr=='/')
348 {
349 NEXT_CHAR;
350 BEGIN(LEX_STATE_END_TAG1);
351 }
352 else if(*buffer_ptr=='!')
353 {
354 NEXT_CHAR;
355 BEGIN(LEX_STATE_BANGTAG);
356 }
357 else if(*buffer_ptr=='?')
358 {
359 NEXT_CHAR;
360 BEGIN(LEX_STATE_XML_DECL_START);
361 }
362 else
363 BEGIN(LEX_STATE_TAG_START);
364 }
365 else if(*buffer_ptr=='>')
366 BEGIN(LEX_ERROR_CLOSE);
367 else
368 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
369 }
370
371 break;
372
373 /* -------- equivalent flex definition --------
374
375 <BANGTAG>"--" { BEGIN(COMMENT); }
376 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
377 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
378
379 -------- equivalent flex definition -------- */
380
381 case LEX_STATE_BANGTAG:
382
383 if(*buffer_ptr!='-')
384 BEGIN(LEX_ERROR_TAG_START);
385
386 NEXT_CHAR;
387
388 if(*buffer_ptr!='-')
389 BEGIN(LEX_ERROR_TAG_START);
390
391 NEXT_CHAR;
392 BEGIN(LEX_STATE_COMMENT);
393
394 break;
395
396 /* -------- equivalent flex definition --------
397
398 <COMMENT>"-->" { BEGIN(INITIAL); }
399 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
400 <COMMENT>"-" { }
401 <COMMENT>{N} { lineno++; }
402 <COMMENT>[^-\n]+ { }
403
404 -------- equivalent flex definition -------- */
405
406 case LEX_STATE_COMMENT:
407
408 while(1)
409 {
410 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
411 NEXT_CHAR;
412
413 if(*buffer_ptr=='-')
414 {
415 NEXT_CHAR;
416
417 if(*buffer_ptr!='-')
418 continue;
419
420 NEXT_CHAR;
421 if(*buffer_ptr=='>')
422 {
423 NEXT_CHAR;
424 BEGIN(LEX_STATE_INITIAL);
425 }
426
427 BEGIN(LEX_ERROR_COMMENT);
428 }
429 else /* if(*buffer_ptr=='\n') */
430 {
431 NEXT_CHAR;
432
433 lineno++;
434 }
435 }
436
437 break;
438
439 /* -------- equivalent flex definition --------
440
441 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
442 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
443 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
444
445 -------- equivalent flex definition -------- */
446
447 case LEX_STATE_XML_DECL_START:
448
449 START_TOKEN;
450
451 if(*buffer_ptr=='x')
452 {
453 NEXT_CHAR;
454 if(*buffer_ptr=='m')
455 {
456 NEXT_CHAR;
457 if(*buffer_ptr=='l')
458 {
459 NEXT_CHAR;
460
461 saved_buffer_ptr=*buffer_ptr;
462 *buffer_ptr=0;
463
464 NEXT(LEX_STATE_XML_DECL);
465 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
466 }
467 }
468 }
469
470 BEGIN(LEX_ERROR_XML_DECL_START);
471
472 /* -------- equivalent flex definition --------
473
474 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
475 <XML_DECL>{S}+ { }
476 <XML_DECL>{N} { lineno++; }
477 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
478 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
479
480 -------- equivalent flex definition -------- */
481
482 case LEX_STATE_XML_DECL:
483
484 while(1)
485 {
486 while(whitespace[(int)*buffer_ptr])
487 NEXT_CHAR;
488
489 if(namestart[(int)*buffer_ptr])
490 {
491 START_TOKEN;
492
493 NEXT_CHAR;
494 while(namechar[(int)*buffer_ptr])
495 NEXT_CHAR;
496
497 saved_buffer_ptr=*buffer_ptr;
498 *buffer_ptr=0;
499
500 after_attr=LEX_STATE_XML_DECL;
501 NEXT(LEX_STATE_ATTR_KEY);
502 BEGIN(LEX_FUNC_ATTR_KEY);
503 }
504 else if(*buffer_ptr=='?')
505 {
506 NEXT_CHAR;
507 if(*buffer_ptr=='>')
508 {
509 NEXT_CHAR;
510 NEXT(LEX_STATE_INITIAL);
511 BEGIN(LEX_FUNC_XML_DECL_FINISH);
512 }
513
514 BEGIN(LEX_ERROR_XML_DECL);
515 }
516 else if(*buffer_ptr=='\n')
517 {
518 NEXT_CHAR;
519 lineno++;
520 }
521 else
522 BEGIN(LEX_ERROR_XML_DECL);
523 }
524
525 break;
526
527 /* -------- equivalent flex definition --------
528
529 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
530 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
531 <TAG_START>. { return(LEX_ERROR_TAG_START); }
532
533 -------- equivalent flex definition -------- */
534
535 case LEX_STATE_TAG_START:
536
537 if(namestart[(int)*buffer_ptr])
538 {
539 START_TOKEN;
540
541 NEXT_CHAR;
542 while(namechar[(int)*buffer_ptr])
543 NEXT_CHAR;
544
545 saved_buffer_ptr=*buffer_ptr;
546 *buffer_ptr=0;
547
548 NEXT(LEX_STATE_TAG);
549 BEGIN(LEX_FUNC_TAG_BEGIN);
550 }
551
552 BEGIN(LEX_ERROR_TAG_START);
553
554 /* -------- equivalent flex definition --------
555
556 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
557 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
558 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
559
560 -------- equivalent flex definition -------- */
561
562 case LEX_STATE_END_TAG1:
563
564 if(namestart[(int)*buffer_ptr])
565 {
566 START_TOKEN;
567
568 NEXT_CHAR;
569 while(namechar[(int)*buffer_ptr])
570 NEXT_CHAR;
571
572 saved_buffer_ptr=*buffer_ptr;
573 *buffer_ptr=0;
574
575 NEXT(LEX_STATE_END_TAG2);
576 BEGIN(LEX_FUNC_TAG_POP);
577 }
578
579 BEGIN(LEX_ERROR_END_TAG);
580
581 /* -------- equivalent flex definition --------
582
583 <END_TAG2>">" { BEGIN(INITIAL); }
584 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
585 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
586
587 -------- equivalent flex definition -------- */
588
589 case LEX_STATE_END_TAG2:
590
591 if(*buffer_ptr=='>')
592 {
593 NEXT_CHAR;
594
595 BEGIN(LEX_STATE_INITIAL);
596 }
597
598 BEGIN(LEX_ERROR_END_TAG);
599
600 /* -------- equivalent flex definition --------
601
602 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
603 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
604 <TAG>{S}+ { }
605 <TAG>{N} { lineno++; }
606 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
607 <TAG>. { return(LEX_ERROR_TAG); }
608
609 -------- equivalent flex definition -------- */
610
611 case LEX_STATE_TAG:
612
613 while(1)
614 {
615 while(whitespace[(int)*buffer_ptr])
616 NEXT_CHAR;
617
618 if(namestart[(int)*buffer_ptr])
619 {
620 START_TOKEN;
621
622 NEXT_CHAR;
623 while(namechar[(int)*buffer_ptr])
624 NEXT_CHAR;
625
626 saved_buffer_ptr=*buffer_ptr;
627 *buffer_ptr=0;
628
629 after_attr=LEX_STATE_TAG;
630 NEXT(LEX_STATE_ATTR_KEY);
631 BEGIN(LEX_FUNC_ATTR_KEY);
632 }
633 else if(*buffer_ptr=='/')
634 {
635 NEXT_CHAR;
636 if(*buffer_ptr=='>')
637 {
638 NEXT_CHAR;
639 NEXT(LEX_STATE_INITIAL);
640 BEGIN(LEX_FUNC_TAG_FINISH);
641 }
642
643 BEGIN(LEX_ERROR_TAG);
644 }
645 else if(*buffer_ptr=='>')
646 {
647 NEXT_CHAR;
648 NEXT(LEX_STATE_INITIAL);
649 BEGIN(LEX_FUNC_TAG_PUSH);
650 }
651 else if(*buffer_ptr=='\n')
652 {
653 NEXT_CHAR;
654 lineno++;
655 }
656 else
657 BEGIN(LEX_ERROR_TAG);
658 }
659
660 break;
661
662 /* -------- equivalent flex definition --------
663
664 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
665 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
666 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
667
668 -------- equivalent flex definition -------- */
669
670 case LEX_STATE_ATTR_KEY:
671
672 if(*buffer_ptr=='=')
673 {
674 NEXT_CHAR;
675 BEGIN(LEX_STATE_ATTR_VAL);
676 }
677
678 BEGIN(LEX_ERROR_ATTR);
679
680 /* -------- equivalent flex definition --------
681
682 <ATTR_VAL>\" { BEGIN(DQUOTED); }
683 <ATTR_VAL>\' { BEGIN(SQUOTED); }
684 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
685 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
686
687 -------- equivalent flex definition -------- */
688
689 case LEX_STATE_ATTR_VAL:
690
691 if(*buffer_ptr=='"')
692 {
693 NEXT_CHAR;
694 BEGIN(LEX_STATE_DQUOTED);
695 }
696 else if(*buffer_ptr=='\'')
697 {
698 NEXT_CHAR;
699 BEGIN(LEX_STATE_SQUOTED);
700 }
701
702 BEGIN(LEX_ERROR_ATTR);
703
704 /* -------- equivalent flex definition --------
705
706 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
707 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
708 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
709 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
710 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
711 <DQUOTED>{UquotedD} { }
712 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
713 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
714
715 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
716 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
717 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
718 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
719 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
720 <SQUOTED>{UquotedS} { append_string(yytext); }
721 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
722 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
723
724 -------- equivalent flex definition -------- */
725
726 case LEX_STATE_DQUOTED:
727 case LEX_STATE_SQUOTED:
728
729 if(state==LEX_STATE_DQUOTED)
730 quoted=quotedD;
731 else
732 quoted=quotedS;
733
734 START_TOKEN;
735
736 while(1)
737 {
738 switch(quoted[(int)*buffer_ptr])
739 {
740 case 10: /* U1 - used by all tag keys and many values */
741 do
742 {
743 NEXT_CHAR;
744 }
745 while(quoted[(int)*buffer_ptr]==10);
746 break;
747
748 case 20: /* U2 */
749 NEXT_CHAR;
750 if(!U2[0][(int)*buffer_ptr])
751 BEGIN(LEX_ERROR_ATTR_VAL);
752 NEXT_CHAR;
753 break;
754
755 case 31: /* U3a */
756 NEXT_CHAR;
757 if(!U3a[0][(int)*buffer_ptr])
758 BEGIN(LEX_ERROR_ATTR_VAL);
759 NEXT_CHAR;
760 if(!U3a[1][(int)*buffer_ptr])
761 BEGIN(LEX_ERROR_ATTR_VAL);
762 NEXT_CHAR;
763 break;
764
765 case 32: /* U3b */
766 NEXT_CHAR;
767 if(!U3b[0][(int)*buffer_ptr])
768 BEGIN(LEX_ERROR_ATTR_VAL);
769 NEXT_CHAR;
770 if(!U3b[1][(int)*buffer_ptr])
771 BEGIN(LEX_ERROR_ATTR_VAL);
772 NEXT_CHAR;
773 break;
774
775 case 33: /* U3c */
776 NEXT_CHAR;
777 if(!U3c[0][(int)*buffer_ptr])
778 BEGIN(LEX_ERROR_ATTR_VAL);
779 NEXT_CHAR;
780 if(!U3c[1][(int)*buffer_ptr])
781 BEGIN(LEX_ERROR_ATTR_VAL);
782 NEXT_CHAR;
783 break;
784
785 case 34: /* U3d */
786 NEXT_CHAR;
787 if(!U3d[0][(int)*buffer_ptr])
788 BEGIN(LEX_ERROR_ATTR_VAL);
789 NEXT_CHAR;
790 if(!U3d[1][(int)*buffer_ptr])
791 BEGIN(LEX_ERROR_ATTR_VAL);
792 NEXT_CHAR;
793 break;
794
795 case 41: /* U4a */
796 NEXT_CHAR;
797 if(!U4a[0][(int)*buffer_ptr])
798 BEGIN(LEX_ERROR_ATTR_VAL);
799 NEXT_CHAR;
800 if(!U4a[1][(int)*buffer_ptr])
801 BEGIN(LEX_ERROR_ATTR_VAL);
802 NEXT_CHAR;
803 if(!U4a[2][(int)*buffer_ptr])
804 BEGIN(LEX_ERROR_ATTR_VAL);
805 NEXT_CHAR;
806 break;
807
808 case 42: /* U4b */
809 NEXT_CHAR;
810 if(!U4b[0][(int)*buffer_ptr])
811 BEGIN(LEX_ERROR_ATTR_VAL);
812 NEXT_CHAR;
813 if(!U4b[1][(int)*buffer_ptr])
814 BEGIN(LEX_ERROR_ATTR_VAL);
815 NEXT_CHAR;
816 if(!U4b[2][(int)*buffer_ptr])
817 BEGIN(LEX_ERROR_ATTR_VAL);
818 NEXT_CHAR;
819 break;
820
821 case 43: /* U4c */
822 NEXT_CHAR;
823 if(!U4c[0][(int)*buffer_ptr])
824 BEGIN(LEX_ERROR_ATTR_VAL);
825 NEXT_CHAR;
826 if(!U4c[1][(int)*buffer_ptr])
827 BEGIN(LEX_ERROR_ATTR_VAL);
828 NEXT_CHAR;
829 if(!U4c[2][(int)*buffer_ptr])
830 BEGIN(LEX_ERROR_ATTR_VAL);
831 NEXT_CHAR;
832 break;
833
834 case 50: /* entityref or charref */
835 NEXT_CHAR;
836
837 if(*buffer_ptr=='#') /* charref */
838 {
839 int charref_len=3;
840
841 NEXT_CHAR;
842 if(digit[(int)*buffer_ptr]) /* decimal */
843 {
844 NEXT_CHAR;
845 charref_len++;
846
847 while(digit[(int)*buffer_ptr])
848 {
849 NEXT_CHAR;
850 charref_len++;
851 }
852
853 if(*buffer_ptr!=';')
854 BEGIN(LEX_ERROR_ATTR_VAL);
855 }
856 else if(*buffer_ptr=='x') /* hex */
857 {
858 NEXT_CHAR;
859 charref_len++;
860
861 while(xdigit[(int)*buffer_ptr])
862 {
863 NEXT_CHAR;
864 charref_len++;
865 }
866
867 if(*buffer_ptr!=';')
868 BEGIN(LEX_ERROR_ATTR_VAL);
869 }
870 else /* other */
871 BEGIN(LEX_ERROR_ATTR_VAL);
872
873 NEXT_CHAR;
874
875 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
876 {
877 const char *str;
878
879 saved_buffer_ptr=*buffer_ptr;
880 *buffer_ptr=0;
881
882 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
883
884 if(!str)
885 {
886 buffer_ptr-=charref_len;
887 BEGIN(LEX_ERROR_CHAR_REF);
888 }
889
890 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
891 memcpy(buffer_ptr-strlen(str),str,strlen(str));
892
893 *buffer_ptr=saved_buffer_ptr;
894 }
895 }
896 else if(namestart[(int)*buffer_ptr]) /* entityref */
897 {
898 int entityref_len=3;
899
900 NEXT_CHAR;
901 while(namechar[(int)*buffer_ptr])
902 {
903 NEXT_CHAR;
904 entityref_len++;
905 }
906
907 if(*buffer_ptr!=';')
908 BEGIN(LEX_ERROR_ATTR_VAL);
909
910 NEXT_CHAR;
911
912 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
913 {
914 const char *str;
915
916 saved_buffer_ptr=*buffer_ptr;
917 *buffer_ptr=0;
918
919 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
920
921 if(!str)
922 {
923 buffer_ptr-=entityref_len;
924 BEGIN(LEX_ERROR_ENTITY_REF);
925 }
926
927 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
928 memcpy(buffer_ptr-strlen(str),str,strlen(str));
929
930 *buffer_ptr=saved_buffer_ptr;
931 }
932 }
933 else /* other */
934 BEGIN(LEX_ERROR_ATTR_VAL);
935
936 break;
937
938 case 99: /* quote */
939 *buffer_ptr=0;
940 NEXT_CHAR;
941
942 NEXT(after_attr);
943 BEGIN(LEX_FUNC_ATTR_VAL);
944
945 default: /* other */
946 BEGIN(LEX_ERROR_ATTR_VAL);
947 }
948 }
949
950 break;
951
952
953 /* ================ Functional states ================ */
954
955
956 /* The start of a tag for an XML declaration */
957
958 case LEX_FUNC_XML_DECL_BEGIN:
959
960 if(tag_stack)
961 BEGIN(LEX_ERROR_XML_NOT_FIRST);
962
963 /* The start of a tag for an element */
964
965 case LEX_FUNC_TAG_BEGIN:
966
967 tag=NULL;
968
969 for(i=0;tags[i];i++)
970 if(buffer_token[0]==tags[i]->name[0] || tolower(buffer_token[0])==tags[i]->name[0])
971 if(!strcasecmp((char*)buffer_token+1,tags[i]->name+1))
972 {
973 tag=tags[i];
974
975 for(i=0;i<tag->nattributes;i++)
976 attributes[i]=NULL;
977
978 break;
979 }
980
981 if(tag==NULL)
982 BEGIN(LEX_ERROR_UNEXP_TAG);
983
984 END_TOKEN;
985
986 *buffer_ptr=saved_buffer_ptr;
987 BEGIN(next_state);
988
989 /* The end of the start-tag for an element */
990
991 case LEX_FUNC_TAG_PUSH:
992
993 if(stackused==stackdepth)
994 {
995 tag_stack =realloc(tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
996 tags_stack=realloc(tags_stack,(stackdepth+=8)*sizeof(xmltag**));
997 }
998
999 tag_stack [stackused]=tag;
1000 tags_stack[stackused]=tags;
1001 stackused++;
1002
1003 if(tag->callback)
1004 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
1005 BEGIN(LEX_ERROR_CALLBACK);
1006
1007 tags=tag->subtags;
1008
1009 BEGIN(next_state);
1010
1011 /* The end of the empty-element-tag for an XML declaration */
1012
1013 case LEX_FUNC_XML_DECL_FINISH:
1014
1015 /* The end of the empty-element-tag for an element */
1016
1017 case LEX_FUNC_TAG_FINISH:
1018
1019 if(tag->callback)
1020 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
1021 BEGIN(LEX_ERROR_CALLBACK);
1022
1023 if(stackused>0)
1024 tag=tag_stack[stackused-1];
1025 else
1026 tag=NULL;
1027
1028 BEGIN(next_state);
1029
1030 /* The end of the end-tag for an element */
1031
1032 case LEX_FUNC_TAG_POP:
1033
1034 stackused--;
1035
1036 if(stackused<0)
1037 BEGIN(LEX_ERROR_NO_START);
1038
1039 tags=tags_stack[stackused];
1040 tag =tag_stack [stackused];
1041
1042 if(strcmp((char*)buffer_token,tag->name))
1043 BEGIN(LEX_ERROR_UNBALANCED);
1044
1045 for(i=0;i<tag->nattributes;i++)
1046 attributes[i]=NULL;
1047
1048 if(tag->callback)
1049 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1050 BEGIN(LEX_ERROR_CALLBACK);
1051
1052 if(stackused>0)
1053 tag=tag_stack[stackused-1];
1054 else
1055 tag=NULL;
1056
1057 END_TOKEN;
1058
1059 *buffer_ptr=saved_buffer_ptr;
1060 BEGIN(next_state);
1061
1062 /* An attribute key */
1063
1064 case LEX_FUNC_ATTR_KEY:
1065
1066 attribute=-1;
1067
1068 for(i=0;i<tag->nattributes;i++)
1069 if(buffer_token[0]==tag->attributes[i][0] || tolower(buffer_token[0])==tag->attributes[i][0])
1070 if(!strcasecmp((char*)buffer_token+1,tag->attributes[i]+1))
1071 {
1072 attribute=i;
1073
1074 break;
1075 }
1076
1077 if(attribute==-1)
1078 {
1079 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1080 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1081 BEGIN(LEX_ERROR_UNEXP_ATT);
1082 #ifndef LIBROUTINO
1083 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1084 ParseXML_SetError("Warning on line %"PRIu64": unexpected attribute '%s' for tag '%s'.",lineno,buffer_token,tag->name);
1085 #endif
1086 }
1087
1088 END_TOKEN;
1089
1090 *buffer_ptr=saved_buffer_ptr;
1091 BEGIN(next_state);
1092
1093 /* An attribute value */
1094
1095 case LEX_FUNC_ATTR_VAL:
1096
1097 if(tag->callback && attribute!=-1)
1098 attributes[attribute]=buffer_token;
1099
1100 END_TOKEN;
1101
1102 BEGIN(next_state);
1103
1104 /* End of file */
1105
1106 case LEX_EOF:
1107
1108 if(tag)
1109 BEGIN(LEX_ERROR_UNEXP_EOF);
1110
1111 break;
1112
1113
1114 /* ================ Error states ================ */
1115
1116
1117 case LEX_ERROR_TAG_START:
1118 ParseXML_SetError("Character '<' seen not at start of tag.");
1119 break;
1120
1121 case LEX_ERROR_XML_DECL_START:
1122 ParseXML_SetError("Characters '<?' seen not at start of XML declaration.");
1123 break;
1124
1125 case LEX_ERROR_TAG:
1126 ParseXML_SetError("Invalid character seen inside tag '<%s...>'.",tag->name);
1127 break;
1128
1129 case LEX_ERROR_XML_DECL:
1130 ParseXML_SetError("Invalid character seen inside XML declaration '<?xml...>'.");
1131 break;
1132
1133 case LEX_ERROR_ATTR:
1134 ParseXML_SetError("Invalid attribute definition seen in tag.");
1135 break;
1136
1137 case LEX_ERROR_END_TAG:
1138 ParseXML_SetError("Invalid character seen in end-tag.");
1139 break;
1140
1141 case LEX_ERROR_COMMENT:
1142 ParseXML_SetError("Invalid comment seen.");
1143 break;
1144
1145 case LEX_ERROR_CLOSE:
1146 ParseXML_SetError("Character '>' seen not at end of tag.");
1147 break;
1148
1149 case LEX_ERROR_ATTR_VAL:
1150 ParseXML_SetError("Invalid character '%c' seen in attribute value.",*buffer_ptr);
1151 break;
1152
1153 case LEX_ERROR_ENTITY_REF:
1154 ParseXML_SetError("Invalid entity reference '%s' seen in attribute value.",buffer_ptr);
1155 break;
1156
1157 case LEX_ERROR_CHAR_REF:
1158 ParseXML_SetError("Invalid character reference '%s' seen in attribute value.",buffer_ptr);
1159 break;
1160
1161 case LEX_ERROR_TEXT_OUTSIDE:
1162 ParseXML_SetError("Non-whitespace '%c' seen outside tag.",*buffer_ptr);
1163 break;
1164
1165 case LEX_ERROR_UNEXP_TAG:
1166 ParseXML_SetError("Unexpected tag '%s'.",buffer_token);
1167 break;
1168
1169 case LEX_ERROR_UNBALANCED:
1170 ParseXML_SetError("End tag '</%s>' doesn't match start tag '<%s ...>'.",buffer_token,tag->name);
1171 break;
1172
1173 case LEX_ERROR_NO_START:
1174 ParseXML_SetError("End tag '</%s>' seen but there was no start tag '<%s ...>'.",buffer_token,buffer_token);
1175 break;
1176
1177 case LEX_ERROR_UNEXP_ATT:
1178 ParseXML_SetError("Unexpected attribute '%s' for tag '%s'.",buffer_token,tag->name);
1179 break;
1180
1181 case LEX_ERROR_UNEXP_EOF:
1182 ParseXML_SetError("End of file seen without end tag '</%s>'.",tag->name);
1183 break;
1184
1185 case LEX_ERROR_XML_NOT_FIRST:
1186 ParseXML_SetError("XML declaration '<?xml...>' not before all other tags.");
1187 break;
1188
1189 case LEX_ERROR_CALLBACK:
1190 /* The error message should have been set by the callback function, have a fallback just in case */
1191 if(!stored_message)
1192 ParseXML_SetError("Unknown error from tag callback function.");
1193 break;
1194 }
1195
1196 /* Print the error message */
1197
1198 #ifndef LIBROUTINO
1199 if(state)
1200 fprintf(stderr,"XML Parser: %s\n",stored_message);
1201 #endif
1202
1203 /* Delete the tagdata */
1204
1205 if(stackdepth)
1206 {
1207 free(tag_stack);
1208 free(tags_stack);
1209 }
1210
1211 return(state);
1212 }
1213
1214
1215 /*++++++++++++++++++++++++++++++++++++++
1216 Return the current parser line number.
1217
1218 uint64_t ParseXML_LineNumber Returns the line number.
1219 ++++++++++++++++++++++++++++++++++++++*/
1220
1221 uint64_t ParseXML_LineNumber(void)
1222 {
1223 return(lineno);
1224 }
1225
1226
1227 /*++++++++++++++++++++++++++++++++++++++
1228 Store an error message for later.
1229
1230 const char *format The format string.
1231
1232 ... The other arguments.
1233 ++++++++++++++++++++++++++++++++++++++*/
1234
1235 void ParseXML_SetError(const char *format, ...)
1236 {
1237 va_list ap;
1238 char temp[2];
1239 int line_length,error_length;
1240
1241 line_length=snprintf(temp,1,"Error on line %" PRIu64 ": ",lineno);
1242
1243 va_start(ap,format);
1244 error_length=vsnprintf(temp,1,format,ap);
1245 va_end(ap);
1246
1247 if(stored_message)
1248 free(stored_message);
1249
1250 stored_message=malloc(error_length+line_length+1);
1251
1252 line_length=sprintf(stored_message,"Error on line %" PRIu64 ": ",lineno);
1253
1254 va_start(ap,format);
1255 vsprintf(stored_message+line_length,format,ap);
1256 va_end(ap);
1257 }
1258
1259
1260 /*++++++++++++++++++++++++++++++++++++++
1261 Return a stored error message.
1262
1263 char *ParseXML_GetError Returns the most recent stored error.
1264 ++++++++++++++++++++++++++++++++++++++*/
1265
1266 char *ParseXML_GetError(void)
1267 {
1268 return(stored_message);
1269 }
1270
1271
/*++++++++++++++++++++++++++++++++++++++
  Translate one of the five predefined XML entity references into the character it stands for.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character string, or NULL
                                   if the reference is not one of the five that are recognised.

  const char *string The complete entity reference, including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 /* Lookup table pairing each recognised reference with its replacement text */
 static const struct
 {
  const char *reference;
  char       *replacement;
 }
 entities[]={{"&amp;" ,"&" },
             {"&lt;"  ,"<" },
             {"&gt;"  ,">" },
             {"&apos;","'" },
             {"&quot;","\""}};
 size_t n;

 for(n=0;n<sizeof(entities)/sizeof(entities[0]);n++)
   {
    if(strcmp(string,entities[n].reference)==0)
       return entities[n].replacement;
   }

 return NULL;
}
1289
1290
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to a static buffer holding the UTF-8 encoding
                                 of the character (overwritten by the next call).

  const char *string The character reference string; the number is read from the third character
                     onwards ("&#NNN;") or from the fourth if the third is 'x' ("&#xHHH;").

  Values that are not Unicode scalar values (negative, above U+10FFFF or in the surrogate range
  U+D800-U+DFFF) are replaced by U+FFFD so that the result is always valid UTF-8.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]=""; /* static allocation of return value (set each call) */
 long int unicode;

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 /* Substitute the replacement character for anything that UTF-8 cannot legally encode */

 if(unicode<0 || unicode>0x10FFFF || (unicode>=0xD800 && unicode<=0xDFFF))
    unicode=0xFFFD;

 if(unicode<0x80)
   {
    /* 0000 0000-0000 007F  =>  0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<0x800)
   {
    /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0|((unicode>>6)&0x1F));
    result[1]=(char)(0x80|( unicode    &0x3F));
    result[2]=0;
   }
 else if(unicode<0x10000)
   {
    /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0|((unicode>>12)&0x0F));
    result[1]=(char)(0x80|((unicode>>6) &0x3F));
    result[2]=(char)(0x80|( unicode     &0x3F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-0010 FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0|((unicode>>18)&0x07));
    result[1]=(char)(0x80|((unicode>>12)&0x3F));
    result[2]=(char)(0x80|((unicode>>6) &0x3F));
    result[3]=(char)(0x80|( unicode     &0x3F));
    result[4]=0;
   }

 return(result);
}
1346
1347
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if no change is needed, otherwise a
                                 pointer to a static buffer holding the encoded string (reused by
                                 the next call that needs to encode), or NULL if memory for that
                                 buffer could not be allocated.

  const char *string The NUL-terminated string to convert.

  The characters < > & ' " are replaced by references, other bytes in the range 32-127 are copied
  unchanged and everything else (control characters and UTF-8 multi-byte sequences) is written as
  a hexadecimal character reference.  A byte that does not start a sequence accepted by the
  decoder below is written as &#xFFFD;.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 static const char hexstring[17]="0123456789ABCDEF";   /* local lookup table */
 static char *result=NULL;                             /* static allocation of return value */
 const unsigned char *in=(const unsigned char*)string; /* unsigned view of the input bytes */
 char *grown;
 size_t i,j,size;

 /* Find the first byte that cannot be copied unchanged */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Start the output with the unchanged prefix */

 size=i+256;

 grown=(char*)realloc((void*)result,size);

 if(!grown)
    return(NULL);

 result=grown;

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    /* The longest replacement is "&#xHHHHHH;" (10 bytes) and the terminating NUL must still fit
       after it, so make sure that 11 bytes are free before handling each input character. */

    if(size-j<11)
      {
       size+=256;

       grown=(char*)realloc((void*)result,size);

       if(!grown)
          return(NULL);

       result=grown;
      }

    if(in[i]=='\'')
      {
       /* XML, HTML5 and XHTML1 allow &apos; but HTML4 doesn't. */
       memcpy(result+j,"&#39;",5);
       j+=5;
      }
    else if(in[i]=='&')
      {
       memcpy(result+j,"&amp;",5);
       j+=5;
      }
    else if(in[i]=='"')
      {
       memcpy(result+j,"&quot;",6);
       j+=6;
      }
    else if(in[i]=='<')
      {
       memcpy(result+j,"&lt;",4);
       j+=4;
      }
    else if(in[i]=='>')
      {
       memcpy(result+j,"&gt;",4);
       j+=4;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8 (a NUL fails every continuation test so the reads stay inside the string) */

       if(in[i]<0x80)
         {
          /* 0000 0000-0000 007F  =>  0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF  =>  110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF  =>  1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF  =>  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;

       /* Output the character entity using 2, 4 or 6 hexadecimal digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }
       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }
       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1488
1489
1490 /*++++++++++++++++++++++++++++++++++++++
1491 Check that a string really is an integer.
1492
1493 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1494
1495 const char *string The string to be parsed.
1496 ++++++++++++++++++++++++++++++++++++++*/
1497
1498 int ParseXML_IsInteger(const char *string)
1499 {
1500 const unsigned char *p=(unsigned char*)string;
1501
1502 if(*p=='-' || *p=='+')
1503 p++;
1504
1505 while(digit[(int)*p])
1506 p++;
1507
1508 if(*p)
1509 return(0);
1510 else
1511 return(1);
1512 }
1513
1514
1515 /*++++++++++++++++++++++++++++++++++++++
1516 Check that a string really is a floating point number.
1517
1518 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1519
1520 const char *string The string to be parsed.
1521 ++++++++++++++++++++++++++++++++++++++*/
1522
1523 int ParseXML_IsFloating(const char *string)
1524 {
1525 const unsigned char *p=(unsigned char*)string;
1526
1527 if(*p=='-' || *p=='+')
1528 p++;
1529
1530 while(digit[(int)*p] || *p=='.')
1531 p++;
1532
1533 if(*p=='e' || *p=='E')
1534 {
1535 p++;
1536
1537 if(*p=='-' || *p=='+')
1538 p++;
1539
1540 while(digit[*p])
1541 p++;
1542 }
1543
1544 if(*p)
1545 return(0);
1546 else
1547 return(1);
1548 }
1549
1550
1551 /* Table for checking for double-quoted characters.
        Indexed by byte value (256 entries); each entry is a class code for that byte:
           0 - control characters other than tab, LF and CR; '<'; '>'; 0x80-0xc1; 0xf5-0xff.
          10 - tab, LF, CR and every byte 0x20-0x7f that is not listed under another code.
          20 - 0xc2-0xdf.
          31 - 0xe0.   32 - 0xe1-0xec.   33 - 0xed.   34 - 0xee-0xef.
          41 - 0xf0.   42 - 0xf1-0xf3.   43 - 0xf4.
          50 - '&'.
          99 - '"'.
        NOTE(review): codes 20-43 coincide with the lead byte ranges named in the comments of the
        U2, U3a-U3d and U4a-U4c tables in this file, so they presumably select which of those
        tables checks the following bytes; 50 presumably starts a reference and 99 presumably ends
        the value - confirm against the lexer, which is not visible from here. */
1552 static const unsigned char quotedD[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1554 10,10,99,10,10,10,50,10,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1555 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1556 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1557 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1558 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1559 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1564 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1565 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1566 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1567 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1568
1569 /* Table for checking for single-quoted characters.
        Indexed by byte value (256 entries); each entry is a class code for that byte:
           0 - control characters other than tab, LF and CR; '<'; '>'; 0x80-0xc1; 0xf5-0xff.
          10 - tab, LF, CR and every byte 0x20-0x7f that is not listed under another code
               (this includes the double quote character).
          20 - 0xc2-0xdf.
          31 - 0xe0.   32 - 0xe1-0xec.   33 - 0xed.   34 - 0xee-0xef.
          41 - 0xf0.   42 - 0xf1-0xf3.   43 - 0xf4.
          50 - '&'.
          99 - the single quote character.
        NOTE(review): codes 20-43 coincide with the lead byte ranges named in the comments of the
        U2, U3a-U3d and U4a-U4c tables in this file, so they presumably select which of those
        tables checks the following bytes; 50 presumably starts a reference and 99 presumably ends
        the value - confirm against the lexer, which is not visible from here. */
1570 static const unsigned char quotedS[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0,10,10, 0, 0,10, 0, 0, /* 0x00-0x0f " " */
1571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1572 10,10,10,10,10,10,50,99,10,10,10,10,10,10,10,10, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1573 10,10,10,10,10,10,10,10,10,10,10,10, 0,10, 0,10, /* 0x30-0x3f "0123456789:;<=>?" */
1574 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1575 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1576 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x60-0x6f "`abcdefghijklmno" */
1577 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1582 0, 0,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xc0-0xcf " " */
1583 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, /* 0xd0-0xdf " " */
1584 31,32,32,32,32,32,32,32,32,32,32,32,32,33,34,34, /* 0xe0-0xef " " */
1585 41,42,42,42,43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1586
1587 /* Table for checking for characters between 0x80 and 0x8f.
        Indexed by byte value (256 entries): the entry is 1 for 0x80-0x8f and 0 for all other bytes. */
1588 static const unsigned char U_80_8F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1589 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1590 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1596 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1600 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1601 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1604
1605 /* Table for checking for characters between 0x80 and 0x9f.
        Indexed by byte value (256 entries): the entry is 1 for 0x80-0x9f and 0 for all other bytes. */
1606 static const unsigned char U_80_9F[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1610 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1614 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1615 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1621 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1622
1623 /* Table for checking for characters between 0x80 and 0xbf.
        Indexed by byte value (256 entries): the entry is 1 for 0x80-0xbf and 0 for all other bytes. */
1624 static const unsigned char U_80_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1632 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80-0x8f " " */
1633 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1634 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1635 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1640
1641 /* Table for checking for characters between 0x90 and 0xbf.
        Indexed by byte value (256 entries): the entry is 1 for 0x90-0xbf and 0 for all other bytes. */
1642 static const unsigned char U_90_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90-0x9f " " */
1652 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1653 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1658
1659 /* Table for checking for characters between 0xa0 and 0xbf.
        Indexed by byte value (256 entries): the entry is 1 for 0xa0-0xbf and 0 for all other bytes. */
1660 static const unsigned char U_A0_BF[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1670 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0-0xaf " " */
1671 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0-0xbf " " */
1672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1676
1677 /* Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF.
        Holds one pointer for the single byte that follows the lead byte; it points to the 256-entry
        table that is 1 for the permitted values of that byte. */
1678 static const unsigned char *U2[1]={ U_80_BF };
1679
1680 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF.
        Holds one pointer per byte that follows the lead byte, in order; the first byte is limited
        to A0-BF. */
1681 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1682
1683 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF.
        Holds one pointer per byte that follows the lead byte, in order. */
1684 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1685
1686 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML).
        Holds one pointer per byte that follows the lead byte, in order; the first byte is limited
        to 80-9F. */
1687 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1688
1689 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled).
        Holds one pointer per byte that follows the lead byte, in order. */
1690 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1691
1692 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF.
        Holds one pointer per byte that follows the lead byte, in order; the first byte is limited
        to 90-BF. */
1693 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1694
1695 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF.
        Holds one pointer per byte that follows the lead byte, in order. */
1696 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1697
1698 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML).
        Holds one pointer per byte that follows the lead byte, in order; the first byte is limited
        to 80-8F. */
1699 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1700
1701 /* Table for checking for namestart characters.
        Indexed by byte value (256 entries): the entry is 1 for ':', 'A'-'Z', '_' and 'a'-'z' and 0
        for all other bytes (including every byte from 0x80 upwards). */
1702 static const unsigned char namestart[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1704 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1705 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1706 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1707 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1708 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1709 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1710 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1711 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1712 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1718
1719 /* Table for checking for namechar characters.
        Indexed by byte value (256 entries): the entry is 1 for '-', '.', '0'-'9', ':', 'A'-'Z', '_'
        and 'a'-'z' and 0 for all other bytes (including every byte from 0x80 upwards). */
1720 static const unsigned char namechar[256] ={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1723 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1724 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1725 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1726 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f "`abcdefghijklmno" */
1727 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1728 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1729 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1730 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1731 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1732 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1733 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1734 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1735 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1736
1737 /* Table for checking for whitespace characters.
        Indexed by byte value (256 entries): the entry is 1 for tab (0x09), carriage return (0x0d)
        and space (0x20) and 0 for all other bytes.
        NOTE(review): line feed (0x0a) is 0 here although it is accepted by the quoted character
        tables; presumably the lexer handles it separately (e.g. to count lines) - confirm against
        the lexer, which is not visible from here. */
1738 static const unsigned char whitespace[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, /* 0x00-0x0f " " */
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1740 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1745 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1746 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1747 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1748 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1749 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1750 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1751 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1752 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1753 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1754
1755 /* Table for checking for digit characters.
        Indexed by byte value (256 entries): the entry is 1 for '0'-'9' and 0 for all other bytes.
        Index it with an unsigned byte value, as ParseXML_IsInteger() and ParseXML_IsFloating() do. */
1756 static const unsigned char digit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1757 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1758 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1759 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1760 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1761 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1762 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1765 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1766 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1767 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1768 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1769 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1770 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */
1772
1773 /* Table for checking for xdigit characters.
        Indexed by byte value (256 entries): the entry is 1 for the hexadecimal digits '0'-'9',
        'A'-'F' and 'a'-'f' and 0 for all other bytes. */
1774 static const unsigned char xdigit[256]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0f " " */
1775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f " " */
1776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f " !"#$%&'()*+,-./" */
1777 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3f "0123456789:;<=>?" */
1778 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4f "@ABCDEFGHIJKLMNO" */
1779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f "PQRSTUVWXYZ[\]^_" */
1780 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x6f "`abcdefghijklmno" */
1781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f "pqrstuvwxyz{|}~ " */
1782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f " " */
1783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9f " " */
1784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0-0xaf " " */
1785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0-0xbf " " */
1786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0-0xcf " " */
1787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0-0xdf " " */
1788 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0-0xef " " */
1789 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* 0xf0-0xff " " */