Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /branches/destination-access/src/xmlparse.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1807 - (show annotations) (download) (as text)
Wed Sep 23 18:20:13 2015 UTC (9 years, 6 months ago) by amb
File MIME type: text/x-csrc
File size: 69639 byte(s)
Merge the trunk changes back into the destination-access branch.

1 /***************************************
2 A simple generic XML parser where the structure comes from the function parameters.
3 Not intended to be fully conforming to XML standard or a validating parser but
4 sufficient to parse OSM XML and simple program configuration files.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2010-2015 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26
27 #if defined(_MSC_VER)
28 #include <io.h>
29 #include <basetsd.h>
30 #define read(fd,address,length) _read(fd,address,(unsigned int)(length))
31 #define snprintf _snprintf
32 #define ssize_t SSIZE_T
33 #else
34 #include <unistd.h>
35 #endif
36
37 #include <stdlib.h>
38 #include <inttypes.h>
39 #include <stdarg.h>
40 #include <stdint.h>
41 #include <string.h>
42
43 #if defined(_MSC_VER) || defined(__MINGW32__)
44 #define strcasecmp _stricmp
45 #else
46 #include <strings.h>
47 #endif
48
49 #include <ctype.h>
50
51 #include "xmlparse.h"
52
53
/* Parser states */

/* Every value below is a possible value of the 'state' variable that drives the
   switch statement in ParseXML(); the final state is what ParseXML() returns. */

/* End of input (zero, so reaching it with no open tag gives a zero return). */

#define LEX_EOF 0

/* Functional states: a token has been recognised and is acted upon (tag lookup,
   stack push/pop, callbacks) before moving on to the state saved by NEXT(). */

#define LEX_FUNC_TAG_BEGIN 1
#define LEX_FUNC_XML_DECL_BEGIN 2
#define LEX_FUNC_TAG_POP 3
#define LEX_FUNC_TAG_PUSH 4
#define LEX_FUNC_XML_DECL_FINISH 5
#define LEX_FUNC_TAG_FINISH 6
#define LEX_FUNC_ATTR_KEY 7
#define LEX_FUNC_ATTR_VAL 8

/* Parsing states: characters are being consumed from the input buffer. */

#define LEX_STATE_INITIAL 10
#define LEX_STATE_BANGTAG 11
#define LEX_STATE_COMMENT 12
#define LEX_STATE_XML_DECL_START 13
#define LEX_STATE_XML_DECL 14
#define LEX_STATE_TAG_START 15
#define LEX_STATE_TAG 16
#define LEX_STATE_ATTR_KEY 17
#define LEX_STATE_ATTR_VAL 18
#define LEX_STATE_END_TAG1 19
#define LEX_STATE_END_TAG2 20
#define LEX_STATE_DQUOTED 21
#define LEX_STATE_SQUOTED 22

/* Error states entered from the parsing states (malformed input text). */

#define LEX_ERROR_TAG_START 101
#define LEX_ERROR_XML_DECL_START 102
#define LEX_ERROR_TAG 103
#define LEX_ERROR_XML_DECL 104
#define LEX_ERROR_ATTR 105
#define LEX_ERROR_END_TAG 106
#define LEX_ERROR_COMMENT 107
#define LEX_ERROR_CLOSE 108
#define LEX_ERROR_ATTR_VAL 109
#define LEX_ERROR_ENTITY_REF 110
#define LEX_ERROR_CHAR_REF 111
#define LEX_ERROR_TEXT_OUTSIDE 112

/* Error states entered from the functional states or at end of file (input is
   lexically valid but does not match the expected tag structure). */

#define LEX_ERROR_UNEXP_TAG 201
#define LEX_ERROR_UNBALANCED 202
#define LEX_ERROR_NO_START 203
#define LEX_ERROR_UNEXP_ATT 204
#define LEX_ERROR_UNEXP_EOF 205
#define LEX_ERROR_XML_NOT_FIRST 206

/* Error state entered when a tag callback function returns non-zero. */

#define LEX_ERROR_CALLBACK 255
102
103
/* Parsing variables and functions (re-initialised for each file) */

/* The current input line number; counted by ParseXML() and used as the prefix
   of stored error messages. */
static uint64_t lineno;

/* Two equally sized input buffers; buffer_refill() switches between them so that
   a partially read token can be copied to the start of the other one. */
static unsigned char buffer[2][16384];

/* buffer_token: start of the token being collected (NULL when there is none).
   buffer_end:   the last valid byte of data in the active buffer.
   buffer_ptr:   the byte currently being examined by the parser. */
static unsigned char *buffer_token,*buffer_end,*buffer_ptr;

/* Index (0 or 1) of the buffer currently in use. */
static int buffer_active=0;

/* The most recent error message (allocated by ParseXML_SetError) or NULL. */
static char *stored_message=NULL;
113
114
115 /*++++++++++++++++++++++++++++++++++++++
116 Refill the data buffer making sure that the string starting at buffer_token is contiguous.
117
118 int buffer_refill Return 0 if everything is OK or 1 for EOF.
119
120 int fd The file descriptor to read from.
121 ++++++++++++++++++++++++++++++++++++++*/
122
123 static inline int buffer_refill(int fd)
124 {
125 ssize_t n;
126 size_t m=0;
127
128 m=(buffer_end-buffer[buffer_active])+1;
129
130 if(m>(sizeof(buffer[0])/2)) /* more than half full */
131 {
132 m=0;
133
134 buffer_active=!buffer_active;
135
136 if(buffer_token)
137 {
138 m=(buffer_end-buffer_token)+1;
139
140 memcpy(buffer[buffer_active],buffer_token,m);
141
142 buffer_token=buffer[buffer_active];
143 }
144 }
145
146 n=read(fd,buffer[buffer_active]+m,sizeof(buffer[0])-m);
147
148 buffer_ptr=buffer[buffer_active]+m;
149 buffer_end=buffer[buffer_active]+m+n-1;
150
151 if(n<=0)
152 return(1);
153 else
154 return(0);
155 }
156
157
/* Macros to simplify the parser (and make it look more like lex) */

/* These macros are only usable inside ParseXML(): they rely on its local
   variables 'state', 'next_state' and 'fd' and on its 'new_state' label. */

/* Switch to state xx immediately by jumping back to the state dispatch. */
#define BEGIN(xx) do{ state=(xx); goto new_state; } while(0)

/* Remember the state that a functional state should continue with. */
#define NEXT(xx) next_state=(xx)

/* Mark the current position as the start of a token / forget the token. */
#define START_TOKEN buffer_token=buffer_ptr
#define END_TOKEN buffer_token=NULL

/* Advance to the next input byte, refilling the buffer when its last byte has
   been consumed; jumps to the LEX_EOF state if no more data can be read. */
#define NEXT_CHAR \
 do{ \
    if(buffer_ptr==buffer_end) \
      { if(buffer_refill(fd)) BEGIN(LEX_EOF); } \
    else \
       buffer_ptr++; \
   } while(0)
173
174
175 /* -------- equivalent flex definition --------
176
177 S [ \t\r]
178 N (\n)
179
180 U1 [\x09\x0A\x0D\x20-\x7F]
181 U2 [\xC2-\xDF][\x80-\xBF]
182 U3a \xE0[\xA0-\xBF][\x80-\xBF]
183 U3b [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
184 U3c \xED[\x80-\x9F][\x80-\xBF]
185 U3d [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
186 U3 {U3a}|{U3b}|{U3c}|{U3d}
187 U4a \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
188 U4b [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
189 U4c \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
190 U4 {U4a}|{U4b}|{U4c}
191
192 U ({U1}|{U2}|{U3}|{U4})
193
194 U1_xml ([\x09\x0A\x0D\x20-\x25\x27-\x3B\x3D\x3F-\x7F])
195
196 U1quotedS_xml ([\x09\x0A\x0D\x20-\x25\x28-\x3B\x3D\x3F-\x7F])
197 U1quotedD_xml ([\x09\x0A\x0D\x20-\x21\x23-\x25\x27-\x3B\x3D\x3F-\x7F])
198
199 UquotedS ({U1quotedS_xml}|{U2}|{U3}|{U4})
200 UquotedD ({U1quotedD_xml}|{U2}|{U3}|{U4})
201
202 letter [a-zA-Z]
203 digit [0-9]
204 xdigit [a-fA-F0-9]
205
206 namechar ({letter}|{digit}|[-._:])
207 namestart ({letter}|[_:])
208 name ({namestart}{namechar}*)
209
210 entityref (&{name};)
211 charref (&#({digit}+|x{xdigit}+);)
212
213 -------- equivalent flex definition -------- */
214
/* Tables containing character class definitions (advance declaration for data at end of file). */

/* Per-byte class codes for characters inside double-quoted / single-quoted
   attribute values; ParseXML() switches on the code to choose the handling. */
static const unsigned char quotedD[256],quotedS[256];

/* For each multi-byte UTF-8 form: one table per continuation byte position,
   each indexed by byte value and non-zero when that byte is acceptable there. */
static const unsigned char *U2[1],*U3a[2],*U3b[2],*U3c[2],*U3d[2],*U4a[3],*U4b[3],*U4c[3];

/* Membership tables indexed by byte value (non-zero means the byte is in the class). */
static const unsigned char namestart[256],namechar[256],whitespace[256],digit[256],xdigit[256];
219
220
221 /*++++++++++++++++++++++++++++++++++++++
222 A function to call the callback function with the parameters needed.
223
224 int call_callback Returns 1 if the callback returned with an error.
225
226 const char *name The name of the tag.
227
228 int (*callback)() The callback function.
229
230 int type The type of tag (start and/or end).
231
232 int nattributes The number of attributes collected.
233
234 unsigned char *attributes[XMLPARSE_MAX_ATTRS] The list of attributes.
235 ++++++++++++++++++++++++++++++++++++++*/
236
237 static inline int call_callback(const char *name,int (*callback)(),int type,int nattributes,unsigned char *attributes[XMLPARSE_MAX_ATTRS])
238 {
239 switch(nattributes)
240 {
241 case 0: return (*callback)(name,type);
242 case 1: return (*callback)(name,type,attributes[0]);
243 case 2: return (*callback)(name,type,attributes[0],attributes[1]);
244 case 3: return (*callback)(name,type,attributes[0],attributes[1],attributes[2]);
245 case 4: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3]);
246 case 5: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]);
247 case 6: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]);
248 case 7: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]);
249 case 8: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]);
250 case 9: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]);
251 case 10: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]);
252 case 11: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]);
253 case 12: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]);
254 case 13: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]);
255 case 14: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]);
256 case 15: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]);
257 case 16: return (*callback)(name,type,attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]);
258
259 default:
260 ParseXML_SetError("Too many attributes for tag '%s' source code needs changing.",name);
261 return(1);
262 }
263 }
264
265
266 /*++++++++++++++++++++++++++++++++++++++
267 Parse the XML and call the functions for each tag as seen.
268
269 int ParseXML Returns 0 if OK or something else in case of an error.
270
271 int fd The file descriptor of the file to parse.
272
273 const xmltag *const *tags The array of pointers to tags for the top level.
274
275 int options A list of XML Parser options OR-ed together.
276 ++++++++++++++++++++++++++++++++++++++*/
277
278 int ParseXML(int fd,const xmltag *const *tags,int options)
279 {
280 int i;
281 int state,next_state,after_attr;
282 unsigned char saved_buffer_ptr=0;
283 const unsigned char *quoted;
284
285 unsigned char *attributes[XMLPARSE_MAX_ATTRS]={NULL};
286 int attribute=0;
287
288 int stackdepth=0,stackused=0;
289 const xmltag * const **tags_stack=NULL;
290 const xmltag **tag_stack=NULL;
291 const xmltag *tag=NULL;
292
293 /* The actual parser. */
294
295 lineno=1;
296
297 if(stored_message)
298 free(stored_message);
299 stored_message=NULL;
300
301 buffer_end=buffer[buffer_active]+sizeof(buffer[0])-1;
302 buffer_token=NULL;
303
304 buffer_refill(fd);
305
306 BEGIN(LEX_STATE_INITIAL);
307
308 new_state:
309
310 switch(state)
311 {
312 /* ================ Parsing states ================ */
313
314
315 /* -------- equivalent flex definition --------
316
317 <INITIAL>"<!" { BEGIN(BANGTAG); }
318 <INITIAL>"</" { BEGIN(END_TAG1); }
319 <INITIAL>"<?" { BEGIN(XML_DECL_START); }
320 <INITIAL>"<" { BEGIN(TAG_START); }
321
322 <INITIAL>">" { return(LEX_ERROR_CLOSE); }
323
324 <INITIAL>{N} { lineno++; }
325 <INITIAL>{S}+ { }
326 <INITIAL>. { return(LEX_ERROR_TEXT_OUTSIDE); }
327
328 -------- equivalent flex definition -------- */
329
330 case LEX_STATE_INITIAL:
331
332 while(1)
333 {
334 while(whitespace[(int)*buffer_ptr])
335 NEXT_CHAR;
336
337 if(*buffer_ptr=='\n')
338 {
339 NEXT_CHAR;
340
341 lineno++;
342 }
343 else if(*buffer_ptr=='<')
344 {
345 NEXT_CHAR;
346
347 if(*buffer_ptr=='/')
348 {
349 NEXT_CHAR;
350 BEGIN(LEX_STATE_END_TAG1);
351 }
352 else if(*buffer_ptr=='!')
353 {
354 NEXT_CHAR;
355 BEGIN(LEX_STATE_BANGTAG);
356 }
357 else if(*buffer_ptr=='?')
358 {
359 NEXT_CHAR;
360 BEGIN(LEX_STATE_XML_DECL_START);
361 }
362 else
363 BEGIN(LEX_STATE_TAG_START);
364 }
365 else if(*buffer_ptr=='>')
366 BEGIN(LEX_ERROR_CLOSE);
367 else
368 BEGIN(LEX_ERROR_TEXT_OUTSIDE);
369 }
370
371 break;
372
373 /* -------- equivalent flex definition --------
374
375 <BANGTAG>"--" { BEGIN(COMMENT); }
376 <BANGTAG>{N} { return(LEX_ERROR_TAG_START); }
377 <BANGTAG>. { return(LEX_ERROR_TAG_START); }
378
379 -------- equivalent flex definition -------- */
380
381 case LEX_STATE_BANGTAG:
382
383 if(*buffer_ptr!='-')
384 BEGIN(LEX_ERROR_TAG_START);
385
386 NEXT_CHAR;
387
388 if(*buffer_ptr!='-')
389 BEGIN(LEX_ERROR_TAG_START);
390
391 NEXT_CHAR;
392 BEGIN(LEX_STATE_COMMENT);
393
394 break;
395
396 /* -------- equivalent flex definition --------
397
398 <COMMENT>"-->" { BEGIN(INITIAL); }
399 <COMMENT>"--"[^>] { return(LEX_ERROR_COMMENT); }
400 <COMMENT>"-" { }
401 <COMMENT>{N} { lineno++; }
402 <COMMENT>[^-\n]+ { }
403
404 -------- equivalent flex definition -------- */
405
406 case LEX_STATE_COMMENT:
407
408 while(1)
409 {
410 while(*buffer_ptr!='-' && *buffer_ptr!='\n')
411 NEXT_CHAR;
412
413 if(*buffer_ptr=='-')
414 {
415 NEXT_CHAR;
416
417 if(*buffer_ptr!='-')
418 continue;
419
420 NEXT_CHAR;
421 if(*buffer_ptr=='>')
422 {
423 NEXT_CHAR;
424 BEGIN(LEX_STATE_INITIAL);
425 }
426
427 BEGIN(LEX_ERROR_COMMENT);
428 }
429 else /* if(*buffer_ptr=='\n') */
430 {
431 NEXT_CHAR;
432
433 lineno++;
434 }
435 }
436
437 break;
438
439 /* -------- equivalent flex definition --------
440
441 <XML_DECL_START>xml { BEGIN(XML_DECL); return(LEX_XML_DECL_BEGIN); }
442 <XML_DECL_START>{N} { return(LEX_ERROR_XML_DECL_START); }
443 <XML_DECL_START>. { return(LEX_ERROR_XML_DECL_START); }
444
445 -------- equivalent flex definition -------- */
446
447 case LEX_STATE_XML_DECL_START:
448
449 START_TOKEN;
450
451 if(*buffer_ptr=='x')
452 {
453 NEXT_CHAR;
454 if(*buffer_ptr=='m')
455 {
456 NEXT_CHAR;
457 if(*buffer_ptr=='l')
458 {
459 NEXT_CHAR;
460
461 saved_buffer_ptr=*buffer_ptr;
462 *buffer_ptr=0;
463
464 NEXT(LEX_STATE_XML_DECL);
465 BEGIN(LEX_FUNC_XML_DECL_BEGIN);
466 }
467 }
468 }
469
470 BEGIN(LEX_ERROR_XML_DECL_START);
471
472 /* -------- equivalent flex definition --------
473
474 <XML_DECL>"?>" { BEGIN(INITIAL); return(LEX_XML_DECL_FINISH); }
475 <XML_DECL>{S}+ { }
476 <XML_DECL>{N} { lineno++; }
477 <XML_DECL>{name} { after_attr=XML_DECL; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
478 <XML_DECL>. { return(LEX_ERROR_XML_DECL); }
479
480 -------- equivalent flex definition -------- */
481
482 case LEX_STATE_XML_DECL:
483
484 while(1)
485 {
486 while(whitespace[(int)*buffer_ptr])
487 NEXT_CHAR;
488
489 if(namestart[(int)*buffer_ptr])
490 {
491 START_TOKEN;
492
493 NEXT_CHAR;
494 while(namechar[(int)*buffer_ptr])
495 NEXT_CHAR;
496
497 saved_buffer_ptr=*buffer_ptr;
498 *buffer_ptr=0;
499
500 after_attr=LEX_STATE_XML_DECL;
501 NEXT(LEX_STATE_ATTR_KEY);
502 BEGIN(LEX_FUNC_ATTR_KEY);
503 }
504 else if(*buffer_ptr=='?')
505 {
506 NEXT_CHAR;
507 if(*buffer_ptr=='>')
508 {
509 NEXT_CHAR;
510 NEXT(LEX_STATE_INITIAL);
511 BEGIN(LEX_FUNC_XML_DECL_FINISH);
512 }
513
514 BEGIN(LEX_ERROR_XML_DECL);
515 }
516 else if(*buffer_ptr=='\n')
517 {
518 NEXT_CHAR;
519 lineno++;
520 }
521 else
522 BEGIN(LEX_ERROR_XML_DECL);
523 }
524
525 break;
526
527 /* -------- equivalent flex definition --------
528
529 <TAG_START>{name} { BEGIN(TAG); return(LEX_TAG_BEGIN); }
530 <TAG_START>{N} { return(LEX_ERROR_TAG_START); }
531 <TAG_START>. { return(LEX_ERROR_TAG_START); }
532
533 -------- equivalent flex definition -------- */
534
535 case LEX_STATE_TAG_START:
536
537 if(namestart[(int)*buffer_ptr])
538 {
539 START_TOKEN;
540
541 NEXT_CHAR;
542 while(namechar[(int)*buffer_ptr])
543 NEXT_CHAR;
544
545 saved_buffer_ptr=*buffer_ptr;
546 *buffer_ptr=0;
547
548 NEXT(LEX_STATE_TAG);
549 BEGIN(LEX_FUNC_TAG_BEGIN);
550 }
551
552 BEGIN(LEX_ERROR_TAG_START);
553
554 /* -------- equivalent flex definition --------
555
556 <END_TAG1>{name} { BEGIN(END_TAG2); return(LEX_TAG_POP); }
557 <END_TAG1>{N} { return(LEX_ERROR_END_TAG); }
558 <END_TAG1>. { return(LEX_ERROR_END_TAG); }
559
560 -------- equivalent flex definition -------- */
561
562 case LEX_STATE_END_TAG1:
563
564 if(namestart[(int)*buffer_ptr])
565 {
566 START_TOKEN;
567
568 NEXT_CHAR;
569 while(namechar[(int)*buffer_ptr])
570 NEXT_CHAR;
571
572 saved_buffer_ptr=*buffer_ptr;
573 *buffer_ptr=0;
574
575 NEXT(LEX_STATE_END_TAG2);
576 BEGIN(LEX_FUNC_TAG_POP);
577 }
578
579 BEGIN(LEX_ERROR_END_TAG);
580
581 /* -------- equivalent flex definition --------
582
583 <END_TAG2>">" { BEGIN(INITIAL); }
584 <END_TAG2>{N} { return(LEX_ERROR_END_TAG); }
585 <END_TAG2>. { return(LEX_ERROR_END_TAG); }
586
587 -------- equivalent flex definition -------- */
588
589 case LEX_STATE_END_TAG2:
590
591 if(*buffer_ptr=='>')
592 {
593 NEXT_CHAR;
594
595 BEGIN(LEX_STATE_INITIAL);
596 }
597
598 BEGIN(LEX_ERROR_END_TAG);
599
600 /* -------- equivalent flex definition --------
601
602 <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
603 <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
604 <TAG>{S}+ { }
605 <TAG>{N} { lineno++; }
606 <TAG>{name} { after_attr=TAG; BEGIN(ATTR_KEY); return(LEX_ATTR_KEY); }
607 <TAG>. { return(LEX_ERROR_TAG); }
608
609 -------- equivalent flex definition -------- */
610
611 case LEX_STATE_TAG:
612
613 while(1)
614 {
615 while(whitespace[(int)*buffer_ptr])
616 NEXT_CHAR;
617
618 if(namestart[(int)*buffer_ptr])
619 {
620 START_TOKEN;
621
622 NEXT_CHAR;
623 while(namechar[(int)*buffer_ptr])
624 NEXT_CHAR;
625
626 saved_buffer_ptr=*buffer_ptr;
627 *buffer_ptr=0;
628
629 after_attr=LEX_STATE_TAG;
630 NEXT(LEX_STATE_ATTR_KEY);
631 BEGIN(LEX_FUNC_ATTR_KEY);
632 }
633 else if(*buffer_ptr=='/')
634 {
635 NEXT_CHAR;
636 if(*buffer_ptr=='>')
637 {
638 NEXT_CHAR;
639 NEXT(LEX_STATE_INITIAL);
640 BEGIN(LEX_FUNC_TAG_FINISH);
641 }
642
643 BEGIN(LEX_ERROR_TAG);
644 }
645 else if(*buffer_ptr=='>')
646 {
647 NEXT_CHAR;
648 NEXT(LEX_STATE_INITIAL);
649 BEGIN(LEX_FUNC_TAG_PUSH);
650 }
651 else if(*buffer_ptr=='\n')
652 {
653 NEXT_CHAR;
654 lineno++;
655 }
656 else
657 BEGIN(LEX_ERROR_TAG);
658 }
659
660 break;
661
662 /* -------- equivalent flex definition --------
663
664 <ATTR_KEY>= { BEGIN(ATTR_VAL); }
665 <ATTR_KEY>{N} { return(LEX_ERROR_ATTR); }
666 <ATTR_KEY>. { return(LEX_ERROR_ATTR); }
667
668 -------- equivalent flex definition -------- */
669
670 case LEX_STATE_ATTR_KEY:
671
672 if(*buffer_ptr=='=')
673 {
674 NEXT_CHAR;
675 BEGIN(LEX_STATE_ATTR_VAL);
676 }
677
678 BEGIN(LEX_ERROR_ATTR);
679
680 /* -------- equivalent flex definition --------
681
682 <ATTR_VAL>\" { BEGIN(DQUOTED); }
683 <ATTR_VAL>\' { BEGIN(SQUOTED); }
684 <ATTR_VAL>{N} { return(LEX_ERROR_ATTR); }
685 <ATTR_VAL>. { return(LEX_ERROR_ATTR); }
686
687 -------- equivalent flex definition -------- */
688
689 case LEX_STATE_ATTR_VAL:
690
691 if(*buffer_ptr=='"')
692 {
693 NEXT_CHAR;
694 BEGIN(LEX_STATE_DQUOTED);
695 }
696 else if(*buffer_ptr=='\'')
697 {
698 NEXT_CHAR;
699 BEGIN(LEX_STATE_SQUOTED);
700 }
701
702 BEGIN(LEX_ERROR_ATTR);
703
704 /* -------- equivalent flex definition --------
705
706 <DQUOTED>\" { BEGIN(after_attr); return(LEX_ATTR_VAL); }
707 <DQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
708 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
709 <DQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
710 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
711 <DQUOTED>{UquotedD} { }
712 <DQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
713 <DQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
714
715 <SQUOTED>\' { BEGIN(after_attr); return(LEX_ATTR_VAL); }
716 <SQUOTED>{entityref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
717 else { const char *str=ParseXML_Decode_Entity_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_ENTITY_REF);} } }
718 <SQUOTED>{charref} { if(options&XMLPARSE_RETURN_ATTR_ENCODED) {append_string(yytext);}
719 else { const char *str=ParseXML_Decode_Char_Ref(yytext); if(str) {append_string(str);} else {return(LEX_ERROR_CHAR_REF);} } }
720 <SQUOTED>{UquotedS} { append_string(yytext); }
721 <SQUOTED>[<>&] { return(LEX_ERROR_ATTR_VAL); }
722 <SQUOTED>. { return(LEX_ERROR_ATTR_VAL); }
723
724 -------- equivalent flex definition -------- */
725
726 case LEX_STATE_DQUOTED:
727 case LEX_STATE_SQUOTED:
728
729 if(state==LEX_STATE_DQUOTED)
730 quoted=quotedD;
731 else
732 quoted=quotedS;
733
734 START_TOKEN;
735
736 while(1)
737 {
738 switch(quoted[(int)*buffer_ptr])
739 {
740 case 10: /* U1 - used by all tag keys and many values */
741 do
742 {
743 NEXT_CHAR;
744 }
745 while(quoted[(int)*buffer_ptr]==10);
746 break;
747
748 case 20: /* U2 */
749 NEXT_CHAR;
750 if(!U2[0][(int)*buffer_ptr])
751 BEGIN(LEX_ERROR_ATTR_VAL);
752 NEXT_CHAR;
753 break;
754
755 case 31: /* U3a */
756 NEXT_CHAR;
757 if(!U3a[0][(int)*buffer_ptr])
758 BEGIN(LEX_ERROR_ATTR_VAL);
759 NEXT_CHAR;
760 if(!U3a[1][(int)*buffer_ptr])
761 BEGIN(LEX_ERROR_ATTR_VAL);
762 NEXT_CHAR;
763 break;
764
765 case 32: /* U3b */
766 NEXT_CHAR;
767 if(!U3b[0][(int)*buffer_ptr])
768 BEGIN(LEX_ERROR_ATTR_VAL);
769 NEXT_CHAR;
770 if(!U3b[1][(int)*buffer_ptr])
771 BEGIN(LEX_ERROR_ATTR_VAL);
772 NEXT_CHAR;
773 break;
774
775 case 33: /* U3c */
776 NEXT_CHAR;
777 if(!U3c[0][(int)*buffer_ptr])
778 BEGIN(LEX_ERROR_ATTR_VAL);
779 NEXT_CHAR;
780 if(!U3c[1][(int)*buffer_ptr])
781 BEGIN(LEX_ERROR_ATTR_VAL);
782 NEXT_CHAR;
783 break;
784
785 case 34: /* U3d */
786 NEXT_CHAR;
787 if(!U3d[0][(int)*buffer_ptr])
788 BEGIN(LEX_ERROR_ATTR_VAL);
789 NEXT_CHAR;
790 if(!U3d[1][(int)*buffer_ptr])
791 BEGIN(LEX_ERROR_ATTR_VAL);
792 NEXT_CHAR;
793 break;
794
795 case 41: /* U4a */
796 NEXT_CHAR;
797 if(!U4a[0][(int)*buffer_ptr])
798 BEGIN(LEX_ERROR_ATTR_VAL);
799 NEXT_CHAR;
800 if(!U4a[1][(int)*buffer_ptr])
801 BEGIN(LEX_ERROR_ATTR_VAL);
802 NEXT_CHAR;
803 if(!U4a[2][(int)*buffer_ptr])
804 BEGIN(LEX_ERROR_ATTR_VAL);
805 NEXT_CHAR;
806 break;
807
808 case 42: /* U4b */
809 NEXT_CHAR;
810 if(!U4b[0][(int)*buffer_ptr])
811 BEGIN(LEX_ERROR_ATTR_VAL);
812 NEXT_CHAR;
813 if(!U4b[1][(int)*buffer_ptr])
814 BEGIN(LEX_ERROR_ATTR_VAL);
815 NEXT_CHAR;
816 if(!U4b[2][(int)*buffer_ptr])
817 BEGIN(LEX_ERROR_ATTR_VAL);
818 NEXT_CHAR;
819 break;
820
821 case 43: /* U4c */
822 NEXT_CHAR;
823 if(!U4c[0][(int)*buffer_ptr])
824 BEGIN(LEX_ERROR_ATTR_VAL);
825 NEXT_CHAR;
826 if(!U4c[1][(int)*buffer_ptr])
827 BEGIN(LEX_ERROR_ATTR_VAL);
828 NEXT_CHAR;
829 if(!U4c[2][(int)*buffer_ptr])
830 BEGIN(LEX_ERROR_ATTR_VAL);
831 NEXT_CHAR;
832 break;
833
834 case 50: /* entityref or charref */
835 NEXT_CHAR;
836
837 if(*buffer_ptr=='#') /* charref */
838 {
839 int charref_len=3;
840
841 NEXT_CHAR;
842 if(digit[(int)*buffer_ptr]) /* decimal */
843 {
844 NEXT_CHAR;
845 charref_len++;
846
847 while(digit[(int)*buffer_ptr])
848 {
849 NEXT_CHAR;
850 charref_len++;
851 }
852
853 if(*buffer_ptr!=';')
854 BEGIN(LEX_ERROR_ATTR_VAL);
855 }
856 else if(*buffer_ptr=='x') /* hex */
857 {
858 NEXT_CHAR;
859 charref_len++;
860
861 while(xdigit[(int)*buffer_ptr])
862 {
863 NEXT_CHAR;
864 charref_len++;
865 }
866
867 if(*buffer_ptr!=';')
868 BEGIN(LEX_ERROR_ATTR_VAL);
869 }
870 else /* other */
871 BEGIN(LEX_ERROR_ATTR_VAL);
872
873 NEXT_CHAR;
874
875 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
876 {
877 const char *str;
878
879 saved_buffer_ptr=*buffer_ptr;
880 *buffer_ptr=0;
881
882 str=ParseXML_Decode_Char_Ref((char*)(buffer_ptr-charref_len));
883
884 if(!str)
885 {
886 buffer_ptr-=charref_len;
887 BEGIN(LEX_ERROR_CHAR_REF);
888 }
889
890 buffer_token=memmove(buffer_token+(charref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-charref_len);
891 memcpy(buffer_ptr-strlen(str),str,strlen(str));
892
893 *buffer_ptr=saved_buffer_ptr;
894 }
895 }
896 else if(namestart[(int)*buffer_ptr]) /* entityref */
897 {
898 int entityref_len=3;
899
900 NEXT_CHAR;
901 while(namechar[(int)*buffer_ptr])
902 {
903 NEXT_CHAR;
904 entityref_len++;
905 }
906
907 if(*buffer_ptr!=';')
908 BEGIN(LEX_ERROR_ATTR_VAL);
909
910 NEXT_CHAR;
911
912 if(!(options&XMLPARSE_RETURN_ATTR_ENCODED))
913 {
914 const char *str;
915
916 saved_buffer_ptr=*buffer_ptr;
917 *buffer_ptr=0;
918
919 str=ParseXML_Decode_Entity_Ref((char*)(buffer_ptr-entityref_len));
920
921 if(!str)
922 {
923 buffer_ptr-=entityref_len;
924 BEGIN(LEX_ERROR_ENTITY_REF);
925 }
926
927 buffer_token=memmove(buffer_token+(entityref_len-strlen(str)),buffer_token,buffer_ptr-buffer_token-entityref_len);
928 memcpy(buffer_ptr-strlen(str),str,strlen(str));
929
930 *buffer_ptr=saved_buffer_ptr;
931 }
932 }
933 else /* other */
934 BEGIN(LEX_ERROR_ATTR_VAL);
935
936 break;
937
938 case 99: /* quote */
939 *buffer_ptr=0;
940 NEXT_CHAR;
941
942 NEXT(after_attr);
943 BEGIN(LEX_FUNC_ATTR_VAL);
944
945 default: /* other */
946 BEGIN(LEX_ERROR_ATTR_VAL);
947 }
948 }
949
950 break;
951
952
953 /* ================ Functional states ================ */
954
955
956 /* The start of a tag for an XML declaration */
957
958 case LEX_FUNC_XML_DECL_BEGIN:
959
960 if(tag_stack)
961 BEGIN(LEX_ERROR_XML_NOT_FIRST);
962
963 /* The start of a tag for an element */
964
965 case LEX_FUNC_TAG_BEGIN:
966
967 tag=NULL;
968
969 for(i=0;tags[i];i++)
970 if(buffer_token[0]==tags[i]->name[0] || tolower(buffer_token[0])==tags[i]->name[0])
971 if(!strcasecmp((char*)buffer_token+1,tags[i]->name+1))
972 {
973 tag=tags[i];
974
975 for(i=0;i<tag->nattributes;i++)
976 attributes[i]=NULL;
977
978 break;
979 }
980
981 if(tag==NULL)
982 BEGIN(LEX_ERROR_UNEXP_TAG);
983
984 END_TOKEN;
985
986 *buffer_ptr=saved_buffer_ptr;
987 BEGIN(next_state);
988
989 /* The end of the start-tag for an element */
990
991 case LEX_FUNC_TAG_PUSH:
992
993 if(stackused==stackdepth)
994 {
995 tag_stack =realloc(tag_stack ,(stackdepth+=8)*sizeof(xmltag*));
996 tags_stack=realloc(tags_stack,(stackdepth+=8)*sizeof(xmltag**));
997 }
998
999 tag_stack [stackused]=tag;
1000 tags_stack[stackused]=tags;
1001 stackused++;
1002
1003 if(tag->callback)
1004 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START,tag->nattributes,attributes))
1005 BEGIN(LEX_ERROR_CALLBACK);
1006
1007 tags=tag->subtags;
1008
1009 BEGIN(next_state);
1010
1011 /* The end of the empty-element-tag for an XML declaration */
1012
1013 case LEX_FUNC_XML_DECL_FINISH:
1014
1015 /* The end of the empty-element-tag for an element */
1016
1017 case LEX_FUNC_TAG_FINISH:
1018
1019 if(tag->callback)
1020 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_START|XMLPARSE_TAG_END,tag->nattributes,attributes))
1021 BEGIN(LEX_ERROR_CALLBACK);
1022
1023 if(stackused>0)
1024 tag=tag_stack[stackused-1];
1025 else
1026 tag=NULL;
1027
1028 BEGIN(next_state);
1029
1030 /* The end of the end-tag for an element */
1031
1032 case LEX_FUNC_TAG_POP:
1033
1034 stackused--;
1035 tags=tags_stack[stackused];
1036 tag =tag_stack [stackused];
1037
1038 if(strcmp((char*)buffer_token,tag->name))
1039 BEGIN(LEX_ERROR_UNBALANCED);
1040
1041 if(stackused<0)
1042 BEGIN(LEX_ERROR_NO_START);
1043
1044 for(i=0;i<tag->nattributes;i++)
1045 attributes[i]=NULL;
1046
1047 if(tag->callback)
1048 if(call_callback(tag->name,tag->callback,XMLPARSE_TAG_END,tag->nattributes,attributes))
1049 BEGIN(LEX_ERROR_CALLBACK);
1050
1051 if(stackused>0)
1052 tag=tag_stack[stackused-1];
1053 else
1054 tag=NULL;
1055
1056 END_TOKEN;
1057
1058 *buffer_ptr=saved_buffer_ptr;
1059 BEGIN(next_state);
1060
1061 /* An attribute key */
1062
1063 case LEX_FUNC_ATTR_KEY:
1064
1065 attribute=-1;
1066
1067 for(i=0;i<tag->nattributes;i++)
1068 if(buffer_token[0]==tag->attributes[i][0] || tolower(buffer_token[0])==tag->attributes[i][0])
1069 if(!strcasecmp((char*)buffer_token+1,tag->attributes[i]+1))
1070 {
1071 attribute=i;
1072
1073 break;
1074 }
1075
1076 if(attribute==-1)
1077 {
1078 if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERROR ||
1079 ((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_ERRNONAME && !strchr((char*)buffer_token,':')))
1080 BEGIN(LEX_ERROR_UNEXP_ATT);
1081 #ifndef LIBROUTINO
1082 else if((options&XMLPARSE_UNKNOWN_ATTRIBUTES)==XMLPARSE_UNKNOWN_ATTR_WARN)
1083 ParseXML_SetError("Warning on line %"PRIu64": unexpected attribute '%s' for tag '%s'.",lineno,buffer_token,tag->name);
1084 #endif
1085 }
1086
1087 END_TOKEN;
1088
1089 *buffer_ptr=saved_buffer_ptr;
1090 BEGIN(next_state);
1091
1092 /* An attribute value */
1093
1094 case LEX_FUNC_ATTR_VAL:
1095
1096 if(tag->callback && attribute!=-1)
1097 attributes[attribute]=buffer_token;
1098
1099 END_TOKEN;
1100
1101 BEGIN(next_state);
1102
1103 /* End of file */
1104
1105 case LEX_EOF:
1106
1107 if(tag)
1108 BEGIN(LEX_ERROR_UNEXP_EOF);
1109
1110 break;
1111
1112
1113 /* ================ Error states ================ */
1114
1115
1116 case LEX_ERROR_TAG_START:
1117 ParseXML_SetError("Character '<' seen not at start of tag.");
1118 break;
1119
1120 case LEX_ERROR_XML_DECL_START:
1121 ParseXML_SetError("Characters '<?' seen not at start of XML declaration.");
1122 break;
1123
1124 case LEX_ERROR_TAG:
1125 ParseXML_SetError("Invalid character seen inside tag '<%s...>'.",tag->name);
1126 break;
1127
1128 case LEX_ERROR_XML_DECL:
1129 ParseXML_SetError("Invalid character seen inside XML declaration '<?xml...>'.");
1130 break;
1131
1132 case LEX_ERROR_ATTR:
1133 ParseXML_SetError("Invalid attribute definition seen in tag.");
1134 break;
1135
1136 case LEX_ERROR_END_TAG:
1137 ParseXML_SetError("Invalid character seen in end-tag.");
1138 break;
1139
1140 case LEX_ERROR_COMMENT:
1141 ParseXML_SetError("Invalid comment seen.");
1142 break;
1143
1144 case LEX_ERROR_CLOSE:
1145 ParseXML_SetError("Character '>' seen not at end of tag.");
1146 break;
1147
1148 case LEX_ERROR_ATTR_VAL:
1149 ParseXML_SetError("Invalid character '%c' seen in attribute value.",*buffer_ptr);
1150 break;
1151
1152 case LEX_ERROR_ENTITY_REF:
1153 ParseXML_SetError("Invalid entity reference '%s' seen in attribute value.",buffer_ptr);
1154 break;
1155
1156 case LEX_ERROR_CHAR_REF:
1157 ParseXML_SetError("Invalid character reference '%s' seen in attribute value.",buffer_ptr);
1158 break;
1159
1160 case LEX_ERROR_TEXT_OUTSIDE:
1161 ParseXML_SetError("Non-whitespace '%c' seen outside tag.",*buffer_ptr);
1162 break;
1163
1164 case LEX_ERROR_UNEXP_TAG:
1165 ParseXML_SetError("Unexpected tag '%s'.",buffer_token);
1166 break;
1167
1168 case LEX_ERROR_UNBALANCED:
1169 ParseXML_SetError("End tag '</%s>' doesn't match start tag '<%s ...>'.",buffer_token,tag->name);
1170 break;
1171
1172 case LEX_ERROR_NO_START:
1173 ParseXML_SetError("End tag '</%s>' seen but there was no start tag '<%s ...>'.",buffer_token,buffer_token);
1174 break;
1175
1176 case LEX_ERROR_UNEXP_ATT:
1177 ParseXML_SetError("Unexpected attribute '%s' for tag '%s'.",buffer_token,tag->name);
1178 break;
1179
1180 case LEX_ERROR_UNEXP_EOF:
1181 ParseXML_SetError("End of file seen without end tag '</%s>'.",tag->name);
1182 break;
1183
1184 case LEX_ERROR_XML_NOT_FIRST:
1185 ParseXML_SetError("XML declaration '<?xml...>' not before all other tags.");
1186 break;
1187
1188 case LEX_ERROR_CALLBACK:
1189 /* The error message should have been set by the callback function, have a fallback just in case */
1190 if(!stored_message)
1191 ParseXML_SetError("Unknown error from tag callback function.");
1192 break;
1193 }
1194
1195 /* Print the error message */
1196
1197 #ifndef LIBROUTINO
1198 if(state)
1199 fprintf(stderr,"XML Parser: %s\n",stored_message);
1200 #endif
1201
1202 /* Delete the tagdata */
1203
1204 if(stackdepth)
1205 {
1206 free(tag_stack);
1207 free(tags_stack);
1208 }
1209
1210 return(state);
1211 }
1212
1213
1214 /*++++++++++++++++++++++++++++++++++++++
1215 Return the current parser line number.
1216
1217 uint64_t ParseXML_LineNumber Returns the line number.
1218 ++++++++++++++++++++++++++++++++++++++*/
1219
1220 uint64_t ParseXML_LineNumber(void)
1221 {
1222 return(lineno);
1223 }
1224
1225
1226 /*++++++++++++++++++++++++++++++++++++++
1227 Store an error message for later.
1228
1229 const char *format The format string.
1230
1231 ... The other arguments.
1232 ++++++++++++++++++++++++++++++++++++++*/
1233
1234 void ParseXML_SetError(const char *format, ...)
1235 {
1236 va_list ap;
1237 char temp[2];
1238 int line_length,error_length;
1239
1240 line_length=snprintf(temp,1,"Error on line %" PRIu64 ": ",lineno);
1241
1242 va_start(ap,format);
1243 error_length=vsnprintf(temp,1,format,ap);
1244 va_end(ap);
1245
1246 if(stored_message)
1247 free(stored_message);
1248
1249 stored_message=malloc(error_length+line_length+1);
1250
1251 line_length=sprintf(stored_message,"Error on line %" PRIu64 ": ",lineno);
1252
1253 va_start(ap,format);
1254 vsprintf(stored_message+line_length,format,ap);
1255 va_end(ap);
1256 }
1257
1258
1259 /*++++++++++++++++++++++++++++++++++++++
1260 Return a stored error message.
1261
1262 char *ParseXML_GetError Returns the most recent stored error.
1263 ++++++++++++++++++++++++++++++++++++++*/
1264
1265 char *ParseXML_GetError(void)
1266 {
1267 return(stored_message);
1268 }
1269
1270
/*++++++++++++++++++++++++++++++++++++++
  Look up the replacement text for one of the five predefined XML entity references.

  char *ParseXML_Decode_Entity_Ref Returns a pointer to a constant one-character string, or NULL if the
                                   reference is not one of &amp; &lt; &gt; &apos; &quot;.

  const char *string The complete entity reference, including the leading '&' and trailing ';'.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Entity_Ref(const char *string)
{
 static const char *const entity[5]     ={"&amp;","&lt;","&gt;","&apos;","&quot;"};
 static char       *const replacement[5]={"&"    ,"<"   ,">"   ,"'"     ,"\""    };
 size_t n;

 for(n=0;n<sizeof(entity)/sizeof(entity[0]);n++)
    if(strcmp(string,entity[n])==0)
       return(replacement[n]);

 return(NULL);
}
1288
1289
/*++++++++++++++++++++++++++++++++++++++
  Convert an XML character reference into a UTF-8 encoded string.

  char *ParseXML_Decode_Char_Ref Returns a pointer to a static buffer holding the encoded character; the
                                 buffer is overwritten by the next call.

  const char *string The character reference string; the number is read from offset 2 (decimal, "&#NNN;")
                     or from offset 3 when the character at offset 2 is 'x' (hexadecimal, "&#xHHH;").

  Values that are negative or above 0x1FFFFF are replaced by U+FFFD (bytes EF BF BD).
  NOTE(review): values in the surrogate range or above U+10FFFF are still encoded as given, as before.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Decode_Char_Ref(const char *string)
{
 static char result[5]=""; /* static allocation of return value (set each call) */
 long int unicode;

 if(string[2]=='x') unicode=strtol(string+3,NULL,16);
 else               unicode=strtol(string+2,NULL,10);

 if(unicode<0 || unicode>0x1FFFFF)
   {
    /* Not representable: use the UTF-8 encoding of U+FFFD (replacement character) */
    result[0]=(char)0xEF;
    result[1]=(char)0xBF;
    result[2]=(char)0xBD;
    result[3]=0;
   }
 else if(unicode<=0x7F)
   {
    /* 0000 0000-0000 007F => 0xxxxxxx */
    result[0]=(char)unicode;
    result[1]=0;
   }
 else if(unicode<=0x07FF)
   {
    /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
    result[0]=(char)(0xC0+((unicode&0x07C0)>>6));
    result[1]=(char)(0x80+ (unicode&0x003F));
    result[2]=0;
   }
 else if(unicode<=0xFFFF)
   {
    /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xE0+((unicode&0xF000)>>12));
    result[1]=(char)(0x80+((unicode&0x0FC0)>>6));
    result[2]=(char)(0x80+ (unicode&0x003F));
    result[3]=0;
   }
 else
   {
    /* 0001 0000-001F FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    result[0]=(char)(0xF0+((unicode&0x1C0000)>>18));
    result[1]=(char)(0x80+((unicode&0x03F000)>>12));
    result[2]=(char)(0x80+((unicode&0x000FC0)>>6));
    result[3]=(char)(0x80+ (unicode&0x00003F));
    result[4]=0;
   }

 return(result);
}
1345
1346
/*++++++++++++++++++++++++++++++++++++++
  Convert a string into something that is safe to output in an XML file.

  The characters < > & ' " are replaced by entity or character references, bytes 32 to 127 are copied
  unchanged, and everything else (control characters and UTF-8 multi-byte sequences) is written as a
  hexadecimal character reference.  Bytes that do not start a recognised UTF-8 sequence become &#xFFFD;.

  char *ParseXML_Encode_Safe_XML Returns the original pointer if no change is needed, otherwise a pointer
                                 to a static buffer that is reused (and may move) on the next call.
                                 Returns NULL if memory for the buffer cannot be allocated.

  const char *string The string to convert.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseXML_Encode_Safe_XML(const char *string)
{
 enum { GROWTH=256,     /* number of bytes added each time the buffer is enlarged */
        LONGEST=10 };   /* longest replacement for one character: "&#xHHHHHH;" */

 static const char hexstring[17]="0123456789ABCDEF"; /* local lookup table */
 static char *result=NULL;                           /* static allocation of return value */
 const unsigned char *in=(const unsigned char*)string;
 size_t i,j,size;
 char *grown;

 /* Find the first character that cannot be copied unchanged */

 for(i=0;in[i];i++)
    if(in[i]=='<' || in[i]=='>' || in[i]=='&' || in[i]=='\'' || in[i]=='"' || in[i]<32 || in[i]>127)
       break;

 if(!in[i])
    return((char*)string);

 /* Allocate the buffer and copy the unchanged prefix */

 size=i+GROWTH;

 grown=(char*)realloc((void*)result,size);

 if(!grown)
   {
    free(result);
    result=NULL;
    return(NULL);
   }

 result=grown;

 memcpy(result,string,i);
 j=i;

 for(;in[i];i++)
   {
    /* Always keep room for the longest replacement plus the terminator, the
       previous fixed slack was too small for 8 and 10 byte character references. */

    if(size-j<(size_t)(LONGEST+1))
      {
       size+=GROWTH;

       grown=(char*)realloc((void*)result,size);

       if(!grown)
         {
          free(result);
          result=NULL;
          return(NULL);
         }

       result=grown;
      }

    if(in[i]=='\'')
      {
       /* XML, HTML5 and XHTML1 allow &apos; but HTML4 doesn't. */
       memcpy(result+j,"&#39;",5);
       j+=5;
      }
    else if(in[i]=='&')
      {
       memcpy(result+j,"&amp;",5);
       j+=5;
      }
    else if(in[i]=='"')
      {
       memcpy(result+j,"&quot;",6);
       j+=6;
      }
    else if(in[i]=='<')
      {
       memcpy(result+j,"&lt;",4);
       j+=4;
      }
    else if(in[i]=='>')
      {
       memcpy(result+j,"&gt;",4);
       j+=4;
      }
    else if(in[i]>=32 && in[i]<=127)
       result[j++]=(char)in[i];
    else
      {
       unsigned int unicode;

       /* Decode the UTF-8; the && chains stop at the terminator so nothing is read beyond the string */

       if(in[i]<0x80)
         {
          /* 0000 0000-0000 007F => 0xxxxxxx */
          unicode=in[i];
         }
       else if((in[i]&0xE0)==0xC0 && (in[i]&0x1F)>=2 && (in[i+1]&0xC0)==0x80)
         {
          /* 0000 0080-0000 07FF => 110xxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x1F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF0)==0xE0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80)
         {
          /* 0000 0800-0000 FFFF => 1110xxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x0F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else if((in[i]&0xF8)==0xF0 && (in[i+1]&0xC0)==0x80 && (in[i+2]&0xC0)==0x80 && (in[i+3]&0xC0)==0x80)
         {
          /* 0001 0000-001F FFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          unicode =(unsigned int)(in[i++]&0x07)<<18;
          unicode|=(unsigned int)(in[i++]&0x3F)<<12;
          unicode|=(unsigned int)(in[i++]&0x3F)<<6;
          unicode|=(unsigned int)(in[i  ]&0x3F);
         }
       else
          unicode=0xFFFD;

       /* Output the character reference using 2, 4 or 6 hexadecimal digits */

       result[j++]='&';
       result[j++]='#';
       result[j++]='x';

       if(unicode&0x00FF0000)
         {
          result[j++]=hexstring[((unicode>>16)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>16)&0x0f)   ];
         }

       if(unicode&0x00FFFF00)
         {
          result[j++]=hexstring[((unicode>>8)&0xf0)>>4];
          result[j++]=hexstring[((unicode>>8)&0x0f)   ];
         }

       result[j++]=hexstring[(unicode&0xf0)>>4];
       result[j++]=hexstring[(unicode&0x0f)   ];

       result[j++]=';';
      }
   }

 result[j]=0;

 return(result);
}
1487
1488
1489 /*++++++++++++++++++++++++++++++++++++++
1490 Check that a string really is an integer.
1491
1492 int ParseXML_IsInteger Returns 1 if an integer could be found or 0 otherwise.
1493
1494 const char *string The string to be parsed.
1495 ++++++++++++++++++++++++++++++++++++++*/
1496
1497 int ParseXML_IsInteger(const char *string)
1498 {
1499 const unsigned char *p=(unsigned char*)string;
1500
1501 if(*p=='-' || *p=='+')
1502 p++;
1503
1504 while(digit[(int)*p])
1505 p++;
1506
1507 if(*p)
1508 return(0);
1509 else
1510 return(1);
1511 }
1512
1513
1514 /*++++++++++++++++++++++++++++++++++++++
1515 Check that a string really is a floating point number.
1516
1517 int ParseXML_IsFloating Returns 1 if a floating point number could be found or 0 otherwise.
1518
1519 const char *string The string to be parsed.
1520 ++++++++++++++++++++++++++++++++++++++*/
1521
1522 int ParseXML_IsFloating(const char *string)
1523 {
1524 const unsigned char *p=(unsigned char*)string;
1525
1526 if(*p=='-' || *p=='+')
1527 p++;
1528
1529 while(digit[(int)*p] || *p=='.')
1530 p++;
1531
1532 if(*p=='e' || *p=='E')
1533 {
1534 p++;
1535
1536 if(*p=='-' || *p=='+')
1537 p++;
1538
1539 while(digit[*p])
1540 p++;
1541 }
1542
1543 if(*p)
1544 return(0);
1545 else
1546 return(1);
1547 }
1548
1549
/* Table for checking for double-quoted characters, indexed by byte value.
   Entries that are not listed are zero.  The values are:
     10 = tab, line feed, carriage return and 0x20-0x7f except '"', '&', '<' and '>'
     99 = '"'   50 = '&'   0 = '<', '>', other control characters, 0x80-0xc1 and 0xf5-0xff
     20 = 0xc2-0xdf   31 = 0xe0   32 = 0xe1-0xec   33 = 0xed   34 = 0xee-0xef
     41 = 0xf0        42 = 0xf1-0xf3               43 = 0xf4
   NOTE(review): the meaning of each value is defined by the code that reads this table - confirm there. */
static const unsigned char quotedD[256]={
 ['\t']=10,
 ['\n']=10,
 ['\r']=10,

 [0x20]=10,10,99,10, 10,10,50,10, 10,10,10,10, 10,10,10,10,  /* 0x20-0x2f  !"#$%&'()*+,-./ */
        10,10,10,10, 10,10,10,10, 10,10,10,10,  0,10, 0,10,  /* 0x30-0x3f 0123456789:;<=>? */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x40-0x4f @ABCDEFGHIJKLMNO */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x50-0x5f PQRSTUVWXYZ[\]^_ */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x60-0x6f `abcdefghijklmno */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x70-0x7f pqrstuvwxyz{|}~  */

 [0xc2]=20,20,       20,20,20,20, 20,20,20,20, 20,20,20,20,  /* 0xc2-0xcf */
        20,20,20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,  /* 0xd0-0xdf */
        31,32,32,32, 32,32,32,32, 32,32,32,32, 32,33,34,34,  /* 0xe0-0xef */
        41,42,42,42, 43                                      /* 0xf0-0xf4 */
};
1567
/* Table for checking for single-quoted characters, indexed by byte value.
   Entries that are not listed are zero.  The values are:
     10 = tab, line feed, carriage return and 0x20-0x7f except '\'', '&', '<' and '>'
     99 = '\''  50 = '&'   0 = '<', '>', other control characters, 0x80-0xc1 and 0xf5-0xff
     20 = 0xc2-0xdf   31 = 0xe0   32 = 0xe1-0xec   33 = 0xed   34 = 0xee-0xef
     41 = 0xf0        42 = 0xf1-0xf3               43 = 0xf4
   NOTE(review): the meaning of each value is defined by the code that reads this table - confirm there. */
static const unsigned char quotedS[256]={
 ['\t']=10,
 ['\n']=10,
 ['\r']=10,

 [0x20]=10,10,10,10, 10,10,50,99, 10,10,10,10, 10,10,10,10,  /* 0x20-0x2f  !"#$%&'()*+,-./ */
        10,10,10,10, 10,10,10,10, 10,10,10,10,  0,10, 0,10,  /* 0x30-0x3f 0123456789:;<=>? */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x40-0x4f @ABCDEFGHIJKLMNO */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x50-0x5f PQRSTUVWXYZ[\]^_ */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x60-0x6f `abcdefghijklmno */
        10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,10,10,  /* 0x70-0x7f pqrstuvwxyz{|}~  */

 [0xc2]=20,20,       20,20,20,20, 20,20,20,20, 20,20,20,20,  /* 0xc2-0xcf */
        20,20,20,20, 20,20,20,20, 20,20,20,20, 20,20,20,20,  /* 0xd0-0xdf */
        31,32,32,32, 32,32,32,32, 32,32,32,32, 32,33,34,34,  /* 0xe0-0xef */
        41,42,42,42, 43                                      /* 0xf0-0xf4 */
};
1585
/* Table for checking for characters between 0x80 and 0x8f: 1 for those bytes, 0 (unlisted) for all others. */
static const unsigned char U_80_8F[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1   /* 0x80-0x8f */
};
1603
/* Table for checking for characters between 0x80 and 0x9f: 1 for those bytes, 0 (unlisted) for all others. */
static const unsigned char U_80_9F[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x80-0x8f */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1   /* 0x90-0x9f */
};
1621
/* Table for checking for characters between 0x80 and 0xbf: 1 for those bytes, 0 (unlisted) for all others. */
static const unsigned char U_80_BF[256]={
 [0x80]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x80-0x8f */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x90-0x9f */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0xa0-0xaf */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1   /* 0xb0-0xbf */
};
1639
/* Table for checking for characters between 0x90 and 0xbf: 1 for those bytes, 0 (unlisted) for all others. */
static const unsigned char U_90_BF[256]={
 [0x90]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x90-0x9f */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0xa0-0xaf */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1   /* 0xb0-0xbf */
};
1657
/* Table for checking for characters between 0xa0 and 0xbf: 1 for those bytes, 0 (unlisted) for all others. */
static const unsigned char U_A0_BF[256]={
 [0xa0]=1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0xa0-0xaf */
        1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1   /* 0xb0-0xbf */
};
1675
1676 /* Table for checking for U2 characters = C2-DF,80-BF = U+0080-U+07FF. */
1677 static const unsigned char *U2[1]={ U_80_BF };
1678
1679 /* Table for checking for U3a characters = E0,A0-BF,80-BF = U+0800-U+0FFF. */
1680 static const unsigned char *U3a[2]={ U_A0_BF, U_80_BF };
1681
1682 /* Table for checking for U3b characters = E1-EC,80-BF,80-BF = U+1000-U+CFFF. */
1683 static const unsigned char *U3b[2]={ U_80_BF, U_80_BF };
1684
1685 /* Table for checking for U3c characters = ED,80-9F,80-BF = U+D000-U+D7FF (U+D800-U+DFFF are not legal in XML). */
1686 static const unsigned char *U3c[2]={ U_80_9F, U_80_BF };
1687
1688 /* Table for checking for U3d characters = EE-EF,80-BF,80-BF = U+E000-U+FFFF (U+FFFE-U+FFFF are not legal in XML but handled). */
1689 static const unsigned char *U3d[2]={ U_80_BF, U_80_BF };
1690
1691 /* Table for checking for U4a characters = F0,90-BF,80-BF,80-BF = U+10000-U+3FFFF. */
1692 static const unsigned char *U4a[3]={ U_90_BF, U_80_BF, U_80_BF };
1693
1694 /* Table for checking for U4b characters = F1-F3,80-BF,80-BF,80-BF = U+40000-U+FFFFF. */
1695 static const unsigned char *U4b[3]={ U_80_BF, U_80_BF, U_80_BF };
1696
1697 /* Table for checking for U4c characters = F4,80-8F,80-BF,80-BF = U+100000-U+10FFFF (U+110000- are not legal in XML). */
1698 static const unsigned char *U4c[3]={ U_80_8F, U_80_BF, U_80_BF };
1699
/* Table for checking for namestart characters: 1 for ':', '_', 'A'-'Z' and 'a'-'z', 0 (unlisted) for all other bytes. */
static const unsigned char namestart[256]={
 [':']=1,

 ['A']=1,1,1,1,1,1,1,1,1,1,1,1,1,   /* A-M */
       1,1,1,1,1,1,1,1,1,1,1,1,1,   /* N-Z */

 ['_']=1,

 ['a']=1,1,1,1,1,1,1,1,1,1,1,1,1,   /* a-m */
       1,1,1,1,1,1,1,1,1,1,1,1,1    /* n-z */
};
1717
/* Table for checking for namechar characters: 1 for '-', '.', '0'-'9', ':', '_', 'A'-'Z' and 'a'-'z',
   0 (unlisted) for all other bytes. */
static const unsigned char namechar[256]={
 ['-']=1,
 ['.']=1,

 ['0']=1,1,1,1,1,1,1,1,1,1,         /* 0-9 */
 [':']=1,

 ['A']=1,1,1,1,1,1,1,1,1,1,1,1,1,   /* A-M */
       1,1,1,1,1,1,1,1,1,1,1,1,1,   /* N-Z */

 ['_']=1,

 ['a']=1,1,1,1,1,1,1,1,1,1,1,1,1,   /* a-m */
       1,1,1,1,1,1,1,1,1,1,1,1,1    /* n-z */
};
1735
/* Table for checking for whitespace characters: 1 for tab (0x09), carriage return (0x0d) and space (0x20),
   0 (unlisted) for all other bytes.  Line feed (0x0a) is not flagged in this table.
   NOTE(review): presumably line feed is handled separately by the code that uses the table - confirm. */
static const unsigned char whitespace[256]={
 ['\t']=1,
 ['\r']=1,
 [' '] =1
};
1753
/* Table for checking for digit characters: 1 for '0'-'9', 0 (unlisted) for all other bytes. */
static const unsigned char digit[256]={
 ['0']=1,1,1,1,1, 1,1,1,1,1   /* 0-9 */
};
1771
/* Table for checking for xdigit characters: 1 for '0'-'9', 'A'-'F' and 'a'-'f', 0 (unlisted) for all other bytes. */
static const unsigned char xdigit[256]={
 ['0']=1,1,1,1,1, 1,1,1,1,1,   /* 0-9 */
 ['A']=1,1,1,1,1,1,            /* A-F */
 ['a']=1,1,1,1,1,1             /* a-f */
};