Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Annotation of /trunk/src/xmlparse.l
Parent Directory
|
Revision Log
Revision 334 -
(hide annotations)
(download)
Sun Mar 28 15:27:05 2010 UTC (14 years, 11 months ago) by amb
File size: 10814 byte(s)
Sun Mar 28 15:27:05 2010 UTC (14 years, 11 months ago) by amb
File size: 10814 byte(s)
Initial revision
1 | amb | 334 | %{ |
2 | /*************************************** | ||
3 | $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.1 2010-03-28 15:23:47 amb Exp $ | ||
4 | |||
5 | A simple generic XML parser where the structure comes from the function parameters. | ||
6 | |||
7 | Part of the Routino routing software. | ||
8 | ******************/ /****************** | ||
9 | This file Copyright 2010 Andrew M. Bishop | ||
10 | |||
11 | This program is free software: you can redistribute it and/or modify | ||
12 | it under the terms of the GNU Affero General Public License as published by | ||
13 | the Free Software Foundation, either version 3 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU Affero General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU Affero General Public License | ||
22 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
23 | ***************************************/ | ||
24 | |||
25 | |||
26 | #include <stdio.h> | ||
27 | #include <stdlib.h> | ||
28 | #include <string.h> | ||
29 | |||
/* Parser outputs (token codes returned by yylex() to ParseXML) */

#define LEX_TAG_BEGIN      1
#define LEX_TAG_POP        2
#define LEX_TAG_PUSH       3
#define LEX_TAG_FINISH     4
#define LEX_ATTR_KEY       5
#define LEX_ATTR_VAL       6


/* Lexer definitions */

#define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
#ifndef yywrap
/*+ Needed in lex but does nothing. +*/
#define yywrap() 1
#endif

/*+ Reset the current string.
    Uses the variables 'string' and 'stringused' from the enclosing scope.
    Wrapped in do/while(0) so that it behaves as a single statement. +*/
#define reset_string \
 do \
   { \
    if(string) *string=0; \
    stringused=0; \
   } \
 while(0)

/*+ append information to the current string.
    Uses the variables 'string', 'stringlen', 'stringused' and 'newlen' from the enclosing scope.
    The buffer is (re)allocated when it does not exist or is too small; a missing buffer is never
    trusted to have the recorded length.  An allocation failure is reported and the program exits. +*/
#define append_string(xx) \
 do \
   { \
    newlen=(int)strlen(xx); \
    if(!string) \
       stringused=0; \
    if(!string || (stringused+newlen)>=stringlen) \
      { \
       char *grown_string=(char*)realloc((void*)string,(size_t)(stringused+newlen+16)); \
       if(!grown_string) \
         { \
          fprintf(stderr,"XML Parser: Error out of memory on line %d.\n",lineno); \
          exit(1); \
         } \
       string=grown_string; \
       stringlen=stringused+newlen+16; \
      } \
    strcpy(string+stringused,xx); \
    stringused+=newlen; \
   } \
 while(0)

#define YY_NO_INPUT


/* Lexer functions and variables */

extern int yylex(void);

/*+ The text associated with the most recently returned token (or NULL). +*/
static char *yylval=NULL;

/*+ The current line number in the input, used in error messages. +*/
static int lineno=0;
70 | |||
71 | %} | ||
72 | |||
73 | W [ \t] | ||
74 | |||
75 | nonascii [\200-\377] | ||
76 | ascii [ -~] | ||
77 | alphanum [a-zA-Z0-9] | ||
78 | punct [][!\"#$%&\'()*+,-./:;<=>?@\\^_`{|}~] | ||
79 | safepunct [][!\#$%\()*+,-./:;=?@\\^_`{|}~] | ||
80 | |||
81 | tag ({alphanum}|[-:])+ | ||
82 | key ({alphanum}|[-:])+ | ||
83 | val ({alphanum}|{nonascii}|{safepunct})+ | ||
84 | |||
85 | %x COMMENT | ||
86 | %x TAG_START TAG TAG_ATTR_KEY TAG_ATTR_VAL | ||
87 | %x DQUOTED SQUOTED | ||
88 | |||
89 | %% | ||
90 | /* Must use static variables since the parser returns often. */ | ||
91 | static char *string=NULL; | ||
92 | static int stringlen=0,stringused=0; | ||
93 | int newlen; | ||
94 | |||
95 | /* Handle comments and other tags */ | ||
96 | |||
97 | "<!--" { BEGIN(COMMENT); } | ||
98 | "<" { BEGIN(TAG_START); } | ||
99 | \n { lineno++; } | ||
100 | [^<\n]+ { } | ||
101 | |||
102 | /* Comments - not strictly correct. */ | ||
103 | |||
104 | <COMMENT>"--"{W}*">" { BEGIN(INITIAL); } | ||
105 | <COMMENT>">" { } | ||
106 | <COMMENT>"-" { } | ||
107 | <COMMENT>\n { lineno++; } | ||
108 | <COMMENT>[^->\n]+ { } | ||
109 | |||
110 | /* Tags */ | ||
111 | |||
112 | <TAG_START>{W}+ { } | ||
113 | <TAG_START>"?xml" { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); } | ||
114 | <TAG_START>{tag} { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); } | ||
115 | |||
116 | <TAG_START>"/"{tag}">" { BEGIN(INITIAL); return(LEX_TAG_POP); } | ||
117 | |||
118 | <TAG_START>\n { BEGIN(INITIAL); lineno++; } | ||
119 | <TAG_START>. { BEGIN(INITIAL); } | ||
120 | |||
121 | <TAG>{W}+ { } | ||
122 | <TAG>"/>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); } | ||
123 | <TAG>"?>" { BEGIN(INITIAL); return(LEX_TAG_FINISH); } | ||
124 | <TAG>">" { BEGIN(INITIAL); return(LEX_TAG_PUSH); } | ||
125 | <TAG>{key} { BEGIN(TAG_ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); } | ||
126 | <TAG>\n { lineno++; } | ||
127 | <TAG>. { } | ||
128 | |||
129 | <TAG_ATTR_KEY>{W}*= { BEGIN(TAG_ATTR_VAL); } | ||
130 | <TAG_ATTR_KEY>\n { lineno++; } | ||
131 | <TAG_ATTR_KEY>. { BEGIN(TAG); unput(yytext[0]); yylval=NULL; return(LEX_ATTR_VAL); } | ||
132 | |||
133 | <TAG_ATTR_VAL>{W}+ { } | ||
134 | <TAG_ATTR_VAL>\" { BEGIN(DQUOTED); reset_string; } | ||
135 | <TAG_ATTR_VAL>\' { BEGIN(SQUOTED); reset_string; } | ||
136 | <TAG_ATTR_VAL>{val} { BEGIN(TAG); yylval=yytext; return(LEX_ATTR_VAL); } | ||
137 | <TAG_ATTR_VAL>\n { lineno++; } | ||
138 | <TAG_ATTR_VAL>. { BEGIN(TAG); unput(yytext[0]); yylval=NULL; return(LEX_ATTR_VAL); } | ||
139 | |||
140 | /* Quoted strings */ | ||
141 | |||
142 | <DQUOTED>\\\\ { append_string(yytext); } | ||
143 | <DQUOTED>\\\" { append_string(yytext); } | ||
144 | <DQUOTED>\\ { append_string(yytext); } | ||
145 | <DQUOTED>\" { BEGIN(TAG); yylval=string; return(LEX_ATTR_VAL); } | ||
146 | <DQUOTED>\n { lineno++; } | ||
147 | <DQUOTED>[^\\\"\n]+ { append_string(yytext); } | ||
148 | |||
149 | <SQUOTED>\\\\ { append_string(yytext); } | ||
150 | <SQUOTED>\\\' { append_string(yytext); } | ||
151 | <SQUOTED>\\ { append_string(yytext); } | ||
152 | <SQUOTED>\' { BEGIN(TAG); yylval=string; return(LEX_ATTR_VAL); } | ||
153 | <SQUOTED>\n { lineno++; } | ||
154 | <SQUOTED>[^\\\'\n]+ { append_string(yytext); } | ||
155 | |||
156 | /* End of file */ | ||
157 | |||
158 | <<EOF>> { free(string); string=NULL; BEGIN(INITIAL); return(0); } | ||
159 | |||
160 | %% | ||
161 | |||
162 | #include "xmlparse.h" | ||
163 | |||
164 | |||
165 | /*++++++++++++++++++++++++++++++++++++++ | ||
166 | Parse the XML and call the functions for each tag as seen. | ||
167 | |||
168 | FILE *file The file to parse. | ||
169 | |||
170 | xmltag **tags The array of pointers to tags for the top level. | ||
171 | ++++++++++++++++++++++++++++++++++++++*/ | ||
172 | |||
173 | void ParseXML(FILE *file,xmltag **tags) | ||
174 | { | ||
175 | int yychar,i; | ||
176 | |||
177 | int nattributes=0; | ||
178 | char *attributes[XMLPARSE_MAX_ATTRS]; | ||
179 | int attribute=0; | ||
180 | |||
181 | int stackdepth=0,stackused=0; | ||
182 | xmltag ***tagstack=NULL; | ||
183 | xmltag *tag=NULL; | ||
184 | |||
185 | static int first=1; | ||
186 | |||
187 | /* Parser (re)-initialisation */ | ||
188 | |||
189 | yyin=file; | ||
190 | |||
191 | if(!first) | ||
192 | yyrestart(NULL); | ||
193 | |||
194 | first=0; | ||
195 | |||
196 | lineno=1; | ||
197 | |||
198 | /* The actual parser. */ | ||
199 | |||
200 | while((yychar=yylex())) | ||
201 | switch(yychar) | ||
202 | { | ||
203 | /* The start of a tag for an element */ | ||
204 | |||
205 | case LEX_TAG_BEGIN: | ||
206 | |||
207 | tag=NULL; | ||
208 | |||
209 | for(i=0;tags[i];i++) | ||
210 | if(!strcasecmp(yylval,tags[i]->name)) | ||
211 | { | ||
212 | tag=tags[i]; | ||
213 | |||
214 | for(i=0;i<nattributes;i++) | ||
215 | free(attributes[i]); | ||
216 | |||
217 | for(i=0;i<XMLPARSE_MAX_ATTRS;i++) | ||
218 | if(!tag->attributes[i]) | ||
219 | break; | ||
220 | |||
221 | nattributes=i; | ||
222 | |||
223 | for(i=0;i<nattributes;i++) | ||
224 | attributes[i]=NULL; | ||
225 | |||
226 | break; | ||
227 | } | ||
228 | |||
229 | if(tag==NULL) | ||
230 | { | ||
231 | fprintf(stderr,"XML Parser: Error unexpected tag '%s' on line %d.\n",yylval,lineno); | ||
232 | exit(1); | ||
233 | } | ||
234 | break; | ||
235 | |||
236 | /* The end of the start-tag for an element */ | ||
237 | |||
238 | case LEX_TAG_PUSH: | ||
239 | |||
240 | if(stackused==stackdepth) | ||
241 | tagstack=(xmltag***)realloc((void*)tagstack,(stackdepth+=8)*sizeof(xmltag**)); | ||
242 | |||
243 | tagstack[stackused++]=tags; | ||
244 | tags=tag->subtags; | ||
245 | |||
246 | /* The end of the empty-element-tag for an element */ | ||
247 | |||
248 | case LEX_TAG_FINISH: | ||
249 | |||
250 | if(tag->callback) | ||
251 | switch(nattributes) | ||
252 | { | ||
253 | case 0: (*tag->callback)(); break; | ||
254 | case 1: (*tag->callback)(attributes[0]); break; | ||
255 | case 2: (*tag->callback)(attributes[0],attributes[1]); break; | ||
256 | case 3: (*tag->callback)(attributes[0],attributes[1],attributes[2]); break; | ||
257 | case 4: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3]); break; | ||
258 | case 5: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); break; | ||
259 | case 6: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); break; | ||
260 | case 7: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); break; | ||
261 | case 8: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); break; | ||
262 | case 9: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); break; | ||
263 | case 10: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); break; | ||
264 | case 11: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); break; | ||
265 | case 12: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); break; | ||
266 | case 13: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); break; | ||
267 | case 14: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); break; | ||
268 | case 15: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); break; | ||
269 | case 16: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); break; | ||
270 | |||
271 | default: | ||
272 | fprintf(stderr,"XML Parser: Error too many attributes for tag '%s' on line %d.\n",tag->name,lineno); | ||
273 | exit(1); | ||
274 | } | ||
275 | |||
276 | tag=NULL; | ||
277 | |||
278 | break; | ||
279 | |||
280 | /* The end of the end-tag for an element */ | ||
281 | |||
282 | case LEX_TAG_POP: | ||
283 | |||
284 | tags=tagstack[--stackused]; | ||
285 | tag=NULL; | ||
286 | |||
287 | break; | ||
288 | |||
289 | /* An attribute key */ | ||
290 | |||
291 | case LEX_ATTR_KEY: | ||
292 | |||
293 | attribute=-1; | ||
294 | |||
295 | for(i=0;i<nattributes;i++) | ||
296 | if(!strcasecmp(yylval,tag->attributes[i])) | ||
297 | { | ||
298 | attribute=i; | ||
299 | |||
300 | break; | ||
301 | } | ||
302 | |||
303 | if(attribute==-1) | ||
304 | { | ||
305 | fprintf(stderr,"XML Parser: Error unexpected attribute '%s' for tag '%s' on line %d.\n",yylval,tag->name,lineno); | ||
306 | exit(1); | ||
307 | } | ||
308 | break; | ||
309 | |||
310 | /* An attribute value */ | ||
311 | |||
312 | case LEX_ATTR_VAL: | ||
313 | |||
314 | if(yylval) | ||
315 | attributes[attribute]=strcpy(malloc(strlen(yylval)+1),yylval); | ||
316 | } | ||
317 | |||
318 | /* Delete the tagdata */ | ||
319 | |||
320 | for(i=0;i<nattributes;i++) | ||
321 | if(attributes[i]) | ||
322 | free(attributes[i]); | ||
323 | |||
324 | if(stackdepth) | ||
325 | free(tagstack); | ||
326 | } |
Properties
Name | Value |
---|---|
cvs:description | A simple generic XML parser. |