%{
/***************************************
 $Header: /home/amb/CVS/routino/src/xmlparse.l,v 1.2 2010-03-29 18:13:20 amb Exp $

 A simple generic XML parser where the structure comes from the function parameters.

 Part of the Routino routing software.
 ******************/ /******************
 This file Copyright 2010 Andrew M. Bishop

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>            /* strcasecmp() */


/* Parser outputs (the token values returned by yylex() to ParseXML()) */

#define LEX_TAG_BEGIN    1      /* "<name" seen; yylval is the tag name */
#define LEX_TAG_POP      2      /* "</name>" seen */
#define LEX_TAG_PUSH     3      /* ">" closing a start-tag */
#define LEX_TAG_FINISH   4      /* "/>" or "?>" closing an empty-element tag */
#define LEX_ATTR_KEY     5      /* attribute name; yylval is the name */
#define LEX_ATTR_VAL     6      /* attribute value; yylval is the value or NULL if there is none */


/* Lexer definitions */

#define YY_SKIP_YYWRAP 1 /* Remove error with prototype of ..._yywrap */
#ifndef yywrap
/*+ Needed in lex but does nothing. +*/
#define yywrap() 1
#endif

/*+ Reset the current string (keeps the buffer, if any, for re-use). +*/
#define reset_string \
 do \
   { \
    if(string) *string=0; \
    stringused=0; \
   } \
 while(0)

/*+ Append information to the current string, growing the buffer as required.
    The buffer length is only updated once the reallocation has succeeded. +*/
#define append_string(xx) \
 do \
   { \
    newlen=(int)strlen(xx); \
    if((stringused+newlen)>=stringlen) \
      { \
       char *bigger=(char*)realloc((void*)string,(size_t)(stringused+newlen+16)); \
       if(!bigger) \
         { \
          fprintf(stderr,"XML Parser: Error out of memory on line %d.\n",lineno); \
          exit(1); \
         } \
       string=bigger; \
       stringlen=stringused+newlen+16; \
      } \
    strcpy(string+stringused,xx); \
    stringused+=newlen; \
   } \
 while(0)

#define YY_NO_INPUT


/* Lexer functions and variables */

extern int yylex(void);

/*+ The text that goes with the most recently returned token; it points into
    lexer-owned storage and is only valid until the next call to yylex(). +*/
static char *yylval=NULL;

/*+ The current line number in the input, used in the error messages. +*/
static int lineno=0;

%}

W               [ \t]

nonascii        [\200-\377]

ascii           [ -~]
alphanum        [a-zA-Z0-9]
punct           [][!\"#$%&\'()*+,-./:;<=>?@\\^_`{|}~]
safepunct       [][!\#$%\()*+,-./:;=?@\\^_`{|}~]

tag             ({alphanum}|[-:])+
key             ({alphanum}|[-:])+
val             ({alphanum}|{nonascii}|{safepunct})+

%x COMMENT
%x TAG_START TAG TAG_ATTR_KEY TAG_ATTR_VAL
%x DQUOTED SQUOTED

%%
 /* Must use static variables since the parser returns often. */
 static char *string=NULL;
 static int stringlen=0,stringused=0;
 int newlen;

 /* Handle comments and other tags */

"<!--"                      { BEGIN(COMMENT); }
"<"                         { BEGIN(TAG_START); }
\n                          { lineno++; }
[^<\n]+                     { }

 /* Comments - not strictly correct. */

<COMMENT>"--"{W}*">"        { BEGIN(INITIAL); }
<COMMENT>">"                { }
<COMMENT>"-"                { }
<COMMENT>\n                 { lineno++; }
<COMMENT>[^->\n]+           { }

 /* Tags */

<TAG_START>{W}+             { }
<TAG_START>"?xml"           { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
<TAG_START>{tag}            { BEGIN(TAG); yylval=yytext; return(LEX_TAG_BEGIN); }
<TAG_START>"/"{tag}">"      { BEGIN(INITIAL); return(LEX_TAG_POP); }
<TAG_START>\n               { BEGIN(INITIAL); lineno++; }
<TAG_START>.                { BEGIN(INITIAL); }

<TAG>{W}+                   { }
<TAG>"/>"                   { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
<TAG>"?>"                   { BEGIN(INITIAL); return(LEX_TAG_FINISH); }
<TAG>">"                    { BEGIN(INITIAL); return(LEX_TAG_PUSH); }
<TAG>{key}                  { BEGIN(TAG_ATTR_KEY); yylval=yytext; return(LEX_ATTR_KEY); }
<TAG>\n                     { lineno++; }
<TAG>.                      { }

 /* A key that is not followed by '=' has no value: push the character back
    and report a NULL value so that every key is paired with a value token. */

<TAG_ATTR_KEY>{W}*=         { BEGIN(TAG_ATTR_VAL); }
<TAG_ATTR_KEY>\n            { lineno++; }
<TAG_ATTR_KEY>.             { BEGIN(TAG); unput(yytext[0]); yylval=NULL; return(LEX_ATTR_VAL); }

<TAG_ATTR_VAL>{W}+          { }
<TAG_ATTR_VAL>\"            { BEGIN(DQUOTED); reset_string; }
<TAG_ATTR_VAL>\'            { BEGIN(SQUOTED); reset_string; }
<TAG_ATTR_VAL>{val}         { BEGIN(TAG); yylval=yytext; return(LEX_ATTR_VAL); }
<TAG_ATTR_VAL>\n            { lineno++; }
<TAG_ATTR_VAL>.             { BEGIN(TAG); unput(yytext[0]); yylval=NULL; return(LEX_ATTR_VAL); }

 /* Quoted strings.  On the closing quote an empty append guarantees that the
    buffer exists, so an empty value is returned as "" and never as NULL
    (NULL is reserved for "attribute has no value"). */

<DQUOTED>\\\\               { append_string(yytext); }
<DQUOTED>\\\"               { append_string(yytext); }
<DQUOTED>\\                 { append_string(yytext); }
<DQUOTED>\"                 { BEGIN(TAG); if(!string) append_string(""); yylval=string; return(LEX_ATTR_VAL); }
<DQUOTED>\n                 { lineno++; }
<DQUOTED>[^\\\"\n]+         { append_string(yytext); }

<SQUOTED>\\\\               { append_string(yytext); }
<SQUOTED>\\\'               { append_string(yytext); }
<SQUOTED>\\                 { append_string(yytext); }
<SQUOTED>\'                 { BEGIN(TAG); if(!string) append_string(""); yylval=string; return(LEX_ATTR_VAL); }
<SQUOTED>\n                 { lineno++; }
<SQUOTED>[^\\\'\n]+         { append_string(yytext); }

 /* End of file.  The length counters are reset together with the pointer so
    that a later parse does not believe that the freed buffer still exists. */

<<EOF>>                     { free(string); string=NULL; stringlen=0; stringused=0; BEGIN(INITIAL); return(0); }

%%

#include "xmlparse.h"


/*++++++++++++++++++++++++++++++++++++++
  Parse the XML and call the functions for each tag as seen.

  The callback of a tag is called when its start-tag (or empty-element tag) has
  been read completely; it is passed one string per attribute declared for the
  tag, in declaration order, with NULL for those not present in the input.  The
  strings are owned by the parser and are freed when the next tag starts.

  Any error (unknown tag, unbalanced end tag, out of memory or, depending on
  ignore_unknown_attributes, an unknown attribute) prints a message to stderr
  and calls exit(1).

  FILE *file The file to parse.

  xmltag **tags The array of pointers to tags for the top level.

  int ignore_unknown_attributes If set to 0 then exit if unknown attribute is seen, if set to 1 then warn, if set to 2 then ignore.
  ++++++++++++++++++++++++++++++++++++++*/

void ParseXML(FILE *file,xmltag **tags,int ignore_unknown_attributes)
{
 int token,i;

 int nattributes=0;
 char *attributes[XMLPARSE_MAX_ATTRS];
 int attribute=0;

 int stackdepth=0,stackused=0;
 xmltag ***tagstack=NULL;
 xmltag *tag=NULL;

 static int first=1;

 /* Parser (re)-initialisation */

 yyin=file;

 /* yyrestart() installs its argument as the input of the current buffer, so it
    must be given the new file; passing NULL would leave yyin NULL. */

 if(!first)
    yyrestart(file);

 first=0;

 lineno=1;

 /* The actual parser. */

 while((token=yylex()))
    switch(token)
      {
       /* The start of a tag for an element */

      case LEX_TAG_BEGIN:

       tag=NULL;

       /* 'tags' is NULL when the enclosing tag declared no sub-tags. */

       for(i=0;tags && tags[i];i++)
          if(!strcasecmp(yylval,tags[i]->name))
            {
             int j;

             tag=tags[i];

             /* Release the values collected for the previous tag. */

             for(j=0;j<nattributes;j++)
                free(attributes[j]);

             /* Count the attributes that this tag declares. */

             for(j=0;j<XMLPARSE_MAX_ATTRS;j++)
                if(!tag->attributes[j])
                   break;

             nattributes=j;

             for(j=0;j<nattributes;j++)
                attributes[j]=NULL;

             break;
            }

       if(tag==NULL)
         {
          fprintf(stderr,"XML Parser: Error unexpected tag '%s' on line %d.\n",yylval,lineno);
          exit(1);
         }

       break;

       /* The end of the start-tag for an element */

      case LEX_TAG_PUSH:

       if(stackused==stackdepth)
         {
          xmltag ***bigger=(xmltag***)realloc((void*)tagstack,(stackdepth+8)*sizeof(xmltag**));

          if(!bigger)
            {
             fprintf(stderr,"XML Parser: Error out of memory on line %d.\n",lineno);
             exit(1);
            }

          tagstack=bigger;
          stackdepth+=8;
         }

       tagstack[stackused++]=tags;
       tags=tag->subtags;

       /* fall through - the callback is made for start-tags as well as empty-element tags */

       /* The end of the empty-element-tag for an element */

      case LEX_TAG_FINISH:

       if(tag->callback)
          switch(nattributes)
            {
            case  0: (*tag->callback)(); break;
            case  1: (*tag->callback)(attributes[0]); break;
            case  2: (*tag->callback)(attributes[0],attributes[1]); break;
            case  3: (*tag->callback)(attributes[0],attributes[1],attributes[2]); break;
            case  4: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3]); break;
            case  5: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4]); break;
            case  6: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5]); break;
            case  7: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6]); break;
            case  8: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7]); break;
            case  9: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8]); break;
            case 10: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9]); break;
            case 11: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10]); break;
            case 12: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11]); break;
            case 13: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12]); break;
            case 14: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13]); break;
            case 15: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14]); break;
            case 16: (*tag->callback)(attributes[0],attributes[1],attributes[2],attributes[3],attributes[4],attributes[5],attributes[6],attributes[7],attributes[8],attributes[9],attributes[10],attributes[11],attributes[12],attributes[13],attributes[14],attributes[15]); break;

            default:
             fprintf(stderr,"XML Parser: Error too many attributes for tag '%s' on line %d.\n",tag->name,lineno);
             exit(1);
            }

       tag=NULL;

       break;

       /* The end of the end-tag for an element */

      case LEX_TAG_POP:

       /* An end tag without a matching start tag would underflow the stack. */

       if(stackused==0)
         {
          fprintf(stderr,"XML Parser: Error unexpected end tag on line %d.\n",lineno);
          exit(1);
         }

       tags=tagstack[--stackused];
       tag=NULL;

       break;

       /* An attribute key */

      case LEX_ATTR_KEY:

       attribute=-1;

       for(i=0;i<nattributes;i++)
          if(!strcasecmp(yylval,tag->attributes[i]))
            {
             attribute=i;

             break;
            }

       if(attribute==-1)
         {
          if(ignore_unknown_attributes==0)
            {
             fprintf(stderr,"XML Parser: Error unexpected attribute '%s' for tag '%s' on line %d.\n",yylval,tag->name,lineno);
             exit(1);
            }
          else if(ignore_unknown_attributes==1)
             fprintf(stderr,"XML Parser: Warning unexpected attribute '%s' for tag '%s' on line %d.\n",yylval,tag->name,lineno);
         }

       break;

       /* An attribute value */

      case LEX_ATTR_VAL:

       /* The value is copied because yylval points into lexer-owned storage;
          a NULL yylval (no value) or an unknown key (-1) is skipped. */

       if(yylval && attribute!=-1)
         {
          char *copy=(char*)malloc(strlen(yylval)+1);

          if(!copy)
            {
             fprintf(stderr,"XML Parser: Error out of memory on line %d.\n",lineno);
             exit(1);
            }

          strcpy(copy,yylval);

          /* A repeated attribute replaces the earlier value without leaking it. */

          free(attributes[attribute]);
          attributes[attribute]=copy;
         }

       break;
      }

 /* Delete the tagdata */

 for(i=0;i<nattributes;i++)
    free(attributes[i]);

 free(tagstack);
}