// one of the tiniest but still very functional xml parsers (see test.cpp for example usage) // by Wouter van Oortmerssen, ZLIB license applies. #ifndef NANO_XML #define NANO_XML struct ParseError { char *err; ParseError(char *e) : err(e) {}; }; struct NanoXMLSink { virtual void StartDocument(bool unicode)=0; virtual void StartElement(wchar_t *name)=0; virtual void EndElement()=0; virtual void Data(wchar_t *name)=0; virtual void Attribute(wchar_t *name,wchar_t *value)=0; virtual void EndDocument(const char *error)=0; virtual unsigned int GetByte()=0; }; struct NanoXML { int token, ch, lineno, linepos; bool unicode; NanoXMLSink *sink; void Parse(NanoXMLSink *_sink) { sink = _sink; lineno = linepos = 1; unicode = false; GetC(); if(ch==0xFF) { GetC(); if(ch==0xFE) { unicode = true; GetC(); linepos = 1; }; }; sink->StartDocument(unicode); try { Lex(); Expect('<'); ParseNode(); } catch(ParseError pe) { sink->EndDocument(pe.err); return; }; sink->EndDocument(NULL); // meaning no error return; }; void Expect(int n) { if(token!=n) Error("unexpected token"); Lex(); }; void Error(char *e) { throw ParseError(e); }; /* void AddList(NanoNode **&n, NanoNode *c) { *n = Node(c, false); n = &(*n)->next; };*/ int GetC() { ch = sink->GetByte(); if(unicode) ch |= sink->GetByte()<<8; linepos++; return ch; }; void ExpectWord() { if(token!='\"') Error("word expected"); }; void NewLine() { lineno++; linepos = 0; }; #define MAXWORDSIZE 10000 #define AddToWord(c) { if(p-word>=MAXWORDSIZE-1) Error("increase max word size"); *p++ = c; } wchar_t word[MAXWORDSIZE], attrib[MAXWORDSIZE]; void Lex() { for(wchar_t *p = word;;) switch(token = ch) { case '\n': NewLine(); case ' ': case '\t': case '\r': GetC(); continue; case 0: case -1: return; case '=': case '>': GetC(); return; case '<': GetC(); if(ch=='!') { GetC(); if(ch=='[') { if(GetC()!='C' || GetC()!='D' || GetC()!='A' || GetC()!='T' || GetC()!='A' || GetC()!='[') Error("CDATA block expected"); GetC(); if(ch=='\r') { GetC(); if(ch=='\n') GetC(); }; for(;;) { if(ch<=0) Error("EOF inside CDATA block"); if(ch=='\n') NewLine(); AddToWord(ch); GetC(); if(p-word>=3 && p[-1]=='>' && p[-2]==']' && p[-3]==']') break; }; p -= 3; *p++ = 0; token = '\"'; return; } else { int nest = 1; while(nest) { if(ch=='>') nest--; if(ch=='<') nest++; GetC(); }; continue; }; }; return; case '/': GetC(); if(ch=='>') { GetC(); token = '/>'; }; return; case '\"': GetC(); while(ch!='\"' && ch!='\n' && ch>=0) { AddToWord(ch); GetC(); }; GetC(); *p++ = 0; return; default: while(ch>' ' && ch!='<' && ch!='>' && ch!='/' && ch!='=') { AddToWord(ch); GetC(); }; if(p-word==0) Error("empty word"); *p++ = 0; token = '\"'; return; }; }; void ParseNode() { // Skip { for(;;) { Lex(); if(token=='\"' && word[0]=='?') break; }; Lex(); Expect('>'); Expect('<'); } else break; }; ExpectWord(); sink->StartElement(word); wchar_t elemname[256]; wcscpy(elemname,word); Lex(); for(;;) { if(token=='>') { Lex(); break; }; if(token=='/>') { Lex(); sink->EndElement(); return; }; ExpectWord(); if(word[0]=='?') // assume xml header, just skip to ?> { /* for(;;) { Lex(); if(token=='\"' && word[0]=='?') break; }; */ Lex(); Expect('>'); break; } else { wcscpy(attrib,word); Lex(); Expect('='); ExpectWord(); sink->Attribute(attrib, word); Lex(); } }; for(;;) { if(token=='\"') { sink->Data(word); Lex(); continue; }; Expect('<'); if(token!='/') { ParseNode(); continue; }; Lex(); ExpectWord(); if(wcscmp(word, elemname)) Error("closing tag and opening don't match"); sink->EndElement(); Lex(); Expect('>'); return; }; }; }; #endif //NANO_XML