diff options
Diffstat (limited to 'examples/xml.g')
-rw-r--r-- | examples/xml.g | 66 |
1 files changed, 66 insertions, 0 deletions
#!/usr/bin/python2

# xml.g
#
# Amit J. Patel, August 2003
#
# Simple (non-conforming, non-validating) parsing of XML documents,
# based on Robert D. Cameron's "REX" shallow parser.  Comments, CDATA
# sections and <!...> declarations are recognised, but their contents
# are thrown away and only a placeholder is returned.  Lots of other
# stuff is not handled at all; it's meant to demonstrate Yapps, not
# replace a proper XML parser.

%%

parser xml:
    # Character data between tags: anything up to the next angle bracket.
    token nodetext:             r'[^<>]+'
    # Attribute values; the surrounding quotes are matched by the
    # `attribute` rule, so each pattern only excludes its own delimiter.
    token attrtext_singlequote: "[^']*"
    token attrtext_doublequote: '[^"]*'
    # A single whitespace character (rules repeat it with SP*).
    token SP:                   r'\s'
    # Element and attribute names.
    token id:                   r'[a-zA-Z_:][a-zA-Z0-9_:.-]*'

    # node: parse one item and return either
    #   ['!--comment'], ['![CDATA['] or ['!doctype']   placeholders,
    #   [id, attributes] + nodes                       <id ...> ... </id>,
    #   [id, attributes]                               <id ... />,
    #   the matched text                               character data.
    # An AssertionError is raised when a closing tag names a different
    # element than the opening tag.
    rule node:
          # (?s) lets '.' match newlines, so comments and CDATA sections
          # that span several lines are accepted as a single token.
          r'(?s)<!--.*?-->'           {{ return ['!--comment'] }}
        | r'(?s)<!\[CDATA\[.*?\]\]>'  {{ return ['![CDATA['] }}
        | r'<!' SP* id '[^>]*>'       {{ return ['!doctype'] }}
        | '<' SP* id SP* attributes SP*
                                      {{ startid = id }}
          # `id` is rebound by the closing tag below, hence the copy above.
          ( '>' nodes '</' SP* id SP* '>'
                                      {{ assert startid == id, 'Mismatched tags <%s> ... </%s>' % (startid, id) }}
                                      {{ return [id, attributes] + nodes }}
          | r'/\s*>'                  {{ return [id, attributes] }}
          )
        | nodetext                    {{ return nodetext }}

    # nodes: zero or more consecutive nodes, returned as a list.
    rule nodes:                       {{ result = [] }}
        ( node                        {{ result.append(node) }}
        ) *                           {{ return result }}

    # attribute: name = "value" or name = 'value'; returns (name, value).
    rule attribute: id SP* '=' SP*
        ( '"' attrtext_doublequote '"'
                                      {{ return (id, attrtext_doublequote) }}
        | "'" attrtext_singlequote "'"
                                      {{ return (id, attrtext_singlequote) }}
        )

    # attributes: zero or more attributes, returned as a dict mapping
    # name to value; a repeated name keeps the last value seen.
    rule attributes:                  {{ result = {} }}
        ( attribute SP*               {{ result[attribute[0]] = attribute[1] }}
        ) *                           {{ return result }}

%%

if __name__ == '__main__':
    # Each string is parsed as a single node; the result, or the reason
    # for the failure, is printed next to the input.
    tests = ['<!-- hello -->',
             '<!-- spans\ntwo lines -->',
             'some text',
             '< bad xml',
             '<br />',
             '< spacey a = "foo" / >',
             # NOTE(review): the original literal contained a line-break-like
             # character between "... " and "</a>"; it is written as \n here
             # -- confirm against the upstream file.
             '<a href="foo">text ... \n</a>',
             '<begin> middle </end>',
             '<begin> <nested attr=\'baz\' another="hey"> foo </nested> <nested> bar </nested> </begin>',
            ]
    print
    print '____Running tests_______________________________________'
    for test in tests:
        print
        try:
            parser = xml(xmlScanner(test))
            output = '%s ==> %s' % (repr(test), repr(parser.node()))
        # NOTE(review): assumes the generated parser module binds the name
        # `yappsrt` -- confirm for the Yapps version in use.
        except (yappsrt.SyntaxError, AssertionError), e:
            output = '%s ==> FAILED ==> %s' % (repr(test), e)
        print output