From be5a70d3aa1c30d7c86d77649b747de2838566ce Mon Sep 17 00:00:00 2001 From: sienkiew Date: Thu, 21 Jul 2011 15:17:58 +0000 Subject: initial import of yapps from debian sources git-svn-id: http://svn.stsci.edu/svn/ssb/etal/exyapps/trunk@356 d34015c8-bcbb-4646-8ac8-8ba5febf221d --- LICENSE | 20 + changelog | 983 +++++++++++++++++++++++++++++++++++++++ doc/yapps2.haux | 31 ++ doc/yapps2.html | 1206 ++++++++++++++++++++++++++++++++++++++++++++++++ doc/yapps2.htoc | 36 ++ doc/yapps2.tex | 1246 ++++++++++++++++++++++++++++++++++++++++++++++++++ examples/calc.g | 64 +++ examples/expr.g | 21 + examples/lisp.g | 13 + examples/notes | 44 ++ examples/xml.g | 66 +++ setup.py | 42 ++ test/empty_clauses.g | 10 + test/line_numbers.g | 10 + test/option.g | 17 + yapps/__init__.py | 1 + yapps/grammar.py | 211 +++++++++ yapps/parsetree.py | 673 +++++++++++++++++++++++++++ yapps/runtime.py | 442 ++++++++++++++++++ yapps2.py | 113 +++++ yapps_grammar.g | 121 +++++ 21 files changed, 5370 insertions(+) create mode 100644 LICENSE create mode 100644 changelog create mode 100644 doc/yapps2.haux create mode 100644 doc/yapps2.html create mode 100644 doc/yapps2.htoc create mode 100644 doc/yapps2.tex create mode 100644 examples/calc.g create mode 100644 examples/expr.g create mode 100644 examples/lisp.g create mode 100644 examples/notes create mode 100644 examples/xml.g create mode 100644 setup.py create mode 100644 test/empty_clauses.g create mode 100644 test/line_numbers.g create mode 100644 test/option.g create mode 100644 yapps/__init__.py create mode 100644 yapps/grammar.py create mode 100644 yapps/parsetree.py create mode 100644 yapps/runtime.py create mode 100755 yapps2.py create mode 100644 yapps_grammar.g diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..87fd179 --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ + + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in 
the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/changelog b/changelog new file mode 100644 index 0000000..4fdf12e --- /dev/null +++ b/changelog @@ -0,0 +1,983 @@ +ChangeSet + 1.38 05/01/22 19:36:32 smurf@smurf.noris.de +2 -0 + Add option to limit backtrace depth on syntax errors. + + yapps/runtime.py + 1.15 05/01/22 19:36:31 smurf@smurf.noris.de +5 -1 + Add option to limit backtrace depth on syntax errors. + + debian/changelog + 1.20 05/01/22 19:36:31 smurf@smurf.noris.de +2 -1 + Add option to limit backtrace depth on syntax errors. + +ChangeSet + 1.37 05/01/22 03:39:56 smurf@smurf.noris.de +2 -0 + Fix recursive includes. + + yapps/runtime.py + 1.14 05/01/22 03:39:54 smurf@smurf.noris.de +395 -381 + Fix recursive includes. + + debian/changelog + 1.19 05/01/22 03:39:54 smurf@smurf.noris.de +6 -0 + Fix recursive includes. + +ChangeSet + 1.36 04/12/23 23:49:52 smurf@smurf.noris.de +1 -0 + Brown paper bag -- fix Python 2.4 stuff. + + debian/changelog + 1.18 04/12/23 23:49:52 smurf@smurf.noris.de +6 -0 + Brown paper bag -- fix Python 2.4 stuff. 
+ +ChangeSet + 1.35 04/12/23 21:00:34 smurf@smurf.noris.de +1 -0 + typo + + debian/control + 1.10 04/12/23 21:00:33 smurf@smurf.noris.de +1 -1 + typo + +ChangeSet + 1.34 04/12/12 20:22:54 smurf@smurf.noris.de +2 -0 + Add support for Python 2.4 + + debian/control + 1.9 04/12/12 20:22:52 smurf@smurf.noris.de +1 -1 + Add support for Python 2.4 + + debian/changelog + 1.17 04/12/12 20:22:52 smurf@smurf.noris.de +6 -0 + Add support for Python 2.4 + +ChangeSet + 1.33 04/09/23 11:24:16 smurf@smurf.noris.de +3 -0 + update documentation: + - toss hyphens + - document extensions + + doc/yapps2.tex + 1.3 04/09/23 11:24:16 smurf@smurf.noris.de +21 -0 + add a Debian Extensions section + + debian/yapps.1 + 1.2 04/09/23 11:24:16 smurf@smurf.noris.de +14 -9 + escape more hyphens (i.e., all the rest) + + debian/changelog + 1.16 04/09/23 11:24:16 smurf@smurf.noris.de +2 -0 + update documentation: + - toss hyphens + - document extensions + +ChangeSet + 1.32 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 + turn off triggers + + BitKeeper/triggers/pre-commit.upversion + 1.2 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 + off + + BitKeeper/triggers/post-commit.changelog + 1.2 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 + off + +ChangeSet + 1.31 04/09/23 10:55:24 smurf@smurf.noris.de +1 -0 + ignore new package's files + + BitKeeper/etc/ignore + 1.17 04/09/23 10:55:23 smurf@smurf.noris.de +1 -0 + added debian/yapps2-runtime/* + + debian/yapps2-runtime.README + 1.1 04/09/23 10:50:33 smurf@smurf.noris.de +11 -0 + +ChangeSet + 1.30 04/09/23 10:50:33 smurf@smurf.noris.de +8 -0 + split off runtime to its own package + document the fact that I can't use the original runtime + + debian/yapps2-runtime.dirs + 1.6 04/09/23 10:50:33 smurf@smurf.noris.de +2 -4 + split off runtime + + debian/yapps2-runtime.README + 1.0 04/09/23 10:50:33 smurf@smurf.noris.de +0 -0 + BitKeeper file /daten/src/debian/python_yapps/debian/yapps2-runtime.README + + debian/rules + 1.5 04/09/23 10:50:33 smurf@smurf.noris.de +4 
-1 + move runtime files to their own package + + debian/control + 1.8 04/09/23 10:50:33 smurf@smurf.noris.de +14 -1 + split off runtime to its own package + + debian/changelog + 1.15 04/09/23 10:50:33 smurf@smurf.noris.de +9 -0 + document package split + + debian/README + 1.2 04/09/23 10:50:33 smurf@smurf.noris.de +21 -4 + Updated for package split + + debian/yapps2.docs + 1.3 04/09/23 10:31:15 smurf@smurf.noris.de +0 -0 + Rename: debian/docs -> debian/yapps2.docs + + debian/yapps2-runtime.dirs + 1.5 04/09/23 10:30:48 smurf@smurf.noris.de +0 -0 + bk cp yapps2.dirs yapps2-runtime.dirs + + debian/yapps2.dirs + 1.4 04/09/23 10:30:42 smurf@smurf.noris.de +0 -0 + Rename: debian/dirs -> debian/yapps2.dirs + + debian/yapps2.dirs + 1.4 04/09/23 10:30:42 smurf@smurf.noris.de +0 -0 + Rename: debian/dirs -> debian/yapps2.dirs + +ChangeSet + 1.29 04/07/19 09:30:22 smurf@smurf.noris.de +5 -0 + latex2html => hevea + + debian/yapps2.doc-base + 1.2 04/07/19 09:30:21 smurf@smurf.noris.de +2 -2 + latex2html => hevea + + debian/rules + 1.4 04/07/19 09:30:21 smurf@smurf.noris.de +4 -2 + latex2html => hevea + + debian/control + 1.7 04/07/19 09:30:21 smurf@smurf.noris.de +1 -1 + latex2html => hevea + + debian/changelog + 1.14 04/07/19 09:30:21 smurf@smurf.noris.de +6 -0 + latex2html => hevea + + BitKeeper/etc/ignore + 1.16 04/07/19 09:30:06 smurf@smurf.noris.de +1 -0 + added doc/yapps2.haux + + BitKeeper/etc/ignore + 1.15 04/07/19 09:29:55 smurf@smurf.noris.de +1 -0 + added doc/yapps2.ht* + +ChangeSet + 1.28 04/07/12 09:35:59 smurf@smurf.noris.de +2 -0 + Build-Depend on python. + + debian/control + 1.6 04/07/12 09:35:58 smurf@smurf.noris.de +1 -1 + Build-Depend on python. 
+ + debian/changelog + 1.13 04/07/12 09:35:58 smurf@smurf.noris.de +6 -0 + doc + +ChangeSet + 1.27 04/05/16 22:02:40 smurf@smurf.noris.de +2 -0 + ship "empty" file + + yapps/__init__.py + 1.2 04/05/16 22:02:39 smurf@smurf.noris.de +1 -0 + ship "empty" file + + debian/changelog + 1.12 04/05/16 22:02:39 smurf@smurf.noris.de +2 -1 + doc + +ChangeSet + 1.26 04/05/16 22:01:42 smurf@smurf.noris.de +2 -0 + Typo (made large file handling slow) + + yapps/runtime.py + 1.13 04/05/16 22:01:42 smurf@smurf.noris.de +1 -1 + Typo + + debian/changelog + 1.11 04/05/16 22:01:42 smurf@smurf.noris.de +6 -0 + Version 2.1.1-11 + +ChangeSet + 1.25 04/05/14 12:25:51 smurf@smurf.noris.de +1 -0 + exporter: test was in wrong dir + + debian/exporter + 1.3 04/05/14 12:25:51 smurf@smurf.noris.de +1 -0 + wrong dir + +ChangeSet + 1.24 04/05/14 12:20:04 smurf@smurf.noris.de +1 -0 + Clean up external source before generating a diff + + debian/exporter + 1.2 04/05/14 12:20:04 smurf@smurf.noris.de +7 -0 + Clean up external source before generating a diff + +ChangeSet + 1.23 04/05/14 12:14:34 smurf@linux.smurf.noris.de +13 -0 + Documentation update: + build and install HTML documentation from LaTex source + + debian/changelog + 1.10 04/05/14 12:14:33 smurf@linux.smurf.noris.de +7 -0 + Version 2.1.1-10 + + debian/yapps2.doc-base + 1.1 04/05/14 12:14:32 smurf@smurf.noris.de +13 -0 + + yapps_grammar.g + 1.5 04/05/14 12:14:31 smurf@smurf.noris.de +1 -0 + add my copyright notice + + yapps/runtime.py + 1.12 04/05/14 12:14:31 smurf@smurf.noris.de +1 -0 + add my copyright notice + + debian/yapps2.doc-base + 1.0 04/05/14 12:14:31 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/yapps2.doc-base + + debian/rules + 1.3 04/05/14 12:14:31 smurf@smurf.noris.de +5 -5 + gernerate and install html documentation + don't install LICENSE file + + debian/yapps2-runtime.dirs + 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +1 -2 + drop overrides + add doc dir +html + + debian/docs + 1.2 04/05/14 
12:14:30 smurf@smurf.noris.de +1 -1 + install latex documentation + + debian/dirs + 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +1 -2 + drop overrides + add doc dir +html + + debian/copyright + 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +21 -3 + include license here instead of installing a LICENSE file + + debian/control + 1.5 04/05/14 12:14:30 smurf@smurf.noris.de +6 -6 + Dep on latex2html + indent list + + BitKeeper/etc/ignore + 1.14 04/05/14 12:06:12 smurf@smurf.noris.de +1 -0 + added doc/yapps2/* + + BitKeeper/etc/ignore + 1.13 04/05/14 12:06:07 smurf@smurf.noris.de +3 -0 + added debian/yapps2/* debian/*.substvars debian/*.debhelper + + BitKeeper/deleted/.del-overrides.lintian~19711613dc4ce90f + 1.3 04/05/14 11:51:33 smurf@smurf.noris.de +0 -0 + Delete: debian/overrides.lintian + + BitKeeper/deleted/.del-overrides.linda~b0c6fa08da170a16 + 1.2 04/05/14 11:51:33 smurf@smurf.noris.de +0 -0 + Delete: debian/overrides.linda + + doc/yapps2.tex + 1.2 04/05/14 11:34:34 smurf@smurf.noris.de +0 -0 + Rename: yapps2.tex -> doc/yapps2.tex + +ChangeSet + 1.22 04/05/14 11:33:27 smurf@smurf.noris.de +1 -0 + Merge bk://server/public/python_yapps + into smurf.noris.de:/usr/local/src/misc/yapps + + BitKeeper/deleted/.del-logging_ok~530b65bc14e5cc7c + 1.2 04/05/14 11:33:26 smurf@smurf.noris.de +0 -0 + 'Auto converge rename' + + BitKeeper/etc/logging_ok + 1.1 04/05/14 11:33:13 smurf@smurf.noris.de +1 -0 + +ChangeSet + 1.4.1.1 04/05/14 11:33:13 smurf@linux.smurf.noris.de +2 -0 + Added tex documentation from yapps-2.0.4. + + BitKeeper/etc/logging_ok + 1.0 04/05/14 11:33:13 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/BitKeeper/etc/logging_ok + + yapps2.tex + 1.1 04/05/14 11:33:10 smurf@smurf.noris.de +1225 -0 + + yapps2.tex + 1.0 04/05/14 11:33:10 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/yapps2.tex + +ChangeSet + 1.21 04/05/14 11:31:18 smurf@linux.smurf.noris.de +7 -0 + Renamed the package to "yapps2". 
+ + setup.py + 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +17 -2 + Fixed name + Updated long description + + debian/yapps2-runtime.dirs + 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 + rename: python-yapps => yapps2 + + debian/rules + 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +8 -8 + rename: python-yapps => yapps2 + + debian/overrides.lintian + 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 + rename: python-yapps => yapps2 + + debian/dirs + 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 + rename: python-yapps => yapps2 + + debian/copyright + 1.2 04/05/14 11:31:16 smurf@smurf.noris.de +11 -3 + Added pointer to original source + + debian/control + 1.4 04/05/14 11:31:16 smurf@smurf.noris.de +18 -10 + rename: python-yapps => yapps2 + + debian/changelog + 1.9 04/05/14 11:31:16 smurf@smurf.noris.de +13 -13 + Cleanup + +ChangeSet + 1.20 03/12/31 14:00:42 smurf@linux.smurf.noris.de +2 -0 + require python-dev because of distutils + + debian/changelog + 1.8 03/12/31 14:00:42 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-8 + + debian/control + 1.3 03/12/31 14:00:40 smurf@smurf.noris.de +1 -1 + require python-dev because of distutils + +ChangeSet + 1.19 03/12/31 13:57:38 smurf@linux.smurf.noris.de +2 -0 + Change yapps.py t exit 1 on failure to parse + + debian/changelog + 1.7 03/12/31 13:57:38 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-7 + + yapps2.py + 1.6 03/12/31 13:57:37 smurf@smurf.noris.de +3 -2 + exit 1 on error + +ChangeSet + 1.18 03/12/30 15:36:56 smurf@linux.smurf.noris.de +2 -0 + Update to 3.6.1, use build-depends-indep. + + debian/changelog + 1.6 03/12/30 15:36:56 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-6 + + debian/control + 1.2 03/12/30 15:36:55 smurf@smurf.noris.de +2 -2 + Update to 3.6.1, use build-depends-indep. + +ChangeSet + 1.17 03/12/30 15:33:19 smurf@linux.smurf.noris.de +2 -0 + Add some notes. 
+ + debian/changelog + 1.5 03/12/30 15:33:19 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-5 + + examples/notes + 1.1 03/12/30 15:33:18 smurf@smurf.noris.de +44 -0 + + examples/notes + 1.0 03/12/30 15:33:17 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/examples/notes + +ChangeSet + 1.16 03/12/30 15:30:05 smurf@linux.smurf.noris.de +2 -0 + Correctly report syntax errors without line number + + debian/changelog + 1.4 03/12/30 15:30:05 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-4 + + yapps/runtime.py + 1.11 03/12/30 15:30:04 smurf@smurf.noris.de +6 -2 + Report syntax errors with no line number + +ChangeSet + 1.15 03/12/30 14:02:37 smurf@linux.smurf.noris.de +2 -0 + Repair ignored-pattern upcall. + + debian/changelog + 1.3 03/12/30 14:02:37 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-3 + + yapps/runtime.py + 1.10 03/12/30 14:02:36 smurf@smurf.noris.de +4 -2 + Repair ignore upcall. + +ChangeSet + 1.14 03/12/30 13:30:14 smurf@linux.smurf.noris.de +2 -0 + runtime: fix error reporting + + debian/changelog + 1.2 03/12/30 13:30:14 smurf@linux.smurf.noris.de +6 -0 + Version 2.1.1-2 + + yapps/runtime.py + 1.9 03/12/30 13:30:12 smurf@smurf.noris.de +9 -9 + Fix error reporting + +ChangeSet + 1.13 03/12/30 12:25:29 smurf@linux.smurf.noris.de +2 -0 + replace runtime grammar + yapps_grammar.g: delete shebang line, fix imports + + yapps_grammar.g + 1.4 03/12/30 12:25:28 smurf@smurf.noris.de +1 -3 + fix import + delete shebang line + + yapps/grammar.py + 1.11 03/12/30 12:25:28 smurf@smurf.noris.de +49 -66 + replace runtime grammar + +ChangeSet + 1.12 03/12/30 11:51:26 smurf@linux.smurf.noris.de +19 -0 + D + + setup.py + 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +27 -0 + + debian/yapps.1 + 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +58 -0 + + debian/rules + 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +91 -0 + + debian/overrides.lintian + 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +1 -0 + + setup.py + 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 
-0 + BitKeeper file /usr/local/src/misc/yapps/setup.py + + debian/yapps2-runtime.dirs + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +5 -0 + + debian/yapps.1 + 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/yapps.1 + + debian/rules + 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/rules + + debian/overrides.lintian + 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/overrides.lintian + + debian/overrides.linda + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +3 -0 + + debian/exporter + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +10 -0 + + debian/docs + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +3 -0 + + debian/dirs + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +5 -0 + + debian/copyright + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +15 -0 + + debian/control + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +19 -0 + + debian/compat + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +1 -0 + + debian/README + 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +6 -0 + + yapps_grammar.g + 1.3 03/12/30 11:51:24 smurf@smurf.noris.de +0 -1 + Make the scanner context-sensitive. Works better. + + yapps2.py + 1.5 03/12/30 11:51:24 smurf@smurf.noris.de +1 -1 + Fix path + + yapps/runtime.py + 1.8 03/12/30 11:51:24 smurf@smurf.noris.de +0 -1 + Regularize header + + yapps/parsetree.py + 1.7 03/12/30 11:51:24 smurf@smurf.noris.de +0 -2 + Drop shebang line, this is not a program. + + yapps/grammar.py + 1.10 03/12/30 11:51:24 smurf@smurf.noris.de +0 -2 + Drop shebang line, this is not a program. 
+ + debian/yapps2-runtime.dirs + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/dirs + + debian/overrides.linda + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/overrides.linda + + debian/exporter + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/exporter + + debian/docs + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/docs + + debian/dirs + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/dirs + + debian/copyright + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/copyright + + debian/control + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/control + + debian/compat + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/compat + + debian/README + 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/README + + debian/changelog + 1.1 03/12/30 11:41:14 smurf@smurf.noris.de +15 -0 + + debian/changelog + 1.0 03/12/30 11:41:14 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/debian/changelog + + BitKeeper/etc/ignore + 1.12 03/12/30 11:40:56 smurf@smurf.noris.de +1 -0 + added changelog + +ChangeSet + 1.11 03/12/30 11:23:09 smurf@linux.smurf.noris.de +5 -0 + Rewrote imports et al. 
to create+use a real "yapps" module + + yapps/__init__.py + 1.1 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 + + yapps2.py + 1.4 03/12/30 11:23:08 smurf@smurf.noris.de +3 -3 + Refactor to use reasonable "yapps" module + + yapps/runtime.py + 1.7 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 + Rename: yappsrt.py -> yapps/runtime.py + + yapps/parsetree.py + 1.6 03/12/30 11:23:08 smurf@smurf.noris.de +10 -10 + Refactor to use reasonable "yapps" module + Rename: parsetree.py -> yapps/parsetree.py + + yapps/__init__.py + 1.0 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/yapps/__init__.py + + yapps/grammar.py + 1.9 03/12/30 11:23:07 smurf@smurf.noris.de +16 -16 + Refactor to use reasonable "yapps" module + Rename: grammar.py -> yapps/grammar.py + + BitKeeper/etc/ignore + 1.11 03/12/30 11:22:15 smurf@smurf.noris.de +4 -0 + added build/* debian/python-yapps/* debian/*.debhelper debian/*.substvars + +ChangeSet + 1.10 03/12/29 22:10:59 smurf@linux.smurf.noris.de +1 -0 + Added context-insensitive-scanner end test + to a couple of composite grammar statements. + + It's probably overkill to do that with all statements..? + + parsetree.py + 1.5 03/12/29 22:10:58 smurf@smurf.noris.de +19 -5 + Added context-insensitive-scanner end test + to a couple of composite statements. + + It's probably overkill to do that with all statements..? 
+ +ChangeSet + 1.9 03/12/29 22:05:00 smurf@linux.smurf.noris.de +1 -0 + yappsrt.py: Bugfix: stored token type + + yappsrt.py + 1.6 03/12/29 22:04:59 smurf@smurf.noris.de +1 -1 + Bugfix: stored token type + +ChangeSet + 1.8 03/12/29 21:42:41 smurf@linux.smurf.noris.de +4 -0 + Pass init arguments to the scanner + Simplify stuff a bit + + yappsrt.py + 1.5 03/12/29 21:42:40 smurf@smurf.noris.de +16 -8 + Fix line counting + simplify pattern length determination + + yapps2.py + 1.3 03/12/29 21:42:40 smurf@smurf.noris.de +3 -2 + Pass filename to scanner + + parsetree.py + 1.4 03/12/29 21:42:40 smurf@smurf.noris.de +2 -2 + Pass init arguments to the scanner + + grammar.py + 1.8 03/12/29 21:42:40 smurf@smurf.noris.de +3 -2 + Update for changed yapps2.py + +ChangeSet + 1.7 03/12/29 20:37:55 smurf@linux.smurf.noris.de +6 -0 + Cleanup ignored-symbol commands + Fix including and error reporting + + yappsrt.py + 1.4 03/12/29 20:37:54 smurf@smurf.noris.de +88 -52 + cleanup ignored symbol handling + refactor _scan and _peek: move to Scanner + generate pseudo filenames for inline documents + accept context for error handling + + yapps_grammar.g + 1.2 03/12/29 20:37:54 smurf@smurf.noris.de +4 -1 + Cleanup statements for ignored symbols + + yapps2.py + 1.2 03/12/29 20:37:54 smurf@smurf.noris.de +1 -1 + Setup line numbers correctly + + parsetree.py + 1.3 03/12/29 20:37:54 smurf@smurf.noris.de +22 -6 + Ignored-symbol handling extended + Pass context to scanners + + grammar.py + 1.7 03/12/29 20:37:54 smurf@smurf.noris.de +1 -1 + Use a hash for ignored stuff + + examples/calc.g + 1.3 03/12/29 20:37:54 smurf@smurf.noris.de +7 -4 + Cleanup include handling: use an ignored token + +ChangeSet + 1.6 03/12/29 18:16:19 smurf@linux.smurf.noris.de +6 -0 + Reproduce current grammar file + + One problem with attribute remains. 
+ + yapps_grammar.g + 1.1 03/12/29 18:16:18 smurf@smurf.noris.de +120 -0 + + yappsrt.py + 1.3 03/12/29 18:16:18 smurf@smurf.noris.de +5 -5 + charpos => pos + + yapps_grammar.g + 1.0 03/12/29 18:16:18 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/yapps_grammar.g + + BitKeeper/etc/ignore + 1.10 03/12/29 18:15:23 smurf@smurf.noris.de +1 -0 + added *.swp + + BitKeeper/etc/ignore + 1.9 03/12/29 18:15:02 smurf@smurf.noris.de +1 -0 + added yapps_grammar.py + + BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 + 1.4 03/12/29 18:14:35 smurf@smurf.noris.de +0 -0 + Delete: yapps_grammar.py + + grammar.py + 1.6 03/12/29 18:14:17 smurf@smurf.noris.de +0 -0 + Rename: BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 -> grammar.py + + BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 + 1.5 03/12/29 18:13:11 smurf@smurf.noris.de +0 -0 + Delete: grammar.py + + yapps_grammar.py + 1.3 03/12/29 18:13:04 smurf@smurf.noris.de +0 -0 + Rename: BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 -> yapps_grammar.py + + grammar.py + 1.4 03/12/29 18:12:51 smurf@smurf.noris.de +0 -0 + Rename: BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 -> grammar.py + + BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 + 1.3 03/12/29 18:12:30 smurf@smurf.noris.de +19 -20 + Delete: grammar.py + + BitKeeper/etc/ignore + 1.8 03/12/29 17:15:10 smurf@smurf.noris.de +3 -0 + added *-stamp debian/files debian/tmp/* + + BitKeeper/etc/ignore + 1.7 03/12/29 17:15:08 smurf@smurf.noris.de +3 -0 + added *-stamp debian/files debian/tmp/* + + BitKeeper/etc/ignore + 1.6 03/12/29 17:14:00 smurf@smurf.noris.de +3 -0 + added *-stamp debian/files debian/tmp/* + + BitKeeper/triggers/post-commit.changelog + 1.1 03/12/29 17:13:58 smurf@smurf.noris.de +3 -0 + + BitKeeper/triggers/post-commit.changelog + 1.0 03/12/29 17:13:58 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/BitKeeper/triggers/post-commit.changelog + + BitKeeper/etc/logging_ok + 1.1 03/12/29 17:13:41 
smurf@smurf.noris.de +1 -0 + +ChangeSet + 1.5 03/12/29 17:13:41 smurf@linux.smurf.noris.de +7 -0 + Major enhancements: + - Use a Token object + - Allow incremental reading from a file + - Allow stacking of inputs (#include, whatever) + - Remember line numbers + - Refactor print_line_with_error into the Scanner object + + BitKeeper/etc/logging_ok + 1.0 03/12/29 17:13:41 smurf@smurf.noris.de +0 -0 + BitKeeper file /usr/local/src/misc/yapps/BitKeeper/etc/logging_ok + + yappsrt.py + 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +219 -141 + Major enhancements: + - Use a Token object + - Allow incremental reading from a file + - Allow stacking of inputs (#include, whatever) + - Remember line numbers + - Refactor print_line_with_error into the Scanner object + + parsetree.py + 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +2 -2 + don't pass pos explicitly + + grammar.py + 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +15 -19 + Cleanup + + generated from non-available file! + + examples/calc.g + 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +5 -2 + cleanup (strip, atoi) + + allow reading in of expressions via stacking (TEST) + + BitKeeper/etc/ignore + 1.5 03/12/29 17:10:56 smurf@smurf.noris.de +1 -0 + added *.pyc + + BitKeeper/etc/ignore + 1.4 03/12/29 17:10:51 smurf@smurf.noris.de +1 -0 + added test/*.py + + BitKeeper/etc/ignore + 1.3 03/12/29 17:10:46 smurf@smurf.noris.de +1 -0 + added examples/*.py + + BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 + 1.2 03/12/29 13:58:49 smurf@smurf.noris.de +0 -0 + Delete: yapps_grammar.py + +ChangeSet + 1.4 03/08/28 00:22:57 ?@smurf.noris.de +15 -0 + Version 2.1.1 + +ChangeSet + 1.3 03/08/28 00:22:57 ?@smurf.noris.de +1 -0 + CVS-Ignore + +ChangeSet + 1.2 03/12/29 12:56:52 smurf@smurf.noris.de +1 -0 + Versionsnummern-Updateskript + + BitKeeper/etc/ignore + 1.2 03/08/28 00:22:57 ?@smurf.noris.de +3 -0 + added CVS .cvsignore CVSROOT + +ChangeSet + 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +2 -0 + Initial repository create + + 
BitKeeper/triggers/pre-commit.upversion + 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +3 -0 + + BitKeeper/etc/ignore + 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +2 -0 + + BitKeeper/etc/config + 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +13 -0 + +ChangeSet + 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 + BitKeeper file /tmp/b.s.20059/ChangeSet + + BitKeeper/triggers/pre-commit.upversion + 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 + BitKeeper file /tmp/b.s.20059/BitKeeper/triggers/pre-commit.upversion + + BitKeeper/etc/ignore + 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 + BitKeeper file /tmp/b.s.20059/BitKeeper/etc/ignore + + BitKeeper/etc/config + 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 + BitKeeper file /tmp/b.s.20059/BitKeeper/etc/config + + yappsrt.py + 1.1 03/08/27 23:12:19 ?@smurf.noris.de +296 -0 + New:Version 2.1.1 + + yapps_grammar.py + 1.1 03/08/28 00:22:32 ?@smurf.noris.de +234 -0 + New:Version 2.1.1 + + yapps2.py + 1.1 03/08/12 20:25:55 ?@smurf.noris.de +111 -0 + New:Version 2.1.1 + + test/option.g + 1.1 03/08/11 20:43:22 ?@smurf.noris.de +17 -0 + New:Version 2.1.1 + + test/line_numbers.g + 1.1 03/08/28 00:22:56 ?@smurf.noris.de +10 -0 + New:Version 2.1.1 + + test/empty_clauses.g + 1.1 03/08/27 23:48:11 ?@smurf.noris.de +10 -0 + New:Version 2.1.1 + + parsetree.py + 1.1 03/08/28 00:18:14 ?@smurf.noris.de +645 -0 + New:Version 2.1.1 + + grammar.py + 1.1 03/08/28 00:16:28 ?@smurf.noris.de +234 -0 + New:Version 2.1.1 + + examples/xml.g + 1.1 03/08/27 20:53:39 ?@smurf.noris.de +66 -0 + New:Version 2.1.1 + + examples/lisp.g + 1.1 03/08/11 20:18:18 ?@smurf.noris.de +13 -0 + New:Version 2.1.1 + + examples/expr.g + 1.1 03/08/08 06:47:58 ?@smurf.noris.de +21 -0 + New:Version 2.1.1 + + examples/calc.g + 1.1 03/08/11 20:17:09 ?@smurf.noris.de +58 -0 + New:Version 2.1.1 + + NOTES + 1.1 03/08/12 18:59:41 ?@smurf.noris.de +78 -0 + New:Version 2.1.1 + + LICENSE + 1.1 03/08/11 19:41:27 ?@smurf.noris.de +20 -0 + New:Version 2.1.1 + + ChangeLog + 
1.1 03/08/28 00:22:16 ?@smurf.noris.de +108 -0 + New:Version 2.1.1 + + yappsrt.py + 1.0 03/08/27 23:12:19 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yappsrt.py + + yapps_grammar.py + 1.0 03/08/28 00:22:32 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yapps_grammar.py + + yapps2.py + 1.0 03/08/12 20:25:55 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yapps2.py + + test/option.g + 1.0 03/08/11 20:43:22 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/option.g + + test/line_numbers.g + 1.0 03/08/28 00:22:56 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/line_numbers.g + + test/empty_clauses.g + 1.0 03/08/27 23:48:11 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/empty_clauses.g + + parsetree.py + 1.0 03/08/28 00:18:14 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/parsetree.py + + grammar.py + 1.0 03/08/28 00:16:28 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/grammar.py + + examples/xml.g + 1.0 03/08/27 20:53:39 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/xml.g + + examples/lisp.g + 1.0 03/08/11 20:18:18 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/lisp.g + + examples/expr.g + 1.0 03/08/08 06:47:58 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/expr.g + + examples/calc.g + 1.0 03/08/11 20:17:09 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/calc.g + + NOTES + 1.0 03/08/12 18:59:41 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/NOTES + + LICENSE + 1.0 03/08/11 19:41:27 ?@smurf.noris.de +0 -0 + BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/LICENSE + + ChangeLog + 1.0 03/08/28 00:22:16 ?@smurf.noris.de +0 -0 + BitKeeper file 
/home/smurf/neu/yapps-2.1.1/yapps3/ChangeLog + diff --git a/doc/yapps2.haux b/doc/yapps2.haux new file mode 100644 index 0000000..ad51bcd --- /dev/null +++ b/doc/yapps2.haux @@ -0,0 +1,31 @@ +\@addtocsec{htoc}{1}{0}{\@print{1}\quad{}Introduction} +\@addtocsec{htoc}{2}{0}{\@print{2}\quad{}Examples} +\@addtocsec{htoc}{3}{1}{\@print{2.1}\quad{}Introduction to Grammars} +\@addtocsec{htoc}{4}{1}{\@print{2.2}\quad{}Lisp Expressions} +\@addtocsec{htoc}{5}{1}{\@print{2.3}\quad{}Calculator} +\@addtocsec{htoc}{6}{1}{\@print{2.4}\quad{}Calculator with Memory} +\@addtocsec{htoc}{7}{0}{\@print{3}\quad{}Grammars} +\@addtocsec{htoc}{8}{1}{\@print{3.1}\quad{}Left Factoring} +\newlabel{sec:Left-Factoring}{{3.1}{X}} +\@addtocsec{htoc}{9}{1}{\@print{3.2}\quad{}Left Recursion} +\@addtocsec{htoc}{10}{1}{\@print{3.3}\quad{}Ambiguous Grammars} +\newlabel{sec:Ambiguous-Grammars}{{3.3}{X}} +\@addtocsec{htoc}{11}{0}{\@print{4}\quad{}Customization} +\@addtocsec{htoc}{12}{1}{\@print{4.1}\quad{}Customizing Parsers} +\@addtocsec{htoc}{13}{1}{\@print{4.2}\quad{}Customizing Scanners} +\@addtocsec{htoc}{14}{0}{\@print{5}\quad{}Parser Mechanics} +\@addtocsec{htoc}{15}{1}{\@print{5.1}\quad{}Parser Objects} +\newlabel{sec:Parser-Objects}{{5.1}{X}} +\@addtocsec{htoc}{16}{1}{\@print{5.2}\quad{}Context Sensitive Scanner} +\@addtocsec{htoc}{17}{1}{\@print{5.3}\quad{}Internal Variables} +\@addtocsec{htoc}{18}{1}{\@print{5.4}\quad{}Pre- and Post-Parser Code} +\@addtocsec{htoc}{19}{1}{\@print{5.5}\quad{}Representation of Grammars} +\@addtocsec{htoc}{20}{0}{\@print{A}\quad{}Grammar for Parsers} +\@addtocsec{htoc}{21}{0}{\@print{B}\quad{}Upgrading} +\@addtocsec{htoc}{22}{0}{\@print{C}\quad{}Troubleshooting} +\@addtocsec{htoc}{23}{0}{\@print{D}\quad{}History} +\@addtocsec{htoc}{24}{0}{\@print{E}\quad{}Debian Extensions} +\newlabel{sec:debian}{{E}{X}} +\@addtocsec{htoc}{25}{0}{\@print{F}\quad{}Future Extensions} +\newlabel{sec:future}{{F}{X}} +\@addtocsec{htoc}{26}{0}{\@print{G}\quad{}References} diff --git 
a/doc/yapps2.html b/doc/yapps2.html new file mode 100644 index 0000000..f95be4d --- /dev/null +++ b/doc/yapps2.html @@ -0,0 +1,1206 @@ + + + + + + + + + + + + + + + + + + +
+
+
+ + + + + + + + + + + + +
The Yapps Parser Generator System
http://theory.stanford.edu/~amitp/Yapps/
Version 2
 
Amit J. Patel
http://www-cs-students.stanford.edu/ amitp/ +http://www-cs-students.stanford.edu/ amitp/

+

+
+ + +

1  Introduction

+ +Yapps (Yet Another Python +Parser System) is an easy to use parser +generator that is written in Python and generates Python code. There +are several parser generator systems already available for Python, +including PyLR, kjParsing, PyBison, and mcf.pars, +but I had different goals for my parser. Yapps is simple, is easy to +use, and produces human-readable parsers. It is not the fastest or +most powerful parser. Yapps is designed to be used when regular +expressions are not enough and other parser systems are too much: +situations where you may write your own recursive descent parser.
+
+Some unusual features of Yapps that may be of interest are: +
  1. Yapps produces recursive descent parsers that are readable by + humans, as opposed to table-driven parsers that are difficult to + read. A Yapps parser for a simple calculator looks similar to the + one that Mark Lutz wrote by hand for Programming Python.
    +
    +
  2. Yapps also allows for rules that accept parameters and pass + arguments to be used while parsing subexpressions. Grammars that + allow for arguments to be passed to subrules and for values to be + passed back are often called attribute grammars. In many + cases parameterized rules can be used to perform actions at “parse + time” that are usually delayed until later. For example, + information about variable declarations can be passed into the + rules that parse a procedure body, so that undefined variables can + be detected at parse time. The types of defined variables can be + used in parsing as well—for example, if the type of X is + known, we can determine whether X(1) is an array reference or + a function call.
    +
    +
  3. Yapps grammars are fairly easy to write, although there are + some inconveniences having to do with ELL(1) parsing that have to be + worked around. For example, rules have to be left factored and + rules may not be left recursive. However, neither limitation seems + to be a problem in practice.
    +
    +Yapps grammars look similar to the notation used in the Python + reference manual, with operators like *, +, |, + [], and () for patterns, names (tim) for rules, + regular expressions ("[a-z]+") for tokens, and # for + comments.
    +
    +
  4. The Yapps parser generator is written as a single Python module + with no C extensions. Yapps produces parsers that are written + entirely in Python, and require only the Yapps run-time module (5k) + for support.
    +
    +
  5. Yapps's scanner is context-sensitive, picking tokens based on + the types of the tokens accepted by the parser. This can be + helpful when implementing certain kinds of parsers, such as for a + preprocessor.
+There are several disadvantages of using Yapps over another parser system: +
  1. Yapps parsers are ELL(1) (Extended LL(1)), which is + less powerful than LALR (used by PyLR) or + SLR (used by kjParsing), so Yapps would not be a + good choice for parsing complex languages. For example, allowing + both x := 5; and x; as statements is difficult + because we must distinguish based on only one token of lookahead. + Seeing only x, we cannot decide whether we have an + assignment statement or an expression statement. (Note however + that this kind of grammar can be matched with backtracking; see + section F.)
    +
    +
  2. The scanner that Yapps provides can only read from strings, not + files, so an entire file has to be read in before scanning can + begin. It is possible to build a custom scanner, though, so in + cases where stream input is needed (from the console, a network, or + a large file are examples), the Yapps parser can be given a custom + scanner that reads from a stream instead of a string.
    +
    +
  3. Yapps is not designed with efficiency in mind.
+Yapps provides an easy to use parser generator that produces parsers +similar to what you might write by hand. It is not meant to be a +solution for all parsing problems, but instead an aid for those times +you would write a parser by hand rather than using one of the more +powerful parsing packages available.
+
+Yapps 2.0 is easier to use than Yapps 1.0. New features include a +less restrictive input syntax, which allows mixing of sequences, +choices, terminals, and nonterminals; optional matching; the ability +to insert single-line statements into the generated parser; and +looping constructs * and + similar to the repetitive +matching constructs in regular expressions. Unfortunately, the +addition of these constructs has made Yapps 2.0 incompatible with +Yapps 1.0, so grammars will have to be rewritten. See section +?? for tips on changing Yapps 1.0 grammars for use +with Yapps 2.0.
+
+ + +

2  Examples

+ +In this section are several examples that show the use of Yapps. +First, an introduction shows how to construct grammars and write them +in Yapps form. This example can be skipped by someone familiar with +grammars and parsing. Next is a Lisp expression grammar that produces +a parse tree as output. This example demonstrates the use of tokens +and rules, as well as returning values from rules. The third example +is an expression evaluation grammar that evaluates during parsing +(instead of producing a parse tree).
+
+ + +

2.1  Introduction to Grammars

+ +A grammar for a natural language specifies how words can be put +together to form large structures, such as phrases and sentences. A +grammar for a computer language is similar in that it specifies how +small components (called tokens) can be put together to form +larger structures. In this section we will write a grammar for a tiny +subset of English.
+
+Simple English sentences can be described as being a noun phrase +followed by a verb followed by a noun phrase. For example, in the +sentence, “Jack sank the blue ship,” the word “Jack” is the first +noun phrase, “sank” is the verb, and “the blue ship” is the second +noun phrase. In addition we should say what a noun phrase is; for +this example we shall say that a noun phrase is an optional article +(a, an, the) followed by any number of adjectives followed by a noun. +The tokens in our language are the articles, nouns, verbs, and +adjectives. The rules in our language will tell us how to +combine the tokens together to form lists of adjectives, noun phrases, +and sentences: + +Notice that some things that we said easily in English, such as +“optional article,” are expressed using special syntax, such as +brackets. When we said, “any number of adjectives,” we wrote +adjective*, where the * means “zero or more of the +preceding pattern”.
+
+The grammar given above is close to a Yapps grammar. We also have to +specify what the tokens are, and what to do when a pattern is matched. +For this example, we will do nothing when patterns are matched; the +next example will explain how to perform match actions. +
+parser TinyEnglish:
+  ignore:          "\\W+"
+  token noun:      "(Jack|spam|ship)"
+  token verb:      "(sank|threw)"
+  token article:   "(a|an|the)"
+  token adjective: "(blue|red|green)"
+
+  rule sentence:       noun_phrase verb noun_phrase
+  rule noun_phrase:    [article] adjective* noun
+
+The tokens are specified as Python regular expressions. Since +Yapps produces Python code, you can write any regular expression that +would be accepted by Python. (Note: These are Python 1.5 +regular expressions from the re module, not Python 1.4 +regular expressions from the regex module.) In addition to +tokens that you want to see (which are given names), you can also +specify tokens to ignore, marked by the ignore keyword. In +this parser we want to ignore whitespace.
+
+The TinyEnglish grammar shows how you define tokens and rules, but it +does not specify what should happen once we've matched the rules. In +the next example, we will take a grammar and produce a parse +tree from it.
+
+ + +

2.2  Lisp Expressions

+ +Lisp syntax, although hated by many, has a redeeming quality: it is +simple to parse. In this section we will construct a Yapps grammar to +parse Lisp expressions and produce a parse tree as output.
+
+ + +

Defining the Grammar

+ +The syntax of Lisp is simple. It has expressions, which are +identifiers, strings, numbers, and lists. A list is a left +parenthesis followed by some number of expressions (separated by +spaces) followed by a right parenthesis. For example, 5, +"ni", and (print "1+2 = " (+ 1 2)) are Lisp expressions. +Written as a grammar, +
+    expr:   ID | STR | NUM | list
+    list:   ( expr* )  
+
+In addition to having a grammar, we need to specify what to do every +time something is matched. For the tokens, which are strings, we just +want to get the “value” of the token, attach its type (identifier, +string, or number) in some way, and return it. For the lists, we want +to construct and return a Python list.
+
+Once some pattern is matched, we enclose a return statement enclosed +in {{...}}. The braces allow us to insert any one-line +statement into the parser. Within this statement, we can refer to the +values returned by matching each part of the rule. After matching a +token such as ID, “ID” will be bound to the text of the +matched token. Let's take a look at the rule: +
+    rule expr: ID   {{ return ('id', ID) }}
+      ...
+
+In a rule, tokens return the text that was matched. For identifiers, +we just return the identifier, along with a “tag” telling us that +this is an identifier and not a string or some other value. Sometimes +we may need to convert this text to a different form. For example, if +a string is matched, we want to remove quotes and handle special forms +like \n. If a number is matched, we want to convert it into a +number. Let's look at the return values for the other tokens: +
+      ...
+             | STR  {{ return ('str', eval(STR)) }}
+             | NUM  {{ return ('num', atoi(NUM)) }}
+      ...
+
+If we get a string, we want to remove the quotes and process any +special backslash codes, so we run eval on the quoted string. +If we get a number, we convert it to an integer with atoi and +then return the number along with its type tag.
+
+For matching a list, we need to do something slightly more +complicated. If we match a Lisp list of expressions, we want to +create a Python list with those values. +
+    rule list: "\\("                 # Match the opening parenthesis
+               {{ result = [] }}     # Create a Python list
+               ( 
+                  expr               # When we match an expression,
+                  {{ result.append(expr) }}   # add it to the list
+               )*                    # * means repeat this if needed
+               "\\)"                 # Match the closing parenthesis
+               {{ return result }}   # Return the Python list
+
+In this rule we first match the opening parenthesis, then go into a +loop. In this loop we match expressions and add them to the list. +When there are no more expressions to match, we match the closing +parenthesis and return the resulting list. Note that # is used for +comments, just as in Python.
+
+The complete grammar is specified as follows: +
+parser Lisp:
+    ignore:      '\\s+'
+    token NUM:   '[0-9]+'
+    token ID:    '[-+*/!@%^&=.a-zA-Z0-9_]+' 
+    token STR:   '"([^\\"]+|\\\\.)*"'
+
+    rule expr:   ID     {{ return ('id', ID) }}
+               | STR    {{ return ('str', eval(STR)) }}
+               | NUM    {{ return ('num', atoi(NUM)) }}
+               | list   {{ return list }}
+    rule list: "\\("    {{ result = [] }} 
+               ( expr   {{ result.append(expr) }}
+               )*  
+               "\\)"    {{ return result }} 
+
+One thing you may have noticed is that "\\(" and "\\)" +appear in the list rule. These are inline tokens: +they appear in the rules without being given a name with the +token keyword. Inline tokens are more convenient to use, but +since they do not have a name, the text that is matched cannot be used +in the return value. They are best used for short simple patterns +(usually punctuation or keywords).
+
+Another thing to notice is that the number and identifier tokens +overlap. For example, “487” matches both NUM and ID. In Yapps, the +scanner only tries to match tokens that are acceptable to the parser. +This rule doesn't help here, since both NUM and ID can appear in the +same place in the grammar. There are two rules used to pick tokens if +more than one matches. One is that the longest match is +preferred. For example, “487x” will match as an ID (487x) rather +than as a NUM (487) followed by an ID (x). The second rule is that if +the two matches are the same length, the first one listed in +the grammar is preferred. For example, “487” will match as a NUM +rather than an ID because NUM is listed first in the grammar. Inline +tokens have preference over any tokens you have listed.
+
+Now that our grammar is defined, we can run Yapps to produce a parser, +and then run the parser to produce a parse tree.
+
+ + +

Running Yapps

+ +In the Yapps module is a function generate that takes an +input filename and writes a parser to another file. We can use this +function to generate the Lisp parser, which is assumed to be in +lisp.g. +
+% python
+Python 1.5.1 (#1, Sep  3 1998, 22:51:17)  [GCC 2.7.2.3] on linux-i386
+Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
+>>> import yapps
+>>> yapps.generate('lisp.g')
+
+At this point, Yapps has written a file lisp.py that contains +the parser. In that file are two classes (one scanner and one parser) +and a function (called parse) that puts things together for +you.
+
+Alternatively, we can run Yapps from the command line to generate the +parser file: +
+% python yapps.py lisp.g
+
+After running Yapps either from within Python or from the command +line, we can use the Lisp parser by calling the parse +function. The first parameter should be the rule we want to match, +and the second parameter should be the string to parse. +
+>>> import lisp
+>>> lisp.parse('expr', '(+ 3 4)')
+[('id', '+'), ('num', 3), ('num', 4)]
+>>> lisp.parse('expr', '(print "3 = " (+ 1 2))')
+[('id', 'print'), ('str', '3 = '), [('id', '+'), ('num', 1), ('num', 2)]]
+
+The parse function is not the only way to use the parser; +section 5.1 describes how to access parser objects +directly.
+
+We've now gone through the steps in creating a grammar, writing a +grammar file for Yapps, producing a parser, and using the parser. In +the next example we'll see how rules can take parameters and also how +to do computations instead of just returning a parse tree.
+
+ + +

2.3  Calculator

+ +A common example parser given in many textbooks is that for simple +expressions, with numbers, addition, subtraction, multiplication, +division, and parenthesization of subexpressions. We'll write this +example in Yapps, evaluating the expression as we parse.
+
+Unlike yacc, Yapps does not have any way to specify +precedence rules, so we have to do it ourselves. We say that an +expression is the sum of terms, and that a term is the product of +factors, and that a factor is a number or a parenthesized expression: +
+    expr:           factor ( ("+"|"-") factor )*
+    factor:         term   ( ("*"|"/") term )*
+    term:           NUM | "(" expr ")"
+
+In order to evaluate the expression as we go, we should keep along an +accumulator while evaluating the lists of terms or factors. Just as +we kept a “result” variable to build a parse tree for Lisp +expressions, we will use a variable to evaluate numerical +expressions. The full grammar is given below: +
+parser Calculator:
+    token END: "$"         # $ means end of string
+    token NUM: "[0-9]+"
+
+    rule goal:           expr END         {{ return expr }}
+
+    # An expression is the sum and difference of factors
+    rule expr:           factor           {{ v = factor }}
+                       ( "[+]" factor       {{ v = v+factor }}
+                       |  "-"  factor       {{ v = v-factor }}
+                       )*                 {{ return v }}
+
+    # A factor is the product and division of terms
+    rule factor:         term             {{ v = term }}
+                       ( "[*]" term         {{ v = v*term }}
+                       |  "/"  term         {{ v = v/term }}
+                       )*                 {{ return v }}
+
+    # A term is either a number or an expression surrounded by parentheses
+    rule term:           NUM              {{ return atoi(NUM) }}
+                       | "\\(" expr "\\)" {{ return expr }}
+
+The top-level rule is goal, which says that we are looking for +an expression followed by the end of the string. The END +token is needed because without it, it isn't clear when to stop +parsing. For example, the string “1+3” could be parsed either as +the expression “1” followed by the string “+3” or it could be +parsed as the expression “1+3”. By requiring expressions to end +with END, the parser is forced to take “1+3”.
+
+In the two rules with repetition, the accumulator is named v. +After reading in one expression, we initialize the accumulator. Each +time through the loop, we modify the accumulator by adding, +subtracting, multiplying by, or dividing the previous accumulator by +the expression that has been parsed. At the end of the rule, we +return the accumulator.
+
+The calculator example shows how to process lists of elements using +loops, as well as how to handle precedence of operators.
+
+Note: It's often important to put the END token in, so +put it in unless you are sure that your grammar has some other +non-ambiguous token marking the end of the program.
+
+ + +

2.4  Calculator with Memory

+ +In the previous example we learned how to write a calculator that +evaluates simple numerical expressions. In this section we will +extend the example to support both local and global variables.
+
+To support global variables, we will add assignment statements to the +“goal” rule. +
+    rule goal:           expr END         {{ return expr }}
+              | 'set' ID expr END         {{ global_vars[ID] = expr }}
+                                          {{ return expr }}   
+
+To use these variables, we need a new kind of terminal: +
+    rule term: ... | ID {{ return global_vars[ID] }} 
+
+So far, these changes are straightforward. We simply have a global +dictionary global_vars that stores the variables and values, +we modify it when there is an assignment statement, and we look up +variables in it when we see a variable name.
+
+To support local variables, we will add variable declarations to the +set of allowed expressions. +
+    rule term: ... | 'let' VAR '=' expr 'in' expr ...
+
+This is where it becomes tricky. Local variables should be stored in +a local dictionary, not in the global one. One trick would be to save +a copy of the global dictionary, modify it, and then restore it +later. In this example we will instead use attributes to +create local information and pass it to subrules.
+
+A rule can optionally take parameters. When we invoke the rule, we +must pass in arguments. For local variables, let's use a single +parameter, local_vars: +
+    rule expr<<local_vars>>:   ...
+    rule factor<<local_vars>>: ...
+    rule term<<local_vars>>:   ...
+
+Each time we want to match expr, factor, or +term, we will pass the local variables in the current rule to +the subrule. One interesting case is when we pass as an argument +something other than local_vars: +
+   rule term<<local_vars>>: ...
+                | 'let' VAR '=' expr<<local_vars>>
+                  {{ local_vars = [(VAR, expr)] + local_vars }}
+                  'in' expr<<local_vars>>
+                  {{ return expr }}
+
+Note that the assignment to the local variables list does not modify +the original list. This is important to keep local variables from +being seen outside the “let”.
+
+The other interesting case is when we find a variable: +
+global_vars = {}
+
+def lookup(map, name):
+    for x,v in map:  if x==name: return v
+    return global_vars[name]
+%%
+   ...
+   rule term<<local_vars>>: ...
+                | VAR {{ return lookup(local_vars, VAR) }}
+
+The lookup function will search through the local variable list, and +if it cannot find the name there, it will look it up in the global +variable dictionary.
+
+A complete grammar for this example, including a read-eval-print loop +for interacting with the calculator, can be found in the examples +subdirectory included with Yapps.
+
+In this section we saw how to insert code before the parser. We also +saw how to use attributes to transmit local information from one rule +to its subrules.
+
+ + +

3  Grammars

+ +Each Yapps grammar has a name, a list of tokens, and a set of +production rules. A grammar named X will be used to produce +a parser named X and a scanner named XScanner. As +in Python, names are case sensitive, start with a letter, and contain +letters, numbers, and underscores (_).
+
+There are three kinds of tokens in Yapps: named, inline, and ignored. +As their name implies, named tokens are given a name, using the token +construct: token name : regexp. In a rule, the +token can be matched by using the name. Inline tokens are regular +expressions that are used in rules without being declared. Ignored +tokens are declared using the ignore construct: ignore: + regexp. These tokens are ignored by the scanner, and are +not seen by the parser. Often whitespace is an ignored token. The +regular expressions used to define tokens should use the syntax +defined in the re module, so some symbols may have to be +backslashed.
+
+Production rules in Yapps have a name and a pattern to match. If the +rule is parameterized, the name should be followed by a list of +parameter names in <<...>>. A pattern can be a simple pattern +or a compound pattern. Simple patterns are the name of a named token, +a regular expression in quotes (inline token), the name of a +production rule (followed by arguments in <<...>>, if the rule +has parameters), and single line Python statements ({{...}}). +Compound patterns are sequences (A B C ...), choices ( +A | B | C | ...), options ([...]), zero-or-more repetitions +(...*), and one-or-more repetitions (...+). Like +regular expressions, repetition operators have a higher precedence +than sequences, and sequences have a higher precedence than choices.
+
+Whenever {{...}} is used, a legal one-line Python statement +should be put inside the braces. The token }} should not +appear within the {{...}} section, even within a string, since +Yapps does not attempt to parse the Python statement. A workaround +for strings is to put two strings together ("}" "}"), or to use +backslashes ("}\}"). At the end of a rule you should use a +{{ return X }} statement to return a value. However, you +should not use any control statements (return, +continue, break) in the middle of a rule. Yapps +needs to make assumptions about the control flow to generate a parser, +and any changes to the control flow will confuse Yapps.
+
+The <<...>> form can occur in two places: to define parameters +to a rule and to give arguments when matching a rule. Parameters use +the syntax used for Python functions, so they can include default +arguments and the special forms (*args and **kwargs). +Arguments use the syntax for Python function call arguments, so they +can include normal arguments and keyword arguments. The token +>> should not appear within the <<...>> section.
+
+In both the statements and rule arguments, you can use names defined +by the parser to refer to matched patterns. You can refer to the text +matched by a named token by using the token name. You can use the +value returned by a production rule by using the name of that rule. +If a name X is matched more than once (such as in loops), you +will have to save the earlier value(s) in a temporary variable, and +then use that temporary variable in the return value. The next +section has an example of a name that occurs more than once.
+
+ + +

3.1  Left Factoring

+ + +Yapps produces ELL(1) parsers, which determine which clause to match +based on the first token available. Sometimes the leftmost tokens of +several clauses may be the same. The classic example is the +if/then/else construct in Pascal: +
+rule stmt:  "if" expr "then" stmt {{ then_part = stmt }} 
+                      "else" stmt {{ return ('If',expr,then_part,stmt) }}
+          | "if" expr "then" stmt {{ return ('If',expr,stmt,[]) }}
+
+(Note that we have to save the first stmt into a variable +because there is another stmt that will be matched.) The +left portions of the two clauses are the same, which presents a +problem for the parser. The solution is left-factoring: the +common parts are put together, and then a choice is made about +the remaining part: +
+rule stmt:  "if" expr 
+              "then" stmt {{ then_part = stmt }}
+              {{ else_part = [] }}
+              [ "else" stmt {{ else_part = stmt }} ]
+            {{ return ('If', expr, then_part, else_part) }}
+
+Unfortunately, the classic if/then/else situation is +still ambiguous when you left-factor. Yapps can deal with this +situation, but will report a warning; see section +3.3 for details.
+
+In general, replace rules of the form: +
+rule A:   a b1 {{ return E1 }}
+        | a b2 {{ return E2 }}
+        | c3   {{ return E3 }}
+        | c4   {{ return E4 }}
+
+with rules of the form: +
+rule A:   a ( b1 {{ return E1 }}
+            | b2 {{ return E2 }}
+            )
+        | c3   {{ return E3 }}
+        | c4   {{ return E4 }}
+
+ + +

3.2  Left Recursion

+ +A common construct in grammars is for matching a list of patterns, +sometimes separated with delimiters such as commas or semicolons. In +LR-based parser systems, we can parse a list with something like this: +
+rule sum:  NUM             {{ return NUM }}
+         | sum "+" NUM     {{ return (sum, NUM) }}
+
+Parsing 1+2+3+4 would produce the output +(((1,2),3),4), which is what we want from a left-associative +addition operator. Unfortunately, this grammar is left +recursive, because the sum rule contains a clause that +begins with sum. (The recursion occurs at the left side of +the clause.)
+
+We must restructure this grammar to be right recursive instead: +
+rule sum:  NUM             {{ return NUM }}
+         | NUM "+" sum     {{ return (NUM, sum) }}
+
+Unfortunately, using this grammar, 1+2+3+4 would be parsed as +(1,(2,(3,4))), which no longer follows left associativity. +The rule also needs to be left-factored. Instead, we write the +pattern as a loop instead: +
+rule sum:       NUM {{ v = NUM }}
+                ( "[+]" NUM {{ v = (v,NUM) }} )*
+                {{ return v }}
+
+In general, replace rules of the form: +
+rule A:  A a1 -> << E1 >> 
+       | A a2 -> << E2 >>
+       | b3   -> << E3 >>
+       | b4   -> << E4 >>
+
+with rules of the form: +
+rule A:  ( b3 {{ A = E3 }} 
+         | b4 {{ A = E4 }} )
+         ( a1 {{ A = E1 }}
+         | a2 {{ A = E2 }} )*
+         {{ return A }}
+
+We have taken a rule that proved problematic with recursion and +turned it into a rule that works well with looping constructs.
+
+ + +

3.3  Ambiguous Grammars

+ + +In section 3.1 we saw the classic if/then/else +ambiguity, which occurs because the “else ...” portion of an “if +...then ...else ...” construct is optional. Programs with +nested if/then/else constructs can be ambiguous when one of the else +clauses is missing: +
+if 1 then            if 1 then
+    if 5 then            if 5 then
+        x := 1;              x := 1;
+    else             else
+        y := 9;          y := 9;
+
+The indentation shows that the program can be parsed in two different +ways. (Of course, if we all would adopt Python's indentation-based +structuring, this would never happen!) Usually we want the parsing on +the left: the “else” should be associated with the closest “if” +statement. In section 3.1 we “solved” the +problem by using the following grammar: +
+rule stmt:  "if" expr 
+              "then" stmt {{ then_part = stmt }}
+              {{ else_part = [] }}
+              [ "else" stmt {{ else_part = stmt }} ]
+            {{ return ('If', expr, then_part, else_part) }}
+
+Here, we have an optional match of “else” followed by a statement. +The ambiguity is that if an “else” is present, it is not clear +whether you want it parsed immediately or if you want it to be parsed +by the outer “if”.
+
+Yapps will deal with the situation by matching the else pattern +when it can. The parser will work in this case because it prefers the +first matching clause, which tells Yapps to parse the “else”. +That is exactly what we want!
+
+For ambiguity cases with choices, Yapps will choose the first +matching choice. However, remember that Yapps only looks at the first +token to determine its decision, so (a b | a c) will result in +Yapps choosing a b even when the input is a c. It only +looks at the first token, a, to make its decision.
+
+ + +

4  Customization

+ +Both the parsers and the scanners can be customized. The parser is +usually extended by subclassing, and the scanner can either be +subclassed or completely replaced.
+
+ + +

4.1  Customizing Parsers

+ +If additional fields and methods are needed in order for a parser to +work, Python subclassing can be used. (This is unlike parser classes +written in static languages, in which these fields and methods must be +defined in the generated parser class.) We simply subclass the +generated parser, and add any fields or methods required. Expressions +in the grammar can call methods of the subclass to perform any actions +that cannot be expressed as a simple expression. For example, +consider this simple grammar: +
+parser X:
+    rule goal:  "something"  {{ self.printmsg() }}
+
+The printmsg function need not be implemented in the parser +class X; it can be implemented in a subclass: +
+import Xparser
+
+class MyX(Xparser.X):
+    def printmsg(self):
+        print "Hello!"
+
+ + +

4.2  Customizing Scanners

+ +The generated parser class is not dependent on the generated scanner +class. A scanner object is passed to the parser object's constructor +in the parse function. To use a different scanner, write +your own function to construct parser objects, with an instance of a +different scanner. Scanner objects must have a token method +that accepts an integer N as well as a list of allowed token +types, and returns the Nth token, as a tuple. The default scanner +raises NoMoreTokens if no tokens are available, and +SyntaxError if no token could be matched. However, the +parser does not rely on these exceptions; only the parse +convenience function (which calls wrap_error_reporter) and +the print_error error display function use those exceptions.
+
+The tuples representing tokens have four elements. The first two are +the beginning and ending indices of the matched text in the input +string. The third element is the type tag, matching either the name +of a named token or the quoted regexp of an inline or ignored token. +The fourth element of the token tuple is the matched text. If the +input string is s, and the token tuple is +(b,e,type,val), then val should be equal to +s[b:e].
+
+The generated parsers do not use the beginning or ending index. They use +only the token type and value. However, the default error reporter +uses the beginning and ending index to show the user where the error +is.
+
+ + +

5  Parser Mechanics

+ +The base parser class (Parser) defines two methods, _scan +and _peek, and two fields, _pos and +_scanner. The generated parser inherits from the base +parser, and contains one method for each rule in the grammar. To +avoid name clashes, do not use names that begin with an underscore +(_).
+
+ + +

5.1  Parser Objects

+ + +Yapps produces as output two exception classes, a scanner class, a +parser class, and a function parse that puts everything +together. The parse function does not have to be used; +instead, one can create a parser and scanner object and use them +together for parsing. +
+    def parse(rule, text):
+        P = X(XScanner(text))
+        return wrap_error_reporter(P, rule)
+
+The parse function takes a name of a rule and an input string +as input. It creates a scanner and parser object, then calls +wrap_error_reporter to execute the method in the parser +object named rule. The wrapper function will call the +appropriate parser rule and report any parsing errors to standard +output.
+
+There are several situations in which the parse function +would not be useful. If a different parser or scanner is being used, +or exceptions are to be handled differently, a new parse +function would be required. The supplied parse function can +be used as a template for writing a function for your own needs. An +example of a custom parse function is the generate function +in Yapps.py.
+
+ + +

5.2  Context Sensitive Scanner

+ +Unlike most scanners, the scanner produced by Yapps can take into +account the context in which tokens are needed, and try to match only +good tokens. For example, in the grammar: +
+parser IniFile:
+   token ID:   "[a-zA-Z_0-9]+"
+   token VAL:  ".*"
+
+   rule pair:  ID "[ \t]*=[ \t]*" VAL "\n"
+
+we would like to scan lines of text and pick out a name/value pair. +In a conventional scanner, the input string shell=progman.exe +would be turned into a single token of type VAL. The Yapps +scanner, however, knows that at the beginning of the line, an +ID is expected, so it will return "shell" as a token +of type ID. Later, it will return "progman.exe" as +a token of type VAL.
+
+Context sensitivity decreases the separation between scanner and +parser, but it is useful in parsers like IniFile, where the +tokens themselves are not unambiguous, but are unambiguous +given a particular stage in the parsing process.
+
+Unfortunately, context sensitivity can make it more difficult to +detect errors in the input. For example, in parsing a Pascal-like +language with “begin” and “end” as keywords, a context sensitive +scanner would only match “end” as the END token if the parser is in +a place that will accept the END token. If not, then the scanner +would match “end” as an identifier. To disable the context +sensitive scanner in Yapps, add the +context-insensitive-scanner option to the grammar: +
+Parser X:
+    option:  "context-insensitive-scanner"
+
+Context-insensitive scanning makes the parser look cleaner as well.
+
+ + +

5.3  Internal Variables

+ +There are two internal fields that may be of use. The parser object +has two fields, _pos, which is the index of the current +token being matched, and _scanner, which is the scanner +object. The token itself can be retrieved by accessing the scanner +object and calling the token method with the token index. However, if you call token before the token has been requested by the parser, it may mess up a context-sensitive scanner.1 A +potentially useful combination of these fields is to extract the +portion of the input matched by the current rule. To do this, just save the scanner state (_scanner.pos) before the text is matched and then again after the text is matched: +
+  rule R: 
+      {{ start = self._scanner.pos }}
+      a b c 
+      {{ end = self._scanner.pos }}
+      {{ print 'Text is', self._scanner.input[start:end] }}
+
+ + +

5.4  Pre- and Post-Parser Code

+ +Sometimes the parser code needs to rely on helper variables, +functions, and classes. A Yapps grammar can optionally be surrounded +by double percent signs, to separate the grammar from Python code. +
+... Python code ...
+%%
+... Yapps grammar ...
+%%
+... Python code ...
+
+The second %% can be omitted if there is no Python code at the +end, and the first %% can be omitted if there is no extra +Python code at all. (To have code only at the end, both separators +are required.)
+
+If the second %% is omitted, Yapps will insert testing code +that allows you to use the generated parser to parse a file.
+
+The extended calculator example in the Yapps examples subdirectory +includes both pre-parser and post-parser code.
+
+ + +

5.5  Representation of Grammars

+
+For each kind of pattern there is a class derived from Pattern. Yapps
+has classes for Terminal, NonTerminal, Sequence, Choice, Option, Plus,
+Star, and Eval. Each of these classes has the following interface:
+
+Both setup and update monotonically increase the
+variables they modify. Since the variables can only increase a finite
+number of times, we can repeatedly call the function until the
+variables stabilize. The used function is not currently
+implemented.
+
+With each pattern in the grammar Yapps associates three pieces of +information: the firstset, the followset, and the +accepts-є flag.
+
+The firstset contains the tokens that can appear as we start
+matching the pattern. The followset contains the tokens that can
+appear immediately after we match the pattern. The accepts-є
+flag is true if the pattern can match no tokens. In this case, first
+will contain all the elements in follow. The followset is not
+needed when accepts-є is false, and may not be accurate in
+those cases.
+
+Yapps does not compute these sets precisely. Its approximation can +miss certain cases, such as this one: +
+  rule C: ( A* | B )
+  rule B: C [A]
+
+Yapps will calculate C's followset to include A. +However, C will always match all the A's, so A will +never follow it. Yapps 2.0 does not properly handle this construct, +but if it seems important, I may add support for it in a future +version.
+
+Yapps also cannot handle constructs that depend on the calling +sequence. For example: +
+  rule R: U | 'b'
+  rule S:   | 'c'
+  rule T: S 'b'
+  rule U: S 'a'
+
+The followset for S includes a and b. Since S can be empty, the firstset for S should include a, +b, and c. However, when parsing R, if the lookahead +is b we should not parse U. That's because in U, S is followed by a and not b. Therefore in +R, we should choose rule U only if there is an a or +c, but not if there is a b. Yapps and many other LL(1) +systems do not distinguish S b and S a, making S's followset a, b, and making R always try to match +U. In this case we can solve the problem by changing R to +'b' | U but it may not always be possible to solve all such +problems in this way.
+
+ + +

A  Grammar for Parsers

+ +This is the grammar for parsers, without any Python code mixed in. +The complete grammar can be found in parsedesc.g in the Yapps +distribution. +
+parser ParserDescription:
+    ignore:      "\\s+"
+    ignore:      "#.*?\r?\n"
+    token END:   "$"  # $ means end of string
+    token ATTR:  "<<.+?>>"
+    token STMT:  "{{.+?}}"
+    token ID:    '[a-zA-Z_][a-zA-Z_0-9]*'
+    token STR:   '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'
+
+    rule Parser: "parser" ID ":"
+                   Options
+                   Tokens
+                   Rules
+                 END 
+
+    rule Options:  ( "option" ":" STR )*
+    rule Tokens:   ( "token" ID ":" STR | "ignore"   ":" STR )*
+    rule Rules:    ( "rule" ID OptParam ":" ClauseA )*
+
+    rule ClauseA:  ClauseB ( '[|]' ClauseB )*
+    rule ClauseB:  ClauseC*
+    rule ClauseC:  ClauseD [ '[+]' | '[*]' ]
+    rule ClauseD:  STR | ID [ATTR] | STMT
+                 | '\\(' ClauseA '\\)' | '\\[' ClauseA '\\]'
+
+ + +

B  Upgrading

+ +Yapps 2.0 is not backwards compatible with Yapps 1.0. In this section +are some tips for upgrading: +
  1. + Yapps 1.0 was distributed as a single file. Yapps 2.0 is + instead distributed as two Python files: a parser generator + (26k) and a parser runtime (5k). You need both files to + create parsers, but you need only the runtime (yappsrt.py) + to use the parsers.
    +
    +
  2. Yapps 1.0 supported Python 1.4 regular expressions from the + regex module. Yapps 2.0 uses Python 1.5 regular + expressions from the re module. The new syntax for + regular expressions is not compatible with the old syntax. + Andrew Kuchling has a guide to converting + regular + expressionshttp://www.python.org/doc/howto/regex-to-re/ on his + web page.
    +
    +
  3. Yapps 1.0 wants a pattern and then a return value in -> + <<...>>. Yapps 2.0 allows patterns and Python statements to + be mixed. To convert a rule like this: +
    +rule R: A B C -> << E1 >>
    +      | X Y Z -> << E2 >>
    +
    to Yapps 2.0 form, replace the return value specifiers with return + statements: +
    +rule R: A B C {{ return E1 }}
    +      | X Y Z {{ return E2 }}
    +
  4. Yapps 2.0 does not perform tail recursion elimination. This + means any recursive rules you write will be turned into recursive + methods in the parser. The parser will work, but may be slower. + It can be made faster by rewriting recursive rules, using instead + the looping operators * and + provided in Yapps 2.0.
+ + +

C  Troubleshooting

+ + + + +

D  History

+ +Yapps 1 had several limitations that bothered me while writing +parsers: +
  1. + It was not possible to insert statements into the generated
 + parser. A common workaround was to write an auxiliary function
 + that executed those statements, and to call that function as part
 + of the return value calculation. For example, several of my
 + parsers had an “append(x,y)” function that existed solely to call
 + “x.append(y)”.
 +
  2. The way in which grammars were specified was rather + restrictive: a rule was a choice of clauses. Each clause was a + sequence of tokens and rule names, followed by a return value. +
  3. Optional matching had to be put into a separate rule because + choices were only made at the beginning of a rule. +
  4. Repetition had to be specified in terms of recursion. Not only + was this awkward (sometimes requiring additional rules), I had to + add a tail recursion optimization to Yapps to transform the + recursion back into a loop. +
+Yapps 2 addresses each of these limitations. +
  1. + Statements can occur anywhere within a rule. (However, only + one-line statements are allowed; multiline blocks marked by + indentation are not.) +
  2. Grammars can be specified using any mix of sequences, choices, + tokens, and rule names. To allow for complex structures, + parentheses can be used for grouping. +
  3. Given choices and parenthesization, optional matching can be + expressed as a choice between some pattern and nothing. In + addition, Yapps 2 has the convenience syntax [A B ...] for + matching A B ... optionally. +
  4. Repetition operators * for zero or more and + for + one or more make it easy to specify repeating patterns. +
+It is my hope that Yapps 2 will be flexible enough to meet my needs +for another year, yet simple enough that I do not hesitate to use it.
+
+ + +

E  Debian Extensions

+ + +The Debian version adds the following enhancements to the original +Yapps code. They were written by Matthias Urlichs. +
  1. + Yapps can stack input sources ("include files"). A usage example + is supplied with the calc.g sample program. +
  2. Yapps now understands augmented ignore-able patterns. + This means that Yapps can parse multi-line C comments; this wasn't + possible before. +
  3. Better error reporting. +
  4. Yapps now reads its input incrementally. +
+The generated parser has been renamed to yapps/runtime.py. +In Debian, this file is provided by the yapps2-runtime package. +You need to depend on it if you Debianize Python programs which use +yapps.
+
+ + +

F  Future Extensions

+ + +I am still investigating the possibility of LL(2) and higher +lookahead. However, it looks like the resulting parsers will be +somewhat ugly.
+
+It would be nice to control choices with user-defined predicates.
+
+The most likely future extension is backtracking. A grammar pattern +like (VAR ':=' expr)? {{ return Assign(VAR,expr) }} : expr {{ return expr }} +would turn into code that attempted to match VAR ':=' expr. If +it succeeded, it would run {{ return ... }}. If it failed, it +would match expr {{ return expr }}. Backtracking may make it +less necessary to write LL(2) grammars.
+
+ + +

G  References

+ +
  1. + The Python-Parser + SIGhttp://www.python.org/sigs/parser-sig/ is the first place + to look for a list of parser systems for Python.
    +
    +
  2. ANTLR/PCCTS, by Terrence Parr, is available at + The ANTLR Home Pagehttp://www.antlr.org/.
    +
    +
  3. PyLR, by Scott Cotton, is at his Starship + pagehttp://starship.skyport.net/crew/scott/PyLR.html.
    +
    +
  4. John Aycock's Compiling Little Languages + Frameworkhttp://www.foretec.com/python/workshops/1998-11/proceedings/papers/aycock-little/aycock-little.html.
    +
    +
  5. PyBison, by Scott Hassan, can be found at + his Python Projects + pagehttp://coho.stanford.edu/~hassan/Python/.
    +
    +
  6. mcf.pars, by Mike C. Fletcher, is available at + his web + pagehttp://members.rogers.com/mcfletch/programming/simpleparse/simpleparse.html.
    +
    +
  7. kwParsing, by Aaron Watters, is available at + his Starship + pagehttp://starship.skyport.net/crew/aaron_watters/kwParsing/. +
+ +
1
When using a context-sensitive scanner, the parser tells the scanner what the valid token types are at each point. If you call token before the parser can tell the scanner the valid token types, the scanner will attempt to match without considering the context. +
+ + + + +
This document was translated from LATEX by +HEVEA.
+ diff --git a/doc/yapps2.htoc b/doc/yapps2.htoc new file mode 100644 index 0000000..9fabbc6 --- /dev/null +++ b/doc/yapps2.htoc @@ -0,0 +1,36 @@ +\begin{tocenv} +\tocitem \@locref{htoc1}{\begin{@norefs}\@print{1}\quad{}Introduction\end{@norefs}} +\tocitem \@locref{htoc2}{\begin{@norefs}\@print{2}\quad{}Examples\end{@norefs}} +\begin{tocenv} +\tocitem \@locref{htoc3}{\begin{@norefs}\@print{2.1}\quad{}Introduction to Grammars\end{@norefs}} +\tocitem \@locref{htoc4}{\begin{@norefs}\@print{2.2}\quad{}Lisp Expressions\end{@norefs}} +\tocitem \@locref{htoc5}{\begin{@norefs}\@print{2.3}\quad{}Calculator\end{@norefs}} +\tocitem \@locref{htoc6}{\begin{@norefs}\@print{2.4}\quad{}Calculator with Memory\end{@norefs}} +\end{tocenv} +\tocitem \@locref{htoc7}{\begin{@norefs}\@print{3}\quad{}Grammars\end{@norefs}} +\begin{tocenv} +\tocitem \@locref{htoc8}{\begin{@norefs}\@print{3.1}\quad{}Left Factoring\end{@norefs}} +\tocitem \@locref{htoc9}{\begin{@norefs}\@print{3.2}\quad{}Left Recursion\end{@norefs}} +\tocitem \@locref{htoc10}{\begin{@norefs}\@print{3.3}\quad{}Ambiguous Grammars\end{@norefs}} +\end{tocenv} +\tocitem \@locref{htoc11}{\begin{@norefs}\@print{4}\quad{}Customization\end{@norefs}} +\begin{tocenv} +\tocitem \@locref{htoc12}{\begin{@norefs}\@print{4.1}\quad{}Customizing Parsers\end{@norefs}} +\tocitem \@locref{htoc13}{\begin{@norefs}\@print{4.2}\quad{}Customizing Scanners\end{@norefs}} +\end{tocenv} +\tocitem \@locref{htoc14}{\begin{@norefs}\@print{5}\quad{}Parser Mechanics\end{@norefs}} +\begin{tocenv} +\tocitem \@locref{htoc15}{\begin{@norefs}\@print{5.1}\quad{}Parser Objects\end{@norefs}} +\tocitem \@locref{htoc16}{\begin{@norefs}\@print{5.2}\quad{}Context Sensitive Scanner\end{@norefs}} +\tocitem \@locref{htoc17}{\begin{@norefs}\@print{5.3}\quad{}Internal Variables\end{@norefs}} +\tocitem \@locref{htoc18}{\begin{@norefs}\@print{5.4}\quad{}Pre- and Post-Parser Code\end{@norefs}} +\tocitem \@locref{htoc19}{\begin{@norefs}\@print{5.5}\quad{}Representation of 
Grammars\end{@norefs}} +\end{tocenv} +\tocitem \@locref{htoc20}{\begin{@norefs}\@print{A}\quad{}Grammar for Parsers\end{@norefs}} +\tocitem \@locref{htoc21}{\begin{@norefs}\@print{B}\quad{}Upgrading\end{@norefs}} +\tocitem \@locref{htoc22}{\begin{@norefs}\@print{C}\quad{}Troubleshooting\end{@norefs}} +\tocitem \@locref{htoc23}{\begin{@norefs}\@print{D}\quad{}History\end{@norefs}} +\tocitem \@locref{htoc24}{\begin{@norefs}\@print{E}\quad{}Debian Extensions\end{@norefs}} +\tocitem \@locref{htoc25}{\begin{@norefs}\@print{F}\quad{}Future Extensions\end{@norefs}} +\tocitem \@locref{htoc26}{\begin{@norefs}\@print{G}\quad{}References\end{@norefs}} +\end{tocenv} diff --git a/doc/yapps2.tex b/doc/yapps2.tex new file mode 100644 index 0000000..31b44be --- /dev/null +++ b/doc/yapps2.tex @@ -0,0 +1,1246 @@ +\documentclass[10pt]{article} +\usepackage{palatino} +\usepackage{html} +\usepackage{color} + +\setlength{\headsep}{0in} +\setlength{\headheight}{0in} +\setlength{\textheight}{8.5in} +\setlength{\textwidth}{5.9in} +\setlength{\oddsidemargin}{0.25in} + +\definecolor{darkblue}{rgb}{0,0,0.6} +\definecolor{darkerblue}{rgb}{0,0,0.3} + +%% \newcommand{\mysection}[1]{\section{\textcolor{darkblue}{#1}}} +%% \newcommand{\mysubsection}[1]{\subsection{\textcolor{darkerblue}{#1}}} +\newcommand{\mysection}[1]{\section{#1}} +\newcommand{\mysubsection}[1]{\subsection{#1}} + +\bodytext{bgcolor=white text=black link=#004080 vlink=#006020} + +\newcommand{\first}{\textsc{first}} +\newcommand{\follow}{\textsc{follow}} + +\begin{document} + +\begin{center} +\hfill \begin{tabular}{c} +{\Large The \emph{Yapps} Parser Generator System}\\ +\verb|http://theory.stanford.edu/~amitp/Yapps/|\\ + Version 2\\ +\\ +Amit J. 
Patel\\ +\htmladdnormallink{http://www-cs-students.stanford.edu/~amitp/} +{http://www-cs-students.stanford.edu/~amitp/} + +\end{tabular} \hfill \rule{0in}{0in} +\end{center} + +\mysection{Introduction} + +\emph{Yapps} (\underline{Y}et \underline{A}nother \underline{P}ython +\underline{P}arser \underline{S}ystem) is an easy to use parser +generator that is written in Python and generates Python code. There +are several parser generator systems already available for Python, +including \texttt{PyLR, kjParsing, PyBison,} and \texttt{mcf.pars,} +but I had different goals for my parser. Yapps is simple, is easy to +use, and produces human-readable parsers. It is not the fastest or +most powerful parser. Yapps is designed to be used when regular +expressions are not enough and other parser systems are too much: +situations where you may write your own recursive descent parser. + +Some unusual features of Yapps that may be of interest are: + +\begin{enumerate} + + \item Yapps produces recursive descent parsers that are readable by + humans, as opposed to table-driven parsers that are difficult to + read. A Yapps parser for a simple calculator looks similar to the + one that Mark Lutz wrote by hand for \emph{Programming Python.} + + \item Yapps also allows for rules that accept parameters and pass + arguments to be used while parsing subexpressions. Grammars that + allow for arguments to be passed to subrules and for values to be + passed back are often called \emph{attribute grammars.} In many + cases parameterized rules can be used to perform actions at ``parse + time'' that are usually delayed until later. For example, + information about variable declarations can be passed into the + rules that parse a procedure body, so that undefined variables can + be detected at parse time. The types of defined variables can be + used in parsing as well---for example, if the type of {\tt X} is + known, we can determine whether {\tt X(1)} is an array reference or + a function call. 
+ + \item Yapps grammars are fairly easy to write, although there are + some inconveniences having to do with ELL(1) parsing that have to be + worked around. For example, rules have to be left factored and + rules may not be left recursive. However, neither limitation seems + to be a problem in practice. + + Yapps grammars look similar to the notation used in the Python + reference manual, with operators like \verb:*:, \verb:+:, \verb:|:, + \verb:[]:, and \verb:(): for patterns, names ({\tt tim}) for rules, + regular expressions (\verb:"[a-z]+":) for tokens, and \verb:#: for + comments. + + \item The Yapps parser generator is written as a single Python module + with no C extensions. Yapps produces parsers that are written + entirely in Python, and require only the Yapps run-time module (5k) + for support. + + \item Yapps's scanner is context-sensitive, picking tokens based on + the types of the tokens accepted by the parser. This can be + helpful when implementing certain kinds of parsers, such as for a + preprocessor. + +\end{enumerate} + +There are several disadvantages of using Yapps over another parser system: + +\begin{enumerate} + + \item Yapps parsers are \texttt{ELL(1)} (Extended LL(1)), which is + less powerful than \texttt{LALR} (used by \texttt{PyLR}) or + \texttt{SLR} (used by \texttt{kjParsing}), so Yapps would not be a + good choice for parsing complex languages. For example, allowing + both \texttt{x := 5;} and \texttt{x;} as statements is difficult + because we must distinguish based on only one token of lookahead. + Seeing only \texttt{x}, we cannot decide whether we have an + assignment statement or an expression statement. (Note however + that this kind of grammar can be matched with backtracking; see + section \ref{sec:future}.) + + \item The scanner that Yapps provides can only read from strings, not + files, so an entire file has to be read in before scanning can + begin. 
It is possible to build a custom scanner, though, so in + cases where stream input is needed (from the console, a network, or + a large file are examples), the Yapps parser can be given a custom + scanner that reads from a stream instead of a string. + + \item Yapps is not designed with efficiency in mind. + +\end{enumerate} + +Yapps provides an easy to use parser generator that produces parsers +similar to what you might write by hand. It is not meant to be a +solution for all parsing problems, but instead an aid for those times +you would write a parser by hand rather than using one of the more +powerful parsing packages available. + +Yapps 2.0 is easier to use than Yapps 1.0. New features include a +less restrictive input syntax, which allows mixing of sequences, +choices, terminals, and nonterminals; optional matching; the ability +to insert single-line statements into the generated parser; and +looping constructs \verb|*| and \verb|+| similar to the repetitive +matching constructs in regular expressions. Unfortunately, the +addition of these constructs has made Yapps 2.0 incompatible with +Yapps 1.0, so grammars will have to be rewritten. See section +\ref{sec:Upgrading} for tips on changing Yapps 1.0 grammars for use +with Yapps 2.0. + +\mysection{Examples} + +In this section are several examples that show the use of Yapps. +First, an introduction shows how to construct grammars and write them +in Yapps form. This example can be skipped by someone familiar with +grammars and parsing. Next is a Lisp expression grammar that produces +a parse tree as output. This example demonstrates the use of tokens +and rules, as well as returning values from rules. The third example +is a expression evaluation grammar that evaluates during parsing +(instead of producing a parse tree). + +\mysubsection{Introduction to Grammars} + +A \emph{grammar} for a natural language specifies how words can be put +together to form large structures, such as phrases and sentences. 
A +grammar for a computer language is similar in that it specifies how +small components (called \emph{tokens}) can be put together to form +larger structures. In this section we will write a grammar for a tiny +subset of English. + +Simple English sentences can be described as being a noun phrase +followed by a verb followed by a noun phrase. For example, in the +sentence, ``Jack sank the blue ship,'' the word ``Jack'' is the first +noun phrase, ``sank'' is the verb, and ``the blue ship'' is the second +noun phrase. In addition we should say what a noun phrase is; for +this example we shall say that a noun phrase is an optional article +(a, an, the) followed by any number of adjectives followed by a noun. +The tokens in our language are the articles, nouns, verbs, and +adjectives. The \emph{rules} in our language will tell us how to +combine the tokens together to form lists of adjectives, noun phrases, +and sentences: + +\begin{itemize} + \item \texttt{sentence: noun\_phrase verb noun\_phrase} + \item \texttt{noun\_phrase: [article] adjective* noun} +\end{itemize} + +Notice that some things that we said easily in English, such as +``optional article,'' are expressed using special syntax, such as +brackets. When we said, ``any number of adjectives,'' we wrote +\texttt{adjective*}, where the \texttt{*} means ``zero or more of the +preceding pattern''. + +The grammar given above is close to a Yapps grammar. We also have to +specify what the tokens are, and what to do when a pattern is matched. +For this example, we will do nothing when patterns are matched; the +next example will explain how to perform match actions. 
+ +\begin{verbatim} +parser TinyEnglish: + ignore: "\\W+" + token noun: "(Jack|spam|ship)" + token verb: "(sank|threw)" + token article: "(a|an|the)" + token adjective: "(blue|red|green)" + + rule sentence: noun_phrase verb noun_phrase + rule noun_phrase: [article] adjective* noun +\end{verbatim} + +The tokens are specified as Python \emph{regular expressions}. Since +Yapps produces Python code, you can write any regular expression that +would be accepted by Python. (\emph{Note:} These are Python 1.5 +regular expressions from the \texttt{re} module, not Python 1.4 +regular expressions from the \texttt{regex} module.) In addition to +tokens that you want to see (which are given names), you can also +specify tokens to ignore, marked by the \texttt{ignore} keyword. In +this parser we want to ignore whitespace. + +The TinyEnglish grammar shows how you define tokens and rules, but it +does not specify what should happen once we've matched the rules. In +the next example, we will take a grammar and produce a \emph{parse +tree} from it. + +\mysubsection{Lisp Expressions} + +Lisp syntax, although hated by many, has a redeeming quality: it is +simple to parse. In this section we will construct a Yapps grammar to +parse Lisp expressions and produce a parse tree as output. + +\subsubsection*{Defining the Grammar} + +The syntax of Lisp is simple. It has expressions, which are +identifiers, strings, numbers, and lists. A list is a left +parenthesis followed by some number of expressions (separated by +spaces) followed by a right parenthesis. For example, \verb|5|, +\verb|"ni"|, and \verb|(print "1+2 = " (+ 1 2))| are Lisp expressions. +Written as a grammar, + +\begin{verbatim} + expr: ID | STR | NUM | list + list: ( expr* ) +\end{verbatim} + +In addition to having a grammar, we need to specify what to do every +time something is matched. 
For the tokens, which are strings, we just +want to get the ``value'' of the token, attach its type (identifier, +string, or number) in some way, and return it. For the lists, we want +to construct and return a Python list. + +Once some pattern is matched, we enclose a return statement enclosed +in \verb|{{...}}|. The braces allow us to insert any one-line +statement into the parser. Within this statement, we can refer to the +values returned by matching each part of the rule. After matching a +token such as \texttt{ID}, ``ID'' will be bound to the text of the +matched token. Let's take a look at the rule: + +\begin{verbatim} + rule expr: ID {{ return ('id', ID) }} + ... +\end{verbatim} + +In a rule, tokens return the text that was matched. For identifiers, +we just return the identifier, along with a ``tag'' telling us that +this is an identifier and not a string or some other value. Sometimes +we may need to convert this text to a different form. For example, if +a string is matched, we want to remove quotes and handle special forms +like \verb|\n|. If a number is matched, we want to convert it into a +number. Let's look at the return values for the other tokens: + +\begin{verbatim} + ... + | STR {{ return ('str', eval(STR)) }} + | NUM {{ return ('num', atoi(NUM)) }} + ... +\end{verbatim} + +If we get a string, we want to remove the quotes and process any +special backslash codes, so we run \texttt{eval} on the quoted string. +If we get a number, we convert it to an integer with \texttt{atoi} and +then return the number along with its type tag. + +For matching a list, we need to do something slightly more +complicated. If we match a Lisp list of expressions, we want to +create a Python list with those values. 
+ +\begin{verbatim} + rule list: "\\(" # Match the opening parenthesis + {{ result = [] }} # Create a Python list + ( + expr # When we match an expression, + {{ result.append(expr) }} # add it to the list + )* # * means repeat this if needed + "\\)" # Match the closing parenthesis + {{ return result }} # Return the Python list +\end{verbatim} + +In this rule we first match the opening parenthesis, then go into a +loop. In this loop we match expressions and add them to the list. +When there are no more expressions to match, we match the closing +parenthesis and return the resulting. Note that \verb:#: is used for +comments, just as in Python. + +The complete grammar is specified as follows: +\begin{verbatim} +parser Lisp: + ignore: '\\s+' + token NUM: '[0-9]+' + token ID: '[-+*/!@%^&=.a-zA-Z0-9_]+' + token STR: '"([^\\"]+|\\\\.)*"' + + rule expr: ID {{ return ('id', ID) }} + | STR {{ return ('str', eval(STR)) }} + | NUM {{ return ('num', atoi(NUM)) }} + | list {{ return list }} + rule list: "\\(" {{ result = [] }} + ( expr {{ result.append(expr) }} + )* + "\\)" {{ return result }} +\end{verbatim} + +One thing you may have noticed is that \verb|"\\("| and \verb|"\\)"| +appear in the \texttt{list} rule. These are \emph{inline tokens}: +they appear in the rules without being given a name with the +\texttt{token} keyword. Inline tokens are more convenient to use, but +since they do not have a name, the text that is matched cannot be used +in the return value. They are best used for short simple patterns +(usually punctuation or keywords). + +Another thing to notice is that the number and identifier tokens +overlap. For example, ``487'' matches both NUM and ID. In Yapps, the +scanner only tries to match tokens that are acceptable to the parser. +This rule doesn't help here, since both NUM and ID can appear in the +same place in the grammar. There are two rules used to pick tokens if +more than one matches. One is that the \emph{longest} match is +preferred. 
For example, ``487x'' will match as an ID (487x) rather +than as a NUM (487) followed by an ID (x). The second rule is that if +the two matches are the same length, the \emph{first} one listed in +the grammar is preferred. For example, ``487'' will match as an NUM +rather than an ID because NUM is listed first in the grammar. Inline +tokens have preference over any tokens you have listed. + +Now that our grammar is defined, we can run Yapps to produce a parser, +and then run the parser to produce a parse tree. + +\subsubsection*{Running Yapps} + +In the Yapps module is a function \texttt{generate} that takes an +input filename and writes a parser to another file. We can use this +function to generate the Lisp parser, which is assumed to be in +\texttt{lisp.g}. + +\begin{verbatim} +% python +Python 1.5.1 (#1, Sep 3 1998, 22:51:17) [GCC 2.7.2.3] on linux-i386 +Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam +>>> import yapps +>>> yapps.generate('lisp.g') +\end{verbatim} + +At this point, Yapps has written a file \texttt{lisp.py} that contains +the parser. In that file are two classes (one scanner and one parser) +and a function (called \texttt{parse}) that puts things together for +you. + +Alternatively, we can run Yapps from the command line to generate the +parser file: + +\begin{verbatim} +% python yapps.py lisp.g +\end{verbatim} + +After running Yapps either from within Python or from the command +line, we can use the Lisp parser by calling the \texttt{parse} +function. The first parameter should be the rule we want to match, +and the second parameter should be the string to parse. 
+ +\begin{verbatim} +>>> import lisp +>>> lisp.parse('expr', '(+ 3 4)') +[('id', '+'), ('num', 3), ('num', 4)] +>>> lisp.parse('expr', '(print "3 = " (+ 1 2))') +[('id', 'print'), ('str', '3 = '), [('id', '+'), ('num', 1), ('num', 2)]] +\end{verbatim} + +The \texttt{parse} function is not the only way to use the parser; +section \ref{sec:Parser-Objects} describes how to access parser objects +directly. + +We've now gone through the steps in creating a grammar, writing a +grammar file for Yapps, producing a parser, and using the parser. In +the next example we'll see how rules can take parameters and also how +to do computations instead of just returning a parse tree. + +\mysubsection{Calculator} + +A common example parser given in many textbooks is that for simple +expressions, with numbers, addition, subtraction, multiplication, +division, and parenthesization of subexpressions. We'll write this +example in Yapps, evaluating the expression as we parse. + +Unlike \texttt{yacc}, Yapps does not have any way to specify +precedence rules, so we have to do it ourselves. We say that an +expression is the sum of terms, and that a term is the product of +factors, and that a factor is a number or a parenthesized expression: + +\begin{verbatim} + expr: factor ( ("+"|"-") factor )* + factor: term ( ("*"|"/") term )* + term: NUM | "(" expr ")" +\end{verbatim} + +In order to evaluate the expression as we go, we should keep along an +accumulator while evaluating the lists of terms or factors. Just as +we kept a ``result'' variable to build a parse tree for Lisp +expressions, we will use a variable to evaluate numerical +expressions. 
The full grammar is given below: + +\begin{verbatim} +parser Calculator: + token END: "$" # $ means end of string + token NUM: "[0-9]+" + + rule goal: expr END {{ return expr }} + + # An expression is the sum and difference of factors + rule expr: factor {{ v = factor }} + ( "[+]" factor {{ v = v+factor }} + | "-" factor {{ v = v-factor }} + )* {{ return v }} + + # A factor is the product and division of terms + rule factor: term {{ v = term }} + ( "[*]" term {{ v = v*term }} + | "/" term {{ v = v/term }} + )* {{ return v }} + + # A term is either a number or an expression surrounded by parentheses + rule term: NUM {{ return atoi(NUM) }} + | "\\(" expr "\\)" {{ return expr }} +\end{verbatim} + +The top-level rule is \emph{goal}, which says that we are looking for +an expression followed by the end of the string. The \texttt{END} +token is needed because without it, it isn't clear when to stop +parsing. For example, the string ``1+3'' could be parsed either as +the expression ``1'' followed by the string ``+3'' or it could be +parsed as the expression ``1+3''. By requiring expressions to end +with \texttt{END}, the parser is forced to take ``1+3''. + +In the two rules with repetition, the accumulator is named \texttt{v}. +After reading in one expression, we initialize the accumulator. Each +time through the loop, we modify the accumulator by adding, +subtracting, multiplying by, or dividing the previous accumulator by +the expression that has been parsed. At the end of the rule, we +return the accumulator. + +The calculator example shows how to process lists of elements using +loops, as well as how to handle precedence of operators. + +\emph{Note:} It's often important to put the \texttt{END} token in, so +put it in unless you are sure that your grammar has some other +non-ambiguous token marking the end of the program. 
+
+\mysubsection{Calculator with Memory}
+
+In the previous example we learned how to write a calculator that
+evaluates simple numerical expressions. In this section we will
+extend the example to support both local and global variables.
+
+To support global variables, we will add assignment statements to the
+``goal'' rule.
+
+\begin{verbatim}
+ rule goal: expr END {{ return expr }}
+ | 'set' ID expr END {{ global_vars[ID] = expr }}
+ {{ return expr }}
+\end{verbatim}
+
+To use these variables, we need a new kind of terminal:
+
+\begin{verbatim}
+ rule term: ... | ID {{ return global_vars[ID] }}
+\end{verbatim}
+
+So far, these changes are straightforward. We simply have a global
+dictionary \texttt{global\_vars} that stores the variables and values,
+we modify it when there is an assignment statement, and we look up
+variables in it when we see a variable name.
+
+To support local variables, we will add variable declarations to the
+set of allowed expressions.
+
+\begin{verbatim}
+ rule term: ... | 'let' VAR '=' expr 'in' expr ...
+\end{verbatim}
+
+This is where it becomes tricky. Local variables should be stored in
+a local dictionary, not in the global one. One trick would be to save
+a copy of the global dictionary, modify it, and then restore it
+later. In this example we will instead use \emph{attributes} to
+create local information and pass it to subrules.
+
+A rule can optionally take parameters. When we invoke the rule, we
+must pass in arguments. For local variables, let's use a single
+parameter, \texttt{local\_vars}:
+
+\begin{verbatim}
+ rule expr<<local_vars>>: ...
+ rule factor<<local_vars>>: ...
+ rule term<<local_vars>>: ...
+\end{verbatim}
+
+Each time we want to match \texttt{expr}, \texttt{factor}, or
+\texttt{term}, we will pass the local variables in the current rule to
+the subrule. One interesting case is when we pass as an argument
+something \emph{other} than \texttt{local\_vars}:
+
+\begin{verbatim}
+ rule term<<local_vars>>: ...
+ | 'let' VAR '=' expr<<local_vars>>
+ {{ local_vars = [(VAR, expr)] + local_vars }}
+ 'in' expr<<local_vars>>
+ {{ return expr }}
+\end{verbatim}
+
+Note that the assignment to the local variables list does not modify
+the original list. This is important to keep local variables from
+being seen outside the ``let''.
+
+The other interesting case is when we find a variable:
+
+\begin{verbatim}
+global_vars = {}
+
+def lookup(map, name):
+ for x,v in map: if x==name: return v
+ return global_vars[name]
+%%
+ ...
+ rule term<<local_vars>>: ...
+ | VAR {{ return lookup(local_vars, VAR) }}
+\end{verbatim}
+
+The lookup function will search through the local variable list, and
+if it cannot find the name there, it will look it up in the global
+variable dictionary.
+
+A complete grammar for this example, including a read-eval-print loop
+for interacting with the calculator, can be found in the examples
+subdirectory included with Yapps.
+
+In this section we saw how to insert code before the parser. We also
+saw how to use attributes to transmit local information from one rule
+to its subrules.
+
+\mysection{Grammars}
+
+Each Yapps grammar has a name, a list of tokens, and a set of
+production rules. A grammar named \texttt{X} will be used to produce
+a parser named \texttt{X} and a scanner named \texttt{XScanner}. As
+in Python, names are case sensitive, start with a letter, and contain
+letters, numbers, and underscores (\_).
+
+There are three kinds of tokens in Yapps: named, inline, and ignored.
+As their name implies, named tokens are given a name, using the token
+construct: \texttt{token \emph{name} : \emph{regexp}}. In a rule, the
+token can be matched by using the name. Inline tokens are regular
+expressions that are used in rules without being declared. Ignored
+tokens are declared using the ignore construct: \texttt{ignore:
+ \emph{regexp}}. These tokens are ignored by the scanner, and are
+not seen by the parser. Often whitespace is an ignored token.
The +regular expressions used to define tokens should use the syntax +defined in the \texttt{re} module, so some symbols may have to be +backslashed. + +Production rules in Yapps have a name and a pattern to match. If the +rule is parameterized, the name should be followed by a list of +parameter names in \verb|<<...>>|. A pattern can be a simple pattern +or a compound pattern. Simple patterns are the name of a named token, +a regular expression in quotes (inline token), the name of a +production rule (followed by arguments in \verb|<<...>>|, if the rule +has parameters), and single line Python statements (\verb|{{...}}|). +Compound patterns are sequences (\verb|A B C ...|), choices ( +\verb:A | B | C | ...:), options (\verb|[...]|), zero-or-more repetitions +(\verb|...*|), and one-or-more repetitions (\verb|...+|). Like +regular expressions, repetition operators have a higher precedence +than sequences, and sequences have a higher precedence than choices. + +Whenever \verb|{{...}}| is used, a legal one-line Python statement +should be put inside the braces. The token \verb|}}| should not +appear within the \verb|{{...}}| section, even within a string, since +Yapps does not attempt to parse the Python statement. A workaround +for strings is to put two strings together (\verb|"}" "}"|), or to use +backslashes (\verb|"}\}"|). At the end of a rule you should use a +\verb|{{ return X }}| statement to return a value. However, you +should \emph{not} use any control statements (\texttt{return}, +\texttt{continue}, \texttt{break}) in the middle of a rule. Yapps +needs to make assumptions about the control flow to generate a parser, +and any changes to the control flow will confuse Yapps. + +The \verb|<<...>>| form can occur in two places: to define parameters +to a rule and to give arguments when matching a rule. Parameters use +the syntax used for Python functions, so they can include default +arguments and the special forms (\verb|*args| and \verb|**kwargs|). 
+Arguments use the syntax for Python function call arguments, so they +can include normal arguments and keyword arguments. The token +\verb|>>| should not appear within the \verb|<<...>>| section. + +In both the statements and rule arguments, you can use names defined +by the parser to refer to matched patterns. You can refer to the text +matched by a named token by using the token name. You can use the +value returned by a production rule by using the name of that rule. +If a name \texttt{X} is matched more than once (such as in loops), you +will have to save the earlier value(s) in a temporary variable, and +then use that temporary variable in the return value. The next +section has an example of a name that occurs more than once. + +\mysubsection{Left Factoring} +\label{sec:Left-Factoring} + +Yapps produces ELL(1) parsers, which determine which clause to match +based on the first token available. Sometimes the leftmost tokens of +several clauses may be the same. The classic example is the +\emph{if/then/else} construct in Pascal: + +\begin{verbatim} +rule stmt: "if" expr "then" stmt {{ then_part = stmt }} + "else" stmt {{ return ('If',expr,then_part,stmt) }} + | "if" expr "then" stmt {{ return ('If',expr,stmt,[]) }} +\end{verbatim} + +(Note that we have to save the first \texttt{stmt} into a variable +because there is another \texttt{stmt} that will be matched.) The +left portions of the two clauses are the same, which presents a +problem for the parser. The solution is \emph{left-factoring}: the +common parts are put together, and \emph{then} a choice is made about +the remaining part: + +\begin{verbatim} +rule stmt: "if" expr + "then" stmt {{ then_part = stmt }} + {{ else_part = [] }} + [ "else" stmt {{ else_part = stmt }} ] + {{ return ('If', expr, then_part, else_part) }} +\end{verbatim} + +Unfortunately, the classic \emph{if/then/else} situation is +\emph{still} ambiguous when you left-factor. 
Yapps can deal with this +situation, but will report a warning; see section +\ref{sec:Ambiguous-Grammars} for details. + +In general, replace rules of the form: + +\begin{verbatim} +rule A: a b1 {{ return E1 }} + | a b2 {{ return E2 }} + | c3 {{ return E3 }} + | c4 {{ return E4 }} +\end{verbatim} + +with rules of the form: + +\begin{verbatim} +rule A: a ( b1 {{ return E1 }} + | b2 {{ return E2 }} + ) + | c3 {{ return E3 }} + | c4 {{ return E4 }} +\end{verbatim} + +\mysubsection{Left Recursion} + +A common construct in grammars is for matching a list of patterns, +sometimes separated with delimiters such as commas or semicolons. In +LR-based parser systems, we can parse a list with something like this: + +\begin{verbatim} +rule sum: NUM {{ return NUM }} + | sum "+" NUM {{ return (sum, NUM) }} +\end{verbatim} + +Parsing \texttt{1+2+3+4} would produce the output +\texttt{(((1,2),3),4)}, which is what we want from a left-associative +addition operator. Unfortunately, this grammar is \emph{left +recursive,} because the \texttt{sum} rule contains a clause that +begins with \texttt{sum}. (The recursion occurs at the left side of +the clause.) + +We must restructure this grammar to be \emph{right recursive} instead: + +\begin{verbatim} +rule sum: NUM {{ return NUM }} + | NUM "+" sum {{ return (NUM, sum) }} +\end{verbatim} + +Unfortunately, using this grammar, \texttt{1+2+3+4} would be parsed as +\texttt{(1,(2,(3,4)))}, which no longer follows left associativity. +The rule also needs to be left-factored. 
Instead, we write the +pattern as a loop instead: + +\begin{verbatim} +rule sum: NUM {{ v = NUM }} + ( "[+]" NUM {{ v = (v,NUM) }} )* + {{ return v }} +\end{verbatim} + +In general, replace rules of the form: + +\begin{verbatim} +rule A: A a1 -> << E1 >> + | A a2 -> << E2 >> + | b3 -> << E3 >> + | b4 -> << E4 >> +\end{verbatim} + +with rules of the form: + +\begin{verbatim} +rule A: ( b3 {{ A = E3 }} + | b4 {{ A = E4 }} ) + ( a1 {{ A = E1 }} + | a2 {{ A = E2 }} )* + {{ return A }} +\end{verbatim} + +We have taken a rule that proved problematic for with recursion and +turned it into a rule that works well with looping constructs. + +\mysubsection{Ambiguous Grammars} +\label{sec:Ambiguous-Grammars} + +In section \ref{sec:Left-Factoring} we saw the classic if/then/else +ambiguity, which occurs because the ``else \ldots'' portion of an ``if +\ldots then \ldots else \ldots'' construct is optional. Programs with +nested if/then/else constructs can be ambiguous when one of the else +clauses is missing: +\begin{verbatim} +if 1 then if 1 then + if 5 then if 5 then + x := 1; x := 1; + else else + y := 9; y := 9; +\end{verbatim} + +The indentation shows that the program can be parsed in two different +ways. (Of course, if we all would adopt Python's indentation-based +structuring, this would never happen!) Usually we want the parsing on +the left: the ``else'' should be associated with the closest ``if'' +statement. In section \ref{sec:Left-Factoring} we ``solved'' the +problem by using the following grammar: + +\begin{verbatim} +rule stmt: "if" expr + "then" stmt {{ then_part = stmt }} + {{ else_part = [] }} + [ "else" stmt {{ else_part = stmt }} ] + {{ return ('If', expr, then_part, else_part) }} +\end{verbatim} + +Here, we have an optional match of ``else'' followed by a statement. +The ambiguity is that if an ``else'' is present, it is not clear +whether you want it parsed immediately or if you want it to be parsed +by the outer ``if''. 
+
+Yapps will deal with the situation by matching the else pattern
+when it can. The parser will work in this case because it prefers the
+\emph{first} matching clause, which tells Yapps to parse the ``else''.
+That is exactly what we want!
+
+For ambiguity cases with choices, Yapps will choose the \emph{first}
+matching choice. However, remember that Yapps only looks at the first
+token to determine its decision, so {\tt (a b | a c)} will result in
+Yapps choosing {\tt a b} even when the input is {\tt a c}. It only
+looks at the first token, {\tt a}, to make its decision.
+
+\mysection{Customization}
+
+Both the parsers and the scanners can be customized. The parser is
+usually extended by subclassing, and the scanner can either be
+subclassed or completely replaced.
+
+\mysubsection{Customizing Parsers}
+
+If additional fields and methods are needed in order for a parser to
+work, Python subclassing can be used. (This is unlike parser classes
+written in static languages, in which these fields and methods must be
+defined in the generated parser class.) We simply subclass the
+generated parser, and add any fields or methods required. Expressions
+in the grammar can call methods of the subclass to perform any actions
+that cannot be expressed as a simple expression. For example,
+consider this simple grammar:
+
+\begin{verbatim}
+parser X:
+ rule goal: "something" {{ self.printmsg() }}
+\end{verbatim}
+
+The \texttt{printmsg} function need not be implemented in the parser
+class \texttt{X}; it can be implemented in a subclass:
+
+\begin{verbatim}
+import Xparser
+
+class MyX(Xparser.X):
+ def printmsg(self):
+ print "Hello!"
+\end{verbatim}
+
+\mysubsection{Customizing Scanners}
+
+The generated parser class is not dependent on the generated scanner
+class. A scanner object is passed to the parser object's constructor
+in the \texttt{parse} function.
To use a different scanner, write
+your own function to construct parser objects, with an instance of a
+different scanner. Scanner objects must have a \texttt{token} method
+that accepts an integer \texttt{N} as well as a list of allowed token
+types, and returns the Nth token, as a tuple. The default scanner
+raises \texttt{NoMoreTokens} if no tokens are available, and
+\texttt{SyntaxError} if no token could be matched. However, the
+parser does not rely on these exceptions; only the \texttt{parse}
+convenience function (which calls \texttt{wrap\_error\_reporter}) and
+the \texttt{print\_error} error display function use those exceptions.
+
+The tuples representing tokens have four elements. The first two are
+the beginning and ending indices of the matched text in the input
+string. The third element is the type tag, matching either the name
+of a named token or the quoted regexp of an inline or ignored token.
+The fourth element of the token tuple is the matched text. If the
+input string is \texttt{s}, and the token tuple is
+\texttt{(b,e,type,val)}, then \texttt{val} should be equal to
+\texttt{s[b:e]}.
+
+The generated parsers do not use the beginning or ending index. They use
+only the token type and value. However, the default error reporter
+uses the beginning and ending index to show the user where the error
+is.
+
+\mysection{Parser Mechanics}
+
+The base parser class (Parser) defines two methods, \texttt{\_scan}
+and \texttt{\_peek}, and two fields, \texttt{\_pos} and
+\texttt{\_scanner}. The generated parser inherits from the base
+parser, and contains one method for each rule in the grammar. To
+avoid name clashes, do not use names that begin with an underscore
+(\texttt{\_}).
+
+\mysubsection{Parser Objects}
+\label{sec:Parser-Objects}
+
+Yapps produces as output two exception classes, a scanner class, a
+parser class, and a function \texttt{parse} that puts everything
+together.
The \texttt{parse} function does not have to be used; +instead, one can create a parser and scanner object and use them +together for parsing. + +\begin{verbatim} + def parse(rule, text): + P = X(XScanner(text)) + return wrap_error_reporter(P, rule) +\end{verbatim} + +The \texttt{parse} function takes a name of a rule and an input string +as input. It creates a scanner and parser object, then calls +\texttt{wrap\_error\_reporter} to execute the method in the parser +object named \texttt{rule}. The wrapper function will call the +appropriate parser rule and report any parsing errors to standard +output. + +There are several situations in which the \texttt{parse} function +would not be useful. If a different parser or scanner is being used, +or exceptions are to be handled differently, a new \texttt{parse} +function would be required. The supplied \texttt{parse} function can +be used as a template for writing a function for your own needs. An +example of a custom parse function is the \texttt{generate} function +in \texttt{Yapps.py}. + +\mysubsection{Context Sensitive Scanner} + +Unlike most scanners, the scanner produced by Yapps can take into +account the context in which tokens are needed, and try to match only +good tokens. For example, in the grammar: + +\begin{verbatim} +parser IniFile: + token ID: "[a-zA-Z_0-9]+" + token VAL: ".*" + + rule pair: ID "[ \t]*=[ \t]*" VAL "\n" +\end{verbatim} + +we would like to scan lines of text and pick out a name/value pair. +In a conventional scanner, the input string \texttt{shell=progman.exe} +would be turned into a single token of type \texttt{VAL}. The Yapps +scanner, however, knows that at the beginning of the line, an +\texttt{ID} is expected, so it will return \texttt{"shell"} as a token +of type \texttt{ID}. Later, it will return \texttt{"progman.exe"} as +a token of type \texttt{VAL}. 
+ +Context sensitivity decreases the separation between scanner and +parser, but it is useful in parsers like \texttt{IniFile}, where the +tokens themselves are not unambiguous, but \emph{are} unambiguous +given a particular stage in the parsing process. + +Unfortunately, context sensitivity can make it more difficult to +detect errors in the input. For example, in parsing a Pascal-like +language with ``begin'' and ``end'' as keywords, a context sensitive +scanner would only match ``end'' as the END token if the parser is in +a place that will accept the END token. If not, then the scanner +would match ``end'' as an identifier. To disable the context +sensitive scanner in Yapps, add the +\texttt{context-insensitive-scanner} option to the grammar: + +\begin{verbatim} +Parser X: + option: "context-insensitive-scanner" +\end{verbatim} + +Context-insensitive scanning makes the parser look cleaner as well. + +\mysubsection{Internal Variables} + +There are two internal fields that may be of use. The parser object +has two fields, \texttt{\_pos}, which is the index of the current +token being matched, and \texttt{\_scanner}, which is the scanner +object. The token itself can be retrieved by accessing the scanner +object and calling the \texttt{token} method with the token index. However, if you call \texttt{token} before the token has been requested by the parser, it may mess up a context-sensitive scanner.\footnote{When using a context-sensitive scanner, the parser tells the scanner what the valid token types are at each point. If you call \texttt{token} before the parser can tell the scanner the valid token types, the scanner will attempt to match without considering the context.} A +potentially useful combination of these fields is to extract the +portion of the input matched by the current rule. 
To do this, just save the scanner state (\texttt{\_scanner.pos}) before the text is matched and then again after the text is matched:
+
+\begin{verbatim}
+ rule R:
+ {{ start = self._scanner.pos }}
+ a b c
+ {{ end = self._scanner.pos }}
+ {{ print 'Text is', self._scanner.input[start:end] }}
+\end{verbatim}
+
+\mysubsection{Pre- and Post-Parser Code}
+
+Sometimes the parser code needs to rely on helper variables,
+functions, and classes. A Yapps grammar can optionally be surrounded
+by double percent signs, to separate the grammar from Python code.
+
+\begin{verbatim}
+... Python code ...
+%%
+... Yapps grammar ...
+%%
+... Python code ...
+\end{verbatim}
+
+The second \verb|%%| can be omitted if there is no Python code at the
+end, and the first \verb|%%| can be omitted if there is no extra
+Python code at all. (To have code only at the end, both separators
+are required.)
+
+If the second \verb|%%| is omitted, Yapps will insert testing code
+that allows you to use the generated parser to parse a file.
+
+The extended calculator example in the Yapps examples subdirectory
+includes both pre-parser and post-parser code.
+
+\mysubsection{Representation of Grammars}
+
+For each kind of pattern there is a class derived from Pattern. Yapps
+has classes for Terminal, NonTerminal, Sequence, Choice, Option, Plus,
+Star, and Eval. Each of these classes has the following interface:
+
+\begin{itemize}
+ \item[setup(\emph{gen})] Set accepts-$\epsilon$, and call
+ \emph{gen.changed()} if it changed. This function can change the
+ flag from false to true but \emph{not} from true to false.
+ \item[update(\emph{gen})] Set \first and \follow, and call
+ \emph{gen.changed()} if either changed. This function can add to
+ the sets but \emph{not} remove from them.
+ \item[output(\emph{gen}, \emph{indent})] Generate code for matching
+ this rule, using \emph{indent} as the current indentation level.
+ Writes are performed using \emph{gen.write}.
+ \item[used(\emph{vars})] Given a list of variables \emph{vars}, + return two lists: one containing the variables that are used, and + one containing the variables that are assigned. This function is + used for optimizing the resulting code. +\end{itemize} + +Both \emph{setup} and \emph{update} monotonically increase the +variables they modify. Since the variables can only increase a finite +number of times, we can repeatedly call the function until the +variable stabilized. The \emph{used} function is not currently +implemented. + +With each pattern in the grammar Yapps associates three pieces of +information: the \first set, the \follow set, and the +accepts-$\epsilon$ flag. + +The \first set contains the tokens that can appear as we start +matching the pattern. The \follow set contains the tokens that can +appear immediately after we match the pattern. The accepts-$\epsilon$ +flag is true if the pattern can match no tokens. In this case, \first +will contain all the elements in \follow. The \follow set is not +needed when accepts-$\epsilon$ is false, and may not be accurate in +those cases. + +Yapps does not compute these sets precisely. Its approximation can +miss certain cases, such as this one: + +\begin{verbatim} + rule C: ( A* | B ) + rule B: C [A] +\end{verbatim} + +Yapps will calculate {\tt C}'s \follow set to include {\tt A}. +However, {\tt C} will always match all the {\tt A}'s, so {\tt A} will +never follow it. Yapps 2.0 does not properly handle this construct, +but if it seems important, I may add support for it in a future +version. + +Yapps also cannot handle constructs that depend on the calling +sequence. For example: + +\begin{verbatim} + rule R: U | 'b' + rule S: | 'c' + rule T: S 'b' + rule U: S 'a' +\end{verbatim} + +The \follow set for {\tt S} includes {\tt a} and {\tt b}. Since {\tt + S} can be empty, the \first set for {\tt S} should include {\tt a}, +{\tt b}, and {\tt c}. 
However, when parsing {\tt R}, if the lookahead
+is {\tt b} we should \emph{not} parse {\tt U}. That's because in {\tt
+ U}, {\tt S} is followed by {\tt a} and not {\tt b}. Therefore in
+{\tt R}, we should choose rule {\tt U} only if there is an {\tt a} or
+{\tt c}, but not if there is a {\tt b}. Yapps and many other LL(1)
+systems do not distinguish {\tt S b} and {\tt S a}, making {\tt
+ S}'s \follow set {\tt a, b}, and making {\tt R} always try to match
+{\tt U}. In this case we can solve the problem by changing {\tt R} to
+\verb:'b' | U: but it may not always be possible to solve all such
+problems in this way.
+
+\appendix
+
+\mysection{Grammar for Parsers}
+
+This is the grammar for parsers, without any Python code mixed in.
+The complete grammar can be found in \texttt{parsedesc.g} in the Yapps
+distribution.
+
+\begin{verbatim}
+parser ParserDescription:
+ ignore: "\\s+"
+ ignore: "#.*?\r?\n"
+ token END: "$" # $ means end of string
+ token ATTR: "<<.+?>>"
+ token STMT: "{{.+?}}"
+ token ID: '[a-zA-Z_][a-zA-Z_0-9]*'
+ token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'
+
+ rule Parser: "parser" ID ":"
+ Options
+ Tokens
+ Rules
+ END
+
+ rule Options: ( "option" ":" STR )*
+ rule Tokens: ( "token" ID ":" STR | "ignore" ":" STR )*
+ rule Rules: ( "rule" ID OptParam ":" ClauseA )*
+
+ rule ClauseA: ClauseB ( '[|]' ClauseB )*
+ rule ClauseB: ClauseC*
+ rule ClauseC: ClauseD [ '[+]' | '[*]' ]
+ rule ClauseD: STR | ID [ATTR] | STMT
+ | '\\(' ClauseA '\\)' | '\\[' ClauseA '\\]'
+\end{verbatim}
+
+\mysection{Upgrading}
+
+Yapps 2.0 is not backwards compatible with Yapps 1.0. In this section
+are some tips for upgrading:
+
+\begin{enumerate}
+ \item Yapps 1.0 was distributed as a single file. Yapps 2.0 is
+ instead distributed as two Python files: a \emph{parser generator}
+ (26k) and a \emph{parser runtime} (5k). You need both files to
+ create parsers, but you need only the runtime (\texttt{yappsrt.py})
+ to use the parsers.
+ + \item Yapps 1.0 supported Python 1.4 regular expressions from the + \texttt{regex} module. Yapps 2.0 uses Python 1.5 regular + expressions from the \texttt{re} module. \emph{The new syntax for + regular expressions is not compatible with the old syntax.} + Andrew Kuchling has a \htmladdnormallink{guide to converting + regular + expressions}{http://www.python.org/doc/howto/regex-to-re/} on his + web page. + + \item Yapps 1.0 wants a pattern and then a return value in \verb|->| + \verb|<<...>>|. Yapps 2.0 allows patterns and Python statements to + be mixed. To convert a rule like this: + +\begin{verbatim} +rule R: A B C -> << E1 >> + | X Y Z -> << E2 >> +\end{verbatim} + + to Yapps 2.0 form, replace the return value specifiers with return + statements: + +\begin{verbatim} +rule R: A B C {{ return E1 }} + | X Y Z {{ return E2 }} +\end{verbatim} + + \item Yapps 2.0 does not perform tail recursion elimination. This + means any recursive rules you write will be turned into recursive + methods in the parser. The parser will work, but may be slower. + It can be made faster by rewriting recursive rules, using instead + the looping operators \verb|*| and \verb|+| provided in Yapps 2.0. + +\end{enumerate} + +\mysection{Troubleshooting} + +\begin{itemize} + \item A common error is to write a grammar that doesn't have an END + token. End tokens are needed when it is not clear when to stop + parsing. For example, when parsing the expression {\tt 3+5}, it is + not clear after reading {\tt 3} whether to treat it as a complete + expression or whether the parser should continue reading. + Therefore the grammar for numeric expressions should include an end + token. Another example is the grammar for Lisp expressions. In + Lisp, it is always clear when you should stop parsing, so you do + \emph{not} need an end token. In fact, it may be more useful not + to have an end token, so that you can read in several Lisp expressions. 
+ \item If there is a chance of ambiguity, make sure to put the choices + in the order you want them checked. Usually the most specific + choice should be first. Empty sequences should usually be last. + \item The context sensitive scanner is not appropriate for all + grammars. You might try using the insensitive scanner with the + {\tt context-insensitive-scanner} option in the grammar. + \item If performance turns out to be a problem, try writing a custom + scanner. The Yapps scanner is rather slow (but flexible and easy + to understand). +\end{itemize} + +\mysection{History} + +Yapps 1 had several limitations that bothered me while writing +parsers: + +\begin{enumerate} + \item It was not possible to insert statements into the generated + parser. A common workaround was to write an auxilliary function + that executed those statements, and to call that function as part + of the return value calculation. For example, several of my + parsers had an ``append(x,y)'' function that existed solely to call + ``x.append(y)''. + \item The way in which grammars were specified was rather + restrictive: a rule was a choice of clauses. Each clause was a + sequence of tokens and rule names, followed by a return value. + \item Optional matching had to be put into a separate rule because + choices were only made at the beginning of a rule. + \item Repetition had to be specified in terms of recursion. Not only + was this awkward (sometimes requiring additional rules), I had to + add a tail recursion optimization to Yapps to transform the + recursion back into a loop. +\end{enumerate} + +Yapps 2 addresses each of these limitations. + +\begin{enumerate} + \item Statements can occur anywhere within a rule. (However, only + one-line statements are allowed; multiline blocks marked by + indentation are not.) + \item Grammars can be specified using any mix of sequences, choices, + tokens, and rule names. To allow for complex structures, + parentheses can be used for grouping. 
+ \item Given choices and parenthesization, optional matching can be + expressed as a choice between some pattern and nothing. In + addition, Yapps 2 has the convenience syntax \verb|[A B ...]| for + matching \verb|A B ...| optionally. + \item Repetition operators \verb|*| for zero or more and \verb|+| for + one or more make it easy to specify repeating patterns. +\end{enumerate} + +It is my hope that Yapps 2 will be flexible enough to meet my needs +for another year, yet simple enough that I do not hesitate to use it. + +\mysection{Debian Extensions} +\label{sec:debian} + +The Debian version adds the following enhancements to the original +Yapps code. They were written by Matthias Urlichs. + +\begin{enumerate} + \item Yapps can stack input sources ("include files"). A usage example + is supplied with the calc.g sample program. + \item Yapps now understands augmented ignore-able patterns. + This means that Yapps can parse multi-line C comments; this wasn't + possible before. + \item Better error reporting. + \item Yapps now reads its input incrementally. +\end{enumerate} + +The generated parser has been renamed to \texttt{yapps/runtime.py}. +In Debian, this file is provided by the \texttt{yapps2-runtime} package. +You need to depend on it if you Debianize Python programs which use +yapps. + +\mysection{Future Extensions} +\label{sec:future} + +I am still investigating the possibility of LL(2) and higher +lookahead. However, it looks like the resulting parsers will be +somewhat ugly. + +It would be nice to control choices with user-defined predicates. + +The most likely future extension is backtracking. A grammar pattern +like \verb|(VAR ':=' expr)? {{ return Assign(VAR,expr) }} : expr {{ return expr }}| +would turn into code that attempted to match \verb|VAR ':=' expr|. If +it succeeded, it would run \verb|{{ return ... }}|. If it failed, it +would match \verb|expr {{ return expr }}|. Backtracking may make it +less necessary to write LL(2) grammars. 
+
+\mysection{References}
+
+\begin{enumerate}
+ \item The \htmladdnormallink{Python-Parser
+ SIG}{http://www.python.org/sigs/parser-sig/} is the first place
+ to look for a list of parser systems for Python.
+
+ \item ANTLR/PCCTS, by Terence Parr, is available at
+ \htmladdnormallink{The ANTLR Home Page}{http://www.antlr.org/}.
+
+ \item PyLR, by Scott Cotton, is at \htmladdnormallink{his Starship
+ page}{http://starship.skyport.net/crew/scott/PyLR.html}.
+
+ \item John Aycock's \htmladdnormallink{Compiling Little Languages
+ Framework}{http://www.foretec.com/python/workshops/1998-11/proceedings/papers/aycock-little/aycock-little.html}.
+
+ \item PyBison, by Scott Hassan, can be found at
+ \htmladdnormallink{his Python Projects
+ page}{http://coho.stanford.edu/\~{}hassan/Python/}.
+
+ \item mcf.pars, by Mike C. Fletcher, is available at
+ \htmladdnormallink{his web
+ page}{http://members.rogers.com/mcfletch/programming/simpleparse/simpleparse.html}.
+
+ \item kwParsing, by Aaron Watters, is available at
+ \htmladdnormallink{his Starship
+ page}{http://starship.skyport.net/crew/aaron_watters/kwParsing/}.
+\end{enumerate} + +\end{document} diff --git a/examples/calc.g b/examples/calc.g new file mode 100644 index 0000000..5432855 --- /dev/null +++ b/examples/calc.g @@ -0,0 +1,64 @@ +globalvars = {} # We will store the calculator's variables here + +def lookup(map, name): + for x,v in map: + if x == name: return v + if not globalvars.has_key(name): print 'Undefined (defaulting to 0):', name + return globalvars.get(name, 0) + +def stack_input(scanner,ign): + """Grab more input""" + scanner.stack_input(raw_input(">?> ")) + +%% +parser Calculator: + ignore: "[ \r\t\n]+" + ignore: "[?]" {{ stack_input }} + + token END: "$" + token NUM: "[0-9]+" + token VAR: "[a-zA-Z_]+" + + # Each line can either be an expression or an assignment statement + rule goal: expr<<[]>> END {{ print '=', expr }} + {{ return expr }} + | "set" VAR expr<<[]>> END {{ globalvars[VAR] = expr }} + {{ print VAR, '=', expr }} + {{ return expr }} + + # An expression is the sum and difference of factors + rule expr<>: factor<> {{ n = factor }} + ( "[+]" factor<> {{ n = n+factor }} + | "-" factor<> {{ n = n-factor }} + )* {{ return n }} + + # A factor is the product and division of terms + rule factor<>: term<> {{ v = term }} + ( "[*]" term<> {{ v = v*term }} + | "/" term<> {{ v = v/term }} + )* {{ return v }} + + # A term is a number, variable, or an expression surrounded by parentheses + rule term<>: + NUM {{ return int(NUM) }} + | VAR {{ return lookup(V, VAR) }} + | "\\(" expr "\\)" {{ return expr }} + | "let" VAR "=" expr<> {{ V = [(VAR, expr)] + V }} + "in" expr<> {{ return expr }} +%% +if __name__=='__main__': + print 'Welcome to the calculator sample for Yapps 2.' + print ' Enter either "" or "set ",' + print ' or just press return to exit. 
An expression can have' + print ' local variables: let x = expr in expr' + # We could have put this loop into the parser, by making the + # `goal' rule use (expr | set var expr)*, but by putting the + # loop into Python code, we can make it interactive (i.e., enter + # one expression, get the result, enter another expression, etc.) + while 1: + try: s = raw_input('>>> ') + except EOFError: break + if not s.strip(): break + parse('goal', s) + print 'Bye.' + diff --git a/examples/expr.g b/examples/expr.g new file mode 100644 index 0000000..ae807b7 --- /dev/null +++ b/examples/expr.g @@ -0,0 +1,21 @@ +parser Calculator: + token END: "$" + token NUM: "[0-9]+" + + rule goal: expr END {{ return expr }} + + # An expression is the sum and difference of factors + rule expr: factor {{ v = factor }} + ( "[+]" factor {{ v = v+factor }} + | "-" factor {{ v = v-factor }} + )* {{ return v }} + + # A factor is the product and division of terms + rule factor: term {{ v = term }} + ( "[*]" term {{ v = v*term }} + | "/" term {{ v = v/term }} + )* {{ return v }} + + # A term is either a number or an expression surrounded by parentheses + rule term: NUM {{ return atoi(NUM) }} + | "\\(" expr "\\)" {{ return expr }} diff --git a/examples/lisp.g b/examples/lisp.g new file mode 100644 index 0000000..551e6f9 --- /dev/null +++ b/examples/lisp.g @@ -0,0 +1,13 @@ +parser Lisp: + ignore: r'\s+' + token NUM: r'[0-9]+' + token ID: r'[-+*/!@$%^&=.a-zA-Z0-9_]+' + token STR: r'"([^\\"]+|\\.)*"' + + rule expr: ID {{ return ('id',ID) }} + | STR {{ return ('str',eval(STR)) }} + | NUM {{ return ('num',int(NUM)) }} + | r"\(" + {{ e = [] }} # initialize the list + ( expr {{ e.append(expr) }} ) * # put each expr into the list + r"\)" {{ return e }} # return the list diff --git a/examples/notes b/examples/notes new file mode 100644 index 0000000..aa45bfb --- /dev/null +++ b/examples/notes @@ -0,0 +1,44 @@ +Hints +##### + +Some additional hints for your edification. 
+ +Author: Matthias Urlichs + +How to process C preprocessor codes: +==================================== + +Rudimentary include handling has been added to the parser by me. + +However, if you want to do anything fancy, like for instance whatever +the C preprocessor does, things get more complicated. Fortunately, +there's already a nice tool to handle C preprocessing -- CPP itself. + +If you want to report errors correctly in that situation, do this: + + def set_line(s,m): + """Fixup the scanner's idea of the current line""" + s.filename = m.group(2) + line = int(m.group(1)) + s.del_line = line - s.line + + %% + parser whatever: + ignore: '^#\s*(\d+)\s*"([^"\n]+)"\s*\n' {{ set_line }} + ignore: '^#.*\n' + + [...] + %% + if __name__=='__main__': + import sys,os + for a in sys.argv[1:]: + f=os.popen("cpp "+repr(a),"r") + + P = whatever(whateverScanner("", filename=a, file=f)) + try: P.goal() + except runtime.SyntaxError, e: + runtime.print_error(e, P._scanner) + sys.exit(1) + + f.close() + diff --git a/examples/xml.g b/examples/xml.g new file mode 100644 index 0000000..cec2709 --- /dev/null +++ b/examples/xml.g @@ -0,0 +1,66 @@ +#!/usr/bin/python2 + +# xml.g +# +# Amit J. Patel, August 2003 +# +# Simple (non-conforming, non-validating) parsing of XML documents, +# based on Robert D. Cameron's "REX" shallow parser. It doesn't +# handle CDATA and lots of other stuff; it's meant to demonstrate +# Yapps, not replace a proper XML parser. + +%% + +parser xml: + token nodetext: r'[^<>]+' + token attrtext_singlequote: "[^']*" + token attrtext_doublequote: '[^"]*' + token SP: r'\s' + token id: r'[a-zA-Z_:][a-zA-Z0-9_:.-]*' + + rule node: + r'' {{ return ['!--comment'] }} + | r'' {{ return ['![CDATA['] }} + | r']*>' {{ return ['!doctype'] }} + | '<' SP* id SP* attributes SP* {{ startid = id }} + ( '>' nodes '' {{ assert startid == id, 'Mismatched tags <%s> ... 
' % (startid, id) }} + {{ return [id, attributes] + nodes }} + | '/\s*>' {{ return [id, attributes] }} + ) + | nodetext {{ return nodetext }} + + rule nodes: {{ result = [] }} + ( node {{ result.append(node) }} + ) * {{ return result }} + + rule attribute: id SP* '=' SP* + ( '"' attrtext_doublequote '"' {{ return (id, attrtext_doublequote) }} + | "'" attrtext_singlequote "'" {{ return (id, attrtext_singlequote) }} + ) + + rule attributes: {{ result = {} }} + ( attribute SP* {{ result[attribute[0]] = attribute[1] }} + ) * {{ return result }} + +%% + +if __name__ == '__main__': + tests = ['', + 'some text', + '< bad xml', + '
', + '< spacey a = "foo" / >', + 'text ... ', + ' middle ', + ' foo bar ', + ] + print + print '____Running tests_______________________________________' + for test in tests: + print + try: + parser = xml(xmlScanner(test)) + output = '%s ==> %s' % (repr(test), repr(parser.node())) + except (yappsrt.SyntaxError, AssertionError), e: + output = '%s ==> FAILED ==> %s' % (repr(test), e) + print output diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0d7c98d --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +"""Setup script for 'yapps'""" + +from distutils.core import setup + +description = "Yet Another Python Parser System" +long_description = \ +""" +YAPPS is an easy to use parser generator that is written in Python and +generates Python code. There are several parser generator systems +already available for Python, but this parser has different goals: +Yapps is simple, very easy to use, and produces human-readable parsers. + +It is not the fastest or most powerful parser. Yapps is designed to be +used when regular expressions are not enough and other parser systems +are too much: situations where you might otherwise write your own +recursive descent parser. + +This package contains several upward-compatible enhancements to the +original YAPPS source: +- Handle stacked input ("include files") +- augmented ignore-able patterns (can parse multi-line C comments correctly) +- better error reporting +- read input incrementally +""" + +setup (name = "python-yapps", + version = "2.1.1", + description = description, + long_description = long_description, + author = "Amit J. 
Patel", + author_email = "amitp@cs.stanford.edu", + maintainer = "Matthias Urlichs", + maintainer_email = "smurf@debian.org", + url = "http://theory.stanford.edu/~amitp/yapps/", + license = 'MIT', + platforms = ['POSIX'], + keywords = ['parsing'], + packages = ['yapps'], + #cmdclass = {'bdist_rpm': MyBDist_RPM}, + ) diff --git a/test/empty_clauses.g b/test/empty_clauses.g new file mode 100644 index 0000000..3b435f2 --- /dev/null +++ b/test/empty_clauses.g @@ -0,0 +1,10 @@ +# This parser tests the use of OR clauses with one of them being empty +# +# The output of --dump should indicate the FOLLOW set for (A | ) is 'c'. + +parser Test: + rule TestPlus: ( A | ) 'c' + rule A: 'a'+ + + rule TestStar: ( B | ) 'c' + rule B: 'b'* \ No newline at end of file diff --git a/test/line_numbers.g b/test/line_numbers.g new file mode 100644 index 0000000..19eee9a --- /dev/null +++ b/test/line_numbers.g @@ -0,0 +1,10 @@ +# +# The error messages produced by Yapps have a line number. +# The line number should take the Python code section into account. + +# The line number should be 10. + +%% + +parser error_1: + this_is_an_error; diff --git a/test/option.g b/test/option.g new file mode 100644 index 0000000..5980362 --- /dev/null +++ b/test/option.g @@ -0,0 +1,17 @@ + +%% + +parser test_option: + ignore: r'\s+' + token a: 'a' + token b: 'b' + token EOF: r'$' + + rule test_brackets: a [b] EOF + + rule test_question_mark: a b? EOF + +%% + +# The generated code for test_brackets and test_question_mark should +# be the same. diff --git a/yapps/__init__.py b/yapps/__init__.py new file mode 100644 index 0000000..1bb8bf6 --- /dev/null +++ b/yapps/__init__.py @@ -0,0 +1 @@ +# empty diff --git a/yapps/grammar.py b/yapps/grammar.py new file mode 100644 index 0000000..1714344 --- /dev/null +++ b/yapps/grammar.py @@ -0,0 +1,211 @@ +# grammar.py, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. 
Patel +# +# This version of the Yapps 2 grammar can be distributed under the +# terms of the MIT open source license, either found in the LICENSE +# file included with the Yapps distribution +# or at +# +# + +"""Parser for Yapps grammars. + +This file defines the grammar of Yapps grammars. Naturally, it is +implemented in Yapps. The grammar.py module needed by Yapps is built +by running Yapps on yapps_grammar.g. (Holy circularity, Batman!) + +""" + +import sys, re +from yapps import parsetree + +###################################################################### +def cleanup_choice(rule, lst): + if len(lst) == 0: return Sequence(rule, []) + if len(lst) == 1: return lst[0] + return parsetree.Choice(rule, *tuple(lst)) + +def cleanup_sequence(rule, lst): + if len(lst) == 1: return lst[0] + return parsetree.Sequence(rule, *tuple(lst)) + +def resolve_name(rule, tokens, id, args): + if id in [x[0] for x in tokens]: + # It's a token + if args: + print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) + return parsetree.Terminal(rule, id) + else: + # It's a name, so assume it's a nonterminal + return parsetree.NonTerminal(rule, id, args) + + +# Begin -- grammar generated by Yapps +import sys, re +from yapps import runtime + +class ParserDescriptionScanner(runtime.Scanner): + patterns = [ + ('"rule"', re.compile('rule')), + ('"ignore"', re.compile('ignore')), + ('"token"', re.compile('token')), + ('"option"', re.compile('option')), + ('":"', re.compile(':')), + ('"parser"', re.compile('parser')), + ('[ \t\r\n]+', re.compile('[ \t\r\n]+')), + ('#.*?\r?\n', re.compile('#.*?\r?\n')), + ('EOF', re.compile('$')), + ('ATTR', re.compile('<<.+?>>')), + ('STMT', re.compile('{{.+?}}')), + ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')), + ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')), + ('LP', re.compile('\\(')), + ('RP', re.compile('\\)')), + ('LB', re.compile('\\[')), + ('RB', re.compile('\\]')), + ('OR', re.compile('[|]')), + 
('STAR', re.compile('[*]')), + ('PLUS', re.compile('[+]')), + ('QUEST', re.compile('[?]')), + ('COLON', re.compile(':')), + ] + def __init__(self, str,*args,**kw): + runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw) + +class ParserDescription(runtime.Parser): + Context = runtime.Context + def Parser(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Parser', []) + self._scan('"parser"', context=_context) + ID = self._scan('ID', context=_context) + self._scan('":"', context=_context) + Options = self.Options(_context) + Tokens = self.Tokens(_context) + Rules = self.Rules(Tokens, _context) + EOF = self._scan('EOF', context=_context) + return parsetree.Generator(ID,Options,Tokens,Rules) + + def Options(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Options', []) + opt = {} + while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"': + self._scan('"option"', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + opt[Str] = 1 + return opt + + def Tokens(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Tokens', []) + tok = [] + while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']: + _token = self._peek('"token"', '"ignore"', context=_context) + if _token == '"token"': + self._scan('"token"', context=_context) + ID = self._scan('ID', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + tok.append( (ID,Str) ) + else: # == '"ignore"' + self._scan('"ignore"', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + ign = ('#ignore',Str) + if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT': + STMT = self._scan('STMT', context=_context) + ign = ign + (STMT[2:-2],) + tok.append( ign ) + return tok + + def Rules(self, tokens, _parent=None): + _context = 
self.Context(_parent, self._scanner, 'Rules', [tokens]) + rul = [] + while self._peek('"rule"', 'EOF', context=_context) == '"rule"': + self._scan('"rule"', context=_context) + ID = self._scan('ID', context=_context) + OptParam = self.OptParam(_context) + self._scan('":"', context=_context) + ClauseA = self.ClauseA(ID, tokens, _context) + rul.append( (ID, OptParam, ClauseA) ) + return rul + + def ClauseA(self, rule, tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens]) + ClauseB = self.ClauseB(rule,tokens, _context) + v = [ClauseB] + while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR': + OR = self._scan('OR', context=_context) + ClauseB = self.ClauseB(rule,tokens, _context) + v.append(ClauseB) + return cleanup_choice(rule,v) + + def ClauseB(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens]) + v = [] + while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']: + ClauseC = self.ClauseC(rule,tokens, _context) + v.append(ClauseC) + return cleanup_sequence(rule, v) + + def ClauseC(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens]) + ClauseD = self.ClauseD(rule,tokens, _context) + _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) + if _token == 'PLUS': + PLUS = self._scan('PLUS', context=_context) + return parsetree.Plus(rule, ClauseD) + elif _token == 'STAR': + STAR = self._scan('STAR', context=_context) + return parsetree.Star(rule, ClauseD) + elif _token == 'QUEST': + QUEST = self._scan('QUEST', context=_context) + return parsetree.Option(rule, ClauseD) + else: + return ClauseD + + def ClauseD(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens]) + _token = 
self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context) + if _token == 'STR': + STR = self._scan('STR', context=_context) + t = (STR, eval(STR,{},{})) + if t not in tokens: tokens.insert( 0, t ) + return parsetree.Terminal(rule, STR) + elif _token == 'ID': + ID = self._scan('ID', context=_context) + OptParam = self.OptParam(_context) + return resolve_name(rule,tokens, ID, OptParam) + elif _token == 'LP': + LP = self._scan('LP', context=_context) + ClauseA = self.ClauseA(rule,tokens, _context) + RP = self._scan('RP', context=_context) + return ClauseA + elif _token == 'LB': + LB = self._scan('LB', context=_context) + ClauseA = self.ClauseA(rule,tokens, _context) + RB = self._scan('RB', context=_context) + return parsetree.Option(rule, ClauseA) + else: # == 'STMT' + STMT = self._scan('STMT', context=_context) + return parsetree.Eval(rule, STMT[2:-2]) + + def OptParam(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'OptParam', []) + if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR': + ATTR = self._scan('ATTR', context=_context) + return ATTR[2:-2] + return '' + + def Str(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Str', []) + STR = self._scan('STR', context=_context) + return eval(STR,{},{}) + + +def parse(rule, text): + P = ParserDescription(ParserDescriptionScanner(text)) + return runtime.wrap_error_reporter(P, rule) + +# End -- grammar generated by Yapps + + diff --git a/yapps/parsetree.py b/yapps/parsetree.py new file mode 100644 index 0000000..e5e0ae0 --- /dev/null +++ b/yapps/parsetree.py @@ -0,0 +1,673 @@ +# parsetree.py, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. 
Patel +# +# This version of the Yapps 2 Runtime can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +"""Classes used to represent parse trees and generate output. + +This module defines the Generator class, which drives the generation +of Python output from a grammar parse tree. It also defines nodes +used to represent the parse tree; they are derived from class Node. + +The main logic of Yapps is in this module. +""" + +import sys, re + +###################################################################### +INDENT = ' '*4 +class Generator: + + # TODO: many of the methods here should be class methods, not instance methods + + def __init__(self, name, options, tokens, rules): + self.change_count = 0 + self.name = name + self.options = options + self.preparser = '' + self.postparser = None + + self.tokens = {} # Map from tokens to regexps + self.ignore = {} # List of token names to ignore in parsing, map to statements + self.terminals = [] # List of token names (to maintain ordering) + for t in tokens: + if len(t) == 3: + n,t,s = t + else: + n,t = t + s = None + + if n == '#ignore': + n = t + self.ignore[n] = s + if n in self.tokens.keys() and self.tokens[n] != t: + print >>sys.stderr, 'Warning: token %s defined more than once.' % n + self.tokens[n] = t + self.terminals.append(n) + + self.rules = {} # Map from rule names to parser nodes + self.params = {} # Map from rule names to parameters + self.goals = [] # List of rule names (to maintain ordering) + for n,p,r in rules: + self.params[n] = p + self.rules[n] = r + self.goals.append(n) + + self.output = sys.stdout + + def has_option(self, name): + return self.options.get(name, 0) + + def non_ignored_tokens(self): + return [x for x in self.terminals if x not in self.ignore] + + def changed(self): + """Increments the change count. 
+ + >>> t = Generator('', [], [], []) + >>> old_count = t.change_count + >>> t.changed() + >>> assert t.change_count == old_count + 1 + """ + self.change_count = 1+self.change_count + + def set_subtract(self, a, b): + """Returns the elements of a that are not in b. + + >>> t = Generator('', [], [], []) + >>> t.set_subtract([], []) + [] + >>> t.set_subtract([1, 2], [1, 2]) + [] + >>> t.set_subtract([1, 2, 3], [2]) + [1, 3] + >>> t.set_subtract([1], [2, 3, 4]) + [1] + """ + result = [] + for x in a: + if x not in b: + result.append(x) + return result + + def subset(self, a, b): + """True iff all elements of sequence a are inside sequence b + + >>> t = Generator('', [], [], []) + >>> t.subset([], [1, 2, 3]) + 1 + >>> t.subset([1, 2, 3], []) + 0 + >>> t.subset([1], [1, 2, 3]) + 1 + >>> t.subset([3, 2, 1], [1, 2, 3]) + 1 + >>> t.subset([1, 1, 1], [1, 2, 3]) + 1 + >>> t.subset([1, 2, 3], [1, 1, 1]) + 0 + """ + for x in a: + if x not in b: + return 0 + return 1 + + def equal_set(self, a, b): + """True iff subset(a, b) and subset(b, a) + + >>> t = Generator('', [], [], []) + >>> a_set = [1, 2, 3] + >>> t.equal_set(a_set, a_set) + 1 + >>> t.equal_set(a_set, a_set[:]) + 1 + >>> t.equal_set([], a_set) + 0 + >>> t.equal_set([1, 2, 3], [3, 2, 1]) + 1 + """ + if len(a) != len(b): return 0 + if a == b: return 1 + return self.subset(a, b) and self.subset(b, a) + + def add_to(self, parent, additions): + "Modify _parent_ to include all elements in _additions_" + for x in additions: + if x not in parent: + parent.append(x) + self.changed() + + def equate(self, a, b): + """Extend (a) and (b) so that they contain each others' elements. 
+ + >>> t = Generator('', [], [], []) + >>> a = [1, 2] + >>> b = [2, 3] + >>> t.equate(a, b) + >>> a + [1, 2, 3] + >>> b + [2, 3, 1] + """ + self.add_to(a, b) + self.add_to(b, a) + + def write(self, *args): + for a in args: + self.output.write(a) + + def in_test(self, expr, full, set): + """Generate a test of (expr) being in (set), where (set) is a subset of (full) + + expr is a string (Python expression) + set is a list of values (which will be converted with repr) + full is the list of all values expr could possibly evaluate to + + >>> t = Generator('', [], [], []) + >>> t.in_test('x', [1,2,3,4], []) + '0' + >>> t.in_test('x', [1,2,3,4], [1,2,3,4]) + '1' + >>> t.in_test('x', [1,2,3,4], [1]) + 'x == 1' + >>> t.in_test('a+b', [1,2,3,4], [1,2]) + 'a+b in [1, 2]' + >>> t.in_test('x', [1,2,3,4,5], [1,2,3]) + 'x not in [4, 5]' + >>> t.in_test('x', [1,2,3,4,5], [1,2,3,4]) + 'x != 5' + """ + + if not set: return '0' + if len(set) == 1: return '%s == %s' % (expr, repr(set[0])) + if full and len(set) > len(full)/2: + # Reverse the sense of the test. 
+ not_set = [x for x in full if x not in set] + return self.not_in_test(expr, full, not_set) + return '%s in %s' % (expr, repr(set)) + + def not_in_test(self, expr, full, set): + """Like in_test, but the reverse test.""" + if not set: return '1' + if len(set) == 1: return '%s != %s' % (expr, repr(set[0])) + return '%s not in %s' % (expr, repr(set)) + + def peek_call(self, a): + """Generate a call to scan for a token in the set 'a'""" + assert type(a) == type([]) + a_set = (repr(a)[1:-1]) + if self.equal_set(a, self.non_ignored_tokens()): a_set = '' + if self.has_option('context-insensitive-scanner'): a_set = '' + if a_set: a_set += "," + + return 'self._peek(%s context=_context)' % a_set + + def peek_test(self, a, b): + """Generate a call to test whether the next token (which could be any of + the elements in a) is in the set b.""" + if self.subset(a, b): return '1' + if self.has_option('context-insensitive-scanner'): a = self.non_ignored_tokens() + return self.in_test(self.peek_call(a), a, b) + + def not_peek_test(self, a, b): + """Like peek_test, but the opposite sense.""" + if self.subset(a, b): return '0' + return self.not_in_test(self.peek_call(a), a, b) + + def calculate(self): + """The main loop to compute the epsilon, first, follow sets. + The loop continues until the sets converge. 
This works because + each set can only get larger, so when they stop getting larger, + we're done.""" + # First we determine whether a rule accepts epsilon (the empty sequence) + while 1: + for r in self.goals: + self.rules[r].setup(self) + if self.change_count == 0: break + self.change_count = 0 + + # Now we compute the first/follow sets + while 1: + for r in self.goals: + self.rules[r].update(self) + if self.change_count == 0: break + self.change_count = 0 + + def dump_information(self): + """Display the grammar in somewhat human-readable form.""" + self.calculate() + for r in self.goals: + print ' _____' + '_'*len(r) + print ('___/Rule '+r+'\\' + '_'*80)[:79] + queue = [self.rules[r]] + while queue: + top = queue[0] + del queue[0] + + print 'Rule', repr(top), 'of class', top.__class__.__name__ + top.first.sort() + top.follow.sort() + eps = [] + if top.accepts_epsilon: eps = ['(null)'] + print ' FIRST:', ', '.join(top.first+eps) + print ' FOLLOW:', ', '.join(top.follow) + for x in top.get_children(): queue.append(x) + + def repr_ignore(self): + out="{" + for t,s in self.ignore.iteritems(): + if s is None: s=repr(s) + out += "%s:%s," % (repr(t),s) + out += "}" + return out + + def generate_output(self): + self.calculate() + self.write(self.preparser) + self.write("# Begin -- grammar generated by Yapps\n") + self.write("import sys, re\n") + self.write("from yapps import runtime\n") + self.write("\n") + self.write("class ", self.name, "Scanner(runtime.Scanner):\n") + self.write(" patterns = [\n") + for p in self.terminals: + self.write(" (%s, re.compile(%s)),\n" % ( + repr(p), repr(self.tokens[p]))) + self.write(" ]\n") + self.write(" def __init__(self, str,*args,**kw):\n") + self.write(" runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" % + self.repr_ignore()) + self.write("\n") + + self.write("class ", self.name, "(runtime.Parser):\n") + self.write(INDENT, "Context = runtime.Context\n") + for r in self.goals: + self.write(INDENT, "def ", r, "(self") + if 
self.params[r]: self.write(", ", self.params[r]) + self.write(", _parent=None):\n") + self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" % + (repr(r), self.params.get(r, ''))) + self.rules[r].output(self, INDENT+INDENT) + self.write("\n") + + self.write("\n") + self.write("def parse(rule, text):\n") + self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n") + self.write(" return runtime.wrap_error_reporter(P, rule)\n") + self.write("\n") + if self.postparser is not None: + self.write("# End -- grammar generated by Yapps\n") + self.write(self.postparser) + else: + self.write("if __name__ == '__main__':\n") + self.write(INDENT, "from sys import argv, stdin\n") + self.write(INDENT, "if len(argv) >= 2:\n") + self.write(INDENT*2, "if len(argv) >= 3:\n") + self.write(INDENT*3, "f = open(argv[2],'r')\n") + self.write(INDENT*2, "else:\n") + self.write(INDENT*3, "f = stdin\n") + self.write(INDENT*2, "print parse(argv[1], f.read())\n") + self.write(INDENT, "else: print >>sys.stderr, 'Args: []'\n") + self.write("# End -- grammar generated by Yapps\n") + +###################################################################### +class Node: + """This is the base class for all components of a grammar.""" + def __init__(self, rule): + self.rule = rule # name of the rule containing this node + self.first = [] + self.follow = [] + self.accepts_epsilon = 0 + + def setup(self, gen): + # Setup will change accepts_epsilon, + # sometimes from 0 to 1 but never 1 to 0. 
+ # It will take a finite number of steps to set things up + pass + + def used(self, vars): + "Return two lists: one of vars used, and the other of vars assigned" + return vars, [] + + def get_children(self): + "Return a list of sub-nodes" + return [] + + def __repr__(self): + return str(self) + + def update(self, gen): + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + "Write out code to _gen_ with _indent_:string indentation" + gen.write(indent, "assert 0 # Invalid parser node\n") + +class Terminal(Node): + """This class stores terminal nodes, which are tokens.""" + def __init__(self, rule, token): + Node.__init__(self, rule) + self.token = token + self.accepts_epsilon = 0 + + def __str__(self): + return self.token + + def update(self, gen): + Node.update(self, gen) + if self.first != [self.token]: + self.first = [self.token] + gen.changed() + + def output(self, gen, indent): + gen.write(indent) + if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token): + gen.write(self.token, " = ") + gen.write("self._scan(%s, context=_context)\n" % repr(self.token)) + +class Eval(Node): + """This class stores evaluation nodes, from {{ ... 
}} clauses.""" + def __init__(self, rule, expr): + Node.__init__(self, rule) + self.expr = expr + + def setup(self, gen): + Node.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '{{ %s }}' % self.expr.strip() + + def output(self, gen, indent): + gen.write(indent, self.expr.strip(), '\n') + +class NonTerminal(Node): + """This class stores nonterminal nodes, which are rules with arguments.""" + def __init__(self, rule, name, args): + Node.__init__(self, rule) + self.name = name + self.args = args + + def setup(self, gen): + Node.setup(self, gen) + try: + self.target = gen.rules[self.name] + if self.accepts_epsilon != self.target.accepts_epsilon: + self.accepts_epsilon = self.target.accepts_epsilon + gen.changed() + except KeyError: # Oops, it's nonexistent + print >>sys.stderr, 'Error: no rule <%s>' % self.name + self.target = self + + def __str__(self): + return '%s' % self.name + + def update(self, gen): + Node.update(self, gen) + gen.equate(self.first, self.target.first) + gen.equate(self.follow, self.target.follow) + + def output(self, gen, indent): + gen.write(indent) + gen.write(self.name, " = ") + args = self.args + if args: args += ', ' + args += '_context' + gen.write("self.", self.name, "(", args, ")\n") + +class Sequence(Node): + """This class stores a sequence of nodes (A B C ...)""" + def __init__(self, rule, *children): + Node.__init__(self, rule) + self.children = children + + def setup(self, gen): + Node.setup(self, gen) + for c in self.children: c.setup(gen) + + if not self.accepts_epsilon: + # If it's not already accepting epsilon, it might now do so. 
+ for c in self.children: + # any non-epsilon means all is non-epsilon + if not c.accepts_epsilon: break + else: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % ' '.join(map(str, self.children)) + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + empty = 1 + for g_i in range(len(self.children)): + g = self.children[g_i] + + if empty: gen.add_to(self.first, g.first) + if not g.accepts_epsilon: empty = 0 + + if g_i == len(self.children)-1: + next = self.follow + else: + next = self.children[1+g_i].first + gen.add_to(g.follow, next) + + if self.children: + gen.add_to(self.follow, self.children[-1].follow) + + def output(self, gen, indent): + if self.children: + for c in self.children: + c.output(gen, indent) + else: + # Placeholder for empty sequences, just in case + gen.write(indent, 'pass\n') + +class Choice(Node): + """This class stores a choice between nodes (A | B | C | ...)""" + def __init__(self, rule, *children): + Node.__init__(self, rule) + self.children = children + + def setup(self, gen): + Node.setup(self, gen) + for c in self.children: c.setup(gen) + + if not self.accepts_epsilon: + for c in self.children: + if c.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % ' | '.join(map(str, self.children)) + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + for g in self.children: + gen.add_to(self.first, g.first) + gen.add_to(self.follow, g.follow) + for g in self.children: + gen.add_to(g.follow, self.follow) + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + test = "if" + gen.write(indent, "_token = ", gen.peek_call(self.first), "\n") + tokens_seen = [] + tokens_unseen = self.first[:] + if 
gen.has_option('context-insensitive-scanner'): + # Context insensitive scanners can return ANY token, + # not only the ones in first. + tokens_unseen = gen.non_ignored_tokens() + for c in self.children: + testset = c.first[:] + removed = [] + for x in testset: + if x in tokens_seen: + testset.remove(x) + removed.append(x) + if x in tokens_unseen: tokens_unseen.remove(x) + tokens_seen = tokens_seen + testset + if removed: + if not testset: + print >>sys.stderr, 'Error in rule', self.rule+':' + else: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' *', self + print >>sys.stderr, ' * These tokens could be matched by more than one clause:' + print >>sys.stderr, ' *', ' '.join(removed) + + if testset: + if not tokens_unseen: # context sensitive scanners only! + if test == 'if': + # if it's the first AND last test, then + # we can simply put the code without an if/else + c.output(gen, indent) + else: + gen.write(indent, "else:") + t = gen.in_test('', [], testset) + if len(t) < 70-len(indent): + gen.write(' #', t) + gen.write("\n") + c.output(gen, indent+INDENT) + else: + gen.write(indent, test, " ", + gen.in_test('_token', tokens_unseen, testset), + ":\n") + c.output(gen, indent+INDENT) + test = "elif" + + if tokens_unseen: + gen.write(indent, "else:\n") + gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ") + gen.write("'Could not match ", self.rule, "')\n") + +class Wrapper(Node): + """This is a base class for nodes that modify a single child.""" + def __init__(self, rule, child): + Node.__init__(self, rule) + self.child = child + + def setup(self, gen): + Node.setup(self, gen) + self.child.setup(gen) + + def get_children(self): + return [self.child] + + def update(self, gen): + Node.update(self, gen) + self.child.update(gen) + gen.add_to(self.first, self.child.first) + gen.equate(self.follow, self.child.follow) + +class Option(Wrapper): + """This class represents an optional clause of the form [A]""" + def setup(self, gen): 
+ Wrapper.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '[ %s ]' % str(self.child) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+': contents may be empty.' + gen.write(indent, "if %s:\n" % + gen.peek_test(self.first, self.child.first)) + self.child.output(gen, indent+INDENT) + + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + + +class Plus(Wrapper): + """This class represents a 1-or-more repetition clause of the form A+""" + def setup(self, gen): + Wrapper.setup(self, gen) + if self.accepts_epsilon != self.child.accepts_epsilon: + self.accepts_epsilon = self.child.accepts_epsilon + gen.changed() + + def __str__(self): + return '%s+' % str(self.child) + + def update(self, gen): + Wrapper.update(self, gen) + gen.add_to(self.child.follow, self.child.first) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser may not work properly.' 
+ gen.write(indent, "while 1:\n") + self.child.output(gen, indent+INDENT) + union = self.first[:] + gen.add_to(union, self.follow) + gen.write(indent+INDENT, "if %s: break\n" % + gen.not_peek_test(union, self.child.first)) + + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + + +class Star(Wrapper): + """This class represents a 0-or-more repetition clause of the form A*""" + def setup(self, gen): + Wrapper.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '%s*' % str(self.child) + + def update(self, gen): + Wrapper.update(self, gen) + gen.add_to(self.child.follow, self.child.first) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser probably will not work properly.' + gen.write(indent, "while %s:\n" % + gen.peek_test(self.follow, self.child.first)) + self.child.output(gen, indent+INDENT) + + # TODO: need to generate tests like this in lots of rules + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + diff --git a/yapps/runtime.py b/yapps/runtime.py new file mode 100644 index 0000000..5d9d1d6 --- /dev/null +++ b/yapps/runtime.py @@ -0,0 +1,442 @@ +# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. 
Patel +# Enhancements copyright 2003-2004 by Matthias Urlichs +# +# This version of the Yapps 2 Runtime can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +"""Run time libraries needed to run parsers generated by Yapps. + +This module defines parse-time exception classes, a scanner class, a +base class for parsers produced by Yapps, and a context class that +keeps track of the parse stack. + +""" + +import sys, re + +MIN_WINDOW=4096 +# File lookup window + +class SyntaxError(Exception): + """When we run into an unexpected token, this is the exception to use""" + def __init__(self, pos=None, msg="Bad Token", context=None): + Exception.__init__(self) + self.pos = pos + self.msg = msg + self.context = context + + def __str__(self): + if not self.pos: return 'SyntaxError' + else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg) + +class NoMoreTokens(Exception): + """Another exception object, for when we run out of tokens""" + pass + +class Token(object): + """Yapps token. + + This is a container for a scanned token. + """ + + def __init__(self, type,value, pos=None): + """Initialize a token.""" + self.type = type + self.value = value + self.pos = pos + + def __repr__(self): + output = '<%s: %s' % (self.type, repr(self.value)) + if self.pos: + output += " @ " + if self.pos[0]: + output += "%s:" % self.pos[0] + if self.pos[1]: + output += "%d" % self.pos[1] + if self.pos[2] is not None: + output += ".%d" % self.pos[2] + output += ">" + return output + +in_name=0 +class Scanner(object): + """Yapps scanner. + + The Yapps scanner can work in context sensitive or context + insensitive modes. The token(i) method is used to retrieve the + i-th token. It takes a restrict set that limits the set of tokens + it is allowed to return. In context sensitive mode, this restrict + set guides the scanner. 
In context insensitive mode, there is no + restriction (the set is always the full set of tokens). + + """ + + def __init__(self, patterns, ignore, input="", + file=None,filename=None,stacked=False): + """Initialize the scanner. + + Parameters: + patterns : [(terminal, uncompiled regex), ...] or None + ignore : {terminal:None, ...} + input : string + + If patterns is None, we assume that the subclass has + defined self.patterns : [(terminal, compiled regex), ...]. + Note that the patterns parameter expects uncompiled regexes, + whereas the self.patterns field expects compiled regexes. + + The 'ignore' value is either None or a callable, which is called + with the scanner and the to-be-ignored match object; this can + be used for include file or comment handling. + """ + + if not filename: + global in_name + filename="" % in_name + in_name += 1 + + self.input = input + self.ignore = ignore + self.file = file + self.filename = filename + self.pos = 0 + self.del_pos = 0 # skipped + self.line = 1 + self.del_line = 0 # skipped + self.col = 0 + self.tokens = [] + self.stack = None + self.stacked = stacked + + self.last_read_token = None + self.last_token = None + self.last_types = None + + if patterns is not None: + # Compile the regex strings into regex objects + self.patterns = [] + for terminal, regex in patterns: + self.patterns.append( (terminal, re.compile(regex)) ) + + def stack_input(self, input="", file=None, filename=None): + """Temporarily parse from a second file.""" + + # Already reading from somewhere else: Go on top of that, please. 
+ if self.stack: + # autogenerate a recursion-level-identifying filename + if not filename: + filename = 1 + else: + try: + filename += 1 + except TypeError: + pass + # now pass off to the include file + self.stack.stack_input(input,file,filename) + else: + + try: + filename += 0 + except TypeError: + pass + else: + filename = "" % filename + +# self.stack = object.__new__(self.__class__) +# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True) + + # Note that the pattern+ignore are added by the generated + # scanner code + self.stack = self.__class__(input,file,filename, stacked=True) + + def get_pos(self): + """Return a file/line/char tuple.""" + if self.stack: return self.stack.get_pos() + + return (self.filename, self.line+self.del_line, self.col) + +# def __repr__(self): +# """Print the last few tokens that have been scanned in""" +# output = '' +# for t in self.tokens: +# output += '%s\n' % (repr(t),) +# return output + + def print_line_with_pointer(self, pos, length=0, out=sys.stderr): + """Print the line of 'text' that includes position 'p', + along with a second line with a single caret (^) at position p""" + + file,line,p = pos + if file != self.filename: + if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out) + print >>out, "(%s: not in input buffer)" % file + return + + text = self.input + p += length-1 # starts at pos 1 + + origline=line + line -= self.del_line + spos=0 + if line > 0: + while 1: + line = line - 1 + try: + cr = text.index("\n",spos) + except ValueError: + if line: + text = "" + break + if line == 0: + text = text[spos:cr] + break + spos = cr+1 + else: + print >>out, "(%s:%d not in input buffer)" % (file,origline) + return + + # Now try printing part of the line + text = text[max(p-80, 0):p+80] + p = p - max(p-80, 0) + + # Strip to the left + i = text[:p].rfind('\n') + j = text[:p].rfind('\r') + if i < 0 or (0 <= j < i): i = j + if 0 <= i < p: + p = p - i - 1 + text = 
text[i+1:] + + # Strip to the right + i = text.find('\n', p) + j = text.find('\r', p) + if i < 0 or (0 <= j < i): i = j + if i >= 0: + text = text[:i] + + # Now shorten the text + while len(text) > 70 and p > 60: + # Cut off 10 chars + text = "..." + text[10:] + p = p - 7 + + # Now print the string, along with an indicator + print >>out, '> ',text + print >>out, '> ',' '*p + '^' + + def grab_input(self): + """Get more input if possible.""" + if not self.file: return + if len(self.input) - self.pos >= MIN_WINDOW: return + + data = self.file.read(MIN_WINDOW) + if data is None or data == "": + self.file = None + + # Drop bytes from the start, if necessary. + if self.pos > 2*MIN_WINDOW: + self.del_pos += MIN_WINDOW + self.del_line += self.input[:MIN_WINDOW].count("\n") + self.pos -= MIN_WINDOW + self.input = self.input[MIN_WINDOW:] + data + else: + self.input = self.input + data + + def getchar(self): + """Return the next character.""" + self.grab_input() + + c = self.input[self.pos] + self.pos += 1 + return c + + def token(self, restrict, context=None): + """Scan for another token.""" + + while 1: + if self.stack: + try: + return self.stack.token(restrict, context) + except StopIteration: + self.stack = None + + # Keep looking for a token, ignoring any in self.ignore + self.grab_input() + + # special handling for end-of-file + if self.stacked and self.pos==len(self.input): + raise StopIteration + + # Search the patterns for the longest match, with earlier + # tokens in the list having preference + best_match = -1 + best_pat = '(error)' + best_m = None + for p, regexp in self.patterns: + # First check to see if we're ignoring this token + if restrict and p not in restrict and p not in self.ignore: + continue + m = regexp.match(self.input, self.pos) + if m and m.end()-m.start() > best_match: + # We got a match that's better than the previous one + best_pat = p + best_match = m.end()-m.start() + best_m = m + + # If we didn't find anything, raise an error + if best_pat == 
'(error)' and best_match < 0: + msg = 'Bad Token' + if restrict: + msg = 'Trying to find one of '+', '.join(restrict) + raise SyntaxError(self.get_pos(), msg, context=context) + + ignore = best_pat in self.ignore + value = self.input[self.pos:self.pos+best_match] + if not ignore: + tok=Token(type=best_pat, value=value, pos=self.get_pos()) + + self.pos += best_match + + npos = value.rfind("\n") + if npos > -1: + self.col = best_match-npos + self.line += value.count("\n") + else: + self.col += best_match + + # If we found something that isn't to be ignored, return it + if not ignore: + if len(self.tokens) >= 10: + del self.tokens[0] + self.tokens.append(tok) + self.last_read_token = tok + # print repr(tok) + return tok + else: + ignore = self.ignore[best_pat] + if ignore: + ignore(self, best_m) + + def peek(self, *types, **kw): + """Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow""" + context = kw.get("context",None) + if self.last_token is None: + self.last_types = types + self.last_token = self.token(types,context) + elif self.last_types: + for t in types: + if t not in self.last_types: + raise NotImplementedError("Unimplemented: restriction set changed") + return self.last_token.type + + def scan(self, type, **kw): + """Returns the matched text, and moves to the next token""" + context = kw.get("context",None) + + if self.last_token is None: + tok = self.token([type],context) + else: + if self.last_types and type not in self.last_types: + raise NotImplementedError("Unimplemented: restriction set changed") + + tok = self.last_token + self.last_token = None + if tok.type != type: + if not self.last_types: self.last_types=[] + raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context) + return tok.value + +class Parser(object): + """Base class for Yapps-generated parsers. 
+ + """ + + def __init__(self, scanner): + self._scanner = scanner + + def _stack(self, input="",file=None,filename=None): + """Temporarily read from someplace else""" + self._scanner.stack_input(input,file,filename) + self._tok = None + + def _peek(self, *types, **kw): + """Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow""" + return self._scanner.peek(*types, **kw) + + def _scan(self, type, **kw): + """Returns the matched text, and moves to the next token""" + return self._scanner.scan(type, **kw) + +class Context(object): + """Class to represent the parser's call stack. + + Every rule creates a Context that links to its parent rule. The + contexts can be used for debugging. + + """ + + def __init__(self, parent, scanner, rule, args=()): + """Create a new context. + + Args: + parent: Context object or None + scanner: Scanner object + rule: string (name of the rule) + args: tuple listing parameters to the rule + + """ + self.parent = parent + self.scanner = scanner + self.rule = rule + self.args = args + while scanner.stack: scanner = scanner.stack + self.token = scanner.last_read_token + + def __str__(self): + output = '' + if self.parent: output = str(self.parent) + ' > ' + output += self.rule + return output + +def print_error(err, scanner, max_ctx=None): + """Print error messages, the parser stack, and the input text -- for human-readable error messages.""" + # NOTE: this function assumes 80 columns :-( + # Figure out the line number + pos = err.pos + if not pos: + pos = scanner.get_pos() + + file_name, line_number, column_number = pos + print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg) + + scanner.print_line_with_pointer(pos) + + context = err.context + token = None + while context: + print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args)) + if context.token: + token = context.token + if token: + 
scanner.print_line_with_pointer(token.pos, length=len(token.value)) + context = context.parent + if max_ctx: + max_ctx = max_ctx-1 + if not max_ctx: + break + +def wrap_error_reporter(parser, rule, *args,**kw): + try: + return getattr(parser, rule)(*args,**kw) + except SyntaxError, e: + print_error(e, parser._scanner) + except NoMoreTokens: + print >>sys.stderr, 'Could not complete parsing; stopped around here:' + print >>sys.stderr, parser._scanner diff --git a/yapps2.py b/yapps2.py new file mode 100755 index 0000000..d6fd101 --- /dev/null +++ b/yapps2.py @@ -0,0 +1,113 @@ +#!/usr/bin/python + +# +# Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# +# This version of Yapps 2 can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +import sys, re + +from yapps import runtime, parsetree + +def generate(inputfilename, outputfilename='', dump=0, **flags): + """Generate a grammar, given an input filename (X.g) + and an output filename (defaulting to X.py).""" + + if not outputfilename: + if inputfilename.endswith('.g'): + outputfilename = inputfilename[:-2] + '.py' + else: + raise Exception('Must specify output filename if input filename is not *.g') + + DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers + preparser, postparser = None, None # Code before and after the parser desc + + # Read the entire file + s = open(inputfilename,'r').read() + + # See if there's a separation between the pre-parser and parser + f = s.find(DIVIDER) + if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):] + + # See if there's a separation between the parser and post-parser + f = s.find(DIVIDER) + if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):] + + # Create the parser and scanner and parse the text + scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename) + if preparser: scanner.del_line += 
preparser.count('\n') + + parser = grammar.ParserDescription(scanner) + t = runtime.wrap_error_reporter(parser, 'Parser') + if t is None: return 1 # Failure + if preparser is not None: t.preparser = preparser + if postparser is not None: t.postparser = postparser + + # Check the options + for f in t.options.keys(): + for opt,_,_ in yapps_options: + if f == opt: break + else: + print >>sys.stderr, 'Warning: unrecognized option', f + # Add command line options to the set + for f in flags.keys(): t.options[f] = flags[f] + + # Generate the output + if dump: + t.dump_information() + else: + t.output = open(outputfilename, 'w') + t.generate_output() + return 0 + +if __name__ == '__main__': + import doctest + doctest.testmod(sys.modules['__main__']) + doctest.testmod(parsetree) + + # Someday I will use optparse, but Python 2.3 is too new at the moment. + yapps_options = [ + ('context-insensitive-scanner', + 'context-insensitive-scanner', + 'Scan all tokens (see docs)'), + ] + + import getopt + optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['help', 'dump', 'use-devel-grammar']) + if not args or len(args) > 2: + print >>sys.stderr, 'Usage:' + print >>sys.stderr, ' python', sys.argv[0], '[flags] input.g [output.py]' + print >>sys.stderr, 'Flags:' + print >>sys.stderr, (' --dump' + ' '*40)[:35] + 'Dump out grammar information' + print >>sys.stderr, (' --use-devel-grammar' + ' '*40)[:35] + 'Use the devel grammar parser from yapps_grammar.py instead of the stable grammar from grammar.py' + for flag, _, doc in yapps_options: + print >>sys.stderr, (' -f' + flag + ' '*40)[:35] + doc + else: + # Read in the options and create a list of flags + flags = {} + use_devel_grammar = 0 + for opt in optlist: + for flag, name, _ in yapps_options: + if opt == ('-f', flag): + flags[name] = 1 + break + else: + if opt == ('--dump', ''): + flags['dump'] = 1 + elif opt == ('--use-devel-grammar', ''): + use_devel_grammar = 1 + else: + print >>sys.stderr, 'Warning: unrecognized option', opt[0], 
opt[1] + + if use_devel_grammar: + import yapps_grammar as grammar + else: + from yapps import grammar + + sys.exit(generate(*tuple(args), **flags)) diff --git a/yapps_grammar.g b/yapps_grammar.g new file mode 100644 index 0000000..cd21a9e --- /dev/null +++ b/yapps_grammar.g @@ -0,0 +1,121 @@ +# grammar.py, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# Enhancements copyright 2003-2004 by Matthias Urlichs +# +# This version of the Yapps 2 grammar can be distributed under the +# terms of the MIT open source license, either found in the LICENSE +# file included with the Yapps distribution +# or at +# +# + +"""Parser for Yapps grammars. + +This file defines the grammar of Yapps grammars. Naturally, it is +implemented in Yapps. The grammar.py module needed by Yapps is built +by running Yapps on yapps_grammar.g. (Holy circularity, Batman!) + +""" + +import sys, re +from yapps import parsetree + +###################################################################### +def cleanup_choice(rule, lst): + if len(lst) == 0: return Sequence(rule, []) + if len(lst) == 1: return lst[0] + return parsetree.Choice(rule, *tuple(lst)) + +def cleanup_sequence(rule, lst): + if len(lst) == 1: return lst[0] + return parsetree.Sequence(rule, *tuple(lst)) + +def resolve_name(rule, tokens, id, args): + if id in [x[0] for x in tokens]: + # It's a token + if args: + print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) + return parsetree.Terminal(rule, id) + else: + # It's a name, so assume it's a nonterminal + return parsetree.NonTerminal(rule, id, args) + +%% +parser ParserDescription: + + ignore: "[ \t\r\n]+" + ignore: "#.*?\r?\n" + token EOF: "$" + token ATTR: "<<.+?>>" + token STMT: "{{.+?}}" + token ID: '[a-zA-Z_][a-zA-Z_0-9]*' + token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"' + token LP: '\\(' + token RP: '\\)' + token LB: '\\[' + token RB: '\\]' + token OR: '[|]' + token STAR: '[*]' + token PLUS: '[+]' 
+ token QUEST: '[?]' + token COLON: ':' + + rule Parser: "parser" ID ":" + Options + Tokens + Rules<> + EOF + {{ return parsetree.Generator(ID,Options,Tokens,Rules) }} + + rule Options: {{ opt = {} }} + ( "option" ":" Str {{ opt[Str] = 1 }} )* + {{ return opt }} + + rule Tokens: {{ tok = [] }} + ( + "token" ID ":" Str {{ tok.append( (ID,Str) ) }} + | "ignore" + ":" Str {{ ign = ('#ignore',Str) }} + ( STMT {{ ign = ign + (STMT[2:-2],) }} )? + {{ tok.append( ign ) }} + )* + {{ return tok }} + + rule Rules<>: + {{ rul = [] }} + ( + "rule" ID OptParam ":" ClauseA<> + {{ rul.append( (ID, OptParam, ClauseA) ) }} + )* + {{ return rul }} + + rule ClauseA<>: + ClauseB<> + {{ v = [ClauseB] }} + ( OR ClauseB<> {{ v.append(ClauseB) }} )* + {{ return cleanup_choice(rule,v) }} + + rule ClauseB<>: + {{ v = [] }} + ( ClauseC<> {{ v.append(ClauseC) }} )* + {{ return cleanup_sequence(rule, v) }} + + rule ClauseC<>: + ClauseD<> + ( PLUS {{ return parsetree.Plus(rule, ClauseD) }} + | STAR {{ return parsetree.Star(rule, ClauseD) }} + | QUEST {{ return parsetree.Option(rule, ClauseD) }} + | {{ return ClauseD }} ) + + rule ClauseD<>: + STR {{ t = (STR, eval(STR,{},{})) }} + {{ if t not in tokens: tokens.insert( 0, t ) }} + {{ return parsetree.Terminal(rule, STR) }} + | ID OptParam {{ return resolve_name(rule,tokens, ID, OptParam) }} + | LP ClauseA<> RP {{ return ClauseA }} + | LB ClauseA<> RB {{ return parsetree.Option(rule, ClauseA) }} + | STMT {{ return parsetree.Eval(rule, STMT[2:-2]) }} + + rule OptParam: [ ATTR {{ return ATTR[2:-2] }} ] {{ return '' }} + rule Str: STR {{ return eval(STR,{},{}) }} +%% -- cgit