From e87b107fd80aee219a39ac0ecd52cc2b3a766990 Mon Sep 17 00:00:00 2001 From: sienkiew Date: Fri, 5 Aug 2011 16:43:50 +0000 Subject: mogrify yapps2 into exyapps git-svn-id: http://svn.stsci.edu/svn/ssb/etal/exyapps/trunk@361 d34015c8-bcbb-4646-8ac8-8ba5febf221d --- Makefile | 2 + README | 50 +++ changelog | 983 --------------------------------------------------- exyapps/__init__.py | 1 + exyapps/grammar.py | 213 +++++++++++ exyapps/main.py | 135 +++++++ exyapps/parsetree.py | 674 +++++++++++++++++++++++++++++++++++ exyapps/runtime.py | 442 +++++++++++++++++++++++ scripts/exyapps | 4 + setup.py | 41 ++- yapps/__init__.py | 1 - yapps/grammar.py | 211 ----------- yapps/parsetree.py | 673 ----------------------------------- yapps/runtime.py | 442 ----------------------- yapps2.py | 113 ------ yapps_grammar.g | 2 +- 16 files changed, 1546 insertions(+), 2441 deletions(-) create mode 100644 Makefile create mode 100644 README delete mode 100644 changelog create mode 100644 exyapps/__init__.py create mode 100644 exyapps/grammar.py create mode 100755 exyapps/main.py create mode 100644 exyapps/parsetree.py create mode 100644 exyapps/runtime.py create mode 100644 scripts/exyapps delete mode 100644 yapps/__init__.py delete mode 100644 yapps/grammar.py delete mode 100644 yapps/parsetree.py delete mode 100644 yapps/runtime.py delete mode 100755 yapps2.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3c1f2a6 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +exyapps/grammar.py: yapps_grammar.g + exyapps yapps_grammar.g exyapps/grammar.py diff --git a/README b/README new file mode 100644 index 0000000..75d00c8 --- /dev/null +++ b/README @@ -0,0 +1,50 @@ +This is exyapps, a LL(1) parser generator. + +It is derived from yapps ( http://theory.stanford.edu/~amitp/yapps/ +) by Amit J. Patel . He is no longer +maintaining it, and there seem to be several forks out there, all +with varying version numbers. This copy was derived from some +patches included with Debian by Matthias Urlichs + +Since I want to make some of my own specific changes, I'm actually changing +the name so this package can have a distinctive identity. + + +Installing / using exyapps +-- + + python setup.py install + + cd /your/project + + exyapps my_grammar.exy + +For now, the package is compatible with yapps; Someday, I want to +make parsers run without the exyapps package installed. + +What is here? +-- + +Makefile +yapps_grammar.g + yapps_grammar.g is the source code for exyapps/grammar.py + type "make" to re-generate it, then do an svn commit + +doc + looks like latex source for the documentation + +examples + duh + +exyapps + the exyapps package that gets installed + +scripts + "exyapps" command that compiles a parser into python code. + +setup.py + +test + not actual tests, but apparently some interesting input to + run through the parser for testing + diff --git a/changelog b/changelog deleted file mode 100644 index 4fdf12e..0000000 --- a/changelog +++ /dev/null @@ -1,983 +0,0 @@ -ChangeSet - 1.38 05/01/22 19:36:32 smurf@smurf.noris.de +2 -0 - Add option to limit backtrace depth on syntax errors. - - yapps/runtime.py - 1.15 05/01/22 19:36:31 smurf@smurf.noris.de +5 -1 - Add option to limit backtrace depth on syntax errors. - - debian/changelog - 1.20 05/01/22 19:36:31 smurf@smurf.noris.de +2 -1 - Add option to limit backtrace depth on syntax errors. - -ChangeSet - 1.37 05/01/22 03:39:56 smurf@smurf.noris.de +2 -0 - Fix recursive includes. - - yapps/runtime.py - 1.14 05/01/22 03:39:54 smurf@smurf.noris.de +395 -381 - Fix recursive includes. - - debian/changelog - 1.19 05/01/22 03:39:54 smurf@smurf.noris.de +6 -0 - Fix recursive includes. - -ChangeSet - 1.36 04/12/23 23:49:52 smurf@smurf.noris.de +1 -0 - Brown paper bag -- fix Python 2.4 stuff. - - debian/changelog - 1.18 04/12/23 23:49:52 smurf@smurf.noris.de +6 -0 - Brown paper bag -- fix Python 2.4 stuff. - -ChangeSet - 1.35 04/12/23 21:00:34 smurf@smurf.noris.de +1 -0 - typo - - debian/control - 1.10 04/12/23 21:00:33 smurf@smurf.noris.de +1 -1 - typo - -ChangeSet - 1.34 04/12/12 20:22:54 smurf@smurf.noris.de +2 -0 - Add support for Python 2.4 - - debian/control - 1.9 04/12/12 20:22:52 smurf@smurf.noris.de +1 -1 - Add support for Python 2.4 - - debian/changelog - 1.17 04/12/12 20:22:52 smurf@smurf.noris.de +6 -0 - Add support for Python 2.4 - -ChangeSet - 1.33 04/09/23 11:24:16 smurf@smurf.noris.de +3 -0 - update documentation: - - toss hyphens - - document extensions - - doc/yapps2.tex - 1.3 04/09/23 11:24:16 smurf@smurf.noris.de +21 -0 - add a Debian Extensions section - - debian/yapps.1 - 1.2 04/09/23 11:24:16 smurf@smurf.noris.de +14 -9 - escape more hyphens (i.e., all the rest) - - debian/changelog - 1.16 04/09/23 11:24:16 smurf@smurf.noris.de +2 -0 - update documentation: - - toss hyphens - - document extensions - -ChangeSet - 1.32 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 - turn off triggers - - BitKeeper/triggers/pre-commit.upversion - 1.2 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 - off - - BitKeeper/triggers/post-commit.changelog - 1.2 04/09/23 11:23:24 smurf@smurf.noris.de +2 -0 - off - -ChangeSet - 1.31 04/09/23 10:55:24 smurf@smurf.noris.de +1 -0 - ignore new package's files - - BitKeeper/etc/ignore - 1.17 04/09/23 10:55:23 smurf@smurf.noris.de +1 -0 - added debian/yapps2-runtime/* - - debian/yapps2-runtime.README - 1.1 04/09/23 10:50:33 smurf@smurf.noris.de +11 -0 - -ChangeSet - 1.30 04/09/23 10:50:33 smurf@smurf.noris.de +8 -0 - split off runtime to its own package - document the fact that I can't use the original runtime - - debian/yapps2-runtime.dirs - 1.6 04/09/23 10:50:33 smurf@smurf.noris.de +2 -4 - split off runtime - - debian/yapps2-runtime.README - 1.0 04/09/23 10:50:33 smurf@smurf.noris.de +0 -0 - BitKeeper file /daten/src/debian/python_yapps/debian/yapps2-runtime.README - - debian/rules - 1.5 04/09/23 10:50:33 smurf@smurf.noris.de +4 -1 - move runtime files to their own package - - debian/control - 1.8 04/09/23 10:50:33 smurf@smurf.noris.de +14 -1 - split off runtime to its own package - - debian/changelog - 1.15 04/09/23 10:50:33 smurf@smurf.noris.de +9 -0 - document package split - - debian/README - 1.2 04/09/23 10:50:33 smurf@smurf.noris.de +21 -4 - Updated for package split - - debian/yapps2.docs - 1.3 04/09/23 10:31:15 smurf@smurf.noris.de +0 -0 - Rename: debian/docs -> debian/yapps2.docs - - debian/yapps2-runtime.dirs - 1.5 04/09/23 10:30:48 smurf@smurf.noris.de +0 -0 - bk cp yapps2.dirs yapps2-runtime.dirs - - debian/yapps2.dirs - 1.4 04/09/23 10:30:42 smurf@smurf.noris.de +0 -0 - Rename: debian/dirs -> debian/yapps2.dirs - - debian/yapps2.dirs - 1.4 04/09/23 10:30:42 smurf@smurf.noris.de +0 -0 - Rename: debian/dirs -> debian/yapps2.dirs - -ChangeSet - 1.29 04/07/19 09:30:22 smurf@smurf.noris.de +5 -0 - latex2html => hevea - - debian/yapps2.doc-base - 1.2 04/07/19 09:30:21 smurf@smurf.noris.de +2 -2 - latex2html => hevea - - debian/rules - 1.4 04/07/19 09:30:21 smurf@smurf.noris.de +4 -2 - latex2html => hevea - - debian/control - 1.7 04/07/19 09:30:21 smurf@smurf.noris.de +1 -1 - latex2html => hevea - - debian/changelog - 1.14 04/07/19 09:30:21 smurf@smurf.noris.de +6 -0 - latex2html => hevea - - BitKeeper/etc/ignore - 1.16 04/07/19 09:30:06 smurf@smurf.noris.de +1 -0 - added doc/yapps2.haux - - BitKeeper/etc/ignore - 1.15 04/07/19 09:29:55 smurf@smurf.noris.de +1 -0 - added doc/yapps2.ht* - -ChangeSet - 1.28 04/07/12 09:35:59 smurf@smurf.noris.de +2 -0 - Build-Depend on python. - - debian/control - 1.6 04/07/12 09:35:58 smurf@smurf.noris.de +1 -1 - Build-Depend on python. - - debian/changelog - 1.13 04/07/12 09:35:58 smurf@smurf.noris.de +6 -0 - doc - -ChangeSet - 1.27 04/05/16 22:02:40 smurf@smurf.noris.de +2 -0 - ship "empty" file - - yapps/__init__.py - 1.2 04/05/16 22:02:39 smurf@smurf.noris.de +1 -0 - ship "empty" file - - debian/changelog - 1.12 04/05/16 22:02:39 smurf@smurf.noris.de +2 -1 - doc - -ChangeSet - 1.26 04/05/16 22:01:42 smurf@smurf.noris.de +2 -0 - Typo (made large file handling slow) - - yapps/runtime.py - 1.13 04/05/16 22:01:42 smurf@smurf.noris.de +1 -1 - Typo - - debian/changelog - 1.11 04/05/16 22:01:42 smurf@smurf.noris.de +6 -0 - Version 2.1.1-11 - -ChangeSet - 1.25 04/05/14 12:25:51 smurf@smurf.noris.de +1 -0 - exporter: test was in wrong dir - - debian/exporter - 1.3 04/05/14 12:25:51 smurf@smurf.noris.de +1 -0 - wrong dir - -ChangeSet - 1.24 04/05/14 12:20:04 smurf@smurf.noris.de +1 -0 - Clean up external source before generating a diff - - debian/exporter - 1.2 04/05/14 12:20:04 smurf@smurf.noris.de +7 -0 - Clean up external source before generating a diff - -ChangeSet - 1.23 04/05/14 12:14:34 smurf@linux.smurf.noris.de +13 -0 - Documentation update: - build and install HTML documentation from LaTex source - - debian/changelog - 1.10 04/05/14 12:14:33 smurf@linux.smurf.noris.de +7 -0 - Version 2.1.1-10 - - debian/yapps2.doc-base - 1.1 04/05/14 12:14:32 smurf@smurf.noris.de +13 -0 - - yapps_grammar.g - 1.5 04/05/14 12:14:31 smurf@smurf.noris.de +1 -0 - add my copyright notice - - yapps/runtime.py - 1.12 04/05/14 12:14:31 smurf@smurf.noris.de +1 -0 - add my copyright notice - - debian/yapps2.doc-base - 1.0 04/05/14 12:14:31 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/yapps2.doc-base - - debian/rules - 1.3 04/05/14 12:14:31 smurf@smurf.noris.de +5 -5 - gernerate and install html documentation - don't install LICENSE file - - debian/yapps2-runtime.dirs - 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +1 -2 - drop overrides - add doc dir +html - - debian/docs - 1.2 04/05/14 12:14:30 smurf@smurf.noris.de +1 -1 - install latex documentation - - debian/dirs - 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +1 -2 - drop overrides - add doc dir +html - - debian/copyright - 1.3 04/05/14 12:14:30 smurf@smurf.noris.de +21 -3 - include license here instead of installing a LICENSE file - - debian/control - 1.5 04/05/14 12:14:30 smurf@smurf.noris.de +6 -6 - Dep on latex2html - indent list - - BitKeeper/etc/ignore - 1.14 04/05/14 12:06:12 smurf@smurf.noris.de +1 -0 - added doc/yapps2/* - - BitKeeper/etc/ignore - 1.13 04/05/14 12:06:07 smurf@smurf.noris.de +3 -0 - added debian/yapps2/* debian/*.substvars debian/*.debhelper - - BitKeeper/deleted/.del-overrides.lintian~19711613dc4ce90f - 1.3 04/05/14 11:51:33 smurf@smurf.noris.de +0 -0 - Delete: debian/overrides.lintian - - BitKeeper/deleted/.del-overrides.linda~b0c6fa08da170a16 - 1.2 04/05/14 11:51:33 smurf@smurf.noris.de +0 -0 - Delete: debian/overrides.linda - - doc/yapps2.tex - 1.2 04/05/14 11:34:34 smurf@smurf.noris.de +0 -0 - Rename: yapps2.tex -> doc/yapps2.tex - -ChangeSet - 1.22 04/05/14 11:33:27 smurf@smurf.noris.de +1 -0 - Merge bk://server/public/python_yapps - into smurf.noris.de:/usr/local/src/misc/yapps - - BitKeeper/deleted/.del-logging_ok~530b65bc14e5cc7c - 1.2 04/05/14 11:33:26 smurf@smurf.noris.de +0 -0 - 'Auto converge rename' - - BitKeeper/etc/logging_ok - 1.1 04/05/14 11:33:13 smurf@smurf.noris.de +1 -0 - -ChangeSet - 1.4.1.1 04/05/14 11:33:13 smurf@linux.smurf.noris.de +2 -0 - Added tex documentation from yapps-2.0.4. - - BitKeeper/etc/logging_ok - 1.0 04/05/14 11:33:13 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/BitKeeper/etc/logging_ok - - yapps2.tex - 1.1 04/05/14 11:33:10 smurf@smurf.noris.de +1225 -0 - - yapps2.tex - 1.0 04/05/14 11:33:10 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/yapps2.tex - -ChangeSet - 1.21 04/05/14 11:31:18 smurf@linux.smurf.noris.de +7 -0 - Renamed the package to "yapps2". - - setup.py - 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +17 -2 - Fixed name - Updated long description - - debian/yapps2-runtime.dirs - 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 - rename: python-yapps => yapps2 - - debian/rules - 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +8 -8 - rename: python-yapps => yapps2 - - debian/overrides.lintian - 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 - rename: python-yapps => yapps2 - - debian/dirs - 1.2 04/05/14 11:31:17 smurf@smurf.noris.de +1 -1 - rename: python-yapps => yapps2 - - debian/copyright - 1.2 04/05/14 11:31:16 smurf@smurf.noris.de +11 -3 - Added pointer to original source - - debian/control - 1.4 04/05/14 11:31:16 smurf@smurf.noris.de +18 -10 - rename: python-yapps => yapps2 - - debian/changelog - 1.9 04/05/14 11:31:16 smurf@smurf.noris.de +13 -13 - Cleanup - -ChangeSet - 1.20 03/12/31 14:00:42 smurf@linux.smurf.noris.de +2 -0 - require python-dev because of distutils - - debian/changelog - 1.8 03/12/31 14:00:42 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-8 - - debian/control - 1.3 03/12/31 14:00:40 smurf@smurf.noris.de +1 -1 - require python-dev because of distutils - -ChangeSet - 1.19 03/12/31 13:57:38 smurf@linux.smurf.noris.de +2 -0 - Change yapps.py t exit 1 on failure to parse - - debian/changelog - 1.7 03/12/31 13:57:38 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-7 - - yapps2.py - 1.6 03/12/31 13:57:37 smurf@smurf.noris.de +3 -2 - exit 1 on error - -ChangeSet - 1.18 03/12/30 15:36:56 smurf@linux.smurf.noris.de +2 -0 - Update to 3.6.1, use build-depends-indep. - - debian/changelog - 1.6 03/12/30 15:36:56 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-6 - - debian/control - 1.2 03/12/30 15:36:55 smurf@smurf.noris.de +2 -2 - Update to 3.6.1, use build-depends-indep. - -ChangeSet - 1.17 03/12/30 15:33:19 smurf@linux.smurf.noris.de +2 -0 - Add some notes. - - debian/changelog - 1.5 03/12/30 15:33:19 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-5 - - examples/notes - 1.1 03/12/30 15:33:18 smurf@smurf.noris.de +44 -0 - - examples/notes - 1.0 03/12/30 15:33:17 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/examples/notes - -ChangeSet - 1.16 03/12/30 15:30:05 smurf@linux.smurf.noris.de +2 -0 - Correctly report syntax errors without line number - - debian/changelog - 1.4 03/12/30 15:30:05 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-4 - - yapps/runtime.py - 1.11 03/12/30 15:30:04 smurf@smurf.noris.de +6 -2 - Report syntax errors with no line number - -ChangeSet - 1.15 03/12/30 14:02:37 smurf@linux.smurf.noris.de +2 -0 - Repair ignored-pattern upcall. - - debian/changelog - 1.3 03/12/30 14:02:37 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-3 - - yapps/runtime.py - 1.10 03/12/30 14:02:36 smurf@smurf.noris.de +4 -2 - Repair ignore upcall. - -ChangeSet - 1.14 03/12/30 13:30:14 smurf@linux.smurf.noris.de +2 -0 - runtime: fix error reporting - - debian/changelog - 1.2 03/12/30 13:30:14 smurf@linux.smurf.noris.de +6 -0 - Version 2.1.1-2 - - yapps/runtime.py - 1.9 03/12/30 13:30:12 smurf@smurf.noris.de +9 -9 - Fix error reporting - -ChangeSet - 1.13 03/12/30 12:25:29 smurf@linux.smurf.noris.de +2 -0 - replace runtime grammar - yapps_grammar.g: delete shebang line, fix imports - - yapps_grammar.g - 1.4 03/12/30 12:25:28 smurf@smurf.noris.de +1 -3 - fix import - delete shebang line - - yapps/grammar.py - 1.11 03/12/30 12:25:28 smurf@smurf.noris.de +49 -66 - replace runtime grammar - -ChangeSet - 1.12 03/12/30 11:51:26 smurf@linux.smurf.noris.de +19 -0 - D - - setup.py - 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +27 -0 - - debian/yapps.1 - 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +58 -0 - - debian/rules - 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +91 -0 - - debian/overrides.lintian - 1.1 03/12/30 11:51:25 smurf@smurf.noris.de +1 -0 - - setup.py - 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/setup.py - - debian/yapps2-runtime.dirs - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +5 -0 - - debian/yapps.1 - 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/yapps.1 - - debian/rules - 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/rules - - debian/overrides.lintian - 1.0 03/12/30 11:51:25 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/overrides.lintian - - debian/overrides.linda - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +3 -0 - - debian/exporter - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +10 -0 - - debian/docs - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +3 -0 - - debian/dirs - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +5 -0 - - debian/copyright - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +15 -0 - - debian/control - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +19 -0 - - debian/compat - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +1 -0 - - debian/README - 1.1 03/12/30 11:51:24 smurf@smurf.noris.de +6 -0 - - yapps_grammar.g - 1.3 03/12/30 11:51:24 smurf@smurf.noris.de +0 -1 - Make the scanner context-sensitive. Works better. - - yapps2.py - 1.5 03/12/30 11:51:24 smurf@smurf.noris.de +1 -1 - Fix path - - yapps/runtime.py - 1.8 03/12/30 11:51:24 smurf@smurf.noris.de +0 -1 - Regularize header - - yapps/parsetree.py - 1.7 03/12/30 11:51:24 smurf@smurf.noris.de +0 -2 - Drop shebang line, this is not a program. - - yapps/grammar.py - 1.10 03/12/30 11:51:24 smurf@smurf.noris.de +0 -2 - Drop shebang line, this is not a program. - - debian/yapps2-runtime.dirs - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/dirs - - debian/overrides.linda - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/overrides.linda - - debian/exporter - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/exporter - - debian/docs - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/docs - - debian/dirs - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/dirs - - debian/copyright - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/copyright - - debian/control - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/control - - debian/compat - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/compat - - debian/README - 1.0 03/12/30 11:51:24 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/README - - debian/changelog - 1.1 03/12/30 11:41:14 smurf@smurf.noris.de +15 -0 - - debian/changelog - 1.0 03/12/30 11:41:14 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/debian/changelog - - BitKeeper/etc/ignore - 1.12 03/12/30 11:40:56 smurf@smurf.noris.de +1 -0 - added changelog - -ChangeSet - 1.11 03/12/30 11:23:09 smurf@linux.smurf.noris.de +5 -0 - Rewrote imports et al. to create+use a real "yapps" module - - yapps/__init__.py - 1.1 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 - - yapps2.py - 1.4 03/12/30 11:23:08 smurf@smurf.noris.de +3 -3 - Refactor to use reasonable "yapps" module - - yapps/runtime.py - 1.7 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 - Rename: yappsrt.py -> yapps/runtime.py - - yapps/parsetree.py - 1.6 03/12/30 11:23:08 smurf@smurf.noris.de +10 -10 - Refactor to use reasonable "yapps" module - Rename: parsetree.py -> yapps/parsetree.py - - yapps/__init__.py - 1.0 03/12/30 11:23:08 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/yapps/__init__.py - - yapps/grammar.py - 1.9 03/12/30 11:23:07 smurf@smurf.noris.de +16 -16 - Refactor to use reasonable "yapps" module - Rename: grammar.py -> yapps/grammar.py - - BitKeeper/etc/ignore - 1.11 03/12/30 11:22:15 smurf@smurf.noris.de +4 -0 - added build/* debian/python-yapps/* debian/*.debhelper debian/*.substvars - -ChangeSet - 1.10 03/12/29 22:10:59 smurf@linux.smurf.noris.de +1 -0 - Added context-insensitive-scanner end test - to a couple of composite grammar statements. - - It's probably overkill to do that with all statements..? - - parsetree.py - 1.5 03/12/29 22:10:58 smurf@smurf.noris.de +19 -5 - Added context-insensitive-scanner end test - to a couple of composite statements. - - It's probably overkill to do that with all statements..? - -ChangeSet - 1.9 03/12/29 22:05:00 smurf@linux.smurf.noris.de +1 -0 - yappsrt.py: Bugfix: stored token type - - yappsrt.py - 1.6 03/12/29 22:04:59 smurf@smurf.noris.de +1 -1 - Bugfix: stored token type - -ChangeSet - 1.8 03/12/29 21:42:41 smurf@linux.smurf.noris.de +4 -0 - Pass init arguments to the scanner - Simplify stuff a bit - - yappsrt.py - 1.5 03/12/29 21:42:40 smurf@smurf.noris.de +16 -8 - Fix line counting - simplify pattern length determination - - yapps2.py - 1.3 03/12/29 21:42:40 smurf@smurf.noris.de +3 -2 - Pass filename to scanner - - parsetree.py - 1.4 03/12/29 21:42:40 smurf@smurf.noris.de +2 -2 - Pass init arguments to the scanner - - grammar.py - 1.8 03/12/29 21:42:40 smurf@smurf.noris.de +3 -2 - Update for changed yapps2.py - -ChangeSet - 1.7 03/12/29 20:37:55 smurf@linux.smurf.noris.de +6 -0 - Cleanup ignored-symbol commands - Fix including and error reporting - - yappsrt.py - 1.4 03/12/29 20:37:54 smurf@smurf.noris.de +88 -52 - cleanup ignored symbol handling - refactor _scan and _peek: move to Scanner - generate pseudo filenames for inline documents - accept context for error handling - - yapps_grammar.g - 1.2 03/12/29 20:37:54 smurf@smurf.noris.de +4 -1 - Cleanup statements for ignored symbols - - yapps2.py - 1.2 03/12/29 20:37:54 smurf@smurf.noris.de +1 -1 - Setup line numbers correctly - - parsetree.py - 1.3 03/12/29 20:37:54 smurf@smurf.noris.de +22 -6 - Ignored-symbol handling extended - Pass context to scanners - - grammar.py - 1.7 03/12/29 20:37:54 smurf@smurf.noris.de +1 -1 - Use a hash for ignored stuff - - examples/calc.g - 1.3 03/12/29 20:37:54 smurf@smurf.noris.de +7 -4 - Cleanup include handling: use an ignored token - -ChangeSet - 1.6 03/12/29 18:16:19 smurf@linux.smurf.noris.de +6 -0 - Reproduce current grammar file - - One problem with attribute remains. - - yapps_grammar.g - 1.1 03/12/29 18:16:18 smurf@smurf.noris.de +120 -0 - - yappsrt.py - 1.3 03/12/29 18:16:18 smurf@smurf.noris.de +5 -5 - charpos => pos - - yapps_grammar.g - 1.0 03/12/29 18:16:18 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/yapps_grammar.g - - BitKeeper/etc/ignore - 1.10 03/12/29 18:15:23 smurf@smurf.noris.de +1 -0 - added *.swp - - BitKeeper/etc/ignore - 1.9 03/12/29 18:15:02 smurf@smurf.noris.de +1 -0 - added yapps_grammar.py - - BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 - 1.4 03/12/29 18:14:35 smurf@smurf.noris.de +0 -0 - Delete: yapps_grammar.py - - grammar.py - 1.6 03/12/29 18:14:17 smurf@smurf.noris.de +0 -0 - Rename: BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 -> grammar.py - - BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 - 1.5 03/12/29 18:13:11 smurf@smurf.noris.de +0 -0 - Delete: grammar.py - - yapps_grammar.py - 1.3 03/12/29 18:13:04 smurf@smurf.noris.de +0 -0 - Rename: BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 -> yapps_grammar.py - - grammar.py - 1.4 03/12/29 18:12:51 smurf@smurf.noris.de +0 -0 - Rename: BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 -> grammar.py - - BitKeeper/deleted/.del-grammar.py~46b22024b3b85127 - 1.3 03/12/29 18:12:30 smurf@smurf.noris.de +19 -20 - Delete: grammar.py - - BitKeeper/etc/ignore - 1.8 03/12/29 17:15:10 smurf@smurf.noris.de +3 -0 - added *-stamp debian/files debian/tmp/* - - BitKeeper/etc/ignore - 1.7 03/12/29 17:15:08 smurf@smurf.noris.de +3 -0 - added *-stamp debian/files debian/tmp/* - - BitKeeper/etc/ignore - 1.6 03/12/29 17:14:00 smurf@smurf.noris.de +3 -0 - added *-stamp debian/files debian/tmp/* - - BitKeeper/triggers/post-commit.changelog - 1.1 03/12/29 17:13:58 smurf@smurf.noris.de +3 -0 - - BitKeeper/triggers/post-commit.changelog - 1.0 03/12/29 17:13:58 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/BitKeeper/triggers/post-commit.changelog - - BitKeeper/etc/logging_ok - 1.1 03/12/29 17:13:41 smurf@smurf.noris.de +1 -0 - -ChangeSet - 1.5 03/12/29 17:13:41 smurf@linux.smurf.noris.de +7 -0 - Major enhancements: - - Use a Token object - - Allow incremental reading from a file - - Allow stacking of inputs (#include, whatever) - - Remember line numbers - - Refactor print_line_with_error into the Scanner object - - BitKeeper/etc/logging_ok - 1.0 03/12/29 17:13:41 smurf@smurf.noris.de +0 -0 - BitKeeper file /usr/local/src/misc/yapps/BitKeeper/etc/logging_ok - - yappsrt.py - 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +219 -141 - Major enhancements: - - Use a Token object - - Allow incremental reading from a file - - Allow stacking of inputs (#include, whatever) - - Remember line numbers - - Refactor print_line_with_error into the Scanner object - - parsetree.py - 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +2 -2 - don't pass pos explicitly - - grammar.py - 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +15 -19 - Cleanup - - generated from non-available file! - - examples/calc.g - 1.2 03/12/29 17:13:38 smurf@smurf.noris.de +5 -2 - cleanup (strip, atoi) - - allow reading in of expressions via stacking (TEST) - - BitKeeper/etc/ignore - 1.5 03/12/29 17:10:56 smurf@smurf.noris.de +1 -0 - added *.pyc - - BitKeeper/etc/ignore - 1.4 03/12/29 17:10:51 smurf@smurf.noris.de +1 -0 - added test/*.py - - BitKeeper/etc/ignore - 1.3 03/12/29 17:10:46 smurf@smurf.noris.de +1 -0 - added examples/*.py - - BitKeeper/deleted/.del-yapps_grammar.py~276aa227aa238250 - 1.2 03/12/29 13:58:49 smurf@smurf.noris.de +0 -0 - Delete: yapps_grammar.py - -ChangeSet - 1.4 03/08/28 00:22:57 ?@smurf.noris.de +15 -0 - Version 2.1.1 - -ChangeSet - 1.3 03/08/28 00:22:57 ?@smurf.noris.de +1 -0 - CVS-Ignore - -ChangeSet - 1.2 03/12/29 12:56:52 smurf@smurf.noris.de +1 -0 - Versionsnummern-Updateskript - - BitKeeper/etc/ignore - 1.2 03/08/28 00:22:57 ?@smurf.noris.de +3 -0 - added CVS .cvsignore CVSROOT - -ChangeSet - 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +2 -0 - Initial repository create - - BitKeeper/triggers/pre-commit.upversion - 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +3 -0 - - BitKeeper/etc/ignore - 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +2 -0 - - BitKeeper/etc/config - 1.1 03/12/29 12:56:52 smurf@smurf.noris.de +13 -0 - -ChangeSet - 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 - BitKeeper file /tmp/b.s.20059/ChangeSet - - BitKeeper/triggers/pre-commit.upversion - 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 - BitKeeper file /tmp/b.s.20059/BitKeeper/triggers/pre-commit.upversion - - BitKeeper/etc/ignore - 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 - BitKeeper file /tmp/b.s.20059/BitKeeper/etc/ignore - - BitKeeper/etc/config - 1.0 03/12/29 12:56:52 smurf@smurf.noris.de +0 -0 - BitKeeper file /tmp/b.s.20059/BitKeeper/etc/config - - yappsrt.py - 1.1 03/08/27 23:12:19 ?@smurf.noris.de +296 -0 - New:Version 2.1.1 - - yapps_grammar.py - 1.1 03/08/28 00:22:32 ?@smurf.noris.de +234 -0 - New:Version 2.1.1 - - yapps2.py - 1.1 03/08/12 20:25:55 ?@smurf.noris.de +111 -0 - New:Version 2.1.1 - - test/option.g - 1.1 03/08/11 20:43:22 ?@smurf.noris.de +17 -0 - New:Version 2.1.1 - - test/line_numbers.g - 1.1 03/08/28 00:22:56 ?@smurf.noris.de +10 -0 - New:Version 2.1.1 - - test/empty_clauses.g - 1.1 03/08/27 23:48:11 ?@smurf.noris.de +10 -0 - New:Version 2.1.1 - - parsetree.py - 1.1 03/08/28 00:18:14 ?@smurf.noris.de +645 -0 - New:Version 2.1.1 - - grammar.py - 1.1 03/08/28 00:16:28 ?@smurf.noris.de +234 -0 - New:Version 2.1.1 - - examples/xml.g - 1.1 03/08/27 20:53:39 ?@smurf.noris.de +66 -0 - New:Version 2.1.1 - - examples/lisp.g - 1.1 03/08/11 20:18:18 ?@smurf.noris.de +13 -0 - New:Version 2.1.1 - - examples/expr.g - 1.1 03/08/08 06:47:58 ?@smurf.noris.de +21 -0 - New:Version 2.1.1 - - examples/calc.g - 1.1 03/08/11 20:17:09 ?@smurf.noris.de +58 -0 - New:Version 2.1.1 - - NOTES - 1.1 03/08/12 18:59:41 ?@smurf.noris.de +78 -0 - New:Version 2.1.1 - - LICENSE - 1.1 03/08/11 19:41:27 ?@smurf.noris.de +20 -0 - New:Version 2.1.1 - - ChangeLog - 1.1 03/08/28 00:22:16 ?@smurf.noris.de +108 -0 - New:Version 2.1.1 - - yappsrt.py - 1.0 03/08/27 23:12:19 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yappsrt.py - - yapps_grammar.py - 1.0 03/08/28 00:22:32 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yapps_grammar.py - - yapps2.py - 1.0 03/08/12 20:25:55 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/yapps2.py - - test/option.g - 1.0 03/08/11 20:43:22 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/option.g - - test/line_numbers.g - 1.0 03/08/28 00:22:56 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/line_numbers.g - - test/empty_clauses.g - 1.0 03/08/27 23:48:11 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/test/empty_clauses.g - - parsetree.py - 1.0 03/08/28 00:18:14 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/parsetree.py - - grammar.py - 1.0 03/08/28 00:16:28 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/grammar.py - - examples/xml.g - 1.0 03/08/27 20:53:39 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/xml.g - - examples/lisp.g - 1.0 03/08/11 20:18:18 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/lisp.g - - examples/expr.g - 1.0 03/08/08 06:47:58 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/expr.g - - examples/calc.g - 1.0 03/08/11 20:17:09 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/examples/calc.g - - NOTES - 1.0 03/08/12 18:59:41 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/NOTES - - LICENSE - 1.0 03/08/11 19:41:27 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/LICENSE - - ChangeLog - 1.0 03/08/28 00:22:16 ?@smurf.noris.de +0 -0 - BitKeeper file /home/smurf/neu/yapps-2.1.1/yapps3/ChangeLog - diff --git a/exyapps/__init__.py b/exyapps/__init__.py new file mode 100644 index 0000000..1bb8bf6 --- /dev/null +++ b/exyapps/__init__.py @@ -0,0 +1 @@ +# empty diff --git a/exyapps/grammar.py b/exyapps/grammar.py new file mode 100644 index 0000000..258c464 --- /dev/null +++ b/exyapps/grammar.py @@ -0,0 +1,213 @@ +# THIS FILE WAS AUTOMATICALLY GENERATED +# grammar.py, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# Enhancements copyright 2003-2004 by Matthias Urlichs +# +# This version of the Yapps 2 grammar can be distributed under the +# terms of the MIT open source license, either found in the LICENSE +# file included with the Yapps distribution +# or at +# +# + +"""Parser for Yapps grammars. + +This file defines the grammar of Yapps grammars. Naturally, it is +implemented in Yapps. The grammar.py module needed by Yapps is built +by running Yapps on yapps_grammar.g. (Holy circularity, Batman!) + +""" + +import sys, re +from exyapps import parsetree + +###################################################################### +def cleanup_choice(rule, lst): + if len(lst) == 0: return Sequence(rule, []) + if len(lst) == 1: return lst[0] + return parsetree.Choice(rule, *tuple(lst)) + +def cleanup_sequence(rule, lst): + if len(lst) == 1: return lst[0] + return parsetree.Sequence(rule, *tuple(lst)) + +def resolve_name(rule, tokens, id, args): + if id in [x[0] for x in tokens]: + # It's a token + if args: + print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) + return parsetree.Terminal(rule, id) + else: + # It's a name, so assume it's a nonterminal + return parsetree.NonTerminal(rule, id, args) + + +# Begin -- grammar generated by Yapps +import sys, re +from exyapps import runtime + +class ParserDescriptionScanner(runtime.Scanner): + patterns = [ + ('"rule"', re.compile('rule')), + ('"ignore"', re.compile('ignore')), + ('"token"', re.compile('token')), + ('"option"', re.compile('option')), + ('":"', re.compile(':')), + ('"parser"', re.compile('parser')), + ('[ \t\r\n]+', re.compile('[ \t\r\n]+')), + ('#.*?\r?\n', re.compile('#.*?\r?\n')), + ('EOF', re.compile('$')), + ('ATTR', re.compile('<<.+?>>')), + ('STMT', re.compile('{{.+?}}')), + ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')), + ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')), + ('LP', re.compile('\\(')), + ('RP', re.compile('\\)')), + ('LB', re.compile('\\[')), + ('RB', re.compile('\\]')), + ('OR', re.compile('[|]')), + ('STAR', re.compile('[*]')), + ('PLUS', re.compile('[+]')), + ('QUEST', re.compile('[?]')), + ('COLON', re.compile(':')), + ] + def __init__(self, str,*args,**kw): + runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw) + +class ParserDescription(runtime.Parser): + Context = runtime.Context + def Parser(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Parser', []) + self._scan('"parser"', context=_context) + ID = self._scan('ID', context=_context) + self._scan('":"', context=_context) + Options = self.Options(_context) + Tokens = self.Tokens(_context) + Rules = self.Rules(Tokens, _context) + EOF = self._scan('EOF', context=_context) + return parsetree.Generator(ID,Options,Tokens,Rules) + + def Options(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Options', []) + opt = {} + while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"': + self._scan('"option"', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + opt[Str] = 1 + return opt + + def Tokens(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Tokens', []) + tok = [] + while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']: + _token = self._peek('"token"', '"ignore"', context=_context) + if _token == '"token"': + self._scan('"token"', context=_context) + ID = self._scan('ID', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + tok.append( (ID,Str) ) + else: # == '"ignore"' + self._scan('"ignore"', context=_context) + self._scan('":"', context=_context) + Str = self.Str(_context) + ign = ('#ignore',Str) + if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT': + STMT = self._scan('STMT', context=_context) + ign = ign + (STMT[2:-2],) + tok.append( ign ) + return tok + + def Rules(self, tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'Rules', [tokens]) + rul = [] + while self._peek('"rule"', 'EOF', context=_context) == '"rule"': + self._scan('"rule"', context=_context) + ID = self._scan('ID', context=_context) + OptParam = self.OptParam(_context) + self._scan('":"', context=_context) + ClauseA = self.ClauseA(ID, tokens, _context) + rul.append( (ID, OptParam, ClauseA) ) + return rul + + def ClauseA(self, rule, tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens]) + ClauseB = self.ClauseB(rule,tokens, _context) + v = [ClauseB] + while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR': + OR = self._scan('OR', context=_context) + ClauseB = self.ClauseB(rule,tokens, _context) + v.append(ClauseB) + return cleanup_choice(rule,v) + + def ClauseB(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens]) + v = [] + while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']: + ClauseC = self.ClauseC(rule,tokens, _context) + v.append(ClauseC) + return cleanup_sequence(rule, v) + + def ClauseC(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens]) + ClauseD = self.ClauseD(rule,tokens, _context) + _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) + if _token == 'PLUS': + PLUS = self._scan('PLUS', context=_context) + return parsetree.Plus(rule, ClauseD) + elif _token == 'STAR': + STAR = self._scan('STAR', context=_context) + return parsetree.Star(rule, ClauseD) + elif _token == 'QUEST': + QUEST = self._scan('QUEST', context=_context) + return parsetree.Option(rule, ClauseD) + else: + return ClauseD + + def ClauseD(self, rule,tokens, _parent=None): + _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens]) + _token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context) + if _token == 'STR': + STR = self._scan('STR', context=_context) + t = (STR, eval(STR,{},{})) + if t not in tokens: tokens.insert( 0, t ) + return parsetree.Terminal(rule, STR) + elif _token == 'ID': + ID = self._scan('ID', context=_context) + OptParam = self.OptParam(_context) + return resolve_name(rule,tokens, ID, OptParam) + elif _token == 'LP': + LP = self._scan('LP', context=_context) + ClauseA = self.ClauseA(rule,tokens, _context) + RP = self._scan('RP', context=_context) + return ClauseA + elif _token == 'LB': + LB = self._scan('LB', context=_context) + ClauseA = self.ClauseA(rule,tokens, _context) + RB = self._scan('RB', context=_context) + return parsetree.Option(rule, ClauseA) + else: # == 'STMT' + STMT = self._scan('STMT', context=_context) + return parsetree.Eval(rule, STMT[2:-2]) + + def OptParam(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'OptParam', []) + if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR': + ATTR = self._scan('ATTR', context=_context) + return ATTR[2:-2] + return '' + + def Str(self, _parent=None): + _context = self.Context(_parent, self._scanner, 'Str', []) + STR = self._scan('STR', context=_context) + return eval(STR,{},{}) + + +def parse(rule, text): + P = ParserDescription(ParserDescriptionScanner(text)) + return runtime.wrap_error_reporter(P, rule) + +# End -- grammar generated by Yapps + + diff --git a/exyapps/main.py b/exyapps/main.py new file mode 100755 index 0000000..881c3cc --- /dev/null +++ b/exyapps/main.py @@ -0,0 +1,135 @@ +#!/usr/bin/python + +# +# Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# +# This version of Yapps 2 can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +import sys, re + +import exyapps.runtime as runtime +import exyapps.parsetree as parsetree + +def generate(inputfilename, outputfilename=None, dump=0, **flags): + """Generate a grammar, given an input filename (X.g) + and an output filename (defaulting to X.py).""" + + if not outputfilename: + # recognize *.g because that is what the old yapps used + if inputfilename.endswith('.g'): + outputfilename = inputfilename[:-2] + '.py' + + # recognize *.exy for the new grammar + elif inputfilename.endswith('.exy'): + outputfilename = inputfilename[:-3] + '.py' + + # cannot automatically generate the output file name if we don't recognize extension + else: + raise Exception('Must specify output filename if input filename is not *.exy or *.g') + + DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers + preparser, postparser = None, None # Code before and after the parser desc + + # Read the entire file + s = open(inputfilename,'r').read() + + # See if there's a separation between the pre-parser and parser + f = s.find(DIVIDER) + if f >= 0: + preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):] + + # See if there's a separation between the parser and post-parser + f = s.find(DIVIDER) + if f >= 0: + s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):] + + # Create the parser and scanner and parse the text + scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename) + if preparser: + scanner.del_line += preparser.count('\n') + + parser = grammar.ParserDescription(scanner) + t = runtime.wrap_error_reporter(parser, 'Parser') + if t is None: + return 1 # Failure + if preparser is not None: + t.preparser = preparser + if postparser is not None: + t.postparser = postparser + + # Check the options + for f in t.options.keys(): + for opt,_,_ in yapps_options: + if f == opt: + break + else: + print >>sys.stderr, 'Warning: unrecognized option', f + + # Add command line options to the set + for f in flags.keys(): + t.options[f] = flags[f] + + # Generate the output + if dump: + t.dump_information() + else: + t.output = open(outputfilename, 'w') + t.generate_output() + return 0 + +def main() : + import doctest + doctest.testmod(sys.modules['__main__']) + doctest.testmod(parsetree) + + # Someday I will use optparse, but Python 2.3 is too new at the moment. + yapps_options = [ + ('context-insensitive-scanner', + 'context-insensitive-scanner', + 'Scan all tokens (see docs)'), + ] + + import getopt + optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['help', 'dump', 'use-devel-grammar']) + if not args or len(args) > 2: + print >>sys.stderr, 'Usage:' + print >>sys.stderr, ' python', sys.argv[0], '[flags] input.g [output.py]' + print >>sys.stderr, 'Flags:' + print >>sys.stderr, (' --dump' + ' '*40)[:35] + 'Dump out grammar information' + print >>sys.stderr, (' --use-devel-grammar' + ' '*40)[:35] + 'Use the devel grammar parser from yapps_grammar.py instead of the stable grammar from grammar.py' + for flag, _, doc in yapps_options: + print >>sys.stderr, (' -f' + flag + ' '*40)[:35] + doc + return 1 + else: + # Read in the options and create a list of flags + flags = {} + use_devel_grammar = 0 + for opt in optlist: + for flag, name, _ in yapps_options: + if opt == ('-f', flag): + flags[name] = 1 + break + else: + if opt == ('--dump', ''): + flags['dump'] = 1 + elif opt == ('--use-devel-grammar', ''): + use_devel_grammar = 1 + else: + print >>sys.stderr, 'Warning: unrecognized option', opt[0], opt[1] + + if use_devel_grammar: + import yapps_grammar as g2 + else: + import exyapps.grammar as g2 + + global grammar + grammar = g2 + + return generate(*tuple(args), **flags) + diff --git a/exyapps/parsetree.py b/exyapps/parsetree.py new file mode 100644 index 0000000..8831609 --- /dev/null +++ b/exyapps/parsetree.py @@ -0,0 +1,674 @@ +# parsetree.py, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# +# This version of the Yapps 2 Runtime can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +"""Classes used to represent parse trees and generate output. + +This module defines the Generator class, which drives the generation +of Python output from a grammar parse tree. It also defines nodes +used to represent the parse tree; they are derived from class Node. + +The main logic of Yapps is in this module. +""" + +import sys, re + +###################################################################### +INDENT = ' '*4 +class Generator: + + # TODO: many of the methods here should be class methods, not instance methods + + def __init__(self, name, options, tokens, rules): + self.change_count = 0 + self.name = name + self.options = options + self.preparser = '' + self.postparser = None + + self.tokens = {} # Map from tokens to regexps + self.ignore = {} # List of token names to ignore in parsing, map to statements + self.terminals = [] # List of token names (to maintain ordering) + for t in tokens: + if len(t) == 3: + n,t,s = t + else: + n,t = t + s = None + + if n == '#ignore': + n = t + self.ignore[n] = s + if n in self.tokens.keys() and self.tokens[n] != t: + print >>sys.stderr, 'Warning: token %s defined more than once.' % n + self.tokens[n] = t + self.terminals.append(n) + + self.rules = {} # Map from rule names to parser nodes + self.params = {} # Map from rule names to parameters + self.goals = [] # List of rule names (to maintain ordering) + for n,p,r in rules: + self.params[n] = p + self.rules[n] = r + self.goals.append(n) + + self.output = sys.stdout + + def has_option(self, name): + return self.options.get(name, 0) + + def non_ignored_tokens(self): + return [x for x in self.terminals if x not in self.ignore] + + def changed(self): + """Increments the change count. + + >>> t = Generator('', [], [], []) + >>> old_count = t.change_count + >>> t.changed() + >>> assert t.change_count == old_count + 1 + """ + self.change_count = 1+self.change_count + + def set_subtract(self, a, b): + """Returns the elements of a that are not in b. + + >>> t = Generator('', [], [], []) + >>> t.set_subtract([], []) + [] + >>> t.set_subtract([1, 2], [1, 2]) + [] + >>> t.set_subtract([1, 2, 3], [2]) + [1, 3] + >>> t.set_subtract([1], [2, 3, 4]) + [1] + """ + result = [] + for x in a: + if x not in b: + result.append(x) + return result + + def subset(self, a, b): + """True iff all elements of sequence a are inside sequence b + + >>> t = Generator('', [], [], []) + >>> t.subset([], [1, 2, 3]) + 1 + >>> t.subset([1, 2, 3], []) + 0 + >>> t.subset([1], [1, 2, 3]) + 1 + >>> t.subset([3, 2, 1], [1, 2, 3]) + 1 + >>> t.subset([1, 1, 1], [1, 2, 3]) + 1 + >>> t.subset([1, 2, 3], [1, 1, 1]) + 0 + """ + for x in a: + if x not in b: + return 0 + return 1 + + def equal_set(self, a, b): + """True iff subset(a, b) and subset(b, a) + + >>> t = Generator('', [], [], []) + >>> a_set = [1, 2, 3] + >>> t.equal_set(a_set, a_set) + 1 + >>> t.equal_set(a_set, a_set[:]) + 1 + >>> t.equal_set([], a_set) + 0 + >>> t.equal_set([1, 2, 3], [3, 2, 1]) + 1 + """ + if len(a) != len(b): return 0 + if a == b: return 1 + return self.subset(a, b) and self.subset(b, a) + + def add_to(self, parent, additions): + "Modify _parent_ to include all elements in _additions_" + for x in additions: + if x not in parent: + parent.append(x) + self.changed() + + def equate(self, a, b): + """Extend (a) and (b) so that they contain each others' elements. + + >>> t = Generator('', [], [], []) + >>> a = [1, 2] + >>> b = [2, 3] + >>> t.equate(a, b) + >>> a + [1, 2, 3] + >>> b + [2, 3, 1] + """ + self.add_to(a, b) + self.add_to(b, a) + + def write(self, *args): + for a in args: + self.output.write(a) + + def in_test(self, expr, full, set): + """Generate a test of (expr) being in (set), where (set) is a subset of (full) + + expr is a string (Python expression) + set is a list of values (which will be converted with repr) + full is the list of all values expr could possibly evaluate to + + >>> t = Generator('', [], [], []) + >>> t.in_test('x', [1,2,3,4], []) + '0' + >>> t.in_test('x', [1,2,3,4], [1,2,3,4]) + '1' + >>> t.in_test('x', [1,2,3,4], [1]) + 'x == 1' + >>> t.in_test('a+b', [1,2,3,4], [1,2]) + 'a+b in [1, 2]' + >>> t.in_test('x', [1,2,3,4,5], [1,2,3]) + 'x not in [4, 5]' + >>> t.in_test('x', [1,2,3,4,5], [1,2,3,4]) + 'x != 5' + """ + + if not set: return '0' + if len(set) == 1: return '%s == %s' % (expr, repr(set[0])) + if full and len(set) > len(full)/2: + # Reverse the sense of the test. + not_set = [x for x in full if x not in set] + return self.not_in_test(expr, full, not_set) + return '%s in %s' % (expr, repr(set)) + + def not_in_test(self, expr, full, set): + """Like in_test, but the reverse test.""" + if not set: return '1' + if len(set) == 1: return '%s != %s' % (expr, repr(set[0])) + return '%s not in %s' % (expr, repr(set)) + + def peek_call(self, a): + """Generate a call to scan for a token in the set 'a'""" + assert type(a) == type([]) + a_set = (repr(a)[1:-1]) + if self.equal_set(a, self.non_ignored_tokens()): a_set = '' + if self.has_option('context-insensitive-scanner'): a_set = '' + if a_set: a_set += "," + + return 'self._peek(%s context=_context)' % a_set + + def peek_test(self, a, b): + """Generate a call to test whether the next token (which could be any of + the elements in a) is in the set b.""" + if self.subset(a, b): return '1' + if self.has_option('context-insensitive-scanner'): a = self.non_ignored_tokens() + return self.in_test(self.peek_call(a), a, b) + + def not_peek_test(self, a, b): + """Like peek_test, but the opposite sense.""" + if self.subset(a, b): return '0' + return self.not_in_test(self.peek_call(a), a, b) + + def calculate(self): + """The main loop to compute the epsilon, first, follow sets. + The loop continues until the sets converge. This works because + each set can only get larger, so when they stop getting larger, + we're done.""" + # First we determine whether a rule accepts epsilon (the empty sequence) + while 1: + for r in self.goals: + self.rules[r].setup(self) + if self.change_count == 0: break + self.change_count = 0 + + # Now we compute the first/follow sets + while 1: + for r in self.goals: + self.rules[r].update(self) + if self.change_count == 0: break + self.change_count = 0 + + def dump_information(self): + """Display the grammar in somewhat human-readable form.""" + self.calculate() + for r in self.goals: + print ' _____' + '_'*len(r) + print ('___/Rule '+r+'\\' + '_'*80)[:79] + queue = [self.rules[r]] + while queue: + top = queue[0] + del queue[0] + + print 'Rule', repr(top), 'of class', top.__class__.__name__ + top.first.sort() + top.follow.sort() + eps = [] + if top.accepts_epsilon: eps = ['(null)'] + print ' FIRST:', ', '.join(top.first+eps) + print ' FOLLOW:', ', '.join(top.follow) + for x in top.get_children(): queue.append(x) + + def repr_ignore(self): + out="{" + for t,s in self.ignore.iteritems(): + if s is None: s=repr(s) + out += "%s:%s," % (repr(t),s) + out += "}" + return out + + def generate_output(self): + self.write("# THIS FILE WAS AUTOMATICALLY GENERATED\n") + self.calculate() + self.write(self.preparser) + self.write("# Begin -- grammar generated by Yapps\n") + self.write("import sys, re\n") + self.write("from exyapps import runtime\n") + self.write("\n") + self.write("class ", self.name, "Scanner(runtime.Scanner):\n") + self.write(" patterns = [\n") + for p in self.terminals: + self.write(" (%s, re.compile(%s)),\n" % ( + repr(p), repr(self.tokens[p]))) + self.write(" ]\n") + self.write(" def __init__(self, str,*args,**kw):\n") + self.write(" runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" % + self.repr_ignore()) + self.write("\n") + + self.write("class ", self.name, "(runtime.Parser):\n") + self.write(INDENT, "Context = runtime.Context\n") + for r in self.goals: + self.write(INDENT, "def ", r, "(self") + if self.params[r]: self.write(", ", self.params[r]) + self.write(", _parent=None):\n") + self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" % + (repr(r), self.params.get(r, ''))) + self.rules[r].output(self, INDENT+INDENT) + self.write("\n") + + self.write("\n") + self.write("def parse(rule, text):\n") + self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n") + self.write(" return runtime.wrap_error_reporter(P, rule)\n") + self.write("\n") + if self.postparser is not None: + self.write("# End -- grammar generated by Yapps\n") + self.write(self.postparser) + else: + self.write("if __name__ == '__main__':\n") + self.write(INDENT, "from sys import argv, stdin\n") + self.write(INDENT, "if len(argv) >= 2:\n") + self.write(INDENT*2, "if len(argv) >= 3:\n") + self.write(INDENT*3, "f = open(argv[2],'r')\n") + self.write(INDENT*2, "else:\n") + self.write(INDENT*3, "f = stdin\n") + self.write(INDENT*2, "print parse(argv[1], f.read())\n") + self.write(INDENT, "else: print >>sys.stderr, 'Args: []'\n") + self.write("# End -- grammar generated by Yapps\n") + +###################################################################### +class Node: + """This is the base class for all components of a grammar.""" + def __init__(self, rule): + self.rule = rule # name of the rule containing this node + self.first = [] + self.follow = [] + self.accepts_epsilon = 0 + + def setup(self, gen): + # Setup will change accepts_epsilon, + # sometimes from 0 to 1 but never 1 to 0. + # It will take a finite number of steps to set things up + pass + + def used(self, vars): + "Return two lists: one of vars used, and the other of vars assigned" + return vars, [] + + def get_children(self): + "Return a list of sub-nodes" + return [] + + def __repr__(self): + return str(self) + + def update(self, gen): + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + "Write out code to _gen_ with _indent_:string indentation" + gen.write(indent, "assert 0 # Invalid parser node\n") + +class Terminal(Node): + """This class stores terminal nodes, which are tokens.""" + def __init__(self, rule, token): + Node.__init__(self, rule) + self.token = token + self.accepts_epsilon = 0 + + def __str__(self): + return self.token + + def update(self, gen): + Node.update(self, gen) + if self.first != [self.token]: + self.first = [self.token] + gen.changed() + + def output(self, gen, indent): + gen.write(indent) + if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token): + gen.write(self.token, " = ") + gen.write("self._scan(%s, context=_context)\n" % repr(self.token)) + +class Eval(Node): + """This class stores evaluation nodes, from {{ ... }} clauses.""" + def __init__(self, rule, expr): + Node.__init__(self, rule) + self.expr = expr + + def setup(self, gen): + Node.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '{{ %s }}' % self.expr.strip() + + def output(self, gen, indent): + gen.write(indent, self.expr.strip(), '\n') + +class NonTerminal(Node): + """This class stores nonterminal nodes, which are rules with arguments.""" + def __init__(self, rule, name, args): + Node.__init__(self, rule) + self.name = name + self.args = args + + def setup(self, gen): + Node.setup(self, gen) + try: + self.target = gen.rules[self.name] + if self.accepts_epsilon != self.target.accepts_epsilon: + self.accepts_epsilon = self.target.accepts_epsilon + gen.changed() + except KeyError: # Oops, it's nonexistent + print >>sys.stderr, 'Error: no rule <%s>' % self.name + self.target = self + + def __str__(self): + return '%s' % self.name + + def update(self, gen): + Node.update(self, gen) + gen.equate(self.first, self.target.first) + gen.equate(self.follow, self.target.follow) + + def output(self, gen, indent): + gen.write(indent) + gen.write(self.name, " = ") + args = self.args + if args: args += ', ' + args += '_context' + gen.write("self.", self.name, "(", args, ")\n") + +class Sequence(Node): + """This class stores a sequence of nodes (A B C ...)""" + def __init__(self, rule, *children): + Node.__init__(self, rule) + self.children = children + + def setup(self, gen): + Node.setup(self, gen) + for c in self.children: c.setup(gen) + + if not self.accepts_epsilon: + # If it's not already accepting epsilon, it might now do so. + for c in self.children: + # any non-epsilon means all is non-epsilon + if not c.accepts_epsilon: break + else: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % ' '.join(map(str, self.children)) + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + empty = 1 + for g_i in range(len(self.children)): + g = self.children[g_i] + + if empty: gen.add_to(self.first, g.first) + if not g.accepts_epsilon: empty = 0 + + if g_i == len(self.children)-1: + next = self.follow + else: + next = self.children[1+g_i].first + gen.add_to(g.follow, next) + + if self.children: + gen.add_to(self.follow, self.children[-1].follow) + + def output(self, gen, indent): + if self.children: + for c in self.children: + c.output(gen, indent) + else: + # Placeholder for empty sequences, just in case + gen.write(indent, 'pass\n') + +class Choice(Node): + """This class stores a choice between nodes (A | B | C | ...)""" + def __init__(self, rule, *children): + Node.__init__(self, rule) + self.children = children + + def setup(self, gen): + Node.setup(self, gen) + for c in self.children: c.setup(gen) + + if not self.accepts_epsilon: + for c in self.children: + if c.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % ' | '.join(map(str, self.children)) + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + for g in self.children: + gen.add_to(self.first, g.first) + gen.add_to(self.follow, g.follow) + for g in self.children: + gen.add_to(g.follow, self.follow) + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + test = "if" + gen.write(indent, "_token = ", gen.peek_call(self.first), "\n") + tokens_seen = [] + tokens_unseen = self.first[:] + if gen.has_option('context-insensitive-scanner'): + # Context insensitive scanners can return ANY token, + # not only the ones in first. + tokens_unseen = gen.non_ignored_tokens() + for c in self.children: + testset = c.first[:] + removed = [] + for x in testset: + if x in tokens_seen: + testset.remove(x) + removed.append(x) + if x in tokens_unseen: tokens_unseen.remove(x) + tokens_seen = tokens_seen + testset + if removed: + if not testset: + print >>sys.stderr, 'Error in rule', self.rule+':' + else: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' *', self + print >>sys.stderr, ' * These tokens could be matched by more than one clause:' + print >>sys.stderr, ' *', ' '.join(removed) + + if testset: + if not tokens_unseen: # context sensitive scanners only! + if test == 'if': + # if it's the first AND last test, then + # we can simply put the code without an if/else + c.output(gen, indent) + else: + gen.write(indent, "else:") + t = gen.in_test('', [], testset) + if len(t) < 70-len(indent): + gen.write(' #', t) + gen.write("\n") + c.output(gen, indent+INDENT) + else: + gen.write(indent, test, " ", + gen.in_test('_token', tokens_unseen, testset), + ":\n") + c.output(gen, indent+INDENT) + test = "elif" + + if tokens_unseen: + gen.write(indent, "else:\n") + gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ") + gen.write("'Could not match ", self.rule, "')\n") + +class Wrapper(Node): + """This is a base class for nodes that modify a single child.""" + def __init__(self, rule, child): + Node.__init__(self, rule) + self.child = child + + def setup(self, gen): + Node.setup(self, gen) + self.child.setup(gen) + + def get_children(self): + return [self.child] + + def update(self, gen): + Node.update(self, gen) + self.child.update(gen) + gen.add_to(self.first, self.child.first) + gen.equate(self.follow, self.child.follow) + +class Option(Wrapper): + """This class represents an optional clause of the form [A]""" + def setup(self, gen): + Wrapper.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '[ %s ]' % str(self.child) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+': contents may be empty.' + gen.write(indent, "if %s:\n" % + gen.peek_test(self.first, self.child.first)) + self.child.output(gen, indent+INDENT) + + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + + +class Plus(Wrapper): + """This class represents a 1-or-more repetition clause of the form A+""" + def setup(self, gen): + Wrapper.setup(self, gen) + if self.accepts_epsilon != self.child.accepts_epsilon: + self.accepts_epsilon = self.child.accepts_epsilon + gen.changed() + + def __str__(self): + return '%s+' % str(self.child) + + def update(self, gen): + Wrapper.update(self, gen) + gen.add_to(self.child.follow, self.child.first) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser may not work properly.' + gen.write(indent, "while 1:\n") + self.child.output(gen, indent+INDENT) + union = self.first[:] + gen.add_to(union, self.follow) + gen.write(indent+INDENT, "if %s: break\n" % + gen.not_peek_test(union, self.child.first)) + + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + + +class Star(Wrapper): + """This class represents a 0-or-more repetition clause of the form A*""" + def setup(self, gen): + Wrapper.setup(self, gen) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '%s*' % str(self.child) + + def update(self, gen): + Wrapper.update(self, gen) + gen.add_to(self.child.follow, self.child.first) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print >>sys.stderr, 'Warning in rule', self.rule+':' + print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser probably will not work properly.' + gen.write(indent, "while %s:\n" % + gen.peek_test(self.follow, self.child.first)) + self.child.output(gen, indent+INDENT) + + # TODO: need to generate tests like this in lots of rules + if gen.has_option('context-insensitive-scanner'): + gen.write(indent, "if %s:\n" % + gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) + gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % + repr(self.first)) + diff --git a/exyapps/runtime.py b/exyapps/runtime.py new file mode 100644 index 0000000..5d9d1d6 --- /dev/null +++ b/exyapps/runtime.py @@ -0,0 +1,442 @@ +# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system +# Copyright 1999-2003 by Amit J. Patel +# Enhancements copyright 2003-2004 by Matthias Urlichs +# +# This version of the Yapps 2 Runtime can be distributed under the +# terms of the MIT open source license, either found in the LICENSE file +# included with the Yapps distribution +# or at +# +# + +"""Run time libraries needed to run parsers generated by Yapps. + +This module defines parse-time exception classes, a scanner class, a +base class for parsers produced by Yapps, and a context class that +keeps track of the parse stack. + +""" + +import sys, re + +MIN_WINDOW=4096 +# File lookup window + +class SyntaxError(Exception): + """When we run into an unexpected token, this is the exception to use""" + def __init__(self, pos=None, msg="Bad Token", context=None): + Exception.__init__(self) + self.pos = pos + self.msg = msg + self.context = context + + def __str__(self): + if not self.pos: return 'SyntaxError' + else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg) + +class NoMoreTokens(Exception): + """Another exception object, for when we run out of tokens""" + pass + +class Token(object): + """Yapps token. + + This is a container for a scanned token. + """ + + def __init__(self, type,value, pos=None): + """Initialize a token.""" + self.type = type + self.value = value + self.pos = pos + + def __repr__(self): + output = '<%s: %s' % (self.type, repr(self.value)) + if self.pos: + output += " @ " + if self.pos[0]: + output += "%s:" % self.pos[0] + if self.pos[1]: + output += "%d" % self.pos[1] + if self.pos[2] is not None: + output += ".%d" % self.pos[2] + output += ">" + return output + +in_name=0 +class Scanner(object): + """Yapps scanner. + + The Yapps scanner can work in context sensitive or context + insensitive modes. The token(i) method is used to retrieve the + i-th token. It takes a restrict set that limits the set of tokens + it is allowed to return. In context sensitive mode, this restrict + set guides the scanner. In context insensitive mode, there is no + restriction (the set is always the full set of tokens). + + """ + + def __init__(self, patterns, ignore, input="", + file=None,filename=None,stacked=False): + """Initialize the scanner. + + Parameters: + patterns : [(terminal, uncompiled regex), ...] or None + ignore : {terminal:None, ...} + input : string + + If patterns is None, we assume that the subclass has + defined self.patterns : [(terminal, compiled regex), ...]. + Note that the patterns parameter expects uncompiled regexes, + whereas the self.patterns field expects compiled regexes. + + The 'ignore' value is either None or a callable, which is called + with the scanner and the to-be-ignored match object; this can + be used for include file or comment handling. + """ + + if not filename: + global in_name + filename="" % in_name + in_name += 1 + + self.input = input + self.ignore = ignore + self.file = file + self.filename = filename + self.pos = 0 + self.del_pos = 0 # skipped + self.line = 1 + self.del_line = 0 # skipped + self.col = 0 + self.tokens = [] + self.stack = None + self.stacked = stacked + + self.last_read_token = None + self.last_token = None + self.last_types = None + + if patterns is not None: + # Compile the regex strings into regex objects + self.patterns = [] + for terminal, regex in patterns: + self.patterns.append( (terminal, re.compile(regex)) ) + + def stack_input(self, input="", file=None, filename=None): + """Temporarily parse from a second file.""" + + # Already reading from somewhere else: Go on top of that, please. + if self.stack: + # autogenerate a recursion-level-identifying filename + if not filename: + filename = 1 + else: + try: + filename += 1 + except TypeError: + pass + # now pass off to the include file + self.stack.stack_input(input,file,filename) + else: + + try: + filename += 0 + except TypeError: + pass + else: + filename = "" % filename + +# self.stack = object.__new__(self.__class__) +# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True) + + # Note that the pattern+ignore are added by the generated + # scanner code + self.stack = self.__class__(input,file,filename, stacked=True) + + def get_pos(self): + """Return a file/line/char tuple.""" + if self.stack: return self.stack.get_pos() + + return (self.filename, self.line+self.del_line, self.col) + +# def __repr__(self): +# """Print the last few tokens that have been scanned in""" +# output = '' +# for t in self.tokens: +# output += '%s\n' % (repr(t),) +# return output + + def print_line_with_pointer(self, pos, length=0, out=sys.stderr): + """Print the line of 'text' that includes position 'p', + along with a second line with a single caret (^) at position p""" + + file,line,p = pos + if file != self.filename: + if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out) + print >>out, "(%s: not in input buffer)" % file + return + + text = self.input + p += length-1 # starts at pos 1 + + origline=line + line -= self.del_line + spos=0 + if line > 0: + while 1: + line = line - 1 + try: + cr = text.index("\n",spos) + except ValueError: + if line: + text = "" + break + if line == 0: + text = text[spos:cr] + break + spos = cr+1 + else: + print >>out, "(%s:%d not in input buffer)" % (file,origline) + return + + # Now try printing part of the line + text = text[max(p-80, 0):p+80] + p = p - max(p-80, 0) + + # Strip to the left + i = text[:p].rfind('\n') + j = text[:p].rfind('\r') + if i < 0 or (0 <= j < i): i = j + if 0 <= i < p: + p = p - i - 1 + text = text[i+1:] + + # Strip to the right + i = text.find('\n', p) + j = text.find('\r', p) + if i < 0 or (0 <= j < i): i = j + if i >= 0: + text = text[:i] + + # Now shorten the text + while len(text) > 70 and p > 60: + # Cut off 10 chars + text = "..." + text[10:] + p = p - 7 + + # Now print the string, along with an indicator + print >>out, '> ',text + print >>out, '> ',' '*p + '^' + + def grab_input(self): + """Get more input if possible.""" + if not self.file: return + if len(self.input) - self.pos >= MIN_WINDOW: return + + data = self.file.read(MIN_WINDOW) + if data is None or data == "": + self.file = None + + # Drop bytes from the start, if necessary. + if self.pos > 2*MIN_WINDOW: + self.del_pos += MIN_WINDOW + self.del_line += self.input[:MIN_WINDOW].count("\n") + self.pos -= MIN_WINDOW + self.input = self.input[MIN_WINDOW:] + data + else: + self.input = self.input + data + + def getchar(self): + """Return the next character.""" + self.grab_input() + + c = self.input[self.pos] + self.pos += 1 + return c + + def token(self, restrict, context=None): + """Scan for another token.""" + + while 1: + if self.stack: + try: + return self.stack.token(restrict, context) + except StopIteration: + self.stack = None + + # Keep looking for a token, ignoring any in self.ignore + self.grab_input() + + # special handling for end-of-file + if self.stacked and self.pos==len(self.input): + raise StopIteration + + # Search the patterns for the longest match, with earlier + # tokens in the list having preference + best_match = -1 + best_pat = '(error)' + best_m = None + for p, regexp in self.patterns: + # First check to see if we're ignoring this token + if restrict and p not in restrict and p not in self.ignore: + continue + m = regexp.match(self.input, self.pos) + if m and m.end()-m.start() > best_match: + # We got a match that's better than the previous one + best_pat = p + best_match = m.end()-m.start() + best_m = m + + # If we didn't find anything, raise an error + if best_pat == '(error)' and best_match < 0: + msg = 'Bad Token' + if restrict: + msg = 'Trying to find one of '+', '.join(restrict) + raise SyntaxError(self.get_pos(), msg, context=context) + + ignore = best_pat in self.ignore + value = self.input[self.pos:self.pos+best_match] + if not ignore: + tok=Token(type=best_pat, value=value, pos=self.get_pos()) + + self.pos += best_match + + npos = value.rfind("\n") + if npos > -1: + self.col = best_match-npos + self.line += value.count("\n") + else: + self.col += best_match + + # If we found something that isn't to be ignored, return it + if not ignore: + if len(self.tokens) >= 10: + del self.tokens[0] + self.tokens.append(tok) + self.last_read_token = tok + # print repr(tok) + return tok + else: + ignore = self.ignore[best_pat] + if ignore: + ignore(self, best_m) + + def peek(self, *types, **kw): + """Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow""" + context = kw.get("context",None) + if self.last_token is None: + self.last_types = types + self.last_token = self.token(types,context) + elif self.last_types: + for t in types: + if t not in self.last_types: + raise NotImplementedError("Unimplemented: restriction set changed") + return self.last_token.type + + def scan(self, type, **kw): + """Returns the matched text, and moves to the next token""" + context = kw.get("context",None) + + if self.last_token is None: + tok = self.token([type],context) + else: + if self.last_types and type not in self.last_types: + raise NotImplementedError("Unimplemented: restriction set changed") + + tok = self.last_token + self.last_token = None + if tok.type != type: + if not self.last_types: self.last_types=[] + raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context) + return tok.value + +class Parser(object): + """Base class for Yapps-generated parsers. + + """ + + def __init__(self, scanner): + self._scanner = scanner + + def _stack(self, input="",file=None,filename=None): + """Temporarily read from someplace else""" + self._scanner.stack_input(input,file,filename) + self._tok = None + + def _peek(self, *types, **kw): + """Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow""" + return self._scanner.peek(*types, **kw) + + def _scan(self, type, **kw): + """Returns the matched text, and moves to the next token""" + return self._scanner.scan(type, **kw) + +class Context(object): + """Class to represent the parser's call stack. + + Every rule creates a Context that links to its parent rule. The + contexts can be used for debugging. + + """ + + def __init__(self, parent, scanner, rule, args=()): + """Create a new context. + + Args: + parent: Context object or None + scanner: Scanner object + rule: string (name of the rule) + args: tuple listing parameters to the rule + + """ + self.parent = parent + self.scanner = scanner + self.rule = rule + self.args = args + while scanner.stack: scanner = scanner.stack + self.token = scanner.last_read_token + + def __str__(self): + output = '' + if self.parent: output = str(self.parent) + ' > ' + output += self.rule + return output + +def print_error(err, scanner, max_ctx=None): + """Print error messages, the parser stack, and the input text -- for human-readable error messages.""" + # NOTE: this function assumes 80 columns :-( + # Figure out the line number + pos = err.pos + if not pos: + pos = scanner.get_pos() + + file_name, line_number, column_number = pos + print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg) + + scanner.print_line_with_pointer(pos) + + context = err.context + token = None + while context: + print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args)) + if context.token: + token = context.token + if token: + scanner.print_line_with_pointer(token.pos, length=len(token.value)) + context = context.parent + if max_ctx: + max_ctx = max_ctx-1 + if not max_ctx: + break + +def wrap_error_reporter(parser, rule, *args,**kw): + try: + return getattr(parser, rule)(*args,**kw) + except SyntaxError, e: + print_error(e, parser._scanner) + except NoMoreTokens: + print >>sys.stderr, 'Could not complete parsing; stopped around here:' + print >>sys.stderr, parser._scanner diff --git a/scripts/exyapps b/scripts/exyapps new file mode 100644 index 0000000..a0d610d --- /dev/null +++ b/scripts/exyapps @@ -0,0 +1,4 @@ +#!python + +import exyapps.main +exyapps.main.main() diff --git a/setup.py b/setup.py index 0d7c98d..24d59c6 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -"""Setup script for 'yapps'""" +"""Setup script for 'exyapps'""" from distutils.core import setup -description = "Yet Another Python Parser System" +description = "Extensions of Yet Another Python Parser System" long_description = \ """ YAPPS is an easy to use parser generator that is written in Python and @@ -23,20 +23,27 @@ original YAPPS source: - augmented ignore-able patterns (can parse multi-line C comments correctly) - better error reporting - read input incrementally + +Exyapps is an extended fork of yapps with these new features: +- (to be written) + """ -setup (name = "python-yapps", - version = "2.1.1", - description = description, - long_description = long_description, - author = "Amit J. Patel", - author_email = "amitp@cs.stanford.edu", - maintainer = "Matthias Urlichs", - maintainer_email = "smurf@debian.org", - url = "http://theory.stanford.edu/~amitp/yapps/", - license = 'MIT', - platforms = ['POSIX'], - keywords = ['parsing'], - packages = ['yapps'], - #cmdclass = {'bdist_rpm': MyBDist_RPM}, - ) +setup ( + name = "exyapps", + version = "3.0", + description = description, + long_description = long_description, + # bug: replace this and put acknowledgements of these guys in the docs + # url = "http://theory.stanford.edu/~amitp/yapps/", + # author = "Amit J. Patel", + # author_email = "amitp@cs.stanford.edu", + # maintainer = "Matthias Urlichs", + # maintainer_email = "smurf@debian.org", + license = 'MIT', + platforms = ['POSIX'], + keywords = ['parsing'], + packages = ['exyapps'], + scripts = ['scripts/exyapps'], + #cmdclass = {'bdist_rpm': MyBDist_RPM}, + ) diff --git a/yapps/__init__.py b/yapps/__init__.py deleted file mode 100644 index 1bb8bf6..0000000 --- a/yapps/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# empty diff --git a/yapps/grammar.py b/yapps/grammar.py deleted file mode 100644 index 1714344..0000000 --- a/yapps/grammar.py +++ /dev/null @@ -1,211 +0,0 @@ -# grammar.py, part of Yapps 2 - yet another python parser system -# Copyright 1999-2003 by Amit J. Patel -# -# This version of the Yapps 2 grammar can be distributed under the -# terms of the MIT open source license, either found in the LICENSE -# file included with the Yapps distribution -# or at -# -# - -"""Parser for Yapps grammars. - -This file defines the grammar of Yapps grammars. Naturally, it is -implemented in Yapps. The grammar.py module needed by Yapps is built -by running Yapps on yapps_grammar.g. (Holy circularity, Batman!) - -""" - -import sys, re -from yapps import parsetree - -###################################################################### -def cleanup_choice(rule, lst): - if len(lst) == 0: return Sequence(rule, []) - if len(lst) == 1: return lst[0] - return parsetree.Choice(rule, *tuple(lst)) - -def cleanup_sequence(rule, lst): - if len(lst) == 1: return lst[0] - return parsetree.Sequence(rule, *tuple(lst)) - -def resolve_name(rule, tokens, id, args): - if id in [x[0] for x in tokens]: - # It's a token - if args: - print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) - return parsetree.Terminal(rule, id) - else: - # It's a name, so assume it's a nonterminal - return parsetree.NonTerminal(rule, id, args) - - -# Begin -- grammar generated by Yapps -import sys, re -from yapps import runtime - -class ParserDescriptionScanner(runtime.Scanner): - patterns = [ - ('"rule"', re.compile('rule')), - ('"ignore"', re.compile('ignore')), - ('"token"', re.compile('token')), - ('"option"', re.compile('option')), - ('":"', re.compile(':')), - ('"parser"', re.compile('parser')), - ('[ \t\r\n]+', re.compile('[ \t\r\n]+')), - ('#.*?\r?\n', re.compile('#.*?\r?\n')), - ('EOF', re.compile('$')), - ('ATTR', re.compile('<<.+?>>')), - ('STMT', re.compile('{{.+?}}')), - ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')), - ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')), - ('LP', re.compile('\\(')), - ('RP', re.compile('\\)')), - ('LB', re.compile('\\[')), - ('RB', re.compile('\\]')), - ('OR', re.compile('[|]')), - ('STAR', re.compile('[*]')), - ('PLUS', re.compile('[+]')), - ('QUEST', re.compile('[?]')), - ('COLON', re.compile(':')), - ] - def __init__(self, str,*args,**kw): - runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw) - -class ParserDescription(runtime.Parser): - Context = runtime.Context - def Parser(self, _parent=None): - _context = self.Context(_parent, self._scanner, 'Parser', []) - self._scan('"parser"', context=_context) - ID = self._scan('ID', context=_context) - self._scan('":"', context=_context) - Options = self.Options(_context) - Tokens = self.Tokens(_context) - Rules = self.Rules(Tokens, _context) - EOF = self._scan('EOF', context=_context) - return parsetree.Generator(ID,Options,Tokens,Rules) - - def Options(self, _parent=None): - _context = self.Context(_parent, self._scanner, 'Options', []) - opt = {} - while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"': - self._scan('"option"', context=_context) - self._scan('":"', context=_context) - Str = self.Str(_context) - opt[Str] = 1 - return opt - - def Tokens(self, _parent=None): - _context = self.Context(_parent, self._scanner, 'Tokens', []) - tok = [] - while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']: - _token = self._peek('"token"', '"ignore"', context=_context) - if _token == '"token"': - self._scan('"token"', context=_context) - ID = self._scan('ID', context=_context) - self._scan('":"', context=_context) - Str = self.Str(_context) - tok.append( (ID,Str) ) - else: # == '"ignore"' - self._scan('"ignore"', context=_context) - self._scan('":"', context=_context) - Str = self.Str(_context) - ign = ('#ignore',Str) - if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT': - STMT = self._scan('STMT', context=_context) - ign = ign + (STMT[2:-2],) - tok.append( ign ) - return tok - - def Rules(self, tokens, _parent=None): - _context = self.Context(_parent, self._scanner, 'Rules', [tokens]) - rul = [] - while self._peek('"rule"', 'EOF', context=_context) == '"rule"': - self._scan('"rule"', context=_context) - ID = self._scan('ID', context=_context) - OptParam = self.OptParam(_context) - self._scan('":"', context=_context) - ClauseA = self.ClauseA(ID, tokens, _context) - rul.append( (ID, OptParam, ClauseA) ) - return rul - - def ClauseA(self, rule, tokens, _parent=None): - _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens]) - ClauseB = self.ClauseB(rule,tokens, _context) - v = [ClauseB] - while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR': - OR = self._scan('OR', context=_context) - ClauseB = self.ClauseB(rule,tokens, _context) - v.append(ClauseB) - return cleanup_choice(rule,v) - - def ClauseB(self, rule,tokens, _parent=None): - _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens]) - v = [] - while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']: - ClauseC = self.ClauseC(rule,tokens, _context) - v.append(ClauseC) - return cleanup_sequence(rule, v) - - def ClauseC(self, rule,tokens, _parent=None): - _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens]) - ClauseD = self.ClauseD(rule,tokens, _context) - _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) - if _token == 'PLUS': - PLUS = self._scan('PLUS', context=_context) - return parsetree.Plus(rule, ClauseD) - elif _token == 'STAR': - STAR = self._scan('STAR', context=_context) - return parsetree.Star(rule, ClauseD) - elif _token == 'QUEST': - QUEST = self._scan('QUEST', context=_context) - return parsetree.Option(rule, ClauseD) - else: - return ClauseD - - def ClauseD(self, rule,tokens, _parent=None): - _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens]) - _token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context) - if _token == 'STR': - STR = self._scan('STR', context=_context) - t = (STR, eval(STR,{},{})) - if t not in tokens: tokens.insert( 0, t ) - return parsetree.Terminal(rule, STR) - elif _token == 'ID': - ID = self._scan('ID', context=_context) - OptParam = self.OptParam(_context) - return resolve_name(rule,tokens, ID, OptParam) - elif _token == 'LP': - LP = self._scan('LP', context=_context) - ClauseA = self.ClauseA(rule,tokens, _context) - RP = self._scan('RP', context=_context) - return ClauseA - elif _token == 'LB': - LB = self._scan('LB', context=_context) - ClauseA = self.ClauseA(rule,tokens, _context) - RB = self._scan('RB', context=_context) - return parsetree.Option(rule, ClauseA) - else: # == 'STMT' - STMT = self._scan('STMT', context=_context) - return parsetree.Eval(rule, STMT[2:-2]) - - def OptParam(self, _parent=None): - _context = self.Context(_parent, self._scanner, 'OptParam', []) - if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR': - ATTR = self._scan('ATTR', context=_context) - return ATTR[2:-2] - return '' - - def Str(self, _parent=None): - _context = self.Context(_parent, self._scanner, 'Str', []) - STR = self._scan('STR', context=_context) - return eval(STR,{},{}) - - -def parse(rule, text): - P = ParserDescription(ParserDescriptionScanner(text)) - return runtime.wrap_error_reporter(P, rule) - -# End -- grammar generated by Yapps - - diff --git a/yapps/parsetree.py b/yapps/parsetree.py deleted file mode 100644 index e5e0ae0..0000000 --- a/yapps/parsetree.py +++ /dev/null @@ -1,673 +0,0 @@ -# parsetree.py, part of Yapps 2 - yet another python parser system -# Copyright 1999-2003 by Amit J. Patel -# -# This version of the Yapps 2 Runtime can be distributed under the -# terms of the MIT open source license, either found in the LICENSE file -# included with the Yapps distribution -# or at -# -# - -"""Classes used to represent parse trees and generate output. - -This module defines the Generator class, which drives the generation -of Python output from a grammar parse tree. It also defines nodes -used to represent the parse tree; they are derived from class Node. - -The main logic of Yapps is in this module. -""" - -import sys, re - -###################################################################### -INDENT = ' '*4 -class Generator: - - # TODO: many of the methods here should be class methods, not instance methods - - def __init__(self, name, options, tokens, rules): - self.change_count = 0 - self.name = name - self.options = options - self.preparser = '' - self.postparser = None - - self.tokens = {} # Map from tokens to regexps - self.ignore = {} # List of token names to ignore in parsing, map to statements - self.terminals = [] # List of token names (to maintain ordering) - for t in tokens: - if len(t) == 3: - n,t,s = t - else: - n,t = t - s = None - - if n == '#ignore': - n = t - self.ignore[n] = s - if n in self.tokens.keys() and self.tokens[n] != t: - print >>sys.stderr, 'Warning: token %s defined more than once.' % n - self.tokens[n] = t - self.terminals.append(n) - - self.rules = {} # Map from rule names to parser nodes - self.params = {} # Map from rule names to parameters - self.goals = [] # List of rule names (to maintain ordering) - for n,p,r in rules: - self.params[n] = p - self.rules[n] = r - self.goals.append(n) - - self.output = sys.stdout - - def has_option(self, name): - return self.options.get(name, 0) - - def non_ignored_tokens(self): - return [x for x in self.terminals if x not in self.ignore] - - def changed(self): - """Increments the change count. - - >>> t = Generator('', [], [], []) - >>> old_count = t.change_count - >>> t.changed() - >>> assert t.change_count == old_count + 1 - """ - self.change_count = 1+self.change_count - - def set_subtract(self, a, b): - """Returns the elements of a that are not in b. - - >>> t = Generator('', [], [], []) - >>> t.set_subtract([], []) - [] - >>> t.set_subtract([1, 2], [1, 2]) - [] - >>> t.set_subtract([1, 2, 3], [2]) - [1, 3] - >>> t.set_subtract([1], [2, 3, 4]) - [1] - """ - result = [] - for x in a: - if x not in b: - result.append(x) - return result - - def subset(self, a, b): - """True iff all elements of sequence a are inside sequence b - - >>> t = Generator('', [], [], []) - >>> t.subset([], [1, 2, 3]) - 1 - >>> t.subset([1, 2, 3], []) - 0 - >>> t.subset([1], [1, 2, 3]) - 1 - >>> t.subset([3, 2, 1], [1, 2, 3]) - 1 - >>> t.subset([1, 1, 1], [1, 2, 3]) - 1 - >>> t.subset([1, 2, 3], [1, 1, 1]) - 0 - """ - for x in a: - if x not in b: - return 0 - return 1 - - def equal_set(self, a, b): - """True iff subset(a, b) and subset(b, a) - - >>> t = Generator('', [], [], []) - >>> a_set = [1, 2, 3] - >>> t.equal_set(a_set, a_set) - 1 - >>> t.equal_set(a_set, a_set[:]) - 1 - >>> t.equal_set([], a_set) - 0 - >>> t.equal_set([1, 2, 3], [3, 2, 1]) - 1 - """ - if len(a) != len(b): return 0 - if a == b: return 1 - return self.subset(a, b) and self.subset(b, a) - - def add_to(self, parent, additions): - "Modify _parent_ to include all elements in _additions_" - for x in additions: - if x not in parent: - parent.append(x) - self.changed() - - def equate(self, a, b): - """Extend (a) and (b) so that they contain each others' elements. - - >>> t = Generator('', [], [], []) - >>> a = [1, 2] - >>> b = [2, 3] - >>> t.equate(a, b) - >>> a - [1, 2, 3] - >>> b - [2, 3, 1] - """ - self.add_to(a, b) - self.add_to(b, a) - - def write(self, *args): - for a in args: - self.output.write(a) - - def in_test(self, expr, full, set): - """Generate a test of (expr) being in (set), where (set) is a subset of (full) - - expr is a string (Python expression) - set is a list of values (which will be converted with repr) - full is the list of all values expr could possibly evaluate to - - >>> t = Generator('', [], [], []) - >>> t.in_test('x', [1,2,3,4], []) - '0' - >>> t.in_test('x', [1,2,3,4], [1,2,3,4]) - '1' - >>> t.in_test('x', [1,2,3,4], [1]) - 'x == 1' - >>> t.in_test('a+b', [1,2,3,4], [1,2]) - 'a+b in [1, 2]' - >>> t.in_test('x', [1,2,3,4,5], [1,2,3]) - 'x not in [4, 5]' - >>> t.in_test('x', [1,2,3,4,5], [1,2,3,4]) - 'x != 5' - """ - - if not set: return '0' - if len(set) == 1: return '%s == %s' % (expr, repr(set[0])) - if full and len(set) > len(full)/2: - # Reverse the sense of the test. - not_set = [x for x in full if x not in set] - return self.not_in_test(expr, full, not_set) - return '%s in %s' % (expr, repr(set)) - - def not_in_test(self, expr, full, set): - """Like in_test, but the reverse test.""" - if not set: return '1' - if len(set) == 1: return '%s != %s' % (expr, repr(set[0])) - return '%s not in %s' % (expr, repr(set)) - - def peek_call(self, a): - """Generate a call to scan for a token in the set 'a'""" - assert type(a) == type([]) - a_set = (repr(a)[1:-1]) - if self.equal_set(a, self.non_ignored_tokens()): a_set = '' - if self.has_option('context-insensitive-scanner'): a_set = '' - if a_set: a_set += "," - - return 'self._peek(%s context=_context)' % a_set - - def peek_test(self, a, b): - """Generate a call to test whether the next token (which could be any of - the elements in a) is in the set b.""" - if self.subset(a, b): return '1' - if self.has_option('context-insensitive-scanner'): a = self.non_ignored_tokens() - return self.in_test(self.peek_call(a), a, b) - - def not_peek_test(self, a, b): - """Like peek_test, but the opposite sense.""" - if self.subset(a, b): return '0' - return self.not_in_test(self.peek_call(a), a, b) - - def calculate(self): - """The main loop to compute the epsilon, first, follow sets. - The loop continues until the sets converge. This works because - each set can only get larger, so when they stop getting larger, - we're done.""" - # First we determine whether a rule accepts epsilon (the empty sequence) - while 1: - for r in self.goals: - self.rules[r].setup(self) - if self.change_count == 0: break - self.change_count = 0 - - # Now we compute the first/follow sets - while 1: - for r in self.goals: - self.rules[r].update(self) - if self.change_count == 0: break - self.change_count = 0 - - def dump_information(self): - """Display the grammar in somewhat human-readable form.""" - self.calculate() - for r in self.goals: - print ' _____' + '_'*len(r) - print ('___/Rule '+r+'\\' + '_'*80)[:79] - queue = [self.rules[r]] - while queue: - top = queue[0] - del queue[0] - - print 'Rule', repr(top), 'of class', top.__class__.__name__ - top.first.sort() - top.follow.sort() - eps = [] - if top.accepts_epsilon: eps = ['(null)'] - print ' FIRST:', ', '.join(top.first+eps) - print ' FOLLOW:', ', '.join(top.follow) - for x in top.get_children(): queue.append(x) - - def repr_ignore(self): - out="{" - for t,s in self.ignore.iteritems(): - if s is None: s=repr(s) - out += "%s:%s," % (repr(t),s) - out += "}" - return out - - def generate_output(self): - self.calculate() - self.write(self.preparser) - self.write("# Begin -- grammar generated by Yapps\n") - self.write("import sys, re\n") - self.write("from yapps import runtime\n") - self.write("\n") - self.write("class ", self.name, "Scanner(runtime.Scanner):\n") - self.write(" patterns = [\n") - for p in self.terminals: - self.write(" (%s, re.compile(%s)),\n" % ( - repr(p), repr(self.tokens[p]))) - self.write(" ]\n") - self.write(" def __init__(self, str,*args,**kw):\n") - self.write(" runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" % - self.repr_ignore()) - self.write("\n") - - self.write("class ", self.name, "(runtime.Parser):\n") - self.write(INDENT, "Context = runtime.Context\n") - for r in self.goals: - self.write(INDENT, "def ", r, "(self") - if self.params[r]: self.write(", ", self.params[r]) - self.write(", _parent=None):\n") - self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" % - (repr(r), self.params.get(r, ''))) - self.rules[r].output(self, INDENT+INDENT) - self.write("\n") - - self.write("\n") - self.write("def parse(rule, text):\n") - self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n") - self.write(" return runtime.wrap_error_reporter(P, rule)\n") - self.write("\n") - if self.postparser is not None: - self.write("# End -- grammar generated by Yapps\n") - self.write(self.postparser) - else: - self.write("if __name__ == '__main__':\n") - self.write(INDENT, "from sys import argv, stdin\n") - self.write(INDENT, "if len(argv) >= 2:\n") - self.write(INDENT*2, "if len(argv) >= 3:\n") - self.write(INDENT*3, "f = open(argv[2],'r')\n") - self.write(INDENT*2, "else:\n") - self.write(INDENT*3, "f = stdin\n") - self.write(INDENT*2, "print parse(argv[1], f.read())\n") - self.write(INDENT, "else: print >>sys.stderr, 'Args: []'\n") - self.write("# End -- grammar generated by Yapps\n") - -###################################################################### -class Node: - """This is the base class for all components of a grammar.""" - def __init__(self, rule): - self.rule = rule # name of the rule containing this node - self.first = [] - self.follow = [] - self.accepts_epsilon = 0 - - def setup(self, gen): - # Setup will change accepts_epsilon, - # sometimes from 0 to 1 but never 1 to 0. - # It will take a finite number of steps to set things up - pass - - def used(self, vars): - "Return two lists: one of vars used, and the other of vars assigned" - return vars, [] - - def get_children(self): - "Return a list of sub-nodes" - return [] - - def __repr__(self): - return str(self) - - def update(self, gen): - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - "Write out code to _gen_ with _indent_:string indentation" - gen.write(indent, "assert 0 # Invalid parser node\n") - -class Terminal(Node): - """This class stores terminal nodes, which are tokens.""" - def __init__(self, rule, token): - Node.__init__(self, rule) - self.token = token - self.accepts_epsilon = 0 - - def __str__(self): - return self.token - - def update(self, gen): - Node.update(self, gen) - if self.first != [self.token]: - self.first = [self.token] - gen.changed() - - def output(self, gen, indent): - gen.write(indent) - if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token): - gen.write(self.token, " = ") - gen.write("self._scan(%s, context=_context)\n" % repr(self.token)) - -class Eval(Node): - """This class stores evaluation nodes, from {{ ... }} clauses.""" - def __init__(self, rule, expr): - Node.__init__(self, rule) - self.expr = expr - - def setup(self, gen): - Node.setup(self, gen) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '{{ %s }}' % self.expr.strip() - - def output(self, gen, indent): - gen.write(indent, self.expr.strip(), '\n') - -class NonTerminal(Node): - """This class stores nonterminal nodes, which are rules with arguments.""" - def __init__(self, rule, name, args): - Node.__init__(self, rule) - self.name = name - self.args = args - - def setup(self, gen): - Node.setup(self, gen) - try: - self.target = gen.rules[self.name] - if self.accepts_epsilon != self.target.accepts_epsilon: - self.accepts_epsilon = self.target.accepts_epsilon - gen.changed() - except KeyError: # Oops, it's nonexistent - print >>sys.stderr, 'Error: no rule <%s>' % self.name - self.target = self - - def __str__(self): - return '%s' % self.name - - def update(self, gen): - Node.update(self, gen) - gen.equate(self.first, self.target.first) - gen.equate(self.follow, self.target.follow) - - def output(self, gen, indent): - gen.write(indent) - gen.write(self.name, " = ") - args = self.args - if args: args += ', ' - args += '_context' - gen.write("self.", self.name, "(", args, ")\n") - -class Sequence(Node): - """This class stores a sequence of nodes (A B C ...)""" - def __init__(self, rule, *children): - Node.__init__(self, rule) - self.children = children - - def setup(self, gen): - Node.setup(self, gen) - for c in self.children: c.setup(gen) - - if not self.accepts_epsilon: - # If it's not already accepting epsilon, it might now do so. - for c in self.children: - # any non-epsilon means all is non-epsilon - if not c.accepts_epsilon: break - else: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % ' '.join(map(str, self.children)) - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - empty = 1 - for g_i in range(len(self.children)): - g = self.children[g_i] - - if empty: gen.add_to(self.first, g.first) - if not g.accepts_epsilon: empty = 0 - - if g_i == len(self.children)-1: - next = self.follow - else: - next = self.children[1+g_i].first - gen.add_to(g.follow, next) - - if self.children: - gen.add_to(self.follow, self.children[-1].follow) - - def output(self, gen, indent): - if self.children: - for c in self.children: - c.output(gen, indent) - else: - # Placeholder for empty sequences, just in case - gen.write(indent, 'pass\n') - -class Choice(Node): - """This class stores a choice between nodes (A | B | C | ...)""" - def __init__(self, rule, *children): - Node.__init__(self, rule) - self.children = children - - def setup(self, gen): - Node.setup(self, gen) - for c in self.children: c.setup(gen) - - if not self.accepts_epsilon: - for c in self.children: - if c.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % ' | '.join(map(str, self.children)) - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - for g in self.children: - gen.add_to(self.first, g.first) - gen.add_to(self.follow, g.follow) - for g in self.children: - gen.add_to(g.follow, self.follow) - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - test = "if" - gen.write(indent, "_token = ", gen.peek_call(self.first), "\n") - tokens_seen = [] - tokens_unseen = self.first[:] - if gen.has_option('context-insensitive-scanner'): - # Context insensitive scanners can return ANY token, - # not only the ones in first. - tokens_unseen = gen.non_ignored_tokens() - for c in self.children: - testset = c.first[:] - removed = [] - for x in testset: - if x in tokens_seen: - testset.remove(x) - removed.append(x) - if x in tokens_unseen: tokens_unseen.remove(x) - tokens_seen = tokens_seen + testset - if removed: - if not testset: - print >>sys.stderr, 'Error in rule', self.rule+':' - else: - print >>sys.stderr, 'Warning in rule', self.rule+':' - print >>sys.stderr, ' *', self - print >>sys.stderr, ' * These tokens could be matched by more than one clause:' - print >>sys.stderr, ' *', ' '.join(removed) - - if testset: - if not tokens_unseen: # context sensitive scanners only! - if test == 'if': - # if it's the first AND last test, then - # we can simply put the code without an if/else - c.output(gen, indent) - else: - gen.write(indent, "else:") - t = gen.in_test('', [], testset) - if len(t) < 70-len(indent): - gen.write(' #', t) - gen.write("\n") - c.output(gen, indent+INDENT) - else: - gen.write(indent, test, " ", - gen.in_test('_token', tokens_unseen, testset), - ":\n") - c.output(gen, indent+INDENT) - test = "elif" - - if tokens_unseen: - gen.write(indent, "else:\n") - gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ") - gen.write("'Could not match ", self.rule, "')\n") - -class Wrapper(Node): - """This is a base class for nodes that modify a single child.""" - def __init__(self, rule, child): - Node.__init__(self, rule) - self.child = child - - def setup(self, gen): - Node.setup(self, gen) - self.child.setup(gen) - - def get_children(self): - return [self.child] - - def update(self, gen): - Node.update(self, gen) - self.child.update(gen) - gen.add_to(self.first, self.child.first) - gen.equate(self.follow, self.child.follow) - -class Option(Wrapper): - """This class represents an optional clause of the form [A]""" - def setup(self, gen): - Wrapper.setup(self, gen) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '[ %s ]' % str(self.child) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print >>sys.stderr, 'Warning in rule', self.rule+': contents may be empty.' - gen.write(indent, "if %s:\n" % - gen.peek_test(self.first, self.child.first)) - self.child.output(gen, indent+INDENT) - - if gen.has_option('context-insensitive-scanner'): - gen.write(indent, "if %s:\n" % - gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) - gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % - repr(self.first)) - - -class Plus(Wrapper): - """This class represents a 1-or-more repetition clause of the form A+""" - def setup(self, gen): - Wrapper.setup(self, gen) - if self.accepts_epsilon != self.child.accepts_epsilon: - self.accepts_epsilon = self.child.accepts_epsilon - gen.changed() - - def __str__(self): - return '%s+' % str(self.child) - - def update(self, gen): - Wrapper.update(self, gen) - gen.add_to(self.child.follow, self.child.first) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print >>sys.stderr, 'Warning in rule', self.rule+':' - print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser may not work properly.' - gen.write(indent, "while 1:\n") - self.child.output(gen, indent+INDENT) - union = self.first[:] - gen.add_to(union, self.follow) - gen.write(indent+INDENT, "if %s: break\n" % - gen.not_peek_test(union, self.child.first)) - - if gen.has_option('context-insensitive-scanner'): - gen.write(indent, "if %s:\n" % - gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) - gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % - repr(self.first)) - - -class Star(Wrapper): - """This class represents a 0-or-more repetition clause of the form A*""" - def setup(self, gen): - Wrapper.setup(self, gen) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '%s*' % str(self.child) - - def update(self, gen): - Wrapper.update(self, gen) - gen.add_to(self.child.follow, self.child.first) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print >>sys.stderr, 'Warning in rule', self.rule+':' - print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser probably will not work properly.' - gen.write(indent, "while %s:\n" % - gen.peek_test(self.follow, self.child.first)) - self.child.output(gen, indent+INDENT) - - # TODO: need to generate tests like this in lots of rules - if gen.has_option('context-insensitive-scanner'): - gen.write(indent, "if %s:\n" % - gen.not_peek_test(gen.non_ignored_tokens(), self.follow)) - gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" % - repr(self.first)) - diff --git a/yapps/runtime.py b/yapps/runtime.py deleted file mode 100644 index 5d9d1d6..0000000 --- a/yapps/runtime.py +++ /dev/null @@ -1,442 +0,0 @@ -# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system -# Copyright 1999-2003 by Amit J. Patel -# Enhancements copyright 2003-2004 by Matthias Urlichs -# -# This version of the Yapps 2 Runtime can be distributed under the -# terms of the MIT open source license, either found in the LICENSE file -# included with the Yapps distribution -# or at -# -# - -"""Run time libraries needed to run parsers generated by Yapps. - -This module defines parse-time exception classes, a scanner class, a -base class for parsers produced by Yapps, and a context class that -keeps track of the parse stack. - -""" - -import sys, re - -MIN_WINDOW=4096 -# File lookup window - -class SyntaxError(Exception): - """When we run into an unexpected token, this is the exception to use""" - def __init__(self, pos=None, msg="Bad Token", context=None): - Exception.__init__(self) - self.pos = pos - self.msg = msg - self.context = context - - def __str__(self): - if not self.pos: return 'SyntaxError' - else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg) - -class NoMoreTokens(Exception): - """Another exception object, for when we run out of tokens""" - pass - -class Token(object): - """Yapps token. - - This is a container for a scanned token. - """ - - def __init__(self, type,value, pos=None): - """Initialize a token.""" - self.type = type - self.value = value - self.pos = pos - - def __repr__(self): - output = '<%s: %s' % (self.type, repr(self.value)) - if self.pos: - output += " @ " - if self.pos[0]: - output += "%s:" % self.pos[0] - if self.pos[1]: - output += "%d" % self.pos[1] - if self.pos[2] is not None: - output += ".%d" % self.pos[2] - output += ">" - return output - -in_name=0 -class Scanner(object): - """Yapps scanner. - - The Yapps scanner can work in context sensitive or context - insensitive modes. The token(i) method is used to retrieve the - i-th token. It takes a restrict set that limits the set of tokens - it is allowed to return. In context sensitive mode, this restrict - set guides the scanner. In context insensitive mode, there is no - restriction (the set is always the full set of tokens). - - """ - - def __init__(self, patterns, ignore, input="", - file=None,filename=None,stacked=False): - """Initialize the scanner. - - Parameters: - patterns : [(terminal, uncompiled regex), ...] or None - ignore : {terminal:None, ...} - input : string - - If patterns is None, we assume that the subclass has - defined self.patterns : [(terminal, compiled regex), ...]. - Note that the patterns parameter expects uncompiled regexes, - whereas the self.patterns field expects compiled regexes. - - The 'ignore' value is either None or a callable, which is called - with the scanner and the to-be-ignored match object; this can - be used for include file or comment handling. - """ - - if not filename: - global in_name - filename="" % in_name - in_name += 1 - - self.input = input - self.ignore = ignore - self.file = file - self.filename = filename - self.pos = 0 - self.del_pos = 0 # skipped - self.line = 1 - self.del_line = 0 # skipped - self.col = 0 - self.tokens = [] - self.stack = None - self.stacked = stacked - - self.last_read_token = None - self.last_token = None - self.last_types = None - - if patterns is not None: - # Compile the regex strings into regex objects - self.patterns = [] - for terminal, regex in patterns: - self.patterns.append( (terminal, re.compile(regex)) ) - - def stack_input(self, input="", file=None, filename=None): - """Temporarily parse from a second file.""" - - # Already reading from somewhere else: Go on top of that, please. - if self.stack: - # autogenerate a recursion-level-identifying filename - if not filename: - filename = 1 - else: - try: - filename += 1 - except TypeError: - pass - # now pass off to the include file - self.stack.stack_input(input,file,filename) - else: - - try: - filename += 0 - except TypeError: - pass - else: - filename = "" % filename - -# self.stack = object.__new__(self.__class__) -# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True) - - # Note that the pattern+ignore are added by the generated - # scanner code - self.stack = self.__class__(input,file,filename, stacked=True) - - def get_pos(self): - """Return a file/line/char tuple.""" - if self.stack: return self.stack.get_pos() - - return (self.filename, self.line+self.del_line, self.col) - -# def __repr__(self): -# """Print the last few tokens that have been scanned in""" -# output = '' -# for t in self.tokens: -# output += '%s\n' % (repr(t),) -# return output - - def print_line_with_pointer(self, pos, length=0, out=sys.stderr): - """Print the line of 'text' that includes position 'p', - along with a second line with a single caret (^) at position p""" - - file,line,p = pos - if file != self.filename: - if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out) - print >>out, "(%s: not in input buffer)" % file - return - - text = self.input - p += length-1 # starts at pos 1 - - origline=line - line -= self.del_line - spos=0 - if line > 0: - while 1: - line = line - 1 - try: - cr = text.index("\n",spos) - except ValueError: - if line: - text = "" - break - if line == 0: - text = text[spos:cr] - break - spos = cr+1 - else: - print >>out, "(%s:%d not in input buffer)" % (file,origline) - return - - # Now try printing part of the line - text = text[max(p-80, 0):p+80] - p = p - max(p-80, 0) - - # Strip to the left - i = text[:p].rfind('\n') - j = text[:p].rfind('\r') - if i < 0 or (0 <= j < i): i = j - if 0 <= i < p: - p = p - i - 1 - text = text[i+1:] - - # Strip to the right - i = text.find('\n', p) - j = text.find('\r', p) - if i < 0 or (0 <= j < i): i = j - if i >= 0: - text = text[:i] - - # Now shorten the text - while len(text) > 70 and p > 60: - # Cut off 10 chars - text = "..." + text[10:] - p = p - 7 - - # Now print the string, along with an indicator - print >>out, '> ',text - print >>out, '> ',' '*p + '^' - - def grab_input(self): - """Get more input if possible.""" - if not self.file: return - if len(self.input) - self.pos >= MIN_WINDOW: return - - data = self.file.read(MIN_WINDOW) - if data is None or data == "": - self.file = None - - # Drop bytes from the start, if necessary. - if self.pos > 2*MIN_WINDOW: - self.del_pos += MIN_WINDOW - self.del_line += self.input[:MIN_WINDOW].count("\n") - self.pos -= MIN_WINDOW - self.input = self.input[MIN_WINDOW:] + data - else: - self.input = self.input + data - - def getchar(self): - """Return the next character.""" - self.grab_input() - - c = self.input[self.pos] - self.pos += 1 - return c - - def token(self, restrict, context=None): - """Scan for another token.""" - - while 1: - if self.stack: - try: - return self.stack.token(restrict, context) - except StopIteration: - self.stack = None - - # Keep looking for a token, ignoring any in self.ignore - self.grab_input() - - # special handling for end-of-file - if self.stacked and self.pos==len(self.input): - raise StopIteration - - # Search the patterns for the longest match, with earlier - # tokens in the list having preference - best_match = -1 - best_pat = '(error)' - best_m = None - for p, regexp in self.patterns: - # First check to see if we're ignoring this token - if restrict and p not in restrict and p not in self.ignore: - continue - m = regexp.match(self.input, self.pos) - if m and m.end()-m.start() > best_match: - # We got a match that's better than the previous one - best_pat = p - best_match = m.end()-m.start() - best_m = m - - # If we didn't find anything, raise an error - if best_pat == '(error)' and best_match < 0: - msg = 'Bad Token' - if restrict: - msg = 'Trying to find one of '+', '.join(restrict) - raise SyntaxError(self.get_pos(), msg, context=context) - - ignore = best_pat in self.ignore - value = self.input[self.pos:self.pos+best_match] - if not ignore: - tok=Token(type=best_pat, value=value, pos=self.get_pos()) - - self.pos += best_match - - npos = value.rfind("\n") - if npos > -1: - self.col = best_match-npos - self.line += value.count("\n") - else: - self.col += best_match - - # If we found something that isn't to be ignored, return it - if not ignore: - if len(self.tokens) >= 10: - del self.tokens[0] - self.tokens.append(tok) - self.last_read_token = tok - # print repr(tok) - return tok - else: - ignore = self.ignore[best_pat] - if ignore: - ignore(self, best_m) - - def peek(self, *types, **kw): - """Returns the token type for lookahead; if there are any args - then the list of args is the set of token types to allow""" - context = kw.get("context",None) - if self.last_token is None: - self.last_types = types - self.last_token = self.token(types,context) - elif self.last_types: - for t in types: - if t not in self.last_types: - raise NotImplementedError("Unimplemented: restriction set changed") - return self.last_token.type - - def scan(self, type, **kw): - """Returns the matched text, and moves to the next token""" - context = kw.get("context",None) - - if self.last_token is None: - tok = self.token([type],context) - else: - if self.last_types and type not in self.last_types: - raise NotImplementedError("Unimplemented: restriction set changed") - - tok = self.last_token - self.last_token = None - if tok.type != type: - if not self.last_types: self.last_types=[] - raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context) - return tok.value - -class Parser(object): - """Base class for Yapps-generated parsers. - - """ - - def __init__(self, scanner): - self._scanner = scanner - - def _stack(self, input="",file=None,filename=None): - """Temporarily read from someplace else""" - self._scanner.stack_input(input,file,filename) - self._tok = None - - def _peek(self, *types, **kw): - """Returns the token type for lookahead; if there are any args - then the list of args is the set of token types to allow""" - return self._scanner.peek(*types, **kw) - - def _scan(self, type, **kw): - """Returns the matched text, and moves to the next token""" - return self._scanner.scan(type, **kw) - -class Context(object): - """Class to represent the parser's call stack. - - Every rule creates a Context that links to its parent rule. The - contexts can be used for debugging. - - """ - - def __init__(self, parent, scanner, rule, args=()): - """Create a new context. - - Args: - parent: Context object or None - scanner: Scanner object - rule: string (name of the rule) - args: tuple listing parameters to the rule - - """ - self.parent = parent - self.scanner = scanner - self.rule = rule - self.args = args - while scanner.stack: scanner = scanner.stack - self.token = scanner.last_read_token - - def __str__(self): - output = '' - if self.parent: output = str(self.parent) + ' > ' - output += self.rule - return output - -def print_error(err, scanner, max_ctx=None): - """Print error messages, the parser stack, and the input text -- for human-readable error messages.""" - # NOTE: this function assumes 80 columns :-( - # Figure out the line number - pos = err.pos - if not pos: - pos = scanner.get_pos() - - file_name, line_number, column_number = pos - print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg) - - scanner.print_line_with_pointer(pos) - - context = err.context - token = None - while context: - print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args)) - if context.token: - token = context.token - if token: - scanner.print_line_with_pointer(token.pos, length=len(token.value)) - context = context.parent - if max_ctx: - max_ctx = max_ctx-1 - if not max_ctx: - break - -def wrap_error_reporter(parser, rule, *args,**kw): - try: - return getattr(parser, rule)(*args,**kw) - except SyntaxError, e: - print_error(e, parser._scanner) - except NoMoreTokens: - print >>sys.stderr, 'Could not complete parsing; stopped around here:' - print >>sys.stderr, parser._scanner diff --git a/yapps2.py b/yapps2.py deleted file mode 100755 index d6fd101..0000000 --- a/yapps2.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - -# -# Yapps 2 - yet another python parser system -# Copyright 1999-2003 by Amit J. Patel -# -# This version of Yapps 2 can be distributed under the -# terms of the MIT open source license, either found in the LICENSE file -# included with the Yapps distribution -# or at -# -# - -import sys, re - -from yapps import runtime, parsetree - -def generate(inputfilename, outputfilename='', dump=0, **flags): - """Generate a grammar, given an input filename (X.g) - and an output filename (defaulting to X.py).""" - - if not outputfilename: - if inputfilename.endswith('.g'): - outputfilename = inputfilename[:-2] + '.py' - else: - raise Exception('Must specify output filename if input filename is not *.g') - - DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers - preparser, postparser = None, None # Code before and after the parser desc - - # Read the entire file - s = open(inputfilename,'r').read() - - # See if there's a separation between the pre-parser and parser - f = s.find(DIVIDER) - if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):] - - # See if there's a separation between the parser and post-parser - f = s.find(DIVIDER) - if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):] - - # Create the parser and scanner and parse the text - scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename) - if preparser: scanner.del_line += preparser.count('\n') - - parser = grammar.ParserDescription(scanner) - t = runtime.wrap_error_reporter(parser, 'Parser') - if t is None: return 1 # Failure - if preparser is not None: t.preparser = preparser - if postparser is not None: t.postparser = postparser - - # Check the options - for f in t.options.keys(): - for opt,_,_ in yapps_options: - if f == opt: break - else: - print >>sys.stderr, 'Warning: unrecognized option', f - # Add command line options to the set - for f in flags.keys(): t.options[f] = flags[f] - - # Generate the output - if dump: - t.dump_information() - else: - t.output = open(outputfilename, 'w') - t.generate_output() - return 0 - -if __name__ == '__main__': - import doctest - doctest.testmod(sys.modules['__main__']) - doctest.testmod(parsetree) - - # Someday I will use optparse, but Python 2.3 is too new at the moment. - yapps_options = [ - ('context-insensitive-scanner', - 'context-insensitive-scanner', - 'Scan all tokens (see docs)'), - ] - - import getopt - optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['help', 'dump', 'use-devel-grammar']) - if not args or len(args) > 2: - print >>sys.stderr, 'Usage:' - print >>sys.stderr, ' python', sys.argv[0], '[flags] input.g [output.py]' - print >>sys.stderr, 'Flags:' - print >>sys.stderr, (' --dump' + ' '*40)[:35] + 'Dump out grammar information' - print >>sys.stderr, (' --use-devel-grammar' + ' '*40)[:35] + 'Use the devel grammar parser from yapps_grammar.py instead of the stable grammar from grammar.py' - for flag, _, doc in yapps_options: - print >>sys.stderr, (' -f' + flag + ' '*40)[:35] + doc - else: - # Read in the options and create a list of flags - flags = {} - use_devel_grammar = 0 - for opt in optlist: - for flag, name, _ in yapps_options: - if opt == ('-f', flag): - flags[name] = 1 - break - else: - if opt == ('--dump', ''): - flags['dump'] = 1 - elif opt == ('--use-devel-grammar', ''): - use_devel_grammar = 1 - else: - print >>sys.stderr, 'Warning: unrecognized option', opt[0], opt[1] - - if use_devel_grammar: - import yapps_grammar as grammar - else: - from yapps import grammar - - sys.exit(generate(*tuple(args), **flags)) diff --git a/yapps_grammar.g b/yapps_grammar.g index cd21a9e..bf2b14d 100644 --- a/yapps_grammar.g +++ b/yapps_grammar.g @@ -18,7 +18,7 @@ by running Yapps on yapps_grammar.g. (Holy circularity, Batman!) """ import sys, re -from yapps import parsetree +from exyapps import parsetree ###################################################################### def cleanup_choice(rule, lst): -- cgit