From 7f01b5bd084eccff8d551ba34c01e6375617cc93 Mon Sep 17 00:00:00 2001 From: sienkiew Date: Fri, 16 Aug 2013 16:03:33 +0000 Subject: checkpoint some changes git-svn-id: http://svn.stsci.edu/svn/ssb/etal/exyapps/trunk@1018 d34015c8-bcbb-4646-8ac8-8ba5febf221d --- LICENSE | 6 + MANIFEST.in | 4 + README | 58 +++--- doc/Makefile | 153 ++++++++++++++++ doc/README | 12 ++ doc/make.bat | 190 +++++++++++++++++++ doc/source/conf.py | 242 +++++++++++++++++++++++++ doc/source/index.rst | 503 +++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 8 +- 9 files changed, 1143 insertions(+), 33 deletions(-) create mode 100644 doc/Makefile create mode 100644 doc/README create mode 100644 doc/make.bat create mode 100644 doc/source/conf.py create mode 100644 doc/source/index.rst diff --git a/LICENSE b/LICENSE index 87fd179..c013697 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,9 @@ +Copyright 1999-2003 by Amit J. Patel +Enhancements copyright 2003-2004 by Matthias Urlichs +Conversion to exyapps copyright 2011-2013 + by Association of Universities for Research in Astronomy + (work-for-hire by Mark Sienkiewicz, Space Telescope Science Institute) + Permission is hereby granted, free of charge, to any person obtaining diff --git a/MANIFEST.in b/MANIFEST.in index f101b4b..62d76b1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,6 @@ include * recursive-include * * +prune doc/build +prune build +prune dist +exclude *.pyc *.pyo diff --git a/README b/README index 1cf78e3..c489de4 100644 --- a/README +++ b/README @@ -1,22 +1,29 @@ This is exyapps, a LL(1) parser generator. -It is derived from yapps ( http://theory.stanford.edu/~amitp/yapps/ ) -by Amit J. Patel . He is no longer maintaining -it, and there seem to be several forks out there, all with varying -version numbers. Matthias Urlichs made some patches -for Debian; this copy was derived from the Debian distribution by Mark -Sienkiewicz at the Space Telescope Science Institute. 
(For email, -use the first 8 letters of my last name and @stsci.edu) +It is derived from yapps ( http://theory.stanford.edu/~amitp/yapps/ +) by Amit J. Patel . He is no longer +maintaining yapps, and there seem to be several forks out there, +all with varying version numbers. Matthias Urlichs made some patches for Debian; this copy was derived +from the Debian distribution, by Mark Sienkiewicz at the Space Telescope Science Institute. Some of the modifications that changed yapps to exyapps introduced the possibility of a fundmamental incompatibility with existing yapps2-based parsers. This, coupled with the non-linear version numbers of the various forks, prompted me to rename yapps to exyapps. -(STScI is a subsidiary of the Association of Universities for Research -in Astronomy, which is why the copyright to the new code in exyapps -belongs to AURA.) +New Features of exyapps relative to yapps +-- + +- The generated parser no longer needs to have exyapps installed at +run time. The entire runtime is incorporated into the parser. + +- You can pass a data object to the parser for it to use as +parser-global data. I know the OO way is to subclass the parser +object and hope you don't accidentally override anything important, +but I find this easier to use in a particular application I have in mind. Installing / using exyapps @@ -28,6 +35,9 @@ Installing / using exyapps exyapps my_grammar.exy + it will create a file named my_grammar.py + + Modifying exyapps -- @@ -47,10 +57,13 @@ install it somewhere else to try it. VIM -- -Put this in .vimrc +To recognize .exy files and treat them as python, put this in .vimrc autocmd BufRead,BufNewFile *.exy set filetype=python +This is not exactly an ideal configuration, but it is good enough. + + What is here? 
-- @@ -59,14 +72,16 @@ yapps_grammar.g yapps_grammar.g is the source code for exyapps/grammar.py doc - latex source for the documentation + latex source for the documentation - this is the original + document by Amit Patel; it has not been updated to reflect + the exyapps modifications. examples exyapps - the exyapps package that gets installed - as of exyapps 3.0, - this is only need to compile the parser; you do not need to - install exyapps to run a generated parser. + the exyapps package that gets installed - this is only needed + to compile the parser; you do not need to install exyapps + to run a generated parser. scripts "exyapps" command that compiles a parser into python code. @@ -78,16 +93,3 @@ test not actual tests, but apparently some interesting input to run through the parser for testing - -New Features --- - -- The generated parser no longer needs to have exyapps installed at -run time. The entire runtime is incorporated into the parser. - -- You can pass a data object to the parser for it to use as -parser-global data. I know the OO way is to subclass the parser -object and hope you don't accidentally override/smash anything -important, but this is easier to use in a particular application -I have in mind. - diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..611a456 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,153 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+ +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/exyapps.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/exyapps.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/exyapps" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/exyapps" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
+ +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." 
diff --git a/doc/README b/doc/README new file mode 100644 index 0000000..6cc49f0 --- /dev/null +++ b/doc/README @@ -0,0 +1,12 @@ +Makefile +build +make.bat +source + New documentation written in RST for Sphinx to process + TRANSLATION FROM LATEX INCOMPLETE + +yapps2.haux +yapps2.html +yapps2.htoc +yapps2.tex + Original latex documentation diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..8c3cbf1 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +set I18NSPHINXOPTS=%SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. 
doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\exyapps.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\exyapps.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +:end diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..aa0c70f --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# +# exyapps documentation build configuration file, created by +# sphinx-quickstart on Tue Dec 18 13:22:40 2012. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.pngmath'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'exyapps' +copyright = u'2012, Amit J. Patel, Mark Sienkiewicz' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = 'v' +# The full version, including alpha/beta/rc tags. 
+release = 'v' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. 
+#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). 
+#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'exyappsdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'exyapps.tex', u'exyapps Documentation', + u'Amit J. Patel, Mark Sienkiewicz', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'exyapps', u'exyapps Documentation', + [u'Amit J. Patel, Mark Sienkiewicz'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'exyapps', u'exyapps Documentation', + u'Amit J. 
Patel, Mark Sienkiewicz', 'exyapps', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..19f6fa6 --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,503 @@ +.. exyapps documentation master file, created by + sphinx-quickstart on Tue Dec 18 13:22:40 2012. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + + +Welcome to exyapps's documentation! +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + +The Yapps Parser Generator System +http://theory.stanford.edu/~amitp/Yapps/ +Version 2 + +Amit J. Patel +http://www-cs-students.stanford.edu/~amitp/ http://www-cs-students.stanford.edu/~amitp/ + + +Introduction +====================================================================== + + +Yapps (Yet Another Python Parser System) is an easy to use parser +generator that is written in Python and generates Python code. There +are several parser generator systems already available for Python, +including PyLR, kjParsing, PyBison, and mcf.pars, but I had different +goals for my parser. Yapps is simple, is easy to use, and produces +human-readable parsers. It is not the fastest or most powerful +parser. Yapps is designed to be used when regular expressions are +not enough and other parser systems are too much: situations where +you may write your own recursive descent parser. + +Some unusual features of Yapps that may be of interest are: + + - Yapps produces recursive descent parsers that are readable by + humans, as opposed to table-driven parsers that are difficult to + read. 
A Yapps parser for a simple calculator looks similar to the + one that Mark Lutz wrote by hand for Programming Python. + + - Yapps also allows for rules that accept parameters and pass + arguments to be used while parsing subexpressions. Grammars that + allow for arguments to be passed to subrules and for values to be + passed back are often called attribute grammars. In many cases + parameterized rules can be used to perform actions at parse time + that are usually delayed until later. For example, information + about variable declarations can be passed into the rules that parse + a procedure body, so that undefined variables can be detected at + parse time. The types of defined variables can be used in parsing + as well -- for example, if the type of X is known, we can determine + whether X(1) is an array reference or a function call. + + - Yapps grammars are fairly easy to write, although there are some + inconveniences having to do with ELL(1) parsing that have to be + worked around. For example, rules have to be left factored and + rules may not be left recursive. However, neither limitation seems + to be a problem in practice. + + - Yapps grammars look similar to the notation used in the Python + reference manual, with operators like \*, +, \|, [], and () for + patterns, names (tim) for rules, regular expressions ("[a-z]+") + for tokens, and # for comments. + + - The Yapps parser generator is written as a single Python module + with no C extensions. Yapps produces parsers that are written + entirely in Python, and require only the Yapps run-time module + (5k) for support. + + - Yapps's scanner is context-sensitive, picking tokens based on + the types of the tokens accepted by the parser. This can be helpful + when implementing certain kinds of parsers, such as for a preprocessor. 
+ +There are several disadvantages of using Yapps over another parser system: + + - Yapps parsers are ELL(1) (Extended LL(1)), which is less powerful + than LALR (used by PyLR) or SLR (used by kjParsing), so Yapps would + not be a good choice for parsing complex languages. For example, + allowing both x := 5; and x; as statements is difficult because + we must distinguish based on only one token of lookahead. Seeing + only x, we cannot decide whether we have an assignment statement + or an expression statement. (Note however that this kind of grammar + can be matched with backtracking; see section F.) + + - The scanner that Yapps provides can only read from strings, not + files, so an entire file has to be read in before scanning can + begin. It is possible to build a custom scanner, though, so in + cases where stream input is needed (from the console, a network, + or a large file are examples), the Yapps parser can be given a + custom scanner that reads from a stream instead of a string. + + - Yapps is not designed with efficiency in mind. + +Yapps provides an easy to use parser generator that produces parsers +similar to what you might write by hand. It is not meant to be a +solution for all parsing problems, but instead an aid for those +times you would write a parser by hand rather than using one of the +more powerful parsing packages available. + +Yapps 2.0 is easier to use than Yapps 1.0. New features include a +less restrictive input syntax, which allows mixing of sequences, +choices, terminals, and nonterminals; optional matching; the ability +to insert single-line statements into the generated parser; and +looping constructs \* and + similar to the repetitive matching +constructs in regular expressions. Unfortunately, the addition of +these constructs has made Yapps 2.0 incompatible with Yapps 1.0, +so grammars will have to be rewritten. See section ?? for tips on +changing Yapps 1.0 grammars for use with Yapps 2.0. 
+ + +Examples +====================================================================== + +In this section are several examples that show the use of Yapps. +First, an introduction shows how to construct grammars and write +them in Yapps form. This example can be skipped by someone familiar +with grammars and parsing. Next is a Lisp expression grammar that +produces a parse tree as output. This example demonstrates the use +of tokens and rules, as well as returning values from rules. The +third example is an expression evaluation grammar that evaluates +during parsing (instead of producing a parse tree). + +Introduction to Grammars +------------------------------------------------------------------------------- + +A grammar for a natural language specifies how words can be put +together to form large structures, such as phrases and sentences. +A grammar for a computer language is similar in that it specifies +how small components (called tokens) can be put together to form +larger structures. In this section we will write a grammar for a +tiny subset of English. + +Simple English sentences can be described as being a noun phrase +followed by a verb followed by a noun phrase. For example, in the +sentence, "Jack sank the blue ship," the word "Jack" is the first +noun phrase, "sank" is the verb, and "the blue ship" is the second +noun phrase. In addition we should say what a noun phrase is; for +this example we shall say that a noun phrase is an optional article +(a, an, the) followed by any number of adjectives followed by a +noun. The tokens in our language are the articles, nouns, verbs, +and adjectives. The rules in our language will tell us how to combine +the tokens together to form lists of adjectives, noun phrases, and +sentences: :: + + sentence: noun_phrase verb noun_phrase + noun_phrase: [article] adjective* noun + +Notice that some things that we said easily in English, such as +"optional article" are expressed using special syntax, such as +brackets. 
When we said, "any number of adjectives," we wrote +adjective\*, where the \* means "zero or more of the preceding +pattern". + +The grammar given above is close to a Yapps grammar. We also have +to specify what the tokens are, and what to do when a pattern is +matched. For this example, we will do nothing when patterns are +matched; the next example will explain how to perform match actions. :: + + parser TinyEnglish: + ignore: "\\W+" + token noun: "(Jack|spam|ship)" + token verb: "(sank|threw)" + token article: "(a|an|the)" + token adjective: "(blue|red|green)" + + rule sentence: noun_phrase verb noun_phrase + rule noun_phrase: [article] adjective* noun + +The tokens are specified as Python regular expressions. Since Yapps +produces Python code, you can write any regular expression that +would be accepted by Python. (Note: These are Python 1.5 regular +expressions from the re module, not Python 1.4 regular expressions +from the regex module.) In addition to tokens that you want to see +(which are given names), you can also specify tokens to ignore, +marked by the ignore keyword. In this parser we want to ignore +whitespace. + +The TinyEnglish grammar shows how you define tokens and rules, but +it does not specify what should happen once we've matched the rules. +In the next example, we will take a grammar and produce a parse +tree from it. + + +Lisp Expressions +--------------------------------------------------------------------- + +Lisp syntax, although hated by many, has a redeeming quality: it +is simple to parse. In this section we will construct a Yapps grammar +to parse Lisp expressions and produce a parse tree as output. + +Defining the Grammar +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The syntax of Lisp is simple. It has expressions, which are +identifiers, strings, numbers, and lists. A list is a left parenthesis +followed by some number of expressions (separated by spaces) followed +by a right parenthesis. 
For example, 5, "ni", and (print "1+2 = " +(+ 1 2)) are Lisp expressions. Written as a grammar, :: + + expr: ID | STR | NUM | list + list: ( expr* ) + +In addition to having a grammar, we need to specify what to do every +time something is matched. For the tokens, which are strings, we +just want to get the "value" of the token, attach its type (identifier, +string, or number) in some way, and return it. For the lists, we +want to construct and return a Python list. + +Once some pattern is matched, we add a return statement enclosed +in {{...}}. The braces allow us to insert any one-line statement +into the parser. Within this statement, we can refer to the values +returned by matching each part of the rule. After matching a token +such as ID, "ID" will be bound to the text of the matched token. +Let's take a look at the rule: :: + + rule expr: ID {{ return ('id', ID) }} + ... + +In a rule, tokens return the text that was matched. For identifiers, +we just return the identifier, along with a "tag" telling us that +this is an identifier and not a string or some other value. Sometimes +we may need to convert this text to a different form. For example, +if a string is matched, we want to remove quotes and handle special +forms like ``\n``. If a number is matched, we want to convert its text +into an integer. Let's look at the return values for the other tokens: :: + + ... + | STR {{ return ('str', eval(STR)) }} + | NUM {{ return ('num', atoi(NUM)) }} + ... + +If we get a string, we want to remove the quotes and process any +special backslash codes, so we run eval on the quoted string. If +we get a number, we convert it to an integer with atoi and then +return the number along with its type tag. + +For matching a list, we need to do something slightly more complicated. +If we match a Lisp list of expressions, we want to create a Python +list with those values.
:: + + rule list: "\\(" # Match the opening parenthesis + {{ result = [] }} # Create a Python list + ( + expr # When we match an expression, + {{ result.append(expr) }} # add it to the list + )* # * means repeat this if needed + "\\)" # Match the closing parenthesis + {{ return result }} # Return the Python list + +In this rule we first match the opening parenthesis, then go into +a loop. In this loop we match expressions and add them to the list. +When there are no more expressions to match, we match the closing +parenthesis and return the resulting list. Note that # is used for +comments, just as in Python. + +The complete grammar is specified as follows: :: + + parser Lisp: + ignore: '\\s+' + token NUM: '[0-9]+' + token ID: '[-+*/!@%^&=.a-zA-Z0-9_]+' + token STR: '"([^\\"]+|\\\\.)*"' + + rule expr: ID {{ return ('id', ID) }} + | STR {{ return ('str', eval(STR)) }} + | NUM {{ return ('num', atoi(NUM)) }} + | list {{ return list }} + rule list: "\\(" {{ result = [] }} + ( expr {{ result.append(expr) }} + )* + "\\)" {{ return result }} + +One thing you may have noticed is that "\\(" and "\\)" appear in +the list rule. These are inline tokens: they appear in the rules +without being given a name with the token keyword. Inline tokens +are more convenient to use, but since they do not have a name, the +text that is matched cannot be used in the return value. They are +best used for short simple patterns (usually punctuation or keywords). + +Another thing to notice is that the number and identifier tokens +overlap. For example, "487" matches both NUM and ID. In Yapps, the +scanner only tries to match tokens that are acceptable to the parser. +This rule doesn't help here, since both NUM and ID can appear in +the same place in the grammar. There are two rules used to pick +tokens if more than one matches. One is that the longest match is +preferred. For example, "487x" will match as an ID (487x) rather +than as a NUM (487) followed by an ID (x).
The second rule is that +if the two matches are the same length, the first one listed in the +grammar is preferred. For example, "487" will match as a NUM rather +than an ID because NUM is listed first in the grammar. Inline tokens +have preference over any tokens you have listed. + +Now that our grammar is defined, we can run Yapps to produce a +parser, and then run the parser to produce a parse tree. + + +Running Yapps +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the Yapps module is a function generate that takes an input +filename and writes a parser to another file. We can use this +function to generate the Lisp parser, which is assumed to be in +lisp.g. :: + + % python + Python 1.5.1 (#1, Sep 3 1998, 22:51:17) [GCC 2.7.2.3] on linux-i386 + Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam + >>> import yapps + >>> yapps.generate('lisp.g') + +At this point, Yapps has written a file lisp.py that contains the +parser. In that file are two classes (one scanner and one parser) +and a function (called parse) that puts things together for you. + +Alternatively, we can run Yapps from the command line to generate +the parser file: :: + + % python yapps.py lisp.g + +After running Yapps either from within Python or from the command +line, we can use the Lisp parser by calling the parse function. The +first parameter should be the rule we want to match, and the second +parameter should be the string to parse. :: + + >>> import lisp + >>> lisp.parse('expr', '(+ 3 4)') + [('id', '+'), ('num', 3), ('num', 4)] + >>> lisp.parse('expr', '(print "3 = " (+ 1 2))') + [('id', 'print'), ('str', '3 = '), [('id', '+'), ('num', 1), ('num', 2)]] + +The parse function is not the only way to use the parser; section +5.1 describes how to access parser objects directly. + +We've now gone through the steps in creating a grammar, writing a +grammar file for Yapps, producing a parser, and using the parser.
+In the next example we'll see how rules can take parameters and +also how to do computations instead of just returning a parse tree. + + +Calculator +------------------------------------------------------------------------------- + +A common example parser given in many textbooks is that for simple +expressions, with numbers, addition, subtraction, multiplication, +division, and parenthesization of subexpressions. We'll write this +example in Yapps, evaluating the expression as we parse. + +Unlike yacc, Yapps does not have any way to specify precedence +rules, so we have to do it ourselves. We say that an expression is +the sum of factors, and that a factor is the product of terms, and +that a term is a number or a parenthesized expression: :: + + expr: factor ( ("+"|"-") factor )* + factor: term ( ("*"|"/") term )* + term: NUM | "(" expr ")" + +In order to evaluate the expression as we go, we should carry along +an accumulator while evaluating the lists of terms or factors. Just +as we kept a "result" variable to build a parse tree for Lisp +expressions, we will use a variable to evaluate numerical expressions. +The full grammar is given below: :: + + parser Calculator: + token END: "$" # $ means end of string + token NUM: "[0-9]+" + + rule goal: expr END {{ return expr }} + + # An expression is the sum and difference of factors + rule expr: factor {{ v = factor }} + ( "[+]" factor {{ v = v+factor }} + | "-" factor {{ v = v-factor }} + )* {{ return v }} + + # A factor is the product and division of terms + rule factor: term {{ v = term }} + ( "[*]" term {{ v = v*term }} + | "/" term {{ v = v/term }} + )* {{ return v }} + + # A term is either a number or an expression surrounded by parentheses + rule term: NUM {{ return atoi(NUM) }} + | "\\(" expr "\\)" {{ return expr }} + +The top-level rule is goal, which says that we are looking for an +expression followed by the end of the string. The END token is +needed because without it, it isn't clear when to stop parsing.
For +example, the string "1+3" could be parsed either as the expression +"1" followed by the string "+3" or it could be parsed as the +expression "1+3". By requiring expressions to end with END, the +parser is forced to take "1+3". + +In the two rules with repetition, the accumulator is named v. After +reading in one expression, we initialize the accumulator. Each time +through the loop, we modify the accumulator by adding, subtracting, +multiplying by, or dividing the previous accumulator by the expression +that has been parsed. At the end of the rule, we return the +accumulator. + +The calculator example shows how to process lists of elements using +loops, as well as how to handle precedence of operators. + +Note: It's often important to put the END token in, so put it in +unless you are sure that your grammar has some other non-ambiguous +token marking the end of the program. + + +Calculator with Memory +------------------------------------------------------------------------------- + +In the previous example we learned how to write a calculator that +evaluates simple numerical expressions. In this section we will +extend the example to support both local and global variables. + +To support global variables, we will add assignment statements to +the "goal" rule. :: + + rule goal: expr END {{ return expr }} + | 'set' ID expr END {{ global_vars[ID] = expr }} + {{ return expr }} + +To use these variables, we need a new kind of terminal: :: + + rule term: ... | ID {{ return global_vars[ID] }} + +So far, these changes are straightforward. We simply have a global +dictionary global_vars that stores the variables and values, we +modify it when there is an assignment statement, and we look up +variables in it when we see a variable name. + +To support local variables, we will add variable declarations to +the set of allowed expressions. :: + + rule term: ... | 'let' VAR '=' expr 'in' expr ... + +This is where it becomes tricky. 
Local variables should be stored +in a local dictionary, not in the global one. One trick would be +to save a copy of the global dictionary, modify it, and then restore +it later. In this example we will instead use attributes to create +local information and pass it to subrules. + +A rule can optionally take parameters. When we invoke the rule, we +must pass in arguments. For local variables, let's use a single +parameter, local_vars: :: + + rule expr<<local_vars>>: ... + rule factor<<local_vars>>: ... + rule term<<local_vars>>: ... + +Each time we want to match expr, factor, or term, we will pass the +local variables in the current rule to the subrule. One interesting +case is when we pass as an argument something other than local_vars: :: + + rule term<<local_vars>>: ... + | 'let' VAR '=' expr<<local_vars>> + {{ local_vars = [(VAR, expr)] + local_vars }} + 'in' expr<<local_vars>> + {{ return expr }} + +Note that the assignment to the local variables list does not modify +the original list. This is important to keep local variables from +being seen outside the "let". + +The other interesting case is when we find a variable: :: + + global_vars = {} + + def lookup(map, name): + for x,v in map: + if x==name: return v + return global_vars[name] + %% + ... + rule term<<local_vars>>: ... + | VAR {{ return lookup(local_vars, VAR) }} + +The lookup function will search through the local variable list, +and if it cannot find the name there, it will look it up in the +global variable dictionary. + +A complete grammar for this example, including a read-eval-print +loop for interacting with the calculator, can be found in the +examples subdirectory included with Yapps. + +In this section we saw how to insert code before the parser. We +also saw how to use attributes to transmit local information from +one rule to its subrules.
+ + +BEGIN WITH SECTION 3 HERE + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + diff --git a/setup.py b/setup.py index 4bbb3cc..f3ce0ad 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ from distutils.core import setup description = "Extensions of Yet Another Python Parser System" + long_description = \ """ EXYAPPS is an easy to use parser generator that is written in Python and @@ -27,7 +28,7 @@ Exyapps is derived from YAPPS, with various extensions: setup ( name = "exyapps", - version = "3.0dev", + version = "3.0", description = description, long_description = long_description, url="https://svn.stsci.edu/trac/ssb/etal/wiki/exyapps", @@ -35,10 +36,7 @@ setup ( maintainer_email='no_spam@see_url', # bug: replace this and put acknowledgements of these guys in the docs # url = "http://theory.stanford.edu/~amitp/yapps/", - # author = "Amit J. Patel", - # author_email = "amitp@cs.stanford.edu", - # maintainer = "Matthias Urlichs", - # maintainer_email = "smurf@debian.org", + # author = "Amit J. Patel, Matthias Urlichs, Mark Sienkiewicz", license = 'MIT', platforms = ['POSIX'], keywords = ['parsing'], -- cgit