aboutsummaryrefslogtreecommitdiff
path: root/yapps
diff options
context:
space:
mode:
Diffstat (limited to 'yapps')
-rw-r--r--yapps/__init__.py1
-rw-r--r--yapps/grammar.py211
-rw-r--r--yapps/parsetree.py673
-rw-r--r--yapps/runtime.py442
4 files changed, 0 insertions, 1327 deletions
diff --git a/yapps/__init__.py b/yapps/__init__.py
deleted file mode 100644
index 1bb8bf6..0000000
--- a/yapps/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# empty
diff --git a/yapps/grammar.py b/yapps/grammar.py
deleted file mode 100644
index 1714344..0000000
--- a/yapps/grammar.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# grammar.py, part of Yapps 2 - yet another python parser system
-# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
-#
-# This version of the Yapps 2 grammar can be distributed under the
-# terms of the MIT open source license, either found in the LICENSE
-# file included with the Yapps distribution
-# <http://theory.stanford.edu/~amitp/yapps/> or at
-# <http://www.opensource.org/licenses/mit-license.php>
-#
-
-"""Parser for Yapps grammars.
-
-This file defines the grammar of Yapps grammars. Naturally, it is
-implemented in Yapps. The grammar.py module needed by Yapps is built
-by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
-
-"""
-
-import sys, re
-from yapps import parsetree
-
-######################################################################
-def cleanup_choice(rule, lst):
- if len(lst) == 0: return Sequence(rule, [])
- if len(lst) == 1: return lst[0]
- return parsetree.Choice(rule, *tuple(lst))
-
-def cleanup_sequence(rule, lst):
- if len(lst) == 1: return lst[0]
- return parsetree.Sequence(rule, *tuple(lst))
-
-def resolve_name(rule, tokens, id, args):
- if id in [x[0] for x in tokens]:
- # It's a token
- if args:
- print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
- return parsetree.Terminal(rule, id)
- else:
- # It's a name, so assume it's a nonterminal
- return parsetree.NonTerminal(rule, id, args)
-
-
-# Begin -- grammar generated by Yapps
-import sys, re
-from yapps import runtime
-
-class ParserDescriptionScanner(runtime.Scanner):
- patterns = [
- ('"rule"', re.compile('rule')),
- ('"ignore"', re.compile('ignore')),
- ('"token"', re.compile('token')),
- ('"option"', re.compile('option')),
- ('":"', re.compile(':')),
- ('"parser"', re.compile('parser')),
- ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
- ('#.*?\r?\n', re.compile('#.*?\r?\n')),
- ('EOF', re.compile('$')),
- ('ATTR', re.compile('<<.+?>>')),
- ('STMT', re.compile('{{.+?}}')),
- ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
- ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
- ('LP', re.compile('\\(')),
- ('RP', re.compile('\\)')),
- ('LB', re.compile('\\[')),
- ('RB', re.compile('\\]')),
- ('OR', re.compile('[|]')),
- ('STAR', re.compile('[*]')),
- ('PLUS', re.compile('[+]')),
- ('QUEST', re.compile('[?]')),
- ('COLON', re.compile(':')),
- ]
- def __init__(self, str,*args,**kw):
- runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw)
-
-class ParserDescription(runtime.Parser):
- Context = runtime.Context
- def Parser(self, _parent=None):
- _context = self.Context(_parent, self._scanner, 'Parser', [])
- self._scan('"parser"', context=_context)
- ID = self._scan('ID', context=_context)
- self._scan('":"', context=_context)
- Options = self.Options(_context)
- Tokens = self.Tokens(_context)
- Rules = self.Rules(Tokens, _context)
- EOF = self._scan('EOF', context=_context)
- return parsetree.Generator(ID,Options,Tokens,Rules)
-
- def Options(self, _parent=None):
- _context = self.Context(_parent, self._scanner, 'Options', [])
- opt = {}
- while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"':
- self._scan('"option"', context=_context)
- self._scan('":"', context=_context)
- Str = self.Str(_context)
- opt[Str] = 1
- return opt
-
- def Tokens(self, _parent=None):
- _context = self.Context(_parent, self._scanner, 'Tokens', [])
- tok = []
- while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']:
- _token = self._peek('"token"', '"ignore"', context=_context)
- if _token == '"token"':
- self._scan('"token"', context=_context)
- ID = self._scan('ID', context=_context)
- self._scan('":"', context=_context)
- Str = self.Str(_context)
- tok.append( (ID,Str) )
- else: # == '"ignore"'
- self._scan('"ignore"', context=_context)
- self._scan('":"', context=_context)
- Str = self.Str(_context)
- ign = ('#ignore',Str)
- if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT':
- STMT = self._scan('STMT', context=_context)
- ign = ign + (STMT[2:-2],)
- tok.append( ign )
- return tok
-
- def Rules(self, tokens, _parent=None):
- _context = self.Context(_parent, self._scanner, 'Rules', [tokens])
- rul = []
- while self._peek('"rule"', 'EOF', context=_context) == '"rule"':
- self._scan('"rule"', context=_context)
- ID = self._scan('ID', context=_context)
- OptParam = self.OptParam(_context)
- self._scan('":"', context=_context)
- ClauseA = self.ClauseA(ID, tokens, _context)
- rul.append( (ID, OptParam, ClauseA) )
- return rul
-
- def ClauseA(self, rule, tokens, _parent=None):
- _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens])
- ClauseB = self.ClauseB(rule,tokens, _context)
- v = [ClauseB]
- while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR':
- OR = self._scan('OR', context=_context)
- ClauseB = self.ClauseB(rule,tokens, _context)
- v.append(ClauseB)
- return cleanup_choice(rule,v)
-
- def ClauseB(self, rule,tokens, _parent=None):
- _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens])
- v = []
- while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']:
- ClauseC = self.ClauseC(rule,tokens, _context)
- v.append(ClauseC)
- return cleanup_sequence(rule, v)
-
- def ClauseC(self, rule,tokens, _parent=None):
- _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens])
- ClauseD = self.ClauseD(rule,tokens, _context)
- _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context)
- if _token == 'PLUS':
- PLUS = self._scan('PLUS', context=_context)
- return parsetree.Plus(rule, ClauseD)
- elif _token == 'STAR':
- STAR = self._scan('STAR', context=_context)
- return parsetree.Star(rule, ClauseD)
- elif _token == 'QUEST':
- QUEST = self._scan('QUEST', context=_context)
- return parsetree.Option(rule, ClauseD)
- else:
- return ClauseD
-
- def ClauseD(self, rule,tokens, _parent=None):
- _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens])
- _token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context)
- if _token == 'STR':
- STR = self._scan('STR', context=_context)
- t = (STR, eval(STR,{},{}))
- if t not in tokens: tokens.insert( 0, t )
- return parsetree.Terminal(rule, STR)
- elif _token == 'ID':
- ID = self._scan('ID', context=_context)
- OptParam = self.OptParam(_context)
- return resolve_name(rule,tokens, ID, OptParam)
- elif _token == 'LP':
- LP = self._scan('LP', context=_context)
- ClauseA = self.ClauseA(rule,tokens, _context)
- RP = self._scan('RP', context=_context)
- return ClauseA
- elif _token == 'LB':
- LB = self._scan('LB', context=_context)
- ClauseA = self.ClauseA(rule,tokens, _context)
- RB = self._scan('RB', context=_context)
- return parsetree.Option(rule, ClauseA)
- else: # == 'STMT'
- STMT = self._scan('STMT', context=_context)
- return parsetree.Eval(rule, STMT[2:-2])
-
- def OptParam(self, _parent=None):
- _context = self.Context(_parent, self._scanner, 'OptParam', [])
- if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR':
- ATTR = self._scan('ATTR', context=_context)
- return ATTR[2:-2]
- return ''
-
- def Str(self, _parent=None):
- _context = self.Context(_parent, self._scanner, 'Str', [])
- STR = self._scan('STR', context=_context)
- return eval(STR,{},{})
-
-
-def parse(rule, text):
- P = ParserDescription(ParserDescriptionScanner(text))
- return runtime.wrap_error_reporter(P, rule)
-
-# End -- grammar generated by Yapps
-
-
diff --git a/yapps/parsetree.py b/yapps/parsetree.py
deleted file mode 100644
index e5e0ae0..0000000
--- a/yapps/parsetree.py
+++ /dev/null
@@ -1,673 +0,0 @@
-# parsetree.py, part of Yapps 2 - yet another python parser system
-# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
-#
-# This version of the Yapps 2 Runtime can be distributed under the
-# terms of the MIT open source license, either found in the LICENSE file
-# included with the Yapps distribution
-# <http://theory.stanford.edu/~amitp/yapps/> or at
-# <http://www.opensource.org/licenses/mit-license.php>
-#
-
-"""Classes used to represent parse trees and generate output.
-
-This module defines the Generator class, which drives the generation
-of Python output from a grammar parse tree. It also defines nodes
-used to represent the parse tree; they are derived from class Node.
-
-The main logic of Yapps is in this module.
-"""
-
-import sys, re
-
-######################################################################
-INDENT = ' '*4
-class Generator:
-
- # TODO: many of the methods here should be class methods, not instance methods
-
- def __init__(self, name, options, tokens, rules):
- self.change_count = 0
- self.name = name
- self.options = options
- self.preparser = ''
- self.postparser = None
-
- self.tokens = {} # Map from tokens to regexps
- self.ignore = {} # List of token names to ignore in parsing, map to statements
- self.terminals = [] # List of token names (to maintain ordering)
- for t in tokens:
- if len(t) == 3:
- n,t,s = t
- else:
- n,t = t
- s = None
-
- if n == '#ignore':
- n = t
- self.ignore[n] = s
- if n in self.tokens.keys() and self.tokens[n] != t:
- print >>sys.stderr, 'Warning: token %s defined more than once.' % n
- self.tokens[n] = t
- self.terminals.append(n)
-
- self.rules = {} # Map from rule names to parser nodes
- self.params = {} # Map from rule names to parameters
- self.goals = [] # List of rule names (to maintain ordering)
- for n,p,r in rules:
- self.params[n] = p
- self.rules[n] = r
- self.goals.append(n)
-
- self.output = sys.stdout
-
- def has_option(self, name):
- return self.options.get(name, 0)
-
- def non_ignored_tokens(self):
- return [x for x in self.terminals if x not in self.ignore]
-
- def changed(self):
- """Increments the change count.
-
- >>> t = Generator('', [], [], [])
- >>> old_count = t.change_count
- >>> t.changed()
- >>> assert t.change_count == old_count + 1
- """
- self.change_count = 1+self.change_count
-
- def set_subtract(self, a, b):
- """Returns the elements of a that are not in b.
-
- >>> t = Generator('', [], [], [])
- >>> t.set_subtract([], [])
- []
- >>> t.set_subtract([1, 2], [1, 2])
- []
- >>> t.set_subtract([1, 2, 3], [2])
- [1, 3]
- >>> t.set_subtract([1], [2, 3, 4])
- [1]
- """
- result = []
- for x in a:
- if x not in b:
- result.append(x)
- return result
-
- def subset(self, a, b):
- """True iff all elements of sequence a are inside sequence b
-
- >>> t = Generator('', [], [], [])
- >>> t.subset([], [1, 2, 3])
- 1
- >>> t.subset([1, 2, 3], [])
- 0
- >>> t.subset([1], [1, 2, 3])
- 1
- >>> t.subset([3, 2, 1], [1, 2, 3])
- 1
- >>> t.subset([1, 1, 1], [1, 2, 3])
- 1
- >>> t.subset([1, 2, 3], [1, 1, 1])
- 0
- """
- for x in a:
- if x not in b:
- return 0
- return 1
-
- def equal_set(self, a, b):
- """True iff subset(a, b) and subset(b, a)
-
- >>> t = Generator('', [], [], [])
- >>> a_set = [1, 2, 3]
- >>> t.equal_set(a_set, a_set)
- 1
- >>> t.equal_set(a_set, a_set[:])
- 1
- >>> t.equal_set([], a_set)
- 0
- >>> t.equal_set([1, 2, 3], [3, 2, 1])
- 1
- """
- if len(a) != len(b): return 0
- if a == b: return 1
- return self.subset(a, b) and self.subset(b, a)
-
- def add_to(self, parent, additions):
- "Modify _parent_ to include all elements in _additions_"
- for x in additions:
- if x not in parent:
- parent.append(x)
- self.changed()
-
- def equate(self, a, b):
- """Extend (a) and (b) so that they contain each others' elements.
-
- >>> t = Generator('', [], [], [])
- >>> a = [1, 2]
- >>> b = [2, 3]
- >>> t.equate(a, b)
- >>> a
- [1, 2, 3]
- >>> b
- [2, 3, 1]
- """
- self.add_to(a, b)
- self.add_to(b, a)
-
- def write(self, *args):
- for a in args:
- self.output.write(a)
-
- def in_test(self, expr, full, set):
- """Generate a test of (expr) being in (set), where (set) is a subset of (full)
-
- expr is a string (Python expression)
- set is a list of values (which will be converted with repr)
- full is the list of all values expr could possibly evaluate to
-
- >>> t = Generator('', [], [], [])
- >>> t.in_test('x', [1,2,3,4], [])
- '0'
- >>> t.in_test('x', [1,2,3,4], [1,2,3,4])
- '1'
- >>> t.in_test('x', [1,2,3,4], [1])
- 'x == 1'
- >>> t.in_test('a+b', [1,2,3,4], [1,2])
- 'a+b in [1, 2]'
- >>> t.in_test('x', [1,2,3,4,5], [1,2,3])
- 'x not in [4, 5]'
- >>> t.in_test('x', [1,2,3,4,5], [1,2,3,4])
- 'x != 5'
- """
-
- if not set: return '0'
- if len(set) == 1: return '%s == %s' % (expr, repr(set[0]))
- if full and len(set) > len(full)/2:
- # Reverse the sense of the test.
- not_set = [x for x in full if x not in set]
- return self.not_in_test(expr, full, not_set)
- return '%s in %s' % (expr, repr(set))
-
- def not_in_test(self, expr, full, set):
- """Like in_test, but the reverse test."""
- if not set: return '1'
- if len(set) == 1: return '%s != %s' % (expr, repr(set[0]))
- return '%s not in %s' % (expr, repr(set))
-
- def peek_call(self, a):
- """Generate a call to scan for a token in the set 'a'"""
- assert type(a) == type([])
- a_set = (repr(a)[1:-1])
- if self.equal_set(a, self.non_ignored_tokens()): a_set = ''
- if self.has_option('context-insensitive-scanner'): a_set = ''
- if a_set: a_set += ","
-
- return 'self._peek(%s context=_context)' % a_set
-
- def peek_test(self, a, b):
- """Generate a call to test whether the next token (which could be any of
- the elements in a) is in the set b."""
- if self.subset(a, b): return '1'
- if self.has_option('context-insensitive-scanner'): a = self.non_ignored_tokens()
- return self.in_test(self.peek_call(a), a, b)
-
- def not_peek_test(self, a, b):
- """Like peek_test, but the opposite sense."""
- if self.subset(a, b): return '0'
- return self.not_in_test(self.peek_call(a), a, b)
-
- def calculate(self):
- """The main loop to compute the epsilon, first, follow sets.
- The loop continues until the sets converge. This works because
- each set can only get larger, so when they stop getting larger,
- we're done."""
- # First we determine whether a rule accepts epsilon (the empty sequence)
- while 1:
- for r in self.goals:
- self.rules[r].setup(self)
- if self.change_count == 0: break
- self.change_count = 0
-
- # Now we compute the first/follow sets
- while 1:
- for r in self.goals:
- self.rules[r].update(self)
- if self.change_count == 0: break
- self.change_count = 0
-
- def dump_information(self):
- """Display the grammar in somewhat human-readable form."""
- self.calculate()
- for r in self.goals:
- print ' _____' + '_'*len(r)
- print ('___/Rule '+r+'\\' + '_'*80)[:79]
- queue = [self.rules[r]]
- while queue:
- top = queue[0]
- del queue[0]
-
- print 'Rule', repr(top), 'of class', top.__class__.__name__
- top.first.sort()
- top.follow.sort()
- eps = []
- if top.accepts_epsilon: eps = ['(null)']
- print ' FIRST:', ', '.join(top.first+eps)
- print ' FOLLOW:', ', '.join(top.follow)
- for x in top.get_children(): queue.append(x)
-
- def repr_ignore(self):
- out="{"
- for t,s in self.ignore.iteritems():
- if s is None: s=repr(s)
- out += "%s:%s," % (repr(t),s)
- out += "}"
- return out
-
- def generate_output(self):
- self.calculate()
- self.write(self.preparser)
- self.write("# Begin -- grammar generated by Yapps\n")
- self.write("import sys, re\n")
- self.write("from yapps import runtime\n")
- self.write("\n")
- self.write("class ", self.name, "Scanner(runtime.Scanner):\n")
- self.write(" patterns = [\n")
- for p in self.terminals:
- self.write(" (%s, re.compile(%s)),\n" % (
- repr(p), repr(self.tokens[p])))
- self.write(" ]\n")
- self.write(" def __init__(self, str,*args,**kw):\n")
- self.write(" runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" %
- self.repr_ignore())
- self.write("\n")
-
- self.write("class ", self.name, "(runtime.Parser):\n")
- self.write(INDENT, "Context = runtime.Context\n")
- for r in self.goals:
- self.write(INDENT, "def ", r, "(self")
- if self.params[r]: self.write(", ", self.params[r])
- self.write(", _parent=None):\n")
- self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" %
- (repr(r), self.params.get(r, '')))
- self.rules[r].output(self, INDENT+INDENT)
- self.write("\n")
-
- self.write("\n")
- self.write("def parse(rule, text):\n")
- self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n")
- self.write(" return runtime.wrap_error_reporter(P, rule)\n")
- self.write("\n")
- if self.postparser is not None:
- self.write("# End -- grammar generated by Yapps\n")
- self.write(self.postparser)
- else:
- self.write("if __name__ == '__main__':\n")
- self.write(INDENT, "from sys import argv, stdin\n")
- self.write(INDENT, "if len(argv) >= 2:\n")
- self.write(INDENT*2, "if len(argv) >= 3:\n")
- self.write(INDENT*3, "f = open(argv[2],'r')\n")
- self.write(INDENT*2, "else:\n")
- self.write(INDENT*3, "f = stdin\n")
- self.write(INDENT*2, "print parse(argv[1], f.read())\n")
- self.write(INDENT, "else: print >>sys.stderr, 'Args: <rule> [<filename>]'\n")
- self.write("# End -- grammar generated by Yapps\n")
-
-######################################################################
-class Node:
- """This is the base class for all components of a grammar."""
- def __init__(self, rule):
- self.rule = rule # name of the rule containing this node
- self.first = []
- self.follow = []
- self.accepts_epsilon = 0
-
- def setup(self, gen):
- # Setup will change accepts_epsilon,
- # sometimes from 0 to 1 but never 1 to 0.
- # It will take a finite number of steps to set things up
- pass
-
- def used(self, vars):
- "Return two lists: one of vars used, and the other of vars assigned"
- return vars, []
-
- def get_children(self):
- "Return a list of sub-nodes"
- return []
-
- def __repr__(self):
- return str(self)
-
- def update(self, gen):
- if self.accepts_epsilon:
- gen.add_to(self.first, self.follow)
-
- def output(self, gen, indent):
- "Write out code to _gen_ with _indent_:string indentation"
- gen.write(indent, "assert 0 # Invalid parser node\n")
-
-class Terminal(Node):
- """This class stores terminal nodes, which are tokens."""
- def __init__(self, rule, token):
- Node.__init__(self, rule)
- self.token = token
- self.accepts_epsilon = 0
-
- def __str__(self):
- return self.token
-
- def update(self, gen):
- Node.update(self, gen)
- if self.first != [self.token]:
- self.first = [self.token]
- gen.changed()
-
- def output(self, gen, indent):
- gen.write(indent)
- if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token):
- gen.write(self.token, " = ")
- gen.write("self._scan(%s, context=_context)\n" % repr(self.token))
-
-class Eval(Node):
- """This class stores evaluation nodes, from {{ ... }} clauses."""
- def __init__(self, rule, expr):
- Node.__init__(self, rule)
- self.expr = expr
-
- def setup(self, gen):
- Node.setup(self, gen)
- if not self.accepts_epsilon:
- self.accepts_epsilon = 1
- gen.changed()
-
- def __str__(self):
- return '{{ %s }}' % self.expr.strip()
-
- def output(self, gen, indent):
- gen.write(indent, self.expr.strip(), '\n')
-
-class NonTerminal(Node):
- """This class stores nonterminal nodes, which are rules with arguments."""
- def __init__(self, rule, name, args):
- Node.__init__(self, rule)
- self.name = name
- self.args = args
-
- def setup(self, gen):
- Node.setup(self, gen)
- try:
- self.target = gen.rules[self.name]
- if self.accepts_epsilon != self.target.accepts_epsilon:
- self.accepts_epsilon = self.target.accepts_epsilon
- gen.changed()
- except KeyError: # Oops, it's nonexistent
- print >>sys.stderr, 'Error: no rule <%s>' % self.name
- self.target = self
-
- def __str__(self):
- return '%s' % self.name
-
- def update(self, gen):
- Node.update(self, gen)
- gen.equate(self.first, self.target.first)
- gen.equate(self.follow, self.target.follow)
-
- def output(self, gen, indent):
- gen.write(indent)
- gen.write(self.name, " = ")
- args = self.args
- if args: args += ', '
- args += '_context'
- gen.write("self.", self.name, "(", args, ")\n")
-
-class Sequence(Node):
- """This class stores a sequence of nodes (A B C ...)"""
- def __init__(self, rule, *children):
- Node.__init__(self, rule)
- self.children = children
-
- def setup(self, gen):
- Node.setup(self, gen)
- for c in self.children: c.setup(gen)
-
- if not self.accepts_epsilon:
- # If it's not already accepting epsilon, it might now do so.
- for c in self.children:
- # any non-epsilon means all is non-epsilon
- if not c.accepts_epsilon: break
- else:
- self.accepts_epsilon = 1
- gen.changed()
-
- def get_children(self):
- return self.children
-
- def __str__(self):
- return '( %s )' % ' '.join(map(str, self.children))
-
- def update(self, gen):
- Node.update(self, gen)
- for g in self.children:
- g.update(gen)
-
- empty = 1
- for g_i in range(len(self.children)):
- g = self.children[g_i]
-
- if empty: gen.add_to(self.first, g.first)
- if not g.accepts_epsilon: empty = 0
-
- if g_i == len(self.children)-1:
- next = self.follow
- else:
- next = self.children[1+g_i].first
- gen.add_to(g.follow, next)
-
- if self.children:
- gen.add_to(self.follow, self.children[-1].follow)
-
- def output(self, gen, indent):
- if self.children:
- for c in self.children:
- c.output(gen, indent)
- else:
- # Placeholder for empty sequences, just in case
- gen.write(indent, 'pass\n')
-
-class Choice(Node):
- """This class stores a choice between nodes (A | B | C | ...)"""
- def __init__(self, rule, *children):
- Node.__init__(self, rule)
- self.children = children
-
- def setup(self, gen):
- Node.setup(self, gen)
- for c in self.children: c.setup(gen)
-
- if not self.accepts_epsilon:
- for c in self.children:
- if c.accepts_epsilon:
- self.accepts_epsilon = 1
- gen.changed()
-
- def get_children(self):
- return self.children
-
- def __str__(self):
- return '( %s )' % ' | '.join(map(str, self.children))
-
- def update(self, gen):
- Node.update(self, gen)
- for g in self.children:
- g.update(gen)
-
- for g in self.children:
- gen.add_to(self.first, g.first)
- gen.add_to(self.follow, g.follow)
- for g in self.children:
- gen.add_to(g.follow, self.follow)
- if self.accepts_epsilon:
- gen.add_to(self.first, self.follow)
-
- def output(self, gen, indent):
- test = "if"
- gen.write(indent, "_token = ", gen.peek_call(self.first), "\n")
- tokens_seen = []
- tokens_unseen = self.first[:]
- if gen.has_option('context-insensitive-scanner'):
- # Context insensitive scanners can return ANY token,
- # not only the ones in first.
- tokens_unseen = gen.non_ignored_tokens()
- for c in self.children:
- testset = c.first[:]
- removed = []
- for x in testset:
- if x in tokens_seen:
- testset.remove(x)
- removed.append(x)
- if x in tokens_unseen: tokens_unseen.remove(x)
- tokens_seen = tokens_seen + testset
- if removed:
- if not testset:
- print >>sys.stderr, 'Error in rule', self.rule+':'
- else:
- print >>sys.stderr, 'Warning in rule', self.rule+':'
- print >>sys.stderr, ' *', self
- print >>sys.stderr, ' * These tokens could be matched by more than one clause:'
- print >>sys.stderr, ' *', ' '.join(removed)
-
- if testset:
- if not tokens_unseen: # context sensitive scanners only!
- if test == 'if':
- # if it's the first AND last test, then
- # we can simply put the code without an if/else
- c.output(gen, indent)
- else:
- gen.write(indent, "else:")
- t = gen.in_test('', [], testset)
- if len(t) < 70-len(indent):
- gen.write(' #', t)
- gen.write("\n")
- c.output(gen, indent+INDENT)
- else:
- gen.write(indent, test, " ",
- gen.in_test('_token', tokens_unseen, testset),
- ":\n")
- c.output(gen, indent+INDENT)
- test = "elif"
-
- if tokens_unseen:
- gen.write(indent, "else:\n")
- gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ")
- gen.write("'Could not match ", self.rule, "')\n")
-
-class Wrapper(Node):
- """This is a base class for nodes that modify a single child."""
- def __init__(self, rule, child):
- Node.__init__(self, rule)
- self.child = child
-
- def setup(self, gen):
- Node.setup(self, gen)
- self.child.setup(gen)
-
- def get_children(self):
- return [self.child]
-
- def update(self, gen):
- Node.update(self, gen)
- self.child.update(gen)
- gen.add_to(self.first, self.child.first)
- gen.equate(self.follow, self.child.follow)
-
-class Option(Wrapper):
- """This class represents an optional clause of the form [A]"""
- def setup(self, gen):
- Wrapper.setup(self, gen)
- if not self.accepts_epsilon:
- self.accepts_epsilon = 1
- gen.changed()
-
- def __str__(self):
- return '[ %s ]' % str(self.child)
-
- def output(self, gen, indent):
- if self.child.accepts_epsilon:
- print >>sys.stderr, 'Warning in rule', self.rule+': contents may be empty.'
- gen.write(indent, "if %s:\n" %
- gen.peek_test(self.first, self.child.first))
- self.child.output(gen, indent+INDENT)
-
- if gen.has_option('context-insensitive-scanner'):
- gen.write(indent, "if %s:\n" %
- gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
- gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
- repr(self.first))
-
-
-class Plus(Wrapper):
- """This class represents a 1-or-more repetition clause of the form A+"""
- def setup(self, gen):
- Wrapper.setup(self, gen)
- if self.accepts_epsilon != self.child.accepts_epsilon:
- self.accepts_epsilon = self.child.accepts_epsilon
- gen.changed()
-
- def __str__(self):
- return '%s+' % str(self.child)
-
- def update(self, gen):
- Wrapper.update(self, gen)
- gen.add_to(self.child.follow, self.child.first)
-
- def output(self, gen, indent):
- if self.child.accepts_epsilon:
- print >>sys.stderr, 'Warning in rule', self.rule+':'
- print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser may not work properly.'
- gen.write(indent, "while 1:\n")
- self.child.output(gen, indent+INDENT)
- union = self.first[:]
- gen.add_to(union, self.follow)
- gen.write(indent+INDENT, "if %s: break\n" %
- gen.not_peek_test(union, self.child.first))
-
- if gen.has_option('context-insensitive-scanner'):
- gen.write(indent, "if %s:\n" %
- gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
- gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
- repr(self.first))
-
-
-class Star(Wrapper):
- """This class represents a 0-or-more repetition clause of the form A*"""
- def setup(self, gen):
- Wrapper.setup(self, gen)
- if not self.accepts_epsilon:
- self.accepts_epsilon = 1
- gen.changed()
-
- def __str__(self):
- return '%s*' % str(self.child)
-
- def update(self, gen):
- Wrapper.update(self, gen)
- gen.add_to(self.child.follow, self.child.first)
-
- def output(self, gen, indent):
- if self.child.accepts_epsilon:
- print >>sys.stderr, 'Warning in rule', self.rule+':'
- print >>sys.stderr, ' * The repeated pattern could be empty. The resulting parser probably will not work properly.'
- gen.write(indent, "while %s:\n" %
- gen.peek_test(self.follow, self.child.first))
- self.child.output(gen, indent+INDENT)
-
- # TODO: need to generate tests like this in lots of rules
- if gen.has_option('context-insensitive-scanner'):
- gen.write(indent, "if %s:\n" %
- gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
- gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
- repr(self.first))
-
diff --git a/yapps/runtime.py b/yapps/runtime.py
deleted file mode 100644
index 5d9d1d6..0000000
--- a/yapps/runtime.py
+++ /dev/null
@@ -1,442 +0,0 @@
-# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
-# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
-# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
-#
-# This version of the Yapps 2 Runtime can be distributed under the
-# terms of the MIT open source license, either found in the LICENSE file
-# included with the Yapps distribution
-# <http://theory.stanford.edu/~amitp/yapps/> or at
-# <http://www.opensource.org/licenses/mit-license.php>
-#
-
-"""Run time libraries needed to run parsers generated by Yapps.
-
-This module defines parse-time exception classes, a scanner class, a
-base class for parsers produced by Yapps, and a context class that
-keeps track of the parse stack.
-
-"""
-
-import sys, re
-
-MIN_WINDOW=4096
-# File lookup window
-
-class SyntaxError(Exception):
- """When we run into an unexpected token, this is the exception to use"""
- def __init__(self, pos=None, msg="Bad Token", context=None):
- Exception.__init__(self)
- self.pos = pos
- self.msg = msg
- self.context = context
-
- def __str__(self):
- if not self.pos: return 'SyntaxError'
- else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
-
-class NoMoreTokens(Exception):
- """Another exception object, for when we run out of tokens"""
- pass
-
-class Token(object):
- """Yapps token.
-
- This is a container for a scanned token.
- """
-
- def __init__(self, type,value, pos=None):
- """Initialize a token."""
- self.type = type
- self.value = value
- self.pos = pos
-
- def __repr__(self):
- output = '<%s: %s' % (self.type, repr(self.value))
- if self.pos:
- output += " @ "
- if self.pos[0]:
- output += "%s:" % self.pos[0]
- if self.pos[1]:
- output += "%d" % self.pos[1]
- if self.pos[2] is not None:
- output += ".%d" % self.pos[2]
- output += ">"
- return output
-
-in_name=0
-class Scanner(object):
- """Yapps scanner.
-
- The Yapps scanner can work in context sensitive or context
- insensitive modes. The token(i) method is used to retrieve the
- i-th token. It takes a restrict set that limits the set of tokens
- it is allowed to return. In context sensitive mode, this restrict
- set guides the scanner. In context insensitive mode, there is no
- restriction (the set is always the full set of tokens).
-
- """
-
- def __init__(self, patterns, ignore, input="",
- file=None,filename=None,stacked=False):
- """Initialize the scanner.
-
- Parameters:
- patterns : [(terminal, uncompiled regex), ...] or None
- ignore : {terminal:None, ...}
- input : string
-
- If patterns is None, we assume that the subclass has
- defined self.patterns : [(terminal, compiled regex), ...].
- Note that the patterns parameter expects uncompiled regexes,
- whereas the self.patterns field expects compiled regexes.
-
- The 'ignore' value is either None or a callable, which is called
- with the scanner and the to-be-ignored match object; this can
- be used for include file or comment handling.
- """
-
- if not filename:
- global in_name
- filename="<f.%d>" % in_name
- in_name += 1
-
- self.input = input
- self.ignore = ignore
- self.file = file
- self.filename = filename
- self.pos = 0
- self.del_pos = 0 # skipped
- self.line = 1
- self.del_line = 0 # skipped
- self.col = 0
- self.tokens = []
- self.stack = None
- self.stacked = stacked
-
- self.last_read_token = None
- self.last_token = None
- self.last_types = None
-
- if patterns is not None:
- # Compile the regex strings into regex objects
- self.patterns = []
- for terminal, regex in patterns:
- self.patterns.append( (terminal, re.compile(regex)) )
-
- def stack_input(self, input="", file=None, filename=None):
- """Temporarily parse from a second file."""
-
- # Already reading from somewhere else: Go on top of that, please.
- if self.stack:
- # autogenerate a recursion-level-identifying filename
- if not filename:
- filename = 1
- else:
- try:
- filename += 1
- except TypeError:
- pass
- # now pass off to the include file
- self.stack.stack_input(input,file,filename)
- else:
-
- try:
- filename += 0
- except TypeError:
- pass
- else:
- filename = "<str_%d>" % filename
-
-# self.stack = object.__new__(self.__class__)
-# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)
-
- # Note that the pattern+ignore are added by the generated
- # scanner code
- self.stack = self.__class__(input,file,filename, stacked=True)
-
- def get_pos(self):
- """Return a file/line/char tuple."""
- if self.stack: return self.stack.get_pos()
-
- return (self.filename, self.line+self.del_line, self.col)
-
-# def __repr__(self):
-# """Print the last few tokens that have been scanned in"""
-# output = ''
-# for t in self.tokens:
-# output += '%s\n' % (repr(t),)
-# return output
-
- def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
- """Print the line of 'text' that includes position 'p',
- along with a second line with a single caret (^) at position p"""
-
- file,line,p = pos
- if file != self.filename:
- if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
- print >>out, "(%s: not in input buffer)" % file
- return
-
- text = self.input
- p += length-1 # starts at pos 1
-
- origline=line
- line -= self.del_line
- spos=0
- if line > 0:
- while 1:
- line = line - 1
- try:
- cr = text.index("\n",spos)
- except ValueError:
- if line:
- text = ""
- break
- if line == 0:
- text = text[spos:cr]
- break
- spos = cr+1
- else:
- print >>out, "(%s:%d not in input buffer)" % (file,origline)
- return
-
- # Now try printing part of the line
- text = text[max(p-80, 0):p+80]
- p = p - max(p-80, 0)
-
- # Strip to the left
- i = text[:p].rfind('\n')
- j = text[:p].rfind('\r')
- if i < 0 or (0 <= j < i): i = j
- if 0 <= i < p:
- p = p - i - 1
- text = text[i+1:]
-
- # Strip to the right
- i = text.find('\n', p)
- j = text.find('\r', p)
- if i < 0 or (0 <= j < i): i = j
- if i >= 0:
- text = text[:i]
-
- # Now shorten the text
- while len(text) > 70 and p > 60:
- # Cut off 10 chars
- text = "..." + text[10:]
- p = p - 7
-
- # Now print the string, along with an indicator
- print >>out, '> ',text
- print >>out, '> ',' '*p + '^'
-
- def grab_input(self):
- """Get more input if possible."""
- if not self.file: return
- if len(self.input) - self.pos >= MIN_WINDOW: return
-
- data = self.file.read(MIN_WINDOW)
- if data is None or data == "":
- self.file = None
-
- # Drop bytes from the start, if necessary.
- if self.pos > 2*MIN_WINDOW:
- self.del_pos += MIN_WINDOW
- self.del_line += self.input[:MIN_WINDOW].count("\n")
- self.pos -= MIN_WINDOW
- self.input = self.input[MIN_WINDOW:] + data
- else:
- self.input = self.input + data
-
- def getchar(self):
- """Return the next character."""
- self.grab_input()
-
- c = self.input[self.pos]
- self.pos += 1
- return c
-
- def token(self, restrict, context=None):
- """Scan for another token."""
-
- while 1:
- if self.stack:
- try:
- return self.stack.token(restrict, context)
- except StopIteration:
- self.stack = None
-
- # Keep looking for a token, ignoring any in self.ignore
- self.grab_input()
-
- # special handling for end-of-file
- if self.stacked and self.pos==len(self.input):
- raise StopIteration
-
- # Search the patterns for the longest match, with earlier
- # tokens in the list having preference
- best_match = -1
- best_pat = '(error)'
- best_m = None
- for p, regexp in self.patterns:
- # First check to see if we're ignoring this token
- if restrict and p not in restrict and p not in self.ignore:
- continue
- m = regexp.match(self.input, self.pos)
- if m and m.end()-m.start() > best_match:
- # We got a match that's better than the previous one
- best_pat = p
- best_match = m.end()-m.start()
- best_m = m
-
- # If we didn't find anything, raise an error
- if best_pat == '(error)' and best_match < 0:
- msg = 'Bad Token'
- if restrict:
- msg = 'Trying to find one of '+', '.join(restrict)
- raise SyntaxError(self.get_pos(), msg, context=context)
-
- ignore = best_pat in self.ignore
- value = self.input[self.pos:self.pos+best_match]
- if not ignore:
- tok=Token(type=best_pat, value=value, pos=self.get_pos())
-
- self.pos += best_match
-
- npos = value.rfind("\n")
- if npos > -1:
- self.col = best_match-npos
- self.line += value.count("\n")
- else:
- self.col += best_match
-
- # If we found something that isn't to be ignored, return it
- if not ignore:
- if len(self.tokens) >= 10:
- del self.tokens[0]
- self.tokens.append(tok)
- self.last_read_token = tok
- # print repr(tok)
- return tok
- else:
- ignore = self.ignore[best_pat]
- if ignore:
- ignore(self, best_m)
-
- def peek(self, *types, **kw):
- """Returns the token type for lookahead; if there are any args
- then the list of args is the set of token types to allow"""
- context = kw.get("context",None)
- if self.last_token is None:
- self.last_types = types
- self.last_token = self.token(types,context)
- elif self.last_types:
- for t in types:
- if t not in self.last_types:
- raise NotImplementedError("Unimplemented: restriction set changed")
- return self.last_token.type
-
- def scan(self, type, **kw):
- """Returns the matched text, and moves to the next token"""
- context = kw.get("context",None)
-
- if self.last_token is None:
- tok = self.token([type],context)
- else:
- if self.last_types and type not in self.last_types:
- raise NotImplementedError("Unimplemented: restriction set changed")
-
- tok = self.last_token
- self.last_token = None
- if tok.type != type:
- if not self.last_types: self.last_types=[]
- raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
- return tok.value
-
-class Parser(object):
- """Base class for Yapps-generated parsers.
-
- """
-
- def __init__(self, scanner):
- self._scanner = scanner
-
- def _stack(self, input="",file=None,filename=None):
- """Temporarily read from someplace else"""
- self._scanner.stack_input(input,file,filename)
- self._tok = None
-
- def _peek(self, *types, **kw):
- """Returns the token type for lookahead; if there are any args
- then the list of args is the set of token types to allow"""
- return self._scanner.peek(*types, **kw)
-
- def _scan(self, type, **kw):
- """Returns the matched text, and moves to the next token"""
- return self._scanner.scan(type, **kw)
-
-class Context(object):
- """Class to represent the parser's call stack.
-
- Every rule creates a Context that links to its parent rule. The
- contexts can be used for debugging.
-
- """
-
- def __init__(self, parent, scanner, rule, args=()):
- """Create a new context.
-
- Args:
- parent: Context object or None
- scanner: Scanner object
- rule: string (name of the rule)
- args: tuple listing parameters to the rule
-
- """
- self.parent = parent
- self.scanner = scanner
- self.rule = rule
- self.args = args
- while scanner.stack: scanner = scanner.stack
- self.token = scanner.last_read_token
-
- def __str__(self):
- output = ''
- if self.parent: output = str(self.parent) + ' > '
- output += self.rule
- return output
-
-def print_error(err, scanner, max_ctx=None):
- """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
- # NOTE: this function assumes 80 columns :-(
- # Figure out the line number
- pos = err.pos
- if not pos:
- pos = scanner.get_pos()
-
- file_name, line_number, column_number = pos
- print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg)
-
- scanner.print_line_with_pointer(pos)
-
- context = err.context
- token = None
- while context:
- print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
- if context.token:
- token = context.token
- if token:
- scanner.print_line_with_pointer(token.pos, length=len(token.value))
- context = context.parent
- if max_ctx:
- max_ctx = max_ctx-1
- if not max_ctx:
- break
-
-def wrap_error_reporter(parser, rule, *args,**kw):
- try:
- return getattr(parser, rule)(*args,**kw)
- except SyntaxError, e:
- print_error(e, parser._scanner)
- except NoMoreTokens:
- print >>sys.stderr, 'Could not complete parsing; stopped around here:'
- print >>sys.stderr, parser._scanner