1 files changed, 442 insertions, 0 deletions
diff --git a/yapps/runtime.py b/yapps/runtime.py
new file mode 100644
index 0000000..5d9d1d6
--- /dev/null
+++ b/yapps/runtime.py
@@ -0,0 +1,442 @@
+# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
+# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
+# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
+#
+# This version of the Yapps 2 Runtime can be distributed under the
+# terms of the MIT open source license, either found in the LICENSE file
+# included with the Yapps distribution
+# <http://theory.stanford.edu/~amitp/yapps/> or at
+# <http://www.opensource.org/licenses/mit-license.php>
+#
+
+"""Run time libraries needed to run parsers generated by Yapps.
+
+This module defines parse-time exception classes, a scanner class, a
+base class for parsers produced by Yapps, and a context class that
+keeps track of the parse stack.
+
+"""
+
+import sys, re
+
+MIN_WINDOW=4096
+# File lookup window
+
+class SyntaxError(Exception):
+	"""When we run into an unexpected token, this is the exception to use"""
+	def __init__(self, pos=None, msg="Bad Token", context=None):
+		Exception.__init__(self)
+		self.pos = pos
+		self.msg = msg
+		self.context = context
+		
+	def __str__(self):
+		if not self.pos: return 'SyntaxError'
+		else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
+
+class NoMoreTokens(Exception):
+	"""Another exception object, for when we run out of tokens"""
+	pass
+
+class Token(object):
+	"""Yapps token.
+
+	This is a container for a scanned token.
+	"""
+
+	def __init__(self, type,value, pos=None):
+		"""Initialize a token."""
+		self.type = type
+		self.value = value
+		self.pos = pos
+
+	def __repr__(self):
+		output = '<%s: %s' % (self.type, repr(self.value))
+		if self.pos:
+			output += " @ "
+			if self.pos[0]:
+				output += "%s:" % self.pos[0]
+			if self.pos[1]:
+				output += "%d" % self.pos[1]
+			if self.pos[2] is not None:
+				output += ".%d" % self.pos[2]
+		output += ">"
+		return output
+
+in_name=0
+class Scanner(object):
+	"""Yapps scanner.
+
+	The Yapps scanner can work in context sensitive or context
+	insensitive modes.  The token(i) method is used to retrieve the
+	i-th token.  It takes a restrict set that limits the set of tokens
+	it is allowed to return.  In context sensitive mode, this restrict
+	set guides the scanner.  In context insensitive mode, there is no
+	restriction (the set is always the full set of tokens).
+	
+	"""
+	
+	def __init__(self, patterns, ignore, input="",
+			file=None,filename=None,stacked=False):
+		"""Initialize the scanner.
+
+		Parameters:
+		  patterns : [(terminal, uncompiled regex), ...] or None
+		  ignore : {terminal:None, ...}
+		  input : string
+
+		If patterns is None, we assume that the subclass has
+		defined self.patterns : [(terminal, compiled regex), ...].
+		Note that the patterns parameter expects uncompiled regexes,
+		whereas the self.patterns field expects compiled regexes.
+
+		The 'ignore' value is either None or a callable, which is called
+		with the scanner and the to-be-ignored match object; this can
+		be used for include file or comment handling.
+		"""
+
+		if not filename:
+			global in_name
+			filename="<f.%d>" % in_name
+			in_name += 1
+
+		self.input = input
+		self.ignore = ignore
+		self.file = file
+		self.filename = filename
+		self.pos = 0
+		self.del_pos = 0 # skipped
+		self.line = 1
+		self.del_line = 0 # skipped
+		self.col = 0
+		self.tokens = []
+		self.stack = None
+		self.stacked = stacked
+		
+		self.last_read_token = None
+		self.last_token = None
+		self.last_types = None
+
+		if patterns is not None:
+			# Compile the regex strings into regex objects
+			self.patterns = []
+			for terminal, regex in patterns:
+				self.patterns.append( (terminal, re.compile(regex)) )
+
+	def stack_input(self, input="", file=None, filename=None):
+		"""Temporarily parse from a second file."""
+
+		# Already reading from somewhere else: Go on top of that, please.
+		if self.stack:
+			# autogenerate a recursion-level-identifying filename
+			if not filename:
+				filename = 1
+			else:
+				try:
+					filename += 1
+				except TypeError:
+					pass
+				# now pass off to the include file
+			self.stack.stack_input(input,file,filename)
+		else:
+
+			try:
+				filename += 0
+			except TypeError:
+				pass
+			else:
+				filename = "<str_%d>" % filename
+
+#			self.stack = object.__new__(self.__class__)
+#			Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)
+
+			# Note that the pattern+ignore are added by the generated
+			# scanner code
+			self.stack = self.__class__(input,file,filename, stacked=True)
+
+	def get_pos(self):
+		"""Return a file/line/char tuple."""
+		if self.stack: return self.stack.get_pos()
+
+		return (self.filename, self.line+self.del_line, self.col)
+
+#	def __repr__(self):
+#		"""Print the last few tokens that have been scanned in"""
+#		output = ''
+#		for t in self.tokens:
+#			output += '%s\n' % (repr(t),)
+#		return output
+	
+	def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
+		"""Print the line of 'text' that includes position 'p',
+		along with a second line with a single caret (^) at position p"""
+
+		file,line,p = pos
+		if file != self.filename:
+			if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
+			print >>out, "(%s: not in input buffer)" % file
+			return
+
+		text = self.input
+		p += length-1 # starts at pos 1
+
+		origline=line
+		line -= self.del_line
+		spos=0
+		if line > 0:
+			while 1:
+				line = line - 1
+				try:
+					cr = text.index("\n",spos)
+				except ValueError:
+					if line:
+						text = ""
+					break
+				if line == 0:
+					text = text[spos:cr]
+					break
+				spos = cr+1
+		else:
+			print >>out, "(%s:%d not in input buffer)" % (file,origline)
+			return
+
+		# Now try printing part of the line
+		text = text[max(p-80, 0):p+80]
+		p = p - max(p-80, 0)
+
+		# Strip to the left
+		i = text[:p].rfind('\n')
+		j = text[:p].rfind('\r')
+		if i < 0 or (0 <= j < i): i = j
+		if 0 <= i < p:
+			p = p - i - 1
+			text = text[i+1:]
+
+		# Strip to the right
+		i = text.find('\n', p)
+		j = text.find('\r', p)
+		if i < 0 or (0 <= j < i): i = j
+		if i >= 0:
+			text = text[:i]
+
+		# Now shorten the text
+		while len(text) > 70 and p > 60:
+			# Cut off 10 chars
+			text = "..." + text[10:]
+			p = p - 7
+
+		# Now print the string, along with an indicator
+		print >>out, '> ',text
+		print >>out, '> ',' '*p + '^'
+	
+	def grab_input(self):
+		"""Get more input if possible."""
+		if not self.file: return
+		if len(self.input) - self.pos >= MIN_WINDOW: return
+
+		data = self.file.read(MIN_WINDOW)
+		if data is None or data == "":
+			self.file = None
+
+		# Drop bytes from the start, if necessary.
+		if self.pos > 2*MIN_WINDOW:
+			self.del_pos += MIN_WINDOW
+			self.del_line += self.input[:MIN_WINDOW].count("\n")
+			self.pos -= MIN_WINDOW
+			self.input = self.input[MIN_WINDOW:] + data
+		else:
+			self.input = self.input + data
+
+	def getchar(self):
+		"""Return the next character."""
+		self.grab_input()
+
+		c = self.input[self.pos]
+		self.pos += 1
+		return c
+
+	def token(self, restrict, context=None):
+		"""Scan for another token."""
+
+		while 1:
+			if self.stack:
+				try:
+					return self.stack.token(restrict, context)
+				except StopIteration:
+					self.stack = None
+
+		# Keep looking for a token, ignoring any in self.ignore
+			self.grab_input()
+
+			# special handling for end-of-file
+			if self.stacked and self.pos==len(self.input):
+				raise StopIteration
+
+			# Search the patterns for the longest match, with earlier
+			# tokens in the list having preference
+			best_match = -1
+			best_pat = '(error)'
+			best_m = None
+			for p, regexp in self.patterns:
+				# First check to see if we're ignoring this token
+				if restrict and p not in restrict and p not in self.ignore:
+					continue
+				m = regexp.match(self.input, self.pos)
+				if m and m.end()-m.start() > best_match:
+					# We got a match that's better than the previous one
+					best_pat = p
+					best_match = m.end()-m.start()
+					best_m = m
+					
+			# If we didn't find anything, raise an error
+			if best_pat == '(error)' and best_match < 0:
+				msg = 'Bad Token'
+				if restrict:
+					msg = 'Trying to find one of '+', '.join(restrict)
+				raise SyntaxError(self.get_pos(), msg, context=context)
+
+			ignore = best_pat in self.ignore
+			value = self.input[self.pos:self.pos+best_match]
+			if not ignore:
+				tok=Token(type=best_pat, value=value, pos=self.get_pos())
+
+			self.pos += best_match
+
+			npos = value.rfind("\n")
+			if npos > -1:
+				self.col = best_match-npos
+				self.line += value.count("\n")
+			else:
+				self.col += best_match
+
+			# If we found something that isn't to be ignored, return it
+			if not ignore:
+				if len(self.tokens) >= 10:
+					del self.tokens[0]
+				self.tokens.append(tok)
+				self.last_read_token = tok
+				# print repr(tok)
+				return tok
+			else:
+				ignore = self.ignore[best_pat]
+				if ignore:
+					ignore(self, best_m)
+
+	def peek(self, *types, **kw):
+		"""Returns the token type for lookahead; if there are any args
+		then the list of args is the set of token types to allow"""
+		context = kw.get("context",None)
+		if self.last_token is None:
+			self.last_types = types
+			self.last_token = self.token(types,context)
+		elif self.last_types:
+			for t in types:
+				if t not in self.last_types:
+					raise NotImplementedError("Unimplemented: restriction set changed")
+		return self.last_token.type
+		
+	def scan(self, type, **kw):
+		"""Returns the matched text, and moves to the next token"""
+		context = kw.get("context",None)
+
+		if self.last_token is None:
+			tok = self.token([type],context)
+		else:
+			if self.last_types and type not in self.last_types:
+				raise NotImplementedError("Unimplemented: restriction set changed")
+
+			tok = self.last_token
+			self.last_token = None
+		if tok.type != type:
+			if not self.last_types: self.last_types=[]
+			raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
+		return tok.value
+
+class Parser(object):
+	"""Base class for Yapps-generated parsers.
+
+	"""
+	
+	def __init__(self, scanner):
+		self._scanner = scanner
+		
+	def _stack(self, input="",file=None,filename=None):
+		"""Temporarily read from someplace else"""
+		self._scanner.stack_input(input,file,filename)
+		self._tok = None
+
+	def _peek(self, *types, **kw):
+		"""Returns the token type for lookahead; if there are any args
+		then the list of args is the set of token types to allow"""
+		return self._scanner.peek(*types, **kw)
+		
+	def _scan(self, type, **kw):
+		"""Returns the matched text, and moves to the next token"""
+		return self._scanner.scan(type, **kw)
+
+class Context(object):
+	"""Class to represent the parser's call stack.
+
+	Every rule creates a Context that links to its parent rule.  The
+	contexts can be used for debugging.
+
+	"""
+	
+	def __init__(self, parent, scanner, rule, args=()):
+		"""Create a new context.
+
+		Args:
+		parent: Context object or None
+		scanner: Scanner object
+		rule: string (name of the rule)
+		args: tuple listing parameters to the rule
+
+		"""
+		self.parent = parent
+		self.scanner = scanner
+		self.rule = rule
+		self.args = args
+		while scanner.stack: scanner = scanner.stack
+		self.token = scanner.last_read_token
+
+	def __str__(self):
+		output = ''
+		if self.parent: output = str(self.parent) + ' > '
+		output += self.rule
+		return output
+	
+def print_error(err, scanner, max_ctx=None):
+	"""Print error messages, the parser stack, and the input text -- for human-readable error messages."""
+	# NOTE: this function assumes 80 columns :-(
+	# Figure out the line number
+	pos = err.pos
+	if not pos:
+		pos = scanner.get_pos()
+
+	file_name, line_number, column_number = pos
+	print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg)
+
+	scanner.print_line_with_pointer(pos)
+		
+	context = err.context
+	token = None
+	while context:
+		print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
+		if context.token:
+			token = context.token
+		if token:
+			scanner.print_line_with_pointer(token.pos, length=len(token.value))
+		context = context.parent
+		if max_ctx:
+			max_ctx = max_ctx-1
+			if not max_ctx:
+				break
+
+def wrap_error_reporter(parser, rule, *args,**kw):
+	try:
+		return getattr(parser, rule)(*args,**kw)
+	except SyntaxError, e:
+		print_error(e, parser._scanner)
+	except NoMoreTokens:
+		print >>sys.stderr, 'Could not complete parsing; stopped around here:'
+		print >>sys.stderr, parser._scanner