Created
May 30, 2015 15:35
-
-
Save fbwright/68a03126dd00740b7ae9 to your computer and use it in GitHub Desktop.
Recursive descent parser and interpreter/(Python|Forth) compiler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import print_function, division | |
import sys, re | |
if sys.version_info.major < 3: | |
input = raw_input | |
#I wonder... how difficult would be writing a parser that takes well-formed | |
#BNF in input and outputs a recursive descent parser based on that grammar? | |
#I'd have to recognize, name and put into an enum tokens ('...'), create a | |
#dictionary of identifiers (<...>), and each identifier will be assigned | |
#some other identifiers/tokens, separared by spaces, perhaps inside () or [] | |
#or followed by * | |
#The only prebuilt part would be the atom - for numbers, strings and ids | |
def tabify(lines, tabs='\t'): | |
s = "" | |
for line in lines.split("\n"): | |
s = "%s\n%s%s" % (s, tabs, line) | |
return s | |
def is_string(s): | |
return s.startswith("str(") or s.startswith('"') and s.endswith('"') | |
def to_number(number): | |
try: | |
return int(number) | |
except ValueError: | |
try: | |
return float(number) | |
except ValueError: | |
raise ValueError("Not a number") | |
def parse_number(number): | |
try: | |
return to_number(number) | |
except ValueError: | |
pass | |
if number.endswith(('i', 'I')): | |
suffix = number[-2] | |
number = to_number(number[:-2]) | |
number *= { | |
'': 1, | |
'K': 2**10, | |
'k': 2**10, | |
'M': 2**20, | |
'G': 2**30, | |
'T': 2**40, | |
'P': 2**50, | |
'E': 2**60, | |
'Z': 2**70, | |
'Y': 2**80, | |
}[suffix] | |
return number | |
suffix = number[-1] | |
number = to_number(number[:-1]) | |
number *= { | |
'y': 10**-24, | |
'z': 10**-21, | |
'a': 10**-18, | |
'f': 10**-15, | |
'p': 10**-12, | |
'n': 10**-9, | |
'u': 10**-6, | |
'm': 10**-3, | |
'c': 10**-2, | |
'd': 10**-1, | |
'': 1, | |
'da': 10, | |
'D': 10, | |
'h': 100, | |
'H': 100, | |
'k': 10**3, | |
'K': 10**3, | |
'M': 10**6, | |
'G': 10**9, | |
'T': 10**12, | |
'P': 10**15, | |
'E': 10**18, | |
'Z': 10**21, | |
'Y': 10**24 | |
}[suffix] | |
return number | |
class Enum(tuple): __getattr__ = tuple.index | |
Tokens = Enum("NUMBER STRING IDENTIFIER ASSIGN SMART_ASSIGN COLON SEMICOLON COMMA DOT ADD SUB MUL DIV MOD POW LEFT_SHIFT RIGHT_SHIFT LEFT_ROUND_PAREN RIGHT_ROUND_PAREN NOT AND NAND OR NOR XOR XNOR BIT_NOT BIT_AND BIT_NAND BIT_OR BIT_NOR BIT_XOR BIT_XNOR BEGIN END WRITE READ IF THEN ELSE WHILE DO EQ NEQ GT GTE LT LTE QUESTION_MARK ELVIS TRUE FALSE NULL COMMENT NEWLINE".split()) | |
class Token(object): | |
__slots__ = ["type", "value", "pos"] | |
def __init__(self, value, type, pos=0): | |
self.type = type | |
self.value = value | |
self.pos = pos | |
def __repr__(self): | |
return "(%s '%s' @ %s)" % (Tokens[self.type], self.value, self.pos) | |
class Node(object): | |
__slots__ = ["operation", "args", "pos"] | |
def __init__(self, operation, *args): | |
self.operation = operation | |
if type(args[0]) in (list, tuple): | |
args = args[0] | |
self.args = args | |
#self.pos = pos | |
def __repr__(self): | |
if self.operation is "NUM": | |
return "%s" % self.args[0] | |
return "(%s %s)" % (self.operation, " ".join(map(str,self.args))) | |
def eval(self, env, debug=False): | |
raise Exception("%s"%repr(self)) | |
class Block(Node): | |
def eval(self, env, debug=False): | |
for statement in self.args: | |
statement.eval(env, debug) | |
def compile(self, debug=False): | |
return "\n".join(map(lambda i: i.compile(debug), self.args)) | |
def compile_forth(self, debug=False): | |
return "\n".join(map(lambda i: i.compile_forth(debug), self.args)) | |
class Comment(Node): | |
def eval(self, env, debug=False): | |
return | |
def compile(self, debug=False): | |
return self.args[0] | |
def compile_forth(self, debug=False): | |
return "( {0} )".format(self.args[0]) | |
class Assignment(Node): | |
def eval(self, env, debug=False): | |
name, expression = self.args | |
value = expression.eval(env, debug) | |
env[name] = value | |
def compile(self, debug=False): | |
name, expression = self.args | |
return "%s = %s" % (name, expression.compile(debug)) | |
def compile_forth(self, debug=False): | |
name, expression = self.args | |
return "%s\nPUSH %s\nSTORE" % (expression.compile_forth(debug), name) | |
class Number(Node): | |
def eval(self, env, debug=False): | |
return self.args[0] | |
def compile(self, debug=False): | |
return "%s" % self.args[0] | |
def compile_forth(self, debug=False): | |
return "PUSH %s" % self.args[0] | |
class String(Node): | |
def eval(self, env, debug=False): | |
return self.args[0].strip('"') | |
def compile(self, debug=False): | |
return "\"%s\"" % self.args[0].strip('"') | |
def compile_forth(self, debug=False): | |
return '"%s"' % self.args[0].strip('"') | |
class Variable(Node): | |
def eval(self, env, debug=False): | |
try: | |
return env[self.args[0]] | |
except KeyError: | |
raise RuntimeError("Variable %s doesn't exists" % self.args[0]) | |
def compile(self, debug=False): | |
return "%s" % self.args[0] | |
def compile_forth(self, debug=False): | |
return "PUSH %s\nLOAD" % self.args[0] | |
class UnaryOperation(Node): | |
def _getop(self): | |
try: | |
return { | |
'+': '+{0}', | |
'-': '-{0}', | |
'~': '~{0}', | |
'NOT': 'not {0}', | |
}[self.operation.upper()] | |
except KeyError: | |
raise SyntaxError("Unknown operation %s" % self.operation) | |
def _getop_forth(self): | |
try: | |
return { | |
'+': '{0}', | |
'-': '{0}\nNEG', | |
'~': '{0}\nNOT', | |
'NOT': '{0} =0', | |
}[self.operation.upper()] | |
except KeyError: | |
raise SyntaxError("Unknown operation %s" % self.operation) | |
def eval(self, env, debug=False): | |
operation, value = self.operation, self.args[0] | |
value = value.eval(env, debug) | |
temp = "lambda a: %s" % self._getop().format('a') | |
value = eval(temp)(value) | |
return value | |
def compile(self, debug=False): | |
operation, value = self.operation, self.args[0] | |
value = value.compile(debug) | |
return self._getop().format(value) | |
def compile_forth(self, debug=False): | |
operation, value = self.operation, self.args[0] | |
value = value.compile_forth(debug) | |
return self._getop_forth().format(value) | |
class BinaryOperation(Node): | |
def _getop(self): | |
try: | |
return { | |
'+': '{0} + {1}', | |
'-': '{0} - {1}', | |
'*': '{0} * {1}', | |
'/': '{0} / {1}', | |
'%': '{0} % {1}', | |
'**': '{0} ** {1}', | |
'<<': '{0} << {1}', | |
'>>': '{0} >> {1}', | |
'&': '{0} & {1}', | |
'~&': '~({0} & {1})', | |
'|': '{0} | {1}', | |
'~|': '~({0} | {1})', | |
'^': '{0} ^ {1}', | |
'~^': '~({0} ^ {1})', | |
'AND': '{0} and {1}', | |
'NAND': 'not ({0} and {1})', | |
'OR': '{0} or {1}', | |
'NOR': 'not ({0} or {1})', | |
'XOR': '({0} or {1}) and not ({0} and {1})', | |
'XNOR': 'not (({0} or {1}) and not ({0} and {1}))', | |
'==': '{0} == {1}', | |
'<=': '{0} <= {1}', | |
'>=': '{0} >= {1}', | |
'<': '{0} < {1}', | |
'>': '{0} > {1}', | |
'!=': '{0} != {1}', | |
'><': '{0} != {1}', | |
'<>': '{0} != {1}', | |
'?:': '{0} if ( {0} is not None ) else {1}', | |
}[self.operation.upper()] | |
except KeyError: | |
raise SyntaxError("Unknown operation %s" % self.operation) | |
def _getop_forth(self): | |
try: | |
return { | |
'+': '{0}\n{1}\nADD',# | |
'-': '{0}\n{1}\nSUB',# | |
'*': '{0} {1} *',# | |
'/': '{0} {1} /',# | |
'%': '{0} {1} mod',# | |
'**': '{0} {1} f**',# | |
'<<': '{0} {1} 2*',# | |
'>>': '{0} {1} 2/',# | |
'&': '{0} {1} and',# | |
'~&': '{0} {1} and =0',# | |
'|': '{0} {1} or',# | |
'~|': '{0} {1} or =0',# | |
'^': '{0} {1} xor',# | |
'~^': '{0} {1} xor =0',# | |
'AND': '{0} {1} and',# | |
'NAND': '{0} {1} and =0',# | |
'OR': '{0} {1} or',# | |
'NOR': '{0} {1} or =0',# | |
'XOR': '{0} {1} xor',# | |
'XNOR': '{0} {1} xor =0',# | |
'==': '{0} {1} =', | |
'<=': '{0} {1} <=', | |
'>=': '{0} {1} >=', | |
'<': '{0} {1} <', | |
'>': '{0} {1} >', | |
'!=': '{0} {1} <>', | |
'><': '{0} {1} <>', | |
'<>': '{0} {1} <>', | |
'?:': '{0} if ( {0} is not None ) else {1}',#TODO | |
}[self.operation.upper()] | |
except KeyError: | |
raise SyntaxError("Unknown operation %s" % self.operation) | |
def eval(self, env, debug=False): | |
#DONE: this needs some work. | |
# It won't produce correct python code for some operators (namely, | |
# AND-OR-XOR [as they're uppercase - and besides, xor doesn't | |
# exist in python], <>->< and the elvis operator (?:) | |
#And, yes, I used some sorcery to get it right - but this works. | |
#Still needs work - won't work for python code | |
#This should work... mwha-ha-ha-ha! | |
operation, left, right = self.operation, self.args[0], self.args[1] | |
left = left.eval(env, debug) | |
right = right.eval(env, debug) | |
if operation == '+' and (type(left) is str or type(right) is str): | |
left = str(left) | |
right = str(right) | |
temp = "lambda a,b: %s" % self._getop().format('a', 'b') | |
value = eval(temp)(left, right) | |
return value | |
def compile(self, debug=False): | |
operation, left, right = self.operation, self.args[0], self.args[1] | |
left = left.compile(debug) | |
right = right.compile(debug) | |
if operation == '+' and ( is_string(left) or is_string(right) ): | |
left = "str(%s)" % left | |
right = "str(%s)" % right | |
return self._getop().format(left, right) | |
def compile_forth(self, debug=False): | |
operation, left, right = self.operation, self.args[0], self.args[1] | |
left = left.compile_forth(debug) | |
right = right.compile_forth(debug) | |
return self._getop_forth().format(left, right) | |
class TernaryOperation(Node): | |
def eval(self, env, debug=False): | |
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2] | |
test = test.eval(env, debug) | |
if_true = if_true.eval(env, debug) | |
if_false = if_false.eval(env, debug) | |
try: | |
value = { | |
'?': lambda a,b,c: b if a else c, | |
}[operation](test, if_true, if_false) | |
except KeyError: | |
raise SyntaxError("Unknown operation %s" % operation) | |
return value | |
def compile(self, debug=False): | |
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2] | |
test = test.compile(debug) | |
if_true = if_true.compile(debug) | |
if_false = if_false.compile(debug) | |
return "%s if %s else %s" % (if_true, test, if_false) | |
def compile_forth(self, debug=False): | |
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2] | |
test = test.compile_forth(debug) | |
if_true = if_true.compile_forth(debug) | |
if_false = if_false.compile_forth(debug) | |
return "{0} if {1} else {2} then".format(test, if_true, if_false) | |
class Write(Node): | |
def eval(self, env, debug=False): | |
if debug: | |
return | |
print(" ".join(map(lambda i: str(i.eval(env)), self.args))) | |
def compile(self, debug=False): | |
if debug: | |
return "pass" | |
return "print(%s)" % ",".join(map(lambda i: i.compile(), self.args)) | |
def compile_forth(self, debug=False): | |
if debug: | |
return "0 drop ( NOP in place of write )" | |
s = "" | |
for item in self.args: | |
if isinstance(item, String): | |
s = "{0} .\" {1}\"".format(s, item.compile_forth(debug).strip('"')) | |
elif isinstance(item, Variable) or isinstance(item, Number): | |
s = "{0} {1}\nCALL print".format(s, item.compile_forth(debug)) | |
else: | |
s = "{0} {1}\nCALL print".format(s, item.compile_forth(debug)) | |
return s | |
return '." %s" cr' % "".join(map(lambda i: str(i.compile_forth()).strip('"'), self.args)) | |
class If(Node): | |
def eval(self, env, debug=False): | |
if len(self.args) == 2: | |
condition, if_true = self.args | |
else: | |
condition, if_true, if_false = self.args | |
if condition.eval(env, debug): | |
if_true.eval(env, debug) | |
elif if_false: | |
if_false.eval(env, debug) | |
def compile(self, debug=False): | |
if len(self.args) == 2: | |
condition, if_true = self.args | |
else: | |
condition, if_true, if_false = self.args | |
s = "if %s:" % condition.compile(debug) | |
s = s + tabify(if_true.compile(debug)) | |
if if_false: | |
s = s + "\nelse:" | |
s = s + tabify(if_false.compile(debug)) | |
return s | |
def compile_forth(self, debug=False): | |
if len(self.args) == 2: | |
condition, if_true = self.args | |
else: | |
condition, if_true, if_false = self.args | |
return "{0} if {1} {2} then".format( | |
condition.compile_forth(debug), | |
if_true.compile_forth(debug), | |
"else {0}".format(if_false.compile_forth(debug)) if if_false else "") | |
class While(Node): | |
def eval(self, env, debug=False): | |
condition, body = self.args | |
while condition.eval(env, debug): | |
body.eval(env, debug) | |
def compile(self, debug=False): | |
condition, body = self.args | |
s = "while %s:" % condition.compile(debug) | |
s = s + tabify(body.compile(debug)) | |
return s | |
token_expressions = [ | |
(r'[ \n\t]+', None), #Discard whitespaces | |
(r'//[^\n]*', None), #Line comments, C-style - discarded | |
(r'#[^\n]*', None), #Line comments, Python-style | |
(r'#\[.*#\]', Tokens.COMMENT), #New-style block comment | |
(r'(\+|-|\*|/|%|\*\*|&|\||^|<<|>>|\?)=', Tokens.SMART_ASSIGN), | |
(r'\?:', Tokens.ELVIS), | |
(r'\?', Tokens.QUESTION_MARK), | |
(r';', Tokens.SEMICOLON), | |
(r',', Tokens.COMMA), | |
(r'\.', Tokens.DOT), | |
(r'=', Tokens.ASSIGN), | |
(r':', Tokens.COLON), | |
(r'\(', Tokens.LEFT_ROUND_PAREN), | |
(r'\)', Tokens.RIGHT_ROUND_PAREN), | |
(r'\+', Tokens.ADD), #Mathematical operators | |
(r'-', Tokens.SUB), | |
(r'\*\*', Tokens.POW), | |
(r'\*', Tokens.MUL), | |
(r'/', Tokens.DIV), | |
(r'%', Tokens.MOD), | |
(r'<<', Tokens.LEFT_SHIFT), | |
(r'>>', Tokens.RIGHT_SHIFT), | |
(r'~&', Tokens.BIT_NAND), #Bitwise operators | |
(r'~\|', Tokens.BIT_NOR), | |
(r'~\^', Tokens.BIT_XNOR), | |
(r'~', Tokens.BIT_NOT), | |
(r'&', Tokens.BIT_AND), | |
(r'\|', Tokens.BIT_OR), | |
(r'\^', Tokens.BIT_XOR), | |
(r'not', Tokens.NOT), #Boolean operators | |
(r'nand', Tokens.NAND), | |
(r'and', Tokens.AND), | |
(r'or', Tokens.OR), | |
(r'nor', Tokens.NOR), | |
(r'xor', Tokens.XOR), | |
(r'xnor', Tokens.XNOR), | |
(r'==', Tokens.EQ), #Comparisons | |
(r'<>|><|!=', Tokens.NEQ), | |
(r'>=', Tokens.GTE), | |
(r'<=', Tokens.LTE), | |
(r'>', Tokens.GT), | |
(r'<', Tokens.LT), | |
(r'read', Tokens.READ), #Statements | |
(r'write', Tokens.WRITE), | |
(r'if(?![a-z_])', Tokens.IF), | |
(r'then(?![a-z_])', Tokens.THEN), | |
(r'else(?![a-z_])', Tokens.ELSE), | |
(r'while(?![a-z_])', Tokens.WHILE), | |
(r'do(?![a-z_])', Tokens.DO), | |
(r'begin', Tokens.BEGIN), | |
(r'end', Tokens.END), | |
(r'true(?![a-z_])', Tokens.TRUE), | |
(r'false(?![a-z_])', Tokens.FALSE), | |
(r'null(?![a-z_])', Tokens.NULL), | |
(r'[+-]?[0-9]+(\.[0-9]*)?(((da)|(d(?!a))|[cmunpfazyhDH])(?!i)|[kKMGTPEZY]i?|e[+-]?[0-9]+)?', Tokens.NUMBER), | |
(r'"[^"\n]*"', Tokens.STRING), | |
(r"'[^'\n]*'", Tokens.STRING), | |
(r"\n", Tokens.NEWLINE), | |
(r'[A-Za-z_][A-Za-z0-9_]*', Tokens.IDENTIFIER), | |
] | |
def tokenize(text): | |
position = 0 | |
while position < len(text): | |
match = None | |
for token_expression in token_expressions: | |
pattern, tag = token_expression | |
regex = re.compile(pattern, re.I) | |
match = regex.match(text, position) | |
if match: | |
matched_text = match.group(0) | |
if tag is not None: | |
token = Token(matched_text, tag, pos=position) | |
yield token | |
break | |
if match is None: | |
raise SyntaxError("Illegal character %r @ %s" % (text[position], position)) | |
else: | |
position = match.end(0) | |
#TODO: I could use function decorators to write shorter code... | |
# or perhaps function factories. For example, and_expr, xor_expr, or_expr | |
# are almost identical one to the other | |
class TreeBuilder(object): | |
def parse(self, text, _who_you_gonna_call = None): | |
self.tokens = tokenize(text) | |
self.token, self.next_token = None, None | |
self._advance() | |
if _who_you_gonna_call is None: | |
_who_you_gonna_call = self.block | |
return _who_you_gonna_call() | |
#self.statement()#self.expression()#self.block() | |
def _advance(self): | |
self.token, self.next_token = self.next_token, next(self.tokens, None) | |
def _accept(self, *token_types): | |
if self.next_token is not None and (self.next_token.type in token_types | |
or self.next_token.type is Tokens.COMMENT): | |
self._advance() | |
return True | |
return False | |
def _expect(self, *token_types): | |
if not self._accept(*token_types): | |
e=", ".join(map(lambda i:Tokens[i], token_types)) | |
raise SyntaxError("{0}: expected {1}".format(self.token, e))#Tokens[token_types]) | |
def atom(self): | |
"<atom> ::= <identifier> | <literal> | <enclosure>" | |
if self._accept(Tokens.IDENTIFIER): | |
value = self.token.value | |
return Variable("VAR", value) | |
elif self._accept(Tokens.NUMBER): | |
value = parse_number(self.token.value) | |
return Number("NUM", value) | |
elif self._accept(Tokens.STRING): | |
value = self.token.value.strip("'\"") | |
return String("STR", value) | |
elif self._accept(Tokens.TRUE, Tokens.FALSE, Tokens.NULL): | |
value = { | |
Tokens.TRUE: True, | |
Tokens.FALSE: False, | |
Tokens.NULL: None, | |
}[self.token.type] | |
return Number("NUM", value) | |
elif self._accept(Tokens.LEFT_ROUND_PAREN): | |
value = self.expression() | |
self._expect(Tokens.RIGHT_ROUND_PAREN) | |
return value | |
elif self._accept(Tokens.COMMENT): | |
return Comment("###", self.token.value) | |
raise SyntaxError("{0}: expected IDENTIFIER, NUMBER, STRING or LEFT_ROUND_PAREN".format(self.token)) | |
def primary(self): | |
"<primary> ::= <atom>" | |
return self.atom() | |
def power(self): | |
"<power> ::= <primary> ['**' <unary_expr>]" | |
value = self.primary() | |
if self._accept(Tokens.POW): | |
right = self.unary_expr() | |
return BinaryOperation('**', value, right) | |
return value | |
def unary_expr(self): | |
"<unary_expr> ::= <power> | ('+'|'-'|'~') <unary_expr>" | |
if self._accept(Tokens.ADD, Tokens.SUB, Tokens.BIT_NOT): | |
operation = self.token.value | |
value = self.unary_expr() | |
return UnaryOperation(operation, value) | |
return self.power() | |
def mul_expr(self): | |
"<mul_expr> ::= <unary_expr> [ ('*'|'/'|'%') <mul_expr> ]" | |
value = self.unary_expr() | |
if self._accept(Tokens.MUL, Tokens.DIV, Tokens.MOD): | |
operation = self.token.value | |
right = self.mul_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def add_expr(self): | |
"<add_expr> ::= <mul_expr> [ ('+'|'-') <add_expr> ]" | |
value = self.mul_expr() | |
if self._accept(Tokens.ADD, Tokens.SUB): | |
operation = self.token.value | |
right = self.add_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def shift_expr(self): | |
"<shift_expr> ::= <add_expr> [ ('<<'|'>>') <shift_expr> ]" | |
value = self.add_expr() | |
if self._accept(Tokens.LEFT_SHIFT, Tokens.RIGHT_SHIFT): | |
operation = self.token.value | |
right = self.shift_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def and_expr(self): | |
"<and_expr> ::= <shift_expr> [ '&' <and_expr> ]" | |
value = self.shift_expr() | |
if self._accept(Tokens.BIT_AND, Tokens.BIT_NAND): | |
operation = self.token.value | |
right = self.add_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def xor_expr(self): | |
"<xor_expr> ::= <and_expr> [ '^' <xor_expr> ]" | |
value = self.and_expr() | |
if self._accept(Tokens.BIT_XOR, Tokens.BIT_XNOR): | |
operation = self.token.value | |
right = self.xor_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def or_expr(self): | |
"<or_expr> ::= <xor_expr> [ '|' <or_expr> ]" | |
value = self.xor_expr() | |
if self._accept(Tokens.BIT_OR, Tokens.BIT_NOR, Tokens.ELVIS): | |
operation = self.token.value | |
right = self.or_expr() | |
return BinaryOperation(operation, value, right) | |
return value | |
def comparison(self): | |
"<comparison> ::= <or_expr> ( <comp_operator> <or_expr> )*" | |
"<comp_operator> ::= '<' | '>' | '<=' | '>=' | '==' | '<>' | '><'" | |
value = self.or_expr() | |
while self._accept(Tokens.EQ, Tokens.NEQ, Tokens.GT, Tokens.GTE, Tokens.LT, Tokens.LTE): | |
operation = self.token.value | |
right = self.or_expr() | |
value = BinaryOperation(operation, value, right) | |
return value | |
def or_test(self): | |
"<or_test> ::= <xor_test> [ 'or' | 'nor' <or_test> ]" | |
value = self.xor_test() | |
if self._accept(Tokens.OR, Tokens.NOR): | |
operation = self.token.value | |
right = self.or_test() | |
return BinaryOperation(operation, value, right) | |
return value | |
def xor_test(self): | |
"<xor_test> ::= <and_test> [ 'xor' | 'xnor' <xor_test> ]" | |
value = self.and_test() | |
if self._accept(Tokens.XOR, Tokens.XNOR): | |
operation = self.token.value | |
right = self.xor_test() | |
return BinaryOperation(operation, value, right) | |
return value | |
def and_test(self): | |
"<and_test> ::= <not_test> [ 'and' | 'nand' <and_test> ]" | |
value = self.not_test() | |
if self._accept(Tokens.AND, Tokens.NAND): | |
operation = self.token.value | |
right = self.and_test() | |
return BinaryOperation(operation, value, right) | |
return value | |
def not_test(self): | |
"<not_test> ::= <comparison> | 'not' <not_test>" | |
if self._accept(Tokens.NOT): | |
value = self.not_test() | |
return UnaryOperation("NOT", value) | |
return self.comparison() | |
def conditional_expression(self): | |
"<conditional_expression> ::= <or_test> ('?' <or_test> ':' <expression>)" | |
value = self.or_test() | |
if self._accept(Tokens.QUESTION_MARK): | |
test = self.or_test() | |
self._expect(Tokens.COLON) | |
if_false = self.expression() | |
return TernaryOperation("?", test, value, if_false) | |
return value | |
def expression(self): | |
"<expression> ::= <conditional_expression>" | |
return self.conditional_expression() | |
def expression_list(self): | |
"<expression_list> ::= <expression> ( ',' <expression> )*" | |
paren = self._accept(Tokens.LEFT_ROUND_PAREN) | |
value = [self.expression()] | |
while self._accept(Tokens.COMMA): | |
value.append(self.expression()) | |
if paren: | |
self._expect(Tokens.RIGHT_ROUND_PAREN) | |
return value | |
def assignment(self): | |
"<assignment> ::= <identifier> <assign_operator> <expression>" | |
"<assign_operator> ::= ':=' | '+=' | '-=' | '*=' | '/=' | '%=' | '**=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '?='" | |
if self.token.type is not Tokens.IDENTIFIER \ | |
and not self._accept(Tokens.IDENTIFIER): | |
raise SyntaxError("{0}: expected IDENTIFIER".format(self.token)) | |
variable = self.token.value | |
self._expect(Tokens.ASSIGN, Tokens.SMART_ASSIGN) | |
assign_type, assign_literal = self.token.type, self.token.value | |
value = self.expression() | |
if assign_type is Tokens.SMART_ASSIGN: | |
var_ = Variable("VAR", variable) | |
op_type = assign_literal[:-1] | |
op_type = '?:' if op_type=='?' else op_type | |
value = BinaryOperation(op_type, var_, value) | |
return Assignment("SET", variable, value) | |
def statement(self): | |
if self._accept(Tokens.IDENTIFIER, Tokens.IF, Tokens.WHILE, Tokens.WRITE): | |
value = { | |
Tokens.IDENTIFIER: self.assignment, | |
Tokens.IF: self.if_block, | |
Tokens.WHILE: self.while_block, | |
Tokens.WRITE: self.write_statement, | |
}[self.token.type]() | |
return value | |
def expr_or_statement(self): | |
#I'm approaching this the wrong way... | |
#There's too much code here | |
if self._accept(Tokens.IF, Tokens.WHILE, Tokens.WRITE): | |
value = { | |
Tokens.IF: self.if_block, | |
Tokens.WHILE: self.while_block, | |
Tokens.WRITE: self.write_statement, | |
}[self.token.type]() | |
return value | |
elif self._accept(Tokens.IDENTIFIER, ): | |
variable = self.token.value | |
if self._accept(Tokens.ASSIGN, Tokens.SMART_ASSIGN): | |
assign_type, assign_literal = self.token.type, self.token.value | |
value = self.expression() | |
if assign_type is Tokens.SMART_ASSIGN: | |
var_ = Variable("VAR", variable) | |
op_type = assign_literal[:-1] | |
op_type = '?:' if op_type=='?' else op_type | |
value = BinaryOperation(op_type, var_, value) | |
return Assignment("SET", variable, value) | |
else: | |
value = Variable("VAR", self.token.value) | |
if self._accept(Tokens.QUESTION_MARK): | |
test = self.or_test() | |
self._expect(Tokens.COLON) | |
if_false = self.expression() | |
return TernaryOperation("?", test, value, if_false) | |
return value | |
return self.expression() | |
def if_block(self): | |
"<if> ::= 'if' <expression> 'then' <block> [ 'else' <block> ] 'end'" | |
condition = self.expression() | |
self._expect(Tokens.THEN) | |
if_true = self.block() | |
if_false = None | |
if self._accept(Tokens.ELSE): | |
if_false = self.block() | |
self._expect(Tokens.END) | |
return If("IF", condition, if_true, if_false) | |
def while_block(self): | |
"<while> ::= 'while' <expression> 'do' <block> 'end'" | |
condition = self.expression() | |
self._expect(Tokens.DO) | |
operations = self.block() | |
self._expect(Tokens.END) | |
return While("WHILE", condition, operations) | |
def write_statement(self): | |
"<write> ::= 'write' <expression-list> [ 'on' <identifier> ]" | |
value = self.expression_list() #TODO: complete write_statement | |
return Write("WRITE", value) | |
def line(self): | |
"<line> ::= <statement> ';'" | |
value = self.statement() | |
#if value: | |
self._accept(Tokens.SEMICOLON, Tokens.NEWLINE) | |
return value | |
def block(self): | |
"<block> ::= <line> <line>*" | |
lines = [] | |
line = self.line() | |
while line is not None: | |
lines.append(line) | |
line = self.line() | |
return Block("BLOCK", lines) | |
if 0: | |
t = TreeBuilder() | |
env = {} | |
#for token in tokenize("title?='Untitled'"): | |
# print(token) | |
print(t.parse("a:=10k*1k/10**6;")) | |
print(t.parse("a:=10k*1k/10**6;").eval(env)) | |
print(t.parse("b:='3'+('<'?3<2:'>=')+'2';").eval(env)) | |
t.parse("title:=Null;").eval(env) | |
t.parse("title?='Untitled';").eval(env) | |
t.parse("i:=10;while i do write i; i-=1; end;").eval(env) | |
print(env) | |
if __name__ == "__main__": | |
t, env = TreeBuilder(), {} | |
line, debug = '', True | |
while line.lower() != 'quit': | |
line = input(">>> ").strip() | |
if line.lower() not in ('quit', 'debug'): | |
try: | |
parsed = t.parse(line, _who_you_gonna_call = t.block)#t.expr_or_statement)#t.expression) #t.expr_or_statement) | |
if debug: | |
print("AST:", parsed) | |
print("Forth Code:\n", parsed.compile_forth()) | |
evaluated = parsed.eval(env) | |
if evaluated is not None: | |
print(evaluated) | |
exec('_ = %s'%evaluated) | |
env['_'] = evaluated | |
except SyntaxError as e: | |
print("Syntax Error:",e) | |
except RuntimeError as e: | |
print("Runtime Error:", e) | |
elif line.lower() == 'debug': | |
debug = not debug |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment