Last active
May 23, 2018 05:59
-
-
Save ble/8312995 to your computer and use it in GitHub Desktop.
Expression parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ble | |
import scala.language.implicitConversions | |
package object parsimple { | |
import scala.util.parsing.combinator.{ JavaTokenParsers, ImplicitConversions } | |
import scala.util.parsing.input.CharSequenceReader | |
implicit def promoteToReader(cs: CharSequence): CharSequenceReader = new CharSequenceReader(cs) | |
//AST contains the definitions of all types used to represent parsed expressions | |
object AST { | |
//This trait does nothing but indicate that a type is an expression, | |
//but we could give it more behavior. | |
sealed trait Expression | |
//Ops contains the definitions of all operators recognized by our parser and | |
//some associated functionality. | |
object Ops { | |
//An Operator just needs some string to represent it | |
sealed class Operator(val opString: String) | |
//There's no difference here between a BinaryOperator and a UnaryOperator, | |
//but you can't substitute one for another. | |
sealed class BinaryOperator(op: String) extends Operator(op) | |
sealed class UnaryOperator(op: String) extends Operator(op) | |
//Define our unary operators: | |
case object LogicalNot extends UnaryOperator("!") | |
case object BitwiseNot extends UnaryOperator("~") | |
//Binary operators, grouped by decreasing precedence. | |
case object Mul extends BinaryOperator("*") | |
case object Div extends BinaryOperator("/") | |
case object Add extends BinaryOperator("+") | |
case object Sub extends BinaryOperator("-") | |
case object Lt extends BinaryOperator("<") | |
case object Lte extends BinaryOperator("<=") | |
case object Gt extends BinaryOperator(">") | |
case object Gte extends BinaryOperator(">=") | |
//C++ puts (in-)equality below order comparisons, so what the hell, we'll | |
//do that too. | |
case object Eq extends BinaryOperator("==") | |
case object NEq extends BinaryOperator("!=") | |
//We want to be able to enumerate these guys, so: | |
val binaryOperators = List(Mul, Div, Add, Sub, Lt, Lte, Gt, Gte, Eq, NEq) | |
val unaryOperators = List(LogicalNot, BitwiseNot) | |
//These are maps that let us look up operators by their operator string: | |
//the helper function used to define them comes later. | |
val stringToBinaryOp = opsToLookupTable(binaryOperators) | |
val stringToUnaryOp = opsToLookupTable(unaryOperators) | |
//BinaryOp and UnaryOp represent parsed expressions: | |
//BinaryOp represents <lhs> <operator> <rhs> | |
case class BinaryOp( | |
op: BinaryOperator, | |
lhs: Expression, | |
rhs: Expression) extends Expression | |
//Unary could represent either <operator> <operand> or | |
// <operand> <operator> | |
case class UnaryOp( | |
op: UnaryOperator, | |
operand: Expression) extends Expression | |
//The helper function used to make the map from string to operator: | |
//Since this is one of the more complicated method signatures in this | |
//example, an explanation of it follows immediately after the method body. | |
def opsToLookupTable[OpType <: Operator](ops: List[OpType]) = ( | |
for { | |
(op, opString) <- ops.zip(ops.map(_.opString)) | |
} yield(opString -> op)).toMap | |
//What the heck does | |
// def opsToLookupTable[OpType <: Operator](ops: List[OpType]) | |
//mean? | |
//def = "I'm defining a method on my containing object, class, or trait" | |
//opsToLookupTable = "call it this silly long name" | |
//[OpType <: Operator] is a doozy, let's do it in parts: | |
// [OpType] would just mean | |
// "Part of my definition will allow you to use any type; whichever | |
// type you use, I'll refer to as OpType" | |
// OpType <: Operator means | |
// the type Operator is an upper bound to the type OpType | |
// (a subclass is "less" than the classes it inherits from | |
// and a superclass is "greater" than the classes that inherit form it), so | |
//[OpType <: Operator] = "Part of my definition will allow you to use | |
// Operator or any subclass thereof; whichever type | |
// you use, I'll refer to as OpType" | |
//Last but not least, | |
//(ops: List[OpType]) = "Remember that OpType? I'll take a list of those | |
// as my one and only argument." | |
//This function definition could have but does not need a return type, | |
//which would add to the signature like this: | |
// def opsToLookupTable[OpType <: Operator](ops: List[OpType]): Map[String, OpType] | |
} | |
//Two pseudo-constructors to simplify looking up an operator and | |
//creating an expression that uses that operator. | |
object UnaryOperation { | |
def apply(opString: String, operand: Expression): Expression = | |
Ops.UnaryOp(Ops.stringToUnaryOp(opString), operand) | |
} | |
object BinaryOperation { | |
def apply(opString: String, lhs: Expression, rhs: Expression): Expression = | |
Ops.BinaryOp(Ops.stringToBinaryOp(opString), lhs, rhs) | |
} | |
//These next three classes represent expressions like | |
// f(x, y, z), | |
// v[i, j, k], and | |
// o.field, | |
//respectively. | |
case class FunctionInvocation( | |
f: Expression, | |
args: List[Expression]) extends Expression | |
case class IndexLookup( | |
array: Expression, | |
indices: List[Expression]) extends Expression | |
case class FieldSelection( | |
struct: Expression, | |
fieldName: Sym) extends Expression | |
//This class represents an expression that is a literal number. | |
case class Num(value: Double) extends Expression | |
//This class represents an expression or part of an expression that is | |
//a symbol. | |
case class Sym(name: Symbol) extends Expression | |
} | |
//I'll define all the actual parsing in this object: | |
object ExprParse { | |
//Aside to people who know what I'm talking about: | |
//Rather than having this object extend JavaTokenParsers, | |
//it will have an instance of JavaTokenParsers and import | |
//all the names from it. | |
//This way ExprParse "uses-a JavaTokenParsers" rather than | |
// ExprParse "is-a JavaTokenParsers" | |
//and the fields of ExprParse don't contain a bunch of | |
//inherited junk. | |
//To people who don't know what I'm talking about, "anon" has a lot of | |
//functionality that I need and the import declaration lets me write code | |
//to access all of that functionality as if they were members of ExprParse, | |
//making the whole thing much easier to write... and a bit mysterious to read. | |
object anon extends JavaTokenParsers | |
import anon._ | |
//The most important type that we're getting from anon is called Parser | |
//and although we'll be using lots and lots of Parsers, you won't see it | |
//mentioned much: where Scala can figure out the type of something, you | |
//don't have to write it down. | |
//This is a double-edged sword: sometimes saving the effort typing is good, | |
//sometimes code is much harder to understand without return types. | |
//Similarly, we don't want to be writing "AST.this" and "AST.Ops.that" all | |
//over, so we import them, too. | |
import AST._ | |
import Ops._ | |
//Because the parser rules used by expression may also use expression, | |
//Scala's type inference can't determine what the type of expression | |
//should be-- unless we make it clear by adding a return type. | |
def expression: Parser[Expression] = equality | |
//Parser[Expression] means | |
// "A parser that, when it succeeds, has constructed an Expression from | |
// the input it parsed." | |
//This is a helper function; it's signature means | |
//(opString: String) = "I take one argument, it's a string" | |
//: (Expression, Expression) => Expression = | |
// I return a function that takes two Expressions as arguments and returns | |
// an argument | |
def processBinaryOp(opString: String): (Expression, Expression) => Expression = | |
BinaryOperation(opString, _, _) | |
//These rules are super simple and very, very similar; the differences | |
//are that each one has different operators and each one refers to the | |
//one that follows immediately afterwards. | |
def equality = inequality.*( "[=!]=".r ^^ processBinaryOp ) | |
//"An equality expression is one or more inequality expressions, | |
// with either == or != in between those inequality expressions." | |
//Hint: the `.r` in `"some string".r` means, "as a regex". | |
def inequality = arith.*( "[<>]=?".r ^^ processBinaryOp ) | |
def arith = term.*( "[+-]".r ^^ processBinaryOp ) | |
def term = factor.*( "[*/]".r ^^ processBinaryOp ) | |
//Here's an interesting one: we have to handle one or more unary operators | |
//and these unary operators are right-associative: | |
def factor = "[!~]".r.* ~ negatable ^^ { | |
case negateOpStrings ~ rest => | |
negateOpStrings.foldRight(rest)( | |
(opString, compounded) => UnaryOperation(opString, compounded)) | |
} | |
//"A negatable expression is an invocable expression followed by one or more | |
// invocations." | |
def negatable = invocable ~ invocation.* ^^ { | |
case lhs ~ suffixes => suffixes.foldLeft(lhs)((x, f) => f(x)) | |
} | |
def invocation = | |
functionInvoke | arrayIndex | fieldSelection | |
//The next 3 rules here are pretty dope. | |
//They are actually parsing rules that return functions! | |
//The returned functions take an expression and return expressions like | |
// thatExpression(some, function, args), | |
// thatExpression[index], or | |
// thatExpression.aField | |
//Even though these parsers have a kinda complicated type, | |
//we don't have to give them their explicit return type: Parser[Expression => Expression] | |
def functionInvoke = "(" ~> repsep(expression, ",") <~ ")" ^^ { | |
case fnArgs => (f: Expression) => FunctionInvocation(f, fnArgs) | |
} | |
def arrayIndex = "[" ~> repsep(expression, ",") <~ "]" ^^ { | |
case indices => (array: Expression) => IndexLookup(array, indices) | |
} | |
def fieldSelection = "." ~> symbol ^^ { | |
case fieldName => (struct: Expression) => FieldSelection(struct, fieldName) | |
} | |
//We have to define what's going to be invoked, be indexed, or have its fields | |
//selected, too. | |
def invocable = number | symbol | parenthesized | |
//These last 3 are dead simple, too | |
def number = floatingPointNumber ^^ (numStr => Num(numStr.toDouble)) | |
def symbol = """[A-Za-z][A-Za-z0-9_]*""".r ^^ (name => Sym(Symbol(name))) | |
def parenthesized = "(" ~> expression <~ ")" | |
} | |
def parse(input: String) = ExprParse.expression(input) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment