Created
July 16, 2024 04:56
-
-
Save freddi301/1e3eae77e8bb9e54070ca7a392c7ad64 to your computer and use it in GitHub Desktop.
Idris 2 Parser based on combinators
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser | |
import Data.Nat | |
import Data.List | |
%default total | |
-- interface Codec input output where | |
-- encode : input -> output | |
-- decode : output -> Maybe input | |
-- EncodeDecode : (x : input) -> decode (encode x) = Just x | |
public export | |
data Outcome : Type -> Type -> Type where | |
Failure : error -> (position : Nat) -> Outcome error output | |
Success : output -> (consumed : Nat) -> Outcome error output | |
export | |
Eq error => Eq output => | |
Eq (Outcome error output) where | |
(==) (Failure reason position) (Failure reason' position') = reason == reason' && position == position' | |
(==) (Success result consumed) (Success result' consumed') = result == result' && consumed == consumed' | |
(==) _ _ = False | |
export | |
Show error => Show output => | |
Show (Outcome error output) where | |
show (Failure reason position) = "Failure (" ++ show reason ++ ") " ++ show position | |
show (Success result consumed) = "Success (" ++ show result ++ ") " ++ show consumed | |
namespace Outcome | |
public export | |
(>>=) : Outcome error output_x -> (output_x -> Nat -> Outcome error output_y) -> Outcome error output_y | |
Failure reason position >>= binder = Failure reason position | |
Success result consumed >>= binder = binder result consumed | |
export | |
record Parser input error output where | |
constructor MakeParser | |
run : List input -> Outcome error output | |
public export interface ParserErrorEndOfInputExpected error where endOfInputExpected : error | |
export | |
parse : | |
ParserErrorEndOfInputExpected error => | |
Parser input error output -> List input -> Outcome error output | |
parse parser tokens = parser.run tokens >>= \result, consumed => | |
if consumed == length tokens then Success result consumed else Failure endOfInputExpected consumed | |
--- Basics | |
export | |
fail : error -> Parser input error output | |
fail reason = MakeParser $ \tokens => | |
Failure reason Z | |
export | |
yield : output -> Parser input error output | |
yield result = MakeParser $ \tokens => | |
Success result Z | |
public export interface ParserErrorUnexpectedEndOfInput error where unexpectedEndOfInput : error | |
export | |
take : | |
ParserErrorUnexpectedEndOfInput error => | |
Parser input error input | |
take = MakeParser $ \tokens => case tokens of | |
[] => Failure unexpectedEndOfInput Z | |
token :: tokens => Success token (S Z) | |
export | |
(>>=) : Parser input error output_x -> (output_x -> Parser input error output_y) -> Parser input error output_y | |
(>>=) parser binder = MakeParser $ \tokens => | |
parser.run tokens >>= \result_parser, consumed_parser => | |
case (binder result_parser).run (drop consumed_parser tokens) of | |
Failure reason position => Failure reason (consumed_parser + position) | |
Success result_binder consumed_binder => Success result_binder (consumed_parser + consumed_binder) | |
public export interface ParserErrorNoAlternative error where noAlternative : (first : (error, Nat)) -> (second : (error, Nat)) -> error | |
export | |
(<|>) : | |
ParserErrorNoAlternative error => | |
Parser input error output -> Parser input error output -> Parser input error output | |
(<|>) first second = MakeParser $ \tokens => | |
case first.run tokens of | |
Failure reason_first position_first => case second.run tokens of | |
Failure reason_second position_second => Failure (noAlternative (reason_first, position_first) (reason_second, position_second)) Z | |
outcome => outcome | |
outcome => outcome | |
repeatRec : (max : Maybe Nat) -> Parser input error output -> Nat -> List input -> (List output, Nat) | |
repeatRec (Just Z) parser p tokens = ([], 0) | |
repeatRec max parser (S Z) (token :: tokens) = case parser.run tokens of | |
Failure reason position => ([], 0) | |
Success result consumed_head => | |
let (results, consumed_tail) = repeatRec (pred <$> max) parser consumed_head tokens in | |
(result :: results, consumed_head + consumed_tail) | |
repeatRec max parser (S p) (token :: tokens) = repeatRec max parser p tokens | |
repeatRec max parser p tokens = ([], 0) | |
export | |
repeat : | |
{default Nothing max : Maybe Nat} -> | |
Parser input error output -> Parser input error (List output) | |
repeat {max = Just Z} parser = yield [] | |
repeat {max} parser = MakeParser $ \tokens => case parser.run tokens of | |
Failure reason position => Success [] 0 | |
Success result consumed_head => | |
let (results, consumed_tail) = repeatRec (pred <$> max) parser consumed_head tokens in | |
Success (result :: results) (consumed_head + consumed_tail) | |
--- Utils | |
export | |
(>>) : Parser input error output_x -> Parser input error output_y -> Parser input error output_y | |
first >> second = first >>= const second | |
export infixl 1 << | |
export | |
(<<) : Parser input error output_x -> Parser input error output_y -> Parser input error output_x | |
first << second = first >>= \result => second >>= const (yield result) | |
export | |
(<$>) : (output_x -> output_y) -> Parser input error output_x -> Parser input error output_y | |
mapper <$> parser = parser >>= \result => yield $ mapper result | |
export | |
map : (output_x -> output_y) -> Parser input error output_x -> Parser input error output_y | |
map = (<$>) | |
export | |
sequence : List (Parser input error output) -> Parser input error (List output) | |
sequence [] = yield [] | |
sequence (parser :: parsers) = parser >>= \result => sequence parsers >>= \results => yield (result :: results) | |
public export interface ParserErrorNotMatched error input where notMatched : (token : input) -> (expected : input) -> error | |
export | |
match : | |
ParserErrorUnexpectedEndOfInput error => ParserErrorNotMatched error input => | |
Eq input => input -> Parser input error input | |
match expected = MakeParser $ \tokens => case tokens of | |
[] => Failure unexpectedEndOfInput Z | |
(token :: tokens) => if token == expected then Success token (S Z) else Failure (notMatched token expected) Z | |
export | |
matches : | |
ParserErrorUnexpectedEndOfInput error => ParserErrorNotMatched error Char => | |
String -> Parser Char error String | |
matches expected = map pack $ sequence $ map Parser.match $ unpack expected | |
public export interface ParserErrorNotSatisfied error input where notSatisfied : (token : input) -> (predicate : String) -> error | |
export | |
satisfy : | |
ParserErrorUnexpectedEndOfInput error => ParserErrorNotSatisfied error input => {default "" name: String} -> | |
(input -> Bool) -> Parser input error input | |
satisfy predicate = MakeParser $ \tokens => case tokens of | |
[] => Failure unexpectedEndOfInput Z | |
(token :: tokens) => if predicate token then Success token (S Z) else Failure (notSatisfied token name) Z | |
namespace Satisfy | |
export | |
(||) : (input -> Bool) -> (input -> Bool) -> (input -> Bool) | |
(||) predicate_a predicate_b = \token => predicate_a token || predicate_b token | |
export | |
optional : | |
ParserErrorNoAlternative error => | |
Parser input error output -> Parser input error (Maybe output) | |
optional parser = (Just <$> parser) <|> (yield Nothing) | |
export | |
separatedBy : | |
ParserErrorNoAlternative error => | |
Parser input error _ -> Parser input error output -> Parser input error (List output) | |
separatedBy separator parser = parser >>= \result => repeat (separator >> parser) >>= \results => yield (result :: results) | |
public export interface ParserErrorNotEnoughItems error where notEnoughItems : (actual : Nat) -> (parser : String) -> (min : Nat) -> error | |
export | |
min : | |
ParserErrorNotEnoughItems error => {default "" name : String} -> | |
Nat -> Parser input error (List output) -> Parser input error (List output) | |
min n parser = parser >>= \results => if length results >= n then yield results else fail $ notEnoughItems (length results) name n | |
public export interface ParserErrorRecursionLimit error where recursionLimit : error | |
export | |
recursive : | |
ParserErrorRecursionLimit error => | |
Nat -> (Parser input error output -> Parser input error output) -> Parser input error output | |
recursive Z factory = factory $ fail recursionLimit | |
recursive (S p) factory = factory (recursive p factory) | |
--- Test | |
data TestsError : Type -> Type where | |
EndOfInputExpected : TestsError input | |
UnexpectedEndOfInput : TestsError input | |
NoAlternative : (first : (TestsError input, Nat)) -> (second : (TestsError input, Nat)) -> TestsError input | |
NotMatched : (token : input) -> (expected : input) -> TestsError input | |
NotSatisfied : (token : input) -> (predicate : String) -> TestsError input | |
RecursionLimit : TestsError input | |
NotEnoughItems : (actual : Nat) -> (parser : String) -> (min : Nat) -> TestsError input | |
Custom : TestsError input | |
ParserErrorEndOfInputExpected (TestsError input) where endOfInputExpected = EndOfInputExpected | |
ParserErrorUnexpectedEndOfInput (TestsError input) where unexpectedEndOfInput = UnexpectedEndOfInput | |
ParserErrorNoAlternative (TestsError input) where noAlternative = NoAlternative | |
ParserErrorNotMatched (TestsError input) input where notMatched = NotMatched | |
ParserErrorNotSatisfied (TestsError input) input where notSatisfied = NotSatisfied | |
ParserErrorNotEnoughItems (TestsError input) where notEnoughItems = NotEnoughItems | |
ParserErrorRecursionLimit (TestsError input) where recursionLimit = RecursionLimit | |
Test_1_parser : Parser Char (TestsError Char) (List Char) | |
Test_1_parser = repeat take | |
Test_1_result : Outcome (TestsError Char) (List Char) | |
Test_1_result = parse Test_1_parser (unpack "abc") | |
Test_1 : Test_1_result = Success (unpack "abc") 3 | |
Test_1 = Refl | |
Test_2_parser : Parser Char (TestsError Char) (List String) | |
Test_2_parser = repeat (take >>= \a => take >>= \b => yield $ pack [a, b]) | |
Test_2_result : Outcome (TestsError Char) (List String) | |
Test_2_result = parse Test_2_parser (unpack "abcd") | |
Test_2 : Test_2_result = Success ["ab", "cd"] 4 | |
Test_2 = Refl | |
Test_3_parser : Parser Char (TestsError Char) (List (List Char)) | |
Test_3_parser = repeat $ repeat $ take | |
Test_3_result : Outcome (TestsError Char) (List (List Char)) | |
Test_3_result = parse Test_3_parser (unpack "abc") | |
Test_3 : Test_3_result = Success [unpack "abc", []] 3 | |
Test_3 = Refl | |
Test_4_parser : Parser Char (TestsError Char) String | |
Test_4_parser = pack . (:: []) <$> take | |
Test_4_result : Outcome (TestsError Char) String | |
Test_4_result = parse Test_4_parser (unpack "a") | |
Test_4 : Test_4_result = Success "a" 1 | |
Test_4 = Refl | |
Test_5_parser : Parser Char (TestsError Char) (List String) | |
Test_5_parser = separatedBy ((repeat $ satisfy isSpace) >> match ',' >> (repeat $ satisfy isSpace)) (pack <$> repeat (satisfy isAlpha)) | |
Test_5_result : Outcome (TestsError Char) (List String) | |
Test_5_result = parse Test_5_parser (unpack "one, two, three , four") | |
Test_5 : Test_5_result = Success ["one", "two", "three", "four"] 23 | |
Test_5 = Refl | |
Test_6_parser : Parser Char (TestsError Char) Char | |
Test_6_parser = satisfy {name="isAlpha"} isAlpha | |
Test_6_result_1 : Outcome (TestsError Char) Char | |
Test_6_result_1 = parse Test_6_parser (unpack "a") | |
Test_6_1 : Test_6_result_1 = Success 'a' 1 | |
Test_6_1 = Refl | |
Test_6_result_2 : Outcome (TestsError Char) (Char) | |
Test_6_result_2 = parse Test_6_parser (unpack "4") | |
Test_6_2 : Test_6_result_2 = Failure (notSatisfied '4' "isAlpha") 0 | |
Test_6_2 = Refl | |
data Test7Tree : Type -> Type where | |
Test7Leaf : value -> Test7Tree value | |
Test7Branch : Test7Tree value -> Test7Tree value -> Test7Tree value | |
Test7ParserType = Parser Char (TestsError Char) (Test7Tree String) | |
parameters (Test_7_parser_recursive : Test7ParserType) | |
mutual | |
Test_7_parser_not_recursive : Test7ParserType | |
Test_7_parser_not_recursive = Test_7_parser_branch <|> Test_7_parser_leaf | |
Test_7_parser_leaf : Test7ParserType | |
Test_7_parser_leaf = Test7Leaf <$> pack <$> repeat (satisfy isAlpha) | |
Test_7_parser_branch : Test7ParserType | |
Test_7_parser_branch = do | |
match '(' | |
left <- Test_7_parser_recursive | |
match ' ' | |
right <- Test_7_parser_recursive | |
match ')' | |
yield $ Test7Branch left right | |
Test_7_parser : Test7ParserType | |
Test_7_parser = recursive 2 Test_7_parser_not_recursive | |
Test_7_result : Outcome (TestsError Char) (Test7Tree String) | |
Test_7_result = parse Test_7_parser (unpack "(a (b c))") | |
Test_7 : Test_7_result = Success (Test7Branch (Test7Leaf "a") (Test7Branch (Test7Leaf "b") (Test7Leaf "c"))) 9 | |
Test_7 = Refl | |
Test_8_parser : Parser Char (TestsError Char) String | |
Test_8_parser = take >>= \a => take >>= \b => yield $ pack [a, b] | |
Test_8_result_1 : Outcome (TestsError Char) String | |
Test_8_result_1 = parse Test_8_parser (unpack "ab") | |
Test_8_1 : Test_8_result_1 = Success "ab" 2 | |
Test_8_1 = Refl | |
Test_8_result_2 : Outcome (TestsError Char) String | |
Test_8_result_2 = parse Test_8_parser (unpack "abc") | |
Test_8_2 : Test_8_result_2 = Failure EndOfInputExpected 2 | |
Test_8_2 = Refl | |
Test_8_result_3 : Outcome (TestsError Char) String | |
Test_8_result_3 = parse Test_8_parser (unpack "a") | |
Test_8_3 : Test_8_result_3 = Failure UnexpectedEndOfInput 1 | |
Test_8_3 = Refl | |
Test_9_parser : Parser Char (TestsError Char) String | |
Test_9_parser = take >>= \a => fail $ Custom | |
Test_9_result_1 : Outcome (TestsError Char) String | |
Test_9_result_1 = parse Test_9_parser (unpack "a") | |
Test_9_1 : Test_9_result_1 = Failure Custom 1 | |
Test_9_1 = Refl | |
Test_9_result_2 : Outcome (TestsError Char) String | |
Test_9_result_2 = parse Test_9_parser (unpack "ab") | |
Test_9_2 : Test_9_result_2 = Failure Custom 1 | |
Test_9_2 = Refl | |
Test_10_parser : Parser Char (TestsError Char) (Maybe Char) | |
Test_10_parser = (match 'a') >> optional (match 'b') | |
Test_10_result_1 : Outcome (TestsError Char) (Maybe Char) | |
Test_10_result_1 = parse Test_10_parser (unpack "a") | |
Test_10_1 : Test_10_result_1 = Success Nothing 1 | |
Test_10_1 = Refl | |
Test_10_result_2 : Outcome (TestsError Char) (Maybe Char) | |
Test_10_result_2 = parse Test_10_parser (unpack "ab") | |
Test_10_2 : Test_10_result_2 = Success (Just 'b') 2 | |
Test_10_2 = Refl | |
Test_11_parser : Parser Char (TestsError Char) (List String) | |
Test_11_parser = min 1 $ repeat $ ((repeat $ satisfy isSpace) >> (map pack $ min 1 $ repeat $ satisfy isAlpha)) | |
Test_11_result_1 : Outcome (TestsError Char) (List String) | |
Test_11_result_1 = parse Test_11_parser (unpack "a bb ccc") | |
Test_11_1 : Test_11_result_1 = Success ["a", "bb", "ccc"] 9 | |
Test_11_1 = Refl | |
Test_12_parser : Parser Nat (TestsError Nat) (List Nat) | |
Test_12_parser = repeat $ map sum $ min 1 $ repeat {max = Just 2} take | |
Test_12_result : Outcome (TestsError Nat) (List Nat) | |
Test_12_result = parse Test_12_parser [1, 2, 3, 4, 5, 6] | |
Test_12 : Test_12_result = Success [3, 7, 11] 6 | |
Test_12 = Refl | |
Test_13_parser : Parser Char (TestsError Char) (List String) | |
Test_13_parser = repeat $ do | |
item <- map pack $ repeat $ satisfy isAlpha | |
match ',' | |
yield item | |
Test_13_result : Outcome (TestsError Char) (List String) | |
Test_13_result = parse Test_13_parser (unpack "aa,bb,cc,") | |
test_13 : Test_13_result = Success ["aa", "bb", "cc"] 9 | |
test_13 = Refl | |
data Test_14_Tree : Type where | |
Leaf : String -> Test_14_Tree | |
Branch : List Test_14_Tree -> Test_14_Tree | |
Test_14_parser : Parser Char (TestsError Char) Test_14_Tree | |
Test_14_parser = recursive 2 $ \recParser => | |
(do match '('; items <- separatedBy ((repeat $ satisfy isSpace) >> (match ',') >> (repeat $ satisfy isSpace)) recParser; match ')'; yield $ Branch items) <|> | |
(do chars <- min 1 $ repeat $ satisfy isAlpha; yield $ Leaf $ pack chars) | |
Test_14_result : Outcome (TestsError Char) Test_14_Tree | |
Test_14_result = parse Test_14_parser (unpack "(a, (b, c), d)") | |
Test_14 : Test_14_result = Success (Branch [Leaf "a", Branch [Leaf "b", Leaf "c"], Leaf "d"]) 14 | |
Test_14 = Refl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment