Created
January 17, 2025 03:49
-
-
Save Hanaasagi/2d06c4e83c182c460766d811162836fb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Enum, auto | |
""" | |
START:
    SIGN -> SIGN
    DIGIT -> INT
    "0" -> ZERO
    "." -> P_DOT
    ERROR -> ERROR
SIGN:
    DIGIT -> INT
    "0" -> ZERO
    "." -> P_DOT
    ERROR -> ERROR
ZERO:
    "x" | "X" -> HEX
    "o" | "O" -> OCT
    "b" | "B" -> BIN
    "." -> P_DOT
    DIGIT -> INT  # (Warning: Invalid octal sequence, e.g., 09)
    "e" | "E" -> EXP
    END -> END
INT:
    DIGIT -> INT
    "." -> DOT
    "e" | "E" -> EXP
    END -> END
P_DOT:  # a "." that has no integer digits before it, e.g. ".5"
    DIGIT -> FRAC
    ERROR -> ERROR
DOT:  # a "." that follows integer digits, e.g. "2."
    DIGIT -> FRAC
    END -> END
    ERROR -> ERROR
FRAC:
    DIGIT -> FRAC
    "e" | "E" -> EXP
    END -> END
EXP:
    "+" | "-" -> EXP_SIGN
    DIGIT -> EXP_INT
    ERROR -> ERROR
EXP_SIGN:
    DIGIT -> EXP_INT
    ERROR -> ERROR
EXP_INT:
    DIGIT -> EXP_INT
    END -> END
HEX:
    HEX_DIGIT -> HEX
    END -> END
OCT:
    OCT_DIGIT -> OCT
    END -> END
BIN:
    "0" | "1" -> BIN
    END -> END
END:
    ERROR -> ERROR
ERROR:
    ERROR -> ERROR
""" | |
class State(Enum):
    """States of the number-literal state machine.

    Members are numbered consecutively from 1 in declaration order.
    """

    # Nothing consumed yet / a leading "+" or "-" consumed.
    START = 1
    SIGN = 2

    # Integer part: a leading "0", or a run of decimal digits.
    ZERO = 3
    INT = 4

    # Decimal point: P_DOT is entered from START/SIGN (no digits before the
    # "."), DOT is entered from INT (digits before the ".").
    P_DOT = 5
    DOT = 6
    FRAC = 7

    # Exponent: the "e"/"E" marker, its optional sign, and its digits.
    EXP = 8
    EXP_SIGN = 9
    EXP_INT = 10

    # Prefixed integers reached from ZERO via "x", "o" or "b".
    HEX = 11
    OCT = 12
    BIN = 13

    # END: a character that cannot continue the literal was read.
    # ERROR: the input is not a number.
    END = 14
    ERROR = 15
class TokenType(Enum):
    """Classification of a parsed token."""

    NUMBER = 1  # the token is a number literal
    ERROR = 2   # the token is not a number literal
class NumberParser:
    """Character-driven state machine that recognises numeric literals.

    Accepted forms are optionally signed decimal integers and floats
    (``12``, ``.5``, ``2.``, ``0.``, ``1e-3``) and integers with a ``0x``,
    ``0o`` or ``0b`` prefix followed by at least one digit.  A single
    underscore may separate two digits (``1_000``) or follow a radix prefix
    (``0x_FF``); leading, trailing and doubled underscores are rejected.

    Only ASCII digits are recognised.

    Attributes:
        state: The current ``State``.  ``parse`` resets it to
            ``State.START`` before reading a token.
    """

    # ASCII-only alphabets.  ``str.isdigit`` is deliberately not used: it is
    # also true for characters such as "²" or "٣", which are not valid
    # digits of a numeric literal.
    _DEC_DIGITS = frozenset("0123456789")
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
    _OCT_DIGITS = frozenset("01234567")
    _BIN_DIGITS = frozenset("01")
    _NO_DIGITS = frozenset()

    def __init__(self):
        self.state = State.START

    def _digit_alphabet(self):
        """Return the digits the current state may consume (possibly empty)."""
        state = self.state
        if state in (State.ZERO, State.INT, State.FRAC, State.EXP_INT):
            return self._DEC_DIGITS
        if state == State.HEX:
            return self._HEX_DIGITS
        if state == State.OCT:
            return self._OCT_DIGITS
        if state == State.BIN:
            return self._BIN_DIGITS
        return self._NO_DIGITS

    def transition(self, char: str) -> None:
        """Advance ``self.state`` by one input character.

        A character that cannot continue an otherwise complete literal moves
        the machine to ``State.END``; any character read in ``END`` (or in
        ``ERROR``) moves it to ``State.ERROR``.

        Args:
            char: The next character of the token (underscores are handled
                by ``parse`` and are not expected here).
        """
        state = self.state
        is_decimal = char in self._DEC_DIGITS
        is_exponent = char in ("e", "E")

        if state in (State.START, State.SIGN):
            # SIGN behaves like START except that a second sign is an error.
            if state == State.START and char in ("+", "-"):
                nxt = State.SIGN
            elif char == "0":
                nxt = State.ZERO
            elif is_decimal:
                nxt = State.INT
            elif char == ".":
                nxt = State.P_DOT
            else:
                nxt = State.ERROR
        elif state == State.ZERO:
            if char in ("x", "X"):
                nxt = State.HEX
            elif char in ("o", "O"):
                nxt = State.OCT
            elif char in ("b", "B"):
                nxt = State.BIN
            elif char == ".":
                # A digit ("0") precedes the dot, so this is the same
                # situation as INT followed by ".": "0." is complete, just
                # like "2.".  P_DOT is reserved for a dot with no digits
                # before it.
                nxt = State.DOT
            elif is_decimal:
                nxt = State.INT  # Warning: Invalid octal sequence
            elif is_exponent:
                nxt = State.EXP
            else:
                nxt = State.END
        elif state == State.INT:
            if is_decimal:
                nxt = State.INT
            elif char == ".":
                nxt = State.DOT
            elif is_exponent:
                nxt = State.EXP
            else:
                nxt = State.END
        elif state in (State.P_DOT, State.DOT):
            nxt = State.FRAC if is_decimal else State.ERROR
        elif state == State.FRAC:
            if is_decimal:
                nxt = State.FRAC
            elif is_exponent:
                nxt = State.EXP
            else:
                nxt = State.END
        elif state == State.EXP:
            if char in ("+", "-"):
                nxt = State.EXP_SIGN
            elif is_decimal:
                nxt = State.EXP_INT
            else:
                nxt = State.ERROR
        elif state == State.EXP_SIGN:
            nxt = State.EXP_INT if is_decimal else State.ERROR
        elif state == State.EXP_INT:
            nxt = State.EXP_INT if is_decimal else State.END
        elif state in (State.HEX, State.OCT, State.BIN):
            # Stay in the radix state while its digits keep coming.
            nxt = state if char in self._digit_alphabet() else State.END
        else:
            # END and ERROR: nothing may follow.
            nxt = State.ERROR

        self.state = nxt

    def parse(self, token: str) -> "TokenType":
        """Classify *token* as a number or an error.

        Args:
            token: The complete candidate literal.

        Returns:
            ``TokenType.NUMBER`` if the whole token is a valid literal,
            otherwise ``TokenType.ERROR`` (including for the empty string).
        """
        self.state = State.START
        radix_states = (State.HEX, State.OCT, State.BIN)
        radix_digits = 0  # digits read since the 0x / 0o / 0b prefix
        prev_char = ""

        for char in token:
            if char == "_":
                # An underscore must follow a digit or a radix prefix, i.e.
                # the machine must be in a digit-consuming state, and two
                # underscores may not be adjacent.
                if prev_char == "_" or not self._digit_alphabet():
                    self.state = State.ERROR
                    return TokenType.ERROR
                prev_char = char
                continue

            # The character right after an underscore must be a digit of the
            # run being separated ("1_e5" and "1_.5" are not numbers).
            if prev_char == "_" and char not in self._digit_alphabet():
                self.state = State.ERROR
                return TokenType.ERROR

            came_from = self.state
            self.transition(char)
            if self.state == State.ERROR:
                return TokenType.ERROR
            if self.state in radix_states:
                # The transition out of ZERO consumed the prefix letter, not
                # a digit; every later step inside the state is a digit.
                radix_digits = 0 if came_from == State.ZERO else radix_digits + 1
            prev_char = char

        if prev_char == "_":
            # Trailing underscore, e.g. "1_".
            self.state = State.ERROR
            return TokenType.ERROR
        if self.state in radix_states and radix_digits == 0:
            # Bare prefix with no digits, e.g. "0x".
            self.state = State.ERROR
            return TokenType.ERROR

        accepting = (
            State.ZERO,
            State.INT,
            State.DOT,
            State.FRAC,
            State.EXP_INT,
            State.HEX,
            State.OCT,
            State.BIN,
        )
        if self.state in accepting:
            return TokenType.NUMBER
        return TokenType.ERROR
def test_parser():
    """Run NumberParser over sample tokens, print each verdict and check it.

    One ``Token: ... -> Type: ...`` line is printed per case, grouped under
    a GOOD and a BAD banner.

    Raises:
        AssertionError: If a token from the good list is not classified as
            NUMBER, or a token from the bad list is not classified as ERROR.
    """
    parser = NumberParser()
    good_cases = [
        "0",
        "0.0",
        "-0",
        "+0",
        "123",
        "-123",
        "+123",
        "0.2",
        "2.123456",
        ".2",
        "2.",
        "-2.5",
        "+2.5",
        "1e3",
        "1e+3",
        "1e-3",
        "-1e-3",
        "+1e3",
        "0x3F",
        "0o77",
        "0b1010",
        "-0x3A",
        "+0b101",
        "0e0",
        "-0e0",
        "+0e0",
        "123.456",
        "1e1000",
        "1e-1000",
        "1e+1000",
        "1_000",
        "1_000_000",
        "1_000.000_1",
        "1_000_000e10",
        "0b0",
        "0x0",
        "0o0",
        "-1.",
        "+1.",
        "01",
    ]
    bad_cases = [
        "invalid",
        "1e",
        "0b102",
        "0xG",
        "0o08",
        "0o9",
        "0bb1",
        "0.2.",
        "0.2.2",
        "-",
        "+",
        ".",
        "1e+",
        "1e-",
        "e3",
        "1.2.3",
        "1e+1.5",
        "+-123",
        "-+123",
        "0b123",
        "0._2",
        "_3",
        "FF",
        "1e3e2",
    ]

    # Collect every mismatch instead of stopping at the first one so that a
    # single run reports all misclassified tokens.
    mismatches = []
    for label, expected, cases in (
        ("GOOD", TokenType.NUMBER, good_cases),
        ("BAD", TokenType.ERROR, bad_cases),
    ):
        print("-" * 20, label, "-" * 20)
        for case in cases:
            result = parser.parse(case)
            print(f"Token: {case:16} -> Type: {result.name}")
            if result is not expected:
                mismatches.append(
                    f"{case!r}: expected {expected.name}, got {result.name}"
                )

    # Raised explicitly (not via ``assert``) so the check survives ``-O``.
    if mismatches:
        raise AssertionError("misclassified tokens: " + "; ".join(mismatches))


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    test_parser()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment