Created
July 21, 2024 12:11
-
-
Save mibmo/f1bb63ee98445cb31f39c03db12c6ddd to your computer and use it in GitHub Desktop.
Trying some JSON parsing with winnow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "json-parser"
version = "0.1.0"
edition = "2021"
[dependencies]
winnow = { version = "0.6.14", features = ["simd", "debug"] }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
use winnow::{ | |
ascii::{digit0, digit1, take_escaped}, | |
combinator::{alt, delimited, fail, opt, preceded, repeat, separated, separated_pair, trace}, | |
error::PResult, | |
token::{one_of, take_while}, | |
Parser, | |
}; | |
// types
#[derive(Debug, Clone, PartialEq)] | |
pub enum JsonValue<'a> { | |
Object(JsonObject<'a>), | |
Array(JsonArray<'a>), | |
String(JsonString<'a>), | |
Number(JsonNumber), | |
Boolean(JsonBoolean), | |
Null, | |
} | |
impl<'a> TryFrom<&'a str> for JsonValue<'a> { | |
type Error = String; | |
fn try_from(input: &'a str) -> Result<Self, Self::Error> { | |
value.parse(input).map_err(|e| e.to_string()) | |
} | |
} | |
pub type JsonObject<'a> = HashMap<JsonString<'a>, JsonValue<'a>>; | |
pub type JsonArray<'a> = Vec<JsonValue<'a>>; | |
pub type JsonString<'a> = &'a str; | |
pub type JsonNumber = f32; | |
pub type JsonBoolean = bool; | |
// parsers
pub fn whitespace(input: &mut &str) -> PResult<()> { | |
repeat(0.., one_of([' ', '\n', '\r', '\t']).void()).parse_next(input) | |
} | |
pub fn element<'a>(input: &mut &'a str) -> PResult<JsonValue<'a>> { | |
delimited(whitespace, value, whitespace).parse_next(input) | |
} | |
pub fn value<'a>(input: &mut &'a str) -> PResult<JsonValue<'a>> { | |
trace( | |
"value", | |
alt(( | |
null.map(|_| JsonValue::Null), | |
boolean.map(JsonValue::Boolean), | |
number.map(JsonValue::Number), | |
string.map(JsonValue::String), | |
array.map(JsonValue::Array), | |
object.map(JsonValue::Object), | |
fail, | |
)), | |
) | |
.parse_next(input) | |
} | |
pub fn object<'a>(input: &mut &'a str) -> PResult<JsonObject<'a>> { | |
let member = separated_pair(delimited(whitespace, string, whitespace), ':', element); | |
trace( | |
"object", | |
delimited('{', preceded(whitespace, separated(0.., member, ',')), '}'), | |
) | |
.parse_next(input) | |
} | |
pub fn array<'a>(input: &mut &'a str) -> PResult<JsonArray<'a>> { | |
trace( | |
"array", | |
delimited('[', preceded(whitespace, separated(0.., element, ',')), ']'), | |
) | |
.parse_next(input) | |
} | |
pub fn string<'a>(input: &mut &'a str) -> PResult<&'a str> { | |
let hex = one_of(|c: char| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')); | |
let escaped_codepoint = trace( | |
"unicode_point", | |
preceded('u', repeat(4, hex)), | |
) | |
.map(|chars: Vec<char>| { | |
let codepoints = chars.into_iter().collect::<String>(); | |
let codepoint = u32::from_str_radix(&codepoints, 16).expect("thingy"); | |
char::from_u32(codepoint).expect("thongy") | |
}); | |
trace( | |
"string", | |
delimited( | |
'"', | |
take_escaped( | |
take_while(1.., |c: char| { | |
matches!(c as u32, 0x0020..=0x10FFFF) && c != '\"' && c != '\\' | |
}), | |
'\\', | |
alt(( | |
"\"".value('\"'), | |
"\\".value('\\'), | |
"b".value('\u{08}'), | |
"f".value('\u{0C}'), | |
"r".value('\r'), | |
"t".value('\t'), | |
escaped_codepoint, | |
//fail, | |
)), | |
), | |
'"', | |
), | |
) | |
.parse_next(input) | |
} | |
pub fn number(input: &mut &str) -> PResult<JsonNumber> { | |
trace( | |
"number", | |
( | |
// /-?/ | |
trace("sign", opt("-")).map(Option::unwrap_or_default), | |
// /(0|[1-9][0-9]*)/ | |
trace("mantissa", alt((('0', ""), (one_of('1'..='9'), digit0)))).recognize(), | |
// /(\.[0-9]+)?/ | |
trace("fraction", opt((".", digit1))) | |
.map(Option::unwrap_or_default) | |
.recognize(), | |
// /([eE][-+]?[0-9]+)?/ | |
trace( | |
"exponent", | |
opt(preceded( | |
one_of(['e', 'E']), | |
(opt(one_of(['-', '+'])), digit1), | |
)), | |
), | |
), | |
) | |
.parse_next(input) | |
.map(|(sign, mantissa, fraction, exponent)| { | |
let exponent = if let Some((sign_char, digits)) = exponent { | |
let exponent = digits.parse().expect("already parsed"); | |
let power = (10 as JsonNumber).powi(exponent); | |
match sign_char { | |
Some('+') | None => power, | |
Some('-') => 1.0 / power, | |
Some(_) => unreachable!("only + and - are parsed"), | |
} | |
} else { | |
1 as JsonNumber | |
}; | |
format!("{sign}{mantissa}{fraction}") | |
.parse() | |
.map(|x: JsonNumber| x * exponent) | |
.expect("already parsed") | |
}) | |
} | |
pub fn boolean(input: &mut &str) -> PResult<JsonBoolean> { | |
trace("boolean", alt(("true".value(true), "false".value(false)))).parse_next(input) | |
} | |
pub fn null(input: &mut &str) -> PResult<()> { | |
trace("null", "null".void()).parse_next(input) | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: add unit tests for the parsers defined in this file.
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment