Last active
April 28, 2025 13:53
-
-
Save decatur/7e627cf7f53f4e92186a084f83813e0a to your computer and use it in GitHub Desktop.
Parses ISO 8601 date/times and validates them against the proleptic Gregorian calendar; no dependencies.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Fractional-second component of a parsed timestamp.
///
/// The variant records how many fractional digits were present in the input, so the
/// original precision is preserved rather than being normalised to one unit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Subsecond {
    /// Three fractional digits: milliseconds.
    Milli(u16),
    /// Six fractional digits: microseconds.
    Micro(u32),
    /// Nine fractional digits: nanoseconds.
    Nano(u32),
}
/// Parses ISO8601 date/times and validates against the proleptic Gregorian calendar. | |
/// Usage: | |
/// let parts = parse_iso8601("2025-01-24T23:00:00.232999936Z")?; | |
pub fn parse_iso8601(ts: &str) -> Result<(u16, u8, u8, u8, u8, u8, Subsecond), &str> { | |
if !ts.is_ascii() { | |
return Err("Non-ascii char in date string"); | |
}; | |
if ts.len() > 30 { | |
return Err("Date string too long"); | |
}; | |
let ts = ts.as_bytes(); | |
#[derive(Debug)] | |
enum State { | |
Year, | |
Month, | |
Day, | |
Hour, | |
Minute, | |
Second, | |
Subsecond, | |
Z, | |
} | |
let mut state = State::Year; | |
let mut year = 0; | |
let mut month = 0; | |
let mut day = 0; | |
let mut hour = 0; | |
let mut minute = 0; | |
let mut second = 0; | |
let mut subsecond = 0; | |
let mut n_digits = 0; | |
for ch in ts { | |
// println!("{state:?} {ch}"); | |
match (&state, ch) { | |
(State::Year, 45 /* - */) => { | |
n_digits = 0; | |
state = State::Month; | |
} | |
(State::Year, 48..=57 /* 0..=9 */) => { | |
n_digits += 1; | |
if n_digits > 4 { | |
return Err("Year has more than 4 digits"); | |
} | |
year = 10 * year + (ch - 48) as u16 | |
} | |
(State::Year, _) => return Err("Invalid character in year"), | |
(State::Month, 48..=57 /* 0..=9 */) => { | |
n_digits += 1; | |
if n_digits > 2 { | |
return Err("Month has more than 2 digits"); | |
} | |
month = 10 * month + ch - 48; | |
} | |
(State::Month, 45 /* - */) => { | |
n_digits = 0; | |
state = State::Day; | |
} | |
(State::Month, _) => return Err("Invalid character in month"), | |
(State::Day, 84 /* T */) => state = State::Hour, | |
(State::Day, 48..=57 /* 0..=9 */) => day = 10 * day + ch - 48, | |
(State::Day, _) => return Err("Invalid character in day"), | |
(State::Hour, 58 /* : */) => state = State::Minute, | |
(State::Hour, 48..=57 /* 0..=9 */) => hour = 10 * hour + ch - 48, | |
(State::Hour, _) => return Err("Invalid character in hour"), | |
(State::Minute, 58 /* : */) => state = State::Second, | |
(State::Minute, 48..=57 /* 0..=9 */) => minute = 10 * minute + ch - 48, | |
(State::Minute, _) => return Err("Invalid character in minute"), | |
(State::Second, 46 /* . */) => state = State::Subsecond, | |
(State::Second, 48..=57 /* 0..=9 */) => second = 10 * second + ch - 48, | |
(State::Second, _) => return Err("Invalid character in second"), | |
(State::Subsecond, 90 /* Z */) => { | |
if !(n_digits == 3 || n_digits == 6 || n_digits == 9) { | |
return Err("Subsecond must have 3, 6 or 9 digits"); | |
} | |
state = State::Z; | |
} | |
(State::Subsecond, 48..=57) => { | |
n_digits += 1; | |
if n_digits > 9 { | |
return Err("Subsecond has more than 9 digits"); | |
} | |
subsecond = 10 * subsecond + (ch - 48) as u32; | |
} | |
(State::Subsecond, _) => return Err("Invalid character in subsecond"), | |
(State::Z, _) => return Err("Invalid character in timezone"), | |
} | |
} | |
let daycount_february = if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) { | |
29 // https://en.wikipedia.org/wiki/Leap_year | |
} else { | |
28 | |
}; | |
let day_ranges = [ | |
31, | |
daycount_february, | |
31, | |
30, | |
31, | |
30, | |
31, | |
31, | |
30, | |
31, | |
30, | |
31, | |
]; | |
if month == 0 || month > 12 { | |
return Err("Month out of range"); | |
} | |
if day == 0 || day > day_ranges[(month - 1) as usize] { | |
return Err("Day out of range"); | |
} | |
if hour > 24 { | |
return Err("Hour out of range"); | |
} | |
if minute > 60 { | |
return Err("Minute out of range"); | |
} | |
if second > 60 { | |
return Err("Second out of range"); | |
} | |
let subsecond = match n_digits { | |
3 => Subsecond::Milli(subsecond as u16), | |
6 => Subsecond::Micro(subsecond), | |
9 => Subsecond::Nano(subsecond), | |
_ => panic!(), | |
}; | |
return Ok((year, month, day, hour, minute, second, subsecond)); | |
} | |
#[cfg(test)]
mod iso8601_tests {
    use super::*;

    /// Exercises one accepted input per subsecond precision, then three rejected inputs.
    #[test]
    fn test_parse() {
        // Accepted: nanosecond, microsecond (on a leap day) and millisecond precision.
        assert_eq!(
            parse_iso8601("2025-01-24T23:59:56.232999936Z"),
            Ok((2025, 1, 24, 23, 59, 56, Subsecond::Nano(232999936)))
        );
        assert_eq!(
            parse_iso8601("2000-02-29T23:59:56.232999Z"),
            Ok((2000, 2, 29, 23, 59, 56, Subsecond::Micro(232999)))
        );
        assert_eq!(
            parse_iso8601("2025-02-28T23:00:56.232Z"),
            Ok((2025, 2, 28, 23, 0, 56, Subsecond::Milli(232)))
        );

        // Rejected: non-ASCII input, a stray character in the year, month 13.
        assert_eq!(
            parse_iso8601("202😀-01-24T23:30:56.232999936Z"),
            Err("Non-ascii char in date string")
        );
        assert_eq!(
            parse_iso8601("202@-01-24T23:00:56.232999936Z"),
            Err("Invalid character in year")
        );
        assert_eq!(
            parse_iso8601("2025-13-24T23:01:56.232999936Z"),
            Err("Month out of range")
        );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment