Created
December 10, 2023 11:38
-
-
Save ulidtko/6a50faa20eaa77d9b0e51c2a033eafde to your computer and use it in GitHub Desktop.
Haskell string literals with custom compile-time validation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DataKinds, PolyKinds, TypeFamilies #-} | |
{-# LANGUAGE MultiParamTypeClasses #-} | |
{-# LANGUAGE FlexibleInstances, UndecidableInstances #-} | |
{-# LANGUAGE NoStarIsType #-} | |
{-# OPTIONS_GHC -Wno-orphans #-} | |
{-# OPTIONS_GHC -fplugin=Overloaded -fplugin-opt=Overloaded:Symbols #-} | |
module Main where | |
import Data.Fixed | |
import Data.Function (on) | |
import Data.Kind (Constraint, Type) | |
import Data.NonEmptyText as NeT (NonEmptyText, fromText) | |
import Data.Proxy (Proxy(..)) | |
import Data.Symbol.Ascii as S | |
import Data.Text qualified as T | |
import Fcf | |
import Fcf.Class.Monoid (type (<>)) | |
import Fcf.Data.List (Cons, Concat) | |
import GHC.TypeLits hiding (type (*)) | |
import Overloaded.Symbols (FromSymbol(..)) | |
-- Exercize 1 -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- | |
type family NonEmptyInvariant (s :: Symbol) :: Constraint where | |
NonEmptyInvariant "" = TypeError ('Text "Empty string as NonEmptyText literal") | |
NonEmptyInvariant _s = () | |
instance (NonEmptyInvariant lit, KnownSymbol lit) => FromSymbol lit NonEmptyText where | |
fromSymbol = nothingImpossible . NeT.fromText . T.pack $ symbolVal (Proxy @lit) where | |
nothingImpossible (Just x) = x | |
nothingImpossible Nothing = error "compile-time invariant violated" | |
demo1 :: [NonEmptyText] | |
demo1 = | |
[ "hello, types" | |
-- , "" -- compile error here, as expected! | |
] | |
-- Exercize 2 -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- | |
type DurationUnitTable = '[ | |
'("us", 1) | |
, '("ms", 1000) | |
, '("s", 1_000_000) | |
, '("m", 60_000_000) | |
, '("h", 3600_000_000) | |
, '("d", 86400_000_000) | |
] | |
type GenericParseErrorText | |
= 'Text "Time duration with no parse; try 1s, 3ms, 50us, 5m, 12h" | |
type AvailableDurationUnitsText | |
= 'Text "Unrecognized time-duration unit, try one of these suffixes:" ':$$: | |
'Text (Eval (CommaEnumerate =<< Map Fst DurationUnitTable) <> ".") | |
data CommaEnumerate :: [Symbol] -> Exp Symbol | |
type instance Eval (CommaEnumerate '[]) = "(no options)" | |
type instance Eval (CommaEnumerate '[x]) = "only " <> x | |
type instance Eval (CommaEnumerate '[x, y]) = x <> " and " <> y | |
type instance Eval (CommaEnumerate (x ': y ': z ': rest)) | |
= x <> ", " <> Eval (CommaEnumerate (y ': z ': rest)) | |
-- | Type-level parser of time-duration-with-units from compile-time string literal | |
type family ParseDurationMicros (s :: Symbol) :: Nat where | |
ParseDurationMicros s | |
= Eval ( | |
(Uncurry (*) <=< ParseDurationDigits *** ParseDurationUnitMultiplier) | |
=<< BreakUntil (Not <=< IsDigit) | |
=<< Pure (S.ToList s) | |
) | |
data ParseDurationUnitMultiplier :: [Symbol] -> Exp Nat | |
type instance Eval (ParseDurationUnitMultiplier suffix) | |
= Eval ( | |
FromMaybe (TypeError AvailableDurationUnitsText) | |
=<< Flip Lookup DurationUnitTable | |
=<< Concat suffix | |
) | |
data ParseDurationDigits :: [Symbol] -> Exp Nat | |
type instance Eval (ParseDurationDigits digits) | |
= If (Eval (TyEq digits '[])) | |
(TypeError GenericParseErrorText) | |
(Eval (ReadNat' =<< Concat digits)) | |
-- | FCF-defunctionalized 'ReadNat' from @symbols@. On parse error, it raises a | |
-- TypeError rather than returning 'Nothing -- so we check empty input beforehand. | |
data ReadNat' :: Symbol -> Exp Nat | |
type instance Eval (ReadNat' num) = ReadNat num | |
-- | The obvious character predicate. One technicality: it's rather hard to turn | |
-- Symbol into ['Char] (worse, it's somewhat of a miracle that decomposing Symbol | |
-- into a list of one-character Symbols is possible). | |
-- For this reason, the implementation isn't made with range comparison, a-la | |
-- @Not (CmpSymbol s "0" == 'LE) && Not (CmpSymbol "9" == 'GT)@ | |
-- as it wouldn't be good enough: IsDigit "5? yep" would evaluate to 'True. | |
data IsDigit :: Symbol -> Exp Bool | |
type instance Eval (IsDigit s) = Eval (Case '[ | |
"0" --> 'True, "1" --> 'True, "2" --> 'True, "3" --> 'True, "4" --> 'True, | |
"5" --> 'True, "6" --> 'True, "7" --> 'True, "8" --> 'True, "9" --> 'True, | |
Else (ConstFn 'False) | |
] s) | |
-- | FCF-defunctionalized typelevel 'Data.List.break': split a list in two, | |
-- at the first point where the supplied predicate starts yielding 'True. | |
data BreakUntil :: (k -> Exp Bool) -> [k] -> Exp ([k], [k]) | |
type instance Eval (BreakUntil _ '[]) | |
= '( '[], '[] ) | |
type instance Eval (BreakUntil p (x ': xs)) | |
= Eval (If (Eval (p x)) | |
(Pure '( '[], x ': xs )) | |
(Cons x *** Pure =<< BreakUntil p xs) | |
) | |
-- | 'Micro' which remembers the original text it was parsed from. | |
-- This is useful for roundtrip property @toJson . fromJson === id@. | |
data MicroWithOrig = MicroWithOrig { mwoVal :: Micro, mwoOrig :: T.Text } | |
instance Eq MicroWithOrig where (==) = (==) `on` mwoVal | |
instance Show MicroWithOrig where show = show . mwoOrig | |
instance (KnownSymbol lit, KnownNat (ParseDurationMicros lit)) | |
=> FromSymbol lit MicroWithOrig where | |
fromSymbol = case Proxy @(ParseDurationMicros lit) of | |
(Proxy :: Proxy micros) -> MicroWithOrig {..} where | |
mwoOrig = T.pack $ symbolVal (Proxy @lit) | |
mwoVal = MkFixed @Type @E6 $ natVal (Proxy @micros) | |
demo2 :: [MicroWithOrig] | |
demo2 = | |
[ "40m" | |
-- , "" | |
-- , "150years" | |
-- , "sdf" | |
-- , "150years" | |
] | |
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- | |
main :: IO () | |
main = mapM_ print demo2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment