Created
May 20, 2025 05:06
-
-
Save rntz/da1f004b2bd2f4268c626c5d6ef8acb4 to your computer and use it in GitHub Desktop.
seekable sorted worst-case optimal iterators in Haskell
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- A lower bound in a totally ordered key-space k; corresponds to some part of an | |
-- ordered sequence we can seek forward to. | |
data Bound k = Init | Atleast !k | Greater !k | Done deriving (Show, Eq) | |
instance Ord k => Ord (Bound k) where | |
Init <= _ = True | |
_ <= Done = True | |
_ <= Init = False | |
Done <= _ = False | |
Atleast x <= Atleast y = x <= y | |
Atleast x <= Greater y = x <= y | |
Greater x <= Atleast y = x < y -- <== NB. the odd one out! | |
Greater x <= Greater y = x <= y | |
-- An iterator has either found a particular key-value pair, or knows a lower | |
-- bound on future keys. | |
data Position k v = Found !k v | Bound !(Bound k) deriving (Show, Eq, Functor) | |
bound :: Position k v -> Bound k | |
bound (Found k _) = Atleast k | |
bound (Bound k) = k | |
data Iter k v = Iter { posn :: !(Position k v), seek :: Bound k -> Iter k v } deriving Functor | |
key :: Iter k v -> Bound k | |
key = bound . posn | |
-- Any key-value function can be an iterator - just not a productive one. | |
fromFunction :: (k -> Maybe v) -> Iter k v | |
fromFunction f = seek Init | |
where seek k = Iter (at k) seek | |
at (Atleast k) = maybe (Bound (Greater k)) (Found k) (f k) | |
at p = Bound p | |
-- Inner joins, ie generalized intersection. liftA2 is productive if either argument is. | |
instance Ord k => Applicative (Iter k) where | |
pure x = fromFunction (\_ -> Just x) | |
liftA2 f s t = Iter posn' seek' | |
where posn' | Found k x <- posn s, Found k' y <- posn t, k == k' = Found k (f x y) | |
| otherwise = Bound (key s `max` key t) | |
seek' k = liftA2 f s' t' | |
where s' = seek s k | |
t' = seek t $ key s' -- the leapfrog trick | |
-- -- Simpler implementation with same asymptotics: | |
-- seek' k = liftA2 f (seek s k) (seek t k) | |
-- Outer joins, ie generalized union. | |
class Functor f => OuterJoin f where | |
outerJoin :: (a -> c) -> (b -> c) -> (a -> b -> c) -> f a -> f b -> f c | |
instance Ord k => OuterJoin (Position k) where | |
outerJoin l r b p q = case bound p `compare` bound q of | |
LT -> l <$> p | |
GT -> r <$> q | |
EQ -> case (p, q) of | |
(Found k x, Found _ y) -> Found k (b x y) | |
-- If they're equal but one isn't done finding a value yet, we | |
-- have to wait until it does. | |
(Bound pos, _) -> Bound pos | |
(_, Bound pos) -> Bound pos | |
instance Ord k => OuterJoin (Iter k) where | |
-- outerJoin is productive if both arguments are. | |
outerJoin l r b s t = Iter (outerJoin l r b (posn s) (posn t)) | |
(\k -> outerJoin l r b (seek s k) (seek t k)) | |
-- NB. We can't actually seek efficiently in a Haskell list. | |
fromSorted :: Ord k => [(k,v)] -> Iter k v | |
fromSorted [] = emptyIter | |
fromSorted l@((k,v):_) = Iter (Found k v) seek | |
where seek target = fromSorted $ dropWhile (not . matches target . fst) l | |
toSorted :: Iter k v -> [(k,v)] | |
toSorted (Iter (Bound Done) _) = [] | |
toSorted (Iter (Found k v) seek) = (k,v) : toSorted (seek (Greater k)) | |
toSorted (Iter (Bound k) seek) = toSorted $ seek k | |
emptyIter = Iter (Bound Done) (const emptyIter) | |
matches :: Ord k => Bound k -> k -> Bool | |
matches Init _ = True | |
matches Done _ = False | |
matches (Atleast x) y = x <= y | |
matches (Greater x) y = x < y | |
list1 = [(1, "one"), (2, "two"), (3, "three"), (5, "five")] | |
list2 = [(1, "a"), (3, "c"), (5, "e")] | |
ms1, ms2 :: Iter Int String | |
ms1 = fromSorted list1 | |
ms2 = fromSorted list2 | |
m = (,) <$> ms1 <*> ms2 | |
xs = fromSorted [(x,x) | x <- [1,3 .. 100]] | |
ys = fromSorted [(y,y) | y <- [2,4 .. 100]] | |
zs = fromSorted [(z,z) | z <- [1,100]] | |
mxyz :: Iter Int ((Int, Int), Int) | |
mxyz = (,) <$> ((,) <$> xs <*> ys) <*> zs | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment