gelisam · October 25, 2022 13:52
diff --git a/PolyConduit.hs b/PolyConduit.hs
 -- Follow up to [1], praising @viercc's better solution [2].
 --
 -- [1] https://gist.github.com/gelisam/a8bee217410b74f030c21f782de23d11
 -- [2] https://www.reddit.com/r/haskell/comments/yb9bi4/comment/itfh07z
 --
 -- The challenge is still to implement a function which takes in three
 -- Conduits, and uses the values from the first Conduit in order to decide
 -- which of the other two Conduits to sample from next. Something like this:
 --
 --     example bools ints strings = do
 --       maybeBool <- awaitMaybe bools
 --       case maybeBool of
 --         Nothing -> do
 --           liftIO $ putStrLn "it's over"
 --         Just True -> do
 --           int <- await ints
 --           yield (int + 4)
 --           example
 --         Just False -> do
 --           str <- await strings
 --           liftIO $ putStrLn str
 --           yield $ length str
 --           example
 --
 -- And I still want a solution which works for n Conduits, not just three. But
 -- this time, instead of switching to the notoriously-complex "machines"
 -- package, I'll stick to conduit, thanks to @viercc's great tip of using
 -- Conduit _transformers_.
 {-# LANGUAGE LambdaCase, RankNTypes, ScopedTypeVariables #-}
 module Main where

 import Test.DocTest

 import Control.Monad.IO.Class (MonadIO(liftIO))
 import Control.Monad.Trans.Class (lift)
 import Data.Conduit hiding (Source)
 import Data.Functor.Identity (Identity(runIdentity))
 import Data.Void (absurd)

 -- $setup
 -- >>> import qualified Data.Conduit.Combinators as Conduit
 -- >>> import qualified Data.Conduit.List as Conduit
 -- >>> :{
 -- let testConduit
 --       :: Show o
 --       => ConduitT () o IO ()
 --       -> IO ()
 --     testConduit source = do
 --       os <- connect source Conduit.consume
 --       print os
 -- :}


 -- In the previous post, I used a type-level list to keep track of the
 -- multiple inputs. This time, we'll represent a conduit with n inputs as a
 -- stack of n 'ConduitT' transformers.

 type Source  m o       = ConduitT () o m ()
 type Process m a o     = ConduitT a o m ()
 type Tee     m a b o   = ConduitT a o
                           (ConduitT b Void m) ()
 type Tee3    m a b c o = ConduitT a o
                           (ConduitT b Void
                             (ConduitT c Void m)) ()

 -- Each 'ConduitT' layer has two type arguments; one for the elements you
 -- await from the single input you get when you use conduits the normal way,
 -- and one for the elements you send downstream. With n layers, we can thus
 -- specify n inputs, which is what we want, but we must also specify n
 -- outputs, which is n-1 too many. Using 'Void' for all but one of the outputs
 -- clarifies that only one of them is actually used.

 -- Now that we have unexpectedly managed to represent conduits which take more
 -- than one input, how can we attach those inputs? In the previous post, I
 -- defined a versatile 'polyCapL' function which could attach a 'Source' to a
 -- number of different machines:
 --
 --     -- polyCapL :: Source m a -> Process m a o     -> Source  m o
 --     -- polyCapL :: Source m a -> Tee     m a b o   -> Process m b o
 --     -- polyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
 --     polyCapL
 --       :: Source m a1
 --       -> PolyTee m (a1 ': as) o
 --       -> PolyTee m as o
 --
 -- I would like to construct a similar function here. As before, I want to
 -- attach a single source as the first input 'a1', while leaving the remaining
 -- inputs 'as' untouched. In this conduit stack representation, the first
 -- input is the input of the outermost 'ConduitT' layer, while the remaining
 -- inputs are specified by the rest of the layers, 'mm':
 --
 --     myPolyCapL
 --       :: Source m a1 ()
 --       -> ConduitT a1 o mm r
 --       -> mm r
 --
 -- In order for this type to specialize to these,
 --
 --     myPolyCapL :: Source m a -> Process m a o     -> Source  m o
 --     myPolyCapL :: Source m a -> Tee     m a b o   -> Process m b o
 --     myPolyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
 --
 -- I need to somehow specify that the monad at the base of the 'mm' stack must
 -- be 'm'. When 'm' is IO, this is represented using a 'MonadIO' constraint:
 --
 --     liftIO :: forall x. IO x -> mm x
 --
 -- There exists a @MonadBase m@ constraint which generalizes 'MonadIO':
 --
 --     liftBase :: forall x. m x -> mm x
 --
 -- But instead of adding an orphan @MonadBase m (ConduitT i o mm)@ instance,
 -- I'll just ask for an extra @forall x. m x -> mm x@ parameter:
 --
 --     myPolyCapL
 --       :: (forall x. m x -> mm x)
 --       => Source m a1 ()
 --       -> ConduitT a1 o mm r
 --       -> mm r

 -- Another way in which the type above isn't quite right is that the output
 -- type 'o' disappears. The fix is quite simple: instead of only concretizing
 -- the very outermost ConduitT layer and leaving the rest abstract, I
 -- concretize the _two_ outermost ConduitT layers:
 myPolyCapL
  :: forall a b o m mm r. (Monad m, Monad mm)
  => (forall x. m x -> mm x)
  -> ConduitT () a m ()
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
 myPolyCapL liftM src doubleConduit
  = connect src' doubleConduit''
  where
    -- The implementation looks very different from polyCapL's, but it's the
    -- same idea. It just so happens that the conduit API is expressive enough
    -- that we can achieve our goal via several small transformations, without
    -- having to unravel the conduits into sequences of instructions.

    src' :: ConduitT () a (ConduitT b o mm) ()
    src'
      = transPipe (lift . liftM) src

    doubleConduit' :: ConduitT a o (ConduitT b o mm) r
    doubleConduit'
      = transPipe (mapOutput absurd) doubleConduit

    -- The 'doubleConduit' transformations swap the 'Void' and 'o' output
    -- types.
    -- At this point one might wonder why I chose the convention of
    -- using 'Void' for all but the _outermost_ layer. If I had chosen the
    -- innermost layer instead, the 'o' would already be in the right
    -- position, and I wouldn't need to perform any transformations on
    -- 'doubleConduit'!
    -- The reason is simply to provide a more ergonomic experience to the
    -- user: by choosing the outermost layer, the user can emit by writing
    --
    --     emit o
    --
    -- Whereas if I had chosen the innermost layer, the user would have to
    -- write this instead.
    --
    --     lift $ lift $ emit o
    doubleConduit'' :: ConduitT a Void (ConduitT b o mm) r
    doubleConduit''
      = fuseUpstream doubleConduit' outputToInner

    outputToInner :: ConduitT o Void (ConduitT b o mm) ()
    outputToInner = do
      await >>= \case
        Just o -> do
          lift $ yield o
          outputToInner
        Nothing -> do
          pure ()

 -- The types line up, but how does this work? We're compiling down to a single layer, so how did we persuade conduit to magically create a ConduitT with more than one input?
 --
 -- The secret is, we didn't! Conduits are combined in the same way machines are: by lining up and then eliminating matching `yield` and `await` instructions. Thus, after we have attached an input and eliminated a ConduitT layer, all the `await` calls which were reading from that input have been replaced by a fragment of the code from that input, namely the code between two consecutive `yield`s. That's the magic of representing computations as a sequence of instructions, we can splice and rearrange those instructions!
 --
 -- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.


 -- Anyway, since I suspect that most conduit stacks will either have IO or Identity as
 -- a base monad, here are two specializations which fill-in the 'liftM'
 -- parameter.

 -- polyCapIO :: Source IO a -> Process IO a o     -> Source  IO o
 -- polyCapIO :: Source IO a -> Tee     IO a b o   -> Process IO b o
 -- polyCapIO :: Source IO a -> Tee3    IO a b c o -> Tee     IO b c o
 polyCapIO
  :: forall a b o mm r. MonadIO mm
  => Source IO a
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
 polyCapIO
  = myPolyCapL liftIO

 polyCap
  :: forall a b o mm r. Monad mm
  => ConduitT () a Identity ()
  -> ConduitT a o (ConduitT b Void mm) r
  -> ConduitT b o mm r
 polyCap
  = myPolyCapL (pure . runIdentity)


 -- In the previous post, I also implemented a 'polyCapR' function for
 -- converting a fully-saturated 'PolyTee' into a 'Source', so it can be used
 -- with existing machine combinators.
 --
 --     polyCapR
 --       :: PolyTee m '[] b
 --       -> Source m b
 --
 -- With the conduit layers representation, such a function is not needed.
 -- After attaching all but one of the inputs, the result is a single ConduitT
 -- layer, that is, a normal conduit which can already be used with existing
 -- conduit combinators. In particular, 'fuse' can be used to attach the last
 -- input, thus converting the conduit to a source.


 -- We can finally implement the challenge; twice, in order to exercise both
 -- the IO and Identity specializations.

 -- |
 -- >>> :{
 -- testConduit
 --   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
 --   $ polyCapIO (Conduit.yieldMany [1..])
 --   $ polyCapIO (Conduit.yieldMany [True, False, False, True, False])
 --   $ exampleIO
 -- :}
 -- foo
 -- bar
 -- quux
 -- it's over
 -- [5,3,3,6,4]
 exampleIO
  :: ConduitT Bool Int
       (ConduitT Int Void
         (ConduitT String Void IO))
       ()
 exampleIO = do
  maybeBool <- await
  case maybeBool of
    Nothing -> do
      liftIO $ putStrLn "it's over"
    Just True -> do
      Just int <- lift await
      yield (int + 4)
      exampleIO
    Just False -> do
      Just str <- lift $ lift await
      liftIO $ putStrLn str
      yield $ length str
      exampleIO

 -- |
 -- >>> :{
 -- testConduit
 --   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
 --   $ polyCap (Conduit.yieldMany [1..])
 --   $ polyCap (Conduit.yieldMany [True, False, False, True, False])
 --   $ example
 -- :}
 -- [5,3,3,6,4]
 example
  :: forall m. MonadFail m
  => ConduitT Bool Int
       (ConduitT Int Void
         (ConduitT String Void m))
       ()
 example = do
  maybeBool <- await
  case maybeBool of
    Nothing -> do
      pure ()
    Just True -> do
      Just int <- lift await
      yield (int + 4)
      example
    Just False -> do
      Just str <- lift $ lift await
      yield $ length str
      example

 main :: IO ()
 main = do
  putStrLn "typechecks."

 test :: IO ()
 test = do
  doctest ["src/Main.hs"]
	-- Follow up to [1], praising @viercc's better solution [2].
	--
	-- [1] https://gist.github.com/gelisam/a8bee217410b74f030c21f782de23d11
	-- [2] https://www.reddit.com/r/haskell/comments/yb9bi4/comment/itfh07z
	--
	-- The challenge is still to implement a function which takes in three
	-- Conduits, and uses the values from the first Conduit in order to decide
	-- which of the other two Conduits to sample from next. Something like this:
	--
	-- example bools ints strings = do
	-- maybeBool <- awaitMaybe bools
	-- case maybeBool of
	-- Nothing -> do
	-- liftIO $ putStrLn "it's over"
	-- Just True -> do
	-- int <- await ints
	-- yield (int + 4)
	-- example
	-- Just False -> do
	-- str <- await strings
	-- liftIO $ putStrLn str
	-- yield $ length str
	-- example
	--
	-- And I still want a solution which works for n Conduits, not just three. But
	-- this time, instead of switching to the notoriously-complex "machines"
	-- package, I'll stick to conduit, thanks to @viercc's great tip of using
	-- Conduit _transformers_.
	{-# LANGUAGE LambdaCase, RankNTypes, ScopedTypeVariables #-}
	module Main where

	import Test.DocTest

	import Control.Monad.IO.Class (MonadIO(liftIO))
	import Control.Monad.Trans.Class (lift)
	import Data.Conduit hiding (Source)
	import Data.Functor.Identity (Identity(runIdentity))
	import Data.Void (absurd)

	-- $setup
	-- >>> import qualified Data.Conduit.Combinators as Conduit
	-- >>> import qualified Data.Conduit.List as Conduit
	-- >>> :{
	-- let testConduit
	-- :: Show o
	-- => ConduitT () o IO ()
	-- -> IO ()
	-- testConduit source = do
	-- os <- connect source Conduit.consume
	-- print os
	-- :}


	-- In the previous post, I used a type-level list to keep track of the
	-- multiple inputs. This time, we'll represent a conduit with n inputs as a
	-- stack of n 'ConduitT' transformers.

	type Source m o = ConduitT () o m ()
	type Process m a o = ConduitT a o m ()
	type Tee m a b o = ConduitT a o
	(ConduitT b Void m) ()
	type Tee3 m a b c o = ConduitT a o
	(ConduitT b Void
	(ConduitT c Void m)) ()

	-- Each 'ConduitT' layer has two type arguments; one for the elements you
	-- await from the single input you get when you use conduits the normal way,
	-- and one for the elements you send downstream. With n layers, we can thus
	-- specify n inputs, which is what we want, but we must also specify n
	-- outputs, which is n-1 too many. Using 'Void' for all but one of the outputs
	-- clarifies that only one of them is actually used.

	-- Now that we have unexpectedly managed to represent conduits which take more
	-- than one input, how can we attach those inputs? In the previous post, I
	-- defined a versatile 'polyCapL' function which could attach a 'Source' to a
	-- number of different machines:
	--
	-- -- polyCapL :: Source m a -> Process m a o -> Source m o
	-- -- polyCapL :: Source m a -> Tee m a b o -> Process m b o
	-- -- polyCapL :: Source m a -> Tee3 m a b c o -> Tee m b c o
	-- polyCapL
	-- :: Source m a1
	-- -> PolyTee m (a1 ': as) o
	-- -> PolyTee m as o
	--
	-- I would like to construct a similar function here. As before, I want to
	-- attach a single source as the first input 'a1', while leaving the remaining
	-- inputs 'as' untouched. In this conduit stack representation, the first
	-- input is the input of the outermost 'ConduitT' layer, while the remaining
	-- inputs are specified by the rest of the layers, 'mm':
	--
	-- myPolyCapL
	-- :: Source m a1 ()
	-- -> ConduitT a1 o mm r
	-- -> mm r
	--
	-- In order for this type to specialize to these,
	--
	-- myPolyCapL :: Source m a -> Process m a o -> Source m o
	-- myPolyCapL :: Source m a -> Tee m a b o -> Process m b o
	-- myPolyCapL :: Source m a -> Tee3 m a b c o -> Tee m b c o
	--
	-- I need to somehow specify that the monad at the base of the 'mm' stack must
	-- be 'm'. When 'm' is IO, this is represented using a 'MonadIO' constraint:
	--
	-- liftIO :: forall x. IO x -> mm x
	--
	-- There exists a @MonadBase m@ constraint which generalizes 'MonadIO':
	--
	-- liftBase :: forall x. m x -> mm x
	--
	-- But instead of adding an orphan @MonadBase m (ConduitT i o mm)@ instance,
	-- I'll just ask for an extra @forall x. m x -> mm x@ parameter:
	--
	-- myPolyCapL
	-- :: (forall x. m x -> mm x)
	-- => Source m a1 ()
	-- -> ConduitT a1 o mm r
	-- -> mm r

	-- Another way in which the type above isn't quite right is that the output
	-- type 'o' disappears. The fix is quite simple: instead of only concretizing
	-- the very outermost ConduitT layer and leaving the rest abstract, I
	-- concretize the _two_ outermost ConduitT layers:
	myPolyCapL
	:: forall a b o m mm r. (Monad m, Monad mm)
	=> (forall x. m x -> mm x)
	-> ConduitT () a m ()
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	myPolyCapL liftM src doubleConduit
	= connect src' doubleConduit''
	where
	-- The implementation looks very different from polyCapL's, but it's the
	-- same idea. It just so happens that the conduit API is expressive enough
	-- that we can achieve our goal via several small transformations, without
	-- having to unravel the conduits into sequences of instructions.

	src' :: ConduitT () a (ConduitT b o mm) ()
	src'
	= transPipe (lift . liftM) src

	doubleConduit' :: ConduitT a o (ConduitT b o mm) r
	doubleConduit'
	= transPipe (mapOutput absurd) doubleConduit

	-- The 'doubleConduit' transformations swap the 'Void' and 'o' output
	-- types.
	-- At this point one might wonder why I chose the convention of
	-- using 'Void' for all but the _outermost_ layer. If I had chosen the
	-- innermost layer instead, the 'o' would already be in the right
	-- position, and I wouldn't need to perform any transformations on
	-- 'doubleConduit'!
	-- The reason is simply to provide a more ergonomic experience to the
	-- user: by choosing the outermost layer, the user can emit by writing
	--
	-- emit o
	--
	-- Whereas if I had chosen the innermost layer, the user would have to
	-- write this instead.
	--
	-- lift $ lift $ emit o
	doubleConduit'' :: ConduitT a Void (ConduitT b o mm) r
	doubleConduit''
	= fuseUpstream doubleConduit' outputToInner

	outputToInner :: ConduitT o Void (ConduitT b o mm) ()
	outputToInner = do
	await >>= \case
	Just o -> do
	lift $ yield o
	outputToInner
	Nothing -> do
	pure ()

	-- The types line up, but how does this work? We're compiling down to a single layer, so how did we persuade conduit to magically create a ConduitT with more than one input?
	--
	-- The secret is, we didn't! Conduits are combined in the same way machines are: by lining up and then eliminating matching `yield` and `await` instructions. Thus, after we have attached an input and eliminated a ConduitT layer, all the `await` calls which were reading from that input have been replaced by a fragment of the code from that input, namely the code between two consecutive `yield`s. That's the magic of representing computations as a sequence of instructions, we can splice and rearrange those instructions!
	--
	-- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.


	-- Anyway, since I suspect that most conduit stacks will either have IO or Identity as
	-- a base monad, here are two specializations which fill-in the 'liftM'
	-- parameter.

	-- polyCapIO :: Source IO a -> Process IO a o -> Source IO o
	-- polyCapIO :: Source IO a -> Tee IO a b o -> Process IO b o
	-- polyCapIO :: Source IO a -> Tee3 IO a b c o -> Tee IO b c o
	polyCapIO
	:: forall a b o mm r. MonadIO mm
	=> Source IO a
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	polyCapIO
	= myPolyCapL liftIO

	polyCap
	:: forall a b o mm r. Monad mm
	=> ConduitT () a Identity ()
	-> ConduitT a o (ConduitT b Void mm) r
	-> ConduitT b o mm r
	polyCap
	= myPolyCapL (pure . runIdentity)


	-- In the previous post, I also implemented a 'polyCapR' function for
	-- converting a fully-saturated 'PolyTee' into a 'Source', so it can be used
	-- with existing machine combinators.
	--
	-- polyCapR
	-- :: PolyTee m '[] b
	-- -> Source m b
	--
	-- With the conduit layers representation, such a function is not needed.
	-- After attaching all but one of the inputs, the result is a single ConduitT
	-- layer, that is, a normal conduit which can already be used with existing
	-- conduit combinators. In particular, 'fuse' can be used to attach the last
	-- input, thus converting the conduit to a source.


	-- We can finally implement the challenge; twice, in order to exercise both
	-- the IO and Identity specializations.

	-- \|
	-- >>> :{
	-- testConduit
	-- $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
	-- $ polyCapIO (Conduit.yieldMany [1..])
	-- $ polyCapIO (Conduit.yieldMany [True, False, False, True, False])
	-- $ exampleIO
	-- :}
	-- foo
	-- bar
	-- quux
	-- it's over
	-- [5,3,3,6,4]
	exampleIO
	:: ConduitT Bool Int
	(ConduitT Int Void
	(ConduitT String Void IO))
	()
	exampleIO = do
	maybeBool <- await
	case maybeBool of
	Nothing -> do
	liftIO $ putStrLn "it's over"
	Just True -> do
	Just int <- lift await
	yield (int + 4)
	exampleIO
	Just False -> do
	Just str <- lift $ lift await
	liftIO $ putStrLn str
	yield $ length str
	exampleIO

	-- \|
	-- >>> :{
	-- testConduit
	-- $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
	-- $ polyCap (Conduit.yieldMany [1..])
	-- $ polyCap (Conduit.yieldMany [True, False, False, True, False])
	-- $ example
	-- :}
	-- [5,3,3,6,4]
	example
	:: forall m. MonadFail m
	=> ConduitT Bool Int
	(ConduitT Int Void
	(ConduitT String Void m))
	()
	example = do
	maybeBool <- await
	case maybeBool of
	Nothing -> do
	pure ()
	Just True -> do
	Just int <- lift await
	yield (int + 4)
	example
	Just False -> do
	Just str <- lift $ lift await
	yield $ length str
	example

	main :: IO ()
	main = do
	putStrLn "typechecks."

	test :: IO ()
	test = do
	doctest ["src/Main.hs"]