andrevdm · April 20, 2025 16:11 · andrevdm · Apr 20, 2025
diff --git a/App.hs b/App.hs
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedRecordDot #-}
 {-# LANGUAGE DuplicateRecordFields #-}
 {-# LANGUAGE QuasiQuotes #-}

 module App where

 import           Verset
 import           Control.Exception.Safe (throwString, bracket)
 import qualified Database.PostgreSQL.Simple as Pg
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Text as Txt
 import qualified Data.Text.IO as Txt
 import qualified Ollama as O
 import qualified Pgvector as Pgv
 import qualified System.Console.Haskeline as HL
 import qualified System.Directory as Dir
 import qualified System.FilePath as Path
 import           System.FilePath ((</>))
 import           Text.Pretty.Simple (pPrint)

 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 -- Model selection
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 summariseInputModel :: Text
 summariseInputModel = "gemma3"


 summariseQueryModel :: Text
 --summariseQueryModel = "gemma3:12b"
 summariseQueryModel = "gemma3"


 -- Nomic is faster, use bge-m3 if you need better quality, but remember to change the vector size in the DB (see setup.sql, and recreate the DB)
 embedModel :: Text
 embedModel = "nomic-embed-text:latest"
 --embedModel = "bge-m3:latest"


 replModel :: Text
 --replModel = "gemma3:12b"
 replModel = "gemma3"
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



 data ReplState = ReplState
  { rsVerbose :: !Bool
  , rsHistory :: ![O.Message]
  , rsReplPromptMsg :: !O.Message
  }

 app :: IO ()
 app = do
  startRepl


 createConn :: IO Pg.Connection
 createConn =
  Pg.connect Pg.defaultConnectInfo
        { Pg.connectHost = "localhost"
        , Pg.connectPort = 5532
        , Pg.connectUser = "demo"
        , Pg.connectPassword = ""
        , Pg.connectDatabase = "demo"
        }


 startRepl :: IO ()
 startRepl = do
  --------------------------------------------------------------------------------------------
  -- Index
  --------------------------------------------------------------------------------------------
  putText "Indexing..."
  connIdx <- createConn

  -- Get all the haskell files in ./_index_root
  -- e.g. `git clone https://github.com/scotty-web/scotty.git ./_index_root/scotty/

  let root = "./_index_root/"
  fs1 <- getFilesRec root
  let
    fs2 = filter (\f -> Path.takeExtension f == ".hs") fs1
    fs3 = Txt.pack <$> fs2
    fs4 = filter (\f -> not $ Txt.isInfixOf ".git" f) fs3
    fs = Txt.drop (length root) <$> fs4


  -- Summarise each file, embed and store in the database
  let num = length fs
  for_ (zip fs [1::Int ..]) $ \(f, ix) -> do
    putText $ "Indexing(" <> show ix <> "/" <> show num <> "): " <> f
    docTxt <- Txt.readFile (root </> Txt.unpack f)
    indexIfNotExists summariseFunctions connIdx f docTxt
    pass

  Pg.close connIdx
  --------------------------------------------------------------------------------------------



  --------------------------------------------------------------------------------------------
  -- REPL
  --------------------------------------------------------------------------------------------
  let replSysMessage = Txt.unlines
       [ "Here is a list of the top level functions in this code, and a short summary of each."
       ]

  let st0 =
       ReplState
         { rsVerbose = False
         , rsReplPromptMsg = O.Message O.User replSysMessage Nothing Nothing
         , rsHistory = []
         }


  putText ""
  putText ""
  putText "Welcome to the REPL. Type :quit to exit."
  putText "Type :verbose to see the full response."
  putText "Type :quiet to hide the full response."
  putText ""

  HL.runInputT
    (HL.Settings
      { HL.historyFile = Just "llm.repl.txt"
      , HL.complete = HL.noCompletion
      , HL.autoAddHistory = True
      }
    )
    (loop st0)
  --------------------------------------------------------------------------------------------

  where
     loop :: ReplState -> HL.InputT IO ()
     loop st1 = do
        minput <- HL.getInputLine "▷ "
        case minput of
          Nothing -> pure ()
          Just ":quit" -> pure ()
          Just ":quiet" -> loop $ st1 { rsVerbose = False }
          Just ":verbose" -> loop $ st1 { rsVerbose = True }
          Just input -> do
            st2 <- liftIO . handleInput st1 $ Txt.pack input
            loop st2



 handleInput :: ReplState -> Text -> IO ReplState
 handleInput st1 input = do
  handleMessage
    st1
    O.Message
      { role = O.User
      , content = input
      , images = Nothing
      , tool_calls = Nothing
      }



 handleMessage :: ReplState -> O.Message -> IO ReplState
 handleMessage st1 inputMsg = do
  -- Summarise the user's input so we can use it to search the vector database
  summarisedInput <- summariseFunctions inputMsg.content
  searchRes1 <- do
    bracket
      createConn
      Pg.close
      (\conn -> do
        -- Search the vector database for the summarised input
        -- Get max 4 results
        search conn summarisedInput 4
      )

  -- Only use the vector search results
  -- You could use the FTS results as well, but in this demo I'm not using them
  -- To use them do: `fst searchRes1 <> snd searchRes1`
  let searchRes2 = fst searchRes1
  let searchRes = searchRes2 <&> \(src, content, _score) -> "In file: " <> src <> "\n" <> content

  -- Create history messages for the search results (RAG)
  let ragMessages = searchRes <&> \content -> O.Message O.System content Nothing Nothing

  -- Create the input messages
  let msgs = [] -- message history
       -- Start with the last 10 messages
       <> (reverse . take 10 . reverse . excludeSystemMsgs . excludeReplPromptMsg $ st1.rsHistory)
       <> [st1.rsReplPromptMsg] -- Keep the repl prompt message
       <> ragMessages
       <> [inputMsg] -- The current user message

  chatRes <- chat msgs

  if st1.rsVerbose
    then do
      putText ""
      putText "=================="
      pPrint chatRes
    else
      putText $ maybe "" O.content chatRes.message

  pure $ st1 { rsHistory = msgs <> maybe [] (:[]) chatRes.message }


  where
    -- Model to use. Install with `ollama pull <model_name>`
    --   Make sure it has the tool capability (use `ollama show <model_name>` to check)
    --   And look at the Berkeley leaderboard to pick a good model for function calls. https://gorilla.cs.berkeley.edu/leaderboard.html
    myCopt = mkCopt replModel

    chat msgs = do
      let chatOpts = myCopt (NE.fromList msgs)
      chatRes' <- O.chat chatOpts

      case chatRes' of
        Left e -> throwString e
        Right r' -> pure r'

    excludeReplPromptMsg msgs = filter (\m -> m.content /= st1.rsReplPromptMsg.content) msgs
    excludeSystemMsgs msgs = filter (\m -> m.role /= O.System) msgs





 mkCopt :: Text -> NE.NonEmpty O.Message -> O.ChatOps
 mkCopt modelName msg =
  -- Chat options, you could use O.defaultChatOptions
  O.ChatOps
    { chatModelName = modelName
    , messages = msg
    , tools = Nothing
    , format = Nothing
    , keepAlive = Nothing
    , hostUrl = Nothing
    , responseTimeOut = Nothing
    , options = Nothing
    , stream = Nothing
    }
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 -- Summarise
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 summariseFunctions :: Text -> IO Text
 summariseFunctions txt = do
  let prompt = Txt.unlines
       [ "You are a source code summarization expert."
       , "List all the top level functions defined in this code and a short summary of each, no premble, no welcome, no other text, just the name and summary."
       , "Following that add a short summary of the file"
       , "Do not add any additional information or context"
       ]

  let opts =
       O.GenerateOps
         { O.modelName = summariseInputModel
         , O.prompt = txt
         , O.suffix = Nothing
         , O.images = Nothing
         , O.format = Nothing
         , O.system = Just prompt
         , O.template = Nothing
         , O.stream = Nothing
         , O.raw = Nothing
         , O.keepAlive = Nothing
         , O.hostUrl = Nothing
         , O.responseTimeOut = Nothing
         , O.options = Nothing
         }

  O.generate opts >>= \case
    Left e -> throwString e
    Right res -> pure res.response_



 summariseQuery :: Text -> IO Text
 summariseQuery txt = do
  let prompt = Txt.unlines
       [ "You are a source code summarization expert."
       , "The user will provide a question about the code."
       , "Convert the user question into a concise search query that can be used to find relevant text passages. Remove question wording and rephrase as a statement."
       , "Do not add any additional information or context."
       , "Never return anything other than the summary text."
       , "Dont guess function names, just summarise the query"
       ]

  let opts =
       O.GenerateOps
         { O.modelName = summariseQueryModel
         , O.prompt = txt
         , O.suffix = Nothing
         , O.images = Nothing
         , O.format = Nothing
         , O.system = Just prompt
         , O.template = Nothing
         , O.stream = Nothing
         , O.raw = Nothing
         , O.keepAlive = Nothing
         , O.hostUrl = Nothing
         , O.responseTimeOut = Nothing
         , O.options = Nothing
         }

  O.generate opts >>= \case
    Left e -> throwString e
    Right res -> pure res.response_
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------





 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 -- Embedding
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 embedd :: Text -> IO O.EmbeddingResp
 embedd txt = do
  O.embedding embedModel txt >>= \case
    Left e -> throwString e
    Right r' -> pure r'


 reindex :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
 reindex preprocessText conn docId docText = do
  -- Remove all old entries
  _ <- Pg.execute conn "delete from paragraph where document_id = ?" (Pg.Only docId)

  putText $ "   Embedding: " <> docId

  summaryText <- preprocessText docText

  -- putText ""
  -- putText ""
  -- putText docId
  -- putText "-----------------------------------------"
  -- putText summaryText
  -- putText "-----------------------------------------"
  -- putText ""
  -- putText ""
  -- putText ""

  embeddedParagraph' <- embedd summaryText
  let embeddedParagraph = embeddedParagraph'.embedding_

  -- Get embedding as Pgvector so it can be inserted
  let vs = Pgv.Vector <$> embeddedParagraph

  -- There might be multiple embeddings
  for_ (zip vs [1::Int ..]) $ \(vec, eidx) -> do
    -- Combined index
    let idx :: Text = show eidx

    Pg.execute conn
      "INSERT INTO paragraph \
      \(document_id, paragraph_index, content, embedding, source) \
      \VALUES (?, ?, ?, ?, ?)"
      (docId, idx, summaryText, vec, docId)


 indexIfNotExists :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
 indexIfNotExists preprocessText conn docId docText = do
  res :: [Pg.Only Text] <- Pg.query conn "SELECT document_id FROM paragraph WHERE document_id = ? limit 1" (Pg.Only docId)

  case res of
    [] -> reindex preprocessText conn docId docText
    _ -> putText $ "   Document " <> docId <> " already indexed."


 search :: Pg.Connection -> Text -> Int -> IO ([(Text, Text, Double)], [(Text, Text, Double)])
 search conn txt limit = do
  eres <- O.embedding embedModel txt >>= \case
    Left e -> throwString e
    Right r' -> pure r'

  let emb = take 1 $ eres.embedding_ <&> \v -> Pgv.Vector v

  rs <- for emb $ \vec -> do
    vecRes :: [(Text, Text, Double)] <- Pg.query conn
        "SELECT source, content, \
        \       1 - (embedding <#> ?) AS similarity \
        \  FROM paragraph \
        \ ORDER BY embedding <#> ? \
        \ LIMIT ?"
        (vec, vec, limit)


    ftsRes :: [(Text, Text, Double)] <- Pg.query conn
      "SELECT source, content, \
      \       ts_rank_cd(content_tsv, plainto_tsquery('english', ?)) AS rank \
      \  FROM paragraph \
      \ WHERE content_tsv @@ plainto_tsquery('english', ?) \
      \ ORDER BY rank DESC \
      \ LIMIT ?"
      (txt, txt, limit)

    pure (vecRes, ftsRes)

  pure (concat $ fst <$> rs, concat $ snd <$> rs)
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 -- File system utils
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 getFilesRec :: FilePath -> IO [FilePath]
 getFilesRec = getFilesRec' (const True)

 getFilesRec' :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
 getFilesRec' continueFn p =
  if continueFn p
     then do
       fs <- getFiles p
       ds <- getDirs p
       cs <- traverse (getFilesRec' continueFn) ds
       pure $ fs <> join cs
     else
       pure []


 getDirs :: FilePath -> IO [FilePath]
 getDirs p = do
  entries <- (p </>) <<$>> Dir.listDirectory p
  filterM Dir.doesDirectoryExist entries


 getFiles :: FilePath -> IO [FilePath]
 getFiles p = do
  entries <- (p </>) <<$>> Dir.listDirectory p
  filterM Dir.doesFileExist entries
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/setup.sql b/setup.sql
 -- NB as postgres user

 create database demo;
 CREATE USER demo;
 GRANT ALL PRIVILEGES ON DATABASE demo TO demo;
 \connect demo;
 ALTER SCHEMA public OWNER TO demo;
 SET search_path = public;


 CREATE EXTENSION IF NOT EXISTS vector;
 CREATE EXTENSION IF NOT EXISTS pg_trgm;
 CREATE EXTENSION IF NOT EXISTS "uuid-ossp";



 -- NB change to demo user


 CREATE TABLE paragraph (
  id               UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
  document_id      TEXT        NOT NULL,
  paragraph_index  TEXT        NOT NULL,
  content          TEXT        NOT NULL,
  embedding        VECTOR(768) NOT NULL,  -- 1024 for bge-m3:latest, 768 for nomic-embed-text:latest
  source           TEXT,
  inserted_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  content_tsv      tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED
 );

 -- Indexes
 CREATE INDEX ON paragraph
  USING ivfflat (embedding vector_cosine_ops)
  WITH (lists = 100);

 CREATE INDEX ON paragraph
  USING GIN (content_tsv);

 CREATE INDEX ON paragraph
  USING GIN (content gin_trgm_ops); -- Optional, for fuzzy searches
	{-# LANGUAGE OverloadedStrings #-}
	{-# LANGUAGE NoImplicitPrelude #-}
	{-# LANGUAGE OverloadedRecordDot #-}
	{-# LANGUAGE DuplicateRecordFields #-}
	{-# LANGUAGE QuasiQuotes #-}

	module App where

	import Verset
	import Control.Exception.Safe (throwString, bracket)
	import qualified Database.PostgreSQL.Simple as Pg
	import qualified Data.List.NonEmpty as NE
	import qualified Data.Text as Txt
	import qualified Data.Text.IO as Txt
	import qualified Ollama as O
	import qualified Pgvector as Pgv
	import qualified System.Console.Haskeline as HL
	import qualified System.Directory as Dir
	import qualified System.FilePath as Path
	import System.FilePath ((</>))
	import Text.Pretty.Simple (pPrint)

	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	-- Model selection
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	summariseInputModel :: Text
	summariseInputModel = "gemma3"


	summariseQueryModel :: Text
	--summariseQueryModel = "gemma3:12b"
	summariseQueryModel = "gemma3"


	-- Nomic is faster, use bge-m3 if you need better quality, but remember to change the vector size in the DB (see setup.sql, and recreate the DB)
	embedModel :: Text
	embedModel = "nomic-embed-text:latest"
	--embedModel = "bge-m3:latest"


	replModel :: Text
	--replModel = "gemma3:12b"
	replModel = "gemma3"
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



	data ReplState = ReplState
	{ rsVerbose :: !Bool
	, rsHistory :: ![O.Message]
	, rsReplPromptMsg :: !O.Message
	}

	app :: IO ()
	app = do
	startRepl


	createConn :: IO Pg.Connection
	createConn =
	Pg.connect Pg.defaultConnectInfo
	{ Pg.connectHost = "localhost"
	, Pg.connectPort = 5532
	, Pg.connectUser = "demo"
	, Pg.connectPassword = ""
	, Pg.connectDatabase = "demo"
	}


	startRepl :: IO ()
	startRepl = do
	--------------------------------------------------------------------------------------------
	-- Index
	--------------------------------------------------------------------------------------------
	putText "Indexing..."
	connIdx <- createConn

	-- Get all the haskell files in ./_index_root
	-- e.g. `git clone https://github.com/scotty-web/scotty.git ./_index_root/scotty/

	let root = "./_index_root/"
	fs1 <- getFilesRec root
	let
	fs2 = filter (\f -> Path.takeExtension f == ".hs") fs1
	fs3 = Txt.pack <$> fs2
	fs4 = filter (\f -> not $ Txt.isInfixOf ".git" f) fs3
	fs = Txt.drop (length root) <$> fs4


	-- Summarise each file, embed and store in the database
	let num = length fs
	for_ (zip fs [1::Int ..]) $ \(f, ix) -> do
	putText $ "Indexing(" <> show ix <> "/" <> show num <> "): " <> f
	docTxt <- Txt.readFile (root </> Txt.unpack f)
	indexIfNotExists summariseFunctions connIdx f docTxt
	pass

	Pg.close connIdx
	--------------------------------------------------------------------------------------------



	--------------------------------------------------------------------------------------------
	-- REPL
	--------------------------------------------------------------------------------------------
	let replSysMessage = Txt.unlines
	[ "Here is a list of the top level functions in this code, and a short summary of each."
	]

	let st0 =
	ReplState
	{ rsVerbose = False
	, rsReplPromptMsg = O.Message O.User replSysMessage Nothing Nothing
	, rsHistory = []
	}


	putText ""
	putText ""
	putText "Welcome to the REPL. Type :quit to exit."
	putText "Type :verbose to see the full response."
	putText "Type :quiet to hide the full response."
	putText ""

	HL.runInputT
	(HL.Settings
	{ HL.historyFile = Just "llm.repl.txt"
	, HL.complete = HL.noCompletion
	, HL.autoAddHistory = True
	}
	)
	(loop st0)
	--------------------------------------------------------------------------------------------

	where
	loop :: ReplState -> HL.InputT IO ()
	loop st1 = do
	minput <- HL.getInputLine "▷ "
	case minput of
	Nothing -> pure ()
	Just ":quit" -> pure ()
	Just ":quiet" -> loop $ st1 { rsVerbose = False }
	Just ":verbose" -> loop $ st1 { rsVerbose = True }
	Just input -> do
	st2 <- liftIO . handleInput st1 $ Txt.pack input
	loop st2



	handleInput :: ReplState -> Text -> IO ReplState
	handleInput st1 input = do
	handleMessage
	st1
	O.Message
	{ role = O.User
	, content = input
	, images = Nothing
	, tool_calls = Nothing
	}



	handleMessage :: ReplState -> O.Message -> IO ReplState
	handleMessage st1 inputMsg = do
	-- Summarise the user's input so we can use it to search the vector database
	summarisedInput <- summariseFunctions inputMsg.content
	searchRes1 <- do
	bracket
	createConn
	Pg.close
	(\conn -> do
	-- Search the vector database for the summarised input
	-- Get max 4 results
	search conn summarisedInput 4
	)

	-- Only use the vector search results
	-- You could use the FTS results as well, but in this demo I'm not using them
	-- To use them do: `fst searchRes1 <> snd searchRes1`
	let searchRes2 = fst searchRes1
	let searchRes = searchRes2 <&> \(src, content, _score) -> "In file: " <> src <> "\n" <> content

	-- Create history messages for the search results (RAG)
	let ragMessages = searchRes <&> \content -> O.Message O.System content Nothing Nothing

	-- Create the input messages
	let msgs = [] -- message history
	-- Start with the last 10 messages
	<> (reverse . take 10 . reverse . excludeSystemMsgs . excludeReplPromptMsg $ st1.rsHistory)
	<> [st1.rsReplPromptMsg] -- Keep the repl prompt message
	<> ragMessages
	<> [inputMsg] -- The current user message

	chatRes <- chat msgs

	if st1.rsVerbose
	then do
	putText ""
	putText "=================="
	pPrint chatRes
	else
	putText $ maybe "" O.content chatRes.message

	pure $ st1 { rsHistory = msgs <> maybe [] (:[]) chatRes.message }


	where
	-- Model to use. Install with `ollama pull <model_name>`
	-- Make sure it has the tool capability (use `ollama show <model_name>` to check)
	-- And look at the Berkeley leaderboard to pick a good model for function calls. https://gorilla.cs.berkeley.edu/leaderboard.html
	myCopt = mkCopt replModel

	chat msgs = do
	let chatOpts = myCopt (NE.fromList msgs)
	chatRes' <- O.chat chatOpts

	case chatRes' of
	Left e -> throwString e
	Right r' -> pure r'

	excludeReplPromptMsg msgs = filter (\m -> m.content /= st1.rsReplPromptMsg.content) msgs
	excludeSystemMsgs msgs = filter (\m -> m.role /= O.System) msgs





	mkCopt :: Text -> NE.NonEmpty O.Message -> O.ChatOps
	mkCopt modelName msg =
	-- Chat options, you could use O.defaultChatOptions
	O.ChatOps
	{ chatModelName = modelName
	, messages = msg
	, tools = Nothing
	, format = Nothing
	, keepAlive = Nothing
	, hostUrl = Nothing
	, responseTimeOut = Nothing
	, options = Nothing
	, stream = Nothing
	}
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	-- Summarise
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	summariseFunctions :: Text -> IO Text
	summariseFunctions txt = do
	let prompt = Txt.unlines
	[ "You are a source code summarization expert."
	, "List all the top level functions defined in this code and a short summary of each, no premble, no welcome, no other text, just the name and summary."
	, "Following that add a short summary of the file"
	, "Do not add any additional information or context"
	]

	let opts =
	O.GenerateOps
	{ O.modelName = summariseInputModel
	, O.prompt = txt
	, O.suffix = Nothing
	, O.images = Nothing
	, O.format = Nothing
	, O.system = Just prompt
	, O.template = Nothing
	, O.stream = Nothing
	, O.raw = Nothing
	, O.keepAlive = Nothing
	, O.hostUrl = Nothing
	, O.responseTimeOut = Nothing
	, O.options = Nothing
	}

	O.generate opts >>= \case
	Left e -> throwString e
	Right res -> pure res.response_



	summariseQuery :: Text -> IO Text
	summariseQuery txt = do
	let prompt = Txt.unlines
	[ "You are a source code summarization expert."
	, "The user will provide a question about the code."
	, "Convert the user question into a concise search query that can be used to find relevant text passages. Remove question wording and rephrase as a statement."
	, "Do not add any additional information or context."
	, "Never return anything other than the summary text."
	, "Dont guess function names, just summarise the query"
	]

	let opts =
	O.GenerateOps
	{ O.modelName = summariseQueryModel
	, O.prompt = txt
	, O.suffix = Nothing
	, O.images = Nothing
	, O.format = Nothing
	, O.system = Just prompt
	, O.template = Nothing
	, O.stream = Nothing
	, O.raw = Nothing
	, O.keepAlive = Nothing
	, O.hostUrl = Nothing
	, O.responseTimeOut = Nothing
	, O.options = Nothing
	}

	O.generate opts >>= \case
	Left e -> throwString e
	Right res -> pure res.response_
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------





	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	-- Embedding
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	embedd :: Text -> IO O.EmbeddingResp
	embedd txt = do
	O.embedding embedModel txt >>= \case
	Left e -> throwString e
	Right r' -> pure r'


	reindex :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
	reindex preprocessText conn docId docText = do
	-- Remove all old entries
	_ <- Pg.execute conn "delete from paragraph where document_id = ?" (Pg.Only docId)

	putText $ " Embedding: " <> docId

	summaryText <- preprocessText docText

	-- putText ""
	-- putText ""
	-- putText docId
	-- putText "-----------------------------------------"
	-- putText summaryText
	-- putText "-----------------------------------------"
	-- putText ""
	-- putText ""
	-- putText ""

	embeddedParagraph' <- embedd summaryText
	let embeddedParagraph = embeddedParagraph'.embedding_

	-- Get embedding as Pgvector so it can be inserted
	let vs = Pgv.Vector <$> embeddedParagraph

	-- There might be multiple embeddings
	for_ (zip vs [1::Int ..]) $ \(vec, eidx) -> do
	-- Combined index
	let idx :: Text = show eidx

	Pg.execute conn
	"INSERT INTO paragraph \
	\(document_id, paragraph_index, content, embedding, source) \
	\VALUES (?, ?, ?, ?, ?)"
	(docId, idx, summaryText, vec, docId)


	indexIfNotExists :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
	indexIfNotExists preprocessText conn docId docText = do
	res :: [Pg.Only Text] <- Pg.query conn "SELECT document_id FROM paragraph WHERE document_id = ? limit 1" (Pg.Only docId)

	case res of
	[] -> reindex preprocessText conn docId docText
	_ -> putText $ " Document " <> docId <> " already indexed."


	search :: Pg.Connection -> Text -> Int -> IO ([(Text, Text, Double)], [(Text, Text, Double)])
	search conn txt limit = do
	eres <- O.embedding embedModel txt >>= \case
	Left e -> throwString e
	Right r' -> pure r'

	let emb = take 1 $ eres.embedding_ <&> \v -> Pgv.Vector v

	rs <- for emb $ \vec -> do
	vecRes :: [(Text, Text, Double)] <- Pg.query conn
	"SELECT source, content, \
	\ 1 - (embedding <#> ?) AS similarity \
	\ FROM paragraph \
	\ ORDER BY embedding <#> ? \
	\ LIMIT ?"
	(vec, vec, limit)


	ftsRes :: [(Text, Text, Double)] <- Pg.query conn
	"SELECT source, content, \
	\ ts_rank_cd(content_tsv, plainto_tsquery('english', ?)) AS rank \
	\ FROM paragraph \
	\ WHERE content_tsv @@ plainto_tsquery('english', ?) \
	\ ORDER BY rank DESC \
	\ LIMIT ?"
	(txt, txt, limit)

	pure (vecRes, ftsRes)

	pure (concat $ fst <$> rs, concat $ snd <$> rs)
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	-- File system utils
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	getFilesRec :: FilePath -> IO [FilePath]
	getFilesRec = getFilesRec' (const True)

	getFilesRec' :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
	getFilesRec' continueFn p =
	if continueFn p
	then do
	fs <- getFiles p
	ds <- getDirs p
	cs <- traverse (getFilesRec' continueFn) ds
	pure $ fs <> join cs
	else
	pure []


	getDirs :: FilePath -> IO [FilePath]
	getDirs p = do
	entries <- (p </>) <<$>> Dir.listDirectory p
	filterM Dir.doesDirectoryExist entries


	getFiles :: FilePath -> IO [FilePath]
	getFiles p = do
	entries <- (p </>) <<$>> Dir.listDirectory p
	filterM Dir.doesFileExist entries
	--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	-- NB as postgres user

	create database demo;
	CREATE USER demo;
	GRANT ALL PRIVILEGES ON DATABASE demo TO demo;
	\connect demo;
	ALTER SCHEMA public OWNER TO demo;
	SET search_path = public;


	CREATE EXTENSION IF NOT EXISTS vector;
	CREATE EXTENSION IF NOT EXISTS pg_trgm;
	CREATE EXTENSION IF NOT EXISTS "uuid-ossp";



	-- NB change to demo user


	CREATE TABLE paragraph (
	id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
	document_id TEXT NOT NULL,
	paragraph_index TEXT NOT NULL,
	content TEXT NOT NULL,
	embedding VECTOR(768) NOT NULL, -- 1024 for bge-m3:latest, 768 for nomic-embed-text:latest
	source TEXT,
	inserted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
	content_tsv tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED
	);

	-- Indexes
	CREATE INDEX ON paragraph
	USING ivfflat (embedding vector_cosine_ops)
	WITH (lists = 100);

	CREATE INDEX ON paragraph
	USING GIN (content_tsv);

	CREATE INDEX ON paragraph
	USING GIN (content gin_trgm_ops); -- Optional, for fuzzy searches