Skip to content

Instantly share code, notes, and snippets.

@andrevdm
Created April 20, 2025 16:11
Show Gist options
  • Save andrevdm/61528c06540a3c8c1b7f410f426a6827 to your computer and use it in GitHub Desktop.
Save andrevdm/61528c06540a3c8c1b7f410f426a6827 to your computer and use it in GitHub Desktop.
Haskell ollama demo showing very janky RAG with pgVector and ollama embedding
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE QuasiQuotes #-}
module App where
import Verset
import Control.Exception.Safe (throwString, bracket)
import qualified Database.PostgreSQL.Simple as Pg
import qualified Data.List.NonEmpty as NE
import qualified Data.Text as Txt
import qualified Data.Text.IO as Txt
import qualified Ollama as O
import qualified Pgvector as Pgv
import qualified System.Console.Haskeline as HL
import qualified System.Directory as Dir
import qualified System.FilePath as Path
import System.FilePath ((</>))
import Text.Pretty.Simple (pPrint)
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Model selection
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
summariseInputModel :: Text
summariseInputModel = "gemma3"
summariseQueryModel :: Text
--summariseQueryModel = "gemma3:12b"
summariseQueryModel = "gemma3"
-- Nomic is faster, use bge-m3 if you need better quality, but remember to change the vector size in the DB (see setup.sql, and recreate the DB)
embedModel :: Text
embedModel = "nomic-embed-text:latest"
--embedModel = "bge-m3:latest"
replModel :: Text
--replModel = "gemma3:12b"
replModel = "gemma3"
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
data ReplState = ReplState
{ rsVerbose :: !Bool
, rsHistory :: ![O.Message]
, rsReplPromptMsg :: !O.Message
}
app :: IO ()
app = do
startRepl
createConn :: IO Pg.Connection
createConn =
Pg.connect Pg.defaultConnectInfo
{ Pg.connectHost = "localhost"
, Pg.connectPort = 5532
, Pg.connectUser = "demo"
, Pg.connectPassword = ""
, Pg.connectDatabase = "demo"
}
startRepl :: IO ()
startRepl = do
--------------------------------------------------------------------------------------------
-- Index
--------------------------------------------------------------------------------------------
putText "Indexing..."
connIdx <- createConn
-- Get all the haskell files in ./_index_root
-- e.g. `git clone https://github.com/scotty-web/scotty.git ./_index_root/scotty/
let root = "./_index_root/"
fs1 <- getFilesRec root
let
fs2 = filter (\f -> Path.takeExtension f == ".hs") fs1
fs3 = Txt.pack <$> fs2
fs4 = filter (\f -> not $ Txt.isInfixOf ".git" f) fs3
fs = Txt.drop (length root) <$> fs4
-- Summarise each file, embed and store in the database
let num = length fs
for_ (zip fs [1::Int ..]) $ \(f, ix) -> do
putText $ "Indexing(" <> show ix <> "/" <> show num <> "): " <> f
docTxt <- Txt.readFile (root </> Txt.unpack f)
indexIfNotExists summariseFunctions connIdx f docTxt
pass
Pg.close connIdx
--------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------
-- REPL
--------------------------------------------------------------------------------------------
let replSysMessage = Txt.unlines
[ "Here is a list of the top level functions in this code, and a short summary of each."
]
let st0 =
ReplState
{ rsVerbose = False
, rsReplPromptMsg = O.Message O.User replSysMessage Nothing Nothing
, rsHistory = []
}
putText ""
putText ""
putText "Welcome to the REPL. Type :quit to exit."
putText "Type :verbose to see the full response."
putText "Type :quiet to hide the full response."
putText ""
HL.runInputT
(HL.Settings
{ HL.historyFile = Just "llm.repl.txt"
, HL.complete = HL.noCompletion
, HL.autoAddHistory = True
}
)
(loop st0)
--------------------------------------------------------------------------------------------
where
loop :: ReplState -> HL.InputT IO ()
loop st1 = do
minput <- HL.getInputLine "▷ "
case minput of
Nothing -> pure ()
Just ":quit" -> pure ()
Just ":quiet" -> loop $ st1 { rsVerbose = False }
Just ":verbose" -> loop $ st1 { rsVerbose = True }
Just input -> do
st2 <- liftIO . handleInput st1 $ Txt.pack input
loop st2
handleInput :: ReplState -> Text -> IO ReplState
handleInput st1 input = do
handleMessage
st1
O.Message
{ role = O.User
, content = input
, images = Nothing
, tool_calls = Nothing
}
handleMessage :: ReplState -> O.Message -> IO ReplState
handleMessage st1 inputMsg = do
-- Summarise the user's input so we can use it to search the vector database
summarisedInput <- summariseFunctions inputMsg.content
searchRes1 <- do
bracket
createConn
Pg.close
(\conn -> do
-- Search the vector database for the summarised input
-- Get max 4 results
search conn summarisedInput 4
)
-- Only use the vector search results
-- You could use the FTS results as well, but in this demo I'm not using them
-- To use them do: `fst searchRes1 <> snd searchRes1`
let searchRes2 = fst searchRes1
let searchRes = searchRes2 <&> \(src, content, _score) -> "In file: " <> src <> "\n" <> content
-- Create history messages for the search results (RAG)
let ragMessages = searchRes <&> \content -> O.Message O.System content Nothing Nothing
-- Create the input messages
let msgs = [] -- message history
-- Start with the last 10 messages
<> (reverse . take 10 . reverse . excludeSystemMsgs . excludeReplPromptMsg $ st1.rsHistory)
<> [st1.rsReplPromptMsg] -- Keep the repl prompt message
<> ragMessages
<> [inputMsg] -- The current user message
chatRes <- chat msgs
if st1.rsVerbose
then do
putText ""
putText "=================="
pPrint chatRes
else
putText $ maybe "" O.content chatRes.message
pure $ st1 { rsHistory = msgs <> maybe [] (:[]) chatRes.message }
where
-- Model to use. Install with `ollama pull <model_name>`
-- Make sure it has the tool capability (use `ollama show <model_name>` to check)
-- And look at the Berkeley leaderboard to pick a good model for function calls. https://gorilla.cs.berkeley.edu/leaderboard.html
myCopt = mkCopt replModel
chat msgs = do
let chatOpts = myCopt (NE.fromList msgs)
chatRes' <- O.chat chatOpts
case chatRes' of
Left e -> throwString e
Right r' -> pure r'
excludeReplPromptMsg msgs = filter (\m -> m.content /= st1.rsReplPromptMsg.content) msgs
excludeSystemMsgs msgs = filter (\m -> m.role /= O.System) msgs
mkCopt :: Text -> NE.NonEmpty O.Message -> O.ChatOps
mkCopt modelName msg =
-- Chat options, you could use O.defaultChatOptions
O.ChatOps
{ chatModelName = modelName
, messages = msg
, tools = Nothing
, format = Nothing
, keepAlive = Nothing
, hostUrl = Nothing
, responseTimeOut = Nothing
, options = Nothing
, stream = Nothing
}
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Summarise
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
summariseFunctions :: Text -> IO Text
summariseFunctions txt = do
let prompt = Txt.unlines
[ "You are a source code summarization expert."
, "List all the top level functions defined in this code and a short summary of each, no premble, no welcome, no other text, just the name and summary."
, "Following that add a short summary of the file"
, "Do not add any additional information or context"
]
let opts =
O.GenerateOps
{ O.modelName = summariseInputModel
, O.prompt = txt
, O.suffix = Nothing
, O.images = Nothing
, O.format = Nothing
, O.system = Just prompt
, O.template = Nothing
, O.stream = Nothing
, O.raw = Nothing
, O.keepAlive = Nothing
, O.hostUrl = Nothing
, O.responseTimeOut = Nothing
, O.options = Nothing
}
O.generate opts >>= \case
Left e -> throwString e
Right res -> pure res.response_
summariseQuery :: Text -> IO Text
summariseQuery txt = do
let prompt = Txt.unlines
[ "You are a source code summarization expert."
, "The user will provide a question about the code."
, "Convert the user question into a concise search query that can be used to find relevant text passages. Remove question wording and rephrase as a statement."
, "Do not add any additional information or context."
, "Never return anything other than the summary text."
, "Dont guess function names, just summarise the query"
]
let opts =
O.GenerateOps
{ O.modelName = summariseQueryModel
, O.prompt = txt
, O.suffix = Nothing
, O.images = Nothing
, O.format = Nothing
, O.system = Just prompt
, O.template = Nothing
, O.stream = Nothing
, O.raw = Nothing
, O.keepAlive = Nothing
, O.hostUrl = Nothing
, O.responseTimeOut = Nothing
, O.options = Nothing
}
O.generate opts >>= \case
Left e -> throwString e
Right res -> pure res.response_
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Embedding
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
embedd :: Text -> IO O.EmbeddingResp
embedd txt = do
O.embedding embedModel txt >>= \case
Left e -> throwString e
Right r' -> pure r'
reindex :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
reindex preprocessText conn docId docText = do
-- Remove all old entries
_ <- Pg.execute conn "delete from paragraph where document_id = ?" (Pg.Only docId)
putText $ " Embedding: " <> docId
summaryText <- preprocessText docText
-- putText ""
-- putText ""
-- putText docId
-- putText "-----------------------------------------"
-- putText summaryText
-- putText "-----------------------------------------"
-- putText ""
-- putText ""
-- putText ""
embeddedParagraph' <- embedd summaryText
let embeddedParagraph = embeddedParagraph'.embedding_
-- Get embedding as Pgvector so it can be inserted
let vs = Pgv.Vector <$> embeddedParagraph
-- There might be multiple embeddings
for_ (zip vs [1::Int ..]) $ \(vec, eidx) -> do
-- Combined index
let idx :: Text = show eidx
Pg.execute conn
"INSERT INTO paragraph \
\(document_id, paragraph_index, content, embedding, source) \
\VALUES (?, ?, ?, ?, ?)"
(docId, idx, summaryText, vec, docId)
indexIfNotExists :: (Text -> IO Text) -> Pg.Connection -> Text -> Text -> IO ()
indexIfNotExists preprocessText conn docId docText = do
res :: [Pg.Only Text] <- Pg.query conn "SELECT document_id FROM paragraph WHERE document_id = ? limit 1" (Pg.Only docId)
case res of
[] -> reindex preprocessText conn docId docText
_ -> putText $ " Document " <> docId <> " already indexed."
search :: Pg.Connection -> Text -> Int -> IO ([(Text, Text, Double)], [(Text, Text, Double)])
search conn txt limit = do
eres <- O.embedding embedModel txt >>= \case
Left e -> throwString e
Right r' -> pure r'
let emb = take 1 $ eres.embedding_ <&> \v -> Pgv.Vector v
rs <- for emb $ \vec -> do
vecRes :: [(Text, Text, Double)] <- Pg.query conn
"SELECT source, content, \
\ 1 - (embedding <#> ?) AS similarity \
\ FROM paragraph \
\ ORDER BY embedding <#> ? \
\ LIMIT ?"
(vec, vec, limit)
ftsRes :: [(Text, Text, Double)] <- Pg.query conn
"SELECT source, content, \
\ ts_rank_cd(content_tsv, plainto_tsquery('english', ?)) AS rank \
\ FROM paragraph \
\ WHERE content_tsv @@ plainto_tsquery('english', ?) \
\ ORDER BY rank DESC \
\ LIMIT ?"
(txt, txt, limit)
pure (vecRes, ftsRes)
pure (concat $ fst <$> rs, concat $ snd <$> rs)
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- File system utils
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
getFilesRec :: FilePath -> IO [FilePath]
getFilesRec = getFilesRec' (const True)
getFilesRec' :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
getFilesRec' continueFn p =
if continueFn p
then do
fs <- getFiles p
ds <- getDirs p
cs <- traverse (getFilesRec' continueFn) ds
pure $ fs <> join cs
else
pure []
getDirs :: FilePath -> IO [FilePath]
getDirs p = do
entries <- (p </>) <<$>> Dir.listDirectory p
filterM Dir.doesDirectoryExist entries
getFiles :: FilePath -> IO [FilePath]
getFiles p = do
entries <- (p </>) <<$>> Dir.listDirectory p
filterM Dir.doesFileExist entries
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- NB as postgres user
create database demo;
CREATE USER demo;
GRANT ALL PRIVILEGES ON DATABASE demo TO demo;
\connect demo;
ALTER SCHEMA public OWNER TO demo;
SET search_path = public;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- NB change to demo user
CREATE TABLE paragraph (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
document_id TEXT NOT NULL,
paragraph_index TEXT NOT NULL,
content TEXT NOT NULL,
embedding VECTOR(768) NOT NULL, -- 1024 for bge-m3:latest, 768 for nomic-embed-text:latest
source TEXT,
inserted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
content_tsv tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED
);
-- Indexes
CREATE INDEX ON paragraph
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX ON paragraph
USING GIN (content_tsv);
CREATE INDEX ON paragraph
USING GIN (content gin_trgm_ops); -- Optional, for fuzzy searches
@andrevdm
Copy link
Author

andrevdm commented Apr 20, 2025

Very very rough, but shows

  • pgvector tables
  • postgress full text support, if you wanted to to a hybrid vector + full text search (FTS not actually used here though)
  • summarising documents with the LLM (might not always be a good idea, depends on use case)
  • embedding with ollama
  • summarising the user's query (you might want a tool call to do this conditionally...)
  • embed summarised user query and search
  • Prompts to try guide the LLM to do the correct thing

Again very rough, barely useful for code indexing. But it kinda works, and gives an idea of how everything fits together.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment