Skip to content

Instantly share code, notes, and snippets.

@elijahbenizzy
Created December 31, 2024 23:24
Show Gist options
  • Save elijahbenizzy/e087c4717b1a285a4970280325d0d50c to your computer and use it in GitHub Desktop.
Save elijahbenizzy/e087c4717b1a285a4970280325d0d50c to your computer and use it in GitHub Desktop.
@action(reads=["markdown_content"], writes=[])
def embed_and_store(state: State, webpage_url: str) -> State:
    """Chunk the page's Markdown content and store the chunks in LanceDB.

    Reads ``markdown_content`` from ``state``, splits it with
    ``get_text_chunks``, and adds one row per chunk to the ``chunks`` table
    of the LanceDB database at ``./webpages``. Each row records the chunk
    text, the source URL, and the chunk's position within the page.

    NOTE(review): no embedding call is made here; presumably the
    ``TextDocument`` schema carries the embedding configuration — confirm.

    Args:
        state: Application state; must contain ``markdown_content``.
        webpage_url: URL the content was fetched from, stored on every row.

    Returns:
        The input ``state``, unchanged (the action declares no writes).
    """
    markdown_content = state["markdown_content"]
    chunks = get_text_chunks(markdown_content)

    # exist_ok=True lets repeated runs reuse the table instead of failing
    # when it already exists.
    con = lancedb.connect("./webpages")
    table = con.create_table("chunks", exist_ok=True, schema=TextDocument)

    rows = [
        {"text": chunk, "url": webpage_url, "position": position}
        for position, chunk in enumerate(chunks)
    ]
    # An empty page yields no chunks; skip the write rather than handing
    # LanceDB an empty batch.
    if rows:
        table.add(rows)
    return state
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment