The founders of ScrapeGraphAI are:
- Marco Perini - Founder & Technical Lead
- Marco Vinciguerra - Founder & Software Engineer
- Lorenzo Padoan - Founder & Product Engineer
from burr.core import action, State, ApplicationBuilder


@action(reads=[], writes=["prompt", "chat_history"])
def human_input(state: State, prompt: str) -> State:
    """Store the user's prompt in state and append it to the chat history."""
    # your code -- write what you want here!
    # NOTE(review): `chat_item` is not defined in this excerpt -- it is
    # presumably built in the placeholder section above; confirm before running.
    updated = state.update(prompt=prompt)
    return updated.append(chat_history=chat_item)
@action(reads=["chat_history"], writes=["response", "chat_history"]) | |
def ai_response(state: State) -> State: | |
response = _query_llm(state["chat_history"]) # Burr doesn't care how you use LLMs! |
The founders of ScrapeGraphAI are:
- Marco Perini - Founder & Technical Lead
- Marco Vinciguerra - Founder & Software Engineer
- Lorenzo Padoan - Founder & Product Engineer
# Run the application graph until the question-answering action has
# produced its result, then print the LLM's answer.
run_inputs = {
    "webpage_url": "https://scrapegraphai.com/",
    "user_query": "Who are the founders?",
}
action_name, results, state = app.run(
    halt_after=["ask_question"],
    inputs=run_inputs,
)
print(state["llm_answer"])
# Instrument OpenAI and LanceDB with OpenTelemetry so that LLM calls and
# vector-store operations are traced.
OpenAIInstrumentor().instrument()
LanceInstrumentor().instrument()
# Define the application graph | |
app = ( | |
ApplicationBuilder() | |
.with_actions( | |
fetch_webpage=fetch_webpage, |
@action(reads=[], writes=["llm_answer"]) | |
def ask_question(state: State, user_query: str) -> State: | |
"""Reply to the user's query using the webpage's content.""" | |
# Retrieve the most relevant chunks | |
chunks_table = lancedb.connect("./webpages").open_table("chunks") | |
search_results = ( | |
chunks_table | |
.search(user_query) | |
.select(["text", "url", "position"]) | |
.limit(3) |
@action(reads=["markdown_content"], writes=[]) | |
def embed_and_store(state: State, webpage_url: str) -> State: | |
"""Embed and store the Markdown content.""" | |
markdown_content = state["markdown_content"] | |
chunks = get_text_chunks(markdown_content) | |
# Embed and store the chunks using LanceDB | |
con = lancedb.connect("./webpages") | |
table = con.create_table("chunks", exist_ok=True, schema=TextDocument) |
# OpenAI embedding function registered with LanceDB; it supplies both the
# source-text field and the vector field of the schema below.
embedding_model = get_registry().get("openai").create()


class TextDocument(LanceModel):
    """A chunk of text from a webpage, together with its source URL and position."""

    url: str  # source page the chunk came from
    position: int  # ordinal position of the chunk within the page
    # raw chunk text; LanceDB embeds this field automatically on insert
    text: str = embedding_model.SourceField()
    # embedding vector sized to the model's output dimension
    vector: Vector(dim=embedding_model.ndims()) = embedding_model.VectorField()
# Constants |
action(reads=[], writes=["markdown_content"]) | |
def fetch_webpage(state: State, webpage_url: str) -> State: | |
"""Fetch a webpage and convert it to Markdown.""" | |
try: | |
response = sgai_client.markdownify(website_url=webpage_url) | |
markdown_content = response["result"] | |
print(f"Request ID: {response['request_id']}") | |
print(f"Markdown Content: {markdown_content[:200]}... (truncated)") | |
return state.update(markdown_content=markdown_content) | |
except Exception as e: |
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

# Optional: surface client-side logs at INFO level.
sgai_logger.set_logging(level="INFO")

# Synchronous ScrapeGraphAI client; the API key is read from an
# environment variable (per the original snippet's comment).
sgai_client = Client()
class GenerateAllPoems(MapStates): | |
def states(
    self, state: State, context: ApplicationContext, inputs: Dict[str, Any]
) -> SyncOrAsyncGenerator[State]:
    """Yield one fresh per-poem sub-state for each requested poem type."""
    for kind in state["poem_types"]:
        # Each mapped state starts with an empty draft/feedback slate.
        yield state.update(
            poem_type=kind,
            current_draft=None,
            feedback=[],
            num_drafts=0,
        )
def action(self, state: State, inputs: Dict[str, Any]) -> SubgraphType: | |
graph = ( | |
GraphBuilder() |