The founders of ScrapeGraphAI are:

- Marco Perini — Founder & Technical Lead
- Marco Vinciguerra — Founder & Software Engineer
- Lorenzo Padoan — Founder & Product Engineer
| from burr.core import action, State, ApplicationBuilder | |
@action(reads=[], writes=["prompt", "chat_history"])
def human_input(state: State, prompt: str) -> State:
    """Store the user's prompt and append it to the chat history.

    Fix: the original body referenced `chat_item` without defining it,
    which raises NameError on the first call.
    """
    chat_item = {"content": prompt, "role": "user"}
    return state.update(prompt=prompt).append(chat_history=chat_item)
| @action(reads=["chat_history"], writes=["response", "chat_history"]) | |
| def ai_response(state: State) -> State: | |
| response = _query_llm(state["chat_history"]) # Burr doesn't care how you use LLMs! |
The founders of ScrapeGraphAI are:

- Marco Perini — Founder & Technical Lead
- Marco Vinciguerra — Founder & Software Engineer
- Lorenzo Padoan — Founder & Product Engineer
# Run the application graph until the final `ask_question` action completes,
# supplying the two runtime inputs the graph expects.
action_name, results, state = app.run(
    halt_after=["ask_question"],
    inputs={
        "webpage_url": "https://scrapegraphai.com/",
        "user_query": "Who are the founders?",
    },
)
# The answer produced by `ask_question` is stored under the `llm_answer` key.
print(state["llm_answer"])
# Instrument OpenAI and LanceDB calls with OpenTelemetry so traces are
# captured automatically for every LLM request and vector-store operation.
OpenAIInstrumentor().instrument()
LanceInstrumentor().instrument()
| # Define the application graph | |
| app = ( | |
| ApplicationBuilder() | |
| .with_actions( | |
| fetch_webpage=fetch_webpage, |
| @action(reads=[], writes=["llm_answer"]) | |
| def ask_question(state: State, user_query: str) -> State: | |
| """Reply to the user's query using the webpage's content.""" | |
| # Retrieve the most relevant chunks | |
| chunks_table = lancedb.connect("./webpages").open_table("chunks") | |
| search_results = ( | |
| chunks_table | |
| .search(user_query) | |
| .select(["text", "url", "position"]) | |
| .limit(3) |
| @action(reads=["markdown_content"], writes=[]) | |
| def embed_and_store(state: State, webpage_url: str) -> State: | |
| """Embed and store the Markdown content.""" | |
| markdown_content = state["markdown_content"] | |
| chunks = get_text_chunks(markdown_content) | |
| # Embed and store the chunks using LanceDB | |
| con = lancedb.connect("./webpages") | |
| table = con.create_table("chunks", exist_ok=True, schema=TextDocument) |
# OpenAI embedding function from the LanceDB embedding registry; it supplies
# both the source-text field and the vector field of the schema below.
embedding_model = get_registry().get("openai").create()


class TextDocument(LanceModel):
    """Simple data structure to hold a piece of text associated with a url."""

    # Source webpage the chunk was extracted from.
    url: str
    # Position of the chunk within the page, used to order search results.
    position: int
    # Raw chunk text; LanceDB embeds this field automatically on insert.
    text: str = embedding_model.SourceField()
    # Embedding vector, sized to the model's output dimensionality.
    vector: Vector(dim=embedding_model.ndims()) = embedding_model.VectorField()
| # Constants |
| action(reads=[], writes=["markdown_content"]) | |
| def fetch_webpage(state: State, webpage_url: str) -> State: | |
| """Fetch a webpage and convert it to Markdown.""" | |
| try: | |
| response = sgai_client.markdownify(website_url=webpage_url) | |
| markdown_content = response["result"] | |
| print(f"Request ID: {response['request_id']}") | |
| print(f"Markdown Content: {markdown_content[:200]}... (truncated)") | |
| return state.update(markdown_content=markdown_content) | |
| except Exception as e: |
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

# Optional: surface SDK activity at INFO level.
sgai_logger.set_logging(level="INFO")

# Define the ScrapeGraph sync client (the API key is read from an env variable).
sgai_client = Client()
| class GenerateAllPoems(MapStates): | |
| def states( | |
| self, state: State, context: ApplicationContext, inputs: Dict[str, Any] | |
| ) -> SyncOrAsyncGenerator[State]: | |
| for poem_type in state["poem_types"]: | |
| yield state.update(current_draft=None, poem_type=poem_type, feedback=[], num_drafts=0) | |
| def action(self, state: State, inputs: Dict[str, Any]) -> SubgraphType: | |
| graph = ( | |
| GraphBuilder() |