#!/usr/bin/env python3
"""
Simple LangGraph solution with 3 tools:
1. Fetch web content using BeautifulSoup
2. Summarize content using an LLM (ChatOpenAI here; a local Ollama model also works, see the note below)
3. Persist summarized content to a local file
"""
import uuid

import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
# Initialize the model. ChatOpenAI reads the API key from the OPENAI_API_KEY
# environment variable, so no key needs to be hardcoded here.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
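# As the docstring notes, a local Ollama model also works here; a minimal
# sketch of the swap (the model name is an assumption; use whatever you have
# pulled locally):
#
#   from langchain_ollama import ChatOllama
#   llm = ChatOllama(model="llama3.1", temperature=0)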
@tool
def fetch_web_content(url: str) -> str:
    """
    Fetches text content from a given remote web page.
    Args:
        url: The URL of the web page to fetch
    Returns:
        str: The extracted text content from the web page, or an error message
    """
    print("========================")
    print("Fetching web content...")
    print("========================")
    try:
        # NOTE: verify=False disables TLS certificate checks; fine for a quick
        # demo, but drop it (or set verify=True) in real code.
        response = requests.get(url, timeout=10, verify=False)
        response.raise_for_status()
        # Use BeautifulSoup to parse the HTML and extract the visible text
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text(separator=' ', strip=True)
        print("Content fetched and parsed successfully.")
        return text
    except requests.exceptions.RequestException as e:
        return f"Error fetching content: {e}"
    except Exception as e:
        return f"Error parsing HTML: {e}"
@tool
def summarize_content(content: str) -> str:
    """
    Summarizes any text/content.
    Args:
        content: The content to summarize
    Returns:
        str: The summarized content
    """
    print("========================")
    print("Summarizing content...")
    print("========================")
    # Create a prompt template for summarization
    summary_prompt = PromptTemplate.from_template(
        "Please provide a concise summary of the following text in not more than 10 lines as plain continuous text only:\n\n{text}"
    )
    # LCEL chain: pass the raw input through as {text}, render the prompt,
    # call the model, and parse the response down to a plain string.
    summarization_chain = {"text": RunnablePassthrough()} | summary_prompt | llm | StrOutputParser()
    summary = summarization_chain.invoke(content)
    return summary
@tool
def persist_to_file(content: str, filename: str = None) -> str:
    """
    Persists summarized content to a local file.
    Args:
        content: The content to save
        filename: Optional filename (if not provided, a random name is generated)
    Returns:
        str: Confirmation message with file path
    """
    print("========================")
    print("Persist content...")
    print("========================")
    # Honor a caller-supplied filename; otherwise fall back to a random name
    file_name = filename if filename else str(uuid.uuid4()) + ".txt"
    try:
        with open(file_name, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Successfully created and wrote content to file: {file_name}")
        return f"File '{file_name}' created successfully with summarized content."
    except Exception as e:
        print(f"Error creating file: {e}")
        return f"Error creating file: {e}"
def create_web_scraper_agent():
    # List of tools
    tools = [fetch_web_content, summarize_content, persist_to_file]
    # Create the agent using the prebuilt create_react_agent
    agent = create_react_agent(llm, tools)
    return agent
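# create_react_agent returns a compiled LangGraph graph, so besides .invoke()
# it can stream intermediate steps, which helps when debugging tool calls.
# A minimal sketch:
#
#   for step in create_web_scraper_agent().stream(
#       {"messages": [HumanMessage(content="your task here")]},
#       stream_mode="values",
#   ):
#       step["messages"][-1].pretty_print()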
def run_web_scraper(url: str, filename: str = None):
    # Create the agent
    agent = create_web_scraper_agent()
    # Create the task message
    if filename:
        task = f"""
        Please help me with the following workflow:
        1. Fetch content from this URL: {url} using fetch_web_content tool
        2. Summarize the content using summarize_content tool
        3. Save the summary to a file named: {filename} using persist_to_file tool
        Execute the above steps in order.
        """
    else:
        task = f"""
        Please help me with the following workflow:
        1. Fetch content from this URL: {url} using fetch_web_content tool
        2. Summarize the content using summarize_content tool
        3. Save the summary to a file using persist_to_file tool
        Execute the above steps in order.
        """
    # Run the agent
    print("Starting web scraper workflow...")
    print(f"Target URL: {url}")
    print("-" * 50)
    try:
        # Execute the workflow
        result = agent.invoke({"messages": [HumanMessage(content=task)]})
        print("\nWorkflow completed!")
        print("Final result:")
        print(result["messages"][-1].content)
    except Exception as e:
        print(f"Error running workflow: {str(e)}")
# Example usage
if __name__ == "__main__":
    # Example 1: Basic usage with automatic filename
    example_url = "https://www.usbank.com/credit-cards/cash-plus-visa-signature-credit-card.html"
    print("Example 1: Basic usage")
    run_web_scraper(example_url)
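    # Example 2 (sketch): exercise the optional filename parameter so the
    # summary lands at a predictable path; the name below is illustrative.
    # run_web_scraper(example_url, filename="cash_plus_summary.txt")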