#!/usr/bin/env python3
"""
Simple LangGraph solution with 3 tools:
1. Fetch web content using BeautifulSoup
2. Summarize content using OLLAMA
3. Persist summarized content to local file
"""
import os
import uuid

import requests
from bs4 import BeautifulSoup
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

# Initialize the model; the key is read from the environment rather than hardcoded
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=os.getenv("OPENAI_API_KEY"))
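# A local-model alternative, as the module docstring suggests (a sketch, assuming an
# Ollama server is running locally and the named model has already been pulled):
# from langchain_ollama import ChatOllama
# llm = ChatOllama(model="llama3", temperature=0)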
@tool
def fetch_web_content(url: str) -> str:
    """
    Fetches text content from a given remote web page.

    Args:
        url: The URL of the web page to fetch

    Returns:
        str: The extracted text content, or an error message on failure
    """
    print("========================")
    print("Fetching web content...")
    print("========================")
    try:
        # verify=False disables TLS certificate checks; avoid outside local testing
        response = requests.get(url, timeout=10, verify=False)
        response.raise_for_status()
        # Use BeautifulSoup to parse the HTML and extract text
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text(separator=' ', strip=True)
        print("Content fetched and parsed successfully.")
        return text
    except requests.exceptions.RequestException as e:
        return f"Error fetching content: {e}"
    except Exception as e:
        return f"Error parsing HTML: {e}"
@tool
def summarize_content(content: str) -> str:
    """
    Summarizes any text/content.

    Args:
        content: The content to summarize

    Returns:
        str: The summarized content
    """
    print("========================")
    print("Summarizing content...")
    print("========================")
    # Create a prompt template for summarization
    summary_prompt = PromptTemplate.from_template(
        "Please provide a concise summary of the following text in not more than "
        "10 lines as plain continuous text only:\n\n{text}"
    )
    # Feed the raw content in as {text}, run the model, and parse out a plain string
    summarization_chain = {"text": RunnablePassthrough()} | summary_prompt | llm | StrOutputParser()
    return summarization_chain.invoke(content)
@tool
def persist_to_file(content: str, filename: str = None) -> str:
    """
    Persists summarized content to a local file.

    Args:
        content: The content to save
        filename: Optional filename (if not provided, a random name is generated)

    Returns:
        str: Confirmation message with the file path
    """
    print("========================")
    print("Persisting content...")
    print("========================")
    # Honor the caller-supplied filename; otherwise generate a unique one
    file_name = filename if filename else str(uuid.uuid4()) + ".txt"
    try:
        with open(file_name, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Successfully created and wrote content to file: {file_name}")
        return f"File '{file_name}' created successfully with summarized content."
    except Exception as e:
        print(f"Error creating file: {e}")
        return f"Error creating file: {e}"
def create_web_scraper_agent():
    # Tools the agent may call
    tools = [fetch_web_content, summarize_content, persist_to_file]
    # Create the agent using LangGraph's prebuilt ReAct agent
    agent = create_react_agent(llm, tools)
    return agent
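# For debugging, the agent's intermediate steps can be streamed instead of invoked
# in one shot (a sketch using LangGraph's stream API; "values" mode yields the full
# message list after each step, and the prompt here is only an example):
# for step in create_web_scraper_agent().stream(
#         {"messages": [HumanMessage(content="Fetch and summarize https://example.com")]},
#         stream_mode="values"):
#     step["messages"][-1].pretty_print()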
def run_web_scraper(url: str, filename: str = None):
    # Create the agent
    agent = create_web_scraper_agent()
    # Build the task message; step 3 names the target file only when one was supplied
    if filename:
        save_step = f"3. Save the summary to a file named: {filename} using persist_to_file tool"
    else:
        save_step = "3. Save the summary to a file using persist_to_file tool"
    task = f"""
    Please help me with the following workflow:
    1. Fetch content from this URL: {url} using fetch_web_content tool
    2. Summarize the content using summarize_content tool
    {save_step}
    Execute the above steps in order.
    """
    # Run the agent
    print("Starting web scraper workflow...")
    print(f"Target URL: {url}")
    print("-" * 50)
    try:
        # Execute the workflow
        result = agent.invoke({"messages": [HumanMessage(content=task)]})
        print("\nWorkflow completed!")
        print("Final result:")
        print(result["messages"][-1].content)
    except Exception as e:
        print(f"Error running workflow: {e}")
# Example usage
if __name__ == "__main__":
    # Example 1: Basic usage with automatic filename
    example_url = "https://www.usbank.com/credit-cards/cash-plus-visa-signature-credit-card.html"
    print("Example 1: Basic usage")
    run_web_scraper(example_url)
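    # Example 2 (hypothetical filename): the agent forwards it to persist_to_file
    # print("Example 2: Custom filename")
    # run_web_scraper(example_url, filename="usbank_summary.txt")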