Skip to content

Instantly share code, notes, and snippets.

@Idan707
Last active January 14, 2025 12:51
Show Gist options
  • Save Idan707/44de5020e0a9a9dc9849597f64e4539c to your computer and use it in GitHub Desktop.
Save Idan707/44de5020e0a9a9dc9849597f64e4539c to your computer and use it in GitHub Desktop.
This code automates scrolling through the r/sidehustle subreddit and analyzes its posts for relevance to AI and prompt engineering. It uses a browser automation tool, a controller for managing tasks, and structured output for saving the results.
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.context import BrowserContext
from browser_use.browser.browser import Browser, BrowserConfig
import asyncio
import os
import json
import re
# Pull settings from a local .env file, if one exists, so the OpenAI key does
# not have to be hard-coded in this script. With its default arguments
# load_dotenv() does not override variables already present in the environment.
load_dotenv()

# Make sure the variable exists without clobbering a real key: assigning ""
# unconditionally would erase a key supplied by the shell or by the .env file.
os.environ.setdefault("OPENAI_API_KEY", "")

# Shared controller; the custom actions defined below register themselves on
# its registry.
controller = Controller()
# Define the structure for API documentation.
#
# APIEndpoint is the schema for a single endpoint entry. Every field accepts
# its annotated type or None, and none of them declares a default value.
# NOTE(review): this model is not referenced anywhere else in the visible part
# of this file -- possibly left over from another script; confirm before
# relying on it.
class APIEndpoint(BaseModel):
    path: str | None
    method: str | None
    description: str | None
    parameters: list | None
    request: dict | None
    response: dict | None
# Container model: a list of APIEndpoint entries plus a free-form
# authentication mapping. Both fields are declared without defaults.
# NOTE(review): not referenced elsewhere in the visible part of this file.
class APIDocumentation(BaseModel):
    endpoints: list[APIEndpoint]
    authentication: dict
# One Reddit post as reported back by the agent. Only the title is a plain
# str; the remaining fields also accept None.
class RedditPost(BaseModel):
    title: str
    content: str | None
    upvotes: int | None
    comments_count: int | None
    # How relevant the post is to prompt engineering (the agent task asks for
    # a score between 0 and 1).
    relevance_score: float | None
# Overall scan result: the posts reported as relevant plus a count of posts
# scanned. Used as the parameter model of the 'Done with task' action and to
# validate the agent's final result in main().
class RedditScan(BaseModel):
    relevant_posts: list[RedditPost]
    total_posts_scanned: int
@controller.registry.action('Done with task', param_model=RedditScan)
async def done(params: RedditScan):
    """Finish the task and hand back the scan as a JSON string.

    Args:
        params: The completed scan, shaped as a ``RedditScan``.

    Returns:
        An ``ActionResult`` flagged ``is_done=True`` whose
        ``extracted_content`` is ``params`` serialized with
        ``model_dump_json()``.
    """
    scan_json = params.model_dump_json()
    return ActionResult(is_done=True, extracted_content=scan_json)
@controller.registry.action('Scroll and analyze', requires_browser=True)
async def scroll_and_analyze(browser: BrowserContext):
    """Scroll the current page to the bottom repeatedly to load more posts.

    Each round scrolls to the end of the document, pauses so new content can
    load, then makes a best-effort attempt to click a button whose accessible
    name matches "more" or "load". This function only scrolls and clicks; it
    does not inspect the posts itself.

    Args:
        browser: Browser context supplied by the controller; its ``page``
            attribute is used to scroll and to locate the button.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is a fixed status
        message.
    """
    scroll_rounds = 10  # Adjust number of scrolls as needed
    settle_seconds = 2  # Pause that gives lazily loaded content time to appear
    # Bound the click wait. Without an explicit timeout the click waits for
    # Playwright's default action timeout (30 s unless configured otherwise)
    # on every round in which no matching button exists.
    click_timeout_ms = 2_000
    # Compiled once instead of on every loop iteration.
    more_button_name = re.compile(r"[Mm]ore|[Ll]oad", re.IGNORECASE)

    for _ in range(scroll_rounds):
        await browser.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        await asyncio.sleep(settle_seconds)

        # Try to click "Show more" or "Load more" buttons if they exist.
        try:
            await browser.page.get_by_role("button", name=more_button_name).click(
                timeout=click_timeout_ms
            )
        except Exception:
            # Deliberately best-effort: the button might not exist yet, or the
            # click may fail for another reason; either way keep scrolling.
            continue
        else:
            # A click succeeded; wait for the extra content it loads.
            await asyncio.sleep(settle_seconds)

    return ActionResult(extracted_content="Scrolled and analyzed posts")
# Module-level browser instance that main() passes to the Agent.
# NOTE(review): disable_security=True presumably relaxes browser security
# checks -- confirm that this is actually required for the scan.
_browser_config = BrowserConfig(
    headless=False,  # Set to True if you don't want to see the browser window
    disable_security=True,
)
browser = Browser(config=_browser_config)
async def main():
    """Run the Reddit-scanning agent and save its structured result.

    Builds an ``Agent`` from the task prompt, a ``gpt-4o`` chat model and the
    module-level ``controller`` and ``browser``, then runs it. A non-empty
    final result is validated as a ``RedditScan`` and written to
    ``reddit_analysis.json``. Errors raised while validating or writing are
    printed together with the raw result instead of propagating. The
    module-level browser is closed once the agent run ends, whether it
    succeeded or raised.
    """
    task = (
        "Go to https://www.reddit.com/r/sidehustle/\n"
        "Scroll through the posts and analyze each one for relevance to prompt engineering or AI\n"
        "Look for posts discussing:\n"
        "- Side hustles related to AI/ChatGPT\n"
        "- Questions about automation or writing\n"
        "- Opportunities for prompt engineering services\n"
        "- Content creation with AI\n"
        "Keep scrolling and clicking 'More' when available\n"
        "Score each post's relevance to prompt engineering (0-1)\n"
        "Return findings using the done action with the RedditScan structure"
    )
    agent = Agent(
        task=task,
        llm=ChatOpenAI(model="gpt-4o"),
        controller=controller,
        browser=browser,
    )

    try:
        history = await agent.run()
    finally:
        # Release the browser even when the run fails; otherwise the browser
        # started for this script is left open.
        await browser.close()

    result = history.final_result()
    if not result:
        # Make the "nothing to save" outcome visible instead of exiting silently.
        print("⚠️ Agent finished without a final result; nothing was saved.")
        return

    try:
        parsed = RedditScan.model_validate_json(result)
        # Write out to a file
        with open("reddit_analysis.json", "w", encoding="utf-8") as f:
            json.dump(parsed.model_dump(), f, indent=4, ensure_ascii=False)
        print("✅ Results saved successfully to reddit_analysis.json")
    except Exception as e:
        # Top-level script boundary: report the failure and show the raw
        # payload so the run's output is not lost.
        print(f"❌ Error processing result: {e}")
        print(f"Raw result: {result}")
# Script entry point: only when executed directly (not when imported), run the
# async main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment