-
-
Save MaTriXy/258abbbd4e0bb5026ec496939c42507c to your computer and use it in GitHub Desktop.
This script automates scrolling through the r/sidehustle subreddit and scores each post's relevance to AI and prompt engineering. It uses a browser-automation agent, a controller that registers custom actions, and Pydantic models to validate and save the structured results.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dotenv import load_dotenv | |
from langchain_openai import ChatOpenAI | |
from pydantic import BaseModel | |
from browser_use import ActionResult, Agent, Controller | |
from browser_use.browser.context import BrowserContext | |
from browser_use.browser.browser import Browser, BrowserConfig | |
import asyncio | |
import os | |
import json | |
import re | |
# Load credentials from a local .env file (the dotenv import was previously
# unused, so any key placed there was ignored).
load_dotenv()
# Fall back to a placeholder only when no key was supplied via the
# environment or .env — the original unconditional assignment overwrote any
# real key with an empty string.
os.environ.setdefault("OPENAI_API_KEY", "")

# Registry of custom actions the agent is allowed to invoke.
controller = Controller()
# Define the structure for API documentation
class APIEndpoint(BaseModel):
    # One HTTP endpoint description. Every field is nullable so partially
    # extracted endpoints still validate.
    # NOTE(review): this model (and APIDocumentation below) is never referenced
    # elsewhere in this script — presumably left over from an earlier task;
    # confirm before removing.
    path: str | None         # endpoint path, presumably a URL path — verify
    method: str | None       # presumably the HTTP verb — verify
    description: str | None  # human-readable summary of the endpoint
    parameters: list | None  # schema of list items not constrained here
    request: dict | None     # example/schema of the request payload
    response: dict | None    # example/schema of the response payload
class APIDocumentation(BaseModel):
    # Aggregate of all extracted endpoints plus authentication details.
    # NOTE(review): unused in this script — see note on APIEndpoint.
    endpoints: list[APIEndpoint]
    authentication: dict  # shape of the auth info is not constrained here
class RedditPost(BaseModel):
    # One scraped Reddit post. Fields other than the title are nullable so a
    # post still validates when the agent could not extract every value.
    title: str
    content: str | None
    upvotes: int | None
    comments_count: int | None
    relevance_score: float | None  # How relevant the post is to prompt engineering
class RedditScan(BaseModel):
    # Final structured output of the scan: the posts judged relevant plus a
    # count of how many posts were examined in total.
    relevant_posts: list[RedditPost]
    total_posts_scanned: int
@controller.registry.action('Done with task', param_model=RedditScan)
async def done(params: RedditScan):
    """Terminal action: serialize the scan results and signal completion."""
    # is_done=True tells the agent loop to stop; the JSON payload becomes the
    # run's final result.
    return ActionResult(is_done=True, extracted_content=params.model_dump_json())
@controller.registry.action('Scroll and analyze', requires_browser=True)
async def scroll_and_analyze(browser: BrowserContext):
    """Scroll the page repeatedly so lazy-loaded posts render.

    Scrolls to the bottom of the page ten times, pausing after each pass for
    content to load, and opportunistically clicks any "More"/"Load" button
    between passes.

    Args:
        browser: Browser context whose current page is scrolled.
    """
    # Compile once outside the loop; re.IGNORECASE already makes the match
    # case-insensitive, so the [Mm]/[Ll] character classes the original used
    # were redundant.
    more_pattern = re.compile(r"more|load", re.IGNORECASE)
    for _ in range(10):  # number of scroll passes; adjust as needed
        await browser.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        await asyncio.sleep(2)  # Wait for content to load
        try:
            # click() returns None, so its result is not kept (the original
            # bound it to an unused variable). Best-effort: the button may
            # not exist on every pass.
            await browser.page.get_by_role("button", name=more_pattern).click()
            await asyncio.sleep(2)
        except Exception:
            pass  # Button might not exist yet
    return ActionResult(extracted_content="Scrolled and analyzed posts")
# Add this before main()
# Shared browser instance used by the agent; created eagerly at module load.
browser = Browser(
    config=BrowserConfig(
        headless=False,  # Set to True if you don't want to see the browser window
        # NOTE(review): disable_security relaxes browser security features —
        # confirm this is actually required for Reddit scraping.
        disable_security=True,
    )
)
def _save_results(result: str) -> None:
    """Validate the agent's raw JSON output and write it to disk.

    Args:
        result: JSON string expected to match the RedditScan schema.
    """
    try:
        parsed = RedditScan.model_validate_json(result)
        # Write out to a file
        with open("reddit_analysis.json", "w", encoding="utf-8") as f:
            json.dump(parsed.model_dump(), f, indent=4, ensure_ascii=False)
        print("✅ Results saved successfully to reddit_analysis.json")
    except Exception as e:
        # Broad catch is deliberate at this top-level boundary: report the
        # failure and dump the raw payload for debugging instead of crashing.
        print(f"❌ Error processing result: {e}")
        print(f"Raw result: {result}")


async def main():
    """Run the browser agent over r/sidehustle and persist its findings."""
    agent = Agent(
        task="""Go to https://www.reddit.com/r/sidehustle/
Scroll through the posts and analyze each one for relevance to prompt engineering or AI
Look for posts discussing:
- Side hustles related to AI/ChatGPT
- Questions about automation or writing
- Opportunities for prompt engineering services
- Content creation with AI
Keep scrolling and clicking 'More' when available
Score each post's relevance to prompt engineering (0-1)
Return findings using the done action with the RedditScan structure""",
        llm=ChatOpenAI(model="gpt-4o"),
        controller=controller,
        browser=browser,
    )
    history = await agent.run()
    result = history.final_result()
    if result:
        _save_results(result)
if __name__ == "__main__":
    # Entry point: drive the async workflow to completion.
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment