@rafaelquintanilha
Created December 16, 2024 23:34
Use Google Gemini Flash 2.0 for Sentiment Analysis
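"""Scrape a stock-news article, extract Brazilian stock tickers with a regex, and ask
Gemini 2.0 Flash for a structured per-ticker sentiment (positive/negative/neutral) plus a reason."""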
import os
import re
import time
from typing import List, Literal
import dotenv
from bs4 import BeautifulSoup
from google import genai
from google.genai import types
from playwright.sync_api import sync_playwright
from pydantic import BaseModel
dotenv.load_dotenv()
# GH url: https://github.com/googleapis/python-genai
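# Expects GEMINI_API_KEY to be set in the environment (or in the .env file loaded above)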
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

class TickerSentiment(BaseModel):
    ticker: str
    sentiment: Literal["positive", "negative", "neutral"]
    reason: str

def get_news_sentiment(text: str, ticker: str) -> TickerSentiment:
    prompt = f"""
    You are a financial analyst. You are given a news article and a stock ticker.
    You need to analyze the news article and determine the sentiment of the stock ticker.
    The sentiment can be positive, negative or neutral. Only say it is positive if the news article is about the stock ticker and the news is good for the stock. Only say it is negative if the news article is about the stock ticker and the news is bad for the stock.
    You need to return a JSON object with the following fields:
    - ticker: the stock ticker
    - sentiment: the sentiment of the stock ticker
    - reason: the reason for the sentiment
    The news article is:
    {text}
    The stock ticker is:
    {ticker}
    """ # noqa
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=TickerSentiment,
        ),
    )
    # With response_schema set, the SDK also exposes the JSON parsed into the Pydantic model,
    # which matches this function's return type annotation
    return response.parsed
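
# Example call (hypothetical article text and values):
# get_news_sentiment("Embraer sells 12 Super Tucano aircraft to Portugal...", "EMBR3")
# -> TickerSentiment(ticker="EMBR3", sentiment="positive", reason="...")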

def extract_tickers(text: str) -> List[str]:
    """Extract stock tickers from text using a regex pattern."""
    # Pattern for Brazilian stock tickers (4-6 uppercase letters followed by 1-2 digits)
    pattern = r"\b[A-Z]{4,6}[0-9]{1,2}\b"
    return list(set(re.findall(pattern, text)))
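
# For example, given the hypothetical headline "PETR4 rises while VALE3 falls",
# extract_tickers would return ["PETR4", "VALE3"] (order may vary, since a set is used to de-duplicate)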

# Use playwright to extract the text from the news article
def extract_news_text(url: str) -> str:
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        text = page.content()
        soup = BeautifulSoup(text, "html.parser")
        if "tradingview.com" in url or "infomoney.com.br" in url:
            # get element with tag <article> and parse with BeautifulSoup
            article = soup.find("article")
            return article.get_text()
        elif "br.investing.com" in url:
            # get div with id article
            article = soup.find("div", id="article")
            return article.get_text()
    raise ValueError(f"Unknown provider: {url}")

def get_sentiments_for_url(url: str) -> List[TickerSentiment]:
    print(f"Getting sentiments for url: {url}")
    text = extract_news_text(url)
    tickers = extract_tickers(text)
    print(f"Found tickers: {tickers}")
    sentiments = []
    for ticker in tickers:
        sentiment = get_news_sentiment(text, ticker)
        sentiments.append(sentiment)
        print(f"Sentiment for {ticker}: {sentiment}")
    return sentiments

def parse_tradingview_news() -> None:
    url = "https://www.tradingview.com/markets/stocks-brazil/news/"
    soup = None
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        # wait for 2 seconds
        time.sleep(2)
        # open with beautifulsoup
        soup = BeautifulSoup(page.content(), "html.parser")
        browser.close()
    # get the element with class js-news-category-page-container
    container = soup.find("div", class_="js-news-category-page-container")
    # get all anchor tags <a> within the container
    anchors = container.find_all("a")
    all_news_urls = []
    for anchor in anchors:
        href = anchor["href"]
        all_news_urls.append(f"https://www.tradingview.com{href}")
    # remove the last url
    all_news_urls.pop()
    print(f"Found {len(all_news_urls)} news urls")
    for news_url in all_news_urls:
        get_sentiments_for_url(news_url)

# Example with Tradingview:
# url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L2N3MZ0QM:0-brazil-s-ecorodovias-wins-highway-concession-in-sao-paulo-shares-plunge/"
url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L1N3NH0VC:0-brazil-s-embraer-sells-12-a-29n-super-tucano-aircrafts-to-portugal/"
# Example with Infomoney:
# url = "https://www.infomoney.com.br/mercados/luiz-barsi-neto-licoes-historias-caso-oi-no-mercado-financeiro/"
get_sentiments_for_url(url)
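
# To scan every article currently listed on the TradingView Brazil news page
# instead of a single URL, call parse_tradingview_news() instead.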