Use Google Gemini Flash 2.0 for Sentiment Analysis
import os
import re
import time
from typing import List, Literal

import dotenv
from bs4 import BeautifulSoup
from google import genai
from google.genai import types
from playwright.sync_api import sync_playwright
from pydantic import BaseModel

dotenv.load_dotenv()

# GH url: https://github.com/googleapis/python-genai
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

class TickerSentiment(BaseModel):
    ticker: str
    sentiment: Literal["positive", "negative", "neutral"]
    reason: str

def get_news_sentiment(text: str, ticker: str) -> TickerSentiment:
    prompt = f"""
    You are a financial analyst. You are given a news article and a stock ticker.
    You need to analyze the news article and determine the sentiment of the stock ticker.
    The sentiment can be positive, negative or neutral. Only say it is positive if the news article is about the stock ticker and the news is good for the stock. Only say it is negative if the news article is about the stock ticker and the news is bad for the stock.
    You need to return a JSON object with the following fields:
    - ticker: the stock ticker
    - sentiment: the sentiment of the stock ticker
    - reason: the reason for the sentiment
    The news article is:
    {text}
    The stock ticker is:
    {ticker}
    """  # noqa
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=TickerSentiment,
        ),
    )
    # Parse the raw JSON string into the declared Pydantic model so the return
    # value actually matches the TickerSentiment annotation (response.text is a str).
    return TickerSentiment.model_validate_json(response.text)

def extract_tickers(text: str) -> List[str]:
    """Extract stock tickers from text using a regex pattern."""
    # Pattern for Brazilian stock tickers (4-6 uppercase letters followed by 1-2 digits)
    pattern = r"\b[A-Z]{4,6}[0-9]{1,2}\b"
    return list(set(re.findall(pattern, text)))

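# For example, extract_tickers("PETR4 subiu enquanto VALE3 recuou") returns
# ["PETR4", "VALE3"] (in no particular order, since a set is used to dedupe).
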
# Use playwright to extract the text from the news article
def extract_news_text(url: str) -> str:
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        text = page.content()
        browser.close()
    soup = BeautifulSoup(text, "html.parser")
    if "tradingview.com" in url or "infomoney.com.br" in url:
        # get the element with tag <article> and parse with BeautifulSoup
        article = soup.find("article")
        return article.get_text()
    elif "br.investing.com" in url:
        # get the div with id "article"
        article = soup.find("div", id="article")
        return article.get_text()
    raise ValueError(f"Unknown provider: {url}")

def get_sentiments_for_url(url: str) -> List[TickerSentiment]:
    print(f"Getting sentiments for url: {url}")
    text = extract_news_text(url)
    tickers = extract_tickers(text)
    print(f"Found tickers: {tickers}")
    sentiments = []
    for ticker in tickers:
        sentiment = get_news_sentiment(text, ticker)
        sentiments.append(sentiment)
        print(f"Sentiment for {ticker}: {sentiment}")
    return sentiments

def parse_tradingview_news() -> None:
    url = "https://www.tradingview.com/markets/stocks-brazil/news/"
    soup = None
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        # wait for 2 seconds
        time.sleep(2)
        # open with beautifulsoup
        soup = BeautifulSoup(page.content(), "html.parser")
        browser.close()
    # get the element with class js-news-category-page-container
    container = soup.find("div", class_="js-news-category-page-container")
    # get all anchor tags <a> within the container
    anchors = container.find_all("a")
    all_news_urls = []
    for anchor in anchors:
        href = anchor["href"]
        all_news_urls.append(f"https://www.tradingview.com{href}")
    # remove the last url
    all_news_urls.pop()
    print(f"Found {len(all_news_urls)} news urls")
    for news_url in all_news_urls:
        get_sentiments_for_url(news_url)

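# Note: parse_tradingview_news() is defined but never invoked below; calling it
# would crawl the whole Brazil stocks news page and run the sentiment analysis
# on every article found, e.g.:
# parse_tradingview_news()
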
# Example with Tradingview:
# url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L2N3MZ0QM:0-brazil-s-ecorodovias-wins-highway-concession-in-sao-paulo-shares-plunge/"
url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L1N3NH0VC:0-brazil-s-embraer-sells-12-a-29n-super-tucano-aircrafts-to-portugal/"
# Example with Infomoney:
# url = "https://www.infomoney.com.br/mercados/luiz-barsi-neto-licoes-historias-caso-oi-no-mercado-financeiro/"
get_sentiments_for_url(url)
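
As a minimal usage sketch (not part of the original gist), and assuming GEMINI_API_KEY is set in the environment, the sentiment call can also be exercised directly on inline text, skipping the Playwright scraping step. The article snippet and ticker below are made up for illustration:

# Hypothetical inline article text, only to illustrate calling get_news_sentiment directly.
sample_text = (
    "Embraer confirmed a new export order for 12 A-29N Super Tucano aircraft, "
    "which analysts expect to lift EMBR3 revenue next year."
)
result = get_news_sentiment(sample_text, "EMBR3")
print(result.ticker, result.sentiment, result.reason)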