Use Google Gemini Flash 2.0 for Sentiment Analysis
import os
import re
import time
from typing import List, Literal

import dotenv
from bs4 import BeautifulSoup
from google import genai
from google.genai import types
from playwright.sync_api import sync_playwright
from pydantic import BaseModel

dotenv.load_dotenv()

# GH url: https://github.com/googleapis/python-genai
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

class TickerSentiment(BaseModel):
    ticker: str
    sentiment: Literal["positive", "negative", "neutral"]
    reason: str

def get_news_sentiment(text: str, ticker: str) -> TickerSentiment:
    prompt = f"""
    You are a financial analyst. You are given a news article and a stock ticker.
    You need to analyze the news article and determine the sentiment of the stock ticker.
    The sentiment can be positive, negative or neutral. Only say it is positive if the news article is about the stock ticker and the news is good for the stock. Only say it is negative if the news article is about the stock ticker and the news is bad for the stock.
    You need to return a JSON object with the following fields:
    - ticker: the stock ticker
    - sentiment: the sentiment of the stock ticker
    - reason: the reason for the sentiment
    The news article is:
    {text}
    The stock ticker is:
    {ticker}
    """  # noqa
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=TickerSentiment,
        ),
    )
    # Parse the raw JSON string into the declared Pydantic model so the return
    # value actually matches the TickerSentiment annotation (response.text is a str).
    return TickerSentiment.model_validate_json(response.text)

def extract_tickers(text: str) -> List[str]:
    """Extract stock tickers from text using a regex pattern."""
    # Pattern for Brazilian stock tickers (4-6 uppercase letters followed by 1-2 digits)
    pattern = r"\b[A-Z]{4,6}[0-9]{1,2}\b"
    return list(set(re.findall(pattern, text)))

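# For example, extract_tickers("PETR4 subiu enquanto VALE3 recuou") returns
# ["PETR4", "VALE3"] (in no particular order, since a set is used to dedupe).
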
# Use playwright to extract the text from the news article
def extract_news_text(url: str) -> str:
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        text = page.content()
        browser.close()
    soup = BeautifulSoup(text, "html.parser")
    if "tradingview.com" in url or "infomoney.com.br" in url:
        # get the element with tag <article> and parse with BeautifulSoup
        article = soup.find("article")
        return article.get_text()
    elif "br.investing.com" in url:
        # get the div with id "article"
        article = soup.find("div", id="article")
        return article.get_text()
    raise ValueError(f"Unknown provider: {url}")

def get_sentiments_for_url(url: str) -> List[TickerSentiment]:
    print(f"Getting sentiments for url: {url}")
    text = extract_news_text(url)
    tickers = extract_tickers(text)
    print(f"Found tickers: {tickers}")
    sentiments = []
    for ticker in tickers:
        sentiment = get_news_sentiment(text, ticker)
        sentiments.append(sentiment)
        print(f"Sentiment for {ticker}: {sentiment}")
    return sentiments

def parse_tradingview_news() -> None:
    url = "https://www.tradingview.com/markets/stocks-brazil/news/"
    soup = None
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        # wait for 2 seconds
        time.sleep(2)
        # open with beautifulsoup
        soup = BeautifulSoup(page.content(), "html.parser")
        browser.close()
    # get the element with class js-news-category-page-container
    container = soup.find("div", class_="js-news-category-page-container")
    # get all anchor tags <a> within the container
    anchors = container.find_all("a")
    all_news_urls = []
    for anchor in anchors:
        href = anchor["href"]
        all_news_urls.append(f"https://www.tradingview.com{href}")
    # remove the last url
    all_news_urls.pop()
    print(f"Found {len(all_news_urls)} news urls")
    for news_url in all_news_urls:
        get_sentiments_for_url(news_url)

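# Note: parse_tradingview_news() is defined but never invoked below; calling it
# would crawl the whole Brazil stocks news page and run the sentiment analysis
# on every article found, e.g.:
# parse_tradingview_news()
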
# Example with Tradingview:
# url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L2N3MZ0QM:0-brazil-s-ecorodovias-wins-highway-concession-in-sao-paulo-shares-plunge/"
url = "https://www.tradingview.com/news/reuters.com,2024:newsml_L1N3NH0VC:0-brazil-s-embraer-sells-12-a-29n-super-tucano-aircrafts-to-portugal/"
# Example with Infomoney:
# url = "https://www.infomoney.com.br/mercados/luiz-barsi-neto-licoes-historias-caso-oi-no-mercado-financeiro/"
get_sentiments_for_url(url)
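
As a minimal usage sketch (not part of the original gist), and assuming GEMINI_API_KEY is set in the environment, the sentiment call can also be exercised directly on inline text, skipping the Playwright scraping step. The article snippet and ticker below are made up for illustration:

# Hypothetical inline article text, only to illustrate calling get_news_sentiment directly.
sample_text = (
    "Embraer confirmed a new export order for 12 A-29N Super Tucano aircraft, "
    "which analysts expect to lift EMBR3 revenue next year."
)
result = get_news_sentiment(sample_text, "EMBR3")
print(result.ticker, result.sentiment, result.reason)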