Skip to content

Instantly share code, notes, and snippets.

@fr0gger
Created December 9, 2024 05:39
Show Gist options
  • Save fr0gger/3acd7d8235421c3ca12be2b2d0dfbc26 to your computer and use it in GitHub Desktop.
Save fr0gger/3acd7d8235421c3ca12be2b2d0dfbc26 to your computer and use it in GitHub Desktop.
# Thomas Roccia - @fr0gger_
# Structured IOCs
from pydantic import BaseModel
from openai import OpenAI
import requests
from bs4 import BeautifulSoup
import json
# Shared OpenAI client, created once at import time and used by extract_iocs().
client = OpenAI()
# One Indicator of Compromise (IOC) as returned by the model; every field is
# a required string. Documented with comments rather than a class docstring
# because pydantic copies a docstring into the generated JSON schema.
class IOC(BaseModel):
    # Category of the indicator, e.g. "IP", "URL", "HASH".
    type: str
    # The indicator value itself.
    value: str
    # Context in which the indicator appears in the source text.
    context: str
    # MITRE ATT&CK technique together with its description.
    mitre_id_description: str
    # Suggested action or recommendation.
    action: str
# Top-level response schema: the full list of IOC entries found in one text.
# Passed as ``response_format`` to the chat-completions parse call.
class IOCExtraction(BaseModel):
    iocs: list[IOC]
def scrape_blog_text(blog_url: str, timeout: float = 10) -> str:
    """
    Fetch a web page and return its text content.

    Args:
        blog_url (str): The URL to scrape.
        timeout (float): Seconds to wait for the server before giving up.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        str: The text extracted from the page, with the individual text
        fragments stripped and joined by newlines. An empty string is
        returned (and the error printed) if fetching or parsing fails.
    """
    try:
        response = requests.get(blog_url, timeout=timeout)
        response.raise_for_status()

        # Parse the raw bytes rather than response.text: when the HTTP
        # header carries no charset, requests falls back to ISO-8859-1 for
        # text/* responses, which garbles UTF-8 pages. Only forward the
        # header encoding when the server actually declared one; otherwise
        # let BeautifulSoup detect it (e.g. from a <meta charset> tag).
        content_type = response.headers.get("content-type", "").lower()
        declared_encoding = response.encoding if "charset" in content_type else None
        soup = BeautifulSoup(
            response.content, "html.parser", from_encoding=declared_encoding
        )
        return soup.get_text(separator="\n", strip=True)
    except requests.RequestException as e:
        # Network-level failures: DNS, connection, timeout, HTTP error status.
        print(f"Error fetching URL content: {e}")
        return ""
    except Exception as e:
        # Best-effort boundary: any parsing problem yields an empty result
        # instead of crashing the caller.
        print(f"Error processing URL content: {e}")
        return ""
def extract_iocs(blog_text: str) -> "IOCExtraction | None":
    """
    Ask the chat model to extract structured IOCs from a block of text.

    Args:
        blog_text (str): The text to analyse, sent as the user message.

    Returns:
        The parsed ``IOCExtraction`` taken from the first completion choice,
        or ``None`` if the request or the parsing raised an exception (the
        error is printed).
    """
    # Adjacent string literals are concatenated with nothing in between, so
    # every fragment except the last ends with an explicit space; without it
    # the sentences run together (e.g. "...provided text.IOC can be...").
    system_prompt = (
        "You are a threat intel expert. Extract all Indicators of Compromise (IOCs) from the provided text. "
        "IOC can be IP, hashes, url, network, domain, email, file, file path, registry key, threat actor, ransom note, cryptocurrency address, tool, command line, malware name, malware family, tags, target, victimology, mutex, task name, functions, CVE, Vulnerability... "
        "Format the response as a JSON adhering to the schema. Each IOC must include its type, value, context, and Mitre ATT&CK technique ID and description as well as a recommendation. "
        "Do NOT omit any IOC from the report."
    )
    try:
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": blog_text},
            ],
            temperature=0.2,
            # The pydantic model doubles as the response schema.
            response_format=IOCExtraction,
        )
        return completion.choices[0].message.parsed
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller decide.
        print(f"Error extracting IOCs: {e}")
        return None
def generate_ioc_report(blog_url: str) -> None:
    """
    Scrape a URL, extract its IOCs and print them as indented JSON.

    Each stage that yields nothing prints a short message and stops the
    pipeline; nothing is returned in any case.

    Args:
        blog_url (str): The URL of the page to analyse.
    """
    blog_text = scrape_blog_text(blog_url)
    if not blog_text:
        print("Failed to scrape blog content.")
        return

    ioc_data = extract_iocs(blog_text)
    # Covers both a failed extraction (None) and an empty IOC list.
    if not ioc_data or not ioc_data.iocs:
        print("No IOCs found.")
        return

    try:
        print("Extracted IOCs:")
        # pydantic v2 deprecates .dict() in favour of .model_dump(); fall
        # back to .dict() so the function keeps working on pydantic v1.
        to_dict = getattr(ioc_data, "model_dump", None) or ioc_data.dict
        print(json.dumps(to_dict(), indent=4))
    except Exception as e:
        print(f"Error displaying IOCs: {e}")
if __name__ == "__main__":
    import sys

    # Script entry point: exactly one command-line argument, the URL to
    # analyse, is expected after the script name.
    if len(sys.argv) == 2:
        blog_url = sys.argv[1]
        generate_ioc_report(blog_url)
    else:
        print("Usage: python structuredIOC.py <blog_url>")
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment