Created
December 9, 2024 05:39
-
-
Save fr0gger/3acd7d8235421c3ca12be2b2d0dfbc26 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Thomas Roccia - @fr0gger_
# Structured IOCs
from pydantic import BaseModel | |
from openai import OpenAI | |
import requests | |
from bs4 import BeautifulSoup | |
import json | |
# Module-level OpenAI client, used by extract_iocs. It is constructed with
# no arguments, so credentials and configuration come from the SDK defaults
# (presumably the OPENAI_API_KEY environment variable -- TODO confirm).
client = OpenAI()
# Schema for a single Indicator of Compromise (IOC). All fields are required
# strings.
# NOTE(review): documented with comments rather than a docstring on purpose;
# this model is part of the response_format schema sent to the API, and a
# docstring would presumably be copied into that schema -- confirm before
# adding one.
class IOC(BaseModel):
    type: str  # Type of IOC (e.g., "IP", "URL", "HASH")
    value: str  # The IOC value
    context: str  # Context in which the IOC appears
    mitre_id_description: str  # The MITRE ATT&CK technique with description
    action: str  # Action or recommendation
# Top-level response schema: the list of IOC records extracted from one text.
# extract_iocs passes this class as response_format and returns an instance
# of it.
class IOCExtraction(BaseModel):
    iocs: list[IOC]  # Extracted indicators, one IOC record each
def scrape_blog_text(blog_url: str) -> str:
    """
    Download a web page and return its text content.

    Args:
        blog_url (str): Address of the page to download.

    Returns:
        str: The text of the page, with each text fragment stripped and
        fragments separated by newlines. An empty string is returned when
        the download or the parsing fails (the error is printed).
    """
    extracted = ""
    try:
        page = requests.get(blog_url, timeout=10)
        # Turn HTTP error statuses into an exception handled below.
        page.raise_for_status()
        document = BeautifulSoup(page.text, "html.parser")
        extracted = document.get_text(separator="\n", strip=True)
    except requests.RequestException as fetch_error:
        print(f"Error fetching URL content: {fetch_error}")
    except Exception as parse_error:
        print(f"Error processing URL content: {parse_error}")
    return extracted
def extract_iocs(blog_text: str) -> "IOCExtraction | None":
    """
    Ask the OpenAI model to extract structured IOCs from free text.

    Sends ``blog_text`` as the user message together with a system prompt
    describing which indicators to extract, and requests a response parsed
    into the ``IOCExtraction`` schema.

    Args:
        blog_text (str): The text to analyse (e.g. a scraped blog post).

    Returns:
        The ``parsed`` attribute of the first choice's message (an
        ``IOCExtraction`` instance on success), or None when any exception
        is raised while calling the API or reading the response (the error
        is printed).
    """
    # Join the sentences with an explicit space: as adjacent string literals
    # they were concatenated with no separator, which made the sentences run
    # together ("...provided text.IOC can be...").
    system_prompt = " ".join(
        (
            "You are a threat intel expert. Extract all Indicators of Compromise (IOCs) from the provided text.",
            "IOC can be IP, hashes, url, network, domain, email, file, file path, registry key, threat actor, ransom note, cryptocurrency address, tool, command line, malware name, malware family, tags, target, victimology, mutex, task name, functions, CVE, Vulnerability...",
            "Format the response as a JSON adhering to the schema. Each IOC must include its type, value, context, and Mitre ATT&CK technique ID and description as well as a recomandation.",
            "Do NOT omit any IOC from the report.",
        )
    )
    try:
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": blog_text},
            ],
            temperature=0.2,
            response_format=IOCExtraction,
        )
        return completion.choices[0].message.parsed
    except Exception as e:
        # Deliberate best-effort boundary: every failure is reported and
        # mapped to None so the caller can print "No IOCs found.".
        print(f"Error extracting IOCs: {e}")
        return None
def generate_ioc_report(blog_url: str) -> None:
    """
    Scrape a page, extract its IOCs and print them as indented JSON.

    Args:
        blog_url (str): URL of the page to analyse.

    Returns:
        None. The report, or a message explaining why there is none, is
        printed to standard output.
    """
    blog_text = scrape_blog_text(blog_url)
    if not blog_text:
        print("Failed to scrape blog content.")
        return

    ioc_data = extract_iocs(blog_text)
    if not ioc_data or not ioc_data.iocs:
        print("No IOCs found.")
        return

    try:
        print("Extracted IOCs:")
        # Pydantic v2 deprecates BaseModel.dict() in favour of model_dump();
        # fall back to dict() so the script keeps working on Pydantic v1.
        if hasattr(ioc_data, "model_dump"):
            payload = ioc_data.model_dump()
        else:
            payload = ioc_data.dict()
        print(json.dumps(payload, indent=4))
    except Exception as e:
        print(f"Error displaying IOCs: {e}")
if __name__ == "__main__":
    import sys

    # Command-line entry point: exactly one argument, the blog URL, is
    # expected after the script name.
    if len(sys.argv) == 2:
        blog_url = sys.argv[1]
        generate_ioc_report(blog_url)
    else:
        print("Usage: python structuredIOC.py <blog_url>")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment