Skip to content

Instantly share code, notes, and snippets.

@jkbjh
Created July 4, 2025 11:54
Show Gist options
  • Save jkbjh/9a3b944c0165de76405464c27899c085 to your computer and use it in GitHub Desktop.
Save jkbjh/9a3b944c0165de76405464c27899c085 to your computer and use it in GitHub Desktop.
ai-deadlines
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
# Function to parse dates with different formats
def parse_date_flexible(date_str):
    """Parse a "<month> <day> <year>" string into a ``datetime.date``.

    The month may be abbreviated ("Oct", "Oct.", "Sept") or written out in
    full ("October"), and a comma after the day is optional.

    Args:
        date_str: Text such as "Oct 2 2024" or "October 2, 2024".

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If none of the supported formats matches.
    """
    # Collapse whitespace runs and trim the ends: strptime rejects leading or
    # trailing blanks.
    normalized = " ".join(date_str.split())
    # Drop a period that follows an abbreviated month ("Oct. 2 2024").
    normalized = re.sub(r"\b([A-Za-z]{3,})\.", r"\1", normalized)
    # %b only accepts three-letter abbreviations, so map the common
    # four-letter "Sept" onto "Sep".
    normalized = re.sub(r"\bSept\b", "Sep", normalized, flags=re.IGNORECASE)

    for fmt in ("%b %d %Y", "%B %d %Y", "%b %d, %Y", "%B %d, %Y"):
        try:
            return datetime.strptime(normalized, fmt).date()
        except ValueError:
            continue
    raise ValueError(f"Could not parse date: {date_str}")
# Function to extract abstract deadline date from text
def extract_deadline_date(text):
    """Return the first parseable calendar date mentioned in *text*.

    Recognised spellings include "Oct 2, 2024", "Oct. 2nd 2024",
    "October 2, 2024", "2 October 2024" and ISO "2023-12-20".

    Args:
        text: Free-form text that may mention a date.

    Returns:
        A ``datetime.date`` for the first pattern whose match can be parsed,
        or ``None`` when no pattern yields a valid date.
    """
    # Regex patterns for dates, tried in order.
    patterns = [
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[a-z]*[.,]?\s+\d{1,2}(?:st|nd|rd|th)?[.,]?\s+\d{4}",  # Oct 2, 2024
        r"\b\d{1,2}(?:st|nd|rd|th)?\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)[.,]?\s+\d{4}",  # 2 October 2024
        r"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)[.,]?\s+\d{1,2}(?:st|nd|rd|th)?[.,]?\s+\d{4}",  # October 2, 2024
        r"\b\d{4}-\d{2}-\d{2}",  # 2023-12-20
    ]
    # strptime formats for the cleaned-up candidate strings.
    formats = (
        "%b %d %Y",  # Oct 2 2024
        "%B %d %Y",  # October 2 2024
        "%d %B %Y",  # 2 October 2024
        "%Y-%m-%d",  # 2023-12-20
    )

    for pattern in patterns:
        match = re.search(pattern, text)
        if not match:
            continue
        cleaned = match.group(0)
        # Strip ordinal suffixes only when they directly follow a digit; a
        # blanket removal of "st" would also turn "August" into "Augu".
        cleaned = re.sub(r"(?<=\d)(?:st|nd|rd|th)\b", "", cleaned)
        # The patterns accept "." and "," as separators ("Oct. 2, 2024"),
        # but none of the strptime formats above contains them.
        cleaned = re.sub(r"[.,]", " ", cleaned)
        # %b only accepts three-letter abbreviations, so map "Sept" to "Sep".
        cleaned = re.sub(r"\bSept\b", "Sep", cleaned)
        cleaned = " ".join(cleaned.split())

        for fmt in formats:
            try:
                return datetime.strptime(cleaned, fmt).date()
            except ValueError:
                continue
        # This match did not parse (e.g. "2023-13-45"); try the next pattern.

    return None  # No valid date found
# Step 1: Load the HTML page
url = "https://aideadlin.es/?sub=ML,CV,RO,KR,AP"
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() prevents silently parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Step 2: Extract conference items
conf_divs = soup.find_all("div", class_="ConfItem")

# Step 3: Parse information
data = []
for div in conf_divs:
    try:
        # Conference name and the link to its detail page on aideadlin.es.
        title_tag = div.find("span", class_="conf-title")
        name = title_tag.get_text(strip=True) if title_tag else None
        link_tag = title_tag.find("a") if title_tag else None
        page_link = ("https://aideadlin.es" + link_tag["href"]) if link_tag else None

        # External conference website. Guard the intermediate span as well,
        # so an item without the icon yields None instead of AttributeError.
        icon_span = div.find("span", class_="conf-title-icon")
        website_tag = icon_span.find("a") if icon_span else None
        website_link = website_tag["href"] if website_tag else None

        # Conference dates: "<Month> <d1>-<d2>, <year>" or
        # "<Month> <d1> - <Month> <d2>, <year>"; anything else stays None.
        date_span = div.find("span", class_="conf-date")
        date_text = date_span.get_text(strip=True).replace("\xa0", " ") if date_span else None
        start_date = end_date = None
        if date_text:
            # Normalize en dashes so the regexes only deal with "-".
            date_text = date_text.replace("–", "-")
            match = re.match(r"([A-Za-z]+) (\d+)[ -]+(\d+), (\d{4})", date_text)
            if match:
                month, start_day, end_day, year = match.groups()
                start_date = parse_date_flexible(f"{month} {start_day} {year}")
                end_date = parse_date_flexible(f"{month} {end_day} {year}")
            else:
                match = re.match(r"([A-Za-z]+ \d+)\s*-\s*([A-Za-z]+ \d+), (\d{4})", date_text)
                if match:
                    month_day_start, month_day_end, year = match.groups()
                    start_date = parse_date_flexible(f"{month_day_start} {year}")
                    end_date = parse_date_flexible(f"{month_day_end} {year}")

        place_tag = div.find("span", class_="conf-place")
        location = place_tag.get_text(strip=True) if place_tag else None

        note_div = div.find("div", class_="note")
        note = note_div.get_text(strip=True).replace("Note:", "") if note_div else None

        # Extract the deadline date mentioned in the note, if any.
        abstract_deadline = extract_deadline_date(note) if note else None

        tags = [span["data-sub"] for span in div.find_all("span", class_="conf-sub")]

        data.append(
            {
                "Name": name,
                "Detail Page": page_link,
                "Website": website_link,
                "Start Date": start_date,
                "End Date": end_date,
                "Location": location,
                "Note": note,
                "Deadline": abstract_deadline,
                "Tags": tags,
            }
        )
    except Exception:
        # Show the offending markup, then re-raise with the original
        # traceback so execution still stops on unexpected page structure.
        print("\n--- Error parsing the following conference div ---\n")
        print(div.prettify())
        raise

# Step 4: Convert to DataFrame
df = pd.DataFrame(data)

# Show result
print(df.to_markdown())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment