Skip to content

Instantly share code, notes, and snippets.

@anjohan
Last active April 27, 2026 19:49
Show Gist options
  • Select an option

  • Save anjohan/9b60eef8faed7d99ea6405ab52657e01 to your computer and use it in GitHub Desktop.

Select an option

Save anjohan/9b60eef8faed7d99ea6405ab52657e01 to your computer and use it in GitHub Desktop.
MRS Spring 2026 AI Scheduler
"""
Script for going through all talks at MRS Spring 2026 and using AI to evaluate whether
a talk is interesting or not. The script generates a .ics calendar file.
This uses the Cerebras Cloud SDK as the AI endpoint, since this was the easiest
to get running for me. Each run costs ~$0.25.
This script was mostly written by Gemini 3.1 Pro within a few iterations.
STEP 1:
Download abstract book from https://www.mrs.org/docs/default-source/meetings-events/spring-meetings/2026/2026-mrs-spring-meeting-program.pdf
STEP 2:
Install PDF-to-markdown converter and run it (takes ~5 minutes):
pip install markitdown[pdf]
markitdown mrs.pdf > mrs.md
STEP 3:
Install Cerebras Cloud:
pip install cerebras-cloud-sdk
Sign up for cloud.cerebras.ai.
Go to the billing page and select "pay as you go",
and add $10 of value, otherwise you run out of tokens per minute/day.
Get an API key (I think it changes when you switch from free tier).
STEP 4:
Edit the prompt_intro variable below with your interests.
The example below finds 235 talks.
STEP 5:
Run this script (2-3 minutes) with your API key, which will produce schedule.ics
CEREBRAS_API_KEY=YOURKEYHERE python mrsscheduler.py
STEP 6:
Import schedule.ics in your favorite calendar software
Make a separate calendar to make the events easy to
delete in case you are not happy with the result.
If using Google Calendar, make a separate MRS calendar,
then import the events to that calendar.
(Warning: It will by default add it to your main calendar.)
"""
import os
import re
import uuid
import multiprocessing
from tqdm import tqdm
from datetime import datetime, timedelta
from cerebras.cloud.sdk import Cerebras
# Cerebras API client, created once at import time. The key is read from the
# CEREBRAS_API_KEY environment variable; os.environ.get() yields None when the
# variable is unset.
client = Cerebras(
api_key=os.environ.get("CEREBRAS_API_KEY")
)
# CONFIGURATION
# Number of worker processes in the pool that evaluates talks in parallel.
num_workers = 8 # within 1k requests/minute rate limit
# Markdown conversion of the program PDF (produced in STEP 2 of the module docstring).
input_file = "mrs.md"
# Path of the iCalendar file this script writes.
output_file = "schedule.ics"
# Free-text description of the user's interests; it is placed in front of the
# per-talk yes/no question sent to the model.
prompt_intro = """I am interested methods development and computational aspects of molecular dynamics, atomistic simulations, machine learned interatomic potentials, and high-performance computing. I am not interested in pure application talks without methodological or computational novelty. I am also not interested in machine learning applied to domains that are not related to atomistic simulations. If relevant to ML potentials, I am also interested in computational quantum methods such as DFT."""
# Line patterns recognised in the converted program, compiled once at import.
_SESSION_RE = re.compile(r"^SESSION\s+[A-Z0-9.]+:\s+(.*)")
_TALK_RE = re.compile(r"^(\d{1,2}:\d{2}\s+[AP]M)\s+([*+^]*[A-Z0-9]+\.\d+\s*\.\d+)")
_BREAK_RE = re.compile(r"^(\d{1,2}:\d{2}\s+[AP]M)\s+(BREAK|PANEL DISCUSSION)")
# Explicit regex to identify the known date bounds of the conference
_DATE_RE = re.compile(r"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday).*?(April|May)\s+\d{1,2},\s+2026")


def _split_header(header_text):
    """Split a raw talk header into (title_and_authors, authors, affiliations).

    Title and authors are kept together as one string; everything after the
    first ';' is treated as affiliations. The middle element is always "".
    """
    header_text = header_text.replace('\n', ' ').strip()
    title_authors, _, affiliations = header_text.partition(';')
    return title_authors.strip(), "", affiliations.strip()


def _finalize_talk(talk, state, end_time_str=None):
    """Close out a talk record and return it, ready to be appended to the result.

    If the talk is still in the HEADER state (no blank line separated its
    header from whatever ended it), the header is parsed here so the title is
    not left empty. ``end_time_str`` is stored only when one is supplied.
    """
    if state == 'HEADER':
        talk['title'], talk['authors'], talk['affiliations'] = _split_header(talk['raw_header'])
    if end_time_str is not None:
        talk['end_time_str'] = end_time_str
    talk['abstract'] = talk['abstract'].strip()
    talk.pop('raw_header', None)
    return talk


def _find_date_and_place(lines, session_index):
    """Look ahead from a SESSION line for its date line and location line.

    The date is the first of the next 9 lines matching ``_DATE_RE``. The place
    is the first non-empty line among the 4 lines after the date that is not
    itself a talk or break entry. Returns ``(date, place)``; either may be None.
    """
    for j in range(session_index + 1, min(session_index + 10, len(lines))):
        date_line = lines[j].strip()
        if not _DATE_RE.match(date_line):
            continue
        for k in range(j + 1, min(j + 5, len(lines))):
            place_line = lines[k].strip()
            if place_line and not _TALK_RE.match(place_line) and not _BREAK_RE.match(place_line):
                return date_line, place_line
        return date_line, None
    return None, None


def extract_talks_from_markdown(md_text):
    """Parse the markdown-converted program into a list of oral-talk records.

    The text is scanned line by line with a small state machine:
    SEARCHING (outside any talk), HEADER (collecting title/authors/affiliations
    until the first blank line) and ABSTRACT (collecting abstract text).
    Talks inside sessions whose title contains "Poster" are skipped.

    Args:
        md_text: full markdown text of the program.

    Returns:
        A list of dicts with keys ``talk_id``, ``time``, ``end_time_str``,
        ``date``, ``place``, ``title``, ``authors``, ``affiliations`` and
        ``abstract``. ``end_time_str`` is the start time of the following talk
        or break, or None for the last talk of a session. ``date`` and
        ``place`` are the raw lines found after the session header (or None).
    """
    talks = []
    # Drop the repeated page header/footer text so it does not leak into abstracts.
    md_text = re.sub(r'2026 MRS Spring Meeting\s+Current as of April 15, 2026', '', md_text)
    current_date, current_place, is_poster, current_talk = None, None, False, None
    state = 'SEARCHING'
    lines = md_text.split('\n')

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()

        if not line:
            if current_talk:
                if state == 'HEADER':
                    # First blank line after the talk line ends the header.
                    t, a, aff = _split_header(current_talk['raw_header'])
                    current_talk['title'], current_talk['authors'], current_talk['affiliations'] = t, a, aff
                    state = 'ABSTRACT'
                elif state == 'ABSTRACT':
                    # Preserve paragraph breaks inside the abstract (at most one).
                    if current_talk['abstract'] and not current_talk['abstract'].endswith('\n\n'):
                        current_talk['abstract'] += '\n\n'
            continue

        match_session = _SESSION_RE.match(line)
        if match_session:
            if current_talk and not is_poster:
                # Session ended. Current talk is last in session, end time remains None.
                talks.append(_finalize_talk(current_talk, state))
            current_talk = None
            is_poster = "Poster" in match_session.group(1).strip()
            current_date, current_place = _find_date_and_place(lines, i)
            state = 'SEARCHING'
            continue

        match_break = _BREAK_RE.match(line)
        if match_break:
            time_str, _break_type = match_break.groups()
            if current_talk and not is_poster:
                # The break signifies the end of the previous talk
                talks.append(_finalize_talk(current_talk, state, time_str.strip()))
            current_talk = None
            state = 'SEARCHING'
            continue

        match_talk = _TALK_RE.match(line)
        if match_talk:
            time_str, talk_id = match_talk.groups()
            if current_talk and not is_poster:
                # The new talk signifies the end of the previous talk
                talks.append(_finalize_talk(current_talk, state, time_str.strip()))
            if not is_poster:
                current_talk = {
                    'talk_id': talk_id.strip(),
                    'time': time_str.strip(),
                    'end_time_str': None,  # Filled in when the next talk/break is seen
                    'date': current_date,
                    'place': current_place,
                    'title': '', 'authors': '', 'affiliations': '', 'raw_header': '', 'abstract': ''
                }
                state = 'HEADER'
            else:
                current_talk, state = None, 'SEARCHING'
            continue

        if current_talk and not is_poster:
            if state == 'HEADER':
                current_talk['raw_header'] += line + " "
            elif state == 'ABSTRACT':
                if line.startswith(("SYMPOSIUM BI", "Symposium Organizers")):
                    # Symposium front matter follows; stop collecting abstract text
                    # until the next talk, break or session line.
                    state = 'SEARCHING'
                    continue
                if current_talk['abstract'] and not current_talk['abstract'].endswith('\n'):
                    current_talk['abstract'] += ' '
                current_talk['abstract'] += line

    # Input ended while a talk was still open.
    if current_talk and not is_poster:
        talks.append(_finalize_talk(current_talk, state))
    return talks
def is_interesting(talk):
    """Ask the LLM whether a single talk matches the user's interests.

    Args:
        talk: dict with ``'title'`` and ``'abstract'`` entries.

    Returns:
        True only when the model's reply, lowercased and with every character
        other than a-z removed, is exactly ``"yes"``. Any other reply, an empty
        reply, or an exception raised by the API call yields False (the
        exception is printed, not propagated).
    """
    # Only the fixed template goes through str.format(); the user-edited intro is
    # concatenated afterwards so literal '{' or '}' in it cannot raise.
    question = """
Answering only yes or no, do you think I would be interested in the following talk?
Title and Authors: {title}
Abstract: {abstract}
""".format(
        title=talk['title'],
        abstract=talk['abstract']
    )
    prompt = prompt_intro + "\n" + question
    try:
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3.1-8b",
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
            stream=False
        )
        # Treat a missing reply body as an empty answer rather than an error.
        answer = completion.choices[0].message.content or ""
        # Clean response: lowercased and stripped of punctuation
        answer = re.sub(r'[^a-z]', '', answer.strip().lower())
        if answer == 'yes':
            return True
    except Exception as e:
        # Best effort: one failed request must not abort the whole run.
        print(f"Error calling API for talk '{talk['title'][:30]}...': {e}")
    return False
def get_interesting_talks(talks):
    """Return the talks the LLM judged interesting, in their original order.

    Each talk is passed to ``is_interesting`` in a pool of ``num_workers``
    processes; a progress bar tracks the ordered results as they arrive.
    """
    print(f"Evaluating {len(talks)} talks using LLM...")
    selected = []
    with multiprocessing.Pool(processes=num_workers) as pool:
        # imap yields verdicts in input order, so they pair up with talks directly.
        verdicts = tqdm(pool.imap(is_interesting, talks), total=len(talks))
        for keep, talk in zip(verdicts, talks):
            if keep:
                selected.append(talk)
    return selected
def parse_datetime(date_str, time_str):
    """Build a naive datetime from a program date line and a clock time.

    Only the "<April|May> <day>, 2026" portion of ``date_str`` is used; the
    time is expected in 12-hour form such as "8:15 AM". Returns None when
    either argument is empty, when no date is found, or when parsing fails
    (in which case a warning is printed).
    """
    if not (date_str and time_str):
        return None
    try:
        # Pull out just the calendar date, ignoring weekday text and other artifacts.
        found = re.search(r'(April|May)\s+\d{1,2},\s+2026', date_str)
        if found is None:
            return None
        stamp = " ".join((found.group(0), time_str.strip()))
        return datetime.strptime(stamp, "%B %d, %Y %I:%M %p")
    except Exception as e:
        print(f"Warning: Could not parse datetime ({date_str} {time_str}) - {e}")
        return None
def get_start_end_dt(talk):
    """Return ``(start, end)`` datetimes for a talk, or ``(None, None)``.

    The end comes from the talk's ``end_time_str`` when present and later than
    the start; otherwise the talk is assumed to last 15 minutes.
    """
    start_dt = parse_datetime(talk['date'], talk['time'])
    if not start_dt:
        return None, None

    end_label = talk.get('end_time_str')
    # No recorded end time: this is the last talk before a session ends.
    end_dt = parse_datetime(talk['date'], end_label) if end_label else None

    # Also covers an unparseable end time or one that is not after the start.
    if not end_dt or end_dt <= start_dt:
        end_dt = start_dt + timedelta(minutes=15)
    return start_dt, end_dt
def fold_ics_line(line):
    """Fold a content line so no physical line exceeds 75 octets (RFC 5545).

    The limit is measured in UTF-8 bytes, not characters, and a fold is never
    placed inside a multi-byte character. Each continuation line starts with a
    single space, which counts towards its 75 octets.

    Args:
        line: one unfolded iCalendar content line.

    Returns:
        The line with CRLF + space inserted at each fold; lines that already
        fit are returned unchanged.
    """
    limit = 75
    if len(line.encode("utf-8")) <= limit:
        return line

    segments = []
    current = []
    used = 0
    for ch in line:
        width = len(ch.encode("utf-8"))
        if used + width > limit:
            segments.append("".join(current))
            # Continuation lines begin with one space (1 octet).
            current = [" "]
            used = 1
        current.append(ch)
        used += width
    segments.append("".join(current))
    return "\r\n".join(segments)
def _escape_ics_text(value):
    """Escape a value for an iCalendar TEXT property; None becomes an empty string."""
    if value is None:
        return ""
    # Backslash must be escaped first so later escapes are not doubled.
    return str(value).replace('\\', r'\\').replace(';', r'\;').replace(',', r'\,').replace('\n', r'\n')


def generate_ics(talks, filename="schedule.ics"):
    """Write the given talks to an iCalendar (.ics) file.

    Talks whose start/end cannot be determined are skipped. Event times are
    written as floating local times (no time zone), so they show at the same
    wall-clock time wherever the calendar is viewed.

    Args:
        talks: iterable of talk dicts with ``title``, ``place``, ``abstract``
            and the date/time keys read by ``get_start_end_dt``.
        filename: path of the file to create or overwrite.
    """
    from datetime import timezone

    # DTSTAMP carries a 'Z' suffix, so it must be real UTC rather than local time.
    dtstamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')

    ics_lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "PRODID:-//User Name//Conference Schedule 1.0//EN",
        "CALSCALE:GREGORIAN"
    ]
    for talk in talks:
        start_dt, end_dt = get_start_end_dt(talk)
        if not start_dt or not end_dt:
            continue
        # Datetime formatted for ICS (floating local time)
        dtstart = start_dt.strftime("%Y%m%dT%H%M%S")
        dtend = end_dt.strftime("%Y%m%dT%H%M%S")
        summary = _escape_ics_text(talk['title'])
        # A session without a detected room yields an empty LOCATION, not "None".
        location = _escape_ics_text(talk['place'])
        description = _escape_ics_text(f"Title/Authors: {talk['title']}\n\n{talk['abstract']}")
        uid = str(uuid.uuid4()) + "@mrsschedule"
        event_lines = [
            "BEGIN:VEVENT",
            f"UID:{uid}",
            f"DTSTAMP:{dtstamp}",
            f"DTSTART:{dtstart}",
            f"DTEND:{dtend}",
            f"SUMMARY:{summary}",
            f"LOCATION:{location}",
            f"DESCRIPTION:{description}",
            "END:VEVENT"
        ]
        ics_lines.extend(fold_ics_line(line) for line in event_lines)
    ics_lines.append("END:VCALENDAR")
    # newline='' stops text mode from rewriting the explicit CRLFs (e.g. to
    # CR CR LF on Windows); every content line, including the last, ends in CRLF.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        f.write("\r\n".join(ics_lines) + "\r\n")
# Script entry point: parse the program, filter talks with the LLM, print the
# resulting itinerary and export it as an .ics file.
if __name__ == "__main__":
    # 1. Read the markdown content
    with open(input_file, "r", encoding="utf-8") as file:
        markdown_data = file.read()
    # 2. Extract talks
    all_talks = extract_talks_from_markdown(markdown_data)
    # 3. Filter using LLM API
    my_talks = get_interesting_talks(all_talks)
    # 4. Print simple format to console
    print("\n" + "="*50)
    print(" YOUR PERSONALIZED CONFERENCE ITINERARY")
    print("="*50 + "\n")
    if not my_talks:
        print("No talks matching your interests were found.")
    else:
        for t in my_talks:
            start_dt, end_dt = get_start_end_dt(t)
            # Fallback text, used when the start or end time could not be parsed.
            time_display = f"{t['time']} (could not calculate duration)"
            if start_dt and end_dt:
                duration = int((end_dt - start_dt).total_seconds() / 60)
                time_display = f"{start_dt.strftime('%I:%M %p')} - {end_dt.strftime('%I:%M %p')} ({duration} min)"
            # Emoji are written as escapes so they survive re-encoding of this file:
            # pushpin, clock face, round pushpin.
            print(f"\U0001F4CC {t['title']}")
            print(f"\U0001F552 {t['date']} @ {time_display}")
            print(f"\U0001F4CD {t['place']}")
            print("-" * 50)
    # 5. Export to .ics
    generate_ics(my_talks, filename=output_file)
    # \u2705 is the check-mark emoji.
    print(f"\n\u2705 Created '{output_file}' with {len(my_talks)} events.")
    print(" You can double-click this file to import it into your calendar.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment