Last active
April 27, 2026 19:49
-
-
Save anjohan/9b60eef8faed7d99ea6405ab52657e01 to your computer and use it in GitHub Desktop.
MRS Spring 2026 AI Scheduler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Script for going through all talks at MRS Spring 2026 and using AI to evaluate whether | |
| a talk is interesting or not. The script generates a .ics calendar file. | |
| This uses the Cerebras Cloud SDK as the AI endpoint, since this was the easiest | |
| to get running for me. Each run costs ~$0.25. | |
| This script was mostly written by Gemini 3.1 Pro within a few iterations. | |
| STEP 1: | |
| Download abstract book from https://www.mrs.org/docs/default-source/meetings-events/spring-meetings/2026/2026-mrs-spring-meeting-program.pdf | |
| STEP 2: | |
| Install PDF-to-markdown converter and run it (takes ~5 minutes): | |
| pip install markitdown[pdf] | |
| markitdown mrs.pdf > mrs.md | |
| STEP 3: | |
| Install Cerebras Cloud: | |
| pip install cerebras-cloud-sdk | |
| Sign up for cloud.cerebras.ai. | |
| Go to the billing page and select "pay as you go", | |
| and add $10 of value, otherwise you run out of tokens per minute/day. | |
| Get an API key (I think it changes when you switch from free tier). | |
| STEP 4: | |
| Edit the prompt_intro variable below with your interests. | |
| The example below finds 235 talks. | |
| STEP 5: | |
| Run this script (2-3 minutes) with your API key, which will produce schedule.ics | |
| CEREBRAS_API_KEY=YOURKEYHERE python mrsscheduler.py | |
| STEP 6: | |
| Import schedule.ics in your favorite calendar software | |
| Make a separate calendar, to make the events easy to | |
| delete in case you are not happy with the result. | |
| If using Google Calendar, make a separate MRS calendar, | |
| then import the events to that calendar. | |
| (Warning: It will by default add it to your main calendar.) | |
| """ | |
| import os | |
| import re | |
| import uuid | |
| import multiprocessing | |
| from tqdm import tqdm | |
| from datetime import datetime, timedelta | |
| from cerebras.cloud.sdk import Cerebras | |
# API client; the key is taken from the CEREBRAS_API_KEY environment variable.
client = Cerebras(api_key=os.getenv("CEREBRAS_API_KEY"))

# CONFIGURATION
num_workers = 8  # worker-pool size; within 1k requests/minute rate limit
input_file = "mrs.md"  # markdown version of the abstract book
output_file = "schedule.ics"  # calendar file written at the end of the run
# Free-text description of your interests; it is prepended to every LLM query.
prompt_intro = (
    "I am interested methods development and computational aspects of "
    "molecular dynamics, atomistic simulations, machine learned interatomic "
    "potentials, and high-performance computing. "
    "I am not interested in pure application talks without methodological "
    "or computational novelty. "
    "I am also not interested in machine learning applied to domains that "
    "are not related to atomistic simulations. "
    "If relevant to ML potentials, I am also interested in computational "
    "quantum methods such as DFT."
)
def extract_talks_from_markdown(md_text):
    """Parse the markdown-converted program book into a list of oral talks.

    The text is scanned line by line with a small state machine:

    * ``SEARCHING`` - not collecting anything; plain text lines are ignored.
    * ``HEADER``    - collecting the title/author/affiliation lines that follow
      a talk's "time + talk id" line, until the first blank line.
    * ``ABSTRACT``  - collecting abstract text; blank lines become paragraph
      breaks.

    A talk is closed by the next talk line, by a BREAK / PANEL DISCUSSION line,
    by the next SESSION header, or by the end of the text. The time on the
    closing talk/break line becomes the closed talk's end time. Talks in
    sessions whose title contains "Poster" are skipped.

    Args:
        md_text: Full markdown text of the program.

    Returns:
        A list of dicts with the keys ``talk_id``, ``time``, ``end_time_str``
        (``None`` when the talk was closed by a session header or the end of
        the text), ``date``, ``place``, ``title`` (title and authors as one
        string), ``authors`` (always empty), ``affiliations`` and ``abstract``.
    """
    # Remove the running page header repeated throughout the converted text.
    md_text = re.sub(r'2026 MRS Spring Meeting\s+Current as of April 15, 2026', '', md_text)

    session_re = re.compile(r"^SESSION\s+[A-Z0-9.]+:\s+(.*)")
    talk_re = re.compile(r"^(\d{1,2}:\d{2}\s+[AP]M)\s+([*+^]*[A-Z0-9]+\.\d+\s*\.\d+)")
    break_re = re.compile(r"^(\d{1,2}:\d{2}\s+[AP]M)\s+(BREAK|PANEL DISCUSSION)")
    # Explicit regex to identify the known date bounds of the conference
    date_pattern = re.compile(r"^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday).*?(April|May)\s+\d{1,2},\s+2026")

    lines = md_text.split('\n')
    talks = []

    def process_header(header_text):
        """Treats the Title and Authors as one string, just stripping affiliations."""
        header_text = header_text.replace('\n', ' ').strip()
        title_authors, _, affiliations = header_text.partition(';')
        return title_authors.strip(), "", affiliations.strip()

    def apply_header(talk):
        """Fill title/authors/affiliations from the header text collected so far."""
        talk['title'], talk['authors'], talk['affiliations'] = process_header(talk['raw_header'])

    def close_talk(talk, talk_state, end_time_str=None):
        """Finish a talk and store it in ``talks``."""
        # A talk can be closed while its header is still being collected (no
        # blank line before the next talk/break/session). Parse the header here
        # so such talks do not end up with an empty title.
        if talk_state == 'HEADER':
            apply_header(talk)
        talk['end_time_str'] = end_time_str
        talk['abstract'] = talk['abstract'].strip()
        talks.append(talk)

    def find_date_and_place(session_index):
        """Look just below a SESSION header for its date line and location line."""
        for j in range(session_index + 1, min(session_index + 10, len(lines))):
            candidate = lines[j].strip()
            if not date_pattern.match(candidate):
                continue
            # The next non-empty, non-event line is generally the location
            for k in range(j + 1, min(j + 5, len(lines))):
                place_line = lines[k].strip()
                if place_line and not talk_re.match(place_line) and not break_re.match(place_line):
                    return candidate, place_line
            return candidate, None
        return None, None

    current_date = current_place = current_talk = None
    is_poster = False
    state = 'SEARCHING'

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()

        if not line:
            if current_talk and state == 'HEADER':
                # First blank line after the header block: header is complete.
                apply_header(current_talk)
                state = 'ABSTRACT'
            elif current_talk and state == 'ABSTRACT':
                # Blank line inside an abstract marks a paragraph break.
                if current_talk['abstract'] and not current_talk['abstract'].endswith('\n\n'):
                    current_talk['abstract'] += '\n\n'
            continue

        match_session = session_re.match(line)
        if match_session:
            if current_talk and not is_poster:
                # Session ended. Current talk is last in session, end time remains None.
                close_talk(current_talk, state)
            current_talk = None
            is_poster = "Poster" in match_session.group(1).strip()
            current_date, current_place = find_date_and_place(i)
            state = 'SEARCHING'
            continue

        match_break = break_re.match(line)
        if match_break:
            if current_talk and not is_poster:
                # The break signifies the end of the previous talk
                close_talk(current_talk, state, match_break.group(1).strip())
            current_talk = None
            state = 'SEARCHING'
            continue

        match_talk = talk_re.match(line)
        if match_talk:
            time_str, talk_id = match_talk.groups()
            if current_talk and not is_poster:
                # The new talk signifies the end of the previous talk
                close_talk(current_talk, state, time_str.strip())
            if is_poster:
                current_talk, state = None, 'SEARCHING'
            else:
                current_talk = {
                    'talk_id': talk_id.strip(),
                    'time': time_str.strip(),
                    'end_time_str': None,  # set when the talk is closed
                    'date': current_date,
                    'place': current_place,
                    'title': '', 'authors': '', 'affiliations': '', 'raw_header': '', 'abstract': '',
                }
                state = 'HEADER'
            continue

        # Plain text line: only relevant while a non-poster talk is open.
        if is_poster or not current_talk:
            continue
        if state == 'HEADER':
            current_talk['raw_header'] += line + " "
        elif state == 'ABSTRACT':
            # Presumably front matter of the next symposium; stop collecting the
            # abstract but keep the talk open until the next talk/break/session.
            if line.startswith(("SYMPOSIUM BI", "Symposium Organizers")):
                state = 'SEARCHING'
                continue
            if current_talk['abstract'] and not current_talk['abstract'].endswith('\n'):
                current_talk['abstract'] += ' '
            current_talk['abstract'] += line

    if current_talk and not is_poster:
        close_talk(current_talk, state)

    # raw_header is only scratch space for the parser.
    for talk in talks:
        talk.pop('raw_header', None)
    return talks
def is_interesting(talk):
    """Ask the LLM whether a single talk matches the interests in ``prompt_intro``.

    Args:
        talk: Talk dict; its ``'title'`` and ``'abstract'`` entries are sent to
            the model.

    Returns:
        True only when the model's reply, lowercased and with every non-letter
        character removed, is exactly ``"yes"``. False for any other reply, for
        an empty reply, and when the API call fails (the error is printed).
    """
    # The prompt is assembled without str.format on a template that contains
    # prompt_intro: braces in the user-edited interests text would otherwise be
    # parsed as replacement fields and raise before the request is even sent.
    prompt = "".join([
        prompt_intro, "\n", "\n",
        "Answering only yes or no, do you think I would be interested in the following talk?\n",
        f"Title and Authors: {talk['title']}\n",
        f"Abstract: {talk['abstract']}\n",
    ])

    try:
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3.1-8b",
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
            stream=False,
        )
        answer = completion.choices[0].message.content
    except Exception as e:
        # Broad on purpose: one failed request should not abort the whole run.
        print(f"Error calling API for talk '{talk['title'][:30]}...': {e}")
        return False

    # Clean response: lowercased and stripped of punctuation
    cleaned = re.sub(r'[^a-z]', '', (answer or '').strip().lower())
    return cleaned == 'yes'
def get_interesting_talks(talks):
    """Return the talks that ``is_interesting`` accepts, in their original order.

    The talks are evaluated by a pool of ``num_workers`` processes while a
    progress bar tracks how many verdicts have come back.
    """
    total = len(talks)
    print(f"Evaluating {total} talks using LLM...")

    with multiprocessing.Pool(processes=num_workers) as pool:
        # imap yields verdicts in input order, so they pair up with ``talks``.
        verdicts = tqdm(pool.imap(is_interesting, talks), total=total)
        # Verdicts come first in the zip so the progress bar is fully consumed.
        return [talk for keep, talk in zip(verdicts, talks) if keep]
def parse_datetime(date_str, time_str):
    """Build a naive ``datetime`` from a program date line and a clock time.

    Only the "April|May <day>, 2026" part of ``date_str`` is used; whatever
    surrounds it is ignored. Returns ``None`` when either argument is empty,
    when no such date is present, or when parsing fails (a warning is printed
    in that last case).
    """
    if not (date_str and time_str):
        return None

    try:
        # Extract purely the valid date using regex to avoid artifacts
        found = re.search(r'(April|May)\s+\d{1,2},\s+2026', date_str)
        if found is None:
            return None
        stamp = found.group(0) + " " + time_str.strip()
        return datetime.strptime(stamp, "%B %d, %Y %I:%M %p")
    except Exception as e:
        print(f"Warning: Could not parse datetime ({date_str} {time_str}) - {e}")
        return None
def get_start_end_dt(talk):
    """Return ``(start, end)`` datetimes for a talk, or ``(None, None)``.

    The end comes from the talk's ``end_time_str`` when that parses to a moment
    after the start; in every other case the talk is given 15 minutes.
    """
    begin = parse_datetime(talk['date'], talk['time'])
    if not begin:
        return None, None

    # Used for the last talk before a session ends and for unusable end times.
    fallback_end = begin + timedelta(minutes=15)

    end_label = talk.get('end_time_str')
    if not end_label:
        return begin, fallback_end

    finish = parse_datetime(talk['date'], end_label)
    # Safeguard if AM/PM boundary is crossed unpredictably
    if finish and finish > begin:
        return begin, finish
    return begin, fallback_end
def fold_ics_line(line):
    """Fold a content line so no physical line exceeds 75 octets (RFC 5545).

    The limit is measured in UTF-8 octets rather than characters, and a
    multi-byte character is never split across two physical lines. Each
    continuation line starts with a single space, which counts towards its
    75 octets. For pure-ASCII input this is the same as folding every 75
    characters.

    Args:
        line: One unfolded content line, without a line terminator.

    Returns:
        The physical lines joined with CRLF (no trailing CRLF).
    """
    limit = 75
    physical_lines = []
    current = []
    used = 0  # octets already placed on the current physical line

    for char in line:
        width = len(char.encode('utf-8'))
        if used + width > limit:
            physical_lines.append(''.join(current))
            # A continuation line begins with one space, which takes one octet.
            current = [' ']
            used = 1
        current.append(char)
        used += width

    physical_lines.append(''.join(current))
    return "\r\n".join(physical_lines)
def generate_ics(talks, filename="schedule.ics"):
    """Write the given talks to an iCalendar (.ics) file.

    Talks whose start time cannot be determined are skipped. Event times are
    written as floating local times (no time zone), so they appear at the same
    clock time wherever the calendar is opened.

    Args:
        talks: Talk dicts providing ``title``, ``place``, ``abstract`` and the
            date/time fields read by ``get_start_end_dt``.
        filename: Path of the file to write; an existing file is overwritten.
    """
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    def escape(value):
        """Escape a value for an iCalendar TEXT property."""
        # Backslash goes first so the backslashes added by later steps are not doubled.
        return (
            str(value)
            .replace('\\', r'\\')
            .replace(';', r'\;')
            .replace(',', r'\,')
            .replace('\n', r'\n')
        )

    # The trailing "Z" declares UTC, so the stamp must really be taken in UTC.
    dtstamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')

    ics_lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "PRODID:-//User Name//Conference Schedule 1.0//EN",
        "CALSCALE:GREGORIAN",
    ]

    for talk in talks:
        start_dt, end_dt = get_start_end_dt(talk)
        if not start_dt or not end_dt:
            continue

        # Datetime formatted for ICS (floating local time)
        dtstart = start_dt.strftime("%Y%m%dT%H%M%S")
        dtend = end_dt.strftime("%Y%m%dT%H%M%S")

        summary = escape(talk['title'])
        # A session without a detected location has place None; write an empty
        # LOCATION rather than the literal text "None".
        location = escape(talk['place'] or '')
        description = escape(f"Title/Authors: {talk['title']}\n\n{talk['abstract']}")
        uid = str(uuid.uuid4()) + "@mrsschedule"

        event_lines = [
            "BEGIN:VEVENT",
            f"UID:{uid}",
            f"DTSTAMP:{dtstamp}",
            f"DTSTART:{dtstart}",
            f"DTEND:{dtend}",
            f"SUMMARY:{summary}",
            f"LOCATION:{location}",
            f"DESCRIPTION:{description}",
            "END:VEVENT",
        ]
        ics_lines.extend(fold_ics_line(event_line) for event_line in event_lines)

    ics_lines.append("END:VCALENDAR")

    # newline='' disables newline translation, so the explicit CRLFs are written
    # unchanged on every platform (text mode on Windows would otherwise turn
    # "\r\n" into "\r\r\n").
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        # ICS files strictly require \r\n line endings, including after the last line
        f.write("\r\n".join(ics_lines) + "\r\n")
if __name__ == "__main__":
    # 1. Load the markdown version of the program book
    with open(input_file, "r", encoding="utf-8") as handle:
        markdown_data = handle.read()

    # 2. Parse every oral talk out of it
    all_talks = extract_talks_from_markdown(markdown_data)

    # 3. Keep only the talks the LLM considers interesting
    my_talks = get_interesting_talks(all_talks)

    # 4. Show a plain-text itinerary on the console
    banner = "=" * 50
    print("\n" + banner)
    print(" YOUR PERSONALIZED CONFERENCE ITINERARY")
    print(banner + "\n")

    if my_talks:
        for talk in my_talks:
            begins, ends = get_start_end_dt(talk)
            if begins and ends:
                minutes = int((ends - begins).total_seconds() / 60)
                when = f"{begins.strftime('%I:%M %p')} - {ends.strftime('%I:%M %p')} ({minutes} min)"
            else:
                when = f"{talk['time']} (could not calculate duration)"
            print(f"π {talk['title']}")
            print(f"π {talk['date']} @ {when}")
            print(f"π {talk['place']}")
            print("-" * 50)
    else:
        print("No talks matching your interests were found.")

    # 5. Write the calendar file
    generate_ics(my_talks, filename=output_file)
    print(f"\nβ Created '{output_file}' with {len(my_talks)} events.")
    print(" You can double-click this file to import it into your calendar.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment