Skip to content

Instantly share code, notes, and snippets.

View ipeirotis's full-sized avatar

Panos Ipeirotis ipeirotis

View GitHub Profile
@ipeirotis
ipeirotis / download_convai_transcripts.py
Created February 24, 2026 14:17
Download exam transcripts from ElevenLabs
import argparse
import csv
import json
import os
import re
import sys
import time
from dataclasses import asdict, is_dataclass
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
@ipeirotis
ipeirotis / council_grade.py
Created February 24, 2026 14:16
Grading using a council of LLMs
import json
import os
import re
import time
from tqdm import tqdm
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
import json
import os
from getpass import getpass
from pathlib import Path
from typing import Dict, List, Optional, Union
from IPython.display import display, HTML, JSON
try:
SELECT
P1.*
, COALESCE(P1.count-P2.count,P1.count) AS count_without_missing
, P2.count AS count_missing
FROM
itemsets P1
JOIN itemsets P2 ON (P1.email = P2.email)
WHERE
P2.itemset_size>=2 AND
P1.itemset_size = P2.itemset_size - 1 AND
# We use the "CUBE" operator, to calculate the frequency of any
# attribute-value combination.
#
# Notice that we replace the NULL values with "N/A" before the CUBE operator.
# This is to avoid confusion with the way that CUBE uses NULL values to indicate
# "any value" for attribute combinations that do not use the available attributes
# of the cube.
df = (
dataset
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ipeirotis
ipeirotis / baseball_salaries_violin_plots.py
Last active June 30, 2018 03:49
Plot the distribution of salaries in baseball leagues over time
# Long version with full comments
# We want to plot the distribution of salaries in baseball leagues over time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Download a dataset from Lahman's database of baseball statistics
# Panos Ipeirotis, Oct 30 2016
# Just trying to keep my 5yo busy while it is pouring rain outside...
import random
import emoji # https://pypi.python.org/pypi/emoji/
# Selection from http://www.webpagefx.com/tools/emoji-cheat-sheet/
emojis = [':apple:', ':green_apple:', ':tangerine:', ':lemon:', ':cherries:', ':grapes:', ':watermelon:', ':strawberry:',
':peach:', ':melon:', ':banana:', ':pear:', ':pineapple:', ':sweet_potato:', ':eggplant:', ':tomato:', ':corn:']
@ipeirotis
ipeirotis / MTurk_Cohort_Analysis.tsv
Created February 29, 2016 15:30
Data for Mechanical Turk Cohort Analysis
firstSeen lastSeen cnt
2014-05 2014-05 882
2014-05 2014-06 255
2014-05 2014-07 108
2014-05 2014-08 93
2014-05 2014-09 68
2014-05 2014-10 44
2014-05 2014-11 59
2014-05 2014-12 35
2014-05 2015-01 33
### Cohort Analysis
import matplotlib.pyplot as plt
# Connect to the BigQuery API
from googleapiclient.discovery import build
from oauth2client import client
credentials = client._get_application_default_credential_from_file('client_secrets.json')
credentials = credentials.create_scoped('https://www.googleapis.com/auth/bigquery')