Skip to content

Instantly share code, notes, and snippets.

@UBarney
Created June 19, 2025 02:29
Show Gist options
  • Save UBarney/9dcbf304e65f061d3352b34abd0f0e05 to your computer and use it in GitHub Desktop.
Save UBarney/9dcbf304e65f061d3352b34abd0f0e05 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys
import subprocess
import time
import re
import os
from statistics import mean
def parse_sql_file(sql_file):
"""Parse SQL file and return list of SQL statements"""
try:
with open(sql_file, 'r', encoding='utf-8') as f:
content = f.read()
# Split SQL statements by newlines and filter empty statements
sqls = [sql.strip() for sql in content.split('\n') if sql.strip()]
return sqls
except FileNotFoundError:
print(f"Error: SQL file not found: {sql_file}")
sys.exit(1)
except Exception as e:
print(f"Error: Failed to read SQL file - {e}")
sys.exit(1)
def execute_binary(bin_path, sql):
"""Execute binary file and return execution time"""
cmd = [bin_path, '--maxrows', '1', '-c', sql]
try:
start_time = time.time()
result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # 5min timeout
end_time = time.time()
if result.returncode != 0:
print(f"Warning: Command execution failed - {' '.join(cmd)}")
print(f"Error output: {result.stderr}")
return None
# Extract "Elapsed X seconds" from output
elapsed_match = re.search(r'Elapsed\s+([\d.]+)\s+seconds?\.', result.stdout)
if elapsed_match:
return float(elapsed_match.group(1))
else:
# If no Elapsed info found, use actual runtime
return end_time - start_time
except subprocess.TimeoutExpired:
print(f"Warning: Command execution timeout - {' '.join(cmd)}")
return None
except Exception as e:
print(f"Error: Failed to execute command - {e}")
return None
def run_benchmark(base_bin, opt_bin, sql):
"""Run benchmark for single SQL (execute each binary 5 times)"""
base_times = []
opt_times = []
base_name = os.path.basename(base_bin)
opt_name = os.path.basename(opt_bin)
print(f"Testing SQL: {sql[:50]}{'...' if len(sql) > 50 else ''}")
# Execute base_bin 5 times
for i in range(5):
start_time = time.time()
print(f" Running {base_name} #{i+1}...")
elapsed = execute_binary(base_bin, sql)
actual_time = time.time() - start_time
if elapsed is not None:
base_times.append(elapsed)
print(f" Completed in {actual_time:.2f}s")
else:
print(f" Failed after {actual_time:.2f}s")
# Execute opt_bin 5 times
for i in range(5):
start_time = time.time()
print(f" Running {opt_name} #{i+1}...")
elapsed = execute_binary(opt_bin, sql)
actual_time = time.time() - start_time
if elapsed is not None:
opt_times.append(elapsed)
print(f" Completed in {actual_time:.2f}s")
else:
print(f" Failed after {actual_time:.2f}s")
# Return mean times
base_mean = mean(base_times) if base_times else None
opt_mean = mean(opt_times) if opt_times else None
return base_mean, opt_mean
def calculate_improvement(base_time, opt_time):
"""Calculate performance improvement percentage"""
if base_time is None or opt_time is None:
return "N/A"
if base_time == 0:
return "N/A"
improvement = ((base_time - opt_time) / base_time) * 100
if improvement > 0:
return f"+{improvement:.2f}%"
else:
return f"{improvement:.2f}%"
def generate_markdown_table(results, base_bin, opt_bin):
"""Generate Markdown table"""
print("\n" + "="*80)
print("Benchmark Results")
print("="*80)
base_name = os.path.basename(base_bin)
opt_name = os.path.basename(opt_bin)
table = [f"| ID | SQL | {base_name} Time(s) | {opt_name} Time(s) | Performance Change |"]
table.append("|----|-----|-------------|------------|-------------------|")
for idx, (sql, base_time, opt_time, improvement) in enumerate(results, 1):
# Use full SQL without truncation
display_sql = sql.replace("|", "\\|") # Escape pipe characters
base_str = f"{base_time:.3f}" if base_time is not None else "Failed"
opt_str = f"{opt_time:.3f}" if opt_time is not None else "Failed"
table.append(f"| {idx} | {display_sql} | {base_str} | {opt_str} | {improvement} |")
return "\n".join(table)
def main():
if len(sys.argv) != 4:
print("Usage: ./bench.py base_bin opt_bin a.sql")
print(" base_bin: Path to baseline binary file")
print(" opt_bin: Path to optimized binary file")
print(" a.sql: SQL file containing SQL statements")
sys.exit(1)
base_bin = sys.argv[1]
opt_bin = sys.argv[2]
sql_file = sys.argv[3]
# Check if binary files exist
if not os.path.exists(base_bin):
print(f"Error: base_bin file not found: {base_bin}")
sys.exit(1)
if not os.path.exists(opt_bin):
print(f"Error: opt_bin file not found: {opt_bin}")
sys.exit(1)
# Parse SQL file
sqls = parse_sql_file(sql_file)
print(f"Found {len(sqls)} SQL statements")
if not sqls:
print("Error: No valid SQL statements found in SQL file")
sys.exit(1)
# Run benchmarks
results = []
for i, sql in enumerate(sqls, 1):
print(f"\n[{i}/{len(sqls)}] Starting test...")
base_time, opt_time = run_benchmark(base_bin, opt_bin, sql)
improvement = calculate_improvement(base_time, opt_time)
results.append((sql, base_time, opt_time, improvement))
# Generate and output results table
markdown_table = generate_markdown_table(results, base_bin, opt_bin)
print(f"\n{markdown_table}")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment