Created
June 19, 2025 02:29
-
-
Save UBarney/9dcbf304e65f061d3352b34abd0f0e05 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import subprocess | |
import time | |
import re | |
import os | |
from statistics import mean | |
def parse_sql_file(sql_file): | |
"""Parse SQL file and return list of SQL statements""" | |
try: | |
with open(sql_file, 'r', encoding='utf-8') as f: | |
content = f.read() | |
# Split SQL statements by newlines and filter empty statements | |
sqls = [sql.strip() for sql in content.split('\n') if sql.strip()] | |
return sqls | |
except FileNotFoundError: | |
print(f"Error: SQL file not found: {sql_file}") | |
sys.exit(1) | |
except Exception as e: | |
print(f"Error: Failed to read SQL file - {e}") | |
sys.exit(1) | |
def execute_binary(bin_path, sql): | |
"""Execute binary file and return execution time""" | |
cmd = [bin_path, '--maxrows', '1', '-c', sql] | |
try: | |
start_time = time.time() | |
result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # 5min timeout | |
end_time = time.time() | |
if result.returncode != 0: | |
print(f"Warning: Command execution failed - {' '.join(cmd)}") | |
print(f"Error output: {result.stderr}") | |
return None | |
# Extract "Elapsed X seconds" from output | |
elapsed_match = re.search(r'Elapsed\s+([\d.]+)\s+seconds?\.', result.stdout) | |
if elapsed_match: | |
return float(elapsed_match.group(1)) | |
else: | |
# If no Elapsed info found, use actual runtime | |
return end_time - start_time | |
except subprocess.TimeoutExpired: | |
print(f"Warning: Command execution timeout - {' '.join(cmd)}") | |
return None | |
except Exception as e: | |
print(f"Error: Failed to execute command - {e}") | |
return None | |
def run_benchmark(base_bin, opt_bin, sql): | |
"""Run benchmark for single SQL (execute each binary 5 times)""" | |
base_times = [] | |
opt_times = [] | |
base_name = os.path.basename(base_bin) | |
opt_name = os.path.basename(opt_bin) | |
print(f"Testing SQL: {sql[:50]}{'...' if len(sql) > 50 else ''}") | |
# Execute base_bin 5 times | |
for i in range(5): | |
start_time = time.time() | |
print(f" Running {base_name} #{i+1}...") | |
elapsed = execute_binary(base_bin, sql) | |
actual_time = time.time() - start_time | |
if elapsed is not None: | |
base_times.append(elapsed) | |
print(f" Completed in {actual_time:.2f}s") | |
else: | |
print(f" Failed after {actual_time:.2f}s") | |
# Execute opt_bin 5 times | |
for i in range(5): | |
start_time = time.time() | |
print(f" Running {opt_name} #{i+1}...") | |
elapsed = execute_binary(opt_bin, sql) | |
actual_time = time.time() - start_time | |
if elapsed is not None: | |
opt_times.append(elapsed) | |
print(f" Completed in {actual_time:.2f}s") | |
else: | |
print(f" Failed after {actual_time:.2f}s") | |
# Return mean times | |
base_mean = mean(base_times) if base_times else None | |
opt_mean = mean(opt_times) if opt_times else None | |
return base_mean, opt_mean | |
def calculate_improvement(base_time, opt_time): | |
"""Calculate performance improvement percentage""" | |
if base_time is None or opt_time is None: | |
return "N/A" | |
if base_time == 0: | |
return "N/A" | |
improvement = ((base_time - opt_time) / base_time) * 100 | |
if improvement > 0: | |
return f"+{improvement:.2f}%" | |
else: | |
return f"{improvement:.2f}%" | |
def generate_markdown_table(results, base_bin, opt_bin): | |
"""Generate Markdown table""" | |
print("\n" + "="*80) | |
print("Benchmark Results") | |
print("="*80) | |
base_name = os.path.basename(base_bin) | |
opt_name = os.path.basename(opt_bin) | |
table = [f"| ID | SQL | {base_name} Time(s) | {opt_name} Time(s) | Performance Change |"] | |
table.append("|----|-----|-------------|------------|-------------------|") | |
for idx, (sql, base_time, opt_time, improvement) in enumerate(results, 1): | |
# Use full SQL without truncation | |
display_sql = sql.replace("|", "\\|") # Escape pipe characters | |
base_str = f"{base_time:.3f}" if base_time is not None else "Failed" | |
opt_str = f"{opt_time:.3f}" if opt_time is not None else "Failed" | |
table.append(f"| {idx} | {display_sql} | {base_str} | {opt_str} | {improvement} |") | |
return "\n".join(table) | |
def main(): | |
if len(sys.argv) != 4: | |
print("Usage: ./bench.py base_bin opt_bin a.sql") | |
print(" base_bin: Path to baseline binary file") | |
print(" opt_bin: Path to optimized binary file") | |
print(" a.sql: SQL file containing SQL statements") | |
sys.exit(1) | |
base_bin = sys.argv[1] | |
opt_bin = sys.argv[2] | |
sql_file = sys.argv[3] | |
# Check if binary files exist | |
if not os.path.exists(base_bin): | |
print(f"Error: base_bin file not found: {base_bin}") | |
sys.exit(1) | |
if not os.path.exists(opt_bin): | |
print(f"Error: opt_bin file not found: {opt_bin}") | |
sys.exit(1) | |
# Parse SQL file | |
sqls = parse_sql_file(sql_file) | |
print(f"Found {len(sqls)} SQL statements") | |
if not sqls: | |
print("Error: No valid SQL statements found in SQL file") | |
sys.exit(1) | |
# Run benchmarks | |
results = [] | |
for i, sql in enumerate(sqls, 1): | |
print(f"\n[{i}/{len(sqls)}] Starting test...") | |
base_time, opt_time = run_benchmark(base_bin, opt_bin, sql) | |
improvement = calculate_improvement(base_time, opt_time) | |
results.append((sql, base_time, opt_time, improvement)) | |
# Generate and output results table | |
markdown_table = generate_markdown_table(results, base_bin, opt_bin) | |
print(f"\n{markdown_table}") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment