UBarney · June 19, 2025 02:29
diff --git a/sql_bench.py b/sql_bench.py
 #!/usr/bin/env python3

 import sys
 import subprocess
 import time
 import re
 import os
 from statistics import mean

 def parse_sql_file(sql_file):
    """Parse SQL file and return list of SQL statements"""
    try:
        with open(sql_file, 'r', encoding='utf-8') as f:
            content = f.read()
        
        # Split SQL statements by newlines and filter empty statements
        sqls = [sql.strip() for sql in content.split('\n') if sql.strip()]
        return sqls
    except FileNotFoundError:
        print(f"Error: SQL file not found: {sql_file}")
        sys.exit(1)
    except Exception as e:
        print(f"Error: Failed to read SQL file - {e}")
        sys.exit(1)

 def execute_binary(bin_path, sql):
    """Execute binary file and return execution time"""
    cmd = [bin_path, '--maxrows', '1', '-c', sql]
    
    try:
        start_time = time.time()
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)  # 5min timeout
        end_time = time.time()
        
        if result.returncode != 0:
            print(f"Warning: Command execution failed - {' '.join(cmd)}")
            print(f"Error output: {result.stderr}")
            return None
        
        # Extract "Elapsed X seconds" from output
        elapsed_match = re.search(r'Elapsed\s+([\d.]+)\s+seconds?\.', result.stdout)
        if elapsed_match:
            return float(elapsed_match.group(1))
        else:
            # If no Elapsed info found, use actual runtime
            return end_time - start_time
            
    except subprocess.TimeoutExpired:
        print(f"Warning: Command execution timeout - {' '.join(cmd)}")
        return None
    except Exception as e:
        print(f"Error: Failed to execute command - {e}")
        return None

 def run_benchmark(base_bin, opt_bin, sql):
    """Run benchmark for single SQL (execute each binary 5 times)"""
    base_times = []
    opt_times = []
    
    base_name = os.path.basename(base_bin)
    opt_name = os.path.basename(opt_bin)
    
    print(f"Testing SQL: {sql[:50]}{'...' if len(sql) > 50 else ''}")
    
    # Execute base_bin 5 times
    for i in range(5):
        start_time = time.time()
        print(f"  Running {base_name} #{i+1}...")
        elapsed = execute_binary(base_bin, sql)
        actual_time = time.time() - start_time
        if elapsed is not None:
            base_times.append(elapsed)
            print(f"    Completed in {actual_time:.2f}s")
        else:
            print(f"    Failed after {actual_time:.2f}s")
    
    # Execute opt_bin 5 times
    for i in range(5):
        start_time = time.time()
        print(f"  Running {opt_name} #{i+1}...")
        elapsed = execute_binary(opt_bin, sql)
        actual_time = time.time() - start_time
        if elapsed is not None:
            opt_times.append(elapsed)
            print(f"    Completed in {actual_time:.2f}s")
        else:
            print(f"    Failed after {actual_time:.2f}s")
    
    # Return mean times
    base_mean = mean(base_times) if base_times else None
    opt_mean = mean(opt_times) if opt_times else None
    
    return base_mean, opt_mean

 def calculate_improvement(base_time, opt_time):
    """Calculate performance improvement percentage"""
    if base_time is None or opt_time is None:
        return "N/A"
    
    if base_time == 0:
        return "N/A"
    
    improvement = ((base_time - opt_time) / base_time) * 100
    if improvement > 0:
        return f"+{improvement:.2f}%"
    else:
        return f"{improvement:.2f}%"

 def generate_markdown_table(results, base_bin, opt_bin):
    """Generate Markdown table"""
    print("\n" + "="*80)
    print("Benchmark Results")
    print("="*80)
    
    base_name = os.path.basename(base_bin)
    opt_name = os.path.basename(opt_bin)
    
    table = [f"| ID | SQL | {base_name} Time(s) | {opt_name} Time(s) | Performance Change |"]
    table.append("|----|-----|-------------|------------|-------------------|")
    
    for idx, (sql, base_time, opt_time, improvement) in enumerate(results, 1):
        # Use full SQL without truncation
        display_sql = sql.replace("|", "\\|")  # Escape pipe characters
        
        base_str = f"{base_time:.3f}" if base_time is not None else "Failed"
        opt_str = f"{opt_time:.3f}" if opt_time is not None else "Failed"
        
        table.append(f"| {idx} | {display_sql} | {base_str} | {opt_str} | {improvement} |")
    
    return "\n".join(table)

 def main():
    if len(sys.argv) != 4:
        print("Usage: ./bench.py base_bin opt_bin a.sql")
        print("  base_bin: Path to baseline binary file")
        print("  opt_bin:  Path to optimized binary file")
        print("  a.sql:    SQL file containing SQL statements")
        sys.exit(1)
    
    base_bin = sys.argv[1]
    opt_bin = sys.argv[2]
    sql_file = sys.argv[3]
    
    # Check if binary files exist
    if not os.path.exists(base_bin):
        print(f"Error: base_bin file not found: {base_bin}")
        sys.exit(1)
    if not os.path.exists(opt_bin):
        print(f"Error: opt_bin file not found: {opt_bin}")
        sys.exit(1)
    
    # Parse SQL file
    sqls = parse_sql_file(sql_file)
    print(f"Found {len(sqls)} SQL statements")
    
    if not sqls:
        print("Error: No valid SQL statements found in SQL file")
        sys.exit(1)
    
    # Run benchmarks
    results = []
    for i, sql in enumerate(sqls, 1):
        print(f"\n[{i}/{len(sqls)}] Starting test...")
        base_time, opt_time = run_benchmark(base_bin, opt_bin, sql)
        improvement = calculate_improvement(base_time, opt_time)
        results.append((sql, base_time, opt_time, improvement))
    
    # Generate and output results table
    markdown_table = generate_markdown_table(results, base_bin, opt_bin)
    print(f"\n{markdown_table}")

 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	import sys
	import subprocess
	import time
	import re
	import os
	from statistics import mean

	def parse_sql_file(sql_file):
	"""Parse SQL file and return list of SQL statements"""
	try:
	with open(sql_file, 'r', encoding='utf-8') as f:
	content = f.read()

	# Split SQL statements by newlines and filter empty statements
	sqls = [sql.strip() for sql in content.split('\n') if sql.strip()]
	return sqls
	except FileNotFoundError:
	print(f"Error: SQL file not found: {sql_file}")
	sys.exit(1)
	except Exception as e:
	print(f"Error: Failed to read SQL file - {e}")
	sys.exit(1)

	def execute_binary(bin_path, sql):
	"""Execute binary file and return execution time"""
	cmd = [bin_path, '--maxrows', '1', '-c', sql]

	try:
	start_time = time.time()
	result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # 5min timeout
	end_time = time.time()

	if result.returncode != 0:
	print(f"Warning: Command execution failed - {' '.join(cmd)}")
	print(f"Error output: {result.stderr}")
	return None

	# Extract "Elapsed X seconds" from output
	elapsed_match = re.search(r'Elapsed\s+([\d.]+)\s+seconds?\.', result.stdout)
	if elapsed_match:
	return float(elapsed_match.group(1))
	else:
	# If no Elapsed info found, use actual runtime
	return end_time - start_time

	except subprocess.TimeoutExpired:
	print(f"Warning: Command execution timeout - {' '.join(cmd)}")
	return None
	except Exception as e:
	print(f"Error: Failed to execute command - {e}")
	return None

	def run_benchmark(base_bin, opt_bin, sql):
	"""Run benchmark for single SQL (execute each binary 5 times)"""
	base_times = []
	opt_times = []

	base_name = os.path.basename(base_bin)
	opt_name = os.path.basename(opt_bin)

	print(f"Testing SQL: {sql[:50]}{'...' if len(sql) > 50 else ''}")

	# Execute base_bin 5 times
	for i in range(5):
	start_time = time.time()
	print(f" Running {base_name} #{i+1}...")
	elapsed = execute_binary(base_bin, sql)
	actual_time = time.time() - start_time
	if elapsed is not None:
	base_times.append(elapsed)
	print(f" Completed in {actual_time:.2f}s")
	else:
	print(f" Failed after {actual_time:.2f}s")

	# Execute opt_bin 5 times
	for i in range(5):
	start_time = time.time()
	print(f" Running {opt_name} #{i+1}...")
	elapsed = execute_binary(opt_bin, sql)
	actual_time = time.time() - start_time
	if elapsed is not None:
	opt_times.append(elapsed)
	print(f" Completed in {actual_time:.2f}s")
	else:
	print(f" Failed after {actual_time:.2f}s")

	# Return mean times
	base_mean = mean(base_times) if base_times else None
	opt_mean = mean(opt_times) if opt_times else None

	return base_mean, opt_mean

	def calculate_improvement(base_time, opt_time):
	"""Calculate performance improvement percentage"""
	if base_time is None or opt_time is None:
	return "N/A"

	if base_time == 0:
	return "N/A"

	improvement = ((base_time - opt_time) / base_time) * 100
	if improvement > 0:
	return f"+{improvement:.2f}%"
	else:
	return f"{improvement:.2f}%"

	def generate_markdown_table(results, base_bin, opt_bin):
	"""Generate Markdown table"""
	print("\n" + "="*80)
	print("Benchmark Results")
	print("="*80)

	base_name = os.path.basename(base_bin)
	opt_name = os.path.basename(opt_bin)

	table = [f"\| ID \| SQL \| {base_name} Time(s) \| {opt_name} Time(s) \| Performance Change \|"]
	table.append("\|----\|-----\|-------------\|------------\|-------------------\|")

	for idx, (sql, base_time, opt_time, improvement) in enumerate(results, 1):
	# Use full SQL without truncation
	display_sql = sql.replace("\|", "\\\|") # Escape pipe characters

	base_str = f"{base_time:.3f}" if base_time is not None else "Failed"
	opt_str = f"{opt_time:.3f}" if opt_time is not None else "Failed"

	table.append(f"\| {idx} \| {display_sql} \| {base_str} \| {opt_str} \| {improvement} \|")

	return "\n".join(table)

	def main():
	if len(sys.argv) != 4:
	print("Usage: ./bench.py base_bin opt_bin a.sql")
	print(" base_bin: Path to baseline binary file")
	print(" opt_bin: Path to optimized binary file")
	print(" a.sql: SQL file containing SQL statements")
	sys.exit(1)

	base_bin = sys.argv[1]
	opt_bin = sys.argv[2]
	sql_file = sys.argv[3]

	# Check if binary files exist
	if not os.path.exists(base_bin):
	print(f"Error: base_bin file not found: {base_bin}")
	sys.exit(1)
	if not os.path.exists(opt_bin):
	print(f"Error: opt_bin file not found: {opt_bin}")
	sys.exit(1)

	# Parse SQL file
	sqls = parse_sql_file(sql_file)
	print(f"Found {len(sqls)} SQL statements")

	if not sqls:
	print("Error: No valid SQL statements found in SQL file")
	sys.exit(1)

	# Run benchmarks
	results = []
	for i, sql in enumerate(sqls, 1):
	print(f"\n[{i}/{len(sqls)}] Starting test...")
	base_time, opt_time = run_benchmark(base_bin, opt_bin, sql)
	improvement = calculate_improvement(base_time, opt_time)
	results.append((sql, base_time, opt_time, improvement))

	# Generate and output results table
	markdown_table = generate_markdown_table(results, base_bin, opt_bin)
	print(f"\n{markdown_table}")

	if __name__ == "__main__":
	main()