Skip to content

Instantly share code, notes, and snippets.

@tfausak
Last active April 2, 2026 21:04
Show Gist options
  • Select an option

  • Save tfausak/3a17bb415a836612ca8f070b146785bf to your computer and use it in GitHub Desktop.

Select an option

Save tfausak/3a17bb415a836612ca8f070b146785bf to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Unified Generic deriving benchmark
# Tests compile-time (allocations + time) and runtime (to . from round-trip)
# for record types (N fields) and sum types (N constructors),
# comparing -fdirect-core-generic-deriving vs -fno-direct-core-generic-deriving.
set -euo pipefail

# Compiler under test and the optimisation level used for the compile-time
# measurements; both can be overridden from the environment.
: "${GHC:=./_build/stage1/bin/ghc}"
: "${OPT:=-O0}"

# When the caller has not provided LIBRARY_PATH, look in the Nix store for the
# directories holding libgmp (and, if present, libnuma) so that the runtime
# benchmarks can be linked. Lookup failures are deliberately ignored: the
# variables simply stay empty and LIBRARY_PATH is left untouched.
if [[ -z "${LIBRARY_PATH:-}" ]]; then
  GMP_LIB=$(
    find /nix/store -name "libgmp.so" -path "*gmp-with-cxx*" 2>/dev/null \
      | head -1 | xargs dirname 2>/dev/null || true
  )
  NUMA_LIB=$(
    find /nix/store -name "libnuma.so" 2>/dev/null \
      | head -1 | xargs dirname 2>/dev/null || true
  )
  if [[ -n "$GMP_LIB" ]]; then
    LIBRARY_PATH="$GMP_LIB"
    if [[ -n "$NUMA_LIB" ]]; then
      LIBRARY_PATH+=":$NUMA_LIB"
    fi
    export LIBRARY_PATH
  fi
fi
# Scratch directory for every generated source file and build product.
TMPDIR=$(mktemp -d) || {
  echo "Error: could not create a temporary directory" >&2
  exit 1
}
# Single quotes defer the expansion until the trap fires, where the path is
# then properly quoted; a double-quoted trap string would splice the raw path
# into the command and word-split it if it contained whitespace or globs.
trap 'rm -rf -- "$TMPDIR"' EXIT
# Bail out early when the configured compiler is missing or not executable.
[[ -x "$GHC" ]] || {
  echo "Error: GHC not found at $GHC" >&2
  exit 1
}
# ---------- Generate Haskell source files ----------
gen_record() {
local n=$1 f="$TMPDIR/R${n}.hs"
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module R${n} where
import GHC.Generics
HEOF
printf "data R = R" >> "$f"
for i in $(seq 1 $n); do printf " !Int" >> "$f"; done
echo " deriving Generic" >> "$f"
echo "$f"
}
gen_enum() {
local n=$1 f="$TMPDIR/E${n}.hs"
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module E${n} where
import GHC.Generics
HEOF
printf "data E\n" >> "$f"
for i in $(seq 1 $n); do
if [ $i -eq 1 ]; then printf " = C_%d Int\n" $i >> "$f"
else printf " | C_%d Int\n" $i >> "$f"; fi
done
echo " deriving Generic" >> "$f"
echo "$f"
}
gen_runtime() {
local kind=$1 n=$2 f="$TMPDIR/Run_${kind}${n}.hs"
if [ "$kind" = "R" ]; then
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric, BangPatterns #-}
module Main where
import qualified GHC.Generics as G
import System.CPUTime
data R = R$(for i in $(seq 1 $n); do printf " !Int"; done) deriving (G.Generic)
{-# NOINLINE testVal #-}
testVal :: R
testVal = R$(for i in $(seq 1 $n); do printf " %d" $i; done)
{-# INLINE roundTrip #-}
roundTrip :: R -> R
roundTrip = G.to . G.from
iters :: Int
iters = 1000000
go :: Int -> R -> Int
go 0 !_ = 0
go n !r = case roundTrip r of
!r' -> go (n-1) r'
main :: IO ()
main = do
t0 <- getCPUTime
let !_ = go iters testVal
t1 <- getCPUTime
let ms = fromIntegral (t1 - t0) / 1e9 :: Double
putStrLn (show ms)
HEOF
else
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric, BangPatterns #-}
module Main where
import qualified GHC.Generics as G
import System.CPUTime
HEOF
printf "\ndata E\n" >> "$f"
for i in $(seq 1 $n); do
if [ $i -eq 1 ]; then printf " = C_%d Int\n" $i >> "$f"
else printf " | C_%d Int\n" $i >> "$f"; fi
done
echo " deriving (G.Generic)" >> "$f"
cat >> "$f" <<HEOF
{-# NOINLINE testVal #-}
testVal :: E
testVal = C_1 42
{-# INLINE roundTrip #-}
roundTrip :: E -> E
roundTrip = G.to . G.from
iters :: Int
iters = 1000000
go :: Int -> E -> Int
go 0 !_ = 0
go n !r = case roundTrip r of
!r' -> go (n-1) r'
main :: IO ()
main = do
t0 <- getCPUTime
let !_ = go iters testVal
t1 <- getCPUTime
let ms = fromIntegral (t1 - t0) / 1e9 :: Double
putStrLn (show ms)
HEOF
fi
echo "$f"
}
# ---------- Measurement helpers ----------
#######################################
# Compile one module with `+RTS -s` and extract the compiler's own heap
# allocation and elapsed wall-clock time from the RTS summary.
# Globals:   GHC (read) - compiler executable
#            OPT (read) - optimisation flag(s), intentionally word-split
# Arguments: $1 - Haskell source file
#            $2 - extra compiler flag(s), intentionally word-split
# Outputs:   "alloc_bytes elapsed_seconds" on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if compilation fails or the summary is missing
#######################################
compile_stats() {
  local src="$1" flag="$2"
  local stats parsed

  # The RTS summary is written to stderr, so both streams are captured. On
  # failure the captured text is the only diagnostic there is: show it.
  # shellcheck disable=SC2086
  if ! stats=$("$GHC" $OPT -fforce-recomp $flag -c "$src" +RTS -s 2>&1); then
    printf 'Error: compiling %s with %s failed:\n%s\n' "$src" "$flag" "$stats" >&2
    return 1
  fi

  # The summary pads its columns, so "Total" and "time" may be separated by
  # any amount of whitespace; only the first occurrence of each line is used.
  if ! parsed=$(awk '
    /bytes allocated in the heap/ && alloc == "" {
      alloc = $1
      gsub(/,/, "", alloc)
    }
    /Total[ \t]+time/ && elapsed == "" {
      elapsed = $0
      sub(/.*\( */, "", elapsed)
      sub(/s elapsed.*/, "", elapsed)
    }
    END {
      if (alloc == "" || elapsed == "") exit 1
      print alloc, elapsed
    }
  ' <<< "$stats"); then
    printf 'Error: no RTS statistics found in the output of %s for %s\n' \
      "$GHC" "$src" >&2
    return 1
  fi

  printf '%s\n' "$parsed"
}
#######################################
# Build a generated benchmark program with -O2 and report the median of three
# runs.
# Globals:   GHC (read)    - compiler executable
#            TMPDIR (read) - directory for the binary and the build log
# Arguments: $1 - Haskell source file
#            $2 - extra compiler flag(s), intentionally word-split
# Outputs:   the median of the three values printed by the program, or the
#            literal LINK_FAIL when the build fails (build log goes to stderr)
#######################################
runtime_bench() {
  local src="$1" flag="$2"
  local bin err="$TMPDIR/link_err.txt"
  bin="$TMPDIR/run_$(basename "$src" .hs)"

  # Both flag variants build to the same path. Remove any earlier binary and
  # check the compiler's exit status so that a failed build can never be
  # mistaken for a fresh executable.
  rm -f -- "$bin"
  # shellcheck disable=SC2086
  if ! "$GHC" -O2 -fforce-recomp $flag -o "$bin" "$src" > /dev/null 2> "$err" \
    || [ ! -x "$bin" ]; then
    cat "$err" >&2
    echo "LINK_FAIL"
    return 0
  fi

  # Run 3 times, take the median (second line of the sorted values).
  local times=() _
  for _ in 1 2 3; do
    times+=("$("$bin")")
  done
  printf '%s\n' "${times[@]}" | sort -n | sed -n '2p'
}
#######################################
# Format an integer with comma thousands separators, independent of the
# current locale (printf's %'d only groups digits when LC_NUMERIC defines a
# separator, which the C/POSIX locale does not).
# Arguments: $1 - integer to format
# Outputs:   the grouped number without a trailing newline; any value that is
#            not a plain integer is echoed back unchanged
#######################################
fmt_alloc() {
  local value="${1-}" sign="" digits grouped=""
  if [[ ! "$value" =~ ^-?[0-9]+$ ]]; then
    printf '%s\n' "$value"
    return 0
  fi
  if [[ "$value" == -* ]]; then
    sign="-"
  fi
  digits=${value#-}
  # Peel groups of three digits off the right-hand end.
  while ((${#digits} > 3)); do
    grouped=",${digits: -3}${grouped}"
    digits=${digits:0:${#digits}-3}
  done
  printf '%s%s%s' "$sign" "$digits" "$grouped"
}
#######################################
# Signed percentage change between two non-negative integers.
# Arguments: $1 - baseline value
#            $2 - new value
# Outputs:   e.g. "+12.3%"; "N/A" when either argument is not a non-negative
#            integer or the baseline is zero
#######################################
pct_change() {
  local base=$1 new=$2
  if [[ "$base" =~ ^[0-9]+$ && "$new" =~ ^[0-9]+$ ]] \
    && [ "$base" -gt 0 ] 2>/dev/null; then
    # Values are handed to awk as variables instead of being spliced into the
    # program text, so unexpected input cannot alter the expression.
    awk -v base="$base" -v new="$new" \
      'BEGIN { printf "%+.1f%%", (new - base) * 100.0 / base }'
  else
    echo "N/A"
  fi
}
#######################################
# Signed percentage change between two decimal numbers.
# Arguments: $1 - baseline value
#            $2 - new value
# Outputs:   e.g. "-4.2%"; "N/A" when either argument is not a number (for
#            example the LINK_FAIL sentinel or an empty string) or the
#            baseline is zero
#######################################
pct_change_float() {
  local base=$1 new=$2
  # Validation happens inside awk so that a zero or non-numeric baseline
  # yields "N/A" instead of a fatal division-by-zero error.
  awk -v base="$base" -v new="$new" 'BEGIN {
    num = "^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$"
    if (base !~ num || new !~ num || base + 0 == 0) {
      printf "N/A"
      exit
    }
    printf "%+.1f%%", (new - base) * 100.0 / base
  }'
}
# ---------- Main ----------
# Driver: for each kind of type and each size N, measure compile-time
# allocation and elapsed time plus the runtime of the to/from round-trip,
# once per deriving strategy, and print one table row per N.
echo "GHC: $GHC"
echo "Optimization: $OPT (compile-time), -O2 (runtime)"
echo ""
for kind in "Record" "Sum"; do
  echo "=== $kind type ==="
  echo ""
  printf "%-6s | %-16s %-16s %-8s | %-10s %-10s %-8s | %-10s %-10s %-8s\n" \
    "N" "Baseline alloc" "Direct alloc" "Change" \
    "Base time" "Direct time" "Change" \
    "Base run ms" "Direct ms" "Change"
  printf "%s\n" "$(printf '%.0s-' {1..140})"
  for n in 1 10 100 1000; do
    if [ "$kind" = "Record" ]; then
      src=$(gen_record "$n")
    else
      src=$(gen_enum "$n")
    fi
    # Compile-time stats
    read -r base_alloc base_time \
      <<< "$(compile_stats "$src" "-fno-direct-core-generic-deriving")"
    read -r dc_alloc dc_time \
      <<< "$(compile_stats "$src" "-fdirect-core-generic-deriving")"
    alloc_chg=$(pct_change "$base_alloc" "$dc_alloc")
    # A percentage that cannot be computed (missing, zero or non-numeric
    # operand) must not abort the remaining measurements under `set -e`.
    time_chg=$(pct_change_float "$base_time" "$dc_time" 2>/dev/null) \
      || time_chg="N/A"
    # Runtime (only at reasonable sizes - 1000 ctors is fine)
    if [ "$kind" = "Record" ]; then
      rt_src=$(gen_runtime "R" "$n")
    else
      rt_src=$(gen_runtime "E" "$n")
    fi
    base_rt=$(runtime_bench "$rt_src" "-fno-direct-core-generic-deriving")
    dc_rt=$(runtime_bench "$rt_src" "-fdirect-core-generic-deriving")
    rt_chg=$(pct_change_float "$base_rt" "$dc_rt" 2>/dev/null) || rt_chg="N/A"
    # The unit is appended only to real measurements, never to the LINK_FAIL
    # sentinel or to a missing value.
    base_rt_cell=$base_rt
    dc_rt_cell=$dc_rt
    case "$base_rt" in "" | LINK_FAIL) ;; *) base_rt_cell="${base_rt}ms" ;; esac
    case "$dc_rt" in "" | LINK_FAIL) ;; *) dc_rt_cell="${dc_rt}ms" ;; esac
    printf "%-6d | %16s %16s %8s | %10s %10s %8s | %10s %10s %8s\n" \
      "$n" \
      "$(fmt_alloc "$base_alloc")" "$(fmt_alloc "$dc_alloc")" "$alloc_chg" \
      "${base_time}s" "${dc_time}s" "$time_chg" \
      "$base_rt_cell" "$dc_rt_cell" "$rt_chg"
  done
  echo ""
done
#!/usr/bin/env bash
# Benchmark with -dshow-passes for a big record and big enum
# Compares -fdirect-core-generic-deriving vs -fno-direct-core-generic-deriving
set -euo pipefail

# Compiler under test and optimisation level; both can be overridden from the
# environment.
GHC="${GHC:-./_build/stage1/bin/ghc}"
OPT="${OPT:--O0}"

# Scratch directory for the generated sources and the captured pass logs.
TMPDIR=$(mktemp -d) || {
  echo "Error: could not create a temporary directory" >&2
  exit 1
}
# Single quotes defer the expansion until the trap fires, where the path is
# then properly quoted; a double-quoted trap string would splice the raw path
# into the command and word-split it if it contained whitespace or globs.
trap 'rm -rf -- "$TMPDIR"' EXIT
# Nothing can be measured without an executable compiler: stop right away.
if ! [[ -x "$GHC" ]]; then
  echo "Error: GHC not found at $GHC" >&2
  exit 1
fi
# ---------- Generate source files ----------
gen_record() {
local n=$1 f="$TMPDIR/R${n}.hs"
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module R${n} where
import GHC.Generics
HEOF
printf "data R = R" >> "$f"
for i in $(seq 1 $n); do printf " !Int" >> "$f"; done
echo " deriving Generic" >> "$f"
echo "$f"
}
gen_enum() {
local n=$1 f="$TMPDIR/E${n}.hs"
cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module E${n} where
import GHC.Generics
HEOF
printf "data E\n" >> "$f"
for i in $(seq 1 $n); do
if [ $i -eq 1 ]; then printf " = C_%d Int\n" $i >> "$f"
else printf " | C_%d Int\n" $i >> "$f"; fi
done
echo " deriving Generic" >> "$f"
echo "$f"
}
# ---------- Run with -dshow-passes ----------
#######################################
# Compile one module with -dshow-passes and `+RTS -s`, saving everything the
# compiler prints to a report file.
# Globals:   GHC (read)    - compiler executable
#            OPT (read)    - optimisation flag(s), intentionally word-split
#            TMPDIR (read) - directory the report is written to
# Arguments: $1 - label used as the report's base name
#            $2 - Haskell source file
#            $3 - extra compiler flag(s), intentionally word-split
# Outputs:   path of the report file on stdout, and nothing else
# Returns:   0 even if compilation fails; the report then holds the errors
#######################################
run_passes() {
  local label="$1" src="$2" flag="$3"
  local out="$TMPDIR/${label}.txt"
  # Callers capture stdout to obtain the report path, so the compiler's own
  # stdout is sent to the report as well; otherwise it would be mixed into
  # the returned path. Failure is tolerated so the report can still be shown.
  # shellcheck disable=SC2086
  "$GHC" $OPT -fforce-recomp $flag -dshow-passes -c "$src" +RTS -s \
    > "$out" 2>&1 || true
  echo "$out"
}
# Driver: for one large record and one large enum, compile with each deriving
# strategy and dump the two -dshow-passes reports one after the other.
echo "GHC: $GHC"
echo "Optimization: $OPT"
echo ""
n=1000
for kind in "Record" "Enum"; do
  case "$kind" in
    Record) src=$(gen_record $n) ;;
    *) src=$(gen_enum $n) ;;
  esac
  echo "=========================================="
  echo " $kind N=$n"
  echo "=========================================="
  echo ""
  # Both compilations run before anything is printed, so the two reports
  # appear back to back.
  base_out=$(run_passes "${kind}_base" "$src" "-fno-direct-core-generic-deriving")
  direct_out=$(run_passes "${kind}_direct" "$src" "-fdirect-core-generic-deriving")
  echo "--- BASELINE (-fno-direct-core-generic-deriving) ---"
  cat "$base_out"
  echo ""
  echo "--- DIRECT CORE (-fdirect-core-generic-deriving) ---"
  cat "$direct_out"
  echo ""
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment