Last active
April 2, 2026 21:04
-
-
Save tfausak/3a17bb415a836612ca8f070b146785bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # Unified Generic deriving benchmark | |
| # Tests compile-time (allocations + time) and runtime (to . from round-trip) | |
| # for record types (N fields) and sum types (N constructors), | |
| # comparing -fdirect-core-generic-deriving vs -fno-direct-core-generic-deriving. | |
set -euo pipefail

# Compiler under test and the optimisation level used for the compile-time
# measurements; both may be overridden from the environment.
GHC="${GHC:-./_build/stage1/bin/ghc}"
OPT="${OPT:--O0}"

# Fail fast: check the compiler before the (potentially slow) store search
# below and before any temporary directory is created.
if [ ! -x "$GHC" ]; then
  echo "Error: GHC not found at $GHC" >&2
  exit 1
fi

# Find libgmp for linking runtime benchmarks.
# Only done when LIBRARY_PATH is unset or empty; libnuma's directory is
# appended when it is found as well.
if [ -z "${LIBRARY_PATH:-}" ]; then
  GMP_LIB=$(find /nix/store -name "libgmp.so" -path "*gmp-with-cxx*" 2>/dev/null | head -1 | xargs dirname 2>/dev/null || true)
  NUMA_LIB=$(find /nix/store -name "libnuma.so" 2>/dev/null | head -1 | xargs dirname 2>/dev/null || true)
  if [ -n "$GMP_LIB" ]; then
    export LIBRARY_PATH="${GMP_LIB}${NUMA_LIB:+:$NUMA_LIB}"
  fi
fi

# Scratch directory for generated sources and build products.
TMPDIR=$(mktemp -d)
# Single quotes defer expansion until the trap fires and the inner double
# quotes keep a path containing whitespace or glob characters intact.
trap 'rm -rf -- "$TMPDIR"' EXIT
| # ---------- Generate Haskell source files ---------- | |
# gen_record N
# Writes $TMPDIR/R<N>.hs: a module R<N> declaring a single-constructor type R
# with N strict Int fields and a derived Generic instance.
# Prints the path of the generated file on stdout.
gen_record() {
  # One declaration per statement: in `local n=$1 f=".../R${n}.hs"` every
  # argument is expanded before any of the assignments takes effect, so ${n}
  # would be read from the caller's global `n` rather than from $1.
  local n=$1
  local f="$TMPDIR/R${n}.hs"
  local i
  cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module R${n} where
import GHC.Generics
HEOF
  # Append the whole declaration through a single redirection instead of
  # reopening the file once per field.
  {
    printf 'data R = R'
    for ((i = 0; i < n; i++)); do printf ' !Int'; done
    echo ' deriving Generic'
  } >> "$f"
  echo "$f"
}
# gen_enum N
# Writes $TMPDIR/E<N>.hs: a module E<N> declaring a sum type E with N
# constructors C_1 .. C_N (each carrying one Int) and a derived Generic
# instance. Prints the path of the generated file on stdout.
gen_enum() {
  # One declaration per statement: in `local n=$1 f=".../E${n}.hs"` every
  # argument is expanded before any of the assignments takes effect, so ${n}
  # would be read from the caller's global `n` rather than from $1.
  local n=$1
  local f="$TMPDIR/E${n}.hs"
  local i sep
  cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module E${n} where
import GHC.Generics
HEOF
  {
    printf 'data E\n'
    # The first constructor is introduced by '=', every later one by '|'.
    sep='='
    for ((i = 1; i <= n; i++)); do
      printf ' %s C_%d Int\n' "$sep" "$i"
      sep='|'
    done
    echo ' deriving Generic'
  } >> "$f"
  echo "$f"
}
# gen_runtime KIND N
# Writes $TMPDIR/Run_<KIND><N>.hs: a Main module that times one million
# `G.to . G.from` round trips with getCPUTime and prints the elapsed CPU time
# in milliseconds.
#   KIND = "R"       -> single-constructor type R with N strict Int fields
#   KIND = otherwise -> sum type E with constructors C_1 .. C_N
# Prints the path of the generated file on stdout.
gen_runtime() {
  # One declaration per statement: with everything on a single `local` line
  # the file name would be expanded from the caller's global `kind` and `n`
  # before the local values are assigned.
  local kind=$1
  local n=$2
  local f="$TMPDIR/Run_${kind}${n}.hs"
  local ty decl val i

  # Only the data declaration, the type name and the test value differ
  # between the two kinds; the rest of the program is one shared template.
  if [ "$kind" = "R" ]; then
    ty="R"
    decl="data R = R"
    val="R"
    for ((i = 1; i <= n; i++)); do
      decl+=" !Int"
      val+=" $i"
    done
    decl+=" deriving (G.Generic)"
  else
    ty="E"
    decl="data E"
    for ((i = 1; i <= n; i++)); do
      if [ "$i" -eq 1 ]; then
        decl+=$'\n'" = C_$i Int"
      else
        decl+=$'\n'" | C_$i Int"
      fi
    done
    decl+=$'\n'" deriving (G.Generic)"
    val="C_1 42"
  fi

  cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric, BangPatterns #-}
module Main where
import qualified GHC.Generics as G
import System.CPUTime

$decl

{-# NOINLINE testVal #-}
testVal :: $ty
testVal = $val

{-# INLINE roundTrip #-}
roundTrip :: $ty -> $ty
roundTrip = G.to . G.from

iters :: Int
iters = 1000000

go :: Int -> $ty -> Int
go 0 !_ = 0
go n !r = case roundTrip r of
  !r' -> go (n-1) r'

main :: IO ()
main = do
  t0 <- getCPUTime
  let !_ = go iters testVal
  t1 <- getCPUTime
  let ms = fromIntegral (t1 - t0) / 1e9 :: Double
  putStrLn (show ms)
HEOF
  echo "$f"
}
| # ---------- Measurement helpers ---------- | |
# compile_stats SRC FLAG
# Compiles SRC with "$GHC" $OPT -fforce-recomp FLAG -c ... +RTS -s and prints
# "alloc_bytes elapsed_seconds" taken from the RTS summary.
# On a failed compile, or when the summary cannot be parsed, the captured
# compiler output is written to stderr and the function returns 1 instead of
# printing an empty result.
compile_stats() {
  local src="$1" flag="$2"
  local stats alloc elapsed
  # $OPT and $flag are left unquoted on purpose so that each may carry
  # several options.
  if ! stats=$("$GHC" $OPT -fforce-recomp $flag -c "$src" +RTS -s 2>&1); then
    {
      echo "Error: compiling $src with $flag failed:"
      printf '%s\n' "$stats"
    } >&2
    return 1
  fi
  # First "bytes allocated in the heap" line, thousands separators removed.
  alloc=$(awk '/bytes allocated in the heap/ { gsub(/,/, "", $1); print $1; exit }' <<< "$stats")
  # The elapsed figure is the parenthesised value on the "Total time" line.
  # The label is matched with any run of spaces because the RTS summary pads
  # it for column alignment.
  elapsed=$(awk '/Total +time/ { sub(/.*\( */, ""); sub(/s elapsed.*/, ""); print; exit }' <<< "$stats")
  if [ -z "$alloc" ] || [ -z "$elapsed" ]; then
    {
      echo "Error: no RTS statistics found in the output for $src ($flag):"
      printf '%s\n' "$stats"
    } >&2
    return 1
  fi
  echo "$alloc $elapsed"
}
# runtime_bench SRC FLAG
# Builds SRC with -O2 and FLAG into $TMPDIR/run_<basename>, runs the binary
# three times and prints the median of the three values it reports.
# Prints "LINK_FAIL" (exit status 0) when the build does not produce an
# executable; the compiler's stderr is kept in $TMPDIR/link_err.txt.
runtime_bench() {
  local src="$1" flag="$2"
  local bin t
  bin="$TMPDIR/run_$(basename "$src" .hs)"
  # Every FLAG variant of the same source shares this output path. Remove any
  # binary left by a previous variant so a failed build cannot silently
  # re-run it and report the wrong numbers.
  rm -f -- "$bin"
  if ! "$GHC" -O2 -fforce-recomp $flag -o "$bin" "$src" > /dev/null 2>"$TMPDIR/link_err.txt" \
      || [ ! -x "$bin" ]; then
    echo "LINK_FAIL"
    return 0
  fi
  # Run 3 times, take median
  local times=()
  for _ in 1 2 3; do
    t=$("$bin")
    times+=("$t")
  done
  # Sort and take the middle value. -g (general numeric) is used because
  # Haskell's `show` renders small Doubles in exponent notation (e.g.
  # 5.0e-2), which `sort -n` does not understand.
  printf '%s\n' "${times[@]}" | sort -g | sed -n '2p'
}
# fmt_alloc VALUE
# Prints VALUE with locale-dependent thousands grouping (printf's ' flag).
# Anything that is not a plain integer is echoed back unchanged.
fmt_alloc() {
  # Validate first: printf emits "0" before failing on a non-numeric
  # argument, which the old `printf ... || echo` fallback turned into output
  # such as "0abc".
  if [[ ${1-} =~ ^-?[0-9]+$ ]]; then
    # The fallback remains for printf implementations without the ' flag.
    printf "%'d" "$1" 2>/dev/null || echo "$1"
  else
    echo "${1-}"
  fi
}
# pct_change BASE NEW
# Prints the relative change from BASE to NEW as a signed percentage with one
# decimal place (e.g. "+12.3%"). Prints "N/A" unless both arguments are
# non-negative integers and BASE is greater than zero.
pct_change() {
  local base=$1 new=$2
  if [[ $base =~ ^[0-9]+$ && $new =~ ^[0-9]+$ ]] && [ "$base" -gt 0 ]; then
    # Pass the operands with -v instead of splicing them into the program
    # text, where an empty or odd value becomes an awk syntax error.
    awk -v base="$base" -v new="$new" \
      'BEGIN { printf "%+.1f%%", (new - base) * 100.0 / base }'
  else
    echo "N/A"
  fi
}
# pct_change_float BASE NEW
# Like pct_change, but for decimal values (exponent notation accepted).
# Prints "N/A" when either argument is not a number (for example the
# "LINK_FAIL" marker) or when BASE is zero, instead of letting awk die on a
# division by zero.
pct_change_float() {
  local base=$1 new=$2
  local num_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
  if [[ $base =~ $num_re && $new =~ $num_re ]]; then
    # Operands go in via -v rather than being spliced into the program text.
    awk -v base="$base" -v new="$new" 'BEGIN {
      if (base + 0 == 0) { print "N/A"; exit }
      printf "%+.1f%%", (new - base) * 100.0 / base
    }'
  else
    echo "N/A"
  fi
}
| # ---------- Main ---------- | |
echo "GHC: $GHC"
echo "Optimization: $OPT (compile-time), -O2 (runtime)"
echo ""

# One table per type family, one row per size N.
for kind in "Record" "Sum"; do
  printf '=== %s type ===\n\n' "$kind"
  printf "%-6s | %-16s %-16s %-8s | %-10s %-10s %-8s | %-10s %-10s %-8s\n" \
    "N" "Baseline alloc" "Direct alloc" "Change" \
    "Base time" "Direct time" "Change" \
    "Base run ms" "Direct ms" "Change"
  # Horizontal rule made of 140 dashes.
  printf -v rule '%140s' ''
  printf '%s\n' "${rule// /-}"

  for n in 1 10 100 1000; do
    # Generate the compile-time source and remember which gen_runtime tag
    # belongs to this family.
    case "$kind" in
      Record) src=$(gen_record "$n"); rt_tag="R" ;;
      *)      src=$(gen_enum "$n");   rt_tag="E" ;;
    esac

    # Compile-time allocation and elapsed time: baseline, then direct.
    read base_alloc base_time <<< "$(compile_stats "$src" "-fno-direct-core-generic-deriving")"
    read dc_alloc dc_time <<< "$(compile_stats "$src" "-fdirect-core-generic-deriving")"
    alloc_chg=$(pct_change "$base_alloc" "$dc_alloc")
    time_chg=$(pct_change_float "$base_time" "$dc_time")

    # Runtime of the generated round-trip program, run at every size.
    rt_src=$(gen_runtime "$rt_tag" "$n")
    base_rt=$(runtime_bench "$rt_src" "-fno-direct-core-generic-deriving")
    dc_rt=$(runtime_bench "$rt_src" "-fdirect-core-generic-deriving")
    rt_chg=$(pct_change_float "$base_rt" "$dc_rt")

    printf "%-6d | %16s %16s %8s | %10s %10s %8s | %10s %10s %8s\n" \
      "$n" \
      "$(fmt_alloc "$base_alloc")" "$(fmt_alloc "$dc_alloc")" "$alloc_chg" \
      "${base_time}s" "${dc_time}s" "$time_chg" \
      "${base_rt}ms" "${dc_rt}ms" "$rt_chg"
  done
  echo ""
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # Benchmark with -dshow-passes for a big record and big enum | |
| # Compares -fdirect-core-generic-deriving vs -fno-direct-core-generic-deriving | |
set -euo pipefail

# Compiler under test and the optimisation level passed to it; both may be
# overridden from the environment.
GHC="${GHC:-./_build/stage1/bin/ghc}"
OPT="${OPT:--O0}"

# Fail fast: check the compiler before a temporary directory is created.
if [ ! -x "$GHC" ]; then
  echo "Error: GHC not found at $GHC" >&2
  exit 1
fi

# Scratch directory for generated sources and captured compiler output.
TMPDIR=$(mktemp -d)
# Single quotes defer expansion until the trap fires and the inner double
# quotes keep a path containing whitespace or glob characters intact.
trap 'rm -rf -- "$TMPDIR"' EXIT
| # ---------- Generate source files ---------- | |
# gen_record N
# Writes $TMPDIR/R<N>.hs: a module R<N> declaring a single-constructor type R
# with N strict Int fields and a derived Generic instance.
# Prints the path of the generated file on stdout.
gen_record() {
  # One declaration per statement: in `local n=$1 f=".../R${n}.hs"` every
  # argument is expanded before any of the assignments takes effect, so ${n}
  # would be read from the caller's global `n` rather than from $1.
  local n=$1
  local f="$TMPDIR/R${n}.hs"
  local i
  cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module R${n} where
import GHC.Generics
HEOF
  # Append the whole declaration through a single redirection instead of
  # reopening the file once per field.
  {
    printf 'data R = R'
    for ((i = 0; i < n; i++)); do printf ' !Int'; done
    echo ' deriving Generic'
  } >> "$f"
  echo "$f"
}
# gen_enum N
# Writes $TMPDIR/E<N>.hs: a module E<N> declaring a sum type E with N
# constructors C_1 .. C_N (each carrying one Int) and a derived Generic
# instance. Prints the path of the generated file on stdout.
gen_enum() {
  # One declaration per statement: in `local n=$1 f=".../E${n}.hs"` every
  # argument is expanded before any of the assignments takes effect, so ${n}
  # would be read from the caller's global `n` rather than from $1.
  local n=$1
  local f="$TMPDIR/E${n}.hs"
  local i sep
  cat > "$f" <<HEOF
{-# LANGUAGE DeriveGeneric #-}
module E${n} where
import GHC.Generics
HEOF
  {
    printf 'data E\n'
    # The first constructor is introduced by '=', every later one by '|'.
    sep='='
    for ((i = 1; i <= n; i++)); do
      printf ' %s C_%d Int\n' "$sep" "$i"
      sep='|'
    done
    echo ' deriving Generic'
  } >> "$f"
  echo "$f"
}
| # ---------- Run with -dshow-passes ---------- | |
# run_passes LABEL SRC FLAG
# Compiles SRC with $OPT, FLAG and -dshow-passes (+RTS -s), storing everything
# the compiler prints in $TMPDIR/<LABEL>.txt. Prints that path on stdout.
run_passes() {
  local label="$1" src="$2" flag="$3"
  local out="$TMPDIR/${label}.txt"
  # Capture stdout as well as stderr: callers read this function's stdout
  # via $(...) and expect nothing but the log path there.
  # $OPT and $flag are left unquoted on purpose so that each may carry
  # several options.
  if ! "$GHC" $OPT -fforce-recomp $flag -dshow-passes -c "$src" +RTS -s > "$out" 2>&1; then
    # Still report the path so that the caller displays the failure log.
    echo "Warning: compiling $src with $flag failed; see $out" >&2
  fi
  echo "$out"
}
echo "GHC: $GHC"
echo "Optimization: $OPT"
echo ""

# One large instance of each type family, compiled with both settings.
for kind in "Record" "Enum"; do
  n=1000
  case "$kind" in
    Record) src=$(gen_record "$n") ;;
    *)      src=$(gen_enum "$n") ;;
  esac

  banner="=========================================="
  printf '%s\n %s N=%s\n%s\n\n' "$banner" "$kind" "$n" "$banner"

  # Compile both variants first, then print the two captured logs.
  base_out=$(run_passes "${kind}_base" "$src" "-fno-direct-core-generic-deriving")
  direct_out=$(run_passes "${kind}_direct" "$src" "-fdirect-core-generic-deriving")

  echo "--- BASELINE (-fno-direct-core-generic-deriving) ---"
  cat "$base_out"
  echo ""

  echo "--- DIRECT CORE (-fdirect-core-generic-deriving) ---"
  cat "$direct_out"
  echo ""
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment