Benchmarking Racket's Optimized map/for-each/andmap/ormap vs Kernel Versions

Background

The file racket/collects/racket/private/map.rkt contains optimized implementations of map, for-each, andmap, and ormap. A comment at the top states:

;; #%kernel implements `map', `for-each', `andmap', and `ormap',
;;  but the JIT generates faster code, especially for the common cases.

This comment was written for Racket BC (the bytecode/JIT implementation). This benchmark investigates whether these optimizations are still beneficial in Racket CS (Chez Scheme-based Racket).

Methodology

The benchmarks compare:

  • Optimized versions: The implementations from racket/private/map.rkt (exposed as the standard map, for-each, andmap, ormap)
  • Kernel versions: The built-in implementations from #%kernel (accessed via (require (prefix-in k: '#%kernel))); see the snippet below
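
For example, both versions can be called side by side in the same program; only the binding differs (the full benchmark files are attached at the end of this page):

#lang racket/base
(require (prefix-in k: '#%kernel))

(map add1 '(1 2 3))    ; optimized version from racket/private/map.rkt
(k:map add1 '(1 2 3))  ; kernel version from #%kernel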

Tests were run with:

  • List sizes: 10, 100, 1000, 10000 elements
  • Single-list and two-list variants
  • Auto-calibrated iteration counts to ensure each benchmark runs for at least 10 ms (see the sketch below)
  • Three garbage collections before each timing run, plus three more between the optimized and kernel measurements
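
The calibration and timing helpers, condensed from the map-benchmark2.rkt file attached at the end of this page:

(define min-runtime-ms 10)

;; Estimate per-iteration cost from 100 runs, then pick an iteration count
;; large enough that the timed run lasts at least min-runtime-ms.
(define (calibrate-iterations thunk)
  (collect-garbage) (collect-garbage) (collect-garbage)
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range 100)]) (thunk))
  (define time-per-iter (/ (- (current-inexact-milliseconds) start) 100))
  (max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter)))))

;; Time `iterations` runs of a thunk and return elapsed milliseconds.
(define (time-expr thunk iterations)
  (collect-garbage) (collect-garbage) (collect-garbage)
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range iterations)]) (thunk))
  (- (current-inexact-milliseconds) start))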

Results

Racket version: 9.1.0.3

In the tables below, Ratio is kernel time divided by optimized time, so values above 1 mean the optimized version was faster and values below 1 mean the kernel version was faster.

List size: 10

Function              Optimized (ms)  Kernel (ms)  Iterations  Ratio  Winner
map (1 list)          3.90            4.83         227,556     1.24   optimized
map (2 lists)         4.43            4.40         146,286     0.99   roughly equal
for-each (1 list)     3.86            5.06         372,364     1.31   optimized
for-each (2 lists)    4.71            5.62         186,182     1.19   optimized
andmap (1 list)       4.18            6.26         455,112     1.50   optimized
andmap (2 lists)      5.08            4.41         186,182     0.87   kernel
ormap (1 list)        3.12            4.24         292,572     1.36   optimized
ormap (2 lists)       5.19            4.54         186,182     0.87   kernel

List size: 100

Function              Optimized (ms)  Kernel (ms)  Iterations  Ratio  Winner
map (1 list)          7.40            10.40        48,762      1.40   optimized
map (2 lists)         9.23            10.05        28,055      1.09   roughly equal
for-each (1 list)     6.87            7.13         70,621      1.04   roughly equal
for-each (2 lists)    9.32            9.40         34,421      1.01   roughly equal
andmap (1 list)       6.76            6.93         61,135      1.03   roughly equal
andmap (2 lists)      7.62            7.69         27,864      1.01   roughly equal
ormap (1 list)        6.08            6.37         61,135      1.05   roughly equal
ormap (2 lists)       9.27            9.38         33,301      1.01   roughly equal

List size: 1000

Function              Optimized (ms)  Kernel (ms)  Iterations  Ratio  Winner
map (1 list)          7.87            9.86         4,730       1.25   optimized
map (2 lists)         9.17            11.81        2,711       1.29   optimized
for-each (1 list)     8.42            8.82         10,215      1.05   roughly equal
for-each (2 lists)    9.76            10.02        4,036       1.03   roughly equal
andmap (1 list)       8.39            9.12         10,064      1.09   roughly equal
andmap (2 lists)      9.58            10.07        3,943       1.05   roughly equal
ormap (1 list)        8.38            9.29         10,015      1.11   optimized
ormap (2 lists)       9.74            10.23        4,016       1.05   roughly equal

List size: 10000

Function              Optimized (ms)  Kernel (ms)  Iterations  Ratio  Winner
map (1 list)          8.21            9.56         379         1.16   optimized
map (2 lists)         9.48            10.16        221         1.07   roughly equal
for-each (1 list)     9.73            10.23        1,102       1.05   roughly equal
for-each (2 lists)    9.94            10.23        369         1.03   roughly equal
andmap (1 list)       9.72            10.66        1,108       1.10   roughly equal
andmap (2 lists)      9.93            10.28        368         1.04   roughly equal
ormap (1 list)        9.61            10.61        1,090       1.10   optimized
ormap (2 lists)       9.87            10.49        366         1.06   roughly equal

Analysis

Key Findings

  1. map benefits most from the optimization: The optimized map is consistently 16-40% faster for single-list operations across all list sizes. This is the clearest win.

  2. Single-list operations benefit more than two-list: Single-list variants show consistent improvement, while two-list variants are often "roughly equal".

  3. Two-list andmap/ormap on small lists favor the kernel: For 10-element lists, andmap and ormap with two lists are ~13% faster with the kernel implementation. This suggests the optimized code's two-list fast path carries overhead that does not pay off for very small lists.

  4. Medium/large lists converge: For 100+ element lists, most operations are "roughly equal" except for map, which maintains its advantage.

  5. Single-list andmap on small lists shows the largest gain: The optimized version is about 1.5x as fast (ratio 1.50) for the common case of checking a predicate over a small list.

Summary by Function

Function              Verdict
map (1 list)          Keep optimization - 16-40% faster across all sizes
map (2 lists)         Keep - 9-29% faster for larger lists
for-each (1 list)     Keep - 5-31% faster, especially small lists
for-each (2 lists)    Marginal - 1-19% faster
andmap (1 list)       Keep - 3-50% faster, especially small lists
andmap (2 lists)      Mixed - kernel faster for small lists
ormap (1 list)        Keep - 5-36% faster
ormap (2 lists)       Mixed - kernel faster for small lists

Conclusion

The optimization is still beneficial in Racket CS, particularly for:

  • map operations (the most common use case)
  • Single-list variants of all functions
  • Small lists with single-list operations

However, there's a small regression for two-list andmap and ormap with small (10-element) lists, where the kernel is ~13% faster. This might be worth investigating - the two-list fast path may have overhead that doesn't pay off for very small lists.

Overall, the optimizations are worth keeping because map is the most commonly used function, and it shows consistent improvement.

CP0 Optimizer Analysis

To understand why the optimized version is faster, we can examine the Chez Scheme cp0 optimizer output using PLT_LINKLET_SHOW_CP0=1.
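
For example, with the two test programs below saved as map-opt.rkt and map-kern.rkt, the post-cp0 forms can be dumped with:

PLT_LINKLET_SHOW_CP0=1 racket map-opt.rkt
PLT_LINKLET_SHOW_CP0=1 racket map-kern.rkt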

Test Programs

Optimized version (map-opt.rkt):

#lang racket/base
(define lst '(1 2 3 4 5))
(define (test) (map add1 lst))
(test)

Kernel version (map-kern.rkt):

#lang racket/base
(require (prefix-in k: '#%kernel))
(define lst '(1 2 3 4 5))
(define (test) (k:map add1 lst))
(test)

CP0 Output Comparison

Optimized version - the map is completely inlined:

(letrec ([test.58 (lambda ()
                    (letrec ([loop_711.59 (lambda (l_812.60)
                                            (if (#2%null? l_812.60)
                                                '()
                                                (let ([r_913.61 (#2%cdr l_812.60)])
                                                  (let ([app_25.62 (#2%add1 (#3%car l_812.60))])
                                                    (#2%cons app_25.62
                                                             (loop_711.59 r_913.61))))))])
                      (loop_711.59 '(1 2 3 4 5))))])

Kernel version - remains an indirect function call:

(letrec ([test.56 (lambda ()
                    ((#3%$top-level-value '#%map.rkt-rumble.sls-#%map-0)
                      #2%add1
                      '(1 2 3 4 5)))])

Key Differences

  • Inlining: the optimized version is completely inlined into a direct loop; the kernel version remains an indirect call through $top-level-value.
  • list? check: constant-folded away in the optimized version (the literal list is known to be a list); the kernel version checks at runtime inside the callee.
  • Function call overhead: none in the optimized version (a direct recursive loop); the kernel version needs a runtime symbol lookup plus a function call.
  • Loop structure: an explicit letrec loop in the generated code for the optimized version; hidden inside the #%map primitive for the kernel version.

Why Inlining Happens

The optimized map in racket/private/map.rkt uses begin-encourage-inline and has an explicit loop structure that the Chez Scheme cp0 optimizer can inline. The key code pattern:

(if (or-unsafe (and (procedure? f)
                    (procedure-arity-includes? f 1)
                    (list? l)))
    (let loop ([l l])
      (cond
        [(null? l) null]
        [else
         (let ([r (cdr l)])
           (cons (f (car l)) (loop r)))]))
    (gen-map f (list l)))

When cp0 sees this with a known list argument, it:

  1. Constant-folds the list? check to #t
  2. Inlines the entire let loop into the call site
  3. Eliminates the branch to gen-map

The kernel's map is defined in Racket CS's runtime layer (visible as '#%map.rkt-rumble.sls-#%map-0 in the output above), in a separate linklet, so cp0 cannot inline it across the linklet boundary; all it sees is an opaque $top-level-value reference.

Schemify Optimization

Based on the analysis above, an optimization was added to the schemify optimizer (racket/src/schemify/optimize.rkt) that recognizes calls to the kernel's map when the function argument is known to be a procedure, and transforms them to explicit loops:

;; Optimize (map f lst) to an explicit loop when f is known to be a procedure.
;; This allows cp0 to inline the loop, avoiding the overhead of #%map which
;; uses #%app to handle applicable structs.
[`(map ,f ,lst)
 #:guard (hash-ref prim-knowns 'map #f) ; map is the kernel primitive
 (define u-f (unwrap f))
 (cond
   [(or (lambda? f)
        (and (symbol? u-f)
             (known-procedure? (find-known u-f prim-knowns knowns imports mutated))))
    (define loop-id (deterministic-gensym "loop"))
    (define l-id (deterministic-gensym "l"))
    (define r-id (deterministic-gensym "r"))
    `(letrec* ([,loop-id (lambda (,l-id)
                           (if (null? ,l-id)
                               null
                               (let ([,r-id (cdr ,l-id)])
                                 (cons (,f (car ,l-id))
                                       (,loop-id ,r-id)))))])
       (,loop-id ,lst))]
   [else v])]

This optimization:

  1. Matches (map f lst) where map is the kernel primitive
  2. Checks if f is known to be a procedure (either a lambda or a known-procedure?)
  3. Generates an explicit loop that can be inlined by cp0

The result is that kernel map calls now get the same inlining treatment as the optimized map from racket/private/map.rkt.
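
As a concrete illustration, a source-level call such as (map add1 lst), where add1 is a known procedure, is rewritten by this pass into roughly the following (loop, l, and r stand in for the fresh names produced by deterministic-gensym):

(letrec* ([loop (lambda (l)
                  (if (null? l)
                      null
                      (let ([r (cdr l)])
                        (cons (add1 (car l))
                              (loop r)))))])
  (loop lst))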

Benchmark Code

The complete benchmark code follows, in order: an initial fixed-iteration benchmark, the calibrated map-benchmark2.rkt used to produce the results above, and the two cp0 test programs, map-kern.rkt and map-opt.rkt.

#lang racket/base
;; Benchmark comparing the JIT-optimized map/for-each/andmap/ormap
;; from racket/private/map.rkt against the kernel's built-in versions.
;;
;; The comment in map.rkt says the JIT generates faster code, but
;; that was written for Racket BC. Let's check if it's still true in CS.
(require (prefix-in k: '#%kernel))

;; Number of iterations for each benchmark
(define iterations 1000)

;; Test data
(define small-list  (build-list 10 values))
(define medium-list (build-list 100 values))
(define large-list  (build-list 1000 values))
(define xlarge-list (build-list 10000 values))

;; Simple timing helper
(define-syntax-rule (time-it label expr)
  (let ()
    (collect-garbage)
    (collect-garbage)
    (define start (current-inexact-milliseconds))
    (for ([_ (in-range iterations)])
      expr)
    (define end (current-inexact-milliseconds))
    (printf "~a: ~a ms\n" label (- end start))))

(define (run-map-benchmarks lst name)
  (printf "\n=== map benchmarks (~a, ~a elements) ===\n" name (length lst))
  ;; Single list map
  (time-it "optimized map (1 list)" (map add1 lst))
  (time-it "kernel map (1 list) " (k:map add1 lst))
  ;; Two list map
  (time-it "optimized map (2 lists)" (map + lst lst))
  (time-it "kernel map (2 lists) " (k:map + lst lst)))

(define (run-for-each-benchmarks lst name)
  (printf "\n=== for-each benchmarks (~a, ~a elements) ===\n" name (length lst))
  ;; Single list for-each
  (time-it "optimized for-each (1 list)" (for-each void lst))
  (time-it "kernel for-each (1 list) " (k:for-each void lst))
  ;; Two list for-each
  (time-it "optimized for-each (2 lists)" (for-each void lst lst))
  (time-it "kernel for-each (2 lists) " (k:for-each void lst lst)))

(define (run-andmap-benchmarks lst name)
  (printf "\n=== andmap benchmarks (~a, ~a elements) ===\n" name (length lst))
  ;; Single list andmap (all true)
  (time-it "optimized andmap (1 list, all true)" (andmap number? lst))
  (time-it "kernel andmap (1 list, all true) " (k:andmap number? lst))
  ;; Two list andmap
  (time-it "optimized andmap (2 lists)" (andmap < lst lst))
  (time-it "kernel andmap (2 lists) " (k:andmap < lst lst)))

(define (run-ormap-benchmarks lst name)
  (printf "\n=== ormap benchmarks (~a, ~a elements) ===\n" name (length lst))
  ;; Single list ormap (none true - worst case, has to check all)
  (time-it "optimized ormap (1 list, none true)" (ormap string? lst))
  (time-it "kernel ormap (1 list, none true) " (k:ormap string? lst))
  ;; Two list ormap
  (time-it "optimized ormap (2 lists)" (ormap > lst lst))
  (time-it "kernel ormap (2 lists) " (k:ormap > lst lst)))

(printf "Racket version: ~a\n" (version))
(printf "Iterations per benchmark: ~a\n" iterations)

;; Run all benchmarks
(for ([lst (list small-list medium-list large-list xlarge-list)]
      [name '("small" "medium" "large" "xlarge")])
  (run-map-benchmarks lst name)
  (run-for-each-benchmarks lst name)
  (run-andmap-benchmarks lst name)
  (run-ormap-benchmarks lst name))

(printf "\n=== Summary ===\n")
(printf "Lower times are better.\n")
(printf "If optimized versions are faster, the JIT optimization is still beneficial.\n")

#lang racket/base
;; More rigorous benchmark comparing optimized vs kernel map/for-each/andmap/ormap
(require (prefix-in k: '#%kernel)
         racket/format)

;; Target minimum runtime in milliseconds
(define min-runtime-ms 10)

;; Test data
(define test-sizes '(10 100 1000 10000))

;; Calibration: run a thunk and determine iterations needed for min-runtime-ms
(define (calibrate-iterations thunk)
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  ;; Run 100 iterations to estimate time per iteration
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range 100)])
    (thunk))
  (define elapsed (- (current-inexact-milliseconds) start))
  (define time-per-iter (/ elapsed 100))
  ;; Calculate iterations needed for min-runtime-ms
  (max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter)))))

;; Timing helper that returns milliseconds
(define (time-expr thunk iterations)
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range iterations)])
    (thunk))
  (define end (current-inexact-milliseconds))
  (- end start))

(define (benchmark-pair name opt-thunk kern-thunk)
  ;; Calibrate based on optimized version
  (define iterations (calibrate-iterations opt-thunk))
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define opt-time (time-expr opt-thunk iterations))
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define kern-time (time-expr kern-thunk iterations))
  (define ratio (/ kern-time opt-time))
  (printf " ~a: opt=~ams kern=~ams iters=~a ratio=~a (~a)\n"
          name
          (~r opt-time #:precision 2)
          (~r kern-time #:precision 2)
          iterations
          (~r ratio #:precision 2)
          (cond [(> ratio 1.1) "optimized faster"]
                [(< ratio 0.9) "kernel faster"]
                [else "roughly equal"])))

(printf "Racket version: ~a\n\n" (version))

(for ([size test-sizes])
  (define lst (build-list size values))
  (define lst2 (build-list size (lambda (x) (+ x size))))
  (printf "=== List size: ~a ===\n" size)
  ;; map with 1 list
  (benchmark-pair "map (1 list)"
                  (lambda () (map add1 lst))
                  (lambda () (k:map add1 lst)))
  ;; map with 2 lists
  (benchmark-pair "map (2 lists)"
                  (lambda () (map + lst lst2))
                  (lambda () (k:map + lst lst2)))
  ;; for-each with 1 list
  (benchmark-pair "for-each (1 list)"
                  (lambda () (for-each void lst))
                  (lambda () (k:for-each void lst)))
  ;; for-each with 2 lists
  (benchmark-pair "for-each (2 lists)"
                  (lambda () (for-each void lst lst2))
                  (lambda () (k:for-each void lst lst2)))
  ;; andmap with 1 list (all true)
  (benchmark-pair "andmap (1 list)"
                  (lambda () (andmap number? lst))
                  (lambda () (k:andmap number? lst)))
  ;; andmap with 2 lists
  (benchmark-pair "andmap (2 lists)"
                  (lambda () (andmap <= lst lst2))
                  (lambda () (k:andmap <= lst lst2)))
  ;; ormap with 1 list (none true - worst case)
  (benchmark-pair "ormap (1 list)"
                  (lambda () (ormap string? lst))
                  (lambda () (k:ormap string? lst)))
  ;; ormap with 2 lists
  (benchmark-pair "ormap (2 lists)"
                  (lambda () (ormap > lst lst2))
                  (lambda () (k:ormap > lst lst2)))
  (newline))

(printf "Ratio > 1 means optimized is faster, < 1 means kernel is faster\n")

#lang racket/base
;; Uses the kernel map directly
(require (prefix-in k: '#%kernel))

(define lst '(1 2 3 4 5))

(define (test)
  (k:map add1 lst))

(test)

#lang racket/base
;; Uses the optimized map from racket/private/map.rkt
(define lst '(1 2 3 4 5))

(define (test)
  (map add1 lst))

(test)