Skip to content

Instantly share code, notes, and snippets.

@corporatepiyush
Created October 16, 2025 21:26
Show Gist options
  • Save corporatepiyush/0c853861db8f22e87c7087ae40d54668 to your computer and use it in GitHub Desktop.
Save corporatepiyush/0c853861db8f22e87c7087ae40d54668 to your computer and use it in GitHub Desktop.
Optimizing For loops
import 'dart:math';
import 'dart:typed_data';
// Number of elements allocated for, and written into, the benchmark input.
const int size = 1000;
/// Entry point: fills an [Int32List] with seeded pseudo-random values and
/// prints one timing line for each even-sum implementation.
void main() {
  // Fixed seed so every run sums the same sequence of values.
  final Random generator = Random(42);
  final Int32List data = Int32List(size);
  int slot = 0;
  while (slot < size) {
    data[slot] = generator.nextInt(1000) - 500;
    slot += 1;
  }

  // Runs [fn] once over `data` and prints the elapsed microseconds together
  // with the returned sum.
  void dur(String name, int Function(Int32List) fn) {
    final Stopwatch sw = Stopwatch();
    sw.start();
    final int sum = fn(data);
    sw.stop();
    print('${name.padRight(40)}: ${sw.elapsedMicroseconds} µs (sum=$sum)');
  }

  print('=== EVEN-SUM: Dart 3.10 (distinct locals) ===');

  // Map literals keep insertion order, so variants run in the listed order.
  final Map<String, int Function(Int32List)> variants = {
    'regular % 2': regularMod2,
    'bitwise & 1': bitwiseAnd1,
    'standalone 4× unroll': unroll4x, // somewhat helpful
    'standalone 8× unroll': unroll8x, // not helpful
    'branch-less & 1': branchlessEven, // extremely helpful
    'ALL TRICKS 4×': allTricksCombined4x,
    'ALL TRICKS 8×': allTricksCombined8x,
  };
  variants.forEach(dur);
}
// ---------- baseline ----------
/// Baseline: returns the sum of the even elements of [a], testing parity
/// with the `%` operator.
int regularMod2(Int32List a) {
  int total = 0;
  int idx = 0;
  while (idx < a.length) {
    final int value = a[idx];
    if (value % 2 == 0) {
      total += value;
    }
    idx++;
  }
  return total;
}
// ---------- standalone bit-trick ----------
/// Returns the sum of the even elements of [a], testing parity by masking
/// the lowest bit instead of using `%`.
int bitwiseAnd1(Int32List a) {
  int total = 0;
  int idx = 0;
  while (idx < a.length) {
    final int value = a[idx];
    final bool lowBitClear = (value & 1) == 0;
    if (lowBitClear) {
      total += value;
    }
    idx++;
  }
  return total;
}
// ---------- 4× unroll with distinct locals ----------
/// Returns the sum of the even elements of [a] using a 4-way unrolled loop
/// with one accumulator per lane.
///
/// Lengths that are not a multiple of 4 are handled by a scalar tail loop,
/// so the result matches the non-unrolled variants for every input length.
int unroll4x(Int32List a) {
  final int n = a.length;
  int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
  int i = 0;
  // Main body: only runs while a full group of four elements remains.
  for (; i < n - 3; i += 4) {
    if ((a[i] & 1) == 0) s0 += a[i];
    if ((a[i + 1] & 1) == 0) s1 += a[i + 1];
    if ((a[i + 2] & 1) == 0) s2 += a[i + 2];
    if ((a[i + 3] & 1) == 0) s3 += a[i + 3];
  }
  // Tail: the up-to-three trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    if ((a[i] & 1) == 0) s0 += a[i];
  }
  return s0 + s1 + s2 + s3;
}
// ---------- 8× unroll with distinct locals ----------
/// Returns the sum of the even elements of [a] using an 8-way unrolled loop
/// with one accumulator per lane.
///
/// Lengths that are not a multiple of 8 are handled by a scalar tail loop,
/// so the result matches the non-unrolled variants for every input length.
int unroll8x(Int32List a) {
  final int n = a.length;
  int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0, s6 = 0, s7 = 0;
  int i = 0;
  // Main body: only runs while a full group of eight elements remains.
  for (; i < n - 7; i += 8) {
    if ((a[i] & 1) == 0) s0 += a[i];
    if ((a[i + 1] & 1) == 0) s1 += a[i + 1];
    if ((a[i + 2] & 1) == 0) s2 += a[i + 2];
    if ((a[i + 3] & 1) == 0) s3 += a[i + 3];
    if ((a[i + 4] & 1) == 0) s4 += a[i + 4];
    if ((a[i + 5] & 1) == 0) s5 += a[i + 5];
    if ((a[i + 6] & 1) == 0) s6 += a[i + 6];
    if ((a[i + 7] & 1) == 0) s7 += a[i + 7];
  }
  // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    if ((a[i] & 1) == 0) s0 += a[i];
  }
  return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
}
// ---------- branch-less even-test ----------
/// Returns the sum of the even elements of [a] without a conditional in the
/// loop body: each element is AND-ed with a mask that is all ones for even
/// values and zero for odd values.
int branchlessEven(Int32List a) {
  int total = 0;
  final int count = a.length;
  for (int idx = 0; idx < count; idx++) {
    final int value = a[idx];
    // Flipping the low bit yields 1 for even, 0 for odd; negating turns
    // that into -1 (all bits set) or 0.
    final int keep = -((value & 1) ^ 1);
    total += value & keep;
  }
  return total;
}
// ---------- ALL TRICKS: 4× unroll + branch-less + distinct locals ----------
/// Returns the sum of the even elements of [a], combining a 4-way unrolled
/// loop, one accumulator per lane, and a branch-free parity mask.
///
/// Lengths that are not a multiple of 4 are finished by a branch-free tail
/// loop, so no trailing element is skipped.
int allTricksCombined4x(Int32List a) {
  final int n = a.length;
  int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
  int i = 0;
  for (; i < n - 3; i += 4) {
    // Each m is 1 for an even element and 0 for an odd one; -m is then
    // all ones or zero, which keeps or clears the element.
    final int m0 = ~(a[i] & 1) & 1;
    final int m1 = ~(a[i + 1] & 1) & 1;
    final int m2 = ~(a[i + 2] & 1) & 1;
    final int m3 = ~(a[i + 3] & 1) & 1;
    s0 += a[i] & -m0;
    s1 += a[i + 1] & -m1;
    s2 += a[i + 2] & -m2;
    s3 += a[i + 3] & -m3;
  }
  // Tail: the up-to-three trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    final int m = ~(a[i] & 1) & 1;
    s0 += a[i] & -m;
  }
  return s0 + s1 + s2 + s3;
}
// ---------- ALL TRICKS: 8× unroll + branch-less + distinct locals ----------
/// Returns the sum of the even elements of [a], combining an 8-way unrolled
/// loop, one accumulator per lane, and a branch-free parity mask.
///
/// Lengths that are not a multiple of 8 are finished by a branch-free tail
/// loop, so no trailing element is skipped.
int allTricksCombined8x(Int32List a) {
  final int n = a.length;
  int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0, s6 = 0, s7 = 0;
  int i = 0;
  for (; i < n - 7; i += 8) {
    // Each m is 1 for an even element and 0 for an odd one; -m is then
    // all ones or zero, which keeps or clears the element.
    final int m0 = ~(a[i] & 1) & 1;
    final int m1 = ~(a[i + 1] & 1) & 1;
    final int m2 = ~(a[i + 2] & 1) & 1;
    final int m3 = ~(a[i + 3] & 1) & 1;
    final int m4 = ~(a[i + 4] & 1) & 1;
    final int m5 = ~(a[i + 5] & 1) & 1;
    final int m6 = ~(a[i + 6] & 1) & 1;
    final int m7 = ~(a[i + 7] & 1) & 1;
    s0 += a[i] & -m0;
    s1 += a[i + 1] & -m1;
    s2 += a[i + 2] & -m2;
    s3 += a[i + 3] & -m3;
    s4 += a[i + 4] & -m4;
    s5 += a[i + 5] & -m5;
    s6 += a[i + 6] & -m6;
    s7 += a[i + 7] & -m7;
  }
  // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    final int m = ~(a[i] & 1) & 1;
    s0 += a[i] & -m;
  }
  return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
}
package main
import (
"fmt"
"math/rand"
"time"
)
// size is the number of elements allocated for testData by init.
const size = 1_000
// testData is the shared input slice that dur passes to every benchmarked
// function; it is populated in init.
var testData []int
// init fills testData with size pseudo-random values in [-500, 499].
//
// A locally seeded generator is used instead of the deprecated top-level
// rand.Seed: since Go 1.24 rand.Seed is a no-op by default, so the global
// source would no longer give a reproducible sequence.
func init() {
	testData = make([]int, size)
	rng := rand.New(rand.NewSource(42))
	for i := range testData {
		testData[i] = rng.Intn(1000) - 500
	}
}
// dur invokes fn once on the shared testData slice and prints the label,
// the wall-clock time of that single call, and the value fn returned.
func dur(name string, fn func([]int) int) {
	began := time.Now()
	result := fn(testData)
	took := time.Since(began)

	fmt.Printf("%-40s: %v (sum=%d)\n", name, took, result)
}
// ---------- baseline ----------
// regularMod2 is the baseline: it returns the sum of the even elements of a,
// testing parity with the % operator.
//
// The loop is bounded by len(a) rather than a package-level constant, so the
// function is correct for slices of any length, including empty ones.
func regularMod2(a []int) int {
	sum := 0
	for i := 0; i < len(a); i++ {
		if a[i]%2 == 0 {
			sum += a[i]
		}
	}
	return sum
}
// ---------- standalone bit-trick ----------
// bitwiseAnd1 returns the sum of the even elements of a, testing parity by
// masking the lowest bit instead of using %.
//
// The loop is bounded by len(a) rather than a package-level constant, so the
// function is correct for slices of any length, including empty ones.
func bitwiseAnd1(a []int) int {
	sum := 0
	for i := 0; i < len(a); i++ {
		if a[i]&1 == 0 {
			sum += a[i]
		}
	}
	return sum
}
// ---------- 4× unroll with distinct locals ----------
// unroll4x returns the sum of the even elements of a using a 4-way unrolled
// loop with one accumulator per lane.
//
// The loop is bounded by len(a), and a scalar tail loop handles lengths that
// are not a multiple of 4, so the result matches the non-unrolled variants
// for every slice length.
func unroll4x(a []int) int {
	n := len(a)
	s0, s1, s2, s3 := 0, 0, 0, 0
	i := 0
	// Main body: only runs while a full group of four elements remains.
	for ; i < n-3; i += 4 {
		if a[i]&1 == 0 {
			s0 += a[i]
		}
		if a[i+1]&1 == 0 {
			s1 += a[i+1]
		}
		if a[i+2]&1 == 0 {
			s2 += a[i+2]
		}
		if a[i+3]&1 == 0 {
			s3 += a[i+3]
		}
	}
	// Tail: the up-to-three trailing elements the unrolled body cannot reach.
	for ; i < n; i++ {
		if a[i]&1 == 0 {
			s0 += a[i]
		}
	}
	return s0 + s1 + s2 + s3
}
// ---------- 8× unroll with distinct locals ----------
// unroll8x returns the sum of the even elements of a using an 8-way unrolled
// loop with one accumulator per lane.
//
// The loop is bounded by len(a), and a scalar tail loop handles lengths that
// are not a multiple of 8, so the result matches the non-unrolled variants
// for every slice length.
func unroll8x(a []int) int {
	n := len(a)
	s0, s1, s2, s3, s4, s5, s6, s7 := 0, 0, 0, 0, 0, 0, 0, 0
	i := 0
	// Main body: only runs while a full group of eight elements remains.
	for ; i < n-7; i += 8 {
		if a[i]&1 == 0 {
			s0 += a[i]
		}
		if a[i+1]&1 == 0 {
			s1 += a[i+1]
		}
		if a[i+2]&1 == 0 {
			s2 += a[i+2]
		}
		if a[i+3]&1 == 0 {
			s3 += a[i+3]
		}
		if a[i+4]&1 == 0 {
			s4 += a[i+4]
		}
		if a[i+5]&1 == 0 {
			s5 += a[i+5]
		}
		if a[i+6]&1 == 0 {
			s6 += a[i+6]
		}
		if a[i+7]&1 == 0 {
			s7 += a[i+7]
		}
	}
	// Tail: the up-to-seven trailing elements the unrolled body cannot reach.
	for ; i < n; i++ {
		if a[i]&1 == 0 {
			s0 += a[i]
		}
	}
	return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7
}
// ---------- branch-less even-test ----------
// branchlessEven returns the sum of the even elements of a without a
// conditional in the loop body.
//
// The loop is bounded by len(a) rather than a package-level constant, so the
// function is correct for slices of any length, including empty ones.
func branchlessEven(a []int) int {
	sum := 0
	for i := 0; i < len(a); i++ {
		// mask is 1 for an even element and 0 for an odd one; -mask is then
		// all ones or zero, which keeps or clears the element.
		mask := ^(a[i] & 1) & 1
		sum += a[i] & -mask
	}
	return sum
}
// ---------- ALL TRICKS: 4× unroll + branch-less + distinct locals ----------
// allTricksCombined4x returns the sum of the even elements of a, combining a
// 4-way unrolled loop, one accumulator per lane, and a branch-free parity
// mask.
//
// The loop is bounded by len(a), and a branch-free tail loop handles lengths
// that are not a multiple of 4, so no trailing element is skipped.
func allTricksCombined4x(a []int) int {
	n := len(a)
	s0, s1, s2, s3 := 0, 0, 0, 0
	i := 0
	for ; i < n-3; i += 4 {
		// Each m is 1 for an even element and 0 for an odd one; -m is then
		// all ones or zero, which keeps or clears the element.
		m0 := ^(a[i] & 1) & 1
		m1 := ^(a[i+1] & 1) & 1
		m2 := ^(a[i+2] & 1) & 1
		m3 := ^(a[i+3] & 1) & 1
		s0 += a[i] & -m0
		s1 += a[i+1] & -m1
		s2 += a[i+2] & -m2
		s3 += a[i+3] & -m3
	}
	// Tail: the up-to-three trailing elements the unrolled body cannot reach.
	for ; i < n; i++ {
		m := ^(a[i] & 1) & 1
		s0 += a[i] & -m
	}
	return s0 + s1 + s2 + s3
}
// ---------- ALL TRICKS: 8× unroll + branch-less + distinct locals ----------
// allTricksCombined8x returns the sum of the even elements of a, combining
// an 8-way unrolled loop, one accumulator per lane, and a branch-free parity
// mask.
//
// The loop is bounded by len(a), and a branch-free tail loop handles lengths
// that are not a multiple of 8, so no trailing element is skipped.
func allTricksCombined8x(a []int) int {
	n := len(a)
	s0, s1, s2, s3, s4, s5, s6, s7 := 0, 0, 0, 0, 0, 0, 0, 0
	i := 0
	for ; i < n-7; i += 8 {
		// Each m is 1 for an even element and 0 for an odd one; -m is then
		// all ones or zero, which keeps or clears the element.
		m0 := ^(a[i] & 1) & 1
		m1 := ^(a[i+1] & 1) & 1
		m2 := ^(a[i+2] & 1) & 1
		m3 := ^(a[i+3] & 1) & 1
		m4 := ^(a[i+4] & 1) & 1
		m5 := ^(a[i+5] & 1) & 1
		m6 := ^(a[i+6] & 1) & 1
		m7 := ^(a[i+7] & 1) & 1
		s0 += a[i] & -m0
		s1 += a[i+1] & -m1
		s2 += a[i+2] & -m2
		s3 += a[i+3] & -m3
		s4 += a[i+4] & -m4
		s5 += a[i+5] & -m5
		s6 += a[i+6] & -m6
		s7 += a[i+7] & -m7
	}
	// Tail: the up-to-seven trailing elements the unrolled body cannot reach.
	for ; i < n; i++ {
		m := ^(a[i] & 1) & 1
		s0 += a[i] & -m
	}
	return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7
}
func main() {
fmt.Println("=== EVEN-SUM: GO 1.25 (distinct locals) ===")
dur("regular % 2", regularMod2)
dur("bitwise & 1", bitwiseAnd1)
dur("standalone 4× unroll", unroll4x)
dur("standalone 8× unroll", unroll8x)
dur("branch-less & 1", branchlessEven)
dur("ALL TRICKS 4×", allTricksCombined4x)
dur("ALL TRICKS 8×", allTricksCombined8x)
}
import java.util.Random;
/**
 * Micro-benchmark comparing several ways of summing the even elements of an
 * {@code int[]}: a modulo baseline, a low-bit test, 4x/8x manual unrolling
 * with separate accumulators, a branch-free mask, and combinations of these.
 *
 * <p>Every variant returns the same sum for any array length; the unrolled
 * variants finish with a scalar tail loop for lengths that are not a multiple
 * of the unroll factor.
 *
 * <p>NOTE(review): each variant is timed with a single call and no warm-up
 * pass, so the figures probably include JIT compilation time - confirm before
 * drawing conclusions from them.
 */
public class For {
    /** Number of elements in the benchmark input array. */
    static final int SIZE = 1_000_000;

    /**
     * Builds a seeded random input array and prints one timing line per variant.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int[] data = new int[SIZE];
        Random rnd = new Random(42);
        for (int i = 0; i < SIZE; i++) data[i] = rnd.nextInt(1000) - 500;
        System.out.println("=== EVEN-SUM: Java 25 (HotSpot JIT, ARM64) ===");
        dur("regular % 2", For::regularMod2, data);
        dur("bitwise & 1", For::bitwiseAnd1, data);
        dur("standalone 4× unroll", For::unroll4x, data); // not helpful
        dur("standalone 8× unroll", For::unroll8x, data); // not helpful
        dur("branch-less & 1", For::branchlessEven, data);
        dur("ALL TRICKS 4×", For::allTricksCombined4x, data);
        dur("ALL TRICKS 8×", For::allTricksCombined8x, data);
    }

    /**
     * Calls {@code fn} once on {@code a} and prints the label, the elapsed
     * time in milliseconds, and the returned sum.
     *
     * @param name label printed in the first column
     * @param fn   summing function under measurement
     * @param a    input array passed to {@code fn}
     */
    private static void dur(String name, java.util.function.ToIntFunction<int[]> fn, int[] a) {
        long start = System.nanoTime();
        int sum = fn.applyAsInt(a);
        long ns = System.nanoTime() - start;
        System.out.printf("%-40s: %7.3f ms (sum=%d)%n", name, ns / 1_000_000.0, sum);
    }

    // ---------- baseline ----------
    /**
     * Sums the even elements of {@code a}, testing parity with {@code %}.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int regularMod2(int[] a) {
        int sum = 0;
        for (int i = 0; i < a.length; i++) if (a[i] % 2 == 0) sum += a[i];
        return sum;
    }

    // ---------- standalone bit-trick ----------
    /**
     * Sums the even elements of {@code a}, testing parity by masking the low bit.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int bitwiseAnd1(int[] a) {
        int sum = 0;
        for (int i = 0; i < a.length; i++) if ((a[i] & 1) == 0) sum += a[i];
        return sum;
    }

    // ---------- 4× unroll with distinct locals ----------
    /**
     * Sums the even elements of {@code a} with a 4-way unrolled loop and one
     * accumulator per lane, plus a scalar tail for the remaining elements.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int unroll4x(int[] a) {
        final int n = a.length;
        int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
        int i = 0;
        // Main body: only runs while a full group of four elements remains.
        for (; i < n - 3; i += 4) {
            if ((a[i] & 1) == 0) s0 += a[i];
            if ((a[i + 1] & 1) == 0) s1 += a[i + 1];
            if ((a[i + 2] & 1) == 0) s2 += a[i + 2];
            if ((a[i + 3] & 1) == 0) s3 += a[i + 3];
        }
        // Tail: the up-to-three trailing elements the unrolled body cannot reach.
        for (; i < n; i++) {
            if ((a[i] & 1) == 0) s0 += a[i];
        }
        return s0 + s1 + s2 + s3;
    }

    // ---------- 8× unroll with distinct locals ----------
    /**
     * Sums the even elements of {@code a} with an 8-way unrolled loop and one
     * accumulator per lane, plus a scalar tail for the remaining elements.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int unroll8x(int[] a) {
        final int n = a.length;
        int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0, s6 = 0, s7 = 0;
        int i = 0;
        // Main body: only runs while a full group of eight elements remains.
        for (; i < n - 7; i += 8) {
            if ((a[i] & 1) == 0) s0 += a[i];
            if ((a[i + 1] & 1) == 0) s1 += a[i + 1];
            if ((a[i + 2] & 1) == 0) s2 += a[i + 2];
            if ((a[i + 3] & 1) == 0) s3 += a[i + 3];
            if ((a[i + 4] & 1) == 0) s4 += a[i + 4];
            if ((a[i + 5] & 1) == 0) s5 += a[i + 5];
            if ((a[i + 6] & 1) == 0) s6 += a[i + 6];
            if ((a[i + 7] & 1) == 0) s7 += a[i + 7];
        }
        // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
        for (; i < n; i++) {
            if ((a[i] & 1) == 0) s0 += a[i];
        }
        return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
    }

    // ---------- branch-less even-test ----------
    /**
     * Sums the even elements of {@code a} without a conditional in the loop
     * body, using an all-ones/zero mask derived from the low bit.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int branchlessEven(int[] a) {
        int sum = 0;
        for (int i = 0; i < a.length; i++) {
            int mask = ~(a[i] & 1) & 1; // 1 if even, 0 if odd
            sum += a[i] & -mask; // add only if even
        }
        return sum;
    }

    // ---------- ALL TRICKS: 4× unroll + branch-less + distinct locals ----------
    /**
     * Sums the even elements of {@code a} combining 4-way unrolling, one
     * accumulator per lane and the branch-free mask, plus a branch-free tail.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int allTricksCombined4x(int[] a) {
        final int n = a.length;
        int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
        int i = 0;
        for (; i < n - 3; i += 4) {
            int m0 = ~(a[i] & 1) & 1;
            int m1 = ~(a[i + 1] & 1) & 1;
            int m2 = ~(a[i + 2] & 1) & 1;
            int m3 = ~(a[i + 3] & 1) & 1;
            s0 += a[i] & -m0;
            s1 += a[i + 1] & -m1;
            s2 += a[i + 2] & -m2;
            s3 += a[i + 3] & -m3;
        }
        // Tail: the up-to-three trailing elements the unrolled body cannot reach.
        for (; i < n; i++) {
            int m = ~(a[i] & 1) & 1;
            s0 += a[i] & -m;
        }
        return s0 + s1 + s2 + s3;
    }

    // ---------- ALL TRICKS: 8× unroll + branch-less + distinct locals ----------
    /**
     * Sums the even elements of {@code a} combining 8-way unrolling, one
     * accumulator per lane and the branch-free mask, plus a branch-free tail.
     *
     * @param a input values
     * @return sum of the even elements
     */
    static int allTricksCombined8x(int[] a) {
        final int n = a.length;
        int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0, s6 = 0, s7 = 0;
        int i = 0;
        for (; i < n - 7; i += 8) {
            int m0 = ~(a[i] & 1) & 1;
            int m1 = ~(a[i + 1] & 1) & 1;
            int m2 = ~(a[i + 2] & 1) & 1;
            int m3 = ~(a[i + 3] & 1) & 1;
            int m4 = ~(a[i + 4] & 1) & 1;
            int m5 = ~(a[i + 5] & 1) & 1;
            int m6 = ~(a[i + 6] & 1) & 1;
            int m7 = ~(a[i + 7] & 1) & 1;
            s0 += a[i] & -m0;
            s1 += a[i + 1] & -m1;
            s2 += a[i + 2] & -m2;
            s3 += a[i + 3] & -m3;
            s4 += a[i + 4] & -m4;
            s5 += a[i + 5] & -m5;
            s6 += a[i + 6] & -m6;
            s7 += a[i + 7] & -m7;
        }
        // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
        for (; i < n; i++) {
            int m = ~(a[i] & 1) & 1;
            s0 += a[i] & -m;
        }
        return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
    }
}
'use strict';
const { performance } = require('perf_hooks');
// Number of elements in the benchmark input.
const size = 1_000;
// Shared input array handed to every benchmarked function by dur().
const testData = new Int32Array(size);
// State of a hand-rolled linear congruential generator with a fixed starting
// value, so the generated data is the same on every run.
let s = 123456789;
for (let i = 0; i < size; i++) {
// Math.imul performs the multiply with 32-bit integer semantics.
s = Math.imul(s, 1664525) + 1013904223;
// `>>> 0` reinterprets the state as unsigned before the modulo, so each
// stored value lies in [-500, 499].
testData[i] = ((s >>> 0) % 1000) - 500;
}
/**
 * Calls `fn` once on the shared `testData` array and logs the label, the
 * elapsed wall-clock time in milliseconds, and the value `fn` returned.
 *
 * @param {string} name label printed in the first column
 * @param {(a: Int32Array) => number} fn summing function under measurement
 */
function dur(name, fn) {
  const began = performance.now();
  const sum = fn(testData);
  const finished = performance.now();
  const elapsed = finished - began;
  console.log(`${name.padEnd(40)}: ${elapsed.toFixed(3)} ms (sum=${sum})`);
}
// ---------- baseline ----------
/**
 * Baseline: returns the sum of the even elements of `a`, testing parity
 * with the `%` operator.
 *
 * Iterates over `a.length` rather than a module-level constant, so the
 * result is correct for arrays of any length.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function regularMod2(a) {
  let sum = 0;
  for (let i = 0; i < a.length; i++) {
    if (a[i] % 2 === 0) sum += a[i];
  }
  return sum;
}
// ---------- standalone bit-trick ----------
/**
 * Returns the sum of the even elements of `a`, testing parity by masking the
 * lowest bit instead of using `%`.
 *
 * Iterates over `a.length` rather than a module-level constant; reading past
 * the end would yield `undefined`, which passes the `& 1` test and turns the
 * sum into NaN.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function bitwiseAnd1(a) {
  let sum = 0;
  for (let i = 0; i < a.length; i++) {
    if ((a[i] & 1) === 0) sum += a[i];
  }
  return sum;
}
// ---------- 4× unroll with distinct locals ----------
/**
 * Returns the sum of the even elements of `a` using a 4-way unrolled loop
 * with one accumulator per lane.
 *
 * Iterates over `a.length` rather than a module-level constant, and a scalar
 * tail loop handles lengths that are not a multiple of 4.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function unroll4x(a) {
  const n = a.length;
  let s0 = 0,
    s1 = 0,
    s2 = 0,
    s3 = 0;
  let i = 0;
  // Main body: only runs while a full group of four elements remains.
  for (; i < n - 3; i += 4) {
    if ((a[i] & 1) === 0) s0 += a[i];
    if ((a[i + 1] & 1) === 0) s1 += a[i + 1];
    if ((a[i + 2] & 1) === 0) s2 += a[i + 2];
    if ((a[i + 3] & 1) === 0) s3 += a[i + 3];
  }
  // Tail: the up-to-three trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    if ((a[i] & 1) === 0) s0 += a[i];
  }
  return s0 + s1 + s2 + s3;
}
// ---------- 8× unroll with distinct locals ----------
/**
 * Returns the sum of the even elements of `a` using an 8-way unrolled loop
 * with one accumulator per lane.
 *
 * Iterates over `a.length` rather than a module-level constant, and a scalar
 * tail loop handles lengths that are not a multiple of 8.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function unroll8x(a) {
  const n = a.length;
  let s0 = 0,
    s1 = 0,
    s2 = 0,
    s3 = 0,
    s4 = 0,
    s5 = 0,
    s6 = 0,
    s7 = 0;
  let i = 0;
  // Main body: only runs while a full group of eight elements remains.
  for (; i < n - 7; i += 8) {
    if ((a[i] & 1) === 0) s0 += a[i];
    if ((a[i + 1] & 1) === 0) s1 += a[i + 1];
    if ((a[i + 2] & 1) === 0) s2 += a[i + 2];
    if ((a[i + 3] & 1) === 0) s3 += a[i + 3];
    if ((a[i + 4] & 1) === 0) s4 += a[i + 4];
    if ((a[i + 5] & 1) === 0) s5 += a[i + 5];
    if ((a[i + 6] & 1) === 0) s6 += a[i + 6];
    if ((a[i + 7] & 1) === 0) s7 += a[i + 7];
  }
  // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    if ((a[i] & 1) === 0) s0 += a[i];
  }
  return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
}
// ---------- branch-less even-test ----------
/**
 * Returns the sum of the even elements of `a` without a conditional in the
 * loop body.
 *
 * Iterates over `a.length` rather than a module-level constant, so the
 * result is correct for arrays of any length.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function branchlessEven(a) {
  let sum = 0;
  for (let i = 0; i < a.length; i++) {
    // mask is 1 for an even element and 0 for an odd one; -mask is then
    // all ones or zero, which keeps or clears the element.
    const mask = ~(a[i] & 1) & 1;
    sum += a[i] & -mask;
  }
  return sum;
}
// ---------- ALL TRICKS: 4× unroll + branch-less + distinct locals ----------
/**
 * Returns the sum of the even elements of `a`, combining a 4-way unrolled
 * loop, one accumulator per lane, and a branch-free parity mask.
 *
 * Iterates over `a.length` rather than a module-level constant, and a
 * branch-free tail loop handles lengths that are not a multiple of 4.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function allTricksCombined4x(a) {
  const n = a.length;
  let s0 = 0,
    s1 = 0,
    s2 = 0,
    s3 = 0;
  let i = 0;
  for (; i < n - 3; i += 4) {
    // Each m is 1 for an even element and 0 for an odd one; -m is then
    // all ones or zero, which keeps or clears the element.
    const m0 = ~(a[i] & 1) & 1;
    const m1 = ~(a[i + 1] & 1) & 1;
    const m2 = ~(a[i + 2] & 1) & 1;
    const m3 = ~(a[i + 3] & 1) & 1;
    s0 += a[i] & -m0;
    s1 += a[i + 1] & -m1;
    s2 += a[i + 2] & -m2;
    s3 += a[i + 3] & -m3;
  }
  // Tail: the up-to-three trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    const m = ~(a[i] & 1) & 1;
    s0 += a[i] & -m;
  }
  return s0 + s1 + s2 + s3;
}
// ---------- ALL TRICKS: 8× unroll + branch-less + distinct locals ----------
/**
 * Returns the sum of the even elements of `a`, combining an 8-way unrolled
 * loop, one accumulator per lane, and a branch-free parity mask.
 *
 * Iterates over `a.length` rather than a module-level constant, and a
 * branch-free tail loop handles lengths that are not a multiple of 8.
 *
 * @param {Int32Array} a input values
 * @returns {number} sum of the even elements
 */
function allTricksCombined8x(a) {
  const n = a.length;
  let s0 = 0,
    s1 = 0,
    s2 = 0,
    s3 = 0,
    s4 = 0,
    s5 = 0,
    s6 = 0,
    s7 = 0;
  let i = 0;
  for (; i < n - 7; i += 8) {
    // Each m is 1 for an even element and 0 for an odd one; -m is then
    // all ones or zero, which keeps or clears the element.
    const m0 = ~(a[i] & 1) & 1;
    const m1 = ~(a[i + 1] & 1) & 1;
    const m2 = ~(a[i + 2] & 1) & 1;
    const m3 = ~(a[i + 3] & 1) & 1;
    const m4 = ~(a[i + 4] & 1) & 1;
    const m5 = ~(a[i + 5] & 1) & 1;
    const m6 = ~(a[i + 6] & 1) & 1;
    const m7 = ~(a[i + 7] & 1) & 1;
    s0 += a[i] & -m0;
    s1 += a[i + 1] & -m1;
    s2 += a[i + 2] & -m2;
    s3 += a[i + 3] & -m3;
    s4 += a[i + 4] & -m4;
    s5 += a[i + 5] & -m5;
    s6 += a[i + 6] & -m6;
    s7 += a[i + 7] & -m7;
  }
  // Tail: the up-to-seven trailing elements the unrolled body cannot reach.
  for (; i < n; i++) {
    const m = ~(a[i] & 1) & 1;
    s0 += a[i] & -m;
  }
  return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
}
// Driver: print the banner, then time each variant once in the listed order.
console.log('=== EVEN-SUM: NODE.JS 24 (distinct locals) ===');
const variants = [
  ['regular % 2', regularMod2],
  ['bitwise & 1', bitwiseAnd1],
  ['standalone 4× unroll', unroll4x],
  ['standalone 8× unroll', unroll8x],
  ['branch-less & 1', branchlessEven],
  ['ALL TRICKS 4×', allTricksCombined4x],
  ['ALL TRICKS 8×', allTricksCombined8x],
];
for (const [label, fn] of variants) {
  dur(label, fn);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment