Skip to content

Instantly share code, notes, and snippets.

@DocBohn
Created May 13, 2025 14:38
Show Gist options
  • Save DocBohn/d8d72eb2830ab674d70a37c24df28b3c to your computer and use it in GitHub Desktop.
Save DocBohn/d8d72eb2830ab674d70a37c24df28b3c to your computer and use it in GitHub Desktop.
Code demonstrating that branchless programming often results in unreadable code
#include <stdint.h>
int switch_lg(uint32_t power_of_two) {
switch (power_of_two) {
case 0x1 :
return 0;
case 0x2 :
return 1;
case 0x4 :
return 2;
case 0x8 :
return 3;
case 0x10 :
return 4;
case 0x20 :
return 5;
case 0x40 :
return 6;
case 0x80 :
return 7;
case 0x100 :
return 8;
case 0x200 :
return 9;
case 0x400 :
return 10;
case 0x800 :
return 11;
case 0x1000 :
return 12;
case 0x2000 :
return 13;
case 0x4000 :
return 14;
case 0x8000 :
return 15;
case 0x10000 :
return 16;
case 0x20000 :
return 17;
case 0x40000 :
return 18;
case 0x80000 :
return 19;
case 0x100000 :
return 20;
case 0x200000 :
return 21;
case 0x400000 :
return 22;
case 0x800000 :
return 23;
case 0x1000000 :
return 24;
case 0x2000000 :
return 25;
case 0x4000000 :
return 26;
case 0x8000000 :
return 27;
case 0x10000000 :
return 28;
case 0x20000000 :
return 29;
case 0x40000000 :
return 30;
case 0x80000000 :
return 31;
default :
return (int)0xFFFFFFFF;
}
}
int de_bruijn_lg(uint32_t power_of_two) {
static const int logarithms[32] = {
0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9
};
return logarithms[(uint32_t)(power_of_two * 0x077CB531U) >> 27];
}
switch_lg:
cmp w0, 65536
beq .L11
cmp w0, 65536
bhi .L3
cmp w0, 256
beq .L12
cmp w0, 256
bhi .L4
cmp w0, 16
beq .L13
cmp w0, 16
bhi .L5
cmp w0, 4
beq .L14
cmp w0, 4
bhi .L6
cmp w0, 1
beq .L15
cmp w0, 2
bne .L16
mov w0, 1
b .L1
.L6:
cmp w0, 8
bne .L17
mov w0, 3
b .L1
.L5:
cmp w0, 64
beq .L18
cmp w0, 128
beq .L19
cmp w0, 32
bne .L20
mov w0, 5
b .L1
.L4:
cmp w0, 4096
beq .L21
cmp w0, 4096
bls .L43
cmp w0, 16384
beq .L25
cmp w0, 32768
beq .L26
cmp w0, 8192
bne .L27
mov w0, 13
b .L1
.L43:
cmp w0, 1024
beq .L22
cmp w0, 2048
beq .L23
cmp w0, 512
bne .L24
mov w0, 9
b .L1
.L3:
mov w1, 16777216
cmp w0, w1
beq .L28
cmp w0, w1
bhi .L8
cmp w0, 1048576
beq .L29
cmp w0, 1048576
bls .L44
cmp w0, 4194304
beq .L33
cmp w0, 8388608
beq .L34
cmp w0, 2097152
bne .L35
mov w0, 21
b .L1
.L44:
cmp w0, 262144
beq .L30
cmp w0, 524288
beq .L31
cmp w0, 131072
bne .L32
mov w0, 17
b .L1
.L8:
mov w1, 268435456
cmp w0, w1
beq .L36
cmp w0, w1
bls .L45
mov w1, 1073741824
cmp w0, w1
beq .L40
mov w1, -2147483648
cmp w0, w1
beq .L41
mov w1, 536870912
cmp w0, w1
bne .L42
mov w0, 29
b .L1
.L45:
mov w1, 67108864
cmp w0, w1
beq .L37
mov w1, 134217728
cmp w0, w1
beq .L38
mov w1, 33554432
cmp w0, w1
bne .L39
mov w0, 25
b .L1
.L11:
mov w0, 16
.L1:
ret
.L12:
mov w0, 8
b .L1
.L13:
mov w0, 4
b .L1
.L14:
mov w0, 2
b .L1
.L15:
mov w0, 0
b .L1
.L16:
mov w0, -1
b .L1
.L17:
mov w0, -1
b .L1
.L18:
mov w0, 6
b .L1
.L19:
mov w0, 7
b .L1
.L20:
mov w0, -1
b .L1
.L21:
mov w0, 12
b .L1
.L22:
mov w0, 10
b .L1
.L23:
mov w0, 11
b .L1
.L24:
mov w0, -1
b .L1
.L25:
mov w0, 14
b .L1
.L26:
mov w0, 15
b .L1
.L27:
mov w0, -1
b .L1
.L28:
mov w0, 24
b .L1
.L29:
mov w0, 20
b .L1
.L30:
mov w0, 18
b .L1
.L31:
mov w0, 19
b .L1
.L32:
mov w0, -1
b .L1
.L33:
mov w0, 22
b .L1
.L34:
mov w0, 23
b .L1
.L35:
mov w0, -1
b .L1
.L36:
mov w0, 28
b .L1
.L37:
mov w0, 26
b .L1
.L38:
mov w0, 27
b .L1
.L39:
mov w0, -1
b .L1
.L40:
mov w0, 30
b .L1
.L41:
mov w0, 31
b .L1
.L42:
mov w0, -1
b .L1
de_bruijn_lg:
mov w1, 46385
movk w1, 0x77c, lsl 16
mul w0, w0, w1
lsr w0, w0, 27
adrp x1, .LANCHOR0
add x1, x1, :lo12:.LANCHOR0
ldr w0, [x1, x0, lsl 2]
ret
.set .LANCHOR0,. + 0
logarithms.0:
.word 0
.word 1
.word 28
.word 2
.word 29
.word 14
.word 24
.word 3
.word 30
.word 22
.word 20
.word 15
.word 25
.word 17
.word 4
.word 8
.word 31
.word 27
.word 13
.word 23
.word 21
.word 19
.word 16
.word 7
.word 26
.word 12
.word 18
.word 6
.word 11
.word 5
.word 10
.word 9
switch_lg:
cmpl $65536, %edi
je .L11
ja .L3
cmpl $256, %edi
je .L12
ja .L4
cmpl $16, %edi
je .L13
ja .L5
cmpl $4, %edi
je .L14
ja .L6
cmpl $1, %edi
je .L15
cmpl $2, %edi
jne .L16
movl $1, %eax
ret
.L6:
cmpl $8, %edi
jne .L17
movl $3, %eax
ret
.L5:
cmpl $64, %edi
je .L18
cmpl $128, %edi
je .L19
cmpl $32, %edi
jne .L20
movl $5, %eax
ret
.L4:
cmpl $4096, %edi
je .L21
jbe .L43
cmpl $16384, %edi
je .L25
cmpl $32768, %edi
je .L26
cmpl $8192, %edi
jne .L27
movl $13, %eax
ret
.L43:
cmpl $1024, %edi
je .L22
cmpl $2048, %edi
je .L23
cmpl $512, %edi
jne .L24
movl $9, %eax
ret
.L3:
cmpl $16777216, %edi
je .L28
ja .L8
cmpl $1048576, %edi
je .L29
jbe .L44
cmpl $4194304, %edi
je .L33
cmpl $8388608, %edi
je .L34
cmpl $2097152, %edi
jne .L35
movl $21, %eax
ret
.L44:
cmpl $262144, %edi
je .L30
cmpl $524288, %edi
je .L31
cmpl $131072, %edi
jne .L32
movl $17, %eax
ret
.L8:
cmpl $268435456, %edi
je .L36
jbe .L45
cmpl $1073741824, %edi
je .L40
cmpl $-2147483648, %edi
je .L41
cmpl $536870912, %edi
jne .L42
movl $29, %eax
ret
.L45:
cmpl $67108864, %edi
je .L37
cmpl $134217728, %edi
je .L38
cmpl $33554432, %edi
jne .L39
movl $25, %eax
ret
.L11:
movl $16, %eax
ret
.L12:
movl $8, %eax
ret
.L13:
movl $4, %eax
ret
.L14:
movl $2, %eax
ret
.L15:
movl $0, %eax
ret
.L16:
movl $-1, %eax
ret
.L17:
movl $-1, %eax
ret
.L18:
movl $6, %eax
ret
.L19:
movl $7, %eax
ret
.L20:
movl $-1, %eax
ret
.L21:
movl $12, %eax
ret
.L22:
movl $10, %eax
ret
.L23:
movl $11, %eax
ret
.L24:
movl $-1, %eax
ret
.L25:
movl $14, %eax
ret
.L26:
movl $15, %eax
ret
.L27:
movl $-1, %eax
ret
.L28:
movl $24, %eax
ret
.L29:
movl $20, %eax
ret
.L30:
movl $18, %eax
ret
.L31:
movl $19, %eax
ret
.L32:
movl $-1, %eax
ret
.L33:
movl $22, %eax
ret
.L34:
movl $23, %eax
ret
.L35:
movl $-1, %eax
ret
.L36:
movl $28, %eax
ret
.L37:
movl $26, %eax
ret
.L38:
movl $27, %eax
ret
.L39:
movl $-1, %eax
ret
.L40:
movl $30, %eax
ret
.L41:
movl $31, %eax
ret
.L42:
movl $-1, %eax
ret
de_bruijn_lg:
imull $125613361, %edi, %edi
shrl $27, %edi
movl %edi, %edi
movl logarithms.0(,%rdi,4), %eax
ret
logarithms.0:
.long 0
.long 1
.long 28
.long 2
.long 29
.long 14
.long 24
.long 3
.long 30
.long 22
.long 20
.long 15
.long 25
.long 17
.long 4
.long 8
.long 31
.long 27
.long 13
.long 23
.long 21
.long 19
.long 16
.long 7
.long 26
.long 12
.long 18
.long 6
.long 11
.long 5
.long 10
.long 9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment