Created
May 13, 2025 14:38
-
-
Save DocBohn/d8d72eb2830ab674d70a37c24df28b3c to your computer and use it in GitHub Desktop.
Code demonstrating that branchless programming often results in unreadable code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
int switch_lg(uint32_t power_of_two) { | |
switch (power_of_two) { | |
case 0x1 : | |
return 0; | |
case 0x2 : | |
return 1; | |
case 0x4 : | |
return 2; | |
case 0x8 : | |
return 3; | |
case 0x10 : | |
return 4; | |
case 0x20 : | |
return 5; | |
case 0x40 : | |
return 6; | |
case 0x80 : | |
return 7; | |
case 0x100 : | |
return 8; | |
case 0x200 : | |
return 9; | |
case 0x400 : | |
return 10; | |
case 0x800 : | |
return 11; | |
case 0x1000 : | |
return 12; | |
case 0x2000 : | |
return 13; | |
case 0x4000 : | |
return 14; | |
case 0x8000 : | |
return 15; | |
case 0x10000 : | |
return 16; | |
case 0x20000 : | |
return 17; | |
case 0x40000 : | |
return 18; | |
case 0x80000 : | |
return 19; | |
case 0x100000 : | |
return 20; | |
case 0x200000 : | |
return 21; | |
case 0x400000 : | |
return 22; | |
case 0x800000 : | |
return 23; | |
case 0x1000000 : | |
return 24; | |
case 0x2000000 : | |
return 25; | |
case 0x4000000 : | |
return 26; | |
case 0x8000000 : | |
return 27; | |
case 0x10000000 : | |
return 28; | |
case 0x20000000 : | |
return 29; | |
case 0x40000000 : | |
return 30; | |
case 0x80000000 : | |
return 31; | |
default : | |
return (int)0xFFFFFFFF; | |
} | |
} | |
int de_bruijn_lg(uint32_t power_of_two) { | |
static const int logarithms[32] = { | |
0, 1, 28, 2, 29, 14, 24, 3, | |
30, 22, 20, 15, 25, 17, 4, 8, | |
31, 27, 13, 23, 21, 19, 16, 7, | |
26, 12, 18, 6, 11, 5, 10, 9 | |
}; | |
return logarithms[(uint32_t)(power_of_two * 0x077CB531U) >> 27]; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
switch_lg: | |
cmp w0, 65536 | |
beq .L11 | |
cmp w0, 65536 | |
bhi .L3 | |
cmp w0, 256 | |
beq .L12 | |
cmp w0, 256 | |
bhi .L4 | |
cmp w0, 16 | |
beq .L13 | |
cmp w0, 16 | |
bhi .L5 | |
cmp w0, 4 | |
beq .L14 | |
cmp w0, 4 | |
bhi .L6 | |
cmp w0, 1 | |
beq .L15 | |
cmp w0, 2 | |
bne .L16 | |
mov w0, 1 | |
b .L1 | |
.L6: | |
cmp w0, 8 | |
bne .L17 | |
mov w0, 3 | |
b .L1 | |
.L5: | |
cmp w0, 64 | |
beq .L18 | |
cmp w0, 128 | |
beq .L19 | |
cmp w0, 32 | |
bne .L20 | |
mov w0, 5 | |
b .L1 | |
.L4: | |
cmp w0, 4096 | |
beq .L21 | |
cmp w0, 4096 | |
bls .L43 | |
cmp w0, 16384 | |
beq .L25 | |
cmp w0, 32768 | |
beq .L26 | |
cmp w0, 8192 | |
bne .L27 | |
mov w0, 13 | |
b .L1 | |
.L43: | |
cmp w0, 1024 | |
beq .L22 | |
cmp w0, 2048 | |
beq .L23 | |
cmp w0, 512 | |
bne .L24 | |
mov w0, 9 | |
b .L1 | |
.L3: | |
mov w1, 16777216 | |
cmp w0, w1 | |
beq .L28 | |
cmp w0, w1 | |
bhi .L8 | |
cmp w0, 1048576 | |
beq .L29 | |
cmp w0, 1048576 | |
bls .L44 | |
cmp w0, 4194304 | |
beq .L33 | |
cmp w0, 8388608 | |
beq .L34 | |
cmp w0, 2097152 | |
bne .L35 | |
mov w0, 21 | |
b .L1 | |
.L44: | |
cmp w0, 262144 | |
beq .L30 | |
cmp w0, 524288 | |
beq .L31 | |
cmp w0, 131072 | |
bne .L32 | |
mov w0, 17 | |
b .L1 | |
.L8: | |
mov w1, 268435456 | |
cmp w0, w1 | |
beq .L36 | |
cmp w0, w1 | |
bls .L45 | |
mov w1, 1073741824 | |
cmp w0, w1 | |
beq .L40 | |
mov w1, -2147483648 | |
cmp w0, w1 | |
beq .L41 | |
mov w1, 536870912 | |
cmp w0, w1 | |
bne .L42 | |
mov w0, 29 | |
b .L1 | |
.L45: | |
mov w1, 67108864 | |
cmp w0, w1 | |
beq .L37 | |
mov w1, 134217728 | |
cmp w0, w1 | |
beq .L38 | |
mov w1, 33554432 | |
cmp w0, w1 | |
bne .L39 | |
mov w0, 25 | |
b .L1 | |
.L11: | |
mov w0, 16 | |
.L1: | |
ret | |
.L12: | |
mov w0, 8 | |
b .L1 | |
.L13: | |
mov w0, 4 | |
b .L1 | |
.L14: | |
mov w0, 2 | |
b .L1 | |
.L15: | |
mov w0, 0 | |
b .L1 | |
.L16: | |
mov w0, -1 | |
b .L1 | |
.L17: | |
mov w0, -1 | |
b .L1 | |
.L18: | |
mov w0, 6 | |
b .L1 | |
.L19: | |
mov w0, 7 | |
b .L1 | |
.L20: | |
mov w0, -1 | |
b .L1 | |
.L21: | |
mov w0, 12 | |
b .L1 | |
.L22: | |
mov w0, 10 | |
b .L1 | |
.L23: | |
mov w0, 11 | |
b .L1 | |
.L24: | |
mov w0, -1 | |
b .L1 | |
.L25: | |
mov w0, 14 | |
b .L1 | |
.L26: | |
mov w0, 15 | |
b .L1 | |
.L27: | |
mov w0, -1 | |
b .L1 | |
.L28: | |
mov w0, 24 | |
b .L1 | |
.L29: | |
mov w0, 20 | |
b .L1 | |
.L30: | |
mov w0, 18 | |
b .L1 | |
.L31: | |
mov w0, 19 | |
b .L1 | |
.L32: | |
mov w0, -1 | |
b .L1 | |
.L33: | |
mov w0, 22 | |
b .L1 | |
.L34: | |
mov w0, 23 | |
b .L1 | |
.L35: | |
mov w0, -1 | |
b .L1 | |
.L36: | |
mov w0, 28 | |
b .L1 | |
.L37: | |
mov w0, 26 | |
b .L1 | |
.L38: | |
mov w0, 27 | |
b .L1 | |
.L39: | |
mov w0, -1 | |
b .L1 | |
.L40: | |
mov w0, 30 | |
b .L1 | |
.L41: | |
mov w0, 31 | |
b .L1 | |
.L42: | |
mov w0, -1 | |
b .L1 | |
de_bruijn_lg: | |
mov w1, 46385 | |
movk w1, 0x77c, lsl 16 | |
mul w0, w0, w1 | |
lsr w0, w0, 27 | |
adrp x1, .LANCHOR0 | |
add x1, x1, :lo12:.LANCHOR0 | |
ldr w0, [x1, x0, lsl 2] | |
ret | |
.set .LANCHOR0,. + 0 | |
logarithms.0: | |
.word 0 | |
.word 1 | |
.word 28 | |
.word 2 | |
.word 29 | |
.word 14 | |
.word 24 | |
.word 3 | |
.word 30 | |
.word 22 | |
.word 20 | |
.word 15 | |
.word 25 | |
.word 17 | |
.word 4 | |
.word 8 | |
.word 31 | |
.word 27 | |
.word 13 | |
.word 23 | |
.word 21 | |
.word 19 | |
.word 16 | |
.word 7 | |
.word 26 | |
.word 12 | |
.word 18 | |
.word 6 | |
.word 11 | |
.word 5 | |
.word 10 | |
.word 9 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
switch_lg: | |
cmpl $65536, %edi | |
je .L11 | |
ja .L3 | |
cmpl $256, %edi | |
je .L12 | |
ja .L4 | |
cmpl $16, %edi | |
je .L13 | |
ja .L5 | |
cmpl $4, %edi | |
je .L14 | |
ja .L6 | |
cmpl $1, %edi | |
je .L15 | |
cmpl $2, %edi | |
jne .L16 | |
movl $1, %eax | |
ret | |
.L6: | |
cmpl $8, %edi | |
jne .L17 | |
movl $3, %eax | |
ret | |
.L5: | |
cmpl $64, %edi | |
je .L18 | |
cmpl $128, %edi | |
je .L19 | |
cmpl $32, %edi | |
jne .L20 | |
movl $5, %eax | |
ret | |
.L4: | |
cmpl $4096, %edi | |
je .L21 | |
jbe .L43 | |
cmpl $16384, %edi | |
je .L25 | |
cmpl $32768, %edi | |
je .L26 | |
cmpl $8192, %edi | |
jne .L27 | |
movl $13, %eax | |
ret | |
.L43: | |
cmpl $1024, %edi | |
je .L22 | |
cmpl $2048, %edi | |
je .L23 | |
cmpl $512, %edi | |
jne .L24 | |
movl $9, %eax | |
ret | |
.L3: | |
cmpl $16777216, %edi | |
je .L28 | |
ja .L8 | |
cmpl $1048576, %edi | |
je .L29 | |
jbe .L44 | |
cmpl $4194304, %edi | |
je .L33 | |
cmpl $8388608, %edi | |
je .L34 | |
cmpl $2097152, %edi | |
jne .L35 | |
movl $21, %eax | |
ret | |
.L44: | |
cmpl $262144, %edi | |
je .L30 | |
cmpl $524288, %edi | |
je .L31 | |
cmpl $131072, %edi | |
jne .L32 | |
movl $17, %eax | |
ret | |
.L8: | |
cmpl $268435456, %edi | |
je .L36 | |
jbe .L45 | |
cmpl $1073741824, %edi | |
je .L40 | |
cmpl $-2147483648, %edi | |
je .L41 | |
cmpl $536870912, %edi | |
jne .L42 | |
movl $29, %eax | |
ret | |
.L45: | |
cmpl $67108864, %edi | |
je .L37 | |
cmpl $134217728, %edi | |
je .L38 | |
cmpl $33554432, %edi | |
jne .L39 | |
movl $25, %eax | |
ret | |
.L11: | |
movl $16, %eax | |
ret | |
.L12: | |
movl $8, %eax | |
ret | |
.L13: | |
movl $4, %eax | |
ret | |
.L14: | |
movl $2, %eax | |
ret | |
.L15: | |
movl $0, %eax | |
ret | |
.L16: | |
movl $-1, %eax | |
ret | |
.L17: | |
movl $-1, %eax | |
ret | |
.L18: | |
movl $6, %eax | |
ret | |
.L19: | |
movl $7, %eax | |
ret | |
.L20: | |
movl $-1, %eax | |
ret | |
.L21: | |
movl $12, %eax | |
ret | |
.L22: | |
movl $10, %eax | |
ret | |
.L23: | |
movl $11, %eax | |
ret | |
.L24: | |
movl $-1, %eax | |
ret | |
.L25: | |
movl $14, %eax | |
ret | |
.L26: | |
movl $15, %eax | |
ret | |
.L27: | |
movl $-1, %eax | |
ret | |
.L28: | |
movl $24, %eax | |
ret | |
.L29: | |
movl $20, %eax | |
ret | |
.L30: | |
movl $18, %eax | |
ret | |
.L31: | |
movl $19, %eax | |
ret | |
.L32: | |
movl $-1, %eax | |
ret | |
.L33: | |
movl $22, %eax | |
ret | |
.L34: | |
movl $23, %eax | |
ret | |
.L35: | |
movl $-1, %eax | |
ret | |
.L36: | |
movl $28, %eax | |
ret | |
.L37: | |
movl $26, %eax | |
ret | |
.L38: | |
movl $27, %eax | |
ret | |
.L39: | |
movl $-1, %eax | |
ret | |
.L40: | |
movl $30, %eax | |
ret | |
.L41: | |
movl $31, %eax | |
ret | |
.L42: | |
movl $-1, %eax | |
ret | |
de_bruijn_lg: | |
imull $125613361, %edi, %edi | |
shrl $27, %edi | |
movl %edi, %edi | |
movl logarithms.0(,%rdi,4), %eax | |
ret | |
logarithms.0: | |
.long 0 | |
.long 1 | |
.long 28 | |
.long 2 | |
.long 29 | |
.long 14 | |
.long 24 | |
.long 3 | |
.long 30 | |
.long 22 | |
.long 20 | |
.long 15 | |
.long 25 | |
.long 17 | |
.long 4 | |
.long 8 | |
.long 31 | |
.long 27 | |
.long 13 | |
.long 23 | |
.long 21 | |
.long 19 | |
.long 16 | |
.long 7 | |
.long 26 | |
.long 12 | |
.long 18 | |
.long 6 | |
.long 11 | |
.long 5 | |
.long 10 | |
.long 9 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment