Last active
May 5, 2025 17:48
-
-
Save DocBohn/d51a7cafcc2da91afdd815be69d1bc0c to your computer and use it in GitHub Desktop.
Code demonstrating hand-crafted and compiler-generated migration of redundant loads/stores out of a loop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Adds the first `length` elements of `array` into `*accumulator`.
 *
 * Neither pointer is `restrict`-qualified, so `accumulator` is allowed to
 * point at an element of `array`. Each iteration therefore has to read and
 * write `*accumulator` through memory instead of keeping the running total
 * in a register.
 *
 * @param accumulator  in/out running total; dereferenced only when length > 0
 * @param array        elements to add, read at indices 0 .. length-1
 * @param length       number of elements to add; zero or negative adds nothing
 */
void nonoptimizable_accumulate(int *accumulator, int const *array, int length) {
    for (int i = 0; i < length; i++) {
        /* Load + store on every pass: array[i] might be the same object as *accumulator. */
        *accumulator += array[i];
    }
}
/**
 * Adds the first `length` elements of `array` into `*accumulator`.
 *
 * Both pointers are `restrict`-qualified: the caller promises that
 * `*accumulator` is not one of the `array` elements read here. That promise
 * lets a compiler keep the running total in a register and touch
 * `*accumulator` only once before and once after the loop. Passing
 * overlapping pointers with length > 0 is undefined behavior.
 *
 * @param accumulator  in/out running total; dereferenced only when length > 0
 * @param array        elements to add, read at indices 0 .. length-1
 * @param length       number of elements to add; zero or negative adds nothing
 */
void optimizable_accumulate(int *restrict accumulator, int const *restrict array, int length) {
    for (int i = 0; i < length; i++) {
        /* Same statement as the unqualified version; only the aliasing contract differs. */
        *accumulator += array[i];
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compiler output at -O1 (AArch64)
// nonoptimizable_accumulate(int *accumulator, int const *array, int length)
// Register roles assume the standard AArch64 argument order:
//   x0 = accumulator, x1 = array, w2 = length.
// *accumulator is loaded and stored inside the loop on every iteration.
nonoptimizable_accumulate:
        cmp     w2, 0                   // length <= 0 ?
        ble     .L1                     //   yes: return without touching memory
        mov     x3, x1                  // x3 = cursor, starts at array
        add     x4, x1, w2, sxtw 2      // x4 = array + (sign-extended length << 2) = end pointer
.L3:
        ldr     w1, [x0]                // reload *accumulator from memory
        ldr     w2, [x3], 4             // load *cursor, then advance cursor by 4 bytes
        add     w1, w1, w2              // w1 = *accumulator + element
        str     w1, [x0]                // store the sum back to *accumulator
        cmp     x3, x4                  // cursor reached the end pointer?
        bne     .L3                     //   no: process the next element
.L1:
        ret
// optimizable_accumulate(int *restrict accumulator, int const *restrict array, int length)
// Register roles assume the standard AArch64 argument order:
//   x0 = accumulator, x1 = array, w2 = length.
// The load of *accumulator sits before the loop and the store after it;
// the loop body itself only reads array elements.
optimizable_accumulate:
        cmp     w2, 0                   // length <= 0 ?
        ble     .L5                     //   yes: return without touching memory
        ldr     w4, [x0]                // w4 = *accumulator, loaded once up front
        mov     x3, x1                  // x3 = cursor, starts at array
        add     x2, x1, w2, sxtw 2      // x2 = array + (sign-extended length << 2) = end pointer
.L7:
        ldr     w1, [x3], 4             // load *cursor, then advance cursor by 4 bytes
        add     w4, w4, w1              // accumulate in the register
        cmp     x3, x2                  // cursor reached the end pointer?
        bne     .L7                     //   no: process the next element
        str     w4, [x0]                // single store of the final total
.L5:
        ret
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Adds the first `length` elements of `array` into `*accumulator`, with the
 * load and store of `*accumulator` moved out of the loop by hand.
 *
 * The running total lives in a local variable, so the loop body reads only
 * `array`. Unlike a loop that updates `*accumulator` directly, this version
 * reads and writes `*accumulator` unconditionally, so the pointer must be
 * valid even when `length` is zero or negative.
 *
 * @param accumulator  in/out running total; must not overlap the array elements read
 * @param array        elements to add, read at indices 0 .. length-1
 * @param length       number of elements to add; zero or negative leaves the value unchanged
 */
void optimized_accumulate(int *restrict accumulator, int const *restrict array, int length) {
    int local_accumulator = *accumulator;   /* one load before the loop */
    for (int i = 0; i < length; i++) {
        local_accumulator += array[i];
    }
    *accumulator = local_accumulator;       /* one store after the loop */
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiler output at -O1 (x86-64, AT&T syntax)
# nonoptimizable_accumulate(int *accumulator, int const *array, int length)
# Register roles assume the System V x86-64 argument order:
#   %rdi = accumulator, %rsi = array, %edx = length.
# The add inside the loop targets memory at (%rdi), so *accumulator is read
# and written on every iteration.
nonoptimizable_accumulate:
        testl   %edx, %edx              # length <= 0 ?
        jle     .L1                     #   yes: return without touching memory
        movq    %rsi, %rax              # %rax = cursor, starts at array
        movslq  %edx, %rdx              # sign-extend length to 64 bits
        leaq    (%rsi,%rdx,4), %rcx     # %rcx = array + length*4 = end pointer
.L3:
        movl    (%rax), %edx            # load *cursor
        addl    %edx, (%rdi)            # *accumulator += element (read-modify-write in memory)
        addq    $4, %rax                # advance cursor by 4 bytes
        cmpq    %rcx, %rax              # cursor reached the end pointer?
        jne     .L3                     #   no: process the next element
.L1:
        ret
# optimizable_accumulate(int *restrict accumulator, int const *restrict array, int length)
# Register roles assume the System V x86-64 argument order:
#   %rdi = accumulator, %rsi = array, %edx = length.
# The load of *accumulator sits before the loop and the store after it;
# the loop body itself only reads array elements.
optimizable_accumulate:
        testl   %edx, %edx              # length <= 0 ?
        jle     .L5                     #   yes: return without touching memory
        movl    (%rdi), %ecx            # %ecx = *accumulator, loaded once up front
        movq    %rsi, %rax              # %rax = cursor, starts at array
        movslq  %edx, %rdx              # sign-extend length to 64 bits
        leaq    (%rsi,%rdx,4), %rsi     # %rsi = array + length*4 = end pointer
.L7:
        addl    (%rax), %ecx            # accumulate *cursor in the register
        addq    $4, %rax                # advance cursor by 4 bytes
        cmpq    %rsi, %rax              # cursor reached the end pointer?
        jne     .L7                     #   no: process the next element
        movl    %ecx, (%rdi)            # single store of the final total
.L5:
        ret
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment