Skip to content

Instantly share code, notes, and snippets.

@DocBohn
Last active May 5, 2025 17:48
Show Gist options
  • Save DocBohn/d51a7cafcc2da91afdd815be69d1bc0c to your computer and use it in GitHub Desktop.
Save DocBohn/d51a7cafcc2da91afdd815be69d1bc0c to your computer and use it in GitHub Desktop.
Code demonstrating hand-crafted and compiler-generated migration of redundant loads/stores out of a loop
// Adds array[0] .. array[length - 1] into *accumulator, in order.
// Does nothing when length <= 0.
// The pointers are deliberately NOT restrict-qualified: accumulator may point
// at an element of array, so every step reads and writes *accumulator itself
// rather than a local copy.
void nonoptimizable_accumulate(int *accumulator, int const *array, int length) {
    int const *cursor = array;
    int remaining = length;
    while (remaining > 0) {
        // Go through memory each time so an aliased element sees the update.
        *accumulator = *accumulator + *cursor;
        cursor++;
        remaining--;
    }
}
// Adds array[0] .. array[length - 1] into *accumulator.
// Does nothing when length <= 0.
// Both pointers are restrict-qualified, so the caller promises that
// *accumulator is not accessed through array; that promise is what allows a
// compiler to keep the running sum in a register across the loop.
void optimizable_accumulate(int *restrict accumulator, int const *restrict array, int length) {
    if (length <= 0) {
        return;
    }
    int index = 0;
    do {
        *accumulator = *accumulator + array[index];
        index++;
    } while (index < length);
}
// AArch64 assembly for the two functions above, compiled with -O1
// nonoptimizable_accumulate -- AArch64 output for the non-restrict C version.
// In:  x0 = accumulator, x1 = array, w2 = length
// *accumulator is loaded and stored on every pass through .L3: without
// restrict, the store to *accumulator could modify an element of array.
nonoptimizable_accumulate:
// Return immediately when length <= 0 (signed compare).
cmp w2, 0
ble .L1
// x3 = cursor into array.
mov x3, x1
// x4 = array + (sign-extended length << 2), i.e. one past the last int.
add x4, x1, w2, sxtw 2
.L3:
// w1 = *accumulator, reloaded each iteration.
ldr w1, [x0]
// w2 = *cursor, then cursor += 4 (post-index); w2 is reused as scratch here.
ldr w2, [x3], 4
add w1, w1, w2
// Write the updated sum back to *accumulator each iteration.
str w1, [x0]
// Loop until the cursor reaches the end pointer.
cmp x3, x4
bne .L3
.L1:
ret
// optimizable_accumulate -- AArch64 output for the restrict-qualified C version.
// In:  x0 = accumulator, x1 = array, w2 = length
// The load of *accumulator sits before the loop and the store after it; the
// loop body (.L7) touches only array memory and keeps the sum in w4.
optimizable_accumulate:
// Return immediately when length <= 0 (signed compare); *accumulator is untouched.
cmp w2, 0
ble .L5
// w4 = *accumulator, loaded once.
ldr w4, [x0]
// x3 = cursor into array.
mov x3, x1
// x2 = array + (sign-extended length << 2), i.e. one past the last int.
add x2, x1, w2, sxtw 2
.L7:
// w1 = *cursor, then cursor += 4 (post-index).
ldr w1, [x3], 4
// Accumulate in the register only.
add w4, w4, w1
// Loop until the cursor reaches the end pointer.
cmp x3, x2
bne .L7
// Single store of the final sum back to *accumulator.
str w4, [x0]
.L5:
ret
// Hand-optimized variant: copies *accumulator into a local once, sums
// array[0] .. array[length - 1] into that local, then writes the result back
// with a single store. The initial read and final write of *accumulator happen
// even when length <= 0 (the value is then written back unchanged).
void optimized_accumulate(int *restrict accumulator, int const *restrict array, int length) {
    int running_total = *accumulator;
    int const *element = array;
    for (int remaining = length; remaining > 0; remaining--) {
        running_total += *element;
        element++;
    }
    *accumulator = running_total;
}
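# x86-64 assembly (AT&T syntax) for nonoptimizable_accumulate and optimizable_accumulate, compiled with -O1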
# nonoptimizable_accumulate -- x86-64 (AT&T syntax) output for the non-restrict C version.
# In:  %rdi = accumulator, %rsi = array, %edx = length
# The add in .L3 has (%rdi) as its destination, so *accumulator is read and
# written in memory on every iteration.
nonoptimizable_accumulate:
# Return immediately when length <= 0 (signed test).
testl %edx, %edx
jle .L1
# %rax = cursor into array.
movq %rsi, %rax
# Sign-extend length to 64 bits for the address computation.
movslq %edx, %rdx
# %rcx = array + length * 4, i.e. one past the last int.
leaq (%rsi,%rdx,4), %rcx
.L3:
# %edx is reused as scratch: current array element.
movl (%rax), %edx
# Read-modify-write of *accumulator directly in memory.
addl %edx, (%rdi)
# Advance the cursor by one int and loop until it reaches the end pointer.
addq $4, %rax
cmpq %rcx, %rax
jne .L3
.L1:
ret
# optimizable_accumulate -- x86-64 (AT&T syntax) output for the restrict-qualified C version.
# In:  %rdi = accumulator, %rsi = array, %edx = length
# *accumulator is loaded once before the loop and stored once after it; the
# loop body (.L7) keeps the running sum in %ecx.
optimizable_accumulate:
# Return immediately when length <= 0 (signed test); *accumulator is untouched.
testl %edx, %edx
jle .L5
# %ecx = *accumulator, loaded once.
movl (%rdi), %ecx
# %rax = cursor into array.
movq %rsi, %rax
# Sign-extend length to 64 bits for the address computation.
movslq %edx, %rdx
# %rsi is overwritten with array + length * 4, i.e. one past the last int.
leaq (%rsi,%rdx,4), %rsi
.L7:
# Add the current element from memory straight into the register sum.
addl (%rax), %ecx
# Advance the cursor by one int and loop until it reaches the end pointer.
addq $4, %rax
cmpq %rsi, %rax
jne .L7
# Single store of the final sum back to *accumulator.
movl %ecx, (%rdi)
.L5:
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment