-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Labels
clang:codegen (IR generation bugs: mangling, exceptions, etc.), missed-optimization
Description
// Reproducer for the redundant-load report: returns the sum over i in [0, 1000)
// of (exp(num[i]) * num[i])^2.
// The trip count is hard-coded to 1000; `n` is never read, and `r2inv` appears
// only in the commented-out line below.
float foo (float num[], float r2inv, int n) {
float sum = 0.0;
for (int i=0; i < 1000; i++) {
// First read of num[i]; `a` is the argument passed to std::exp.
float a = num[i];
// const float r = a / std::sqrt (r2inv);
const float expm2 = std::exp (a);
// num[i] is read again here instead of reusing `a`; this is the second load
// that the llvm listing below shows after the call to expf.
float tmp = expm2 * num[i];
sum += tmp * tmp;
}
return sum;
}
- llvm: When built with -O3, num[i] is loaded twice: once before and once after the call to expf
.LBB0_1: // =>This Inner Loop Header: Depth=1
ldr s0, [x19, x20] // load num[i]
bl expf
ldr s1, [x19, x20] // repeat load the num[i]
add x20, x20, #4
cmp x20, #4000
fmul s0, s0, s1
fmadd s8, s0, s0, s8
b.ne .LBB0_1
- gcc: Only one load of num[i], because the value is kept in a callee-saved register across the call to expf
.L2:
ldr s15, [x19], 4
fmov s0, s15
bl expf
fmul s15, s15, s0
fmadd s14, s15, s15, s14
cmp x19, x20
bne .L2
Metadata
Metadata
Assignees
Labels
clang:codegen (IR generation bugs: mangling, exceptions, etc.), missed-optimization