-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
Reproducer:
#include <cstdio>
#include <cstdint>
// First union alternative: two consecutive floats (shown as { float, float } in the IR).
struct my_struct_1 {
float a;
// Per the report, b lands in the bytes that are padding in my_struct_2.
float b;
};
// Second union alternative (shown as { float, double } in the IR); the report
// states that sizeof(my_struct_2) > sizeof(my_struct_1).
struct my_struct_2 {
float x;
// NOTE(review): presumably 8-byte aligned on the x86 target, which leaves
// padding between x and y -- the report relies on that padding; confirm.
double y;
};
// Overlays both structs in the same storage. The reproducer only ever writes
// and reads s1; s2 exists solely to make the union larger than s1.
union my_union {
my_struct_1 s1;
my_struct_2 s2;
};
// Returns a union by value after assigning {100.f, 200.f} to its s1 member.
// s2 is never written, so only the bytes covered by s1 are initialized.
my_union my_func() {
my_union u;
u.s1 = my_struct_1{100.f, 200.f};
return u;
}
int main() {
my_union u = my_func();
if (u.s1.a != 100.f)
std::puts("a ooops");
if (u.s1.b != 200.f)
std::puts("b ooops");
return 0;
}
Please note that my_struct_1 has a field b that fits into the padding of my_struct_2, and that sizeof(my_struct_2) > sizeof(my_struct_1).
The bug reproduces on x86 with the clang 16 release and with the latest main branch (clang++-17 was not tested, but I assume it behaves the same):
# clang 16 release
clang++-16 -O1 example.cpp && ./a.out
b ooops
# clang main 5caae72d1a4f
clang++ -O1 example.cpp && ./a.out
b ooops
The problem is that C++ unions are represented at the IR level as a struct containing only the member with the largest size:
%union.my_union = type { %struct.my_struct_2 }
%struct.my_struct_2 = type { float, double }
%struct.my_struct_1 = type { float, float }
Information about the layout of my_struct_1 is lost when the llvm::StructType for the union is constructed. The SROA pass operates on the my_struct_2 layout only.
my_func IR before SROA:
*** IR Dump After SimplifyCFGPass on _Z7my_funcv ***
; Function Attrs: mustprogress nounwind uwtable
; Builds the my_struct_1 temporary, copies it into the union-typed return slot,
; then loads that slot back as a { float, double } aggregate and returns it.
define dso_local { float, double } @_Z7my_funcv() #0 {
entry:
%retval = alloca %union.my_union, align 8
%ref.tmp = alloca %struct.my_struct_1, align 4
call void @llvm.lifetime.start.p0(i64 8, ptr %ref.tmp) #5
%a = getelementptr inbounds %struct.my_struct_1, ptr %ref.tmp, i32 0, i32 0
store float 1.000000e+02, ptr %a, align 4, !tbaa !5
%b = getelementptr inbounds %struct.my_struct_1, ptr %ref.tmp, i32 0, i32 1
store float 2.000000e+02, ptr %b, align 4, !tbaa !10
; Only 8 bytes (the two floats of my_struct_1) are copied into %retval.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %retval, ptr align 4 %ref.tmp, i64 8, i1 false), !tbaa.struct !11
call void @llvm.lifetime.end.p0(i64 8, ptr %ref.tmp) #5
%coerce.dive = getelementptr inbounds %union.my_union, ptr %retval, i32 0, i32 0
; The slot is read back through { float, double }, a type with one float and
; one double element and no element corresponding to my_struct_1::b.
%0 = load { float, double }, ptr %coerce.dive, align 8
ret { float, double } %0
}
my_func IR after SROA:
*** IR Dump After SROAPass on _Z7my_funcv ***
; Function Attrs: mustprogress nounwind uwtable
; After SROA the allocas and the memcpy are gone. Element 0 is the constant
; 100.0 and element 1 is undef; the stored 200.0 no longer appears anywhere.
define dso_local { float, double } @_Z7my_funcv() #0 {
entry:
%.fca.0.insert = insertvalue { float, double } poison, float 1.000000e+02, 0
%.fca.1.insert = insertvalue { float, double } %.fca.0.insert, double undef, 1
ret { float, double } %.fca.1.insert
}
As you can see, the 200.f value is lost and an undef value is inserted instead.
On the other hand, the IR before SROA also looks suspicious, because 32 bits of the double value are undef, which makes the whole double undef.
gcc handles this example correctly.
Possibly related discussion:
https://discourse.llvm.org/t/struct-copy/11330
Possibly related issue:
#53710