Open
Description
Example (godbolt):
#include <cstdint>
#include <cstddef>
// Plain aggregate used as the reproducer's return type.
// The four leading fields have sizes 2 + 1 + 1 + 4 = 8 bytes and carry no
// default initializer; they are followed by N 64-bit words that default to zero.
template<size_t N>
struct Test {
uint16_t a;  // receives bits 0..15 of the input in test()
uint8_t b;   // receives bits 16..23
uint8_t c;   // receives bits 24..31
uint32_t d;  // receives bits 32..63
uint64_t extra[N] = {};  // trailing payload; N selects the overall struct size
};
// Builds a Test<N> by slicing the 64-bit value x into the four leading fields:
//   a <- bits 0..15, b <- bits 16..23, c <- bits 24..31, d <- bits 32..63.
// extra[] is left at its default zero initialization.
// The field-by-field form is deliberate: the report is about whether the
// optimizer merges these four narrow assignments into one wide one.
template<size_t N>
Test<N> test(uint64_t x) {
Test<N> t;  // a/b/c/d start uninitialized; every one is assigned below
t.a = static_cast<uint16_t>(x);
t.b = static_cast<uint8_t>(x >> 16);
t.c = static_cast<uint8_t>(x >> 24);
t.d = static_cast<uint32_t>(x >> 32);
return t;
}
// Explicit instantiations so that code is generated for both variants being
// compared: one trailing 64-bit word (N = 1) and two trailing words (N = 2).
template Test<1> test<1>(uint64_t);
template Test<2> test<2>(uint64_t);
For `test<2>` (the 24-byte case), the assignments to a/b/c/d are emitted exactly as written in the source (3 shifts, 4 truncates, 4 stores), even though on a little-endian target they could be reduced to a single 64-bit store of `x`:
; test<2>: the result is written through the sret pointer %0; %1 is the input x.
define weak_odr dso_local void @Test<2ul> test<2ul>(unsigned long)(ptr dead_on_unwind noalias writable sret(%struct.Test.0) align 8 %0, i64 noundef %1) local_unnamed_addr #0 comdat !dbg !138 {
#dbg_value(i64 %1, !142, !DIExpression(), !144)
#dbg_declare(ptr %0, !143, !DIExpression(), !145)
#dbg_value(ptr %0, !146, !DIExpression(), !154)
; Zero-fill extra[]: 16 bytes starting at byte offset 8 of the result.
%3 = getelementptr inbounds nuw i8, ptr %0, i64 8, !dbg !156
tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %3, i8 0, i64 16, i1 false), !dbg !156
; a: low 16 bits of x, stored at offset 0.
%4 = trunc i64 %1 to i16, !dbg !161
store i16 %4, ptr %0, align 8, !dbg !162
; b: bits 16..23 of x, stored at offset 2.
%5 = lshr i64 %1, 16, !dbg !167
%6 = trunc i64 %5 to i8, !dbg !168
%7 = getelementptr inbounds nuw i8, ptr %0, i64 2, !dbg !169
store i8 %6, ptr %7, align 2, !dbg !170
; c: bits 24..31 of x, stored at offset 3.
%8 = lshr i64 %1, 24, !dbg !172
%9 = trunc i64 %8 to i8, !dbg !173
%10 = getelementptr inbounds nuw i8, ptr %0, i64 3, !dbg !174
store i8 %9, ptr %10, align 1, !dbg !175
; d: bits 32..63 of x, stored at offset 4.
; Together the four stores above cover bytes 0..7 of the result.
%11 = lshr i64 %1, 32, !dbg !177
%12 = trunc nuw i64 %11 to i32, !dbg !178
%13 = getelementptr inbounds nuw i8, ptr %0, i64 4, !dbg !179
store i32 %12, ptr %13, align 4, !dbg !180
ret void, !dbg !182
}
For `test<1>` (the 16-byte case), the assignments are combined as expected (although this case is a bit different, since the struct is returned directly as `{ i64, i64 }` and no store to memory is involved):
; test<1>: the result is returned by value as { i64, i64 }; %0 is the input x.
define weak_odr dso_local { i64, i64 } @Test<1ul> test<1ul>(unsigned long)(i64 noundef %0) local_unnamed_addr #0 comdat !dbg !130 {
#dbg_value(i64 %0, !134, !DIExpression(), !136)
; The records below attach debug-info fragments to variable !135: bits 64..127
; are the constant 0, and the fragments at bit offsets 0, 16, 24 and 32 are all
; expressed in terms of %0 (shifted and narrowed as needed).
#dbg_value(i64 0, !135, !DIExpression(DW_OP_LLVM_fragment, 64, 64), !136)
#dbg_value(i64 %0, !135, !DIExpression(DW_OP_LLVM_fragment, 0, 16), !136)
#dbg_value(i64 %0, !135, !DIExpression(DW_OP_constu, 16, DW_OP_shr, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value, DW_OP_LLVM_fragment, 16, 8), !136)
#dbg_value(i64 %0, !135, !DIExpression(DW_OP_constu, 24, DW_OP_shr, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value, DW_OP_LLVM_fragment, 24, 8), !136)
#dbg_value(i64 %0, !135, !DIExpression(DW_OP_constu, 32, DW_OP_shr, DW_OP_stack_value, DW_OP_LLVM_fragment, 32, 32), !136)
; Element 0 of the result is x itself, with no shifts or truncations left;
; element 1 is the constant 0.
%2 = insertvalue { i64, i64 } poison, i64 %0, 0, !dbg !137
%3 = insertvalue { i64, i64 } %2, i64 0, 1, !dbg !137
ret { i64, i64 } %3, !dbg !137
}