Skip to content

Commit c65b4d6

Browse files
committed
[SelectionDAG] Do not second-guess alignment for alloca
The alignment of an alloca in IR can be deliberately lower than the preferred alignment of its type, but the override in FunctionLoweringInfo::set effectively treated the preferred alignment (capped at the stack alignment) as a minimum. This patch changes that behavior so that the alignment specified on the alloca is always used. If the alignment is not set explicitly in LLVM IR, it is set to DL.getPrefTypeAlign(Ty) in computeAllocaDefaultAlign.

Tests are changed as well: the explicit alignment is increased to match the preferred alignment where it affects the output, or omitted where the right value is hard to determine (e.g. for pointers, some structs, or unusual types).

Differential Revision: https://reviews.llvm.org/D135462
1 parent 5e71ca3 commit c65b4d6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+134
-147
lines changed

llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp

Lines changed: 1 addition & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -128,20 +128,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
128128
for (const Instruction &I : BB) {
129129
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
130130
Type *Ty = AI->getAllocatedType();
131-
Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
132-
// The "specified" alignment is the alignment written on the alloca,
133-
// or the preferred alignment of the type if none is specified.
134-
//
135-
// (Unspecified alignment on allocas will be going away soon.)
136-
Align SpecifiedAlign = AI->getAlign();
137-
138-
// If the preferred alignment of the type is higher than the specified
139-
// alignment of the alloca, promote the alignment, as long as it doesn't
140-
// require realigning the stack.
141-
//
142-
// FIXME: Do we really want to second-guess the IR in isel?
143-
Align Alignment =
144-
std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
131+
Align Alignment = AI->getAlign();
145132

146133
// Static allocas can be folded into the initial stack frame
147134
// adjustment. For targets that don't realign the stack, don't

llvm/test/CodeGen/AArch64/preferred-alignment.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
; Function Attrs: nounwind
44
define i32 @foo() #0 {
55
entry:
6-
%c = alloca i8, align 1
6+
%c = alloca i8
77
; CHECK: add x0, sp, #12
8-
%s = alloca i16, align 2
8+
%s = alloca i16
99
; CHECK-NEXT: add x1, sp, #8
10-
%i = alloca i32, align 4
10+
%i = alloca i32
1111
; CHECK-NEXT: add x2, sp, #4
1212
%call = call i32 @bar(ptr %c, ptr %s, ptr %i)
1313
%0 = load i8, ptr %c, align 1

llvm/test/CodeGen/AArch64/seh-finally.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ entry:
4242
; CHECK: ldur w0, [x29, #-8]
4343
; CHECK: bl foo
4444

45-
%o = alloca %struct.S, align 4
45+
%o = alloca %struct.S, align 8
4646
call void (...) @llvm.localescape(ptr %o)
4747
%0 = load i32, ptr %o, align 4
4848
invoke void @foo(i32 %0) #5

llvm/test/CodeGen/AMDGPU/call-argument-types.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -671,7 +671,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
671671
; GCN-NEXT: s_swappc_b64
672672
; GCN-NOT: [[SP]]
673673
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
674-
%val = alloca { i8, i32 }, align 4, addrspace(5)
674+
%val = alloca { i8, i32 }, align 8, addrspace(5)
675675
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
676676
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
677677
store i8 3, ptr addrspace(5) %gep0
@@ -702,8 +702,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
702702
; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
703703
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
704704
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
705-
%in.val = alloca { i8, i32 }, align 4, addrspace(5)
706-
%out.val = alloca { i8, i32 }, align 4, addrspace(5)
705+
%in.val = alloca { i8, i32 }, align 8, addrspace(5)
706+
%out.val = alloca { i8, i32 }, align 8, addrspace(5)
707707
%in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
708708
%in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
709709
store i8 3, ptr addrspace(5) %in.gep0

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,7 @@ bb5:
289289

290290
; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]]
291291
define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
292-
%alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
292+
%alloca0 = alloca { i8, i32 }, align 8, addrspace(5)
293293
%cmp = icmp eq i32 %arg0, 0
294294
br i1 %cmp, label %bb, label %ret
295295

llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11098,7 +11098,7 @@ entry:
1109811098
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
1109911099

1110011100
; allocate enough scratch to go beyond 2^12 addressing
11101-
%scratch = alloca <1280 x i32>, align 8, addrspace(5)
11101+
%scratch = alloca <1280 x i32>, align 16, addrspace(5)
1110211102

1110311103
; load VGPR data
1110411104
%aptr = getelementptr <64 x i32>, ptr addrspace(1) %in, i32 %tid

llvm/test/CodeGen/ARM/ssp-data-layout.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -386,8 +386,8 @@ entry:
386386
; CHECK: bl get_struct_large_char2
387387
; CHECK: strb r0, [sp, #106]
388388
; CHECK: bl end_struct_large_char2
389-
%a = alloca %struct.struct_small_char, align 1
390-
%b = alloca %struct.struct_large_char2, align 1
389+
%a = alloca %struct.struct_small_char, align 4
390+
%b = alloca %struct.struct_large_char2, align 4
391391
%d1 = alloca %struct.struct_large_nonchar, align 8
392392
%d2 = alloca %struct.struct_small_nonchar, align 2
393393
%call = call signext i8 @get_struct_small_char()

llvm/test/CodeGen/BPF/pr57872.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ define void @foo(ptr %g) {
180180
; CHECK-NEXT: call bar
181181
; CHECK-NEXT: exit
182182
entry:
183-
%event = alloca %struct.event, align 1
183+
%event = alloca %struct.event, align 8
184184
%hostname = getelementptr inbounds %struct.event, ptr %event, i64 0, i32 1
185185
%0 = load ptr, ptr %g, align 8
186186
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(84) %hostname, ptr noundef nonnull align 1 dereferenceable(84) %0, i64 84, i1 false)

llvm/test/CodeGen/BPF/undef.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ define i32 @ebpf_filter(ptr nocapture readnone %ebpf_packet) #0 section "socket1
4040
; CHECK: r1 = routing
4141
; CHECK: call bpf_map_lookup_elem
4242
; CHECK: exit
43-
%key = alloca %struct.routing_key_2, align 1
43+
%key = alloca %struct.routing_key_2, align 8
4444
store i8 5, ptr %key, align 1
4545
%1 = getelementptr inbounds %struct.routing_key_2, ptr %key, i64 0, i32 0, i64 1
4646
store i8 6, ptr %1, align 1

llvm/test/CodeGen/Mips/Fast-ISel/fastalloca.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ entry:
1010
; CHECK-LABEL: foobar:
1111
%retval = alloca i32, align 4
1212
%x.addr = alloca i32, align 4
13-
%a = alloca %struct.x, align 4
14-
%c = alloca ptr, align 4
13+
%a = alloca %struct.x, align 8
14+
%c = alloca ptr, align 8
1515
store i32 %x, ptr %x.addr, align 4
1616
%0 = load i32, ptr %x.addr, align 4
1717
store i32 %0, ptr %a, align 4

0 commit comments

Comments
 (0)