Skip to content

Commit d14a916

Browse files
committed
Be very conservative with physregs
1 parent db44b41 commit d14a916

File tree

2 files changed

+46
-2
lines changed

2 files changed

+46
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,7 +1221,6 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12211221
unsigned MaxPhysReg = 0;
12221222
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
12231223

1224-
// TODO: Underestimates due to not accounting for tuple alignment requirements
12251224
// TODO: Overestimates due to not accounting for tied operands
12261225
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
12271226
Type *Ty = nullptr;
@@ -1258,6 +1257,11 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12581257
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
12591258
if (Kind == 'a') {
12601259
RegCount = NumRegs;
1260+
1261+
// Apply physreg alignment requirement
1262+
//
1263+
// TODO: This is more conservative than necessary.
1264+
MaxPhysReg = alignTo(MaxPhysReg, NumRegs);
12611265
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
12621266
}
12631267
}
@@ -1272,7 +1276,11 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12721276
}
12731277

12741278
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1275-
return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
1279+
1280+
// TODO: This is overly conservative. If there are any physical registers,
1281+
// allocate any virtual registers after them so we don't have to solve optimal
1282+
// packing.
1283+
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12761284
}
12771285

12781286
// TODO: Migrate to range merge of amdgpu-agpr-alloc.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,42 @@ define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
665665
ret void
666666
}
667667

668+
define amdgpu_kernel void @align3_virtreg() {
669+
; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
670+
; CHECK-SAME: ) #[[ATTR1]] {
671+
; CHECK-NEXT: call void asm sideeffect "
672+
; CHECK-NEXT: call void @use_most()
673+
; CHECK-NEXT: ret void
674+
;
675+
call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
676+
call void @use_most()
677+
ret void
678+
}
679+
680+
define amdgpu_kernel void @align3_align4_virtreg() {
681+
; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
682+
; CHECK-SAME: ) #[[ATTR1]] {
683+
; CHECK-NEXT: call void asm sideeffect "
684+
; CHECK-NEXT: call void @use_most()
685+
; CHECK-NEXT: ret void
686+
;
687+
call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
688+
call void @use_most()
689+
ret void
690+
}
691+
692+
define amdgpu_kernel void @align2_align4_virtreg() {
693+
; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
694+
; CHECK-SAME: ) #[[ATTR1]] {
695+
; CHECK-NEXT: call void asm sideeffect "
696+
; CHECK-NEXT: call void @use_most()
697+
; CHECK-NEXT: ret void
698+
;
699+
call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
700+
call void @use_most()
701+
ret void
702+
}
703+
668704
attributes #0 = { "amdgpu-agpr-alloc"="0" }
669705
;.
670706
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }

0 commit comments

Comments
 (0)