Skip to content

Commit a4a8e64

Browse files
committed
Try to apply alignment requirement
1 parent d14a916 commit a4a8e64

File tree

1 file changed

+13
-8
lines changed

1 file changed

+13
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1245,7 +1245,6 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12451245

12461246
for (StringRef Code : CI.Codes) {
12471247
unsigned RegCount = 0;
1248-
12491248
if (Code.starts_with("a")) {
12501249
// Virtual register, compute number of registers based on the type.
12511250
//
@@ -1257,21 +1256,27 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12571256
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
12581257
if (Kind == 'a') {
12591258
RegCount = NumRegs;
1260-
1261-
// Apply physreg alignment requirement
1262-
//
1263-
// TODO: This is more conservative than necessary.
1264-
MaxPhysReg = alignTo(MaxPhysReg, NumRegs);
12651259
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
12661260
}
1261+
1262+
continue;
12671263
}
12681264

12691265
if (CI.Type == InlineAsm::isOutput) {
1266+
// Apply tuple alignment requirement
1267+
//
1268+
// TODO: This is more conservative than necessary.
1269+
AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1270+
12701271
AGPRDefCount += RegCount;
1271-
if (CI.isEarlyClobber)
1272+
if (CI.isEarlyClobber) {
1273+
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
12721274
AGPRUseCount += RegCount;
1273-
} else
1275+
}
1276+
} else {
1277+
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
12741278
AGPRUseCount += RegCount;
1279+
}
12751280
}
12761281
}
12771282

0 commit comments

Comments
 (0)