@@ -1245,7 +1245,6 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12451245
12461246 for (StringRef Code : CI.Codes ) {
12471247 unsigned RegCount = 0 ;
1248-
12491248 if (Code.starts_with (" a" )) {
12501249 // Virtual register, compute number of registers based on the type.
12511250 //
@@ -1257,21 +1256,27 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12571256 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg (Code);
12581257 if (Kind == ' a' ) {
12591258 RegCount = NumRegs;
1260-
1261- // Apply physreg alignment requirement
1262- //
1263- // TODO: This is more conservative than necessary.
1264- MaxPhysReg = alignTo (MaxPhysReg, NumRegs);
12651259 MaxPhysReg = std::max (MaxPhysReg, std::min (RegIdx + NumRegs, 256u ));
12661260 }
1261+
1262+ continue ;
12671263 }
12681264
12691265 if (CI.Type == InlineAsm::isOutput) {
1266+ // Apply tuple alignment requirement
1267+ //
1268+ // TODO: This is more conservative than necessary.
1269+ AGPRDefCount = alignTo (AGPRDefCount, RegCount);
1270+
12701271 AGPRDefCount += RegCount;
1271- if (CI.isEarlyClobber )
1272+ if (CI.isEarlyClobber ) {
1273+ AGPRUseCount = alignTo (AGPRUseCount, RegCount);
12721274 AGPRUseCount += RegCount;
1273- } else
1275+ }
1276+ } else {
1277+ AGPRUseCount = alignTo (AGPRUseCount, RegCount);
12741278 AGPRUseCount += RegCount;
1279+ }
12751280 }
12761281 }
12771282
0 commit comments