@@ -1243,29 +1243,34 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12431243 }
12441244
12451245 for (StringRef Code : CI.Codes ) {
1246+ unsigned RegCount = 0 ;
1247+
12461248 if (Code.starts_with (" a" )) {
12471249 // Virtual register, compute number of registers based on the type.
12481250 //
12491251 // We ought to be going through TargetLowering to get the number of
12501252 // registers, but we should avoid the dependence on CodeGen here.
1251- unsigned RegCount = divideCeil (DL.getTypeSizeInBits (Ty), 32 );
1252- if (CI.Type == InlineAsm::isOutput) {
1253- AGPRDefCount += RegCount;
1254- if (CI.isEarlyClobber )
1255- AGPRUseCount += RegCount;
1256- } else
1257- AGPRUseCount += RegCount;
1253+ RegCount = divideCeil (DL.getTypeSizeInBits (Ty), 32 );
12581254 } else {
12591255 // Physical register reference
12601256 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg (Code);
1261- if (Kind == ' a' )
1257+ if (Kind == ' a' ) {
1258+ RegCount = NumRegs;
12621259 MaxPhysReg = std::max (MaxPhysReg, std::min (RegIdx + NumRegs, 256u ));
1260+ }
12631261 }
1262+
1263+ if (CI.Type == InlineAsm::isOutput) {
1264+ AGPRDefCount += RegCount;
1265+ if (CI.isEarlyClobber )
1266+ AGPRUseCount += RegCount;
1267+ } else
1268+ AGPRUseCount += RegCount;
12641269 }
12651270 }
12661271
12671272 unsigned MaxVirtReg = std::max (AGPRUseCount, AGPRDefCount);
1268- return std::min (MaxVirtReg + MaxPhysReg, 256u );
1273+ return std::min (std::max ( MaxVirtReg, MaxPhysReg) , 256u );
12691274}
12701275
12711276// TODO: Migrate to range merge of amdgpu-agpr-alloc.
0 commit comments