@@ -173,6 +173,7 @@ struct FoldCandidate {
173173
174174class SIFoldOperandsImpl {
175175public:
176+ MachineFunction *MF;
176177 MachineRegisterInfo *MRI;
177178 const SIInstrInfo *TII;
178179 const SIRegisterInfo *TRI;
@@ -705,6 +706,36 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
705706 }
706707
707708 MachineOperand *New = Fold.Def .OpToFold ;
709+
710+ // Verify the register is compatible with the operand.
711+ if (const TargetRegisterClass *OpRC =
712+ TII->getRegClass (MI->getDesc (), Fold.UseOpNo , TRI, *MF)) {
713+ const TargetRegisterClass *OldRC = MRI->getRegClass (Old.getReg ());
714+ const TargetRegisterClass *NewRC = MRI->getRegClass (New->getReg ());
715+ unsigned NewSubReg = New->getSubReg ();
716+ unsigned OldSubReg = Old.getSubReg ();
717+
718+ const TargetRegisterClass *ConstrainRC = OpRC;
719+ if (NewSubReg && OldSubReg) {
720+ unsigned PreA, PreB;
721+ ConstrainRC = TRI->getCommonSuperRegClass (OpRC, OldSubReg, NewRC,
722+ NewSubReg, PreA, PreB);
723+ } else if (OldSubReg) {
724+ ConstrainRC = TRI->getMatchingSuperRegClass (OldRC, OpRC, OldSubReg);
725+ } else if (NewSubReg) {
726+ ConstrainRC = TRI->getMatchingSuperRegClass (NewRC, OpRC, NewSubReg);
727+ }
728+
729+ if (!ConstrainRC)
730+ return false ;
731+
732+ if (!MRI->constrainRegClass (New->getReg (), ConstrainRC)) {
733+ LLVM_DEBUG (dbgs () << " Cannot constrain " << printReg (New->getReg (), TRI)
734+ << TRI->getRegClassName (ConstrainRC) << ' \n ' );
735+ return false ;
736+ }
737+ }
738+
708739 // Rework once the VS_16 register class is updated to include proper
709740 // 16-bit SGPRs instead of 32-bit ones.
710741 if (Old.getSubReg () == AMDGPU::lo16 && TRI->isSGPRReg (*MRI, New->getReg ()))
@@ -1429,30 +1460,9 @@ void SIFoldOperandsImpl::foldOperand(
14291460 return ;
14301461 }
14311462
1432- if (!FoldingImmLike) {
1433- if (OpToFold.isReg () && ST->needsAlignedVGPRs ()) {
1434- // Don't fold if OpToFold doesn't hold an aligned register.
1435- const TargetRegisterClass *RC =
1436- TRI->getRegClassForReg (*MRI, OpToFold.getReg ());
1437- assert (RC);
1438- if (TRI->hasVectorRegisters (RC) && OpToFold.getSubReg ()) {
1439- unsigned SubReg = OpToFold.getSubReg ();
1440- if (const TargetRegisterClass *SubRC =
1441- TRI->getSubRegisterClass (RC, SubReg))
1442- RC = SubRC;
1443- }
1444-
1445- if (!RC || !TRI->isProperlyAlignedRC (*RC))
1446- return ;
1447- }
1448-
1449- tryAddToFoldList (FoldList, UseMI, UseOpIdx, OpToFold);
1450-
1451- // FIXME: We could try to change the instruction from 64-bit to 32-bit
1452- // to enable more folding opportunities. The shrink operands pass
1453- // already does this.
1454- return ;
1455- }
1463+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
1464+ // to enable more folding opportunities. The shrink operands pass
1465+ // already does this.
14561466
14571467 tryAddToFoldList (FoldList, UseMI, UseOpIdx, OpToFold);
14581468}
@@ -2747,6 +2757,7 @@ bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
27472757}
27482758
27492759bool SIFoldOperandsImpl::run (MachineFunction &MF) {
2760+ this ->MF = &MF;
27502761 MRI = &MF.getRegInfo ();
27512762 ST = &MF.getSubtarget <GCNSubtarget>();
27522763 TII = ST->getInstrInfo ();
0 commit comments