@@ -99,7 +99,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
9999 // Return the matching instruction if one is found, else MBB->end().
100100 MachineBasicBlock::iterator findMatchingInsn (MachineBasicBlock::iterator I,
101101 LdStPairFlags &Flags,
102- unsigned Limit);
102+ unsigned Limit,
103+ bool FindNarrowMerge);
103104
104105 // Scan the instructions looking for a store that writes to the address from
105106 // which the current load instruction reads. Return true if one is found.
@@ -757,7 +758,8 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
757758 MergeMI->eraseFromParent ();
758759 return NextI;
759760 }
760- assert (isPromotableZeroStoreInst (I) && " Expected promotable zero store" );
761+ assert (isPromotableZeroStoreInst (I) && isPromotableZeroStoreInst (MergeMI) &&
762+ " Expected promotable zero store" );
761763
762764 // Construct the new instruction.
763765 MachineInstrBuilder MIB;
@@ -1181,7 +1183,8 @@ static bool canMergeOpc(unsigned OpcA, unsigned OpcB, LdStPairFlags &Flags,
11811183/// current instruction into a wider equivalent or a load/store pair.
11821184MachineBasicBlock::iterator
11831185AArch64LoadStoreOpt::findMatchingInsn (MachineBasicBlock::iterator I,
1184- LdStPairFlags &Flags, unsigned Limit) {
1186+ LdStPairFlags &Flags, unsigned Limit,
1187+ bool FindNarrowMerge) {
11851188 MachineBasicBlock::iterator E = I->getParent ()->end ();
11861189 MachineBasicBlock::iterator MBBI = I;
11871190 MachineInstr *FirstMI = I;
@@ -1255,26 +1258,26 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12551258 // safely transform. Similarly, stop if we see a hint to avoid pairs.
12561259 if (MI->hasOrderedMemoryRef () || TII->isLdStPairSuppressed (MI))
12571260 return E;
1258- // If the resultant immediate offset of merging these instructions
1259- // is out of range for a pairwise instruction, bail and keep looking.
1260- bool IsNarrowLoad = isNarrowLoad (MI->getOpcode ());
1261- if (!IsNarrowLoad &&
1262- !inBoundsForPair (IsUnscaled, MinOffset, OffsetStride)) {
1263- trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1264- MemInsns.push_back (MI);
1265- continue ;
1266- }
12671261
1268- if (IsNarrowLoad || IsPromotableZeroStore ) {
1262+ if (FindNarrowMerge ) {
12691263 // If the alignment requirements of the scaled wide load/store
1270- // instruction can't express the offset of the scaled narrow
1271- // input, bail and keep looking.
1272- if (!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) {
1264+ // instruction can't express the offset of the scaled narrow input,
1265+ // bail and keep looking. For promotable zero stores, allow only when
1266+ // the stored value is the same (i.e., WZR).
1267+ if ((!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) ||
1268+ (IsPromotableZeroStore && Reg != getLdStRegOp (MI).getReg ())) {
12731269 trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
12741270 MemInsns.push_back (MI);
12751271 continue ;
12761272 }
12771273 } else {
1274+ // If the resultant immediate offset of merging these instructions
1275+ // is out of range for a pairwise instruction, bail and keep looking.
1276+ if (!inBoundsForPair (IsUnscaled, MinOffset, OffsetStride)) {
1277+ trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
1278+ MemInsns.push_back (MI);
1279+ continue ;
1280+ }
12781281 // If the alignment requirements of the paired (scaled) instruction
12791282 // can't express the offset of the unscaled input, bail and keep
12801283 // looking.
@@ -1287,10 +1290,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
12871290 // If the destination register of the loads is the same register, bail
12881291 // and keep looking. A load-pair instruction with both destination
12891292 // registers the same is UNPREDICTABLE and will result in an exception.
1290- // For narrow stores, allow only when the stored value is the same
1291- // (i.e., WZR).
1292- if ((MayLoad && Reg == getLdStRegOp (MI).getReg ()) ||
1293- (IsPromotableZeroStore && Reg != getLdStRegOp (MI).getReg ())) {
1293+ if (MayLoad && Reg == getLdStRegOp (MI).getReg ()) {
12941294 trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
12951295 MemInsns.push_back (MI);
12961296 continue ;
@@ -1609,7 +1609,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
16091609 // Look ahead up to LdStLimit instructions for a mergeable instruction.
16101610 LdStPairFlags Flags;
16111611 MachineBasicBlock::iterator MergeMI =
1612- findMatchingInsn (MBBI, Flags, LdStLimit);
1612+ findMatchingInsn (MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true );
16131613 if (MergeMI != E) {
16141614 if (isNarrowLoad (MI)) {
16151615 ++NumNarrowLoadsPromoted;
@@ -1644,7 +1644,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
16441644
16451645 // Look ahead up to LdStLimit instructions for a pairable instruction.
16461646 LdStPairFlags Flags;
1647- MachineBasicBlock::iterator Paired = findMatchingInsn (MBBI, Flags, LdStLimit);
1647+ MachineBasicBlock::iterator Paired =
1648+ findMatchingInsn (MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false );
16481649 if (Paired != E) {
16491650 ++NumPairCreated;
16501651 if (TII->isUnscaledLdSt (MI))
0 commit comments