diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index e8ec93dd5ee63..d758260a8ab5d 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -59,7 +59,7 @@ add_llvm_target(HexagonCodeGen HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp HexagonSplitDouble.cpp - HexagonStoreWidening.cpp + HexagonLoadStoreWidening.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp new file mode 100644 index 0000000000000..1a60d0e13057e --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp @@ -0,0 +1,915 @@ +//===---HexagonLoadStoreWidening.cpp---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// HexagonStoreWidening: +// Replace sequences of "narrow" stores to adjacent memory locations with +// a fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure". Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +// +// HexagonLoadWidening does the same thing as HexagonStoreWidening but +// for Loads. 
Here, we try to replace 4-byte Loads with register-pair loads. +// For example: +// Replace +// %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8) +// %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2) +// with +// %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1) +// %2:intregs = COPY %4.isub_lo:doubleregs +// %3:intregs = COPY %4.isub_hi:doubleregs +// +// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N +// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts + +//===---------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-load-store-widening" + +static cl::opt MaxMBBSizeForLoadStoreWidening( + "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000), + cl::desc("Limit block size to analyze in load/store widening pass")); + +namespace llvm { + +FunctionPass *createHexagonStoreWidening(); +FunctionPass *createHexagonLoadWidening(); +void initializeHexagonStoreWideningPass(PassRegistry &); +void 
initializeHexagonLoadWideningPass(PassRegistry &); + +} // end namespace llvm + +namespace { + +struct HexagonLoadStoreWidening { + enum WideningMode { Store, Load }; + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + +public: + HexagonLoadStoreWidening(const HexagonInstrInfo *TII, + const HexagonRegisterInfo *TRI, + MachineRegisterInfo *MRI, AliasAnalysis *AA, + MachineFunction *MF, bool StoreMode) + : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF), + Mode(StoreMode ? WideningMode::Store : WideningMode::Load), + HII(MF->getSubtarget().getInstrInfo()) {} + + bool run(); + +private: + const bool Mode; + const unsigned MaxWideSize = 8; + const HexagonInstrInfo *HII = nullptr; + + using InstrSet = SmallPtrSet; + using InstrGroup = SmallVector; + using InstrGroupList = SmallVector; + + InstrSet ProcessedInsts; + + unsigned getBaseAddressRegister(const MachineInstr *MI); + int64_t getOffset(const MachineInstr *MI); + int64_t getPostIncrementValue(const MachineInstr *MI); + bool handledInstType(const MachineInstr *MI); + + void createGroup(MachineInstr *BaseInst, InstrGroup &Group); + void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processGroup(InstrGroup &Group); + bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceInsts(InstrGroup &OG, InstrGroup &NG); + bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2); + bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B); +}; + +struct HexagonStoreWidening : public MachineFunctionPass { + static char ID; + + 
HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Hexagon Store Widening"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MFn) override { + if (skipFunction(MFn.getFunction())) + return false; + + auto &ST = MFn.getSubtarget(); + const HexagonInstrInfo *TII = ST.getInstrInfo(); + const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo *MRI = &MFn.getRegInfo(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run(); + } +}; + +struct HexagonLoadWidening : public MachineFunctionPass { + static char ID; + + HexagonLoadWidening() : MachineFunctionPass(ID) { + initializeHexagonLoadWideningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Hexagon Load Widening"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MFn) override { + if (skipFunction(MFn.getFunction())) + return false; + + auto &ST = MFn.getSubtarget(); + const HexagonInstrInfo *TII = ST.getInstrInfo(); + const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo *MRI = &MFn.getRegInfo(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); + return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run(); + } +}; + +char HexagonStoreWidening::ID = 0; +char HexagonLoadWidening::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonStoreWidening, 
"hexagon-widen-stores", + "Hexagon Store Widening", false, false) + +INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads", + "Hexagon Load Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads", + "Hexagon Load Widening", false, false) + +static const MachineMemOperand &getMemTarget(const MachineInstr *MI) { + assert(!MI->memoperands_empty() && "Expecting memory operands"); + return **MI->memoperands_begin(); +} + +unsigned +HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) { + assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode"); + unsigned Base, Offset; + HII->getBaseAndOffsetPosition(*MI, Base, Offset); + const MachineOperand &MO = MI->getOperand(Base); + assert(MO.isReg() && "Expecting register operand"); + return MO.getReg(); +} + +int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) { + assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode"); + + // On Hexagon, post-incs always have an offset of 0 + // There is no Offset operand to post-incs + if (HII->isPostIncrement(*MI)) + return 0; + + unsigned Base, Offset; + + HII->getBaseAndOffsetPosition(*MI, Base, Offset); + const MachineOperand &MO = MI->getOperand(Offset); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + return MO.getImm(); + case MachineOperand::MO_GlobalAddress: + return MO.getOffset(); + default: + break; + } + llvm_unreachable("Expecting an immediate or global operand"); +} + +inline int64_t +HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) { + unsigned Base, PostIncIdx; + HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx); + const MachineOperand &MO = MI->getOperand(PostIncIdx); + return MO.getImm(); +} + +// Filtering function: any loads/stores whose opcodes are not "approved" of by +// this function will not be subjected to widening. 
+inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (Mode == WideningMode::Store) { + switch (Opc) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S2_storeri_io: + // Base address must be a register. (Implement FI later.) + return MI->getOperand(0).isReg(); + case Hexagon::S2_storeri_pi: + return MI->getOperand(1).isReg(); + } + } else { + // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x + // loads. So we only widen 32 bit loads as we don't need to select the + // right bits with AND & SHIFT ops. + switch (Opc) { + case Hexagon::L2_loadri_io: + // Base address must be a register and offset must be immediate. + return !MI->memoperands_empty() && MI->getOperand(1).isReg() && + MI->getOperand(2).isImm(); + case Hexagon::L2_loadri_pi: + return !MI->memoperands_empty() && MI->getOperand(2).isReg(); + } + } + return false; +} + +static void addDefsUsesToList(const MachineInstr *MI, + DenseSet &RegDefs, + DenseSet &RegUses) { + for (const auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + if (Op.isDef()) + RegDefs.insert(Op.getReg()); + if (Op.readsReg()) + RegUses.insert(Op.getReg()); + } +} + +bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A, + const MachineInstr *B) { + DenseSet ARegDefs; + DenseSet ARegUses; + addDefsUsesToList(A, ARegDefs, ARegUses); + if (A->mayLoadOrStore() && B->mayLoadOrStore() && + (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true)) + return false; + for (const auto &BOp : B->operands()) { + if (!BOp.isReg()) + continue; + if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg())) + return false; + if (BOp.isDef() && ARegUses.contains(BOp.getReg())) + return false; + } + return true; +} + +// Inspect a machine basic block, and generate groups out of loads/stores +// encountered in the block. 
+// +// A load/store group is a group of loads or stores that use the same base +// register, and which can be reordered within that group without altering the +// semantics of the program. A single group could be widened as +// a whole, if there existed a single load/store instruction with the same +// semantics as the entire group. In many cases, a single group may need more +// than one wide load or store. +void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups) { + // Traverse all instructions and if we encounter + // a load/store, then try to create a group starting at that instruction + // i.e. a sequence of independent loads/stores that can be widened. + for (auto I = MBB.begin(); I != MBB.end(); ++I) { + MachineInstr *MI = &(*I); + if (!handledInstType(MI)) + continue; + if (ProcessedInsts.count(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createGroup(MI, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + +// Create a single load/store group. The insts need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being read from or stored into. +void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst, + InstrGroup &Group) { + assert(handledInstType(BaseInst) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseInst); + InstrGroup Other; + + Group.push_back(BaseInst); + LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump()); + auto End = BaseInst->getParent()->end(); + auto I = BaseInst->getIterator(); + + while (true) { + I = std::next(I); + if (I == End) + break; + MachineInstr *MI = &(*I); + + // Assume calls are aliased to everything. 
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() || + MI->hasOrderedMemoryRef()) + return; + + if (!handledInstType(MI)) { + if (MI->mayLoadOrStore()) + Other.push_back(MI); + continue; + } + + // We have a handledInstType instruction + // If this load/store instruction is aliased with anything already in the + // group, terminate the group now. + for (auto GI : Group) + if (GI->mayAlias(AA, *MI, true)) + return; + if (Mode == WideningMode::Load) { + // Check if current load MI can be moved to the first load instruction + // in Group. If any load instruction aliases with memory instructions in + // Other, terminate the group. + for (auto MemI : Other) + if (!canSwapInstructions(MI, MemI)) + return; + } else { + // Check if store instructions in the group can be moved to current + // store MI. If any store instruction aliases with memory instructions + // in Other, terminate the group. + for (auto MemI : Other) { + if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <= + 0) + continue; + for (auto GI : Group) + if (!canSwapInstructions(MemI, GI)) + return; + } + } + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump()); + Group.push_back(MI); + ProcessedInsts.insert(MI); + } + } // while +} + +// Check if load/store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledInstType(S1) || !handledInstType(S2)) + return false; + + const MachineMemOperand &S1MO = getMemTarget(S1); + + // Currently only handling immediate stores. + int Off1 = getOffset(S1); + int Off2 = getOffset(S2); + + return (Off1 >= 0) ? 
Off1 + S1MO.getSize().getValue() == unsigned(Off2) + : int(Off1 + S1MO.getSize().getValue()) == Off2; +} + +/// Given a sequence of adjacent loads/stores, and a maximum size of a single +/// wide inst, pick a group of insts that can be replaced by a single load/store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. +bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin, + InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, + unsigned MaxSize) { + assert(Begin != End && "No instructions to analyze"); + assert(OG.empty() && "Old group not empty on entry"); + + if (std::distance(Begin, End) <= 1) + return false; + + MachineInstr *FirstMI = *Begin; + assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); + const MachineMemOperand &FirstMMO = getMemTarget(FirstMI); + if (!FirstMMO.getType().isValid()) + return false; + + unsigned Alignment = FirstMMO.getAlign().value(); + unsigned SizeAccum = FirstMMO.getSize().getValue(); + unsigned FirstOffset = getOffset(FirstMI); + + // The initial value of SizeAccum should always be a power of 2. + assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); + + // If the size of the first store equals to or exceeds the limit, do nothing. + if (SizeAccum >= MaxSize) + return false; + + // If the size of the first load/store is greater than or equal to the address + // stored to, then the inst cannot be made any wider. + if (SizeAccum >= Alignment) { + LLVM_DEBUG( + dbgs() << "Size of load/store greater than equal to its alignment\n"); + return false; + } + + // The offset of a load/store will put restrictions on how wide the inst can + // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest + // bits be 0. If the first inst already exhausts the offset limits, quit. 
+ // Test this by checking if the next wider size would exceed the limit. + // For post-increment instructions, the increment amount needs to follow the + // same rule. + unsigned OffsetOrIncVal = 0; + if (HII->isPostIncrement(*FirstMI)) + OffsetOrIncVal = getPostIncrementValue(FirstMI); + else + OffsetOrIncVal = FirstOffset; + if ((2 * SizeAccum - 1) & OffsetOrIncVal) { + LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc" + << " value: " << getPostIncrementValue(FirstMI) + << " is invalid in the widened version\n"); + return false; + } + + OG.push_back(FirstMI); + MachineInstr *S1 = FirstMI; + + // Pow2Num will be the largest number of elements in OG such that the sum + // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2. + unsigned Pow2Num = 1; + unsigned Pow2Size = SizeAccum; + bool HavePostInc = HII->isPostIncrement(*S1); + + // Be greedy: keep accumulating insts as long as they are to adjacent + // memory locations, and as long as the total number of bytes stored + // does not exceed the limit (MaxSize). + // Keep track of when the total size covered is a power of 2, since + // this is a size a single load/store can cover. + for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { + MachineInstr *S2 = *I; + // Insts are sorted, so if S1 and S2 are not adjacent, there won't be + // any other store to fill the "hole". + if (!areAdjacent(S1, S2)) + break; + + // Cannot widen two post increments, need to return two registers + // with incremented values + if (HavePostInc && HII->isPostIncrement(*S2)) + break; + + unsigned S2Size = getMemTarget(S2).getSize().getValue(); + if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) + break; + + OG.push_back(S2); + SizeAccum += S2Size; + if (isPowerOf2_32(SizeAccum)) { + Pow2Num = OG.size(); + Pow2Size = SizeAccum; + } + if ((2 * Pow2Size - 1) & FirstOffset) + break; + + S1 = S2; + } + + // The insts don't add up to anything that can be widened. Clean up. 
+ if (Pow2Num <= 1) { + OG.clear(); + return false; + } + + // Only leave the loads/stores being widened. + OG.resize(Pow2Num); + TotalSize = Pow2Size; + return true; +} + +/// Given an "old group" OG of insts, create a "new group" NG of instructions +/// to replace them. +bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + if (Mode == WideningMode::Store) { + return createWideStores(OG, NG, TotalSize); + } + return createWideLoads(OG, NG, TotalSize); +} + +/// Given an "old group" OG of stores, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for store-immediate. +bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + // XXX Current limitations: + // - only handle a TotalSize of up to 8 + + LLVM_DEBUG(dbgs() << "Creating wide stores\n"); + if (TotalSize > MaxWideSize) + return false; + + uint64_t Acc = 0; // Value accumulator. + unsigned Shift = 0; + bool HaveImm = false; + bool HaveReg = false; + + for (MachineInstr *MI : OG) { + const MachineMemOperand &MMO = getMemTarget(MI); + MachineOperand &SO = HII->isPostIncrement(*MI) + ? MI->getOperand(3) + : MI->getOperand(2); // Source. 
+ unsigned NBits; + uint64_t Mask; + uint64_t Val; + + switch (SO.getType()) { + case MachineOperand::MO_Immediate: + LLVM_DEBUG(dbgs() << "Have store immediate\n"); + HaveImm = true; + + NBits = MMO.getSizeInBits().toRaw(); + Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits)); + Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + break; + case MachineOperand::MO_Register: + HaveReg = true; + break; + default: + LLVM_DEBUG(dbgs() << "Unhandled store\n"); + return false; + } + } + + if (HaveImm && HaveReg) { + LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n"); + return false; + } + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getMemTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlign(), OldM.getAAInfo()); + MachineInstr *StI; + MachineOperand &MR = + (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1) + : FirstSt->getOperand(0)); + auto SecondSt = OG.back(); + if (HaveReg) { + MachineOperand FReg = + (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3) + : FirstSt->getOperand(2)); + // Post increments appear first in the sorted group. 
+ // Cannot have a post increment for the second instruction + assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc"); + MachineOperand SReg = SecondSt->getOperand(2); + assert(FReg.isReg() && SReg.isReg() && + "Cannot merge store register and store imm"); + const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew); + Register VReg = + MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg); + NG.push_back(CombI); + + if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) { + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi); + auto IncDestMO = FirstSt->getOperand(0); + auto IncMO = FirstSt->getOperand(2); + StI = + BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg); + } else { + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io); + auto OffMO = FirstSt->getOperand(1); + StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg); + } + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + return true; + } + + // Handle store immediates + // There are no post increment store immediates on Hexagon + assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc"); + auto Off = FirstSt->getOperand(1).getImm(); + if (TotalSize == 8) { + // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg + uint64_t Mask = 0xFFFFFFFFU; + int LowerAcc = int(Mask & Acc); + int UpperAcc = Acc >> 32; + Register DReg = + MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *CombI; + if (Acc != 0) { + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc); + NG.push_back(TfrI); + const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir); + CombI = BuildMI(*MF, DL, CombD, DReg) + .addImm(UpperAcc) + .addReg(VReg, 
RegState::Kill); + } + // If immediates are 0, we do not need A2_tfrsi + else { + const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii); + CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0); + } + NG.push_back(CombI); + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io); + StI = + BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill); + } else if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io + : (TotalSize == 4) ? Hexagon::S4_storeiri_io + : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io + : (TotalSize == 4) ? Hexagon::S2_storeri_io + : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + StI = + BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill); + } + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + + return true; +} + +/// Given an "old group" OG of loads, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for double register loads. 
+bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + LLVM_DEBUG(dbgs() << "Creating wide loads\n"); + // XXX Current limitations: + // - only expect stores of immediate values in OG, + // - only handle a TotalSize of up to 8 + if (TotalSize > MaxWideSize) + return false; + assert(OG.size() == 2 && "Expecting two elements in Instruction Group."); + + MachineInstr *FirstLd = OG.front(); + const MachineMemOperand &OldM = getMemTarget(FirstLd); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlign(), OldM.getAAInfo()); + + MachineOperand &MR = FirstLd->getOperand(0); + MachineOperand &MRBase = + (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2) + : FirstLd->getOperand(1)); + DebugLoc DL = OG.back()->getDebugLoc(); + + // Create the double register Load Instruction. + Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *LdI; + + // Post increments appear first in the sorted group + if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) { + auto IncDestMO = FirstLd->getOperand(1); + auto IncMO = FirstLd->getOperand(3); + LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi)) + .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg()) + .add(IncDestMO) + .add(MRBase) + .add(IncMO); + LdI->addMemOperand(*MF, NewM); + } else { + auto OffMO = FirstLd->getOperand(2); + LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io)) + .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg()) + .add(MRBase) + .add(OffMO); + LdI->addMemOperand(*MF, NewM); + } + NG.push_back(LdI); + + auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg, + MachineInstr *DstReg) { + Register DestReg = DstReg->getOperand(0).getReg(); + return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg) + .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg); + }; + + MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd); + 
MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back()); + NG.push_back(LdI_lo); + NG.push_back(LdI_hi); + + return true; +} + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) { + LLVM_DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. + // For loads the best one is right before the first load in the OG, + // but in the order in which the insts occur in the program list. + // For stores the best point is right after the last store in the OG. + // Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + InstrSet OldMemInsts; + for (auto *I : OG) + OldMemInsts.insert(I); + + if (Mode == WideningMode::Load) { + // Find the first load instruction in the block that is present in OG. + for (auto &I : *MBB) { + if (OldMemInsts.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any load from the group"); + + for (auto *I : NG) + MBB->insert(InsertAt, I); + } else { + // Find the last store instruction in the block that is present in OG. 
+ auto I = MBB->rbegin(); + for (; I != MBB->rend(); ++I) { + if (OldMemInsts.count(&(*I))) { + InsertAt = (*I).getIterator(); + break; + } + } + + assert((I != MBB->rend()) && "Cannot locate any store from the group"); + + for (auto I = NG.rbegin(); I != NG.rend(); ++I) + MBB->insertAfter(InsertAt, *I); + } + + for (auto *I : OG) + I->eraseFromParent(); + + return true; +} + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide load/store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) && + createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements"); + I += OG.size() - 1; + + Changed = true; + } + + return Changed; +} + +// Process a single basic block: create the load/store groups, and replace them +// with the widened insts, if possible. Processing of each basic block +// is independent from processing of any other basic block. This transfor- +// mation could be stopped after having processed any basic block without +// any ill effects (other than not having performed widening in the unpro- +// cessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + // To prevent long compile time check for max BB size. 
+ if (MBB.size() > MaxMBBSizeForLoadStoreWidening) + return false; + + createGroups(MBB, SGs); + + auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool { + return getOffset(A) < getOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Group with fewer than 2 elements"); + llvm::sort(G, Less); + + Changed |= processGroup(G); + } + + return Changed; +} + +bool HexagonLoadStoreWidening::run() { + bool Changed = false; + + for (auto &B : *MF) + Changed |= processBasicBlock(B); + + return Changed; +} + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + +FunctionPass *llvm::createHexagonLoadWidening() { + return new HexagonLoadWidening(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp deleted file mode 100644 index 9d8e5c53b8227..0000000000000 --- a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ /dev/null @@ -1,606 +0,0 @@ -//===- HexagonStoreWidening.cpp -------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Replace sequences of "narrow" stores to adjacent memory locations with -// a fewer "wide" stores that have the same effect. -// For example, replace: -// S4_storeirb_io %100, 0, 0 ; store-immediate-byte -// S4_storeirb_io %100, 1, 0 ; store-immediate-byte -// with -// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword -// The above is the general idea. The actual cases handled by the code -// may be a bit more complex. -// The purpose of this pass is to reduce the number of outstanding stores, -// or as one could say, "reduce store queue pressure". 
Also, wide stores
-// mean fewer stores, and since there are only two memory instructions allowed
-// per packet, it also means fewer packets, and ultimately fewer cycles.
-//===---------------------------------------------------------------------===//
-
-#include "HexagonInstrInfo.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <vector>
-
-#define DEBUG_TYPE "hexagon-widen-stores"
-
-using namespace llvm;
-
-namespace llvm {
-
-FunctionPass *createHexagonStoreWidening();
-void initializeHexagonStoreWideningPass(PassRegistry&);
-
-} // end namespace llvm
-
-namespace {
-
-  struct HexagonStoreWidening : public MachineFunctionPass {
-    const HexagonInstrInfo      *TII;
-    const HexagonRegisterInfo   *TRI;
-    const MachineRegisterInfo   *MRI;
-    AliasAnalysis               *AA;
-    MachineFunction             *MF;
-
-  public:
-    static char ID;
-
-    HexagonStoreWidening() : MachineFunctionPass(ID) {
-      initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool runOnMachineFunction(MachineFunction &MF) override;
-
-    StringRef getPassName() const override { return "Hexagon Store Widening"; }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AAResultsWrapperPass>();
-      AU.addPreserved<AAResultsWrapperPass>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-    static bool handledStoreType(const MachineInstr *MI);
-
-  private:
-    static const int MaxWideSize = 4;
-
-    using InstrGroup = std::vector<MachineInstr *>;
-    using InstrGroupList = std::vector<InstrGroup>;
-
-    bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
-    bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
-    void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
-                          InstrGroup::iterator End, InstrGroup &Group);
-    void createStoreGroups(MachineBasicBlock &MBB,
-                           InstrGroupList &StoreGroups);
-    bool processBasicBlock(MachineBasicBlock &MBB);
-    bool processStoreGroup(InstrGroup &Group);
-    bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
-                      InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
-    bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
-    bool replaceStores(InstrGroup &OG, InstrGroup &NG);
-    bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
-  };
-
-} // end anonymous namespace
-
-char HexagonStoreWidening::ID = 0;
-
-INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
-                      "Hexason Store Widening", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
-                    "Hexagon Store Widening", false, false)
-
-// Some local helper functions...
-static unsigned getBaseAddressRegister(const MachineInstr *MI) { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isReg() && "Expecting register operand"); - return MO.getReg(); -} - -static int64_t getStoreOffset(const MachineInstr *MI) { - unsigned OpC = MI->getOpcode(); - assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); - - switch (OpC) { - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeiri_io: { - const MachineOperand &MO = MI->getOperand(1); - assert(MO.isImm() && "Expecting immediate offset"); - return MO.getImm(); - } - } - dbgs() << *MI; - llvm_unreachable("Store offset calculation missing for a handled opcode"); - return 0; -} - -static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { - assert(!MI->memoperands_empty() && "Expecting memory operands"); - return **MI->memoperands_begin(); -} - -// Filtering function: any stores whose opcodes are not "approved" of by -// this function will not be subjected to widening. -inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { - // For now, only handle stores of immediate values. - // Also, reject stores to stack slots. - unsigned Opc = MI->getOpcode(); - switch (Opc) { - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeiri_io: - // Base address must be a register. (Implement FI later.) - return MI->getOperand(0).isReg(); - default: - return false; - } -} - -// Check if the machine memory operand MMO is aliased with any of the -// stores in the store group Stores. 
-bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, - const MachineMemOperand &MMO) { - if (!MMO.getValue()) - return true; - - MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); - - for (auto *SI : Stores) { - const MachineMemOperand &SMO = getStoreTarget(SI); - if (!SMO.getValue()) - return true; - - MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); - if (!AA->isNoAlias(L, SL)) - return true; - } - - return false; -} - -// Check if the machine instruction MI accesses any storage aliased with -// any store in the group Stores. -bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, - const MachineInstr *MI) { - for (auto &I : MI->memoperands()) - if (instrAliased(Stores, *I)) - return true; - return false; -} - -// Inspect a machine basic block, and generate store groups out of stores -// encountered in the block. -// -// A store group is a group of stores that use the same base register, -// and which can be reordered within that group without altering the -// semantics of the program. A single store group could be widened as -// a whole, if there existed a single store instruction with the same -// semantics as the entire group. In many cases, a single store group -// may need more than one wide store. -void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, - InstrGroupList &StoreGroups) { - InstrGroup AllInsns; - - // Copy all instruction pointers from the basic block to a temporary - // list. This will allow operating on the list, and modifying its - // elements without affecting the basic block. - for (auto &I : MBB) - AllInsns.push_back(&I); - - // Traverse all instructions in the AllInsns list, and if we encounter - // a store, then try to create a store group starting at that instruction - // i.e. a sequence of independent stores that can be widened. - for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { - MachineInstr *MI = *I; - // Skip null pointers (processed instructions). 
- if (!MI || !handledStoreType(MI)) - continue; - - // Found a store. Try to create a store group. - InstrGroup G; - createStoreGroup(MI, I+1, E, G); - if (G.size() > 1) - StoreGroups.push_back(G); - } -} - -// Create a single store group. The stores need to be independent between -// themselves, and also there cannot be other instructions between them -// that could read or modify storage being stored into. -void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, - InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { - assert(handledStoreType(BaseStore) && "Unexpected instruction"); - unsigned BaseReg = getBaseAddressRegister(BaseStore); - InstrGroup Other; - - Group.push_back(BaseStore); - - for (auto I = Begin; I != End; ++I) { - MachineInstr *MI = *I; - if (!MI) - continue; - - if (handledStoreType(MI)) { - // If this store instruction is aliased with anything already in the - // group, terminate the group now. - if (instrAliased(Group, getStoreTarget(MI))) - return; - // If this store is aliased to any of the memory instructions we have - // seen so far (that are not a part of this group), terminate the group. - if (instrAliased(Other, getStoreTarget(MI))) - return; - - unsigned BR = getBaseAddressRegister(MI); - if (BR == BaseReg) { - Group.push_back(MI); - *I = nullptr; - continue; - } - } - - // Assume calls are aliased to everything. - if (MI->isCall() || MI->hasUnmodeledSideEffects()) - return; - - if (MI->mayLoadOrStore()) { - if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) - return; - Other.push_back(MI); - } - } // for -} - -// Check if store instructions S1 and S2 are adjacent. More precisely, -// S2 has to access memory immediately following that accessed by S1. 
-bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, - const MachineInstr *S2) { - if (!handledStoreType(S1) || !handledStoreType(S2)) - return false; - - const MachineMemOperand &S1MO = getStoreTarget(S1); - - // Currently only handling immediate stores. - int Off1 = S1->getOperand(1).getImm(); - int Off2 = S2->getOperand(1).getImm(); - - return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2) - : int(Off1 + S1MO.getSize().getValue()) == Off2; -} - -/// Given a sequence of adjacent stores, and a maximum size of a single wide -/// store, pick a group of stores that can be replaced by a single store -/// of size not exceeding MaxSize. The selected sequence will be recorded -/// in OG ("old group" of instructions). -/// OG should be empty on entry, and should be left empty if the function -/// fails. -bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, - InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, - unsigned MaxSize) { - assert(Begin != End && "No instructions to analyze"); - assert(OG.empty() && "Old group not empty on entry"); - - if (std::distance(Begin, End) <= 1) - return false; - - MachineInstr *FirstMI = *Begin; - assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); - const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); - unsigned Alignment = FirstMMO.getAlign().value(); - unsigned SizeAccum = FirstMMO.getSize().getValue(); - unsigned FirstOffset = getStoreOffset(FirstMI); - - // The initial value of SizeAccum should always be a power of 2. - assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); - - // If the size of the first store equals to or exceeds the limit, do nothing. - if (SizeAccum >= MaxSize) - return false; - - // If the size of the first store is greater than or equal to the address - // stored to, then the store cannot be made any wider. 
- if (SizeAccum >= Alignment) - return false; - - // The offset of a store will put restrictions on how wide the store can be. - // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. - // If the first store already exhausts the offset limits, quit. Test this - // by checking if the next wider size would exceed the limit. - if ((2*SizeAccum-1) & FirstOffset) - return false; - - OG.push_back(FirstMI); - MachineInstr *S1 = FirstMI; - - // Pow2Num will be the largest number of elements in OG such that the sum - // of sizes of stores 0...Pow2Num-1 will be a power of 2. - unsigned Pow2Num = 1; - unsigned Pow2Size = SizeAccum; - - // Be greedy: keep accumulating stores as long as they are to adjacent - // memory locations, and as long as the total number of bytes stored - // does not exceed the limit (MaxSize). - // Keep track of when the total size covered is a power of 2, since - // this is a size a single store can cover. - for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { - MachineInstr *S2 = *I; - // Stores are sorted, so if S1 and S2 are not adjacent, there won't be - // any other store to fill the "hole". - if (!storesAreAdjacent(S1, S2)) - break; - - unsigned S2Size = getStoreTarget(S2).getSize().getValue(); - if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) - break; - - OG.push_back(S2); - SizeAccum += S2Size; - if (isPowerOf2_32(SizeAccum)) { - Pow2Num = OG.size(); - Pow2Size = SizeAccum; - } - if ((2*Pow2Size-1) & FirstOffset) - break; - - S1 = S2; - } - - // The stores don't add up to anything that can be widened. Clean up. - if (Pow2Num <= 1) { - OG.clear(); - return false; - } - - // Only leave the stored being widened. - OG.resize(Pow2Num); - TotalSize = Pow2Size; - return true; -} - -/// Given an "old group" OG of stores, create a "new group" NG of instructions -/// to replace them. Ideally, NG would only have a single instruction in it, -/// but that may only be possible for store-immediate. 
-bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, - unsigned TotalSize) { - // XXX Current limitations: - // - only expect stores of immediate values in OG, - // - only handle a TotalSize of up to 4. - - if (TotalSize > 4) - return false; - - unsigned Acc = 0; // Value accumulator. - unsigned Shift = 0; - - for (MachineInstr *MI : OG) { - const MachineMemOperand &MMO = getStoreTarget(MI); - MachineOperand &SO = MI->getOperand(2); // Source. - assert(SO.isImm() && "Expecting an immediate operand"); - - unsigned NBits = MMO.getSize().getValue() * 8; - unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); - unsigned Val = (SO.getImm() & Mask) << Shift; - Acc |= Val; - Shift += NBits; - } - - MachineInstr *FirstSt = OG.front(); - DebugLoc DL = OG.back()->getDebugLoc(); - const MachineMemOperand &OldM = getStoreTarget(FirstSt); - MachineMemOperand *NewM = - MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), - TotalSize, OldM.getAlign(), OldM.getAAInfo()); - - if (Acc < 0x10000) { - // Create mem[hw] = #Acc - unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : - (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; - assert(WOpc && "Unexpected size"); - - int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); - const MCInstrDesc &StD = TII->get(WOpc); - MachineOperand &MR = FirstSt->getOperand(0); - int64_t Off = FirstSt->getOperand(1).getImm(); - MachineInstr *StI = - BuildMI(*MF, DL, StD) - .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) - .addImm(Off) - .addImm(Val); - StI->addMemOperand(*MF, NewM); - NG.push_back(StI); - } else { - // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg - const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); - Register VReg = MF->getRegInfo().createVirtualRegister(RC); - MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) - .addImm(int(Acc)); - NG.push_back(TfrI); - - unsigned WOpc = (TotalSize == 2) ? 
Hexagon::S2_storerh_io :
-                    (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
-    assert(WOpc && "Unexpected size");
-
-    const MCInstrDesc &StD = TII->get(WOpc);
-    MachineOperand &MR = FirstSt->getOperand(0);
-    int64_t Off = FirstSt->getOperand(1).getImm();
-    MachineInstr *StI =
-        BuildMI(*MF, DL, StD)
-            .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
-            .addImm(Off)
-            .addReg(VReg, RegState::Kill);
-    StI->addMemOperand(*MF, NewM);
-    NG.push_back(StI);
-  }
-
-  return true;
-}
-
-// Replace instructions from the old group OG with instructions from the
-// new group NG. Conceptually, remove all instructions in OG, and then
-// insert all instructions in NG, starting at where the first instruction
-// from OG was (in the order in which they appeared in the basic block).
-// (The ordering in OG does not have to match the order in the basic block.)
-bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
-  LLVM_DEBUG({
-    dbgs() << "Replacing:\n";
-    for (auto I : OG)
-      dbgs() << "  " << *I;
-    dbgs() << "with\n";
-    for (auto I : NG)
-      dbgs() << "  " << *I;
-  });
-
-  MachineBasicBlock *MBB = OG.back()->getParent();
-  MachineBasicBlock::iterator InsertAt = MBB->end();
-
-  // Need to establish the insertion point. The best one is right before
-  // the first store in the OG, but in the order in which the stores occur
-  // in the program list. Since the ordering in OG does not correspond
-  // to the order in the program list, we need to do some work to find
-  // the insertion point.
-
-  // Create a set of all instructions in OG (for quick lookup).
-  SmallPtrSet<MachineInstr *, 16> InstrSet;
-  for (auto *I : OG)
-    InstrSet.insert(I);
-
-  // Traverse the block, until we hit an instruction from OG.
-  for (auto &I : *MBB) {
-    if (InstrSet.count(&I)) {
-      InsertAt = I;
-      break;
-    }
-  }
-
-  assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
-
-  bool AtBBStart = false;
-
-  // InsertAt points at the first instruction that will be removed.
We need - // to move it out of the way, so it remains valid after removing all the - // old stores, and so we are able to recover it back to the proper insertion - // position. - if (InsertAt != MBB->begin()) - --InsertAt; - else - AtBBStart = true; - - for (auto *I : OG) - I->eraseFromParent(); - - if (!AtBBStart) - ++InsertAt; - else - InsertAt = MBB->begin(); - - for (auto *I : NG) - MBB->insert(InsertAt, I); - - return true; -} - -// Break up the group into smaller groups, each of which can be replaced by -// a single wide store. Widen each such smaller group and replace the old -// instructions with the widened ones. -bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { - bool Changed = false; - InstrGroup::iterator I = Group.begin(), E = Group.end(); - InstrGroup OG, NG; // Old and new groups. - unsigned CollectedSize; - - while (I != E) { - OG.clear(); - NG.clear(); - - bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && - createWideStores(OG, NG, CollectedSize) && - replaceStores(OG, NG); - if (!Succ) - continue; - - assert(OG.size() > 1 && "Created invalid group"); - assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); - I += OG.size()-1; - - Changed = true; - } - - return Changed; -} - -// Process a single basic block: create the store groups, and replace them -// with the widened stores, if possible. Processing of each basic block -// is independent from processing of any other basic block. This transfor- -// mation could be stopped after having processed any basic block without -// any ill effects (other than not having performed widening in the unpro- -// cessed blocks). Also, the basic blocks can be processed in any order. 
-bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
-  InstrGroupList SGs;
-  bool Changed = false;
-
-  createStoreGroups(MBB, SGs);
-
-  auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
-    return getStoreOffset(A) < getStoreOffset(B);
-  };
-  for (auto &G : SGs) {
-    assert(G.size() > 1 && "Store group with fewer than 2 elements");
-    llvm::sort(G, Less);
-
-    Changed |= processStoreGroup(G);
-  }
-
-  return Changed;
-}
-
-bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
-  if (skipFunction(MFn.getFunction()))
-    return false;
-
-  MF = &MFn;
-  auto &ST = MFn.getSubtarget<HexagonSubtarget>();
-  TII = ST.getInstrInfo();
-  TRI = ST.getRegisterInfo();
-  MRI = &MFn.getRegInfo();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-
-  bool Changed = false;
-
-  for (auto &B : MFn)
-    Changed |= processBasicBlock(B);
-
-  return Changed;
-}
-
-FunctionPass *llvm::createHexagonStoreWidening() {
-  return new HexagonStoreWidening();
-}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index bc8b1d5f76b31..b3663500eb3a5 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -67,6 +67,9 @@ static cl::opt<bool>
     DisableStoreWidening("disable-store-widen", cl::Hidden, cl::init(false),
                          cl::desc("Disable store widening"));
 
+static cl::opt<bool> DisableLoadWidening("disable-load-widen", cl::Hidden,
+                                         cl::desc("Disable load widening"));
+
 static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
                                           cl::init(true), cl::Hidden,
                                           cl::desc("Early expansion of MUX"));
 
@@ -230,6 +233,7 @@ FunctionPass *createHexagonRDFOpt();
 FunctionPass *createHexagonSplitConst32AndConst64();
 FunctionPass *createHexagonSplitDoubleRegs();
 FunctionPass *createHexagonStoreWidening();
+FunctionPass *createHexagonLoadWidening();
 FunctionPass *createHexagonTfrCleanup();
 FunctionPass *createHexagonVectorCombineLegacyPass();
 FunctionPass *createHexagonVectorPrint();
@@ -461,6
+465,8 @@ void HexagonPassConfig::addPreRegAlloc() { insertPass(&VirtRegRewriterID, &HexagonTfrCleanupID); if (!DisableStoreWidening) addPass(createHexagonStoreWidening()); + if (!DisableLoadWidening) + addPass(createHexagonLoadWidening()); if (EnableGenMemAbs) addPass(createHexagonGenMemAbsolute()); if (!DisableHardwareLoops) diff --git a/llvm/test/CodeGen/Hexagon/load-widen.ll b/llvm/test/CodeGen/Hexagon/load-widen.ll new file mode 100644 index 0000000000000..6fe47a57b89f0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/load-widen.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -disable-load-widen < %s | FileCheck %s --check-prefix=CHECK-DISABLE + +%struct.node32 = type { ptr, ptr } + +%struct.node16_4 = type { i16, i16, i16, i16 } + +define void @test1(ptr nocapture %node) nounwind { +entry: +; There should be a memd and not two memw +; CHECK-LABEL: test1 +; CHECK: memd + %0 = load ptr, ptr %node, align 8 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} + +define void @test2(ptr nocapture %node) nounwind { +entry: +; Same as test1 but with load widening disabled. 
+; CHECK-DISABLE-LABEL: test2 +; CHECK-DISABLE: memw +; CHECK-DISABLE: memw + %0 = load ptr, ptr %node, align 8 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} + +define void @test3(ptr nocapture %node) nounwind { +entry: +; No memd because first load is not 8 byte aligned +; CHECK-LABEL: test3 +; CHECK-NOT: memd + %0 = load ptr, ptr %node, align 4 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll index 6c04e7a1e6ea4..d5d2da4d1056b 100644 --- a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll +++ b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll @@ -1,20 +1,16 @@ -; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s -; CHECK-NOT: memh -; Check that store widening does not merge the two stores. +; RUN: llc -march=hexagon --combiner-store-merging=false -verify-machineinstrs < %s | FileCheck %s +; CHECK: memh +; Check that store widening merges the two adjacent stores. 
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" target triple = "hexagon" %struct.type_t = type { i8, i8, [2 x i8] } define zeroext i8 @foo(ptr nocapture %p) nounwind { entry: - store i8 0, ptr %p, align 2, !tbaa !0 + store i8 0, ptr %p, align 2 %b = getelementptr inbounds %struct.type_t, ptr %p, i32 0, i32 1 - %0 = load i8, ptr %b, align 1, !tbaa !0 - store i8 0, ptr %b, align 1, !tbaa !0 + %0 = load i8, ptr %b, align 1 + store i8 0, ptr %b, align 1 ret i8 %0 } - -!0 = !{!"omnipotent char", !1} -!1 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Hexagon/widen-alias.ll b/llvm/test/CodeGen/Hexagon/widen-alias.ll new file mode 100644 index 0000000000000..4f84928654623 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widen-alias.ll @@ -0,0 +1,97 @@ +; Check the memd loads are generated by HexagonLoadStoreWidening pass +; Check that memw loads from adjacent memory location are replaced with memd, +; though the load/stores alias with instructions that occur later in the block. +; The order of memory operations remains unchanged. 
+ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: load_store_interleaved: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @load_store_interleaved(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %b + store float %add1, ptr %q, align 4 + ret void +} + +; Store can be widened here, but this order of instructions is not currently handled +; CHECK-LABEL: loads_between_stores: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK-NOT: memd(r{{[0-9]+}}+#4) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @loads_between_stores(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %add0 = fadd float %b, %a + %q = getelementptr i8, ptr %p, i32 4 + %r = getelementptr i8, ptr %p, i32 8 + store float %add0, ptr %r, align 4 + %0 = load float, ptr %p, align 8 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %0 + store float %add1, ptr %q, align 8 + ret void +} + +; CHECK-LABEL: loads_before_stores: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @loads_before_stores(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %1 = load float, ptr %q, align 4 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %add1 = fadd float %1, %b + store float %add1, ptr %q, align 4 + ret void +} + +; Store can be widened here, but this order of instructions is not currently 
handled +; CHECK-LABEL: store_load_interleaved: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK-NOT: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @store_load_interleaved(ptr %p, float %a, float %b, float %f) local_unnamed_addr { +entry: + %q = getelementptr i8, ptr %p, i32 4 + %r = getelementptr i8, ptr %p, i32 8 + store float %f, ptr %r, align 4 + %0 = load float, ptr %p, align 8 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %b + %add2 = fadd float %add1, %add0 + store float %add2, ptr %q, align 8 + ret void +} + +; CHECK-LABEL: stores_between_loads: +; CHECK-NOT: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @stores_between_loads(ptr %p, float %a, float %b, float %f) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %add0 = fadd float %f, %0 + store float %add0, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %add1 = fadd float %f, %b + store float %add1, ptr %q, align 8 + %r = getelementptr i8, ptr %p, i32 8 + %1 = load float, ptr %r, align 4 + %add2 = fadd float %add1, %1 + store float %add2, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/widen-not-load.ll b/llvm/test/CodeGen/Hexagon/widen-not-load.ll new file mode 100644 index 0000000000000..6206a0a5367e4 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widen-not-load.ll @@ -0,0 +1,61 @@ +; Test that double word post increment load is not generated. 
+
+; RUN: llc -march=hexagon -O2 %s -o - | FileCheck %s
+
+; Loads with positive invalid postinc is not widened
+define ptr @test1() {
+; CHECK-LABEL: test1
+; CHECK-NOT: memd(r{{[0-9]+}}++
+entry:
+  %0 = load ptr, ptr null, align 4
+  %b = getelementptr i8, ptr %0, i32 20
+  %1 = load i32, ptr %0, align 8
+  %c = getelementptr i8, ptr %0, i32 4
+  %2 = load i32, ptr %c, align 4
+  %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2)
+  ret ptr null
+}
+
+; Loads with negative invalid postinc is not widened
+define ptr @test2() {
+; CHECK-LABEL: test2
+; CHECK-NOT: memd(r{{[0-9]+}}++
+entry:
+  %0 = load ptr, ptr null, align 4
+  %b = getelementptr i8, ptr %0, i32 -20
+  %1 = load i32, ptr %0, align 8
+  %c = getelementptr i8, ptr %0, i32 4
+  %2 = load i32, ptr %c, align 4
+  %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2)
+  ret ptr null
+}
+
+; Loads with valid positive postinc is widened
+define ptr @test3() {
+; CHECK-LABEL: test3
+; CHECK: memd
+entry:
+  %0 = load ptr, ptr null, align 4
+  %b = getelementptr i8, ptr %0, i32 24
+  %1 = load i32, ptr %0, align 8
+  %c = getelementptr i8, ptr %0, i32 4
+  %2 = load i32, ptr %c, align 4
+  %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2)
+  ret ptr null
+}
+
+; Loads with valid negative postinc is widened
+define ptr @test4() {
+; CHECK-LABEL: test4
+; CHECK: memd
+entry:
+  %0 = load ptr, ptr null, align 4
+  %b = getelementptr i8, ptr %0, i32 -24
+  %1 = load i32, ptr %0, align 8
+  %c = getelementptr i8, ptr %0, i32 4
+  %2 = load i32, ptr %c, align 4
+  %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2)
+  ret ptr null
+}
+
+declare i8 @foo(ptr, i32, i32)
diff --git a/llvm/test/CodeGen/Hexagon/widen-volatile.ll b/llvm/test/CodeGen/Hexagon/widen-volatile.ll
new file mode 100644
index 0000000000000..540f517a6c96f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/widen-volatile.ll
@@ -0,0 +1,34 @@
+; Check the volatile load/stores are not widened by HexagonLoadStoreWidening pass
+
+; RUN: llc -march=hexagon
-verify-machineinstrs < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: volatile_loads: +; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0) +; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+#4) +; CHECK-NOT: r{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +define dso_local void @volatile_loads(ptr noundef %dst, ptr noundef %src0) local_unnamed_addr { +entry: + %0 = load volatile i32, ptr %src0, align 8 + %src1 = getelementptr i8, ptr %src0, i32 4 + %conv = zext i32 %0 to i64 + %1 = load volatile i32, ptr %src1, align 4 + %conv4 = zext i32 %1 to i64 + %shl = shl nuw i64 %conv4, 32 + %or = or disjoint i64 %shl, %conv + store i64 %or, ptr %dst, align 1 + ret void +} + +; CHECK-LABEL: volatile_stores: +; CHECK: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}} +; CHECK: memw(r{{[0-9]+}}+#4) = r{{[0-9]+}} +; CHECK-NOT: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}} +define dso_local void @volatile_stores(ptr noundef %dst0, i32 %a, i32 %b) local_unnamed_addr { +entry: + store volatile i32 %a, ptr %dst0, align 8 + %dst1 = getelementptr i8, ptr %dst0, i32 4 + store volatile i32 %b, ptr %dst1, align 4 + ret void +}