From 0230c8e9d31c6df2ab63632545fd0a1e1a4c52dc Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 6 May 2025 16:06:15 +0900 Subject: [PATCH 1/5] [AMDGPU] Refine GCNHazardRecognizer hasHazard() Remove recursion to avoid stack overflow on large CFGs. Avoid worklist for hazard search within single MachineBasicBlock. Ensure predecessors are visited for all state combinations. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 79 +++++++++++-------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a3b64aee297b2..d36208a667276 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -441,42 +441,55 @@ using IsExpiredFn = function_ref; using GetNumWaitStatesFn = function_ref; // Search for a hazard in a block and its predecessors. +// StateT must implement getHashValue(). template static bool -hasHazard(StateT State, +hasHazard(StateT InitialState, function_ref IsHazard, function_ref UpdateState, - const MachineBasicBlock *MBB, - MachineBasicBlock::const_reverse_instr_iterator I, - DenseSet &Visited) { - for (auto E = MBB->instr_rend(); I != E; ++I) { - // No need to look at parent BUNDLE instructions. - if (I->isBundle()) - continue; + const MachineBasicBlock *InitialMBB, + MachineBasicBlock::const_reverse_instr_iterator InitialI) { + SmallVector> Worklist; + DenseSet> Visited; + const MachineBasicBlock *MBB = InitialMBB; + StateT State = InitialState; + auto I = InitialI; + + for (;;) { + bool Expired = false; + for (auto E = MBB->instr_rend(); I != E; ++I) { + // No need to look at parent BUNDLE instructions. + if (I->isBundle()) + continue; - switch (IsHazard(State, *I)) { - case HazardFound: - return true; - case HazardExpired: - return false; - default: - // Continue search - break; - } + auto Result = IsHazard(State, *I); + if (Result == HazardFound) + return true; + if (Result == HazardExpired) { + Expired = true; + break; + } - if (I->isInlineAsm() || I->isMetaInstruction()) - continue; + if (I->isInlineAsm() || I->isMetaInstruction()) + continue; - UpdateState(State, *I); - } + UpdateState(State, *I); + } - for (MachineBasicBlock *Pred : MBB->predecessors()) { - if (!Visited.insert(Pred).second) - continue; + if (!Expired) { + unsigned StateHash = State.getHashValue(); + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (!Visited.insert(std::pair(Pred, StateHash)).second) + continue; + Worklist.emplace_back(Pred, State); + } + } - if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(), - Visited)) - return true; + if (Worklist.empty()) + break; + + std::tie(MBB, State) = Worklist.pop_back_val(); + I = MBB->instr_rbegin(); } return false; @@ -1641,6 +1654,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { SmallDenseMap DefPos; int ExecPos = std::numeric_limits::max(); int VALUs = 0; + + unsigned getHashValue() const { + return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos)); + } }; StateType State; @@ -1735,9 +1752,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { State.VALUs += 1; }; - DenseSet Visited; if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), - std::next(MI->getReverseIterator()), Visited)) + std::next(MI->getReverseIterator()))) return false; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), @@ -1778,6 +1794,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { struct StateType { int VALUs = 0; int TRANS = 0; + + unsigned getHashValue() const { return hash_combine(VALUs, TRANS); } }; StateType State; @@ -1813,9 +1831,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { State.TRANS += 1; }; - DenseSet Visited; if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), - std::next(MI->getReverseIterator()), Visited)) + std::next(MI->getReverseIterator()))) return false; // Hazard is observed - insert a wait on va_dst counter to ensure hazard is From 2a18fbe9b7a79924b2bbed2f6e80e3920bf95805 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Sun, 18 May 2025 14:45:25 +0900 Subject: [PATCH 2/5] - Rework to use unified store of states - Handle hashing collisions --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 62 ++++++++++++++++--- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index d36208a667276..5180f5a112d72 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -441,7 +441,7 @@ using IsExpiredFn = function_ref; using GetNumWaitStatesFn = function_ref; // Search for a hazard in a block and its predecessors. -// StateT must implement getHashValue(). +// StateT must implement getHashValue() and isEqual(). template static bool hasHazard(StateT InitialState, @@ -449,8 +449,47 @@ hasHazard(StateT InitialState, function_ref UpdateState, const MachineBasicBlock *InitialMBB, MachineBasicBlock::const_reverse_instr_iterator InitialI) { - SmallVector> Worklist; - DenseSet> Visited; + SmallVector> Worklist; + SmallDenseSet> Visited; + SmallVector, 1> Collisions; + SmallDenseMap StateHash2Idx; + SmallVector States; + + // States contains a vector of unique state structures. + // StateT is hashed via getHashValue() and StateHash2Idx maps each hash + // to an index in the States vector. + // In the unlikely event of a hash collision the Collision vector provides + // additional hash to index associations which must be retrieved by a linear + // scan. + + // Retrieve unique constant index for a StateT structure in the States vector. + auto ResolveStateIdx = [&](const StateT State) { + unsigned StateHash = State.getHashValue(); + unsigned StateIdx; + if (!StateHash2Idx.contains(StateHash)) { + StateIdx = States.size(); + States.push_back(State); + StateHash2Idx[StateHash] = StateIdx; + } else { + StateIdx = StateHash2Idx[StateHash]; + if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) { + // Hash collision + auto *Collision = llvm::find_if(Collisions, [&](auto &C) { + return C.first == StateHash && + StateT::isEqual(State, States[C.second]); + }); + if (Collision) { + StateIdx = Collision->second; + } else { + StateIdx = States.size(); + States.push_back(State); + Collisions.emplace_back(StateHash, StateIdx); + } + } + } + return StateIdx; + }; + const MachineBasicBlock *MBB = InitialMBB; StateT State = InitialState; auto I = InitialI; @@ -477,18 +516,20 @@ hasHazard(StateT InitialState, } if (!Expired) { - unsigned StateHash = State.getHashValue(); + unsigned StateIdx = ResolveStateIdx(State); for (MachineBasicBlock *Pred : MBB->predecessors()) { - if (!Visited.insert(std::pair(Pred, StateHash)).second) + if (!Visited.insert(std::pair(Pred, StateIdx)).second) continue; - Worklist.emplace_back(Pred, State); + Worklist.emplace_back(Pred, StateIdx); } } if (Worklist.empty()) break; - std::tie(MBB, State) = Worklist.pop_back_val(); + unsigned StateIdx; + std::tie(MBB, StateIdx) = Worklist.pop_back_val(); + State = States[StateIdx]; I = MBB->instr_rbegin(); } @@ -1658,6 +1699,10 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { unsigned getHashValue() const { return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos)); } + static bool isEqual(const StateType &LHS, const StateType &RHS) { + return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos && + LHS.VALUs == RHS.VALUs; + } }; StateType State; @@ -1796,6 +1841,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { int TRANS = 0; unsigned getHashValue() const { return hash_combine(VALUs, TRANS); } + static bool isEqual(const StateType &LHS, const StateType &RHS) { + return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS; + } }; StateType State; From 9532454595ecbbdb70585e16edcaca27e9c1f613 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Mon, 19 May 2025 10:36:08 +0900 Subject: [PATCH 3/5] - Fix use of llvm::find_if --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 5180f5a112d72..912059ce87adb 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -474,11 +474,11 @@ hasHazard(StateT InitialState, StateIdx = StateHash2Idx[StateHash]; if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) { // Hash collision - auto *Collision = llvm::find_if(Collisions, [&](auto &C) { + auto Collision = llvm::find_if(Collisions, [&](auto &C) { return C.first == StateHash && StateT::isEqual(State, States[C.second]); }); - if (Collision) { + if (Collision != Collisions.end()) { StateIdx = Collision->second; } else { StateIdx = States.size(); From 61a3e4893a640e9cbf83c894a151534c23c36158 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 9 Sep 2025 15:29:07 +0900 Subject: [PATCH 4/5] - Rewrite most of custom implementation to DenseMap with traits --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 129 ++++++++++-------- 1 file changed, 73 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 912059ce87adb..44e2f7a206805 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -441,59 +441,63 @@ using IsExpiredFn = function_ref; using GetNumWaitStatesFn = function_ref; // Search for a hazard in a block and its predecessors. -// StateT must implement getHashValue() and isEqual(). -template +template static bool hasHazard(StateT InitialState, function_ref IsHazard, function_ref UpdateState, const MachineBasicBlock *InitialMBB, MachineBasicBlock::const_reverse_instr_iterator InitialI) { - SmallVector> Worklist; - SmallDenseSet> Visited; - SmallVector, 1> Collisions; - SmallDenseMap StateHash2Idx; - SmallVector States; - - // States contains a vector of unique state structures. - // StateT is hashed via getHashValue() and StateHash2Idx maps each hash - // to an index in the States vector. - // In the unlikely event of a hash collision the Collision vector provides - // additional hash to index associations which must be retrieved by a linear - // scan. - - // Retrieve unique constant index for a StateT structure in the States vector. - auto ResolveStateIdx = [&](const StateT State) { - unsigned StateHash = State.getHashValue(); - unsigned StateIdx; - if (!StateHash2Idx.contains(StateHash)) { - StateIdx = States.size(); - States.push_back(State); - StateHash2Idx[StateHash] = StateIdx; - } else { - StateIdx = StateHash2Idx[StateHash]; - if (LLVM_UNLIKELY(!StateT::isEqual(State, States[StateIdx]))) { - // Hash collision - auto Collision = llvm::find_if(Collisions, [&](auto &C) { - return C.first == StateHash && - StateT::isEqual(State, States[C.second]); - }); - if (Collision != Collisions.end()) { - StateIdx = Collision->second; - } else { - StateIdx = States.size(); - States.push_back(State); - Collisions.emplace_back(StateHash, StateIdx); - } - } + struct StateMapKey { + SmallVectorImpl *States; + unsigned Idx; + static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) { + return LHS.States == RHS.States && LHS.Idx == RHS.Idx; } - return StateIdx; }; + struct StateMapKeyTraits : DenseMapInfo { + static inline StateMapKey getEmptyKey() { + return {static_cast *>( + DenseMapInfo::getEmptyKey()), + DenseMapInfo::getEmptyKey()}; + } + static inline StateMapKey getTombstoneKey() { + return {static_cast *>( + DenseMapInfo::getTombstoneKey()), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const StateMapKey &Key) { + return StateTraitsT::getHashValue((*Key.States)[Key.Idx]); + } + static unsigned getHashValue(const StateT &State) { + return StateTraitsT::getHashValue(State); + } + static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) { + const auto EKey = getEmptyKey(); + const auto TKey = getTombstoneKey(); + if (StateMapKey::isEqual(LHS, EKey) || StateMapKey::isEqual(RHS, EKey) || + StateMapKey::isEqual(LHS, TKey) || StateMapKey::isEqual(RHS, TKey)) + return StateMapKey::isEqual(LHS, RHS); + return StateTraitsT::isEqual((*LHS.States)[LHS.Idx], + (*RHS.States)[RHS.Idx]); + } + static bool isEqual(const StateT &LHS, const StateMapKey &RHS) { + if (StateMapKey::isEqual(RHS, getEmptyKey()) || + StateMapKey::isEqual(RHS, getTombstoneKey())) + return false; + return StateTraitsT::isEqual(LHS, (*RHS.States)[RHS.Idx]); + } + }; + + SmallDenseMap StateMap; + SmallVector States; const MachineBasicBlock *MBB = InitialMBB; StateT State = InitialState; auto I = InitialI; + SmallSetVector, 16> Worklist; + unsigned WorkIdx = 0; for (;;) { bool Expired = false; for (auto E = MBB->instr_rend(); I != E; ++I) { @@ -516,19 +520,25 @@ hasHazard(StateT InitialState, } if (!Expired) { - unsigned StateIdx = ResolveStateIdx(State); - for (MachineBasicBlock *Pred : MBB->predecessors()) { - if (!Visited.insert(std::pair(Pred, StateIdx)).second) - continue; - Worklist.emplace_back(Pred, StateIdx); + auto StateIt = StateMap.find_as(State); + unsigned StateIdx; + if (StateIt != StateMap.end()) { + StateIdx = StateIt->getSecond(); + } else { + StateIdx = States.size(); + States.emplace_back(State); + StateMapKey Key = {&States, StateIdx}; + StateMap.insert(std::pair(Key, StateIdx)); } + for (MachineBasicBlock *Pred : MBB->predecessors()) + Worklist.insert(std::pair(Pred, StateIdx)); } - if (Worklist.empty()) + if (WorkIdx == Worklist.size()) break; unsigned StateIdx; - std::tie(MBB, StateIdx) = Worklist.pop_back_val(); + std::tie(MBB, StateIdx) = Worklist[WorkIdx++]; State = States[StateIdx]; I = MBB->instr_rbegin(); } @@ -1695,9 +1705,11 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { SmallDenseMap DefPos; int ExecPos = std::numeric_limits::max(); int VALUs = 0; - - unsigned getHashValue() const { - return hash_combine(ExecPos, VALUs, hash_combine_range(DefPos)); + }; + struct StateTraits { + static unsigned getHashValue(const StateType &State) { + return hash_combine(State.ExecPos, State.VALUs, + hash_combine_range(State.DefPos)); } static bool isEqual(const StateType &LHS, const StateType &RHS) { return LHS.DefPos == RHS.DefPos && LHS.ExecPos == RHS.ExecPos && @@ -1797,8 +1809,9 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { State.VALUs += 1; }; - if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), - std::next(MI->getReverseIterator()))) + if (!hasHazard(State, IsHazardFn, UpdateStateFn, + MI->getParent(), + std::next(MI->getReverseIterator()))) return false; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), @@ -1839,8 +1852,11 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { struct StateType { int VALUs = 0; int TRANS = 0; - - unsigned getHashValue() const { return hash_combine(VALUs, TRANS); } + }; + struct StateTraits { + static unsigned getHashValue(const StateType &State) { + return hash_combine(State.VALUs, State.TRANS); + } static bool isEqual(const StateType &LHS, const StateType &RHS) { return LHS.VALUs == RHS.VALUs && LHS.TRANS == RHS.TRANS; } @@ -1879,8 +1895,9 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { State.TRANS += 1; }; - if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), - std::next(MI->getReverseIterator()))) + if (!hasHazard(State, IsHazardFn, UpdateStateFn, + MI->getParent(), + std::next(MI->getReverseIterator()))) return false; // Hazard is observed - insert a wait on va_dst counter to ensure hazard is From 5a736bdc55ae6db8f1d2843ce5f0ac1cdf3ea29e Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Wed, 24 Sep 2025 17:32:22 +0900 Subject: [PATCH 5/5] - Address reviewer comments --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 43 ++++++++----------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 44e2f7a206805..67b40a5a36423 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -441,7 +441,7 @@ using IsExpiredFn = function_ref; using GetNumWaitStatesFn = function_ref; // Search for a hazard in a block and its predecessors. -template +template static bool hasHazard(StateT InitialState, function_ref IsHazard, @@ -467,10 +467,10 @@ hasHazard(StateT InitialState, DenseMapInfo::getTombstoneKey()}; } static unsigned getHashValue(const StateMapKey &Key) { - return StateTraitsT::getHashValue((*Key.States)[Key.Idx]); + return StateT::getHashValue((*Key.States)[Key.Idx]); } static unsigned getHashValue(const StateT &State) { - return StateTraitsT::getHashValue(State); + return StateT::getHashValue(State); } static bool isEqual(const StateMapKey &LHS, const StateMapKey &RHS) { const auto EKey = getEmptyKey(); @@ -478,23 +478,22 @@ hasHazard(StateT InitialState, if (StateMapKey::isEqual(LHS, EKey) || StateMapKey::isEqual(RHS, EKey) || StateMapKey::isEqual(LHS, TKey) || StateMapKey::isEqual(RHS, TKey)) return StateMapKey::isEqual(LHS, RHS); - return StateTraitsT::isEqual((*LHS.States)[LHS.Idx], - (*RHS.States)[RHS.Idx]); + return StateT::isEqual((*LHS.States)[LHS.Idx], (*RHS.States)[RHS.Idx]); } static bool isEqual(const StateT &LHS, const StateMapKey &RHS) { if (StateMapKey::isEqual(RHS, getEmptyKey()) || StateMapKey::isEqual(RHS, getTombstoneKey())) return false; - return StateTraitsT::isEqual(LHS, (*RHS.States)[RHS.Idx]); + return StateT::isEqual(LHS, (*RHS.States)[RHS.Idx]); } }; SmallDenseMap StateMap; SmallVector States; + MachineBasicBlock::const_reverse_instr_iterator I = InitialI; const MachineBasicBlock *MBB = InitialMBB; StateT State = InitialState; - auto I = InitialI; SmallSetVector, 16> Worklist; unsigned WorkIdx = 0; @@ -520,15 +519,13 @@ hasHazard(StateT InitialState, } if (!Expired) { - auto StateIt = StateMap.find_as(State); - unsigned StateIdx; - if (StateIt != StateMap.end()) { - StateIdx = StateIt->getSecond(); - } else { - StateIdx = States.size(); + unsigned StateIdx = States.size(); + StateMapKey Key = {&States, StateIdx}; + auto Insertion = StateMap.insert_as(std::pair(Key, StateIdx), State); + if (Insertion.second) { States.emplace_back(State); - StateMapKey Key = {&States, StateIdx}; - StateMap.insert(std::pair(Key, StateIdx)); + } else { + StateIdx = Insertion.first->second; } for (MachineBasicBlock *Pred : MBB->predecessors()) Worklist.insert(std::pair(Pred, StateIdx)); @@ -1705,8 +1702,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { SmallDenseMap DefPos; int ExecPos = std::numeric_limits::max(); int VALUs = 0; - }; - struct StateTraits { + static unsigned getHashValue(const StateType &State) { return hash_combine(State.ExecPos, State.VALUs, hash_combine_range(State.DefPos)); @@ -1809,9 +1805,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { State.VALUs += 1; }; - if (!hasHazard(State, IsHazardFn, UpdateStateFn, - MI->getParent(), - std::next(MI->getReverseIterator()))) + if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), + std::next(MI->getReverseIterator()))) return false; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), @@ -1852,8 +1847,7 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { struct StateType { int VALUs = 0; int TRANS = 0; - }; - struct StateTraits { + static unsigned getHashValue(const StateType &State) { return hash_combine(State.VALUs, State.TRANS); } @@ -1895,9 +1889,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { State.TRANS += 1; }; - if (!hasHazard(State, IsHazardFn, UpdateStateFn, - MI->getParent(), - std::next(MI->getReverseIterator()))) + if (!hasHazard(State, IsHazardFn, UpdateStateFn, MI->getParent(), + std::next(MI->getReverseIterator()))) return false; // Hazard is observed - insert a wait on va_dst counter to ensure hazard is