@@ -52,13 +52,15 @@ class SIMemOpInfo final {
5252 SyncScope::ID SSID = SyncScope::System;
5353 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
5454 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
55+ bool IsNonTemporal = false ;
5556
5657 SIMemOpInfo (SyncScope::ID SSID, AtomicOrdering Ordering) // two-argument form: only scope and ordering are supplied
5758 : SSID(SSID), Ordering(Ordering) {} // FailureOrdering and IsNonTemporal keep their in-class defaults (NotAtomic, false)
5859
5960 SIMemOpInfo (SyncScope::ID SSID, AtomicOrdering Ordering,
60- AtomicOrdering FailureOrdering)
61- : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering) {}
61+ AtomicOrdering FailureOrdering, bool IsNonTemporal = false ) // defaulted, so the three-argument call form remains valid
62+ : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
63+ IsNonTemporal (IsNonTemporal) {} // stored as given; read back through isNonTemporal()
6264
6365 /// \returns Info constructed from \p MI, which has at least one machine memory
6466 /// operand.
@@ -81,6 +83,11 @@ class SIMemOpInfo final {
8183 AtomicOrdering getFailureOrdering () const {
8284 return FailureOrdering; // NotAtomic unless a failure ordering was passed at construction
8385 }
86+ /// \returns True if the memory access of the machine instruction used to
87+ /// create this SIMemOpInfo is non-temporal, false otherwise.
88+ bool isNonTemporal () const {
89+ return IsNonTemporal; // false unless set through the four-argument constructor
90+ }
8491
8592 /// \returns True if the ordering constraint of the machine instruction used to
8693 /// create this SIMemOpInfo is unordered or higher, false otherwise.
@@ -130,6 +137,34 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
130137 /// \brief List of atomic pseudo instructions.
131138 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
132139
140+ /// \brief Sets the named bit operand (\p BitName) of \p MI to 1 if \p MI has that
141+ /// operand and its immediate is currently 0. \returns true if \p MI is modified, false otherwise.
142+ template <uint16_t BitName>
143+ bool enableNamedBit (const MachineBasicBlock::iterator &MI) const {
144+ int BitIdx = AMDGPU::getNamedOperandIdx (MI->getOpcode (), BitName);
145+ if (BitIdx == -1 ) // -1 is treated as "this opcode has no such operand": nothing to set
146+ return false ;
147+
148+ MachineOperand &Bit = MI->getOperand (BitIdx);
149+ if (Bit.getImm () != 0 ) // any nonzero immediate counts as already enabled; report no change
150+ return false ;
151+
152+ Bit.setImm (1 );
153+ return true ;
154+ }
155+
156+ /// \brief Sets the GLC bit to "true" if present in \p MI. \returns true if \p MI
157+ /// is modified, false otherwise.
158+ bool enableGLCBit (const MachineBasicBlock::iterator &MI) const {
159+ return enableNamedBit<AMDGPU::OpName::glc>(MI); // thin wrapper: all checks live in enableNamedBit
160+ }
161+
162+ /// \brief Sets the SLC bit to "true" if present in \p MI. \returns true if \p MI
163+ /// is modified, false otherwise.
164+ bool enableSLCBit (const MachineBasicBlock::iterator &MI) const {
165+ return enableNamedBit<AMDGPU::OpName::slc>(MI); // thin wrapper: all checks live in enableNamedBit
166+ }
167+
133168 /// \brief Inserts a "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
134169 /// Always returns true.
135170 bool insertBufferWbinvl1Vol (MachineBasicBlock::iterator &MI,
@@ -139,10 +174,6 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
139174 bool insertWaitcntVmcnt0 (MachineBasicBlock::iterator &MI,
140175 bool Before = true ) const ;
141176
142- // / \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
143- // / modified, false otherwise.
144- bool setGLC (const MachineBasicBlock::iterator &MI) const ;
145-
146177 /// \brief Removes all processed atomic pseudo instructions from the current
147178 /// function. Returns true if the current function is modified, false otherwise.
148179 bool removeAtomicPseudoMIs ();
@@ -199,6 +230,7 @@ Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
199230 SyncScope::ID SSID = SyncScope::SingleThread;
200231 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
201232 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
233+ bool IsNonTemporal = true ;
202234
203235 // Validator should check whether or not MMOs cover the entire set of
204236 // locations accessed by the memory instruction.
@@ -217,9 +249,12 @@ Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
217249 FailureOrdering =
218250 isStrongerThan (FailureOrdering, MMO->getFailureOrdering ()) ?
219251 FailureOrdering : MMO->getFailureOrdering ();
252+
253+ if (!(MMO->getFlags () & MachineMemOperand::MONonTemporal))
254+ IsNonTemporal = false ;
220255 }
221256
222- return SIMemOpInfo (SSID, Ordering, FailureOrdering);
257+ return SIMemOpInfo (SSID, Ordering, FailureOrdering, IsNonTemporal );
223258}
224259
225260/* static */
@@ -343,19 +378,6 @@ bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
343378 return true ;
344379}
345380
346- bool SIMemoryLegalizer::setGLC (const MachineBasicBlock::iterator &MI) const {
347- int GLCIdx = AMDGPU::getNamedOperandIdx (MI->getOpcode (), AMDGPU::OpName::glc);
348- if (GLCIdx == -1 )
349- return false ;
350-
351- MachineOperand &GLC = MI->getOperand (GLCIdx);
352- if (GLC.getImm () == 1 )
353- return false ;
354-
355- GLC.setImm (1 );
356- return true ;
357- }
358-
359381bool SIMemoryLegalizer::removeAtomicPseudoMIs () {
360382 if (AtomicPseudoMIs.empty ())
361383 return false ;
@@ -378,7 +400,7 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
378400 MOI.getSSID () == MMI->getAgentSSID ()) {
379401 if (MOI.getOrdering () == AtomicOrdering::Acquire ||
380402 MOI.getOrdering () == AtomicOrdering::SequentiallyConsistent)
381- Changed |= setGLC (MI);
403+ Changed |= enableGLCBit (MI);
382404
383405 if (MOI.getOrdering () == AtomicOrdering::SequentiallyConsistent)
384406 Changed |= insertWaitcntVmcnt0 (MI);
@@ -401,6 +423,13 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
401423 llvm_unreachable (" Unsupported synchronization scope" );
402424 }
403425
426+ // Atomic instructions do not have the nontemporal attribute.
427+ if (MOI.isNonTemporal ()) {
428+ Changed |= enableGLCBit (MI);
429+ Changed |= enableSLCBit (MI);
430+ return Changed;
431+ }
432+
404433 return Changed;
405434}
406435
@@ -429,6 +458,13 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
429458 llvm_unreachable (" Unsupported synchronization scope" );
430459 }
431460
461+ // Atomic instructions do not have the nontemporal attribute.
462+ if (MOI.isNonTemporal ()) {
463+ Changed |= enableGLCBit (MI);
464+ Changed |= enableSLCBit (MI);
465+ return Changed;
466+ }
467+
432468 return Changed;
433469}
434470
@@ -499,7 +535,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
499535 if (MOI.getSSID () == SyncScope::SingleThread ||
500536 MOI.getSSID () == MMI->getWorkgroupSSID () ||
501537 MOI.getSSID () == MMI->getWavefrontSSID ()) {
502- Changed |= setGLC (MI);
538+ Changed |= enableGLCBit (MI);
503539 return Changed;
504540 }
505541
@@ -536,7 +572,7 @@ bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
536572 if (MOI.getSSID () == SyncScope::SingleThread ||
537573 MOI.getSSID () == MMI->getWorkgroupSSID () ||
538574 MOI.getSSID () == MMI->getWavefrontSSID ()) {
539- Changed |= setGLC (MI);
575+ Changed |= enableGLCBit (MI);
540576 return Changed;
541577 }
542578
0 commit comments