@@ -6703,9 +6703,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67036703 // We are only processing the operands of d16 image operations on subtargets
67046704 // that use the unpacked register layout, or need to repack the TFE result.
67056705
6706+ unsigned IntrOpcode = Intr->BaseOpcode ;
6707+ // For image atomics: use the no-return opcode if the result has no non-debug uses.
6708+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode ) {
6709+ const MachineRegisterInfo &MRI = MF.getRegInfo ();
6710+ Register ResultDef = MI.getOperand (0 ).getReg ();
6711+ if (MRI.use_nodbg_empty (ResultDef))
6712+ IntrOpcode = Intr->AtomicNoRetBaseOpcode ;
6713+ }
67066714 // TODO: Do we need to guard against already legalized intrinsics?
67076715 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
6708- AMDGPU::getMIMGBaseOpcodeInfo (Intr-> BaseOpcode );
6716+ AMDGPU::getMIMGBaseOpcodeInfo (IntrOpcode );
67096717
67106718 MachineRegisterInfo *MRI = B.getMRI ();
67116719 const LLT S32 = LLT::scalar (32 );
@@ -6723,7 +6731,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67236731
67246732 const bool IsAtomicPacked16Bit =
67256733 (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
6726- BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
6734+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
6735+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
6736+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
67276737
67286738 // Check for 16 bit addresses and pack if true.
67296739 LLT GradTy =
0 commit comments