diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f53016f62abbe..15094ca37e550 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1939,6 +1939,27 @@ def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
 
 } // defset AMDGPUBufferIntrinsics
 
+// A wrapper around raw_ptr_buffer_load_lds that takes the global offset
+// from the addrspace(7) pointer argument.
+def int_amdgcn_buffer_fat_ptr_load_lds : Intrinsic <
+  [],
+  [LLVMQualPointerType<7>,    // buffer fat pointer (SGPRx4 + VGPR)
+   LLVMQualPointerType<3>,    // LDS base offset
+   llvm_i32_ty,               // Data byte size: 1/2/4 (/12/16 for gfx950)
+   llvm_i32_ty,               // imm offset(imm, included in bounds checking and swizzling)
+   llvm_i32_ty],              // auxiliary/cachepolicy(imm):
+                              //   bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
+                              //   bit 3 = swz, bit 4 = scc (gfx90a)
+                              //   gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+                              //   gfx12+: bits [0-2] = th, bits [3-4] = scope,
+                              //           bit 6 = swz
+                              //     all:  volatile op (bit 31, stripped at lowering)
+  [IntrWillReturn, IntrArgMemOnly,
+   ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+   WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
+   ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>;
+
 // Uses that do not set the done bit should set IntrWriteMem on the
 // call site.
 def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 5dd1fe14e5626..5f684afe55b59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2157,6 +2157,7 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
   case Intrinsic::memset:
   case Intrinsic::memset_inline:
   case Intrinsic::experimental_memset_pattern:
+  case Intrinsic::amdgcn_buffer_fat_ptr_load_lds:
     return true;
   }
 }
@@ -2245,6 +2246,31 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
     SplitUsers.insert(&I);
     return {NewRsrc, Off};
   }
+  case Intrinsic::amdgcn_buffer_fat_ptr_load_lds: {
+    // Rewrite the fat-pointer form as a call to raw.ptr.buffer.load.lds: the
+    // resource half of the split pointer becomes the first operand and its
+    // 32-bit offset half is passed in the fourth operand position.
+    Value *BufferPtr = I.getArgOperand(0);
+    assert(isSplitFatPtr(BufferPtr->getType()) &&
+           "amdgcn.buffer.fat.ptr.load.lds must have a buffer fat pointer "
+           "as argument 0");
+    IRB.SetInsertPoint(&I);
+    auto [Rsrc, Off] = getPtrParts(BufferPtr);
+    Value *LDSPtr = I.getArgOperand(1);
+    Value *LoadSize = I.getArgOperand(2);
+    Value *ImmOff = I.getArgOperand(3);
+    Value *Aux = I.getArgOperand(4);
+    // The wrapper has no scalar-offset operand, so a constant zero is passed.
+    Value *SOffset = IRB.getInt32(0);
+    Instruction *NewLoad = IRB.CreateIntrinsic(
+        Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
+        {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
+    copyMetadata(NewLoad, &I);
+    SplitUsers.insert(&I);
+    I.replaceAllUsesWith(NewLoad);
+    // The intrinsic returns void, so there are no pointer parts to hand back.
+    return {nullptr, nullptr};
+  }
   }
   return {nullptr, nullptr};
 }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
index e6c2d1907068f..1d019d737bde5 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
@@ -1728,3 +1728,21 @@ define void @memset_pattern_unknown(ptr addrspace(7) inreg %ptr, i32 inreg %leng
   call void @llvm.experimental.memset.pattern.p7.i32.i32(ptr addrspace(7) %ptr, i32 1, i32 %length, i1 false)
   ret void
 }
+
+;;; Buffer load to LDS
+
+declare void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7), ptr addrspace(3), i32 immarg, i32 immarg, i32 immarg)
+
+define void @llvm_amdgcn_buffer_fat_ptr_load_lds(ptr addrspace(7) inreg %p, ptr addrspace(3) inreg %l, i32 %idx) {
+; CHECK-LABEL: define void @llvm_amdgcn_buffer_fat_ptr_load_lds(
+; CHECK-SAME: { ptr addrspace(8), i32 } inreg [[P:%.*]], ptr addrspace(3) inreg [[L:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0
+; CHECK-NEXT:    [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1
+; CHECK-NEXT:    [[Q:%.*]] = add i32 [[P_OFF]], [[IDX]]
+; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[P_RSRC]], ptr addrspace(3) [[L]], i32 4, i32 [[Q]], i32 0, i32 16, i32 0)
+; CHECK-NEXT:    ret void
+;
+  %q = getelementptr i8, ptr addrspace(7) %p, i32 %idx
+  call void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7) %q, ptr addrspace(3) %l, i32 4, i32 16, i32 0)
+  ret void
+}