diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 33474e7de0188..76553e99431c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1195,14 +1195,10 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( WorkList.push_back(ICmp); } - if (UseInst->getOpcode() == Instruction::AddrSpaceCast) { - // Give up if the pointer may be captured. - if (PointerMayBeCaptured(UseInst, true, true)) - return false; - // Don't collect the users of this. - WorkList.push_back(User); - continue; - } + // TODO: If we know the address is only observed through flat pointers, we + // could still promote. + if (UseInst->getOpcode() == Instruction::AddrSpaceCast) + return false; // Do not promote vector/aggregate type instructions. It is hard to track // their users. diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll index 8a467812ec485..bf4e02d8d7e1c 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll @@ -3,10 +3,10 @@ ; The types of the users of the addrspacecast should not be changed. ; CHECK-LABEL: @invalid_bitcast_addrspace( -; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [256 x [1 x i32]], ptr addrspace(3) @invalid_bitcast_addrspace.data, i32 0, i32 %{{[0-9]+}} -; CHECK: [[ASC:%[a-z0-9]+]] = addrspacecast ptr addrspace(3) [[GEP]] to ptr -; CHECK: [[LOAD:%[a-z0-9]+]] = load <2 x i16>, ptr [[ASC]] -; CHECK: bitcast <2 x i16> [[LOAD]] to <2 x half> +; CHECK: alloca +; CHECK: addrspacecast +; CHECK: load +; CHECK: bitcast define amdgpu_kernel void @invalid_bitcast_addrspace() #0 { entry: %data = alloca [1 x i32], addrspace(5) @@ -16,4 +16,22 @@ entry: ret void } +; A callee use is not promotable even if it has a nocapture attribute. +define void @nocapture_callee(ptr nocapture noundef writeonly %flat.observes.addrspace) #0 { + %private.ptr = addrspacecast ptr %flat.observes.addrspace to ptr addrspace(5) + store i32 1, ptr addrspace(5) %private.ptr, align 4 + ret void +} + +; CHECK-LABEL: @kernel_call_nocapture( +; CHECK: alloca i32 +; CHECK-NEXT: addrspacecast +; CHECK-NEXT: call +define amdgpu_kernel void @kernel_call_nocapture() #0 { + %alloca = alloca i32, align 4, addrspace(5) + %flat.alloca = addrspacecast ptr addrspace(5) %alloca to ptr + call void @nocapture_callee(ptr noundef %flat.alloca) + ret void +} + attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }