diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 6f3cdf54dceec..c0846b123d187 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -336,7 +336,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) { if (AllocaCost > VectorizationBudget) { LLVM_DEBUG(dbgs() << " Alloca too big for vectorization: " << *AI << "\n"); - return false; + return Changed; } if (tryPromoteAllocaToVector(*AI)) { diff --git a/llvm/test/CodeGen/AMDGPU/half-alloca-promotion.ll b/llvm/test/CodeGen/AMDGPU/half-alloca-promotion.ll new file mode 100644 index 0000000000000..cfec49f3652fb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/half-alloca-promotion.ll @@ -0,0 +1,11 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes="amdgpu-promote-alloca-to-vector" -o - %s +; We don't need to check any output here: with expensive checks enabled, this +; test case crashes without the fix. Pass correctness is beyond the scope of this test. + +define fastcc void @foo() { +entry: + %det = alloca [4 x i32], align 16, addrspace(5) + %trkltPosTmpYZ = alloca [2 x float], align 4, addrspace(5) + %trkltCovTmp = alloca [3 x float], align 4, addrspace(5) + ret void +}