diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index c54a956fc7e24..9f85396cde259 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -972,22 +972,13 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI) { - Type *GEPType = GEP->getResultElementType(); - // TODO: support reordering for non-trivial GEP chains - if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) + if (GEP->getNumIndices() != 1) return false; auto PtrGEP = dyn_cast(GEP->getPointerOperand()); if (!PtrGEP) return false; - Type *PtrGEPType = PtrGEP->getResultElementType(); - // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) - return false; - - // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType != GEPType || - PtrGEP->getSourceElementType() != GEP->getSourceElementType()) + if (PtrGEP->getNumIndices() != 1) return false; bool NestedNeedsExtraction; @@ -1002,8 +993,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) return false; - IRBuilder<> Builder(GEP); - Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); bool GEPInBounds = GEP->isInBounds(); bool PtrGEPInBounds = PtrGEP->isInBounds(); bool IsChainInBounds = GEPInBounds && PtrGEPInBounds; @@ -1018,13 +1007,14 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, } } + IRBuilder<> Builder(GEP); // For trivial GEP chains, we can swap the indicies. - auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices())); - cast(NewSrc)->setIsInBounds(IsChainInBounds); - auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, - SmallVector(PtrGEP->indices())); - cast(NewGEP)->setIsInBounds(IsChainInBounds); + Value *NewSrc = Builder.CreateGEP( + GEP->getSourceElementType(), PtrGEP->getPointerOperand(), + SmallVector(GEP->indices()), "", IsChainInBounds); + Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc, + SmallVector(PtrGEP->indices()), + "", IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); RecursivelyDeleteTriviallyDeadInstructions(GEP); return true; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index c24bbd5f658f9..16e47f057babc 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -1,28 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s -define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsPossiblyNegative(ptr %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsPossiblyNegative( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1 ; CHECK-NEXT: ret void ; entry: - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1 ret void } -define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonNegative( +define void @inboundsNonNegative_nonCanonical(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative_nonCanonical( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IN_IDX1_NNEG1:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = sext i32 [[IN_IDX1_NNEG1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1 ; CHECK-NEXT: ret void ; @@ -33,19 +32,277 @@ entry: ret void } -define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsNonNegative(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i64 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained(ptr %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsNonchained( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1 ; CHECK-NEXT: ret void ; entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr <2 x i8>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonNegativeType_i16i8(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegativeType_i16i8( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i16, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i8, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonNegative_i8i16(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative_i8i16( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained_first(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained_first( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr i32, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained_second(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained_second( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @notInbounds(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @notInbounds( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr i128, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType1(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType1( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType2(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x half>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <4 x half>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType3(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType3( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType4(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType4( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x ptr addrspace(1)>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds <8 x ptr addrspace(1)>, ptr %const1, i64 %in.idx1.nneg + ret void +} + + +define void @ptrType(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(2), ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds ptr addrspace(2), ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @ptrType2(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(3), ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds ptr addrspace(3), ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @ptrType3(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType3( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(7), ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds ptr addrspace(7), ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace1(ptr addrspace(1) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace1( +; CHECK-SAME: ptr addrspace(1) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(1) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(1) %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace3(ptr addrspace(3) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace3( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(3) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(3) %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace7(ptr addrspace(7) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace7( +; CHECK-SAME: ptr addrspace(7) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(7) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(7) %const1, i64 %in.idx1.nneg ret void } diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index 7137f0fb66fdb..b4119f0b50b4f 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -1,175 +1,286 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s -define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: sink_addr: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_lshl_b32 s2, s2, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB0_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x200 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x400 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x600 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @sink_addr(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @sink_addr( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: ret void +; entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} + +define void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @illegal_addr_mode( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38192 +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38448 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr half, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38764 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]] +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i64 38192 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i64 38448 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i64 38764 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} + + +define void @reorder_i8half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @reorder_i8half( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i8, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr i8, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr i8, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr i8, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} + +define void @reorder_i64half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @reorder_i64half( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i64, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr i64, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr i64, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end +define void @reorder_halfi8(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @reorder_halfi8( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + + +define void @bad_index(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @bad_index( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 1 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 2 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 3 +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr i8, ptr addrspace(3) %base, i64 1 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr i8, ptr addrspace(3) %base, i64 2 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr i8, ptr addrspace(3) %base, i64 3 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 ret void } -define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: illegal_addr_mode: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s5, 1 -; CHECK-NEXT: s_lshl_b32 s1, s6, 1 -; CHECK-NEXT: s_add_i32 s3, s4, s0 -; CHECK-NEXT: s_add_i32 s3, s3, s1 -; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60 -; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60 -; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: v_mov_b32_e32 v8, s1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v0 -; CHECK-NEXT: ds_read_b128 v[4:7], v4 -; CHECK-NEXT: ds_read_b128 v[8:11], v8 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB1_2: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm + +%struct.vec = type { [8 x i8], [4 x half] } +define void @vector_struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @vector_struct_type( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 256 +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 512 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 768 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]] +; CHECK-NEXT: ret void +; entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end + %base = getelementptr [1024 x %struct.vec], ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end +define void @struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @struct_type( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1 + ret void +} -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) +define void @struct_type_multiindex(ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 %in.idx1, i64 %in.idx2) { +; CHECK-LABEL: define void @struct_type_multiindex( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]], i64 [[IN_IDX2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]], i32 0, i32 0 +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX2]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i32 2 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM3]], i32 0, i32 0 +; CHECK-NEXT: [[IDXPROM5:%.*]] = trunc i64 [[IN_IDX2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP3]], i32 [[IDXPROM5]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 4 +; CHECK-NEXT: [[IDXPROM6:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM6]], i32 0, i32 0 +; CHECK-NEXT: [[IDXPROM8:%.*]] = trunc i64 [[IN_IDX2]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 [[IDXPROM8]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 6 +; CHECK-NEXT: ret void +; +entry: + %const1 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 2 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx2 + %const2 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 4 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx2 + %const3 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 6 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx2 ret void } diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll index a91c8172177f9..43dda1ae15176 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -7,14 +7,14 @@ define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %i ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 2048 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST11]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 2048 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST22:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4096 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST22]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4096 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST33:%.*]] = getelementptr i8, ptr [[TMP2]], i64 6144 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST33]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 6144 ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 ; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] ; CHECK: bb.1: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll index a15f11a634db5..2e3b6ca3653fc 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll @@ -186,3 +186,66 @@ end: call void asm sideeffect "; use $0", "v"(ptr %idx3) ret void } + + +define void @different_type_reorder2(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @different_type_reorder2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i64, ptr [[BASE]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i8, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i8, ptr %base, i64 %in.idx1 + %const1 = getelementptr i64, ptr %base, i64 256 + %idx1 = getelementptr i8, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr %base, i64 512 + %idx2 = getelementptr i8, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i64, ptr %base, i64 768 + %idx3 = getelementptr i8, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +}