diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index fc96589f83238..a88f8f19f852e 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1039,19 +1039,31 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { if (GEP->getType()->isVectorTy()) return false; + // If the base of this GEP is a ptradd of a constant, let's pass the constant + // along. This ensures that when we have a chain of GEPs, the constant + // offset from each is accumulated. + Value *NewBase; + const APInt *BaseOffset; + const bool ExtractBase = + match(GEP->getPointerOperand(), + m_PtrAdd(m_Value(NewBase), m_APInt(BaseOffset))); + + const int64_t BaseByteOffset = ExtractBase ? BaseOffset->getSExtValue() : 0; + // The backend can already nicely handle the case where all indices are // constant. - if (GEP->hasAllConstantIndices()) + if (GEP->hasAllConstantIndices() && !ExtractBase) return false; bool Changed = canonicalizeArrayIndicesToIndexSize(GEP); bool NeedsExtraction; - int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); + int64_t AccumulativeByteOffset = + BaseByteOffset + accumulateByteOffset(GEP, NeedsExtraction); TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); - if (!NeedsExtraction) { + if (!NeedsExtraction && !ExtractBase) { Changed |= reorderGEP(GEP, TTI); return Changed; } @@ -1075,7 +1087,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Track information for preserving GEP flags. bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0; - bool AllNUWPreserved = true; + bool AllNUWPreserved = GEP->hasNoUnsignedWrap(); + bool NewGEPInBounds = GEP->isInBounds(); + bool NewGEPNUSW = GEP->hasNoUnsignedSignedWrap(); // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. @@ -1111,6 +1125,16 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } } + if (ExtractBase) { + GEPOperator *Base = cast<GEPOperator>(GEP->getPointerOperand()); + AllNUWPreserved &= Base->hasNoUnsignedWrap(); + NewGEPInBounds &= Base->isInBounds(); + NewGEPNUSW &= Base->hasNoUnsignedSignedWrap(); + AllOffsetsNonNegative &= BaseByteOffset >= 0; + + GEP->setOperand(0, NewBase); + RecursivelyDeleteTriviallyDeadInstructions(Base); + } // Clear the inbounds attribute because the new index may be off-bound. // e.g., @@ -1138,7 +1162,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // If the initial GEP was NUW and all operations that we reassociate were NUW // additions, the resulting GEPs are also NUW. - if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) { + if (AllNUWPreserved) { NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap(); // If the initial GEP additionally had NUSW (or inbounds, which implies // NUSW), we know that the indices in the initial GEP must all have their // signbit not set. For indices that are the result of NUW adds, the // add-operands therefore also don't have their signbit set. Therefore, all // indices of the resulting GEPs are non-negative -> we can preserve // the inbounds/nusw flag.
- CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap(); + CanPreserveInBoundsNUSW |= NewGEPNUSW; } if (CanPreserveInBoundsNUSW) { - if (GEP->isInBounds()) + if (NewGEPInBounds) NewGEPFlags |= GEPNoWrapFlags::inBounds(); - else if (GEP->hasNoUnsignedSignedWrap()) + else if (NewGEPNUSW) NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap(); } @@ -1220,11 +1244,13 @@ bool SeparateConstOffsetFromGEP::run(Function &F) { DL = &F.getDataLayout(); bool Changed = false; - for (BasicBlock &B : F) { - if (!DT->isReachableFromEntry(&B)) + + ReversePostOrderTraversal<Function *> RPOT(&F); + for (BasicBlock *B : RPOT) { + if (!DT->isReachableFromEntry(B)) continue; - for (Instruction &I : llvm::make_early_inc_range(B)) + for (Instruction &I : llvm::make_early_inc_range(*B)) if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) Changed |= splitGEP(GEP); // No need to split GEP ConstantExprs because all its indices are constant diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 553d7e09390fd..680942fcb4d4b 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -279,11 +279,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_16: ; %Flow45 +; CHECK-NEXT: .LBB0_16: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 -; CHECK-NEXT: .LBB0_17: ; %Flow46 +; CHECK-NEXT: .LBB0_17: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 ; CHECK-NEXT: s_mov_b32 s55, exec_lo @@ -330,11 +330,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 ; CHECK-NEXT: s_branch .LBB0_19 -; CHECK-NEXT: .LBB0_22: ; %Flow43 +; CHECK-NEXT: .LBB0_22: ; %Flow41 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 -; CHECK-NEXT: .LBB0_23: ; %Flow44 +; CHECK-NEXT: .LBB0_23: ; %Flow42 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 @@ -347,7 +347,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_or_b32 s53, s4, s53 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 -; CHECK-NEXT: .LBB0_25: ; %Flow51 +; CHECK-NEXT: .LBB0_25: ; %Flow49 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index 16e47f057babc..5cb3ee6f72e3b 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -279,8 +279,8 @@ define void @addrspace3(ptr addrspace(3) %in.ptr, i64 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 -;
CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 1024 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDX11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: @@ -296,8 +296,8 @@ define void @addrspace7(ptr addrspace(7) %in.ptr, i64 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP0]], i64 1024 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDX11:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index dd12c98af696d..7d8a43f59e367 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -74,13 +74,13 @@ define void @reorder_i8half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1 ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] ; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDX13:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 256 ; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDX25:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 512 ; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: [[IDX37:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 768 ; CHECK-NEXT: ret void ; entry: @@ -169,13 +169,13 @@ define void @bad_index(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] ; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 1 +; CHECK-NEXT: [[IDX13:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1 ; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 2 +; CHECK-NEXT: [[IDX25:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 2 ; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr 
addrspace(3) [[BASE]], i32 [[IDXPROM4]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 3 +; CHECK-NEXT: [[IDX37:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 3 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll new file mode 100644 index 0000000000000..e4b9dac72e96c --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll @@ -0,0 +1,413 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=nvptx64-nvidia-cuda -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +%struct.uchar4 = type { i8, i8, i8, i8 } + +define ptr @basic(ptr %ptr, i64 %offset1, i64 %offset2) { +; CHECK-LABEL: define ptr @basic( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[OFFSET1:%.*]], i64 [[OFFSET2:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_UCHAR4:%.*]], ptr [[PTR]], i64 [[OFFSET1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_UCHAR4]], ptr [[TMP1]], i64 [[OFFSET2]] +; CHECK-NEXT: [[GEP24:%.*]] = getelementptr i8, ptr [[TMP2]], i64 72 +; CHECK-NEXT: ret ptr [[GEP24]] +; + %offset3 = add i64 %offset1, 8 + %gep1 = getelementptr %struct.uchar4, ptr %ptr, i64 %offset3 + %offset4 = add i64 %offset2, 10 + %gep2 = getelementptr %struct.uchar4, ptr %gep1, i64 %offset4 + ret ptr %gep2 +} + +define i32 @more_interesting(ptr %ptr, i32 %offset1, i32 %offset2) { +; CHECK-LABEL: define i32 @more_interesting( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[OFFSET1:%.*]], i32 [[OFFSET2:%.*]]) { +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[OFFSET1]] to i64 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4:%.*]], ptr [[PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[OFFSET2]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4]], ptr [[GEP1]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[OFFSET2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4]], ptr [[TMP1]], i64 [[IDXPROM2]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[R:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[R]] +; + %gep1 = getelementptr inbounds %struct.uchar4, ptr %ptr, i32 %offset1 + %gep2 = getelementptr inbounds nuw i8, ptr %gep1, i32 8 + %gep3 = getelementptr inbounds %struct.uchar4, ptr %gep2, i32 %offset2 + %v1 = load i32, ptr %gep3, align 4 + %gep4 = getelementptr inbounds i8, ptr %gep3, i32 -8 + %gep5 = getelementptr inbounds %struct.uchar4, ptr %gep4, i32 %offset2 + %v2 = load i32, ptr %gep5, align 4 + %r = add i32 %v1, %v2 + ret i32 %r +} + +;; Check nuw/nusw/inbounds flag propagation + +; GEPs with nusw flag. All indices and offsets are non-negative. 
+define ptr @test_0(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_0( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. +define ptr @test_1(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_1( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nusw flag. All indices and offsets are non-negative. +define ptr @test_2(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_2( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nusw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. +define ptr @test_3(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_3( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nusw flag. All indices and offsets are non-negative. +define ptr @test_4(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_4( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. 
+define ptr @test_5(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_5( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Negative offsets. +define ptr @test_6(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_6( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Negative offsets. +define ptr @test_7(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_7( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Mixed positive/negative offsets. +define ptr @test_8(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_8( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Mixed positive/negative offsets. +define ptr @test_9(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_9( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Mixed negative/positive offsets. +define ptr @test_10(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_10( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, 10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Mixed negative/positive offsets. 
+define ptr @test_11(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_11( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, 10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. All positive offsets. +define ptr @test_12(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_12( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, 1 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. All positive offsets. +define ptr @test_13(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_13( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with negative offsets. +define ptr @test_14(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_14( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with negative offsets. +define ptr @test_15(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_15( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with mixed positive/negative offsets. 
+define ptr @test_16(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_16( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with mixed positive/negative offsets. +define ptr @test_17(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_17( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with mixed negative/positive offsets. +define ptr @test_18(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_18( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with mixed negative/positive offsets. +define ptr @test_19(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_19( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with all positive offsets. +define ptr @test_20(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_20( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with all positive offsets. 
+define ptr @test_21(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_21( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +define ptr @test_rpot(ptr %ptr, i64 %offset1, i64 %offset2) { +; CHECK-LABEL: define ptr @test_rpot( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[OFFSET1:%.*]], i64 [[OFFSET2:%.*]]) { +; CHECK-NEXT: [[A:.*:]] +; CHECK-NEXT: br label %[[B:.*]] +; CHECK: [[C:.*]]: +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_UCHAR4:%.*]], ptr [[TMP0:%.*]], i64 [[OFFSET2]] +; CHECK-NEXT: [[GEP24:%.*]] = getelementptr i8, ptr [[TMP1]], i64 72 +; CHECK-NEXT: br label %[[D:.*]] +; CHECK: [[B]]: +; CHECK-NEXT: [[TMP0]] = getelementptr [[STRUCT_UCHAR4]], ptr [[PTR]], i64 [[OFFSET1]] +; CHECK-NEXT: br label %[[C]] +; CHECK: [[D]]: +; CHECK-NEXT: ret ptr [[GEP24]] +; +A: + br label %B +C: + %offset4 = add i64 %offset2, 10 + %gep2 = getelementptr %struct.uchar4, ptr %gep1, i64 %offset4 + br label %D +B: + %offset3 = add i64 %offset1, 8 + %gep1 = getelementptr %struct.uchar4, ptr %ptr, i64 %offset3 + br label %C +D: + ret ptr %gep2 +} +