From a8c5ae1a45a55a4a9bf5f628da649e7c518cd222 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Mon, 29 Apr 2024 15:49:15 -0700 Subject: [PATCH 1/9] [SeparateConstOffsetFromGEP] Support GEP reordering for conflicting types. Change-Id: Ic3ecd2c39f49b50ba7486a4223332771f8d672c0 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 54 ++- .../AMDGPU/reorder-gep-inbounds.ll | 34 ++ .../AMDGPU/reorder-gep.ll | 429 ++++++++++++++++++ .../SeparateConstOffsetFromGEP/reorder-gep.ll | 63 +++ 4 files changed, 575 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index c54a956fc7e24..e9ff227b6a5c9 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -985,9 +985,10 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) return false; - // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType != GEPType || - PtrGEP->getSourceElementType() != GEP->getSourceElementType()) + bool GEPIsPtr = GEPType->getScalarType()->isPointerTy(); + bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPointerTy(); + + if (GEPIsPtr != PtrGEPIsPtr) return false; bool NestedNeedsExtraction; @@ -1002,8 +1003,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) return false; - IRBuilder<> Builder(GEP); - Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); bool GEPInBounds = GEP->isInBounds(); bool PtrGEPInBounds = PtrGEP->isInBounds(); bool IsChainInBounds = GEPInBounds && PtrGEPInBounds; @@ -1017,6 +1016,50 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); } } + TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getSourceElementType()); + TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getSourceElementType()); + IRBuilder<> Builder(GEP); + Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); + if (GEPSize > PtrGEPSize) { + if (GEPSize % PtrGEPSize) + return false; + unsigned Ratio = GEPSize / PtrGEPSize; + if (NestedByteOffset % Ratio) + return false; + + auto NewGEPOffset = Builder.CreateUDiv( + *PtrGEP->indices().begin(), + Builder.getIntN( + PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(), + Ratio)); + auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), + SmallVector(GEP->indices())); + cast(NewSrc)->setIsInBounds(IsChainInBounds); + auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset); + cast(NewGEP)->setIsInBounds(IsChainInBounds); + GEP->replaceAllUsesWith(NewGEP); + RecursivelyDeleteTriviallyDeadInstructions(GEP); + return true; + } + + if (GEPSize < PtrGEPSize) { + if (PtrGEPSize % GEPSize) + return false; + unsigned Ratio = PtrGEPSize / GEPSize; + + auto NewGEPOffset = Builder.CreateMul( + *PtrGEP->indices().begin(), + Builder.getIntN( + PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(), + Ratio)); + auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), + SmallVector(GEP->indices())); + cast(NewSrc)->setIsInBounds(IsChainInBounds); + auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset); + GEP->replaceAllUsesWith(NewGEP); + RecursivelyDeleteTriviallyDeadInstructions(GEP); + return true; + } // For trivial GEP chains, we can swap the indicies. 
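  // Illustrative sketch only (assumed example, not part of the patch): for a
  // mismatched chain such as
  //   %c = getelementptr i8,   ptr %base, i32 256    ; PtrGEP, constant index
  //   %v = getelementptr half, ptr %c,    i32 %idx   ; GEP, variable index
  // GEPSize is 16 bits and PtrGEPSize is 8 bits, so Ratio = 2 and the expand
  // branch above divides the nested constant, producing
  //   %s = getelementptr half, ptr %base, i32 %idx
  //   %r = getelementptr half, ptr %s,    i32 128    ; 256 x i8 == 128 x half
  // The shrink branch is the mirror image and multiplies by Ratio instead.
  // When both source element types have the same size, neither branch fires
  // and the plain index swap below is used unchanged.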
auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), @@ -1025,6 +1068,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, SmallVector(PtrGEP->indices())); cast(NewGEP)->setIsInBounds(IsChainInBounds); + cast(NewGEP)->setIsInBounds(IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); RecursivelyDeleteTriviallyDeadInstructions(GEP); return true; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index c24bbd5f658f9..d5d89d42bc697 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -49,3 +49,37 @@ entry: %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg ret void } + +define void @inboundsNonNegativeTypeShrink(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegativeTypeShrink( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2048 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds i16, ptr %in.ptr, i32 1024 + %idx1 = getelementptr inbounds i8, ptr %const1, i32 %in.idx1.nneg + ret void +} + +define void @inboundsNonNegativeTypeExpand(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegativeTypeExpand( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 512 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds i8, ptr %in.ptr, i32 1024 + %idx1 = getelementptr inbounds i16, ptr %const1, i32 %in.idx1.nneg + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index 7137f0fb66fdb..fcf48dc415c03 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -173,3 +173,432 @@ end: call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } + + +define protected amdgpu_kernel void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: reorder_expand: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_add_i32 s0, s0, s1 +; CHECK-NEXT: s_lshl_b32 s2, s2, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB2_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:256 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:512 +; CHECK-NEXT: 
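; Illustrative note (assumed, not autogenerated): the offset:256/512/768
; immediates in this block are what the reordering buys. Once the constant i8
; GEP is applied last, e.g.
;   %t = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
;   %r = getelementptr half, ptr addrspace(3) %t, i32 128
; the 256-byte constant folds straight into the ds_read_b128 immediate.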
ds_read_b128 v[12:15], v12 offset:768 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB2_2: ; %end +; CHECK-NEXT: s_add_i32 s1, s0, 0x100 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 0x200 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 0x300 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr i8, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr i8, ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr i8, ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr i8, ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} + +define protected amdgpu_kernel void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: reorder_shrink: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s3, s1, 3 +; CHECK-NEXT: s_add_i32 s0, s0, s3 +; CHECK-NEXT: s_lshl_b32 s2, s2, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB3_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2048 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4096 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6144 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; 
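; Illustrative note (assumed): reorder_shrink exercises the i64 -> half
; direction, so Ratio = 4 and the constant indices 256/512/768 (in i64 units)
; become 1024/2048/3072 half elements, e.g.
;   %r = getelementptr half, ptr addrspace(3) %s, i32 1024
; which matches the 2048/4096/6144 byte immediates checked above.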
CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB3_2: ; %end +; CHECK-NEXT: s_add_i32 s1, s0, 0x800 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 0x1000 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 0x1800 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr i64, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr i64, ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr i64, ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr i64, ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} + +define protected amdgpu_kernel void @reorder_shrink2(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: reorder_shrink2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s3, s1, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s3 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB4_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB4_2: ; %end +; CHECK-NEXT: s_add_i32 s1, s0, 0x200 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 0x400 +; 
CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 0x600 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} + + + +define protected amdgpu_kernel void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: bad_index: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s3, s1, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s3 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB5_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB5_2: ; %end +; CHECK-NEXT: s_add_i32 s1, s0, 2 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 4 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 6 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; 
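; Illustrative note (assumed): bad_index here is the half -> i8 (shrink)
; direction, so the constant half indices 1/2/3 correspond to the 2/4/6 byte
; offsets above, e.g. the reordered form of %idx1 is roughly
;   %r = getelementptr i8, ptr addrspace(3) %s, i32 2   ; 1 x half == 2 x i8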
CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 1 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 2 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 3 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} + + +%struct.Packed = type <{ [8 x i8], [4 x half] }> +define protected amdgpu_kernel void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: struct_type: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s0, s5, 14 +; CHECK-NEXT: s_add_i32 s3, s4, s0 +; CHECK-NEXT: s_add_i32 s3, s3, s6 +; CHECK-NEXT: s_add_i32 s2, s3, 0x400000 +; CHECK-NEXT: s_add_i32 s1, s3, 0x800000 +; CHECK-NEXT: s_add_i32 s0, s3, 0xc00000 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB6_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_mov_b32_e32 v4, s2 +; CHECK-NEXT: v_mov_b32_e32 v8, s1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v0 +; CHECK-NEXT: ds_read_b128 v[4:7], v4 +; CHECK-NEXT: ds_read_b128 v[8:11], v8 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB6_2: ; %end +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr i8, ptr 
addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll index a15f11a634db5..2e3b6ca3653fc 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll @@ -186,3 +186,66 @@ end: call void asm sideeffect "; use $0", "v"(ptr %idx3) ret void } + + +define void @different_type_reorder2(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @different_type_reorder2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i64, ptr [[BASE]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i8, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i8, ptr %base, i64 
%in.idx1 + %const1 = getelementptr i64, ptr %base, i64 256 + %idx1 = getelementptr i8, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr %base, i64 512 + %idx2 = getelementptr i8, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i64, ptr %base, i64 768 + %idx3 = getelementptr i8, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} From 88c84fcba708ddad4f6de6da2fc0cb0f81bd0df5 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Wed, 1 May 2024 17:12:27 -0700 Subject: [PATCH 2/9] actually set inbounds Change-Id: I3bd435e1baa27a36402cb06977c60662bda5059b --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 2 +- .../SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index e9ff227b6a5c9..56e5ac8a0cf95 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1056,6 +1056,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, SmallVector(GEP->indices())); cast(NewSrc)->setIsInBounds(IsChainInBounds); auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset); + cast(NewGEP)->setIsInBounds(IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); RecursivelyDeleteTriviallyDeadInstructions(GEP); return true; @@ -1068,7 +1069,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, SmallVector(PtrGEP->indices())); cast(NewGEP)->setIsInBounds(IsChainInBounds); - cast(NewGEP)->setIsInBounds(IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); RecursivelyDeleteTriviallyDeadInstructions(GEP); return true; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index d5d89d42bc697..9a601a6bfc992 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -57,7 +57,7 @@ define void @inboundsNonNegativeTypeShrink(ptr %in.ptr, i32 %in.idx1) { ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2048 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 2048 ; CHECK-NEXT: ret void ; entry: From 664c92fbfa6946f8bbe4c2fc3dd0a420af3b6289 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Thu, 2 May 2024 08:54:28 -0700 Subject: [PATCH 
3/9] Review comments Change-Id: If84c0b348407e40dee488145d575497f687c56d3 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 15 +- .../AMDGPU/reorder-gep.ll | 631 +++--------------- .../NVPTX/lower-gep-reorder.ll | 12 +- 3 files changed, 123 insertions(+), 535 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 56e5ac8a0cf95..db3c3f093e508 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -972,7 +972,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI) { - Type *GEPType = GEP->getResultElementType(); + Type *GEPType = GEP->getSourceElementType(); // TODO: support reordering for non-trivial GEP chains if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) return false; @@ -980,13 +980,13 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, auto PtrGEP = dyn_cast(GEP->getPointerOperand()); if (!PtrGEP) return false; - Type *PtrGEPType = PtrGEP->getResultElementType(); + Type *PtrGEPType = PtrGEP->getSourceElementType(); // TODO: support reordering for non-trivial GEP chains if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) return false; - bool GEPIsPtr = GEPType->getScalarType()->isPointerTy(); - bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPointerTy(); + bool GEPIsPtr = GEPType->getScalarType()->isPtrOrPtrVectorTy(); + bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPtrOrPtrVectorTy(); if (GEPIsPtr != PtrGEPIsPtr) return false; @@ -1016,8 +1016,11 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); } } - TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getSourceElementType()); - TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getSourceElementType()); + TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getIndexedType( + GEP->getSourceElementType(), GEP->indices().begin()->get())); + TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getIndexedType( + PtrGEP->getSourceElementType(), PtrGEP->indices().begin()->get())); + IRBuilder<> Builder(GEP); Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); if (GEPSize > PtrGEPSize) { diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index fcf48dc415c03..72347ddbea2a4 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -1,60 +1,20 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s - -define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: sink_addr: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_lshl_b32 s2, s2, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: 
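; Illustrative note (assumed, not autogenerated): sink_addr keeps matching
; element types on both GEPs, so only the plain index swap is expected in the
; IR checked below, e.g. for the first constant chain
;   %t = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
;   %r = getelementptr half, ptr addrspace(3) %t, i32 256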
ds_read_b128 v[4:7], v12 offset:512 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB0_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x200 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x400 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x600 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s + +define void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @sink_addr( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 256 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 512 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 768 +; CHECK-NEXT: ret void +; entry: %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 @@ -64,85 +24,23 @@ entry: %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm 
sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } -define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: illegal_addr_mode: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s5, 1 -; CHECK-NEXT: s_lshl_b32 s1, s6, 1 -; CHECK-NEXT: s_add_i32 s3, s4, s0 -; CHECK-NEXT: s_add_i32 s3, s3, s1 -; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60 -; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60 -; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: v_mov_b32_e32 v8, s1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v0 -; CHECK-NEXT: ds_read_b128 v[4:7], v4 -; CHECK-NEXT: ds_read_b128 v[8:11], v8 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB1_2: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @illegal_addr_mode( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38192 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38448 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr half, ptr addrspace(3) [[CONST2]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38764 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]] +; CHECK-NEXT: ret void +; entry: %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 @@ -152,82 +50,24 @@ entry: %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764 %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) 
%idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } -define protected amdgpu_kernel void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: reorder_expand: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_add_i32 s0, s0, s1 -; CHECK-NEXT: s_lshl_b32 s2, s2, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB2_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:256 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:512 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:768 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB2_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x100 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x200 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x300 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_expand( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 128 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 256 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 384 +; CHECK-NEXT: ret void +; entry: %base = 
getelementptr i8, ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 @@ -237,82 +77,23 @@ entry: %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr i8, ptr addrspace(3) %base, i32 768 %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } -define protected amdgpu_kernel void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: reorder_shrink: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 3 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_lshl_b32 s2, s2, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB3_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2048 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4096 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6144 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB3_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x800 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x1000 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x1800 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_shrink( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) 
[[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 1024 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 2048 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 3072 +; CHECK-NEXT: ret void +; entry: %base = getelementptr i64, ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 @@ -322,81 +103,23 @@ entry: %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr i64, ptr addrspace(3) %base, i32 768 %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } -define protected amdgpu_kernel void @reorder_shrink2(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: reorder_shrink2: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB4_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB4_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x200 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x400 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x600 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @reorder_shrink2(ptr 
addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_shrink2( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 512 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 1024 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1536 +; CHECK-NEXT: ret void +; entry: %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1 @@ -406,172 +129,53 @@ entry: %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } -define protected amdgpu_kernel void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: bad_index: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB5_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB5_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 2 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 4 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; 
CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 6 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @bad_index( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 1 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 1 +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 3 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]] +; CHECK-NEXT: ret void +; entry: %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 1 - %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 2 - %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 3 - %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr i8, ptr addrspace(3) %base, i32 1 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr i8, ptr addrspace(3) %base, i32 2 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr i8, ptr addrspace(3) %base, i32 3 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 ret void } %struct.Packed = type <{ [8 x i8], [4 x half] }> -define protected amdgpu_kernel void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: struct_type: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: 
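; Illustrative note (assumed): struct_type is a negative case: the
; [1024 x %struct.Packed] array is an aggregate source element type, so
; reorderGEP bails out early and the constant GEPs are expected to survive
; unchanged in the IR checked below, e.g.
;   %const1 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 256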
s_lshl_b32 s0, s5, 14 -; CHECK-NEXT: s_add_i32 s3, s4, s0 -; CHECK-NEXT: s_add_i32 s3, s3, s6 -; CHECK-NEXT: s_add_i32 s2, s3, 0x400000 -; CHECK-NEXT: s_add_i32 s1, s3, 0x800000 -; CHECK-NEXT: s_add_i32 s0, s3, 0xc00000 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB6_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: v_mov_b32_e32 v8, s1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v0 -; CHECK-NEXT: ds_read_b128 v[4:7], v4 -; CHECK-NEXT: ds_read_b128 v[8:11], v8 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB6_2: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm +define void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @struct_type( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST2]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]] +; CHECK-NEXT: ret void +; entry: %base = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %in.ptr, i32 %in.idx0 %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1 @@ -581,24 +185,5 @@ entry: %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1 %const3 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 768 %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", 
"v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) ret void } diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll index a91c8172177f9..d43987accad78 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -7,14 +7,14 @@ define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %i ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 2048 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST11]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[TMP3]], i64 256 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST22:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4096 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST22]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[TMP4]], i64 512 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[CONST33:%.*]] = getelementptr i8, ptr [[TMP2]], i64 6144 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST33]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[TMP7]], i64 768 ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 ; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] ; CHECK: bb.1: From a4c1767f51a9394c59b7b528dbbc4fb0ccb65b3c Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 7 May 2024 08:46:38 -0700 Subject: [PATCH 4/9] Review Comments 2 Change-Id: Ic94d65538a02cb73d12d461ec513b915dafe711d --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 58 ++----------------- .../AMDGPU/reorder-gep-inbounds.ll | 12 ++-- .../AMDGPU/reorder-gep.ll | 40 ++++++------- .../NVPTX/lower-gep-reorder.ll | 6 +- 4 files changed, 35 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index db3c3f093e508..af0c41df18d2a 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1016,62 +1016,16 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); } } - TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getIndexedType( - GEP->getSourceElementType(), GEP->indices().begin()->get())); - TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getIndexedType( - PtrGEP->getSourceElementType(), PtrGEP->indices().begin()->get())); IRBuilder<> Builder(GEP); 
Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); - if (GEPSize > PtrGEPSize) { - if (GEPSize % PtrGEPSize) - return false; - unsigned Ratio = GEPSize / PtrGEPSize; - if (NestedByteOffset % Ratio) - return false; - - auto NewGEPOffset = Builder.CreateUDiv( - *PtrGEP->indices().begin(), - Builder.getIntN( - PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(), - Ratio)); - auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices())); - cast(NewSrc)->setIsInBounds(IsChainInBounds); - auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset); - cast(NewGEP)->setIsInBounds(IsChainInBounds); - GEP->replaceAllUsesWith(NewGEP); - RecursivelyDeleteTriviallyDeadInstructions(GEP); - return true; - } - - if (GEPSize < PtrGEPSize) { - if (PtrGEPSize % GEPSize) - return false; - unsigned Ratio = PtrGEPSize / GEPSize; - - auto NewGEPOffset = Builder.CreateMul( - *PtrGEP->indices().begin(), - Builder.getIntN( - PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(), - Ratio)); - auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices())); - cast(NewSrc)->setIsInBounds(IsChainInBounds); - auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset); - cast(NewGEP)->setIsInBounds(IsChainInBounds); - GEP->replaceAllUsesWith(NewGEP); - RecursivelyDeleteTriviallyDeadInstructions(GEP); - return true; - } - // For trivial GEP chains, we can swap the indicies. - auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices())); - cast(NewSrc)->setIsInBounds(IsChainInBounds); - auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, - SmallVector(PtrGEP->indices())); - cast(NewGEP)->setIsInBounds(IsChainInBounds); + Value *NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), + SmallVector(GEP->indices()), "", + IsChainInBounds); + Value *NewGEP = Builder.CreateGEP(PtrGEPType, NewSrc, + SmallVector(PtrGEP->indices()), + "", IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); RecursivelyDeleteTriviallyDeadInstructions(GEP); return true; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index 9a601a6bfc992..e3511fc81f5a1 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -50,14 +50,14 @@ entry: ret void } -define void @inboundsNonNegativeTypeShrink(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonNegativeTypeShrink( +define void @inboundsNonNegativeType_i16i8(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegativeType_i16i8( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 2048 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: @@ -67,14 +67,14 @@ entry: ret void } -define void @inboundsNonNegativeTypeExpand(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonNegativeTypeExpand( +define void @inboundsNonNegative_i8i16(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: 
define void @inboundsNonNegative_i8i16( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 512 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index 72347ddbea2a4..c8465f4cf975d 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -54,18 +54,18 @@ entry: } -define void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: define void @reorder_expand( +define void @reorder_i8half(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_i8half( ; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 128 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 256 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 256 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 512 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 384 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 768 ; CHECK-NEXT: ret void ; entry: @@ -80,18 +80,18 @@ entry: ret void } -define void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: define void @reorder_shrink( +define void @reorder_i64half(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_i64half( ; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 1024 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP0]], i32 256 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 2048 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP2]], i32 512 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = 
getelementptr half, ptr addrspace(3) [[TMP4]], i32 3072 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP4]], i32 768 ; CHECK-NEXT: ret void ; entry: @@ -106,18 +106,18 @@ entry: ret void } -define void @reorder_shrink2(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: define void @reorder_shrink2( +define void @reorder_halfi8(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: define void @reorder_halfi8( ; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 512 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 256 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 1024 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 512 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1536 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 768 ; CHECK-NEXT: ret void ; entry: @@ -140,12 +140,12 @@ define void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 1 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 1 -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 3 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 3 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll index d43987accad78..43dda1ae15176 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -8,13 +8,13 @@ define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %i ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], 
i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[TMP3]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 2048 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP1]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[TMP4]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4096 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP2]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[TMP7]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 6144 ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 ; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] ; CHECK: bb.1: From a8ee263615444eb0f6d458f10556a05a1aa6dcee Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 7 May 2024 11:40:26 -0700 Subject: [PATCH 5/9] Add Tests Change-Id: I301582a8a06dbef0184b9fcfa22cb7992e5bb9e2 --- .../AMDGPU/reorder-gep-inbounds.ll | 212 +++++++++++++++--- 1 file changed, 177 insertions(+), 35 deletions(-) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index e3511fc81f5a1..ff38f0a70ef4e 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -1,28 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s -define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsPossiblyNegative(ptr %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsPossiblyNegative( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 ; CHECK-NEXT: ret void ; entry: - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1 ret void } -define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonNegative( +define void @inboundsNonNegative_nonCanonical(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative_nonCanonical( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IN_IDX1_NNEG1:%.*]] = and i32 [[IN_IDX1]], 
2147483647 +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = sext i32 [[IN_IDX1_NNEG1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1 ; CHECK-NEXT: ret void ; @@ -33,53 +32,196 @@ entry: ret void } -define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsNonNegative(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i64 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained(ptr %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsNonchained( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1 ; CHECK-NEXT: ret void ; entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i64 1 + %idx1 = getelementptr <2 x i8>, ptr %const1, i64 %in.idx1.nneg ret void } -define void @inboundsNonNegativeType_i16i8(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsNonNegativeType_i16i8(ptr %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsNonNegativeType_i16i8( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 1024 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 1024 ; CHECK-NEXT: ret void ; entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds i16, ptr %in.ptr, i32 1024 - %idx1 = getelementptr inbounds i8, ptr %const1, i32 %in.idx1.nneg + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i16, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i8, ptr %const1, i64 %in.idx1.nneg ret void } -define void @inboundsNonNegative_i8i16(ptr %in.ptr, i32 %in.idx1) { +define void @inboundsNonNegative_i8i16(ptr 
%in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @inboundsNonNegative_i8i16( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 1024 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 1024 ; CHECK-NEXT: ret void ; entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds i8, ptr %in.ptr, i32 1024 - %idx1 = getelementptr inbounds i16, ptr %const1, i32 %in.idx1.nneg + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained_first(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained_first( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr i32, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained_second(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained_second( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @notInbounds(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @notInbounds( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr i8, ptr %in.ptr, i64 1024 + %idx1 = getelementptr i128, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType1(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @badVectorType( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 3 
+; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType2(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @badVectorType2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x half>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <4 x half>, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace1(ptr addrspace(1) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace1( +; CHECK-SAME: ptr addrspace(1) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(1) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(1) %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace3(ptr addrspace(3) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace3( +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(3) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(3) %const1, i64 %in.idx1.nneg + ret void +} + +define void @addrspace7(ptr addrspace(7) %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @addrspace7( +; CHECK-SAME: ptr addrspace(7) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP0]], i64 1024 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 9223372036854775807 + %const1 = getelementptr inbounds i8, ptr addrspace(7) %in.ptr, i64 1024 + %idx1 = getelementptr inbounds i128, ptr addrspace(7) %const1, i64 %in.idx1.nneg ret void } From b371d0bf58e935ca0244a4f98cf03c5c144f4400 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 7 May 2024 19:01:15 -0700 Subject: [PATCH 6/9] Allow ptr source type Change-Id: I66bdfcfd7ce3d53c28b4439bd8ebd65905574560 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 6 -- 
.../AMDGPU/reorder-gep-inbounds.ll | 87 ++++++++++++++++++- 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index af0c41df18d2a..93752a345daab 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -985,12 +985,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) return false; - bool GEPIsPtr = GEPType->getScalarType()->isPtrOrPtrVectorTy(); - bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPtrOrPtrVectorTy(); - - if (GEPIsPtr != PtrGEPIsPtr) - return false; - bool NestedNeedsExtraction; int64_t NestedByteOffset = accumulateByteOffset(PtrGEP, NestedNeedsExtraction); diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index ff38f0a70ef4e..5df016e2eea70 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -6,7 +6,7 @@ define void @inboundsPossiblyNegative(ptr %in.ptr, i64 %in.idx1) { ; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i64 1 ; CHECK-NEXT: ret void ; entry: @@ -145,7 +145,7 @@ entry: } define void @vectorType1(ptr %in.ptr, i64 %in.idx1) { -; CHECK-LABEL: define void @badVectorType( +; CHECK-LABEL: define void @vectorType1( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 @@ -161,7 +161,7 @@ entry: } define void @vectorType2(ptr %in.ptr, i64 %in.idx1) { -; CHECK-LABEL: define void @badVectorType2( +; CHECK-LABEL: define void @vectorType2( ; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 @@ -176,6 +176,87 @@ entry: ret void } +define void @vectorType3(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType3( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @vectorType4(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @vectorType4( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x ptr addrspace(1)>, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; 
+entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds <8 x ptr addrspace(1)>, ptr %const1, i64 %in.idx1.nneg + ret void +} + + +define void @ptrType(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @ptrType2(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType2( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(3), ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds ptr addrspace(3), ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds i64, ptr %const1, i64 %in.idx1.nneg + ret void +} + +define void @ptrType3(ptr %in.ptr, i64 %in.idx1) { +; CHECK-LABEL: define void @ptrType3( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr addrspace(7), ptr [[TMP0]], i32 3 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i64 %in.idx1, 2147483647 + %const1 = getelementptr inbounds ptr addrspace(7), ptr %in.ptr, i32 3 + %idx1 = getelementptr inbounds i16, ptr %const1, i64 %in.idx1.nneg + ret void +} + define void @addrspace1(ptr addrspace(1) %in.ptr, i64 %in.idx1) { ; CHECK-LABEL: define void @addrspace1( ; CHECK-SAME: ptr addrspace(1) [[IN_PTR:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { From fe698dde150d7dd4a733be6ada055656ebafbced Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 7 May 2024 19:04:26 -0700 Subject: [PATCH 7/9] remove redundant test Change-Id: I8fdfcb81082fa2e868bae101eef40237a21d8e37 --- .../SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index 5df016e2eea70..16e47f057babc 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -215,12 +215,12 @@ define void @ptrType(ptr %in.ptr, i64 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 2147483647 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[IN_PTR]], i64 [[IN_IDX1_NNEG]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x ptr>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
ptr addrspace(2), ptr [[TMP0]], i32 1 ; CHECK-NEXT: ret void ; entry: %in.idx1.nneg = and i64 %in.idx1, 2147483647 - %const1 = getelementptr inbounds <4 x ptr>, ptr %in.ptr, i32 1 + %const1 = getelementptr inbounds ptr addrspace(2), ptr %in.ptr, i32 1 %idx1 = getelementptr inbounds ptr, ptr %const1, i64 %in.idx1.nneg ret void } From 4e8a1067ce9e5a081c1a797434351b17f74384c5 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Wed, 8 May 2024 08:05:13 -0700 Subject: [PATCH 8/9] allow struct types Change-Id: Ifa15c2b28da6efde3ee98562cf97a688f4228366 --- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 93752a345daab..f4f40c3683f97 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -974,7 +974,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI) { Type *GEPType = GEP->getSourceElementType(); // TODO: support reordering for non-trivial GEP chains - if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) + if (GEP->getNumIndices() != 1) return false; auto PtrGEP = dyn_cast(GEP->getPointerOperand()); @@ -982,7 +982,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, return false; Type *PtrGEPType = PtrGEP->getSourceElementType(); // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) + if (PtrGEP->getNumIndices() != 1) return false; bool NestedNeedsExtraction; @@ -1012,7 +1012,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, } IRBuilder<> Builder(GEP); - Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); // For trivial GEP chains, we can swap the indicies. Value *NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), SmallVector(GEP->indices()), "", From 15c25feeb884adf00784a4526b827e29afddb9c5 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Wed, 8 May 2024 09:13:51 -0700 Subject: [PATCH 9/9] Reorder struct types + canonicalize idx types Change-Id: If4eed6fa854bba99f0f86153e5e6224a5d21a805 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 12 +- .../AMDGPU/reorder-gep.ll | 351 +++++++++++------- 2 files changed, 228 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index f4f40c3683f97..9f85396cde259 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -972,16 +972,12 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI) { - Type *GEPType = GEP->getSourceElementType(); - // TODO: support reordering for non-trivial GEP chains if (GEP->getNumIndices() != 1) return false; auto PtrGEP = dyn_cast(GEP->getPointerOperand()); if (!PtrGEP) return false; - Type *PtrGEPType = PtrGEP->getSourceElementType(); - // TODO: support reordering for non-trivial GEP chains if (PtrGEP->getNumIndices() != 1) return false; @@ -1013,10 +1009,10 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, IRBuilder<> Builder(GEP); // For trivial GEP chains, we can swap the indicies. 
- Value *NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices()), "", - IsChainInBounds); - Value *NewGEP = Builder.CreateGEP(PtrGEPType, NewSrc, + Value *NewSrc = Builder.CreateGEP( + GEP->getSourceElementType(), PtrGEP->getPointerOperand(), + SmallVector(GEP->indices()), "", IsChainInBounds); + Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc, SmallVector(PtrGEP->indices()), "", IsChainInBounds); GEP->replaceAllUsesWith(NewGEP); diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index c8465f4cf975d..b4119f0b50b4f 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -1,189 +1,286 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s -define void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +define void @sink_addr(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-LABEL: define void @sink_addr( -; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 256 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 512 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 768 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768 ; CHECK-NEXT: ret void ; entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 - %idx1 = getelementptr half, ptr 
addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 ret void } -define void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +define void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-LABEL: define void @illegal_addr_mode( -; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38192 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[CONST2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38448 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr half, ptr addrspace(3) [[CONST2]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 38764 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]] +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38192 +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr half, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38448 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr half, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i64 38764 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr half, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]] ; CHECK-NEXT: ret void ; entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = 
getelementptr half, ptr addrspace(3) %base, i32 38764 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i64 38192 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i64 38448 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i64 38764 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 ret void } -define void @reorder_i8half(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +define void @reorder_i8half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-LABEL: define void @reorder_i8half( -; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 256 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 512 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 768 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 768 ; CHECK-NEXT: ret void ; entry: - %base = getelementptr i8, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr i8, ptr addrspace(3) %base, i32 256 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr i8, ptr addrspace(3) %base, i32 512 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr i8, ptr addrspace(3) %base, i32 768 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %base = getelementptr i8, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 
+ %const1 = getelementptr i8, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr i8, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1 + %const3 = getelementptr i8, ptr addrspace(3) %base, i64 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1 ret void } -define void @reorder_i64half(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +define void @reorder_i64half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-LABEL: define void @reorder_i64half( -; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP0]], i32 256 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP2]], i32 512 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP4]], i32 768 +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32 +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr addrspace(3) [[TMP4]], i64 768 ; CHECK-NEXT: ret void ; entry: - %base = getelementptr i64, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr i64, ptr addrspace(3) %base, i32 256 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr i64, ptr addrspace(3) %base, i32 512 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr i64, ptr addrspace(3) %base, i32 768 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %base = getelementptr i64, ptr addrspace(3) %in.ptr, i64 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1 + %const1 = getelementptr i64, ptr addrspace(3) %base, i64 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr addrspace(3) %base, i64 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 
%in.idx1
+  %const3 = getelementptr i64, ptr addrspace(3) %base, i64 768
+  %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
   ret void
 }
 
-define void @reorder_halfi8(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+define void @reorder_halfi8(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
 ; CHECK-LABEL: define void @reorder_halfi8(
-; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]]
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i32 256
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i32 512
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i32 768
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr half, ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr half, ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP4]], i64 768
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
-  %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
-  %const1 = getelementptr half, ptr addrspace(3) %base, i32 256
-  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
-  %const2 = getelementptr half, ptr addrspace(3) %base, i32 512
-  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
-  %const3 = getelementptr half, ptr addrspace(3) %base, i32 768
-  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+  %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+  %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+  %const1 = getelementptr half, ptr addrspace(3) %base, i64 256
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+  %const2 = getelementptr half, ptr addrspace(3) %base, i64 512
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+  %const3 = getelementptr half, ptr addrspace(3) %base, i64 768
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
   ret void
 }
 
-define void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+define void @bad_index(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
 ; CHECK-LABEL: define void @bad_index(
-; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]]
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 2
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 3
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[BASE:%.*]] = getelementptr half, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 1
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 2
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 3
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
-  %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
-  %const1 = getelementptr i8, ptr addrspace(3) %base, i32 1
-  %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
-  %const2 = getelementptr i8, ptr addrspace(3) %base, i32 2
-  %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
-  %const3 = getelementptr i8, ptr addrspace(3) %base, i32 3
-  %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
+  %base = getelementptr half, ptr addrspace(3) %in.ptr, i64 %in.idx0
+  %idx0 = getelementptr half, ptr addrspace(3) %base, i64 %in.idx1
+  %const1 = getelementptr i8, ptr addrspace(3) %base, i64 1
+  %idx1 = getelementptr half, ptr addrspace(3) %const1, i64 %in.idx1
+  %const2 = getelementptr i8, ptr addrspace(3) %base, i64 2
+  %idx2 = getelementptr half, ptr addrspace(3) %const2, i64 %in.idx1
+  %const3 = getelementptr i8, ptr addrspace(3) %base, i64 3
+  %idx3 = getelementptr half, ptr addrspace(3) %const3, i64 %in.idx1
   ret void
 }
 
-%struct.Packed = type <{ [8 x i8], [4 x half] }>
-define void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+%struct.vec = type { [8 x i8], [4 x half] }
+define void @vector_struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
+; CHECK-LABEL: define void @vector_struct_type(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[BASE:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT:    [[CONST1:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 256
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST1]], i32 [[IDXPROM2]]
+; CHECK-NEXT:    [[CONST2:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 512
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST2]], i32 [[IDXPROM3]]
+; CHECK-NEXT:    [[CONST3:%.*]] = getelementptr [1024 x %struct.vec], ptr addrspace(3) [[BASE]], i64 768
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST3]], i32 [[IDXPROM4]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %base = getelementptr [1024 x %struct.vec], ptr addrspace(3) %in.ptr, i64 %in.idx0
+  %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+  %const1 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 256
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+  %const2 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 512
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+  %const3 = getelementptr [1024 x %struct.vec], ptr addrspace(3) %base, i64 768
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
+  ret void
+}
+
+define void @struct_type(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) {
 ; CHECK-LABEL: define void @struct_type(
-; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i32 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[BASE:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]]
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]]
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP0]], i64 256
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP2]], i64 512
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[TMP4]], i64 768
+; CHECK-NEXT:    ret void
+;
+entry:
+  %base = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0
+  %idx0 = getelementptr i8, ptr addrspace(3) %base, i64 %in.idx1
+  %const1 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 256
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx1
+  %const2 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 512
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx1
+  %const3 = getelementptr %struct.vec, ptr addrspace(3) %base, i64 768
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx1
+  ret void
+}
+
+define void @struct_type_multiindex(ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 %in.idx1, i64 %in.idx2) {
+; CHECK-LABEL: define void @struct_type_multiindex(
+; CHECK-SAME: ptr addrspace(3) [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i32 [[IN_IDX1:%.*]], i64 [[IN_IDX2:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BASE:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[IN_PTR]], i32 [[IN_IDX0]]
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr i8, ptr addrspace(3) [[BASE]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[CONST1:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 256
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST1]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[CONST2:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 512
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST2]], i32 [[IN_IDX1]]
-; CHECK-NEXT:    [[CONST3:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) [[BASE]], i32 768
-; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr i8, ptr addrspace(3) [[CONST3]], i32 [[IN_IDX1]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_VEC:%.*]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]], i32 0, i32 0
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[IDXPROM2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i32 2
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM3]], i32 0, i32 0
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP3]], i32 [[IDXPROM5]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 4
+; CHECK-NEXT:    [[IDXPROM6:%.*]] = trunc i64 [[IN_IDX0]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_VEC]], ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM6]], i32 0, i32 0
+; CHECK-NEXT:    [[IDXPROM8:%.*]] = trunc i64 [[IN_IDX2]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 [[IDXPROM8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 6
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %base = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %in.ptr, i32 %in.idx0
-  %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
-  %const1 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 256
-  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
-  %const2 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 512
-  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
-  %const3 = getelementptr [1024 x %struct.Packed], ptr addrspace(3) %base, i32 768
-  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+  %const1 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 2
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i64 %in.idx2
+  %const2 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 4
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i64 %in.idx2
+  %const3 = getelementptr %struct.vec, ptr addrspace(3) %in.ptr, i64 %in.idx0, i32 0, i32 6
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i64 %in.idx2
   ret void
 }