diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..d295a45149d3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24111,10 +24111,11 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 
   IRBuilder<> Builder(LI);
 
+  const DataLayout &DL = LI->getDataLayout();
+
   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
   if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
-                                    LI->getPointerAddressSpace(),
-                                    LI->getDataLayout()))
+                                    LI->getPointerAddressSpace(), DL))
     return false;
 
   auto *PtrTy = LI->getPointerOperandType();
@@ -24124,7 +24125,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
   // and there's only one element used, use a strided load instead. This
   // will be equally fast, and create less vector register pressure.
   if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
-    unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
+    unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
     Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
@@ -24187,14 +24188,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                 ShuffleVectorInst *SVI,
                                                 unsigned Factor) const {
   IRBuilder<> Builder(SI);
+  const DataLayout &DL = SI->getDataLayout();
   auto Mask = SVI->getShuffleMask();
   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
   if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(),
-                                    SI->getDataLayout()))
+                                    SI->getPointerAddressSpace(), DL))
     return false;
 
   auto *PtrTy = SI->getPointerOperandType();
@@ -24206,7 +24207,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
   // be equally fast, and create less vector register pressure.
   if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
       isSpreadMask(Mask, Factor, Index)) {
-    unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
+    unsigned ScalarSizeInBytes =
+        DL.getTypeStoreSize(ShuffleVTy->getElementType());
     Value *Data = SVI->getOperand(0);
     auto *DataVTy = cast<FixedVectorType>(Data->getType());
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5e3ae2faf1a53..041aae229288f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1662,6 +1662,25 @@ define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
   ret <4 x i8> %v0
 }
 
+define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
+; RV32-LABEL: load_factor3_one_active_ptr:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 12
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vlse32.v v8, (a0), a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor3_one_active_ptr:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vlse64.v v8, (a0), a1
+; RV64-NEXT:    ret
+  %interleaved.vec = load <12 x ptr>, ptr %ptr
+  %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  ret <4 x ptr> %v0
+}
+
 define void @load_factor4_one_active_storeback(ptr %ptr) {
 ; CHECK-LABEL: load_factor4_one_active_storeback:
 ; CHECK:       # %bb.0:
@@ -1748,6 +1767,25 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
   ret void
 }
 
+define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
+; RV32-LABEL: store_factor4_one_active_ptr:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vsse32.v v8, (a0), a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor4_one_active_ptr:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vsse64.v v8, (a0), a1
+; RV64-NEXT:    ret
+  %v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
+  store <16 x ptr> %v0, ptr %ptr
+  ret void
+}
+
 ; Negative tests
 
 define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
@@ -1766,8 +1804,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI49_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI49_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI51_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI51_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -1842,8 +1880,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI50_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI50_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI52_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI52_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36
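
The reason the stride computation has to go through DataLayout: `ptr` is not a primitive IR type, so Type::getScalarSizeInBits() returns 0 for it, and the old getScalarSizeInBits() / 8 expression produced a zero stride and offset for pointer-element interleave groups, whereas DataLayout::getTypeStoreSize() reports the 4- or 8-byte size encoded in the target's datalayout string. A minimal standalone sketch of that difference, not part of the patch and assuming it is compiled and linked against the LLVM headers and libraries:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

int main() {
  llvm::LLVMContext Ctx;
  // Pointer component of a riscv64-style datalayout (abbreviated, assumed).
  llvm::DataLayout DL("e-m:e-p:64:64");
  llvm::Type *PtrTy = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);

  // 'ptr' has no primitive bit width, so the old stride expression
  // (getScalarSizeInBits() / 8) evaluates to 0 for pointer elements.
  assert(PtrTy->getScalarSizeInBits() == 0);

  // DataLayout knows the actual store size: 8 bytes for a 64-bit pointer,
  // which is what the strided load/store stride must be scaled by.
  assert(DL.getTypeStoreSize(PtrTy).getFixedValue() == 8);
  return 0;
}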