-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. #147598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: We need to use DataLayout to calculate the size if the element type is a pointer. Full diff: https://github.com/llvm/llvm-project/pull/147598.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..d295a45149d3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24111,10 +24111,11 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
IRBuilder<> Builder(LI);
+ const DataLayout &DL = LI->getDataLayout();
+
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
- LI->getPointerAddressSpace(),
- LI->getDataLayout()))
+ LI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = LI->getPointerOperandType();
@@ -24124,7 +24125,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
// and there's only one element used, use a strided load instead. This
// will be equally fast, and create less vector register pressure.
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
- unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
@@ -24187,14 +24188,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
IRBuilder<> Builder(SI);
+ const DataLayout &DL = SI->getDataLayout();
auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
ShuffleVTy->getNumElements() / Factor);
if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(),
- SI->getDataLayout()))
+ SI->getPointerAddressSpace(), DL))
return false;
auto *PtrTy = SI->getPointerOperandType();
@@ -24206,7 +24207,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
// be equally fast, and create less vector register pressure.
if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
isSpreadMask(Mask, Factor, Index)) {
- unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
+ unsigned ScalarSizeInBytes =
+ DL.getTypeStoreSize(ShuffleVTy->getElementType());
Value *Data = SVI->getOperand(0);
auto *DataVTy = cast<FixedVectorType>(Data->getType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 5e3ae2faf1a53..fdbd85f13415a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1360,8 +1360,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; Stores
; ------------------------------------------------------------------------------
-define void @store_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
-; CHECK-LABEL: store_factor2:
+define void @ptrre_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: ptrre_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0)
@@ -1662,6 +1662,25 @@ define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
ret <4 x i8> %v0
}
+define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
+; RV32-LABEL: load_factor3_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vlse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor3_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vlse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %interleaved.vec = load <12 x ptr>, ptr %ptr
+ %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ ret <4 x ptr> %v0
+}
+
define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK: # %bb.0:
@@ -1748,6 +1767,25 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
ret void
}
+define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
+; RV32-LABEL: store_factor4_one_active_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsse32.v v8, (a0), a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_factor4_one_active_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vsse64.v v8, (a0), a1
+; RV64-NEXT: ret
+ %v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
+ store <16 x ptr> %v0, ptr %ptr
+ ret void
+}
+
; Negative tests
define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
@@ -1766,8 +1804,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI49_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
+; RV32-NEXT: lui a1, %hi(.LCPI51_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1842,8 +1880,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI50_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI50_0)
+; RV32-NEXT: lui a0, %hi(.LCPI52_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
|
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead. In tests, avoid using undef and poison values unless the test is specifically exercising their behavior. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}
Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}
Please refer to the Undefined Behavior Manual for more information. |
preames
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/minor comment.
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
Outdated
Show resolved
Hide resolved
[RISCV] Correct stride for strided load/store of vectors of pointers in lowerInterleavedLoad/lowerInterleavedStore. We need to use DataLayout to calculate the size if the element type is a pointer.
4df2f72 to
3b6587b
Compare
mshockwave
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
good catch, thanks
|
@preames If I'm reading the history right, this bug exists in llvm 20. Should we backport? |
We need to use DataLayout to calculate the size if the element type
is a pointer.