[IA][RISCV] Recognize interleaving stores that could lower to strided segmented stores #154647
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-backend-aarch64
Author: Min-Yih Hsu (mshockwave)
Changes
This is a sibling patch to #151612: it passes gap masks to the updated TLI hooks for lowering interleaved stores that use shufflevector to do the interleaving.
Patch is 20.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154647.diff
11 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4480ced637456..3a51b9e6917c3 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3229,9 +3229,11 @@ class LLVM_ABI TargetLoweringBase {
/// result is unconditional.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
+ /// \p GapMask is a mask with zeros for components / fields that may not be
+ /// accessed.
virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const {
return false;
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 93f6e39b56ab6..c5e97037be336 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -537,28 +537,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
"number of stored element should be a multiple of Factor");
Value *Mask = nullptr;
+ auto GapMask = APInt::getAllOnes(Factor);
if (SI) {
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
} else {
// Check mask operand. Handle both all-true/false and interleaved mask.
unsigned LaneMaskLen = NumStoredElements / Factor;
- APInt GapMask(Factor, 0);
std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
ElementCount::getFixed(LaneMaskLen));
if (!Mask)
return false;
- // We haven't supported gap mask for stores. Yet it is possible that we
- // already changed the IR, hence returning true here.
- if (GapMask.popcount() != Factor)
- return true;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
<< *Store << "\n");
+ LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
+ << " and actual factor " << GapMask.popcount() << "\n");
}
// Try to create target specific intrinsics to replace the store and
// shuffle.
- if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor))
+ if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
return false;
// Already have a new target specific interleaved store. Erase the old store.
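As a rough standalone illustration of what the forwarded gap mask represents (plain C++ that only mimics the shape of getMask(), not the real implementation; the concrete <12 x i1> mask is taken from the vpstore_factor3_gap test in this patch), a factor-3 store whose third field is fully masked off yields a gap mask of 0b011 and an all-true lane mask:

#include <cstdio>

int main() {
  const unsigned Factor = 3, Lanes = 4;
  // Flat mask of the <12 x i1> operand in vpstore_factor3_gap:
  // <1,1,0, 1,1,0, 1,1,0, 1,1,0>, i.e. field 2 is never stored.
  const bool Flat[12] = {1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0};

  unsigned GapMask = 0;      // bit F set <=> field F is stored in some lane
  unsigned ActiveFactor = 0; // what GapMask.popcount() would return
  bool LaneMask[4] = {true, true, true, true}; // per-lane mask of active fields

  for (unsigned F = 0; F < Factor; ++F) {
    bool AnyLane = false;
    for (unsigned L = 0; L < Lanes; ++L)
      AnyLane |= Flat[L * Factor + F];
    if (!AnyLane)
      continue; // a "gap": leave bit F of GapMask clear
    GapMask |= 1u << F;
    ++ActiveFactor;
    for (unsigned L = 0; L < Lanes; ++L)
      LaneMask[L] = LaneMask[L] && Flat[L * Factor + F];
  }

  std::printf("GapMask (LSB = field 0) = 0b%u%u%u, active factor = %u\n",
              (GapMask >> 2) & 1u, (GapMask >> 1) & 1u, GapMask & 1u,
              ActiveFactor);
  std::printf("LaneMask = %d %d %d %d\n", LaneMask[0], LaneMask[1],
              LaneMask[2], LaneMask[3]);
  // Prints: GapMask (LSB = field 0) = 0b011, active factor = 2
  //         LaneMask = 1 1 1 1
}

With the early bail-out gone, the pass hands this gap mask to the target hook instead of giving up whenever some field is unused.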
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d168cc8d1bd06..3bb0fdd841653 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17604,14 +17604,16 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
- assert(!LaneMask && "Unexpected mask on store");
+ assert(!LaneMask && GapMask.popcount() == Factor &&
+ "Unexpected mask on store");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 071e96e194286..0347fbf21f279 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
ArrayRef<unsigned> Indices, unsigned Factor,
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..57352d9567c13 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21720,13 +21720,15 @@ bool ARMTargetLowering::lowerInterleavedLoad(
bool ARMTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
- assert(!LaneMask && "Unexpected mask on store");
+ assert(!LaneMask && GapMask.popcount() == Factor &&
+ "Unexpected mask on store");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..d87cb4856c2af 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -688,8 +688,8 @@ class VectorType;
ArrayRef<unsigned> Indices, unsigned Factor,
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index fb63ebcfaacea..4581c11356aff 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -435,8 +435,8 @@ class RISCVTargetLowering : public TargetLowering {
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index c7b96f5c3d0c8..5e1063155ba07 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
Intrinsic::riscv_seg8_store_mask};
+static const Intrinsic::ID FixedVsssegIntrIds[] = {
+ Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
+ Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
+ Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
+ Intrinsic::riscv_sseg8_store_mask};
+
static const Intrinsic::ID ScalableVssegIntrIds[] = {
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
+ assert(GapMask.getBitWidth() == Factor);
+
+ // We only support cases where the skipped fields are the trailing ones.
+ // TODO: Lower to strided store if there is only a single active field.
+ unsigned MaskFactor = GapMask.popcount();
+ if (MaskFactor < 2 || !GapMask.isMask())
+ return false;
+
IRBuilder<> Builder(Store);
const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
@@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *Ptr, *VL;
Align Alignment;
- if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
+ if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
+ Alignment))
return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
- if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
return false;
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+ Function *SegStoreFunc;
+ if (MaskFactor < Factor)
+ // Strided segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
+ {VTy, PtrTy, XLenTy, XLenTy});
+ else
+ // Normal segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVssegIntrIds[Factor - 2],
+ {VTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;
- for (unsigned i = 0; i < Factor; i++) {
+ for (unsigned i = 0; i < MaskFactor; i++) {
// Collect shuffle mask for this lane.
for (unsigned j = 0; j < VTy->getNumElements(); j++)
NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
NewShuffleMask.clear();
}
- Ops.append({Ptr, LaneMask, VL});
- Builder.CreateCall(VssegNFunc, Ops);
+ Ops.push_back(Ptr);
+ if (MaskFactor < Factor) {
+ // Insert the stride argument.
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
+ Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
+ }
+ Ops.append({LaneMask, VL});
+ Builder.CreateCall(SegStoreFunc, Ops);
return true;
}
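The RISC-V path above keeps the nominal Factor for addressing but only materializes MaskFactor fields. Below is a self-contained C++ sketch of that arithmetic (not the patch itself; the factor-3, i32, trailing-gap numbers mirror the vpstore_factor3_gap test), showing the per-field shuffle split and the stride a vssseg2e32 store would use:

#include <cstdio>
#include <vector>

int main() {
  // Factor-3 interleave of four i32 rows; the trailing field is skipped,
  // so GapMask = 0b011, MaskFactor = 2, and GapMask.isMask() would hold.
  const unsigned Factor = 3, MaskFactor = 2, NumElems = 4, ElemBytes = 4;

  // Flat shuffle mask of the re-interleaving shufflevector in the test.
  const int Mask[12] = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};

  // One sub-shuffle per *kept* field: element j of field i sits at
  // Mask[i + Factor * j], matching the loop in the patch.
  for (unsigned i = 0; i < MaskFactor; ++i) {
    std::vector<int> FieldMask;
    for (unsigned j = 0; j < NumElems; ++j)
      FieldMask.push_back(Mask[i + Factor * j]);
    std::printf("field %u mask:", i);
    for (int M : FieldMask)
      std::printf(" %d", M);
    std::printf("\n");
  }

  // The stride still uses the *nominal* factor, so the store jumps over
  // the skipped trailing field in memory: 3 * 4 bytes = 12.
  unsigned StrideBytes = Factor * ElemBytes;
  std::printf("emit vssseg%ue%u with stride %u\n", MaskFactor, ElemBytes * 8,
              StrideBytes);
  // Prints: field 0 mask: 0 1 2 3
  //         field 1 mask: 4 5 6 7
  //         emit vssseg2e32 with stride 12
}

Computing the stride from the nominal factor is what lets the lowering hop over the unused trailing field rather than repack the stored data.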
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3dd79b3249517..24407a46d6495 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1669,8 +1669,8 @@ namespace llvm {
/// Lower interleaved store(s) into target specific
/// instructions/intrinsics.
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
int JTI, SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 632db7e4326e2..4188487d75911 100644
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -825,7 +825,8 @@ bool X86TargetLowering::lowerInterleavedLoad(
bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
@@ -836,7 +837,8 @@ bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
- assert(!LaneMask && "Unexpected mask on store");
+ assert(!LaneMask && GapMask.popcount() == Factor &&
+ "Unexpected mask on store");
// Holds the indices of SVI that correspond to the starting index of each
// interleaved shuffle.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index c426ee7b7d2b1..1dfbdcb516c13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1638,6 +1638,37 @@ define void @vpstore_factor3_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i
ret void
}
+; mask = all ones, skip the last field.
+define void @vpstore_factor3_gap(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: vpstore_factor3_gap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: vsetivli zero, 6, e32, m1, ta, ma
+; CHECK-NEXT: vssseg2e32.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+ tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, i32 12)
+ ret void
+}
+
+; mask = 1010, skip the last field.
+define void @vpstore_factor3_gap_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: vpstore_factor3_gap_with_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 5
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+ tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> <i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, i32 12)
+ ret void
+}
+
define void @vpstore_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: vpstore_factor4:
; CHECK: # %bb.0:
@@ -1998,8 +2029,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI63_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI63_0)
+; RV32-NEXT: lui a1, %hi(.LCPI65_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -2074,8 +2105,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI64_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI64_0)
+; RV32-NEXT: lui a0, %hi(.LCPI66_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI66_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
@@ -2165,6 +2196,53 @@ define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
ret void
}
+; mask = all ones, skip the last field.
+define void @maskedstore_factor3_gap(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: maskedstore_factor3_gap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vssseg2e32.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+ tail call void @llvm.masked.store(<12 x i32> %interleaved.vec, ptr %ptr, i32 4, <12 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>)
+ ret void
+}
+
+; mask = 1010, skip the last two fields.
+define void @maskedstore_factor4_gap_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; CHECK-LABEL: maskedstore_factor4_gap_with_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 5
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ tail call void @llvm.masked.store(<16 x i32> %interleaved.vec, ptr %ptr, i32 4, <16 x i1> <i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>)
+ ret void
+}
+
+; mask = %m, skip the last two fields.
+define void @maskedstore_factor4_gap_by_intrinsic_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i1> %m) {
+; CHECK-LABEL: maskedstore_factor4_gap_by_intrinsic_with_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ %interleaved.mask = call <16 x i1> @llvm.vector.interleave4(<4 x i1> %m, <4 x i1> %m, <4 x i1> splat (i1 false), <4 x i1> splat (i1 false))
+ tail call void @llvm.masked.store(<16 x i32> %interleaved.vec, ptr %ptr, i32 4...
[truncated]
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/include/llvm/CodeGen/TargetLowering.h llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.h llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86InterleavedAccess.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef.

In tests, avoid using undef and having tests that trigger undefined behavior.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
preames left a comment:
LGTM
This is a sibling patch to #151612: it passes gap masks to the updated TLI hooks for lowering interleaved stores that use shufflevector to do the interleaving.