6 changes: 4 additions & 2 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3229,9 +3229,11 @@ class LLVM_ABI TargetLoweringBase {
/// result is unconditional.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
/// \p GapMask is a mask with zeros for components / fields that may not be
/// accessed.
virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const {
return false;
}
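Illustration, not part of this diff: a minimal LLVM IR sketch, adapted from the RISC-V tests added later in this patch, of a store that can now reach this hook with a gap. Under the documented semantics, the per-lane Mask it receives is all-true here and GapMask would be 0b011: bit i is set when field i of each record is actually stored, so the trailing field is the gap.

define void @store_factor3_trailing_gap(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  ; every third mask bit is false, so field 2 is never accessed
  tail call void @llvm.masked.store(<12 x i32> %interleaved.vec, ptr %ptr, i32 4, <12 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>)
  ret void
}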

10 changes: 4 additions & 6 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -537,28 +537,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
"number of stored element should be a multiple of Factor");

Value *Mask = nullptr;
auto GapMask = APInt::getAllOnes(Factor);
if (SI) {
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
} else {
// Check mask operand. Handle both all-true/false and interleaved mask.
unsigned LaneMaskLen = NumStoredElements / Factor;
APInt GapMask(Factor, 0);
std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
ElementCount::getFixed(LaneMaskLen));
if (!Mask)
return false;
// We haven't supported gap mask for stores. Yet it is possible that we
// already changed the IR, hence returning true here.
if (GapMask.popcount() != Factor)
return true;

LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
<< *Store << "\n");
LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
<< " and actual factor " << GapMask.popcount() << "\n");
}

// Try to create target specific intrinsics to replace the store and
// shuffle.
if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor))
if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
return false;

// Already have a new target specific interleaved store. Erase the old store.
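Worked example of the mask split above, assuming getMask behaves as the surrounding code suggests; values taken from the vpstore_factor3_gap_with_mask test in this patch. For a factor-3 vp/masked store of 12 elements:

  store mask : <1,1,0, 0,0,0, 1,1,0, 0,0,0>   ; one bit per stored element
  lane mask  : <1,0,1,0>                      ; NumStoredElements / Factor = 4 lanes
  gap mask   : 0b011                          ; field 2 is never accessed

The nominal factor is 3, while the actual factor, GapMask.popcount(), is 2.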
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17604,14 +17604,16 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
unsigned Factor,
const APInt &GapMask) const {

assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
assert(!LaneMask && "Unexpected mask on store");
assert(!LaneMask && GapMask.popcount() == Factor &&
"Unexpected mask on store");

auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
ArrayRef<unsigned> Indices, unsigned Factor,
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
ShuffleVectorInst *SVI,
unsigned Factor) const override;
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const override;

bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
6 changes: 4 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21720,13 +21720,15 @@ bool ARMTargetLowering::lowerInterleavedLoad(
bool ARMTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
unsigned Factor,
const APInt &GapMask) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
assert(!LaneMask && "Unexpected mask on store");
assert(!LaneMask && GapMask.popcount() == Factor &&
"Unexpected mask on store");

auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -688,8 +688,8 @@ class VectorType;
ArrayRef<unsigned> Indices, unsigned Factor,
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
ShuffleVectorInst *SVI,
unsigned Factor) const override;
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const override;

bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -435,8 +435,8 @@ class RISCVTargetLowering : public TargetLowering {
const APInt &GapMask) const override;

bool lowerInterleavedStore(Instruction *Store, Value *Mask,
ShuffleVectorInst *SVI,
unsigned Factor) const override;
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const override;

bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
47 changes: 39 additions & 8 deletions llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
Intrinsic::riscv_seg8_store_mask};

static const Intrinsic::ID FixedVsssegIntrIds[] = {
Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
Intrinsic::riscv_sseg8_store_mask};

static const Intrinsic::ID ScalableVssegIntrIds[] = {
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
unsigned Factor,
const APInt &GapMask) const {
assert(GapMask.getBitWidth() == Factor);

// We only support cases where the skipped fields are the trailing ones.
// TODO: Lower to strided store if there is only a single active field.
unsigned MaskFactor = GapMask.popcount();
if (MaskFactor < 2 || !GapMask.isMask())
return false;

IRBuilder<> Builder(Store);
const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
@@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,

Value *Ptr, *VL;
Align Alignment;
if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
Alignment))
return false;

Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
return false;

Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
Function *SegStoreFunc;
if (MaskFactor < Factor)
// Strided segmented store.
SegStoreFunc = Intrinsic::getOrInsertDeclaration(
Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
{VTy, PtrTy, XLenTy, XLenTy});
else
// Normal segmented store.
SegStoreFunc = Intrinsic::getOrInsertDeclaration(
Store->getModule(), FixedVssegIntrIds[Factor - 2],
{VTy, PtrTy, XLenTy});

SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;

for (unsigned i = 0; i < Factor; i++) {
for (unsigned i = 0; i < MaskFactor; i++) {
// Collect shuffle mask for this lane.
for (unsigned j = 0; j < VTy->getNumElements(); j++)
NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,

NewShuffleMask.clear();
}
Ops.append({Ptr, LaneMask, VL});
Builder.CreateCall(VssegNFunc, Ops);
Ops.push_back(Ptr);
if (MaskFactor < Factor) {
// Insert the stride argument.
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
}
Ops.append({LaneMask, VL});
Builder.CreateCall(SegStoreFunc, Ops);

return true;
}
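A sketch of what this path assembles for the maskedstore_factor4_gap_with_mask test added below; the value names are illustrative, not taken from the pass output:

  GapMask   = 0b0011, so GapMask.isMask() holds and MaskFactor = 2
  stride    = Factor * store size of i32 = 4 * 4 = 16 bytes
  callee    = FixedVsssegIntrIds[MaskFactor - 2], i.e. the 2-field strided segment store (vssseg2e32)
  Ops       = { field0, field1, %ptr, 16, lane mask <1,0,1,0>, VL = 4 }

This matches the "li a1, 16" and "vssseg2e32.v v8, (a0), a1, v0.t" CHECK lines in that test.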
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1669,8 +1669,8 @@ namespace llvm {
/// Lower interleaved store(s) into target specific
/// instructions/intrinsics.
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
ShuffleVectorInst *SVI,
unsigned Factor) const override;
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const override;

SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
int JTI, SelectionDAG &DAG) const override;
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -825,7 +825,8 @@ bool X86TargetLowering::lowerInterleavedLoad(
bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
unsigned Factor,
const APInt &GapMask) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");

@@ -836,7 +837,8 @@ bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
assert(!LaneMask && "Unexpected mask on store");
assert(!LaneMask && GapMask.popcount() == Factor &&
"Unexpected mask on store");

// Holds the indices of SVI that correspond to the starting index of each
// interleaved shuffle.
90 changes: 84 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1638,6 +1638,37 @@ define void @vpstore_factor3_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i
ret void
}

; mask = all ones, skip the last field.
define void @vpstore_factor3_gap(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3_gap:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 6, e32, m1, ta, ma
; CHECK-NEXT: vssseg2e32.v v8, (a0), a1
; CHECK-NEXT: ret
%s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>, i32 12)
ret void
}

; mask = 1010, skip the last field.
define void @vpstore_factor3_gap_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3_gap_with_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
%s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> <i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, i32 12)
ret void
}

define void @vpstore_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: vpstore_factor4:
; CHECK: # %bb.0:
@@ -1998,8 +2029,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
; RV32-NEXT: lui a1, %hi(.LCPI63_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI63_0)
; RV32-NEXT: lui a1, %hi(.LCPI65_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -2074,8 +2105,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
; RV32-NEXT: lui a0, %hi(.LCPI64_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI64_0)
; RV32-NEXT: lui a0, %hi(.LCPI66_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI66_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
@@ -2165,6 +2196,53 @@ define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
ret void
}

; mask = all ones, skip the last field.
define void @maskedstore_factor3_gap(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: maskedstore_factor3_gap:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vssseg2e32.v v8, (a0), a1
; CHECK-NEXT: ret
%s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
tail call void @llvm.masked.store(<12 x i32> %interleaved.vec, ptr %ptr, i32 4, <12 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0>)
ret void
}

; mask = 1010, skip the last two fields.
define void @maskedstore_factor4_gap_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: maskedstore_factor4_gap_with_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
%s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
tail call void @llvm.masked.store(<16 x i32> %interleaved.vec, ptr %ptr, i32 4, <16 x i1> <i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>)
ret void
}

; mask = %m, skip the last two fields.
define void @maskedstore_factor4_gap_by_intrinsic_with_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i1> %m) {
; CHECK-LABEL: maskedstore_factor4_gap_by_intrinsic_with_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vssseg2e32.v v8, (a0), a1, v0.t
; CHECK-NEXT: ret
%s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
%interleaved.mask = call <16 x i1> @llvm.vector.interleave4(<4 x i1> %m, <4 x i1> %m, <4 x i1> splat (i1 false), <4 x i1> splat (i1 false))
tail call void @llvm.masked.store(<16 x i32> %interleaved.vec, ptr %ptr, i32 4, <16 x i1> %interleaved.mask)
ret void
}

define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_mask(ptr %ptr) {
; CHECK-LABEL: maskedload_factor3_mask:
; CHECK: # %bb.0:
@@ -2294,8 +2372,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
; RV32-NEXT: lui a1, %hi(.LCPI73_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI73_0)
; RV32-NEXT: lui a1, %hi(.LCPI78_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI78_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11