Skip to content

Commit c0a3ab3

Browse files
Revert "[AArch64][SVE] Implement intrinsics for non-temporal loads & stores"
This reverts commit 3f5bf35 because it caused build failures on the llvm-clang-x86_64-expensive-checks builders; see http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-debian/builds/392 and http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-ubuntu/builds/1045
1 parent: 17554b8 · commit: c0a3ab3

File tree

7 files changed: 3 additions and 311 deletions

7 files changed

+3
-311
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -768,20 +768,6 @@ def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
768768
def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
769769

770770
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
771-
772-
class AdvSIMD_1Vec_PredLoad_Intrinsic
773-
: Intrinsic<[llvm_anyvector_ty],
774-
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
775-
LLVMPointerTo<0>],
776-
[IntrReadMem, IntrArgMemOnly]>;
777-
778-
class AdvSIMD_1Vec_PredStore_Intrinsic
779-
: Intrinsic<[],
780-
[llvm_anyvector_ty,
781-
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
782-
LLVMPointerTo<0>],
783-
[IntrArgMemOnly, NoCapture<2>]>;
784-
785771
class AdvSIMD_Merged1VectorArg_Intrinsic
786772
: Intrinsic<[llvm_anyvector_ty],
787773
[LLVMMatchType<0>,
@@ -1047,18 +1033,6 @@ class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
10471033
],
10481034
[IntrReadMem, IntrArgMemOnly]>;
10491035

1050-
//
1051-
// Loads
1052-
//
1053-
1054-
def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
1055-
1056-
//
1057-
// Stores
1058-
//
1059-
1060-
def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
1061-
10621036
//
10631037
// Integer arithmetic
10641038
//

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include "llvm/ADT/Triple.h"
2626
#include "llvm/ADT/Twine.h"
2727
#include "llvm/Analysis/BlockFrequencyInfo.h"
28-
#include "llvm/Analysis/MemoryLocation.h"
2928
#include "llvm/Analysis/ProfileSummaryInfo.h"
3029
#include "llvm/Analysis/ValueTracking.h"
3130
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -6590,9 +6589,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
65906589
if (Align == 0) // Ensure that codegen never sees alignment 0
65916590
Align = getEVTAlignment(MemVT);
65926591

6593-
if (!Size && MemVT.isScalableVector())
6594-
Size = MemoryLocation::UnknownSize;
6595-
else if (!Size)
6592+
if (!Size)
65966593
Size = MemVT.getStoreSize();
65976594

65986595
MachineFunction &MF = getMachineFunction();

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8514,26 +8514,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
85148514
Info.align = Align(16);
85158515
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
85168516
return true;
8517-
case Intrinsic::aarch64_sve_ldnt1: {
8518-
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
8519-
Info.opc = ISD::INTRINSIC_W_CHAIN;
8520-
Info.memVT = MVT::getVT(PtrTy->getElementType());
8521-
Info.ptrVal = I.getArgOperand(1);
8522-
Info.offset = 0;
8523-
Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
8524-
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
8525-
return true;
8526-
}
8527-
case Intrinsic::aarch64_sve_stnt1: {
8528-
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
8529-
Info.opc = ISD::INTRINSIC_W_CHAIN;
8530-
Info.memVT = MVT::getVT(PtrTy->getElementType());
8531-
Info.ptrVal = I.getArgOperand(2);
8532-
Info.offset = 0;
8533-
Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
8534-
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
8535-
return true;
8536-
}
85378517
default:
85388518
break;
85398519
}
@@ -10963,48 +10943,6 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
1096310943
return NewST1;
1096410944
}
1096510945

10966-
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
10967-
SDLoc DL(N);
10968-
EVT VT = N->getValueType(0);
10969-
EVT PtrTy = N->getOperand(3).getValueType();
10970-
10971-
EVT LoadVT = VT;
10972-
if (VT.isFloatingPoint())
10973-
LoadVT = VT.changeTypeToInteger();
10974-
10975-
auto *MINode = cast<MemIntrinsicSDNode>(N);
10976-
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
10977-
SDValue L = DAG.getMaskedLoad(VT, DL, MINode->getChain(),
10978-
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
10979-
MINode->getOperand(2), PassThru,
10980-
MINode->getMemoryVT(), MINode->getMemOperand(),
10981-
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
10982-
10983-
if (VT.isFloatingPoint()) {
10984-
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
10985-
return DAG.getMergeValues(Ops, DL);
10986-
}
10987-
10988-
return L;
10989-
}
10990-
10991-
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
10992-
SDLoc DL(N);
10993-
10994-
SDValue Data = N->getOperand(2);
10995-
EVT DataVT = Data.getValueType();
10996-
EVT PtrTy = N->getOperand(4).getValueType();
10997-
10998-
if (DataVT.isFloatingPoint())
10999-
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
11000-
11001-
auto *MINode = cast<MemIntrinsicSDNode>(N);
11002-
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
11003-
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
11004-
MINode->getMemoryVT(), MINode->getMemOperand(),
11005-
ISD::UNINDEXED, false, false);
11006-
}
11007-
1100810946
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
1100910947
/// load store optimizer pass will merge them to store pair stores. This should
1101010948
/// be better than a movi to create the vector zero followed by a vector store
@@ -12281,10 +12219,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1228112219
case Intrinsic::aarch64_neon_st3lane:
1228212220
case Intrinsic::aarch64_neon_st4lane:
1228312221
return performNEONPostLDSTCombine(N, DCI, DAG);
12284-
case Intrinsic::aarch64_sve_ldnt1:
12285-
return performLDNT1Combine(N, DAG);
12286-
case Intrinsic::aarch64_sve_stnt1:
12287-
return performSTNT1Combine(N, DAG);
1228812222
case Intrinsic::aarch64_sve_ld1_gather:
1228912223
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
1229012224
case Intrinsic::aarch64_sve_ld1_gather_index:

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,7 @@ def nonext_masked_load :
265265
PatFrag<(ops node:$ptr, node:$pred, node:$def),
266266
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
267267
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
268-
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
269-
!cast<MaskedLoadSDNode>(N)->isNonTemporal();
268+
cast<MaskedLoadSDNode>(N)->isUnindexed();
270269
}]>;
271270
// sign extending masked load fragments.
272271
def asext_masked_load :
@@ -314,21 +313,12 @@ def zext_masked_load_i32 :
314313
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
315314
}]>;
316315

317-
def non_temporal_load :
318-
PatFrag<(ops node:$ptr, node:$pred, node:$def),
319-
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
320-
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
321-
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
322-
cast<MaskedLoadSDNode>(N)->isNonTemporal();
323-
}]>;
324-
325316
// non-truncating masked store fragment.
326317
def nontrunc_masked_store :
327318
PatFrag<(ops node:$val, node:$ptr, node:$pred),
328319
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
329320
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
330-
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
331-
!cast<MaskedStoreSDNode>(N)->isNonTemporal();
321+
cast<MaskedStoreSDNode>(N)->isUnindexed();
332322
}]>;
333323
// truncating masked store fragments.
334324
def trunc_masked_store :
@@ -353,14 +343,6 @@ def trunc_masked_store_i32 :
353343
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
354344
}]>;
355345

356-
def non_temporal_store :
357-
PatFrag<(ops node:$val, node:$ptr, node:$pred),
358-
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
359-
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
360-
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
361-
cast<MaskedStoreSDNode>(N)->isNonTemporal();
362-
}]>;
363-
364346
// Node definitions.
365347
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
366348
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,18 +1179,6 @@ let Predicates = [HasSVE] in {
11791179
// 16-element contiguous stores
11801180
defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
11811181

1182-
defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR>;
1183-
defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRR>;
1184-
defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRR>;
1185-
defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRR>;
1186-
defm : pred_load<nxv8f16, nxv8i1, non_temporal_load, LDNT1H_ZRR>;
1187-
defm : pred_load<nxv4f32, nxv4i1, non_temporal_load, LDNT1W_ZRR>;
1188-
defm : pred_load<nxv2f64, nxv2i1, non_temporal_load, LDNT1D_ZRR>;
1189-
1190-
defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR>;
1191-
defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRR>;
1192-
defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR>;
1193-
defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR>;
11941182
}
11951183

11961184
let Predicates = [HasSVE2] in {

llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll

Lines changed: 0 additions & 88 deletions
This file was deleted.

llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)