Skip to content

Commit 3f5bf35

Browse files
[AArch64][SVE] Implement intrinsics for non-temporal loads & stores
Summary: Adds the following intrinsics: llvm.aarch64.sve.ldnt1 and llvm.aarch64.sve.stnt1. This patch creates masked loads and stores with the MONonTemporal flag set when used with the intrinsics above. Reviewers: sdesmalen, paulwalker-arm, dancgr, mgudim, efriedma, rengolin. Reviewed By: efriedma. Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits. Tags: #llvm. Differential Revision: https://reviews.llvm.org/D71000
1 parent bf4580b commit 3f5bf35

File tree

7 files changed

+311
-3
lines changed

7 files changed

+311
-3
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,20 @@ def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
768768
def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
769769

770770
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
771+
772+
// Predicated load producing a single vector (overloaded on the result type).
// Operands: an i1 predicate sized to match the result vector, followed by a
// pointer to the result vector type. Marked as only reading memory, and only
// through its pointer argument.
class AdvSIMD_1Vec_PredLoad_Intrinsic
773+
: Intrinsic<[llvm_anyvector_ty],
774+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
775+
LLVMPointerTo<0>],
776+
[IntrReadMem, IntrArgMemOnly]>;
777+
778+
// Predicated store of a single vector (overloaded on the data type); returns
// nothing. Operands: the data vector, an i1 predicate sized to match the data
// vector, and a pointer to the data vector type. Only accesses memory through
// its pointer argument, and that pointer (operand 2) is not captured.
class AdvSIMD_1Vec_PredStore_Intrinsic
779+
: Intrinsic<[],
780+
[llvm_anyvector_ty,
781+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
782+
LLVMPointerTo<0>],
783+
[IntrArgMemOnly, NoCapture<2>]>;
784+
771785
class AdvSIMD_Merged1VectorArg_Intrinsic
772786
: Intrinsic<[llvm_anyvector_ty],
773787
[LLVMMatchType<0>,
@@ -1033,6 +1047,18 @@ class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
10331047
],
10341048
[IntrReadMem, IntrArgMemOnly]>;
10351049

1050+
//
1051+
// Loads
1052+
//
1053+
1054+
def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
1055+
1056+
//
1057+
// Stores
1058+
//
1059+
1060+
def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
1061+
10361062
//
10371063
// Integer arithmetic
10381064
//

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/ADT/Triple.h"
2626
#include "llvm/ADT/Twine.h"
2727
#include "llvm/Analysis/BlockFrequencyInfo.h"
28+
#include "llvm/Analysis/MemoryLocation.h"
2829
#include "llvm/Analysis/ProfileSummaryInfo.h"
2930
#include "llvm/Analysis/ValueTracking.h"
3031
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -6589,7 +6590,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(
65896590
if (Align == 0) // Ensure that codegen never sees alignment 0
65906591
Align = getEVTAlignment(MemVT);
65916592

6592-
if (!Size)
6593+
if (!Size && MemVT.isScalableVector())
6594+
Size = MemoryLocation::UnknownSize;
6595+
else if (!Size)
65936596
Size = MemVT.getStoreSize();
65946597

65956598
MachineFunction &MF = getMachineFunction();

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8506,6 +8506,26 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
85068506
Info.align = Align(16);
85078507
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
85088508
return true;
8509+
case Intrinsic::aarch64_sve_ldnt1: {
8510+
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
8511+
Info.opc = ISD::INTRINSIC_W_CHAIN;
8512+
Info.memVT = MVT::getVT(PtrTy->getElementType());
8513+
Info.ptrVal = I.getArgOperand(1);
8514+
Info.offset = 0;
8515+
Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
8516+
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
8517+
return true;
8518+
}
8519+
case Intrinsic::aarch64_sve_stnt1: {
8520+
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
8521+
Info.opc = ISD::INTRINSIC_W_CHAIN;
8522+
Info.memVT = MVT::getVT(PtrTy->getElementType());
8523+
Info.ptrVal = I.getArgOperand(2);
8524+
Info.offset = 0;
8525+
Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
8526+
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
8527+
return true;
8528+
}
85098529
default:
85108530
break;
85118531
}
@@ -10871,6 +10891,48 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
1087110891
return NewST1;
1087210892
}
1087310893

10894+
/// Replace an llvm.aarch64.sve.ldnt1 intrinsic node with a masked load that
/// reuses the intrinsic's memory VT and memory operand.
///
/// Operands of \p N as read here: 2 is the governing predicate (used as the
/// load mask) and 3 is the base pointer. The load is unindexed,
/// non-extending and non-expanding, with an all-zero pass-through value.
///
/// Floating-point results are loaded as the equivalently sized integer vector
/// and then bitcast back to the requested type, so the load's result type
/// always agrees with the type of the integer pass-through constant.
///
/// \returns the masked load itself for integer results, or a merge of
/// {bitcast value, load chain} for floating-point results.
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT PtrTy = N->getOperand(3).getValueType();

  // Perform the load on an integer vector type; FP results are recovered with
  // a bitcast below.
  EVT LoadVT = VT;
  if (VT.isFloatingPoint())
    LoadVT = VT.changeTypeToInteger();

  auto *MINode = cast<MemIntrinsicSDNode>(N);
  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
  // The load must be created with LoadVT (not VT): the pass-through operand is
  // a LoadVT constant, and the bitcast below is only meaningful if the load
  // actually yields the integer type.
  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
                                MINode->getOperand(3), DAG.getUNDEF(PtrTy),
                                MINode->getOperand(2), PassThru,
                                MINode->getMemoryVT(), MINode->getMemOperand(),
                                ISD::UNINDEXED, ISD::NON_EXTLOAD, false);

  if (VT.isFloatingPoint()) {
    // Hand back both the converted value and the load's chain result.
    SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
    return DAG.getMergeValues(Ops, DL);
  }

  return L;
}
10918+
10919+
/// Replace an llvm.aarch64.sve.stnt1 intrinsic node with a masked store that
/// reuses the intrinsic's memory VT and memory operand.
///
/// Operands of \p N as read here: 2 is the data vector, 3 is the governing
/// predicate (used as the store mask) and 4 is the base pointer. The store is
/// unindexed, non-truncating and non-compressing. Floating-point data is
/// bitcast to the equivalently sized integer vector before being stored.
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
  SDLoc Loc(N);

  SDValue StoreVal = N->getOperand(2);
  EVT StoreValVT = StoreVal.getValueType();
  EVT AddrVT = N->getOperand(4).getValueType();

  // Store FP vectors through their integer-typed equivalent.
  if (StoreValVT.isFloatingPoint()) {
    EVT IntVT = StoreValVT.changeTypeToInteger();
    StoreVal = DAG.getNode(ISD::BITCAST, Loc, IntVT, StoreVal);
  }

  auto *MemNode = cast<MemIntrinsicSDNode>(N);
  return DAG.getMaskedStore(MemNode->getChain(), Loc, StoreVal,
                            /*Base=*/MemNode->getOperand(4),
                            /*Offset=*/DAG.getUNDEF(AddrVT),
                            /*Mask=*/MemNode->getOperand(3),
                            MemNode->getMemoryVT(), MemNode->getMemOperand(),
                            ISD::UNINDEXED, /*IsTruncating=*/false,
                            /*IsCompressing=*/false);
}
10935+
1087410936
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
1087510937
/// load store optimizer pass will merge them to store pair stores. This should
1087610938
/// be better than a movi to create the vector zero followed by a vector store
@@ -12087,6 +12149,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1208712149
case Intrinsic::aarch64_neon_st3lane:
1208812150
case Intrinsic::aarch64_neon_st4lane:
1208912151
return performNEONPostLDSTCombine(N, DCI, DAG);
12152+
case Intrinsic::aarch64_sve_ldnt1:
12153+
return performLDNT1Combine(N, DAG);
12154+
case Intrinsic::aarch64_sve_stnt1:
12155+
return performSTNT1Combine(N, DAG);
1209012156
case Intrinsic::aarch64_sve_ld1_gather:
1209112157
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
1209212158
case Intrinsic::aarch64_sve_ld1_gather_index:

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ def nonext_masked_load :
265265
PatFrag<(ops node:$ptr, node:$pred, node:$def),
266266
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
267267
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
268-
cast<MaskedLoadSDNode>(N)->isUnindexed();
268+
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
269+
!cast<MaskedLoadSDNode>(N)->isNonTemporal();
269270
}]>;
270271
// sign extending masked load fragments.
271272
def asext_masked_load :
@@ -313,12 +314,21 @@ def zext_masked_load_i32 :
313314
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
314315
}]>;
315316

317+
// non-temporal masked load fragment: matches a masked_ld with an undef offset
// that is non-extending, unindexed and flagged non-temporal.
def non_temporal_load :
318+
PatFrag<(ops node:$ptr, node:$pred, node:$def),
319+
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
320+
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
321+
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
322+
cast<MaskedLoadSDNode>(N)->isNonTemporal();
323+
}]>;
324+
316325
// non-truncating masked store fragment.
317326
def nontrunc_masked_store :
318327
PatFrag<(ops node:$val, node:$ptr, node:$pred),
319328
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
320329
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
321-
cast<MaskedStoreSDNode>(N)->isUnindexed();
330+
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
331+
!cast<MaskedStoreSDNode>(N)->isNonTemporal();
322332
}]>;
323333
// truncating masked store fragments.
324334
def trunc_masked_store :
@@ -343,6 +353,14 @@ def trunc_masked_store_i32 :
343353
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
344354
}]>;
345355

356+
// non-temporal masked store fragment: matches a masked_st with an undef offset
// that is non-truncating, unindexed and flagged non-temporal.
def non_temporal_store :
357+
PatFrag<(ops node:$val, node:$ptr, node:$pred),
358+
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
359+
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
360+
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
361+
cast<MaskedStoreSDNode>(N)->isNonTemporal();
362+
}]>;
363+
346364
// Node definitions.
347365
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
348366
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,18 @@ let Predicates = [HasSVE] in {
11641164
// 16-element contiguous stores
11651165
defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
11661166

1167+
defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR>;
1168+
defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRR>;
1169+
defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRR>;
1170+
defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRR>;
1171+
defm : pred_load<nxv8f16, nxv8i1, non_temporal_load, LDNT1H_ZRR>;
1172+
defm : pred_load<nxv4f32, nxv4i1, non_temporal_load, LDNT1W_ZRR>;
1173+
defm : pred_load<nxv2f64, nxv2i1, non_temporal_load, LDNT1D_ZRR>;
1174+
1175+
defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR>;
1176+
defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRR>;
1177+
defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR>;
1178+
defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR>;
11671179
}
11681180

11691181
let Predicates = [HasSVE2] in {
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
;
4+
; LDNT1B
5+
;
6+
7+
define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
8+
; CHECK-LABEL: ldnt1b_i8:
9+
; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
10+
; CHECK-NEXT: ret
11+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
12+
<vscale x 16 x i8>* %addr)
13+
ret <vscale x 16 x i8> %res
14+
}
15+
16+
;
17+
; LDNT1H
18+
;
19+
20+
define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
21+
; CHECK-LABEL: ldnt1h_i16:
22+
; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
23+
; CHECK-NEXT: ret
24+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
25+
<vscale x 8 x i16>* %addr)
26+
ret <vscale x 8 x i16> %res
27+
}
28+
29+
define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
30+
; CHECK-LABEL: ldnt1h_f16:
31+
; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
32+
; CHECK-NEXT: ret
33+
%res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
34+
<vscale x 8 x half>* %addr)
35+
ret <vscale x 8 x half> %res
36+
}
37+
38+
;
39+
; LDNT1W
40+
;
41+
42+
define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
43+
; CHECK-LABEL: ldnt1w_i32:
44+
; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
45+
; CHECK-NEXT: ret
46+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
47+
<vscale x 4 x i32>* %addr)
48+
ret <vscale x 4 x i32> %res
49+
}
50+
51+
define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
52+
; CHECK-LABEL: ldnt1w_f32:
53+
; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
54+
; CHECK-NEXT: ret
55+
%res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
56+
<vscale x 4 x float>* %addr)
57+
ret <vscale x 4 x float> %res
58+
}
59+
60+
;
61+
; LDNT1D
62+
;
63+
64+
define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
65+
; CHECK-LABEL: ldnt1d_i64:
66+
; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
67+
; CHECK-NEXT: ret
68+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
69+
<vscale x 2 x i64>* %addr)
70+
ret <vscale x 2 x i64> %res
71+
}
72+
73+
define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
74+
; CHECK-LABEL: ldnt1d_f64:
75+
; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
76+
; CHECK-NEXT: ret
77+
%res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
78+
<vscale x 2 x double>* %addr)
79+
ret <vscale x 2 x double> %res
80+
}
81+
82+
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
83+
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
84+
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
85+
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
86+
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
87+
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
88+
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
;
4+
; STNT1B
5+
;
6+
7+
define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
8+
; CHECK-LABEL: stnt1b_i8:
9+
; CHECK: stnt1b { z0.b }, p0, [x0, #0]
10+
; CHECK-NEXT: ret
11+
call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
12+
<vscale x 16 x i1> %pred,
13+
<vscale x 16 x i8>* %addr)
14+
ret void
15+
}
16+
17+
;
18+
; STNT1H
19+
;
20+
21+
define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
22+
; CHECK-LABEL: stnt1h_i16:
23+
; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
24+
; CHECK-NEXT: ret
25+
call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
26+
<vscale x 8 x i1> %pred,
27+
<vscale x 8 x i16>* %addr)
28+
ret void
29+
}
30+
31+
define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
32+
; CHECK-LABEL: stnt1h_f16:
33+
; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
34+
; CHECK-NEXT: ret
35+
call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
36+
<vscale x 8 x i1> %pred,
37+
<vscale x 8 x half>* %addr)
38+
ret void
39+
}
40+
41+
;
42+
; STNT1W
43+
;
44+
45+
define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
46+
; CHECK-LABEL: stnt1w_i32:
47+
; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
48+
; CHECK-NEXT: ret
49+
call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
50+
<vscale x 4 x i1> %pred,
51+
<vscale x 4 x i32>* %addr)
52+
ret void
53+
}
54+
55+
define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
56+
; CHECK-LABEL: stnt1w_f32:
57+
; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
58+
; CHECK-NEXT: ret
59+
call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
60+
<vscale x 4 x i1> %pred,
61+
<vscale x 4 x float>* %addr)
62+
ret void
63+
}
64+
65+
;
66+
; STNT1D
67+
;
68+
69+
define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
70+
; CHECK-LABEL: stnt1d_i64:
71+
; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
72+
; CHECK-NEXT: ret
73+
call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
74+
<vscale x 2 x i1> %pred,
75+
<vscale x 2 x i64>* %addr)
76+
ret void
77+
}
78+
79+
define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
80+
; CHECK-LABEL: stnt1d_f64:
81+
; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
82+
; CHECK-NEXT: ret
83+
call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
84+
<vscale x 2 x i1> %pred,
85+
<vscale x 2 x double>* %addr)
86+
ret void
87+
}
88+
89+
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
90+
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
91+
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
92+
declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
93+
declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
94+
declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
95+
declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)

0 commit comments

Comments
 (0)