Skip to content

Commit 461f48e

Browse files
committed
Use PatFrag rather than Pat, move fixed-length SVE tests, and test GlobalISel
1 parent daa155e commit 461f48e

File tree

6 files changed: 258 additions and 121 deletions.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12992,9 +12992,6 @@ SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
1299212992
//
1299312993
// partial_reduce_fmla(acc, fmul(fpext(a), fpext(b)), splat(1.0))
1299412994
// -> partial_reduce_fmla(acc, a, b)
12995-
//
12996-
// partial_reduce_fmla(acc, fmul(fpext(x), splat(C)), splat(1.0))
12997-
// -> partial_reduce_fmla(acc, x, C)
1299812995
SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
1299912996
SDLoc DL(N);
1300012997
auto *Context = DAG.getContext();
@@ -13118,20 +13115,17 @@ SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
1311813115
SDValue Op1 = N->getOperand(1);
1311913116
SDValue Op2 = N->getOperand(2);
1312013117

13121-
if (!(N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA &&
13122-
llvm::isOneOrOneSplatFP(Op2)) &&
13123-
!llvm::isOneOrOneSplat(Op2))
13118+
if (!llvm::isOneOrOneSplat(Op2) && !llvm::isOneOrOneSplatFP(Op2))
1312413119
return SDValue();
1312513120

1312613121
unsigned Op1Opcode = Op1.getOpcode();
1312713122
if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
1312813123
return SDValue();
1312913124

13130-
bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
13125+
bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
1313113126
bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
1313213127
EVT AccElemVT = Acc.getValueType().getVectorElementType();
13133-
if (N->getOpcode() != ISD::PARTIAL_REDUCE_FMLA &&
13134-
Op1IsSigned != NodeIsSigned &&
13128+
if (Op1IsSigned != NodeIsSigned &&
1313513129
Op1.getValueType().getVectorElementType() != AccElemVT)
1313613130
return SDValue();
1313713131

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1921,12 +1921,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
19211921
setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv8i16, Legal);
19221922
setPartialReduceMLAAction(MLAOps, MVT::nxv8i16, MVT::nxv16i8, Legal);
19231923
}
1924-
}
19251924

1926-
// Handle floating-point partial reduction
1927-
if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
1928-
static const unsigned FMLAOps[] = {ISD::PARTIAL_REDUCE_FMLA};
1929-
setPartialReduceMLAAction(FMLAOps, MVT::nxv4f32, MVT::nxv8f16, Legal);
1925+
// Handle floating-point partial reduction
1926+
if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
1927+
setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_FMLA, MVT::nxv4f32,
1928+
MVT::nxv8f16, Legal);
1929+
}
19301930
}
19311931

19321932
// Handle non-aliasing elements mask

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -375,6 +375,11 @@ def AArch64fclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
375375
node:$Zm)
376376
]>;
377377

378+
// AArch64fdot: a set of pattern fragments over three operands ($Zd, $Zn, $Zm).
// It matches either the int_aarch64_sve_fdot_x2 intrinsic or a
// partial_reduce_fmla node applied to those same operands, so a single
// selection pattern parameterized on this operator covers both forms.
def AArch64fdot : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm),
379+
[(int_aarch64_sve_fdot_x2 node:$Zd, node:$Zn, node:$Zm),
380+
(partial_reduce_fmla node:$Zd, node:$Zn, node:$Zm)
381+
]>;
382+
378383
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
379384
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
380385
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>, SDTCisSameAs<0,3>
@@ -4251,7 +4256,7 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
42514256
let Predicates = [HasSVE2p1_or_SME2] in {
42524257
defm FCLAMP_ZZZ : sve_fp_clamp<"fclamp", AArch64fclamp>;
42534258

4254-
defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
4259+
defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, AArch64fdot>;
42554260
defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
42564261

42574262
defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9457,7 +9457,6 @@ multiclass sve_float_dot<bit bf, bit o2, ZPRRegOp dst_ty, ZPRRegOp src_ty,
94579457
string asm, ValueType InVT, SDPatternOperator op> {
94589458
def NAME : sve_float_dot<bf, o2, dst_ty, src_ty, asm>;
94599459
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
9460-
def : SVE_3_Op_Pat<nxv4f32, partial_reduce_fmla, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
94619460
}
94629461

94639462
multiclass sve_fp8_dot<bit bf, ZPRRegOp dstrc, string asm, ValueType vt,

llvm/test/CodeGen/AArch64/sve2p1-fdot.ll

Lines changed: 14 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
22
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
3+
; RUN: llc -global-isel -global-isel-abort=2 -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
34
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
5+
; RUN: llc -global-isel -global-isel-abort=2 -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
46

5-
define <vscale x 4 x float> @fdot_wide_vl128(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
6-
; SVE2-LABEL: fdot_wide_vl128:
7+
define <vscale x 4 x float> @fdot_wide_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
8+
; SVE2-LABEL: fdot_wide_nxv4f32:
79
; SVE2: // %bb.0: // %entry
810
; SVE2-NEXT: uunpklo z3.s, z1.h
911
; SVE2-NEXT: uunpklo z4.s, z2.h
@@ -20,7 +22,7 @@ define <vscale x 4 x float> @fdot_wide_vl128(<vscale x 4 x float> %acc, <vscale
2022
; SVE2-NEXT: fadd z0.s, z0.s, z1.s
2123
; SVE2-NEXT: ret
2224
;
23-
; SVE2P1-LABEL: fdot_wide_vl128:
25+
; SVE2P1-LABEL: fdot_wide_nxv4f32:
2426
; SVE2P1: // %bb.0: // %entry
2527
; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
2628
; SVE2P1-NEXT: ret
@@ -32,8 +34,8 @@ entry:
3234
ret <vscale x 4 x float> %partial.reduce
3335
}
3436

35-
define <vscale x 4 x float> @fdot_splat_vl128(<vscale x 4 x float> %acc, <vscale x 8 x half> %a) {
36-
; SVE2-LABEL: fdot_splat_vl128:
37+
define <vscale x 4 x float> @fdot_splat_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a) {
38+
; SVE2-LABEL: fdot_splat_nxv4f32:
3739
; SVE2: // %bb.0: // %entry
3840
; SVE2-NEXT: uunpklo z2.s, z1.h
3941
; SVE2-NEXT: ptrue p0.s
@@ -44,7 +46,7 @@ define <vscale x 4 x float> @fdot_splat_vl128(<vscale x 4 x float> %acc, <vscale
4446
; SVE2-NEXT: fadd z0.s, z0.s, z1.s
4547
; SVE2-NEXT: ret
4648
;
47-
; SVE2P1-LABEL: fdot_splat_vl128:
49+
; SVE2P1-LABEL: fdot_splat_nxv4f32:
4850
; SVE2P1: // %bb.0: // %entry
4951
; SVE2P1-NEXT: fmov z2.h, #1.00000000
5052
; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
@@ -55,101 +57,8 @@ entry:
5557
ret <vscale x 4 x float> %partial.reduce
5658
}
5759

58-
define void @fdot_wide_vl256(ptr %accptr, ptr %aptr, ptr %bptr) vscale_range(2,2) {
59-
; SVE2-LABEL: fdot_wide_vl256:
60-
; SVE2: // %bb.0: // %entry
61-
; SVE2-NEXT: ptrue p0.s
62-
; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
63-
; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
64-
; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, #1, mul vl]
65-
; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, #1, mul vl]
66-
; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
67-
; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
68-
; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
69-
; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
70-
; SVE2-NEXT: fmul z0.s, z0.s, z1.s
71-
; SVE2-NEXT: ldr z1, [x0]
72-
; SVE2-NEXT: fmul z2.s, z2.s, z3.s
73-
; SVE2-NEXT: fadd z0.s, z1.s, z0.s
74-
; SVE2-NEXT: fadd z0.s, z0.s, z2.s
75-
; SVE2-NEXT: str z0, [x0]
76-
; SVE2-NEXT: ret
77-
;
78-
; SVE2P1-LABEL: fdot_wide_vl256:
79-
; SVE2P1: // %bb.0: // %entry
80-
; SVE2P1-NEXT: ldr z0, [x0]
81-
; SVE2P1-NEXT: ldr z1, [x1]
82-
; SVE2P1-NEXT: ldr z2, [x2]
83-
; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
84-
; SVE2P1-NEXT: str z0, [x0]
85-
; SVE2P1-NEXT: ret
86-
entry:
87-
%acc = load <8 x float>, ptr %accptr
88-
%a = load <16 x half>, ptr %aptr
89-
%b = load <16 x half>, ptr %bptr
90-
%a.wide = fpext <16 x half> %a to <16 x float>
91-
%b.wide = fpext <16 x half> %b to <16 x float>
92-
%mult = fmul <16 x float> %a.wide, %b.wide
93-
%partial.reduce = call <8 x float> @llvm.vector.partial.reduce.fadd(<8 x float> %acc, <16 x float> %mult)
94-
store <8 x float> %partial.reduce, ptr %accptr
95-
ret void
96-
}
97-
98-
define <4 x float> @fixed_fdot_wide(<4 x float> %acc, <8 x half> %a, <8 x half> %b) {
99-
; CHECK-LABEL: fixed_fdot_wide:
100-
; CHECK: // %bb.0: // %entry
101-
; CHECK-NEXT: fcvtl v3.4s, v1.4h
102-
; CHECK-NEXT: fcvtl v4.4s, v2.4h
103-
; CHECK-NEXT: fcvtl2 v1.4s, v1.8h
104-
; CHECK-NEXT: fcvtl2 v2.4s, v2.8h
105-
; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s
106-
; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
107-
; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
108-
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
109-
; CHECK-NEXT: ret
110-
entry:
111-
%a.wide = fpext <8 x half> %a to <8 x float>
112-
%b.wide = fpext <8 x half> %b to <8 x float>
113-
%mult = fmul <8 x float> %a.wide, %b.wide
114-
%partial.reduce = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %mult)
115-
ret <4 x float> %partial.reduce
116-
}
117-
118-
define <8 x half> @partial_reduce_half(<8 x half> %acc, <16 x half> %a) {
119-
; CHECK-LABEL: partial_reduce_half:
120-
; CHECK: // %bb.0: // %entry
121-
; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
122-
; CHECK-NEXT: fadd v0.8h, v0.8h, v2.8h
123-
; CHECK-NEXT: ret
124-
entry:
125-
%partial.reduce = call <8 x half> @llvm.vector.partial.reduce.fadd(<8 x half> %acc, <16 x half> %a)
126-
ret <8 x half> %partial.reduce
127-
}
128-
129-
define <4 x float> @partial_reduce_float(<4 x float> %acc, <8 x float> %a) {
130-
; CHECK-LABEL: partial_reduce_float:
131-
; CHECK: // %bb.0: // %entry
132-
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
133-
; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
134-
; CHECK-NEXT: ret
135-
entry:
136-
%partial.reduce = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %a)
137-
ret <4 x float> %partial.reduce
138-
}
139-
140-
define <2 x double> @partial_reduce_double(<2 x double> %acc, <4 x double> %a) {
141-
; CHECK-LABEL: partial_reduce_double:
142-
; CHECK: // %bb.0: // %entry
143-
; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
144-
; CHECK-NEXT: fadd v0.2d, v0.2d, v2.2d
145-
; CHECK-NEXT: ret
146-
entry:
147-
%partial.reduce = call <2 x double> @llvm.vector.partial.reduce.fadd(<2 x double> %acc, <4 x double> %a)
148-
ret <2 x double> %partial.reduce
149-
}
150-
151-
define <vscale x 8 x half> @partial_reduce_half_vl128(<vscale x 8 x half> %acc, <vscale x 16 x half> %a) {
152-
; CHECK-LABEL: partial_reduce_half_vl128:
60+
define <vscale x 8 x half> @partial_reduce_nxv8f16(<vscale x 8 x half> %acc, <vscale x 16 x half> %a) {
61+
; CHECK-LABEL: partial_reduce_nxv8f16:
15362
; CHECK: // %bb.0: // %entry
15463
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
15564
; CHECK-NEXT: fadd z0.h, z0.h, z2.h
@@ -159,8 +68,8 @@ entry:
15968
ret <vscale x 8 x half> %partial.reduce
16069
}
16170

162-
define <vscale x 4 x float> @partial_reduce_float_vl128(<vscale x 4 x float> %acc, <vscale x 8 x float> %a) {
163-
; CHECK-LABEL: partial_reduce_float_vl128:
71+
define <vscale x 4 x float> @partial_reduce_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x float> %a) {
72+
; CHECK-LABEL: partial_reduce_nxv4f32:
16473
; CHECK: // %bb.0: // %entry
16574
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
16675
; CHECK-NEXT: fadd z0.s, z0.s, z2.s
@@ -170,8 +79,8 @@ entry:
17079
ret <vscale x 4 x float> %partial.reduce
17180
}
17281

173-
define <vscale x 2 x double> @partial_reduce_double_vl128(<vscale x 2 x double> %acc, <vscale x 4 x double> %a) {
174-
; CHECK-LABEL: partial_reduce_double_vl128:
82+
define <vscale x 2 x double> @partial_reduce_nxv2f64(<vscale x 2 x double> %acc, <vscale x 4 x double> %a) {
83+
; CHECK-LABEL: partial_reduce_nxv2f64:
17584
; CHECK: // %bb.0: // %entry
17685
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
17786
; CHECK-NEXT: fadd z0.d, z0.d, z2.d

0 commit comments

Comments
 (0)