Skip to content

Commit 58ceb81

Browse files
author
Danilo Carvalho Grael
committed
[SVE] Add SVE2 patterns for unpredicated multiply instructions
Summary: Add patterns for the SVE2 unpredicated multiply instructions: mul, smulh, umulh, pmul, sqdmulh, and sqrdmulh.
Reviewers: sdesmalen, huntergr, efriedma, c-rhodes, kmclaughlin, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits, amehsan
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D72799
1 parent 8d1ed29 commit 58ceb81

File tree

8 files changed

+428
-49
lines changed

8 files changed

+428
-49
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,10 @@ def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
11921192
def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
11931193
def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
11941194

1195+
def int_aarch64_sve_pmul : AdvSIMD_2VectorArg_Intrinsic;
1196+
def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
1197+
def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
1198+
11951199
def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
11961200
def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
11971201
def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,21 @@ let Predicates = [HasSVE] in {
158158
defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", umax>;
159159
defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", umin>;
160160

161-
defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
162-
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
161+
defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
162+
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
163163
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
164164
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;
165165

166+
// Select the generic (unpredicated) mul node to the predicated MUL instruction
// governed by a ptrue with pattern 31 (all elements active).
167+
def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
168+
(MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
169+
def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
170+
(MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
171+
def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
172+
(MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
173+
def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
174+
(MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
175+
166176
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
167177
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
168178
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
@@ -1405,15 +1415,32 @@ let Predicates = [HasSVE2] in {
14051415
defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
14061416

14071417
// SVE2 signed saturating doubling multiply high (unpredicated)
1408-
defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh">;
1409-
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
1418+
defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh", int_aarch64_sve_sqdmulh>;
1419+
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
14101420

14111421
// SVE2 integer multiply vectors (unpredicated)
1412-
defm MUL_ZZZ : sve2_int_mul<0b000, "mul">;
1413-
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
1414-
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
1415-
def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
1416-
1422+
defm MUL_ZZZ : sve2_int_mul<0b000, "mul", mul>;
1423+
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
1424+
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
1425+
defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
1426+
1427+
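// Select the predicated smulh/umulh intrinsics to the SVE2 unpredicated
// instructions when the governing predicate is a ptrue with pattern 31
// (all elements active).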
1428+
def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
1429+
(SMULH_ZZZ_B $Op1, $Op2)>;
1430+
def : Pat<(nxv8i16 (int_aarch64_sve_smulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
1431+
(SMULH_ZZZ_H $Op1, $Op2)>;
1432+
def : Pat<(nxv4i32 (int_aarch64_sve_smulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
1433+
(SMULH_ZZZ_S $Op1, $Op2)>;
1434+
def : Pat<(nxv2i64 (int_aarch64_sve_smulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
1435+
(SMULH_ZZZ_D $Op1, $Op2)>;
1436+
def : Pat<(nxv16i8 (int_aarch64_sve_umulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
1437+
(UMULH_ZZZ_B $Op1, $Op2)>;
1438+
def : Pat<(nxv8i16 (int_aarch64_sve_umulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
1439+
(UMULH_ZZZ_H $Op1, $Op2)>;
1440+
def : Pat<(nxv4i32 (int_aarch64_sve_umulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
1441+
(UMULH_ZZZ_S $Op1, $Op2)>;
1442+
def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
1443+
(UMULH_ZZZ_D $Op1, $Op2)>;
14171444
// SVE2 complex integer dot product (indexed)
14181445
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
14191446

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2621,11 +2621,22 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
26212621
let Inst{4-0} = Zd;
26222622
}
26232623

2624-
multiclass sve2_int_mul<bits<3> opc, string asm> {
2624+
multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
26252625
def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
26262626
def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
26272627
def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
26282628
def _D : sve2_int_mul<0b11, opc, asm, ZPR64>;
2629+
2630+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
2631+
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
2632+
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
2633+
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
2634+
}
2635+
2636+
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
2637+
def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
2638+
2639+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
26292640
}
26302641

26312642
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,3 +446,39 @@ define <vscale x 2 x i64> @mul_i64_pos(<vscale x 2 x i64> %a) {
446446
%res = mul <vscale x 2 x i64> %a, %splat
447447
ret <vscale x 2 x i64> %res
448448
}
449+
450+
define <vscale x 8 x i16> @mul_i16_range(<vscale x 8 x i16> %a) {
451+
; CHECK-LABEL: mul_i16_range
452+
; CHECK: mov w[[W:[0-9]+]], #255
453+
; CHECK-NEXT: mov z1.h, w[[W]]
454+
; CHECK: ptrue p0.h
455+
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
456+
%elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
457+
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
458+
%res = mul <vscale x 8 x i16> %a, %splat
459+
ret <vscale x 8 x i16> %res
460+
}
461+
462+
define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
463+
; CHECK-LABEL: mul_i32_range
464+
; CHECK: mov w[[W:[0-9]+]], #255
465+
; CHECK-NEXT: mov z1.s, w[[W]]
466+
; CHECK: ptrue p0.s
467+
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
468+
%elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
469+
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
470+
%res = mul <vscale x 4 x i32> %a, %splat
471+
ret <vscale x 4 x i32> %res
472+
}
473+
474+
define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
475+
; CHECK-LABEL: mul_i64_range
476+
; CHECK: mov w[[W:[0-9]+]], #255
477+
; CHECK-NEXT: mov z1.d, x[[W]]
478+
; CHECK: ptrue p0.d
479+
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
480+
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
481+
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
482+
%res = mul <vscale x 2 x i64> %a, %splat
483+
ret <vscale x 2 x i64> %res
484+
}

llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
4545
; CHECK: smulh z0.b, p0/m, z0.b, z1.b
4646
; CHECK-NEXT: ret
4747
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg,
48-
<vscale x 16 x i8> %a,
49-
<vscale x 16 x i8> %b)
48+
<vscale x 16 x i8> %a,
49+
<vscale x 16 x i8> %b)
5050
ret <vscale x 16 x i8> %out
5151
}
5252

@@ -55,8 +55,8 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
5555
; CHECK: smulh z0.h, p0/m, z0.h, z1.h
5656
; CHECK-NEXT: ret
5757
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg,
58-
<vscale x 8 x i16> %a,
59-
<vscale x 8 x i16> %b)
58+
<vscale x 8 x i16> %a,
59+
<vscale x 8 x i16> %b)
6060
ret <vscale x 8 x i16> %out
6161
}
6262

@@ -65,8 +65,8 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
6565
; CHECK: smulh z0.s, p0/m, z0.s, z1.s
6666
; CHECK-NEXT: ret
6767
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg,
68-
<vscale x 4 x i32> %a,
69-
<vscale x 4 x i32> %b)
68+
<vscale x 4 x i32> %a,
69+
<vscale x 4 x i32> %b)
7070
ret <vscale x 4 x i32> %out
7171
}
7272

@@ -75,8 +75,8 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
7575
; CHECK: smulh z0.d, p0/m, z0.d, z1.d
7676
; CHECK-NEXT: ret
7777
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg,
78-
<vscale x 2 x i64> %a,
79-
<vscale x 2 x i64> %b)
78+
<vscale x 2 x i64> %a,
79+
<vscale x 2 x i64> %b)
8080
ret <vscale x 2 x i64> %out
8181
}
8282

@@ -85,8 +85,8 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %
8585
; CHECK: umulh z0.b, p0/m, z0.b, z1.b
8686
; CHECK-NEXT: ret
8787
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg,
88-
<vscale x 16 x i8> %a,
89-
<vscale x 16 x i8> %b)
88+
<vscale x 16 x i8> %a,
89+
<vscale x 16 x i8> %b)
9090
ret <vscale x 16 x i8> %out
9191
}
9292

@@ -95,8 +95,8 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %
9595
; CHECK: umulh z0.h, p0/m, z0.h, z1.h
9696
; CHECK-NEXT: ret
9797
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg,
98-
<vscale x 8 x i16> %a,
99-
<vscale x 8 x i16> %b)
98+
<vscale x 8 x i16> %a,
99+
<vscale x 8 x i16> %b)
100100
ret <vscale x 8 x i16> %out
101101
}
102102

@@ -105,8 +105,8 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %
105105
; CHECK: umulh z0.s, p0/m, z0.s, z1.s
106106
; CHECK-NEXT: ret
107107
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg,
108-
<vscale x 4 x i32> %a,
109-
<vscale x 4 x i32> %b)
108+
<vscale x 4 x i32> %a,
109+
<vscale x 4 x i32> %b)
110110
ret <vscale x 4 x i32> %out
111111
}
112112

@@ -115,8 +115,8 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
115115
; CHECK: umulh z0.d, p0/m, z0.d, z1.d
116116
; CHECK-NEXT: ret
117117
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg,
118-
<vscale x 2 x i64> %a,
119-
<vscale x 2 x i64> %b)
118+
<vscale x 2 x i64> %a,
119+
<vscale x 2 x i64> %b)
120120
ret <vscale x 2 x i64> %out
121121
}
122122

llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm-2.ll

Lines changed: 0 additions & 12 deletions
This file was deleted.

llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm.ll

Lines changed: 0 additions & 11 deletions
This file was deleted.

0 commit comments

Comments
 (0)