Skip to content

Commit dbe8def

Browse files
authored
[AArch64] Lower mathlib call ldexp into fscale when sve is enabled (#67552)
The SVE 'fscale' instruction computes the same result as the math library call ldexp, but performs better. This patch lowers ldexp to fscale when SVE is enabled.
1 parent 4a540ce commit dbe8def

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
16421642

16431643
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
16441644

1645+
if (Subtarget->hasSVE()) {
1646+
setOperationAction(ISD::FLDEXP, MVT::f64, Custom);
1647+
setOperationAction(ISD::FLDEXP, MVT::f32, Custom);
1648+
setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
1649+
}
1650+
16451651
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
16461652

16471653
IsStrictFPEnabled = true;
@@ -5895,6 +5901,49 @@ static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
58955901
return SDValue();
58965902
}
58975903

5904+
/// Custom-lower a scalar ISD::FLDEXP node to the SVE `fscale` intrinsic.
///
/// The value and the exponent are each inserted into lane 0 of an otherwise
/// undefined scalable vector, `aarch64_sve_fscale` is applied under an
/// all-lanes predicate, and lane 0 of the result is extracted again.
///
///  - f32: handled with nxv4f32 / nxv4i32 vectors.
///  - f16: the value is first extended to f32, takes the f32 path, and the
///         extracted result is rounded back to f16.
///  - f64: handled with nxv2f64 / nxv2i64 vectors; the exponent is
///         sign-extended to i64 to match the lane width.
///
/// \param Op  The FLDEXP node; operand 0 is the value, operand 1 the exponent.
/// \param DAG The selection DAG in which the replacement nodes are created.
/// \returns The lowered value, or an empty SDValue for any other result type.
static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue Value = Op.getOperand(0);
  SDValue Exponent = Op.getOperand(1);
  // Remember the incoming scalar type so an extended f16 can be rounded back.
  const EVT OrigValueTy = Value.getValueType();

  const MVT::SimpleValueType ResultTy = Op.getSimpleValueType().SimpleTy;
  const bool IsF16 = ResultTy == MVT::f16;
  const bool IsF64 = ResultTy == MVT::f64;
  // Only f16, f32 and f64 are handled here; bail out before creating nodes.
  if (!IsF16 && !IsF64 && ResultTy != MVT::f32)
    return SDValue();

  EVT VecValueTy, VecExpTy;
  if (IsF64) {
    VecValueTy = MVT::nxv2f64;
    VecExpTy = MVT::nxv2i64;
    Exponent = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Exponent);
  } else {
    // f16 is widened and then shares the f32 lowering.
    if (IsF16)
      Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Value);
    VecValueTy = MVT::nxv4f32;
    VecExpTy = MVT::nxv4i32;
  }

  // Scalar type actually fed to fscale (f32 for an f16 input).
  const EVT WorkingTy = Value.getValueType();

  // Place both scalars in lane 0 of undefined scalable vectors.
  SDValue LaneZero = DAG.getConstant(0, DL, MVT::i64);
  SDValue ValueVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecValueTy,
                                 DAG.getUNDEF(VecValueTy), Value, LaneZero);
  SDValue ExpVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecExpTy,
                               DAG.getUNDEF(VecExpTy), Exponent, LaneZero);

  // Predicate with every lane active, matching the value vector's lane count.
  SDValue AllTrue =
      getPTrue(DAG, DL, VecValueTy.changeVectorElementType(MVT::i1),
               AArch64SVEPredPattern::all);

  SDValue IntrinsicID =
      DAG.getConstant(Intrinsic::aarch64_sve_fscale, DL, MVT::i64);
  SDValue Scaled = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecValueTy,
                               IntrinsicID, AllTrue, ValueVec, ExpVec);

  // Only lane 0 carries a meaningful result.
  SDValue Result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, WorkingTy, Scaled, LaneZero);
  if (WorkingTy == OrigValueTy)
    return Result;

  // The value was extended above; narrow the result back to the input type.
  return DAG.getNode(ISD::FP_ROUND, DL, OrigValueTy, Result,
                     DAG.getIntPtrConstant(1, DL));
}
5946+
58985947
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
58995948
SelectionDAG &DAG) const {
59005949
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6215,6 +6264,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
62156264
case ISD::FSHL:
62166265
case ISD::FSHR:
62176266
return LowerFunnelShift(Op, DAG);
6267+
case ISD::FLDEXP:
6268+
return LowerFLDEXP(Op, DAG);
62186269
}
62196270
}
62206271

llvm/test/CodeGen/AArch64/ldexp.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
3+
4+
; Double-precision case: a fast tail call to @ldexp with an i32 exponent. The
; checks below expect no call to ldexp; instead the exponent is sign-extended
; (sxtw), moved into a vector register, and an fscale on .d lanes is emitted.
define double @testExp(double %val, i32 %a) {
5+
; CHECK-LABEL: testExp:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: ptrue p0.d
8+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
9+
; CHECK-NEXT: sxtw x8, w0
10+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
11+
; CHECK-NEXT: fmov d1, x8
12+
; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z1.d
13+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
14+
; CHECK-NEXT: ret
15+
entry:
16+
%call = tail call fast double @ldexp(double %val, i32 %a)
17+
ret double %call
18+
}
19+
20+
declare double @ldexp(double, i32) memory(none)
21+
22+
; Single-precision case: a fast tail call to @ldexpf with an i32 exponent. The
; checks below expect the exponent to be moved straight into a vector register
; (no extension) and an fscale on .s lanes instead of a call to ldexpf.
define float @testExpf(float %val, i32 %a) {
23+
; CHECK-LABEL: testExpf:
24+
; CHECK: // %bb.0: // %entry
25+
; CHECK-NEXT: ptrue p0.s
26+
; CHECK-NEXT: fmov s1, w0
27+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
28+
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
29+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
30+
; CHECK-NEXT: ret
31+
entry:
32+
%call = tail call fast float @ldexpf(float %val, i32 %a)
33+
ret float %call
34+
}
35+
36+
declare float @ldexpf(float, i32) memory(none)
37+
38+
; fp128 case: the checks below expect the call to remain a plain tail branch
; to ldexpl, i.e. no fscale is emitted for this type.
define fp128 @testExpl(fp128 %val, i32 %a) {
39+
; CHECK-LABEL: testExpl:
40+
; CHECK: // %bb.0: // %entry
41+
; CHECK-NEXT: b ldexpl
42+
entry:
43+
%call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
44+
ret fp128 %call
45+
}
46+
47+
declare fp128 @ldexpl(fp128, i32) memory(none)
48+
49+
; Half-precision case, using the llvm.ldexp.f16.i32 intrinsic directly. The
; checks below expect the value to be converted to single precision (fcvt),
; scaled with an fscale on .s lanes, and converted back to half.
define half @testExpf16(half %val, i32 %a) {
49+
; CHECK-LABEL: testExpf16:
50+
; CHECK: // %bb.0: // %entry
51+
; CHECK-NEXT: ptrue p0.s
52+
; CHECK-NEXT: fcvt s0, h0
53+
; CHECK-NEXT: fmov s1, w0
54+
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
55+
; CHECK-NEXT: fcvt h0, s0
56+
; CHECK-NEXT: ret
57+
entry:
58+
%0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
59+
ret half %0
60+
}
62+
63+
declare half @llvm.ldexp.f16.i32(half, i32) memory(none)

0 commit comments

Comments
 (0)