Skip to content

Commit 1abba52

Browse files
committed
[PowerPC] Add missing handling for half precision
The fix for PR39865 took care of some of the handling for half precision, but it missed a number of issues that still exist. This patch fixes the remaining issues that cause crashes in the PPC back end.
Fixes: https://bugs.llvm.org/show_bug.cgi?id=45776
Differential revision: https://reviews.llvm.org/D79283
(cherry picked from commit 1a493b0)
1 parent 3428405 commit 1abba52

File tree

6 files changed

+104
-177
lines changed

6 files changed

+104
-177
lines changed

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
959959
let IsLoad = 1;
960960
let MemoryVT = i32;
961961
}
962+
def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
963+
let IsLoad = 1;
964+
let MemoryVT = f16;
965+
}
962966
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
963967
let IsLoad = 1;
964968
let MemoryVT = f32;
@@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
10941098
let IsStore = 1;
10951099
let MemoryVT = i32;
10961100
}
1101+
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
1102+
(truncstore node:$val, node:$ptr)> {
1103+
let IsStore = 1;
1104+
let MemoryVT = f16;
1105+
}
10971106
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
10981107
(truncstore node:$val, node:$ptr)> {
10991108
let IsStore = 1;

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
167167
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
168168
}
169169

170+
if (Subtarget.isISA3_0()) {
171+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
172+
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
173+
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
174+
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
175+
} else {
176+
// No extending loads from f16 or HW conversions back and forth.
177+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
178+
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
179+
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
180+
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
181+
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
182+
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
183+
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
184+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
185+
}
186+
170187
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
171188

172189
// PowerPC has pre-increment loads and stores.
@@ -10361,6 +10378,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
1036110378
assert(Op.getOpcode() == ISD::FP_EXTEND &&
1036210379
"Should only be called for ISD::FP_EXTEND");
1036310380

10381+
// FIXME: handle extends from half precision float vectors on P9.
1036410382
// We only want to custom lower an extend from v2f32 to v2f64.
1036510383
if (Op.getValueType() != MVT::v2f64 ||
1036610384
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10592,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
1057410592
case ISD::BITCAST:
1057510593
// Don't handle bitcast here.
1057610594
return;
10595+
case ISD::FP_EXTEND:
10596+
SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10597+
if (Lowered)
10598+
Results.push_back(Lowered);
10599+
return;
1057710600
}
1057810601
}
1057910602

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ namespace llvm {
637637
/// then the VPERM for the shuffle. All in all a very slow sequence.
638638
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
639639
const override {
640-
if (VT.getScalarSizeInBits() % 8 == 0)
640+
if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
641641
return TypeWidenVector;
642642
return TargetLoweringBase::getPreferredVectorAction(VT);
643643
}

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
33433343
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
33443344
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
33453345

3346+
// Load/convert and convert/store patterns for f16.
3347+
def : Pat<(f64 (extloadf16 xoaddr:$src)),
3348+
(f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
3349+
def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
3350+
(STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
3351+
def : Pat<(f32 (extloadf16 xoaddr:$src)),
3352+
(f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
3353+
def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
3354+
(STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
3355+
def : Pat<(f64 (f16_to_fp i32:$A)),
3356+
(f64 (XSCVHPDP (MTVSRWZ $A)))>;
3357+
def : Pat<(f32 (f16_to_fp i32:$A)),
3358+
(f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
3359+
def : Pat<(i32 (fp_to_f16 f32:$A)),
3360+
(i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
3361+
def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
3362+
33463363
let Predicates = [IsBigEndian, HasP9Vector] in {
33473364
// Scalar stores of i8
33483365
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),

llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,46 +11,34 @@
1111
define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
1212
; P9LE-LABEL: test_liwzx1:
1313
; P9LE: # %bb.0:
14-
; P9LE-NEXT: lfiwzx f0, 0, r3
15-
; P9LE-NEXT: lfiwzx f1, 0, r4
16-
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
17-
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
18-
; P9LE-NEXT: xvaddsp vs0, vs0, vs1
19-
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
20-
; P9LE-NEXT: stfiwx f0, 0, r5
14+
; P9LE-NEXT: lfs f0, 0(r3)
15+
; P9LE-NEXT: lfs f1, 0(r4)
16+
; P9LE-NEXT: xsaddsp f0, f0, f1
17+
; P9LE-NEXT: stfs f0, 0(r5)
2118
; P9LE-NEXT: blr
2219
;
2320
; P9BE-LABEL: test_liwzx1:
2421
; P9BE: # %bb.0:
25-
; P9BE-NEXT: lfiwzx f0, 0, r3
26-
; P9BE-NEXT: lfiwzx f1, 0, r4
27-
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
28-
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
29-
; P9BE-NEXT: xvaddsp vs0, vs0, vs1
30-
; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
31-
; P9BE-NEXT: stfiwx f0, 0, r5
22+
; P9BE-NEXT: lfs f0, 0(r3)
23+
; P9BE-NEXT: lfs f1, 0(r4)
24+
; P9BE-NEXT: xsaddsp f0, f0, f1
25+
; P9BE-NEXT: stfs f0, 0(r5)
3226
; P9BE-NEXT: blr
3327
;
3428
; P8LE-LABEL: test_liwzx1:
3529
; P8LE: # %bb.0:
36-
; P8LE-NEXT: lfiwzx f0, 0, r3
37-
; P8LE-NEXT: lfiwzx f1, 0, r4
38-
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
39-
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
40-
; P8LE-NEXT: xvaddsp vs0, vs0, vs1
41-
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
42-
; P8LE-NEXT: stfiwx f0, 0, r5
30+
; P8LE-NEXT: lfsx f0, 0, r3
31+
; P8LE-NEXT: lfsx f1, 0, r4
32+
; P8LE-NEXT: xsaddsp f0, f0, f1
33+
; P8LE-NEXT: stfsx f0, 0, r5
4334
; P8LE-NEXT: blr
4435
;
4536
; P8BE-LABEL: test_liwzx1:
4637
; P8BE: # %bb.0:
47-
; P8BE-NEXT: lfiwzx f0, 0, r3
48-
; P8BE-NEXT: lfiwzx f1, 0, r4
49-
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
50-
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
51-
; P8BE-NEXT: xvaddsp vs0, vs0, vs1
52-
; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3
53-
; P8BE-NEXT: stfiwx f0, 0, r5
38+
; P8BE-NEXT: lfsx f0, 0, r3
39+
; P8BE-NEXT: lfsx f1, 0, r4
40+
; P8BE-NEXT: xsaddsp f0, f0, f1
41+
; P8BE-NEXT: stfsx f0, 0, r5
5442
; P8BE-NEXT: blr
5543

5644

@@ -65,50 +53,38 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
6553
define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
6654
; P9LE-LABEL: test_liwzx2:
6755
; P9LE: # %bb.0:
68-
; P9LE-NEXT: lfiwzx f0, 0, r3
69-
; P9LE-NEXT: lfiwzx f1, 0, r4
70-
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
71-
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
72-
; P9LE-NEXT: xvsubsp vs0, vs0, vs1
73-
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2
56+
; P9LE-NEXT: lfs f0, 0(r3)
7457
; P9LE-NEXT: mr r3, r5
75-
; P9LE-NEXT: stfiwx f0, 0, r5
58+
; P9LE-NEXT: lfs f1, 0(r4)
59+
; P9LE-NEXT: xssubsp f0, f0, f1
60+
; P9LE-NEXT: stfs f0, 0(r5)
7661
; P9LE-NEXT: blr
7762
;
7863
; P9BE-LABEL: test_liwzx2:
7964
; P9BE: # %bb.0:
80-
; P9BE-NEXT: lfiwzx f0, 0, r3
81-
; P9BE-NEXT: lfiwzx f1, 0, r4
82-
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
83-
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
84-
; P9BE-NEXT: xvsubsp vs0, vs0, vs1
85-
; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
65+
; P9BE-NEXT: lfs f0, 0(r3)
8666
; P9BE-NEXT: mr r3, r5
87-
; P9BE-NEXT: stfiwx f0, 0, r5
67+
; P9BE-NEXT: lfs f1, 0(r4)
68+
; P9BE-NEXT: xssubsp f0, f0, f1
69+
; P9BE-NEXT: stfs f0, 0(r5)
8870
; P9BE-NEXT: blr
8971
;
9072
; P8LE-LABEL: test_liwzx2:
9173
; P8LE: # %bb.0:
92-
; P8LE-NEXT: lfiwzx f0, 0, r3
93-
; P8LE-NEXT: lfiwzx f1, 0, r4
74+
; P8LE-NEXT: lfsx f0, 0, r3
75+
; P8LE-NEXT: lfsx f1, 0, r4
9476
; P8LE-NEXT: mr r3, r5
95-
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
96-
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
97-
; P8LE-NEXT: xvsubsp vs0, vs0, vs1
98-
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2
99-
; P8LE-NEXT: stfiwx f0, 0, r5
77+
; P8LE-NEXT: xssubsp f0, f0, f1
78+
; P8LE-NEXT: stfsx f0, 0, r5
10079
; P8LE-NEXT: blr
10180
;
10281
; P8BE-LABEL: test_liwzx2:
10382
; P8BE: # %bb.0:
104-
; P8BE-NEXT: lfiwzx f0, 0, r3
105-
; P8BE-NEXT: lfiwzx f1, 0, r4
83+
; P8BE-NEXT: lfsx f0, 0, r3
84+
; P8BE-NEXT: lfsx f1, 0, r4
10685
; P8BE-NEXT: mr r3, r5
107-
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
108-
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
109-
; P8BE-NEXT: xvsubsp vs0, vs0, vs1
110-
; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3
111-
; P8BE-NEXT: stfiwx f0, 0, r5
86+
; P8BE-NEXT: xssubsp f0, f0, f1
87+
; P8BE-NEXT: stfsx f0, 0, r5
11288
; P8BE-NEXT: blr
11389

11490

0 commit comments

Comments
 (0)