61 changes: 33 additions & 28 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -442,14 +442,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
Subtarget.hasFRE()))
if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);

if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
Subtarget.hasFRES()))
!(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);

if (Subtarget.hasFCPSGN()) {
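
Note: with the global UnsafeFPMath gate removed here, whether an estimate-based square root is acceptable becomes a per-instruction question. A minimal IR sketch (function name hypothetical) of a sqrt call carrying the afn fast-math flag, which is the kind of per-instruction permission the FRSQRTE/FRE estimate expansion can check instead of the global option:

; Approximate sqrt is opted into per call via the afn flag (sketch only).
define float @sqrt_approx(float %x) {
entry:
  %r = call afn float @llvm.sqrt.f32(float %x)
  ret float %r
}

declare float @llvm.sqrt.f32(float)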
@@ -565,16 +562,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::LRINT, MVT::f64, Legal);
setOperationAction(ISD::LRINT, MVT::f32, Legal);
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
}

setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom);
setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom);
setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom);
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

// The nearbyint variants are not allowed to raise the inexact exception
// so we can only code-gen them with unsafe math.
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
}
// so we can only code-gen them with fpexcept.ignore.
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom);

setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -8911,11 +8905,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// be lost at this stage, but is below the single-precision rounding
// position.
//
// However, if -enable-unsafe-fp-math is in effect, accept double
// However, if afn is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
!Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// FIXME: Currently INT_TO_FP can't support fast math flags because
// of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
// false.
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
!Op->getFlags().hasApproximateFuncs()) {

// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
@@ -12759,6 +12755,14 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerADDSUBO_CARRY(Op, DAG);
case ISD::UCMP:
return LowerUCMP(Op, DAG);
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_FNEARBYINT:
if (Op->getFlags().hasNoFPExcept())
return Op;
return SDValue();
}
}
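
Note: a rough IR sketch (function names hypothetical) of the two situations this switch distinguishes. A constrained lrint marked fpexcept.ignore gets the nofpexcept flag and can be left to the new strict_lrint patterns below (fctid plus mfvsrd), while fpexcept.strict falls through to the default lowering:

; fpexcept.ignore: eligible for the strict_lrint patterns added in PPCInstrVSX.td.
define i64 @lrint_ignore(double %x) strictfp {
entry:
  %r = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp
  ret i64 %r
}

; fpexcept.strict: not handled here, so it keeps the conservative default expansion.
define i64 @lrint_strict(double %x) strictfp {
entry:
  %r = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
  ret i64 %r
}

declare i64 @llvm.experimental.constrained.lrint.i64.f64(double, metadata, metadata)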

@@ -18707,11 +18711,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
bool AllowContract = I->getFastMathFlags().allowContract() &&
User->getFastMathFlags().allowContract();

return !(
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
}
case Instruction::Load: {
// Don't break "store (load float*)" pattern, this pattern will be combined
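Before the TableGen changes, a minimal IR sketch (names hypothetical) of the case the updated isProfitableToHoist check targets: the multiply is only worth keeping next to its add when both instructions permit contraction (or fp-contract=fast is in effect), since that is what allows them to fuse into an FMA:

define double @keep_for_fma(double %a, double %b, double %c, i1 %p) {
entry:
  br i1 %p, label %fuse, label %skip

fuse:
  ; Both operations carry the contract flag, so FMA fusion is allowed; hoisting
  ; the fmul away from its fadd user would now be reported as unprofitable.
  %mul = fmul contract double %a, %b
  %sum = fadd contract double %mul, %c
  ret double %sum

skip:
  ret double %c
}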
30 changes: 16 additions & 14 deletions llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2786,14 +2786,16 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
// these need to be defined after the any_frint versions so ISEL will correctly
// add the chain to the strict versions.
Review comment (Contributor): Maybe we can add a TODO to investigate whether or not we can use Roland's suggestion of xsrdpiz in these cases?

Review comment (Contributor Author): It would be possible when SDNodeFlags and MIFlag support rounding modes.
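
For context, a small IR sketch (names hypothetical) of what the strict_fnearbyint patterns below are meant to match: a constrained nearbyint whose exception behaviour is fpexcept.ignore, which is what allows selection to xsrdpic/xvrdpic despite the inexact-exception concern noted above:

define double @nearbyint_ignore(double %x) strictfp {
entry:
  %r = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp
  ret double %r
}

declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)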

def : Pat<(f32 (fnearbyint f32:$S)),
// TODO: Match strict fp rounding intrinsics with instructions like xsrdpiz when
// rounding mode is propagated to CodeGen part.
def : Pat<(f32 (strict_fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f64 (fnearbyint f64:$S)),
def : Pat<(f64 (strict_fnearbyint f64:$S)),
(f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (fnearbyint v2f64:$S)),
def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)),
(v2f64 (XVRDPIC $S))>;
def : Pat<(v4f32 (fnearbyint v4f32:$S)),
def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)),
(v4f32 (XVRSPIC $S))>;

// Materialize a zero-vector of long long
@@ -3578,25 +3580,25 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;

// Rounding to integer.
def : Pat<(i64 (lrint f64:$S)),
def : Pat<(i64 (strict_lrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
def : Pat<(i64 (lrint f32:$S)),
def : Pat<(i64 (strict_lrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
def : Pat<(i64 (llrint f64:$S)),
def : Pat<(i64 (strict_llrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
def : Pat<(i64 (llrint f32:$S)),
def : Pat<(i64 (strict_llrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
def : Pat<(i64 (lround f64:$S)),
def : Pat<(i64 (strict_lround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
def : Pat<(i64 (lround f32:$S)),
def : Pat<(i64 (strict_lround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
def : Pat<(i32 (lround f64:$S)),
def : Pat<(i32 (strict_lround f64:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>;
def : Pat<(i32 (lround f32:$S)),
def : Pat<(i32 (strict_lround f32:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
def : Pat<(i64 (llround f64:$S)),
def : Pat<(i64 (strict_llround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
def : Pat<(i64 (llround f32:$S)),
def : Pat<(i64 (strict_llround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;

// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
18 changes: 4 additions & 14 deletions llvm/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -4,20 +4,18 @@
; for minor code generation differences.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL
; Also check that with -enable-unsafe-fp-math we do not get that extra
; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
; CHECK-UNSAFE-NOT: isel

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

define float @test(i64 %x) nounwind readnone {
; Verify that we get the code sequence needed to avoid double-rounding.
; Note that only parts of the sequence are checked for here, to allow
; for minor code generation differences.
; Also check that with -enable-unsafe-fp-math we do not get that extra
; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: clrldi 4, 3, 53
@@ -51,18 +49,10 @@ define float @test(i64 %x) nounwind readnone {
; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
; CHECK-NO-ISEL-NEXT: frsp 1, 0
; CHECK-NO-ISEL-NEXT: blr
;
; CHECK-UNSAFE-LABEL: test:
; CHECK-UNSAFE: # %bb.0: # %entry
; CHECK-UNSAFE-NEXT: std 3, -8(1)
; CHECK-UNSAFE-NEXT: lfd 0, -8(1)
; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0
; CHECK-UNSAFE-NEXT: frsp 1, 0
; CHECK-UNSAFE-NEXT: blr

entry:
%conv = sitofp i64 %x to float
ret float %conv
}


; TODO: Add sitofp afn test.