Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2405,6 +2405,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
}

if (Subtarget.hasAVX512()) {
for (MVT VT : { MVT::f16, MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64 })
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure this is right? I'd expect MVT::v4f32, MVT::v2f64, MVT::v8f32, MVT::v4f64 to be the VLX cases.

setOperationAction(ISD::FLDEXP, VT, Custom);

if (Subtarget.hasVLX())
for (MVT VT : { MVT::v8f32, MVT::v4f64, MVT::v16f32, MVT::v8f64 })
setOperationAction(ISD::FLDEXP, VT, Custom);
}

// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
Expand Down Expand Up @@ -31814,6 +31823,57 @@ static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
return StringRef();
}

// Custom lowering for ISD::FLDEXP nodes (dispatched from LowerOperation).
// Operand 0 is the value to scale (X) and operand 1 is the exponent (Exp).
// Scalar f16/f32/f64 inputs are placed in lane 0 of a 128-bit vector and
// lowered through X86ISD::SCALEFS; the vector cases convert the exponent to
// floating point and emit X86ISD::SCALEF (or SCALEFS for 128-bit vectors when
// VLX is unavailable). Returns an empty SDValue for any type the switch below
// does not handle.
static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue X = Op.getOperand(0);
// Remember the original type of X: the f16 case below widens X to f32, and
// the result has to be rounded back to this type at the end.
EVT XTy = X.getValueType();
SDValue Exp = Op.getOperand(1);
SDLoc DL(Op);
// Vector types used to carry the scalar value and exponent in lane 0; only
// assigned by the scalar cases of the switch.
EVT XVT, ExpVT;
switch (Op.getSimpleValueType().SimpleTy) {
default:
// Unhandled type: hand back an empty SDValue.
return SDValue();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use vscalefph if we have AVX512FP16?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Copy link
Contributor Author

@huhu233 huhu233 Dec 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, there may be a risk of truncation, as the EXP operand of FLDEXP is typed i32, e.g., @llvm.ldexp.f16.i32(half, i32) -> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, ...). I didn't know how to handle the issue elegantly, so I extended the value to f32 here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! LangRef doesn't give an example f16 case. Should we define it as @llvm.ldexp.f16.i16(half, i16)? i32 is too large a range to be useful for FP16.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we use value tracking to check the bounds of the EXP operand?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Value tracking seems to make things very complicated.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd settle for a TODO comment for now

case MVT::f16:
// TODO: Choose vscalefph when fp16 for ISD::FLDEXP is fully supported.
// Until then, widen the half value to f32 and share the f32 path below.
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
[[fallthrough]];
case MVT::f32:
// Scalar f32 (and widened f16) is carried in a v4f32.
XVT = MVT::v4f32;
ExpVT = MVT::v4f32;
break;
case MVT::f64:
// Scalar f64 is carried in a v2f64.
XVT = MVT::v2f64;
ExpVT = MVT::v2f64;
break;
case MVT::v4f32:
case MVT::v2f64:
// Without VLX, convert the exponent to floating point and emit
// X86ISD::SCALEFS directly on the 128-bit operands, with X as the third
// operand.
// NOTE(review): SCALEFS is presumably the scalar form that only scales
// lane 0 - confirm this is correct for a full-vector ldexp.
if (!Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEFS, DL, XTy, X, Exp, X);
}
// With VLX there is no break or return here, so control continues into the
// case labels that follow and shares their X86ISD::SCALEF lowering.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need to vectorize to use SCALEFS? I thought SCALEFS was for scalar types and SCALEF was for vector types? (So it should be possible to add vector support here as well).

case MVT::v8f32:
case MVT::v4f64:
case MVT::v16f32:
case MVT::v8f64:
// Vector path: convert the integer exponent to the same floating-point
// vector type as X, then emit X86ISD::SCALEF. X is also passed as the third
// operand (presumably the pass-through source - TODO confirm).
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
}

// Scalar path (f16/f32/f64): only reached via the `break` statements above.
// Index constant used for both the lane-0 insert and the lane-0 extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert the integer exponent to the (possibly widened) type of X.
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
// Place the value and the exponent in lane 0 of otherwise-undef vectors.
SDValue VX =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
DAG.getUNDEF(ExpVT), Exp, Zero);
// Scale, then pull the scalar result back out of lane 0.
SDValue Scalef = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
SDValue Final =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), Scalef, Zero);
// X was widened (the f16 case), so round the result back to the original
// type.
// NOTE(review): the trailing constant 1 is presumably FP_ROUND's "rounding
// is value-preserving" flag - confirm that holds after scaling.
if (X.getValueType() != XTy)
Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
DAG.getIntPtrConstant(1, SDLoc(Op)));
return Final;
}

bool X86TargetLowering::isInlineAsmTargetBranch(
const SmallVectorImpl<StringRef> &AsmStrs, unsigned OpNo) const {
// In a __asm block, __asm inst foo where inst is CALL or JMP should be
Expand Down Expand Up @@ -31979,6 +32039,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
}
}

Expand Down
Loading