Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 135 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
}

// Set operations for 'LASX' feature.
Expand Down Expand Up @@ -448,6 +449,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
}
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Custom);
}

// Set DAG combine for LA32 and LA64.
Expand All @@ -466,8 +468,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,

// Set DAG combine for 'LASX' feature.

if (Subtarget.hasExtLASX())
if (Subtarget.hasExtLASX()) {
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
}

// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
Expand Down Expand Up @@ -592,7 +596,101 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
}
return SDValue();
}

// Fold a pair of same-typed ISD::FP_ROUND / LoongArchISD::VFCVT nodes that
// feed N into a single LoongArchISD::VFCVT. For example:
//   x1 = fp_round x, 0
//   y1 = fp_round y, 0
//   z = concat_vectors x1, y1
// Or
//   x1 = LoongArchISD::VFCVT undef, x
//   y1 = LoongArchISD::VFCVT undef, y
//   z = LoongArchISD::VPACKEV y1, x1
// can be combined to:
//   z = LoongArchISD::VFCVT y, x
//
// N must be a two-operand ISD::CONCAT_VECTORS or a LoongArchISD::VPACKEV
// (asserted). Both operands are inspected through any bitcasts. Returns the
// replacement value bitcast to N's type, or an empty SDValue when no fold
// applies.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG,
                               const LoongArchSubtarget &Subtarget) {
  assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
          (N->getOpcode() == LoongArchISD::VPACKEV)) &&
         "Invalid Node");

  SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
  SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
  unsigned Opcode0 = Op0.getOpcode();
  unsigned Opcode1 = Op1.getOpcode();
  if (Opcode0 != Opcode1)
    return SDValue();

  if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
    return SDValue();

  // Check if two nodes have only one use.
  if (!Op0.hasOneUse() || !Op1.hasOneUse())
    return SDValue();

  EVT VT = N.getValueType();
  EVT SVT0 = Op0.getValueType();
  EVT SVT1 = Op1.getValueType();
  // Check if two nodes have the same result type.
  if (SVT0 != SVT1)
    return SDValue();

  // Check if two nodes have the same operand type.
  EVT SSVT0 = Op0.getOperand(0).getValueType();
  EVT SSVT1 = Op1.getOperand(0).getValueType();
  if (SSVT0 != SSVT1)
    return SDValue();

  if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
    if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
        SSVT0 == MVT::v4f64) {
      // A vector_shuffle is required in the final step, as xvfcvt instruction
      // operates on each 128-bit segment as a lane.
      //
      // The operands were found by looking through bitcasts, so VT is not
      // necessarily v8f32 (it is only known to be a 256-bit vector). The
      // shuffle inputs must have the shuffle's own result type, so build the
      // shuffle in the conversion type and bitcast to VT only at the end.
      const MVT CvtVT = MVT::v8f32;
      SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, CvtVT,
                                Op1.getOperand(0), Op0.getOperand(0));
      SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
      Res = DAG.getVectorShuffle(CvtVT, DL, Res, DAG.getUNDEF(CvtVT), Mask);
      return DAG.getBitcast(VT, Res);
    }
  }

  if (N->getOpcode() == LoongArchISD::VPACKEV &&
      Opcode0 == LoongArchISD::VFCVT) {
    // For VPACKEV, check if the first operand of each LoongArchISD::VFCVT is
    // undef.
    if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
      return SDValue();

    if (Subtarget.hasExtLSX() && (VT == MVT::v2i64 || VT == MVT::v2f64) &&
        SVT0 == MVT::v4f32 && SSVT0 == MVT::v2f64) {
      // Merge the two half-used conversions into one that converts both
      // v2f64 sources at once.
      SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
                                Op0.getOperand(1), Op1.getOperand(1));
      return DAG.getBitcast(VT, Res);
    }
  }

  return SDValue();
}

// Custom lowering for ISD::FP_ROUND. Only v4f64 -> v4f32 is handled: the
// source vector is split into a low and a high half, and both halves are
// narrowed by one LoongArchISD::VFCVT node (high half first, low half second).
// Every other type combination yields an empty SDValue.
SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);
  const MVT ResVT = Op.getSimpleValueType();
  const MVT SrcVT = Src.getSimpleValueType();

  // Bail out early on anything but the single supported conversion.
  if (ResVT != MVT::v4f32 || SrcVT != MVT::v4f64)
    return SDValue();

  SDLoc DL(Op);
  // Halves.first is the low half, Halves.second the high half.
  std::pair<SDValue, SDValue> Halves = DAG.SplitVector(Src, DL);
  return DAG.getNode(LoongArchISD::VFCVT, DL, ResVT, Halves.second,
                     Halves.first);
}

Expand Down Expand Up @@ -4720,6 +4818,21 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
break;
}
case ISD::FP_ROUND: {
assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
"Unexpected custom legalisation");
// On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
// v2f32) is scalarized. Add a customized v2f32 widening to convert it into
// a target-specific LoongArchISD::VFCVT to optimize it.
if (VT == MVT::v2f32) {
SDValue Src = N->getOperand(0);
SDValue Undef = DAG.getUNDEF(Src.getValueType());
SDValue Dst =
DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Src);
Results.push_back(Dst);
}
break;
}
case ISD::BSWAP: {
SDValue Src = N->getOperand(0);
assert((VT == MVT::i16 || VT == MVT::i32) &&
Expand Down Expand Up @@ -6679,6 +6792,20 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

// DAG combine hook for ISD::CONCAT_VECTORS. A two-operand vector concat is
// handed to combineFP_ROUND and whatever it produces (possibly an empty
// SDValue) is returned; any other node is left alone by returning an empty
// SDValue. DCI is currently unused.
static SDValue
performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const LoongArchSubtarget &Subtarget) {
  const EVT ResTy = N->getValueType(0);
  if (!ResTy.isVector() || N->getNumOperands() != 2)
    return SDValue();

  return combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget);
}

SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -6714,6 +6841,12 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
case ISD::CONCAT_VECTORS:
return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
case LoongArchISD::VPACKEV:
if (SDValue Result =
combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
return Result;
}
return SDValue();
}
Expand Down Expand Up @@ -7512,6 +7645,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
NODE_NAME_CASE(VFCVT)
NODE_NAME_CASE(VSLLI)
NODE_NAME_CASE(VSRLI)
NODE_NAME_CASE(VBSLL)
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ enum NodeType : unsigned {
FRECIPE,
FRSQRTE,

VFCVT,

// Vector logicial left / right shift by immediate
VSLLI,
VSRLI,
Expand Down Expand Up @@ -415,6 +417,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2403,6 +2403,10 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;

// Vector floating-point conversion.
// Selects a loongarch_vfcvt_s_d node taking two v4f64 operands and producing
// v8f32 to the XVFCVT_S_D instruction, passing the operands through in the
// same order.
def : Pat<(v8f32 (loongarch_vfcvt_s_d (v4f64 LASX256:$xj), (v4f64 LASX256:$xk))),
(XVFCVT_S_D LASX256:$xj, LASX256:$xk)>;

// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTC
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
// Type profile with one result and two operands. The result and the first
// operand must each be a floating-point vector, and the second operand must
// have the same type as the first.
def SDT_LoongArchVFCVT_S_D : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisVec<1>, SDTCisFP<1>, SDTCisSameAs<1, 2>]>;

// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
Expand Down Expand Up @@ -82,6 +84,8 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;

// Pattern-side name for the LoongArchISD::VFCVT node, typed by
// SDT_LoongArchVFCVT_S_D.
def loongarch_vfcvt_s_d: SDNode<"LoongArchISD::VFCVT", SDT_LoongArchVFCVT_S_D>;

def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
Expand Down Expand Up @@ -2519,6 +2523,9 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;

// Vector floating-point conversion.
// Selects a loongarch_vfcvt_s_d node taking two v2f64 operands and producing
// v4f32 to the VFCVT_S_D instruction, passing the operands through in the
// same order.
def : Pat<(v4f32 (loongarch_vfcvt_s_d (v2f64 LSX128:$vj), (v2f64 LSX128:$vk))),
(VFCVT_S_D LSX128:$vj, LSX128:$vk)>;

// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
Expand Down
48 changes: 10 additions & 38 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,9 @@ define void @fptrunc_v4f64_to_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fptrunc_v4f64_to_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
; CHECK-NEXT: fcvt.s.d $fa1, $fa1
; CHECK-NEXT: xvpickve.d $xr2, $xr0, 0
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 2
; CHECK-NEXT: fcvt.s.d $fa1, $fa1
; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
; CHECK-NEXT: vst $vr2, $a0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT: vfcvt.s.d $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
Expand All @@ -30,32 +21,13 @@ entry:
define void @fptrunc_v8f64_to_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fptrunc_v8f64_to_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 32
; CHECK-NEXT: xvld $xr1, $a1, 0
; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: xvpickve.d $xr3, $xr0, 0
; CHECK-NEXT: fcvt.s.d $fa3, $fa3
; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
; CHECK-NEXT: xvpickve.d $xr0, $xr1, 1
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 0
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: vextrins.w $vr2, $vr0, 16
; CHECK-NEXT: xvpickve.d $xr0, $xr1, 2
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: vextrins.w $vr2, $vr0, 32
; CHECK-NEXT: xvpickve.d $xr0, $xr1, 3
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
; CHECK-NEXT: xvpermi.q $xr2, $xr3, 2
; CHECK-NEXT: xvst $xr2, $a0, 0
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a1, 32
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0
; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x double>, ptr %a0
Expand Down
27 changes: 5 additions & 22 deletions llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,15 @@ define void @fptrunc_v2f64_to_v2f32(ptr %res, ptr %a0) nounwind {
; LA32-LABEL: fptrunc_v2f64_to_v2f32:
; LA32: # %bb.0: # %entry
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vreplvei.d $vr1, $vr0, 0
; LA32-NEXT: fcvt.s.d $fa1, $fa1
; LA32-NEXT: vreplvei.d $vr0, $vr0, 1
; LA32-NEXT: fcvt.s.d $fa0, $fa0
; LA32-NEXT: fst.s $fa0, $a0, 4
; LA32-NEXT: fst.s $fa1, $a0, 0
; LA32-NEXT: vfcvt.s.d $vr0, $vr0, $vr0
; LA32-NEXT: vstelm.w $vr0, $a0, 4, 1
; LA32-NEXT: vstelm.w $vr0, $a0, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: fptrunc_v2f64_to_v2f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: vld $vr0, $a1, 0
; LA64-NEXT: vreplvei.d $vr1, $vr0, 1
; LA64-NEXT: fcvt.s.d $fa1, $fa1
; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
; LA64-NEXT: fcvt.s.d $fa0, $fa0
; LA64-NEXT: vextrins.w $vr0, $vr1, 16
; LA64-NEXT: vfcvt.s.d $vr0, $vr0, $vr0
; LA64-NEXT: vstelm.d $vr0, $a0, 0, 0
; LA64-NEXT: ret
entry:
Expand All @@ -51,17 +44,7 @@ define void @fptrunc_v4f64_to_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a1, 16
; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: fcvt.s.d $fa0, $fa0
; CHECK-NEXT: vextrins.w $vr0, $vr2, 16
; CHECK-NEXT: vreplvei.d $vr2, $vr1, 0
; CHECK-NEXT: fcvt.s.d $fa2, $fa2
; CHECK-NEXT: vextrins.w $vr0, $vr2, 32
; CHECK-NEXT: vreplvei.d $vr1, $vr1, 1
; CHECK-NEXT: fcvt.s.d $fa1, $fa1
; CHECK-NEXT: vextrins.w $vr0, $vr1, 48
; CHECK-NEXT: vfcvt.s.d $vr0, $vr1, $vr0
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
Expand Down