Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
VT, Custom);
setOperationAction(
{ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
Custom);

// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
Expand Down Expand Up @@ -1101,9 +1101,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

setOperationAction(ISD::SELECT, VT, Custom);

setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
VT, Custom);
setOperationAction(
{ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
Custom);

setOperationAction(ISD::BITCAST, VT, Custom);

Expand Down Expand Up @@ -6230,7 +6230,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::TRUNCATE:
case ISD::TRUNCATE_SSAT_S:
case ISD::TRUNCATE_SSAT_U:
case ISD::TRUNCATE_USAT_U:
// Only custom-lower vector truncates
if (!Op.getSimpleValueType().isVector())
Expand Down Expand Up @@ -8117,7 +8116,7 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
unsigned NewOpc;
if (Opc == ISD::TRUNCATE_SSAT_S)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
else if (Opc == ISD::TRUNCATE_USAT_U)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
else
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
Expand Down
32 changes: 24 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
Expand All @@ -117,8 +119,10 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
Expand Down Expand Up @@ -352,8 +356,10 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
Expand All @@ -368,8 +374,10 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
Expand Down Expand Up @@ -437,8 +445,10 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
Expand All @@ -455,8 +465,10 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
Expand Down Expand Up @@ -532,8 +544,10 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
Expand All @@ -552,8 +566,10 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
Expand Down
68 changes: 42 additions & 26 deletions llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -303,6 +304,9 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
Expand Down Expand Up @@ -797,16 +801,17 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -939,8 +944,9 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down Expand Up @@ -1133,6 +1139,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -2107,23 +2114,24 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -3465,6 +3473,7 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -3650,6 +3659,9 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
Expand Down Expand Up @@ -4139,16 +4151,17 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down Expand Up @@ -4276,8 +4289,9 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down Expand Up @@ -4465,6 +4479,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
Expand Down Expand Up @@ -5434,23 +5449,24 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v10, a0
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
Expand Down
Loading