Skip to content

Commit dbadab9

Browse files
authored
[GlobalISel] Support saturated truncate (#150219)
Implements combining and legalization of G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, and G_TRUNC_USAT_U, which were previously added to SDAG with the below patterns: ``` truncate(smin(smax(x, C1), C2)) -> trunc_ssat_s(x) truncate(smax(smin(x, C2), C1)) -> trunc_ssat_s(x) truncate(smax(smin(x, C), 0)) -> trunc_ssat_u(x) truncate(smin(smax(x, 0), C)) -> trunc_ssat_u(x) truncate(umin(smax(x, 0), C)) -> trunc_ssat_u(x) truncate(umin(x, C)) -> trunc_usat_u(x) ```
1 parent 3f97736 commit dbadab9

File tree

10 files changed

+516
-1184
lines changed

10 files changed

+516
-1184
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,23 @@ class CombinerHelper {
727727
bool matchUMulHToLShr(MachineInstr &MI) const;
728728
void applyUMulHToLShr(MachineInstr &MI) const;
729729

730+
// Combine trunc(smin(smax(x, C1), C2)) -> truncssat_s(x)
731+
// or trunc(smax(smin(x, C2), C1)) -> truncssat_s(x).
732+
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const;
733+
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const;
734+
735+
// Combine trunc(smin(smax(x, 0), C)) -> truncssat_u(x)
736+
// or trunc(smax(smin(x, C), 0)) -> truncssat_u(x)
737+
// or trunc(umin(smax(x, 0), C)) -> truncssat_u(x)
738+
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const;
739+
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const;
740+
741+
// Combine trunc(umin(x, C)) -> truncusat_u(x).
742+
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const;
743+
744+
// Combine truncusat_u(fptoui(x)) -> fptoui_sat(x)
745+
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const;
746+
730747
/// Try to transform \p MI by using all of the above
731748
/// combine functions. Returns true if changed.
732749
bool tryCombine(MachineInstr &MI) const;

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,9 @@ class GCastOp : public GenericMachineInstr {
874874
case TargetOpcode::G_SEXT:
875875
case TargetOpcode::G_SITOFP:
876876
case TargetOpcode::G_TRUNC:
877+
case TargetOpcode::G_TRUNC_SSAT_S:
878+
case TargetOpcode::G_TRUNC_SSAT_U:
879+
case TargetOpcode::G_TRUNC_USAT_U:
877880
case TargetOpcode::G_UITOFP:
878881
case TargetOpcode::G_ZEXT:
879882
case TargetOpcode::G_ANYEXT:

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,35 @@ def mulh_to_lshr : GICombineRule<
12501250

12511251
def mulh_combines : GICombineGroup<[mulh_to_lshr]>;
12521252

1253+
// Combine rule rooted at a G_TRUNC. Helper.matchTruncSSatS decides whether the
// rule fires and fills $matchinfo with a register; Helper.applyTruncSSatS then
// rewrites the root using that register.
def trunc_ssats : GICombineRule<
1254+
(defs root:$root, register_matchinfo:$matchinfo),
1255+
(match (G_TRUNC $dst, $src):$root,
1256+
[{ return Helper.matchTruncSSatS(*${root}, ${matchinfo}); }]),
1257+
(apply [{ Helper.applyTruncSSatS(*${root}, ${matchinfo}); }])>;
1258+
1259+
// Combine rule rooted at a G_TRUNC. Helper.matchTruncSSatU decides whether the
// rule fires and fills $matchinfo with a register; Helper.applyTruncSSatU then
// rewrites the root using that register.
def trunc_ssatu : GICombineRule<
1260+
(defs root:$root, register_matchinfo:$matchinfo),
1261+
(match (G_TRUNC $dst, $src):$root,
1262+
[{ return Helper.matchTruncSSatU(*${root}, ${matchinfo}); }]),
1263+
(apply [{ Helper.applyTruncSSatU(*${root}, ${matchinfo}); }])>;
1264+
1265+
// Combine rule rooted at a G_TRUNC whose source is defined by a G_UMIN.
// Helper.matchTruncUSatU receives both instructions and decides whether the
// rule fires; on success the root is replaced by G_TRUNC_USAT_U of the
// G_UMIN's first input ($x).
def trunc_usatu : GICombineRule<
1266+
(defs root:$root),
1267+
(match (G_UMIN $min, $x, $y):$Min,
1268+
(G_TRUNC $dst, $min):$root,
1269+
[{ return Helper.matchTruncUSatU(*${root}, *${Min}); }]),
1270+
(apply (G_TRUNC_USAT_U $dst, $x))>;
1271+
1272+
// Combine rule rooted at a G_TRUNC_USAT_U whose source is defined by a
// G_FPTOUI. Helper.matchTruncUSatUToFPTOUISat decides whether the rule fires;
// on success the root is replaced by G_FPTOUI_SAT of the G_FPTOUI's input ($x).
def truncusatu_to_fptouisat : GICombineRule<
1273+
(defs root:$root),
1274+
(match (G_FPTOUI $src, $x):$Src,
1275+
(G_TRUNC_USAT_U $dst, $src):$root,
1276+
[{ return Helper.matchTruncUSatUToFPTOUISat(*${root}, *${Src}); }]),
1277+
(apply (G_FPTOUI_SAT $dst, $x))
1278+
>;
1279+
1280+
// Groups the four saturated-truncate rules so they can be enabled together.
def truncsat_combines : GICombineGroup<[trunc_ssats, trunc_ssatu, trunc_usatu, truncusatu_to_fptouisat]>;
1281+
12531282
def redundant_neg_operands: GICombineRule<
12541283
(defs root:$root, build_fn_matchinfo:$matchinfo),
12551284
(match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMAD, G_FMA):$root,
@@ -2074,7 +2103,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
20742103
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
20752104
simplify_neg_minmax, combine_concat_vector,
20762105
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
2077-
combine_use_vector_truncate, merge_combines, overflow_combines]>;
2106+
combine_use_vector_truncate, merge_combines, overflow_combines, truncsat_combines]>;
20782107

20792108
// A combine group used for prelegalizer combiners at -O0. The combines in
20802109
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5924,6 +5924,96 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
59245924
MI.eraseFromParent();
59255925
}
59265926

5927+
/// Match a G_TRUNC whose source is a signed clamp of some value to the signed
/// range of the destination scalar type, in either nesting order:
///   trunc(smin(smax(x, SMin), SMax))  or  trunc(smax(smin(x, SMax), SMin)).
///
/// \param MI        The truncate being inspected (operand 0 = result,
///                  operand 1 = source).
/// \param MatchInfo Receives the register bound to x by the pattern matchers.
/// \returns true when legalizer info is available, G_TRUNC_SSAT_S is legal for
///          the {result, source} type pair, and one of the clamp forms matches.
bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
                                     Register &MatchInfo) const {
  const Register TruncDst = MI.getOperand(0).getReg();
  const Register TruncSrc = MI.getOperand(1).getReg();
  const LLT NarrowTy = MRI.getType(TruncDst);
  const LLT WideTy = MRI.getType(TruncSrc);
  const unsigned NarrowBits = NarrowTy.getScalarSizeInBits();
  const unsigned WideBits = WideTy.getScalarSizeInBits();
  assert(WideBits > NarrowBits && "Unexpected types for truncate operation");

  if (!LI)
    return false;
  if (!isLegal({TargetOpcode::G_TRUNC_SSAT_S, {NarrowTy, WideTy}}))
    return false;

  // Clamp bounds of the narrow type, sign-extended to the wide scalar width so
  // they can be compared against constants of the source type.
  const APInt UpperBound = APInt::getSignedMaxValue(NarrowBits).sext(WideBits);
  const APInt LowerBound = APInt::getSignedMinValue(NarrowBits).sext(WideBits);

  // Form 1: smin(smax(x, SMin), SMax).
  if (mi_match(TruncSrc, MRI,
               m_GSMin(m_GSMax(m_Reg(MatchInfo),
                               m_SpecificICstOrSplat(LowerBound)),
                       m_SpecificICstOrSplat(UpperBound))))
    return true;

  // Form 2: smax(smin(x, SMax), SMin).
  return mi_match(TruncSrc, MRI,
                  m_GSMax(m_GSMin(m_Reg(MatchInfo),
                                  m_SpecificICstOrSplat(UpperBound)),
                          m_SpecificICstOrSplat(LowerBound)));
}
5951+
5952+
/// Replace the matched truncate with a signed-saturating truncate of
/// \p MatchInfo that defines the same result register, then erase \p MI.
///
/// \param MI        The instruction being replaced; its operand 0 is reused as
///                  the result of the new instruction.
/// \param MatchInfo The register to truncate.
void CombinerHelper::applyTruncSSatS(MachineInstr &MI,
                                     Register &MatchInfo) const {
  const Register Result = MI.getOperand(0).getReg();
  Builder.buildTruncSSatS(Result, MatchInfo);
  MI.eraseFromParent();
}
5958+
5959+
/// Match a G_TRUNC whose source clamps some value to [0, UMax], where UMax is
/// the unsigned maximum of the destination scalar type. Accepted forms:
///   trunc(smin(smax(x, 0), UMax))
///   trunc(smax(smin(x, UMax), 0))
///   trunc(umin(smax(x, 0), UMax))
///
/// \param MI        The truncate being inspected (operand 0 = result,
///                  operand 1 = source).
/// \param MatchInfo Receives the register bound to x by the pattern matchers.
/// \returns true when legalizer info is available, G_TRUNC_SSAT_U is legal for
///          the {result, source} type pair, and one of the clamp forms matches.
bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
                                     Register &MatchInfo) const {
  const Register TruncDst = MI.getOperand(0).getReg();
  const Register TruncSrc = MI.getOperand(1).getReg();
  const LLT NarrowTy = MRI.getType(TruncDst);
  const LLT WideTy = MRI.getType(TruncSrc);
  const unsigned NarrowBits = NarrowTy.getScalarSizeInBits();
  const unsigned WideBits = WideTy.getScalarSizeInBits();
  assert(WideBits > NarrowBits && "Unexpected types for truncate operation");

  if (!LI)
    return false;
  if (!isLegal({TargetOpcode::G_TRUNC_SSAT_U, {NarrowTy, WideTy}}))
    return false;

  // Upper clamp bound: all-ones of the narrow type, zero-extended to the wide
  // scalar width.
  const APInt UpperBound = APInt::getMaxValue(NarrowBits).zext(WideBits);

  // Form 1: smin(smax(x, 0), UMax).
  if (mi_match(TruncSrc, MRI,
               m_GSMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
                       m_SpecificICstOrSplat(UpperBound))))
    return true;

  // Form 2: smax(smin(x, UMax), 0).
  if (mi_match(TruncSrc, MRI,
               m_GSMax(m_GSMin(m_Reg(MatchInfo),
                               m_SpecificICstOrSplat(UpperBound)),
                       m_SpecificICstOrSplat(0))))
    return true;

  // Form 3: umin(smax(x, 0), UMax).
  return mi_match(TruncSrc, MRI,
                  m_GUMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
                          m_SpecificICstOrSplat(UpperBound)));
}
5983+
5984+
/// Replace the matched truncate with a G_TRUNC_SSAT_U-style truncate of
/// \p MatchInfo (built via buildTruncSSatU) that defines the same result
/// register, then erase \p MI.
///
/// \param MI        The instruction being replaced; its operand 0 is reused as
///                  the result of the new instruction.
/// \param MatchInfo The register to truncate.
void CombinerHelper::applyTruncSSatU(MachineInstr &MI,
                                     Register &MatchInfo) const {
  const Register Result = MI.getOperand(0).getReg();
  Builder.buildTruncSSatU(Result, MatchInfo);
  MI.eraseFromParent();
}
5990+
5991+
/// Match trunc(umin(x, UMax)), where UMax is the unsigned maximum of the
/// truncate's destination scalar type, so the pair can be rewritten as
/// G_TRUNC_USAT_U(x).
///
/// \param MI    The G_TRUNC root (operand 0 = result).
/// \param MinMI The G_UMIN feeding the truncate (operand 1 = value,
///              operand 2 = clamp bound).
/// \returns true when legalizer info is available, G_TRUNC_USAT_U is legal for
///          the {result, value} type pair, the clamp bound is the constant (or
///          splat of) UMax, and the clamped value is not itself a G_SMAX.
bool CombinerHelper::matchTruncUSatU(MachineInstr &MI,
                                     MachineInstr &MinMI) const {
  Register Min = MinMI.getOperand(2).getReg();
  Register Val = MinMI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT SrcTy = MRI.getType(Val);
  unsigned NumDstBits = DstTy.getScalarSizeInBits();
  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  // Query legality of the opcode this combine actually produces
  // (G_TRUNC_USAT_U), not the signed-input variant G_TRUNC_SSAT_U.
  if (!LI || !isLegal({TargetOpcode::G_TRUNC_USAT_U, {DstTy, SrcTy}}))
    return false;

  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
  // Reject a value defined by G_SMAX; presumably this leaves
  // umin(smax(x, 0), UMax) to the G_TRUNC_SSAT_U combine -- TODO confirm.
  return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
         !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
}
6007+
6008+
/// Decide whether truncusat_u(fptoui(x)) may be folded into a single
/// G_FPTOUI_SAT.
///
/// \param MI    The outer instruction; the type of its operand 0 is used as
///              the result type of the query.
/// \param SrcMI The inner instruction; the type of its operand 1 is used as
///              the source type of the query.
/// \returns true only when legalizer info is available and
///          isLegalOrBeforeLegalizer accepts G_FPTOUI_SAT for that type pair.
bool CombinerHelper::matchTruncUSatUToFPTOUISat(MachineInstr &MI,
                                                MachineInstr &SrcMI) const {
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  const LLT InputTy = MRI.getType(SrcMI.getOperand(1).getReg());

  if (!LI)
    return false;
  return isLegalOrBeforeLegalizer(
      {TargetOpcode::G_FPTOUI_SAT, {ResultTy, InputTy}});
}
6016+
59276017
bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
59286018
BuildFnTy &MatchInfo) const {
59296019
unsigned Opc = MI.getOpcode();

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,10 @@ def AArch64PostLegalizerLowering
351351
// Post-legalization combines which are primarily optimizations.
352352
def AArch64PostLegalizerCombiner
353353
: GICombiner<"AArch64PostLegalizerCombinerImpl",
354-
[copy_prop, cast_of_cast_combines, buildvector_of_truncate,
355-
integer_of_truncate, mutate_anyext_to_zext,
356-
combines_for_extload, combine_indexed_load_store, sext_trunc_sextload,
354+
[copy_prop, cast_of_cast_combines,
355+
buildvector_of_truncate, integer_of_truncate,
356+
mutate_anyext_to_zext, combines_for_extload,
357+
combine_indexed_load_store, sext_trunc_sextload,
357358
hoist_logic_op_with_same_opcode_hands,
358359
redundant_and, xor_of_and_with_same_reg,
359360
extractvecelt_pairwise_add, redundant_or,
@@ -367,5 +368,6 @@ def AArch64PostLegalizerCombiner
367368
select_to_minmax, or_to_bsp, combine_concat_vector,
368369
commute_constant_to_rhs, extract_vec_elt_combines,
369370
push_freeze_to_prevent_poison_from_propagating,
370-
combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> {
371+
combine_mul_cmlt, combine_use_vector_truncate,
372+
extmultomull, truncsat_combines]> {
371373
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,11 +1847,11 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
18471847
case Intrinsic::aarch64_neon_sdot:
18481848
return LowerTriOp(AArch64::G_SDOT);
18491849
case Intrinsic::aarch64_neon_sqxtn:
1850-
return LowerUnaryOp(AArch64::G_TRUNC_SSAT_S);
1850+
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
18511851
case Intrinsic::aarch64_neon_sqxtun:
1852-
return LowerUnaryOp(AArch64::G_TRUNC_SSAT_U);
1852+
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
18531853
case Intrinsic::aarch64_neon_uqxtn:
1854-
return LowerUnaryOp(AArch64::G_TRUNC_USAT_U);
1854+
return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
18551855

18561856
case Intrinsic::vector_reverse:
18571857
// TODO: Add support for vector_reverse

0 commit comments

Comments
 (0)