diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 3590ab221ad44..660b03080f92e 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1522,7 +1522,7 @@ def extract_vector_element_build_vector_trunc8 : GICombineRule< def sext_trunc : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (G_TRUNC $src, $x, (MIFlags NoSWrap)), + (match (G_TRUNC $src, $x), (G_SEXT $root, $src), [{ return Helper.matchSextOfTrunc(${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp index 7b4c427a9c504..576fd5fd81703 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp @@ -36,6 +36,26 @@ bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO, LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + // Combines without nsw trunc. + if (!Trunc->getFlag(MachineInstr::NoSWrap)) { + if (DstTy != SrcTy || + !isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT_INREG, {DstTy, SrcTy}})) + return false; + + // Do this for 8 bit values and up. We don't want to do it for e.g. G_TRUNC + // to i1. + unsigned TruncWidth = MRI.getType(Trunc->getReg(0)).getScalarSizeInBits(); + if (TruncWidth < 8) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSExtInReg(Dst, Src, TruncWidth); + }; + return true; + } + + // Combines for nsw trunc. + if (DstTy == SrcTy) { MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); }; return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-trunc.mir similarity index 74% rename from llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir rename to llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-trunc.mir index 8cb44605246ff..d034ec18aa271 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-trunc.mir @@ -100,9 +100,8 @@ body: | ; CHECK: liveins: $w0, $w1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nuw G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32) - ; CHECK-NEXT: $x1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: $x1 = COPY [[SEXT_INREG]](s64) %0:_(s64) = COPY $x0 %2:_(s32) = nuw G_TRUNC %0 %3:_(s64) = G_SEXT %2 @@ -117,9 +116,8 @@ body: | ; CHECK: liveins: $w0, $w1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32) - ; CHECK-NEXT: $x1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: $x1 = COPY [[SEXT_INREG]](s64) %0:_(s64) = COPY $x0 %2:_(s32) = G_TRUNC %0 %3:_(s64) = G_SEXT %2 @@ -372,3 +370,150 @@ body: | %3:_(s32) = G_SEXT %2 $w1 = COPY %3 ... +--- +name: trunc_sext_i32_i16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: trunc_sext_i32_i16 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $w0 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $w0 = COPY %2 +... +--- +name: trunc_sext_i32_i1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: trunc_sext_i32_i1 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32) + %0:_(s32) = COPY $w0 + %1:_(s1) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $w0 = COPY %2 +... +--- +name: trunc_sext_i32_i2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: trunc_sext_i32_i2 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s2) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s2) + ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32) + %0:_(s32) = COPY $w0 + %1:_(s2) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $w0 = COPY %2 +... +--- +name: trunc_sext_i32_i8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: trunc_sext_i32_i8 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $w0 + %1:_(s8) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $w0 = COPY %2 +... +--- +name: trunc_sext_i64_i32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: trunc_sext_i64_i32 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; CHECK-NEXT: $x0 = COPY [[SEXT_INREG]](s64) + %0:_(s64) = COPY $x0 + %1:_(s32) = G_TRUNC %0 + %2:_(s64) = G_SEXT %1 + $x0 = COPY %2 +... +--- +name: trunc_sext_v2i32_v2i16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: trunc_sext_v2i32_v2i16 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: $x0 = COPY [[SEXT_INREG]](<2 x s32>) + %0:_(<2 x s32>) = COPY $x0 + %1:_(<2 x s16>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_SEXT %1 + $x0 = COPY %2 +... +--- +name: trunc_sext_v4i16_v4i8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: trunc_sext_v4i16_v4i8 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s16>) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: $x0 = COPY [[SEXT_INREG]](<4 x s16>) + %0:_(<4 x s16>) = COPY $x0 + %1:_(<4 x s8>) = G_TRUNC %0 + %2:_(<4 x s16>) = G_SEXT %1 + $x0 = COPY %2 +... +--- +name: trunc_sext_mismatching_types +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: trunc_sext_mismatching_types + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32) + %0:_(s64) = COPY $x0 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $w0 = COPY %2 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir index 47c85f7678593..28c7485bbf60c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir @@ -469,10 +469,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXTLOAD]](s64) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32) ; CHECK-NEXT: $x0 = COPY [[ZEXTLOAD]](s64) - ; CHECK-NEXT: $x1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: $x1 = COPY [[ZEXTLOAD]](s64) %0:_(p0) = COPY $x0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8)) %2:_(s64) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-sext.mir new file mode 100644 index 0000000000000..b2277c4e1141b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-sext.mir @@ -0,0 +1,117 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: trunc_sext_i32_i16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: trunc_sext_i32_i16 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: trunc_sext_i32_i8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: trunc_sext_i32_i8 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GCN-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: trunc_sext_i64_i32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: trunc_sext_i64_i32 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_TRUNC %0 + %2:_(s64) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: trunc_sext_v4i32_v4i16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; GCN-LABEL: name: trunc_sext_v4i32_v4i16 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[COPY]], 16 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXT_INREG]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s16>) = G_TRUNC %0 + %2:_(<4 x s32>) = G_SEXT %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: trunc_sext_v4i16_v4i8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: trunc_sext_v4i16_v4i8 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s16>) = G_SEXT_INREG [[COPY]], 8 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s8>) = G_TRUNC %0 + %2:_(<4 x s16>) = G_SEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: trunc_sext_mismatching_types +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: trunc_sext_mismatching_types + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GCN-NEXT: $vgpr0 = COPY [[SEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_SEXT %1 + $vgpr0 = COPY %2 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine.mir b/llvm/test/CodeGen/RISCV/GlobalISel/combine.mir index ef3fc4c9d5fae..cd574945965b5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/combine.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine.mir @@ -8,9 +8,8 @@ body: | ; RV64-LABEL: name: nneg_zext ; RV64: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 - ; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32) - ; RV64-NEXT: $x10 = COPY [[SEXT]](s64) + ; RV64-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; RV64-NEXT: $x10 = COPY [[SEXT_INREG]](s64) ; RV64-NEXT: PseudoRET implicit $x10 %0:_(s64) = COPY $x10 %2:_(s32) = G_TRUNC %0