diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 58c267f1ce4bd..9a887b832c90f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1110,10 +1110,10 @@ instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
   return std::nullopt;
 }
 
-// Simplify unary operation where predicate has all inactive lanes by replacing
+// Simplify operation where predicate has all inactive lanes by replacing
 // instruction with zeroed object
 static std::optional<Instruction *>
-instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
+instCombineSVENoActiveZero(InstCombiner &IC, IntrinsicInst &II) {
   if (match(II.getOperand(0), m_ZeroInt())) {
     Constant *Node;
     Type *RetTy = II.getType();
@@ -1126,10 +1126,9 @@ instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
                                          : ConstantInt::get(VecT, 0));
       }
       Node = ConstantStruct::get(StructT, ZerVec);
-    } else if (RetTy->isFPOrFPVectorTy())
-      Node = ConstantFP::get(RetTy, 0.0);
-    else
-      Node = ConstantInt::get(II.getType(), 0);
+    } else
+      Node = RetTy->isFPOrFPVectorTy() ? ConstantFP::get(RetTy, 0.0)
+                                       : ConstantInt::get(II.getType(), 0);
 
     IC.replaceInstUsesWith(II, Node);
     return IC.eraseInstFromFunction(II);
@@ -1188,7 +1187,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
   LLVMContext &Ctx = II.getContext();
 
   // Replace by zero constant when all lanes are inactive
-  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+  if (auto II_NA = instCombineSVENoActiveZero(IC, II))
     return II_NA;
 
   // Check that the predicate is all active
@@ -1556,7 +1555,7 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Type *VecTy = II.getType();
 
   // Replace by zero constant when all lanes are inactive
-  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+  if (auto II_NA = instCombineSVENoActiveZero(IC, II))
     return II_NA;
 
   if (isAllActivePredicate(Pred)) {
@@ -1907,7 +1906,7 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
   Value *PassThru = ConstantAggregateZero::get(Ty);
 
   // Replace by zero constant when all lanes are inactive
-  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+  if (auto II_NA = instCombineSVENoActiveZero(IC, II))
     return II_NA;
 
   // Contiguous gather => masked load.
@@ -2197,6 +2196,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_st4:
   case Intrinsic::aarch64_sve_st4q:
     return instCombineSVENoActiveUnaryErase(IC, II, 4);
+  case Intrinsic::aarch64_sve_addqv:
+  case Intrinsic::aarch64_sve_and_z:
+  case Intrinsic::aarch64_sve_bic_z:
+  case Intrinsic::aarch64_sve_brka_z:
+  case Intrinsic::aarch64_sve_brkb_z:
+  case Intrinsic::aarch64_sve_brkn_z:
+  case Intrinsic::aarch64_sve_brkpa_z:
+  case Intrinsic::aarch64_sve_brkpb_z:
+  case Intrinsic::aarch64_sve_cntp:
+  case Intrinsic::aarch64_sve_compact:
+  case Intrinsic::aarch64_sve_eor_z:
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_eorqv:
+  case Intrinsic::aarch64_sve_nand_z:
+  case Intrinsic::aarch64_sve_nor_z:
+  case Intrinsic::aarch64_sve_orn_z:
+  case Intrinsic::aarch64_sve_orr_z:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_orqv:
+  case Intrinsic::aarch64_sve_pnext:
+  case Intrinsic::aarch64_sve_rdffr_z:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_umaxqv:
   case Intrinsic::aarch64_sve_cmpeq:
   case Intrinsic::aarch64_sve_cmpeq_wide:
   case Intrinsic::aarch64_sve_cmpge:
@@ -2251,7 +2275,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ldnt1_gather_index:
   case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
   case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
-    return instCombineSVENoActiveUnaryZero(IC, II);
+    return instCombineSVENoActiveZero(IC, II);
   case Intrinsic::aarch64_sve_prf:
   case Intrinsic::aarch64_sve_prfb_gather_index:
   case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll
new file mode 100644
index 0000000000000..e58aa2eeefa8d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+
+define <16 x i8> @addqv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define <16 x i8> @addqv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+;
+  %res = call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <vscale x 4 x i1> @and_4(<vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 4 x i1> @and_4(
+; CHECK-SAME: <vscale x 4 x i1> [[PN:%.*]], <vscale x 4 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i1> zeroinitializer
+;
+  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.and.z.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd)
+  ret <vscale x 4 x i1> %res;
+}
+
+define <vscale x 16 x i1> @bic_16(<vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 16 x i1> @bic_16(
+; CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd)
+  ret <vscale x 16 x i1> %res;
+}
+
+define <vscale x 16 x i1> @brka_z_b8(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: define <vscale x 16 x i1> @brka_z_b8(
+; CHECK-SAME: <vscale x 16 x i1> [[A:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %a)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @brkb_z_b8(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: define <vscale x 16 x i1> @brkb_z_b8(
+; CHECK-SAME: <vscale x 16 x i1> [[A:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %a)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @brkn_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @brkn_b8(
+; CHECK-SAME: <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @brkpa_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @brkpa_b8(
+; CHECK-SAME: <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @brkpb_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @brkpb_b8(
+; CHECK-SAME: <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define i64 @cntp_b64(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: define i64 @cntp_b64(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]]) {
+; CHECK-NEXT:    ret i64 0
+;
+; USE_SCALAR_INC-LABEL: cntp_b64:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.d
+; USE_SCALAR_INC-NEXT:    ret
+  %out = call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> %a)
+  ret i64 %out
+}
+
+define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: define <vscale x 4 x i32> @compact_i32(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 16 x i1> @eor_16(<vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 16 x i1> @eor_16(
+; CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd)
+  ret <vscale x 16 x i1> %res;
+}
+
+define i32 @eorv_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: define i32 @eorv_i32(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]]) {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define <4 x i32> @eorqv_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: define <4 x i32> @eorqv_i32(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]]) {
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %res = call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <vscale x 8 x i1> @nand_8(<vscale x 8 x i1> %Pn, <vscale x 8 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 8 x i1> @nand_8(
+; CHECK-SAME: <vscale x 8 x i1> [[PN:%.*]], <vscale x 8 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x i1> zeroinitializer
+;
+  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.nand.z.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> %Pn, <vscale x 8 x i1> %Pd)
+  ret <vscale x 8 x i1> %res;
+}
+
+define <vscale x 4 x i1> @nor_4(<vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 4 x i1> @nor_4(
+; CHECK-SAME: <vscale x 4 x i1> [[PN:%.*]], <vscale x 4 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i1> zeroinitializer
+;
+  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.nor.z.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd)
+  ret <vscale x 4 x i1> %res;
+}
+
+define <vscale x 4 x i1> @orn_4(<vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 4 x i1> @orn_4(
+; CHECK-SAME: <vscale x 4 x i1> [[PN:%.*]], <vscale x 4 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i1> zeroinitializer
+;
+  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.orn.z.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> %Pn, <vscale x 4 x i1> %Pd)
+  ret <vscale x 4 x i1> %res;
+}
+
+define <vscale x 2 x i1> @orr_2(<vscale x 2 x i1> %Pn, <vscale x 2 x i1> %Pd) {
+; CHECK-LABEL: define <vscale x 2 x i1> @orr_2(
+; CHECK-SAME: <vscale x 2 x i1> [[PN:%.*]], <vscale x 2 x i1> [[PD:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x i1> zeroinitializer
+;
+  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.orr.z.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> %Pn, <vscale x 2 x i1> %Pd)
+  ret <vscale x 2 x i1> %res;
+}
+
+define i8 @orv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define i8 @orv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define <8 x i16> @orqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @orqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <vscale x 4 x i1> @pnext_b32(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: define <vscale x 4 x i1> @pnext_b32(
+; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i1> zeroinitializer
+;
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.pnext.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> %a)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 16 x i1> @rdffr_z() {
+; CHECK-LABEL: define <vscale x 16 x i1> @rdffr_z() {
+; CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
+;
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> zeroinitializer)
+  ret <vscale x 16 x i1> %out
+}
+
+define i64 @saddv_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: define i64 @saddv_i64(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]]) {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define i64 @uaddv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i8 @umaxv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define i8 @umaxv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define <8 x i16> @umaxqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @umaxqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}