From f77215aebf7592a3bc958c56a64b448bfb482136 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Mon, 19 Aug 2024 14:43:04 +0000 Subject: [PATCH 1/4] [AArch64] optimise SVE cvt intrinsics with no active lanes --- .../AArch64/AArch64TargetTransformInfo.cpp | 62 +++- .../sve-intrinsic-comb-no-active-lanes-cvt.ll | 309 ++++++++++++++++++ 2 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a782c9c435123..c546dfb145349 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1073,6 +1073,32 @@ static bool isAllActivePredicate(Value *Pred) { m_ConstantInt())); } +// Simplify unary operation where predicate has all inactive lanes by replacing +// instruction with its operand +static std::optional +instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II, + bool hasInactiveVector) { + int PredOperand = hasInactiveVector ? 1 : 0; + int ReplaceOperand = hasInactiveVector ? 0 : 1; + if (match(II.getOperand(PredOperand), m_ZeroInt())) { + IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand)); + return IC.eraseInstFromFunction(II); + } + return std::nullopt; +} + +// Simplify unary operation where predicate has all inactive lanes or try to +// replace with _x form when all lanes are active +static std::optional +instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { + if (isAllActivePredicate(II.getOperand(1)) && + !isa(II.getOperand(0))) { + Value *Undef = llvm::UndefValue::get(II.getType()); + return IC.replaceOperand(II, 0, Undef); + } + return instCombineSVENoActiveUnaryReplace(IC, II, true); +} + // Erase unary operation where predicate has all inactive lanes static std::optional instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II, @@ -2104,7 +2130,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, switch (IID) { default: break; - + case Intrinsic::aarch64_sve_fcvt_bf16f32: + case Intrinsic::aarch64_sve_fcvt_f16f32: + case Intrinsic::aarch64_sve_fcvt_f16f64: + case Intrinsic::aarch64_sve_fcvt_f32f16: + case Intrinsic::aarch64_sve_fcvt_f32f64: + case Intrinsic::aarch64_sve_fcvt_f64f16: + case Intrinsic::aarch64_sve_fcvt_f64f32: + case Intrinsic::aarch64_sve_fcvtlt_f32f16: + case Intrinsic::aarch64_sve_fcvtlt_f64f32: + case Intrinsic::aarch64_sve_fcvtnt_bf16f32: + case Intrinsic::aarch64_sve_fcvtnt_f16f32: + case Intrinsic::aarch64_sve_fcvtnt_f32f64: + case Intrinsic::aarch64_sve_fcvtx_f32f64: + case Intrinsic::aarch64_sve_fcvtxnt_f32f64: + case Intrinsic::aarch64_sve_fcvtzs: + case Intrinsic::aarch64_sve_fcvtzs_i32f16: + case Intrinsic::aarch64_sve_fcvtzs_i32f64: + case Intrinsic::aarch64_sve_fcvtzs_i64f16: + case Intrinsic::aarch64_sve_fcvtzs_i64f32: + case Intrinsic::aarch64_sve_fcvtzu: + case Intrinsic::aarch64_sve_fcvtzu_i32f16: + case Intrinsic::aarch64_sve_fcvtzu_i32f64: + case Intrinsic::aarch64_sve_fcvtzu_i64f16: + case Intrinsic::aarch64_sve_fcvtzu_i64f32: + case Intrinsic::aarch64_sve_scvtf: + case Intrinsic::aarch64_sve_scvtf_f16i32: + case Intrinsic::aarch64_sve_scvtf_f16i64: + case Intrinsic::aarch64_sve_scvtf_f32i64: + case Intrinsic::aarch64_sve_scvtf_f64i32: + case Intrinsic::aarch64_sve_ucvtf: + case Intrinsic::aarch64_sve_ucvtf_f16i32: + case Intrinsic::aarch64_sve_ucvtf_f16i64: + case Intrinsic::aarch64_sve_ucvtf_f32i64: + case Intrinsic::aarch64_sve_ucvtf_f64i32: + return instCombineSVEAllOrNoActiveUnary(IC, II); case Intrinsic::aarch64_sve_st1_scatter: case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: case Intrinsic::aarch64_sve_st1_scatter_sxtw: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll new file mode 100644 index 0000000000000..eb01ff5dacab5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll @@ -0,0 +1,309 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define @test_fcvt_bf16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_bf16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.bf16f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f16f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f16_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f16_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f16f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f32f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f64f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvt_f64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvt.f64f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtlt_f32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtlt_f32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtlt.f32f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtlt_f64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtlt_f64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtlt.f64f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtnt_bf16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_bf16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtnt.bf16f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtnt_f16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_f16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtnt.f16f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtnt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtnt.f32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtx_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtx_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtx.f32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtxnt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtxnt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzs( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzs_i32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzs.i32f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzs_i32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzs.i32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzs_i64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzs.i64f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzs_i64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzs.i64f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzu( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzu_i32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzu.i32f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzu_i32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzu.i32f64( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzu_i64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzu.i64f16( %a, zeroinitializer, %b) + ret %out +} + +define @test_fcvtzu_i64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.fcvtzu.i64f32( %a, zeroinitializer, %b) + ret %out +} + +define @test_scvtf( %a, %b) { +; CHECK-LABEL: define @test_scvtf( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, zeroinitializer, %b) + ret %out +} + +define @test_scvtf_f16_i32( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f16_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.scvtf.f16i32( %a, zeroinitializer, %b) + ret %out +} + +define @test_scvtf_f16_i64( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f16_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.scvtf.f16i64( %a, zeroinitializer, %b) + ret %out +} + +define @test_scvtf_f32_i64( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f32_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.scvtf.f32i64( %a, zeroinitializer, %b) + ret %out +} + +define @test_scvtf_f64_i32( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f64_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.scvtf.f64i32( %a, zeroinitializer, %b) + ret %out +} + +define @test_ucvtf( %a, %b) { +; CHECK-LABEL: define @test_ucvtf( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( %a, zeroinitializer, %b) + ret %out +} + +define @test_ucvtf_f16_i32( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f16_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.ucvtf.f16i32( %a, zeroinitializer, %b) + ret %out +} + +define @test_ucvtf_f16_i64( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f16_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.ucvtf.f16i64( %a, zeroinitializer, %b) + ret %out +} + +define @test_ucvtf_f32_i64( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f32_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.ucvtf.f32i64( %a, zeroinitializer, %b) + ret %out +} + +define @test_ucvtf_f64_i32( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f64_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: ret [[A]] +; + %out = call @llvm.aarch64.sve.ucvtf.f64i32( %a, zeroinitializer, %b) + ret %out +} From 82cc1e31368711b3c527dfc3399f5516c57d70aa Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Thu, 22 Aug 2024 15:37:47 +0000 Subject: [PATCH 2/4] Fix function naming and tests and added test for all active lanes --- .../AArch64/AArch64TargetTransformInfo.cpp | 6 +- ...sve-intrinsic-comb-all-active-lanes-cvt.ll | 411 ++++++++++++++++++ .../sve-intrinsic-comb-no-active-lanes-cvt.ll | 4 +- 3 files changed, 416 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index c546dfb145349..52ec5d4c0a8c4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1076,8 +1076,8 @@ static bool isAllActivePredicate(Value *Pred) { // Simplify unary operation where predicate has all inactive lanes by replacing // instruction with its operand static std::optional -instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II, - bool hasInactiveVector) { +instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II, + bool hasInactiveVector) { int PredOperand = hasInactiveVector ? 1 : 0; int ReplaceOperand = hasInactiveVector ? 0 : 1; if (match(II.getOperand(PredOperand), m_ZeroInt())) { @@ -1096,7 +1096,7 @@ instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { Value *Undef = llvm::UndefValue::get(II.getType()); return IC.replaceOperand(II, 0, Undef); } - return instCombineSVENoActiveUnaryReplace(IC, II, true); + return instCombineSVENoActiveReplace(IC, II, true); } // Erase unary operation where predicate has all inactive lanes diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll new file mode 100644 index 0000000000000..40da5c5755335 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll @@ -0,0 +1,411 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define @test_fcvt_bf16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_bf16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.bf16f32( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f16f32( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f16_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f16_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f16f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f16f64( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f32f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f32f16( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f64f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f64f16( %a, %pg, %b) + ret %out +} + +define @test_fcvt_f64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvt_f64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.f64f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.f64f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtlt_f32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtlt_f32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtlt.f32f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtlt.f32f16( %a, %pg, %b) + ret %out +} + +define @test_fcvtlt_f64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtlt_f64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtlt.f64f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtlt.f64f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtnt_bf16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_bf16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtnt.bf16f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtnt_f16_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_f16_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtnt.f16f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtnt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtnt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtnt.f32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvtx_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtx_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtx.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtx.f32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvtxnt_f32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtxnt_f32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtxnt.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvtzs( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtzs_i32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzs.i32f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzs.i32f16( %a, %pg, %b) + ret %out +} + +define @test_fcvtzs_i32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzs.i32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzs.i32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvtzs_i64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzs.i64f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzs.i64f16( %a, %pg, %b) + ret %out +} + +define @test_fcvtzs_i64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtzs_i64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzs.i64f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzs.i64f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtzu( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, %pg, %b) + ret %out +} + +define @test_fcvtzu_i32_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i32_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzu.i32f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzu.i32f16( %a, %pg, %b) + ret %out +} + +define @test_fcvtzu_i32_f64( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i32_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzu.i32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzu.i32f64( %a, %pg, %b) + ret %out +} + +define @test_fcvtzu_i64_f16( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i64_f16( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzu.i64f16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzu.i64f16( %a, %pg, %b) + ret %out +} + +define @test_fcvtzu_i64_f32( %a, %b) { +; CHECK-LABEL: define @test_fcvtzu_i64_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtzu.i64f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.fcvtzu.i64f32( %a, %pg, %b) + ret %out +} + +define @test_scvtf( %a, %b) { +; CHECK-LABEL: define @test_scvtf( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, %pg, %b) + ret %out +} + +define @test_scvtf_f16_i32( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f16_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.scvtf.f16i32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.scvtf.f16i32( %a, %pg, %b) + ret %out +} + +define @test_scvtf_f16_i64( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f16_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.scvtf.f16i64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.scvtf.f16i64( %a, %pg, %b) + ret %out +} + +define @test_scvtf_f32_i64( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f32_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.scvtf.f32i64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.scvtf.f32i64( %a, %pg, %b) + ret %out +} + +define @test_scvtf_f64_i32( %a, %b) { +; CHECK-LABEL: define @test_scvtf_f64_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.scvtf.f64i32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.scvtf.f64i32( %a, %pg, %b) + ret %out +} + +define @test_ucvtf( %a, %b) { +; CHECK-LABEL: define @test_ucvtf( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( %a, %pg, %b) + ret %out +} + +define @test_ucvtf_f16_i32( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f16_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.ucvtf.f16i32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.ucvtf.f16i32( %a, %pg, %b) + ret %out +} + +define @test_ucvtf_f16_i64( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f16_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.ucvtf.f16i64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.ucvtf.f16i64( %a, %pg, %b) + ret %out +} + +define @test_ucvtf_f32_i64( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f32_i64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.ucvtf.f32i64( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.ucvtf.f32i64( %a, %pg, %b) + ret %out +} + +define @test_ucvtf_f64_i32( %a, %b) { +; CHECK-LABEL: define @test_ucvtf_f64_i32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.ucvtf.f64i32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.ucvtf.f64i32( %a, %pg, %b) + ret %out +} diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll index eb01ff5dacab5..9b1528eda8ffd 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll @@ -133,7 +133,7 @@ define @test_fcvtzs( %a, [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: ret [[A]] ; - %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, zeroinitializer, %b) + %out = call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( %a, zeroinitializer, %b) ret %out } @@ -178,7 +178,7 @@ define @test_fcvtzu( %a, [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: ret [[A]] ; - %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, zeroinitializer, %b) + %out = call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( %a, zeroinitializer, %b) ret %out } From c6b530e0ce857ce4706643f0554423efa43665d0 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 27 Aug 2024 11:37:50 +0000 Subject: [PATCH 3/4] Fix comment and add poison value check --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 52ec5d4c0a8c4..4f8e60b9e94ad 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1087,12 +1087,13 @@ instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II, return std::nullopt; } -// Simplify unary operation where predicate has all inactive lanes or try to -// replace with _x form when all lanes are active +// Simplify unary operation where predicate has all inactive lanes or +// replace unused first operand with undef when all lanes are active static std::optional instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { if (isAllActivePredicate(II.getOperand(1)) && - !isa(II.getOperand(0))) { + !isa(II.getOperand(0)) && + !isa(II.getOperand(0)) { Value *Undef = llvm::UndefValue::get(II.getType()); return IC.replaceOperand(II, 0, Undef); } From e5340ce53edf1da5b51f1a4c2df963ae726a4db0 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Wed, 28 Aug 2024 14:03:11 +0000 Subject: [PATCH 4/4] add tests for poison/undef and fix intrinsics name --- .../AArch64/AArch64TargetTransformInfo.cpp | 2 +- ...sve-intrinsic-comb-all-active-lanes-cvt.ll | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 4f8e60b9e94ad..e447440713530 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1093,7 +1093,7 @@ static std::optional instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { if (isAllActivePredicate(II.getOperand(1)) && !isa(II.getOperand(0)) && - !isa(II.getOperand(0)) { + !isa(II.getOperand(0))) { Value *Undef = llvm::UndefValue::get(II.getType()); return IC.replaceOperand(II, 0, Undef); } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll index 40da5c5755335..374a985191768 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll @@ -2,6 +2,30 @@ ; RUN: opt -S -passes=instcombine < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" +define @test_fcvt_bf16_f32_undef( %a, %b) { +; CHECK-LABEL: define @test_fcvt_bf16_f32_undef( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.bf16f32( undef, %pg, %b) + ret %out +} + +define @test_fcvt_bf16_f32_poison( %a, %b) { +; CHECK-LABEL: define @test_fcvt_bf16_f32_poison( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { +; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvt.bf16f32( poison, [[PG]], [[B]]) +; CHECK-NEXT: ret [[OUT]] +; + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.fcvt.bf16f32( poison, %pg, %b) + ret %out +} + define @test_fcvt_bf16_f32( %a, %b) { ; CHECK-LABEL: define @test_fcvt_bf16_f32( ; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { @@ -178,7 +202,7 @@ define @test_fcvtzs( %a, [[OUT]] ; %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, %pg, %b) + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i16.nxv4f16( %a, %pg, %b) ret %out }