diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b68291786..13b5e578391de 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
   return &II;
 }
 
+// Simplify operations where predicate has all inactive lanes or try to replace
+// with _u form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+                            Intrinsic::ID IID) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+    // inactive for sv[func]_m
+    return IC.replaceInstUsesWith(II, II.getOperand(1));
+  }
+  return instCombineSVEAllActive(II, IID);
+}
+
 static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
     return II_U;
   if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
 
 static std::optional<Instruction *>
 instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
     return II_U;
   if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1443,7 +1458,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
     return II_U;
   if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1463,7 +1479,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
     return II_U;
   if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
   auto *OpMultiplicand = II.getOperand(1);
   auto *OpMultiplier = II.getOperand(2);
 
-  // Canonicalise a non _u intrinsic only.
-  if (II.getIntrinsicID() != IID)
-    if (auto II_U = instCombineSVEAllActive(II, IID))
-      return II_U;
-
   // Return true if a given instruction is a unit splat value, false otherwise.
   auto IsUnitSplat = [](auto *I) {
     auto *SplatValue = getSplatValue(I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_fabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
   case Intrinsic::aarch64_sve_fadd:
     return instCombineSVEVectorFAdd(IC, II);
   case Intrinsic::aarch64_sve_fadd_u:
     return instCombineSVEVectorFAddU(IC, II);
   case Intrinsic::aarch64_sve_fdiv:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
   case Intrinsic::aarch64_sve_fmax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
   case Intrinsic::aarch64_sve_fmaxnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
   case Intrinsic::aarch64_sve_fmin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
   case Intrinsic::aarch64_sve_fminnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
   case Intrinsic::aarch64_sve_fmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
   case Intrinsic::aarch64_sve_fmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
   case Intrinsic::aarch64_sve_fmul:
+    if (auto II_U =
+            instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u))
+      return II_U;
+    return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
   case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
   case Intrinsic::aarch64_sve_fmulx:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
   case Intrinsic::aarch64_sve_fnmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
   case Intrinsic::aarch64_sve_fnmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
   case Intrinsic::aarch64_sve_fsub:
     return instCombineSVEVectorFSub(IC, II);
   case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                              Intrinsic::aarch64_sve_mla_u>(
         IC, II, true);
   case Intrinsic::aarch64_sve_mla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
   case Intrinsic::aarch64_sve_mls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
   case Intrinsic::aarch64_sve_mul:
+    if (auto II_U =
+            instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u))
+      return II_U;
+    return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
   case Intrinsic::aarch64_sve_mul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
   case Intrinsic::aarch64_sve_sabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
   case Intrinsic::aarch64_sve_smax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
   case Intrinsic::aarch64_sve_smin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
   case Intrinsic::aarch64_sve_smulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
   case Intrinsic::aarch64_sve_sub_u:
@@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                              Intrinsic::aarch64_sve_mls_u>(
         IC, II, true);
   case Intrinsic::aarch64_sve_uabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
   case Intrinsic::aarch64_sve_umax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
   case Intrinsic::aarch64_sve_umin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
   case Intrinsic::aarch64_sve_umulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
   case Intrinsic::aarch64_sve_asr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
   case Intrinsic::aarch64_sve_lsl:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
   case Intrinsic::aarch64_sve_lsr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
   case Intrinsic::aarch64_sve_and:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
   case Intrinsic::aarch64_sve_bic:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
   case Intrinsic::aarch64_sve_eor:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
   case Intrinsic::aarch64_sve_orr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
   case Intrinsic::aarch64_sve_sqsub:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
   case Intrinsic::aarch64_sve_uqsub:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
   case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll new file mode 100644 index 0000000000000..463a5f5d2cfb5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll @@ -0,0 +1,1324 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; Replace SVE _m intrinsics with their first operand when the predicate is all false. + +; Float arithmetic + +declare @llvm.aarch64.sve.fabd.nxv8f16(, , ) +define @replace_fabd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) +define @replace_fabd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) +define @replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +; aarch64_sve_fadd intrinsic combines to a LLVM instruction fadd. + +declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) +define @replace_fadd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) +define @replace_fadd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) +define @replace_fadd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) +define @replace_fdiv_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) +define @replace_fdiv_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) +define @replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( zeroinitializer, 
%a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) +define @replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast 
@llvm.aarch64.sve.fminnm.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul. 
+ +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +define @replace_fmul_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) +define @replace_fmul_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) +define @replace_fmul_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv8f16(, , ) +define @replace_fmulx_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) +define @replace_fmulx_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +define @replace_fmulx_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], 
[[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub. + +declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) +define @replace_fsub_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) +define @replace_fsub_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) +define @replace_fsub_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +; Integer arithmetic + +declare @llvm.aarch64.sve.add.nxv16i8(, , ) +define @replace_add_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv8i16(, , ) +define @replace_add_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv4i32(, , ) +define @replace_add_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv2i64(, , ) +define @replace_add_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv8i16(, , , ) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( 
%a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv4i32(, , , ) +define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv16i8(, , ) +define @replace_mul_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv8i16(, , ) +define @replace_mul_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv4i32(, , ) +define @replace_mul_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv2i64(, , ) +define @replace_mul_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +define @replace_sabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv8i16(, , ) +define @replace_sabd_intrinsic_i16( %a, %b) 
#0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) +define @replace_sabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) +define @replace_sabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv16i8(, , ) +define @replace_smax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv8i16(, , ) +define @replace_smax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv4i32(, , ) +define @replace_smax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv2i64(, , ) +define @replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv16i8(, , ) +define @replace_smin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +define @replace_smin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv4i32(, , ) +define @replace_smin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv2i64(, , ) +define @replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) +define @replace_smulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], 
[[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) +define @replace_smulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) +define @replace_smulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) +define @replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv16i8(, , ) +define @replace_sub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv8i16(, , ) +define @replace_sub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv4i32(, , ) +define @replace_sub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv2i64(, , ) +define @replace_sub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv16i8(, , ) +define @replace_uabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv8i16(, , ) +define @replace_uabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv4i32(, , ) +define @replace_uabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv2i64(, , ) +define @replace_uabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call 
@llvm.aarch64.sve.uabd.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv16i8(, , ) +define @replace_umax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +define @replace_umax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv4i32(, , ) +define @replace_umax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv2i64(, , ) +define @replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv16i8(, , ) +define @replace_umin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +define @replace_umin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv4i32(, , ) +define @replace_umin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv2i64(, , ) +define @replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) +define @replace_umulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) +define @replace_umulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) +define @replace_umulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare 
@llvm.aarch64.sve.umulh.nxv2i64(, , ) +define @replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Shifts + +declare @llvm.aarch64.sve.asr.nxv16i8(, , ) +define @replace_asr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv8i16(, , ) +define @replace_asr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv4i32(, , ) +define @replace_asr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv2i64(, , ) +define @replace_asr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) +define @replace_lsl_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) +define @replace_lsl_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) +define @replace_lsl_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) +define @replace_lsl_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) +define @replace_lsr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) +define @replace_lsr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) +define @replace_lsr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define 
@replace_lsr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) +define @replace_lsr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Logical operations + +declare @llvm.aarch64.sve.and.nxv16i8(, , ) +define @replace_and_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv8i16(, , ) +define @replace_and_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv4i32(, , ) +define @replace_and_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv2i64(, , ) +define @replace_and_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv16i8(, , ) +define @replace_bic_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv8i16(, , ) +define @replace_bic_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv4i32(, , ) +define @replace_bic_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv2i64(, , ) +define @replace_bic_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv16i8(, , ) +define @replace_eor_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv8i16(, , ) +define @replace_eor_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = 
tail call @llvm.aarch64.sve.eor.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv4i32(, , ) +define @replace_eor_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv2i64(, , ) +define @replace_eor_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv16i8(, , ) +define @replace_orr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv8i16(, , ) +define @replace_orr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv4i32(, , ) +define @replace_orr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv2i64(, , ) +define @replace_orr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; SVE2 - Uniform DSP operations + +declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) +define @replace_sqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) +define @replace_sqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) +define @replace_sqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) +define @replace_sqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) +define @replace_uqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv16i8( zeroinitializer, %a, 
%b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) +define @replace_uqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) +define @replace_uqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) +define @replace_uqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret [[A]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +attributes #0 = { "target-features"="+sve,+sve2" }
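; ---------------------------------------------------------------------------
; A minimal illustrative sketch of the fold the tests above exercise; this is
; an assumed-representative example for the reader, not an extra test from
; the patch. When the governing predicate of a merging (_m) SVE intrinsic is
; all false, e.g.
;
;   %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(
;            <vscale x 4 x i1> zeroinitializer,
;            <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
;
; instcombine now replaces all uses of %r with the first data operand %a, as
; the ACLE specifies for sv[func]_m with no active lanes. When the predicate
; is instead all true, instCombineSVEAllOrNoActive falls back to
; instCombineSVEAllActive and canonicalises the call to its _u form, as
; before this patch.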