From c408a24040ecce659955f1a623a66e9fae49ab54 Mon Sep 17 00:00:00 2001
From: Mark Harley
Date: Thu, 30 Nov 2023 16:17:54 +0000
Subject: [PATCH 1/5] [AArch64][SVE][NFC] Added tests for SVE intrinsic calls with all false predicates

---
 ...-intrinsic-comb-m-forms-no-active-lanes.ll | 1454 +++++++++++++++++
 ...-intrinsic-comb-u-forms-no-active-lanes.ll | 1448 ++++++++++++++++
 2 files changed, 2902 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll

diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
new file mode 100644
index 0000000000000..5ea6fff12134c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
@@ -0,0 +1,1454 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Replace SVE _m intrinsics with their first operand when the predicate is all false.
+
+; Float arithmetic
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+; The aarch64_sve_fadd intrinsic combines to an LLVM fadd instruction.
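+; A hedged sketch, not asserted by this NFC patch (which only pins down the
+; current, unchanged output): the fold this series works towards would rewrite
+; an _m call whose governing predicate has no active lanes into its first data
+; operand, e.g.
+;   %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+;   ret <vscale x 8 x half> %1
+; would become
+;   ret <vscale x 8 x half> %a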
+ +declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) +define @replace_fadd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) +define @replace_fadd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) +define @replace_fadd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) +define @replace_fdiv_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) +define @replace_fdiv_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) +define @replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) +define 
@replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( 
%a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( zeroinitializer, 
%a, %b, %c) + ret %1 +} + +; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul. + +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +define @replace_fmul_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) +define @replace_fmul_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) +define @replace_fmul_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv8f16(, , ) +define @replace_fmulx_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv4f32(, , ) +define @replace_fmulx_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.nxv2f64(, , ) +define @replace_fmulx_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] 
+; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub. 
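+; For context, a hedged sketch (behavior assumed from existing instcombine
+; rules, not checked by these tests): with an all-active predicate the call is
+; already rewritten to the plain IR instruction, e.g.
+;   %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+;   %r = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+; folds to
+;   %r = fsub fast <vscale x 8 x half> %a, %b
+; The tests below therefore use an all-false predicate, where no fold fires yet.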
+ +declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) +define @replace_fsub_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) +define @replace_fsub_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) +define @replace_fsub_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +; Integer arithmetic + +declare @llvm.aarch64.sve.add.nxv16i8(, , ) +define @replace_add_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv8i16(, , ) +define @replace_add_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv4i32(, , ) +define @replace_add_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.nxv2i64(, , ) +define @replace_add_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv8i16(, , , ) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: 
define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv4i32(, , , ) +define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv16i8(, , ) +define @replace_mul_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv8i16(, , ) +define @replace_mul_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i16 +; 
CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv4i32(, , ) +define @replace_mul_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.nxv2i64(, , ) +define @replace_mul_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +define @replace_sabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv8i16(, , ) +define @replace_sabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) +define @replace_sabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) +define @replace_sabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv16i8(, , ) +define @replace_smax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv8i16(, , ) +define @replace_smax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call 
@llvm.aarch64.sve.smax.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv4i32(, , ) +define @replace_smax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.nxv2i64(, , ) +define @replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv16i8(, , ) +define @replace_smin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +define @replace_smin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv4i32(, , ) +define @replace_smin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.nxv2i64(, , ) +define @replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) +define @replace_smulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) +define @replace_smulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) +define @replace_smulh_intrinsic_i32( %a, %b) #0 { +; 
CHECK-LABEL: define @replace_smulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) +define @replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv16i8(, , ) +define @replace_sub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv8i16(, , ) +define @replace_sub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv4i32(, , ) +define @replace_sub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.nxv2i64(, , ) +define @replace_sub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv16i8(, , ) +define @replace_uabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv8i16(, , ) +define @replace_uabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv4i32(, , ) +define @replace_uabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv4i32( zeroinitializer, [[A]], [[B]]) 
+; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.nxv2i64(, , ) +define @replace_uabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv16i8(, , ) +define @replace_umax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +define @replace_umax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv4i32(, , ) +define @replace_umax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.nxv2i64(, , ) +define @replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv16i8(, , ) +define @replace_umin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +define @replace_umin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv4i32(, , ) +define @replace_umin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.nxv2i64(, , ) +define 
@replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) +define @replace_umulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) +define @replace_umulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) +define @replace_umulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) +define @replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Shifts + +declare @llvm.aarch64.sve.asr.nxv16i8(, , ) +define @replace_asr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv8i16(, , ) +define @replace_asr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv4i32(, , ) +define @replace_asr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.nxv2i64(, , ) +define @replace_asr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = 
tail call @llvm.aarch64.sve.asr.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) +define @replace_lsl_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) +define @replace_lsl_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) +define @replace_lsl_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) +define @replace_lsl_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) +define @replace_lsr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) +define @replace_lsr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) +define @replace_lsr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) +define @replace_lsr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Logical operations + +declare 
@llvm.aarch64.sve.and.nxv16i8(, , ) +define @replace_and_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv8i16(, , ) +define @replace_and_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv4i32(, , ) +define @replace_and_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.nxv2i64(, , ) +define @replace_and_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv16i8(, , ) +define @replace_bic_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv8i16(, , ) +define @replace_bic_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv4i32(, , ) +define @replace_bic_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.nxv2i64(, , ) +define @replace_bic_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv16i8(, , ) +define @replace_eor_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.eor.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv8i16(, , ) +define @replace_eor_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv4i32(, , ) +define @replace_eor_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.nxv2i64(, , ) +define @replace_eor_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv16i8(, , ) +define @replace_orr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv8i16(, , ) +define @replace_orr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv4i32(, , ) +define @replace_orr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.nxv2i64(, , ) +define @replace_orr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; SVE2 - Uniform DSP operations + +declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) +define @replace_sqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + 
+declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) +define @replace_sqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) +define @replace_sqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) +define @replace_sqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) +define @replace_uqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) +define @replace_uqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) +define @replace_uqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) +define @replace_uqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +attributes #0 = { "target-features"="+sve,+sve2" } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll new file mode 100644 index 0000000000000..d3e51231fb52c --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll @@ -0,0 +1,1448 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s 
+ +target triple = "aarch64-unknown-linux-gnu" + +; Replace SVE _u intrinsics with undef if the predicate is all false. + +; Float arithmetic + +declare @llvm.aarch64.sve.fabd.u.nxv8f16(, , ) +define @replace_fabd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fabd.u.nxv4f32(, , ) +define @replace_fabd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fabd.u.nxv2f64(, , ) +define @replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.u.nxv8f16(, , ) +define @replace_fadd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.u.nxv4f32(, , ) +define @replace_fadd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fadd.u.nxv2f64(, , ) +define @replace_fadd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fadd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.u.nxv8f16(, , ) +define @replace_fdiv_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.u.nxv4f32(, , ) +define @replace_fdiv_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; 
CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fdiv.u.nxv2f64(, , ) +define @replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.u.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.u.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmax.u.nxv2f64(, , ) +define @replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.u.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.u.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmaxnm.u.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.u.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, [[A]], [[B]]) 
+; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.u.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmin.u.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.u.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.u.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fminnm.u.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.u.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.u.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmla.u.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = 
tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.u.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.u.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmls.u.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.u.nxv8f16(, , ) +define @replace_fmul_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.u.nxv4f32(, , ) +define @replace_fmul_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmul.u.nxv2f64(, , ) +define @replace_fmul_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmul_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.u.nxv8f16(, , ) +define @replace_fmulx_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.u.nxv4f32(, , ) +define @replace_fmulx_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_float +; 
CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fmulx.u.nxv2f64(, , ) +define @replace_fmulx_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmulx_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.u.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.u.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmla.u.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.u.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.u.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fnmls.u.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, 
%a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.u.nxv8f16(, , ) +define @replace_fsub_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.u.nxv4f32(, , ) +define @replace_fsub_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.fsub.u.nxv2f64(, , ) +define @replace_fsub_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fsub_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, %a, %b) + ret %1 +} + +; Integer arithmetic + +declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) +define @replace_add_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.u.nxv8i16(, , ) +define @replace_add_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.u.nxv4i32(, , ) +define @replace_add_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.add.u.nxv2i64(, , ) +define @replace_add_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_add_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mla.u.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.u.nxv8i16(, , , 
) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.u.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mla.u.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.u.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.u.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.u.nxv4i32(, , , ) +define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mls.u.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, %a, %b, %c) + ret %1 +} + +declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) +define @replace_mul_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare 
@llvm.aarch64.sve.mul.u.nxv8i16(, , ) +define @replace_mul_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.u.nxv4i32(, , ) +define @replace_mul_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.mul.u.nxv2i64(, , ) +define @replace_mul_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_mul_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) +define @replace_sabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) +define @replace_sabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) +define @replace_sabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) +define @replace_sabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.u.nxv16i8(, , ) +define @replace_smax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.u.nxv8i16(, , ) +define @replace_smax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i16 +; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.u.nxv4i32(, , ) +define @replace_smax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smax.u.nxv2i64(, , ) +define @replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.u.nxv16i8(, , ) +define @replace_smin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.u.nxv8i16(, , ) +define @replace_smin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.u.nxv4i32(, , ) +define @replace_smin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smin.u.nxv2i64(, , ) +define @replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.u.nxv16i8(, , ) +define @replace_smulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.u.nxv8i16(, , ) +define @replace_smulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, [[A]], 
[[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.u.nxv4i32(, , ) +define @replace_smulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.smulh.u.nxv2i64(, , ) +define @replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.u.nxv16i8(, , ) +define @replace_sub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.u.nxv8i16(, , ) +define @replace_sub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.u.nxv4i32(, , ) +define @replace_sub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sub.u.nxv2i64(, , ) +define @replace_sub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.u.nxv16i8(, , ) +define @replace_uabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.u.nxv8i16(, , ) +define @replace_uabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare 
@llvm.aarch64.sve.uabd.u.nxv4i32(, , ) +define @replace_uabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uabd.u.nxv2i64(, , ) +define @replace_uabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.u.nxv16i8(, , ) +define @replace_umax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.u.nxv8i16(, , ) +define @replace_umax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.u.nxv4i32(, , ) +define @replace_umax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umax.u.nxv2i64(, , ) +define @replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.u.nxv16i8(, , ) +define @replace_umin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.u.nxv8i16(, , ) +define @replace_umin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.u.nxv4i32(, , ) +define @replace_umin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i32 +; 
CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umin.u.nxv2i64(, , ) +define @replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.u.nxv16i8(, , ) +define @replace_umulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.u.nxv8i16(, , ) +define @replace_umulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.u.nxv4i32(, , ) +define @replace_umulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.umulh.u.nxv2i64(, , ) +define @replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Shifts + +declare @llvm.aarch64.sve.asr.u.nxv16i8(, , ) +define @replace_asr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.u.nxv8i16(, , ) +define @replace_asr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.u.nxv4i32(, , ) +define @replace_asr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv4i32( 
zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.asr.u.nxv2i64(, , ) +define @replace_asr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.u.nxv16i8(, , ) +define @replace_lsl_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.u.nxv8i16(, , ) +define @replace_lsl_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.u.nxv4i32(, , ) +define @replace_lsl_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsl.u.nxv2i64(, , ) +define @replace_lsl_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.u.nxv16i8(, , ) +define @replace_lsr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.u.nxv8i16(, , ) +define @replace_lsr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.lsr.u.nxv4i32(, , ) +define @replace_lsr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare 
@llvm.aarch64.sve.lsr.u.nxv2i64(, , ) +define @replace_lsr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; Logical operations + +declare @llvm.aarch64.sve.and.u.nxv16i8(, , ) +define @replace_and_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.u.nxv8i16(, , ) +define @replace_and_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.u.nxv4i32(, , ) +define @replace_and_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.and.u.nxv2i64(, , ) +define @replace_and_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.u.nxv16i8(, , ) +define @replace_bic_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.u.nxv8i16(, , ) +define @replace_bic_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.u.nxv4i32(, , ) +define @replace_bic_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.bic.u.nxv2i64(, , ) +define @replace_bic_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i64 +; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.u.nxv16i8(, , ) +define @replace_eor_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.u.nxv8i16(, , ) +define @replace_eor_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.u.nxv4i32(, , ) +define @replace_eor_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.eor.u.nxv2i64(, , ) +define @replace_eor_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.u.nxv16i8(, , ) +define @replace_orr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.u.nxv8i16(, , ) +define @replace_orr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.u.nxv4i32(, , ) +define @replace_orr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.orr.u.nxv2i64(, , ) +define @replace_orr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail 
call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +; SVE2 - Uniform DSP operations + +declare @llvm.aarch64.sve.sqsub.u.nxv16i8(, , ) +define @replace_sqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.u.nxv8i16(, , ) +define @replace_sqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.u.nxv4i32(, , ) +define @replace_sqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.sqsub.u.nxv2i64(, , ) +define @replace_sqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.u.nxv16i8(, , ) +define @replace_uqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.u.nxv8i16(, , ) +define @replace_uqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.u.nxv4i32(, , ) +define @replace_uqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, %a, %b) + ret %1 +} + +declare @llvm.aarch64.sve.uqsub.u.nxv2i64(, , ) +define @replace_uqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, %a, %b) + 
ret %1 +} + +attributes #0 = { "target-features"="+sve,+sve2" } From 39b1af8c622e4521fcf7abe46cad5f3994350cf6 Mon Sep 17 00:00:00 2001 From: Mark Harley Date: Thu, 30 Nov 2023 17:16:14 +0000 Subject: [PATCH 2/5] [AArch64][SVE] Added optimisation for SVE intrinsics with no active lanes --- .../AArch64/AArch64TargetTransformInfo.cpp | 166 ++++++-- ...-intrinsic-comb-m-forms-no-active-lanes.ll | 390 ++++++------------ ...-intrinsic-comb-u-forms-no-active-lanes.ll | 390 ++++++------------ 3 files changed, 382 insertions(+), 564 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b5b8b68291786..a0e42a183f1d9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1406,9 +1406,30 @@ static std::optional instCombineSVEAllActive(IntrinsicInst &II, return &II; } +// Optimize operations that take an all false predicate or send them for +// canonicalization. +static std::optional +instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, + Intrinsic::ID IID) { + if (match(II.getOperand(0), m_ZeroInt())) { + if (II.getIntrinsicID() != IID) + // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are + // inactive for sv[func]_m or sv[func]_z + return IC.replaceInstUsesWith(II, II.getOperand(1)); + else + // llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes + // are inactive for sv[func]_x + return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); + } + if (II.getIntrinsicID() != IID) + return instCombineSVEAllActive(II, IID); + return std::nullopt; +} + static std::optional instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) return II_U; if (auto MLA = instCombineSVEVectorFuseMulAddSub( @@ -1421,9 +1442,22 @@ static std::optional instCombineSVEVectorAdd(InstCombiner &IC, return std::nullopt; } +static std::optional +instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) { + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) + return II_U; + else { + return instCombineSVEVectorFuseMulAddSub( + IC, II, true); + } +} + static std::optional instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) + return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub(IC, II, @@ -1465,7 +1502,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { static std::optional instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) + return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub(IC, II, @@ -1507,7 
 static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
     return II_U;
   if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mla>(
@@ -1421,9 +1442,22 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *>
+instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
+    return II_U;
+  else {
+    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+                                             Intrinsic::aarch64_sve_mla_u>(
+        IC, II, true);
+  }
+}
+
 static std::optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
                                                              IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
     return II_U;
   if (auto FMLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                                     Intrinsic::aarch64_sve_fmla>(
@@ -1448,6 +1482,9 @@ static std::optional<Instruction *>
 instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
+    return II_U;
   if (auto FMLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
                                                     Intrinsic::aarch64_sve_fmla_u>(IC, II,
@@ -1465,7 +1502,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
 static std::optional<Instruction *> instCombineSVEVectorFSub(InstCombiner &IC,
                                                              IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
     return II_U;
   if (auto FMLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                                     Intrinsic::aarch64_sve_fmls>(
@@ -1486,6 +1524,9 @@ static std::optional<Instruction *>
 instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
+    return II_U;
   if (auto FMLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
                                                     Intrinsic::aarch64_sve_fmls_u>(IC, II,
@@ -1507,7 +1548,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
 static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
     return II_U;
   if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mls>(
@@ -1516,6 +1558,18 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *>
+instCombineSVEVectorSubU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
+    return II_U;
+  else {
+    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+                                             Intrinsic::aarch64_sve_mls_u>(
+        IC, II, true);
+  }
+}
+
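+// With an all-active predicate, the same helper instead canonicalises the
+// intrinsic to its predicate-agnostic _u form, e.g. (illustrative types):
+//   %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+//   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(
+//            <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+// is rewritten to call @llvm.aarch64.sve.sub.u.nxv4i32 with the same operands.
+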
 static std::optional<Instruction *>
 instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II,
                         Intrinsic::ID IID) {
@@ -1523,10 +1577,8 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
   auto *OpMultiplicand = II.getOperand(1);
   auto *OpMultiplier = II.getOperand(2);
 
-  // Canonicalise a non _u intrinsic only.
-  if (II.getIntrinsicID() != IID)
-    if (auto II_U = instCombineSVEAllActive(II, IID))
-      return II_U;
+  if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID))
+    return II_U;
 
   // Return true if a given instruction is a unit splat value, false otherwise.
   auto IsUnitSplat = [](auto *I) {
@@ -1891,34 +1943,45 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_fabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+  case Intrinsic::aarch64_sve_fabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
   case Intrinsic::aarch64_sve_fadd:
     return instCombineSVEVectorFAdd(IC, II);
   case Intrinsic::aarch64_sve_fadd_u:
     return instCombineSVEVectorFAddU(IC, II);
   case Intrinsic::aarch64_sve_fdiv:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+  case Intrinsic::aarch64_sve_fdiv_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
   case Intrinsic::aarch64_sve_fmax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+  case Intrinsic::aarch64_sve_fmax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
   case Intrinsic::aarch64_sve_fmaxnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+  case Intrinsic::aarch64_sve_fmaxnm_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
   case Intrinsic::aarch64_sve_fmin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+  case Intrinsic::aarch64_sve_fmin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
   case Intrinsic::aarch64_sve_fminnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+  case Intrinsic::aarch64_sve_fminnm_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
   case Intrinsic::aarch64_sve_fmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+  case Intrinsic::aarch64_sve_fmla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
   case Intrinsic::aarch64_sve_fmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+  case Intrinsic::aarch64_sve_fmls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
   case Intrinsic::aarch64_sve_fmul:
   case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
   case Intrinsic::aarch64_sve_fmulx:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+  case Intrinsic::aarch64_sve_fmulx_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
   case Intrinsic::aarch64_sve_fnmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+  case Intrinsic::aarch64_sve_fnmla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
   case Intrinsic::aarch64_sve_fnmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+  case Intrinsic::aarch64_sve_fnmls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
   case Intrinsic::aarch64_sve_fsub:
     return instCombineSVEVectorFSub(IC, II);
   case Intrinsic::aarch64_sve_fsub_u:
@@ -1926,56 +1989,71 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_add:
     return instCombineSVEVectorAdd(IC, II);
   case Intrinsic::aarch64_sve_add_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
-                                             Intrinsic::aarch64_sve_mla_u>(
-        IC, II, true);
+    return instCombineSVEVectorAddU(IC, II);
   case Intrinsic::aarch64_sve_mla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+  case Intrinsic::aarch64_sve_mla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
   case Intrinsic::aarch64_sve_mls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+  case Intrinsic::aarch64_sve_mls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_mul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
   case Intrinsic::aarch64_sve_sabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+  case Intrinsic::aarch64_sve_sabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
   case Intrinsic::aarch64_sve_smax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+  case Intrinsic::aarch64_sve_smax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
   case Intrinsic::aarch64_sve_smin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+  case Intrinsic::aarch64_sve_smin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
   case Intrinsic::aarch64_sve_smulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+  case Intrinsic::aarch64_sve_smulh_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
   case Intrinsic::aarch64_sve_sub_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
-                                             Intrinsic::aarch64_sve_mls_u>(
-        IC, II, true);
+    return instCombineSVEVectorSubU(IC, II);
   case Intrinsic::aarch64_sve_uabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+  case Intrinsic::aarch64_sve_uabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
   case Intrinsic::aarch64_sve_umax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+  case Intrinsic::aarch64_sve_umax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
   case Intrinsic::aarch64_sve_umin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+  case Intrinsic::aarch64_sve_umin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
   case Intrinsic::aarch64_sve_umulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+  case Intrinsic::aarch64_sve_umulh_u:
+    
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u); + case Intrinsic::aarch64_sve_asr_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u); + case Intrinsic::aarch64_sve_lsl_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u); + case Intrinsic::aarch64_sve_lsr_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u); + case Intrinsic::aarch64_sve_and_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u); + case Intrinsic::aarch64_sve_bic_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u); + case Intrinsic::aarch64_sve_eor_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u); + case Intrinsic::aarch64_sve_orr_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u); + case Intrinsic::aarch64_sve_sqsub_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u); + case Intrinsic::aarch64_sve_uqsub_u: + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); case Intrinsic::aarch64_sve_uunpkhi: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll index 5ea6fff12134c..463a5f5d2cfb5 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll @@ -11,8 +11,7 @@ declare @llvm.aarch64.sve.fabd.nxv8f16(, @replace_fabd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -22,8 +21,7 @@ declare @llvm.aarch64.sve.fabd.nxv4f32(, < define @replace_fabd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -33,8 +31,7 @@ declare @llvm.aarch64.sve.fabd.nxv2f64(, define 
@replace_fabd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -46,8 +43,7 @@ declare @llvm.aarch64.sve.fadd.nxv8f16(, @replace_fadd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -57,8 +53,7 @@ declare @llvm.aarch64.sve.fadd.nxv4f32(, < define @replace_fadd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -68,8 +63,7 @@ declare @llvm.aarch64.sve.fadd.nxv2f64(, define @replace_fadd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -79,8 +73,7 @@ declare @llvm.aarch64.sve.fdiv.nxv8f16(, @replace_fdiv_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -90,8 +83,7 @@ declare @llvm.aarch64.sve.fdiv.nxv4f32(, < define @replace_fdiv_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -101,8 +93,7 @@ declare @llvm.aarch64.sve.fdiv.nxv2f64(, define @replace_fdiv_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -112,8 +103,7 @@ declare @llvm.aarch64.sve.fmax.nxv8f16(, @replace_fmax_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast 
@llvm.aarch64.sve.fmax.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -123,8 +113,7 @@ declare @llvm.aarch64.sve.fmax.nxv4f32(, < define @replace_fmax_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -134,8 +123,7 @@ declare @llvm.aarch64.sve.fmax.nxv2f64(, define @replace_fmax_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -145,8 +133,7 @@ declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -156,8 +143,7 @@ declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -167,8 +153,7 @@ declare @llvm.aarch64.sve.fmaxnm.nxv2f64( define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -178,8 +163,7 @@ declare @llvm.aarch64.sve.fmin.nxv8f16(, @replace_fmin_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -189,8 +173,7 @@ declare @llvm.aarch64.sve.fmin.nxv4f32(, < define @replace_fmin_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -200,8 +183,7 @@ declare @llvm.aarch64.sve.fmin.nxv2f64(, define @replace_fmin_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; 
CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -211,8 +193,7 @@ declare @llvm.aarch64.sve.fminnm.nxv8f16(, define @replace_fminnm_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -222,8 +203,7 @@ declare @llvm.aarch64.sve.fminnm.nxv4f32(, define @replace_fminnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -233,8 +213,7 @@ declare @llvm.aarch64.sve.fminnm.nxv2f64( define @replace_fminnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -244,8 +223,7 @@ declare @llvm.aarch64.sve.fmla.nxv8f16(, @replace_fmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -255,8 +233,7 @@ declare @llvm.aarch64.sve.fmla.nxv4f32(, < define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -266,8 +243,7 @@ declare @llvm.aarch64.sve.fmla.nxv2f64(, define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -277,8 +253,7 @@ declare @llvm.aarch64.sve.fmls.nxv8f16(, @replace_fmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast 
@llvm.aarch64.sve.fmls.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -288,8 +263,7 @@ declare @llvm.aarch64.sve.fmls.nxv4f32(, < define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -299,8 +273,7 @@ declare @llvm.aarch64.sve.fmls.nxv2f64(, define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -312,8 +285,7 @@ declare @llvm.aarch64.sve.fmul.nxv8f16(, @replace_fmul_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -323,8 +295,7 @@ declare @llvm.aarch64.sve.fmul.nxv4f32(, < define @replace_fmul_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -334,8 +305,7 @@ declare @llvm.aarch64.sve.fmul.nxv2f64(, define @replace_fmul_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -345,8 +315,7 @@ declare @llvm.aarch64.sve.fmulx.nxv8f16(, < define @replace_fmulx_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -356,8 +325,7 @@ declare @llvm.aarch64.sve.fmulx.nxv4f32(, define @replace_fmulx_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -367,8 +335,7 @@ declare @llvm.aarch64.sve.fmulx.nxv2f64(, define @replace_fmulx_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_double ; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -378,8 +345,7 @@ declare @llvm.aarch64.sve.fnmla.nxv8f16(, < define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -389,8 +355,7 @@ declare @llvm.aarch64.sve.fnmla.nxv4f32(, define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -400,8 +365,7 @@ declare @llvm.aarch64.sve.fnmla.nxv2f64(, define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -411,8 +375,7 @@ declare @llvm.aarch64.sve.fnmls.nxv8f16(, < define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -422,8 +385,7 @@ declare @llvm.aarch64.sve.fnmls.nxv4f32(, define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -433,8 +395,7 @@ declare @llvm.aarch64.sve.fnmls.nxv2f64(, define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -446,8 +407,7 @@ declare @llvm.aarch64.sve.fsub.nxv8f16(, @replace_fsub_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( zeroinitializer, [[A]], 
[[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -457,8 +417,7 @@ declare @llvm.aarch64.sve.fsub.nxv4f32(, < define @replace_fsub_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -468,8 +427,7 @@ declare @llvm.aarch64.sve.fsub.nxv2f64(, define @replace_fsub_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -481,8 +439,7 @@ declare @llvm.aarch64.sve.add.nxv16i8(, @replace_add_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.add.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -492,8 +449,7 @@ declare @llvm.aarch64.sve.add.nxv8i16(, @replace_add_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.add.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -503,8 +459,7 @@ declare @llvm.aarch64.sve.add.nxv4i32(, @replace_add_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.add.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -514,8 +469,7 @@ declare @llvm.aarch64.sve.add.nxv2i64(, @replace_add_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.add.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -525,8 +479,7 @@ declare @llvm.aarch64.sve.mla.nxv16i8(, @replace_mla_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mla.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -536,8 +489,7 @@ declare @llvm.aarch64.sve.mla.nxv8i16(, @replace_mla_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.mla.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mla.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -547,8 +499,7 @@ declare @llvm.aarch64.sve.mla.nxv4i32(, @replace_mla_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mla.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -558,8 +509,7 @@ declare @llvm.aarch64.sve.mla.nxv2i64(, @replace_mla_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mla.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -569,8 +519,7 @@ declare @llvm.aarch64.sve.mls.nxv16i8(, @replace_mls_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mls.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -580,8 +529,7 @@ declare @llvm.aarch64.sve.mls.nxv8i16(, @replace_mls_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mls.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -591,8 +539,7 @@ declare @llvm.aarch64.sve.mls.nxv4i32(, @replace_mls_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mls.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -602,8 +549,7 @@ declare @llvm.aarch64.sve.mls.nxv2i64(, @replace_mls_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mls.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -613,8 +559,7 @@ declare @llvm.aarch64.sve.mul.nxv16i8(, @replace_mul_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mul.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -624,8 +569,7 @@ declare @llvm.aarch64.sve.mul.nxv8i16(, @replace_mul_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define 
@replace_mul_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mul.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -635,8 +579,7 @@ declare @llvm.aarch64.sve.mul.nxv4i32(, @replace_mul_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mul.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -646,8 +589,7 @@ declare @llvm.aarch64.sve.mul.nxv2i64(, @replace_mul_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.mul.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -657,8 +599,7 @@ declare @llvm.aarch64.sve.sabd.nxv16i8(, @replace_sabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sabd.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -668,8 +609,7 @@ declare @llvm.aarch64.sve.sabd.nxv8i16(, @replace_sabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sabd.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -679,8 +619,7 @@ declare @llvm.aarch64.sve.sabd.nxv4i32(, @replace_sabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sabd.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -690,8 +629,7 @@ declare @llvm.aarch64.sve.sabd.nxv2i64(, @replace_sabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sabd.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -701,8 +639,7 @@ declare @llvm.aarch64.sve.smax.nxv16i8(, @replace_smax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smax.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -712,8 +649,7 @@ declare @llvm.aarch64.sve.smax.nxv8i16(, @replace_smax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i16 ; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smax.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -723,8 +659,7 @@ declare @llvm.aarch64.sve.smax.nxv4i32(, @replace_smax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smax.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -734,8 +669,7 @@ declare @llvm.aarch64.sve.smax.nxv2i64(, @replace_smax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smax.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -745,8 +679,7 @@ declare @llvm.aarch64.sve.smin.nxv16i8(, @replace_smin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smin.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -756,8 +689,7 @@ declare @llvm.aarch64.sve.smin.nxv8i16(, @replace_smin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smin.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -767,8 +699,7 @@ declare @llvm.aarch64.sve.smin.nxv4i32(, @replace_smin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smin.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -778,8 +709,7 @@ declare @llvm.aarch64.sve.smin.nxv2i64(, @replace_smin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smin.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -789,8 +719,7 @@ declare @llvm.aarch64.sve.smulh.nxv16i8(, < define @replace_smulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smulh.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -800,8 +729,7 @@ declare @llvm.aarch64.sve.smulh.nxv8i16(, @replace_smulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], 
[[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smulh.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -811,8 +739,7 @@ declare @llvm.aarch64.sve.smulh.nxv4i32(, @replace_smulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smulh.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -822,8 +749,7 @@ declare @llvm.aarch64.sve.smulh.nxv2i64(, @replace_smulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.smulh.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -833,8 +759,7 @@ declare @llvm.aarch64.sve.sub.nxv16i8(, @replace_sub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sub.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -844,8 +769,7 @@ declare @llvm.aarch64.sve.sub.nxv8i16(, @replace_sub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sub.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -855,8 +779,7 @@ declare @llvm.aarch64.sve.sub.nxv4i32(, @replace_sub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sub.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -866,8 +789,7 @@ declare @llvm.aarch64.sve.sub.nxv2i64(, @replace_sub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sub.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -877,8 +799,7 @@ declare @llvm.aarch64.sve.uabd.nxv16i8(, @replace_uabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uabd.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -888,8 +809,7 @@ declare @llvm.aarch64.sve.uabd.nxv8i16(, @replace_uabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uabd.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -899,8 +819,7 @@ declare @llvm.aarch64.sve.uabd.nxv4i32(, @replace_uabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uabd.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -910,8 +829,7 @@ declare @llvm.aarch64.sve.uabd.nxv2i64(, @replace_uabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uabd.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -921,8 +839,7 @@ declare @llvm.aarch64.sve.umax.nxv16i8(, @replace_umax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umax.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -932,8 +849,7 @@ declare @llvm.aarch64.sve.umax.nxv8i16(, @replace_umax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umax.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -943,8 +859,7 @@ declare @llvm.aarch64.sve.umax.nxv4i32(, @replace_umax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umax.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -954,8 +869,7 @@ declare @llvm.aarch64.sve.umax.nxv2i64(, @replace_umax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umax.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -965,8 +879,7 @@ declare @llvm.aarch64.sve.umin.nxv16i8(, @replace_umin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umin.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -976,8 +889,7 @@ declare @llvm.aarch64.sve.umin.nxv8i16(, @replace_umin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.umin.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umin.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -987,8 +899,7 @@ declare @llvm.aarch64.sve.umin.nxv4i32(, @replace_umin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umin.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -998,8 +909,7 @@ declare @llvm.aarch64.sve.umin.nxv2i64(, @replace_umin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umin.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1009,8 +919,7 @@ declare @llvm.aarch64.sve.umulh.nxv16i8(, < define @replace_umulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umulh.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1020,8 +929,7 @@ declare @llvm.aarch64.sve.umulh.nxv8i16(, @replace_umulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umulh.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1031,8 +939,7 @@ declare @llvm.aarch64.sve.umulh.nxv4i32(, @replace_umulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umulh.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1042,8 +949,7 @@ declare @llvm.aarch64.sve.umulh.nxv2i64(, @replace_umulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.umulh.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1055,8 +961,7 @@ declare @llvm.aarch64.sve.asr.nxv16i8(, @replace_asr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.asr.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1066,8 +971,7 @@ declare @llvm.aarch64.sve.asr.nxv8i16(, @replace_asr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.asr.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.asr.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1077,8 +981,7 @@ declare @llvm.aarch64.sve.asr.nxv4i32(, @replace_asr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.asr.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1088,8 +991,7 @@ declare @llvm.aarch64.sve.asr.nxv2i64(, @replace_asr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.asr.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1099,8 +1001,7 @@ declare @llvm.aarch64.sve.lsl.nxv16i8(, @replace_lsl_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsl.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1110,8 +1011,7 @@ declare @llvm.aarch64.sve.lsl.nxv8i16(, @replace_lsl_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsl.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1121,8 +1021,7 @@ declare @llvm.aarch64.sve.lsl.nxv4i32(, @replace_lsl_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsl.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1132,8 +1031,7 @@ declare @llvm.aarch64.sve.lsl.nxv2i64(, @replace_lsl_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsl.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1143,8 +1041,7 @@ declare @llvm.aarch64.sve.lsr.nxv16i8(, @replace_lsr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsr.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1154,8 +1051,7 @@ declare @llvm.aarch64.sve.lsr.nxv8i16(, @replace_lsr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv8i16( zeroinitializer, [[A]], 
[[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsr.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1165,8 +1061,7 @@ declare @llvm.aarch64.sve.lsr.nxv4i32(, @replace_lsr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsr.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1176,8 +1071,7 @@ declare @llvm.aarch64.sve.lsr.nxv2i64(, @replace_lsr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.lsr.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1189,8 +1083,7 @@ declare @llvm.aarch64.sve.and.nxv16i8(, @replace_and_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.and.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1200,8 +1093,7 @@ declare @llvm.aarch64.sve.and.nxv8i16(, @replace_and_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.and.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1211,8 +1103,7 @@ declare @llvm.aarch64.sve.and.nxv4i32(, @replace_and_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.and.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1222,8 +1113,7 @@ declare @llvm.aarch64.sve.and.nxv2i64(, @replace_and_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.and.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1233,8 +1123,7 @@ declare @llvm.aarch64.sve.bic.nxv16i8(, @replace_bic_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.bic.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1244,8 +1133,7 @@ declare @llvm.aarch64.sve.bic.nxv8i16(, @replace_bic_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret 
[[A]] ; %1 = tail call @llvm.aarch64.sve.bic.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1255,8 +1143,7 @@ declare @llvm.aarch64.sve.bic.nxv4i32(, @replace_bic_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.bic.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1266,8 +1153,7 @@ declare @llvm.aarch64.sve.bic.nxv2i64(, @replace_bic_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.bic.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1277,8 +1163,7 @@ declare @llvm.aarch64.sve.eor.nxv16i8(, @replace_eor_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.eor.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1288,8 +1173,7 @@ declare @llvm.aarch64.sve.eor.nxv8i16(, @replace_eor_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.eor.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1299,8 +1183,7 @@ declare @llvm.aarch64.sve.eor.nxv4i32(, @replace_eor_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.eor.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1310,8 +1193,7 @@ declare @llvm.aarch64.sve.eor.nxv2i64(, @replace_eor_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.eor.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1321,8 +1203,7 @@ declare @llvm.aarch64.sve.orr.nxv16i8(, @replace_orr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.orr.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1332,8 +1213,7 @@ declare @llvm.aarch64.sve.orr.nxv8i16(, @replace_orr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.orr.nxv8i16( 
zeroinitializer, %a, %b) ret %1 @@ -1343,8 +1223,7 @@ declare @llvm.aarch64.sve.orr.nxv4i32(, @replace_orr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.orr.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1354,8 +1233,7 @@ declare @llvm.aarch64.sve.orr.nxv2i64(, @replace_orr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.orr.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1367,8 +1245,7 @@ declare @llvm.aarch64.sve.sqsub.nxv16i8(, < define @replace_sqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sqsub.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1378,8 +1255,7 @@ declare @llvm.aarch64.sve.sqsub.nxv8i16(, @replace_sqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sqsub.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1389,8 +1265,7 @@ declare @llvm.aarch64.sve.sqsub.nxv4i32(, @replace_sqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sqsub.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1400,8 +1275,7 @@ declare @llvm.aarch64.sve.sqsub.nxv2i64(, @replace_sqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.sqsub.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1411,8 +1285,7 @@ declare @llvm.aarch64.sve.uqsub.nxv16i8(, < define @replace_uqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uqsub.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1422,8 +1295,7 @@ declare @llvm.aarch64.sve.uqsub.nxv8i16(, @replace_uqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call 
@llvm.aarch64.sve.uqsub.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1433,8 +1305,7 @@ declare @llvm.aarch64.sve.uqsub.nxv4i32(, @replace_uqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uqsub.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1444,8 +1315,7 @@ declare @llvm.aarch64.sve.uqsub.nxv2i64(, @replace_uqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret [[A]] ; %1 = tail call @llvm.aarch64.sve.uqsub.nxv2i64( zeroinitializer, %a, %b) ret %1 diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll index d3e51231fb52c..7a01dce5e2128 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll @@ -11,8 +11,7 @@ declare @llvm.aarch64.sve.fabd.u.nxv8f16(, define @replace_fabd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -22,8 +21,7 @@ declare @llvm.aarch64.sve.fabd.u.nxv4f32(, define @replace_fabd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -33,8 +31,7 @@ declare @llvm.aarch64.sve.fabd.u.nxv2f64( define @replace_fabd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -44,8 +41,7 @@ declare @llvm.aarch64.sve.fadd.u.nxv8f16(, define @replace_fadd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -55,8 +51,7 @@ declare @llvm.aarch64.sve.fadd.u.nxv4f32(, define @replace_fadd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( 
zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -66,8 +61,7 @@ declare @llvm.aarch64.sve.fadd.u.nxv2f64( define @replace_fadd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -77,8 +71,7 @@ declare @llvm.aarch64.sve.fdiv.u.nxv8f16(, define @replace_fdiv_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -88,8 +81,7 @@ declare @llvm.aarch64.sve.fdiv.u.nxv4f32(, define @replace_fdiv_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -99,8 +91,7 @@ declare @llvm.aarch64.sve.fdiv.u.nxv2f64( define @replace_fdiv_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -110,8 +101,7 @@ declare @llvm.aarch64.sve.fmax.u.nxv8f16(, define @replace_fmax_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -121,8 +111,7 @@ declare @llvm.aarch64.sve.fmax.u.nxv4f32(, define @replace_fmax_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -132,8 +121,7 @@ declare @llvm.aarch64.sve.fmax.u.nxv2f64( define @replace_fmax_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -143,8 +131,7 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv8f16( define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { 
; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -154,8 +141,7 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv4f32( @replace_fmaxnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -165,8 +151,7 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv2f64( @replace_fmaxnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -176,8 +161,7 @@ declare @llvm.aarch64.sve.fmin.u.nxv8f16(, define @replace_fmin_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -187,8 +171,7 @@ declare @llvm.aarch64.sve.fmin.u.nxv4f32(, define @replace_fmin_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -198,8 +181,7 @@ declare @llvm.aarch64.sve.fmin.u.nxv2f64( define @replace_fmin_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -209,8 +191,7 @@ declare @llvm.aarch64.sve.fminnm.u.nxv8f16( define @replace_fminnm_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -220,8 +201,7 @@ declare @llvm.aarch64.sve.fminnm.u.nxv4f32( @replace_fminnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret 
undef ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -231,8 +211,7 @@ declare @llvm.aarch64.sve.fminnm.u.nxv2f64( @replace_fminnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -242,8 +221,7 @@ declare @llvm.aarch64.sve.fmla.u.nxv8f16(, define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -253,8 +231,7 @@ declare @llvm.aarch64.sve.fmla.u.nxv4f32(, define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -264,8 +241,7 @@ declare @llvm.aarch64.sve.fmla.u.nxv2f64( define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -275,8 +251,7 @@ declare @llvm.aarch64.sve.fmls.u.nxv8f16(, define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -286,8 +261,7 @@ declare @llvm.aarch64.sve.fmls.u.nxv4f32(, define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -297,8 +271,7 @@ declare @llvm.aarch64.sve.fmls.u.nxv2f64( define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -308,8 +281,7 @@ 
declare @llvm.aarch64.sve.fmul.u.nxv8f16(, define @replace_fmul_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -319,8 +291,7 @@ declare @llvm.aarch64.sve.fmul.u.nxv4f32(, define @replace_fmul_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -330,8 +301,7 @@ declare @llvm.aarch64.sve.fmul.u.nxv2f64( define @replace_fmul_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -341,8 +311,7 @@ declare @llvm.aarch64.sve.fmulx.u.nxv8f16(, define @replace_fmulx_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -352,8 +321,7 @@ declare @llvm.aarch64.sve.fmulx.u.nxv4f32( define @replace_fmulx_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -363,8 +331,7 @@ declare @llvm.aarch64.sve.fmulx.u.nxv2f64( @replace_fmulx_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -374,8 +341,7 @@ declare @llvm.aarch64.sve.fnmla.u.nxv8f16(, define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -385,8 +351,7 @@ declare @llvm.aarch64.sve.fnmla.u.nxv4f32( define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = 
tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -396,8 +361,7 @@ declare @llvm.aarch64.sve.fnmla.u.nxv2f64( @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -407,8 +371,7 @@ declare @llvm.aarch64.sve.fnmls.u.nxv8f16(, define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -418,8 +381,7 @@ declare @llvm.aarch64.sve.fnmls.u.nxv4f32( define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -429,8 +391,7 @@ declare @llvm.aarch64.sve.fnmls.u.nxv2f64( @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -440,8 +401,7 @@ declare @llvm.aarch64.sve.fsub.u.nxv8f16(, define @replace_fsub_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -451,8 +411,7 @@ declare @llvm.aarch64.sve.fsub.u.nxv4f32(, define @replace_fsub_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -462,8 +421,7 @@ declare @llvm.aarch64.sve.fsub.u.nxv2f64( define @replace_fsub_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call fast 
@llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -475,8 +433,7 @@ declare @llvm.aarch64.sve.add.u.nxv16i8(, < define @replace_add_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -486,8 +443,7 @@ declare @llvm.aarch64.sve.add.u.nxv8i16(, @replace_add_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -497,8 +453,7 @@ declare @llvm.aarch64.sve.add.u.nxv4i32(, @replace_add_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -508,8 +463,7 @@ declare @llvm.aarch64.sve.add.u.nxv2i64(, @replace_add_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -519,8 +473,7 @@ declare @llvm.aarch64.sve.mla.u.nxv16i8(, < define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -530,8 +483,7 @@ declare @llvm.aarch64.sve.mla.u.nxv8i16(, @replace_mla_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -541,8 +493,7 @@ declare @llvm.aarch64.sve.mla.u.nxv4i32(, @replace_mla_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -552,8 +503,7 @@ declare @llvm.aarch64.sve.mla.u.nxv2i64(, @replace_mla_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv2i64( 
zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -563,8 +513,7 @@ declare @llvm.aarch64.sve.mls.u.nxv16i8(, < define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -574,8 +523,7 @@ declare @llvm.aarch64.sve.mls.u.nxv8i16(, @replace_mls_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -585,8 +533,7 @@ declare @llvm.aarch64.sve.mls.u.nxv4i32(, @replace_mls_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -596,8 +543,7 @@ declare @llvm.aarch64.sve.mls.u.nxv2i64(, @replace_mls_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -607,8 +553,7 @@ declare @llvm.aarch64.sve.mul.u.nxv16i8(, < define @replace_mul_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -618,8 +563,7 @@ declare @llvm.aarch64.sve.mul.u.nxv8i16(, @replace_mul_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -629,8 +573,7 @@ declare @llvm.aarch64.sve.mul.u.nxv4i32(, @replace_mul_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -640,8 +583,7 @@ declare @llvm.aarch64.sve.mul.u.nxv2i64(, @replace_mul_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i64 ; CHECK-SAME: 
( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -651,8 +593,7 @@ declare @llvm.aarch64.sve.sabd.u.nxv16i8(, define @replace_sabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -662,8 +603,7 @@ declare @llvm.aarch64.sve.sabd.u.nxv8i16(, < define @replace_sabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -673,8 +613,7 @@ declare @llvm.aarch64.sve.sabd.u.nxv4i32(, < define @replace_sabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -684,8 +623,7 @@ declare @llvm.aarch64.sve.sabd.u.nxv2i64(, < define @replace_sabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -695,8 +633,7 @@ declare @llvm.aarch64.sve.smax.u.nxv16i8(, define @replace_smax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -706,8 +643,7 @@ declare @llvm.aarch64.sve.smax.u.nxv8i16(, < define @replace_smax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -717,8 +653,7 @@ declare @llvm.aarch64.sve.smax.u.nxv4i32(, < define @replace_smax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -728,8 +663,7 @@ declare @llvm.aarch64.sve.smax.u.nxv2i64(, < define 
@replace_smax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -739,8 +673,7 @@ declare @llvm.aarch64.sve.smin.u.nxv16i8(, define @replace_smin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -750,8 +683,7 @@ declare @llvm.aarch64.sve.smin.u.nxv8i16(, < define @replace_smin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -761,8 +693,7 @@ declare @llvm.aarch64.sve.smin.u.nxv4i32(, < define @replace_smin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -772,8 +703,7 @@ declare @llvm.aarch64.sve.smin.u.nxv2i64(, < define @replace_smin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -783,8 +713,7 @@ declare @llvm.aarch64.sve.smulh.u.nxv16i8(, define @replace_smulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -794,8 +723,7 @@ declare @llvm.aarch64.sve.smulh.u.nxv8i16(, define @replace_smulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -805,8 +733,7 @@ declare @llvm.aarch64.sve.smulh.u.nxv4i32(, define @replace_smulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( 
zeroinitializer, %a, %b) ret %1 @@ -816,8 +743,7 @@ declare @llvm.aarch64.sve.smulh.u.nxv2i64(, define @replace_smulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -827,8 +753,7 @@ declare @llvm.aarch64.sve.sub.u.nxv16i8(, < define @replace_sub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -838,8 +763,7 @@ declare @llvm.aarch64.sve.sub.u.nxv8i16(, @replace_sub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -849,8 +773,7 @@ declare @llvm.aarch64.sve.sub.u.nxv4i32(, @replace_sub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -860,8 +783,7 @@ declare @llvm.aarch64.sve.sub.u.nxv2i64(, @replace_sub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -871,8 +793,7 @@ declare @llvm.aarch64.sve.uabd.u.nxv16i8(, define @replace_uabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -882,8 +803,7 @@ declare @llvm.aarch64.sve.uabd.u.nxv8i16(, < define @replace_uabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -893,8 +813,7 @@ declare @llvm.aarch64.sve.uabd.u.nxv4i32(, < define @replace_uabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = 
tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -904,8 +823,7 @@ declare @llvm.aarch64.sve.uabd.u.nxv2i64(, < define @replace_uabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -915,8 +833,7 @@ declare @llvm.aarch64.sve.umax.u.nxv16i8(, define @replace_umax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -926,8 +843,7 @@ declare @llvm.aarch64.sve.umax.u.nxv8i16(, < define @replace_umax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -937,8 +853,7 @@ declare @llvm.aarch64.sve.umax.u.nxv4i32(, < define @replace_umax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -948,8 +863,7 @@ declare @llvm.aarch64.sve.umax.u.nxv2i64(, < define @replace_umax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -959,8 +873,7 @@ declare @llvm.aarch64.sve.umin.u.nxv16i8(, define @replace_umin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -970,8 +883,7 @@ declare @llvm.aarch64.sve.umin.u.nxv8i16(, < define @replace_umin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -981,8 +893,7 @@ declare @llvm.aarch64.sve.umin.u.nxv4i32(, < define @replace_umin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv4i32( 
zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -992,8 +903,7 @@ declare @llvm.aarch64.sve.umin.u.nxv2i64(, < define @replace_umin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1003,8 +913,7 @@ declare @llvm.aarch64.sve.umulh.u.nxv16i8(, define @replace_umulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1014,8 +923,7 @@ declare @llvm.aarch64.sve.umulh.u.nxv8i16(, define @replace_umulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1025,8 +933,7 @@ declare @llvm.aarch64.sve.umulh.u.nxv4i32(, define @replace_umulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1036,8 +943,7 @@ declare @llvm.aarch64.sve.umulh.u.nxv2i64(, define @replace_umulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1049,8 +955,7 @@ declare @llvm.aarch64.sve.asr.u.nxv16i8(, < define @replace_asr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1060,8 +965,7 @@ declare @llvm.aarch64.sve.asr.u.nxv8i16(, @replace_asr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1071,8 +975,7 @@ declare @llvm.aarch64.sve.asr.u.nxv4i32(, @replace_asr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) 
#[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1082,8 +985,7 @@ declare @llvm.aarch64.sve.asr.u.nxv2i64(, @replace_asr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1093,8 +995,7 @@ declare @llvm.aarch64.sve.lsl.u.nxv16i8(, < define @replace_lsl_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1104,8 +1005,7 @@ declare @llvm.aarch64.sve.lsl.u.nxv8i16(, @replace_lsl_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1115,8 +1015,7 @@ declare @llvm.aarch64.sve.lsl.u.nxv4i32(, @replace_lsl_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1126,8 +1025,7 @@ declare @llvm.aarch64.sve.lsl.u.nxv2i64(, @replace_lsl_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1137,8 +1035,7 @@ declare @llvm.aarch64.sve.lsr.u.nxv16i8(, < define @replace_lsr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1148,8 +1045,7 @@ declare @llvm.aarch64.sve.lsr.u.nxv8i16(, @replace_lsr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1159,8 +1055,7 @@ declare @llvm.aarch64.sve.lsr.u.nxv4i32(, @replace_lsr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], 
[[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1170,8 +1065,7 @@ declare @llvm.aarch64.sve.lsr.u.nxv2i64(, @replace_lsr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1183,8 +1077,7 @@ declare @llvm.aarch64.sve.and.u.nxv16i8(, < define @replace_and_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1194,8 +1087,7 @@ declare @llvm.aarch64.sve.and.u.nxv8i16(, @replace_and_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1205,8 +1097,7 @@ declare @llvm.aarch64.sve.and.u.nxv4i32(, @replace_and_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1216,8 +1107,7 @@ declare @llvm.aarch64.sve.and.u.nxv2i64(, @replace_and_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1227,8 +1117,7 @@ declare @llvm.aarch64.sve.bic.u.nxv16i8(, < define @replace_bic_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1238,8 +1127,7 @@ declare @llvm.aarch64.sve.bic.u.nxv8i16(, @replace_bic_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1249,8 +1137,7 @@ declare @llvm.aarch64.sve.bic.u.nxv4i32(, @replace_bic_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i32 ; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1260,8 +1147,7 @@ declare @llvm.aarch64.sve.bic.u.nxv2i64(, @replace_bic_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1271,8 +1157,7 @@ declare @llvm.aarch64.sve.eor.u.nxv16i8(, < define @replace_eor_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1282,8 +1167,7 @@ declare @llvm.aarch64.sve.eor.u.nxv8i16(, @replace_eor_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1293,8 +1177,7 @@ declare @llvm.aarch64.sve.eor.u.nxv4i32(, @replace_eor_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1304,8 +1187,7 @@ declare @llvm.aarch64.sve.eor.u.nxv2i64(, @replace_eor_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1315,8 +1197,7 @@ declare @llvm.aarch64.sve.orr.u.nxv16i8(, < define @replace_orr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1326,8 +1207,7 @@ declare @llvm.aarch64.sve.orr.u.nxv8i16(, @replace_orr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1337,8 +1217,7 @@ declare @llvm.aarch64.sve.orr.u.nxv4i32(, @replace_orr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i32 ; 
CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1348,8 +1227,7 @@ declare @llvm.aarch64.sve.orr.u.nxv2i64(, @replace_orr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1361,8 +1239,7 @@ declare @llvm.aarch64.sve.sqsub.u.nxv16i8(, define @replace_sqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1372,8 +1249,7 @@ declare @llvm.aarch64.sve.sqsub.u.nxv8i16(, define @replace_sqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1383,8 +1259,7 @@ declare @llvm.aarch64.sve.sqsub.u.nxv4i32(, define @replace_sqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1394,8 +1269,7 @@ declare @llvm.aarch64.sve.sqsub.u.nxv2i64(, define @replace_sqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1405,8 +1279,7 @@ declare @llvm.aarch64.sve.uqsub.u.nxv16i8(, define @replace_uqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1416,8 +1289,7 @@ declare @llvm.aarch64.sve.uqsub.u.nxv8i16(, define @replace_uqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1427,8 +1299,7 @@ declare 
@llvm.aarch64.sve.uqsub.u.nxv4i32(, define @replace_uqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1438,8 +1309,7 @@ declare @llvm.aarch64.sve.uqsub.u.nxv2i64(, define @replace_uqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] +; CHECK-NEXT: ret undef ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 From fa66a70760f01cec75d1f1c2afa35f126751cf1e Mon Sep 17 00:00:00 2001 From: Mark Harley Date: Fri, 1 Dec 2023 15:25:06 +0000 Subject: [PATCH 3/5] [AArch64][SVE][NFC] Added braces to if block --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a0e42a183f1d9..2e05396c44f2c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1412,14 +1412,15 @@ static std::optional instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID) { if (match(II.getOperand(0), m_ZeroInt())) { - if (II.getIntrinsicID() != IID) + if (II.getIntrinsicID() != IID) { // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are // inactive for sv[func]_m or sv[func]_z return IC.replaceInstUsesWith(II, II.getOperand(1)); - else + } else { // llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes // are inactive for sv[func]_x return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); + } } if (II.getIntrinsicID() != IID) return instCombineSVEAllActive(II, IID); From 2cbc47b7e67f8ed58256c24a1cd87e9af5662b2e Mon Sep 17 00:00:00 2001 From: Mark Harley Date: Thu, 14 Dec 2023 16:20:25 +0000 Subject: [PATCH 4/5] [AArch64][SVE] Removed unnecessary optimisations for _u type intrinsics This patch removes the optimisations for _u intrinsics with no active lanes, as this case should never occur. --- .../AArch64/AArch64TargetTransformInfo.cpp | 93 +---- ...-intrinsic-comb-u-forms-no-active-lanes.ll | 392 ++++++++++++------ 2 files changed, 276 insertions(+), 209 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 2e05396c44f2c..de427dfb06aec 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1406,25 +1406,17 @@ static std::optional instCombineSVEAllActive(IntrinsicInst &II, return &II; } -// Optimize operations that take an all false predicate or send them for -// canonicalization.
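// A minimal IR sketch of the op1 fold retained below; aarch64_sve_smax and
// the nxv4i32 element type are just an illustrative choice, not taken from
// the patch itself:
//
//   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(
//            <vscale x 4 x i1> zeroinitializer,
//            <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
//
// With an all-false governing predicate no lanes are updated, so the
// merging-form call simplifies to its first data operand and InstCombine
// can replace all uses of %r with %a outright.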
+// Simplify operations where predicate has all inactive lanes or try to replace +// with _u form when all lanes are active static std::optional instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID) { if (match(II.getOperand(0), m_ZeroInt())) { - if (II.getIntrinsicID() != IID) { - // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are - // inactive for sv[func]_m or sv[func]_z - return IC.replaceInstUsesWith(II, II.getOperand(1)); - } else { - // llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes - // are inactive for sv[func]_x - return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); - } + // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are + // inactive for sv[func]_m or sv[func]_z + return IC.replaceInstUsesWith(II, II.getOperand(1)); } - if (II.getIntrinsicID() != IID) - return instCombineSVEAllActive(II, IID); - return std::nullopt; + return instCombineSVEAllActive(II, IID); } static std::optional instCombineSVEVectorAdd(InstCombiner &IC, @@ -1443,18 +1435,6 @@ static std::optional instCombineSVEVectorAdd(InstCombiner &IC, return std::nullopt; } -static std::optional -instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = - instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) - return II_U; - else { - return instCombineSVEVectorFuseMulAddSub( - IC, II, true); - } -} - static std::optional instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { if (auto II_U = @@ -1480,9 +1460,6 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { static std::optional instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = - instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) - return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub(IC, II, @@ -1526,9 +1503,6 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { static std::optional instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = - instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) - return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub(IC, II, @@ -1559,18 +1533,6 @@ static std::optional instCombineSVEVectorSub(InstCombiner &IC, return std::nullopt; } -static std::optional -instCombineSVEVectorSubU(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = - instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u)) - return II_U; - else { - return instCombineSVEVectorFuseMulAddSub( - IC, II, true); - } -} - static std::optional instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID) { @@ -1578,8 +1540,9 @@ static std::optional instCombineSVEVectorMul(InstCombiner &IC, auto *OpMultiplicand = II.getOperand(1); auto *OpMultiplier = II.getOperand(2); - if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID)) - return II_U; + if (II.getIntrinsicID() != IID) + if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID)) + return II_U; // Return true if a given instruction is a unit splat value, false otherwise. 
auto IsUnitSplat = [](auto *I) { @@ -1944,44 +1907,33 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); case Intrinsic::aarch64_sve_fabd: - case Intrinsic::aarch64_sve_fabd_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u); case Intrinsic::aarch64_sve_fadd: return instCombineSVEVectorFAdd(IC, II); case Intrinsic::aarch64_sve_fadd_u: return instCombineSVEVectorFAddU(IC, II); case Intrinsic::aarch64_sve_fdiv: - case Intrinsic::aarch64_sve_fdiv_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); case Intrinsic::aarch64_sve_fmax: - case Intrinsic::aarch64_sve_fmax_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u); case Intrinsic::aarch64_sve_fmaxnm: - case Intrinsic::aarch64_sve_fmaxnm_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); case Intrinsic::aarch64_sve_fmin: - case Intrinsic::aarch64_sve_fmin_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u); case Intrinsic::aarch64_sve_fminnm: - case Intrinsic::aarch64_sve_fminnm_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); case Intrinsic::aarch64_sve_fmla: - case Intrinsic::aarch64_sve_fmla_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u); case Intrinsic::aarch64_sve_fmls: - case Intrinsic::aarch64_sve_fmls_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: - case Intrinsic::aarch64_sve_fmulx_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); case Intrinsic::aarch64_sve_fnmla: - case Intrinsic::aarch64_sve_fnmla_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u); case Intrinsic::aarch64_sve_fnmls: - case Intrinsic::aarch64_sve_fnmls_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); case Intrinsic::aarch64_sve_fsub: return instCombineSVEVectorFSub(IC, II); @@ -1990,70 +1942,55 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_add: return instCombineSVEVectorAdd(IC, II); case Intrinsic::aarch64_sve_add_u: - return instCombineSVEVectorAddU(IC, II); + return instCombineSVEVectorFuseMulAddSub( + IC, II, true); case Intrinsic::aarch64_sve_mla: - case Intrinsic::aarch64_sve_mla_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u); case Intrinsic::aarch64_sve_mls: - case Intrinsic::aarch64_sve_mls_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: case Intrinsic::aarch64_sve_mul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: - case Intrinsic::aarch64_sve_sabd_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u); case Intrinsic::aarch64_sve_smax: - case Intrinsic::aarch64_sve_smax_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u); case Intrinsic::aarch64_sve_smin: - case Intrinsic::aarch64_sve_smin_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u); case Intrinsic::aarch64_sve_smulh: - case Intrinsic::aarch64_sve_smulh_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u); case Intrinsic::aarch64_sve_sub: return 
instCombineSVEVectorSub(IC, II); case Intrinsic::aarch64_sve_sub_u: - return instCombineSVEVectorSubU(IC, II); + return instCombineSVEVectorFuseMulAddSub( + IC, II, true); case Intrinsic::aarch64_sve_uabd: - case Intrinsic::aarch64_sve_uabd_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u); case Intrinsic::aarch64_sve_umax: - case Intrinsic::aarch64_sve_umax_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u); case Intrinsic::aarch64_sve_umin: - case Intrinsic::aarch64_sve_umin_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u); case Intrinsic::aarch64_sve_umulh: - case Intrinsic::aarch64_sve_umulh_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - case Intrinsic::aarch64_sve_asr_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - case Intrinsic::aarch64_sve_lsl_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - case Intrinsic::aarch64_sve_lsr_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - case Intrinsic::aarch64_sve_and_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - case Intrinsic::aarch64_sve_bic_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - case Intrinsic::aarch64_sve_eor_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - case Intrinsic::aarch64_sve_orr_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - case Intrinsic::aarch64_sve_sqsub_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - case Intrinsic::aarch64_sve_uqsub_u: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll index 7a01dce5e2128..40deadbbb9bf3 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll @@ -11,7 +11,8 @@ declare @llvm.aarch64.sve.fabd.u.nxv8f16(, define @replace_fabd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -21,7 +22,8 @@ declare @llvm.aarch64.sve.fabd.u.nxv4f32(, define @replace_fabd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -31,7 +33,8 @@ declare @llvm.aarch64.sve.fabd.u.nxv2f64( 
define @replace_fabd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fabd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -41,7 +44,8 @@ declare @llvm.aarch64.sve.fadd.u.nxv8f16(, define @replace_fadd_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -51,7 +55,8 @@ declare @llvm.aarch64.sve.fadd.u.nxv4f32(, define @replace_fadd_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -61,7 +66,8 @@ declare @llvm.aarch64.sve.fadd.u.nxv2f64( define @replace_fadd_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fadd_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -71,7 +77,8 @@ declare @llvm.aarch64.sve.fdiv.u.nxv8f16(, define @replace_fdiv_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -81,7 +88,8 @@ declare @llvm.aarch64.sve.fdiv.u.nxv4f32(, define @replace_fdiv_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -91,7 +99,8 @@ declare @llvm.aarch64.sve.fdiv.u.nxv2f64( define @replace_fdiv_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fdiv_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -101,7 +110,8 @@ declare @llvm.aarch64.sve.fmax.u.nxv8f16(, define @replace_fmax_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; 
CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -111,7 +121,8 @@ declare @llvm.aarch64.sve.fmax.u.nxv4f32(, define @replace_fmax_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -121,7 +132,8 @@ declare @llvm.aarch64.sve.fmax.u.nxv2f64( define @replace_fmax_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmax_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -131,7 +143,8 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv8f16( define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -141,7 +154,8 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv4f32( @replace_fmaxnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -151,7 +165,8 @@ declare @llvm.aarch64.sve.fmaxnm.u.nxv2f64( @replace_fmaxnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -161,7 +176,8 @@ declare @llvm.aarch64.sve.fmin.u.nxv8f16(, define @replace_fmin_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -171,7 +187,8 @@ declare @llvm.aarch64.sve.fmin.u.nxv4f32(, define @replace_fmin_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmin_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -181,7 +198,8 @@ declare @llvm.aarch64.sve.fmin.u.nxv2f64( define @replace_fmin_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define 
@replace_fmin_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -191,7 +209,8 @@ declare @llvm.aarch64.sve.fminnm.u.nxv8f16( define @replace_fminnm_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -201,7 +220,8 @@ declare @llvm.aarch64.sve.fminnm.u.nxv4f32( @replace_fminnm_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -211,7 +231,8 @@ declare @llvm.aarch64.sve.fminnm.u.nxv2f64( @replace_fminnm_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fminnm_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -221,7 +242,8 @@ declare @llvm.aarch64.sve.fmla.u.nxv8f16(, define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -231,7 +253,8 @@ declare @llvm.aarch64.sve.fmla.u.nxv4f32(, define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -241,7 +264,8 @@ declare @llvm.aarch64.sve.fmla.u.nxv2f64( define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -251,7 +275,8 @@ declare @llvm.aarch64.sve.fmls.u.nxv8f16(, define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast 
@llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -261,7 +286,8 @@ declare @llvm.aarch64.sve.fmls.u.nxv4f32(, define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -271,7 +297,8 @@ declare @llvm.aarch64.sve.fmls.u.nxv2f64( define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -281,7 +308,8 @@ declare @llvm.aarch64.sve.fmul.u.nxv8f16(, define @replace_fmul_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -291,7 +319,8 @@ declare @llvm.aarch64.sve.fmul.u.nxv4f32(, define @replace_fmul_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -301,7 +330,8 @@ declare @llvm.aarch64.sve.fmul.u.nxv2f64( define @replace_fmul_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmul_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -311,7 +341,8 @@ declare @llvm.aarch64.sve.fmulx.u.nxv8f16(, define @replace_fmulx_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -321,7 +352,8 @@ declare @llvm.aarch64.sve.fmulx.u.nxv4f32( define @replace_fmulx_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -331,7 +363,8 @@ declare 
@llvm.aarch64.sve.fmulx.u.nxv2f64( @replace_fmulx_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fmulx_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -341,7 +374,8 @@ declare @llvm.aarch64.sve.fnmla.u.nxv8f16(, define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -351,7 +385,8 @@ declare @llvm.aarch64.sve.fnmla.u.nxv4f32( define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -361,7 +396,8 @@ declare @llvm.aarch64.sve.fnmla.u.nxv2f64( @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmla_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -371,7 +407,8 @@ declare @llvm.aarch64.sve.fnmls.u.nxv8f16(, define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, %a, %b, %c) ret %1 @@ -381,7 +418,8 @@ declare @llvm.aarch64.sve.fnmls.u.nxv4f32( define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, %a, %b, %c) ret %1 @@ -391,7 +429,8 @@ declare @llvm.aarch64.sve.fnmls.u.nxv2f64( @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_fnmls_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, %a, %b, %c) ret %1 @@ -401,7 +440,8 @@ declare @llvm.aarch64.sve.fsub.u.nxv8f16(, define @replace_fsub_intrinsic_half( %a, %b) #0 { ; CHECK-LABEL: define 
@replace_fsub_intrinsic_half ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, %a, %b) ret %1 @@ -411,7 +451,8 @@ declare @llvm.aarch64.sve.fsub.u.nxv4f32(, define @replace_fsub_intrinsic_float( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_float ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, %a, %b) ret %1 @@ -421,7 +462,8 @@ declare @llvm.aarch64.sve.fsub.u.nxv2f64( define @replace_fsub_intrinsic_double( %a, %b) #0 { ; CHECK-LABEL: define @replace_fsub_intrinsic_double ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, %a, %b) ret %1 @@ -433,7 +475,8 @@ declare @llvm.aarch64.sve.add.u.nxv16i8(, < define @replace_add_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -443,7 +486,8 @@ declare @llvm.aarch64.sve.add.u.nxv8i16(, @replace_add_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -453,7 +497,8 @@ declare @llvm.aarch64.sve.add.u.nxv4i32(, @replace_add_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -463,7 +508,8 @@ declare @llvm.aarch64.sve.add.u.nxv2i64(, @replace_add_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_add_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -473,7 +519,8 @@ declare @llvm.aarch64.sve.mla.u.nxv16i8(, < define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -483,7 +530,8 @@ declare 
@llvm.aarch64.sve.mla.u.nxv8i16(, @replace_mla_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -493,7 +541,8 @@ declare @llvm.aarch64.sve.mla.u.nxv4i32(, @replace_mla_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -503,7 +552,8 @@ declare @llvm.aarch64.sve.mla.u.nxv2i64(, @replace_mla_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mla_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -513,7 +563,8 @@ declare @llvm.aarch64.sve.mls.u.nxv16i8(, < define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, %a, %b, %c) ret %1 @@ -523,7 +574,8 @@ declare @llvm.aarch64.sve.mls.u.nxv8i16(, @replace_mls_intrinsic_i16( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, %a, %b, %c) ret %1 @@ -533,7 +585,8 @@ declare @llvm.aarch64.sve.mls.u.nxv4i32(, @replace_mls_intrinsic_i32( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, %a, %b, %c) ret %1 @@ -543,7 +596,8 @@ declare @llvm.aarch64.sve.mls.u.nxv2i64(, @replace_mls_intrinsic_i64( %a, %b, %c) #0 { ; CHECK-LABEL: define @replace_mls_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, %a, %b, %c) ret %1 @@ -553,7 +607,8 @@ declare @llvm.aarch64.sve.mul.u.nxv16i8(, < define @replace_mul_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -563,7 +618,8 @@ declare @llvm.aarch64.sve.mul.u.nxv8i16(, @replace_mul_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -573,7 +629,8 @@ declare @llvm.aarch64.sve.mul.u.nxv4i32(, @replace_mul_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -583,7 +640,8 @@ declare @llvm.aarch64.sve.mul.u.nxv2i64(, @replace_mul_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_mul_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -593,7 +651,8 @@ declare @llvm.aarch64.sve.sabd.u.nxv16i8(, define @replace_sabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -603,7 +662,8 @@ declare @llvm.aarch64.sve.sabd.u.nxv8i16(, < define @replace_sabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -613,7 +673,8 @@ declare @llvm.aarch64.sve.sabd.u.nxv4i32(, < define @replace_sabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -623,7 +684,8 @@ declare @llvm.aarch64.sve.sabd.u.nxv2i64(, < define @replace_sabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -633,7 +695,8 @@ declare @llvm.aarch64.sve.smax.u.nxv16i8(, define @replace_smax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret 
undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -643,7 +706,8 @@ declare @llvm.aarch64.sve.smax.u.nxv8i16(, < define @replace_smax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -653,7 +717,8 @@ declare @llvm.aarch64.sve.smax.u.nxv4i32(, < define @replace_smax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -663,7 +728,8 @@ declare @llvm.aarch64.sve.smax.u.nxv2i64(, < define @replace_smax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smax.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -673,7 +739,8 @@ declare @llvm.aarch64.sve.smin.u.nxv16i8(, define @replace_smin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -683,7 +750,8 @@ declare @llvm.aarch64.sve.smin.u.nxv8i16(, < define @replace_smin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -693,7 +761,8 @@ declare @llvm.aarch64.sve.smin.u.nxv4i32(, < define @replace_smin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -703,7 +772,8 @@ declare @llvm.aarch64.sve.smin.u.nxv2i64(, < define @replace_smin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smin.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -713,7 +783,8 @@ declare @llvm.aarch64.sve.smulh.u.nxv16i8(, define @replace_smulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define 
@replace_smulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -723,7 +794,8 @@ declare @llvm.aarch64.sve.smulh.u.nxv8i16(, define @replace_smulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -733,7 +805,8 @@ declare @llvm.aarch64.sve.smulh.u.nxv4i32(, define @replace_smulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -743,7 +816,8 @@ declare @llvm.aarch64.sve.smulh.u.nxv2i64(, define @replace_smulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_smulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.smulh.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -753,7 +827,8 @@ declare @llvm.aarch64.sve.sub.u.nxv16i8(, < define @replace_sub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -763,7 +838,8 @@ declare @llvm.aarch64.sve.sub.u.nxv8i16(, @replace_sub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -773,7 +849,8 @@ declare @llvm.aarch64.sve.sub.u.nxv4i32(, @replace_sub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -783,7 +860,8 @@ declare @llvm.aarch64.sve.sub.u.nxv2i64(, @replace_sub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -793,7 +871,8 @@ declare @llvm.aarch64.sve.uabd.u.nxv16i8(, define 
@replace_uabd_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -803,7 +882,8 @@ declare @llvm.aarch64.sve.uabd.u.nxv8i16(, < define @replace_uabd_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -813,7 +893,8 @@ declare @llvm.aarch64.sve.uabd.u.nxv4i32(, < define @replace_uabd_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -823,7 +904,8 @@ declare @llvm.aarch64.sve.uabd.u.nxv2i64(, < define @replace_uabd_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uabd_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uabd.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -833,7 +915,8 @@ declare @llvm.aarch64.sve.umax.u.nxv16i8(, define @replace_umax_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -843,7 +926,8 @@ declare @llvm.aarch64.sve.umax.u.nxv8i16(, < define @replace_umax_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -853,7 +937,8 @@ declare @llvm.aarch64.sve.umax.u.nxv4i32(, < define @replace_umax_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -863,7 +948,8 @@ declare @llvm.aarch64.sve.umax.u.nxv2i64(, < define @replace_umax_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umax_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umax.u.nxv2i64( zeroinitializer, 
%a, %b) ret %1 @@ -873,7 +959,8 @@ declare @llvm.aarch64.sve.umin.u.nxv16i8(, define @replace_umin_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -883,7 +970,8 @@ declare @llvm.aarch64.sve.umin.u.nxv8i16(, < define @replace_umin_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -893,7 +981,8 @@ declare @llvm.aarch64.sve.umin.u.nxv4i32(, < define @replace_umin_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -903,7 +992,8 @@ declare @llvm.aarch64.sve.umin.u.nxv2i64(, < define @replace_umin_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umin_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umin.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -913,7 +1003,8 @@ declare @llvm.aarch64.sve.umulh.u.nxv16i8(, define @replace_umulh_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -923,7 +1014,8 @@ declare @llvm.aarch64.sve.umulh.u.nxv8i16(, define @replace_umulh_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -933,7 +1025,8 @@ declare @llvm.aarch64.sve.umulh.u.nxv4i32(, define @replace_umulh_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -943,7 +1036,8 @@ declare @llvm.aarch64.sve.umulh.u.nxv2i64(, define @replace_umulh_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_umulh_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, [[A]], 
[[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.umulh.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -955,7 +1049,8 @@ declare @llvm.aarch64.sve.asr.u.nxv16i8(, < define @replace_asr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -965,7 +1060,8 @@ declare @llvm.aarch64.sve.asr.u.nxv8i16(, @replace_asr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -975,7 +1071,8 @@ declare @llvm.aarch64.sve.asr.u.nxv4i32(, @replace_asr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -985,7 +1082,8 @@ declare @llvm.aarch64.sve.asr.u.nxv2i64(, @replace_asr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_asr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.asr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -995,7 +1093,8 @@ declare @llvm.aarch64.sve.lsl.u.nxv16i8(, < define @replace_lsl_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1005,7 +1104,8 @@ declare @llvm.aarch64.sve.lsl.u.nxv8i16(, @replace_lsl_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1015,7 +1115,8 @@ declare @llvm.aarch64.sve.lsl.u.nxv4i32(, @replace_lsl_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1025,7 +1126,8 @@ declare @llvm.aarch64.sve.lsl.u.nxv2i64(, @replace_lsl_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsl_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( 
zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsl.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1035,7 +1137,8 @@ declare @llvm.aarch64.sve.lsr.u.nxv16i8(, < define @replace_lsr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1045,7 +1148,8 @@ declare @llvm.aarch64.sve.lsr.u.nxv8i16(, @replace_lsr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1055,7 +1159,8 @@ declare @llvm.aarch64.sve.lsr.u.nxv4i32(, @replace_lsr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1065,7 +1170,8 @@ declare @llvm.aarch64.sve.lsr.u.nxv2i64(, @replace_lsr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_lsr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.lsr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1077,7 +1183,8 @@ declare @llvm.aarch64.sve.and.u.nxv16i8(, < define @replace_and_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.and.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1087,7 +1194,8 @@ declare @llvm.aarch64.sve.and.u.nxv8i16(, @replace_and_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.and.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1097,7 +1205,8 @@ declare @llvm.aarch64.sve.and.u.nxv4i32(, @replace_and_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.and.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1107,7 +1216,8 @@ declare @llvm.aarch64.sve.and.u.nxv2i64(, @replace_and_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_and_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.and.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1117,7 +1227,8 @@ declare @llvm.aarch64.sve.bic.u.nxv16i8(, < define @replace_bic_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1127,7 +1238,8 @@ declare @llvm.aarch64.sve.bic.u.nxv8i16(, @replace_bic_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1137,7 +1249,8 @@ declare @llvm.aarch64.sve.bic.u.nxv4i32(, @replace_bic_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1147,7 +1260,8 @@ declare @llvm.aarch64.sve.bic.u.nxv2i64(, @replace_bic_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_bic_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.bic.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1157,7 +1271,8 @@ declare @llvm.aarch64.sve.eor.u.nxv16i8(, < define @replace_eor_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1167,7 +1282,8 @@ declare @llvm.aarch64.sve.eor.u.nxv8i16(, @replace_eor_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1177,7 +1293,8 @@ declare @llvm.aarch64.sve.eor.u.nxv4i32(, @replace_eor_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1187,7 +1304,8 @@ declare @llvm.aarch64.sve.eor.u.nxv2i64(, @replace_eor_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_eor_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] 
= tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.eor.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1197,7 +1315,8 @@ declare @llvm.aarch64.sve.orr.u.nxv16i8(, < define @replace_orr_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1207,7 +1326,8 @@ declare @llvm.aarch64.sve.orr.u.nxv8i16(, @replace_orr_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1217,7 +1337,8 @@ declare @llvm.aarch64.sve.orr.u.nxv4i32(, @replace_orr_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1227,7 +1348,8 @@ declare @llvm.aarch64.sve.orr.u.nxv2i64(, @replace_orr_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_orr_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.orr.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1239,7 +1361,8 @@ declare @llvm.aarch64.sve.sqsub.u.nxv16i8(, define @replace_sqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1249,7 +1372,8 @@ declare @llvm.aarch64.sve.sqsub.u.nxv8i16(, define @replace_sqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1259,7 +1383,8 @@ declare @llvm.aarch64.sve.sqsub.u.nxv4i32(, define @replace_sqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1269,7 +1394,8 @@ declare @llvm.aarch64.sve.sqsub.u.nxv2i64(, define @replace_sqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) 
#[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.sqsub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 @@ -1279,7 +1405,8 @@ declare @llvm.aarch64.sve.uqsub.u.nxv16i8(, define @replace_uqsub_intrinsic_i8( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv16i8( zeroinitializer, %a, %b) ret %1 @@ -1289,7 +1416,8 @@ declare @llvm.aarch64.sve.uqsub.u.nxv8i16(, define @replace_uqsub_intrinsic_i16( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv8i16( zeroinitializer, %a, %b) ret %1 @@ -1299,7 +1427,8 @@ declare @llvm.aarch64.sve.uqsub.u.nxv4i32(, define @replace_uqsub_intrinsic_i32( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( zeroinitializer, %a, %b) ret %1 @@ -1309,10 +1438,11 @@ declare @llvm.aarch64.sve.uqsub.u.nxv2i64(, define @replace_uqsub_intrinsic_i64( %a, %b) #0 { ; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 ; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: ret undef +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.uqsub.u.nxv2i64( zeroinitializer, %a, %b) ret %1 } -attributes #0 = { "target-features"="+sve,+sve2" } +attributes #0 = { "target-features"="+sve,+sve2" } \ No newline at end of file From 09f0cd977b540cdd639bb87c3bf24e09688b83fc Mon Sep 17 00:00:00 2001 From: Mark Harley Date: Mon, 8 Jan 2024 13:22:18 +0000 Subject: [PATCH 5/5] [AArch64][SVE] Removed unnecessary _u tests and refactored IID checks --- .../AArch64/AArch64TargetTransformInfo.cpp | 14 +- ...-intrinsic-comb-u-forms-no-active-lanes.ll | 1448 ----------------- 2 files changed, 9 insertions(+), 1453 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index de427dfb06aec..13b5e578391de 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1413,7 +1413,7 @@ instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID) { if (match(II.getOperand(0), m_ZeroInt())) { // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are - // inactive for sv[func]_m or sv[func]_z + // inactive for sv[func]_m return IC.replaceInstUsesWith(II, II.getOperand(1)); } return instCombineSVEAllActive(II, IID); @@ -1540,10 +1540,6 @@ static std::optional instCombineSVEVectorMul(InstCombiner &IC, auto *OpMultiplicand = II.getOperand(1); auto 
*OpMultiplier = II.getOperand(2); - if (II.getIntrinsicID() != IID) - if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID)) - return II_U; - // Return true if a given instruction is a unit splat value, false otherwise. auto IsUnitSplat = [](auto *I) { auto *SplatValue = getSplatValue(I); @@ -1927,6 +1923,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_fmls: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: @@ -1950,6 +1950,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_mls: return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_mul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll deleted file mode 100644 index 40deadbbb9bf3..0000000000000 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll +++ /dev/null @@ -1,1448 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=instcombine < %s | FileCheck %s - -target triple = "aarch64-unknown-linux-gnu" - -; Replace SVE _u intrinsics with undef if the predicate is all false. 
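For contrast with the instCombineSVEAllOrNoActive change in AArch64TargetTransformInfo.cpp above, a minimal sketch of the two folds when the governing predicate is all false; the nxv4i32 mul variant and the fully written-out vector types below are illustrative assumptions, not lines taken from this patch:

;   _m form: inactive lanes take their value from the first data operand, so
;   the whole call folds to %a (IC.replaceInstUsesWith(II, II.getOperand(1))):
;     %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
;     ret <vscale x 4 x i32> %r   ; simplifies to: ret <vscale x 4 x i32> %a
;
;   _u form: inactive lanes are unspecified, which is why the tests in the
;   file deleted here originally expected a fold to "ret undef".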
- -; Float arithmetic - -declare @llvm.aarch64.sve.fabd.u.nxv8f16(, , ) -define @replace_fabd_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fabd_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fabd.u.nxv4f32(, , ) -define @replace_fabd_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fabd_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fabd.u.nxv2f64(, , ) -define @replace_fabd_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fabd_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fabd.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fadd.u.nxv8f16(, , ) -define @replace_fadd_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fadd_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fadd.u.nxv4f32(, , ) -define @replace_fadd_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fadd_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fadd.u.nxv2f64(, , ) -define @replace_fadd_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fadd_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fadd.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fdiv.u.nxv8f16(, , ) -define @replace_fdiv_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fdiv_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fdiv.u.nxv4f32(, , ) -define @replace_fdiv_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fdiv_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} 
- -declare @llvm.aarch64.sve.fdiv.u.nxv2f64(, , ) -define @replace_fdiv_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fdiv_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fdiv.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmax.u.nxv8f16(, , ) -define @replace_fmax_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmax_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmax.u.nxv4f32(, , ) -define @replace_fmax_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmax_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmax.u.nxv2f64(, , ) -define @replace_fmax_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmax_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmax.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmaxnm.u.nxv8f16(, , ) -define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmaxnm.u.nxv4f32(, , ) -define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmaxnm.u.nxv2f64(, , ) -define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmaxnm.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmin.u.nxv8f16(, , ) -define @replace_fmin_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmin_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 
-} - -declare @llvm.aarch64.sve.fmin.u.nxv4f32(, , ) -define @replace_fmin_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmin_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmin.u.nxv2f64(, , ) -define @replace_fmin_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmin_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmin.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fminnm.u.nxv8f16(, , ) -define @replace_fminnm_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fminnm_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fminnm.u.nxv4f32(, , ) -define @replace_fminnm_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fminnm_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fminnm.u.nxv2f64(, , ) -define @replace_fminnm_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fminnm_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fminnm.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmla.u.nxv8f16(, , , ) -define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmla_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv8f16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmla.u.nxv4f32(, , , ) -define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmla_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmla.u.nxv4f32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmla.u.nxv2f64(, , , ) -define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmla_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 
= tail call fast @llvm.aarch64.sve.fmla.u.nxv2f64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmls.u.nxv8f16(, , , ) -define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmls_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv8f16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmls.u.nxv4f32(, , , ) -define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmls_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv4f32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmls.u.nxv2f64(, , , ) -define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fmls_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmls.u.nxv2f64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fmul.u.nxv8f16(, , ) -define @replace_fmul_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmul_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmul.u.nxv4f32(, , ) -define @replace_fmul_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmul_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmul.u.nxv2f64(, , ) -define @replace_fmul_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmul_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmul.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmulx.u.nxv8f16(, , ) -define @replace_fmulx_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmulx_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmulx.u.nxv4f32(, , ) -define @replace_fmulx_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmulx_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast 
@llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fmulx.u.nxv2f64(, , ) -define @replace_fmulx_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fmulx_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fmulx.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fnmla.u.nxv8f16(, , , ) -define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmla_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv8f16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fnmla.u.nxv4f32(, , , ) -define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmla_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv4f32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fnmla.u.nxv2f64(, , , ) -define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmla_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmla.u.nxv2f64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fnmls.u.nxv8f16(, , , ) -define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmls_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv8f16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fnmls.u.nxv4f32(, , , ) -define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmls_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv4f32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fnmls.u.nxv2f64(, , , ) -define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_fnmls_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fnmls.u.nxv2f64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.fsub.u.nxv8f16(, , ) -define 
@replace_fsub_intrinsic_half( %a, %b) #0 { -; CHECK-LABEL: define @replace_fsub_intrinsic_half -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv8f16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fsub.u.nxv4f32(, , ) -define @replace_fsub_intrinsic_float( %a, %b) #0 { -; CHECK-LABEL: define @replace_fsub_intrinsic_float -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv4f32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.fsub.u.nxv2f64(, , ) -define @replace_fsub_intrinsic_double( %a, %b) #0 { -; CHECK-LABEL: define @replace_fsub_intrinsic_double -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call fast @llvm.aarch64.sve.fsub.u.nxv2f64( zeroinitializer, %a, %b) - ret %1 -} - -; Integer arithmetic - -declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) -define @replace_add_intrinsic_i8( %a, %b) #0 { -; CHECK-LABEL: define @replace_add_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.add.u.nxv16i8( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.add.u.nxv8i16(, , ) -define @replace_add_intrinsic_i16( %a, %b) #0 { -; CHECK-LABEL: define @replace_add_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.add.u.nxv8i16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.add.u.nxv4i32(, , ) -define @replace_add_intrinsic_i32( %a, %b) #0 { -; CHECK-LABEL: define @replace_add_intrinsic_i32 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.add.u.nxv4i32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.add.u.nxv2i64(, , ) -define @replace_add_intrinsic_i64( %a, %b) #0 { -; CHECK-LABEL: define @replace_add_intrinsic_i64 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.add.u.nxv2i64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.mla.u.nxv16i8(, , , ) -define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mla_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mla.u.nxv16i8( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mla.u.nxv8i16(, , , ) -define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { -; CHECK-LABEL: define 
@replace_mla_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mla.u.nxv8i16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mla.u.nxv4i32(, , , ) -define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mla_intrinsic_i32 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mla.u.nxv4i32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mla.u.nxv2i64(, , , ) -define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mla_intrinsic_i64 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mla.u.nxv2i64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mls.u.nxv16i8(, , , ) -define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mls_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mls.u.nxv16i8( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mls.u.nxv8i16(, , , ) -define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mls_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mls.u.nxv8i16( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mls.u.nxv4i32(, , , ) -define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mls_intrinsic_i32 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mls.u.nxv4i32( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mls.u.nxv2i64(, , , ) -define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { -; CHECK-LABEL: define @replace_mls_intrinsic_i64 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, [[A]], [[B]], [[C]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mls.u.nxv2i64( zeroinitializer, %a, %b, %c) - ret %1 -} - -declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) -define @replace_mul_intrinsic_i8( %a, %b) #0 { -; CHECK-LABEL: define @replace_mul_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv16i8( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.mul.u.nxv8i16(, , ) -define @replace_mul_intrinsic_i16( %a, %b) #0 { -; CHECK-LABEL: 
define @replace_mul_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv8i16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.mul.u.nxv4i32(, , ) -define @replace_mul_intrinsic_i32( %a, %b) #0 { -; CHECK-LABEL: define @replace_mul_intrinsic_i32 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv4i32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.mul.u.nxv2i64(, , ) -define @replace_mul_intrinsic_i64( %a, %b) #0 { -; CHECK-LABEL: define @replace_mul_intrinsic_i64 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.mul.u.nxv2i64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) -define @replace_sabd_intrinsic_i8( %a, %b) #0 { -; CHECK-LABEL: define @replace_sabd_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.sabd.u.nxv16i8( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) -define @replace_sabd_intrinsic_i16( %a, %b) #0 { -; CHECK-LABEL: define @replace_sabd_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.sabd.u.nxv8i16( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) -define @replace_sabd_intrinsic_i32( %a, %b) #0 { -; CHECK-LABEL: define @replace_sabd_intrinsic_i32 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.sabd.u.nxv4i32( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) -define @replace_sabd_intrinsic_i64( %a, %b) #0 { -; CHECK-LABEL: define @replace_sabd_intrinsic_i64 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.sabd.u.nxv2i64( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.smax.u.nxv16i8(, , ) -define @replace_smax_intrinsic_i8( %a, %b) #0 { -; CHECK-LABEL: define @replace_smax_intrinsic_i8 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, [[A]], [[B]]) -; CHECK-NEXT: ret [[TMP1]] -; - %1 = tail call @llvm.aarch64.sve.smax.u.nxv16i8( zeroinitializer, %a, %b) - ret %1 -} - -declare @llvm.aarch64.sve.smax.u.nxv8i16(, , ) -define @replace_smax_intrinsic_i16( %a, %b) #0 { -; CHECK-LABEL: define @replace_smax_intrinsic_i16 -; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-; Shifts
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-; Logical operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-; SVE2 - Uniform DSP operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
-;
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
- ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
-;
- %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
- ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
-;
- %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
-;
- %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %1
-}
-
-attributes #0 = { "target-features"="+sve,+sve2" }
"target-features"="+sve,+sve2" } \ No newline at end of file