From d871315a4e0cf56fa10cd654c7d5c41b2ac958be Mon Sep 17 00:00:00 2001
From: Marian Lukac
Date: Tue, 26 Mar 2024 11:04:15 +0000
Subject: [PATCH 1/3] [AArch64][SVE] Add optimisation for unary SVE intrinsics
 with no active lanes

---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  369 ++
 ...-intrinsic-comb-m-forms-no-active-lanes.ll | 3292 ++++++++++++++++-
 2 files changed, 3660 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index af0b6873d170d..40bd17053d34c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -923,6 +923,80 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// Simplify a unary operation whose predicate has all inactive lanes by
+// replacing the instruction with the operand that supplies its result.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+                                   bool hasInactiveVector) {
+  int PredOperand = hasInactiveVector ? 1 : 0;
+  int ReplaceOperand = hasInactiveVector ? 0 : 1;
+  if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has all inactive lanes by
+// replacing the instruction with a zero constant of its return type.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    Constant *Node;
+    Type *RetTy = II.getType();
+    if (RetTy->isStructTy()) {
+      auto *StructT = cast<StructType>(RetTy);
+      auto *VecT = StructT->getElementType(0);
+      SmallVector<Constant *> ZerVec;
+      for (unsigned i = 0; i < StructT->getNumElements(); i++) {
+        ZerVec.push_back(VecT->isFPOrFPVectorTy()
+                             ? ConstantFP::get(VecT, 0.0)
+                             : ConstantInt::get(VecT, 0));
+      }
+      Node = ConstantStruct::get(StructT, ZerVec);
+    } else if (RetTy->isFPOrFPVectorTy())
+      Node = ConstantFP::get(RetTy, 0.0);
+    else
+      Node = ConstantInt::get(II.getType(), 0);
+
+    IC.replaceInstUsesWith(II, Node);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Erase a unary operation whose predicate has all inactive lanes.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
+                                 int PredPos) {
+  if (match(II.getOperand(PredPos), m_ZeroInt())) {
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has all inactive lanes by
+// replacing the instruction with the given constant.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryConstant(InstCombiner &IC, IntrinsicInst &II,
+                                    Constant *NewVal) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, NewVal);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has all inactive lanes, or
+// switch to the undef-passthru (_x) form when all lanes are active.
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+  if (isAllActivePredicate(II.getOperand(1)) &&
+      !isa<UndefValue>(II.getOperand(0))) {
+    Value *Undef = llvm::UndefValue::get(II.getType());
+    return IC.replaceOperand(II, 0, Undef);
+  }
+  return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
 static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
                                                       IntrinsicInst &II) {
   // svsel(ptrue, x, y) => x
@@ -937,6 +1011,12 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
 static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
                                                       IntrinsicInst &II) {
+
+  // Optimize when the predicate is known to be all active or all inactive
+  if (auto II_NA =
+          instCombineSVEAllOrNoActiveUnary(IC, II))
+    return II_NA;
+
   IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
   if (!Pg)
     return std::nullopt;
@@ -971,6 +1051,12 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
 static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
                                                         IntrinsicInst &II) {
+
+  // Replace with a zero constant when all lanes are inactive
+  if (auto II_NA =
+          instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   LLVMContext &Ctx = II.getContext();
 
   // Check that the predicate is all active
@@ -1156,6 +1242,11 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
   Value *Vec = II.getArgOperand(2);
   Type *Ty = II.getType();
 
+  // If all lanes are inactive, replace with the fallback operand
+  if (auto II_NA =
+          instCombineSVENoActiveUnaryReplace(IC, II, false))
+    return II_NA;
+
   if (!Ty->isIntegerTy())
     return std::nullopt;
 
@@ -1336,6 +1427,11 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Value *PtrOp = II.getOperand(1);
   Type *VecTy = II.getType();
 
+  // Replace with a zero constant when all lanes are inactive
+  if (auto II_NA =
+          instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   if (isAllActivePredicate(Pred)) {
     LoadInst *Load = IC.Builder.CreateLoad(VecTy, PtrOp);
     Load->copyMetadata(II);
@@ -1355,6 +1451,11 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Value *Pred = II.getOperand(1);
   Value *PtrOp = II.getOperand(2);
 
+  // Remove the store when all lanes are inactive
+  if (auto II_NA =
+          instCombineSVENoActiveUnaryErase(IC, II, 0))
+    return II_NA;
+
   if (isAllActivePredicate(Pred)) {
     StoreInst *Store = IC.Builder.CreateStore(VecOp, PtrOp);
     Store->copyMetadata(II);
@@ -1653,6 +1754,11 @@
instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) { Type *Ty = II.getType(); Value *PassThru = ConstantAggregateZero::get(Ty); + //Replace by zero constant when all lanes are inactive + if (auto II_NA = + instCombineSVENoActiveUnaryZero(IC, II)) + return II_NA; + // Contiguous gather => masked load. // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1)) // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer) @@ -1683,6 +1789,11 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) { Value *Index = II.getOperand(3); Type *Ty = Val->getType(); + //Remove when all lanes are inactive + if (auto II_NA = + instCombineSVENoActiveUnaryErase(IC, II, 0)) + return II_NA; + // Contiguous scatter => masked store. // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1)) // => (masked.store Value (gep BasePtr IndexBase) Align Mask) @@ -1879,6 +1990,264 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, switch (IID) { default: break; + + case Intrinsic::aarch64_sve_abs: + case Intrinsic::aarch64_sve_bfcvt_x2: + case Intrinsic::aarch64_sve_cls: + case Intrinsic::aarch64_sve_clz: + case Intrinsic::aarch64_sve_cnot: + case Intrinsic::aarch64_sve_cnt: + case Intrinsic::aarch64_sve_fabs: + case Intrinsic::aarch64_sve_fcvt: + case Intrinsic::aarch64_sve_fcvt_x2: + case Intrinsic::aarch64_sve_fcvtn_x2: + case Intrinsic::aarch64_sve_fcvtzs_x2: + case Intrinsic::aarch64_sve_fcvtzs_x4: + case Intrinsic::aarch64_sve_fcvtzu_x2: + case Intrinsic::aarch64_sve_fcvtzu_x4: + case Intrinsic::aarch64_sve_fcvtzs: + case Intrinsic::aarch64_sve_fcvtzs_i32f16: + case Intrinsic::aarch64_sve_fcvtzs_i64f16: + case Intrinsic::aarch64_sve_fcvtzs_i64f32: + case Intrinsic::aarch64_sve_fcvt_bf16f32: + case Intrinsic::aarch64_sve_fcvtnt_bf16f32: + case Intrinsic::aarch64_sve_fcvtzs_i32f64: + case Intrinsic::aarch64_sve_fcvtzu: + case Intrinsic::aarch64_sve_fcvtzu_i32f16: + case Intrinsic::aarch64_sve_fcvtzu_i64f16: + case Intrinsic::aarch64_sve_fcvtzu_i64f32: + case Intrinsic::aarch64_sve_fcvtzu_i32f64: + case Intrinsic::aarch64_sve_fcvt_f16f32: + case Intrinsic::aarch64_sve_fcvt_f16f64: + case Intrinsic::aarch64_sve_fcvt_f32f16: + case Intrinsic::aarch64_sve_fcvt_f32f64: + case Intrinsic::aarch64_sve_fcvt_f64f16: + case Intrinsic::aarch64_sve_fcvt_f64f32: + case Intrinsic::aarch64_sve_fcvtlt_f32f16: + case Intrinsic::aarch64_sve_fcvtlt_f64f32: + case Intrinsic::aarch64_sve_fcvtx_f32f64: + case Intrinsic::aarch64_sve_fcvtnt_f16f32: + case Intrinsic::aarch64_sve_fcvtnt_f32f64: + case Intrinsic::aarch64_sve_fcvtxnt_f32f64: + case Intrinsic::aarch64_sve_flogb: + case Intrinsic::aarch64_sve_fmaxp: + case Intrinsic::aarch64_sve_fminp: + case Intrinsic::aarch64_sve_fneg: + case Intrinsic::aarch64_sve_frecpx: + case Intrinsic::aarch64_sve_frinta: + case Intrinsic::aarch64_sve_frinti: + case Intrinsic::aarch64_sve_frintm: + case Intrinsic::aarch64_sve_frintn: + case Intrinsic::aarch64_sve_frintp: + case Intrinsic::aarch64_sve_frintx: + case Intrinsic::aarch64_sve_frintz: + case Intrinsic::aarch64_sve_fscale: + case Intrinsic::aarch64_sve_fsqrt: + case Intrinsic::aarch64_sve_neg: + case Intrinsic::aarch64_sve_not: + case Intrinsic::aarch64_sve_rbit: + case Intrinsic::aarch64_sve_revb: + case Intrinsic::aarch64_sve_revh: + case Intrinsic::aarch64_sve_revw: + case Intrinsic::aarch64_sve_revd: + case Intrinsic::aarch64_sve_scvtf: + case Intrinsic::aarch64_sve_scvtf_f16i32: + case Intrinsic::aarch64_sve_scvtf_f16i64: + case Intrinsic::aarch64_sve_scvtf_f32i64: + case 
Intrinsic::aarch64_sve_scvtf_f64i32: + case Intrinsic::aarch64_sve_scvtf_x2: + case Intrinsic::aarch64_sve_scvtf_x4: + case Intrinsic::aarch64_sve_ucvtf: + case Intrinsic::aarch64_sve_ucvtf_f16i32: + case Intrinsic::aarch64_sve_ucvtf_f16i64: + case Intrinsic::aarch64_sve_ucvtf_f32i64: + case Intrinsic::aarch64_sve_ucvtf_f64i32: + case Intrinsic::aarch64_sve_ucvtf_x2: + case Intrinsic::aarch64_sve_ucvtf_x4: + case Intrinsic::aarch64_sve_sqabs: + case Intrinsic::aarch64_sve_sqneg: + case Intrinsic::aarch64_sve_sqrshl: + case Intrinsic::aarch64_sve_sqshl: + case Intrinsic::aarch64_sve_sqshlu: + case Intrinsic::aarch64_sve_sxtb: + case Intrinsic::aarch64_sve_sxth: + case Intrinsic::aarch64_sve_sxtw: + case Intrinsic::aarch64_sve_urecpe: + case Intrinsic::aarch64_sve_ursqrte: + case Intrinsic::aarch64_sve_uxtb: + case Intrinsic::aarch64_sve_uxth: + case Intrinsic::aarch64_sve_uxtw: + return instCombineSVEAllOrNoActiveUnary(IC, II); + case Intrinsic::aarch64_sve_brka: + case Intrinsic::aarch64_sve_brkb: + case Intrinsic::aarch64_sve_sqdecp: + case Intrinsic::aarch64_sve_uqdecp: + case Intrinsic::aarch64_sve_sqdecp_n32: + case Intrinsic::aarch64_sve_sqdecp_n64: + return instCombineSVENoActiveUnaryReplace(IC, II, true); + case Intrinsic::aarch64_sve_asrd: + case Intrinsic::aarch64_sve_clasta: + case Intrinsic::aarch64_sve_clastb: + case Intrinsic::aarch64_sve_pfirst: + return instCombineSVENoActiveUnaryReplace(IC, II, false); + case Intrinsic::aarch64_sve_addqv: + case Intrinsic::aarch64_sve_brka_z: + case Intrinsic::aarch64_sve_brkb_z: + case Intrinsic::aarch64_sve_brkn_z: + case Intrinsic::aarch64_sve_brkpa_z: + case Intrinsic::aarch64_sve_brkpb_z: + case Intrinsic::aarch64_sve_cmpeq: + case Intrinsic::aarch64_sve_cmpge: + case Intrinsic::aarch64_sve_cmpgt: + case Intrinsic::aarch64_sve_cmphi: + case Intrinsic::aarch64_sve_cmphs: + case Intrinsic::aarch64_sve_cmpeq_wide: + case Intrinsic::aarch64_sve_cmpge_wide: + case Intrinsic::aarch64_sve_cmpgt_wide: + case Intrinsic::aarch64_sve_cmphi_wide: + case Intrinsic::aarch64_sve_cmphs_wide: + case Intrinsic::aarch64_sve_cmple_wide: + case Intrinsic::aarch64_sve_cmplt_wide: + case Intrinsic::aarch64_sve_cmplo_wide: + case Intrinsic::aarch64_sve_cmpls_wide: + case Intrinsic::aarch64_sve_cntp: + case Intrinsic::aarch64_sve_compact: + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_eorqv: + case Intrinsic::aarch64_sve_facge: + case Intrinsic::aarch64_sve_facgt: + case Intrinsic::aarch64_sve_faddv: + case Intrinsic::aarch64_sve_fcmpeq: + case Intrinsic::aarch64_sve_fcmpne: + case Intrinsic::aarch64_sve_fcmpge: + case Intrinsic::aarch64_sve_fcmpgt: + case Intrinsic::aarch64_sve_fcmpuo: + case Intrinsic::aarch64_sve_ld1_gather_scalar_offset: + case Intrinsic::aarch64_sve_ld1_gather: + case Intrinsic::aarch64_sve_ld1_gather_sxtw: + case Intrinsic::aarch64_sve_ld1_gather_uxtw: + case Intrinsic::aarch64_sve_ld1_gather_sxtw_index: + case Intrinsic::aarch64_sve_ld1_gather_uxtw_index: + case Intrinsic::aarch64_sve_ld1_pn_x2: + case Intrinsic::aarch64_sve_ld1_pn_x4: + case Intrinsic::aarch64_sve_ld1rq: + case Intrinsic::aarch64_sve_ld1ro: + case Intrinsic::aarch64_sve_ld1uwq: + case Intrinsic::aarch64_sve_ld1udq: + case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset: + case Intrinsic::aarch64_sve_ld1q_gather_index: + case Intrinsic::aarch64_sve_ld1q_gather_vector_offset: + case Intrinsic::aarch64_sve_ld2_sret: + case Intrinsic::aarch64_sve_ld2q_sret: + case Intrinsic::aarch64_sve_ld3_sret: + case Intrinsic::aarch64_sve_ld3q_sret: + case 
Intrinsic::aarch64_sve_ld4_sret: + case Intrinsic::aarch64_sve_ld4q_sret: + case Intrinsic::aarch64_sve_ldff1: + case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset: + case Intrinsic::aarch64_sve_ldff1_gather: + case Intrinsic::aarch64_sve_ldff1_gather_sxtw: + case Intrinsic::aarch64_sve_ldff1_gather_uxtw: + case Intrinsic::aarch64_sve_ldff1_gather_index: + case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index: + case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index: + case Intrinsic::aarch64_sve_ldnf1: + case Intrinsic::aarch64_sve_ldnt1: + case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset: + case Intrinsic::aarch64_sve_ldnt1_gather: + case Intrinsic::aarch64_sve_ldnt1_gather_uxtw: + case Intrinsic::aarch64_sve_ldnt1_gather_index: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_orqv: + case Intrinsic::aarch64_sve_rdffr_z: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_umaxqv: + return instCombineSVENoActiveUnaryZero(IC, II); + case Intrinsic::aarch64_sve_andqv: + case Intrinsic::aarch64_sve_andv: + return instCombineSVENoActiveUnaryConstant(IC, II, + ConstantInt::get(II.getType(), 1)); + case Intrinsic::aarch64_sve_fmaxnmqv: + case Intrinsic::aarch64_sve_fmaxnmv: + case Intrinsic::aarch64_sve_fminnmqv: + case Intrinsic::aarch64_sve_fminnmv: + return instCombineSVENoActiveUnaryConstant(IC, II, + ConstantFP::getQNaN(II.getType())); + case Intrinsic::aarch64_sve_fmaxqv: + case Intrinsic::aarch64_sve_fmaxv: + return instCombineSVENoActiveUnaryConstant(IC, II, + ConstantFP::getInfinity(II.getType(), true)); + case Intrinsic::aarch64_sve_fminqv: + case Intrinsic::aarch64_sve_fminv: + return instCombineSVENoActiveUnaryConstant(IC, II, + ConstantFP::getInfinity(II.getType())); + case Intrinsic::aarch64_sve_prf: + case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfb_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfb_gather_index: + case Intrinsic::aarch64_sve_prfb_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfh_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfh_gather_index: + case Intrinsic::aarch64_sve_prfh_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfw_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfw_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfw_gather_index: + case Intrinsic::aarch64_sve_prfw_gather_uxtw_index: + case Intrinsic::aarch64_sve_prfd_gather_scalar_offset: + case Intrinsic::aarch64_sve_prfd_gather_sxtw_index: + case Intrinsic::aarch64_sve_prfd_gather_index: + case Intrinsic::aarch64_sve_prfd_gather_uxtw_index: + return instCombineSVENoActiveUnaryErase(IC, II, 0); + case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: + case Intrinsic::aarch64_sve_st1_scatter: + case Intrinsic::aarch64_sve_st1_scatter_sxtw: + case Intrinsic::aarch64_sve_st1_scatter_uxtw: + case Intrinsic::aarch64_sve_st1_scatter_sxtw_index: + case Intrinsic::aarch64_sve_st1_scatter_uxtw_index: + case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset: + case Intrinsic::aarch64_sve_st1q_scatter_vector_offset: + case Intrinsic::aarch64_sve_st1q_scatter_index: + case Intrinsic::aarch64_sve_st1dq: + case Intrinsic::aarch64_sve_st1wq: + case Intrinsic::aarch64_sve_stnt1: + case Intrinsic::aarch64_sve_stnt1_scatter: + case Intrinsic::aarch64_sve_stnt1_scatter_index: + case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset: + case Intrinsic::aarch64_sve_stnt1_scatter_uxtw: + return 
instCombineSVENoActiveUnaryErase(IC, II, 1); + case Intrinsic::aarch64_sve_st2: + case Intrinsic::aarch64_sve_st2q: + return instCombineSVENoActiveUnaryErase(IC, II, 2); + case Intrinsic::aarch64_sve_st3: + case Intrinsic::aarch64_sve_st3q: + return instCombineSVENoActiveUnaryErase(IC, II, 3); + case Intrinsic::aarch64_sve_st4: + case Intrinsic::aarch64_sve_st4q: + return instCombineSVENoActiveUnaryErase(IC, II, 4); + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_smaxqv: + { + auto *MinSInt = ConstantInt::get(II.getType(), APInt::getSignedMinValue( + II.getType()->getScalarSizeInBits())); + return instCombineSVENoActiveUnaryConstant(IC, II, MinSInt); + } + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_sminqv: + { + auto *MaxSInt = ConstantInt::get(II.getType(), APInt::getSignedMaxValue( + II.getType()->getScalarSizeInBits())); + return instCombineSVENoActiveUnaryConstant(IC, II, MaxSInt); + } + case Intrinsic::aarch64_sve_uminv: + case Intrinsic::aarch64_sve_uminqv: + { + auto *MaxUInt = ConstantInt::get(II.getType(), APInt::getMaxValue( + II.getType()->getScalarSizeInBits())); + return instCombineSVENoActiveUnaryConstant(IC, II, MaxUInt); + } case Intrinsic::aarch64_neon_fmaxnm: case Intrinsic::aarch64_neon_fminnm: return instCombineMaxMinNM(IC, II); diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll index 463a5f5d2cfb5..57372c46eecf2 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll @@ -1321,4 +1321,3294 @@ define @replace_uqsub_intrinsic_i64( %a, %1 } -attributes #0 = { "target-features"="+sve,+sve2" } +define dso_local @test_svabs_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svabs_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.abs.nxv16i8(, , ) #1 + + +define dso_local @test_svabs_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svabs_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fabs.nxv8f16(, , ) #1 + + +define dso_local @test_svabs_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svabs_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.abs.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.abs.nxv16i8( %a, %0, %b) + ret %1 +} + + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg) #1 + + +define dso_local @test_svabs_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svabs_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local <8 x i16> @test_svaddqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_svaddqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] 
{ +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(, ) #1 + + +define dso_local <8 x i16> @test_svandqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_svandqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(, ) #1 + + +define dso_local i16 @test_svandv( %b) #0 { +; CHECK-LABEL: define dso_local i16 @test_svandv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i16 1 +; +entry: + %0 = tail call i16 @llvm.aarch64.sve.andv.nxv8i16( zeroinitializer, %b) + ret i16 %0 +} + + +declare i16 @llvm.aarch64.sve.andv.nxv8i16(, ) #1 + + +define dso_local @test_svasrd_m( %a) #0 { +; CHECK-LABEL: define dso_local @test_svasrd_m( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.asrd.nxv16i8( zeroinitializer, %a, i32 1) + ret %0 +} + + +declare @llvm.aarch64.sve.asrd.nxv16i8(, , i32 immarg) #1 + + +define dso_local @test_svasrd_z( %a) #0 { +; CHECK-LABEL: define dso_local @test_svasrd_z( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.asrd.nxv16i8( zeroinitializer, zeroinitializer, i32 1) + ret %0 +} + + +define dso_local @test_svasrd_x( %a) #0 { +; CHECK-LABEL: define dso_local @test_svasrd_x( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.asrd.nxv8i16( zeroinitializer, %a, i32 1) + ret %0 +} + + +declare @llvm.aarch64.sve.asrd.nxv8i16(, , i32 immarg) #1 + + +define dso_local @test_svcvt_bf16_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_bf16_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvt.bf16f32(, , ) #1 + + +define dso_local @test_svcvt_bf16_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_bf16_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvt_bf16_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_bf16_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvt_bf16_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_bf16_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fcvt.bf16f32( %a, %0, %b) + ret %1 +} + + +declare @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg) #1 + + +declare 
@llvm.aarch64.sve.convert.to.svbool.nxv8i1() #1 + + +define dso_local @test_svcvtnt_bf16_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_bf16_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtnt.bf16f32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtnt.bf16f32(, , ) #1 + + +define dso_local @test_svcvtnt_bf16_x( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_bf16_x( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtnt.bf16f32( %a, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvtnt_bf16_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_bf16_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fcvt.bf16f32( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svbrka_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrka_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.brka.nxv16i1( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brka.nxv16i1(, , ) #1 + + +define dso_local @test_svbrka_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrka_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.brka.z.nxv16i1( zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brka.z.nxv16i1(, ) #1 + + +define dso_local @test_svbrkb_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrkb_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.brkb.nxv16i1( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brkb.nxv16i1(, , ) #1 + + +define dso_local @test_svbrkb_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrkb_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brkb.z.nxv16i1(, ) #1 + + +define dso_local @test_svbrkn_z( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrkn_z( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.brkn.z.nxv16i1( zeroinitializer, %a, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brkn.z.nxv16i1(, , ) #1 + + +define dso_local @test_svbrkpa_z( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrkpa_z( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( zeroinitializer, %a, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brkpa.z.nxv16i1(, , ) #1 + + +define dso_local @test_svbrkpb_z( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svbrkpb_z( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret 
zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( zeroinitializer, %a, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.brkpb.z.nxv16i1(, , ) #1 + + +define dso_local i32 @test_clasta(i32 noundef %a, %b) #0 { +; CHECK-LABEL: define dso_local i32 @test_clasta( +; CHECK-SAME: i32 noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[A]] +; +entry: + %0 = bitcast i32 %a to float + %1 = bitcast %b to + %2 = tail call float @llvm.aarch64.sve.clasta.n.nxv4f32( zeroinitializer, float %0, %1) + %3 = bitcast float %2 to i32 + ret i32 %3 +} + + +declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() #1 + + +define dso_local i32 @test_clastb(i32 noundef %a, %b) #0 { +; CHECK-LABEL: define dso_local i32 @test_clastb( +; CHECK-SAME: i32 noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[A]] +; +entry: + %0 = bitcast i32 %a to float + %1 = bitcast %b to + %2 = tail call float @llvm.aarch64.sve.clastb.n.nxv4f32( zeroinitializer, float %0, %1) + %3 = bitcast float %2 to i32 + ret i32 %3 +} + + +define dso_local @test_svcls_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcls_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.cls.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.cls.nxv16i8(, , ) #1 + + +define dso_local @test_svcls_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcls_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cls.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcls_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcls_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cls.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.cls.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svcls_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcls_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.cls.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svclz_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svclz_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.clz.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.clz.nxv16i8(, , ) #1 + + +define dso_local @test_svclz_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svclz_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.clz.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svclz_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svclz_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clz.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: 
+ %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.clz.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svclz_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svclz_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.clz.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcmpeq_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmpeq_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( zeroinitializer) +; CHECK-NEXT: ret [[TMP0]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcmpeq.nxv8f16( zeroinitializer, %a, %b) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %0) + ret %1 +} + + +declare @llvm.aarch64.sve.fcmpeq.nxv8f16(, , ) #1 + + +define dso_local @test_svcmpeq_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpeq_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpeq.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpeq_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpeq_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpeq.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpeq.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpgt_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmpgt_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( zeroinitializer, %a, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpgt.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpgt_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpgt_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( zeroinitializer) +; CHECK-NEXT: ret [[TMP0]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcmpgt.nxv8f16( zeroinitializer, %a, shufflevector ( insertelement ( poison, half 0xH4000, i64 0), poison, zeroinitializer)) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %0) + ret %1 +} + + +declare @llvm.aarch64.sve.fcmpgt.nxv8f16(, , ) #1 + + +define dso_local @test_svcmpgt_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpgt_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpgt.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpgt.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpge_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmpge_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( zeroinitializer, %a, %b) + ret %0 +} + + +declare 
@llvm.aarch64.sve.cmpge.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpge_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpge_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +define dso_local @test_svcmpge_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpge_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpge.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpge.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcmplt_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmplt_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( zeroinitializer, %b, %a) + ret %0 +} + + +define dso_local @test_svcmplt_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmplt_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( zeroinitializer, shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), %a) + ret %0 +} + + +define dso_local @test_svcmplt_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmplt_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmplt.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmplt.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcmple_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmple_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( zeroinitializer) +; CHECK-NEXT: ret [[TMP0]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcmpge.nxv4f32( zeroinitializer, %b, %a) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + ret %1 +} + + +declare @llvm.aarch64.sve.fcmpge.nxv4f32(, , ) #1 + + +declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() #1 + + +define dso_local @test_svcmple_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmple_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( zeroinitializer, shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), %a) + ret %0 +} + + +define dso_local @test_svcmple_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmple_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmple.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmple.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpne_vec( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcmpne_vec( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( zeroinitializer, %a, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpne.nxv16i8(, , ) #1 + + +define dso_local @test_svcmpne_imm( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpne_imm( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +define dso_local @test_svcmpne_wide( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcmpne_wide( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( zeroinitializer, %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)) + ret %0 +} + + +declare @llvm.aarch64.sve.cmpne.wide.nxv16i8(, , ) #1 + + +define dso_local @test_svcnot_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnot_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.cnot.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.cnot.nxv16i8(, , ) #1 + + +define dso_local @test_svcnot_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnot_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cnot.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcnot_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnot_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnot.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.cnot.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svcnot_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnot_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.cnot.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcnt_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnt_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.cnt.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.cnt.nxv16i8(, , ) #1 + + +define dso_local @test_svcnt_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnt_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.cnt.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcnt_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnt_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call 
@llvm.aarch64.sve.cnt.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svcnt_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcnt_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.cnt.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local i64 @test_svcntp( %b) #0 { +; CHECK-LABEL: define dso_local i64 @test_svcntp( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 0 +; +entry: + %0 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %b) + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1( zeroinitializer, %0) + ret i64 %1 +} + + +declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() #1 + + +declare i64 @llvm.aarch64.sve.cntp.nxv2i1(, ) #1 + + +define dso_local @test_svcompact( %a) #0 { +; CHECK-LABEL: define dso_local @test_svcompact( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.compact.nxv4f32( zeroinitializer, %a) + ret %0 +} + + +declare @llvm.aarch64.sve.compact.nxv4f32(, ) #1 + + +define dso_local @test_svdup_m( %a, i8 noundef %b) #0 { +; CHECK-LABEL: define dso_local @test_svdup_m( +; CHECK-SAME: [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv16i8( %a, zeroinitializer, i8 %b) + ret %0 +} + + +declare @llvm.aarch64.sve.dup.nxv16i8(, , i8) #1 + + +define dso_local @test_svdup_z(i8 noundef %b) #0 { +; CHECK-LABEL: define dso_local @test_svdup_z( +; CHECK-SAME: i8 noundef [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv16i8( zeroinitializer, zeroinitializer, i8 %b) + ret %0 +} + + +define dso_local @test_svdup_m2( %a) #0 { +; CHECK-LABEL: define dso_local @test_svdup_m2( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv16i8( undef, [[TMP0]], i8 1) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.dup.nxv16i8( %a, %0, i8 1) + ret %1 +} + + +define dso_local @test_svdup_x() #0 { +; CHECK-LABEL: define dso_local @test_svdup_x( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv16i8( undef, zeroinitializer, i8 0) + ret %0 +} + + +define dso_local <8 x i16> @test_sveorqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_sveorqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(, ) #1 + + +define dso_local i16 @test_sveorv( %b) #0 { +; CHECK-LABEL: define dso_local i16 @test_sveorv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i16 0 +; +entry: + %0 = tail call i16 @llvm.aarch64.sve.eorv.nxv8i16( zeroinitializer, %b) + ret i16 %0 +} + + +declare i16 @llvm.aarch64.sve.eorv.nxv8i16(, ) #1 + + +define dso_local <8 x half> @test_svaddqv_f( %b) #0 { +; CHECK-LABEL: define dso_local <8 x half> @test_svaddqv_f( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: ret <8 x half> zeroinitializer +; +entry: + %0 = tail call <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16( zeroinitializer, %b) + ret <8 x half> %0 +} + + +declare <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16(, ) #1 + + +define dso_local half @test_svaddv_f( %b) #0 { +; CHECK-LABEL: define dso_local half @test_svaddv_f( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret half 0xH0000 +; +entry: + %0 = tail call half @llvm.aarch64.sve.faddv.nxv8f16( zeroinitializer, %b) + ret half %0 +} + + +declare half @llvm.aarch64.sve.faddv.nxv8f16(, ) #1 + + +define dso_local @test_svdup_f_m( %a, float noundef %b) #0 { +; CHECK-LABEL: define dso_local @test_svdup_f_m( +; CHECK-SAME: [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv4f32( %a, zeroinitializer, float %b) + ret %0 +} + + +declare @llvm.aarch64.sve.dup.nxv4f32(, , float) #1 + + +define dso_local @test_svdup_f_z(float noundef %b) #0 { +; CHECK-LABEL: define dso_local @test_svdup_f_z( +; CHECK-SAME: float noundef [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv4f32( zeroinitializer, zeroinitializer, float %b) + ret %0 +} + + +define dso_local @test_svdup_f_m2( %a) #0 { +; CHECK-LABEL: define dso_local @test_svdup_f_m2( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8f16( undef, [[TMP0]], half 0xH3C00) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.dup.nxv8f16( %a, %0, half 0xH3C00) + ret %1 +} + + +declare @llvm.aarch64.sve.dup.nxv8f16(, , half) #1 + + +define dso_local @test_svdup_f_x() #0 { +; CHECK-LABEL: define dso_local @test_svdup_f_x( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.dup.nxv8f16( undef, zeroinitializer, half 0xH0000) + ret %0 +} + + +define dso_local @test_svcvt_f16_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvt.f16f32(, , ) #1 + + +define dso_local @test_svcvt_f16_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvt.f16f64(, , ) #1 + + +define dso_local @test_svcvt_f16_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvt_f16_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fcvt.f16f32( undef, [[TMP2]], [[B]]) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %0) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + %3 = tail call @llvm.aarch64.sve.fcvt.f16f32( %a, %2, %b) + ret %3 +} + + +define dso_local @test_svcvtlt_f32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtlt_f32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtlt.f32f16(, , ) #1 + + +define dso_local @test_svcvtlt_f64_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtlt_f64_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtlt.f64f32(, , ) #1 + + +define dso_local @test_svcvtlt_f64_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtlt_f64_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvtlt.f64f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( %a, %0, %b) + ret %1 +} + + +declare @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg) #1 + + +define dso_local @test_svcvtnt_f32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_f32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtnt.f32f64( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtnt.f32f64(, , ) #1 + + +define dso_local @test_svcvtnt_f32_x( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_f32_x( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtnt.f32f64( %a, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvtnt_f32_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtnt_f32_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fcvtnt.f32f64( undef, [[TMP2]], [[B]]) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + %3 = tail call @llvm.aarch64.sve.fcvtnt.f32f64( %a, %2, %b) + ret %3 +} + + +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg) #1 + + +define dso_local @test_svcvtx_f32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtx_f32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = 
tail call @llvm.aarch64.sve.fcvtx.f32f64( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtx.f32f64(, , ) #1 + + +define dso_local @test_svcvtx_f32_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtx_f32_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvtx_f32_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtx_f32_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvtx_f32_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtx_f32_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fcvtx.f32f64( undef, [[TMP2]], [[B]]) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + %3 = tail call @llvm.aarch64.sve.fcvtx.f32f64( %a, %2, %b) + ret %3 +} + + +define dso_local @test_svcvtxnt_f32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtxnt_f32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtxnt.f32f64(, , ) #1 + + +define dso_local @test_svcvtxnt_f32_x( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtxnt_f32_x( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvtxnt_f32_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvtxnt_f32_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fcvtxnt.f32f64( undef, [[TMP2]], [[B]]) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + %3 = tail call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, %2, %b) + ret %3 +} + + +define dso_local @test_svcvt_s32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_s32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(, , ) #1 + + +define dso_local @test_svcvt_s32_z( %b) #0 { +; CHECK-LABEL: define 
dso_local @test_svcvt_s32_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtzs.i32f64(, , ) #1 + + +define dso_local @test_svcvt_s32_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_s32_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svcvt_s32_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_s32_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svcvt_u32_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_u32_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtzu.i32f16(, , ) #1 + + +define dso_local @test_svcvt_u32_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_u32_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f64( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtzu.i32f64(, , ) #1 + + +define dso_local @test_svcvt_u32_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_u32_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(, , ) #1 + + +define dso_local @test_svcvt_u32_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_u32_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svlogb_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svlogb_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.flogb.nxv4f32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.flogb.nxv4f32(, , ) #1 + + +define dso_local @test_svlogb_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svlogb_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.flogb.nxv4f32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svlogb_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svlogb_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.flogb.nxv4f32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svlogb_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svlogb_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.flogb.nxv4f32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.flogb.nxv4f32( %a, %0, %b) + ret %1 +} + + +define dso_local <4 x float> @test_svmaxnmqv( %b) #0 { +; CHECK-LABEL: define dso_local <4 x float> @test_svmaxnmqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x float> +; +entry: + %0 = tail call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32( zeroinitializer, %b) + ret <4 x float> %0 +} + + +declare <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(, ) #1 + + +define dso_local float @test_svmaxnmv( %b) #0 { +; CHECK-LABEL: define dso_local float @test_svmaxnmv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret float 0x7FF8000000000000 +; +entry: + %0 = tail call float @llvm.aarch64.sve.fmaxnmv.nxv4f32( zeroinitializer, %b) + ret float %0 +} + + +declare float @llvm.aarch64.sve.fmaxnmv.nxv4f32(, ) #1 + + +define dso_local <4 x float> @test_svmaxqv_f( %b) #0 { +; CHECK-LABEL: define dso_local <4 x float> @test_svmaxqv_f( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x float> +; +entry: + %0 = tail call <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32( zeroinitializer, %b) + ret <4 x float> %0 +} + + +declare <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(, ) #1 + + +define dso_local float @test_svmaxv_f( %b) #0 { +; CHECK-LABEL: define dso_local float @test_svmaxv_f( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret float 0xFFF0000000000000 +; +entry: + %0 = tail call float @llvm.aarch64.sve.fmaxv.nxv4f32( zeroinitializer, %b) + ret float %0 +} + + +declare float @llvm.aarch64.sve.fmaxv.nxv4f32(, ) #1 + + +define dso_local <4 x float> @test_svminnmqv( %b) #0 { +; CHECK-LABEL: define dso_local <4 x float> @test_svminnmqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x float> +; +entry: + %0 = tail call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32( zeroinitializer, %b) + ret <4 x float> %0 +} + + +declare <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(, ) #1 + + +define dso_local float @test_svminnmv( %b) #0 { +; CHECK-LABEL: define dso_local float @test_svminnmv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret float 0x7FF8000000000000 +; +entry: + %0 = tail call float @llvm.aarch64.sve.fminnmv.nxv4f32( zeroinitializer, %b) + ret float %0 +} + + +declare float @llvm.aarch64.sve.fminnmv.nxv4f32(, ) #1 + + +define dso_local <4 x float> @test_svminqv_f( %b) #0 { +; CHECK-LABEL: define dso_local <4 x float> @test_svminqv_f( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x float> +; +entry: + %0 = tail call <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32( zeroinitializer, %b) + ret <4 x float> %0 +} + + +declare <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(, ) #1 + + +define dso_local float @test_svminv_f( %b) #0 { +; CHECK-LABEL: define dso_local float @test_svminv_f( +; 
CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret float 0x7FF0000000000000 +; +entry: + %0 = tail call float @llvm.aarch64.sve.fminv.nxv4f32( zeroinitializer, %b) + ret float %0 +} + + +declare float @llvm.aarch64.sve.fminv.nxv4f32(, ) #1 + + +define dso_local @test_svneg_f_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_f_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fneg.nxv8f16(, , ) #1 + + +define dso_local @test_svneg_f_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_f_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svneg_f_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_f_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svneg_f_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_f_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fneg.nxv8f16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fneg.nxv8f16( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svrecpx_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpx_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.frecpx.nxv8f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.frecpx.nxv8f16(, , ) #1 + + +define dso_local @test_svrecpx_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpx_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.frecpx.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrecpx_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpx_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.frecpx.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrecpx_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpx_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.frecpx.nxv8f16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.frecpx.nxv8f16( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svrinti_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrinti_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.frinti.nxv8f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.frinti.nxv8f16(, , ) #1 + + +define 
dso_local @test_svrinta_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrinta_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.frinta.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.frinta.nxv8f16(, , ) #1 + + +define dso_local @test_svrintp_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrintp_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.frintp.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.frintp.nxv8f16(, , ) #1 + + +define dso_local @test_svrintz_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrintz_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.frintz.nxv8f16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.frintz.nxv8f16( %a, %0, %b) + ret %1 +} + + +declare @llvm.aarch64.sve.frintz.nxv8f16(, , ) #1 + + +define dso_local @test_svsqrt_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svsqrt_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.fsqrt.nxv8f16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.fsqrt.nxv8f16(, , ) #1 + + +define dso_local @test_svsqrt_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svsqrt_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fsqrt.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svsqrt_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svsqrt_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fsqrt.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svsqrt_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svsqrt_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fsqrt.nxv8f16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.fsqrt.nxv8f16( %a, %0, %b) + ret %1 +} + + +define dso_local noundef @test_ld1(ptr nocapture noundef readnone %a) #0 { +; CHECK-LABEL: define dso_local noundef @test_ld1( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + ret zeroinitializer +} + + +define dso_local noundef @test_ld1_vnum(ptr nocapture noundef readnone %a) #0 { +; CHECK-LABEL: define dso_local noundef @test_ld1_vnum( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + ret zeroinitializer +} + + +define dso_local @test_ld1_gather( %a) #2 { +; CHECK-LABEL: define dso_local @test_ld1_gather( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call 
@llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 0) + ret %0 +} + + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) #3 + + +define dso_local @test_ld1_gather_offset(ptr noundef readonly %b, %a) #4 { +; CHECK-LABEL: define dso_local @test_ld1_gather_offset( +; CHECK-SAME: ptr noundef readonly [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32( zeroinitializer, ptr %b, %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(, ptr, ) #5 + + +define dso_local @test_ld1_gather_index(ptr noundef readonly %b, %a) #4 { +; CHECK-LABEL: define dso_local @test_ld1_gather_index( +; CHECK-SAME: ptr noundef readonly [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32( zeroinitializer, ptr %b, %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(, ptr, ) #5 + + +define dso_local @test_ld1_gather_offset_s32(i64 noundef %b, %a) #2 { +; CHECK-LABEL: define dso_local @test_ld1_gather_offset_s32( +; CHECK-SAME: i64 noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 %b) + ret %0 +} + + +define dso_local @test_ld1_gather_index_s32(i64 noundef %b, %a) #2 { +; CHECK-LABEL: define dso_local @test_ld1_gather_index_s32( +; CHECK-SAME: i64 noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = shl i64 %b, 2 + %1 = tail call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 %0) + ret %1 +} + + +define dso_local @test_ld1ro(ptr noundef readonly %a) #4 { +; CHECK-LABEL: define dso_local @test_ld1ro( +; CHECK-SAME: ptr noundef readonly [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ld1ro.nxv8bf16( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ld1ro.nxv8bf16(, ptr) #5 + + +define dso_local @test_ld1rq(ptr noundef readonly %a) #4 { +; CHECK-LABEL: define dso_local @test_ld1rq( +; CHECK-SAME: ptr noundef readonly [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ld1rq.nxv8bf16( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ld1rq.nxv8bf16(, ptr) #5 + + +define dso_local noundef @test_ld1sw_s64(ptr nocapture noundef readnone %a) #0 { +; CHECK-LABEL: define dso_local noundef @test_ld1sw_s64( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + ret zeroinitializer +} + + +define dso_local noundef @test_ld1uw_u64(ptr nocapture noundef readnone %a) #0 { +; CHECK-LABEL: define dso_local noundef @test_ld1uw_u64( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + ret zeroinitializer +} + + +define dso_local @test_ld2(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld2( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( zeroinitializer, ptr %a) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, %1, i64 0) + %3 = extractvalue { , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( %2, %3, i64 8) + ret %4 +} + + +declare { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16(, ptr) #5 + + +declare @llvm.vector.insert.nxv16bf16.nxv8bf16(, , i64 immarg) #6 + + +define dso_local @test_ld2_vnum(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld2_vnum( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( zeroinitializer, ptr %a) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, %1, i64 0) + %3 = extractvalue { , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv16f16.nxv8f16( %2, %3, i64 8) + ret %4 +} + + +declare { , } @llvm.aarch64.sve.ld2.sret.nxv8f16(, ptr) #5 + + +declare @llvm.vector.insert.nxv16f16.nxv8f16(, , i64 immarg) #6 + + +define dso_local @test_ld3(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld3( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP1]], zeroinitializer, i64 16) +; CHECK-NEXT: ret [[TMP2]] +; +entry: + %0 = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( zeroinitializer, ptr %a) + %1 = extractvalue { , , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, %1, i64 0) + %3 = extractvalue { , , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( %2, %3, i64 8) + %5 = extractvalue { , , } %0, 2 + %6 = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( %4, %5, i64 16) + ret %6 +} + + +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16(, ptr) #5 + + +declare @llvm.vector.insert.nxv24bf16.nxv8bf16(, , i64 immarg) #6 + + +define dso_local @test_ld3_vnum(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld3_vnum( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP1]], zeroinitializer, i64 16) +; CHECK-NEXT: ret [[TMP2]] +; +entry: + %0 = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( zeroinitializer, ptr %a) + %1 = extractvalue { , , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, %1, i64 0) + %3 = extractvalue { , , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv24f16.nxv8f16( %2, %3, i64 8) + %5 = extractvalue { , , } %0, 2 + %6 = tail call @llvm.vector.insert.nxv24f16.nxv8f16( %4, %5, i64 16) + ret %6 +} + + +declare { 
, , } @llvm.aarch64.sve.ld3.sret.nxv8f16(, ptr) #5 + + +declare @llvm.vector.insert.nxv24f16.nxv8f16(, , i64 immarg) #6 + + +define dso_local @test_ld4(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld4( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP1]], zeroinitializer, i64 16) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], zeroinitializer, i64 24) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( zeroinitializer, ptr %a) + %1 = extractvalue { , , , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, %1, i64 0) + %3 = extractvalue { , , , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( %2, %3, i64 8) + %5 = extractvalue { , , , } %0, 2 + %6 = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( %4, %5, i64 16) + %7 = extractvalue { , , , } %0, 3 + %8 = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( %6, %7, i64 24) + ret %8 +} + + +declare { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16(, ptr) #5 + + +declare @llvm.vector.insert.nxv32bf16.nxv8bf16(, , i64 immarg) #6 + + +define dso_local @test_ld4_vnum(ptr noundef %a) #4 { +; CHECK-LABEL: define dso_local @test_ld4_vnum( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP0]], zeroinitializer, i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP1]], zeroinitializer, i64 16) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], zeroinitializer, i64 24) +; CHECK-NEXT: ret [[TMP3]] +; +entry: + %0 = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( zeroinitializer, ptr %a) + %1 = extractvalue { , , , } %0, 0 + %2 = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, %1, i64 0) + %3 = extractvalue { , , , } %0, 1 + %4 = tail call @llvm.vector.insert.nxv32f16.nxv8f16( %2, %3, i64 8) + %5 = extractvalue { , , , } %0, 2 + %6 = tail call @llvm.vector.insert.nxv32f16.nxv8f16( %4, %5, i64 16) + %7 = extractvalue { , , , } %0, 3 + %8 = tail call @llvm.vector.insert.nxv32f16.nxv8f16( %6, %7, i64 24) + ret %8 +} + + +declare { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16(, ptr) #5 + + +declare @llvm.vector.insert.nxv32f16.nxv8f16(, , i64 immarg) #6 + + +define dso_local @test_ldff1(ptr noundef %a) #7 { +; CHECK-LABEL: define dso_local @test_ldff1( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldff1.nxv8bf16(, ptr) #8 + + +define dso_local @test_ldff1_vnum(ptr noundef %a) #7 { +; CHECK-LABEL: define dso_local @test_ldff1_vnum( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.nxv8f16( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldff1.nxv8f16(, ptr) #8 + + +define 
dso_local @test_ldff1_gather( %a) #9 { +; CHECK-LABEL: define dso_local @test_ldff1_gather( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 0) + ret %0 +} + + +declare @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) #8 + + +define dso_local @test_ldff1_gather_offset(ptr noundef %b, %a) #7 { +; CHECK-LABEL: define dso_local @test_ldff1_gather_offset( +; CHECK-SAME: ptr noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32( zeroinitializer, ptr %b, %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(, ptr, ) #8 + + +define dso_local @test_ldff1_gather_index(ptr noundef %b, %a) #7 { +; CHECK-LABEL: define dso_local @test_ldff1_gather_index( +; CHECK-SAME: ptr noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32( zeroinitializer, ptr %b, %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(, ptr, ) #8 + + +define dso_local @test_ldff1_gather_offset_s32(i64 noundef %b, %a) #9 { +; CHECK-LABEL: define dso_local @test_ldff1_gather_offset_s32( +; CHECK-SAME: i64 noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 %b) + ret %0 +} + + +define dso_local @test_ldff1_gather_index_s32(i64 noundef %b, %a) #9 { +; CHECK-LABEL: define dso_local @test_ldff1_gather_index_s32( +; CHECK-SAME: i64 noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = shl i64 %b, 2 + %1 = tail call @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32( zeroinitializer, %a, i64 %0) + ret %1 +} + + +define dso_local @test_ldnf1(ptr noundef %a) #7 { +; CHECK-LABEL: define dso_local @test_ldnf1( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnf1.nxv16i8( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldnf1.nxv16i8(, ptr) #8 + + +define dso_local @test_ldnf1_vnum(ptr noundef %a) #7 { +; CHECK-LABEL: define dso_local @test_ldnf1_vnum( +; CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnf1.nxv8i8( zeroinitializer, ptr %a) + %1 = zext %0 to + ret %1 +} + + +declare @llvm.aarch64.sve.ldnf1.nxv8i8(, ptr) #8 + + +define dso_local @test_ldnt1(ptr noundef readonly %a) #4 { +; CHECK-LABEL: define dso_local @test_ldnt1( +; CHECK-SAME: ptr noundef readonly [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( zeroinitializer, ptr %a) + ret %0 +} + + +declare @llvm.aarch64.sve.ldnt1.nxv16i8(, ptr) #5 + + +define dso_local @test_ldnt1_vnum(ptr noundef readonly %a) #4 { +; CHECK-LABEL: define dso_local @test_ldnt1_vnum( +; CHECK-SAME: ptr noundef readonly [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( zeroinitializer, ptr %a) + ret %0 +} + + 
+define dso_local @test_ldnt1_gather( %a) #2 { +; CHECK-LABEL: define dso_local @test_ldnt1_gather( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32( zeroinitializer, %a, i64 0) + %1 = zext %0 to + ret %1 +} + + +declare @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(, , i64) #3 + + +define dso_local @test_ldnt1_gather_offset(ptr noundef %b, %a) #4 { +; CHECK-LABEL: define dso_local @test_ldnt1_gather_offset( +; CHECK-SAME: ptr noundef [[B:%.*]], [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8( zeroinitializer, ptr %b, %a) + %1 = zext %0 to + ret %1 +} + + +declare @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(, ptr, ) #5 + + +define dso_local @test_svneg_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.neg.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.neg.nxv16i8(, , ) #1 + + +define dso_local @test_svneg_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svneg_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.neg.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.neg.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svneg_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svneg_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svnot_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svnot_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.not.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.not.nxv16i8(, , ) #1 + + +define dso_local @test_svnot_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svnot_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.not.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svnot_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svnot_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.not.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.not.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svnot_x( %b) #0 { +; CHECK-LABEL: define dso_local 
@test_svnot_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.not.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local <8 x i16> @test_svorqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_svorqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(, ) #1 + + +define dso_local i16 @test_svorv( %b) #0 { +; CHECK-LABEL: define dso_local i16 @test_svorv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i16 0 +; +entry: + %0 = tail call i16 @llvm.aarch64.sve.orv.nxv8i16( zeroinitializer, %b) + ret i16 %0 +} + + +declare i16 @llvm.aarch64.sve.orv.nxv8i16(, ) #1 + + +define dso_local @test_svrbit_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrbit_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.rbit.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.rbit.nxv16i8(, , ) #1 + + +define dso_local @test_svrbit_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrbit_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.rbit.nxv16i8( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrbit_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrbit_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rbit.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.rbit.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svrbit_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrbit_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.rbit.nxv16i8( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrevb_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrevb_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.revb.nxv8i16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.revb.nxv8i16(, , ) #1 + + +define dso_local @test_svrevh_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrevh_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.revh.nxv4i32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.revh.nxv4i32(, , ) #1 + + +define dso_local @test_svrevb_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrevb_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revb.nxv8i16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call 
@llvm.aarch64.sve.revb.nxv8i16( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svrevw_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrevw_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.revw.nxv2i64( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.revw.nxv2i64(, , ) #1 + + +define dso_local i64 @test_svaddv( %b) #0 { +; CHECK-LABEL: define dso_local i64 @test_svaddv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 0 +; +entry: + %0 = tail call i64 @llvm.aarch64.sve.saddv.nxv8i16( zeroinitializer, %b) + ret i64 %0 +} + + +declare i64 @llvm.aarch64.sve.saddv.nxv8i16(, ) #1 + + +define dso_local @test_svcvt_f16_i16_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_i16_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(, , ) #1 + + +define dso_local @test_svcvt_f32_i32_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f32_i32_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(, , ) #1 + + +define dso_local @test_svcvt_f16_i16_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f16_i16_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %1 = tail call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svcvt_f64_i64_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svcvt_f64_i64_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(, , ) #1 + + +define dso_local <8 x i16> @test_svmaxqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_svmaxqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(, ) #1 + + +define dso_local i16 @test_svmaxv( %b) #0 { +; CHECK-LABEL: define dso_local i16 @test_svmaxv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i16 0 +; +entry: + %0 = tail call i16 @llvm.aarch64.sve.umaxv.nxv8i16( zeroinitializer, %b) + ret i16 %0 +} + + +declare i16 @llvm.aarch64.sve.umaxv.nxv8i16(, ) #1 + + +define dso_local <8 x i16> @test_svminqv( %b) #0 { +; CHECK-LABEL: define dso_local <8 x i16> @test_svminqv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i16> +; +entry: + %0 = tail call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16( zeroinitializer, %b) + ret <8 x i16> %0 +} + + +declare <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(, ) #1 + + 
+define dso_local i16 @test_svminv( %b) #0 { +; CHECK-LABEL: define dso_local i16 @test_svminv( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i16 32767 +; +entry: + %0 = tail call i16 @llvm.aarch64.sve.sminv.nxv8i16( zeroinitializer, %b) + ret i16 %0 +} + + +declare i16 @llvm.aarch64.sve.sminv.nxv8i16(, ) #1 + + +define dso_local @test_svqabs_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svqabs_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.sqabs.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sqabs.nxv16i8(, , ) #1 + + +define dso_local @test_svqabs_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svqabs_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.sqabs.nxv8i16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sqabs.nxv8i16(, , ) #1 + + +define dso_local @test_svqabs_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svqabs_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqabs.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.sqabs.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svqabs_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svqabs_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.sqabs.nxv8i16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svqneg_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svqneg_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.sqneg.nxv16i8( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sqneg.nxv16i8(, , ) #1 + + +define dso_local @test_svqneg_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svqneg_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.sqneg.nxv8i16( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sqneg.nxv8i16(, , ) #1 + + +define dso_local @test_svqneg_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svqneg_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sqneg.nxv16i8( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = tail call @llvm.aarch64.sve.sqneg.nxv16i8( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svqneg_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svqneg_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.sqneg.nxv8i16( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local void @test_st1(ptr nocapture noundef readnone %a, %b) #0 { +; CHECK-LABEL: define dso_local void @test_st1( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]], 
[[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} + + +define dso_local void @test_st1_vnum(ptr nocapture noundef readnone %a, %b) #0 { +; CHECK-LABEL: define dso_local void @test_st1_vnum( +; CHECK-SAME: ptr nocapture noundef readnone [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} + + +define dso_local void @test_st1_scatter( %a, %b) #10 { +; CHECK-LABEL: define dso_local void @test_st1_scatter( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32( %b, zeroinitializer, %a, i64 0) + ret void +} + + +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(, , , i64) #11 + + +define dso_local void @test_st1_scatter_offset(ptr noundef %a, %c, %b) #12 { +; CHECK-LABEL: define dso_local void @test_st1_scatter_offset( +; CHECK-SAME: ptr noundef [[A:%.*]], [[C:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32( %b, zeroinitializer, ptr %a, %c) + ret void +} + + +declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(, , ptr, ) #13 + + +define dso_local void @test_st1_scatter_index(ptr noundef %a, %c, %b) #12 { +; CHECK-LABEL: define dso_local void @test_st1_scatter_index( +; CHECK-SAME: ptr noundef [[A:%.*]], [[C:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32( %b, zeroinitializer, ptr %a, %c) + ret void +} + + +declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(, , ptr, ) #13 + + +define dso_local void @test_st2(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st2( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv8i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv8i32( %b, i64 4) + tail call void @llvm.aarch64.sve.st2.nxv4i32( %0, %1, zeroinitializer, ptr %a) + ret void +} + + +declare @llvm.vector.extract.nxv4i32.nxv8i32(, i64 immarg) #6 + + +declare void @llvm.aarch64.sve.st2.nxv4i32(, , , ptr nocapture) #15 + + +define dso_local void @test_st2_vnum(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st2_vnum( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv8i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv8i32( %b, i64 4) + %2 = getelementptr , ptr %a, i64 1 + tail call void @llvm.aarch64.sve.st2.nxv4i32( %0, %1, zeroinitializer, ptr %2) + ret void +} + + +define dso_local void @test_st3(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st3( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 4) + %2 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 8) + tail call void @llvm.aarch64.sve.st3.nxv4i32( %0, %1, %2, zeroinitializer, ptr %a) + ret void +} + + +declare @llvm.vector.extract.nxv4i32.nxv12i32(, i64 immarg) #6 + + +declare void 
@llvm.aarch64.sve.st3.nxv4i32(, , , , ptr nocapture) #15 + + +define dso_local void @test_st3_vnum(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st3_vnum( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 4) + %2 = tail call @llvm.vector.extract.nxv4i32.nxv12i32( %b, i64 8) + %3 = getelementptr , ptr %a, i64 1 + tail call void @llvm.aarch64.sve.st3.nxv4i32( %0, %1, %2, zeroinitializer, ptr %3) + ret void +} + + +define dso_local void @test_st4(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st4( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 4) + %2 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 8) + %3 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 12) + tail call void @llvm.aarch64.sve.st4.nxv4i32( %0, %1, %2, %3, zeroinitializer, ptr %a) + ret void +} + + +declare @llvm.vector.extract.nxv4i32.nxv16i32(, i64 immarg) #6 + + +declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , ptr nocapture) #15 + + +define dso_local void @test_st4_vnum(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_st4_vnum( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 0) + %1 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 4) + %2 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 8) + %3 = tail call @llvm.vector.extract.nxv4i32.nxv16i32( %b, i64 12) + %4 = getelementptr , ptr %a, i64 1 + tail call void @llvm.aarch64.sve.st4.nxv4i32( %0, %1, %2, %3, zeroinitializer, ptr %4) + ret void +} + + +define dso_local void @test_stnt1(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_stnt1( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.stnt1.nxv4i32( %b, zeroinitializer, ptr %a) + ret void +} + + +declare void @llvm.aarch64.sve.stnt1.nxv4i32(, , ptr nocapture) #15 + + +define dso_local void @test_stnt1_vnum(ptr nocapture noundef %a, %b) #14 { +; CHECK-LABEL: define dso_local void @test_stnt1_vnum( +; CHECK-SAME: ptr nocapture noundef [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr , ptr %a, i64 1 + tail call void @llvm.aarch64.sve.stnt1.nxv4i32( %b, zeroinitializer, ptr %0) + ret void +} + + +define dso_local void @test_stnt1_scatter( %a, %b) #10 { +; CHECK-LABEL: define dso_local void @test_stnt1_scatter( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32( %b, zeroinitializer, %a, i64 0) + ret void +} + + +declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(, , , i64) #11 + + +define dso_local void @test_stnt1_scatter_offset(ptr noundef %a, %c, %b) #12 { +; CHECK-LABEL: define dso_local void @test_stnt1_scatter_offset( +; CHECK-SAME: ptr noundef [[A:%.*]], 
[[C:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32( %b, zeroinitializer, ptr %a, %c) + ret void +} + + +declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(, , ptr, ) #13 + + +define dso_local void @test_stnt1_scatter_index(ptr noundef %a, %c, %b) #12 { +; CHECK-LABEL: define dso_local void @test_stnt1_scatter_index( +; CHECK-SAME: ptr noundef [[A:%.*]], [[C:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64( %b, zeroinitializer, ptr %a, %c) + ret void +} + + +declare void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(, , ptr, ) #13 + + +define dso_local @test_svextb_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svextb_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.sxtb.nxv8i16( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sxtb.nxv8i16(, , ) #1 + + +define dso_local @test_svexth_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svexth_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.sxth.nxv2i64( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sxth.nxv2i64(, , ) #1 + + +define dso_local @test_svextb_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svextb_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.sxtw.nxv2i64( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %1 = tail call @llvm.aarch64.sve.sxtw.nxv2i64( %a, %0, %b) + ret %1 +} + + +declare @llvm.aarch64.sve.sxtw.nxv2i64(, , ) #1 + + +define dso_local @test_svextw_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svextw_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.sxth.nxv4i32( undef, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.sxth.nxv4i32(, , ) #1 + + +define dso_local @test_svrecpe_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpe_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.urecpe.nxv4i32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.urecpe.nxv4i32(, , ) #1 + + +define dso_local @test_svrecpe_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpe_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.urecpe.nxv4i32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrecpe_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpe_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.urecpe.nxv4i32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrecpe_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrecpe_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) 
+; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.urecpe.nxv4i32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.urecpe.nxv4i32( %a, %0, %b) + ret %1 +} + + +define dso_local @test_svrsqrte_m( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrsqrte_m( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret [[A]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ursqrte.nxv4i32( %a, zeroinitializer, %b) + ret %0 +} + + +declare @llvm.aarch64.sve.ursqrte.nxv4i32(, , ) #1 + + +define dso_local @test_svrsqrte_z( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrsqrte_z( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret zeroinitializer +; +entry: + %0 = tail call @llvm.aarch64.sve.ursqrte.nxv4i32( zeroinitializer, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrsqrte_x( %b) #0 { +; CHECK-LABEL: define dso_local @test_svrsqrte_x( +; CHECK-SAME: [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret undef +; +entry: + %0 = tail call @llvm.aarch64.sve.ursqrte.nxv4i32( undef, zeroinitializer, %b) + ret %0 +} + + +define dso_local @test_svrsqrte_m2( %a, %b) #0 { +; CHECK-LABEL: define dso_local @test_svrsqrte_m2( +; CHECK-SAME: [[A:%.*]], [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ursqrte.nxv4i32( undef, [[TMP0]], [[B]]) +; CHECK-NEXT: ret [[TMP1]] +; +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.ursqrte.nxv4i32( %a, %0, %b) + ret %1 +} + + +declare float @llvm.aarch64.sve.clasta.n.nxv4f32(, float, ) #16 + + +declare float @llvm.aarch64.sve.clastb.n.nxv4f32(, float, ) #16 + +attributes #0 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #1 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #2 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #3 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #4 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #5 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #6 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #7 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #8 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #9 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #10 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #11 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #12 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #13 = 
{"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #14 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #15 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} +attributes #16 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"} \ No newline at end of file From 686e7c0cea9a250aba11983e83931fb50688c4d2 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 26 Mar 2024 11:19:46 +0000 Subject: [PATCH 2/3] fixed formatting --- .../AArch64/AArch64TargetTransformInfo.cpp | 143 +++++++++--------- 1 file changed, 68 insertions(+), 75 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 40bd17053d34c..e6e736089848d 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -944,53 +944,54 @@ instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) { if (match(II.getOperand(0), m_ZeroInt())) { Constant *Node; Type *RetTy = II.getType(); - if (RetTy->isStructTy()){ - auto StructT = cast(RetTy); - auto VecT = StructT->getElementType(0); - SmallVector ZerVec; - for (unsigned i = 0; i < StructT->getNumElements(); i++){ - ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0): - ConstantInt::get(VecT, 0)); - } - Node = ConstantStruct::get(StructT, ZerVec); - } - else if (RetTy->isFPOrFPVectorTy()) + if (RetTy->isStructTy()) { + auto StructT = cast(RetTy); + auto VecT = StructT->getElementType(0); + SmallVector ZerVec; + for (unsigned i = 0; i < StructT->getNumElements(); i++) { + ZerVec.push_back(VecT->isFPOrFPVectorTy() ? 
ConstantFP::get(VecT, 0.0) + : ConstantInt::get(VecT, 0)); + } + Node = ConstantStruct::get(StructT, ZerVec); + } else if (RetTy->isFPOrFPVectorTy()) Node = ConstantFP::get(RetTy, 0.0); - else + else Node = ConstantInt::get(II.getType(), 0); - + IC.replaceInstUsesWith(II, Node); return IC.eraseInstFromFunction(II); } return std::nullopt; } -//Erase unary operation where predicate has all inactive lanes +// Erase unary operation where predicate has all inactive lanes static std::optional<Instruction *> -instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II, int PredPos) { - if (match(II.getOperand(PredPos), m_ZeroInt())) { - return IC.eraseInstFromFunction(II); - } - return std::nullopt; +instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II, + int PredPos) { + if (match(II.getOperand(PredPos), m_ZeroInt())) { + return IC.eraseInstFromFunction(II); + } + return std::nullopt; } // Simplify unary operation where predicate has all inactive lanes by replacing // instruction with given constant static std::optional<Instruction *> -instCombineSVENoActiveUnaryConstant(InstCombiner &IC, IntrinsicInst &II, Constant *NewVal) { - if (match(II.getOperand(0), m_ZeroInt())) { - IC.replaceInstUsesWith(II, NewVal); - return IC.eraseInstFromFunction(II); - } - return std::nullopt; +instCombineSVENoActiveUnaryConstant(InstCombiner &IC, IntrinsicInst &II, + Constant *NewVal) { + if (match(II.getOperand(0), m_ZeroInt())) { + IC.replaceInstUsesWith(II, NewVal); + return IC.eraseInstFromFunction(II); + } + return std::nullopt; } -// Simplify unary operation where predicate has all inactive lanes or try to replace -// with _x form when all lanes are active +// Simplify unary operation where predicate has all inactive lanes or try to +// replace with _x form when all lanes are active static std::optional<Instruction *> instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { - if (isAllActivePredicate(II.getOperand(1)) - && !isa(II.getOperand(0))){ + if (isAllActivePredicate(II.getOperand(1)) && + !isa(II.getOperand(0))) { Value *Undef = llvm::UndefValue::get(II.getType()); return IC.replaceOperand(II, 0, Undef); } @@ -1011,10 +1012,9 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC, static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II) { - + // Optimize when predicate is known all active or all inactive - if (auto II_NA = - instCombineSVEAllOrNoActiveUnary(IC, II)) + if (auto II_NA = instCombineSVEAllOrNoActiveUnary(IC, II)) return II_NA; IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1)); @@ -1051,11 +1051,10 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC, static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II) { - - //Replace by zero constant when all lanes are inactive - if (auto II_NA = - instCombineSVENoActiveUnaryZero(IC, II)) - return II_NA; + + // Replace by zero constant when all lanes are inactive + if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II)) + return II_NA; LLVMContext &Ctx = II.getContext(); @@ -1242,9 +1241,8 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC, Value *Vec = II.getArgOperand(2); Type *Ty = II.getType(); - //If all lanes are inactive replace with operand - if (auto II_NA = - instCombineSVENoActiveUnaryReplace(IC, II, false)) + // If all lanes are inactive replace with operand + if (auto II_NA = instCombineSVENoActiveUnaryReplace(IC, II, false)) return II_NA; if (!Ty->isIntegerTy()) @@ -1427,9 +1425,8 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { Value *PtrOp =
II.getOperand(1); Type *VecTy = II.getType(); - //Replace by zero constant when all lanes are inactive - if (auto II_NA = - instCombineSVENoActiveUnaryZero(IC, II)) + // Replace by zero constant when all lanes are inactive + if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II)) return II_NA; if (isAllActivePredicate(Pred)) { @@ -1451,10 +1448,9 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { Value *Pred = II.getOperand(1); Value *PtrOp = II.getOperand(2); - //Remove when all lanes are inactive - if (auto II_NA = - instCombineSVENoActiveUnaryErase(IC, II, 0)) - return II_NA; + // Remove when all lanes are inactive + if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 0)) + return II_NA; if (isAllActivePredicate(Pred)) { StoreInst *Store = IC.Builder.CreateStore(VecOp, PtrOp); @@ -1754,10 +1750,9 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) { Type *Ty = II.getType(); Value *PassThru = ConstantAggregateZero::get(Ty); - //Replace by zero constant when all lanes are inactive - if (auto II_NA = - instCombineSVENoActiveUnaryZero(IC, II)) - return II_NA; + // Replace by zero constant when all lanes are inactive + if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II)) + return II_NA; // Contiguous gather => masked load. // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1)) @@ -1789,10 +1784,9 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) { Value *Index = II.getOperand(3); Type *Ty = Val->getType(); - //Remove when all lanes are inactive - if (auto II_NA = - instCombineSVENoActiveUnaryErase(IC, II, 0)) - return II_NA; + // Remove when all lanes are inactive + if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 0)) + return II_NA; // Contiguous scatter => masked store. 
   // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
@@ -2167,22 +2161,22 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVENoActiveUnaryZero(IC, II);
   case Intrinsic::aarch64_sve_andqv:
   case Intrinsic::aarch64_sve_andv:
-    return instCombineSVENoActiveUnaryConstant(IC, II,
-                                               ConstantInt::get(II.getType(), 1));
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantInt::get(II.getType(), 1));
   case Intrinsic::aarch64_sve_fmaxnmqv:
   case Intrinsic::aarch64_sve_fmaxnmv:
   case Intrinsic::aarch64_sve_fminnmqv:
   case Intrinsic::aarch64_sve_fminnmv:
-    return instCombineSVENoActiveUnaryConstant(IC, II,
-                                               ConstantFP::getQNaN(II.getType()));
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getQNaN(II.getType()));
   case Intrinsic::aarch64_sve_fmaxqv:
   case Intrinsic::aarch64_sve_fmaxv:
-    return instCombineSVENoActiveUnaryConstant(IC, II,
-                                               ConstantFP::getInfinity(II.getType(), true));
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getInfinity(II.getType(), true));
   case Intrinsic::aarch64_sve_fminqv:
   case Intrinsic::aarch64_sve_fminv:
-    return instCombineSVENoActiveUnaryConstant(IC, II,
-                                               ConstantFP::getInfinity(II.getType()));
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getInfinity(II.getType()));
   case Intrinsic::aarch64_sve_prf:
   case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
   case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
@@ -2228,24 +2222,23 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_st4q:
     return instCombineSVENoActiveUnaryErase(IC, II, 4);
   case Intrinsic::aarch64_sve_smaxv:
-  case Intrinsic::aarch64_sve_smaxqv:
-  {
-    auto *MinSInt = ConstantInt::get(II.getType(), APInt::getSignedMinValue(
-                                     II.getType()->getScalarSizeInBits()));
+  case Intrinsic::aarch64_sve_smaxqv: {
+    auto *MinSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMinValue(II.getType()->getScalarSizeInBits()));
     return instCombineSVENoActiveUnaryConstant(IC, II, MinSInt);
   }
   case Intrinsic::aarch64_sve_sminv:
-  case Intrinsic::aarch64_sve_sminqv:
-  {
-    auto *MaxSInt = ConstantInt::get(II.getType(), APInt::getSignedMaxValue(
-                                     II.getType()->getScalarSizeInBits()));
+  case Intrinsic::aarch64_sve_sminqv: {
+    auto *MaxSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMaxValue(II.getType()->getScalarSizeInBits()));
     return instCombineSVENoActiveUnaryConstant(IC, II, MaxSInt);
   }
   case Intrinsic::aarch64_sve_uminv:
-  case Intrinsic::aarch64_sve_uminqv:
-  {
-    auto *MaxUInt = ConstantInt::get(II.getType(), APInt::getMaxValue(
-                                     II.getType()->getScalarSizeInBits()));
+  case Intrinsic::aarch64_sve_uminqv: {
+    auto *MaxUInt = ConstantInt::get(
+        II.getType(), APInt::getMaxValue(II.getType()->getScalarSizeInBits()));
     return instCombineSVENoActiveUnaryConstant(IC, II, MaxUInt);
   }
   case Intrinsic::aarch64_neon_fmaxnm:

From d089404dcb2b5b576777fdc95b3569f5027e5b42 Mon Sep 17 00:00:00 2001
From: Marian Lukac
Date: Tue, 26 Mar 2024 12:59:38 +0000
Subject: [PATCH 3/3] Updated failing testcase

---
 .../Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index d6434ad2b4700..15876b3c9ffac 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -43,7 +43,7 @@ define @srshl_abs_positive_merge( %a,
 @srshl_abs_all_active_pred( %a, %b, %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_all_active_pred(
 ; CHECK-NEXT:    [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[ABS:%.*]] = tail call @llvm.aarch64.sve.abs.nxv8i16( [[B:%.*]], [[PG]], [[A:%.*]])
+; CHECK-NEXT:    [[ABS:%.*]] = tail call @llvm.aarch64.sve.abs.nxv8i16( undef, [[PG]], [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( [[PG2:%.*]], [[ABS]], shufflevector ( insertelement ( poison, i16 2, i64 0), poison, zeroinitializer))
 ; CHECK-NEXT:    ret [[TMP1]]
 ;
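
As an illustration only (not part of the patch series; the function and value names below are hypothetical), this is the kind of fold instCombineSVENoActiveUnaryReplace enables for merging unary intrinsics: when the governing predicate is a known all-false constant, the call simply produces its inactive-lanes operand, so InstCombine can forward that operand and drop the call.

; Sketch: with an all-false predicate the merging abs yields %passthru, so
; instcombine (run with an AArch64 target) is expected to rewrite %r to %passthru.
define <vscale x 8 x i16> @abs_all_inactive(<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %a) {
  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %passthru, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %r
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)

Running opt -S -passes=instcombine on such a module with an aarch64 triple should leave only the return of %passthru.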