diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index e33c065059c44..412ef9abac1bc 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -710,7 +710,7 @@ defm vrndmq: vrnd, "m">; defm vrndpq: vrnd, "p">; defm vrndaq: vrnd, "a">; defm vrndxq: vrnd, "x">; -defm vrndnq: vrnd, "n">; +defm vrndnq: vrnd, "n">; multiclass compare_with_pred { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c index 3e625c739bde9..4888bc8c5e98f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -148,7 +148,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a) // CHECK-LABEL: @test_vrndnq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndnq_f16(float16x8_t a) @@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a) // CHECK-LABEL: @test_vrndnq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndnq_f32(float32x4_t a) diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 9b7dd8099368d..3ee69b72cc5cd 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in { [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>; } -def int_arm_mve_vrintn: DefaultAttrsIntrinsic< - [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_arm_mve_vcls: DefaultAttrsIntrinsic< [llvm_anyvector_ty], 
[LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7ba6d411bc7b5..814c00c669cb3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, return false; // Not 'arm.mve.vctp64'. } + if (Name.starts_with("vrintn.v")) { + NewFn = Intrinsic::getOrInsertDeclaration( + F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType()); + return true; + } + // These too are changed to accept a v2i1 instead of the old v4i1. if (Name.consume_back(".v4i1")) { // 'arm.mve.*.v4i1'. diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 6dd8a374a92af..9dffd945d5baa 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m opcode, } multiclass MVE_VRINT_ops { - defm N : MVE_VRINT_m; + defm N : MVE_VRINT_m; defm X : MVE_VRINT_m; defm A : MVE_VRINT_m; defm Z : MVE_VRINT_m; diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll index 1d7dcc8bf8440..6946ec37ddf33 100644 --- a/llvm/test/CodeGen/Thumb2/mve-frint.ll +++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -424,21 +424,74 @@ entry: ret <2 x double> %0 } -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) -declare <4 x float> @llvm.rint.v4f32(<4 x float>) -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) -declare <4 x float> @llvm.floor.v4f32(<4 x float>) -declare <4 x float> @llvm.round.v4f32(<4 x float>) -declare <8 x half> @llvm.ceil.v8f16(<8 x half>) -declare <8 x half> @llvm.trunc.v8f16(<8 x half>) -declare <8 x half> @llvm.rint.v8f16(<8 x half>) -declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) -declare <8 x half> @llvm.floor.v8f16(<8 x half>) -declare <8 x half> @llvm.round.v8f16(<8 x half>) -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) -declare 
<2 x double> @llvm.trunc.v2f64(<2 x double>) -declare <2 x double> @llvm.rint.v2f64(<2 x double>) -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) -declare <2 x double> @llvm.floor.v2f64(<2 x double>) -declare <2 x double> @llvm.round.v2f64(<2 x double>) +define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: froundeven_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vrintn.f32 s3, s3 +; CHECK-MVE-NEXT: vrintn.f32 s2, s2 +; CHECK-MVE-NEXT: vrintn.f32 s1, s1 +; CHECK-MVE-NEXT: vrintn.f32 s0, s0 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: froundeven_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: froundeven_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmovx.f16 s4, s0 +; CHECK-MVE-NEXT: vrintn.f16 s0, s0 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s1 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s1, s1 +; CHECK-MVE-NEXT: vins.f16 s1, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s2 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s2, s2 +; CHECK-MVE-NEXT: vins.f16 s2, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s3 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s3, s3 +; CHECK-MVE-NEXT: vins.f16 s3, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: froundeven_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) { +; CHECK-LABEL: froundeven_float64_t: +; CHECK: @ %bb.0: @ %entry +; 
CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src) + ret <2 x double> %0 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll index a70975e1e7318..b30bb2e3ad3ff 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s +; The llvm.arm.mve.vrintn intrinsics should auto-upgrade to llvm.roundeven, which is selected to vrintn. + define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) { ; CHECK-LABEL: test_vrndnq_f16: ; CHECK: @ %bb.0: @ %entry