-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Clang][LLVM][AArch64] Add intrinsic for MOVT SME2 instruction #97602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-ir Author: None (CarolineConcatto) ChangesThis patch adds these intrinsics: // Variants are also available for: according to PR#324[1] Patch is 33.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97602.diff 8 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index ce8908f566f2f..ff68e536e99b0 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -817,4 +817,9 @@ multiclass ZAReadzArray<string vg_num>{
defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;
+
+let SMETargetGuard = "sme2,sme-lutv2" in {
+ def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
+}
} // let SVETargetGuard = InvalidMode
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
new file mode 100644
index 0000000000000..9bdc3481953a2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
@@ -0,0 +1,401 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s
+// REQUIRES: aarch64-registered-target
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u8_1(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z23test_write_lane_zt_u8_1u11__SVUint8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u8_1(svuint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u8)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s8_2(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z23test_write_lane_zt_s8_2u10__SVInt8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s8_2(svint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s8)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u16_3(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u16_3u12__SVUint16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u16_3(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u16)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s16_1(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s16_1u11__SVInt16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s16_1(svint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s16)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u32_2(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u32_2u12__SVUint32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u32_2(svuint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u32)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s32_3(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s32_3u11__SVInt32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]], i32 3)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s32_3(svint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s32)(0, v, 3);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_u64_0(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_u64_0u12__SVUint64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 0)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_u64_0(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _u64)(0, v, 0);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_s64_1(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_s64_1u11__SVInt64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_s64_1(svint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _s64)(0, v, 1);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f16_2(
+// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]], i32 2)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f16_2u13__SVFloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]], i32 2)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f16_2(svfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f16)(0, v, 2);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_bf16_3(
+// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z25test_write_lane_zt_bf16_3u14__SVBfloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]], i32 3)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_bf16_3(svbfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _bf16)(0, v, 3);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f32_0(
+// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f32_0u13__SVFloat32_t(
+// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]], i32 0)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f32_0(svfloat32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f32)(0, v, 0);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_lane_zt_f64_1(
+// CHECK-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]], i32 1)
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z24test_write_lane_zt_f64_1u13__SVFloat64_t(
+// CHECK-CXX-SAME: <vscale x 2 x double> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> [[V]], i32 1)
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_lane_zt_f64_1(svfloat64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_lane_zt, _f64)(0, v, 1);
+}
+
+//ALIAS
+// CHECK-LABEL: define dso_local void @test_write_zt_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_u8u11__SVUint8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u8(svuint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u8)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z16test_write_zt_s8u10__SVInt8_t(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s8(svint8_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s8)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u16(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u16u12__SVUint16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u16(svuint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s16(
+// CHECK-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s16u11__SVInt16_t(
+// CHECK-CXX-SAME: <vscale x 8 x i16> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s16(svint16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u32(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u32u12__SVUint32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u32(svuint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u32)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s32(
+// CHECK-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s32u11__SVInt32_t(
+// CHECK-CXX-SAME: <vscale x 4 x i32> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s32(svint32_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s32)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_u64(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_u64u12__SVUint64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_u64(svuint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _u64)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_s64(
+// CHECK-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_s64u11__SVInt64_t(
+// CHECK-CXX-SAME: <vscale x 2 x i64> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_s64(svint64_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _s64)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_f16(
+// CHECK-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f16u13__SVFloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x half> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_f16(svfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _f16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_bf16(
+// CHECK-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z18test_write_zt_bf16u14__SVBfloat16_t(
+// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> [[V]])
+// CHECK-CXX-NEXT: ret void
+//
+void test_write_zt_bf16(svbfloat16_t v) __arm_streaming __arm_inout("zt0") {
+ SVE_ACLE_FUNC(svwrite_zt, _bf16)(0, v);
+}
+
+// CHECK-LABEL: define dso_local void @test_write_zt_f32(
+// CHECK-SAME: <vscale x 4 x float> [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> [[V]])
+// CHECK-NEXT: ret void
+//
+// CHECK-CXX-LABEL: define dso_local void @_Z17test_write_zt_f32u13__SVFloat32_t(
+// CHECK-CXX-SAME: <vscale x 4 x float> [[V:%.*]]) local_u...
[truncated]
|
|
The ACLE was merged so we can merge this patch when approved. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not to do with this work, but I think the predication around FEAT_SME_LUTv2 is incorrect.
According to latest spec, these instructions should both only require FEAT_SME_LUTv2
which itself requires SME2:
If FEAT_SME_LUTv2 is implemented, then FEAT_SME2 is implemented.
So we could remove SME2 from this predicate and list it as a dependency of FEAT_SME_LUTv2 in AArch64Features.td.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will do this in another patch, because it is related to more than one intrinsic.
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
Outdated
Show resolved
Hide resolved
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c
Outdated
Show resolved
Hide resolved
SpencerAbson
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM.
- (Note for the issue with ZT0 access attributes and FEAT_SME_LUTv2)
This patch adds these intrinsics:
// Variants are also available for:
// [_s8], [_u16], [_s16], [_u32], [_s32], [_u64], [_s64]
// [_bf16], [_f16], [_f32], [_f64]
void svwrite_lane_zt[_u8](uint64_t zt0, svuint8_t zt, uint64_t idx) __arm_streaming __arm_inout("zt0");
void svwrite_zt[_u8](uint64_t zt0, svuint8_t zt) __arm_streaming __arm_inout("zt0");
according to PR#324[1]
[1]ARM-software/acle#324
43fe090 to
c1ddd1e
Compare
Add '}' at the end of the functions test_write_zt that was missing
…97602) This patch adds these intrinsics: // Variants are also available for: // [_s8], [_u16], [_s16], [_u32], [_s32], [_u64], [_s64] // [_bf16], [_f16], [_f32], [_f64] void svwrite_lane_zt[_u8](uint64_t zt0, svuint8_t zt, uint64_t idx) __arm_streaming __arm_inout("zt0"); void svwrite_zt[_u8](uint64_t zt0, svuint8_t zt) __arm_streaming __arm_inout("zt0"); according to PR#324[1] [1]ARM-software/acle#324
This patch adds these intrinsics:
// Variants are also available for:
// [_s8], [_u16], [_s16], [_u32], [_s32], [_u64], [_s64]
// [_bf16], [_f16], [_f32], [_f64]
void svwrite_lane_zt[_u8](uint64_t zt0, svuint8_t zt, uint64_t idx) __arm_streaming __arm_inout("zt0");
void svwrite_zt[_u8](uint64_t zt0, svuint8_t zt) __arm_streaming __arm_inout("zt0");
according to PR#324[1]
[1]ARM-software/acle#324