[Clang][SVE2p1] Add svpsel builtins

CarolineConcatto · CarolineConcatto · commit 1b93e15bcd9a · 2023-10-18T15:05:26.000Z
As described in: ARM-software/acle#257 Patch by : Sander de Smalen<sander.desmalen@arm.com> Reviewed By: kmclaughlin Differential Revision: https://reviews.llvm.org/D151197
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
@@ -1865,10 +1865,21 @@ def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_
 
 def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
 def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
+
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, "", [], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [], []>;
 }
 
 let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
+
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
+
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10007,7 +10007,33 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   switch (BuiltinID) {
   default:
     return nullptr;
-
+  case SVE::BI__builtin_sve_svpsel_lane_b8:
+  case SVE::BI__builtin_sve_svpsel_lane_b16:
+  case SVE::BI__builtin_sve_svpsel_lane_b32:
+  case SVE::BI__builtin_sve_svpsel_lane_b64:
+  case SVE::BI__builtin_sve_svpsel_lane_c8:
+  case SVE::BI__builtin_sve_svpsel_lane_c16:
+  case SVE::BI__builtin_sve_svpsel_lane_c32:
+  case SVE::BI__builtin_sve_svpsel_lane_c64: {
+    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
+    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
+                               "aarch64.svcount")) &&
+           "Unexpected TargetExtType");
+    auto SVCountTy =
+        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+    Function *CastFromSVCountF =
+        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+    Function *CastToSVCountF =
+        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
+
+    auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
+    llvm::Value *Ops0 =
+        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
+    llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
+    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
+  }
   case SVE::BI__builtin_sve_svmov_b_z: {
     // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
     SVETypeFlags TypeFlags(Builtin->TypeModifier);
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c
@@ -0,0 +1,165 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+// CHECK-LABEL: @test_svpsel_lane_b8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_b8u10__SVBool_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
+//
+svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_b8(p1, p2, idx + 15);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_b16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 8 x i1> [[TMP0]], i32 [[ADD]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b16u10__SVBool_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 8 x i1> [[TMP0]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_b16(p1, p2, idx + 7);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_b32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[ADD]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b32u10__SVBool_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_b32(p1, p2, idx + 3);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_b64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 2 x i1> [[TMP0]], i32 [[ADD]])
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b64u10__SVBool_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 2 x i1> [[TMP0]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_b64(p1, p2, idx + 1);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_c8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP1]])
+// CHECK-NEXT:    ret target("aarch64.svcount") [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_c8u11__SVCount_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP1]])
+// CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP2]]
+//
+svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_c8(p1, p2, idx + 15);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_c16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], i32 [[ADD]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c16u11__SVCount_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_c16(p1, p2, idx + 7);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_c32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i32 [[ADD]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c32u11__SVCount_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_c32(p1, p2, idx + 3);
+}
+
+// CHECK-LABEL: @test_svpsel_lane_c64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32 [[ADD]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c64u11__SVCount_tu10__SVBool_tj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32 [[ADD]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
+// CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
+//
+svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) {
+  return svpsel_lane_c64(p1, p2, idx + 1);
+}