Skip to content

Commit 2f69bf0

Browse files
ghehg authored and lanza committed
[CIR][CIRGen][Builtin][Neon] Lower builtin_neon_vrnda_v and builtin_neon_vrndaq_v (#871)
As the title says. This also adds NeonType support for Float32.

Co-authored-by: Guojin He <[email protected]>
1 parent c099848 commit 2f69bf0

File tree

2 files changed

+66
-5
lines changed

2 files changed

+66
-5
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1390,7 +1390,9 @@ static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags,
13901390
// so we use v16i8 to represent poly128 and get pattern matched.
13911391
llvm_unreachable("NYI");
13921392
case NeonTypeFlags::Float32:
1393-
llvm_unreachable("NYI");
1393+
return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
1394+
CGF->getCIRGenModule().FloatTy,
1395+
V1Ty ? 1 : (2 << IsQuad));
13941396
case NeonTypeFlags::Float64:
13951397
llvm_unreachable("NYI");
13961398
}
@@ -1616,9 +1618,6 @@ mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
16161618
if (shift > 0)
16171619
llvm_unreachable("Argument shift NYI");
16181620

1619-
if (builtinID != clang::NEON::BI__builtin_neon_vrndns_f32)
1620-
llvm_unreachable("NYT");
1621-
16221621
CIRGenBuilderTy &builder = cgf.getBuilder();
16231622
for (unsigned j = 0; j < argTypes.size(); ++j) {
16241623
if (isConstrainedFPIntrinsic) {
@@ -2416,7 +2415,9 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
24162415
}
24172416
case NEON::BI__builtin_neon_vrnda_v:
24182417
case NEON::BI__builtin_neon_vrndaq_v: {
2419-
llvm_unreachable("NYI");
2418+
assert(!MissingFeatures::buildConstrainedFPCall());
2419+
return buildNeonCall(BuiltinID, *this, {Ty}, Ops, "llvm.round", Ty,
2420+
getLoc(E->getExprLoc()));
24202421
}
24212422
case NEON::BI__builtin_neon_vrndih_f16: {
24222423
llvm_unreachable("NYI");

clang/test/CIR/CodeGen/arm-neon-directed-rounding.c

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,3 +38,63 @@ float32_t test_vrndns_f32(float32_t a) {
3838
// LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4,
3939
// LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4,
4040
// LLVM: ret float [[RET_VAL]]
41+
42+
float32x2_t test_vrnda_f32(float32x2_t a) {
43+
return vrnda_f32(a);
44+
}
45+
46+
// CIR: cir.func internal private @vrnda_f32(%arg0: !cir.vector<!cir.float x 2>
47+
// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
48+
// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
49+
// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector<!cir.float x 2>), !cir.vector<!s8i x 8>
50+
// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector<!s8i x 8>), !cir.vector<!cir.float x 2>
51+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
52+
// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 2>
53+
54+
// CIR-LABEL: test_vrnda_f32
55+
// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
56+
// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
57+
// CIR: [[FUNC_RES:%.*]] = cir.call @vrnda_f32([[FUNC_ARG]]) : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
58+
// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
59+
// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
60+
// CIR: cir.return [[RET_VAL]] : !cir.vector<!cir.float x 2>
61+
62+
// LLVM: define dso_local <2 x float> @test_vrnda_f32(<2 x float> [[ARG:%.*]])
63+
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 8
64+
// LLVM: [[P0:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8,
65+
// LLVM: store <2 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 8,
66+
// LLVM: [[INTRIN_ARG:%.*]] = load <2 x float>, ptr [[P0_SAVE]], align 8,
67+
// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[INTRIN_ARG]])
68+
// LLVM: store <2 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 8,
69+
// LLVM: [[RES_COPY0:%.*]] = load <2 x float>, ptr [[RES_SAVE0]], align 8,
70+
// LLVM: store <2 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 8,
71+
// LLVM: [[RES_COPY1:%.*]] = load <2 x float>, ptr [[RES_SAVE1]], align 8,
72+
// LLVM: store <2 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 8,
73+
// LLVM: [[RET_VAL:%.*]] = load <2 x float>, ptr [[RET_P]], align 8,
74+
// LLVM: ret <2 x float> [[RET_VAL]]
75+
76+
float32x4_t test_vrndaq_f32(float32x4_t a) {
77+
return vrndaq_f32(a);
78+
}
79+
80+
// CIR: cir.func internal private @vrndaq_f32(%arg0: !cir.vector<!cir.float x 4>
81+
// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
82+
// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.float x 4>
83+
// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector<!cir.float x 4>), !cir.vector<!s8i x 16>
84+
// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector<!s8i x 16>), !cir.vector<!cir.float x 4>
85+
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
86+
// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 4>
87+
88+
// LLVM: define dso_local <4 x float> @test_vrndaq_f32(<4 x float> [[ARG:%.*]])
89+
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 16
90+
// LLVM: [[P0:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16,
91+
// LLVM: store <4 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 16,
92+
// LLVM: [[INTRIN_ARG:%.*]] = load <4 x float>, ptr [[P0_SAVE]], align 16,
93+
// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[INTRIN_ARG]])
94+
// LLVM: store <4 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 16,
95+
// LLVM: [[RES_COPY0:%.*]] = load <4 x float>, ptr [[RES_SAVE0]], align 16,
96+
// LLVM: store <4 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 16,
97+
// LLVM: [[RES_COPY1:%.*]] = load <4 x float>, ptr [[RES_SAVE1]], align 16,
98+
// LLVM: store <4 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 16,
99+
// LLVM: [[RET_VAL:%.*]] = load <4 x float>, ptr [[RET_P]], align 16,
100+
// LLVM: ret <4 x float> [[RET_VAL]]

0 commit comments

Comments
 (0)