@@ -7277,7 +7277,6 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
72777277};
72787278
72797279static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7280- NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
72817280 NEONMAP0(splat_lane_v),
72827281 NEONMAP0(splat_laneq_v),
72837282 NEONMAP0(splatq_lane_v),
@@ -7377,7 +7376,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
73777376 NEONMAP0(vcvtq_f16_s16),
73787377 NEONMAP0(vcvtq_f16_u16),
73797378 NEONMAP0(vcvtq_f32_v),
7380- NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7379+ NEONMAP0(vcvtq_high_bf16_f32),
7380+ NEONMAP0(vcvtq_low_bf16_f32),
73817381 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
73827382 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
73837383 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
@@ -7586,7 +7586,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
75867586 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
75877587 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
75887588 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7589- NEONMAP1 (vcvth_bf16_f32, aarch64_neon_bfcvt, 0 ),
7589+ NEONMAP0 (vcvth_bf16_f32),
75907590 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
75917591 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
75927592 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
@@ -12040,6 +12040,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1204012040 return ConstantInt::get(Builder.getInt32Ty(), 0);
1204112041 }
1204212042
12043+ if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
12044+ return Builder.CreateFPTrunc(
12045+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
12046+ Builder.getFloatTy()),
12047+ Builder.getBFloatTy());
12048+
1204312049 // Handle MSVC intrinsics before argument evaluation to prevent double
1204412050 // evaluation.
1204512051 if (std::optional<MSVCIntrin> MsvcIntId =
@@ -12765,6 +12771,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1276512771 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1276612772 "vgetq_lane");
1276712773 }
12774+ case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12775+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12776+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12777+ return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12778+ }
12779+ case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12780+ SmallVector<int, 16> ConcatMask(8);
12781+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12782+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12783+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12784+ llvm::Value *Trunc =
12785+ Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12786+ return Builder.CreateShuffleVector(
12787+ Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12788+ }
12789+ case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12790+ SmallVector<int, 16> ConcatMask(8);
12791+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12792+ SmallVector<int, 16> LoMask(4);
12793+ std::iota(LoMask.begin(), LoMask.end(), 0);
12794+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12795+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12796+ llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12797+ llvm::Value *Inactive = Builder.CreateShuffleVector(
12798+ Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12799+ llvm::Value *Trunc =
12800+ Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12801+ return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12802+ }
1276812803
1276912804 case clang::AArch64::BI_InterlockedAdd:
1277012805 case clang::AArch64::BI_InterlockedAdd64: {
0 commit comments