@@ -7307,7 +7307,6 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
73077307};
73087308
73097309static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7310- NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
73117310 NEONMAP0(splat_lane_v),
73127311 NEONMAP0(splat_laneq_v),
73137312 NEONMAP0(splatq_lane_v),
@@ -7407,7 +7406,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP0(vcvtq_f16_s16),
   NEONMAP0(vcvtq_f16_u16),
   NEONMAP0(vcvtq_f32_v),
-  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
+  NEONMAP0(vcvtq_high_bf16_f32),
+  NEONMAP0(vcvtq_low_bf16_f32),
   NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
@@ -7616,7 +7616,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
   NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
+  NEONMAP0(vcvth_bf16_f32),
   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
@@ -12083,6 +12083,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return ConstantInt::get(Builder.getInt32Ty(), 0);
   }

+  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
+    return Builder.CreateFPTrunc(
+        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+                              Builder.getFloatTy()),
+        Builder.getBFloatTy());
+
   // Handle MSVC intrinsics before argument evaluation to prevent double
   // evaluation.
   if (std::optional<MSVCIntrin> MsvcIntId =
@@ -12808,6 +12814,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                         "vgetq_lane");
   }
+  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
+  }
+  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
+    SmallVector<int, 16> ConcatMask(8);
+    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    llvm::Value *Trunc =
+        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
+    return Builder.CreateShuffleVector(
+        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
+  }
+  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
+    SmallVector<int, 16> ConcatMask(8);
+    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+    SmallVector<int, 16> LoMask(4);
+    std::iota(LoMask.begin(), LoMask.end(), 0);
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
+    llvm::Value *Inactive = Builder.CreateShuffleVector(
+        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
+    llvm::Value *Trunc =
+        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
+    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
+  }

   case clang::AArch64::BI_InterlockedAdd:
   case clang::AArch64::BI_InterlockedAdd64: {
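For context, a minimal C sketch (not part of the patch) exercising the four ACLE intrinsics whose CodeGen changes above. With this change they should lower to a plain fptrunc plus shuffles instead of calls to the aarch64.neon.bfcvt* intrinsics; the function names and the `-march=armv8.6-a+bf16` flag below are illustrative assumptions.

```c
// Sketch only: calls the intrinsics whose lowering this patch changes.
// Assumes an AArch64 target with BF16, e.g. clang --target=aarch64-linux-gnu -march=armv8.6-a+bf16 -O2.
#include <arm_neon.h>

bfloat16_t cvt_scalar(float a) {
  return vcvth_bf16_f32(a);                 // now a plain fptrunc float -> bfloat
}

bfloat16x4_t cvt_vec(float32x4_t a) {
  return vcvt_bf16_f32(a);                  // fptrunc <4 x float> -> <4 x bfloat>
}

bfloat16x8_t cvt_low(float32x4_t a) {
  return vcvtq_low_bf16_f32(a);             // fptrunc, then concatenate with a zero upper half
}

bfloat16x8_t cvt_high(bfloat16x8_t inactive, float32x4_t a) {
  return vcvtq_high_bf16_f32(inactive, a);  // keep the low half of 'inactive', fptrunc into the high half
}
```

Presumably the point of expressing these conversions with generic fptrunc and shufflevector rather than opaque target intrinsics is to let the usual IR optimizations see through them.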