@@ -7319,7 +7319,6 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
 };
 
 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
-  NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
   NEONMAP0(splat_lane_v),
   NEONMAP0(splat_laneq_v),
   NEONMAP0(splatq_lane_v),
@@ -7419,7 +7418,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP0(vcvtq_f16_s16),
   NEONMAP0(vcvtq_f16_u16),
   NEONMAP0(vcvtq_f32_v),
-  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
+  NEONMAP0(vcvtq_high_bf16_f32),
+  NEONMAP0(vcvtq_low_bf16_f32),
   NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
@@ -7628,7 +7628,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
   NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
+  NEONMAP0(vcvth_bf16_f32),
   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
@@ -12095,6 +12095,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return ConstantInt::get(Builder.getInt32Ty(), 0);
   }
 
+  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
+    return Builder.CreateFPTrunc(
+        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
+                              Builder.getFloatTy()),
+        Builder.getBFloatTy());
+
   // Handle MSVC intrinsics before argument evaluation to prevent double
   // evaluation.
   if (std::optional<MSVCIntrin> MsvcIntId =
@@ -12820,6 +12826,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                         "vgetq_lane");
   }
+  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
+  }
+  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
+    SmallVector<int, 16> ConcatMask(8);
+    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    llvm::Value *Trunc =
+        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
+    return Builder.CreateShuffleVector(
+        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
+  }
+  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
+    SmallVector<int, 16> ConcatMask(8);
+    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+    SmallVector<int, 16> LoMask(4);
+    std::iota(LoMask.begin(), LoMask.end(), 0);
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
+    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
+    llvm::Value *Inactive = Builder.CreateShuffleVector(
+        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
+    llvm::Value *Trunc =
+        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
+    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
+  }
 
   case clang::AArch64::BI_InterlockedAdd:
   case clang::AArch64::BI_InterlockedAdd64: {
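
The diff above drops the NEONMAP entries that routed the bfloat16 conversion builtins to the aarch64.neon.bfcvt* intrinsics and instead emits a generic fptrunc (plus shufflevectors for the 128-bit forms) directly in EmitAArch64BuiltinExpr. The following is a minimal usage sketch, not part of the change, showing the four affected ACLE intrinsics; it assumes an AArch64 toolchain with BF16 support (e.g. -march=armv8.6-a+bf16), and the comments summarize the lowering the new code paths are expected to produce:

#include <arm_neon.h>

/* Scalar form: now a plain 'fptrunc float -> bfloat'. */
bfloat16_t cvt_scalar(float a) { return vcvth_bf16_f32(a); }

/* 64-bit vector result: 'fptrunc <4 x float> -> <4 x bfloat>'. */
bfloat16x4_t cvt_vec(float32x4_t a) { return vcvt_bf16_f32(a); }

/* Converted lanes land in the low half, high half is zeroed:
   fptrunc followed by a shuffle with a zero vector. */
bfloat16x8_t cvt_low(float32x4_t a) { return vcvtq_low_bf16_f32(a); }

/* Low half of 'inactive' is kept, converted lanes fill the high half:
   extract the low half, fptrunc the f32 operand, then concatenate. */
bfloat16x8_t cvt_high(bfloat16x8_t inactive, float32x4_t a) {
  return vcvtq_high_bf16_f32(inactive, a);
}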