Skip to content

Commit 4b463ee

Browse files
Jatin Bhateja, Paul Sandoz, Bhavana Kilambi, jddarcy, rgiulietti
committed
8342103: C2 compiler support for Float16 type and associated scalar operations
Co-authored-by: Paul Sandoz <[email protected]> Co-authored-by: Bhavana Kilambi <[email protected]> Co-authored-by: Joe Darcy <[email protected]> Co-authored-by: Raffaello Giulietti <[email protected]> Reviewed-by: psandoz, epeter, sviswanathan
1 parent 332d87c commit 4b463ee

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2855
-67
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3475,6 +3475,22 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
34753475
emit_int16(0x6F, (0xC0 | encode));
34763476
}
34773477

3478+
// Emit VMOVW xmm, r32: copies the low 16 bits of a general-purpose
// register into an XMM register (EVEX map5, 66 prefix, opcode 0x6E).
// Only valid when the CPU provides the AVX512-FP16 extension.
void Assembler::vmovw(XMMRegister dst, Register src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs, true);
  emit_int16(0x6E, (0xC0 | enc));
}
3485+
3486+
// Emit VMOVW r32, xmm: moves the low 16 bits of an XMM register out to a
// general-purpose register (EVEX map5, 66 prefix, opcode 0x7E).
// Only valid when the CPU provides the AVX512-FP16 extension.
void Assembler::vmovw(Register dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  // Note: for 0x7E the XMM operand is encoded in the reg field, hence src first.
  int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs, true);
  emit_int16(0x7E, (0xC0 | enc));
}
3493+
34783494
void Assembler::vmovdqu(XMMRegister dst, Address src) {
34793495
assert(UseAVX > 0, "");
34803496
InstructionMark im(this);
@@ -8442,6 +8458,70 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
84428458
emit_operand(dst, src, 0);
84438459
}
84448460

8461+
// Emit VADDSH: scalar half-precision float add, dst = nds + src
// (EVEX map5, F3 prefix, opcode 0x58). Requires AVX512-FP16.
void Assembler::vaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x58, (0xC0 | enc));
}
8468+
8469+
// Emit VSUBSH: scalar half-precision float subtract, dst = nds - src
// (EVEX map5, F3 prefix, opcode 0x5C). Requires AVX512-FP16.
void Assembler::vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x5C, (0xC0 | enc));
}
8476+
8477+
// Emit VDIVSH: scalar half-precision float divide, dst = nds / src
// (EVEX map5, F3 prefix, opcode 0x5E). Requires AVX512-FP16.
void Assembler::vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x5E, (0xC0 | enc));
}
8484+
8485+
// Emit VMULSH: scalar half-precision float multiply, dst = nds * src
// (EVEX map5, F3 prefix, opcode 0x59). Requires AVX512-FP16.
void Assembler::vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x59, (0xC0 | enc));
}
8492+
8493+
// Emit VMAXSH: scalar half-precision float maximum of nds and src
// (EVEX map5, F3 prefix, opcode 0x5F). Requires AVX512-FP16.
void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x5F, (0xC0 | enc));
}
8500+
8501+
// Emit VMINSH: scalar half-precision float minimum of nds and src
// (EVEX map5, F3 prefix, opcode 0x5D). Requires AVX512-FP16.
void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x5D, (0xC0 | enc));
}
8508+
8509+
// Emit VSQRTSH: scalar half-precision float square root of src into dst
// (EVEX map5, F3 prefix, opcode 0x51). Requires AVX512-FP16.
void Assembler::vsqrtsh(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  emit_int16(0x51, (0xC0 | enc));
}
8516+
8517+
// Emit VFMADD132SH: scalar half-precision fused multiply-add
// (EVEX map6, 66 prefix, opcode 0x99). Requires AVX512-FP16.
// NOTE(review): per the 132 form, dst = dst * src2 + src1 — confirm against
// the caller's operand ordering.
void Assembler::vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  // Fix: use the same assert message as the other FP16 emitters instead of "".
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP6, &attributes);
  // 0x99 has the high bit set, so widen via unsigned char to avoid sign extension.
  emit_int16((unsigned char)0x99, (0xC0 | encode));
}
8524+
84458525
void Assembler::vpaddsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
84468526
assert(UseAVX > 0 && (vector_len == Assembler::AVX_512bit || (!needs_evex(dst, nds, src) || VM_Version::supports_avx512vl())), "");
84478527
assert(!needs_evex(dst, nds, src) || VM_Version::supports_avx512bw(), "");

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -585,6 +585,8 @@ class Assembler : public AbstractAssembler {
585585
VEX_OPCODE_0F_38 = 0x2,
586586
VEX_OPCODE_0F_3A = 0x3,
587587
VEX_OPCODE_0F_3C = 0x4,
588+
VEX_OPCODE_MAP5 = 0x5,
589+
VEX_OPCODE_MAP6 = 0x6,
588590
VEX_OPCODE_MASK = 0x1F
589591
};
590592

@@ -1815,6 +1817,9 @@ class Assembler : public AbstractAssembler {
18151817
void movsbl(Register dst, Address src);
18161818
void movsbl(Register dst, Register src);
18171819

1820+
void vmovw(XMMRegister dst, Register src);
1821+
void vmovw(Register dst, XMMRegister src);
1822+
18181823
#ifdef _LP64
18191824
void movsbq(Register dst, Address src);
18201825
void movsbq(Register dst, Register src);
@@ -2691,6 +2696,16 @@ class Assembler : public AbstractAssembler {
26912696
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
26922697
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
26932698

2699+
// FP16 instructions
2700+
void vaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2701+
void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2702+
void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2703+
void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2704+
void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2705+
void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2706+
void vsqrtsh(XMMRegister dst, XMMRegister src);
2707+
void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);
2708+
26942709
// Saturating packed instructions.
26952710
void vpaddsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
26962711
void vpaddsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6675,6 +6675,18 @@ void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst
66756675
}
66766676
}
66776677

6678+
void C2_MacroAssembler::efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
6679+
switch(opcode) {
6680+
case Op_AddHF: vaddsh(dst, src1, src2); break;
6681+
case Op_SubHF: vsubsh(dst, src1, src2); break;
6682+
case Op_MulHF: vmulsh(dst, src1, src2); break;
6683+
case Op_DivHF: vdivsh(dst, src1, src2); break;
6684+
case Op_MaxHF: vmaxsh(dst, src1, src2); break;
6685+
case Op_MinHF: vminsh(dst, src1, src2); break;
6686+
default: assert(false, "%s", NodeClassNames[opcode]); break;
6687+
}
6688+
}
6689+
66786690
void C2_MacroAssembler::vector_saturating_op(int ideal_opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
66796691
switch(elem_bt) {
66806692
case T_BYTE:

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -505,6 +505,7 @@
505505
void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
506506
XMMRegister src, int vlen_enc);
507507

508+
void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
508509

509510
void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset,
510511
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,

src/hotspot/cpu/x86/vm_version_x86.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,7 @@ void VM_Version::get_processor_features() {
10271027
_features &= ~CPU_AVX512_BITALG;
10281028
_features &= ~CPU_AVX512_IFMA;
10291029
_features &= ~CPU_APX_F;
1030+
_features &= ~CPU_AVX512_FP16;
10301031
}
10311032

10321033
// Currently APX support is only enabled for targets supporting AVX512VL feature.
@@ -1077,6 +1078,7 @@ void VM_Version::get_processor_features() {
10771078
_features &= ~CPU_AVX512_BITALG;
10781079
_features &= ~CPU_AVX512_IFMA;
10791080
_features &= ~CPU_AVX_IFMA;
1081+
_features &= ~CPU_AVX512_FP16;
10801082
}
10811083
}
10821084

@@ -3109,6 +3111,9 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
31093111
}
31103112
if (sef_cpuid7_edx.bits.serialize != 0)
31113113
result |= CPU_SERIALIZE;
3114+
3115+
if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3116+
result |= CPU_AVX512_FP16;
31123117
}
31133118

31143119
// ZX features.

src/hotspot/cpu/x86/vm_version_x86.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -276,7 +276,9 @@ class VM_Version : public Abstract_VM_Version {
276276
serialize : 1,
277277
: 5,
278278
cet_ibt : 1,
279-
: 11;
279+
: 2,
280+
avx512_fp16 : 1,
281+
: 8;
280282
} bits;
281283
};
282284

@@ -416,8 +418,9 @@ class VM_Version : public Abstract_VM_Version {
416418
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
417419
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
418420
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
419-
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/\
420-
decl(SHA512, "sha512", 61) /* SHA512 instructions*/
421+
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/ \
422+
decl(SHA512, "sha512", 61) /* SHA512 instructions*/ \
423+
decl(AVX512_FP16, "avx512_fp16", 62) /* AVX512 FP16 ISA support*/
421424

422425
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
423426
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -753,6 +756,7 @@ class VM_Version : public Abstract_VM_Version {
753756
static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
754757
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
755758
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
759+
static bool supports_avx512_fp16() { return (_features & CPU_AVX512_FP16) != 0; }
756760
static bool supports_hv() { return (_features & CPU_HV) != 0; }
757761
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
758762
static bool supports_f16c() { return (_features & CPU_F16C) != 0; }
@@ -840,7 +844,7 @@ class VM_Version : public Abstract_VM_Version {
840844

841845
// For AVX CPUs only. f16c support is disabled if UseAVX == 0.
842846
static bool supports_float16() {
843-
return supports_f16c() || supports_avx512vl();
847+
return supports_f16c() || supports_avx512vl() || supports_avx512_fp16();
844848
}
845849

846850
// Check intrinsic support

src/hotspot/cpu/x86/x86.ad

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -1461,6 +1461,20 @@ bool Matcher::match_rule_supported(int opcode) {
14611461
return false;
14621462
}
14631463
break;
1464+
case Op_AddHF:
1465+
case Op_DivHF:
1466+
case Op_FmaHF:
1467+
case Op_MaxHF:
1468+
case Op_MinHF:
1469+
case Op_MulHF:
1470+
case Op_ReinterpretS2HF:
1471+
case Op_ReinterpretHF2S:
1472+
case Op_SubHF:
1473+
case Op_SqrtHF:
1474+
if (!VM_Version::supports_avx512_fp16()) {
1475+
return false;
1476+
}
1477+
break;
14641478
case Op_VectorLoadShuffle:
14651479
case Op_VectorRearrange:
14661480
case Op_MulReductionVI:
@@ -4521,6 +4535,35 @@ instruct vReplS_reg(vec dst, rRegI src) %{
45214535
ins_pipe( pipe_slow );
45224536
%}
45234537

4538+
#ifdef _LP64
4539+
// Replicate a half-float (immH) constant into every lane of a vector.
// The 16-bit constant is first materialized into the GPR temp, then
// broadcast with EVPBROADCASTW. The encode block asserts AVX512-FP16
// support and T_SHORT element type rather than using a predicate.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    // Load the immediate into the temp GPR, then broadcast word-wise.
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
4552+
4553+
// Replicate a half-float held in an XMM register into every vector lane.
// Guarded by AVX512-FP16 support and T_SHORT element type. The scalar is
// moved out to a GPR with VMOVW and then broadcast with EVPBROADCASTW,
// using rtmp as the intermediate.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
4565+
#endif
4566+
45244567
instruct ReplS_mem(vec dst, memory mem) %{
45254568
predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
45264569
match(Set dst (Replicate (LoadS mem)));
@@ -10837,3 +10880,80 @@ instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
1083710880
%}
1083810881
ins_pipe(pipe_slow);
1083910882
%}
10883+
10884+
// Reinterpret a 16-bit value in a GPR as a half-float in an XMM register.
// A pure bit move (VMOVW) — no conversion is performed.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10893+
10894+
// Fused pattern: float -> half-float conversion whose result is immediately
// reinterpreted as a half-float value. Collapses to a single VCVTPS2PH
// (0x04 = round-to-nearest-even immediate), avoiding the intermediate
// move through a GPR.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
10903+
10904+
// Fused pattern: half-float reinterpreted to its 16-bit form and then
// widened to float. Collapses to a single VCVTPH2PS, avoiding the
// intermediate move through a GPR.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
10913+
10914+
// Reinterpret a half-float in an XMM register as its raw 16 bits in a GPR.
// A pure bit move (VMOVW) — no conversion is performed.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10923+
10924+
// Scalar Float16 square root, mapped directly to VSQRTSH.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10933+
10934+
// Scalar Float16 two-operand arithmetic (add/sub/mul/div/min/max).
// A single rule covers all six ideal opcodes; the encode block recovers
// the matched opcode via ideal_Opcode() and lets efp16sh() pick the
// corresponding AVX512-FP16 instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10949+
10950+
// Scalar Float16 fused multiply-add: dst = dst * src1 + src2 (per the
// format string), implemented with VFMADD132SH. The match shape binds
// dst as both an input and the destination, hence effect(DEF dst).
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

0 commit comments

Comments
 (0)