80 changes: 80 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
@@ -3475,6 +3475,22 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_int16(0x6F, (0xC0 | encode));
}

void Assembler::vmovw(XMMRegister dst, Register src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attributes, true);
emit_int16(0x6E, (0xC0 | encode));
}

void Assembler::vmovw(Register dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attributes, true);
emit_int16(0x7E, (0xC0 | encode));
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
@@ -8442,6 +8458,70 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_operand(dst, src, 0);
}

void Assembler::vaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x58, (0xC0 | encode));
}

void Assembler::vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5C, (0xC0 | encode));
}

void Assembler::vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5E, (0xC0 | encode));
}

void Assembler::vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x59, (0xC0 | encode));
}

void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5F, (0xC0 | encode));
}

void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5D, (0xC0 | encode));
}

void Assembler::vsqrtsh(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x51, (0xC0 | encode));
}

void Assembler::vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP6, &attributes);
emit_int16((unsigned char)0x99, (0xC0 | encode));
}

void Assembler::vpaddsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0 && (vector_len == Assembler::AVX_512bit || (!needs_evex(dst, nds, src) || VM_Version::supports_avx512vl())), "");
assert(!needs_evex(dst, nds, src) || VM_Version::supports_avx512bw(), "");
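Reviewer note (not part of the patch): a minimal sketch of how the new scalar FP16 emitters above could be exercised from hand-written stub code, assuming an Assembler* named masm, registers chosen only for illustration, and a prior VM_Version::supports_avx512_fp16() check:

// Add two half-precision values whose 16-bit payloads arrive in GPRs
// and hand the result bits back in rax (illustrative sketch only).
masm->vmovw(xmm0, rdi);           // xmm0[15:0] = first fp16 payload
masm->vmovw(xmm1, rsi);           // xmm1[15:0] = second fp16 payload
masm->vaddsh(xmm0, xmm0, xmm1);   // scalar fp16 add (EVEX map5, opcode 0x58)
masm->vmovw(rax, xmm0);           // result bits back to a GPR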
17 changes: 16 additions & 1 deletion src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -585,6 +585,8 @@ class Assembler : public AbstractAssembler {
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3,
VEX_OPCODE_0F_3C = 0x4,
VEX_OPCODE_MAP5 = 0x5,
VEX_OPCODE_MAP6 = 0x6,
VEX_OPCODE_MASK = 0x1F
};

@@ -1815,6 +1817,9 @@ class Assembler : public AbstractAssembler {
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);

void vmovw(XMMRegister dst, Register src);
void vmovw(Register dst, XMMRegister src);

#ifdef _LP64
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);
@@ -2691,6 +2696,16 @@ class Assembler : public AbstractAssembler {
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// FP16 instructions
void vaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsqrtsh(XMMRegister dst, XMMRegister src);
void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);

// Saturating packed instructions.
void vpaddsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
12 changes: 12 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -6675,6 +6675,18 @@ void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst
}
}

void C2_MacroAssembler::efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
switch(opcode) {
case Op_AddHF: vaddsh(dst, src1, src2); break;
case Op_SubHF: vsubsh(dst, src1, src2); break;
case Op_MulHF: vmulsh(dst, src1, src2); break;
case Op_DivHF: vdivsh(dst, src1, src2); break;
case Op_MaxHF: vmaxsh(dst, src1, src2); break;
case Op_MinHF: vminsh(dst, src1, src2); break;
default: assert(false, "%s", NodeClassNames[opcode]); break;
}
}

void C2_MacroAssembler::vector_saturating_op(int ideal_opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
switch(elem_bt) {
case T_BYTE:
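A sketch of how the new dispatch helper is intended to be driven (mirroring the scalar_binOps_HF_reg rule added to x86.ad below); the ideal opcode is forwarded unchanged and the registers are illustrative:

// Illustrative only: C2 passes this->ideal_Opcode() straight through,
// so AddHF lands in vaddsh, MinHF in vminsh, and so on.
__ efp16sh(Op_AddHF, xmm0, xmm1, xmm2);   // xmm0 = xmm1 + xmm2 (scalar fp16)
__ efp16sh(Op_MinHF, xmm3, xmm3, xmm4);   // xmm3 = min(xmm3, xmm4)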
3 changes: 2 additions & 1 deletion src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -505,6 +505,7 @@
void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
XMMRegister src, int vlen_enc);

void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset,
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
5 changes: 5 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1027,6 +1027,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_APX_F;
_features &= ~CPU_AVX512_FP16;
}

// Currently APX support is only enabled for targets supporting AVX512VL feature.
@@ -1077,6 +1078,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_AVX_IFMA;
_features &= ~CPU_AVX512_FP16;
}
}

@@ -3109,6 +3111,9 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
}
if (sef_cpuid7_edx.bits.serialize != 0)
result |= CPU_SERIALIZE;

if (sef_cpuid7_edx.bits.avx512_fp16 != 0)
result |= CPU_AVX512_FP16;
}

// ZX features.
14 changes: 9 additions & 5 deletions src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -276,7 +276,9 @@ class VM_Version : public Abstract_VM_Version {
serialize : 1,
: 5,
cet_ibt : 1,
: 11;
: 2,
avx512_fp16 : 1,
: 8;
} bits;
};

@@ -416,8 +418,9 @@ class VM_Version : public Abstract_VM_Version {
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/\
decl(SHA512, "sha512", 61) /* SHA512 instructions*/
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/ \
decl(SHA512, "sha512", 61) /* SHA512 instructions*/ \
decl(AVX512_FP16, "avx512_fp16", 62) /* AVX512 FP16 ISA support*/

#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -753,6 +756,7 @@ class VM_Version : public Abstract_VM_Version {
static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
static bool supports_avx512_fp16() { return (_features & CPU_AVX512_FP16) != 0; }
static bool supports_hv() { return (_features & CPU_HV) != 0; }
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
static bool supports_f16c() { return (_features & CPU_F16C) != 0; }
@@ -840,7 +844,7 @@ class VM_Version : public Abstract_VM_Version {

// For AVX CPUs only. f16c support is disabled if UseAVX == 0.
static bool supports_float16() {
return supports_f16c() || supports_avx512vl();
return supports_f16c() || supports_avx512vl() || supports_avx512_fp16();
}

// Check intrinsic support
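For context: avx512_fp16 corresponds to CPUID.(EAX=07H,ECX=0):EDX[23], which is why the anonymous padding above shrinks from 11 bits to 2 + 1 + 8. Code emitting the new encodings is expected to guard on the feature, matching the asserts added in assembler_x86.cpp (a sketch, not taken from this patch):

if (VM_Version::supports_avx512_fp16()) {
  // Safe to emit the EVEX map5/map6 forms: vmovw, vaddsh, vsqrtsh, vfmadd132sh, ...
}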
122 changes: 121 additions & 1 deletion src/hotspot/cpu/x86/x86.ad
@@ -1,5 +1,5 @@
//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -1461,6 +1461,20 @@ bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_AddHF:
case Op_DivHF:
case Op_FmaHF:
case Op_MaxHF:
case Op_MinHF:
case Op_MulHF:
case Op_ReinterpretS2HF:
case Op_ReinterpretHF2S:
case Op_SubHF:
case Op_SqrtHF:
if (!VM_Version::supports_avx512_fp16()) {
return false;
}
break;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
case Op_MulReductionVI:
@@ -4521,6 +4535,35 @@ instruct vReplS_reg(vec dst, rRegI src) %{
ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
match(Set dst (Replicate con));
effect(TEMP rtmp);
format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
__ movl($rtmp$$Register, $con$$constant);
__ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
match(Set dst (Replicate src));
effect(TEMP rtmp);
format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vmovw($rtmp$$Register, $src$$XMMRegister);
__ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
#endif

instruct ReplS_mem(vec dst, memory mem) %{
predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
match(Set dst (Replicate (LoadS mem)));
@@ -10837,3 +10880,80 @@ instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%}
ins_pipe(pipe_slow);
%}

instruct reinterpretS2HF(regF dst, rRegI src)
%{
match(Set dst (ReinterpretS2HF src));
format %{ "vmovw $dst, $src" %}
ins_encode %{
__ vmovw($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
match(Set dst (ReinterpretS2HF (ConvF2HF src)));
format %{ "convF2HFAndS2HF $dst, $src" %}
ins_encode %{
__ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
%}
ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
match(Set dst (ConvHF2F (ReinterpretHF2S src)));
format %{ "convHF2SAndHF2F $dst, $src" %}
ins_encode %{
__ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
match(Set dst (ReinterpretHF2S src));
format %{ "vmovw $dst, $src" %}
ins_encode %{
__ vmovw($dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
match(Set dst (SqrtHF src));
format %{ "scalar_sqrt_fp16 $dst, $src" %}
ins_encode %{
__ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
match(Set dst (AddHF src1 src2));
match(Set dst (DivHF src1 src2));
match(Set dst (MaxHF src1 src2));
match(Set dst (MinHF src1 src2));
match(Set dst (MulHF src1 src2));
match(Set dst (SubHF src1 src2));
format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
ins_encode %{
int opcode = this->ideal_Opcode();
__ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
match(Set dst (FmaHF src2 (Binary dst src1)));
effect(DEF dst);
format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
ins_encode %{
__ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}