Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7098,6 +7098,99 @@ void C2_MacroAssembler::vector_saturating_op(int ideal_opc, BasicType elem_bt, X
}
}

// ----------------------------------------------------------------------
// We are using the bitwise ternary logic instruction VPTERNLOG, which can
// absorb complex binary expressions involving 3 boolean variables.
//
// For copySign we set the truth table value as 0xE4.
// First column of truth table represents magnitude, second column
// represents sign operand while the third column is a conditional
// operand with fixed value of 0x7FFFFFFF.
//
// Whenever condition bit is 1 corresponding magnitude bit gets selected
// else corresponding sign bit is picked.
// Our condition mask is such that, apart from the sign bit (i.e. the MSB), all
// other bits are set to 1, this ensures that all the bits of result
// apart from MSB bit are copied from magnitude operand while sign bit
// is borrowed from sign operand.
//
// Magnitude Sign Condition Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// ----------------------------------------------------------------------

// EVEX copySign: each lane of dst keeps all of its bits except the MSB, which
// is replaced by the MSB of the matching lane of src. xtmp is clobbered; it
// holds the per-lane select mask (every bit set except the MSB).
void C2_MacroAssembler::vector_copy_sign_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src,
                                              XMMRegister xtmp, int vlen_enc) {
  assert(is_floating_point_type(elem_bt), "");
  // Truth table 0xFF produces 1 for every input combination: xtmp = all ones.
  vpternlogq(xtmp, 0xFF, xtmp, xtmp, vlen_enc);
  if (elem_bt != T_FLOAT) {
    assert(elem_bt == T_DOUBLE, "");
    // Shift each 64-bit lane right by one so that only the MSB is clear.
    vpsrlq(xtmp, xtmp, 1, vlen_enc);
    // 0xE4: where the mask bit is 1 keep the dst bit, otherwise take the src bit.
    vpternlogq(dst, 0xE4, src, xtmp, vlen_enc);
    return;
  }
  // Same sequence on 32-bit lanes.
  vpsrld(xtmp, xtmp, 1, vlen_enc);
  vpternlogd(dst, 0xE4, src, xtmp, vlen_enc);
}

// Bitwise AND of src1 and src2 into dst, emitted as vandps for T_FLOAT and as
// vandpd otherwise (the non-float path asserts that elem_bt is T_DOUBLE).
void C2_MacroAssembler::vandpsd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
  switch (elem_bt) {
    case T_FLOAT:
      vandps(dst, src1, src2, vlen_enc);
      break;
    default:
      assert(elem_bt == T_DOUBLE, "");
      vandpd(dst, src1, src2, vlen_enc);
      break;
  }
}

// Shifts every lane of src left by the immediate 'shift' and writes the result
// to dst. The instruction is chosen from the byte size of elem_bt:
// 2 -> vpsllw, 4 -> vpslld, 8 -> vpsllq; any other size ends in fatal().
// NOTE(review): despite the "dq" in the name, no vpslldq instruction is
// emitted by this helper.
void C2_MacroAssembler::vpslldq_imm(BasicType elem_bt, XMMRegister dst, XMMRegister src, int shift, int vlen_enc) {
int elem_sz = type2aelembytes(elem_bt);
if (elem_sz == 2) {
vpsllw(dst, src, shift, vlen_enc);
} else if (elem_sz == 4) {
vpslld(dst, src, shift, vlen_enc);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AVX 1 supports 256-bit float/double vector and only128-bit vpsll, vpsrl, vpor for integer vectors. So you will have issues on AVX 1 platform for 256bit float/double vector copysign implementation using vpsll, vpsrl, vpor.

} else if (elem_sz == 8) {
vpsllq(dst, src, shift, vlen_enc);
} else {
fatal("Unsupported lane size %s", type2name(elem_bt));
}
}

// Shifts every lane of src right by the immediate 'shift' and writes the
// result to dst. Dispatches on the lane size in bytes: 2 -> vpsrlw,
// 4 -> vpsrld, 8 -> vpsrlq; any other size is a fatal error.
// Note that no vpsrldq instruction is emitted here, despite the name.
void C2_MacroAssembler::vpsrldq_imm(BasicType elem_bt, XMMRegister dst, XMMRegister src, int shift, int vlen_enc) {
  switch (type2aelembytes(elem_bt)) {
    case 2:
      vpsrlw(dst, src, shift, vlen_enc);
      break;
    case 4:
      vpsrld(dst, src, shift, vlen_enc);
      break;
    case 8:
      vpsrlq(dst, src, shift, vlen_enc);
      break;
    default:
      fatal("Unsupported lane size %s", type2name(elem_bt));
      break;
  }
}

// AVX fallback for copySign (no VPTERNLOG): per lane, dst keeps its magnitude
// bits and receives the sign bit of src. xtmp is clobbered.
//
// dst is overwritten before src is read, so dst must not share a register
// with src or xtmp.
//
// The masks are built with integer shifts and merged with vpor. For vectors
// wider than 128 bits those integer forms need AVX2 (AVX1 only provides
// 256-bit floating-point operations), hence the assert below.
void C2_MacroAssembler::vector_copy_sign_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp, int vlen_enc) {
  assert(is_floating_point_type(elem_bt), "");
  assert(vlen_enc == Assembler::AVX_128bit || UseAVX > 1,
         "integer shifts and vpor on vectors wider than 128 bits require AVX2");
  int sign_mask_shift = elem_bt == T_DOUBLE ? 63 : 31;
  // Set every bit of xtmp: EQ_UQ (equal or unordered) is true for any lane
  // compared with itself, NaN included.
  vcmpps(xtmp, xtmp, xtmp, Assembler::EQ_UQ, vlen_enc);
  // Magnitude mask: 0x7FFFFFFF per float lane, 0x7FFFFFFFFFFFFFFF per double lane.
  vpsrldq_imm(elem_bt, xtmp, xtmp, 1, vlen_enc);
  // Keep only the magnitude bits of the destination lanes.
  vandpsd(elem_bt, dst, dst, xtmp, vlen_enc);
  // Shift the magnitude mask left until only the MSB survives, giving the
  // sign mask: 0x80000000 per float lane, 0x8000000000000000 per double lane.
  vpslldq_imm(elem_bt, xtmp, xtmp, sign_mask_shift, vlen_enc);
  // Extract the sign bit of the source lanes.
  vandpsd(elem_bt, xtmp, xtmp, src, vlen_enc);
  // Merge the extracted sign with the magnitude bits.
  vpor(dst, dst, xtmp, vlen_enc);
}

void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
switch(opcode) {
case Op_AddVHF: evaddph(dst, src1, src2, vlen_enc); break;
Expand Down
10 changes: 10 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,16 @@

void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void vandpsd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void vpsrldq_imm(BasicType elem_bt, XMMRegister dst, XMMRegister src, int shift, int vlen_enc);

void vpslldq_imm(BasicType elem_bt, XMMRegister dst, XMMRegister src, int shift, int vlen_enc);

void vector_copy_sign_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp, int vlen_enc);

void vector_copy_sign_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp, int vlen_enc);

void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
Expand Down
94 changes: 59 additions & 35 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1538,10 +1538,7 @@ bool Matcher::match_rule_supported(int opcode) {
break;
case Op_CopySignD:
case Op_CopySignF:
if (UseAVX < 3) {
return false;
}
if (!VM_Version::supports_avx512vl()) {
if (UseAVX < 1) {
return false;
}
break;
Expand Down Expand Up @@ -1669,6 +1666,12 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
return false;
}
break;
case Op_CopySignVD:
case Op_CopySignVF:
if (UseAVX < 1) {
return false;
}
break;
case Op_MaxV:
case Op_MinV:
if (UseSSE < 4 && is_integral_type(bt)) {
Expand Down Expand Up @@ -6519,47 +6522,68 @@ instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is 0b11100100 = 0xE4
// ---------------------------------------
#ifdef _LP64
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _LP64 ifdef is no longer needed in the .ad file (32-bit support has been removed).

instruct copySignF_reg(regF dst, regF src, regF xtmp) %{
predicate(VM_Version::supports_avx512vl());
match(Set dst (CopySignF dst src));
effect(TEMP xtmp);
format %{ "CopySignF $dst, $src\t! using $xtmp as TEMP" %}
ins_encode %{
__ vector_copy_sign_evex(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD xtmp, immD zero) %{
predicate(VM_Version::supports_avx512vl());
match(Set dst (CopySignD dst (Binary src zero)));
ins_cost(100);
effect(TEMP xtmp);
format %{ "CopySignD $dst, $src\t! using $xtmp as TEMP" %}
ins_encode %{
__ vector_copy_sign_evex(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}

#endif // _LP64

instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
instruct copySignF_reg_avx(regF dst, regF src, regF xtmp) %{
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be vlRegF.

predicate(!VM_Version::supports_avx512vl());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
predicate(!VM_Version::supports_avx512vl());
predicate(UseAVX > 0 && !VM_Version::supports_avx512vl());

Just to be a bit more explicit (and same for the one below).

Copy link
Member Author

@jatin-bhateja jatin-bhateja Feb 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's already handled by the match_rule_supported constraint.

match(Set dst (CopySignF dst src));
effect(TEMP tmp1, TEMP tmp2);
format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
effect(TEMP_DEF dst,TEMP xtmp);
format %{ "CopySignF $dst, $src\t! using $xtmp as TEMP" %}
ins_encode %{
__ movl($tmp2$$Register, 0x7FFFFFFF);
__ movdl($tmp1$$XMMRegister, $tmp2$$Register);
__ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
__ vector_copy_sign_avx(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
instruct copySignD_imm_avx(regD dst, regD src, regD xtmp, immD zero) %{
Copy link

@sviswa7 sviswa7 May 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be vlRegD.

predicate(!VM_Version::supports_avx512vl());
match(Set dst (CopySignD dst (Binary src zero)));
ins_cost(100);
effect(TEMP tmp1, TEMP tmp2);
format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
effect(TEMP_DEF dst,TEMP xtmp);
format %{ "CopySignD $dst, $src\t! using $xtmp as TEMP" %}
ins_encode %{
__ vector_copy_sign_avx(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}

instruct copySignV_reg(vec dst, vec src, vec xtmp) %{
match(Set dst (CopySignVF dst src));
match(Set dst (CopySignVD dst src));
effect(TEMP xtmp);
Comment on lines +6574 to +6577
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vector_copy_sign_avx needs TEMP dst so may need two different instruct rules.

format %{ "vector_copysign $dst, $src\t! using $xtmp as TEMP" %}
ins_encode %{
__ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
__ movq($tmp1$$XMMRegister, $tmp2$$Register);
__ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
int vlen_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
if (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(this) == 64) {
__ vector_copy_sign_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
} else {
__ vector_copy_sign_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/adlc/formssel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4368,7 +4368,7 @@ bool MatchRule::is_vector() const {
"FmaVD", "FmaVF", "FmaVHF", "PopCountVI", "PopCountVL", "PopulateIndex", "VectorLongToMask",
"CountLeadingZerosV", "CountTrailingZerosV", "SignumVF", "SignumVD", "SaturatingAddV", "SaturatingSubV",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast", "CopySignVF", "CopySignVD",
"RoundVF", "RoundVD",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/classes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ macro(SignumD)
macro(SignumF)
macro(SignumVF)
macro(SignumVD)
macro(CopySignVF)
macro(CopySignVD)
macro(SqrtD)
macro(SqrtF)
macro(SqrtHF)
Expand Down
8 changes: 6 additions & 2 deletions src/hotspot/share/opto/superwordVTransformBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,16 +158,20 @@ VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(co
// v = MulAddS2I(a, b) = a0 * b0 + a1 * b1
assert(p0->req() == 5, "MulAddS2I should have 4 operands");
vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, pack_size);
} else if (opc == Op_CopySignD) {
assert(p0->req() == 4, "CopySignD should have 3 operands");
vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, pack_size);
} else {
assert(p0->req() == 3 ||
p0->is_CMove() ||
VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc) ||
VectorNode::is_convert_opcode(opc) ||
VectorNode::is_reinterpret_opcode(opc) ||
VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc) ||
opc == Op_FmaD ||
opc == Op_FmaF ||
opc == Op_FmaD ||
opc == Op_FmaF ||
opc == Op_FmaHF ||
opc == Op_CopySignF ||
opc == Op_SignumF ||
opc == Op_SignumD,
"pack type must be in this list");
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/share/opto/vectornode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_SignumVF;
case Op_SignumD:
return Op_SignumVD;
case Op_CopySignF:
return Op_CopySignVF;
case Op_CopySignD:
return Op_CopySignVD;
case Op_ReinterpretS2HF:
case Op_ReinterpretHF2S:
return Op_VectorReinterpret;
Expand Down Expand Up @@ -749,6 +753,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
case Op_SqrtVF : return new SqrtVFNode(n1, vt);
case Op_SqrtVD : return new SqrtVDNode(n1, vt);

case Op_CopySignVF: return new CopySignVFNode(n1, n2, vt);
case Op_CopySignVD: return new CopySignVDNode(n1, n2, vt);

case Op_RoundVF: return new RoundVFNode(n1, vt);
case Op_RoundVD: return new RoundVDNode(n1, vt);

Expand Down
16 changes: 16 additions & 0 deletions src/hotspot/share/opto/vectornode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2089,6 +2089,22 @@ class ReverseBytesVNode : public VectorNode {
virtual int Opcode() const;
};

// Vector form of the scalar CopySignF node: VectorNode::opcode() maps
// Op_CopySignF to Op_CopySignVF, and VectorNode::make() builds this node from
// two inputs and the vector type.
// NOTE(review): per the x86 match rule (CopySignVF dst src), in1 appears to
// supply the magnitude and in2 the sign -- confirm against the scalar node.
class CopySignVFNode : public VectorNode {
public:
CopySignVFNode(Node* in1, Node* in2, const TypeVect* vt)
: VectorNode(in1, in2, vt) {}

virtual int Opcode() const;
};

// Vector form of the scalar CopySignD node: VectorNode::opcode() maps
// Op_CopySignD to Op_CopySignVD, and VectorNode::make() builds this node from
// two inputs and the vector type.
// NOTE(review): per the x86 match rule (CopySignVD dst src), in1 appears to
// supply the magnitude and in2 the sign -- confirm against the scalar node.
class CopySignVDNode : public VectorNode {
public:
CopySignVDNode(Node* in1, Node* in2, const TypeVect* vt)
: VectorNode(in1, in2, vt) {}

virtual int Opcode() const;
};

class SignumVFNode : public VectorNode {
public:
SignumVFNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
Expand Down
Loading