diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 97c5dbe03ef34..4222b824552e5 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -686,11 +686,20 @@ bool LibraryCallKit::inline_vector_frombits_coerced() { int opc = bcast_mode == VectorSupport::MODE_BITS_COERCED_LONG_TO_MASK ? Op_VectorLongToMask : Op_Replicate; if (!arch_supports_vector(opc, num_elem, elem_bt, checkFlags, true /*has_scalar_args*/)) { - log_if_needed(" ** not supported: arity=0 op=broadcast vlen=%d etype=%s ismask=%d bcast_mode=%d", - num_elem, type2name(elem_bt), - is_mask ? 1 : 0, - bcast_mode); - return false; // not supported + // If the input long sets or unsets all lanes and Replicate is supported, + // generate a MaskAll or Replicate instead. + + // The "maskAll" API uses the corresponding integer types for floating-point data. + BasicType maskall_bt = elem_bt == T_DOUBLE ? T_LONG : (elem_bt == T_FLOAT ? T_INT: elem_bt); + if (!(opc == Op_VectorLongToMask && + VectorNode::is_maskall_type(bits_type, num_elem) && + arch_supports_vector(Op_Replicate, num_elem, maskall_bt, checkFlags, true /*has_scalar_args*/))) { + log_if_needed(" ** not supported: arity=0 op=broadcast vlen=%d etype=%s ismask=%d bcast_mode=%d", + num_elem, type2name(elem_bt), + is_mask ? 
1 : 0, + bcast_mode); + return false; // not supported + } } Node* broadcast = nullptr; diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 667e74a476179..c126c91da1b3a 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -434,6 +434,16 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { return false; } +bool VectorNode::is_maskall_type(const TypeLong* type, int vlen) { // True iff 'type' is a constant whose low 'vlen' bits are all clear or all set. + assert(type != nullptr, "type must not be null"); + if (!type->is_con()) { + return false; + } + jlong mask = (-1ULL >> (64 - vlen)); // 64-bit jlong rather than 'long', which is only 32 bits wide on LLP64 targets (Windows). + jlong bit = type->get_con() & mask; + return bit == 0 || bit == mask; +} + bool VectorNode::is_muladds2i(const Node* n) { return n->Opcode() == Op_MulAddS2I; } @@ -1503,6 +1513,45 @@ Node* ReductionNode::Ideal(PhaseGVN* phase, bool can_reshape) { return nullptr; } +// Convert fromLong to maskAll if the input sets or unsets all lanes. Returns nullptr if no conversion applies. +Node* convertFromLongToMaskAll(PhaseGVN* phase, const TypeLong* bits_type, bool is_mask, const TypeVect* vt) { + uint vlen = vt->length(); + BasicType bt = vt->element_basic_type(); + // The "maskAll" API uses the corresponding integer types for floating-point data. + BasicType maskall_bt = (bt == T_FLOAT) ? T_INT : (bt == T_DOUBLE) ? T_LONG : bt; + + if (bits_type != nullptr && VectorNode::is_maskall_type(bits_type, vlen) && + Matcher::match_rule_supported_vector(Op_Replicate, vlen, maskall_bt)) { + Node* con = nullptr; + jlong con_value = (bits_type->get_con() & (jlong)(-1ULL >> (64 - vlen))) == 0 ? 0L : -1L; // Only the low vlen bits decide, consistent with is_maskall_type(). + if (maskall_bt == T_LONG) { + con = phase->longcon(con_value); + } else { + con = phase->intcon(con_value); + } + Node* res = VectorNode::scalar2vector(con, vlen, maskall_bt, is_mask); + // Convert back to the original floating-point data type. 
+ if (is_floating_point_type(bt)) { + res = new VectorMaskCastNode(phase->transform(res), vt); + } + return res; + } + return nullptr; +} + +Node* VectorLoadMaskNode::Ideal(PhaseGVN* phase, bool can_reshape) { + // VectorLoadMask(VectorLongToMask(-1/0)) => Replicate(-1/0) + if (in(1)->Opcode() == Op_VectorLongToMask) { + const TypeVect* vt = bottom_type()->is_vect(); + Node* res = convertFromLongToMaskAll(phase, in(1)->in(1)->bottom_type()->isa_long(), false, vt); + if (res != nullptr) { + return res; + } + } + + return VectorNode::Ideal(phase, can_reshape); +} + Node* VectorLoadMaskNode::Identity(PhaseGVN* phase) { BasicType out_bt = type()->is_vect()->element_basic_type(); if (!Matcher::has_predicated_vectors() && out_bt == T_BOOLEAN) { @@ -1918,6 +1967,45 @@ Node* VectorMaskOpNode::Ideal(PhaseGVN* phase, bool can_reshape) { return nullptr; } +Node* VectorMaskCastNode::Identity(PhaseGVN* phase) { + Node* in1 = in(1); + // VectorMaskCast (VectorMaskCast x) => x + if (in1->Opcode() == Op_VectorMaskCast && + vect_type()->eq(in1->in(1)->bottom_type())) { + return in1->in(1); + } + return this; +} + +// This function does the following optimization: +// VectorMaskToLong(MaskAll(l)) => (l & (-1ULL >> (64 - vlen))) +// VectorMaskToLong(VectorStoreMask(Replicate(l))) => (l & (-1ULL >> (64 - vlen))) +// l is -1 or 0. +Node* VectorMaskToLongNode::Ideal_MaskAll(PhaseGVN* phase) { + Node* in1 = in(1); + // VectorMaskToLong follows a VectorStoreMask if predicate is not supported. 
+ if (in1->Opcode() == Op_VectorStoreMask) { + assert(!in1->in(1)->bottom_type()->isa_vectmask(), "sanity"); + in1 = in1->in(1); + } + if (VectorNode::is_all_ones_vector(in1)) { + int vlen = in1->bottom_type()->is_vect()->length(); + return new ConLNode(TypeLong::make(-1ULL >> (64 - vlen))); + } + if (VectorNode::is_all_zeros_vector(in1)) { + return new ConLNode(TypeLong::ZERO); + } + return nullptr; +} + +Node* VectorMaskToLongNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* res = Ideal_MaskAll(phase); + if (res != nullptr) { + return res; + } + return VectorMaskOpNode::Ideal(phase, can_reshape); +} + Node* VectorMaskToLongNode::Identity(PhaseGVN* phase) { if (in(1)->Opcode() == Op_VectorLongToMask) { return in(1)->in(1); @@ -1927,28 +2015,41 @@ Node* VectorMaskToLongNode::Identity(PhaseGVN* phase) { Node* VectorLongToMaskNode::Ideal(PhaseGVN* phase, bool can_reshape) { const TypeVect* dst_type = bottom_type()->is_vect(); + uint vlen = dst_type->length(); + const TypeVectMask* is_mask = dst_type->isa_vectmask(); + if (in(1)->Opcode() == Op_AndL && in(1)->in(1)->Opcode() == Op_VectorMaskToLong && in(1)->in(2)->bottom_type()->isa_long() && in(1)->in(2)->bottom_type()->is_long()->is_con() && - in(1)->in(2)->bottom_type()->is_long()->get_con() == ((1L << dst_type->length()) - 1)) { + in(1)->in(2)->bottom_type()->is_long()->get_con() == ((1L << vlen) - 1)) { // Different src/dst mask length represents a re-interpretation operation, // we can however generate a mask casting operation if length matches. 
Node* src = in(1)->in(1)->in(1); - if (dst_type->isa_vectmask() == nullptr) { + if (is_mask == nullptr) { if (src->Opcode() != Op_VectorStoreMask) { return nullptr; } src = src->in(1); } const TypeVect* src_type = src->bottom_type()->is_vect(); - if (src_type->length() == dst_type->length() && - ((src_type->isa_vectmask() == nullptr && dst_type->isa_vectmask() == nullptr) || - (src_type->isa_vectmask() && dst_type->isa_vectmask()))) { + if (src_type->length() == vlen && + ((src_type->isa_vectmask() == nullptr && is_mask == nullptr) || + (src_type->isa_vectmask() && is_mask))) { return new VectorMaskCastNode(src, dst_type); } } - return nullptr; + + // VectorLongToMask(-1/0) => MaskAll(-1/0) + const TypeLong* bits_type = in(1)->bottom_type()->isa_long(); + if (bits_type && is_mask) { + Node* res = convertFromLongToMaskAll(phase, bits_type, true, dst_type); + if (res != nullptr) { + return res; + } + } + + return VectorNode::Ideal(phase, can_reshape); } Node* FmaVNode::Ideal(PhaseGVN* phase, bool can_reshape) { diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 3caaf7c59d7d3..463680d0a52dd 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -104,6 +104,7 @@ class VectorNode : public TypeNode { static bool implemented(int opc, uint vlen, BasicType bt); static bool is_shift(Node* n); static bool is_vshift_cnt(Node* n); + static bool is_maskall_type(const TypeLong* type, int vlen); static bool is_muladds2i(const Node* n); static bool is_roundopD(Node* n); static bool is_scalar_rotate(Node* n); @@ -1383,6 +1384,8 @@ class VectorMaskToLongNode : public VectorMaskOpNode { VectorMaskToLongNode(Node* mask, const Type* ty): VectorMaskOpNode(mask, ty, Op_VectorMaskToLong) {} virtual int Opcode() const; + Node* Ideal(PhaseGVN* phase, bool can_reshape); + Node* Ideal_MaskAll(PhaseGVN* phase); virtual uint ideal_reg() const { return Op_RegL; } virtual Node* Identity(PhaseGVN* phase); }; @@ 
-1776,6 +1779,7 @@ class VectorLoadMaskNode : public VectorNode { virtual int Opcode() const; virtual Node* Identity(PhaseGVN* phase); + Node* Ideal(PhaseGVN* phase, bool can_reshape); }; class VectorStoreMaskNode : public VectorNode { @@ -1795,6 +1799,7 @@ class VectorMaskCastNode : public VectorNode { const TypeVect* in_vt = in->bottom_type()->is_vect(); assert(in_vt->length() == vt->length(), "vector length must match"); } + Node* Identity(PhaseGVN* phase); virtual int Opcode() const; }; diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 01bc13482fd75..e1171255b8720 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -1387,6 +1387,21 @@ public class IRNode { vectorNode(UMAX_VL, "UMaxV", TYPE_LONG); } + public static final String MASK_ALL = PREFIX + "MASK_ALL" + POSTFIX; + static { + beforeMatchingNameRegex(MASK_ALL, "MaskAll"); + } + + public static final String VECTOR_LONG_TO_MASK = PREFIX + "VECTOR_LONG_TO_MASK" + POSTFIX; + static { + beforeMatchingNameRegex(VECTOR_LONG_TO_MASK, "VectorLongToMask"); + } + + public static final String VECTOR_MASK_TO_LONG = PREFIX + "VECTOR_MASK_TO_LONG" + POSTFIX; + static { + beforeMatchingNameRegex(VECTOR_MASK_TO_LONG, "VectorMaskToLong"); + } + // Can only be used if avx512_vnni is available. public static final String MUL_ADD_VS2VI_VNNI = PREFIX + "MUL_ADD_VS2VI_VNNI" + POSTFIX; static { diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCastIdentityTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCastIdentityTest.java new file mode 100644 index 0000000000000..e66b16f053b77 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCastIdentityTest.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* +* @test +* @bug 8356760 +* @library /test/lib / +* @summary Optimize VectorMask.fromLong for all-true/all-false cases +* @modules jdk.incubator.vector +* +* @run driver compiler.vectorapi.VectorMaskCastIdentityTest +*/ + +package compiler.vectorapi; + +import compiler.lib.ir_framework.*; +import java.util.Random; +import jdk.incubator.vector.*; +import jdk.test.lib.Asserts; +import jdk.test.lib.Utils; + +public class VectorMaskCastIdentityTest { + private static final boolean[] mr = new boolean[128]; // 128 is large enough + private static final Random rd = Utils.getRandomInstance(); + static { + for (int i = 0; i < mr.length; i++) { + mr[i] = rd.nextBoolean(); + } + } + + @Test + @IR(counts = { IRNode.VECTOR_MASK_CAST, "= 2" }, applyIfCPUFeatureOr = {"asimd", "true"}) + public static int testTwoCastToDifferentType() { + // The types before and after the two casts are not the same, so the cast cannot be eliminated. 
+ VectorMask mFloat64 = VectorMask.fromArray(FloatVector.SPECIES_64, mr, 0); + VectorMask mDouble128 = mFloat64.cast(DoubleVector.SPECIES_128); + VectorMask mInt64 = mDouble128.cast(IntVector.SPECIES_64); + return mInt64.trueCount(); + } + + @Run(test = "testTwoCastToDifferentType") + public static void testTwoCastToDifferentType_runner() { + int count = testTwoCastToDifferentType(); + VectorMask mFloat64 = VectorMask.fromArray(FloatVector.SPECIES_64, mr, 0); + Asserts.assertEquals(count, mFloat64.trueCount()); + } + + @Test + @IR(counts = { IRNode.VECTOR_MASK_CAST, "= 2" }, applyIfCPUFeatureOr = {"avx2", "true"}) + public static int testTwoCastToDifferentType2() { + // The types before and after the two casts are not the same, so the cast cannot be eliminated. + VectorMask mInt128 = VectorMask.fromArray(IntVector.SPECIES_128, mr, 0); + VectorMask mDouble256 = mInt128.cast(DoubleVector.SPECIES_256); + VectorMask mShort64 = mDouble256.cast(ShortVector.SPECIES_64); + return mShort64.trueCount(); + } + + @Run(test = "testTwoCastToDifferentType2") + public static void testTwoCastToDifferentType2_runner() { + int count = testTwoCastToDifferentType2(); + VectorMask mInt128 = VectorMask.fromArray(IntVector.SPECIES_128, mr, 0); + Asserts.assertEquals(count, mInt128.trueCount()); + } + + @Test + @IR(counts = { IRNode.VECTOR_MASK_CAST, "= 0" }, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + public static int testTwoCastToSameType() { + // The types before and after the two casts are the same, so the cast will be eliminated. 
+ VectorMask mInt128 = VectorMask.fromArray(IntVector.SPECIES_128, mr, 0); + VectorMask mFloat128 = mInt128.cast(FloatVector.SPECIES_128); + VectorMask mInt128_2 = mFloat128.cast(IntVector.SPECIES_128); + return mInt128_2.trueCount(); + } + + @Run(test = "testTwoCastToSameType") + public static void testTwoCastToSameType_runner() { + int count = testTwoCastToSameType(); + VectorMask mInt128 = VectorMask.fromArray(IntVector.SPECIES_128, mr, 0); + Asserts.assertEquals(count, mInt128.trueCount()); + } + + @Test + @IR(counts = { IRNode.VECTOR_MASK_CAST, "= 1" }, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + public static int testOneCastToDifferentType() { + // The types before and after the only cast are different, the cast will not be eliminated. + VectorMask mFloat128 = VectorMask.fromArray(FloatVector.SPECIES_128, mr, 0).not(); + VectorMask mInt128 = mFloat128.cast(IntVector.SPECIES_128); + return mInt128.trueCount(); + } + + @Run(test = "testOneCastToDifferentType") + public static void testOneCastToDifferentType_runner() { + int count = testOneCastToDifferentType(); + VectorMask mInt128 = VectorMask.fromArray(FloatVector.SPECIES_128, mr, 0).not(); + Asserts.assertEquals(count, mInt128.trueCount()); + } + + public static void main(String[] args) { + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(10000) + .addFlags("--add-modules=jdk.incubator.vector") + .start(); + } +} diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskFromLongTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskFromLongTest.java new file mode 100644 index 0000000000000..a97ce2f9162a8 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskFromLongTest.java @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* +* @test +* @bug 8356760 +* @library /test/lib / +* @summary Optimize VectorMask.fromLong for all-true/all-false cases +* @modules jdk.incubator.vector +* +* @run driver compiler.vectorapi.VectorMaskFromLongTest +*/ + +package compiler.vectorapi; + +import compiler.lib.ir_framework.*; +import jdk.incubator.vector.*; +import jdk.test.lib.Asserts; + +public class VectorMaskFromLongTest { + static final VectorSpecies B_SPECIES = ByteVector.SPECIES_MAX; + static final VectorSpecies S_SPECIES = ShortVector.SPECIES_MAX; + static final VectorSpecies I_SPECIES = IntVector.SPECIES_MAX; + static final VectorSpecies F_SPECIES = FloatVector.SPECIES_MAX; + static final VectorSpecies L_SPECIES = LongVector.SPECIES_MAX; + static final VectorSpecies D_SPECIES = DoubleVector.SPECIES_MAX; + + static boolean[] mr = new boolean[B_SPECIES.length()]; + + @ForceInline + public static void maskFromLongKernel(VectorSpecies species, long inputLong) { + VectorMask.fromLong(species, inputLong).intoArray(mr, 0); + } + + @DontInline + public static void 
verifyMaskFromLong(VectorSpecies species, long inputLong) { + for (int i = 0; i < species.length(); i++) { + long expectedValue = (inputLong >>> i) & 1L; + if (mr[i] != (expectedValue == 1L)) { + Asserts.fail("Mask bit " + i + " is expected to be " + expectedValue + + " but was " + mr[i] + " for long " + inputLong); + } + } + } + + @ForceInline + public static void testMaskFromLong(VectorSpecies species, long inputLong ) { + maskFromLongKernel(species, inputLong); + verifyMaskFromLong(species, inputLong); + } + + @ForceInline + public static void testMaskFromLongMaskAll(VectorSpecies species) { + int vlen = species.length(); + long inputLong = 0L; + testMaskFromLong(species, inputLong); + + inputLong = vlen >= 64 ? 0L : (0x1L << vlen); + testMaskFromLong(species, inputLong); + + inputLong = -1L; + testMaskFromLong(species, inputLong); + + inputLong = (-1L >>> (64 - vlen)); + testMaskFromLong(species, inputLong); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_B, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_B, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllByte() { + testMaskFromLongMaskAll(B_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_S, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_S, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllShort() { + 
testMaskFromLongMaskAll(S_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_I, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllInt() { + testMaskFromLongMaskAll(I_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_L, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllLong() { + testMaskFromLongMaskAll(L_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_I, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllFloat() { + testMaskFromLongMaskAll(F_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", 
"false" }) + @IR(counts = { IRNode.REPLICATE_L, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongMaskAllDouble() { + testMaskFromLongMaskAll(D_SPECIES); + } + + // Tests for general input long values + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_B, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_B, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongByte() { + // Test the case where some but not all bits are set. + testMaskFromLong(B_SPECIES, (-1L >>> (64 - B_SPECIES.length()))-1); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_S, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_S, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongShort() { + // Test the case where some but not all bits are set. 
+ testMaskFromLong(S_SPECIES, (-1L >>> (64 - S_SPECIES.length()))-1); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongInt() { + // Test the case where some but not all bits are set. + testMaskFromLong(I_SPECIES, (-1L >>> (64 - I_SPECIES.length()))-1); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongLong() { + // Test the case where some but not all bits are set. + testMaskFromLong(L_SPECIES, (-1L >>> (64 - L_SPECIES.length()))-1); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongFloat() { + // Test the case where some but not all bits are set. 
+ testMaskFromLong(F_SPECIES, (-1L >>> (64 - F_SPECIES.length()))-1); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskFromLongDouble() { + // Test the case where some but not all bits are set. + testMaskFromLong(D_SPECIES, (-1L >>> (64 - D_SPECIES.length()))-1); + } + + public static void main(String[] args) { + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(10000) + .addFlags("--add-modules=jdk.incubator.vector") + .start(); + } +} diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskToLongTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskToLongTest.java new file mode 100644 index 0000000000000..3201d593efe2a --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskToLongTest.java @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* +* @test +* @bug 8356760 +* @library /test/lib / +* @summary Optimize VectorMask.fromLong for all-true/all-false cases +* @modules jdk.incubator.vector +* +* @run driver compiler.vectorapi.VectorMaskToLongTest +*/ + +package compiler.vectorapi; + +import compiler.lib.ir_framework.*; +import jdk.incubator.vector.*; +import jdk.test.lib.Asserts; + +public class VectorMaskToLongTest { + static final VectorSpecies B_SPECIES = ByteVector.SPECIES_MAX; + static final VectorSpecies S_SPECIES = ShortVector.SPECIES_MAX; + static final VectorSpecies I_SPECIES = IntVector.SPECIES_MAX; + static final VectorSpecies F_SPECIES = FloatVector.SPECIES_MAX; + static final VectorSpecies L_SPECIES = LongVector.SPECIES_MAX; + static final VectorSpecies D_SPECIES = DoubleVector.SPECIES_MAX; + + @DontInline + public static void verifyMaskToLong(VectorSpecies species, long inputLong, long got) { + long expected = inputLong & (-1L >>> (64 - species.length())); + Asserts.assertEquals(expected, got, "for input long " + inputLong); + } + + @ForceInline + public static void testMaskAllToLong(VectorSpecies species) { + int vlen = species.length(); + long inputLong = 0L; + // fromLong is expected to be converted to maskAll. + long got = VectorMask.fromLong(species, inputLong).toLong(); + verifyMaskToLong(species, inputLong, got); + + inputLong = vlen >= 64 ? 
0 : (0x1L << vlen); + got = VectorMask.fromLong(species, inputLong).toLong(); + verifyMaskToLong(species, inputLong, got); + + inputLong = -1L; + got = VectorMask.fromLong(species, inputLong).toLong(); + verifyMaskToLong(species, inputLong, got); + + inputLong = (-1L >>> (64 - vlen)); + got = VectorMask.fromLong(species, inputLong).toLong(); + verifyMaskToLong(species, inputLong, got); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_B, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_B, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskAllToLongByte() { + testMaskAllToLong(B_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_S, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_S, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskAllToLongShort() { + testMaskAllToLong(S_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { 
IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskAllToLongInt() { + testMaskAllToLong(I_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskAllToLongLong() { + testMaskAllToLong(L_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_I, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" }) + public static void testMaskAllToLongFloat() { + testMaskAllToLong(F_SPECIES); + } + + @Test + @IR(counts = { IRNode.MASK_ALL, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + @IR(counts = { IRNode.REPLICATE_L, "= 0", + IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureAnd = { "avx2", "true", 
"avx512", "false" }) + public static void testMaskAllToLongDouble() { + testMaskAllToLong(D_SPECIES); + } + + // General cases for (VectorMaskToLong (VectorLongToMask (x))) => x. + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongByte() { + // Test the case where some but not all bits are set. + long inputLong = (-1L >>> (64 - B_SPECIES.length()))-1; + long got = VectorMask.fromLong(B_SPECIES, inputLong).toLong(); + verifyMaskToLong(B_SPECIES, inputLong, got); + } + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongShort() { + // Test the case where some but not all bits are set. + long inputLong = (-1L >>> (64 - S_SPECIES.length()))-1; + long got = VectorMask.fromLong(S_SPECIES, inputLong).toLong(); + verifyMaskToLong(S_SPECIES, inputLong, got); + } + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongInt() { + // Test the case where some but not all bits are set. 
+ long inputLong = (-1L >>> (64 - I_SPECIES.length()))-1; + long got = VectorMask.fromLong(I_SPECIES, inputLong).toLong(); + verifyMaskToLong(I_SPECIES, inputLong, got); + } + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 0" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongLong() { + // Test the case where some but not all bits are set. + long inputLong = (-1L >>> (64 - L_SPECIES.length()))-1; + long got = VectorMask.fromLong(L_SPECIES, inputLong).toLong(); + verifyMaskToLong(L_SPECIES, inputLong, got); + } + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 1", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongFloat() { + // Test the case where some but not all bits are set. + long inputLong = (-1L >>> (64 - F_SPECIES.length()))-1; + long got = VectorMask.fromLong(F_SPECIES, inputLong).toLong(); + verifyMaskToLong(F_SPECIES, inputLong, got); + } + + @Test + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 1", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureOr = { "sve2", "true", "avx2", "true", "rvv", "true" }) + @IR(counts = { IRNode.VECTOR_LONG_TO_MASK, "= 0", + IRNode.VECTOR_MASK_TO_LONG, "= 1" }, + applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" }) + public static void testFromLongToLongDouble() { + // Test the case where some but not all bits are set. 
+ long inputLong = (-1L >>> (64 - D_SPECIES.length()))-1; + long got = VectorMask.fromLong(D_SPECIES, inputLong).toLong(); + verifyMaskToLong(D_SPECIES, inputLong, got); + } + + public static void main(String[] args) { + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(10000) + .addFlags("--add-modules=jdk.incubator.vector") + .start(); + } +} \ No newline at end of file diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskCastOperationsBenchmark.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskCastOperationsBenchmark.java index fdef723e067b9..fae27daed9f4b 100644 --- a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskCastOperationsBenchmark.java +++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskCastOperationsBenchmark.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,9 @@ @OutputTimeUnit(TimeUnit.MILLISECONDS) @State(Scope.Thread) -@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector"}) +@Warmup(iterations = 10, time = 1) +@Measurement(iterations = 10, time = 1) +@Fork(value = 1, jvmArgs = {"--add-modules=jdk.incubator.vector"}) public class MaskCastOperationsBenchmark { VectorMask bmask64; VectorMask bmask128; @@ -50,6 +52,15 @@ public class MaskCastOperationsBenchmark { VectorMask lmask256; VectorMask lmask512; + VectorMask fmask64; + VectorMask fmask128; + VectorMask fmask256; + VectorMask fmask512; + + VectorMask dmask128; + VectorMask dmask256; + VectorMask dmask512; + static final boolean [] mask_arr = { false, false, false, true, false, false, false, false, false, false, false, true, false, false, false, false, @@ -80,6 +91,15 @@ public void BmSetup() { lmask128 = VectorMask.fromArray(LongVector.SPECIES_128, mask_arr, 0); lmask256 = VectorMask.fromArray(LongVector.SPECIES_256, mask_arr, 0); lmask512 = VectorMask.fromArray(LongVector.SPECIES_512, mask_arr, 0); + + fmask64 = VectorMask.fromArray(FloatVector.SPECIES_64, mask_arr, 0); + fmask128 = VectorMask.fromArray(FloatVector.SPECIES_128, mask_arr, 0); + fmask256 = VectorMask.fromArray(FloatVector.SPECIES_256, mask_arr, 0); + fmask512 = VectorMask.fromArray(FloatVector.SPECIES_512, mask_arr, 0); + + dmask128 = VectorMask.fromArray(DoubleVector.SPECIES_128, mask_arr, 0); + dmask256 = VectorMask.fromArray(DoubleVector.SPECIES_256, mask_arr, 0); + dmask512 = VectorMask.fromArray(DoubleVector.SPECIES_512, mask_arr, 0); } @Benchmark @@ -221,4 +241,112 @@ public VectorMask microMaskCastLong512ToShort128() { public VectorMask microMaskCastLong512ToInteger256() { return lmask512.cast(IntVector.SPECIES_256); } + + // Benchmarks for optimization "VectorMaskCast (VectorMaskCast x) => x" + + @Benchmark + public int microMaskCastCastByte64() { + return 
bmask64.cast(ShortVector.SPECIES_128).cast(ByteVector.SPECIES_64).trueCount(); + } + + @Benchmark + public int microMaskCastCastByte128() { + return bmask128.cast(ShortVector.SPECIES_256).cast(ByteVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastByte256() { + return bmask256.cast(ShortVector.SPECIES_512).cast(ByteVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastShort64() { + return smask64.cast(IntVector.SPECIES_128).cast(ShortVector.SPECIES_64).trueCount(); + } + + @Benchmark + public int microMaskCastCastShort128() { + return smask128.cast(ByteVector.SPECIES_64).cast(ShortVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastShort256() { + return smask256.cast(IntVector.SPECIES_512).cast(ShortVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastShort512() { + return smask512.cast(ByteVector.SPECIES_256).cast(ShortVector.SPECIES_512).trueCount(); + } + + @Benchmark + public int microMaskCastCastInt64() { + return imask64.cast(FloatVector.SPECIES_64).cast(IntVector.SPECIES_64).trueCount(); + } + + @Benchmark + public int microMaskCastCastInt128() { + return imask128.cast(ShortVector.SPECIES_64).cast(IntVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastInt256() { + return imask256.cast(LongVector.SPECIES_512).cast(IntVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastInt512() { + return imask512.cast(ShortVector.SPECIES_256).cast(IntVector.SPECIES_512).trueCount(); + } + + @Benchmark + public int microMaskCastCastLong128() { + return lmask128.cast(IntVector.SPECIES_64).cast(LongVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastLong256() { + return lmask256.cast(DoubleVector.SPECIES_256).cast(LongVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastLong512() { + return 
lmask512.cast(IntVector.SPECIES_256).cast(LongVector.SPECIES_512).trueCount(); + } + + @Benchmark + public int microMaskCastCastFloat64() { + return fmask64.cast(DoubleVector.SPECIES_128).cast(FloatVector.SPECIES_64).trueCount(); + } + + @Benchmark + public int microMaskCastCastFloat128() { + return fmask128.cast(DoubleVector.SPECIES_256).cast(FloatVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastFloat256() { + return fmask256.cast(IntVector.SPECIES_256).cast(FloatVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastFloat512() { + return fmask512.cast(ShortVector.SPECIES_256).cast(FloatVector.SPECIES_512).trueCount(); + } + + @Benchmark + public int microMaskCastCastDouble128() { + return dmask128.cast(FloatVector.SPECIES_64).cast(DoubleVector.SPECIES_128).trueCount(); + } + + @Benchmark + public int microMaskCastCastDouble256() { + return dmask256.cast(FloatVector.SPECIES_128).cast(DoubleVector.SPECIES_256).trueCount(); + } + + @Benchmark + public int microMaskCastCastDouble512() { + return dmask512.cast(IntVector.SPECIES_256).cast(DoubleVector.SPECIES_512).trueCount(); + } + } diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskFromLongToLongBenchmark.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskFromLongToLongBenchmark.java new file mode 100644 index 0000000000000..02aa1aced2bb6 --- /dev/null +++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskFromLongToLongBenchmark.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.jdk.incubator.vector; + +import jdk.incubator.vector.*; +import org.openjdk.jmh.annotations.*; + +@State(Scope.Thread) +@Warmup(iterations = 10, time = 1) +@Measurement(iterations = 10, time = 1) +@Fork(value = 1, jvmArgs = {"--add-modules=jdk.incubator.vector"}) +public class MaskFromLongToLongBenchmark { + private static final int ITERATION = 10000; + + @CompilerControl(CompilerControl.Mode.INLINE) + public long microMaskFromLongToLong(VectorSpecies species) { + long result = 0; + for (int i = 0; i < ITERATION; i++) { + long mask = Math.min(-1, Math.max(-1, result)); + result += VectorMask.fromLong(species, mask).toLong(); + } + return result; + } + + @Benchmark + public long microMaskFromLongToLong_Byte64() { + return microMaskFromLongToLong(ByteVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Byte128() { + return microMaskFromLongToLong(ByteVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Byte256() { + return microMaskFromLongToLong(ByteVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Byte512() { + return microMaskFromLongToLong(ByteVector.SPECIES_512); + } + + @Benchmark + public long microMaskFromLongToLong_Short64() { + return 
microMaskFromLongToLong(ShortVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Short128() { + return microMaskFromLongToLong(ShortVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Short256() { + return microMaskFromLongToLong(ShortVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Short512() { + return microMaskFromLongToLong(ShortVector.SPECIES_512); + } + + @Benchmark + public long microMaskFromLongToLong_Integer64() { + return microMaskFromLongToLong(IntVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Integer128() { + return microMaskFromLongToLong(IntVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Integer256() { + return microMaskFromLongToLong(IntVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Integer512() { + return microMaskFromLongToLong(IntVector.SPECIES_512); + } + + @Benchmark + public long microMaskFromLongToLong_Long64() { + return microMaskFromLongToLong(LongVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Long128() { + return microMaskFromLongToLong(LongVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Long256() { + return microMaskFromLongToLong(LongVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Long512() { + return microMaskFromLongToLong(LongVector.SPECIES_512); + } + + @Benchmark + public long microMaskFromLongToLong_Float64() { + return microMaskFromLongToLong(FloatVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Float128() { + return microMaskFromLongToLong(FloatVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Float256() { + return microMaskFromLongToLong(FloatVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Float512() { + return microMaskFromLongToLong(FloatVector.SPECIES_512); + } + + @Benchmark + public 
long microMaskFromLongToLong_Double64() { + return microMaskFromLongToLong(DoubleVector.SPECIES_64); + } + + @Benchmark + public long microMaskFromLongToLong_Double128() { + return microMaskFromLongToLong(DoubleVector.SPECIES_128); + } + + @Benchmark + public long microMaskFromLongToLong_Double256() { + return microMaskFromLongToLong(DoubleVector.SPECIES_256); + } + + @Benchmark + public long microMaskFromLongToLong_Double512() { + return microMaskFromLongToLong(DoubleVector.SPECIES_512); + } +}