From d9d742e411c28ce4799fd72b91881e0de88cc38b Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Tue, 24 May 2022 18:23:21 -0700 Subject: [PATCH 1/7] Limit auto vectorization to 32 byte vector on Cascade Lake --- src/hotspot/cpu/x86/vm_version_x86.cpp | 10 ++++++++-- src/hotspot/share/opto/c2_globals.hpp | 5 +++++ src/hotspot/share/opto/superword.cpp | 22 +++++++++++++++++----- src/hotspot/share/opto/superword.hpp | 2 ++ src/hotspot/share/opto/vectornode.cpp | 6 ++++-- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 41a43c7de3c97..fed2c4e8fffda 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -896,8 +896,14 @@ void VM_Version::get_processor_features() { } if (FLAG_IS_DEFAULT(UseAVX)) { // Don't use AVX-512 on older Skylakes unless explicitly requested. - if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { - FLAG_SET_DEFAULT(UseAVX, 2); + if (use_avx_limit > 2 && is_intel_skylake()) { + if (_stepping < 5) { + FLAG_SET_DEFAULT(UseAVX, 2); + } else { + if (FLAG_IS_DEFAULT(SuperWordMaxVectorLimit)) { + FLAG_SET_DEFAULT(SuperWordMaxVectorLimit, 32); + } + } } else { FLAG_SET_DEFAULT(UseAVX, use_avx_limit); } diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 65995f7bdda7b..9f3ddb33bf243 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -82,6 +82,11 @@ "actual size could be less depending on elements type") \ range(0, max_jint) \ \ + product(intx, SuperWordMaxVectorLimit, -1, \ + "Vector size limit in bytes for superword, " \ + "-1 implies superword vector size limit is same as MaxVectorSize")\ + range(-1, max_jint) \ + \ product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \ "Partial inline size used for small array operations" \ "(e.g. copy,cmp) acceleration.") \ diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 100abbad65499..f41ee1099ca73 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -201,6 +201,18 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { return success; } +//------------------------------max vector size------------------------------ +int SuperWord::max_vector_size(BasicType bt) { + int max_vector = Matcher::max_vector_size(bt); + if (SuperWordMaxVectorLimit != -1) { + int max_vector_limit = SuperWordMaxVectorLimit / type2aelembytes(bt); + if (max_vector > max_vector_limit) { + max_vector = max_vector_limit; + } + } + return max_vector; +} + //------------------------------early unrolling analysis------------------------------ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { bool is_slp = true; @@ -219,7 +231,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { ignored_loop_nodes[i] = -1; } - int max_vector = Matcher::max_vector_size(T_BYTE); + int max_vector = max_vector_size(T_BYTE); // Process the loop, some/all of the stack entries will not be in order, ergo // need to preprocess the ignored initial state before we process the loop @@ -354,8 +366,8 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { if (is_java_primitive(bt) == false) continue; - int cur_max_vector = Matcher::max_vector_size(bt); - + int cur_max_vector = max_vector_size(bt); + // If a max vector exists which is not larger than _local_loop_unroll_factor // stop looking, we already have the max vector to map to. if (cur_max_vector < local_loop_unroll_factor) { @@ -993,7 +1005,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) { } } if (should_combine_adjacent) { - vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2); + vw = MIN2(max_vector_size(btype)*type2aelembytes(btype), vw * 2); } } @@ -1691,7 +1703,7 @@ void SuperWord::combine_packs() { Node_List* p1 = _packset.at(i); if (p1 != NULL) { BasicType bt = velt_basic_type(p1->at(0)); - uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector + uint max_vlen = max_vector_size(bt); // Max elements in vector assert(is_power_of_2(max_vlen), "sanity"); uint psize = p1->size(); if (!is_power_of_2(psize)) { diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 6bb0aa08ed781..3ef389dfb77d5 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -327,6 +327,8 @@ class SuperWord : public ResourceObj { bool transform_loop(IdealLoopTree* lpt, bool do_optimization); + int max_vector_size(BasicType bt); + void unrolling_analysis(int &local_loop_unroll_factor); // Accessors for SWPointer diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 5de2b3c79252d..8e28b7302fbe9 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -282,7 +282,8 @@ int VectorNode::replicate_opcode(BasicType bt) { bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { + Matcher::vector_size_supported(bt, vlen) && + (SuperWordMaxVectorLimit == -1 || (vlen * type2aelembytes(bt) <= SuperWordMaxVectorLimit))) { int vopc = VectorNode::opcode(opc, bt); // For rotate operation we will do a lazy de-generation into // OrV/LShiftV/URShiftV pattern if the target does not support @@ -1275,7 +1276,8 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { + Matcher::vector_size_supported(bt, vlen) && + (SuperWordMaxVectorLimit == -1 || (vlen * type2aelembytes(bt) <= SuperWordMaxVectorLimit))) { int vopc = ReductionNode::opcode(opc, bt); return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); } From 9c18c0a33b74764dc04ac751d5cb570e1cdcb694 Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Thu, 26 May 2022 17:45:43 -0700 Subject: [PATCH 2/7] Change option name and add checks --- src/hotspot/cpu/x86/vm_version_x86.cpp | 24 ++++++++++++++++++++---- src/hotspot/share/opto/c2_globals.hpp | 6 +++--- src/hotspot/share/opto/superword.cpp | 8 +++----- src/hotspot/share/opto/vectornode.cpp | 4 ++-- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index fed2c4e8fffda..413a6380dbe7e 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -899,15 +899,12 @@ void VM_Version::get_processor_features() { if (use_avx_limit > 2 && is_intel_skylake()) { if (_stepping < 5) { FLAG_SET_DEFAULT(UseAVX, 2); - } else { - if (FLAG_IS_DEFAULT(SuperWordMaxVectorLimit)) { - FLAG_SET_DEFAULT(SuperWordMaxVectorLimit, 32); - } } } else { FLAG_SET_DEFAULT(UseAVX, use_avx_limit); } } + if (UseAVX > use_avx_limit) { warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit); FLAG_SET_DEFAULT(UseAVX, use_avx_limit); @@ -1298,6 +1295,25 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); } + if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) { + if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 && + is_intel_skylake() && _stepping > 5) { + // Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake + FLAG_SET_DEFAULT(SuperWordMaxVectorSize, 32); + } else { + FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); + } + } else { + if (SuperWordMaxVectorSize > MaxVectorSize) { + warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %ld", MaxVectorSize); + FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); + } + if (!is_power_of_2(SuperWordMaxVectorSize)) { + warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %ld", MaxVectorSize); + FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); + } + } + #if defined(COMPILER2) && defined(ASSERT) if (MaxVectorSize > 0) { if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 9f3ddb33bf243..b9d6c422f2f8a 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -82,10 +82,10 @@ "actual size could be less depending on elements type") \ range(0, max_jint) \ \ - product(intx, SuperWordMaxVectorLimit, -1, \ + product(intx, SuperWordMaxVectorSize, 64, \ "Vector size limit in bytes for superword, " \ - "-1 implies superword vector size limit is same as MaxVectorSize")\ - range(-1, max_jint) \ + "superword vector size limit in bytes") \ + range(0, max_jint) \ \ product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \ "Partial inline size used for small array operations" \ diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index f41ee1099ca73..04c077c4c7c3e 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -204,11 +204,9 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { //------------------------------max vector size------------------------------ int SuperWord::max_vector_size(BasicType bt) { int max_vector = Matcher::max_vector_size(bt); - if (SuperWordMaxVectorLimit != -1) { - int max_vector_limit = SuperWordMaxVectorLimit / type2aelembytes(bt); - if (max_vector > max_vector_limit) { - max_vector = max_vector_limit; - } + int sw_max_vector_limit = SuperWordMaxVectorSize / type2aelembytes(bt); + if (max_vector > sw_max_vector_limit) { + max_vector = sw_max_vector_limit; } return max_vector; } diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 8e28b7302fbe9..a5041dd6aed3d 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -283,7 +283,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen) && - (SuperWordMaxVectorLimit == -1 || (vlen * type2aelembytes(bt) <= SuperWordMaxVectorLimit))) { + (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)) { int vopc = VectorNode::opcode(opc, bt); // For rotate operation we will do a lazy de-generation into // OrV/LShiftV/URShiftV pattern if the target does not support @@ -1277,7 +1277,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen) && - (SuperWordMaxVectorLimit == -1 || (vlen * type2aelembytes(bt) <= SuperWordMaxVectorLimit))) { + (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)) { int vopc = ReductionNode::opcode(opc, bt); return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); } From 64653951995f4d0b0d46c52c830e7f0a06fb7216 Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Tue, 31 May 2022 20:03:29 -0700 Subject: [PATCH 3/7] review comment resolution --- src/hotspot/share/opto/superword.cpp | 4 ++-- src/hotspot/share/opto/vectornode.cpp | 11 +++++++---- src/hotspot/share/opto/vectornode.hpp | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 04c077c4c7c3e..abfe5ab4327e1 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -208,7 +208,7 @@ int SuperWord::max_vector_size(BasicType bt) { if (max_vector > sw_max_vector_limit) { max_vector = sw_max_vector_limit; } - return max_vector; + return max_vector; } //------------------------------early unrolling analysis------------------------------ @@ -365,7 +365,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { if (is_java_primitive(bt) == false) continue; int cur_max_vector = max_vector_size(bt); - + // If a max vector exists which is not larger than _local_loop_unroll_factor // stop looking, we already have the max vector to map to. if (cur_max_vector < local_loop_unroll_factor) { diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index a5041dd6aed3d..a581bc2e1b411 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -277,13 +277,17 @@ int VectorNode::replicate_opcode(BasicType bt) { } } +bool VectorNode::vector_size_supported(BasicType bt, uint vlen) { + return (Matcher::vector_size_supported(bt, vlen) && + (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)); +} + // Also used to check if the code generator // supports the vector operation. bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen) && - (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)) { + vector_size_supported(bt, vlen)) { int vopc = VectorNode::opcode(opc, bt); // For rotate operation we will do a lazy de-generation into // OrV/LShiftV/URShiftV pattern if the target does not support @@ -1276,8 +1280,7 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen) && - (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)) { + VectorNode::vector_size_supported(bt, vlen)) { int vopc = ReductionNode::opcode(opc, bt); return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); } diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 341da3017223f..1233aac604c8a 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -87,6 +87,7 @@ class VectorNode : public TypeNode { static int opcode(int opc, BasicType bt); static int replicate_opcode(BasicType bt); + static bool vector_size_supported(BasicType bt, uint vlen); static bool implemented(int opc, uint vlen, BasicType bt); static bool is_shift(Node* n); static bool is_vshift_cnt(Node* n); From acea7666f5b2cf410f623f08ee0cba7f97402b55 Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Wed, 1 Jun 2022 13:36:44 -0700 Subject: [PATCH 4/7] Fix 32-bit build --- src/hotspot/share/opto/vectornode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index a581bc2e1b411..6705743fe4b76 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -279,7 +279,7 @@ int VectorNode::replicate_opcode(BasicType bt) { bool VectorNode::vector_size_supported(BasicType bt, uint vlen) { return (Matcher::vector_size_supported(bt, vlen) && - (vlen * type2aelembytes(bt) <= SuperWordMaxVectorSize)); + (vlen * type2aelembytes(bt) <= (uint)SuperWordMaxVectorSize)); } // Also used to check if the code generator From d677fd9a2e506bf4740b98bc44fe860d86dda88d Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Wed, 1 Jun 2022 16:00:57 -0700 Subject: [PATCH 5/7] x86 build fix --- src/hotspot/cpu/x86/vm_version_x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 413a6380dbe7e..d9962ef0636eb 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -1305,11 +1305,11 @@ void VM_Version::get_processor_features() { } } else { if (SuperWordMaxVectorSize > MaxVectorSize) { - warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %ld", MaxVectorSize); + warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize); FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); } if (!is_power_of_2(SuperWordMaxVectorSize)) { - warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %ld", MaxVectorSize); + warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize); FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); } } From e8ea837ac518d157d3c5d510cae5326586133c0e Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Wed, 1 Jun 2022 21:06:37 -0700 Subject: [PATCH 6/7] Change SuperWordMaxVectorSize to develop option --- src/hotspot/share/opto/c2_globals.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index b9d6c422f2f8a..1ecaf43fb42c2 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -82,7 +82,7 @@ "actual size could be less depending on elements type") \ range(0, max_jint) \ \ - product(intx, SuperWordMaxVectorSize, 64, \ + develop(intx, SuperWordMaxVectorSize, 64, \ "Vector size limit in bytes for superword, " \ "superword vector size limit in bytes") \ range(0, max_jint) \ From 420851609cd30820b590dd486006c14866953ca0 Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Thu, 2 Jun 2022 10:17:37 -0700 Subject: [PATCH 7/7] Review comment resolution --- src/hotspot/cpu/x86/vm_version_x86.cpp | 11 ++++------- src/hotspot/share/opto/c2_globals.hpp | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 47b178a7432b9..248c46ddf4643 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -896,15 +896,12 @@ void VM_Version::get_processor_features() { } if (FLAG_IS_DEFAULT(UseAVX)) { // Don't use AVX-512 on older Skylakes unless explicitly requested. - if (use_avx_limit > 2 && is_intel_skylake()) { - if (_stepping < 5) { - FLAG_SET_DEFAULT(UseAVX, 2); - } + if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { + FLAG_SET_DEFAULT(UseAVX, 2); } else { FLAG_SET_DEFAULT(UseAVX, use_avx_limit); } } - if (UseAVX > use_avx_limit) { warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit); FLAG_SET_DEFAULT(UseAVX, use_avx_limit); @@ -1300,9 +1297,9 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) { if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 && - is_intel_skylake() && _stepping > 5) { + is_intel_skylake() && _stepping >= 5) { // Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake - FLAG_SET_DEFAULT(SuperWordMaxVectorSize, 32); + FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32)); } else { FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); } diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 1ecaf43fb42c2..3d12db02744e9 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -82,7 +82,7 @@ "actual size could be less depending on elements type") \ range(0, max_jint) \ \ - develop(intx, SuperWordMaxVectorSize, 64, \ + product(intx, SuperWordMaxVectorSize, 64, DIAGNOSTIC, \ "Vector size limit in bytes for superword, " \ "superword vector size limit in bytes") \ range(0, max_jint) \