From dfa0afae3cd33df3dc2a9595cd15a751e2821d21 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 1 Jul 2024 15:31:40 +0200 Subject: [PATCH 01/89] 8335392 --- src/hotspot/share/opto/mempointer.cpp | 27 +++++++++ src/hotspot/share/opto/mempointer.hpp | 82 +++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 src/hotspot/share/opto/mempointer.cpp create mode 100644 src/hotspot/share/opto/mempointer.hpp diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp new file mode 100644 index 0000000000000..7275c7bb0fd1d --- /dev/null +++ b/src/hotspot/share/opto/mempointer.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "opto/mempointer.hpp" + + diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp new file mode 100644 index 0000000000000..1d32608c6ed3a --- /dev/null +++ b/src/hotspot/share/opto/mempointer.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_OPTO_MEMPOINTER_HPP +#define SHARE_OPTO_MEMPOINTER_HPP + +#include "opto/memnode.hpp" + +// TODO general description + +// Summand of a MemPointerSimpleForm. +// if var is a long (varL): +// s = scaleL * varL +// else, i.e. 
if var is a int (varI): +// s = scaleL * ConvI2L(scaleI * varI) +// +class MemPointerSummand : public StackObj { +public: + const Node* _var; + const jlong _scaleL; + const jlong _scaleI; + +public: + MemPointerSummand() : _var(nullptr), _scaleL(0), _scaleI(0) {} + MemPointerSummand(const Node* var, const jlong scaleL, const jlong scaleI) + : _var(var), _scaleL(scaleL), _scaleI(scaleI) + { + assert(_var != nullptr, "must have variable"); + assert(_scaleL != 0 && _scaleI != 0, "non-zero scale"); + } +}; + +// Simple form of the pointer sub-expression of "pointer". +// +// pointer = sum(summands) + con +// +class MemPointerSimpleForm : public StackObj { +private: + static const int SUMMANDS_SIZE = 10; // TODO good? + + bool _is_valid; // the parsing succeeded + Node* _pointer; // pointer node associated with this (sub)pointer + + MemPointerSummand _summands[SUMMANDS_SIZE]; + jlong _con; + + MemPointerSimpleForm() {} +}; + +// TODO +class MemPointer : public StackObj { +private: + bool _is_valid; + Node* _mem; + + MemPointerSimpleForm _simple_form; +}; + +#endif // SHARE_OPTO_MEMPOINTER_HPP + + From 0cf7be6a97839d61cd6120058e1a04ef4e40b79b Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 1 Jul 2024 15:56:52 +0200 Subject: [PATCH 02/89] rm old code --- src/hotspot/share/opto/memnode.cpp | 259 +++-------------------------- 1 file changed, 24 insertions(+), 235 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index d0b6c59637f13..5fa50c8242fcd 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2697,184 +2697,6 @@ uint StoreNode::hash() const { return NO_HASH; } -// Class to parse array pointers, and determine if they are adjacent. 
We parse the form: -// -// pointer = base -// + constant_offset -// + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) -// + sum(other_offsets) -// -// -// Note: we accumulate all constant offsets into constant_offset, even the int constant behind -// the "LShiftL(ConvI2L(...))" pattern. We convert "ConvI2L(int_offset + int_con)" to -// "ConvI2L(int_offset) + int_con", which is only safe if we can assume that either all -// compared addresses have an overflow for "int_offset + int_con" or none. -// For loads and stores on arrays, we know that if one overflows and the other not, then -// the two addresses lay almost max_int indices apart, but the maximal array size is -// only about half of that. Therefore, the RangeCheck on at least one of them must have -// failed. -// -// constant_offset += LShiftL( ConvI2L(int_con), int_offset_shift) -// -// pointer = base -// + constant_offset -// + LShiftL( ConvI2L(int_offset), int_offset_shift) -// + sum(other_offsets) -// -class ArrayPointer { -private: - const bool _is_valid; // The parsing succeeded - const Node* _pointer; // The final pointer to the position in the array - const Node* _base; // Base address of the array - const jlong _constant_offset; // Sum of collected constant offsets - const Node* _int_offset; // (optional) Offset behind LShiftL and ConvI2L - const jint _int_offset_shift; // (optional) Shift value for int_offset - const GrowableArray* _other_offsets; // List of other AddP offsets - - ArrayPointer(const bool is_valid, - const Node* pointer, - const Node* base, - const jlong constant_offset, - const Node* int_offset, - const jint int_offset_shift, - const GrowableArray* other_offsets) : - _is_valid(is_valid), - _pointer(pointer), - _base(base), - _constant_offset(constant_offset), - _int_offset(int_offset), - _int_offset_shift(int_offset_shift), - _other_offsets(other_offsets) - { - assert(_pointer != nullptr, "must always have pointer"); - assert(is_valid == (_base != nullptr), "have base 
exactly if valid"); - assert(is_valid == (_other_offsets != nullptr), "have other_offsets exactly if valid"); - } - - static ArrayPointer make_invalid(const Node* pointer) { - return ArrayPointer(false, pointer, nullptr, 0, nullptr, 0, nullptr); - } - - static bool parse_int_offset(Node* offset, Node*& int_offset, jint& int_offset_shift) { - // offset = LShiftL( ConvI2L(int_offset), int_offset_shift) - if (offset->Opcode() == Op_LShiftL && - offset->in(1)->Opcode() == Op_ConvI2L && - offset->in(2)->Opcode() == Op_ConI) { - int_offset = offset->in(1)->in(1); // LShiftL -> ConvI2L -> int_offset - int_offset_shift = offset->in(2)->get_int(); // LShiftL -> int_offset_shift - return true; - } - - // offset = ConvI2L(int_offset) = LShiftL( ConvI2L(int_offset), 0) - if (offset->Opcode() == Op_ConvI2L) { - int_offset = offset->in(1); - int_offset_shift = 0; - return true; - } - - // parse failed - return false; - } - -public: - // Parse the structure above the pointer - static ArrayPointer make(PhaseGVN* phase, const Node* pointer) { - assert(phase->type(pointer)->isa_aryptr() != nullptr, "must be array pointer"); - if (!pointer->is_AddP()) { return ArrayPointer::make_invalid(pointer); } - - const Node* base = pointer->in(AddPNode::Base); - if (base == nullptr) { return ArrayPointer::make_invalid(pointer); } - - const int search_depth = 5; - Node* offsets[search_depth]; - int count = pointer->as_AddP()->unpack_offsets(offsets, search_depth); - - // We expect at least a constant each - if (count <= 0) { return ArrayPointer::make_invalid(pointer); } - - // We extract the form: - // - // pointer = base - // + constant_offset - // + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) - // + sum(other_offsets) - // - jlong constant_offset = 0; - Node* int_offset = nullptr; - jint int_offset_shift = 0; - GrowableArray* other_offsets = new GrowableArray(count); - - for (int i = 0; i < count; i++) { - Node* offset = offsets[i]; - if (offset->Opcode() == Op_ConI) { - // 
Constant int offset - constant_offset += offset->get_int(); - } else if (offset->Opcode() == Op_ConL) { - // Constant long offset - constant_offset += offset->get_long(); - } else if(int_offset == nullptr && parse_int_offset(offset, int_offset, int_offset_shift)) { - // LShiftL( ConvI2L(int_offset), int_offset_shift) - int_offset = int_offset->uncast(); - if (int_offset->Opcode() == Op_AddI && int_offset->in(2)->Opcode() == Op_ConI) { - // LShiftL( ConvI2L(int_offset + int_con), int_offset_shift) - constant_offset += ((jlong)int_offset->in(2)->get_int()) << int_offset_shift; - int_offset = int_offset->in(1); - } - } else { - // All others - other_offsets->append(offset); - } - } - - return ArrayPointer(true, pointer, base, constant_offset, int_offset, int_offset_shift, other_offsets); - } - - bool is_adjacent_to_and_before(const ArrayPointer& other, const jlong data_size) const { - if (!_is_valid || !other._is_valid) { return false; } - - // Offset adjacent? - if (this->_constant_offset + data_size != other._constant_offset) { return false; } - - // All other components identical? 
- if (this->_base != other._base || - this->_int_offset != other._int_offset || - this->_int_offset_shift != other._int_offset_shift || - this->_other_offsets->length() != other._other_offsets->length()) { - return false; - } - - for (int i = 0; i < this->_other_offsets->length(); i++) { - Node* o1 = this->_other_offsets->at(i); - Node* o2 = other._other_offsets->at(i); - if (o1 != o2) { return false; } - } - - return true; - } - -#ifndef PRODUCT - void dump() { - if (!_is_valid) { - tty->print("ArrayPointer[%d %s, invalid]", _pointer->_idx, _pointer->Name()); - return; - } - tty->print("ArrayPointer[%d %s, base[%d %s] + %lld", - _pointer->_idx, _pointer->Name(), - _base->_idx, _base->Name(), - (long long)_constant_offset); - if (_int_offset != 0) { - tty->print(" + I2L[%d %s] << %d", - _int_offset->_idx, _int_offset->Name(), _int_offset_shift); - } - for (int i = 0; i < _other_offsets->length(); i++) { - Node* n = _other_offsets->at(i); - tty->print(" + [%d %s]", n->_idx, n->Name()); - } - tty->print_cr("]"); - } -#endif -}; - // Link together multiple stores (B/S/C/I) into a longer one. // // Example: _store = StoreB[i+3] @@ -2910,13 +2732,13 @@ class ArrayPointer { // of adjacent stores there remains exactly one RangeCheck, located between the // first and the second store (e.g. RangeCheck[i+3]). 
// -class MergePrimitiveArrayStores : public StackObj { +class MergePrimitiveStores : public StackObj { private: PhaseGVN* _phase; StoreNode* _store; public: - MergePrimitiveArrayStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {} + MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {} StoreNode* run(); @@ -2963,27 +2785,14 @@ class MergePrimitiveArrayStores : public StackObj { DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; ) }; -StoreNode* MergePrimitiveArrayStores::run() { +StoreNode* MergePrimitiveStores::run() { // Check for B/S/C/I int opc = _store->Opcode(); if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) { return nullptr; } - // Only merge stores on arrays, and the stores must have the same size as the elements. - const TypePtr* ptr_t = _store->adr_type(); - if (ptr_t == nullptr) { - return nullptr; - } - const TypeAryPtr* aryptr_t = ptr_t->isa_aryptr(); - if (aryptr_t == nullptr) { - return nullptr; - } - BasicType bt = aryptr_t->elem()->array_element_basic_type(); - if (!is_java_primitive(bt) || - type2aelembytes(bt) != _store->memory_size()) { - return nullptr; - } + // TODO maybe parse pointer, see if viable? - only if cached! // The _store must be the "last" store in a chain. If we find a use we could merge with // then that use or a store further down is the "last" store. @@ -3013,37 +2822,21 @@ StoreNode* MergePrimitiveArrayStores::run() { } // Check compatibility between _store and other_store. 
-bool MergePrimitiveArrayStores::is_compatible_store(const StoreNode* other_store) const { +bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) const { int opc = _store->Opcode(); assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition"); - assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store"); + // assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store"); if (other_store == nullptr || - _store->Opcode() != other_store->Opcode() || - other_store->adr_type() == nullptr || - other_store->adr_type()->isa_aryptr() == nullptr) { + _store->Opcode() != other_store->Opcode()) { return false; } - // Check that the size of the stores, and the array elements are all the same. - const TypeAryPtr* aryptr_t1 = _store->adr_type()->is_aryptr(); - const TypeAryPtr* aryptr_t2 = other_store->adr_type()->is_aryptr(); - BasicType aryptr_bt1 = aryptr_t1->elem()->array_element_basic_type(); - BasicType aryptr_bt2 = aryptr_t2->elem()->array_element_basic_type(); - if (!is_java_primitive(aryptr_bt1) || !is_java_primitive(aryptr_bt2)) { - return false; - } - int size1 = type2aelembytes(aryptr_bt1); - int size2 = type2aelembytes(aryptr_bt2); - if (size1 != size2 || - size1 != _store->memory_size() || - _store->memory_size() != other_store->memory_size()) { - return false; - } + // TODO: check if same base or both no base??? 
return true; } -bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const { +bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const { if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn), use_store->in(MemNode::ValueIn), def_store->memory_size())) { @@ -3051,16 +2844,12 @@ bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, con } ResourceMark rm; - ArrayPointer array_pointer_use = ArrayPointer::make(_phase, use_store->in(MemNode::Address)); - ArrayPointer array_pointer_def = ArrayPointer::make(_phase, def_store->in(MemNode::Address)); - if (!array_pointer_def.is_adjacent_to_and_before(array_pointer_use, use_store->memory_size())) { - return false; - } + // TODO return true; } -bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const { +bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const { // Pattern: [n1 = ConI, n2 = ConI] if (n1->Opcode() == Op_ConI) { return n2->Opcode() == Op_ConI; @@ -3102,7 +2891,7 @@ bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Nod } // Detect pattern: n = base_out >> shift_out -bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) { +bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) { assert(n != nullptr, "precondition"); int opc = n->Opcode(); @@ -3125,7 +2914,7 @@ bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_o } // Check if there is nothing between the two stores, except optionally a RangeCheck leading to an uncommon trap. 
-MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) { +MergePrimitiveStores::CFGStatus MergePrimitiveStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) { assert(use_store->in(MemNode::Memory) == def_store, "use-def relationship"); Node* ctrl_use = use_store->in(MemNode::Control); @@ -3170,7 +2959,7 @@ MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_p return CFGStatus::SuccessWithRangeCheck; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_store(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const StoreNode* def_store) const { Status status_use = find_use_store(def_store); StoreNode* use_store = status_use.found_store(); if (use_store != nullptr && !is_adjacent_pair(use_store, def_store)) { @@ -3179,7 +2968,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_s return status_use; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_store(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_def_store(const StoreNode* use_store) const { Status status_def = find_def_store(use_store); StoreNode* def_store = status_def.found_store(); if (def_store != nullptr && !is_adjacent_pair(use_store, def_store)) { @@ -3188,7 +2977,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_s return status_def; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_use_store(const StoreNode* def_store) const { Status status_use = find_use_store_unidirectional(def_store); #ifdef ASSERT @@ -3204,7 +2993,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(cons 
return status_use; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_def_store(const StoreNode* use_store) const { Status status_def = find_def_store_unidirectional(use_store); #ifdef ASSERT @@ -3220,7 +3009,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(cons return status_def; } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unidirectional(const StoreNode* def_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_use_store_unidirectional(const StoreNode* def_store) const { assert(is_compatible_store(def_store), "precondition: must be compatible with _store"); for (DUIterator_Fast imax, i = def_store->fast_outs(imax); i < imax; i++) { @@ -3233,7 +3022,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unid return Status::make_failure(); } -MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unidirectional(const StoreNode* use_store) const { +MergePrimitiveStores::Status MergePrimitiveStores::find_def_store_unidirectional(const StoreNode* use_store) const { assert(is_compatible_store(use_store), "precondition: must be compatible with _store"); StoreNode* def_store = use_store->in(MemNode::Memory)->isa_Store(); @@ -3244,7 +3033,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unid return Status::make(def_store, cfg_status_for_pair(use_store, def_store)); } -void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const { +void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const { // The merged store can be at most 8 bytes. 
const uint merge_list_max_size = 8 / _store->memory_size(); assert(merge_list_max_size >= 2 && @@ -3275,7 +3064,7 @@ void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const } // Merge the input values of the smaller stores to a single larger input value. -Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_list) { +Node* MergePrimitiveStores::make_merged_input_value(const Node_List& merge_list) { int new_memory_size = _store->memory_size() * merge_list.size(); Node* first = merge_list.at(merge_list.size()-1); Node* merged_input_value = nullptr; @@ -3361,7 +3150,7 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_ // | | | | | | | | // // last_store (= _store) merged_store // // // -StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) { +StoreNode* MergePrimitiveStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) { Node* first_store = merge_list.at(merge_list.size()-1); Node* last_ctrl = _store->in(MemNode::Control); // after (optional) RangeCheck Node* first_mem = first_store->in(MemNode::Memory); @@ -3391,7 +3180,7 @@ StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_l } #ifdef ASSERT -void MergePrimitiveArrayStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const { +void MergePrimitiveStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const { stringStream ss; ss.print_cr("[TraceMergeStores]: Replace"); for (int i = (int)merge_list.size() - 1; i >= 0; i--) { @@ -3491,7 +3280,7 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { if (MergeStores && UseUnalignedAccesses) { if (phase->C->post_loop_opts_phase()) { - MergePrimitiveArrayStores merge(phase, this); + MergePrimitiveStores merge(phase, this); Node* progress = merge.run(); if (progress != 
nullptr) { return progress; } } else { From 98ee6eb9d516d4f583775199e3f45b2737fd782e Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 2 Jul 2024 11:41:34 +0200 Subject: [PATCH 03/89] MemPointer::parse_simple_form WIP --- src/hotspot/share/opto/memnode.cpp | 8 +- src/hotspot/share/opto/mempointer.cpp | 120 ++++++++++++++++++++++++++ src/hotspot/share/opto/mempointer.hpp | 33 +++++-- 3 files changed, 153 insertions(+), 8 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 5fa50c8242fcd..7c8468ada314d 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -42,6 +42,7 @@ #include "opto/machnode.hpp" #include "opto/matcher.hpp" #include "opto/memnode.hpp" +#include "opto/mempointer.hpp" #include "opto/mulnode.hpp" #include "opto/narrowptrnode.hpp" #include "opto/phaseX.hpp" @@ -2844,8 +2845,11 @@ bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const St } ResourceMark rm; - // TODO - + const MemPointer pointer_use(_phase, use_store); + const MemPointer pointer_def(_phase, def_store); + if (!pointer_def.is_adjacent_to_and_before(pointer_use)) { + return false; + } return true; } diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 7275c7bb0fd1d..f4d280d307921 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -23,5 +23,125 @@ */ #include "opto/mempointer.hpp" +#include "utilities/resourceHash.hpp" +MemPointerSimpleForm MemPointer::parse_simple_form(Node* pointer) { + ResourceMark rm; + ResourceHashtable idx_to_simple_form; + GrowableArray stack; + + auto get_simple_form_of_input_or_push = [&](Node* n, int i) { + Node* in = n->in(i); + MemPointerSimpleForm* simple_form = idx_to_simple_form.get(in->_idx); + if (simple_form == nullptr) { + stack.push(in); + } + return simple_form; + }; + + stack.push(pointer); + while (stack.is_nonempty()) { + Node* n = stack.top(); + 
+ if (idx_to_simple_form.get(n->_idx) != nullptr) { + stack.pop(); // already processed elsewhere + continue; + } + + n->dump(); + + int opc = n->Opcode(); + switch (opc) { + case Op_ConI: + case Op_ConL: + { + jlong con = (opc == Op_ConI) ? n->get_int() : n->get_long(); + MemPointerSimpleForm f = MemPointerSimpleForm::make_from_ConIL(n, con); + idx_to_simple_form.put_when_absent(n->_idx, f); + stack.pop(); + continue; + } + case Op_AddP: + case Op_AddL: + case Op_AddI: + case Op_SubL: + case Op_SubI: + { + const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, (opc == Op_AddP) ? 2 : 1); + const MemPointerSimpleForm* b = get_simple_form_of_input_or_push(n, (opc == Op_AddP) ? 3 : 2); + if (a == nullptr || b == nullptr) { continue; } + MemPointerSimpleForm f = MemPointerSimpleForm::make_from_AddSubILP(n, a, b); + idx_to_simple_form.put_when_absent(n->_idx, f); + stack.pop(); + continue; + } + case Op_MulL: + case Op_MulI: + case Op_LShiftL: + case Op_LShiftI: + { + // Form must be linear: only multiplication with constants is allowed. + Node* in2 = n->in(2); + if (!in2->is_Con()) { break; } + jlong scale; + switch (opc) { + case Op_MulL: scale = in2->get_long(); break; + case Op_MulI: scale = in2->get_int(); break; + case Op_LShiftL: + case Op_LShiftI: + assert(false, "shift"); + } + // Scale cannot be too large: TODO make this a special method, maybe better threshold? 
+ const jlong max_scale = 1 << 30; + if (scale > max_scale || scale < -max_scale) { break; } + + const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, 1); + if (a == nullptr) { continue; } + MemPointerSimpleForm f = MemPointerSimpleForm::make_from_Mul(n, a, scale); + idx_to_simple_form.put_when_absent(n->_idx, f); + stack.pop(); + continue; + } + case Op_CastII: + case Op_CastLL: + case Op_CastX2P: + { + assert(false, "unary"); + break; + } + case Op_ConvI2L: + { + const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, 1); + if (a == nullptr) { continue; } + MemPointerSimpleForm f = MemPointerSimpleForm::make_from_ConvI2L(n, a); + idx_to_simple_form.put_when_absent(n->_idx, f); + stack.pop(); + continue; + } + } + assert(false, "default"); + } + + return MemPointerSimpleForm(); +} + +MemPointerSimpleForm MemPointerSimpleForm::make_from_ConIL(Node* n, const jlong con) { + return MemPointerSimpleForm(); // TODO +} + +MemPointerSimpleForm MemPointerSimpleForm::make_from_AddSubILP(Node* n, const MemPointerSimpleForm* a, const MemPointerSimpleForm* b) { + return MemPointerSimpleForm(); // TODO +} + +MemPointerSimpleForm MemPointerSimpleForm::make_from_Mul(Node* n, const MemPointerSimpleForm* a, const jlong scale) { + return MemPointerSimpleForm(); // TODO +} + +MemPointerSimpleForm MemPointerSimpleForm::make_from_ConvI2L(Node* n, const MemPointerSimpleForm* a) { + return MemPointerSimpleForm(); // TODO +} + +bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { + return true; // TODO +} diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 1d32608c6ed3a..de8523d96b568 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -37,13 +37,13 @@ // class MemPointerSummand : public StackObj { public: - const Node* _var; - const jlong _scaleL; - const jlong _scaleI; + Node* _var; + jlong _scaleL; + jlong _scaleI; public: MemPointerSummand() : 
_var(nullptr), _scaleL(0), _scaleI(0) {} - MemPointerSummand(const Node* var, const jlong scaleL, const jlong scaleI) + MemPointerSummand(Node* var, const jlong scaleL, const jlong scaleI) : _var(var), _scaleL(scaleL), _scaleI(scaleI) { assert(_var != nullptr, "must have variable"); @@ -65,16 +65,37 @@ class MemPointerSimpleForm : public StackObj { MemPointerSummand _summands[SUMMANDS_SIZE]; jlong _con; +public: MemPointerSimpleForm() {} + + static MemPointerSimpleForm make_from_ConIL(Node* n, const jlong con); + static MemPointerSimpleForm make_from_AddSubILP(Node* n, const MemPointerSimpleForm* a, const MemPointerSimpleForm* b); + static MemPointerSimpleForm make_from_Mul(Node* n, const MemPointerSimpleForm* a, const jlong scale); + static MemPointerSimpleForm make_from_ConvI2L(Node* n, const MemPointerSimpleForm* a); }; // TODO class MemPointer : public StackObj { private: bool _is_valid; - Node* _mem; - + const MemNode* _mem; MemPointerSimpleForm _simple_form; + +public: + MemPointer(PhaseGVN* phase, const MemNode* mem) : + _is_valid(false), + _mem(mem) + { + assert(_mem->is_Store(), "only stores are supported"); + Node* pointer = mem->in(MemNode::Address); + _simple_form = parse_simple_form(pointer); + assert(false, "TODO"); + // _mem->memory_size(); + } + + static MemPointerSimpleForm parse_simple_form(Node* pointer); + + bool is_adjacent_to_and_before(const MemPointer& other) const; }; #endif // SHARE_OPTO_MEMPOINTER_HPP From 702efada174afcd878827641aa6df3f1c1a20a94 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 5 Jul 2024 12:41:16 +0200 Subject: [PATCH 04/89] refactor --- src/hotspot/share/opto/mempointer.cpp | 183 +++++++++++--------------- src/hotspot/share/opto/mempointer.hpp | 60 ++++++--- 2 files changed, 120 insertions(+), 123 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index f4d280d307921..5f1e870afa2d7 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ 
b/src/hotspot/share/opto/mempointer.cpp @@ -25,120 +25,95 @@ #include "opto/mempointer.hpp" #include "utilities/resourceHash.hpp" -MemPointerSimpleForm MemPointer::parse_simple_form(Node* pointer) { - ResourceMark rm; - ResourceHashtable idx_to_simple_form; - GrowableArray stack; +// DFS all-path traversal (i.e. with node repetitions), starting at the pointer: +MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { + assert(_worklist.is_empty(), "no prior parsing"); + assert(_summands.is_empty(), "no prior parsing"); - auto get_simple_form_of_input_or_push = [&](Node* n, int i) { - Node* in = n->in(i); - MemPointerSimpleForm* simple_form = idx_to_simple_form.get(in->_idx); - if (simple_form == nullptr) { - stack.push(in); - } - return simple_form; - }; + Node* pointer = _mem->in(MemNode::Address); + _worklist.push(MemPointerSummand(pointer, 1, 1)); - stack.push(pointer); - while (stack.is_nonempty()) { - Node* n = stack.top(); + int traversal_count = 0; + while (_worklist.is_nonempty()) { + if (traversal_count++ > 1000) { return MemPointerSimpleForm(); } // TODO invalid? + parse_sub_expression(_worklist.pop()); + } + return MemPointerSimpleForm(); // TODO build from internals +} - if (idx_to_simple_form.get(n->_idx) != nullptr) { - stack.pop(); // already processed elsewhere - continue; - } +void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { + Node* n = summand.node(); + jlong scaleL = summand.scaleL(); + jlong scaleI = summand.scaleI(); - n->dump(); + n->dump(); - int opc = n->Opcode(); - switch (opc) { - case Op_ConI: - case Op_ConL: - { - jlong con = (opc == Op_ConI) ? 
n->get_int() : n->get_long(); - MemPointerSimpleForm f = MemPointerSimpleForm::make_from_ConIL(n, con); - idx_to_simple_form.put_when_absent(n->_idx, f); - stack.pop(); - continue; - } - case Op_AddP: - case Op_AddL: - case Op_AddI: - case Op_SubL: - case Op_SubI: - { - const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, (opc == Op_AddP) ? 2 : 1); - const MemPointerSimpleForm* b = get_simple_form_of_input_or_push(n, (opc == Op_AddP) ? 3 : 2); - if (a == nullptr || b == nullptr) { continue; } - MemPointerSimpleForm f = MemPointerSimpleForm::make_from_AddSubILP(n, a, b); - idx_to_simple_form.put_when_absent(n->_idx, f); - stack.pop(); - continue; + int opc = n->Opcode(); + switch (opc) { + case Op_ConI: + case Op_ConL: + { + jlong con = (opc == Op_ConI) ? n->get_int() : n->get_long(); + _con += scaleL * scaleI * con; + // TODO problematic: int con and int scale could overflow??? or irrelevant? + return; + } + case Op_AddP: + case Op_AddL: + case Op_AddI: + case Op_SubL: + case Op_SubI: + { + // TODO check if we should decompose or not + Node* a = n->in((opc == Op_AddP) ? 2 : 1); + Node* b = n->in((opc == Op_AddP) ? 3 : 2); + _worklist.push(MemPointerSummand(a, scaleL, scaleI)); + // TODO figure out how to do subtraction, which scale to negate + _worklist.push(MemPointerSummand(b, scaleL, scaleI)); + return; + } + case Op_MulL: + case Op_MulI: + case Op_LShiftL: + case Op_LShiftI: + { + // TODO check if we should decompose or not + // Form must be linear: only multiplication with constants is allowed. + Node* in2 = n->in(2); + if (!in2->is_Con()) { break; } + jlong scale; + switch (opc) { + case Op_MulL: scale = in2->get_long(); break; + case Op_MulI: scale = in2->get_int(); break; + case Op_LShiftL: + case Op_LShiftI: + assert(false, "shift"); } - case Op_MulL: - case Op_MulI: - case Op_LShiftL: - case Op_LShiftI: - { - // Form must be linear: only multiplication with constants is allowed. 
- Node* in2 = n->in(2); - if (!in2->is_Con()) { break; } - jlong scale; - switch (opc) { - case Op_MulL: scale = in2->get_long(); break; - case Op_MulI: scale = in2->get_int(); break; - case Op_LShiftL: - case Op_LShiftI: - assert(false, "shift"); - } - // Scale cannot be too large: TODO make this a special method, maybe better threshold? - const jlong max_scale = 1 << 30; - if (scale > max_scale || scale < -max_scale) { break; } + // Scale cannot be too large: TODO make this a special method, maybe better threshold? + const jlong max_scale = 1 << 30; + if (scale > max_scale || scale < -max_scale) { break; } - const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, 1); - if (a == nullptr) { continue; } - MemPointerSimpleForm f = MemPointerSimpleForm::make_from_Mul(n, a, scale); - idx_to_simple_form.put_when_absent(n->_idx, f); - stack.pop(); - continue; - } - case Op_CastII: - case Op_CastLL: - case Op_CastX2P: - { - assert(false, "unary"); - break; - } - case Op_ConvI2L: - { - const MemPointerSimpleForm* a = get_simple_form_of_input_or_push(n, 1); - if (a == nullptr) { continue; } - MemPointerSimpleForm f = MemPointerSimpleForm::make_from_ConvI2L(n, a); - idx_to_simple_form.put_when_absent(n->_idx, f); - stack.pop(); - continue; - } + Node* a = n->in(1); + // TODO figure out which scale to change, check for total overflow??? 
+ _worklist.push(MemPointerSummand(a, scaleL * scale, scaleI)); + return; + } + case Op_CastII: + case Op_CastLL: + case Op_CastX2P: + { + assert(false, "unary"); + break; + } + case Op_ConvI2L: + { + Node* a = n->in(1); + _worklist.push(MemPointerSummand(a, scaleL, scaleI)); + return; } - assert(false, "default"); } - return MemPointerSimpleForm(); -} - -MemPointerSimpleForm MemPointerSimpleForm::make_from_ConIL(Node* n, const jlong con) { - return MemPointerSimpleForm(); // TODO -} - -MemPointerSimpleForm MemPointerSimpleForm::make_from_AddSubILP(Node* n, const MemPointerSimpleForm* a, const MemPointerSimpleForm* b) { - return MemPointerSimpleForm(); // TODO -} - -MemPointerSimpleForm MemPointerSimpleForm::make_from_Mul(Node* n, const MemPointerSimpleForm* a, const jlong scale) { - return MemPointerSimpleForm(); // TODO -} - -MemPointerSimpleForm MemPointerSimpleForm::make_from_ConvI2L(Node* n, const MemPointerSimpleForm* a) { - return MemPointerSimpleForm(); // TODO + assert(false, "default"); } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index de8523d96b568..4cc4589cb0c7e 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -30,25 +30,29 @@ // TODO general description // Summand of a MemPointerSimpleForm. -// if var is a long (varL): -// s = scaleL * varL -// else, i.e. if var is a int (varI): -// s = scaleL * ConvI2L(scaleI * varI) +// if node is a long (nodeL): +// s = scaleL * nodeL +// else, i.e. 
if node is a int (nodeI): +// s = scaleL * ConvI2L(scaleI * nodeI) // class MemPointerSummand : public StackObj { -public: - Node* _var; +private: + Node* _node; jlong _scaleL; jlong _scaleI; public: - MemPointerSummand() : _var(nullptr), _scaleL(0), _scaleI(0) {} - MemPointerSummand(Node* var, const jlong scaleL, const jlong scaleI) - : _var(var), _scaleL(scaleL), _scaleI(scaleI) + MemPointerSummand() : _node(nullptr), _scaleL(0), _scaleI(0) {} + MemPointerSummand(Node* node, const jlong scaleL, const jlong scaleI) + : _node(node), _scaleL(scaleL), _scaleI(scaleI) { - assert(_var != nullptr, "must have variable"); + assert(_node != nullptr, "must have node"); assert(_scaleL != 0 && _scaleI != 0, "non-zero scale"); } + + Node* node() const { return _node; } + jlong scaleL() const { return _scaleL; } + jlong scaleI() const { return _scaleI; } }; // Simple form of the pointer sub-expression of "pointer". @@ -67,17 +71,36 @@ class MemPointerSimpleForm : public StackObj { public: MemPointerSimpleForm() {} +}; + +class MemPointerSimpleFormParser : public StackObj { +private: + const MemNode* _mem; + + // Internal data-structures for parsing. + GrowableArray _worklist; + GrowableArray _summands; + jlong _con; - static MemPointerSimpleForm make_from_ConIL(Node* n, const jlong con); - static MemPointerSimpleForm make_from_AddSubILP(Node* n, const MemPointerSimpleForm* a, const MemPointerSimpleForm* b); - static MemPointerSimpleForm make_from_Mul(Node* n, const MemPointerSimpleForm* a, const jlong scale); - static MemPointerSimpleForm make_from_ConvI2L(Node* n, const MemPointerSimpleForm* a); + // Resulting simple-form. 
+ MemPointerSimpleForm _simple_form; + +public: + MemPointerSimpleFormParser(const MemNode* mem) : _mem(mem), _con(0) { + _simple_form = parse_simple_form(); + } + + const MemPointerSimpleForm simple_form() const { return _simple_form; } + +private: + MemPointerSimpleForm parse_simple_form(); + void parse_sub_expression(const MemPointerSummand summand); }; // TODO class MemPointer : public StackObj { private: - bool _is_valid; + bool _is_valid; // TODO needed? const MemNode* _mem; MemPointerSimpleForm _simple_form; @@ -87,14 +110,13 @@ class MemPointer : public StackObj { _mem(mem) { assert(_mem->is_Store(), "only stores are supported"); - Node* pointer = mem->in(MemNode::Address); - _simple_form = parse_simple_form(pointer); + ResourceMark rm; + MemPointerSimpleFormParser parser(_mem); + _simple_form = parser.simple_form(); assert(false, "TODO"); // _mem->memory_size(); } - static MemPointerSimpleForm parse_simple_form(Node* pointer); - bool is_adjacent_to_and_before(const MemPointer& other) const; }; From 52b00ff992edb3a3c7efeb7a1d692fb21175ae74 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 8 Jul 2024 17:28:01 +0200 Subject: [PATCH 05/89] shift and some printing --- src/hotspot/share/opto/mempointer.cpp | 18 +++++++++--- src/hotspot/share/opto/mempointer.hpp | 40 +++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 5f1e870afa2d7..7511883364aaa 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -38,6 +38,14 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { if (traversal_count++ > 1000) { return MemPointerSimpleForm(); } // TODO invalid? 
parse_sub_expression(_worklist.pop()); } + + for (int i = 0; i < _summands.length(); i++) { + MemPointerSummand summand = _summands.at(i); + summand.print(); + } + + tty->print_cr("con: %d", (int)_con); + return MemPointerSimpleForm(); // TODO build from internals } @@ -85,9 +93,8 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su switch (opc) { case Op_MulL: scale = in2->get_long(); break; case Op_MulI: scale = in2->get_int(); break; - case Op_LShiftL: - case Op_LShiftI: - assert(false, "shift"); + case Op_LShiftL: scale = 1 << in2->get_long(); break; // TODO check overflow! + case Op_LShiftI: scale = 1 << in2->get_int(); break; } // Scale cannot be too large: TODO make this a special method, maybe better threshold? const jlong max_scale = 1 << 30; @@ -113,7 +120,10 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su } } - assert(false, "default"); + // Default: could not parse the "summand" further, take it as one of the + // "terminal" summands. + // TODO wording of "terminal summands"? + _summands.push(summand); } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 4cc4589cb0c7e..6f58c3c49c374 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -38,7 +38,7 @@ class MemPointerSummand : public StackObj { private: Node* _node; - jlong _scaleL; + jlong _scaleL; // TODO make jint jlong _scaleI; public: @@ -53,6 +53,13 @@ class MemPointerSummand : public StackObj { Node* node() const { return _node; } jlong scaleL() const { return _scaleL; } jlong scaleI() const { return _scaleI; } + +#ifndef PRODUCT + void print() const { + tty->print(" MemPointerSummand: %d * %d * node: ", (int)_scaleL, (int)_scaleI); + _node->dump(); + } +#endif }; // Simple form of the pointer sub-expression of "pointer". 
@@ -63,14 +70,36 @@ class MemPointerSimpleForm : public StackObj { private: static const int SUMMANDS_SIZE = 10; // TODO good? - bool _is_valid; // the parsing succeeded Node* _pointer; // pointer node associated with this (sub)pointer MemPointerSummand _summands[SUMMANDS_SIZE]; - jlong _con; + jlong _con; // TODO make jint public: - MemPointerSimpleForm() {} + // Empty + MemPointerSimpleForm() : _pointer(nullptr), _con(0) {} + // Default: pointer = node + MemPointerSimpleForm(Node* node) : _pointer(node), _con(0) { + _summands[0] = MemPointerSummand(node, 1, 1); + } + +#ifndef PRODUCT + void print() const { + if (_pointer == nullptr) { + tty->print_cr("MemPointerSimpleForm empty."); + return; + } + tty->print("MemPointerSimpleForm for "); + _pointer->dump(); + tty->print(" con = %d", (int)_con); + for (int i = 0; i < SUMMANDS_SIZE; i++) { + const MemPointerSummand& summand = _summands[i]; + if (summand.node() != nullptr) { + summand.print(); + } + } + } +#endif }; class MemPointerSimpleFormParser : public StackObj { @@ -100,19 +129,18 @@ class MemPointerSimpleFormParser : public StackObj { // TODO class MemPointer : public StackObj { private: - bool _is_valid; // TODO needed? 
const MemNode* _mem; MemPointerSimpleForm _simple_form; public: MemPointer(PhaseGVN* phase, const MemNode* mem) : - _is_valid(false), _mem(mem) { assert(_mem->is_Store(), "only stores are supported"); ResourceMark rm; MemPointerSimpleFormParser parser(_mem); _simple_form = parser.simple_form(); + _simple_form.print(); assert(false, "TODO"); // _mem->memory_size(); } From 2fbd54636c3b9447c67c93d2408f11a06845fe85 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 9 Jul 2024 17:33:56 +0200 Subject: [PATCH 06/89] create a proper MemPointerSimpleForm from the summands --- src/hotspot/share/opto/mempointer.cpp | 4 ++-- src/hotspot/share/opto/mempointer.hpp | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 7511883364aaa..a2fcf5740e7cb 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -35,7 +35,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { int traversal_count = 0; while (_worklist.is_nonempty()) { - if (traversal_count++ > 1000) { return MemPointerSimpleForm(); } // TODO invalid? 
+ if (traversal_count++ > 1000) { return MemPointerSimpleForm(pointer); } parse_sub_expression(_worklist.pop()); } @@ -46,7 +46,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { tty->print_cr("con: %d", (int)_con); - return MemPointerSimpleForm(); // TODO build from internals + return MemPointerSimpleForm::make(pointer, _summands, _con); } void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 6f58c3c49c374..7e553ab268045 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -83,6 +83,24 @@ class MemPointerSimpleForm : public StackObj { _summands[0] = MemPointerSummand(node, 1, 1); } +private: + MemPointerSimpleForm(Node* node, const GrowableArray& summands, const jlong con) + :_pointer(node), _con(con) { + assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); + for (int i = 0; i < summands.length(); i++) { + _summands[i] = summands.at(i); + } + } + +public: + static MemPointerSimpleForm make(Node* node, const GrowableArray& summands, const jlong con) { + if (summands.length() <= SUMMANDS_SIZE) { + return MemPointerSimpleForm(node, summands, con); + } else { + return MemPointerSimpleForm(node); + } + } + #ifndef PRODUCT void print() const { if (_pointer == nullptr) { @@ -91,7 +109,7 @@ class MemPointerSimpleForm : public StackObj { } tty->print("MemPointerSimpleForm for "); _pointer->dump(); - tty->print(" con = %d", (int)_con); + tty->print_cr(" con = %d", (int)_con); for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; if (summand.node() != nullptr) { From fcc317fdb2cf0066b01d576367b3fed58cd38d2d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 9 Jul 2024 17:50:22 +0200 Subject: [PATCH 07/89] node -> variable renaming --- src/hotspot/share/opto/mempointer.cpp | 2 +- src/hotspot/share/opto/mempointer.hpp | 53 
+++++++++++++++------------ 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index a2fcf5740e7cb..5d7b104fe2213 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -50,7 +50,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { } void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { - Node* n = summand.node(); + Node* n = summand.variable(); jlong scaleL = summand.scaleL(); jlong scaleI = summand.scaleI(); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 7e553ab268045..5f9a10b1ef3ab 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -30,34 +30,41 @@ // TODO general description // Summand of a MemPointerSimpleForm. -// if node is a long (nodeL): -// s = scaleL * nodeL -// else, i.e. if node is a int (nodeI): -// s = scaleL * ConvI2L(scaleI * nodeI) +// +// On 32-bit platforms, we trivially use 32-bit jint values for the address computation: +// s = scale * variable +// +// if variable is a long (variableL): +// s = scaleL * variableL +// else, i.e. 
if variable is a int (variableI): +// s = scaleL * ConvI2L(scaleI * variableI) +// +// As a matter of simplicity, we only allow jint scales, and the absolute +// value // class MemPointerSummand : public StackObj { private: - Node* _node; - jlong _scaleL; // TODO make jint - jlong _scaleI; + Node* _variable; + jint _scaleL; // TODO make jint + jint _scaleI; public: - MemPointerSummand() : _node(nullptr), _scaleL(0), _scaleI(0) {} - MemPointerSummand(Node* node, const jlong scaleL, const jlong scaleI) - : _node(node), _scaleL(scaleL), _scaleI(scaleI) + MemPointerSummand() : _variable(nullptr), _scaleL(0), _scaleI(0) {} + MemPointerSummand(Node* variable, const jlong scaleL, const jlong scaleI) + : _variable(variable), _scaleL(scaleL), _scaleI(scaleI) { - assert(_node != nullptr, "must have node"); + assert(_variable != nullptr, "must have variable"); assert(_scaleL != 0 && _scaleI != 0, "non-zero scale"); } - Node* node() const { return _node; } + Node* variable() const { return _variable; } jlong scaleL() const { return _scaleL; } jlong scaleI() const { return _scaleI; } #ifndef PRODUCT void print() const { - tty->print(" MemPointerSummand: %d * %d * node: ", (int)_scaleL, (int)_scaleI); - _node->dump(); + tty->print(" MemPointerSummand: %d * %d * variable: ", (int)_scaleL, (int)_scaleI); + _variable->dump(); } #endif }; @@ -78,14 +85,14 @@ class MemPointerSimpleForm : public StackObj { public: // Empty MemPointerSimpleForm() : _pointer(nullptr), _con(0) {} - // Default: pointer = node - MemPointerSimpleForm(Node* node) : _pointer(node), _con(0) { - _summands[0] = MemPointerSummand(node, 1, 1); + // Default: pointer = variable + MemPointerSimpleForm(Node* variable) : _pointer(variable), _con(0) { + _summands[0] = MemPointerSummand(variable, 1, 1); } private: - MemPointerSimpleForm(Node* node, const GrowableArray& summands, const jlong con) - :_pointer(node), _con(con) { + MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const jlong con) + 
:_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { _summands[i] = summands.at(i); @@ -93,11 +100,11 @@ class MemPointerSimpleForm : public StackObj { } public: - static MemPointerSimpleForm make(Node* node, const GrowableArray& summands, const jlong con) { + static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const jlong con) { if (summands.length() <= SUMMANDS_SIZE) { - return MemPointerSimpleForm(node, summands, con); + return MemPointerSimpleForm(pointer, summands, con); } else { - return MemPointerSimpleForm(node); + return MemPointerSimpleForm(pointer); } } @@ -112,7 +119,7 @@ class MemPointerSimpleForm : public StackObj { tty->print_cr(" con = %d", (int)_con); for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; - if (summand.node() != nullptr) { + if (summand.variable() != nullptr) { summand.print(); } } From f1d5c87218f540746a4fb0de6bb5674d863f0a9c Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 09:44:58 +0200 Subject: [PATCH 08/89] more work on scale --- src/hotspot/share/opto/mempointer.cpp | 12 ++++---- src/hotspot/share/opto/mempointer.hpp | 41 +++++++++++++++++---------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 5d7b104fe2213..510726461f62a 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -52,7 +52,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); jlong scaleL = summand.scaleL(); - jlong scaleI = summand.scaleI(); + jlong scale = summand.scale(); n->dump(); @@ -62,7 +62,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConL: { jlong con = 
(opc == Op_ConI) ? n->get_int() : n->get_long(); - _con += scaleL * scaleI * con; + _con += scaleL * scale * con; // TODO problematic: int con and int scale could overflow??? or irrelevant? return; } @@ -75,9 +75,9 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su // TODO check if we should decompose or not Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); - _worklist.push(MemPointerSummand(a, scaleL, scaleI)); + _worklist.push(MemPointerSummand(a, scaleL, scale)); // TODO figure out how to do subtraction, which scale to negate - _worklist.push(MemPointerSummand(b, scaleL, scaleI)); + _worklist.push(MemPointerSummand(b, scaleL, scale)); return; } case Op_MulL: @@ -102,7 +102,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su Node* a = n->in(1); // TODO figure out which scale to change, check for total overflow??? - _worklist.push(MemPointerSummand(a, scaleL * scale, scaleI)); + _worklist.push(MemPointerSummand(a, scaleL * scale, scale)); return; } case Op_CastII: @@ -115,7 +115,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConvI2L: { Node* a = n->in(1); - _worklist.push(MemPointerSummand(a, scaleL, scaleI)); + _worklist.push(MemPointerSummand(a, scaleL, scale)); return; } } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 5f9a10b1ef3ab..1ec228a509804 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -32,38 +32,49 @@ // Summand of a MemPointerSimpleForm. // // On 32-bit platforms, we trivially use 32-bit jint values for the address computation: -// s = scale * variable // -// if variable is a long (variableL): -// s = scaleL * variableL -// else, i.e. 
if variable is a int (variableI): -// s = scaleL * ConvI2L(scaleI * variableI) +// s = scaleI * variable // 32-bit variable +// scale = scaleI // -// As a matter of simplicity, we only allow jint scales, and the absolute -// value +// On 64-bit platforms, we have a mix of 64-bit jlong and 32-bit jint values for the +// address computation: // +// s = scaleL * ConvI2L(scaleI * variable) // 32-bit variable +// scale = scaleL * scaleI +// +// s = scaleL * variable // 64-bit variable +// scale = scaleL +// +// For simplicity, we only allow 32-bit jint scales, where: +// +// abs(scale) < (1 << 30) +// +// This allows very high scales, but allows calculations with scale to +// avoid overflows. +// +// TODO generalization: final product only needs to use scale, not scaleL class MemPointerSummand : public StackObj { private: Node* _variable; - jint _scaleL; // TODO make jint - jint _scaleI; + jint _scaleL; // TODO make jint, only available on 64-bit??? + jint _scale; public: - MemPointerSummand() : _variable(nullptr), _scaleL(0), _scaleI(0) {} - MemPointerSummand(Node* variable, const jlong scaleL, const jlong scaleI) - : _variable(variable), _scaleL(scaleL), _scaleI(scaleI) + MemPointerSummand() : _variable(nullptr), _scaleL(0), _scale(0) {} + MemPointerSummand(Node* variable, const jlong scaleL, const jlong scale) + : _variable(variable), _scaleL(scaleL), _scale(scale) { assert(_variable != nullptr, "must have variable"); - assert(_scaleL != 0 && _scaleI != 0, "non-zero scale"); + assert(_scaleL != 0 && _scale != 0, "non-zero scale"); } Node* variable() const { return _variable; } jlong scaleL() const { return _scaleL; } - jlong scaleI() const { return _scaleI; } + jlong scale() const { return _scale; } #ifndef PRODUCT void print() const { - tty->print(" MemPointerSummand: %d * %d * variable: ", (int)_scaleL, (int)_scaleI); + tty->print(" MemPointerSummand: %d * %d * variable: ", (int)_scaleL, (int)_scale); _variable->dump(); } #endif From 
9d7bf145031b48d5112a99d753b5a3f83d2ee4c2 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 11:45:03 +0200 Subject: [PATCH 09/89] 64-bit vs 32 bit --- src/hotspot/share/opto/mempointer.cpp | 45 ++++++++++++++++++--------- src/hotspot/share/opto/mempointer.hpp | 24 +++++++++----- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 510726461f62a..8ff979f1d6993 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -31,7 +31,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { assert(_summands.is_empty(), "no prior parsing"); Node* pointer = _mem->in(MemNode::Address); - _worklist.push(MemPointerSummand(pointer, 1, 1)); + _worklist.push(MemPointerSummand(pointer, 1 LP64_ONLY( COMMA 1 ))); int traversal_count = 0; while (_worklist.is_nonempty()) { @@ -51,8 +51,8 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); - jlong scaleL = summand.scaleL(); - jlong scale = summand.scale(); + LP64_ONLY( const jlong scaleL = summand.scaleL(); ) + const jlong scale = summand.scale(); n->dump(); @@ -62,7 +62,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConL: { jlong con = (opc == Op_ConI) ? n->get_int() : n->get_long(); - _con += scaleL * scale * con; + _con += scale * con; // TODO problematic: int con and int scale could overflow??? or irrelevant? return; } @@ -75,9 +75,9 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su // TODO check if we should decompose or not Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 
3 : 2); - _worklist.push(MemPointerSummand(a, scaleL, scale)); + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); // TODO figure out how to do subtraction, which scale to negate - _worklist.push(MemPointerSummand(b, scaleL, scale)); + _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); return; } case Op_MulL: @@ -89,20 +89,35 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su // Form must be linear: only multiplication with constants is allowed. Node* in2 = n->in(2); if (!in2->is_Con()) { break; } - jlong scale; + jlong factor; + LP64_ONLY( jlong factorL; ) switch (opc) { - case Op_MulL: scale = in2->get_long(); break; - case Op_MulI: scale = in2->get_int(); break; - case Op_LShiftL: scale = 1 << in2->get_long(); break; // TODO check overflow! - case Op_LShiftI: scale = 1 << in2->get_int(); break; + case Op_MulL: + factor = in2->get_long(); + LP64_ONLY( factorL = factor; ) + break; + case Op_MulI: + factor = in2->get_int(); + LP64_ONLY( factorL = 1; ) + break; + case Op_LShiftL: + factor = 1LL << in2->get_long(); + LP64_ONLY( factorL = factor; ) + break; // TODO check overflow! + case Op_LShiftI: + factor = 1LL << in2->get_int(); + LP64_ONLY( factorL = 1; ) + break; } // Scale cannot be too large: TODO make this a special method, maybe better threshold? - const jlong max_scale = 1 << 30; - if (scale > max_scale || scale < -max_scale) { break; } + const jlong max_factor = 1 << 30; + if (factor > max_factor || factor < -max_factor) { break; } Node* a = n->in(1); // TODO figure out which scale to change, check for total overflow??? 
- _worklist.push(MemPointerSummand(a, scaleL * scale, scale)); + const jint new_scale = scale * factor; // TODO check overflow + LP64_ONLY( const jint new_scaleL = scaleL * factorL; ) + _worklist.push(MemPointerSummand(a, new_scale LP64_ONLY( COMMA new_scaleL ))); return; } case Op_CastII: @@ -115,7 +130,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConvI2L: { Node* a = n->in(1); - _worklist.push(MemPointerSummand(a, scaleL, scale)); + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); return; } } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 1ec228a509804..39d5d789d85c4 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -56,25 +56,33 @@ class MemPointerSummand : public StackObj { private: Node* _variable; - jint _scaleL; // TODO make jint, only available on 64-bit??? jint _scale; + LP64_ONLY( jint _scaleL; ) public: - MemPointerSummand() : _variable(nullptr), _scaleL(0), _scale(0) {} - MemPointerSummand(Node* variable, const jlong scaleL, const jlong scale) - : _variable(variable), _scaleL(scaleL), _scale(scale) + MemPointerSummand() : + _variable(nullptr), + _scale(0) + LP64_ONLY( COMMA _scaleL(0) ) {} + MemPointerSummand(Node* variable, const jlong scale LP64_ONLY( COMMA const jlong scaleL )) : + _variable(variable), + _scale(scale) + LP64_ONLY( COMMA _scaleL(scaleL) ) { assert(_variable != nullptr, "must have variable"); - assert(_scaleL != 0 && _scale != 0, "non-zero scale"); + assert(_scale != 0, "non-zero scale"); + LP64_ONLY( assert(_scaleL != 0, "non-zero scale") ); } Node* variable() const { return _variable; } - jlong scaleL() const { return _scaleL; } jlong scale() const { return _scale; } + LP64_ONLY( jlong scaleL() const { return _scaleL; } ) #ifndef PRODUCT void print() const { - tty->print(" MemPointerSummand: %d * %d * variable: ", (int)_scaleL, (int)_scale); + tty->print(" 
MemPointerSummand: "); + LP64_ONLY( tty->print("(scaleL = %d) ", _scaleL); ) + tty->print(" MemPointerSummand: %d * variable: ", _scale); _variable->dump(); } #endif @@ -98,7 +106,7 @@ class MemPointerSimpleForm : public StackObj { MemPointerSimpleForm() : _pointer(nullptr), _con(0) {} // Default: pointer = variable MemPointerSimpleForm(Node* variable) : _pointer(variable), _con(0) { - _summands[0] = MemPointerSummand(variable, 1, 1); + _summands[0] = MemPointerSummand(variable, 1 LP64_ONLY( COMMA 1 )); } private: From bb2f2a33c019f4b6ae678150c7c86c862a741e1b Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 12:47:40 +0200 Subject: [PATCH 10/89] replace jlong with jint --- src/hotspot/share/opto/mempointer.cpp | 12 ++++++------ src/hotspot/share/opto/mempointer.hpp | 18 ++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 8ff979f1d6993..629091fa99a60 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -51,8 +51,8 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); - LP64_ONLY( const jlong scaleL = summand.scaleL(); ) - const jlong scale = summand.scale(); + LP64_ONLY( const jint scaleL = summand.scaleL(); ) + const jint scale = summand.scale(); n->dump(); @@ -61,7 +61,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConI: case Op_ConL: { - jlong con = (opc == Op_ConI) ? n->get_int() : n->get_long(); + jint con = (opc == Op_ConI) ? n->get_int() : n->get_long(); _con += scale * con; // TODO problematic: int con and int scale could overflow??? or irrelevant? 
return; @@ -89,8 +89,8 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su // Form must be linear: only multiplication with constants is allowed. Node* in2 = n->in(2); if (!in2->is_Con()) { break; } - jlong factor; - LP64_ONLY( jlong factorL; ) + jint factor; + LP64_ONLY( jint factorL; ) switch (opc) { case Op_MulL: factor = in2->get_long(); @@ -110,7 +110,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su break; } // Scale cannot be too large: TODO make this a special method, maybe better threshold? - const jlong max_factor = 1 << 30; + const jint max_factor = 1 << 30; if (factor > max_factor || factor < -max_factor) { break; } Node* a = n->in(1); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 39d5d789d85c4..3eb7a3dcc0f2c 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -27,8 +27,6 @@ #include "opto/memnode.hpp" -// TODO general description - // Summand of a MemPointerSimpleForm. 
// // On 32-bit platforms, we trivially use 32-bit jint values for the address computation: @@ -64,7 +62,7 @@ class MemPointerSummand : public StackObj { _variable(nullptr), _scale(0) LP64_ONLY( COMMA _scaleL(0) ) {} - MemPointerSummand(Node* variable, const jlong scale LP64_ONLY( COMMA const jlong scaleL )) : + MemPointerSummand(Node* variable, const jint scale LP64_ONLY( COMMA const jint scaleL )) : _variable(variable), _scale(scale) LP64_ONLY( COMMA _scaleL(scaleL) ) @@ -75,14 +73,14 @@ class MemPointerSummand : public StackObj { } Node* variable() const { return _variable; } - jlong scale() const { return _scale; } - LP64_ONLY( jlong scaleL() const { return _scaleL; } ) + jint scale() const { return _scale; } + LP64_ONLY( jint scaleL() const { return _scaleL; } ) #ifndef PRODUCT void print() const { tty->print(" MemPointerSummand: "); LP64_ONLY( tty->print("(scaleL = %d) ", _scaleL); ) - tty->print(" MemPointerSummand: %d * variable: ", _scale); + tty->print("%d * variable: ", _scale); _variable->dump(); } #endif @@ -99,7 +97,7 @@ class MemPointerSimpleForm : public StackObj { Node* _pointer; // pointer node associated with this (sub)pointer MemPointerSummand _summands[SUMMANDS_SIZE]; - jlong _con; // TODO make jint + jint _con; public: // Empty @@ -110,7 +108,7 @@ class MemPointerSimpleForm : public StackObj { } private: - MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const jlong con) + MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const jint con) :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { @@ -119,7 +117,7 @@ class MemPointerSimpleForm : public StackObj { } public: - static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const jlong con) { + static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const jint con) { if (summands.length() <= SUMMANDS_SIZE) { return 
MemPointerSimpleForm(pointer, summands, con); } else { @@ -153,7 +151,7 @@ class MemPointerSimpleFormParser : public StackObj { // Internal data-structures for parsing. GrowableArray _worklist; GrowableArray _summands; - jlong _con; + jint _con; // Resulting simple-form. MemPointerSimpleForm _simple_form; From adec9f779b8b52fc45394f4204c676bd388f4d11 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 12:56:02 +0200 Subject: [PATCH 11/89] add comment --- src/hotspot/share/opto/mempointer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 629091fa99a60..fab63a2288d88 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -56,6 +56,8 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su n->dump(); + // TODO make all get_long calls safe! + int opc = n->Opcode(); switch (opc) { case Op_ConI: From 4769cbf3777f0a15dd71930b2d8d9877e9464a82 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 18:09:54 +0200 Subject: [PATCH 12/89] introduce NoOverflowInt --- src/hotspot/share/opto/mempointer.cpp | 75 +++++++++++------ src/hotspot/share/opto/mempointer.hpp | 114 +++++++++++++++++++++----- 2 files changed, 142 insertions(+), 47 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index fab63a2288d88..a7d2d98c9ae69 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -31,7 +31,9 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { assert(_summands.is_empty(), "no prior parsing"); Node* pointer = _mem->in(MemNode::Address); - _worklist.push(MemPointerSummand(pointer, 1 LP64_ONLY( COMMA 1 ))); + + const NoOverflowInt one(1); + _worklist.push(MemPointerSummand(pointer, one LP64_ONLY( COMMA one ))); int traversal_count = 0; while (_worklist.is_nonempty()) { @@ -44,27 +46,43 @@ 
MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { summand.print(); } - tty->print_cr("con: %d", (int)_con); + tty->print("con: "); + _con.print(); + tty->cr(); + + // TODO gtest??? + // NoOverflowInt a(1 << 20); + // a.print(); tty->cr(); + // NoOverflowInt b(1LL << 33); + // b.print(); tty->cr(); + // NoOverflowInt c(55); + // NoOverflowInt d(22); + // NoOverflowInt e = c + d; + // e.print(); tty->cr(); + // NoOverflowInt f(max_jint); + // NoOverflowInt g(max_jint); + // NoOverflowInt h = f + g; + // h.print(); tty->cr(); return MemPointerSimpleForm::make(pointer, _summands, _con); } void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); - LP64_ONLY( const jint scaleL = summand.scaleL(); ) - const jint scale = summand.scale(); + const NoOverflowInt scale = summand.scale(); + LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) + const NoOverflowInt one(1); n->dump(); - // TODO make all get_long calls safe! - int opc = n->Opcode(); switch (opc) { case Op_ConI: case Op_ConL: { - jint con = (opc == Op_ConI) ? n->get_int() : n->get_long(); - _con += scale * con; + NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) + : NoOverflowInt(n->get_long()); + _con = _con + scale * con; // TODO problematic: int con and int scale could overflow??? or irrelevant? return; } @@ -87,39 +105,44 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_LShiftL: case Op_LShiftI: { - // TODO check if we should decompose or not // Form must be linear: only multiplication with constants is allowed. 
+ Node* in1 = n->in(1); Node* in2 = n->in(2); if (!in2->is_Con()) { break; } - jint factor; - LP64_ONLY( jint factorL; ) + NoOverflowInt factor; + LP64_ONLY( NoOverflowInt factorL; ) switch (opc) { case Op_MulL: - factor = in2->get_long(); + factor = NoOverflowInt(in2->get_long()); LP64_ONLY( factorL = factor; ) break; case Op_MulI: - factor = in2->get_int(); - LP64_ONLY( factorL = 1; ) + factor = NoOverflowInt(in2->get_int()); + LP64_ONLY( factorL = one; ) break; case Op_LShiftL: - factor = 1LL << in2->get_long(); + factor = one << NoOverflowInt(in2->get_long()); LP64_ONLY( factorL = factor; ) - break; // TODO check overflow! + break; case Op_LShiftI: - factor = 1LL << in2->get_int(); - LP64_ONLY( factorL = 1; ) + factor = one << NoOverflowInt(in2->get_int()); + LP64_ONLY( factorL = one; ) break; } - // Scale cannot be too large: TODO make this a special method, maybe better threshold? - const jint max_factor = 1 << 30; - if (factor > max_factor || factor < -max_factor) { break; } - Node* a = n->in(1); - // TODO figure out which scale to change, check for total overflow??? - const jint new_scale = scale * factor; // TODO check overflow - LP64_ONLY( const jint new_scaleL = scaleL * factorL; ) - _worklist.push(MemPointerSummand(a, new_scale LP64_ONLY( COMMA new_scaleL ))); + // Accumulate scale. + NoOverflowInt new_scale = scale * factor; + LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) + + // Make sure abs(scale) is not larger than "1 << 30". + new_scale = new_scale.truncate_to_30_bits(); + LP64_ONLY( new_scaleL = new_scaleL.truncate_to_30_bits(); ) + + // If anything went wrong with the scale computation: bailout. 
+ if (new_scale.is_NaN()) { break; } + LP64_ONLY( if (new_scaleL.is_NaN()) { break; } ) + + _worklist.push(MemPointerSummand(in1, new_scale LP64_ONLY( COMMA new_scaleL ))); return; } case Op_CastII: diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 3eb7a3dcc0f2c..c08027212918d 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -27,6 +27,70 @@ #include "opto/memnode.hpp" +// Wrapper around jint, which detects overflow. +// TODO consider moving to separate file, and have GTests? +class NoOverflowInt { +private: + bool _is_NaN; // overflow, uninitialized, etc. + jint _value; + +public: + // Default: NaN. + NoOverflowInt() : _is_NaN(true), _value(0) {} + + // Create from jlong (or jint) -> NaN if overflows jint. + explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { + jint trunc = (jint)value; + if ((jlong)trunc == value) { + _is_NaN = false; + _value = trunc; + } + } + + static NoOverflowInt make_NaN() { return NoOverflowInt(); } + + bool is_NaN() const { return _is_NaN; } + jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } + bool is_zero() const { return !is_NaN() && value() == 0; } + + friend NoOverflowInt operator+(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_add((jlong)a.value(), (jlong)b.value())); + } + + friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_multiply((jlong)a.value(), (jlong)b.value())); + } + + friend NoOverflowInt operator<<(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + jint shift = b.value(); + if (shift < 0 || shift > 31) { return make_NaN(); } + return 
NoOverflowInt(java_shift_left((jlong)a.value(), shift)); + } + + NoOverflowInt truncate_to_30_bits() const { + if (is_NaN()) { return make_NaN(); } + const jint max_value = 1 << 30; + if (value() > max_value || value() < -max_value) { return make_NaN(); } + return *this; + } + +#ifndef PRODUCT + void print() const { + if (is_NaN()) { + tty->print("NaN"); + } else { + tty->print("%d", value()); + } + } +#endif +}; + // Summand of a MemPointerSimpleForm. // // On 32-bit platforms, we trivially use 32-bit jint values for the address computation: @@ -43,7 +107,7 @@ // s = scaleL * variable // 64-bit variable // scale = scaleL // -// For simplicity, we only allow 32-bit jint scales, where: +// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: // // abs(scale) < (1 << 30) // @@ -54,33 +118,38 @@ class MemPointerSummand : public StackObj { private: Node* _variable; - jint _scale; - LP64_ONLY( jint _scaleL; ) + NoOverflowInt _scale; + LP64_ONLY( NoOverflowInt _scaleL; ) public: MemPointerSummand() : _variable(nullptr), - _scale(0) - LP64_ONLY( COMMA _scaleL(0) ) {} - MemPointerSummand(Node* variable, const jint scale LP64_ONLY( COMMA const jint scaleL )) : + _scale(NoOverflowInt::make_NaN()) + LP64_ONLY( COMMA _scaleL(NoOverflowInt::make_NaN()) ) {} + MemPointerSummand(Node* variable, const NoOverflowInt scale LP64_ONLY( COMMA const NoOverflowInt scaleL )) : _variable(variable), _scale(scale) LP64_ONLY( COMMA _scaleL(scaleL) ) { assert(_variable != nullptr, "must have variable"); - assert(_scale != 0, "non-zero scale"); - LP64_ONLY( assert(_scaleL != 0, "non-zero scale") ); + assert(!_scale.is_zero(), "non-zero scale"); + LP64_ONLY( assert(!_scaleL.is_zero(), "non-zero scale") ); } Node* variable() const { return _variable; } - jint scale() const { return _scale; } - LP64_ONLY( jint scaleL() const { return _scaleL; } ) + NoOverflowInt scale() const { return _scale; } + LP64_ONLY( NoOverflowInt scaleL() const { return _scaleL; } ) #ifndef 
PRODUCT void print() const { tty->print(" MemPointerSummand: "); - LP64_ONLY( tty->print("(scaleL = %d) ", _scaleL); ) - tty->print("%d * variable: ", _scale); +#ifdef _LP64 + tty->print("(scaleL = "); + _scaleL.print(); + tty->print(") "); +#endif + _scale.print(); + tty->print(" * variable: "); _variable->dump(); } #endif @@ -97,18 +166,19 @@ class MemPointerSimpleForm : public StackObj { Node* _pointer; // pointer node associated with this (sub)pointer MemPointerSummand _summands[SUMMANDS_SIZE]; - jint _con; + NoOverflowInt _con; public: // Empty - MemPointerSimpleForm() : _pointer(nullptr), _con(0) {} + MemPointerSimpleForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} // Default: pointer = variable - MemPointerSimpleForm(Node* variable) : _pointer(variable), _con(0) { - _summands[0] = MemPointerSummand(variable, 1 LP64_ONLY( COMMA 1 )); + MemPointerSimpleForm(Node* variable) : _pointer(variable), _con(NoOverflowInt(0)) { + const NoOverflowInt one(1); + _summands[0] = MemPointerSummand(variable, one LP64_ONLY( COMMA one )); } private: - MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const jint con) + MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { @@ -117,7 +187,7 @@ class MemPointerSimpleForm : public StackObj { } public: - static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const jint con) { + static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { if (summands.length() <= SUMMANDS_SIZE) { return MemPointerSimpleForm(pointer, summands, con); } else { @@ -133,7 +203,9 @@ class MemPointerSimpleForm : public StackObj { } tty->print("MemPointerSimpleForm for "); _pointer->dump(); - tty->print_cr(" con = %d", (int)_con); + tty->print(" con = "); + _con.print(); + tty->cr(); 
for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; if (summand.variable() != nullptr) { @@ -151,13 +223,13 @@ class MemPointerSimpleFormParser : public StackObj { // Internal data-structures for parsing. GrowableArray _worklist; GrowableArray _summands; - jint _con; + NoOverflowInt _con; // Resulting simple-form. MemPointerSimpleForm _simple_form; public: - MemPointerSimpleFormParser(const MemNode* mem) : _mem(mem), _con(0) { + MemPointerSimpleFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { _simple_form = parse_simple_form(); } From 21048a7dd395c8c29c589bd6292f88901103424d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 10 Jul 2024 18:18:56 +0200 Subject: [PATCH 13/89] some bits --- src/hotspot/share/opto/mempointer.cpp | 2 ++ src/hotspot/share/opto/mempointer.hpp | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index a7d2d98c9ae69..82bf5b684aa8c 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -92,6 +92,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_SubL: case Op_SubI: { + // TODO check if we should decompose or not: int-overflow!!! // TODO check if we should decompose or not Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); @@ -105,6 +106,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_LShiftL: case Op_LShiftI: { + // TODO check if we should decompose or not: int-overflow!!! // Form must be linear: only multiplication with constants is allowed. 
Node* in1 = n->in(1); Node* in2 = n->in(2); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index c08027212918d..b8fe7e32cc707 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -254,9 +254,7 @@ class MemPointer : public StackObj { ResourceMark rm; MemPointerSimpleFormParser parser(_mem); _simple_form = parser.simple_form(); - _simple_form.print(); - assert(false, "TODO"); - // _mem->memory_size(); + _simple_form.print(); // TODO tracing??? } bool is_adjacent_to_and_before(const MemPointer& other) const; From 4a60797378f153d510c522a69613fe4f8fc79036 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 11 Jul 2024 10:37:05 +0200 Subject: [PATCH 14/89] handle sub --- src/hotspot/share/opto/mempointer.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 82bf5b684aa8c..d4fff8f709523 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -89,16 +89,31 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_AddP: case Op_AddL: case Op_AddI: + { + // TODO check if we should decompose or not: int-overflow!!! + Node* a = n->in((opc == Op_AddP) ? 2 : 1); + Node* b = n->in((opc == Op_AddP) ? 3 : 2); + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); + return; + } case Op_SubL: case Op_SubI: { // TODO check if we should decompose or not: int-overflow!!! - // TODO check if we should decompose or not Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); + + NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; + LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? 
scaleL * NoOverflowInt(-1) + : scaleL; ) + + // If anything went wrong with the scale computation: bailout. + if (sub_scale.is_NaN()) { break; } + LP64_ONLY( if (sub_scaleL.is_NaN()) { break; } ) + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - // TODO figure out how to do subtraction, which scale to negate - _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(b, sub_scale LP64_ONLY( COMMA sub_scaleL ))); return; } case Op_MulL: From e116e1b57ccd63e1b50615036be9dd3033e340bf Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 12 Jul 2024 09:04:55 +0200 Subject: [PATCH 15/89] move from constructor to init --- src/hotspot/share/opto/mempointer.hpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index b8fe7e32cc707..2995dc7a9aa88 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -244,22 +244,26 @@ class MemPointerSimpleFormParser : public StackObj { class MemPointer : public StackObj { private: const MemNode* _mem; - MemPointerSimpleForm _simple_form; + const MemPointerSimpleForm _simple_form; public: + // TODO no need for phase? MemPointer(PhaseGVN* phase, const MemNode* mem) : - _mem(mem) + _mem(mem), + _simple_form(init_simple_form(_mem)) { - assert(_mem->is_Store(), "only stores are supported"); - ResourceMark rm; - MemPointerSimpleFormParser parser(_mem); - _simple_form = parser.simple_form(); _simple_form.print(); // TODO tracing??? 
} bool is_adjacent_to_and_before(const MemPointer& other) const; + +private: + static const MemPointerSimpleForm init_simple_form(const MemNode* mem) { + assert(mem->is_Store(), "only stores are supported"); + ResourceMark rm; + MemPointerSimpleFormParser parser(mem); + return parser.simple_form(); + } }; #endif // SHARE_OPTO_MEMPOINTER_HPP - - From b0e1b37e180d6675d3447be20e2e2a1525750aef Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 12 Jul 2024 16:03:22 +0200 Subject: [PATCH 16/89] MemPointerAliasing --- src/hotspot/share/opto/mempointer.hpp | 84 +++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 2995dc7a9aa88..491a14695e31b 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -216,6 +216,90 @@ class MemPointerSimpleForm : public StackObj { #endif }; +// Class to represent aliasing between two MemPointer. +class MemPointerAliasing { +public: + enum Aliasing { + Unknown, // Distance unknown. + // Example: two "int[]" with different variable index offsets. + // e.g. "array[i] = array[j]". + Never, // Can never alias. + // Example: "int[]" and "float[]". + // e.g. "intArray[i] = floatArray[i]". + Always, // Constant distance = p1 - p2. + // Example: The same address expression, except for a constant offset + // e.g. "array[i] = array[i+1]". + Maybe}; // Either "Never" (i.e. different memory objects) + // or "Always" (at constant distance). + // Example: "array1[i] = array2[i]": + // If at runtime "array1 != array2": cannot alias. + // If at runtime "array1 == array2": constant distance. 
+private: + const Aliasing _aliasing; + const jint _distance; + + MemPointerAliasing(const Aliasing aliasing, const jint distance) : + _aliasing(aliasing), + _distance(distance) + { + const jint max_distance = 1 << 30; + assert(_distance < max_distance && _distance > -max_distance, "safe distance"); + } + +public: + MemPointerAliasing() : MemPointerAliasing(Unknown, 0) {} + + static MemPointerAliasing make_unknown() { + return MemPointerAliasing(); + } + + static MemPointerAliasing make_never() { + return MemPointerAliasing(Never, 0); + } + + static MemPointerAliasing make_always(const jint distance) { + return MemPointerAliasing(Always, distance); + } + + static MemPointerAliasing make_maybe(const jint distance) { + return MemPointerAliasing(Maybe, distance); + } + + Aliasing aliasing() const { return _aliasing; } + bool has_distance() const { return _aliasing == Always || _aliasing == Maybe; } + jint distance() const { assert(has_distance(), "must have"); return _distance; } + + // Use case: exact aliasing and adjacency. 
+ bool is_always_at_distance(const jint distance) { + return _aliasing == Always && _distance == distance; + } + + bool is_never_overlapping(const jint size1, const jint size2) { + assert(1 <= size1 && size1 <= 1024, "sane size"); + assert(1 <= size2 && size2 <= 1024, "sane size"); + + if (_aliasing == Unknown) { return false; } + if (_aliasing == Never) { return true; } + + // distance = p2 - p1 + const jint d = distance(); + return size1 <= d || // <==> size1 <= p2 - p1 <==> p1 + size1 <= p2 + size2 <= -d; // <==> size2 <= p1 - p2 <==> p2 + size2 <= p1 + } + +#ifndef PRODUCT + void print() const { + switch(_aliasing) { + case Unknown: tty->print("Unknown"); break; + case Never: tty->print("Never"); break; + case Always: tty->print("Always(%d)", _distance); break; + case Maybe: tty->print("Maybe(%d)", _distance); break; + default: ShouldNotReachHere(); + } + } +#endif +}; + class MemPointerSimpleFormParser : public StackObj { private: const MemNode* _mem; From c13b0f5d11a7860965ef12cc4e47d17aba2e284a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 12 Jul 2024 17:52:57 +0200 Subject: [PATCH 17/89] move some code, start with is_adjacent_to_and_before impl --- src/hotspot/share/opto/mempointer.cpp | 11 +- src/hotspot/share/opto/mempointer.hpp | 173 +++++++++++++------------- 2 files changed, 99 insertions(+), 85 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index d4fff8f709523..485ec45aa8ba0 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -183,7 +183,16 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su _summands.push(summand); } +MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other) const { + return MemPointerAliasing::make_unknown(); +} + bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { - return true; // TODO + const MemPointerAliasing aliasing = 
simple_form().get_aliasing_with(other.simple_form()); + tty->print_cr("MemPointer::is_adjacent_to_and_before"); + simple_form().print(); + other.simple_form().print(); + tty->print("Aliasing: "); aliasing.print(); tty->cr(); + return aliasing.is_always_at_distance(mem()->memory_size()); } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 491a14695e31b..71681faa6e979 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -91,6 +91,91 @@ class NoOverflowInt { #endif }; +// Class to represent aliasing between two MemPointer. +class MemPointerAliasing { +public: + enum Aliasing { + Unknown, // Distance unknown. + // Example: two "int[]" with different variable index offsets. + // e.g. "array[i] = array[j]". + Never, // Can never alias. + // Example: "int[]" and "float[]". + // e.g. "intArray[i] = floatArray[i]". + Always, // Constant distance = p1 - p2. + // Example: The same address expression, except for a constant offset + // e.g. "array[i] = array[i+1]". + Maybe}; // Either "Never" (i.e. different memory objects) + // or "Always" (at constant distance). + // Example: "array1[i] = array2[i]": + // If at runtime "array1 != array2": cannot alias. + // If at runtime "array1 == array2": constant distance. 
+private: + const Aliasing _aliasing; + const jint _distance; + + MemPointerAliasing(const Aliasing aliasing, const jint distance) : + _aliasing(aliasing), + _distance(distance) + { + const jint max_distance = 1 << 30; + assert(_distance < max_distance && _distance > -max_distance, "safe distance"); + } + +public: + MemPointerAliasing() : MemPointerAliasing(Unknown, 0) {} + + static MemPointerAliasing make_unknown() { + return MemPointerAliasing(); + } + + static MemPointerAliasing make_never() { + return MemPointerAliasing(Never, 0); + } + + static MemPointerAliasing make_always(const jint distance) { + return MemPointerAliasing(Always, distance); + } + + static MemPointerAliasing make_maybe(const jint distance) { + return MemPointerAliasing(Maybe, distance); + } + + Aliasing aliasing() const { return _aliasing; } + bool has_distance() const { return _aliasing == Always || _aliasing == Maybe; } + jint distance() const { assert(has_distance(), "must have"); return _distance; } + + // Use case: exact aliasing and adjacency. 
+ bool is_always_at_distance(const jint distance) const { + return _aliasing == Always && _distance == distance; + } + +// TODO maybe not yet +// bool is_never_overlapping(const jint size1, const jint size2) { +// assert(1 <= size1 && size1 <= 1024, "sane size"); +// assert(1 <= size2 && size2 <= 1024, "sane size"); +// +// if (_aliasing == Unknown) { return false; } +// if (_aliasing == Never) { return true; } +// +// // distance = p2 - p1 +// const jint d = distance(); +// return size1 <= d || // <==> size1 <= p2 - p1 <==> p1 + size1 <= p2 +// size2 <= -d; // <==> size2 <= p1 - p2 <==> p2 + size2 <= p1 +// } + +#ifndef PRODUCT + void print() const { + switch(_aliasing) { + case Unknown: tty->print("Unknown"); break; + case Never: tty->print("Never"); break; + case Always: tty->print("Always(%d)", _distance); break; + case Maybe: tty->print("Maybe(%d)", _distance); break; + default: ShouldNotReachHere(); + } + } +#endif +}; + // Summand of a MemPointerSimpleForm. // // On 32-bit platforms, we trivially use 32-bit jint values for the address computation: @@ -195,6 +280,8 @@ class MemPointerSimpleForm : public StackObj { } } + MemPointerAliasing get_aliasing_with(const MemPointerSimpleForm& other) const; + #ifndef PRODUCT void print() const { if (_pointer == nullptr) { @@ -216,90 +303,6 @@ class MemPointerSimpleForm : public StackObj { #endif }; -// Class to represent aliasing between two MemPointer. -class MemPointerAliasing { -public: - enum Aliasing { - Unknown, // Distance unknown. - // Example: two "int[]" with different variable index offsets. - // e.g. "array[i] = array[j]". - Never, // Can never alias. - // Example: "int[]" and "float[]". - // e.g. "intArray[i] = floatArray[i]". - Always, // Constant distance = p1 - p2. - // Example: The same address expression, except for a constant offset - // e.g. "array[i] = array[i+1]". - Maybe}; // Either "Never" (i.e. different memory objects) - // or "Always" (at constant distance). 
- // Example: "array1[i] = array2[i]": - // If at runtime "array1 != array2": cannot alias. - // If at runtime "array1 == array2": constant distance. -private: - const Aliasing _aliasing; - const jint _distance; - - MemPointerAliasing(const Aliasing aliasing, const jint distance) : - _aliasing(aliasing), - _distance(distance) - { - const jint max_distance = 1 << 30; - assert(_distance < max_distance && _distance > -max_distance, "safe distance"); - } - -public: - MemPointerAliasing() : MemPointerAliasing(Unknown, 0) {} - - static MemPointerAliasing make_unknown() { - return MemPointerAliasing(); - } - - static MemPointerAliasing make_never() { - return MemPointerAliasing(Never, 0); - } - - static MemPointerAliasing make_always(const jint distance) { - return MemPointerAliasing(Always, distance); - } - - static MemPointerAliasing make_maybe(const jint distance) { - return MemPointerAliasing(Maybe, distance); - } - - Aliasing aliasing() const { return _aliasing; } - bool has_distance() const { return _aliasing == Always || _aliasing == Maybe; } - jint distance() const { assert(has_distance(), "must have"); return _distance; } - - // Use case: exact aliasing and adjacency. 
- bool is_always_at_distance(const jint distance) { - return _aliasing == Always && _distance == distance; - } - - bool is_never_overlapping(const jint size1, const jint size2) { - assert(1 <= size1 && size1 <= 1024, "sane size"); - assert(1 <= size2 && size2 <= 1024, "sane size"); - - if (_aliasing == Unknown) { return false; } - if (_aliasing == Never) { return true; } - - // distance = p2 - p1 - const jint d = distance(); - return size1 <= d || // <==> size1 <= p2 - p1 <==> p1 + size1 <= p2 - size2 <= -d; // <==> size2 <= p1 - p2 <==> p2 + size2 <= p1 - } - -#ifndef PRODUCT - void print() const { - switch(_aliasing) { - case Unknown: tty->print("Unknown"); break; - case Never: tty->print("Never"); break; - case Always: tty->print("Always(%d)", _distance); break; - case Maybe: tty->print("Maybe(%d)", _distance); break; - default: ShouldNotReachHere(); - } - } -#endif -}; - class MemPointerSimpleFormParser : public StackObj { private: const MemNode* _mem; @@ -339,6 +342,8 @@ class MemPointer : public StackObj { _simple_form.print(); // TODO tracing??? 
} + const MemNode* mem() const { return _mem; } + const MemPointerSimpleForm simple_form() const { return _simple_form; } bool is_adjacent_to_and_before(const MemPointer& other) const; private: From 6aff01f22c83cb85e1d84318171170333cdbfcc5 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 12 Jul 2024 18:19:37 +0200 Subject: [PATCH 18/89] MemPointerSimpleForm::get_aliasing_with --- src/hotspot/share/opto/mempointer.cpp | 18 ++++++++++++++- src/hotspot/share/opto/mempointer.hpp | 33 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 485ec45aa8ba0..89f4b17887f6b 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -184,7 +184,23 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su } MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other) const { - return MemPointerAliasing::make_unknown(); + // Check if all summands are the same: + for (uint i = 0; i < SUMMANDS_SIZE; i++) { + const MemPointerSummand s1 = summands_at(i); + const MemPointerSummand s2 = other.summands_at(i); + if (s1 != s2) { + return MemPointerAliasing::make_unknown(); + } + } + + // Compute distance: + NoOverflowInt distance = other.con() - con(); + distance = distance.truncate_to_30_bits(); + if (distance.is_NaN()) { + return MemPointerAliasing::make_unknown(); + } + + return MemPointerAliasing::make_always(distance.value()); } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 71681faa6e979..af0d4f9c1b6cd 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -59,6 +59,12 @@ class NoOverflowInt { return NoOverflowInt(java_add((jlong)a.value(), (jlong)b.value())); } + friend NoOverflowInt 
operator-(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_subtract((jlong)a.value(), (jlong)b.value())); + } + friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { if (a.is_NaN()) { return make_NaN(); } if (b.is_NaN()) { return make_NaN(); } @@ -73,6 +79,12 @@ class NoOverflowInt { return NoOverflowInt(java_shift_left((jlong)a.value(), shift)); } + friend bool operator==(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + return a.value() == b.value(); + } + NoOverflowInt truncate_to_30_bits() const { if (is_NaN()) { return make_NaN(); } const jint max_value = 1 << 30; @@ -109,6 +121,7 @@ class MemPointerAliasing { // Example: "array1[i] = array2[i]": // If at runtime "array1 != array2": cannot alias. // If at runtime "array1 == array2": constant distance. + // TODO consider to simplify for MergeStores...? private: const Aliasing _aliasing; const jint _distance; @@ -225,6 +238,19 @@ class MemPointerSummand : public StackObj { NoOverflowInt scale() const { return _scale; } LP64_ONLY( NoOverflowInt scaleL() const { return _scaleL; } ) + friend bool operator==(const MemPointerSummand a, const MemPointerSummand b) { + // Both "null" -> equal. + if (a.variable() == nullptr && b.variable() == nullptr) { return true; } + + // Same variable and scale? 
+ if (a.variable() != b.variable()) { return false; } + return a.scale() == b.scale(); + } + + friend bool operator!=(const MemPointerSummand a, const MemPointerSummand b) { + return !(a == b); + } + #ifndef PRODUCT void print() const { tty->print(" MemPointerSummand: "); @@ -282,6 +308,13 @@ class MemPointerSimpleForm : public StackObj { MemPointerAliasing get_aliasing_with(const MemPointerSimpleForm& other) const; + const MemPointerSummand summands_at(const uint i) const { + assert(i < SUMMANDS_SIZE, "in bounds"); + return _summands[i]; + } + + const NoOverflowInt con() const { return _con; } + #ifndef PRODUCT void print() const { if (_pointer == nullptr) { From da2d6d3789b2c4ba369d9c1e9d32c5cf2460e367 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 15 Jul 2024 09:03:04 +0200 Subject: [PATCH 19/89] casts --- src/hotspot/share/opto/mempointer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 89f4b17887f6b..e4395b4035771 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -165,10 +165,6 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_CastII: case Op_CastLL: case Op_CastX2P: - { - assert(false, "unary"); - break; - } case Op_ConvI2L: { Node* a = n->in(1); From 8f3faac742988faa605ad90033e6f6e0c3e4aaf3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 15 Jul 2024 09:40:30 +0200 Subject: [PATCH 20/89] work on tests now --- src/hotspot/share/opto/mempointer.cpp | 26 +++++++++---------- src/hotspot/share/opto/mempointer.hpp | 2 +- .../jtreg/compiler/c2/TestMergeStores.java | 9 +++---- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index e4395b4035771..1b59c8d589969 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -41,14 +41,14 @@ 
MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { parse_sub_expression(_worklist.pop()); } - for (int i = 0; i < _summands.length(); i++) { - MemPointerSummand summand = _summands.at(i); - summand.print(); - } + // for (int i = 0; i < _summands.length(); i++) { + // MemPointerSummand summand = _summands.at(i); + // summand.print(); + // } - tty->print("con: "); - _con.print(); - tty->cr(); + // tty->print("con: "); + // _con.print(); + // tty->cr(); // TODO gtest??? // NoOverflowInt a(1 << 20); @@ -73,7 +73,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) const NoOverflowInt one(1); - n->dump(); + // n->dump(); int opc = n->Opcode(); switch (opc) { @@ -138,7 +138,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su LP64_ONLY( factorL = one; ) break; case Op_LShiftL: - factor = one << NoOverflowInt(in2->get_long()); + factor = one << NoOverflowInt(in2->get_int()); LP64_ONLY( factorL = factor; ) break; case Op_LShiftI: @@ -201,10 +201,10 @@ MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpl bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { const MemPointerAliasing aliasing = simple_form().get_aliasing_with(other.simple_form()); - tty->print_cr("MemPointer::is_adjacent_to_and_before"); - simple_form().print(); - other.simple_form().print(); - tty->print("Aliasing: "); aliasing.print(); tty->cr(); + // tty->print_cr("MemPointer::is_adjacent_to_and_before"); + // simple_form().print(); + // other.simple_form().print(); + // tty->print("Aliasing: "); aliasing.print(); tty->cr(); return aliasing.is_always_at_distance(mem()->memory_size()); } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index af0d4f9c1b6cd..ef970a1d11ac7 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -372,7 
+372,7 @@ class MemPointer : public StackObj { _mem(mem), _simple_form(init_simple_form(_mem)) { - _simple_form.print(); // TODO tracing??? + // _simple_form.print(); // TODO tracing??? } const MemNode* mem() const { return _mem; } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index a94004d8e26c3..a5a184c6fe0e9 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -33,7 +33,7 @@ /* * @test - * @bug 8318446 8331054 8331311 + * @bug 8318446 8331054 8331311 8335392 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / @@ -42,7 +42,7 @@ /* * @test - * @bug 8318446 8331054 8331311 + * @bug 8318446 8331054 8331311 8335392 * @summary Test merging of consecutive stores * @modules java.base/jdk.internal.misc * @library /test/lib / @@ -611,9 +611,8 @@ static Object[] test1e(byte[] a) { } @Test - // Disabled by JDK-8335390, to be enabled again by JDK-8335392. 
- // @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, - // applyIf = {"UseUnalignedAccesses", "true"}) + @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test1f(byte[] a) { UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0, (byte)0xbe); UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1, (byte)0xba); From fbbb1275cdde28d391a47ffe4a3983fa4e4bfc1a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 15 Jul 2024 15:40:57 +0200 Subject: [PATCH 21/89] wip fuzzing --- .../compiler/c2/TestMergeStoresFuzzer.java | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java new file mode 100644 index 0000000000000..fc7e94613bf46 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2; + +/* + * @test + * @bug 8318446 8335392 + * @summary Test merging of consecutive stores, and more specifically the MemPointer. + * @modules java.base/jdk.internal.misc + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresFuzzer + */ + +import javax.tools.Diagnostic; +import javax.tools.DiagnosticCollector; +import javax.tools.JavaCompiler; +import javax.tools.JavaCompiler.CompilationTask; +import javax.tools.JavaFileObject; +import javax.tools.SimpleJavaFileObject; +import javax.tools.ToolProvider; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.reflect.InvocationTargetException; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; + +import compiler.lib.ir_framework.*; + +public class TestMergeStoresFuzzer { + public static void main(String args[]) throws IOException { + JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); + DiagnosticCollector diagnostics = new DiagnosticCollector(); + + StringWriter writer = new StringWriter(); + PrintWriter out = new PrintWriter(writer); + out.println("import compiler.lib.ir_framework.*;"); + out.println(""); + out.println("public class HelloWorld {"); + out.println(" public static void main(String args[]) {"); + out.println(" System.out.println(\"This is in another java file\");"); + out.println(" TestFramework.run(HelloWorld.class);"); + out.println(" System.out.println(\"Done with IR framework.\");"); + 
out.println(" }"); + out.println(""); + out.println(" @Test"); + out.println(" static void test() {"); + out.println(" throw new RuntimeException(\"xyz\");"); + out.println(" }"); + out.println("}"); + out.close(); + JavaFileObject file = new JavaSourceFromString("HelloWorld", writer.toString()); + + Iterable compilationUnits = Arrays.asList(file); + List optionList = new ArrayList(); + optionList.add("-classpath"); + optionList.add(System.getProperty("test.classes")); + CompilationTask task = compiler.getTask(null, null, diagnostics, optionList, null, compilationUnits); + + boolean success = task.call(); + for (Diagnostic diagnostic : diagnostics.getDiagnostics()) { + System.out.println(diagnostic.getCode()); + System.out.println(diagnostic.getKind()); + System.out.println(diagnostic.getPosition()); + System.out.println(diagnostic.getStartPosition()); + System.out.println(diagnostic.getEndPosition()); + System.out.println(diagnostic.getSource()); + System.out.println(diagnostic.getMessage(null)); + } + + if (success) { + System.out.println("Compilation successfull, invoking test..."); + try { + ClassLoader sysLoader = ClassLoader.getSystemClassLoader(); + // Classpath for all included classes (e.g. IR Framework). 
+ URL[] urls = new URL[] { new File("").toURI().toURL(), + new File(System.getProperty("test.classes")).toURI().toURL()}; + URLClassLoader classLoader = URLClassLoader.newInstance(urls, sysLoader); + Class.forName("HelloWorld", true, classLoader).getDeclaredMethod("main", new Class[] { String[].class }).invoke(null, new Object[] { null }); + + } catch (ClassNotFoundException e) { + throw new RuntimeException("Class not found:", e); + } catch (NoSuchMethodException e) { + throw new RuntimeException("No such method:", e); + } catch (IllegalAccessException e) { + throw new RuntimeException("Illegal access:", e); + } catch (InvocationTargetException e) { + throw new RuntimeException("Invocation target:", e); + } + + System.out.println("Invocation successful."); + } else { + System.out.println("Compilation failed."); + throw new RuntimeException("Compilation failed."); + } + } +} + +class JavaSourceFromString extends SimpleJavaFileObject { + final String code; + + JavaSourceFromString(String name, String code) { + super(URI.create("string:///" + name.replace('.','/') + Kind.SOURCE.extension), Kind.SOURCE); + this.code = code; + } + + @Override + public CharSequence getCharContent(boolean ignoreEncodingErrors) { + return code; + } +} From 4add54d1e32cc27d2a5f1ecd1a6eda1b09c15837 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 15 Jul 2024 16:24:58 +0200 Subject: [PATCH 22/89] make slicker --- test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java index fc7e94613bf46..4ed819a879db6 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java @@ -51,8 +51,6 @@ import java.util.ArrayList; import java.util.List; -import compiler.lib.ir_framework.*; - public class TestMergeStoresFuzzer { public static void 
main(String args[]) throws IOException { JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); @@ -97,11 +95,10 @@ public static void main(String args[]) throws IOException { if (success) { System.out.println("Compilation successfull, invoking test..."); try { - ClassLoader sysLoader = ClassLoader.getSystemClassLoader(); // Classpath for all included classes (e.g. IR Framework). URL[] urls = new URL[] { new File("").toURI().toURL(), new File(System.getProperty("test.classes")).toURI().toURL()}; - URLClassLoader classLoader = URLClassLoader.newInstance(urls, sysLoader); + URLClassLoader classLoader = URLClassLoader.newInstance(urls); Class.forName("HelloWorld", true, classLoader).getDeclaredMethod("main", new Class[] { String[].class }).invoke(null, new Object[] { null }); } catch (ClassNotFoundException e) { From 39a6f4c97ec9ec7974c7ed14a40e40d10888603e Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 12 Aug 2024 12:24:09 +0200 Subject: [PATCH 23/89] rm test stub --- .../compiler/c2/TestMergeStoresFuzzer.java | 134 ------------------ 1 file changed, 134 deletions(-) delete mode 100644 test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java deleted file mode 100644 index 4ed819a879db6..0000000000000 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresFuzzer.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package compiler.c2; - -/* - * @test - * @bug 8318446 8335392 - * @summary Test merging of consecutive stores, and more specifically the MemPointer. - * @modules java.base/jdk.internal.misc - * @library /test/lib / - * @run driver compiler.c2.TestMergeStoresFuzzer - */ - -import javax.tools.Diagnostic; -import javax.tools.DiagnosticCollector; -import javax.tools.JavaCompiler; -import javax.tools.JavaCompiler.CompilationTask; -import javax.tools.JavaFileObject; -import javax.tools.SimpleJavaFileObject; -import javax.tools.ToolProvider; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.lang.reflect.InvocationTargetException; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.List; - -public class TestMergeStoresFuzzer { - public static void main(String args[]) throws IOException { - JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); - DiagnosticCollector diagnostics = new DiagnosticCollector(); - - StringWriter writer = new StringWriter(); - PrintWriter out = new PrintWriter(writer); - out.println("import compiler.lib.ir_framework.*;"); - out.println(""); - out.println("public class HelloWorld {"); - out.println(" public static void main(String args[]) {"); - out.println(" System.out.println(\"This is in another java file\");"); - out.println(" 
TestFramework.run(HelloWorld.class);"); - out.println(" System.out.println(\"Done with IR framework.\");"); - out.println(" }"); - out.println(""); - out.println(" @Test"); - out.println(" static void test() {"); - out.println(" throw new RuntimeException(\"xyz\");"); - out.println(" }"); - out.println("}"); - out.close(); - JavaFileObject file = new JavaSourceFromString("HelloWorld", writer.toString()); - - Iterable compilationUnits = Arrays.asList(file); - List optionList = new ArrayList(); - optionList.add("-classpath"); - optionList.add(System.getProperty("test.classes")); - CompilationTask task = compiler.getTask(null, null, diagnostics, optionList, null, compilationUnits); - - boolean success = task.call(); - for (Diagnostic diagnostic : diagnostics.getDiagnostics()) { - System.out.println(diagnostic.getCode()); - System.out.println(diagnostic.getKind()); - System.out.println(diagnostic.getPosition()); - System.out.println(diagnostic.getStartPosition()); - System.out.println(diagnostic.getEndPosition()); - System.out.println(diagnostic.getSource()); - System.out.println(diagnostic.getMessage(null)); - } - - if (success) { - System.out.println("Compilation successfull, invoking test..."); - try { - // Classpath for all included classes (e.g. IR Framework). 
- URL[] urls = new URL[] { new File("").toURI().toURL(), - new File(System.getProperty("test.classes")).toURI().toURL()}; - URLClassLoader classLoader = URLClassLoader.newInstance(urls); - Class.forName("HelloWorld", true, classLoader).getDeclaredMethod("main", new Class[] { String[].class }).invoke(null, new Object[] { null }); - - } catch (ClassNotFoundException e) { - throw new RuntimeException("Class not found:", e); - } catch (NoSuchMethodException e) { - throw new RuntimeException("No such method:", e); - } catch (IllegalAccessException e) { - throw new RuntimeException("Illegal access:", e); - } catch (InvocationTargetException e) { - throw new RuntimeException("Invocation target:", e); - } - - System.out.println("Invocation successful."); - } else { - System.out.println("Compilation failed."); - throw new RuntimeException("Compilation failed."); - } - } -} - -class JavaSourceFromString extends SimpleJavaFileObject { - final String code; - - JavaSourceFromString(String name, String code) { - super(URI.create("string:///" + name.replace('.','/') + Kind.SOURCE.extension), Kind.SOURCE); - this.code = code; - } - - @Override - public CharSequence getCharContent(boolean ignoreEncodingErrors) { - return code; - } -} From 37de8646284d894229e351312892faa618ffa824 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 12 Aug 2024 14:39:25 +0200 Subject: [PATCH 24/89] add another test case --- .../c2/TestMergeStoresUnsafeArrayPointer.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index dbfdfe6895766..6ee4249f938c6 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -52,6 +52,7 @@ public class TestMergeStoresUnsafeArrayPointer { static final long ANCHOR = BYTE_SIZE / 2; static int four = 4; + 
static int max_int = Integer.MAX_VALUE; public static void main(String[] args) { System.out.println("Allocate big array of SIZE = " + SIZE); @@ -95,6 +96,23 @@ public static void main(String[] args) { } } + val = 0; + System.out.println("test3"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test3(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test3 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -129,4 +147,11 @@ static void test2(int[] a, long anchor) { UNSAFE.putInt(a, base + 0 + (long)(four + Integer.MAX_VALUE), 0x42424242); UNSAFE.putInt(a, base + Integer.MAX_VALUE + (long)(four + 4 ), 0x66666666); } + + // Test: if MergeStores is applied this can lead to wrong results + static void test3(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(max_int + 0), 0x42424242); + UNSAFE.putInt(a, base + (long)(max_int + 4), 0x66666666); + } } From dcb5e96b48353e8f9aae010137dcd733fa0559d1 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 12 Aug 2024 17:32:06 +0200 Subject: [PATCH 25/89] Handle AddI overflow --- src/hotspot/share/opto/mempointer.cpp | 35 ++++++++++++++++++++++++++- src/hotspot/share/opto/mempointer.hpp | 16 ++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 1b59c8d589969..42a56b5fd6acb 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -32,6 +32,8 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { Node* pointer = _mem->in(MemNode::Address); + // pointer->dump_bfs(4,0,"#"); + const NoOverflowInt one(1); _worklist.push(MemPointerSummand(pointer, one LP64_ONLY( COMMA one ))); @@ -90,7 +92,8 @@ void 
MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_AddL: case Op_AddI: { - // TODO check if we should decompose or not: int-overflow!!! + LP64_ONLY( if (opc == Op_AddI && !is_safe_from_int_overflow(scaleL)) { break; } ) + Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); @@ -179,6 +182,36 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su _summands.push(summand); } +#ifdef _LP64 +bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const NoOverflowInt scaleL) { + // TODO needed? + if (scaleL.is_NaN()) { + assert(false, "scaleL must not be NaN"); + return false; + } + + const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); + if (ary_ptr_t != nullptr) { + // Array accesses that are not Unsafe always have a RangeCheck which ensures + // that there is no int overflow. + if (!_mem->is_unsafe_access()) { + return true; + } + + // TODO + BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type(); + if (is_java_primitive(array_element_bt)) { + NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt)); + if (scaleL.is_multiple_of(array_element_size_in_bytes)) { + return true; + } + } + } + + return false; +} +#endif + MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other) const { // Check if all summands are the same: for (uint i = 0; i < SUMMANDS_SIZE; i++) { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index ef970a1d11ac7..4a599b4d87a33 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -92,6 +92,21 @@ class NoOverflowInt { return *this; } + NoOverflowInt abs() const { + if (is_NaN()) { return make_NaN(); } + if (value() >= 0) { return *this; } + return NoOverflowInt(0) - *this; + } + + bool 
is_multiple_of(const NoOverflowInt other) const { + NoOverflowInt a = this->abs(); + NoOverflowInt b = other.abs(); + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + if (b.is_zero()) { return false; } + return a.value() % b.value() == 0; + } + #ifndef PRODUCT void print() const { if (is_NaN()) { @@ -358,6 +373,7 @@ class MemPointerSimpleFormParser : public StackObj { private: MemPointerSimpleForm parse_simple_form(); void parse_sub_expression(const MemPointerSummand summand); + LP64_ONLY( bool is_safe_from_int_overflow(const NoOverflowInt scaleL); ) }; // TODO From 1414e6dd768cc3351954a95a3682b493b784b9bf Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 12 Aug 2024 17:51:57 +0200 Subject: [PATCH 26/89] SubI test --- .../c2/TestMergeStoresUnsafeArrayPointer.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index 6ee4249f938c6..1516684134a79 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -53,6 +53,7 @@ public class TestMergeStoresUnsafeArrayPointer { static int four = 4; static int max_int = Integer.MAX_VALUE; + static int min_int = Integer.MIN_VALUE; public static void main(String[] args) { System.out.println("Allocate big array of SIZE = " + SIZE); @@ -113,6 +114,40 @@ public static void main(String[] args) { } } + val = 0; + System.out.println("test4"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test4(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test4 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + + val = 0; + System.out.println("test5"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test5(big, ANCHOR); + long sum = 
testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test5 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -149,9 +184,26 @@ static void test2(int[] a, long anchor) { } // Test: if MergeStores is applied this can lead to wrong results + // -> AddI needs overflow check. static void test3(int[] a, long anchor) { long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; UNSAFE.putInt(a, base + (long)(max_int + 0), 0x42424242); UNSAFE.putInt(a, base + (long)(max_int + 4), 0x66666666); } + + // Test: "max_int - four" cannot be parsed further, but would not make a difference here. + static void test4(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(min_int - four) + 0, 0x42424242); + UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); + } + + + // Test: if MergeStores is applied this can lead to wrong results + // -> SubI needs overflow check. 
+ static void test5(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(min_int) - (long)(four) + 0, 0x42424242); // no overflow + UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); // overflow + } } From 7cfe51ea490b1058ea92bf85cdd8731b35e886b9 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 12 Aug 2024 17:53:40 +0200 Subject: [PATCH 27/89] fix SubI parsing --- src/hotspot/share/opto/mempointer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 42a56b5fd6acb..23b32f5a92aa8 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -103,7 +103,8 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_SubL: case Op_SubI: { - // TODO check if we should decompose or not: int-overflow!!! + LP64_ONLY( if (opc == Op_SubI && !is_safe_from_int_overflow(scaleL)) { break; } ) + Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 
3 : 2); From 6efccfdaef59fec0f52c89d6c461aa3c4fbf571c Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 09:14:11 +0200 Subject: [PATCH 28/89] refactor is_safe_from_int_overflow --- src/hotspot/share/opto/mempointer.cpp | 215 ++++++++++-------- src/hotspot/share/opto/mempointer.hpp | 3 +- .../c2/TestMergeStoresUnsafeArrayPointer.java | 1 - 3 files changed, 120 insertions(+), 99 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 23b32f5a92aa8..09e13d4dc7c99 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -75,116 +75,137 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) const NoOverflowInt one(1); - // n->dump(); - int opc = n->Opcode(); - switch (opc) { + if (is_safe_from_int_overflow(opc LP64_ONLY( COMMA scaleL ))) { + switch (opc) { + case Op_ConI: + case Op_ConL: + { + NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) + : NoOverflowInt(n->get_long()); + _con = _con + scale * con; + // TODO problematic: int con and int scale could overflow??? or irrelevant? + return; + } + case Op_AddP: + case Op_AddL: + case Op_AddI: + { + Node* a = n->in((opc == Op_AddP) ? 2 : 1); + Node* b = n->in((opc == Op_AddP) ? 3 : 2); + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); + return; + } + case Op_SubL: + case Op_SubI: + { + Node* a = n->in((opc == Op_AddP) ? 2 : 1); + Node* b = n->in((opc == Op_AddP) ? 3 : 2); + + NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; + LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? scaleL * NoOverflowInt(-1) + : scaleL; ) + + // If anything went wrong with the scale computation: bailout. 
+ if (sub_scale.is_NaN()) { break; } + LP64_ONLY( if (sub_scaleL.is_NaN()) { break; } ) + + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(b, sub_scale LP64_ONLY( COMMA sub_scaleL ))); + return; + } + case Op_MulL: + case Op_MulI: + case Op_LShiftL: + case Op_LShiftI: + { + // TODO check if we should decompose or not: int-overflow!!! + // Form must be linear: only multiplication with constants is allowed. + Node* in1 = n->in(1); + Node* in2 = n->in(2); + if (!in2->is_Con()) { break; } + NoOverflowInt factor; + LP64_ONLY( NoOverflowInt factorL; ) + switch (opc) { + case Op_MulL: + factor = NoOverflowInt(in2->get_long()); + LP64_ONLY( factorL = factor; ) + break; + case Op_MulI: + factor = NoOverflowInt(in2->get_int()); + LP64_ONLY( factorL = one; ) + break; + case Op_LShiftL: + factor = one << NoOverflowInt(in2->get_int()); + LP64_ONLY( factorL = factor; ) + break; + case Op_LShiftI: + factor = one << NoOverflowInt(in2->get_int()); + LP64_ONLY( factorL = one; ) + break; + } + + // Accumulate scale. + NoOverflowInt new_scale = scale * factor; + LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) + + // Make sure abs(scale) is not larger than "1 << 30". + new_scale = new_scale.truncate_to_30_bits(); + LP64_ONLY( new_scaleL = new_scaleL.truncate_to_30_bits(); ) + + // If anything went wrong with the scale computation: bailout. + if (new_scale.is_NaN()) { break; } + LP64_ONLY( if (new_scaleL.is_NaN()) { break; } ) + + _worklist.push(MemPointerSummand(in1, new_scale LP64_ONLY( COMMA new_scaleL ))); + return; + } + case Op_CastII: + case Op_CastLL: + case Op_CastX2P: + case Op_ConvI2L: + { + Node* a = n->in(1); + _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); + return; + } + } + } + + // Default: could not parse the "summand" further, take it as one of the + // "terminal" summands. + // TODO wording of "terminal summands"? 
+ _summands.push(summand); +} + +bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { +#ifndef _LP64 + // On 32-bit platforms, ... TODO + return true; +#else + + // TODO: trivially safe ops + // Not trivially safe: AddI, SubI, MulI, LShiftI + switch(opc) { case Op_ConI: case Op_ConL: - { - NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) - : NoOverflowInt(n->get_long()); - _con = _con + scale * con; - // TODO problematic: int con and int scale could overflow??? or irrelevant? - return; - } case Op_AddP: case Op_AddL: - case Op_AddI: - { - LP64_ONLY( if (opc == Op_AddI && !is_safe_from_int_overflow(scaleL)) { break; } ) - - Node* a = n->in((opc == Op_AddP) ? 2 : 1); - Node* b = n->in((opc == Op_AddP) ? 3 : 2); - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); - return; - } case Op_SubL: - case Op_SubI: - { - LP64_ONLY( if (opc == Op_SubI && !is_safe_from_int_overflow(scaleL)) { break; } ) - - Node* a = n->in((opc == Op_AddP) ? 2 : 1); - Node* b = n->in((opc == Op_AddP) ? 3 : 2); - - NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; - LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? scaleL * NoOverflowInt(-1) - : scaleL; ) - - // If anything went wrong with the scale computation: bailout. - if (sub_scale.is_NaN()) { break; } - LP64_ONLY( if (sub_scaleL.is_NaN()) { break; } ) - - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - _worklist.push(MemPointerSummand(b, sub_scale LP64_ONLY( COMMA sub_scaleL ))); - return; - } case Op_MulL: - case Op_MulI: case Op_LShiftL: - case Op_LShiftI: - { - // TODO check if we should decompose or not: int-overflow!!! - // Form must be linear: only multiplication with constants is allowed. 
- Node* in1 = n->in(1); - Node* in2 = n->in(2); - if (!in2->is_Con()) { break; } - NoOverflowInt factor; - LP64_ONLY( NoOverflowInt factorL; ) - switch (opc) { - case Op_MulL: - factor = NoOverflowInt(in2->get_long()); - LP64_ONLY( factorL = factor; ) - break; - case Op_MulI: - factor = NoOverflowInt(in2->get_int()); - LP64_ONLY( factorL = one; ) - break; - case Op_LShiftL: - factor = one << NoOverflowInt(in2->get_int()); - LP64_ONLY( factorL = factor; ) - break; - case Op_LShiftI: - factor = one << NoOverflowInt(in2->get_int()); - LP64_ONLY( factorL = one; ) - break; - } - - // Accumulate scale. - NoOverflowInt new_scale = scale * factor; - LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) - - // Make sure abs(scale) is not larger than "1 << 30". - new_scale = new_scale.truncate_to_30_bits(); - LP64_ONLY( new_scaleL = new_scaleL.truncate_to_30_bits(); ) - - // If anything went wrong with the scale computation: bailout. - if (new_scale.is_NaN()) { break; } - LP64_ONLY( if (new_scaleL.is_NaN()) { break; } ) - - _worklist.push(MemPointerSummand(in1, new_scale LP64_ONLY( COMMA new_scaleL ))); - return; - } case Op_CastII: case Op_CastLL: case Op_CastX2P: case Op_ConvI2L: - { - Node* a = n->in(1); - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - return; - } - } - // Default: could not parse the "summand" further, take it as one of the - // "terminal" summands. - // TODO wording of "terminal summands"? - _summands.push(summand); -} + // TODO to find some counter-examples: + case Op_MulI: + case Op_LShiftI: + return true; + } -#ifdef _LP64 -bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const NoOverflowInt scaleL) { // TODO needed? 
if (scaleL.is_NaN()) { assert(false, "scaleL must not be NaN"); @@ -210,8 +231,8 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const NoOverflowInt s } return false; -} #endif +} MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other) const { // Check if all summands are the same: diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 4a599b4d87a33..3ac25ed5c40cb 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -373,7 +373,8 @@ class MemPointerSimpleFormParser : public StackObj { private: MemPointerSimpleForm parse_simple_form(); void parse_sub_expression(const MemPointerSummand summand); - LP64_ONLY( bool is_safe_from_int_overflow(const NoOverflowInt scaleL); ) + + bool is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; }; // TODO diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index 1516684134a79..a4f2bbfbac170 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -198,7 +198,6 @@ static void test4(int[] a, long anchor) { UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); } - // Test: if MergeStores is applied this can lead to wrong results // -> SubI needs overflow check. 
static void test5(int[] a, long anchor) { From 9dd172fc4ff2a816fa43c91a97815cd0c4440a0b Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 09:27:19 +0200 Subject: [PATCH 29/89] fix LShiftI --- src/hotspot/share/opto/mempointer.cpp | 12 +++++---- .../c2/TestMergeStoresUnsafeArrayPointer.java | 27 +++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 09e13d4dc7c99..7c6473cd95c31 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -84,7 +84,6 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) : NoOverflowInt(n->get_long()); _con = _con + scale * con; - // TODO problematic: int con and int scale could overflow??? or irrelevant? return; } case Op_AddP: @@ -120,7 +119,6 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su case Op_LShiftL: case Op_LShiftI: { - // TODO check if we should decompose or not: int-overflow!!! // Form must be linear: only multiplication with constants is allowed. 
Node* in1 = n->in(1); Node* in2 = n->in(2); @@ -185,8 +183,13 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON return true; #else - // TODO: trivially safe ops - // Not trivially safe: AddI, SubI, MulI, LShiftI + // Not trivially safe: + // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) + // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) + // MulI: ConvI2L(a * conI) != ConvI2L(a) * convI2L(conI) + // LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) + // + // But these are always safe: switch(opc) { case Op_ConI: case Op_ConL: @@ -202,7 +205,6 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON // TODO to find some counter-examples: case Op_MulI: - case Op_LShiftI: return true; } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index a4f2bbfbac170..3308097ae0d26 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -54,6 +54,7 @@ public class TestMergeStoresUnsafeArrayPointer { static int four = 4; static int max_int = Integer.MAX_VALUE; static int min_int = Integer.MIN_VALUE; + static int val_2_to_30 = (1 << 30); public static void main(String[] args) { System.out.println("Allocate big array of SIZE = " + SIZE); @@ -148,6 +149,23 @@ public static void main(String[] args) { } } + val = 0; + System.out.println("test6"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test6(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test6 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -205,4 +223,13 @@ static void test5(int[] a, long anchor) { UNSAFE.putInt(a, base + (long)(min_int) - (long)(four) + 0, 
0x42424242); // no overflow UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); // overflow } + + // Test: if MergeStores is applied this can lead to wrong results + // -> LShiftI needs overflow check. + static void test6(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(2 * val_2_to_30) + 0, 0x42424242); // overflow + UNSAFE.putInt(a, base + 2L * (long)(val_2_to_30) + 4, 0x66666666); // no overflow + } + } From 2f501dc154428542591bf5825374df3fe9afae10 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 09:38:50 +0200 Subject: [PATCH 30/89] fix MulI --- src/hotspot/share/opto/mempointer.cpp | 3 --- .../c2/TestMergeStoresUnsafeArrayPointer.java | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 7c6473cd95c31..e41980bf4afc6 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -202,9 +202,6 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON case Op_CastLL: case Op_CastX2P: case Op_ConvI2L: - - // TODO to find some counter-examples: - case Op_MulI: return true; } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index 3308097ae0d26..9b129324b882b 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -55,6 +55,7 @@ public class TestMergeStoresUnsafeArrayPointer { static int max_int = Integer.MAX_VALUE; static int min_int = Integer.MIN_VALUE; static int val_2_to_30 = (1 << 30); + static int large_by_53 = (int)((1L << 31) / 53L + 1L); public static void main(String[] args) { System.out.println("Allocate big array of SIZE = " + SIZE); @@ -166,6 +167,23 @@ public static void 
main(String[] args) { } } + val = 0; + System.out.println("test7"); + for (int i = 0; i < 100_000; i++) { + testClear(big); + test7(big, ANCHOR); + long sum = testSum(big); + if (i == 0) { + val = sum; + } else { + if (sum != val) { + System.out.println("ERROR: test7 had wrong value: " + val + " != " + sum); + errors++; + break; + } + } + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -232,4 +250,11 @@ static void test6(int[] a, long anchor) { UNSAFE.putInt(a, base + 2L * (long)(val_2_to_30) + 4, 0x66666666); // no overflow } + // Test: if MergeStores is applied this can lead to wrong results + // -> MulI needs overflow check. + static void test7(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putInt(a, base + (long)(53 * large_by_53) + 0, 0x42424242); // overflow + UNSAFE.putInt(a, base + 53L * (long)(large_by_53) + 4, 0x66666666); // no overflow + } } From ccb2df4411f02a21b42693d59d3e446087e3a75d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 09:52:54 +0200 Subject: [PATCH 31/89] fix test400a --- src/hotspot/share/opto/mempointer.cpp | 6 +++--- test/hotspot/jtreg/compiler/c2/TestMergeStores.java | 11 ++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index e41980bf4afc6..7330f7ef2e408 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -184,9 +184,9 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON #else // Not trivially safe: - // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) - // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) - // MulI: ConvI2L(a * conI) != ConvI2L(a) * convI2L(conI) + // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) + // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) + // MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) // LShiftI: ConvI2L(a << conI) != ConvI2L(a) << 
ConvI2L(conI) // // But these are always safe: diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index a5a184c6fe0e9..2dfd7d409b823 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -1559,15 +1559,12 @@ static Object[] test400R(int[] a) { } @Test - // We must be careful with mismatched accesses on arrays: - // An int-array can have about 2x max_int size, and hence if we address bytes in it, we can have int-overflows. - // We might consider addresses (x + 0) and (x + 1) as adjacent, even if x = max_int, and therefore the second - // address overflows and is not adjacent at all. - // Therefore, we should only consider stores that have the same size as the element type of the array. - @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merging + // All constants are known, and AddI can be converted to AddL safely, hence the stores can be merged. 
+ @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", - IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test400a(int[] a) { UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 0, (byte)0xbe); UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 1, (byte)0xba); From 9829b5dfa5f93de9297d3016ddfd2feaa8ad6d98 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 10:42:29 +0200 Subject: [PATCH 32/89] fix 600 series of tests --- .../jtreg/compiler/c2/TestMergeStores.java | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 2dfd7d409b823..02f173b68fe7f 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -234,6 +234,10 @@ public TestMergeStores() { testGroups.get("test600").put("test600R", (_,i) -> { return test600R(aB.clone(), aI.clone(), i); }); testGroups.get("test600").put("test600a", (_,i) -> { return test600a(aB.clone(), aI.clone(), i); }); + testGroups.put("test601", new HashMap()); + testGroups.get("test601").put("test601R", (_,i) -> { return test601R(aB.clone(), aI.clone(), i, offset1); }); + testGroups.get("test601").put("test601a", (_,i) -> { return test601a(aB.clone(), aI.clone(), i, offset1); }); + testGroups.put("test700", new HashMap()); testGroups.get("test700").put("test700R", (_,i) -> { return test700R(aI.clone(), i); }); 
testGroups.get("test700").put("test700a", (_,i) -> { return test700a(aI.clone(), i); }); @@ -292,6 +296,7 @@ public TestMergeStores() { "test501aBE", "test502aBE", "test600a", + "test601a", "test700a", "test800a", "test800aBE"}) @@ -1854,7 +1859,11 @@ static Object[] test600R(byte[] aB, int[] aI, int i) { } @Test - @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8"}) // note: bottom type + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test600a(byte[] aB, int[] aI, int i) { Object a = null; long base = 0; @@ -1865,7 +1874,7 @@ static Object[] test600a(byte[] aB, int[] aI, int i) { a = aI; base = UNSAFE.ARRAY_INT_BASE_OFFSET; } - // array a is an aryptr, but its element type is unknown, i.e. bottom. + // Array type is unknown, i.e. bottom[]. But all AddI can be safely converted to AddL -> safe to merge. 
UNSAFE.putByte(a, base + 0, (byte)0xbe); UNSAFE.putByte(a, base + 1, (byte)0xba); UNSAFE.putByte(a, base + 2, (byte)0xad); @@ -1877,6 +1886,56 @@ static Object[] test600a(byte[] aB, int[] aI, int i) { return new Object[]{ aB, aI }; } + @DontCompile + static Object[] test601R(byte[] aB, int[] aI, int i, int offset1) { + Object a = null; + long base = 0; + if (i % 2 == 0) { + a = aB; + base = UNSAFE.ARRAY_BYTE_BASE_OFFSET; + } else { + a = aI; + base = UNSAFE.ARRAY_INT_BASE_OFFSET; + } + UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef); + UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde); + return new Object[]{ aB, aI }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // nothing merged + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) { + Object a = null; + long base = 0; + if (i % 2 == 0) { + a = aB; + base = UNSAFE.ARRAY_BYTE_BASE_OFFSET; + } else { + a = aI; + base = UNSAFE.ARRAY_INT_BASE_OFFSET; + } + // Array type is unknown, i.e. bottom[]. Hence we do not know the element size of the array. + // Thus, merging is not always safe, there could be overflows. 
+ UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba); + UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef); + UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe); + UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad); + UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde); + return new Object[]{ aB, aI }; + } + @DontCompile static Object[] test700R(int[] a, long v1) { a[0] = (int)(v1 >> -1); From 845801b712b4412954accb225db4a1c8920abffa Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 13 Aug 2024 10:54:28 +0200 Subject: [PATCH 33/89] some TODOs --- src/hotspot/share/opto/mempointer.cpp | 4 ++++ src/hotspot/share/opto/mempointer.hpp | 1 + 2 files changed, 5 insertions(+) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 7330f7ef2e408..008e93e93bd49 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -43,6 +43,8 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { parse_sub_expression(_worklist.pop()); } + // TODO: sort and combine summands! + // for (int i = 0; i < _summands.length(); i++) { // MemPointerSummand summand = _summands.at(i); // summand.print(); @@ -205,6 +207,8 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON return true; } + // TODO tests with native memory, etc. + // TODO needed? 
if (scaleL.is_NaN()) { assert(false, "scaleL must not be NaN"); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 3ac25ed5c40cb..e6b763521af49 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -307,6 +307,7 @@ class MemPointerSimpleForm : public StackObj { MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); + // TODO test with more summands? for (int i = 0; i < summands.length(); i++) { _summands[i] = summands.at(i); } From 91297766beb358188a84f6dc99b808053f191e6a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 10:25:20 +0200 Subject: [PATCH 34/89] first attempts at sort --- src/hotspot/share/opto/mempointer.cpp | 2 ++ src/hotspot/share/opto/mempointer.hpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 008e93e93bd49..f8faee3c5efed 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -43,6 +43,8 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { parse_sub_expression(_worklist.pop()); } + _summands.sort(MemPointerSummand::cmp_for_sort); + // TODO: sort and combine summands! // for (int i = 0; i < _summands.length(); i++) { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index e6b763521af49..3d95124bc56e6 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -253,6 +253,16 @@ class MemPointerSummand : public StackObj { NoOverflowInt scale() const { return _scale; } LP64_ONLY( NoOverflowInt scaleL() const { return _scaleL; } ) + static int cmp_for_sort(MemPointerSummand* p1, MemPointerSummand* p2) { + if (p1->variable() == nullptr) { + return (p2->variable() == nullptr) ? 
0 : 1; + } else if (p2->variable() == nullptr) { + return -1; + } + + return p1->variable()->_idx - p2->variable()->_idx; + } + friend bool operator==(const MemPointerSummand a, const MemPointerSummand b) { // Both "null" -> equal. if (a.variable() == nullptr && b.variable() == nullptr) { return true; } From f068b0bcb2d8ebd2c8e58e7eecb9dd18d642f42b Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 11:39:09 +0200 Subject: [PATCH 35/89] TraceMergeStores CompileCommand refactor --- .../share/compiler/compilerDirectives.cpp | 16 ++- .../share/compiler/compilerDirectives.hpp | 8 ++ src/hotspot/share/compiler/compilerOracle.cpp | 7 + src/hotspot/share/compiler/compilerOracle.hpp | 1 + .../share/compiler/directivesParser.cpp | 10 ++ src/hotspot/share/opto/c2_globals.hpp | 3 - src/hotspot/share/opto/memnode.cpp | 25 +++- .../share/opto/traceMergeStoresTag.hpp | 136 ++++++++++++++++++ 8 files changed, 197 insertions(+), 9 deletions(-) create mode 100644 src/hotspot/share/opto/traceMergeStoresTag.hpp diff --git a/src/hotspot/share/compiler/compilerDirectives.cpp b/src/hotspot/share/compiler/compilerDirectives.cpp index 74259c8e5d436..46750cacc35db 100644 --- a/src/hotspot/share/compiler/compilerDirectives.cpp +++ b/src/hotspot/share/compiler/compilerDirectives.cpp @@ -33,6 +33,7 @@ #include "memory/resourceArea.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/globals_extension.hpp" CompilerDirectives::CompilerDirectives() : _next(nullptr), _match(nullptr), _ref_count(0) { @@ -302,7 +303,8 @@ DirectiveSet::DirectiveSet(CompilerDirectives* d) : _inlinematchers(nullptr), _directive(d), _ideal_phase_name_set(PHASE_NUM_TYPES, mtCompiler), - _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler) + _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler), + _trace_merge_stores_tags(TraceMergeStores::TAG_NUM, mtCompiler) { #define 
init_defaults_definition(name, type, dvalue, compiler) this->name##Option = dvalue; compilerdirectives_common_flags(init_defaults_definition) @@ -432,7 +434,6 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle compilerdirectives_c1_flags(init_default_cc) #undef init_default_cc - // Parse PrintIdealPhaseName and create a lookup set #ifndef PRODUCT #ifdef COMPILER2 if (!_modified[TraceAutoVectorizationIndex]) { @@ -445,6 +446,17 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle } } } + if (!_modified[TraceMergeStoresIndex]) { + // Parse ccstr and create mask + ccstrlist option; + if (CompilerOracle::has_option_value(method, CompileCommandEnum::TraceMergeStores, option)) { + TraceMergeStores::TagValidator validator(option, false); + if (validator.is_valid()) { + set.cloned()->set_trace_merge_stores_tags(validator.tags()); + } + } + } + // Parse PrintIdealPhaseName and create a lookup set if (!_modified[PrintIdealPhaseIndex]) { // Parse ccstr and create set ccstrlist option; diff --git a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp index bf15fe9c71397..e960fdb1e53ce 100644 --- a/src/hotspot/share/compiler/compilerDirectives.hpp +++ b/src/hotspot/share/compiler/compilerDirectives.hpp @@ -90,6 +90,7 @@ NOT_PRODUCT(cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLeve cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) #define compilerdirectives_c2_string_flags(cflags) \ NOT_PRODUCT(cflags(TraceAutoVectorization, ccstrlist, "", TraceAutoVectorization)) \ +NOT_PRODUCT(cflags(TraceMergeStores, ccstrlist, "", TraceMergeStores)) \ NOT_PRODUCT(cflags(PrintIdealPhase, ccstrlist, "", PrintIdealPhase)) #else #define compilerdirectives_c2_other_flags(cflags) @@ -131,6 +132,7 @@ class DirectiveSet : public CHeapObj { TriBoolArray<(size_t)vmIntrinsics::number_of_intrinsics(), int> _intrinsic_control_words; CHeapBitMap _ideal_phase_name_set; 
CHeapBitMap _trace_auto_vectorization_tags; + CHeapBitMap _trace_merge_stores_tags; public: DirectiveSet(CompilerDirectives* directive); @@ -211,6 +213,12 @@ void set_##name(void* value) { \ const CHeapBitMap& trace_auto_vectorization_tags() { return _trace_auto_vectorization_tags; }; + void set_trace_merge_stores_tags(const CHeapBitMap& tags) { + _trace_merge_stores_tags.set_from(tags); + }; + const CHeapBitMap& trace_merge_stores_tags() { + return _trace_merge_stores_tags; + }; void print_intx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" INTX_FORMAT " ", n, v); } } void print_uintx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" UINTX_FORMAT " ", n, v); } } diff --git a/src/hotspot/share/compiler/compilerOracle.cpp b/src/hotspot/share/compiler/compilerOracle.cpp index a81d60c9fc422..a4cb25a3b4283 100644 --- a/src/hotspot/share/compiler/compilerOracle.cpp +++ b/src/hotspot/share/compiler/compilerOracle.cpp @@ -36,6 +36,7 @@ #include "oops/symbol.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" #include "runtime/jniHandles.hpp" @@ -798,6 +799,12 @@ static void scan_value(enum OptionType type, char* line, int& total_bytes_read, else if (option == CompileCommandEnum::TraceAutoVectorization) { TraceAutoVectorizationTagValidator validator(value, true); + if (!validator.is_valid()) { + jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what()); + } + } else if (option == CompileCommandEnum::TraceMergeStores) { + TraceMergeStores::TagValidator validator(value, true); + if (!validator.is_valid()) { jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what()); } diff --git a/src/hotspot/share/compiler/compilerOracle.hpp b/src/hotspot/share/compiler/compilerOracle.hpp index 
5864ca5dc0dd7..0e55ca416e03c 100644 --- a/src/hotspot/share/compiler/compilerOracle.hpp +++ b/src/hotspot/share/compiler/compilerOracle.hpp @@ -86,6 +86,7 @@ NOT_PRODUCT(option(PrintIdeal, "PrintIdeal", Bool)) \ NOT_PRODUCT(option(PrintIdealPhase, "PrintIdealPhase", Ccstrlist)) \ NOT_PRODUCT(option(IGVPrintLevel, "IGVPrintLevel", Intx)) \ NOT_PRODUCT(option(TraceAutoVectorization, "TraceAutoVectorization", Ccstrlist)) \ +NOT_PRODUCT(option(TraceMergeStores, "TraceMergeStores", Ccstrlist)) \ option(Vectorize, "Vectorize", Bool) \ option(CloneMapDebug, "CloneMapDebug", Bool) \ option(IncrementalInlineForceCleanup, "IncrementalInlineForceCleanup", Bool) \ diff --git a/src/hotspot/share/compiler/directivesParser.cpp b/src/hotspot/share/compiler/directivesParser.cpp index 5501490028349..e28a5cd99d3b7 100644 --- a/src/hotspot/share/compiler/directivesParser.cpp +++ b/src/hotspot/share/compiler/directivesParser.cpp @@ -29,6 +29,7 @@ #include "memory/resourceArea.hpp" #include "opto/phasetype.hpp" #include "opto/traceAutoVectorizationTag.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "runtime/os.hpp" #include @@ -347,6 +348,15 @@ bool DirectivesParser::set_option_flag(JSON_TYPE t, JSON_VAL* v, const key* opti } else { error(VALUE_ERROR, "Unrecognized tag name detected in TraceAutoVectorization: %s", validator.what()); } + } else if (strncmp(option_key->name, "TraceMergeStores", 16) == 0) { + TraceMergeStores::TagValidator validator(s, false); + + valid = validator.is_valid(); + if (valid) { + set->set_trace_merge_stores_tags(validator.tags()); + } else { + error(VALUE_ERROR, "Unrecognized tag name detected in TraceMergeStores: %s", validator.what()); + } } else if (strncmp(option_key->name, "PrintIdealPhase", 15) == 0) { PhaseNameValidator validator(s); diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 7288533cd33e7..e6dae9e457f3d 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ 
b/src/hotspot/share/opto/c2_globals.hpp @@ -356,9 +356,6 @@ product(bool, MergeStores, true, DIAGNOSTIC, \ "Optimize stores by combining values into larger store") \ \ - develop(bool, TraceMergeStores, false, \ - "Trace creation of merged stores") \ - \ product_pd(bool, OptoBundling, \ "Generate nops to fill i-cache lines") \ \ diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 7c8468ada314d..2060447d5a247 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -48,6 +48,7 @@ #include "opto/phaseX.hpp" #include "opto/regmask.hpp" #include "opto/rootnode.hpp" +#include "opto/traceMergeStoresTag.hpp" #include "opto/vectornode.hpp" #include "utilities/align.hpp" #include "utilities/copy.hpp" @@ -2738,8 +2739,13 @@ class MergePrimitiveStores : public StackObj { PhaseGVN* _phase; StoreNode* _store; + NOT_PRODUCT( const CHeapBitMap &_trace_tags; ) + public: - MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {} + MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) : + _phase(phase), _store(store) + NOT_PRODUCT( COMMA _trace_tags(Compile::current()->directive()->trace_merge_stores_tags()) ) + {} StoreNode* run(); @@ -2783,7 +2789,18 @@ class MergePrimitiveStores : public StackObj { Node* make_merged_input_value(const Node_List& merge_list); StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value); - DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; ) +#ifndef PRODUCT + // Access to TraceMergeStores tags + bool is_trace(TraceMergeStores::Tag tag) const { + return _trace_tags.at(tag); + } + + bool is_trace_success() const { + return is_trace(TraceMergeStores::Tag::SUCCESS); + } +#endif + + NOT_PRODUCT( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; ) }; StoreNode* MergePrimitiveStores::run() { @@ 
-2817,7 +2834,7 @@ StoreNode* MergePrimitiveStores::run() { StoreNode* merged_store = make_merged_store(merge_list, merged_input_value); - DEBUG_ONLY( if(TraceMergeStores) { trace(merge_list, merged_input_value, merged_store); } ) + NOT_PRODUCT( if(is_trace_success()) { trace(merge_list, merged_input_value, merged_store); } ) return merged_store; } @@ -3183,7 +3200,7 @@ StoreNode* MergePrimitiveStores::make_merged_store(const Node_List& merge_list, return merged_store; } -#ifdef ASSERT +#ifndef PRODUCT void MergePrimitiveStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const { stringStream ss; ss.print_cr("[TraceMergeStores]: Replace"); diff --git a/src/hotspot/share/opto/traceMergeStoresTag.hpp b/src/hotspot/share/opto/traceMergeStoresTag.hpp new file mode 100644 index 0000000000000..e295e2a9a9ebf --- /dev/null +++ b/src/hotspot/share/opto/traceMergeStoresTag.hpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_OPTO_TRACEMERGESTORESTAG_HPP +#define SHARE_OPTO_TRACEMERGESTORESTAG_HPP + +#include "utilities/bitMap.inline.hpp" +#include "utilities/stringUtils.hpp" + +namespace TraceMergeStores { + #define COMPILER_TAG(flags) \ + flags(MEM_POINTER, "Trace MemPointer (verbose)") \ + flags(ALIASING, "Trace MemPointerSimpleForm::get_aliasing_with") \ + flags(SUCCESS, "Trace successful merges") \ + + #define table_entry(name, description) name, + enum Tag { + COMPILER_TAG(table_entry) + TAG_NUM, + TAG_NONE + }; + #undef table_entry + + static const char* tag_descriptions[] = { + #define array_of_labels(name, description) description, + COMPILER_TAG(array_of_labels) + #undef array_of_labels + }; + + static const char* tag_names[] = { + #define array_of_labels(name, description) #name, + COMPILER_TAG(array_of_labels) + #undef array_of_labels + }; + + static Tag find_tag(const char* str) { + for (int i = 0; i < TAG_NUM; i++) { + if (strcmp(tag_names[i], str) == 0) { + return (Tag)i; + } + } + return TAG_NONE; + } + + class TagValidator { + private: + CHeapBitMap _tags; + bool _valid; + char* _bad; + bool _is_print_usage; + + public: + TagValidator(ccstrlist option, bool is_print_usage) : + _tags(TAG_NUM, mtCompiler), + _valid(true), + _bad(nullptr), + _is_print_usage(is_print_usage) + { + for (StringUtils::CommaSeparatedStringIterator iter(option); *iter != nullptr && _valid; ++iter) { + char const* tag_name = *iter; + if (strcmp("help", tag_name) == 0) { + if (_is_print_usage) { + print_help(); + } + continue; + } + bool set_bit = true; + // Check for "TAG" or "-TAG" + if (strncmp("-", tag_name, strlen("-")) == 0) { + tag_name++; + set_bit = false; + } + Tag tag = find_tag(tag_name); + if (TAG_NONE == tag) { + // cap len to a value we know is enough for all tags + const size_t len = 
MIN2(strlen(*iter), 63) + 1; + _bad = NEW_C_HEAP_ARRAY(char, len, mtCompiler); + // strncpy always writes len characters. If the source string is + // shorter, the function fills the remaining bytes with nulls. + strncpy(_bad, *iter, len); + _valid = false; + } else { + assert(tag < TAG_NUM, "out of bounds"); + _tags.at_put(tag, set_bit); + } + } + } + + ~TagValidator() { + if (_bad != nullptr) { + FREE_C_HEAP_ARRAY(char, _bad); + } + } + + bool is_valid() const { return _valid; } + const char* what() const { return _bad; } + const CHeapBitMap& tags() const { + assert(is_valid(), "only read tags when valid"); + return _tags; + } + + static void print_help() { + tty->cr(); + tty->print_cr("Usage for CompileCommand TraceMergeStores:"); + tty->print_cr(" -XX:CompileCommand=TraceMergeStores,,"); + tty->print_cr(" %-22s %s", "tags", "descriptions"); + for (int i = 0; i < TAG_NUM; i++) { + tty->print_cr(" %-22s %s", tag_names[i], tag_descriptions[i]); + } + tty->cr(); + } + }; +} + +#endif // SHARE_OPTO_TRACEMERGESTORESTAG_HPP From 128c710b529a9cffa3566539c33709cf24503a77 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 14:36:49 +0200 Subject: [PATCH 36/89] more details for tracing --- src/hotspot/share/opto/memnode.cpp | 21 +++++++++- src/hotspot/share/opto/mempointer.cpp | 42 ++++++++++++++++--- src/hotspot/share/opto/mempointer.hpp | 39 +++++++++++++++-- .../share/opto/traceMergeStoresTag.hpp | 3 +- 4 files changed, 93 insertions(+), 12 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 2060447d5a247..b982ff680fe8b 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2795,6 +2795,18 @@ class MergePrimitiveStores : public StackObj { return _trace_tags.at(tag); } + bool is_trace_pointer() const { + return is_trace(TraceMergeStores::Tag::POINTER); + } + + bool is_trace_aliasing() const { + return is_trace(TraceMergeStores::Tag::ALIASING); + } + + bool 
is_trace_adjacency() const { + return is_trace(TraceMergeStores::Tag::ADJACENCY); + } + bool is_trace_success() const { return is_trace(TraceMergeStores::Tag::SUCCESS); } @@ -2862,8 +2874,13 @@ bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const St } ResourceMark rm; - const MemPointer pointer_use(_phase, use_store); - const MemPointer pointer_def(_phase, def_store); +#ifndef PRODUCT + const TraceMemPointer trace(is_trace_pointer(), + is_trace_aliasing(), + is_trace_adjacency()); +#endif + const MemPointer pointer_use(_phase, use_store NOT_PRODUCT( COMMA trace )); + const MemPointer pointer_def(_phase, def_store NOT_PRODUCT( COMMA trace )); if (!pointer_def.is_adjacent_to_and_before(pointer_use)) { return false; } diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index f8faee3c5efed..db673616c3999 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -239,12 +239,26 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON #endif } -MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other) const { +MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other + NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr("MemPointerSimpleForm::get_aliasing_with:"); + print(); + other.print(); + } +#endif + // Check if all summands are the same: for (uint i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand s1 = summands_at(i); const MemPointerSummand s2 = other.summands_at(i); if (s1 != s2) { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr(" -> Aliasing unknown, differ on summand %d.", i); + } +#endif return MemPointerAliasing::make_unknown(); } } @@ -253,18 +267,34 @@ MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpl NoOverflowInt distance = 
other.con() - con(); distance = distance.truncate_to_30_bits(); if (distance.is_NaN()) { +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr(" -> Aliasing unknown, distance is NaN."); + } +#endif return MemPointerAliasing::make_unknown(); } +#ifndef PRODUCT + if (trace.is_trace_aliasing()) { + tty->print_cr(" -> Aliasing always, distance = %d.", distance.value()); + } +#endif return MemPointerAliasing::make_always(distance.value()); } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { - const MemPointerAliasing aliasing = simple_form().get_aliasing_with(other.simple_form()); - // tty->print_cr("MemPointer::is_adjacent_to_and_before"); - // simple_form().print(); - // other.simple_form().print(); - // tty->print("Aliasing: "); aliasing.print(); tty->cr(); + const MemPointerSimpleForm& s1 = simple_form(); + const MemPointerSimpleForm& s2 = other.simple_form(); + const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace )); + +#ifndef PRODUCT + if (_trace.is_trace_adjacency()) { + tty->print("Aliasing for adjacency: "); aliasing.print(); tty->cr(); + // TODO + } +#endif + return aliasing.is_always_at_distance(mem()->memory_size()); } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 3d95124bc56e6..1cdfcb4880223 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -118,6 +118,28 @@ class NoOverflowInt { #endif }; +#ifndef PRODUCT +class TraceMemPointer : public StackObj { +private: + const bool _is_trace_pointer; + const bool _is_trace_aliasing; + const bool _is_trace_adjacency; + +public: + TraceMemPointer(const bool is_trace_pointer, + const bool is_trace_aliasing, + const bool is_trace_adjacency) : + _is_trace_pointer( is_trace_pointer), + _is_trace_aliasing( is_trace_aliasing), + _is_trace_adjacency(is_trace_adjacency) + {} + + bool is_trace_pointer() const { return _is_trace_pointer; } + bool 
is_trace_aliasing() const { return _is_trace_aliasing; } + bool is_trace_adjacency() const { return _is_trace_adjacency; } +}; +#endif + // Class to represent aliasing between two MemPointer. class MemPointerAliasing { public: @@ -332,7 +354,8 @@ class MemPointerSimpleForm : public StackObj { } } - MemPointerAliasing get_aliasing_with(const MemPointerSimpleForm& other) const; + MemPointerAliasing get_aliasing_with(const MemPointerSimpleForm& other + NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const; const MemPointerSummand summands_at(const uint i) const { assert(i < SUMMANDS_SIZE, "in bounds"); @@ -394,13 +417,23 @@ class MemPointer : public StackObj { const MemNode* _mem; const MemPointerSimpleForm _simple_form; + NOT_PRODUCT( const TraceMemPointer& _trace; ) + public: // TODO no need for phase? - MemPointer(PhaseGVN* phase, const MemNode* mem) : + MemPointer(PhaseGVN* phase, const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : _mem(mem), _simple_form(init_simple_form(_mem)) + NOT_PRODUCT( COMMA _trace(trace) ) { - // _simple_form.print(); // TODO tracing??? 
+#ifndef PRODUCT + if (_trace.is_trace_pointer()) { + tty->print_cr("MemPointer::MemPointer:"); + tty->print("mem: "); mem->dump(); + _mem->in(MemNode::Address)->dump_bfs(5, 0, "d"); + _simple_form.print(); + } +#endif } const MemNode* mem() const { return _mem; } diff --git a/src/hotspot/share/opto/traceMergeStoresTag.hpp b/src/hotspot/share/opto/traceMergeStoresTag.hpp index e295e2a9a9ebf..fa126239c5e98 100644 --- a/src/hotspot/share/opto/traceMergeStoresTag.hpp +++ b/src/hotspot/share/opto/traceMergeStoresTag.hpp @@ -30,8 +30,9 @@ namespace TraceMergeStores { #define COMPILER_TAG(flags) \ - flags(MEM_POINTER, "Trace MemPointer (verbose)") \ + flags(POINTER, "Trace pointer IR") \ flags(ALIASING, "Trace MemPointerSimpleForm::get_aliasing_with") \ + flags(ADJACENCY, "Trace adjacency") \ flags(SUCCESS, "Trace successful merges") \ #define table_entry(name, description) name, From a8066916604e12fdea7285aa39809f30fa12cb4a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 15:05:36 +0200 Subject: [PATCH 37/89] make printing more concise --- src/hotspot/share/opto/mempointer.cpp | 6 ++-- src/hotspot/share/opto/mempointer.hpp | 48 +++++++++++++-------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index db673616c3999..022414306e888 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -244,8 +244,8 @@ MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpl #ifndef PRODUCT if (trace.is_trace_aliasing()) { tty->print_cr("MemPointerSimpleForm::get_aliasing_with:"); - print(); - other.print(); + print_on(tty); + other.print_on(tty); } #endif @@ -290,7 +290,7 @@ bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { #ifndef PRODUCT if (_trace.is_trace_adjacency()) { - tty->print("Aliasing for adjacency: "); aliasing.print(); tty->cr(); + tty->print("Aliasing for 
adjacency: "); aliasing.print_on(tty); tty->cr(); // TODO } #endif diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 1cdfcb4880223..f1ce09a67b7d9 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -108,11 +108,11 @@ class NoOverflowInt { } #ifndef PRODUCT - void print() const { + void print_on(outputStream* st) const { if (is_NaN()) { - tty->print("NaN"); + st->print("NaN"); } else { - tty->print("%d", value()); + st->print("%d", value()); } } #endif @@ -214,12 +214,12 @@ class MemPointerAliasing { // } #ifndef PRODUCT - void print() const { + void print_on(outputStream* st) const { switch(_aliasing) { - case Unknown: tty->print("Unknown"); break; - case Never: tty->print("Never"); break; - case Always: tty->print("Always(%d)", _distance); break; - case Maybe: tty->print("Maybe(%d)", _distance); break; + case Unknown: st->print("Unknown"); break; + case Never: st->print("Never"); break; + case Always: st->print("Always(%d)", _distance); break; + case Maybe: st->print("Maybe(%d)", _distance); break; default: ShouldNotReachHere(); } } @@ -299,16 +299,15 @@ class MemPointerSummand : public StackObj { } #ifndef PRODUCT - void print() const { - tty->print(" MemPointerSummand: "); + void print_on(outputStream* st) const { + st->print("Summand["); #ifdef _LP64 - tty->print("(scaleL = "); - _scaleL.print(); - tty->print(") "); + st->print("(scaleL = "); + _scaleL.print_on(st); + st->print(") "); #endif - _scale.print(); - tty->print(" * variable: "); - _variable->dump(); + _scale.print_on(st); + tty->print(" * [%d %s]]", _variable->_idx, _variable->Name()); } #endif }; @@ -365,22 +364,21 @@ class MemPointerSimpleForm : public StackObj { const NoOverflowInt con() const { return _con; } #ifndef PRODUCT - void print() const { + void print_on(outputStream* st) const { if (_pointer == nullptr) { - tty->print_cr("MemPointerSimpleForm empty."); + st->print_cr("MemPointerSimpleForm 
empty."); return; } - tty->print("MemPointerSimpleForm for "); - _pointer->dump(); - tty->print(" con = "); - _con.print(); - tty->cr(); + st->print("MemPointerSimpleForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); + _con.print_on(st); for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; if (summand.variable() != nullptr) { - summand.print(); + st->print(", "); + summand.print_on(st); } } + st->print_cr("]"); } #endif }; @@ -431,7 +429,7 @@ class MemPointer : public StackObj { tty->print_cr("MemPointer::MemPointer:"); tty->print("mem: "); mem->dump(); _mem->in(MemNode::Address)->dump_bfs(5, 0, "d"); - _simple_form.print(); + _simple_form.print_on(tty); } #endif } From 06424baea0e8184039c23c233bcee8b0da93abcf Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 15:06:00 +0200 Subject: [PATCH 38/89] test601a only for 64bit --- test/hotspot/jtreg/compiler/c2/TestMergeStores.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 02f173b68fe7f..be9257a958ba9 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -1912,7 +1912,8 @@ static Object[] test601R(byte[] aB, int[] aI, int i, int offset1) { @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // nothing merged IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", - IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, + applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to fold some 
cases, but not others. static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) { Object a = null; long base = 0; From ea0f0476806536ce1357fca696be016ac2eb6995 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 15:19:20 +0200 Subject: [PATCH 39/89] NoOverflowInt in its own file --- src/hotspot/share/opto/mempointer.hpp | 92 +---------------- src/hotspot/share/opto/noOverflowInt.hpp | 121 +++++++++++++++++++++++ 2 files changed, 122 insertions(+), 91 deletions(-) create mode 100644 src/hotspot/share/opto/noOverflowInt.hpp diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index f1ce09a67b7d9..873886ce6a719 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -26,97 +26,7 @@ #define SHARE_OPTO_MEMPOINTER_HPP #include "opto/memnode.hpp" - -// Wrapper around jint, which detects overflow. -// TODO consider moving to separate file, and have GTests? -class NoOverflowInt { -private: - bool _is_NaN; // overflow, uninitialized, etc. - jint _value; - -public: - // Default: NaN. - NoOverflowInt() : _is_NaN(true), _value(0) {} - - // Create from jlong (or jint) -> NaN if overflows jint. 
- explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { - jint trunc = (jint)value; - if ((jlong)trunc == value) { - _is_NaN = false; - _value = trunc; - } - } - - static NoOverflowInt make_NaN() { return NoOverflowInt(); } - - bool is_NaN() const { return _is_NaN; } - jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } - bool is_zero() const { return !is_NaN() && value() == 0; } - - friend NoOverflowInt operator+(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_add((jlong)a.value(), (jlong)b.value())); - } - - friend NoOverflowInt operator-(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_subtract((jlong)a.value(), (jlong)b.value())); - } - - friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_multiply((jlong)a.value(), (jlong)b.value())); - } - - friend NoOverflowInt operator<<(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - jint shift = b.value(); - if (shift < 0 || shift > 31) { return make_NaN(); } - return NoOverflowInt(java_shift_left((jlong)a.value(), shift)); - } - - friend bool operator==(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return false; } - if (b.is_NaN()) { return false; } - return a.value() == b.value(); - } - - NoOverflowInt truncate_to_30_bits() const { - if (is_NaN()) { return make_NaN(); } - const jint max_value = 1 << 30; - if (value() > max_value || value() < -max_value) { return make_NaN(); } - return *this; - } - - NoOverflowInt abs() const { - if (is_NaN()) { return make_NaN(); } - if (value() >= 0) { return *this; } - return NoOverflowInt(0) - 
*this; - } - - bool is_multiple_of(const NoOverflowInt other) const { - NoOverflowInt a = this->abs(); - NoOverflowInt b = other.abs(); - if (a.is_NaN()) { return false; } - if (b.is_NaN()) { return false; } - if (b.is_zero()) { return false; } - return a.value() % b.value() == 0; - } - -#ifndef PRODUCT - void print_on(outputStream* st) const { - if (is_NaN()) { - st->print("NaN"); - } else { - st->print("%d", value()); - } - } -#endif -}; +#include "opto/noOverflowInt.hpp" #ifndef PRODUCT class TraceMemPointer : public StackObj { diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp new file mode 100644 index 0000000000000..2bf336cb988f8 --- /dev/null +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_OPTO_NOOVERFLOWINT_HPP +#define SHARE_OPTO_NOOVERFLOWINT_HPP + +#include "utilities/globalDefinitions.hpp" + +// Wrapper around jint, which detects overflow. +// TODO consider moving to separate file, and have GTests? +class NoOverflowInt { +private: + bool _is_NaN; // overflow, uninitialized, etc. + jint _value; + +public: + // Default: NaN. + NoOverflowInt() : _is_NaN(true), _value(0) {} + + // Create from jlong (or jint) -> NaN if overflows jint. + explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { + jint trunc = (jint)value; + if ((jlong)trunc == value) { + _is_NaN = false; + _value = trunc; + } + } + + static NoOverflowInt make_NaN() { return NoOverflowInt(); } + + bool is_NaN() const { return _is_NaN; } + jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } + bool is_zero() const { return !is_NaN() && value() == 0; } + + friend NoOverflowInt operator+(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_add((jlong)a.value(), (jlong)b.value())); + } + + friend NoOverflowInt operator-(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_subtract((jlong)a.value(), (jlong)b.value())); + } + + friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + return NoOverflowInt(java_multiply((jlong)a.value(), (jlong)b.value())); + } + + friend NoOverflowInt operator<<(const NoOverflowInt a, const NoOverflowInt b) { + if (a.is_NaN()) { return make_NaN(); } + if (b.is_NaN()) { return make_NaN(); } + jint shift = b.value(); + if (shift < 0 || shift > 31) { return make_NaN(); } + return NoOverflowInt(java_shift_left((jlong)a.value(), shift)); + } + + friend bool operator==(const NoOverflowInt a, const 
NoOverflowInt b) { + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + return a.value() == b.value(); + } + + NoOverflowInt truncate_to_30_bits() const { + if (is_NaN()) { return make_NaN(); } + const jint max_value = 1 << 30; + if (value() > max_value || value() < -max_value) { return make_NaN(); } + return *this; + } + + NoOverflowInt abs() const { + if (is_NaN()) { return make_NaN(); } + if (value() >= 0) { return *this; } + return NoOverflowInt(0) - *this; + } + + bool is_multiple_of(const NoOverflowInt other) const { + NoOverflowInt a = this->abs(); + NoOverflowInt b = other.abs(); + if (a.is_NaN()) { return false; } + if (b.is_NaN()) { return false; } + if (b.is_zero()) { return false; } + return a.value() % b.value() == 0; + } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + if (is_NaN()) { + st->print("NaN"); + } else { + st->print("%d", value()); + } + } +#endif +}; + +#endif // SHARE_OPTO_NOOVERFLOWINT_HPP From c385835cec61ef36f27731fe7fa75a45a5452cc3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 16:34:33 +0200 Subject: [PATCH 40/89] NoOverflowInt gtests --- src/hotspot/share/opto/mempointer.cpp | 23 ---- src/hotspot/share/opto/noOverflowInt.hpp | 3 +- .../gtest/opto/test_no_overflow_int.cpp | 130 ++++++++++++++++++ 3 files changed, 132 insertions(+), 24 deletions(-) create mode 100644 test/hotspot/gtest/opto/test_no_overflow_int.cpp diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 022414306e888..0fec6c105b6e5 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -47,29 +47,6 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { // TODO: sort and combine summands! - // for (int i = 0; i < _summands.length(); i++) { - // MemPointerSummand summand = _summands.at(i); - // summand.print(); - // } - - // tty->print("con: "); - // _con.print(); - // tty->cr(); - - // TODO gtest??? 
- // NoOverflowInt a(1 << 20); - // a.print(); tty->cr(); - // NoOverflowInt b(1LL << 33); - // b.print(); tty->cr(); - // NoOverflowInt c(55); - // NoOverflowInt d(22); - // NoOverflowInt e = c + d; - // e.print(); tty->cr(); - // NoOverflowInt f(max_jint); - // NoOverflowInt g(max_jint); - // NoOverflowInt h = f + g; - // h.print(); tty->cr(); - return MemPointerSimpleForm::make(pointer, _summands, _con); } diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index 2bf336cb988f8..3837455627b58 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -26,9 +26,10 @@ #define SHARE_OPTO_NOOVERFLOWINT_HPP #include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" // Wrapper around jint, which detects overflow. -// TODO consider moving to separate file, and have GTests? +// If any operation overflows, then it returns a NaN. class NoOverflowInt { private: bool _is_NaN; // overflow, uninitialized, etc. diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp new file mode 100644 index 0000000000000..de51c40da5f62 --- /dev/null +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "opto/noOverflowInt.hpp" +#include "unittest.hpp" + +static void check_jlong(const jlong val) { + const NoOverflowInt x(val); + + if (val > max_jint || min_jint > val) { + ASSERT_TRUE(x.is_NaN()); + } else { + ASSERT_FALSE(x.is_NaN()); + ASSERT_EQ(x.value(), val); + } +} + +TEST_VM(opto, NoOverflowInt_check_jlong) { + jlong start = (jlong)min_jint - 10000LL; + jlong end = (jlong)max_jint + 10000LL; + for (jlong i = start; i < end; i+= 1000LL) { + check_jlong(i); + } + + check_jlong((jlong)min_jint - 1LL); + check_jlong((jlong)min_jint); + check_jlong((jlong)min_jint + 1LL); + check_jlong((jlong)max_jint - 1LL); + check_jlong((jlong)max_jint); + check_jlong((jlong)max_jint + 1LL); + + const NoOverflowInt nan; + ASSERT_TRUE(nan.is_NaN()); +} + +TEST_VM(opto, NoOverflowInt_add_sub) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + ASSERT_EQ((one + two).value(), 3); + ASSERT_EQ((one - two).value(), -1); + ASSERT_TRUE((nan + one).is_NaN()); + ASSERT_TRUE((one + nan).is_NaN()); + ASSERT_TRUE((nan + nan).is_NaN()); + ASSERT_TRUE((nan - one).is_NaN()); + ASSERT_TRUE((one - nan).is_NaN()); + ASSERT_TRUE((nan - nan).is_NaN()); + + ASSERT_EQ((big + one).value(), (1 << 30) + 1); + ASSERT_TRUE((big + big).is_NaN()); + ASSERT_EQ((big - one).value(), (1 << 30) - 1); + ASSERT_EQ((big - big).value(), 0); + + ASSERT_EQ((big - one + big).value(), max_jint); + ASSERT_EQ((zero - big - big).value(), min_jint); + ASSERT_TRUE((zero - big - big - one).is_NaN()); +} + 
+TEST_VM(opto, NoOverflowInt_mul) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + ASSERT_EQ((one * two).value(), 2); + ASSERT_TRUE((nan * one).is_NaN()); + ASSERT_TRUE((one * nan).is_NaN()); + ASSERT_TRUE((nan * nan).is_NaN()); + + ASSERT_EQ((big * one).value(), (1 << 30)); + ASSERT_EQ((one * big).value(), (1 << 30)); + ASSERT_EQ((big * zero).value(), 0); + ASSERT_EQ((zero * big).value(), 0); + ASSERT_TRUE((big * big).is_NaN()); + ASSERT_TRUE((big * two).is_NaN()); + + ASSERT_EQ(((big - one) * two).value(), max_jint - 1); + ASSERT_EQ(((one - big) * two).value(), min_jint + 2); + ASSERT_EQ(((zero - big) * two).value(), min_jint); + ASSERT_TRUE(((big + one) * two).is_NaN()); + ASSERT_TRUE(((zero - big - one) * two).is_NaN()); +} + +TEST_VM(opto, NoOverflowInt_lshift) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + for (int i = 0; i < 31; i++) { + ASSERT_EQ((one << NoOverflowInt(i)).value(), 1LL << i); + } + for (int i = 31; i < 1000; i++) { + ASSERT_TRUE((one << NoOverflowInt(i)).is_NaN()); + } + for (int i = -1000; i < 0; i++) { + ASSERT_TRUE((one << NoOverflowInt(i)).is_NaN()); + } + + ASSERT_EQ((NoOverflowInt(3) << NoOverflowInt(2)).value(), 3 * 4); + ASSERT_EQ((NoOverflowInt(11) << NoOverflowInt(5)).value(), 11 * 32); + ASSERT_EQ((NoOverflowInt(-13) << NoOverflowInt(4)).value(), -13 * 16); +} + From d76db70338c65fd864254aee00dbd5c8322d6d11 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 17:47:04 +0200 Subject: [PATCH 41/89] more test, and fix truncate_to_30_bits --- src/hotspot/share/opto/noOverflowInt.hpp | 2 +- .../gtest/opto/test_no_overflow_int.cpp | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp 
index 3837455627b58..23def9f42cf8d 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -89,7 +89,7 @@ class NoOverflowInt { NoOverflowInt truncate_to_30_bits() const { if (is_NaN()) { return make_NaN(); } const jint max_value = 1 << 30; - if (value() > max_value || value() < -max_value) { return make_NaN(); } + if (value() >= max_value || value() <= -max_value) { return make_NaN(); } return *this; } diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp index de51c40da5f62..260e397be4208 100644 --- a/test/hotspot/gtest/opto/test_no_overflow_int.cpp +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -128,3 +128,57 @@ TEST_VM(opto, NoOverflowInt_lshift) { ASSERT_EQ((NoOverflowInt(-13) << NoOverflowInt(4)).value(), -13 * 16); } +TEST_VM(opto, NoOverflowInt_misc) { + const NoOverflowInt nan; + const NoOverflowInt zero(0); + const NoOverflowInt one(1); + const NoOverflowInt two(2); + const NoOverflowInt big(1 << 30); + + // operator== + ASSERT_FALSE(nan == nan); + ASSERT_FALSE(nan == zero); + ASSERT_FALSE(zero == nan); + ASSERT_TRUE(zero == zero); + ASSERT_TRUE(one == one); + ASSERT_TRUE((one + two) == (two + one)); + ASSERT_TRUE((big + two) == (two + big)); + ASSERT_FALSE((big + big) == (big + big)); + ASSERT_TRUE((big - one + big) == (big - one + big)); + + // truncate_to_30_bits + for (int i = -(1 << 30) + 1; i < (1 << 30); i += 1000) { + ASSERT_EQ(NoOverflowInt(i).truncate_to_30_bits().value(), i); + } + ASSERT_TRUE(big.truncate_to_30_bits().is_NaN()); + ASSERT_FALSE((big - one).truncate_to_30_bits().is_NaN()); + ASSERT_TRUE((zero - big).truncate_to_30_bits().is_NaN()); + ASSERT_FALSE((one - big).truncate_to_30_bits().is_NaN()); + ASSERT_TRUE(nan.truncate_to_30_bits().is_NaN()); + + // abs + for (int i = 0; i < (1 << 31); i += 1024) { + ASSERT_EQ(NoOverflowInt(i).abs().value(), i); + ASSERT_EQ(NoOverflowInt(-i).abs().value(), i); + } + 
ASSERT_EQ(NoOverflowInt(max_jint).abs().value(), max_jint); + ASSERT_EQ(NoOverflowInt(min_jint + 1).abs().value(), max_jint); + ASSERT_TRUE(NoOverflowInt(min_jint).abs().is_NaN()); + ASSERT_TRUE(NoOverflowInt(nan).abs().is_NaN()); + + // is_multiple_of + ASSERT_TRUE(one.is_multiple_of(one)); + ASSERT_FALSE(one.is_multiple_of(nan)); + ASSERT_FALSE(nan.is_multiple_of(one)); + ASSERT_FALSE(nan.is_multiple_of(nan)); + for (int i = 0; i < (1 << 31); i += 1023) { + ASSERT_TRUE(NoOverflowInt(i).is_multiple_of(one)); + ASSERT_TRUE(NoOverflowInt(-i).is_multiple_of(one)); + ASSERT_FALSE(NoOverflowInt(i).is_multiple_of(zero)); + ASSERT_FALSE(NoOverflowInt(-i).is_multiple_of(zero)); + } + ASSERT_TRUE(NoOverflowInt(33 * 7).is_multiple_of(NoOverflowInt(33))); + ASSERT_TRUE(NoOverflowInt(13 * 5).is_multiple_of(NoOverflowInt(5))); + ASSERT_FALSE(NoOverflowInt(7).is_multiple_of(NoOverflowInt(5))); +} + From 899da478d664a3a022d0fa627c8c8b0272fbbbd8 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 14 Aug 2024 18:04:13 +0200 Subject: [PATCH 42/89] simplify MemPointerAliasing to Always and Unknown --- src/hotspot/share/opto/mempointer.hpp | 44 +++------------------------ 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 873886ce6a719..23c2c30c70235 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -56,19 +56,11 @@ class MemPointerAliasing { enum Aliasing { Unknown, // Distance unknown. // Example: two "int[]" with different variable index offsets. - // e.g. "array[i] = array[j]". - Never, // Can never alias. - // Example: "int[]" and "float[]". - // e.g. "intArray[i] = floatArray[i]". - Always, // Constant distance = p1 - p2. + // e.g. "array[i] vs array[j]". + // e.g. "array1[i] vs array2[j]". + Always}; // Constant distance = p1 - p2. // Example: The same address expression, except for a constant offset - // e.g. 
"array[i] = array[i+1]". - Maybe}; // Either "Never" (i.e. different memory objects) - // or "Always" (at constant distance). - // Example: "array1[i] = array2[i]": - // If at runtime "array1 != array2": cannot alias. - // If at runtime "array1 == array2": constant distance. - // TODO consider to simplify for MergeStores...? + // e.g. "array[i] vs array[i+1]". private: const Aliasing _aliasing; const jint _distance; @@ -88,48 +80,20 @@ class MemPointerAliasing { return MemPointerAliasing(); } - static MemPointerAliasing make_never() { - return MemPointerAliasing(Never, 0); - } - static MemPointerAliasing make_always(const jint distance) { return MemPointerAliasing(Always, distance); } - static MemPointerAliasing make_maybe(const jint distance) { - return MemPointerAliasing(Maybe, distance); - } - - Aliasing aliasing() const { return _aliasing; } - bool has_distance() const { return _aliasing == Always || _aliasing == Maybe; } - jint distance() const { assert(has_distance(), "must have"); return _distance; } - // Use case: exact aliasing and adjacency. 
bool is_always_at_distance(const jint distance) const { return _aliasing == Always && _distance == distance; } -// TODO maybe not yet -// bool is_never_overlapping(const jint size1, const jint size2) { -// assert(1 <= size1 && size1 <= 1024, "sane size"); -// assert(1 <= size2 && size2 <= 1024, "sane size"); -// -// if (_aliasing == Unknown) { return false; } -// if (_aliasing == Never) { return true; } -// -// // distance = p2 - p1 -// const jint d = distance(); -// return size1 <= d || // <==> size1 <= p2 - p1 <==> p1 + size1 <= p2 -// size2 <= -d; // <==> size2 <= p1 - p2 <==> p2 + size2 <= p1 -// } - #ifndef PRODUCT void print_on(outputStream* st) const { switch(_aliasing) { case Unknown: st->print("Unknown"); break; - case Never: st->print("Never"); break; case Always: st->print("Always(%d)", _distance); break; - case Maybe: st->print("Maybe(%d)", _distance); break; default: ShouldNotReachHere(); } } From 49b96a62fec06344491ecd27ce1be0597c1ebc35 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 08:59:52 +0200 Subject: [PATCH 43/89] 10 vs 11 summands test --- src/hotspot/share/opto/mempointer.hpp | 9 ++- .../jtreg/compiler/c2/TestMergeStores.java | 76 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 23c2c30c70235..0c34c50cb77e2 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -192,7 +192,13 @@ class MemPointerSummand : public StackObj { // class MemPointerSimpleForm : public StackObj { private: - static const int SUMMANDS_SIZE = 10; // TODO good? + // We limit the number of summands to 10. Usually, a pointer contains a base pointer + // (e.g. array pointer or null for native memory) and a few variables. 
For example: + // + // array[j] -> array_base + j + con -> 2 summands + // nativeMemorySegment.get(j) -> null + address + offset + j + con -> 3 summands + // + static const int SUMMANDS_SIZE = 10; Node* _pointer; // pointer node associated with this (sub)pointer @@ -212,7 +218,6 @@ class MemPointerSimpleForm : public StackObj { MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); - // TODO test with more summands? for (int i = 0; i < summands.length(); i++) { _summands[i] = summands.at(i); } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index be9257a958ba9..3b21e322291a9 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -75,6 +75,17 @@ public class TestMergeStores { long vL1; long vL2; + static int zero0 = 0; + static int zero1 = 0; + static int zero2 = 0; + static int zero3 = 0; + static int zero4 = 0; + static int zero5 = 0; + static int zero6 = 0; + static int zero7 = 0; + static int zero8 = 0; + static int zero9 = 0; + interface TestFunction { Object[] run(boolean isWarmUp, int rnd); } @@ -154,6 +165,11 @@ public TestMergeStores() { testGroups.get("test7BE").put("test7RBE", (_,_) -> { return test7RBE(aB.clone(), offset1, vI1); }); testGroups.get("test7BE").put("test7aBE", (_,_) -> { return test7aBE(aB.clone(), offset1, vI1); }); + testGroups.put("test10", new HashMap()); + testGroups.get("test10").put("test10R", (_,_) -> { return test10R(aB.clone()); }); + testGroups.get("test10").put("test10a", (_,_) -> { return test10a(aB.clone()); }); + testGroups.get("test10").put("test10b", (_,_) -> { return test10b(aB.clone()); }); + testGroups.put("test100", new HashMap()); testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); }); 
testGroups.get("test100").put("test100a", (_,_) -> { return test100a(aS.clone(), offset1); }); @@ -278,6 +294,8 @@ public TestMergeStores() { "test5a", "test6a", "test7a", + "test10a", + "test10b", "test7aBE", "test100a", "test101a", @@ -1128,6 +1146,64 @@ static Object[] test7aBE(byte[] a, int offset1, int v1) { return new Object[]{ a }; } + @DontCompile + static Object[] test10R(byte[] a) { + int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8 + zero9; + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test10a(byte[] a) { + // We have 11 summands: 10x zero variable + 1x array base. + // Parsing only allows 10 summands -> does not merge the stores. 
+ int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8 + zero9; + a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10b(byte[] a) { + int zero = zero0 + zero1 + zero2 + zero3 + zero4 + + zero5 + zero6 + zero7 + zero8; + // We have 10 summands: 9x zero variable + 1x array base. + // Parsing allows 10 summands, so this should merge the stores. 
+ a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + @DontCompile static Object[] test100R(short[] a, int offset) { a[offset + 0] = (short)0x0100; From 5a299dcde51087d915e0aeacef3f6b53db5a9872 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 09:13:28 +0200 Subject: [PATCH 44/89] address some small issues --- src/hotspot/share/opto/memnode.cpp | 4 ++-- src/hotspot/share/opto/mempointer.cpp | 10 +++++++--- src/hotspot/share/opto/mempointer.hpp | 3 +-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index b982ff680fe8b..10e5e88f07687 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2879,8 +2879,8 @@ bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const St is_trace_aliasing(), is_trace_adjacency()); #endif - const MemPointer pointer_use(_phase, use_store NOT_PRODUCT( COMMA trace )); - const MemPointer pointer_def(_phase, def_store NOT_PRODUCT( COMMA trace )); + const MemPointer pointer_use(use_store NOT_PRODUCT( COMMA trace )); + const MemPointer pointer_def(def_store NOT_PRODUCT( COMMA trace )); if (!pointer_def.is_adjacent_to_and_before(pointer_use)) { return false; } diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 0fec6c105b6e5..c2cfab10bd3b9 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -264,14 +264,18 @@ bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { const MemPointerSimpleForm& s1 = simple_form(); const MemPointerSimpleForm& s2 = other.simple_form(); const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace )); + const jint size = mem()->memory_size(); + const bool is_adjacent = 
aliasing.is_always_at_distance(size); #ifndef PRODUCT if (_trace.is_trace_adjacency()) { - tty->print("Aliasing for adjacency: "); aliasing.print_on(tty); tty->cr(); - // TODO + tty->print("Adjacent: %s, because size = %d and aliasing = ", + is_adjacent ? "true" : "false", size); + aliasing.print_on(tty); + tty->cr(); } #endif - return aliasing.is_always_at_distance(mem()->memory_size()); + return is_adjacent; } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 0c34c50cb77e2..661a034a9ba00 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -297,8 +297,7 @@ class MemPointer : public StackObj { NOT_PRODUCT( const TraceMemPointer& _trace; ) public: - // TODO no need for phase? - MemPointer(PhaseGVN* phase, const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : + MemPointer(const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : _mem(mem), _simple_form(init_simple_form(_mem)) NOT_PRODUCT( COMMA _trace(trace) ) From c7561dd79ef76defc55488b62da00e7dee038ea3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 11:25:32 +0200 Subject: [PATCH 45/89] combine summands --- src/hotspot/share/opto/mempointer.cpp | 17 ++++++- src/hotspot/share/opto/mempointer.hpp | 4 +- .../jtreg/compiler/c2/TestMergeStores.java | 46 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index c2cfab10bd3b9..613b74e33b5bd 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -43,9 +43,24 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { parse_sub_expression(_worklist.pop()); } + // Sort summands by variable->_idx _summands.sort(MemPointerSummand::cmp_for_sort); - // TODO: sort and combine summands! + // Combine summands for the same variable, adding up the scales. 
+ int pos_put = 0; + int pos_get = 0; + while (pos_get < _summands.length()) { + MemPointerSummand summand = _summands.at(pos_get++); + Node* variable = summand.variable(); + NoOverflowInt scale = summand.scale(); + while (pos_get < _summands.length() && _summands.at(pos_get).variable() == variable) { + MemPointerSummand s = _summands.at(pos_get++); + scale = scale + s.scale(); + // TODO test with overflow or zero + } + _summands.at_put(pos_put++, MemPointerSummand(variable, scale LP64_ONLY( COMMA NoOverflowInt(1) ))); + } + _summands.trunc_to(pos_put); return MemPointerSimpleForm::make(pointer, _summands, _con); } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 661a034a9ba00..7d4c4c989e440 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -219,7 +219,9 @@ class MemPointerSimpleForm : public StackObj { :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { - _summands[i] = summands.at(i); + MemPointerSummand s = summands.at(i); + assert(s.variable() != nullptr, "variable cannot be null"); + _summands[i] = s; } } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 3b21e322291a9..79da4a8579a55 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -169,6 +169,8 @@ public TestMergeStores() { testGroups.get("test10").put("test10R", (_,_) -> { return test10R(aB.clone()); }); testGroups.get("test10").put("test10a", (_,_) -> { return test10a(aB.clone()); }); testGroups.get("test10").put("test10b", (_,_) -> { return test10b(aB.clone()); }); + testGroups.get("test10").put("test10c", (_,_) -> { return test10c(aB.clone()); }); + testGroups.get("test10").put("test10d", (_,_) -> { return test10d(aB.clone()); }); testGroups.put("test100", new HashMap()); 
testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); }); @@ -296,6 +298,8 @@ public TestMergeStores() { "test7a", "test10a", "test10b", + "test10c", + "test10d", "test7aBE", "test100a", "test101a", @@ -1204,6 +1208,48 @@ static Object[] test10b(byte[] a) { return new Object[]{ a }; } + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10c(byte[] a) { + int zero = 7 * zero0 + 7 * zero1 + 7 * zero2 + 7 * zero3 + 7 * zero4 + + 7 * zero5 + 7 * zero6 + 7 * zero7 + 7 * zero8; + // The "7 * zero" is split into "zero << 3 - zero". But the parsing combines it again, lowering the summand count. + // We have 10 summands: 9x zero variable + 1x array base. + // Parsing allows 10 summands, so this should merge the stores. 
+ a[zero + 0] = 'h'; + a[zero + 1] = 'e'; + a[zero + 2] = 'l'; + a[zero + 3] = 'l'; + a[zero + 4] = 'o'; + a[zero + 5] = ' '; + a[zero + 6] = ':'; + a[zero + 7] = ')'; + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test10d(byte[] a) { + // Summand is subtracted from itself -> scale = 0 -> should be removed from list. + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 4) - zero0, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 5) - zero0, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 6) - zero0, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 7) - zero0, (byte)')'); + return new Object[]{ a }; + } + @DontCompile static Object[] test100R(short[] a, int offset) { a[offset + 0] = (short)0x0100; From 402cc0de00f0bafaf17c44969e7ad2ef9aae371d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 13:03:15 +0200 Subject: [PATCH 46/89] handle zero and NaN scale summands --- src/hotspot/share/opto/mempointer.cpp | 19 ++++--- 
src/hotspot/share/opto/mempointer.hpp | 5 +- .../jtreg/compiler/c2/TestMergeStores.java | 49 ++++++++++++++++++- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 613b74e33b5bd..ebbccf2d7dcbc 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -53,12 +53,19 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { MemPointerSummand summand = _summands.at(pos_get++); Node* variable = summand.variable(); NoOverflowInt scale = summand.scale(); + // Add up scale of all summands with the same variable. while (pos_get < _summands.length() && _summands.at(pos_get).variable() == variable) { MemPointerSummand s = _summands.at(pos_get++); scale = scale + s.scale(); - // TODO test with overflow or zero } - _summands.at_put(pos_put++, MemPointerSummand(variable, scale LP64_ONLY( COMMA NoOverflowInt(1) ))); + // Bail out if scale does not fit in 30bits or is NaN (i.e. overflow). + if (scale.truncate_to_30_bits().is_NaN()) { + return MemPointerSimpleForm(pointer); + } + // Keep summands with non-zero scale. + if (!scale.is_zero()) { + _summands.at_put(pos_put++, MemPointerSummand(variable, scale LP64_ONLY( COMMA NoOverflowInt(1) ))); + } } _summands.trunc_to(pos_put); @@ -144,14 +151,6 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su NoOverflowInt new_scale = scale * factor; LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) - // Make sure abs(scale) is not larger than "1 << 30". - new_scale = new_scale.truncate_to_30_bits(); - LP64_ONLY( new_scaleL = new_scaleL.truncate_to_30_bits(); ) - - // If anything went wrong with the scale computation: bailout. 
- if (new_scale.is_NaN()) { break; } - LP64_ONLY( if (new_scaleL.is_NaN()) { break; } ) - _worklist.push(MemPointerSummand(in1, new_scale LP64_ONLY( COMMA new_scaleL ))); return; } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 7d4c4c989e440..ff5a530edd859 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -142,7 +142,7 @@ class MemPointerSummand : public StackObj { { assert(_variable != nullptr, "must have variable"); assert(!_scale.is_zero(), "non-zero scale"); - LP64_ONLY( assert(!_scaleL.is_zero(), "non-zero scale") ); + LP64_ONLY( assert(!_scaleL.is_zero(), "non-zero scaleL") ); } Node* variable() const { return _variable; } @@ -190,6 +190,7 @@ class MemPointerSummand : public StackObj { // // pointer = sum(summands) + con // +// TODO summands scale 30 bits class MemPointerSimpleForm : public StackObj { private: // We limit the number of summands to 10. Usually, a pointer contains a base pointer @@ -221,6 +222,8 @@ class MemPointerSimpleForm : public StackObj { for (int i = 0; i < summands.length(); i++) { MemPointerSummand s = summands.at(i); assert(s.variable() != nullptr, "variable cannot be null"); + assert(!s.scale().truncate_to_30_bits().is_NaN(), "non-NaN scale and fits in 30bits"); + LP64_ONLY( assert(!s.scaleL().truncate_to_30_bits().is_NaN(), "non-NaN scaleL and fits in 30bits"); ) _summands[i] = s; } } diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 79da4a8579a55..74c41d6003442 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -171,6 +171,8 @@ public TestMergeStores() { testGroups.get("test10").put("test10b", (_,_) -> { return test10b(aB.clone()); }); testGroups.get("test10").put("test10c", (_,_) -> { return test10c(aB.clone()); }); testGroups.get("test10").put("test10d", (_,_) -> { return 
test10d(aB.clone()); }); + testGroups.get("test10").put("test10e", (_,_) -> { return test10e(aB.clone()); }); + testGroups.get("test10").put("test10f", (_,_) -> { return test10f(aB.clone()); }); testGroups.put("test100", new HashMap()); testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); }); @@ -300,6 +302,8 @@ public TestMergeStores() { "test10b", "test10c", "test10d", + "test10e", + "test10f", "test7aBE", "test100a", "test101a", @@ -1232,11 +1236,12 @@ static Object[] test10c(byte[] a) { } @Test - @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged - applyIf = {"UseUnalignedAccesses", "true"}) + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to struggle folding ConvI2L / ConvL2I cases static Object[] test10d(byte[] a) { // Summand is subtracted from itself -> scale = 0 -> should be removed from list. 
UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); @@ -1250,6 +1255,46 @@ static Object[] test10d(byte[] a) { return new Object[]{ a }; } + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to struggle folding ConvI2L / ConvL2I cases + static Object[] test10e(byte[] a) { + // Summand is subtracted from itself -> scale = 0 -> should be removed from list. Thus equal to if not present at all. + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 5, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 6, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 7, (byte)')'); + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge + IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] 
\\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}) + static Object[] test10f(byte[] a) { + int big = 1 << 29; + // Adding up the scales overflows -> no merge. + long offset = zero9 * big + zero9 * big + zero9 * big + zero9 * big; + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)'h'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)'e'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)'l'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)'o'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)' '); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)':'); + UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)')'); + return new Object[]{ a }; + } + @DontCompile static Object[] test100R(short[] a, int offset) { a[offset + 0] = (short)0x0100; From afbff6024749f54e73ad8da5419bbbc723880bd4 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 13:07:56 +0200 Subject: [PATCH 47/89] rm scaleL NaN case --- src/hotspot/share/opto/mempointer.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index ebbccf2d7dcbc..f4037f9787f29 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -202,12 +202,6 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON // TODO tests with native memory, etc. - // TODO needed? 
- if (scaleL.is_NaN()) { - assert(false, "scaleL must not be NaN"); - return false; - } - const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); if (ary_ptr_t != nullptr) { // Array accesses that are not Unsafe always have a RangeCheck which ensures From 64ea993c733d0e91df847f4400b86649c9010c0d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 14:05:22 +0200 Subject: [PATCH 48/89] simple -> linear --- src/hotspot/share/opto/mempointer.cpp | 20 ++--- src/hotspot/share/opto/mempointer.hpp | 115 ++++++++++++++++++-------- 2 files changed, 89 insertions(+), 46 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index f4037f9787f29..be715c4fe5037 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -26,7 +26,7 @@ #include "utilities/resourceHash.hpp" // DFS all-path traversal (i.e. with node repetitions), starting at the pointer: -MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { +MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { assert(_worklist.is_empty(), "no prior parsing"); assert(_summands.is_empty(), "no prior parsing"); @@ -39,7 +39,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { int traversal_count = 0; while (_worklist.is_nonempty()) { - if (traversal_count++ > 1000) { return MemPointerSimpleForm(pointer); } + if (traversal_count++ > 1000) { return MemPointerLinearForm(pointer); } parse_sub_expression(_worklist.pop()); } @@ -60,7 +60,7 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { } // Bail out if scale does not fit in 30bits or is NaN (i.e. overflow). if (scale.truncate_to_30_bits().is_NaN()) { - return MemPointerSimpleForm(pointer); + return MemPointerLinearForm(pointer); } // Keep summands with non-zero scale. 
if (!scale.is_zero()) { @@ -69,10 +69,10 @@ MemPointerSimpleForm MemPointerSimpleFormParser::parse_simple_form() { } _summands.trunc_to(pos_put); - return MemPointerSimpleForm::make(pointer, _summands, _con); + return MemPointerLinearForm::make(pointer, _summands, _con); } -void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand summand) { +void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); const NoOverflowInt scale = summand.scale(); LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) @@ -172,7 +172,7 @@ void MemPointerSimpleFormParser::parse_sub_expression(const MemPointerSummand su _summands.push(summand); } -bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { +bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 // On 32-bit platforms, ... 
TODO return true; @@ -224,11 +224,11 @@ bool MemPointerSimpleFormParser::is_safe_from_int_overflow(const int opc LP64_ON #endif } -MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpleForm& other +MemPointerAliasing MemPointerLinearForm::get_aliasing_with(const MemPointerLinearForm& other NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { #ifndef PRODUCT if (trace.is_trace_aliasing()) { - tty->print_cr("MemPointerSimpleForm::get_aliasing_with:"); + tty->print_cr("MemPointerLinearForm::get_aliasing_with:"); print_on(tty); other.print_on(tty); } @@ -269,8 +269,8 @@ MemPointerAliasing MemPointerSimpleForm::get_aliasing_with(const MemPointerSimpl } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { - const MemPointerSimpleForm& s1 = simple_form(); - const MemPointerSimpleForm& s2 = other.simple_form(); + const MemPointerLinearForm& s1 = linear_form(); + const MemPointerLinearForm& s2 = other.linear_form(); const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace )); const jint size = mem()->memory_size(); const bool is_adjacent = aliasing.is_always_at_distance(size); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index ff5a530edd859..22fd8d6a990ff 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -28,6 +28,45 @@ #include "opto/memnode.hpp" #include "opto/noOverflowInt.hpp" +// The MemPointer is a shared facility to parse pointers and check the aliasing of pointers, +// e.g. checking if two stores are adjacent. 
+// +// MemPointerLinearForm: +// When the pointer is parsed, it is represented as a linear form: +// +// pointer = con + sum(summands) +// +// Where each summand_i in summands has the form: +// +// summand_i = scale_i * variable_i +// +// Hence, the full linear form is: +// +// pointer = con + sum_i(scale_i * variable_i) +// +// On 64bit systems, this linear form is computed with long-add/mul, on 32bit systems it is +// computed with int-add/mul. +// +// MemPointerAliasing: +// This linear form allows us to determine the aliasing between two pointers easily. For +// example, if two pointers are identical, except for their constant: +// +// pointer1 = con1 + sum(summands) +// pointer2 = con2 + sum(summands) +// +// then we can easily compute the distance between the pointers (distance = con2 - con1), +// and determine if they are adjacent. +// +// MemPointerLinearFormParser: +// TODO + + +// TODO +// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: +// +// abs(scale) < (1 << 30) +// + #ifndef PRODUCT class TraceMemPointer : public StackObj { private: @@ -100,30 +139,34 @@ class MemPointerAliasing { #endif }; -// Summand of a MemPointerSimpleForm. 
+// Summand of a MemPointerLinearForm: +// +// summand = scale * variable // // On 32-bit platforms, we trivially use 32-bit jint values for the address computation: // -// s = scaleI * variable // 32-bit variable +// summand = scaleI * variable // 32-bit variable // scale = scaleI // // On 64-bit platforms, we have a mix of 64-bit jlong and 32-bit jint values for the // address computation: // -// s = scaleL * ConvI2L(scaleI * variable) // 32-bit variable +// summand = scaleL * ConvI2L(scaleI * variable) // 32-bit variable // scale = scaleL * scaleI // -// s = scaleL * variable // 64-bit variable +// summand = scaleL * variable // 64-bit variable // scale = scaleL // -// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: -// -// abs(scale) < (1 << 30) +// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt. During +// the decomposition into the summands, we might encounter a scale that overflows the +// jint-range. Then, the scale becomes NaN, which indicates that we cannot decompose +// the pointer using this summand. // -// This allows very high scales, but allows calculations with scale to -// avoid overflows. +// Note: we only need scaleL during the decomposition of the pointer. We need to check +// if decomposing a summand further is safe (i.e. if there cannot be an overflow), +// see MemPointerLinearFormParser::is_safe_from_int_overflow. But during aliasing +// computation, we fully rely on scale, and do not need scaleL any more. // -// TODO generalization: final product only needs to use scale, not scaleL class MemPointerSummand : public StackObj { private: Node* _variable; @@ -186,12 +229,12 @@ class MemPointerSummand : public StackObj { #endif }; -// Simple form of the pointer sub-expression of "pointer". +// Linear form of the pointer sub-expression of "pointer". 
// -// pointer = sum(summands) + con +// pointer = con + sum(summands) // // TODO summands scale 30 bits -class MemPointerSimpleForm : public StackObj { +class MemPointerLinearForm : public StackObj { private: // We limit the number of summands to 10. Usually, a pointer contains a base pointer // (e.g. array pointer or null for native memory) and a few variables. For example: @@ -208,15 +251,15 @@ class MemPointerSimpleForm : public StackObj { public: // Empty - MemPointerSimpleForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} + MemPointerLinearForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} // Default: pointer = variable - MemPointerSimpleForm(Node* variable) : _pointer(variable), _con(NoOverflowInt(0)) { + MemPointerLinearForm(Node* variable) : _pointer(variable), _con(NoOverflowInt(0)) { const NoOverflowInt one(1); _summands[0] = MemPointerSummand(variable, one LP64_ONLY( COMMA one )); } private: - MemPointerSimpleForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) + MemPointerLinearForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { @@ -229,15 +272,15 @@ class MemPointerSimpleForm : public StackObj { } public: - static MemPointerSimpleForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { + static MemPointerLinearForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { if (summands.length() <= SUMMANDS_SIZE) { - return MemPointerSimpleForm(pointer, summands, con); + return MemPointerLinearForm(pointer, summands, con); } else { - return MemPointerSimpleForm(pointer); + return MemPointerLinearForm(pointer); } } - MemPointerAliasing get_aliasing_with(const MemPointerSimpleForm& other + MemPointerAliasing get_aliasing_with(const MemPointerLinearForm& other NOT_PRODUCT( COMMA const TraceMemPointer& 
trace) ) const; const MemPointerSummand summands_at(const uint i) const { @@ -250,10 +293,10 @@ class MemPointerSimpleForm : public StackObj { #ifndef PRODUCT void print_on(outputStream* st) const { if (_pointer == nullptr) { - st->print_cr("MemPointerSimpleForm empty."); + st->print_cr("MemPointerLinearForm empty."); return; } - st->print("MemPointerSimpleForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); + st->print("MemPointerLinearForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); _con.print_on(st); for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; @@ -267,7 +310,7 @@ class MemPointerSimpleForm : public StackObj { #endif }; -class MemPointerSimpleFormParser : public StackObj { +class MemPointerLinearFormParser : public StackObj { private: const MemNode* _mem; @@ -276,18 +319,18 @@ class MemPointerSimpleFormParser : public StackObj { GrowableArray _summands; NoOverflowInt _con; - // Resulting simple-form. - MemPointerSimpleForm _simple_form; + // Resulting linear-form. 
+ MemPointerLinearForm _linear_form; public: - MemPointerSimpleFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { - _simple_form = parse_simple_form(); + MemPointerLinearFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { + _linear_form = parse_linear_form(); } - const MemPointerSimpleForm simple_form() const { return _simple_form; } + const MemPointerLinearForm linear_form() const { return _linear_form; } private: - MemPointerSimpleForm parse_simple_form(); + MemPointerLinearForm parse_linear_form(); void parse_sub_expression(const MemPointerSummand summand); bool is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; @@ -297,14 +340,14 @@ class MemPointerSimpleFormParser : public StackObj { class MemPointer : public StackObj { private: const MemNode* _mem; - const MemPointerSimpleForm _simple_form; + const MemPointerLinearForm _linear_form; NOT_PRODUCT( const TraceMemPointer& _trace; ) public: MemPointer(const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : _mem(mem), - _simple_form(init_simple_form(_mem)) + _linear_form(init_linear_form(_mem)) NOT_PRODUCT( COMMA _trace(trace) ) { #ifndef PRODUCT @@ -312,21 +355,21 @@ class MemPointer : public StackObj { tty->print_cr("MemPointer::MemPointer:"); tty->print("mem: "); mem->dump(); _mem->in(MemNode::Address)->dump_bfs(5, 0, "d"); - _simple_form.print_on(tty); + _linear_form.print_on(tty); } #endif } const MemNode* mem() const { return _mem; } - const MemPointerSimpleForm simple_form() const { return _simple_form; } + const MemPointerLinearForm linear_form() const { return _linear_form; } bool is_adjacent_to_and_before(const MemPointer& other) const; private: - static const MemPointerSimpleForm init_simple_form(const MemNode* mem) { + static const MemPointerLinearForm init_linear_form(const MemNode* mem) { assert(mem->is_Store(), "only stores are supported"); ResourceMark rm; - MemPointerSimpleFormParser parser(mem); - return 
parser.simple_form(); + MemPointerLinearFormParser parser(mem); + return parser.linear_form(); } }; From f7419abdb3c13fcdb160610a992f08a65e766123 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 14:58:59 +0200 Subject: [PATCH 49/89] beautify parsing --- src/hotspot/share/opto/mempointer.cpp | 26 +++++++++++++---------- src/hotspot/share/opto/mempointer.hpp | 30 ++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index be715c4fe5037..aee116a927bb7 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -25,18 +25,20 @@ #include "opto/mempointer.hpp" #include "utilities/resourceHash.hpp" -// DFS all-path traversal (i.e. with node repetitions), starting at the pointer: +// Recursively parse the pointer expression with a DFS all-path traversal +// (i.e. with node repetitions), starting at the pointer. MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { assert(_worklist.is_empty(), "no prior parsing"); assert(_summands.is_empty(), "no prior parsing"); Node* pointer = _mem->in(MemNode::Address); - // pointer->dump_bfs(4,0,"#"); - + // Start with the trivial summand. const NoOverflowInt one(1); _worklist.push(MemPointerSummand(pointer, one LP64_ONLY( COMMA one ))); + // Decompose the summands until only terminal summands remain. This effectively + // parses the pointer expression recursively. int traversal_count = 0; while (_worklist.is_nonempty()) { if (traversal_count++ > 1000) { return MemPointerLinearForm(pointer); } @@ -72,6 +74,9 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { return MemPointerLinearForm::make(pointer, _summands, _con); } +// Parse a sub-expression of the pointer, starting at the current summand. We parse the +// current node, and see if it can be decomposed into further summands, or if the current +// summand is terminal. 
void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); const NoOverflowInt scale = summand.scale(); @@ -84,6 +89,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_ConI: case Op_ConL: { + // Terminal: add to constant. NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int()) : NoOverflowInt(n->get_long()); _con = _con + scale * con; @@ -93,6 +99,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_AddL: case Op_AddI: { + // Decompose addition. Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); @@ -102,6 +109,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_SubL: case Op_SubI: { + // Decompose subtraction. Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 3 : 2); @@ -109,10 +117,6 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? scaleL * NoOverflowInt(-1) : scaleL; ) - // If anything went wrong with the scale computation: bailout. - if (sub_scale.is_NaN()) { break; } - LP64_ONLY( if (sub_scaleL.is_NaN()) { break; } ) - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); _worklist.push(MemPointerSummand(b, sub_scale LP64_ONLY( COMMA sub_scaleL ))); return; @@ -122,7 +126,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_LShiftL: case Op_LShiftI: { - // Form must be linear: only multiplication with constants is allowed. + // Form must be linear: only multiplication with constants can be decomposed. 
Node* in1 = n->in(1); Node* in2 = n->in(2); if (!in2->is_Con()) { break; } @@ -159,6 +163,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_CastX2P: case Op_ConvI2L: { + // Decompose: look through. Node* a = n->in(1); _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); return; @@ -166,9 +171,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su } } - // Default: could not parse the "summand" further, take it as one of the - // "terminal" summands. - // TODO wording of "terminal summands"? + // Default: we could not parse the "summand" further, i.e. it is terminal. _summands.push(summand); } @@ -196,6 +199,7 @@ bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ON case Op_CastII: case Op_CastLL: case Op_CastX2P: + // TODO CastPP ? case Op_ConvI2L: return true; } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 22fd8d6a990ff..70781b28ce36b 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -48,7 +48,7 @@ // computed with int-add/mul. // // MemPointerAliasing: -// This linear form allows us to determine the aliasing between two pointers easily. For +// The linear form allows us to determine the aliasing between two pointers easily. For // example, if two pointers are identical, except for their constant: // // pointer1 = con1 + sum(summands) @@ -58,8 +58,32 @@ // and determine if they are adjacent. // // MemPointerLinearFormParser: -// TODO - +// Any pointer can be parsed into this (default / trivial) linear form: +// +// pointer = 0 + 1 * pointer +// con scale +// +// However, this is not particularly useful to compute aliasing. We would like to decompose +// the pointer as far as possible, i.e. extract as many summands and add up the constants to +// a single constant. 
+// +// Example (normal int-array access): +// pointer1 = array[i + 0] = array_base + array_int_base_offset + 4L * ConvI2L(i + 0) +// pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1) +// +// At first, computing aliasing is difficult because the distance is hidden inside the +// ConvI2L. We can convert this (with array_int_base_offset = 16) into these linear forms: +// +// pointer1 = 16L + 1L * array_base + 4L * i +// pointer2 = 20L + 1L * array_base + 4L * i +// +// This allows us to easily see that these two pointers are adjacent (distance = 4). +// +// Hence, in MemPointerLinearFormParser::parse_linear_form, we start with the pointer as +// a trivial summand. A summand can either be decomposed further or it is terminal (cannot +// be decomposed further). We decompose the summands recursively until all remaining summands +// are terminal, see MemPointerLinearFormParser::parse_sub_expression. This effectively parses +// the pointer expression recursively. // TODO // For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: From 9deaa483bbdb5049800a533345cb52382969bbb0 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 15:27:59 +0200 Subject: [PATCH 50/89] parse through ConvI2L on 32bit --- src/hotspot/share/opto/mempointer.cpp | 6 ++++++ src/hotspot/share/opto/mempointer.hpp | 2 ++ .../jtreg/compiler/c2/TestMergeStores.java | 16 ++++++++++------ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index aee116a927bb7..d51343decb3a6 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -162,6 +162,12 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_CastLL: case Op_CastX2P: case Op_ConvI2L: + // On 32bit systems we can also look through ConvI2L, since the final result will always + // be truncated back with ConvL2I. 
On 64bit systems this is not linear: + // + // ConvI2L(ConvL2I(max_jint + 1)) = ConvI2L(min_jint) = min_jint + // + NOT_LP64( case Op_ConvL2I: ) { // Decompose: look through. Node* a = n->in(1); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 70781b28ce36b..ab1e220bb23ef 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -85,6 +85,8 @@ // are terminal, see MemPointerLinearFormParser::parse_sub_expression. This effectively parses // the pointer expression recursively. +// TODO why not everything linear? + // TODO // For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: // diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 74c41d6003442..c8e8bd337ad4a 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -1240,8 +1240,7 @@ static Object[] test10c(byte[] a) { IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged - applyIf = {"UseUnalignedAccesses", "true"}, - applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to struggle folding ConvI2L / ConvL2I cases + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test10d(byte[] a) { // Summand is subtracted from itself -> scale = 0 -> should be removed from list. 
UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); @@ -1260,8 +1259,7 @@ static Object[] test10d(byte[] a) { IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged - applyIf = {"UseUnalignedAccesses", "true"}, - applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to struggle folding ConvI2L / ConvL2I cases + applyIf = {"UseUnalignedAccesses", "true"}) static Object[] test10e(byte[] a) { // Summand is subtracted from itself -> scale = 0 -> should be removed from list. Thus equal to if not present at all. UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h'); @@ -2080,7 +2078,13 @@ static Object[] test601R(byte[] aB, int[] aI, int i, int offset1) { IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"}, - applyIfPlatform = {"64-bit", "true"}) // 32-bit seems to fold some cases, but not others. 
+ applyIfPlatform = {"64-bit", "true"}) + @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0", + IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged + applyIf = {"UseUnalignedAccesses", "true"}, + applyIfPlatform = {"32-bit", "true"}) static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) { Object a = null; long base = 0; @@ -2092,7 +2096,7 @@ static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) { base = UNSAFE.ARRAY_INT_BASE_OFFSET; } // Array type is unknown, i.e. bottom[]. Hence we do not know the element size of the array. - // Thus, merging is not always safe, there could be overflows. + // Thus, on 64-bit systems merging is not safe, there could be overflows. 
UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe); UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba); UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad); From d810b5e9aa3ee91a8eb8082b7c9d1e22bb964aef Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 17:51:11 +0200 Subject: [PATCH 51/89] address some of Christians feedback --- src/hotspot/share/opto/mempointer.cpp | 15 ++++++++------- src/hotspot/share/opto/mempointer.hpp | 4 ++-- src/hotspot/share/opto/noOverflowInt.hpp | 8 ++++---- test/hotspot/gtest/opto/test_no_overflow_int.cpp | 14 +++++++------- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index d51343decb3a6..4f50bd7cd1053 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -61,7 +61,7 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { scale = scale + s.scale(); } // Bail out if scale does not fit in 30bits or is NaN (i.e. overflow). - if (scale.truncate_to_30_bits().is_NaN()) { + if (!scale.is_abs_less_than_2_to_30()) { return MemPointerLinearForm(pointer); } // Keep summands with non-zero scale. @@ -110,8 +110,8 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_SubI: { // Decompose subtraction. - Node* a = n->in((opc == Op_AddP) ? 2 : 1); - Node* b = n->in((opc == Op_AddP) ? 3 : 2); + Node* a = n->in(1); + Node* b = n->in(2); NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? 
scaleL * NoOverflowInt(-1) @@ -259,12 +259,13 @@ MemPointerAliasing MemPointerLinearForm::get_aliasing_with(const MemPointerLinea } // Compute distance: - NoOverflowInt distance = other.con() - con(); - distance = distance.truncate_to_30_bits(); - if (distance.is_NaN()) { + const NoOverflowInt distance = other.con() - con(); + if (distance.is_NaN() || !distance.is_abs_less_than_2_to_30()) { #ifndef PRODUCT if (trace.is_trace_aliasing()) { - tty->print_cr(" -> Aliasing unknown, distance is NaN."); + tty->print(" -> Aliasing unknown, bad distance: "); + distance.print_on(tty); + tty->cr(); } #endif return MemPointerAliasing::make_unknown(); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index ab1e220bb23ef..7addd4f34c758 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -291,8 +291,8 @@ class MemPointerLinearForm : public StackObj { for (int i = 0; i < summands.length(); i++) { MemPointerSummand s = summands.at(i); assert(s.variable() != nullptr, "variable cannot be null"); - assert(!s.scale().truncate_to_30_bits().is_NaN(), "non-NaN scale and fits in 30bits"); - LP64_ONLY( assert(!s.scaleL().truncate_to_30_bits().is_NaN(), "non-NaN scaleL and fits in 30bits"); ) + assert(s.scale().is_abs_less_than_2_to_30(), "non-NaN scale and fits in 30bits"); + LP64_ONLY( assert(s.scaleL().is_abs_less_than_2_to_30(), "non-NaN scaleL and fits in 30bits"); ) _summands[i] = s; } } diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index 23def9f42cf8d..82291ad31a9d1 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -86,11 +86,11 @@ class NoOverflowInt { return a.value() == b.value(); } - NoOverflowInt truncate_to_30_bits() const { - if (is_NaN()) { return make_NaN(); } + bool is_abs_less_than_2_to_30() const { + const NoOverflowInt i = abs(); + if (i.is_NaN()) { return false; } const jint 
max_value = 1 << 30; - if (value() >= max_value || value() <= -max_value) { return make_NaN(); } - return *this; + return i.value() < max_value; } NoOverflowInt abs() const { diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp index 260e397be4208..4bc6bc45934bb 100644 --- a/test/hotspot/gtest/opto/test_no_overflow_int.cpp +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -146,15 +146,15 @@ TEST_VM(opto, NoOverflowInt_misc) { ASSERT_FALSE((big + big) == (big + big)); ASSERT_TRUE((big - one + big) == (big - one + big)); - // truncate_to_30_bits + // is_abs_less_than_2_to_30 for (int i = -(1 << 30) + 1; i < (1 << 30); i += 1000) { - ASSERT_EQ(NoOverflowInt(i).truncate_to_30_bits().value(), i); + ASSERT_TRUE(NoOverflowInt(i).is_abs_less_than_2_to_30()); } - ASSERT_TRUE(big.truncate_to_30_bits().is_NaN()); - ASSERT_FALSE((big - one).truncate_to_30_bits().is_NaN()); - ASSERT_TRUE((zero - big).truncate_to_30_bits().is_NaN()); - ASSERT_FALSE((one - big).truncate_to_30_bits().is_NaN()); - ASSERT_TRUE(nan.truncate_to_30_bits().is_NaN()); + ASSERT_FALSE(big.is_abs_less_than_2_to_30()); + ASSERT_TRUE((big - one).is_abs_less_than_2_to_30()); + ASSERT_FALSE((zero - big).is_abs_less_than_2_to_30()); + ASSERT_TRUE((one - big).is_abs_less_than_2_to_30()); + ASSERT_FALSE(nan.is_abs_less_than_2_to_30()); // abs for (int i = 0; i < (1 << 31); i += 1024) { From 4d1200b83ea997ef2fb650ad09a6fdbeea1abcb4 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 18:15:14 +0200 Subject: [PATCH 52/89] make more elegant, add comments --- src/hotspot/share/opto/mempointer.cpp | 8 ++++++-- src/hotspot/share/opto/mempointer.hpp | 24 ++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 4f50bd7cd1053..eb4b87918bb6e 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ 
b/src/hotspot/share/opto/mempointer.cpp @@ -45,6 +45,9 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { parse_sub_expression(_worklist.pop()); } + // Check for constant overflow. + if (_con.is_NaN()) { return MemPointerLinearForm(pointer); } + // Sort summands by variable->_idx _summands.sort(MemPointerSummand::cmp_for_sort); @@ -60,8 +63,8 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { MemPointerSummand s = _summands.at(pos_get++); scale = scale + s.scale(); } - // Bail out if scale does not fit in 30bits or is NaN (i.e. overflow). - if (!scale.is_abs_less_than_2_to_30()) { + // Bail out if scale is NaN. + if (scale.is_NaN()) { return MemPointerLinearForm(pointer); } // Keep summands with non-zero scale. @@ -260,6 +263,7 @@ MemPointerAliasing MemPointerLinearForm::get_aliasing_with(const MemPointerLinea // Compute distance: const NoOverflowInt distance = other.con() - con(); + // TODO why 2_to_30 ? if (distance.is_NaN() || !distance.is_abs_less_than_2_to_30()) { #ifndef PRODUCT if (trace.is_trace_aliasing()) { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 7addd4f34c758..b9922619ca794 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -84,14 +84,17 @@ // be decomposed further). We decompose the summands recursively until all remaining summands // are terminal, see MemPointerLinearFormParser::parse_sub_expression. This effectively parses // the pointer expression recursively. - -// TODO why not everything linear? 
- -// TODO -// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt, where: // -// abs(scale) < (1 << 30) +// We have to be careful on 64bit systems with ConvI2L: decomposing its input is not +// correct in general, overflows may not be preserved in the linear form: +// +// AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) +// SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) +// MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) +// LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) // +// However, there are some cases where we can prove that the decomposition is safe, +// see MemPointerLinearFormParser::is_safe_from_int_overflow. #ifndef PRODUCT class TraceMemPointer : public StackObj { @@ -259,7 +262,6 @@ class MemPointerSummand : public StackObj { // // pointer = con + sum(summands) // -// TODO summands scale 30 bits class MemPointerLinearForm : public StackObj { private: // We limit the number of summands to 10. Usually, a pointer contains a base pointer @@ -287,12 +289,13 @@ class MemPointerLinearForm : public StackObj { private: MemPointerLinearForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { + assert(!_con.is_NaN(), "non-NaN constant"); assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { MemPointerSummand s = summands.at(i); assert(s.variable() != nullptr, "variable cannot be null"); - assert(s.scale().is_abs_less_than_2_to_30(), "non-NaN scale and fits in 30bits"); - LP64_ONLY( assert(s.scaleL().is_abs_less_than_2_to_30(), "non-NaN scaleL and fits in 30bits"); ) + assert(!s.scale().is_NaN(), "non-NaN scale"); + LP64_ONLY( assert(!s.scaleL().is_NaN(), "non-NaN scaleL"); ) _summands[i] = s; } } @@ -362,7 +365,8 @@ class MemPointerLinearFormParser : public StackObj { bool is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; }; -// TODO +// Facility to parse the pointer of a 
Load or Store, so that aliasing between two such +// memory operations can be determined (e.g. adjacency). class MemPointer : public StackObj { private: const MemNode* _mem; From 048cba928460bf295de6535a777f04c08816e661 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 18:19:37 +0200 Subject: [PATCH 53/89] more comments --- src/hotspot/share/opto/mempointer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index eb4b87918bb6e..2cde5589c5276 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -186,11 +186,12 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 - // On 32-bit platforms, ... TODO + // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always + // be truncated. Thus, it does not matter if we have int or long overflows. 
return true; #else - // Not trivially safe: + // But on 64-bit platforms, these operations are not trivially safe: // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) // MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) From efcbb706a9fc8ff86627727a6ae4e59c1c69f506 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 15 Aug 2024 18:25:29 +0200 Subject: [PATCH 54/89] TestMergeStoresMemorySegment.java from compiler/loopopts/superword/TestMemorySegment.java --- .../c2/TestMergeStoresMemorySegment.java | 810 ++++++++++++++++++ 1 file changed, 810 insertions(+) create mode 100644 test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java new file mode 100644 index 0000000000000..df854bb945cfc --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java @@ -0,0 +1,810 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2; + +import compiler.lib.ir_framework.*; +import jdk.test.lib.Utils; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.HashMap; +import java.util.Random; +import java.lang.foreign.*; + +/* + * @test id=byte-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ByteArray + */ + +/* + * @test id=char-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment CharArray + */ + +/* + * @test id=short-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ShortArray + */ + +/* + * @test id=int-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment IntArray + */ + +/* + * @test id=long-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment LongArray + */ + +/* + * @test id=float-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment FloatArray + */ + +/* + * @test id=double-array + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment DoubleArray + */ + +/* + * @test id=byte-buffer + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver 
compiler.c2.TestMergeStoresMemorySegment ByteBuffer + */ + +/* + * @test id=byte-buffer-direct + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment ByteBufferDirect + */ + +/* + * @test id=native + * @bug 8335392 + * @summary Test vectorization of loops over MemorySegment + * @library /test/lib / + * @run driver compiler.c2.TestMergeStoresMemorySegment Native + */ + +// FAILS: mixed providers currently do not vectorize. Maybe there is some inlining issue. +// /* +// * @test id=mixed-array +// * @bug 8335392 +// * @summary Test vectorization of loops over MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedArray +// */ +// +// /* +// * @test id=MixedBuffer +// * @bug 8335392 +// * @summary Test vectorization of loops over MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedBuffer +// */ +// +// /* +// * @test id=mixed +// * @bug 8335392 +// * @summary Test vectorization of loops over MemorySegment +// * @library /test/lib / +// * @run driver compiler.c2.TestMergeStoresMemorySegment Mixed +// */ + +public class TestMergeStoresMemorySegment { + public static void main(String[] args) { + TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class); + framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0]); + framework.setDefaultWarmup(100); + framework.start(); + } +} + +class TestMergeStoresMemorySegmentImpl { + static final int BACKING_SIZE = 1024 * 8; + static final Random RANDOM = Utils.getRandomInstance(); + + + interface TestFunction { + Object[] run(); + } + + interface MemorySegmentProvider { + MemorySegment newMemorySegment(); + } + + static MemorySegmentProvider provider; + + static { + String providerName = System.getProperty("memorySegmentProviderNameForTestVM"); + provider = switch (providerName) { + case 
"ByteArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteArray; + case "CharArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfCharArray; + case "ShortArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfShortArray; + case "IntArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfIntArray; + case "LongArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfLongArray; + case "FloatArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfFloatArray; + case "DoubleArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfDoubleArray; + case "ByteBuffer" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBuffer; + case "ByteBufferDirect" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBufferDirect; + case "Native" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfNative; + case "MixedArray" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedArray; + case "MixedBuffer" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedBuffer; + case "Mixed" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixed; + default -> throw new RuntimeException("Test argument not recognized: " + providerName); + }; + } + + // List of tests + Map tests = new HashMap<>(); + + // List of gold, the results from the first run before compilation + Map golds = new HashMap<>(); + + public TestMergeStoresMemorySegmentImpl () { + // Generate two MemorySegments as inputs + MemorySegment a = newMemorySegment(); + MemorySegment b = newMemorySegment(); + fillRandom(a); + fillRandom(b); + + // Add all tests to list + tests.put("testMemorySegmentBadExitCheck", () -> testMemorySegmentBadExitCheck(copy(a))); + tests.put("testIntLoop_iv_byte", () -> testIntLoop_iv_byte(copy(a))); + tests.put("testIntLoop_longIndex_intInvar_sameAdr_byte", () -> testIntLoop_longIndex_intInvar_sameAdr_byte(copy(a), 0)); + tests.put("testIntLoop_longIndex_longInvar_sameAdr_byte", () -> 
testIntLoop_longIndex_longInvar_sameAdr_byte(copy(a), 0)); + tests.put("testIntLoop_longIndex_intInvar_byte", () -> testIntLoop_longIndex_intInvar_byte(copy(a), 0)); + tests.put("testIntLoop_longIndex_longInvar_byte", () -> testIntLoop_longIndex_longInvar_byte(copy(a), 0)); + tests.put("testIntLoop_intIndex_intInvar_byte", () -> testIntLoop_intIndex_intInvar_byte(copy(a), 0)); + tests.put("testIntLoop_iv_int", () -> testIntLoop_iv_int(copy(a))); + tests.put("testIntLoop_longIndex_intInvar_sameAdr_int", () -> testIntLoop_longIndex_intInvar_sameAdr_int(copy(a), 0)); + tests.put("testIntLoop_longIndex_longInvar_sameAdr_int", () -> testIntLoop_longIndex_longInvar_sameAdr_int(copy(a), 0)); + tests.put("testIntLoop_longIndex_intInvar_int", () -> testIntLoop_longIndex_intInvar_int(copy(a), 0)); + tests.put("testIntLoop_longIndex_longInvar_int", () -> testIntLoop_longIndex_longInvar_int(copy(a), 0)); + tests.put("testIntLoop_intIndex_intInvar_int", () -> testIntLoop_intIndex_intInvar_int(copy(a), 0)); + tests.put("testLongLoop_iv_byte", () -> testLongLoop_iv_byte(copy(a))); + tests.put("testLongLoop_longIndex_intInvar_sameAdr_byte", () -> testLongLoop_longIndex_intInvar_sameAdr_byte(copy(a), 0)); + tests.put("testLongLoop_longIndex_longInvar_sameAdr_byte", () -> testLongLoop_longIndex_longInvar_sameAdr_byte(copy(a), 0)); + tests.put("testLongLoop_longIndex_intInvar_byte", () -> testLongLoop_longIndex_intInvar_byte(copy(a), 0)); + tests.put("testLongLoop_longIndex_longInvar_byte", () -> testLongLoop_longIndex_longInvar_byte(copy(a), 0)); + tests.put("testLongLoop_intIndex_intInvar_byte", () -> testLongLoop_intIndex_intInvar_byte(copy(a), 0)); + tests.put("testLongLoop_iv_int", () -> testLongLoop_iv_int(copy(a))); + tests.put("testLongLoop_longIndex_intInvar_sameAdr_int", () -> testLongLoop_longIndex_intInvar_sameAdr_int(copy(a), 0)); + tests.put("testLongLoop_longIndex_longInvar_sameAdr_int", () -> testLongLoop_longIndex_longInvar_sameAdr_int(copy(a), 0)); + 
tests.put("testLongLoop_longIndex_intInvar_int", () -> testLongLoop_longIndex_intInvar_int(copy(a), 0)); + tests.put("testLongLoop_longIndex_longInvar_int", () -> testLongLoop_longIndex_longInvar_int(copy(a), 0)); + tests.put("testLongLoop_intIndex_intInvar_int", () -> testLongLoop_intIndex_intInvar_int(copy(a), 0)); + + // Compute gold value for all test methods before compilation + for (Map.Entry entry : tests.entrySet()) { + String name = entry.getKey(); + TestFunction test = entry.getValue(); + Object[] gold = test.run(); + golds.put(name, gold); + } + } + + MemorySegment newMemorySegment() { + return provider.newMemorySegment(); + } + + MemorySegment copy(MemorySegment src) { + MemorySegment dst = newMemorySegment(); + MemorySegment.copy(src, 0, dst, 0, src.byteSize()); + return dst; + } + + static MemorySegment newMemorySegmentOfByteArray() { + return MemorySegment.ofArray(new byte[BACKING_SIZE]); + } + + static MemorySegment newMemorySegmentOfCharArray() { + return MemorySegment.ofArray(new char[BACKING_SIZE / 2]); + } + + static MemorySegment newMemorySegmentOfShortArray() { + return MemorySegment.ofArray(new short[BACKING_SIZE / 2]); + } + + static MemorySegment newMemorySegmentOfIntArray() { + return MemorySegment.ofArray(new int[BACKING_SIZE / 4]); + } + + static MemorySegment newMemorySegmentOfLongArray() { + return MemorySegment.ofArray(new long[BACKING_SIZE / 8]); + } + + static MemorySegment newMemorySegmentOfFloatArray() { + return MemorySegment.ofArray(new float[BACKING_SIZE / 4]); + } + + static MemorySegment newMemorySegmentOfDoubleArray() { + return MemorySegment.ofArray(new double[BACKING_SIZE / 8]); + } + + static MemorySegment newMemorySegmentOfByteBuffer() { + return MemorySegment.ofBuffer(ByteBuffer.allocate(BACKING_SIZE)); + } + + static MemorySegment newMemorySegmentOfByteBufferDirect() { + return MemorySegment.ofBuffer(ByteBuffer.allocateDirect(BACKING_SIZE)); + } + + static MemorySegment newMemorySegmentOfNative() { + // Auto arena: GC 
decides when there is no reference to the MemorySegment, + // and then it deallocates the backing memory. + return Arena.ofAuto().allocate(BACKING_SIZE, 1); + } + + static MemorySegment newMemorySegmentOfMixedArray() { + switch(RANDOM.nextInt(7)) { + case 0 -> { return newMemorySegmentOfByteArray(); } + case 1 -> { return newMemorySegmentOfCharArray(); } + case 2 -> { return newMemorySegmentOfShortArray(); } + case 3 -> { return newMemorySegmentOfIntArray(); } + case 4 -> { return newMemorySegmentOfLongArray(); } + case 5 -> { return newMemorySegmentOfFloatArray(); } + default -> { return newMemorySegmentOfDoubleArray(); } + } + } + + static MemorySegment newMemorySegmentOfMixedBuffer() { + switch (RANDOM.nextInt(2)) { + case 0 -> { return newMemorySegmentOfByteBuffer(); } + default -> { return newMemorySegmentOfByteBufferDirect(); } + } + } + + static MemorySegment newMemorySegmentOfMixed() { + switch (RANDOM.nextInt(3)) { + case 0 -> { return newMemorySegmentOfMixedArray(); } + case 1 -> { return newMemorySegmentOfMixedBuffer(); } + default -> { return newMemorySegmentOfNative(); } + } + } + + static void fillRandom(MemorySegment data) { + for (int i = 0; i < (int)data.byteSize(); i += 8) { + data.set(ValueLayout.JAVA_LONG_UNALIGNED, i, RANDOM.nextLong()); + } + } + + + static void verify(String name, Object[] gold, Object[] result) { + if (gold.length != result.length) { + throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + + gold.length + ", result.length = " + result.length); + } + for (int i = 0; i < gold.length; i++) { + Object g = gold[i]; + Object r = result[i]; + if (g == r) { + throw new RuntimeException("verify " + name + ": should be two separate objects (with identical content):" + + " gold[" + i + "] == result[" + i + "]"); + } + + if (!(g instanceof MemorySegment && r instanceof MemorySegment)) { + throw new RuntimeException("verify " + name + ": only MemorySegment supported, i=" + i); + } + + 
MemorySegment mg = (MemorySegment)g; + MemorySegment mr = (MemorySegment)r; + + if (mg.byteSize() != mr.byteSize()) { + throw new RuntimeException("verify " + name + ": MemorySegment must have same byteSize:" + + " gold[" + i + "].byteSize = " + mg.byteSize() + + " result[" + i + "].byteSize = " + mr.byteSize()); + } + + for (int j = 0; j < (int)mg.byteSize(); j++) { + byte vg = mg.get(ValueLayout.JAVA_BYTE, j); + byte vr = mr.get(ValueLayout.JAVA_BYTE, j); + if (vg != vr) { + throw new RuntimeException("verify " + name + ": MemorySegment must have same content:" + + " gold[" + i + "][" + j + "] = " + vg + + " result[" + i + "][" + j + "] = " + vr); + } + } + } + } + + @Run(test = {"testMemorySegmentBadExitCheck", + "testIntLoop_iv_byte", + "testIntLoop_longIndex_intInvar_sameAdr_byte", + "testIntLoop_longIndex_longInvar_sameAdr_byte", + "testIntLoop_longIndex_intInvar_byte", + "testIntLoop_longIndex_longInvar_byte", + "testIntLoop_intIndex_intInvar_byte", + "testIntLoop_iv_int", + "testIntLoop_longIndex_intInvar_sameAdr_int", + "testIntLoop_longIndex_longInvar_sameAdr_int", + "testIntLoop_longIndex_intInvar_int", + "testIntLoop_longIndex_longInvar_int", + "testIntLoop_intIndex_intInvar_int", + "testLongLoop_iv_byte", + "testLongLoop_longIndex_intInvar_sameAdr_byte", + "testLongLoop_longIndex_longInvar_sameAdr_byte", + "testLongLoop_longIndex_intInvar_byte", + "testLongLoop_longIndex_longInvar_byte", + "testLongLoop_intIndex_intInvar_byte", + "testLongLoop_iv_int", + "testLongLoop_longIndex_intInvar_sameAdr_int", + "testLongLoop_longIndex_longInvar_sameAdr_int", + "testLongLoop_longIndex_intInvar_int", + "testLongLoop_longIndex_longInvar_int", + "testLongLoop_intIndex_intInvar_int"}) + void runTests() { + for (Map.Entry entry : tests.entrySet()) { + String name = entry.getKey(); + TestFunction test = entry.getValue(); + // Recall gold value from before compilation + Object[] gold = golds.get(name); + // Compute new result + Object[] result = test.run(); + // 
Compare gold and new result + verify(name, gold, result); + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS + // Exit check: iv < long_limit -> (long)iv < long_limit + // Thus, we have an int-iv, but a long-exit-check. + // Is not properly recognized by either CountedLoop or LongCountedLoop + static Object[] testMemorySegmentBadExitCheck(MemorySegment a) { + for (int i = 0; i < a.byteSize(); i++) { + long adr = i; + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testIntLoop_iv_byte(MemorySegment a) { + for (int i = 0; i < (int)a.byteSize(); i++) { + long adr = i; + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testIntLoop_longIndex_intInvar_sameAdr_byte(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize(); i++) { + long adr = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] 
testIntLoop_longIndex_longInvar_sameAdr_byte(MemorySegment a, long invar) { + for (int i = 0; i < (int)a.byteSize(); i++) { + long adr = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. + // See: JDK-8330274 + static Object[] testIntLoop_longIndex_intInvar_byte(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize(); i++) { + long adr1 = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr1); + long adr2 = (long)(i) + (long)(invar); + a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. 
+ // See: JDK-8330274 + static Object[] testIntLoop_longIndex_longInvar_byte(MemorySegment a, long invar) { + for (int i = 0; i < (int)a.byteSize(); i++) { + long adr1 = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr1); + long adr2 = (long)(i) + (long)(invar); + a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: RangeCheck cannot be eliminated because of int_index + static Object[] testIntLoop_intIndex_intInvar_byte(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize(); i++) { + int int_index = i + invar; + byte v = a.get(ValueLayout.JAVA_BYTE, int_index); + a.set(ValueLayout.JAVA_BYTE, int_index, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.ADD_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testIntLoop_iv_int(MemorySegment a) { + for (int i = 0; i < (int)a.byteSize()/4; i++ ) { + long adr = 4L * i; + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.ADD_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testIntLoop_longIndex_intInvar_sameAdr_int(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize()/4; i++) { + long adr = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + 
a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.ADD_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testIntLoop_longIndex_longInvar_sameAdr_int(MemorySegment a, long invar) { + for (int i = 0; i < (int)a.byteSize()/4; i++) { + long adr = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. + // See: JDK-8330274 + static Object[] testIntLoop_longIndex_intInvar_int(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize()/4; i++) { + long adr1 = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); + long adr2 = 4L * (long)(i) + 4L * (long)(invar); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. 
+ // See: JDK-8330274 + static Object[] testIntLoop_longIndex_longInvar_int(MemorySegment a, long invar) { + for (int i = 0; i < (int)a.byteSize()/4; i++) { + long adr1 = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); + long adr2 = 4L * (long)(i) + 4L * (long)(invar); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: RangeCheck cannot be eliminated because of int_index + static Object[] testIntLoop_intIndex_intInvar_int(MemorySegment a, int invar) { + for (int i = 0; i < (int)a.byteSize()/4; i++) { + int int_index = i + invar; + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index); + a.set(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testLongLoop_iv_byte(MemorySegment a) { + for (long i = 0; i < a.byteSize(); i++) { + long adr = i; + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testLongLoop_longIndex_intInvar_sameAdr_byte(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize(); i++) { + long adr = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a 
}; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.ADD_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testLongLoop_longIndex_longInvar_sameAdr_byte(MemorySegment a, long invar) { + for (long i = 0; i < a.byteSize(); i++) { + long adr = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr); + a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. + // See: JDK-8330274 + static Object[] testLongLoop_longIndex_intInvar_byte(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize(); i++) { + long adr1 = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr1); + long adr2 = (long)(i) + (long)(invar); + a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. 
+ // See: JDK-8330274 + static Object[] testLongLoop_longIndex_longInvar_byte(MemorySegment a, long invar) { + for (long i = 0; i < a.byteSize(); i++) { + long adr1 = (long)(i) + (long)(invar); + byte v = a.get(ValueLayout.JAVA_BYTE, adr1); + long adr2 = (long)(i) + (long)(invar); + a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", + IRNode.ADD_VB, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: RangeCheck cannot be eliminated because of int_index + static Object[] testLongLoop_intIndex_intInvar_byte(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize(); i++) { + int int_index = (int)(i + invar); + byte v = a.get(ValueLayout.JAVA_BYTE, int_index); + a.set(ValueLayout.JAVA_BYTE, int_index, (byte)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.ADD_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static Object[] testLongLoop_iv_int(MemorySegment a) { + for (long i = 0; i < a.byteSize()/4; i++ ) { + long adr = 4L * i; + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + //@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + // IRNode.ADD_VI, "> 0", + // IRNode.STORE_VECTOR, "> 0"}, + // applyIfPlatform = {"64-bit", "true"}, + // applyIf = {"AlignVector", "false"}, + // applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: for native memory. I think it is because of invariants, but need investigation. + // The long -> int loop conversion introduces extra invariants. 
+ static Object[] testLongLoop_longIndex_intInvar_sameAdr_int(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize()/4; i++) { + long adr = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + //@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + // IRNode.ADD_VI, "> 0", + // IRNode.STORE_VECTOR, "> 0"}, + // applyIfPlatform = {"64-bit", "true"}, + // applyIf = {"AlignVector", "false"}, + // applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: for native memory. I think it is because of invariants, but need investigation. + // The long -> int loop conversion introduces extra invariants. + static Object[] testLongLoop_longIndex_longInvar_sameAdr_int(MemorySegment a, long invar) { + for (long i = 0; i < a.byteSize()/4; i++) { + long adr = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. 
+ // See: JDK-8330274 + static Object[] testLongLoop_longIndex_intInvar_int(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize()/4; i++) { + long adr1 = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); + long adr2 = 4L * (long)(i) + 4L * (long)(invar); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: invariants are sorted differently, because of differently inserted Cast. + // See: JDK-8330274 + static Object[] testLongLoop_longIndex_longInvar_int(MemorySegment a, long invar) { + for (long i = 0; i < a.byteSize()/4; i++) { + long adr1 = 4L * (long)(i) + 4L * (long)(invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); + long adr2 = 4L * (long)(i) + 4L * (long)(invar); + a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); + } + return new Object[]{ a }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.ADD_VI, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // FAILS: RangeCheck cannot be eliminated because of int_index + static Object[] testLongLoop_intIndex_intInvar_int(MemorySegment a, int invar) { + for (long i = 0; i < a.byteSize()/4; i++) { + int int_index = (int)(i + invar); + int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index); + a.set(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index, (int)(v + 1)); + } + return new Object[]{ a }; + } +} From 55aa851afa2c528297c5cc8f031476d94ee12a34 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 15:57:41 +0200 Subject: [PATCH 55/89] add BASIC tag --- src/hotspot/share/opto/memnode.cpp | 26 +++++++++++++++++++ .../share/opto/traceMergeStoresTag.hpp | 1 
+ 2 files changed, 27 insertions(+) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 10e5e88f07687..3bc3e0e3bc29e 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2776,6 +2776,17 @@ class MergePrimitiveStores : public StackObj { } return Status(found_store, cfg_status == CFGStatus::SuccessWithRangeCheck); } + +#ifndef PRODUCT + void print_on(outputStream* st) const { + if (_found_store == nullptr) { + st->print_cr("None"); + } else { + st->print_cr("Found[%d %s, %s]", _found_store->_idx, _found_store->Name(), + _found_range_check ? "RC" : "no-RC"); + } + } +#endif }; Status find_adjacent_use_store(const StoreNode* def_store) const; @@ -2795,6 +2806,10 @@ class MergePrimitiveStores : public StackObj { return _trace_tags.at(tag); } + bool is_trace_basic() const { + return is_trace(TraceMergeStores::Tag::BASIC); + } + bool is_trace_pointer() const { return is_trace(TraceMergeStores::Tag::POINTER); } @@ -2822,17 +2837,21 @@ StoreNode* MergePrimitiveStores::run() { return nullptr; } + NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); }) + // TODO maybe parse pointer, see if viable? - only if cached! // The _store must be the "last" store in a chain. If we find a use we could merge with // then that use or a store further down is the "last" store. Status status_use = find_adjacent_use_store(_store); + NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] expect no use: "); status_use.print_on(tty); }) if (status_use.found_store() != nullptr) { return nullptr; } // Check if we can merge with at least one def, so that we have at least 2 stores to merge. 
Status status_def = find_adjacent_def_store(_store); + NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] expect def: "); status_def.print_on(tty); }) if (status_def.found_store() == nullptr) { return nullptr; } @@ -3084,21 +3103,28 @@ void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const { merge_list.push(current); while (current != nullptr && merge_list.size() < merge_list_max_size) { Status status = find_adjacent_def_store(current); + NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] find def: "); status.print_on(tty); }) + current = status.found_store(); if (current != nullptr) { merge_list.push(current); // We can have at most one RangeCheck. if (status.found_range_check()) { + NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] found RangeCheck, stop traversal."); }) break; } } } + NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] found:"); merge_list.dump(); }) + // Truncate the merge_list to a power of 2. const uint pow2size = round_down_power_of_2(merge_list.size()); assert(pow2size >= 2, "must be merging at least 2 stores"); while (merge_list.size() > pow2size) { merge_list.pop(); } + + NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] truncated:"); merge_list.dump(); }) } // Merge the input values of the smaller stores to a single larger input value. 
diff --git a/src/hotspot/share/opto/traceMergeStoresTag.hpp b/src/hotspot/share/opto/traceMergeStoresTag.hpp index fa126239c5e98..9f33c9efa0525 100644 --- a/src/hotspot/share/opto/traceMergeStoresTag.hpp +++ b/src/hotspot/share/opto/traceMergeStoresTag.hpp @@ -30,6 +30,7 @@ namespace TraceMergeStores { #define COMPILER_TAG(flags) \ + flags(BASIC, "Trace basic analysis steps") \ flags(POINTER, "Trace pointer IR") \ flags(ALIASING, "Trace MemPointerSimpleForm::get_aliasing_with") \ flags(ADJACENCY, "Trace adjacency") \ From 47f4b7048a920b68308c51f06e9e9ffddd51e627 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 17:21:21 +0200 Subject: [PATCH 56/89] implement tests for TestMergeStoresMemorySegment.java --- .../c2/TestMergeStoresMemorySegment.java | 571 +++--------------- 1 file changed, 94 insertions(+), 477 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java index df854bb945cfc..a543ae69ef511 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java @@ -34,7 +34,7 @@ /* * @test id=byte-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment ByteArray */ @@ -42,7 +42,7 @@ /* * @test id=char-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment CharArray */ @@ -50,7 +50,7 @@ /* * @test id=short-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment ShortArray */ @@ -58,7 
+58,7 @@ /* * @test id=int-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment IntArray */ @@ -66,7 +66,7 @@ /* * @test id=long-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment LongArray */ @@ -74,7 +74,7 @@ /* * @test id=float-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment FloatArray */ @@ -82,7 +82,7 @@ /* * @test id=double-array * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment DoubleArray */ @@ -90,7 +90,7 @@ /* * @test id=byte-buffer * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment ByteBuffer */ @@ -98,7 +98,7 @@ /* * @test id=byte-buffer-direct * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment ByteBufferDirect */ @@ -106,16 +106,16 @@ /* * @test id=native * @bug 8335392 - * @summary Test vectorization of loops over MemorySegment + * @summary Test MergeStores optimization for MemorySegment * @library /test/lib / * @run driver compiler.c2.TestMergeStoresMemorySegment Native */ -// FAILS: mixed providers currently do not vectorize. Maybe there is some inlining issue. 
+// FAILS: mixed providers currently do not merge stores. Maybe there is some inlining issue. // /* // * @test id=mixed-array // * @bug 8335392 -// * @summary Test vectorization of loops over MemorySegment +// * @summary Test MergeStores optimization for MemorySegment // * @library /test/lib / // * @run driver compiler.c2.TestMergeStoresMemorySegment MixedArray // */ @@ -123,7 +123,7 @@ // /* // * @test id=MixedBuffer // * @bug 8335392 -// * @summary Test vectorization of loops over MemorySegment +// * @summary Test MergeStores optimization for MemorySegment // * @library /test/lib / // * @run driver compiler.c2.TestMergeStoresMemorySegment MixedBuffer // */ @@ -131,17 +131,19 @@ // /* // * @test id=mixed // * @bug 8335392 -// * @summary Test vectorization of loops over MemorySegment +// * @summary Test MergeStores optimization for MemorySegment // * @library /test/lib / // * @run driver compiler.c2.TestMergeStoresMemorySegment Mixed // */ public class TestMergeStoresMemorySegment { public static void main(String[] args) { - TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class); - framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0]); - framework.setDefaultWarmup(100); - framework.start(); + for (String unaligned : new String[]{"-XX:-UseUnalignedAccesses", "-XX:+UseUnalignedAccesses"}) { + TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class); + framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0], unaligned); + framework.setDefaultWarmup(100); + framework.start(); + } } } @@ -149,6 +151,15 @@ class TestMergeStoresMemorySegmentImpl { static final int BACKING_SIZE = 1024 * 8; static final Random RANDOM = Utils.getRandomInstance(); + private static final String START = "(\\d+(\\s){2}("; + private static final String MID = ".*)+(\\s){2}===.*"; + private static final String END = ")"; + + // Custom Regex: allows us to only match Store that come from MemorySegment internals. 
+ private static final String REGEX_STORE_B_TO_MS_FROM_B = START + "StoreB" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_C_TO_MS_FROM_B = START + "StoreC" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_I_TO_MS_FROM_B = START + "StoreI" + MID + END + "ScopedMemoryAccess::putByteInternal"; + private static final String REGEX_STORE_L_TO_MS_FROM_B = START + "StoreL" + MID + END + "ScopedMemoryAccess::putByteInternal"; interface TestFunction { Object[] run(); @@ -193,32 +204,19 @@ public TestMergeStoresMemorySegmentImpl () { fillRandom(a); fillRandom(b); - // Add all tests to list - tests.put("testMemorySegmentBadExitCheck", () -> testMemorySegmentBadExitCheck(copy(a))); - tests.put("testIntLoop_iv_byte", () -> testIntLoop_iv_byte(copy(a))); - tests.put("testIntLoop_longIndex_intInvar_sameAdr_byte", () -> testIntLoop_longIndex_intInvar_sameAdr_byte(copy(a), 0)); - tests.put("testIntLoop_longIndex_longInvar_sameAdr_byte", () -> testIntLoop_longIndex_longInvar_sameAdr_byte(copy(a), 0)); - tests.put("testIntLoop_longIndex_intInvar_byte", () -> testIntLoop_longIndex_intInvar_byte(copy(a), 0)); - tests.put("testIntLoop_longIndex_longInvar_byte", () -> testIntLoop_longIndex_longInvar_byte(copy(a), 0)); - tests.put("testIntLoop_intIndex_intInvar_byte", () -> testIntLoop_intIndex_intInvar_byte(copy(a), 0)); - tests.put("testIntLoop_iv_int", () -> testIntLoop_iv_int(copy(a))); - tests.put("testIntLoop_longIndex_intInvar_sameAdr_int", () -> testIntLoop_longIndex_intInvar_sameAdr_int(copy(a), 0)); - tests.put("testIntLoop_longIndex_longInvar_sameAdr_int", () -> testIntLoop_longIndex_longInvar_sameAdr_int(copy(a), 0)); - tests.put("testIntLoop_longIndex_intInvar_int", () -> testIntLoop_longIndex_intInvar_int(copy(a), 0)); - tests.put("testIntLoop_longIndex_longInvar_int", () -> testIntLoop_longIndex_longInvar_int(copy(a), 0)); - tests.put("testIntLoop_intIndex_intInvar_int", () -> 
testIntLoop_intIndex_intInvar_int(copy(a), 0)); - tests.put("testLongLoop_iv_byte", () -> testLongLoop_iv_byte(copy(a))); - tests.put("testLongLoop_longIndex_intInvar_sameAdr_byte", () -> testLongLoop_longIndex_intInvar_sameAdr_byte(copy(a), 0)); - tests.put("testLongLoop_longIndex_longInvar_sameAdr_byte", () -> testLongLoop_longIndex_longInvar_sameAdr_byte(copy(a), 0)); - tests.put("testLongLoop_longIndex_intInvar_byte", () -> testLongLoop_longIndex_intInvar_byte(copy(a), 0)); - tests.put("testLongLoop_longIndex_longInvar_byte", () -> testLongLoop_longIndex_longInvar_byte(copy(a), 0)); - tests.put("testLongLoop_intIndex_intInvar_byte", () -> testLongLoop_intIndex_intInvar_byte(copy(a), 0)); - tests.put("testLongLoop_iv_int", () -> testLongLoop_iv_int(copy(a))); - tests.put("testLongLoop_longIndex_intInvar_sameAdr_int", () -> testLongLoop_longIndex_intInvar_sameAdr_int(copy(a), 0)); - tests.put("testLongLoop_longIndex_longInvar_sameAdr_int", () -> testLongLoop_longIndex_longInvar_sameAdr_int(copy(a), 0)); - tests.put("testLongLoop_longIndex_intInvar_int", () -> testLongLoop_longIndex_intInvar_int(copy(a), 0)); - tests.put("testLongLoop_longIndex_longInvar_int", () -> testLongLoop_longIndex_longInvar_int(copy(a), 0)); - tests.put("testLongLoop_intIndex_intInvar_int", () -> testLongLoop_intIndex_intInvar_int(copy(a), 0)); + // Future Work: add more test cases. For now, the issue seems to be that + // RangeCheck smearing does not remove the RangeChecks, thus + // we can only ever merge two stores. + // + // Ideas for more test cases, once they are better optimized: + // + // Have about 3 variables, each either int or long. Add all in int or + // long. Give them different scales. Compute the address in the same + // expression or separately. Use different element store sizes (BCIL). 
+ // + tests.put("test_xxx", () -> test_xxx(copy(a), 5, 11, 31)); + tests.put("test_yyy", () -> test_yyy(copy(a), 5, 11, 31)); + tests.put("test_zzz", () -> test_zzz(copy(a), 5, 11, 31)); // Compute gold value for all test methods before compilation for (Map.Entry entry : tests.entrySet()) { @@ -353,31 +351,7 @@ static void verify(String name, Object[] gold, Object[] result) { } } - @Run(test = {"testMemorySegmentBadExitCheck", - "testIntLoop_iv_byte", - "testIntLoop_longIndex_intInvar_sameAdr_byte", - "testIntLoop_longIndex_longInvar_sameAdr_byte", - "testIntLoop_longIndex_intInvar_byte", - "testIntLoop_longIndex_longInvar_byte", - "testIntLoop_intIndex_intInvar_byte", - "testIntLoop_iv_int", - "testIntLoop_longIndex_intInvar_sameAdr_int", - "testIntLoop_longIndex_longInvar_sameAdr_int", - "testIntLoop_longIndex_intInvar_int", - "testIntLoop_longIndex_longInvar_int", - "testIntLoop_intIndex_intInvar_int", - "testLongLoop_iv_byte", - "testLongLoop_longIndex_intInvar_sameAdr_byte", - "testLongLoop_longIndex_longInvar_sameAdr_byte", - "testLongLoop_longIndex_intInvar_byte", - "testLongLoop_longIndex_longInvar_byte", - "testLongLoop_intIndex_intInvar_byte", - "testLongLoop_iv_int", - "testLongLoop_longIndex_intInvar_sameAdr_int", - "testLongLoop_longIndex_longInvar_sameAdr_int", - "testLongLoop_longIndex_intInvar_int", - "testLongLoop_longIndex_longInvar_int", - "testLongLoop_intIndex_intInvar_int"}) + @Run(test = { "test_xxx", "test_yyy", "test_zzz" }) void runTests() { for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); @@ -392,419 +366,62 @@ void runTests() { } @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS - // Exit check: iv < long_limit -> (long)iv < long_limit - // Thus, we have an int-iv, but a long-exit-check. 
- // Is not properly recognized by either CountedLoop or LongCountedLoop - static Object[] testMemorySegmentBadExitCheck(MemorySegment a) { - for (int i = 0; i < a.byteSize(); i++) { - long adr = i; - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_iv_byte(MemorySegment a) { - for (int i = 0; i < (int)a.byteSize(); i++) { - long adr = i; - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_longIndex_intInvar_sameAdr_byte(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize(); i++) { - long adr = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_longIndex_longInvar_sameAdr_byte(MemorySegment a, long invar) { - for (int i = 0; i < (int)a.byteSize(); i++) { - long adr = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", 
"true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. - // See: JDK-8330274 - static Object[] testIntLoop_longIndex_intInvar_byte(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize(); i++) { - long adr1 = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr1); - long adr2 = (long)(i) + (long)(invar); - a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. - // See: JDK-8330274 - static Object[] testIntLoop_longIndex_longInvar_byte(MemorySegment a, long invar) { - for (int i = 0; i < (int)a.byteSize(); i++) { - long adr1 = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr1); - long adr2 = (long)(i) + (long)(invar); - a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); - } - return new Object[]{ a }; + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test_xxx(MemorySegment a, int xI, int yI, int zI) { + // All RangeChecks remain -> RC smearing not good enough? 
+ a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 0), (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 1), (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 2), (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 3), (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 4), (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 5), (byte)' '); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 6), (byte)':'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 7), (byte)')'); + return new Object[]{ a }; } @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: RangeCheck cannot be eliminated because of int_index - static Object[] testIntLoop_intIndex_intInvar_byte(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize(); i++) { - int int_index = i + invar; - byte v = a.get(ValueLayout.JAVA_BYTE, int_index); - a.set(ValueLayout.JAVA_BYTE, int_index, (byte)(v + 1)); - } - return new Object[]{ a }; + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] test_yyy(MemorySegment a, int xI, int yI, int zI) { + // All RangeChecks remain -> RC smearing not good enough? 
+ a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 0L, (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 1L, (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 2L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 3L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 4L, (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 5L, (byte)' '); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 6L, (byte)':'); + a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 7L, (byte)')'); + return new Object[]{ a }; } @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - IRNode.ADD_VI, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIf = {"AlignVector", "false"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_iv_int(MemorySegment a) { - for (int i = 0; i < (int)a.byteSize()/4; i++ ) { - long adr = 4L * i; - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - IRNode.ADD_VI, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIf = {"AlignVector", "false"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_longIndex_intInvar_sameAdr_int(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize()/4; i++) { - long adr = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - IRNode.ADD_VI, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIf = 
{"AlignVector", "false"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testIntLoop_longIndex_longInvar_sameAdr_int(MemorySegment a, long invar) { - for (int i = 0; i < (int)a.byteSize()/4; i++) { - long adr = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. - // See: JDK-8330274 - static Object[] testIntLoop_longIndex_intInvar_int(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize()/4; i++) { - long adr1 = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); - long adr2 = 4L * (long)(i) + 4L * (long)(invar); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. 
- // See: JDK-8330274 - static Object[] testIntLoop_longIndex_longInvar_int(MemorySegment a, long invar) { - for (int i = 0; i < (int)a.byteSize()/4; i++) { - long adr1 = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); - long adr2 = 4L * (long)(i) + 4L * (long)(invar); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: RangeCheck cannot be eliminated because of int_index - static Object[] testIntLoop_intIndex_intInvar_int(MemorySegment a, int invar) { - for (int i = 0; i < (int)a.byteSize()/4; i++) { - int int_index = i + invar; - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index); - a.set(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testLongLoop_iv_byte(MemorySegment a) { - for (long i = 0; i < a.byteSize(); i++) { - long adr = i; - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testLongLoop_longIndex_intInvar_sameAdr_byte(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize(); i++) { - long adr = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a 
}; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", - IRNode.ADD_VB, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testLongLoop_longIndex_longInvar_sameAdr_byte(MemorySegment a, long invar) { - for (long i = 0; i < a.byteSize(); i++) { - long adr = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr); - a.set(ValueLayout.JAVA_BYTE, adr, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. - // See: JDK-8330274 - static Object[] testLongLoop_longIndex_intInvar_byte(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize(); i++) { - long adr1 = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr1); - long adr2 = (long)(i) + (long)(invar); - a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. 
- // See: JDK-8330274 - static Object[] testLongLoop_longIndex_longInvar_byte(MemorySegment a, long invar) { - for (long i = 0; i < a.byteSize(); i++) { - long adr1 = (long)(i) + (long)(invar); - byte v = a.get(ValueLayout.JAVA_BYTE, adr1); - long adr2 = (long)(i) + (long)(invar); - a.set(ValueLayout.JAVA_BYTE, adr2, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", - IRNode.ADD_VB, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: RangeCheck cannot be eliminated because of int_index - static Object[] testLongLoop_intIndex_intInvar_byte(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize(); i++) { - int int_index = (int)(i + invar); - byte v = a.get(ValueLayout.JAVA_BYTE, int_index); - a.set(ValueLayout.JAVA_BYTE, int_index, (byte)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - IRNode.ADD_VI, "> 0", - IRNode.STORE_VECTOR, "> 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIf = {"AlignVector", "false"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testLongLoop_iv_int(MemorySegment a) { - for (long i = 0; i < a.byteSize()/4; i++ ) { - long adr = 4L * i; - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - //@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - // IRNode.ADD_VI, "> 0", - // IRNode.STORE_VECTOR, "> 0"}, - // applyIfPlatform = {"64-bit", "true"}, - // applyIf = {"AlignVector", "false"}, - // applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: for native memory. I think it is because of invariants, but need investigation. - // The long -> int loop conversion introduces extra invariants. 
- static Object[] testLongLoop_longIndex_intInvar_sameAdr_int(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize()/4; i++) { - long adr = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - //@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", - // IRNode.ADD_VI, "> 0", - // IRNode.STORE_VECTOR, "> 0"}, - // applyIfPlatform = {"64-bit", "true"}, - // applyIf = {"AlignVector", "false"}, - // applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: for native memory. I think it is because of invariants, but need investigation. - // The long -> int loop conversion introduces extra invariants. - static Object[] testLongLoop_longIndex_longInvar_sameAdr_int(MemorySegment a, long invar) { - for (long i = 0; i < a.byteSize()/4; i++) { - long adr = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. 
- // See: JDK-8330274 - static Object[] testLongLoop_longIndex_intInvar_int(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize()/4; i++) { - long adr1 = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); - long adr2 = 4L * (long)(i) + 4L * (long)(invar); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: invariants are sorted differently, because of differently inserted Cast. - // See: JDK-8330274 - static Object[] testLongLoop_longIndex_longInvar_int(MemorySegment a, long invar) { - for (long i = 0; i < a.byteSize()/4; i++) { - long adr1 = 4L * (long)(i) + 4L * (long)(invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, adr1); - long adr2 = 4L * (long)(i) + 4L * (long)(invar); - a.set(ValueLayout.JAVA_INT_UNALIGNED, adr2, (int)(v + 1)); - } - return new Object[]{ a }; - } - - @Test - @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", - IRNode.ADD_VI, "= 0", - IRNode.STORE_VECTOR, "= 0"}, - applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: RangeCheck cannot be eliminated because of int_index - static Object[] testLongLoop_intIndex_intInvar_int(MemorySegment a, int invar) { - for (long i = 0; i < a.byteSize()/4; i++) { - int int_index = (int)(i + invar); - int v = a.get(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index); - a.set(ValueLayout.JAVA_INT_UNALIGNED, 4L * int_index, (int)(v + 1)); - } - return new Object[]{ a }; + @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC + REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged + REGEX_STORE_I_TO_MS_FROM_B, "0", + REGEX_STORE_L_TO_MS_FROM_B, "0"}, + phase = CompilePhase.PRINT_IDEAL, + applyIf = {"UseUnalignedAccesses", "true"}) + static Object[] 
test_zzz(MemorySegment a, long xL, long yL, long zL) { + // All RangeChecks remain -> RC smearing not good enough? + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 0L, (byte)'h'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 1L, (byte)'e'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 2L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 3L, (byte)'l'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 4L, (byte)'o'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 5L, (byte)' '); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 6L, (byte)':'); + a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 7L, (byte)')'); + return new Object[]{ a }; } } From 737432c82419d0ac641a44e95c895e3d91c125f2 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 17:34:57 +0200 Subject: [PATCH 57/89] rm some TODOs --- src/hotspot/share/opto/memnode.cpp | 3 --- src/hotspot/share/opto/mempointer.cpp | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 3bc3e0e3bc29e..936e9c2dce9a1 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2839,8 +2839,6 @@ StoreNode* MergePrimitiveStores::run() { NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); }) - // TODO maybe parse pointer, see if viable? - only if cached! - // The _store must be the "last" store in a chain. If we find a use we could merge with // then that use or a store further down is the "last" store. Status status_use = find_adjacent_use_store(_store); @@ -2881,7 +2879,6 @@ bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) con return false; } - // TODO: check if same base or both no base??? 
return true; } diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 2cde5589c5276..8baef4139ec8f 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -209,13 +209,11 @@ bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ON case Op_CastII: case Op_CastLL: case Op_CastX2P: - // TODO CastPP ? + case Op_CastPP: case Op_ConvI2L: return true; } - // TODO tests with native memory, etc. - const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); if (ary_ptr_t != nullptr) { // Array accesses that are not Unsafe always have a RangeCheck which ensures From 5b7b06d915fe08c9ed09b1daeeb7d840e3ae18d0 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 17:47:07 +0200 Subject: [PATCH 58/89] small fixes --- src/hotspot/share/opto/mempointer.cpp | 2 ++ src/hotspot/share/opto/mempointer.hpp | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 8baef4139ec8f..a0904018f5fdf 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -184,6 +184,8 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su _summands.push(summand); } +// Check if the decomposition of operation opc is guaranteed to be safe from int overflows. +// TODO maybe use linearity in name? BC what is safe and why int-overflow??? 
bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index b9922619ca794..843406d311f20 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -280,10 +280,11 @@ class MemPointerLinearForm : public StackObj { public: // Empty MemPointerLinearForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} - // Default: pointer = variable - MemPointerLinearForm(Node* variable) : _pointer(variable), _con(NoOverflowInt(0)) { + // Default / trivial: pointer = 0 + 1 * pointer + MemPointerLinearForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { + assert(pointer != nullptr, "pointer must be non-null"); const NoOverflowInt one(1); - _summands[0] = MemPointerSummand(variable, one LP64_ONLY( COMMA one )); + _summands[0] = MemPointerSummand(pointer, one LP64_ONLY( COMMA one )); } private: @@ -344,9 +345,9 @@ class MemPointerLinearFormParser : public StackObj { const MemNode* _mem; // Internal data-structures for parsing. + NoOverflowInt _con; GrowableArray _worklist; GrowableArray _summands; - NoOverflowInt _con; // Resulting linear-form. 
MemPointerLinearForm _linear_form; From 1b7e0dc012dd6f1ed226716921d75150ea7b0e4a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 18:13:41 +0200 Subject: [PATCH 59/89] rename linear -> decomposed --- src/hotspot/share/opto/mempointer.cpp | 39 ++++++------- src/hotspot/share/opto/mempointer.hpp | 82 +++++++++++++-------------- 2 files changed, 61 insertions(+), 60 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index a0904018f5fdf..7a091b1d60132 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -27,7 +27,7 @@ // Recursively parse the pointer expression with a DFS all-path traversal // (i.e. with node repetitions), starting at the pointer. -MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { +MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() { assert(_worklist.is_empty(), "no prior parsing"); assert(_summands.is_empty(), "no prior parsing"); @@ -41,12 +41,12 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { // parses the pointer expression recursively. int traversal_count = 0; while (_worklist.is_nonempty()) { - if (traversal_count++ > 1000) { return MemPointerLinearForm(pointer); } + if (traversal_count++ > 1000) { return MemPointerDecomposedForm(pointer); } parse_sub_expression(_worklist.pop()); } // Check for constant overflow. - if (_con.is_NaN()) { return MemPointerLinearForm(pointer); } + if (_con.is_NaN()) { return MemPointerDecomposedForm(pointer); } // Sort summands by variable->_idx _summands.sort(MemPointerSummand::cmp_for_sort); @@ -65,7 +65,7 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { } // Bail out if scale is NaN. if (scale.is_NaN()) { - return MemPointerLinearForm(pointer); + return MemPointerDecomposedForm(pointer); } // Keep summands with non-zero scale. 
if (!scale.is_zero()) { @@ -74,20 +74,20 @@ MemPointerLinearForm MemPointerLinearFormParser::parse_linear_form() { } _summands.trunc_to(pos_put); - return MemPointerLinearForm::make(pointer, _summands, _con); + return MemPointerDecomposedForm::make(pointer, _summands, _con); } // Parse a sub-expression of the pointer, starting at the current summand. We parse the // current node, and see if it can be decomposed into further summands, or if the current // summand is terminal. -void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand summand) { +void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); const NoOverflowInt scale = summand.scale(); LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) const NoOverflowInt one(1); int opc = n->Opcode(); - if (is_safe_from_int_overflow(opc LP64_ONLY( COMMA scaleL ))) { + if (is_safe_to_decompose_op(opc LP64_ONLY( COMMA scaleL ))) { switch (opc) { case Op_ConI: case Op_ConL: @@ -129,7 +129,7 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_LShiftL: case Op_LShiftI: { - // Form must be linear: only multiplication with constants can be decomposed. + // Only multiplication with constants is allowed: factor * in2 Node* in1 = n->in(1); Node* in2 = n->in(2); if (!in2->is_Con()) { break; } @@ -165,10 +165,12 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su case Op_CastLL: case Op_CastX2P: case Op_ConvI2L: - // On 32bit systems we can also look through ConvI2L, since the final result will always - // be truncated back with ConvL2I. On 64bit systems this is not linear: + // On 32bit systems we can also look through ConvL2I, since the final result will always + // be truncated back with ConvL2I. 
On 64bit systems we cannot decompose ConvL2I because + // such int values will eventually be expanded to long with a ConvI2L: // - // ConvI2L(ConvL2I(max_jint + 1)) = ConvI2L(min_jint) = min_jint + // valL = max_jint + 1 + // ConvI2L(ConvL2I(valL)) = ConvI2L(min_jint) = min_jint != max_jint + 1 = valL // NOT_LP64( case Op_ConvL2I: ) { @@ -184,9 +186,8 @@ void MemPointerLinearFormParser::parse_sub_expression(const MemPointerSummand su _summands.push(summand); } -// Check if the decomposition of operation opc is guaranteed to be safe from int overflows. -// TODO maybe use linearity in name? BC what is safe and why int-overflow??? -bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { +// Check if the decomposition of operation opc is guaranteed to be safe. +bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always // be truncated. Thus, it does not matter if we have int or long overflows. 
@@ -238,11 +239,11 @@ bool MemPointerLinearFormParser::is_safe_from_int_overflow(const int opc LP64_ON #endif } -MemPointerAliasing MemPointerLinearForm::get_aliasing_with(const MemPointerLinearForm& other - NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { +MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other + NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { #ifndef PRODUCT if (trace.is_trace_aliasing()) { - tty->print_cr("MemPointerLinearForm::get_aliasing_with:"); + tty->print_cr("MemPointerDecomposedForm::get_aliasing_with:"); print_on(tty); other.print_on(tty); } @@ -285,8 +286,8 @@ MemPointerAliasing MemPointerLinearForm::get_aliasing_with(const MemPointerLinea } bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { - const MemPointerLinearForm& s1 = linear_form(); - const MemPointerLinearForm& s2 = other.linear_form(); + const MemPointerDecomposedForm& s1 = decomposed_form(); + const MemPointerDecomposedForm& s2 = other.decomposed_form(); const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace )); const jint size = mem()->memory_size(); const bool is_adjacent = aliasing.is_always_at_distance(size); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 843406d311f20..8481bbd2cff2b 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -31,8 +31,8 @@ // The MemPointer is a shared facility to parse pointers and check the aliasing of pointers, // e.g. checking if two stores are adjacent. 
// -// MemPointerLinearForm: -// When the pointer is parsed, it is represented as a linear form: +// MemPointerDecomposedForm: +// When the pointer is parsed, it is decomposed into a constant and a sum of summands: // // pointer = con + sum(summands) // @@ -40,15 +40,15 @@ // // summand_i = scale_i * variable_i // -// Hence, the full linear form is: +// Hence, the full decomposed form is: // // pointer = con + sum_i(scale_i * variable_i) // -// On 64bit systems, this linear form is computed with long-add/mul, on 32bit systems it is -// computed with int-add/mul. +// On 64bit systems, this decomposed form is computed with long-add/mul, on 32bit systems +// it is computed with int-add/mul. // // MemPointerAliasing: -// The linear form allows us to determine the aliasing between two pointers easily. For +// The decomposed form allows us to determine the aliasing between two pointers easily. For // example, if two pointers are identical, except for their constant: // // pointer1 = con1 + sum(summands) @@ -57,8 +57,8 @@ // then we can easily compute the distance between the pointers (distance = con2 - con1), // and determine if they are adjacent. // -// MemPointerLinearFormParser: -// Any pointer can be parsed into this (default / trivial) linear form: +// MemPointerDecomposedFormParser: +// Any pointer can be parsed into this (default / trivial) decomposed form: // // pointer = 0 + 1 * pointer // con scale @@ -72,21 +72,21 @@ // pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1) // // At first, computing aliasing is difficult because the distance is hidden inside the -// ConvI2L. we can convert this (with array_int_base_offset = 16) into these linear forms: +// ConvI2L. we can convert this (with array_int_base_offset = 16) into these decomposed forms: // // pointer1 = 16L + 1L * array_base + 4L * i // pointer2 = 20L + 1L * array_base + 4L * i // // This allows us to easily see that these two pointers are adjacent (distance = 4). 
// -// Hence, in MemPointerLinearFormParser::parse_linear_form, we start with the pointer as +// Hence, in MemPointerDecomposedFormParser::parse_decomposed_form, we start with the pointer as // a trivial summand. A summand can either be decomposed further or it is terminal (cannot // be decomposed further). We decompose the summands recursively until all remaining summands -// are terminal, see MemPointerLinearFormParser::parse_sub_expression. This effectively parses +// are terminal, see MemPointerDecomposedFormParser::parse_sub_expression. This effectively parses // the pointer expression recursively. // // We have to be careful on 64bit systems with ConvI2L: decomposing its input is not -// correct in general, overflows may not be preserved in the linear form: +// correct in general, overflows may not be preserved in the decomposed form: // // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) @@ -94,7 +94,7 @@ // LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) // // However, there are some cases where we can prove that the decomposition is safe, -// see MemPointerLinearFormParser::is_safe_from_int_overflow. +// see MemPointerDecomposedFormParser::is_safe_to_decompose_op. #ifndef PRODUCT class TraceMemPointer : public StackObj { @@ -168,7 +168,7 @@ class MemPointerAliasing { #endif }; -// Summand of a MemPointerLinearForm: +// Summand of a MemPointerDecomposedForm: // // summand = scale * variable // @@ -193,7 +193,7 @@ class MemPointerAliasing { // // Note: we only need scaleL during the decomposition of the pointer. We need to check // if decomposing a summand further is safe (i.e. if there cannot be an overflow), -// see MemPointerLinearFormParser::is_safe_from_int_overflow. But during aliasing +// see MemPointerDecomposedFormParser::is_safe_to_decompose_op. But during aliasing // computation, we fully rely on scale, and do not need scaleL any more. 
// class MemPointerSummand : public StackObj { @@ -258,11 +258,11 @@ class MemPointerSummand : public StackObj { #endif }; -// Linear form of the pointer sub-expression of "pointer". +// Decomposed form of the pointer sub-expression of "pointer". // // pointer = con + sum(summands) // -class MemPointerLinearForm : public StackObj { +class MemPointerDecomposedForm : public StackObj { private: // We limit the number of summands to 10. Usually, a pointer contains a base pointer // (e.g. array pointer or null for native memory) and a few variables. For example: @@ -279,16 +279,16 @@ class MemPointerLinearForm : public StackObj { public: // Empty - MemPointerLinearForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} + MemPointerDecomposedForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} // Default / trivial: pointer = 0 + 1 * pointer - MemPointerLinearForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { + MemPointerDecomposedForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { assert(pointer != nullptr, "pointer must be non-null"); const NoOverflowInt one(1); _summands[0] = MemPointerSummand(pointer, one LP64_ONLY( COMMA one )); } private: - MemPointerLinearForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) + MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) :_pointer(pointer), _con(con) { assert(!_con.is_NaN(), "non-NaN constant"); assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); @@ -302,15 +302,15 @@ class MemPointerLinearForm : public StackObj { } public: - static MemPointerLinearForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { + static MemPointerDecomposedForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { if (summands.length() <= SUMMANDS_SIZE) { - return MemPointerLinearForm(pointer, summands, con); + return MemPointerDecomposedForm(pointer, summands, con); } else { 
- return MemPointerLinearForm(pointer); + return MemPointerDecomposedForm(pointer); } } - MemPointerAliasing get_aliasing_with(const MemPointerLinearForm& other + MemPointerAliasing get_aliasing_with(const MemPointerDecomposedForm& other NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const; const MemPointerSummand summands_at(const uint i) const { @@ -323,10 +323,10 @@ class MemPointerLinearForm : public StackObj { #ifndef PRODUCT void print_on(outputStream* st) const { if (_pointer == nullptr) { - st->print_cr("MemPointerLinearForm empty."); + st->print_cr("MemPointerDecomposedForm empty."); return; } - st->print("MemPointerLinearForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); + st->print("MemPointerDecomposedForm[%d %s: con = ", _pointer->_idx, _pointer->Name()); _con.print_on(st); for (int i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand& summand = _summands[i]; @@ -340,7 +340,7 @@ class MemPointerLinearForm : public StackObj { #endif }; -class MemPointerLinearFormParser : public StackObj { +class MemPointerDecomposedFormParser : public StackObj { private: const MemNode* _mem; @@ -349,21 +349,21 @@ class MemPointerLinearFormParser : public StackObj { GrowableArray _worklist; GrowableArray _summands; - // Resulting linear-form. - MemPointerLinearForm _linear_form; + // Resulting decomposed-form. 
+ MemPointerDecomposedForm _decomposed_form; public: - MemPointerLinearFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { - _linear_form = parse_linear_form(); + MemPointerDecomposedFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) { + _decomposed_form = parse_decomposed_form(); } - const MemPointerLinearForm linear_form() const { return _linear_form; } + const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; } private: - MemPointerLinearForm parse_linear_form(); + MemPointerDecomposedForm parse_decomposed_form(); void parse_sub_expression(const MemPointerSummand summand); - bool is_safe_from_int_overflow(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; + bool is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; }; // Facility to parse the pointer of a Load or Store, so that aliasing between two such @@ -371,14 +371,14 @@ class MemPointerLinearFormParser : public StackObj { class MemPointer : public StackObj { private: const MemNode* _mem; - const MemPointerLinearForm _linear_form; + const MemPointerDecomposedForm _decomposed_form; NOT_PRODUCT( const TraceMemPointer& _trace; ) public: MemPointer(const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) : _mem(mem), - _linear_form(init_linear_form(_mem)) + _decomposed_form(init_decomposed_form(_mem)) NOT_PRODUCT( COMMA _trace(trace) ) { #ifndef PRODUCT @@ -386,21 +386,21 @@ class MemPointer : public StackObj { tty->print_cr("MemPointer::MemPointer:"); tty->print("mem: "); mem->dump(); _mem->in(MemNode::Address)->dump_bfs(5, 0, "d"); - _linear_form.print_on(tty); + _decomposed_form.print_on(tty); } #endif } const MemNode* mem() const { return _mem; } - const MemPointerLinearForm linear_form() const { return _linear_form; } + const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; } bool is_adjacent_to_and_before(const MemPointer& other) const; private: - static 
const MemPointerLinearForm init_linear_form(const MemNode* mem) { + static const MemPointerDecomposedForm init_decomposed_form(const MemNode* mem) { assert(mem->is_Store(), "only stores are supported"); ResourceMark rm; - MemPointerLinearFormParser parser(mem); - return parser.linear_form(); + MemPointerDecomposedFormParser parser(mem); + return parser.decomposed_form(); } }; From 23916a8686c508eb4e15a0fef233b8b334ce5b68 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 18:43:14 +0200 Subject: [PATCH 60/89] beautify some more things --- src/hotspot/share/opto/mempointer.cpp | 48 +++++++++++++++------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 7a091b1d60132..4e41ecb542269 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -129,27 +129,27 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman case Op_LShiftL: case Op_LShiftI: { - // Only multiplication with constants is allowed: factor * in2 - Node* in1 = n->in(1); - Node* in2 = n->in(2); - if (!in2->is_Con()) { break; } + // Only multiplication with constants is allowed: factor * variable + Node* variable = n->in(1); + Node* con = n->in(2); + if (!con->is_Con()) { break; } NoOverflowInt factor; LP64_ONLY( NoOverflowInt factorL; ) switch (opc) { - case Op_MulL: - factor = NoOverflowInt(in2->get_long()); + case Op_MulL: // variable * con + factor = NoOverflowInt(con->get_long()); LP64_ONLY( factorL = factor; ) break; - case Op_MulI: - factor = NoOverflowInt(in2->get_int()); + case Op_MulI: // variable * con + factor = NoOverflowInt(con->get_int()); LP64_ONLY( factorL = one; ) break; - case Op_LShiftL: - factor = one << NoOverflowInt(in2->get_int()); + case Op_LShiftL: // variable << con = variable * (1 << con) + factor = one << NoOverflowInt(con->get_int()); LP64_ONLY( factorL = factor; ) break; - case Op_LShiftI: - 
factor = one << NoOverflowInt(in2->get_int()); + case Op_LShiftI: // variable << con = variable * (1 << con) + factor = one << NoOverflowInt(con->get_int()); LP64_ONLY( factorL = one; ) break; } @@ -158,7 +158,7 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman NoOverflowInt new_scale = scale * factor; LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) - _worklist.push(MemPointerSummand(in1, new_scale LP64_ONLY( COMMA new_scaleL ))); + _worklist.push(MemPointerSummand(variable, new_scale LP64_ONLY( COMMA new_scaleL ))); return; } case Op_CastII: @@ -194,14 +194,8 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ return true; #else - // But on 64-bit platforms, these operations are not trivially safe: - // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) - // SubI: ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) - // MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) - // LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) - // - // But these are always safe: switch(opc) { + // These operations are always safe to decompose: case Op_ConI: case Op_ConL: case Op_AddP: @@ -215,6 +209,17 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ case Op_CastPP: case Op_ConvI2L: return true; + + // But on 64-bit platforms, these operations are not trivially safe to decompose: + case Op_AddI: // ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) + case Op_SubI: // ConvI2L(a - b) != ConvI2L(a) - ConvI2L(b) + case Op_MulI: // ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) + case Op_LShiftI: // ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) + break; // Analysis below. 
+ + default: + assert(false, "case not covered explicitly"); + return false; } const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); @@ -225,7 +230,8 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ return true; } - // TODO + // Intuition: what happens if a AddI, SubI, MulI or LShiftI (with constant) overflows + // the int range? TODO: generalize this a bit and write the proof! BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type(); if (is_java_primitive(array_element_bt)) { NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt)); From 6545579c6adfb2fb382dcbe794e29c6c0c2a1153 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Aug 2024 18:54:28 +0200 Subject: [PATCH 61/89] more ideas --- src/hotspot/share/opto/mempointer.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 4e41ecb542269..6c2bfb379dca4 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -232,6 +232,20 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ // Intuition: what happens if a AddI, SubI, MulI or LShiftI (with constant) overflows // the int range? TODO: generalize this a bit and write the proof! + // + // Idea: pointer = con + sum(other_summands) + summand + // ------------------------- ------- + // rest scale * ConvI2L(op) + // + // thus, can we replace: + // + // scale * ConvI2L(a + b) -> scale * ConvI2L(a) + scale * ConvI2L(b) + // scale * ConvI2L(a - b) -> scale * ConvI2L(a) - scale * ConvI2L(b) + // scale * ConvI2L(a * con) -> scale * con * ConvI2L(a) + // scale * ConvI2L(a << con) -> scale * (1 << con) * ConvI2L(a) + // + // TODO what scale are we talking about??? scaleI or scaleL or scale??? 
not sure + // BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type(); if (is_java_primitive(array_element_bt)) { NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt)); From 9ded881ff100b69d040acb33fbc37e6db7099cc5 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 10 Sep 2024 15:17:14 +0200 Subject: [PATCH 62/89] fix the default --- src/hotspot/share/compiler/directivesParser.cpp | 2 +- src/hotspot/share/opto/mempointer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/compiler/directivesParser.cpp b/src/hotspot/share/compiler/directivesParser.cpp index e28a5cd99d3b7..731bf33d799dd 100644 --- a/src/hotspot/share/compiler/directivesParser.cpp +++ b/src/hotspot/share/compiler/directivesParser.cpp @@ -348,7 +348,7 @@ bool DirectivesParser::set_option_flag(JSON_TYPE t, JSON_VAL* v, const key* opti } else { error(VALUE_ERROR, "Unrecognized tag name detected in TraceAutoVectorization: %s", validator.what()); } - } else if (strncmp(option_key->name, "TraceMergeStores", 16) == 0) { + } else if (strncmp(option_key->name, "TraceMergeStores", 16) == 0) { TraceMergeStores::TagValidator validator(s, false); valid = validator.is_valid(); diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 6c2bfb379dca4..4a02bbda885a5 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -217,8 +217,8 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ case Op_LShiftI: // ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) break; // Analysis below. 
+ // All other operations are assumed not safe to decompose, or simply cannot be decomposed default: - assert(false, "case not covered explicitly"); return false; } From 17712f8e90d2284e49fb8aa096ef609984226cd7 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 10 Sep 2024 19:12:07 +0200 Subject: [PATCH 63/89] first part of the proof --- src/hotspot/share/opto/mempointer.cpp | 73 +++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 4a02bbda885a5..14e54efe1cf8c 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -187,6 +187,66 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman } // Check if the decomposition of operation opc is guaranteed to be safe. +// +// TODO +// +// Definition: Safe decomposition +// We decompose summand in: +// mp1 = con + summand + sum(other_summands) +// Resulting in: +-------------------------+ +// mp2 = con + dec_con + sum(dec_summands) + sum(other_summands) +// = new_con + sum(new_summands) +// +// We call a decomposition safe if either: +// S1) No matter the values of the summand variables: +// mp1 = mp2 +// +// S2) The pointer is on an array with a known array_element_size_in_bytes, +// and there is an integer x, such that: +// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 +// +// +// Statement: +// Given two pointers p1 and p2, and their MemPointers mp1 and mp2. +// If the two MemPointers satisfy these conditions: +// 1) All summands are identical. 
+// 2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 +// +// Then the ponter difference between p1 and p2 is identical to the difference between +// mp1 and mp2: +// p1 - p2 = mp1 - mp2 +// +// +// Proof Statement: +// If only decompositions of type (S1) were used, then trivially: +// p1 = mp1 +// p2 = mp2 +// => +// p1 - p2 = mp1 - mp2 +// +// If decompositions of type (S2) were used, then we can prove via induction over all +// decomposition steps that there must be some x1 and x2, such that: +// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 +// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 +// +// And hence, there must be an x, such that: +// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// +// If "x = 0", then it follows: +// p1 - p2 = mp1 - mp2 +// +// If "x != 0", then: +// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) +// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) +// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) +// > array_element_size_in_bytes * 2^32 - 2^31 +// >= array_element_size_in_bytes * 2^31 +// >= max_possible_array_size_in_bytes +// >= array_size_in_bytes +// +// +// TODO +// bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always @@ -237,12 +297,15 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ // ------------------------- ------- // rest scale * ConvI2L(op) // - // thus, can we replace: + // ... 
and so ...: + // + // scale * ConvI2L(a + b) = scale * ConvI2L(a) + scale * ConvI2L(b) + scale * x * 2^32 + // scale * ConvI2L(a - b) = scale * ConvI2L(a) - scale * ConvI2L(b) + scale * x * 2^32 + // scale * ConvI2L(a * con) = scale * con * ConvI2L(a) + scale * x * 2^32 + // scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a) + scale * x * 2^32 + // \_______________________/ \_____________________________________/ \______________/ + // before decomposition after decomposition overflow correction // - // scale * ConvI2L(a + b) -> scale * ConvI2L(a) + scale * ConvI2L(b) - // scale * ConvI2L(a - b) -> scale * ConvI2L(a) - scale * ConvI2L(b) - // scale * ConvI2L(a * con) -> scale * con * ConvI2L(a) - // scale * ConvI2L(a << con) -> scale * (1 << con) * ConvI2L(a) // // TODO what scale are we talking about??? scaleI or scaleL or scale??? not sure // From 075aa442ff19cfcb1012dd5635d68fa7340dc785 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 11:03:18 +0200 Subject: [PATCH 64/89] move proof to hpp --- src/hotspot/share/opto/mempointer.cpp | 62 +------------------- src/hotspot/share/opto/mempointer.hpp | 83 ++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 62 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 14e54efe1cf8c..305c59edf143a 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -187,66 +187,7 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman } // Check if the decomposition of operation opc is guaranteed to be safe. 
-// -// TODO -// -// Definition: Safe decomposition -// We decompose summand in: -// mp1 = con + summand + sum(other_summands) -// Resulting in: +-------------------------+ -// mp2 = con + dec_con + sum(dec_summands) + sum(other_summands) -// = new_con + sum(new_summands) -// -// We call a decomposition safe if either: -// S1) No matter the values of the summand variables: -// mp1 = mp2 -// -// S2) The pointer is on an array with a known array_element_size_in_bytes, -// and there is an integer x, such that: -// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 -// -// -// Statement: -// Given two pointers p1 and p2, and their MemPointers mp1 and mp2. -// If the two MemPointers satisfy these conditions: -// 1) All summands are identical. -// 2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 -// -// Then the ponter difference between p1 and p2 is identical to the difference between -// mp1 and mp2: -// p1 - p2 = mp1 - mp2 -// -// -// Proof Statement: -// If only decompositions of type (S1) were used, then trivially: -// p1 = mp1 -// p2 = mp2 -// => -// p1 - p2 = mp1 - mp2 -// -// If decompositions of type (S2) were used, then we can prove via induction over all -// decomposition steps that there must be some x1 and x2, such that: -// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 -// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 -// -// And hence, there must be an x, such that: -// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 -// -// If "x = 0", then it follows: -// p1 - p2 = mp1 - mp2 -// -// If "x != 0", then: -// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) -// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) -// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) -// > array_element_size_in_bytes * 2^32 - 2^31 -// >= array_element_size_in_bytes * 2^31 -// >= max_possible_array_size_in_bytes -// >= array_size_in_bytes -// -// -// TODO -// +// Please refer to the definition 
of "safe decomposition" in mempointer.hpp bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always @@ -322,6 +263,7 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ #endif } +// TODO add proof based on "Statement" MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { #ifndef PRODUCT diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 8481bbd2cff2b..db59ef1e0cda8 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -31,6 +31,8 @@ // The MemPointer is a shared facility to parse pointers and check the aliasing of pointers, // e.g. checking if two stores are adjacent. // +// ----------------------------------------------------------------------------------------- +// // MemPointerDecomposedForm: // When the pointer is parsed, it is decomposed into a constant and a sum of summands: // @@ -85,6 +87,8 @@ // are terminal, see MemPointerDecomposedFormParser::parse_sub_expression. This effectively parses // the pointer expression recursively. // +// ----------------------------------------------------------------------------------------- +// // We have to be careful on 64bit systems with ConvI2L: decomposing its input is not // correct in general, overflows may not be preserved in the decomposed form: // @@ -93,8 +97,83 @@ // MulI: ConvI2L(a * conI) != ConvI2L(a) * ConvI2L(conI) // LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI) // -// However, there are some cases where we can prove that the decomposition is safe, -// see MemPointerDecomposedFormParser::is_safe_to_decompose_op. 
+// If we want to prove the correctness of MemPointerAliasing, we need some guarantees, +// that the MemPointers adequately represent the underlying pointers, such that we can +// compute the aliasing based on the summands and constants. +// +// ----------------------------------------------------------------------------------------- +// +// Below, we will formulate a "Statement" that helps us to prove the correctness of the +// MemPointerAliasing computations. To prove the "Statement", we need to define the idea +// of a "safe decomposition", and then prove that all the decompositions we apply are +// such "safe decompositions". +// +// +// Definition: Safe decomposition +// We decompose summand in: +// mp1 = con + summand + sum(other_summands) +// Resulting in: +-------------------------+ +// mp2 = con + dec_con + sum(dec_summands) + sum(other_summands) +// = new_con + sum(new_summands) +// +// We call a decomposition safe if either: +// S1) No matter the values of the summand variables: +// mp1 = mp2 +// +// S2) The pointer is on an array with a known array_element_size_in_bytes, +// and there is an integer x, such that: +// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 +// +// Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all +// decompositions we apply are safe. +// +// +// Statement: +// Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2. +// If these conditions hold: +// 1) All summands of mp1 and mp2 are identical. +// 2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 +// 3) Both p1 and p2 are within the bounds of the same memory object. +// +// Then the ponter difference between p1 and p2 is identical to the difference between +// mp1 and mp2: +// p1 - p2 = mp1 - mp2 +// +// Note: MemPointerDecomposedForm::get_aliasing_with relies on this statememt to +// prove the correctness of its aliasing computation between two MemPointers. 
+// +// +// Proof Statement: +// If only decompositions of type (S1) were used, then trivially: +// p1 = mp1 +// p2 = mp2 +// => +// p1 - p2 = mp1 - mp2 +// +// If decompositions of type (S2) were used, then we can prove via induction over all +// decomposition steps that there must be some x1 and x2, such that: +// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 +// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 +// +// And hence, there must be an x, such that: +// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// +// If "x = 0", then it follows: +// p1 - p2 = mp1 - mp2 +// +// If "x != 0", then: +// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) +// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) +// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) +// > array_element_size_in_bytes * 2^32 - 2^31 +// >= array_element_size_in_bytes * 2^31 +// >= max_possible_array_size_in_bytes +// >= array_size_in_bytes +// +// Thus we get a contradiction: p1 and p2 have a distance greater than the array +// size, and hence at least one of the two must be out of bounds. But condition 3 +// of the statement requires that both p1 and p2 are both in bounds of the same +// memory object. 
#ifndef PRODUCT class TraceMemPointer : public StackObj { From ec376b9ed0fd875ef52cbeb9a54810a65d7ef2e7 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 12:50:09 +0200 Subject: [PATCH 65/89] improve the proof --- src/hotspot/share/opto/mempointer.hpp | 99 ++++++++++++++++++--------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index db59ef1e0cda8..698aa45a38431 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -117,12 +117,12 @@ // = new_con + sum(new_summands) // // We call a decomposition safe if either: -// S1) No matter the values of the summand variables: -// mp1 = mp2 +// SAFE1) No matter the values of the summand variables: +// mp1 = mp2 // -// S2) The pointer is on an array with a known array_element_size_in_bytes, -// and there is an integer x, such that: -// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 +// SAFE2) The pointer is on an array with a known array_element_size_in_bytes, +// and there is an integer x, such that: +// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 // // Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all // decompositions we apply are safe. @@ -131,49 +131,82 @@ // Statement: // Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2. // If these conditions hold: -// 1) All summands of mp1 and mp2 are identical. -// 2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 -// 3) Both p1 and p2 are within the bounds of the same memory object. +// S1) Both p1 and p2 are within the bounds of the same memory object. +// S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 +// S3) All summands of mp1 and mp2 are identical. 
// // Then the ponter difference between p1 and p2 is identical to the difference between // mp1 and mp2: // p1 - p2 = mp1 - mp2 // -// Note: MemPointerDecomposedForm::get_aliasing_with relies on this statememt to +// Note: MemPointerDecomposedForm::get_aliasing_with relies on this Statement to // prove the correctness of its aliasing computation between two MemPointers. // // // Proof Statement: -// If only decompositions of type (S1) were used, then trivially: -// p1 = mp1 -// p2 = mp2 +// Case 0: no decompositions were used: +// mp1 = 0 + 1 * p1 = p1 +// mp2 = 0 + 1 * p2 = p2 // => // p1 - p2 = mp1 - mp2 // -// If decompositions of type (S2) were used, then we can prove via induction over all -// decomposition steps that there must be some x1 and x2, such that: -// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 -// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 -// -// And hence, there must be an x, such that: -// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// Case 1: only decompositions of type (SAFE1) were used: +// We make an induction proof over the decompositions from p1 to mp1, starting with +// the trivial decompoisition: +// mp1_0 = 0 + 1 * p1 = p1 +// and then for the i'th decomposition, we know that +// mp1_i = mp1_{i+1} +// and hence, if mp1 was decomposed with n decompositions from p1: +// p1 = mp1_0 = mp1_i = mp1_n = mp1 +// The analogue can be proven for p2 and mp2: +// p2 = mp2 // -// If "x = 0", then it follows: +// p1 = mp1 +// p2 = mp2 +// => // p1 - p2 = mp1 - mp2 // -// If "x != 0", then: -// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) -// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) -// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) -// > array_element_size_in_bytes * 2^32 - 2^31 -// >= array_element_size_in_bytes * 2^31 -// >= max_possible_array_size_in_bytes -// >= array_size_in_bytes -// -// Thus we get a contradiction: p1 and p2 have a distance greater than 
the array -// size, and hence at least one of the two must be out of bounds. But condition 3 -// of the statement requires that both p1 and p2 are both in bounds of the same -// memory object. +// Case 2: decompositions of type (SAFE2) were used, and possibly also decompositions of +// type (SAFE1). +// Given we have (SAFE2) decompositions, we know that we are operating on an array of +// known array_element_size_in_bytes. We can weaken the guarantees from (SAFE1) +// decompositions to the same guarantee as (SAFE2) decompositions, hence all applied +// decompositions satisfy: +// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 +// where x_i = 0 for (SAFE1) decompositions. +// +// We make an induction proof over the decompositions from p1 to mp1, starting with +// the trivial decompoisition: +// mp1_0 = 0 + 1 * p1 = p1 +// and then for the i'th decomposition, we know that +// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 +// and hence, if mp1 was decomposed with n decompositions from p1: +// p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 +// where x1 = sum(x1_i). +// The analogue can be proven for p2 and mp2: +// p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 +// +// And hence, there must be an x, such that: +// p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// +// If "x = 0", then it follows: +// p1 - p2 = mp1 - mp2 +// +// If "x != 0", then: +// abs(p1 - p2) = abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32) +// >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) +// -- apply x != 0 -- +// >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) +// -- apply S2 and S3 -- +// > array_element_size_in_bytes * 2^32 - 2^31 +// >= array_element_size_in_bytes * 2^31 +// >= max_possible_array_size_in_bytes +// >= array_size_in_bytes +// +// Thus we get a contradiction: p1 and p2 have a distance greater than the array +// size, and hence at least one of the two must be out of bounds. 
But condition S1 +// of the Statement requires that both p1 and p2 are both in bounds of the same +// memory object. #ifndef PRODUCT class TraceMemPointer : public StackObj { From c314db9fd329baf9c87233678432946db1ff6120 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 13:41:06 +0200 Subject: [PATCH 66/89] more proof --- src/hotspot/share/opto/mempointer.cpp | 65 ++++++++++++++++++++------- src/hotspot/share/opto/mempointer.hpp | 18 +++++--- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 305c59edf143a..d7237c3c0c7c9 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -192,11 +192,12 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always // be truncated. Thus, it does not matter if we have int or long overflows. + // Simply put: all decompositions are (SAFE1). return true; #else switch(opc) { - // These operations are always safe to decompose: + // These operations are always safe to decompose, i.e. (SAFE1): case Op_ConI: case Op_ConL: case Op_AddP: @@ -226,29 +227,63 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr(); if (ary_ptr_t != nullptr) { // Array accesses that are not Unsafe always have a RangeCheck which ensures - // that there is no int overflow. + // that there is no int overflow. And without overflows, all decompositions + // are (SAFE1). if (!_mem->is_unsafe_access()) { return true; } - // Intuition: what happens if a AddI, SubI, MulI or LShiftI (with constant) overflows - // the int range? TODO: generalize this a bit and write the proof! + // Intuition: In general, the decomposition of AddI, SubI, MulI or LShiftI is not safe, + // because of overflows. 
But under some conditions, we can prove that such a + // decomposition is (SAFE2). Intuitively, we want to prove that an overflow + // would mean that the pointers have such a large distance, that at least one + // must lie out of bounds. In the proof of the "Statement", we thus get a + // contradiction with the condition that both pointers are in bounds. // - // Idea: pointer = con + sum(other_summands) + summand - // ------------------------- ------- - // rest scale * ConvI2L(op) + // We prove that the decomposition of AddI, SubI, MulI (with constant) and ShiftI (with + // constant) is (SAFE2), under the condition: // - // ... and so ...: + // abs(scale) % array_element_size_in_bytes = 0 // - // scale * ConvI2L(a + b) = scale * ConvI2L(a) + scale * ConvI2L(b) + scale * x * 2^32 - // scale * ConvI2L(a - b) = scale * ConvI2L(a) - scale * ConvI2L(b) + scale * x * 2^32 - // scale * ConvI2L(a * con) = scale * con * ConvI2L(a) + scale * x * 2^32 - // scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a) + scale * x * 2^32 - // \_______________________/ \_____________________________________/ \______________/ - // before decomposition after decomposition overflow correction + // First, we describe how the decomposition works: // + // mp_i = con + sum(other_summands) + summand + // ------------------------- ------- + // rest scale * ConvI2L(op) // - // TODO what scale are we talking about??? scaleI or scaleL or scale??? 
not sure + // We decompose the summand depending on the op, where we know that there is some + // integer y, such that: + // + // scale * ConvI2L(a + b) = scale * ConvI2L(a) + scale * ConvI2L(b) + scale * y * 2^32 + // scale * ConvI2L(a - b) = scale * ConvI2L(a) - scale * ConvI2L(b) + scale * y * 2^32 + // scale * ConvI2L(a * con) = scale * con * ConvI2L(a) + scale * y * 2^32 + // scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a) + scale * y * 2^32 + // \_______________________/ \_____________________________________/ \______________/ + // before decomposition after decomposition overflow correction + // + // Thus, for AddI and SubI, we get: + // summand = new_summand1 + new_summand2 + scale * y * 2^32 + // + // mp_{i+1} = con + sum(other_summands) + new_summand1 + new_summand2 + // = con + sum(other_summands) + summand - scale * y * 2^32 + // = mp_i - scale * y * 2^32 + // + // And for MulI and ShiftI we get: + // summand = new_summand + scale * y * 2^32 + // + // mp_{i+1} = con + sum(other_summands) + new_summand + // = con + sum(other_summands) + summand - scale * y * 2^32 + // = mp_i - scale * y * 2^32 + // + // Further: + // abs(scale) % array_element_size_in_bytes = 0 + // implies that there is some integer z, such that: + // z * array_element_size_in_bytes = scale + // + // And hence, with "x = y * z": + // mp_i = mp_{i+1} + scale * y * 2^32 + // = mp_{i+1} + z * array_element_size_in_bytes * y * 2^32 + // = mp_{i+1} + x * array_element_size_in_bytes * 2^32 // BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type(); if (is_java_primitive(array_element_bt)) { diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 698aa45a38431..fedfc401853d0 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -109,20 +109,24 @@ // such "safe decompositions". 
// // -// Definition: Safe decomposition +// Definition: Safe decomposition (from some mp_i to mp_{i+1}) // We decompose summand in: -// mp1 = con + summand + sum(other_summands) -// Resulting in: +-------------------------+ -// mp2 = con + dec_con + sum(dec_summands) + sum(other_summands) -// = new_con + sum(new_summands) +// mp_i = con + summand + sum(other_summands) +// Resulting in: +-------------------------+ +// mp_{i+1} = con + dec_con + sum(dec_summands) + sum(other_summands) +// = new_con + sum(new_summands) // // We call a decomposition safe if either: // SAFE1) No matter the values of the summand variables: -// mp1 = mp2 +// mp_i = mp_{i+1} // // SAFE2) The pointer is on an array with a known array_element_size_in_bytes, // and there is an integer x, such that: -// mp1 = mp2 + x * array_element_size_in_bytes * 2^32 +// mp_i = mp_{i+1} + x * array_element_size_in_bytes * 2^32 +// +// Note: if "x = 0", we have "mp1 = mp2", and if "x != 0", then mp1 and mp2 +// have a distance at least twice as large as the array size, and so +// at least one of mp1 or mp2 must be out of bounds of the array. // // Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all // decompositions we apply are safe. From fdc67d8a4fb76c473d19de566dc24e377697b0f3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 14:02:47 +0200 Subject: [PATCH 67/89] rm scaleL, was not even necessary! 
--- src/hotspot/share/opto/mempointer.cpp | 32 +++++++------------ src/hotspot/share/opto/mempointer.hpp | 44 +++------------------------ 2 files changed, 15 insertions(+), 61 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index d7237c3c0c7c9..9c6e2d55abae2 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -34,8 +34,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() Node* pointer = _mem->in(MemNode::Address); // Start with the trivial summand. - const NoOverflowInt one(1); - _worklist.push(MemPointerSummand(pointer, one LP64_ONLY( COMMA one ))); + _worklist.push(MemPointerSummand(pointer, NoOverflowInt(1))); // Decompose the summands until only terminal summands remain. This effectively // parses the pointer expression recursively. @@ -69,7 +68,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() } // Keep summands with non-zero scale. if (!scale.is_zero()) { - _summands.at_put(pos_put++, MemPointerSummand(variable, scale LP64_ONLY( COMMA NoOverflowInt(1) ))); + _summands.at_put(pos_put++, MemPointerSummand(variable, scale)); } } _summands.trunc_to(pos_put); @@ -83,11 +82,10 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand summand) { Node* n = summand.variable(); const NoOverflowInt scale = summand.scale(); - LP64_ONLY( const NoOverflowInt scaleL = summand.scaleL(); ) const NoOverflowInt one(1); int opc = n->Opcode(); - if (is_safe_to_decompose_op(opc LP64_ONLY( COMMA scaleL ))) { + if (is_safe_to_decompose_op(opc, scale)) { switch (opc) { case Op_ConI: case Op_ConL: @@ -105,8 +103,8 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman // Decompose addition. Node* a = n->in((opc == Op_AddP) ? 2 : 1); Node* b = n->in((opc == Op_AddP) ? 
3 : 2); - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - _worklist.push(MemPointerSummand(b, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(a, scale)); + _worklist.push(MemPointerSummand(b, scale)); return; } case Op_SubL: @@ -117,11 +115,9 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman Node* b = n->in(2); NoOverflowInt sub_scale = NoOverflowInt(-1) * scale; - LP64_ONLY( NoOverflowInt sub_scaleL = (opc == Op_SubL) ? scaleL * NoOverflowInt(-1) - : scaleL; ) - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); - _worklist.push(MemPointerSummand(b, sub_scale LP64_ONLY( COMMA sub_scaleL ))); + _worklist.push(MemPointerSummand(a, scale)); + _worklist.push(MemPointerSummand(b, sub_scale)); return; } case Op_MulL: @@ -134,31 +130,25 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman Node* con = n->in(2); if (!con->is_Con()) { break; } NoOverflowInt factor; - LP64_ONLY( NoOverflowInt factorL; ) switch (opc) { case Op_MulL: // variable * con factor = NoOverflowInt(con->get_long()); - LP64_ONLY( factorL = factor; ) break; case Op_MulI: // variable * con factor = NoOverflowInt(con->get_int()); - LP64_ONLY( factorL = one; ) break; case Op_LShiftL: // variable << con = variable * (1 << con) factor = one << NoOverflowInt(con->get_int()); - LP64_ONLY( factorL = factor; ) break; case Op_LShiftI: // variable << con = variable * (1 << con) factor = one << NoOverflowInt(con->get_int()); - LP64_ONLY( factorL = one; ) break; } // Accumulate scale. NoOverflowInt new_scale = scale * factor; - LP64_ONLY( NoOverflowInt new_scaleL = scaleL * factorL; ) - _worklist.push(MemPointerSummand(variable, new_scale LP64_ONLY( COMMA new_scaleL ))); + _worklist.push(MemPointerSummand(variable, new_scale)); return; } case Op_CastII: @@ -176,7 +166,7 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman { // Decompose: look through. 
Node* a = n->in(1); - _worklist.push(MemPointerSummand(a, scale LP64_ONLY( COMMA scaleL ))); + _worklist.push(MemPointerSummand(a, scale)); return; } } @@ -188,7 +178,7 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman // Check if the decomposition of operation opc is guaranteed to be safe. // Please refer to the definition of "safe decomposition" in mempointer.hpp -bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const { +bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, const NoOverflowInt scale) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always // be truncated. Thus, it does not matter if we have int or long overflows. @@ -288,7 +278,7 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc LP64_ BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type(); if (is_java_primitive(array_element_bt)) { NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt)); - if (scaleL.is_multiple_of(array_element_size_in_bytes)) { + if (scale.is_multiple_of(array_element_size_in_bytes)) { return true; } } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index fedfc401853d0..05b8fe5a399bd 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -288,54 +288,25 @@ class MemPointerAliasing { // // summand = scale * variable // -// On 32-bit platforms, we trivially use 32-bit jint values for the address computation: -// -// summand = scaleI * variable // 32-bit variable -// scale = scaleI -// -// On 64-bit platforms, we have a mix of 64-bit jlong and 32-bit jint values for the -// address computation: -// -// summand = scaleL * ConvI2L(scaleI * variable) // 32-bit variable -// scale = scaleL * scaleI -// -// summand = scaleL * variable // 
64-bit variable -// scale = scaleL -// -// For simplicity, we only allow 32-bit jint scales, wrapped in NoOverflowInt. During -// the decomposition into the summands, we might encounter a scale that overflows the -// jint-range. Then, the scale becomes NaN, which indicates that we cannot decompose -// the pointer using this summand. -// -// Note: we only need scaleL during the decomposition of the pointer. We need to check -// if decomposing a summand further is safe (i.e. if there cannot be an overflow), -// see MemPointerDecomposedFormParser::is_safe_to_decompose_op. But during aliasing -// computation, we fully rely on scale, and do not need scaleL any more. -// class MemPointerSummand : public StackObj { private: Node* _variable; NoOverflowInt _scale; - LP64_ONLY( NoOverflowInt _scaleL; ) public: MemPointerSummand() : _variable(nullptr), - _scale(NoOverflowInt::make_NaN()) - LP64_ONLY( COMMA _scaleL(NoOverflowInt::make_NaN()) ) {} - MemPointerSummand(Node* variable, const NoOverflowInt scale LP64_ONLY( COMMA const NoOverflowInt scaleL )) : + _scale(NoOverflowInt::make_NaN()) {} + MemPointerSummand(Node* variable, const NoOverflowInt scale) : _variable(variable), _scale(scale) - LP64_ONLY( COMMA _scaleL(scaleL) ) { assert(_variable != nullptr, "must have variable"); assert(!_scale.is_zero(), "non-zero scale"); - LP64_ONLY( assert(!_scaleL.is_zero(), "non-zero scaleL") ); } Node* variable() const { return _variable; } NoOverflowInt scale() const { return _scale; } - LP64_ONLY( NoOverflowInt scaleL() const { return _scaleL; } ) static int cmp_for_sort(MemPointerSummand* p1, MemPointerSummand* p2) { if (p1->variable() == nullptr) { @@ -363,11 +334,6 @@ class MemPointerSummand : public StackObj { #ifndef PRODUCT void print_on(outputStream* st) const { st->print("Summand["); -#ifdef _LP64 - st->print("(scaleL = "); - _scaleL.print_on(st); - st->print(") "); -#endif _scale.print_on(st); tty->print(" * [%d %s]]", _variable->_idx, _variable->Name()); } @@ -399,8 +365,7 
@@ class MemPointerDecomposedForm : public StackObj { // Default / trivial: pointer = 0 + 1 * pointer MemPointerDecomposedForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { assert(pointer != nullptr, "pointer must be non-null"); - const NoOverflowInt one(1); - _summands[0] = MemPointerSummand(pointer, one LP64_ONLY( COMMA one )); + _summands[0] = MemPointerSummand(pointer, NoOverflowInt(1)); } private: @@ -412,7 +377,6 @@ class MemPointerDecomposedForm : public StackObj { MemPointerSummand s = summands.at(i); assert(s.variable() != nullptr, "variable cannot be null"); assert(!s.scale().is_NaN(), "non-NaN scale"); - LP64_ONLY( assert(!s.scaleL().is_NaN(), "non-NaN scaleL"); ) _summands[i] = s; } } @@ -479,7 +443,7 @@ class MemPointerDecomposedFormParser : public StackObj { MemPointerDecomposedForm parse_decomposed_form(); void parse_sub_expression(const MemPointerSummand summand); - bool is_safe_to_decompose_op(const int opc LP64_ONLY( COMMA const NoOverflowInt scaleL )) const; + bool is_safe_to_decompose_op(const int opc, const NoOverflowInt scale) const; }; // Facility to parse the pointer of a Load or Store, so that aliasing between two such From f9e655ca83ffb8d3e8c69aa9562047a33962208a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 14:42:37 +0200 Subject: [PATCH 68/89] finishing up more proofs --- src/hotspot/share/opto/mempointer.cpp | 29 ++++++++++++++----- src/hotspot/share/opto/mempointer.hpp | 18 ++++++------ src/hotspot/share/opto/noOverflowInt.hpp | 7 ----- .../gtest/opto/test_no_overflow_int.cpp | 10 ------- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 9c6e2d55abae2..1041c549c0495 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -227,8 +227,8 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons // because of overflows. 
But under some conditions, we can prove that such a // decomposition is (SAFE2). Intuitively, we want to prove that an overflow // would mean that the pointers have such a large distance, that at least one - // must lie out of bounds. In the proof of the "Statement", we thus get a - // contradiction with the condition that both pointers are in bounds. + // must lie out of bounds. In the proof of the "MemPointer Lemma", we thus + // get a contradiction with the condition that both pointers are in bounds. // // We prove that the decomposition of AddI, SubI, MulI (with constant) and ShiftI (with // constant) is (SAFE2), under the condition: @@ -288,7 +288,12 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons #endif } -// TODO add proof based on "Statement" +// Compute the aliasing between two MemPointerDecomposedForm. We use the "MemPointer Lemma" to +// prove that the computed aliasing also applies for the underlying pointers. +// +// Pre-Condition: +// We assume that both pointers are in-bounds of their respective memory object. +// MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { #ifndef PRODUCT @@ -299,7 +304,7 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD } #endif - // Check if all summands are the same: + // "MemPointer Lemma" condition S2: check if all summands are the same: for (uint i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand s1 = summands_at(i); const MemPointerSummand s2 = other.summands_at(i); @@ -313,10 +318,11 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD } } - // Compute distance: + // "MemPointer Lemma" condition S3: check that the constants do not differ too much: const NoOverflowInt distance = other.con() - con(); - // TODO why 2_to_30 ? 
- if (distance.is_NaN() || !distance.is_abs_less_than_2_to_30()) { + // We must check that: abs(distance) < 2^32 + // However, this is only false if: distance = min_jint + if (distance.is_NaN() || distance.value() == min_jint) { #ifndef PRODUCT if (trace.is_trace_aliasing()) { tty->print(" -> Aliasing unknown, bad distance: "); @@ -327,6 +333,15 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD return MemPointerAliasing::make_unknown(); } + // "MemPointer Lemma" condition S1: + // Given that all summands are the same, we know that both pointers point into the + // same memory object. With the Pre-Condition, we know that both pointers are in + // bounds of that same memory object. + + // Hence, all 3 conditions of the "MemoryPointer Lemma" are established, and hence + // we know that the distance between the underlying pointers is equal to the distance + // we computed for the MemPointers: + // p_other - p_this = distance = other.con - this.con #ifndef PRODUCT if (trace.is_trace_aliasing()) { tty->print_cr(" -> Aliasing always, distance = %d.", distance.value()); diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 05b8fe5a399bd..605ab545ee268 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -103,10 +103,10 @@ // // ----------------------------------------------------------------------------------------- // -// Below, we will formulate a "Statement" that helps us to prove the correctness of the -// MemPointerAliasing computations. To prove the "Statement", we need to define the idea -// of a "safe decomposition", and then prove that all the decompositions we apply are -// such "safe decompositions". +// Below, we will formulate a "MemPointer Lemma" that helps us to prove the correctness of +// the MemPointerAliasing computations. 
To prove the "MemPointer Lemma", we need to define +// the idea of a "safe decomposition", and then prove that all the decompositions we apply +// are such "safe decompositions". // // // Definition: Safe decomposition (from some mp_i to mp_{i+1}) @@ -132,7 +132,7 @@ // decompositions we apply are safe. // // -// Statement: +// MemPointer Lemma: // Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2. // If these conditions hold: // S1) Both p1 and p2 are within the bounds of the same memory object. @@ -143,11 +143,11 @@ // mp1 and mp2: // p1 - p2 = mp1 - mp2 // -// Note: MemPointerDecomposedForm::get_aliasing_with relies on this Statement to +// Note: MemPointerDecomposedForm::get_aliasing_with relies on this MemPointer Lemma to // prove the correctness of its aliasing computation between two MemPointers. // // -// Proof Statement: +// Proof of the "MemPointer Lemma": // Case 0: no decompositions were used: // mp1 = 0 + 1 * p1 = p1 // mp2 = 0 + 1 * p2 = p2 @@ -209,8 +209,8 @@ // // Thus we get a contradiction: p1 and p2 have a distance greater than the array // size, and hence at least one of the two must be out of bounds. But condition S1 -// of the Statement requires that both p1 and p2 are both in bounds of the same -// memory object. +// of the MemPointer Lemma requires that both p1 and p2 are both in bounds of the +// same memory object. 
#ifndef PRODUCT class TraceMemPointer : public StackObj { diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index 82291ad31a9d1..0e4fdff239a3d 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -86,13 +86,6 @@ class NoOverflowInt { return a.value() == b.value(); } - bool is_abs_less_than_2_to_30() const { - const NoOverflowInt i = abs(); - if (i.is_NaN()) { return false; } - const jint max_value = 1 << 30; - return i.value() < max_value; - } - NoOverflowInt abs() const { if (is_NaN()) { return make_NaN(); } if (value() >= 0) { return *this; } diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp index 4bc6bc45934bb..08f87d62ec205 100644 --- a/test/hotspot/gtest/opto/test_no_overflow_int.cpp +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -146,16 +146,6 @@ TEST_VM(opto, NoOverflowInt_misc) { ASSERT_FALSE((big + big) == (big + big)); ASSERT_TRUE((big - one + big) == (big - one + big)); - // is_abs_less_than_2_to_30 - for (int i = -(1 << 30) + 1; i < (1 << 30); i += 1000) { - ASSERT_TRUE(NoOverflowInt(i).is_abs_less_than_2_to_30()); - } - ASSERT_FALSE(big.is_abs_less_than_2_to_30()); - ASSERT_TRUE((big - one).is_abs_less_than_2_to_30()); - ASSERT_FALSE((zero - big).is_abs_less_than_2_to_30()); - ASSERT_TRUE((one - big).is_abs_less_than_2_to_30()); - ASSERT_FALSE(nan.is_abs_less_than_2_to_30()); - // abs for (int i = 0; i < (1 << 31); i += 1024) { ASSERT_EQ(NoOverflowInt(i).abs().value(), i); From 0687da5d11d4a427d1f352e605971bdb45413204 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 11 Sep 2024 18:04:18 +0200 Subject: [PATCH 69/89] add precompiled.hpp to gtest --- test/hotspot/gtest/opto/test_no_overflow_int.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/hotspot/gtest/opto/test_no_overflow_int.cpp b/test/hotspot/gtest/opto/test_no_overflow_int.cpp index 08f87d62ec205..7b4b4259bb841 
100644 --- a/test/hotspot/gtest/opto/test_no_overflow_int.cpp +++ b/test/hotspot/gtest/opto/test_no_overflow_int.cpp @@ -22,6 +22,7 @@ * */ +#include "precompiled.hpp" #include "opto/noOverflowInt.hpp" #include "unittest.hpp" From ae68fa7d562f00ce9ee886ef5a8d7bf6556726ca Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 12 Sep 2024 09:40:51 +0200 Subject: [PATCH 70/89] fix build and test --- src/hotspot/share/opto/mempointer.cpp | 1 + test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 1041c549c0495..602f536af7c4d 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -22,6 +22,7 @@ * */ +#include "precompiled.hpp" #include "opto/mempointer.hpp" #include "utilities/resourceHash.hpp" diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java index a543ae69ef511..3cd79a8e786d7 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java @@ -141,7 +141,6 @@ public static void main(String[] args) { for (String unaligned : new String[]{"-XX:-UseUnalignedAccesses", "-XX:+UseUnalignedAccesses"}) { TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class); framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0], unaligned); - framework.setDefaultWarmup(100); framework.start(); } } From d716e9a3739934d9495e4e8c2bd7da288396b68a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 18 Oct 2024 09:59:34 +0200 Subject: [PATCH 71/89] more examples and comments for Vladimir --- src/hotspot/share/opto/mempointer.cpp | 7 ++ src/hotspot/share/opto/mempointer.hpp | 114 ++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 5 deletions(-) diff --git 
a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 602f536af7c4d..ae53df000c480 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -127,6 +127,9 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman case Op_LShiftI: { // Only multiplication with constants is allowed: factor * variable + // IGVN already folds constants to in(2). If we find a variable there + // instead, we cannot further decompose this summand, and have to add + // it to the terminal summands. Node* variable = n->in(1); Node* con = n->in(2); if (!con->is_Con()) { break; } @@ -170,6 +173,10 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman _worklist.push(MemPointerSummand(a, scale)); return; } + default: + // All other operations cannot be further decomposed. We just add them to the + // terminal summands below. + break; } } diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 605ab545ee268..27afdbb801485 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -33,6 +33,112 @@ // // ----------------------------------------------------------------------------------------- // +// Intuition and Examples: +// We parse / decompose pointers into a linear form: +// +// pointer = con + sum_i(scale_i * variable_i) +// +// The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are +// compile-time variables (C2 nodes). +// +// For the MemPointer, we do not explicitly track base address. For Java heap pointers, the +// base address is just a variable. For native memory (C heap) pointers, the base address is +// null, and is hence implicitly a zero constant. 
+// +// +// Example1: byte array access: +// +// array[i] +// +// pointer = array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i +// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i +// -------------------- ---------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example2: int array access +// +// array[5 + i + 3 * j] +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * j + 4 * 3 * j +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * j + 12 * j +// -------------------- ----------------------------- -------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 +// +// +// Example3: Unsafe with int array +// +// UNSAFE.getInt(array, ARRAY_INT_BASE_OFFSET + 4 * i); +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * i +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 4 * i +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example4: Unsafe with native memory address +// +// long address; +// UNSAFE.getInt(null, address + 4 * i); +// +// pointer = address + 4 * i +// = 1 * address + 0 + 4 * i +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example5: MemorySegment with byte array as backing type +// +// byte[] array = new byte[1000]; +// MemorySegment ms = MemorySegment.ofArray(array); +// assert ms.heapBase().get() == array: "array is base"; +// assert ms.address() == 0: "zero offset from base"; +// byte val = ms.get(ValueLayout.JAVA_BYTE, i); +// +// pointer = ms.heapBase() + ARRAY_BYTE_BASE_OFFSET + ms.address() + i +// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 0 + 1 * i +// ----------------------- ------------------------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 +// +// +// Example6: MemorySegment with native memory +// +// MemorySegment ms = Arena.ofAuto().allocate(1000, 1); +// assert ms.heapBase().isEmpty(): "null base"; +// assert ms.address() != 0: 
"non-zero native memory address"; +// byte val2 = ms.get(ValueLayout.JAVA_BYTE, i); +// +// pointer = ms.heapBase() + ms.address() + i +// = 0 + 1 * ms.address() + 1 * i +// ------------ ---------------------- -------------------- +// = con scale_0 * variable_0 + scale_1 * variable_1 +// +// +// Example7: Non-linear access to int array +// +// array[5 + i + j * k] +// +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * j + 4 * j * k +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * j + 4 * j * k +// -------------------- ----------------------------- -------------------- -------------------- +// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 +// +// Note: we simply stop parsing once a term is not linear. We keep "j * k" as its own variable. +// +// +// Example8: Unsafe with native memory address, non-linear access +// +// UNSAFE.getInt(null, i * j); +// +// pointer = i * j +// = 0 + 1 * i * j +// --- -------------------- +// = con + scale_0 * variable_0 +// +// Note: we can always parse a pointer into its trivial linear form: +// +// pointer = 0 + 1 * pointer. +// +// ----------------------------------------------------------------------------------------- +// // MemPointerDecomposedForm: // When the pointer is parsed, it is decomposed into a constant and a sum of summands: // @@ -46,6 +152,8 @@ // // pointer = con + sum_i(scale_i * variable_i) // +// Note: the scale_i are compile-time constants (NoOverflowInt), and the variable_i are +// compile-time variables (C2 nodes). // On 64bit systems, this decomposed form is computed with long-add/mul, on 32bit systems // it is computed with int-add/mul. // @@ -347,11 +455,7 @@ class MemPointerSummand : public StackObj { class MemPointerDecomposedForm : public StackObj { private: // We limit the number of summands to 10. Usually, a pointer contains a base pointer - // (e.g. array pointer or null for native memory) and a few variables. 
For example: - // - // array[j] -> array_base + j + con -> 2 summands - // nativeMemorySegment.get(j) -> null + address + offset + j + con -> 3 summands - // + // (e.g. array pointer or null for native memory) and a few variables. static const int SUMMANDS_SIZE = 10; Node* _pointer; // pointer node associated with this (sub)pointer From 531500595074a7b6d3758a1304c71d4564bf4370 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 18 Oct 2024 11:46:11 +0200 Subject: [PATCH 72/89] some unsafe and native benchmarks added --- .../bench/vm/compiler/MergeStores.java | 64 +++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java b/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java index 84017573c075b..d4c79fd416ca1 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java +++ b/test/micro/org/openjdk/bench/vm/compiler/MergeStores.java @@ -41,12 +41,12 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Warmup(iterations = 3, time = 3) -@Measurement(iterations = 3, time = 3) -@Fork(value = 3, jvmArgsAppend = { +@Warmup(iterations = 2, time = 1) +@Measurement(iterations = 3, time = 1) +@Fork(value = 1, jvmArgsAppend = { "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", "--add-exports", "java.base/jdk.internal.util=ALL-UNNAMED"}) -@State(Scope.Benchmark) +@State(Scope.Thread) public class MergeStores { public static final int RANGE = 100; @@ -66,6 +66,7 @@ public class MergeStores { public static byte[] aB = new byte[RANGE]; public static short[] aS = new short[RANGE]; public static int[] aI = new int[RANGE]; + public static long native_adr = UNSAFE.allocateMemory(RANGE * 8); // ------------------------------------------- // ------- Little-Endian API ---------- @@ -691,4 +692,59 @@ public int[] store_I2_zero_offs_nonalloc_direct() { aI[offset + 1] = 0; return aI; } + + @Benchmark + public void store_unsafe_B8_L_offs_noalloc_direct() { + 
UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)(vL >> 0 )); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)(vL >> 8 )); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)(vL >> 16)); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)(vL >> 24)); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)(vL >> 32)); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)(vL >> 40)); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)(vL >> 48)); + UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)(vL >> 56)); + } + + @Benchmark + public void store_unsafe_B8_L_offs_noalloc_unsafe() { + UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vL); + } + + @Benchmark + public void store_unsafe_C4_L_offs_noalloc_direct() { + UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (char)(vL >> 0 )); + UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (char)(vL >> 16)); + UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (char)(vL >> 32)); + UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (char)(vL >> 48)); + } + + @Benchmark + public void store_unsafe_native_B8_L_offs_noalloc_direct() { + UNSAFE.putByte(null, native_adr + offset + 0, (byte)(vL >> 0 )); + UNSAFE.putByte(null, native_adr + offset + 1, (byte)(vL >> 8 )); + UNSAFE.putByte(null, native_adr + offset + 2, (byte)(vL >> 16)); + UNSAFE.putByte(null, native_adr + offset + 3, (byte)(vL >> 24)); + UNSAFE.putByte(null, native_adr + offset + 4, (byte)(vL >> 32)); + UNSAFE.putByte(null, native_adr + offset + 5, (byte)(vL >> 40)); + UNSAFE.putByte(null, native_adr + offset + 6, (byte)(vL >> 48)); + UNSAFE.putByte(null, native_adr + offset + 7, (byte)(vL >> 56)); + } + + @Benchmark + public void store_unsafe_native_C4_L_offs_noalloc_direct() { + UNSAFE.putChar(null, native_adr + offset + 0, (char)(vL >> 0 
)); + UNSAFE.putChar(null, native_adr + offset + 2, (char)(vL >> 16)); + UNSAFE.putChar(null, native_adr + offset + 4, (char)(vL >> 32)); + UNSAFE.putChar(null, native_adr + offset + 6, (char)(vL >> 48)); + } + + @Benchmark + public void store_unsafe_native_B8_L_offs_noalloc_unsafe() { + UNSAFE.putLongUnaligned(null, native_adr + offset + 0, vL); + } + + @Fork(value = 1, jvmArgsPrepend = { + "-XX:+UnlockDiagnosticVMOptions", "-XX:-MergeStores" + }) + public static class MergeStoresDisabled extends MergeStores {} } From a911b63053a9885cd7a5671c722914ca4261d166 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 21 Oct 2024 08:08:49 +0200 Subject: [PATCH 73/89] updates for Vladimir --- src/hotspot/share/opto/mempointer.hpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 27afdbb801485..6163c40ff8c01 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -36,14 +36,14 @@ // Intuition and Examples: // We parse / decompose pointers into a linear form: // -// pointer = con + sum_i(scale_i * variable_i) +// pointer = sum_i(scale_i * variable_i) + con // // The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are // compile-time variables (C2 nodes). // // For the MemPointer, we do not explicitly track base address. For Java heap pointers, the -// base address is just a variable. For native memory (C heap) pointers, the base address is -// null, and is hence implicitly a zero constant. +// base address is just a variable in a summand with scale == 1. For native memory (C heap) +// pointers, the base address is null, and is hence implicitly a zero constant. 
// // // Example1: byte array access: @@ -140,9 +140,9 @@ // ----------------------------------------------------------------------------------------- // // MemPointerDecomposedForm: -// When the pointer is parsed, it is decomposed into a constant and a sum of summands: +// When the pointer is parsed, it is decomposed into sum of summands plus a constant: // -// pointer = con + sum(summands) +// pointer = sum(summands) + con // // Where each summand_i in summands has the form: // @@ -150,7 +150,7 @@ // // Hence, the full decomposed form is: // -// pointer = con + sum_i(scale_i * variable_i) +// pointer = sum_i(scale_i * variable_i) + con // // Note: the scale_i are compile-time constants (NoOverflowInt), and the variable_i are // compile-time variables (C2 nodes). @@ -161,8 +161,8 @@ // The decomposed form allows us to determine the aliasing between two pointers easily. For // example, if two pointers are identical, except for their constant: // -// pointer1 = con1 + sum(summands) -// pointer2 = con2 + sum(summands) +// pointer1 = sum(summands) + con1 +// pointer2 = sum(summands) + con2 // // then we can easily compute the distance between the pointers (distance = con2 - con1), // and determine if they are adjacent. @@ -170,8 +170,8 @@ // MemPointerDecomposedFormParser: // Any pointer can be parsed into this (default / trivial) decomposed form: // -// pointer = 0 + 1 * pointer -// con scale +// pointer = 1 * pointer + 0 +// scale_0 * variable_0 + con // // However, this is not particularly useful to compute aliasing. We would like to decompose // the pointer as far as possible, i.e. extract as many summands and add up the constants to @@ -184,8 +184,8 @@ // At first, computing aliasing is difficult because the distance is hidden inside the // ConvI2L. 
we can convert this (with array_int_base_offset = 16) into these decomposed forms: // -// pointer1 = 16L + 1L * array_base + 4L * i -// pointer2 = 20L + 1L * array_base + 4L * i +// pointer1 = 1L * array_base + 4L * i + 16L +// pointer2 = 1L * array_base + 4L * i + 20L // // This allows us to easily see that these two pointers are adjacent (distance = 4). // @@ -450,7 +450,7 @@ class MemPointerSummand : public StackObj { // Decomposed form of the pointer sub-expression of "pointer". // -// pointer = con + sum(summands) +// pointer = sum(summands) + con // class MemPointerDecomposedForm : public StackObj { private: From b8fc83ba161f43c4f61b96087ef26a8c5733452c Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 21 Oct 2024 10:02:00 +0200 Subject: [PATCH 74/89] rm dead assert --- src/hotspot/share/opto/memnode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 8af3e4d2ae142..d642c0d975a79 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2913,7 +2913,6 @@ StoreNode* MergePrimitiveStores::run() { bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) const { int opc = _store->Opcode(); assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition"); - // assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store"); if (other_store == nullptr || _store->Opcode() != other_store->Opcode()) { From a35a7cfe6b53a03e00eebb032231dcd5bb798dd8 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 22 Oct 2024 09:14:53 +0200 Subject: [PATCH 75/89] changes to NoOverflowInt for Dean --- src/hotspot/share/opto/noOverflowInt.hpp | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index 0e4fdff239a3d..c850e487f1fd7 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ 
b/src/hotspot/share/opto/noOverflowInt.hpp @@ -37,10 +37,10 @@ class NoOverflowInt { public: // Default: NaN. - NoOverflowInt() : _is_NaN(true), _value(0) {} + constexpr NoOverflowInt() : _is_NaN(true), _value(0) {} // Create from jlong (or jint) -> NaN if overflows jint. - explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { + constexpr explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) { jint trunc = (jint)value; if ((jlong)trunc == value) { _is_NaN = false; @@ -48,36 +48,36 @@ class NoOverflowInt { } } - static NoOverflowInt make_NaN() { return NoOverflowInt(); } + static constexpr NoOverflowInt make_NaN() { return NoOverflowInt(); } bool is_NaN() const { return _is_NaN; } jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } bool is_zero() const { return !is_NaN() && value() == 0; } friend NoOverflowInt operator+(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_add((jlong)a.value(), (jlong)b.value())); + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() + (jlong)b.value()); } friend NoOverflowInt operator-(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_subtract((jlong)a.value(), (jlong)b.value())); + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() - (jlong)b.value()); } friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } - return NoOverflowInt(java_multiply((jlong)a.value(), (jlong)b.value())); + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } + return NoOverflowInt((jlong)a.value() * (jlong)b.value()); } friend NoOverflowInt operator<<(const NoOverflowInt a, const 
NoOverflowInt b) { - if (a.is_NaN()) { return make_NaN(); } - if (b.is_NaN()) { return make_NaN(); } + if (a.is_NaN()) { return a; } + if (b.is_NaN()) { return b; } jint shift = b.value(); if (shift < 0 || shift > 31) { return make_NaN(); } - return NoOverflowInt(java_shift_left((jlong)a.value(), shift)); + return NoOverflowInt((jlong)a.value() << shift); } friend bool operator==(const NoOverflowInt a, const NoOverflowInt b) { From 75400f2cb7c74718e3996905972ee44ee3de1fd6 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 15:09:28 +0100 Subject: [PATCH 76/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/memnode.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index d642c0d975a79..89fa915c2defb 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2777,8 +2777,8 @@ uint StoreNode::hash() const { // class MergePrimitiveStores : public StackObj { private: - PhaseGVN* _phase; - StoreNode* _store; + PhaseGVN* const _phase; + StoreNode* const _store; NOT_PRODUCT( const CHeapBitMap &_trace_tags; ) @@ -2878,19 +2878,19 @@ StoreNode* MergePrimitiveStores::run() { return nullptr; } - NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); }) // The _store must be the "last" store in a chain. If we find a use we could merge with // then that use or a store further down is the "last" store. 
Status status_use = find_adjacent_use_store(_store); - NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] expect no use: "); status_use.print_on(tty); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect no use: "); status_use.print_on(tty); }) if (status_use.found_store() != nullptr) { return nullptr; } // Check if we can merge with at least one def, so that we have at least 2 stores to merge. Status status_def = find_adjacent_def_store(_store); - NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] expect def: "); status_def.print_on(tty); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect def: "); status_def.print_on(tty); }) if (status_def.found_store() == nullptr) { return nullptr; } @@ -2904,7 +2904,7 @@ StoreNode* MergePrimitiveStores::run() { StoreNode* merged_store = make_merged_store(merge_list, merged_input_value); - NOT_PRODUCT( if(is_trace_success()) { trace(merge_list, merged_input_value, merged_store); } ) + NOT_PRODUCT( if (is_trace_success()) { trace(merge_list, merged_input_value, merged_store); } ) return merged_store; } @@ -3140,7 +3140,7 @@ void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const { merge_list.push(current); while (current != nullptr && merge_list.size() < merge_list_max_size) { Status status = find_adjacent_def_store(current); - NOT_PRODUCT( if(is_trace_basic()) { tty->print("[TraceMergeStores] find def: "); status.print_on(tty); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] find def: "); status.print_on(tty); }) current = status.found_store(); if (current != nullptr) { @@ -3148,20 +3148,20 @@ void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const { // We can have at most one RangeCheck. 
if (status.found_range_check()) { - NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] found RangeCheck, stop traversal."); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found RangeCheck, stop traversal."); }) break; } } } - NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] found:"); merge_list.dump(); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found:"); merge_list.dump(); }) // Truncate the merge_list to a power of 2. const uint pow2size = round_down_power_of_2(merge_list.size()); assert(pow2size >= 2, "must be merging at least 2 stores"); while (merge_list.size() > pow2size) { merge_list.pop(); } - NOT_PRODUCT( if(is_trace_basic()) { tty->print_cr("[TraceMergeStores] truncated:"); merge_list.dump(); }) + NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] truncated:"); merge_list.dump(); }) } // Merge the input values of the smaller stores to a single larger input value. From 93f123ac1b03e4bdf9d3bde05650bc92895c0b9f Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 15:10:59 +0100 Subject: [PATCH 77/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 6163c40ff8c01..cbf2233e611df 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -46,7 +46,7 @@ // pointers, the base address is null, and is hence implicitly a zero constant. 
// // -// Example1: byte array access: +// Example 1: byte array access: // // array[i] // @@ -60,8 +60,8 @@ // // array[5 + i + 3 * j] // -// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * j + 4 * 3 * j -// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * j + 12 * j +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * 3 * j +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 12 * j // -------------------- ----------------------------- -------------------- -------------------- // = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 // @@ -72,6 +72,7 @@ // // pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * i // = 1 * array_base + ARRAY_INT_BASE_OFFSET + 4 * i +// -------------------- --------------------- -------------------- // = scale_0 * variable_0 + con + scale_1 * variable_1 // // @@ -82,6 +83,7 @@ // // pointer = address + 4 * i // = 1 * address + 0 + 4 * i +// -------------------- --- -------------------- // = scale_0 * variable_0 + con + scale_1 * variable_1 // // @@ -116,8 +118,8 @@ // // array[5 + i + j * k] // -// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * j + 4 * j * k -// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * j + 4 * j * k +// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * j * k +// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 4 * j * k // -------------------- ----------------------------- -------------------- -------------------- // = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 // From 072452c0a183702a2c106edf09acb193d286fdb0 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 15:13:24 +0100 Subject: [PATCH 78/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.hpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp 
b/src/hotspot/share/opto/mempointer.hpp index cbf2233e611df..8ebd19252c799 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -142,9 +142,9 @@ // ----------------------------------------------------------------------------------------- // // MemPointerDecomposedForm: -// When the pointer is parsed, it is decomposed into sum of summands plus a constant: +// When the pointer is parsed, it is decomposed into a SUM of summands plus a constant: // -// pointer = sum(summands) + con +// pointer = SUM(summands) + con // // Where each summand_i in summands has the form: // @@ -152,19 +152,19 @@ // // Hence, the full decomposed form is: // -// pointer = sum_i(scale_i * variable_i) + con +// pointer = SUM(scale_i * variable_i) + con // // Note: the scale_i are compile-time constants (NoOverflowInt), and the variable_i are // compile-time variables (C2 nodes). -// On 64bit systems, this decomposed form is computed with long-add/mul, on 32bit systems +// On 64-bit systems, this decomposed form is computed with long-add/mul, on 32-bit systems // it is computed with int-add/mul. // // MemPointerAliasing: // The decomposed form allows us to determine the aliasing between two pointers easily. For // example, if two pointers are identical, except for their constant: // -// pointer1 = sum(summands) + con1 -// pointer2 = sum(summands) + con2 +// pointer1 = SUM(summands) + con1 +// pointer2 = SUM(summands) + con2 // // then we can easily compute the distance between the pointers (distance = con2 - con1), // and determine if they are adjacent. @@ -183,8 +183,9 @@ // pointer1 = array[i + 0] = array_base + array_int_base_offset + 4L * ConvI2L(i + 0) // pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1) // -// At first, computing aliasing is difficult because the distance is hidden inside the -// ConvI2L. 
we can convert this (with array_int_base_offset = 16) into these decomposed forms: +// At first, computing the aliasing is not immediately straight-forward in the general case because +// the distance is hidden inside the ConvI2L. We can convert this (with array_int_base_offset = 16) +// into these decomposed forms: // // pointer1 = 1L * array_base + 4L * i + 16L // pointer2 = 1L * array_base + 4L * i + 20L @@ -199,7 +200,7 @@ // // ----------------------------------------------------------------------------------------- // -// We have to be careful on 64bit systems with ConvI2L: decomposing its input is not +// We have to be careful on 64-bit systems with ConvI2L: decomposing its input is not // correct in general, overflows may not be preserved in the decomposed form: // // AddI: ConvI2L(a + b) != ConvI2L(a) + ConvI2L(b) @@ -221,10 +222,10 @@ // // Definition: Safe decomposition (from some mp_i to mp_{i+1}) // We decompose summand in: -// mp_i = con + summand + sum(other_summands) +// mp_i = con + summand + SUM(other_summands) // Resulting in: +-------------------------+ -// mp_{i+1} = con + dec_con + sum(dec_summands) + sum(other_summands) -// = new_con + sum(new_summands) +// mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands) +// = new_con + SUM(new_summands) // // We call a decomposition safe if either: // SAFE1) No matter the values of the summand variables: @@ -249,7 +250,7 @@ // S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 // S3) All summands of mp1 and mp2 are identical. 
// -// Then the ponter difference between p1 and p2 is identical to the difference between +// Then the pointer difference between p1 and p2 is identical to the difference between // mp1 and mp2: // p1 - p2 = mp1 - mp2 // From c52c5b60da964a4ce613fff73e0cca967334506d Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 15:14:06 +0100 Subject: [PATCH 79/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 8ebd19252c799..387ed3da78db3 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -267,7 +267,7 @@ // // Case 1: only decompositions of type (SAFE1) were used: // We make an induction proof over the decompositions from p1 to mp1, starting with -// the trivial decompoisition: +// the trivial decomposition: // mp1_0 = 0 + 1 * p1 = p1 // and then for the i'th decomposition, we know that // mp1_i = mp1_{i+1} @@ -303,6 +303,8 @@ // // And hence, there must be an x, such that: // p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 +// where +// x = x1 - x2 // // If "x = 0", then it follows: // p1 - p2 = mp1 - mp2 @@ -314,6 +316,7 @@ // >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) // -- apply S2 and S3 -- // > array_element_size_in_bytes * 2^32 - 2^31 +// -- apply array_element_size_in_bytes > 0 -- // >= array_element_size_in_bytes * 2^31 // >= max_possible_array_size_in_bytes // >= array_size_in_bytes From 46bcc48a4c4456012694fe7abc26348600d73baa Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 15:27:31 +0100 Subject: [PATCH 80/89] more updates for Christian --- src/hotspot/share/opto/mempointer.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp 
b/src/hotspot/share/opto/mempointer.hpp index 387ed3da78db3..d1bc269420928 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -56,7 +56,7 @@ // = scale_0 * variable_0 + con + scale_1 * variable_1 // // -// Example2: int array access +// Example 2: int array access // // array[5 + i + 3 * j] // @@ -66,7 +66,7 @@ // = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2 // // -// Example3: Unsafe with int array +// Example 3: Unsafe with int array // // UNSAFE.getInt(array, ARRAY_INT_BASE_OFFSET + 4 * i); // @@ -76,7 +76,7 @@ // = scale_0 * variable_0 + con + scale_1 * variable_1 // // -// Example4: Unsafe with native memory address +// Example 4: Unsafe with native memory address // // long address; // UNSAFE.getInt(null, address + 4 * i); @@ -87,7 +87,7 @@ // = scale_0 * variable_0 + con + scale_1 * variable_1 // // -// Example5: MemorySegment with byte array as backing type +// Example 5: MemorySegment with byte array as backing type // // byte[] array = new byte[1000]; // MemorySegment ms = MemorySegment.ofArray(array); @@ -101,20 +101,20 @@ // = scale_0 * variable_0 + con + scale_1 * variable_1 // // -// Example6: MemorySegment with native memory +// Example 6: MemorySegment with native memory // // MemorySegment ms = Arena.ofAuto().allocate(1000, 1); // assert ms.heapBase().isEmpty(): "null base"; // assert ms.address() != 0: "non-zero native memory address"; -// byte val2 = ms.get(ValueLayout.JAVA_BYTE, i); +// short val = ms.get(ValueLayout.JAVA_SHORT, 2L * i); // -// pointer = ms.heapBase() + ms.address() + i -// = 0 + 1 * ms.address() + 1 * i +// pointer = ms.heapBase() + ms.address() + 2 i +// = 0 + 1 * ms.address() + 2 * i // ------------ ---------------------- -------------------- // = con scale_0 * variable_0 + scale_1 * variable_1 // // -// Example7: Non-linear access to int array +// Example 7: Non-linear access to int array // // array[5 + i + j * k] // @@ -126,7 +126,7 @@ // Note: we 
simply stop parsing once a term is not linear. We keep "j * k" as its own variable. // // -// Example8: Unsafe with native memory address, non-linear access +// Example 8: Unsafe with native memory address, non-linear access // // UNSAFE.getInt(null, i * j); // From 51381eb36e9b4bc17eef8570a46aec5db62dbd1b Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 17:04:26 +0100 Subject: [PATCH 81/89] whitespace --- src/hotspot/share/opto/mempointer.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index d1bc269420928..2db95deb638ca 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -183,8 +183,8 @@ // pointer1 = array[i + 0] = array_base + array_int_base_offset + 4L * ConvI2L(i + 0) // pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1) // -// At first, computing the aliasing is not immediately straight-forward in the general case because -// the distance is hidden inside the ConvI2L. We can convert this (with array_int_base_offset = 16) +// At first, computing the aliasing is not immediately straight-forward in the general case because +// the distance is hidden inside the ConvI2L. 
We can convert this (with array_int_base_offset = 16) // into these decomposed forms: // // pointer1 = 1L * array_base + 4L * i + 16L From 9f442d27b7ab3eb6accdf67f3e83da9e32fe2b64 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 29 Oct 2024 19:23:38 +0100 Subject: [PATCH 82/89] fix distance assert --- src/hotspot/share/opto/mempointer.hpp | 3 +- .../c2/TestMergeStoresUnsafeArrayPointer.java | 64 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 2db95deb638ca..df9b0e4650912 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -367,8 +367,7 @@ class MemPointerAliasing { _aliasing(aliasing), _distance(distance) { - const jint max_distance = 1 << 30; - assert(_distance < max_distance && _distance > -max_distance, "safe distance"); + assert(_distance != min_jint, "given by condition S3 of MemPointer Lemma"); } public: diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java index 9b129324b882b..3b65272c3c7ff 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresUnsafeArrayPointer.java @@ -184,6 +184,18 @@ public static void main(String[] args) { } } + // No result verification here. We only want to make sure we do not hit asserts. 
+ System.out.println("test8 and test9"); + for (int i = 0; i < 100_000; i++) { + test8a(big, ANCHOR); + test8b(big, ANCHOR); + test8c(big, ANCHOR); + test8d(big, ANCHOR); + test9a(big, ANCHOR); + test9b(big, ANCHOR); + test9c(big, ANCHOR); + } + if (errors > 0) { throw new RuntimeException("ERRORS: " + errors); } @@ -257,4 +269,56 @@ static void test7(int[] a, long anchor) { UNSAFE.putInt(a, base + (long)(53 * large_by_53) + 0, 0x42424242); // overflow UNSAFE.putInt(a, base + 53L * (long)(large_by_53) + 4, 0x66666666); // no overflow } + + // Test: check if large distance leads to assert + static void test8a(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42); + UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11); + } + + // Test: check if large distance leads to assert + static void test8b(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11); + UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42); + } + + // Test: check if large distance leads to assert + static void test8c(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42); + UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11); + } + + // Test: check if large distance leads to assert + static void test8d(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11); + UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42); + } + + // Test: check if large distance leads to assert + // case: bad distance: NaN + static void test9a(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base - 100, (byte)42); + UNSAFE.putByte(a, base - 100 + (1L << 31), (byte)11); + } + + // Test: check if large distance leads to assert + // case: just before NaN, it is 
still a valid distance for MemPointer aliasing. + static void test9b(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base - 100, (byte)42); + UNSAFE.putByte(a, base - 100 + (1L << 31) - 1, (byte)11); + } + + // Test: check if large distance leads to assert + // case: constant too large + static void test9c(int[] a, long anchor) { + long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor; + UNSAFE.putByte(a, base, (byte)42); + UNSAFE.putByte(a, base + (1L << 31), (byte)11); + } } From 63496f3316122aa5fd76b5ddfea7d121786bd8ed Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 1 Nov 2024 11:18:38 +0100 Subject: [PATCH 83/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.cpp | 9 +++++---- src/hotspot/share/opto/mempointer.hpp | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index ae53df000c480..a330982f620f2 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -45,7 +45,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() parse_sub_expression(_worklist.pop()); } - // Check for constant overflow. + // Bail out if there is a constant overflow. if (_con.is_NaN()) { return MemPointerDecomposedForm(pointer); } // Sort summands by variable->_idx @@ -55,7 +55,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() int pos_put = 0; int pos_get = 0; while (pos_get < _summands.length()) { - MemPointerSummand summand = _summands.at(pos_get++); + const MemPointerSummand& summand = _summands.at(pos_get++); Node* variable = summand.variable(); NoOverflowInt scale = summand.scale(); // Add up scale of all summands with the same variable. 
@@ -300,8 +300,9 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons // prove that the computed aliasing also applies for the underlying pointers. // // Pre-Condition: -// We assume that both pointers are in-bounds of their respective memory object. -// +// We assume that both pointers are in-bounds of their respective memory object. If this does +// not hold, for example, with the use of Unsafe, then we would already have undefined behavior, +// and we are allowed to do anything. MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const { #ifndef PRODUCT diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index df9b0e4650912..2a299a5c4cc6e 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -36,7 +36,9 @@ // Intuition and Examples: // We parse / decompose pointers into a linear form: // -// pointer = sum_i(scale_i * variable_i) + con +// pointer = SUM(scale_i * variable_i) + con +// +// where SUM() adds all "scale_i * variable_i" for each i together. // // The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are // compile-time variables (C2 nodes). @@ -401,6 +403,7 @@ class MemPointerAliasing { // // summand = scale * variable // +// where variable is a C2 node. class MemPointerSummand : public StackObj { private: Node* _variable; @@ -455,7 +458,7 @@ class MemPointerSummand : public StackObj { // Decomposed form of the pointer sub-expression of "pointer". 
// -// pointer = sum(summands) + con +// pointer = SUM(summands) + con // class MemPointerDecomposedForm : public StackObj { private: From 3ca647e6d5a8c98e9a640f6c6813789c23eddff4 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 1 Nov 2024 11:26:02 +0100 Subject: [PATCH 84/89] apply more suggestions from Christian --- src/hotspot/share/opto/mempointer.cpp | 6 +- src/hotspot/share/opto/mempointer.hpp | 107 +++++++++++++++----------- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index a330982f620f2..a77b341e719e2 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -41,6 +41,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() // parses the pointer expression recursively. int traversal_count = 0; while (_worklist.is_nonempty()) { + // Bail out if the graph is too complex. if (traversal_count++ > 1000) { return MemPointerDecomposedForm(pointer); } parse_sub_expression(_worklist.pop()); } @@ -48,8 +49,9 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() // Bail out if there is a constant overflow. if (_con.is_NaN()) { return MemPointerDecomposedForm(pointer); } - // Sort summands by variable->_idx - _summands.sort(MemPointerSummand::cmp_for_sort); + // Sorting by variable idx means that all summands with the same variable are consecutive. + // This simplifies the combining of summands with the same variable below. + _summands.sort(MemPointerSummand::cmp_by_variable_idx); // Combine summands for the same variable, adding up the scales. int pos_put = 0; diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 2a299a5c4cc6e..d63e1256e9a34 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -222,18 +222,23 @@ // are such "safe decompositions". 
// // -// Definition: Safe decomposition (from some mp_i to mp_{i+1}) -// We decompose summand in: -// mp_i = con + summand + SUM(other_summands) -// Resulting in: +-------------------------+ -// mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands) -// = new_con + SUM(new_summands) -// -// We call a decomposition safe if either: -// SAFE1) No matter the values of the summand variables: +// Definition: Safe decomposition +// Trivial decomposition: +// (SAFE0) The trivial decomposition from p to mp_0 = 0 + 1 * p is always safe. +// +// Non-trivial decomposition: +// We decompose summand in: +// mp_i = con + summand + SUM(other_summands) +// Resulting in: +-------------------------+ +// mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands) +// = new_con + SUM(new_summands) +// where mp_i means that the original pointer p was decomposed i times. +// +// We call a non-trivial decomposition safe if either: +// (SAFE1) No matter the values of the summand variables: // mp_i = mp_{i+1} // -// SAFE2) The pointer is on an array with a known array_element_size_in_bytes, +// (SAFE2) The pointer is on an array with a known array_element_size_in_bytes, // and there is an integer x, such that: // mp_i = mp_{i+1} + x * array_element_size_in_bytes * 2^32 // @@ -241,16 +246,14 @@ // have a distance at least twice as large as the array size, and so // at least one of mp1 or mp2 must be out of bounds of the array. // -// Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all -// decompositions we apply are safe. -// -// -// MemPointer Lemma: +// MemPointer Lemma: // Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2. // If these conditions hold: -// S1) Both p1 and p2 are within the bounds of the same memory object. -// S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31 -// S3) All summands of mp1 and mp2 are identical. 
+// (S0) mp1 and mp2 are constructed only with safe decompositions (SAFE0, SAFE1, SAFE2) +// from p1 and p2, respectively. +// (S1) Both p1 and p2 are within the bounds of the same memory object. +// (S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31. +// (S3) All summands of mp1 and mp2 are identical (i.e. only the constants are possibly different). // // Then the pointer difference between p1 and p2 is identical to the difference between // mp1 and mp2: @@ -260,22 +263,34 @@ // prove the correctness of its aliasing computation between two MemPointers. // // +// Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all +// decompositions we apply are safe. +// +// // Proof of the "MemPointer Lemma": -// Case 0: no decompositions were used: +// Assume (S0-S3) and show that +// p1 - p2 = mp1 - mp2 +// +// We make a case distinction over the types of decompositions used in the construction of mp1 and mp2. +// +// Trivial Case: Only trivial (SAFE0) decompositions were used: // mp1 = 0 + 1 * p1 = p1 // mp2 = 0 + 1 * p2 = p2 // => // p1 - p2 = mp1 - mp2 // -// Case 1: only decompositions of type (SAFE1) were used: +// Unsafe Case: We apply at least one unsafe decomposition: +// This is a contradiction to (S0) and we are done. 
+// +// Case 1: Only decomposition of type (SAFE0) and (SAFE1) are used: // We make an induction proof over the decompositions from p1 to mp1, starting with -// the trivial decomposition: +// the trivial decomposition (SAFE0): // mp1_0 = 0 + 1 * p1 = p1 -// and then for the i'th decomposition, we know that +// Then for the i-th non-trivial decomposition (SAFE1) we know that // mp1_i = mp1_{i+1} -// and hence, if mp1 was decomposed with n decompositions from p1: +// and hence, after the n-th non-trivial decomposition from p1: // p1 = mp1_0 = mp1_i = mp1_n = mp1 -// The analogue can be proven for p2 and mp2: +// Analogously, we can prove: // p2 = mp2 // // p1 = mp1 @@ -283,30 +298,28 @@ // => // p1 - p2 = mp1 - mp2 // -// Case 2: decompositions of type (SAFE2) were used, and possibly also decompositions of -// type (SAFE1). -// Given we have (SAFE2) decompositions, we know that we are operating on an array of -// known array_element_size_in_bytes. We can weaken the guarantees from (SAFE1) -// decompositions to the same guarantee as (SAFE2) decompositions, hence all applied -// decompositions satisfy: -// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 -// where x_i = 0 for (SAFE1) decompositions. +// Case 2: At least one decomposition of type (SAFE2) and no unsafe decomposition is used. +// Given we have (SAFE2) decompositions, we know that we are operating on an array of +// known array_element_size_in_bytes. We can weaken the guarantees from (SAFE1) +// decompositions to the same guarantee as (SAFE2) decompositions. Hence all applied +// non-trivial decompositions satisfy: +// mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 +// where x1_i = 0 for (SAFE1) decompositions. 
// // We make an induction proof over the decompositions from p1 to mp1, starting with -// the trivial decompoisition: +// the trivial decomposition (SAFE0): // mp1_0 = 0 + 1 * p1 = p1 -// and then for the i'th decomposition, we know that +// Then for the i-th non-trivial decomposition (SAFE1) or (SAFE2), we know that // mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32 -// and hence, if mp1 was decomposed with n decompositions from p1: +// and hence, if mp1 was decomposed with n non-trivial decompositions (SAFE1) or (SAFE2) from p1: // p1 = mp1 + x1 * array_element_size_in_bytes * 2^32 -// where x1 = sum(x1_i). -// The analogue can be proven for p2 and mp2: +// where +// x1 = SUM(x1_i) +// Analogously, we can prove: // p2 = mp2 + x2 * array_element_size_in_bytes * 2^32 // -// And hence, there must be an x, such that: +// And hence, with x = x1 - x2 we have: // p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32 -// where -// x = x1 - x2 // // If "x = 0", then it follows: // p1 - p2 = mp1 - mp2 @@ -316,18 +329,18 @@ // >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) // -- apply x != 0 -- // >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) -// -- apply S2 and S3 -- +// -- apply (S3) -- +// = array_element_size_in_bytes * 2^32 - abs(mp1.con - mp2.con) +// -- apply (S2) -- // > array_element_size_in_bytes * 2^32 - 2^31 // -- apply array_element_size_in_bytes > 0 -- // >= array_element_size_in_bytes * 2^31 // >= max_possible_array_size_in_bytes // >= array_size_in_bytes // -// Thus we get a contradiction: p1 and p2 have a distance greater than the array -// size, and hence at least one of the two must be out of bounds. But condition S1 -// of the MemPointer Lemma requires that both p1 and p2 are both in bounds of the -// same memory object. - +// This shows that p1 and p2 have a distance greater than the array size, and hence at least one of the two +// pointers must be out of bounds. 
This contradicts our assumption (S1) and we are done. +// #ifndef PRODUCT class TraceMemPointer : public StackObj { private: @@ -424,7 +437,7 @@ class MemPointerSummand : public StackObj { Node* variable() const { return _variable; } NoOverflowInt scale() const { return _scale; } - static int cmp_for_sort(MemPointerSummand* p1, MemPointerSummand* p2) { + static int cmp_by_variable_idx(MemPointerSummand* p1, MemPointerSummand* p2) { if (p1->variable() == nullptr) { return (p2->variable() == nullptr) ? 0 : 1; } else if (p2->variable() == nullptr) { From e2550c9bad9affb54faa5988da087fdddf1d149a Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 1 Nov 2024 14:53:47 +0100 Subject: [PATCH 85/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.hpp | 7 ++++--- src/hotspot/share/opto/noOverflowInt.hpp | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index d63e1256e9a34..b9fafe32e79bb 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -340,7 +340,8 @@ // // This shows that p1 and p2 have a distance greater than the array size, and hence at least one of the two // pointers must be out of bounds. This contradicts our assumption (S1) and we are done. 
-// + + #ifndef PRODUCT class TraceMemPointer : public StackObj { private: @@ -382,7 +383,7 @@ class MemPointerAliasing { _aliasing(aliasing), _distance(distance) { - assert(_distance != min_jint, "given by condition S3 of MemPointer Lemma"); + assert(_distance != min_jint, "given by condition (S3) of MemPointer Lemma"); } public: @@ -495,7 +496,7 @@ class MemPointerDecomposedForm : public StackObj { private: MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) - :_pointer(pointer), _con(con) { + : _pointer(pointer), _con(con) { assert(!_con.is_NaN(), "non-NaN constant"); assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); for (int i = 0; i < summands.length(); i++) { diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index c850e487f1fd7..8a240da295b57 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -25,7 +25,6 @@ #ifndef SHARE_OPTO_NOOVERFLOWINT_HPP #define SHARE_OPTO_NOOVERFLOWINT_HPP -#include "utilities/globalDefinitions.hpp" #include "utilities/ostream.hpp" // Wrapper around jint, which detects overflow. 
@@ -92,7 +91,7 @@ class NoOverflowInt { return NoOverflowInt(0) - *this; } - bool is_multiple_of(const NoOverflowInt other) const { + bool is_multiple_of(const NoOverflowInt& other) const { NoOverflowInt a = this->abs(); NoOverflowInt b = other.abs(); if (a.is_NaN()) { return false; } From d10b76ffbec1d82ac83009fd9858bb60eed70818 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 1 Nov 2024 15:46:01 +0100 Subject: [PATCH 86/89] more review applications --- src/hotspot/share/opto/mempointer.cpp | 4 ++-- src/hotspot/share/opto/mempointer.hpp | 20 ++++++++++---------- src/hotspot/share/opto/noOverflowInt.hpp | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index a77b341e719e2..4782aa1292e89 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -82,7 +82,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() // Parse a sub-expression of the pointer, starting at the current summand. We parse the // current node, and see if it can be decomposed into further summands, or if the current // summand is terminal. -void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand summand) { +void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand& summand) { Node* n = summand.variable(); const NoOverflowInt scale = summand.scale(); const NoOverflowInt one(1); @@ -188,7 +188,7 @@ void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSumman // Check if the decomposition of operation opc is guaranteed to be safe. 
// Please refer to the definition of "safe decomposition" in mempointer.hpp -bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, const NoOverflowInt scale) const { +bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const { #ifndef _LP64 // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always // be truncated. Thus, it does not matter if we have int or long overflows. diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index b9fafe32e79bb..32e2f31fedfa4 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -387,10 +387,8 @@ class MemPointerAliasing { } public: - MemPointerAliasing() : MemPointerAliasing(Unknown, 0) {} - static MemPointerAliasing make_unknown() { - return MemPointerAliasing(); + return MemPointerAliasing(Unknown, 0); } static MemPointerAliasing make_always(const jint distance) { @@ -427,7 +425,7 @@ class MemPointerSummand : public StackObj { MemPointerSummand() : _variable(nullptr), _scale(NoOverflowInt::make_NaN()) {} - MemPointerSummand(Node* variable, const NoOverflowInt scale) : + MemPointerSummand(Node* variable, const NoOverflowInt& scale) : _variable(variable), _scale(scale) { @@ -476,8 +474,10 @@ class MemPointerSummand : public StackObj { // class MemPointerDecomposedForm : public StackObj { private: - // We limit the number of summands to 10. Usually, a pointer contains a base pointer - // (e.g. array pointer or null for native memory) and a few variables. + // We limit the number of summands to 10. This is just a best guess, and not at this + // point supported by evidence. But I think it is reasonable: usually, a pointer + // contains a base pointer (e.g. array pointer or null for native memory) and a few + // variables. It should be rare that we have more than 9 variables. 
static const int SUMMANDS_SIZE = 10; Node* _pointer; // pointer node associated with this (sub)pointer @@ -495,7 +495,7 @@ class MemPointerDecomposedForm : public StackObj { } private: - MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) + MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) : _pointer(pointer), _con(con) { assert(!_con.is_NaN(), "non-NaN constant"); assert(summands.length() <= SUMMANDS_SIZE, "summands must fit"); @@ -508,7 +508,7 @@ class MemPointerDecomposedForm : public StackObj { } public: - static MemPointerDecomposedForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt con) { + static MemPointerDecomposedForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) { if (summands.length() <= SUMMANDS_SIZE) { return MemPointerDecomposedForm(pointer, summands, con); } else { @@ -567,9 +567,9 @@ class MemPointerDecomposedFormParser : public StackObj { private: MemPointerDecomposedForm parse_decomposed_form(); - void parse_sub_expression(const MemPointerSummand summand); + void parse_sub_expression(const MemPointerSummand& summand); - bool is_safe_to_decompose_op(const int opc, const NoOverflowInt scale) const; + bool is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const; }; // Facility to parse the pointer of a Load or Store, so that aliasing between two such diff --git a/src/hotspot/share/opto/noOverflowInt.hpp b/src/hotspot/share/opto/noOverflowInt.hpp index 8a240da295b57..9da24645b4117 100644 --- a/src/hotspot/share/opto/noOverflowInt.hpp +++ b/src/hotspot/share/opto/noOverflowInt.hpp @@ -53,25 +53,25 @@ class NoOverflowInt { jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; } bool is_zero() const { return !is_NaN() && value() == 0; } - friend NoOverflowInt operator+(const NoOverflowInt a, const NoOverflowInt b) { + friend NoOverflowInt operator+(const NoOverflowInt& a, 
const NoOverflowInt& b) { if (a.is_NaN()) { return a; } if (b.is_NaN()) { return b; } return NoOverflowInt((jlong)a.value() + (jlong)b.value()); } - friend NoOverflowInt operator-(const NoOverflowInt a, const NoOverflowInt b) { + friend NoOverflowInt operator-(const NoOverflowInt& a, const NoOverflowInt& b) { if (a.is_NaN()) { return a; } if (b.is_NaN()) { return b; } return NoOverflowInt((jlong)a.value() - (jlong)b.value()); } - friend NoOverflowInt operator*(const NoOverflowInt a, const NoOverflowInt b) { + friend NoOverflowInt operator*(const NoOverflowInt& a, const NoOverflowInt& b) { if (a.is_NaN()) { return a; } if (b.is_NaN()) { return b; } return NoOverflowInt((jlong)a.value() * (jlong)b.value()); } - friend NoOverflowInt operator<<(const NoOverflowInt a, const NoOverflowInt b) { + friend NoOverflowInt operator<<(const NoOverflowInt& a, const NoOverflowInt& b) { if (a.is_NaN()) { return a; } if (b.is_NaN()) { return b; } jint shift = b.value(); @@ -79,14 +79,14 @@ class NoOverflowInt { return NoOverflowInt((jlong)a.value() << shift); } - friend bool operator==(const NoOverflowInt a, const NoOverflowInt b) { + friend bool operator==(const NoOverflowInt& a, const NoOverflowInt& b) { if (a.is_NaN()) { return false; } if (b.is_NaN()) { return false; } return a.value() == b.value(); } NoOverflowInt abs() const { - if (is_NaN()) { return make_NaN(); } + if (is_NaN()) { return *this; } if (value() >= 0) { return *this; } return NoOverflowInt(0) - *this; } From 03219bbd75303dbfcaa6c19c096f2cbf180c7bd3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 4 Nov 2024 11:26:45 +0100 Subject: [PATCH 87/89] Apply suggestions from code review Co-authored-by: Christian Hagedorn --- src/hotspot/share/opto/mempointer.cpp | 13 ++++++------- src/hotspot/share/opto/mempointer.hpp | 10 +++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index 4782aa1292e89..e358d20f3acb4 
100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -196,7 +196,7 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons return true; #else - switch(opc) { + switch (opc) { // These operations are always safe to decompose, i.e. (SAFE1): case Op_ConI: case Op_ConL: @@ -259,7 +259,7 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons // scale * ConvI2L(a * con) = scale * con * ConvI2L(a) + scale * y * 2^32 // scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a) + scale * y * 2^32 // \_______________________/ \_____________________________________/ \______________/ - // before decomposition after decomposition overflow correction + // before decomposition after decomposition ("new_summands") overflow correction // // Thus, for AddI and SubI, we get: // summand = new_summand1 + new_summand2 + scale * y * 2^32 @@ -280,7 +280,7 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons // implies that there is some integer z, such that: // z * array_element_size_in_bytes = scale // - // And hence, with "x = y * z": + // And hence, with "x = y * z", the decomposition is (SAFE2) under the assumed condition: // mp_i = mp_{i+1} + scale * y * 2^32 // = mp_{i+1} + z * array_element_size_in_bytes * y * 2^32 // = mp_{i+1} + x * array_element_size_in_bytes * 2^32 @@ -315,7 +315,7 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD } #endif - // "MemPointer Lemma" condition S2: check if all summands are the same: + // "MemPointer Lemma" condition (S3): check if all summands are the same: for (uint i = 0; i < SUMMANDS_SIZE; i++) { const MemPointerSummand s1 = summands_at(i); const MemPointerSummand s2 = other.summands_at(i); @@ -329,7 +329,7 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD } } - // "MemPointer Lemma" condition S3: check that the constants do not differ too much: 
+ // "MemPointer Lemma" condition (S2): check that the constants do not differ too much: const NoOverflowInt distance = other.con() - con(); // We must check that: abs(distance) < 2^32 // However, this is only false if: distance = min_jint @@ -344,7 +344,7 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD return MemPointerAliasing::make_unknown(); } - // "MemPointer Lemma" condition S1: + // "MemPointer Lemma" condition (S1): // Given that all summands are the same, we know that both pointers point into the // same memory object. With the Pre-Condition, we know that both pointers are in // bounds of that same memory object. @@ -379,4 +379,3 @@ bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const { return is_adjacent; } - diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 32e2f31fedfa4..77274ad14e472 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -43,7 +43,7 @@ // The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are // compile-time variables (C2 nodes). // -// For the MemPointer, we do not explicitly track base address. For Java heap pointers, the +// For the MemPointer, we do not explicitly track the base address. For Java heap pointers, the // base address is just a variable in a summand with scale == 1. For native memory (C heap) // pointers, the base address is null, and is hence implicitly a zero constant. // @@ -229,7 +229,7 @@ // Non-trivial decomposition: // We decompose summand in: // mp_i = con + summand + SUM(other_summands) -// Resulting in: +-------------------------+ +// resulting in: +-------------------------+ // mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands) // = new_con + SUM(new_summands) // where mp_i means that the original pointer p was decomposed i times.
@@ -255,7 +255,7 @@ // (S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31. // (S3) All summands of mp1 and mp2 are identical (i.e. only the constants are possibly different). // -// Then the pointer difference between p1 and p2 is identical to the difference between +// then the pointer difference between p1 and p2 is identical to the difference between // mp1 and mp2: // p1 - p2 = mp1 - mp2 // @@ -329,9 +329,9 @@ // >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2) // -- apply x != 0 -- // >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2) -// -- apply (S3) -- +// -- apply (S3) -- // = array_element_size_in_bytes * 2^32 - abs(mp1.con - mp2.con) -// -- apply (S2) -- +// -- apply (S2) -- // > array_element_size_in_bytes * 2^32 - 2^31 // -- apply array_element_size_in_bytes > 0 -- // >= array_element_size_in_bytes * 2^31 From 823bed757a8d42ce8084eb04450355a868cf406f Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 4 Nov 2024 11:27:25 +0100 Subject: [PATCH 88/89] Update test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java Co-authored-by: Christian Hagedorn --- .../hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java index 3cd79a8e786d7..a5302d1b5158e 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java @@ -193,7 +193,7 @@ interface MemorySegmentProvider { // List of tests Map tests = new HashMap<>(); - // List of gold, the results from the first run before compilation + // List of golden values, the results from the first run before compilation Map golds = new HashMap<>(); public TestMergeStoresMemorySegmentImpl () { From c1f274f29ff81c5f94d30d6a1f868d0c67627ac5 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: 
Mon, 4 Nov 2024 12:45:20 +0100 Subject: [PATCH 89/89] more changes for Christian --- src/hotspot/share/opto/memnode.cpp | 5 +---- src/hotspot/share/opto/mempointer.cpp | 12 +++++++----- src/hotspot/share/opto/mempointer.hpp | 9 +++++++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 89fa915c2defb..919d23fea8da5 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2937,10 +2937,7 @@ bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const St #endif const MemPointer pointer_use(use_store NOT_PRODUCT( COMMA trace )); const MemPointer pointer_def(def_store NOT_PRODUCT( COMMA trace )); - if (!pointer_def.is_adjacent_to_and_before(pointer_use)) { - return false; - } - return true; + return pointer_def.is_adjacent_to_and_before(pointer_use); } bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const { diff --git a/src/hotspot/share/opto/mempointer.cpp b/src/hotspot/share/opto/mempointer.cpp index e358d20f3acb4..df443c69449cb 100644 --- a/src/hotspot/share/opto/mempointer.cpp +++ b/src/hotspot/share/opto/mempointer.cpp @@ -42,12 +42,12 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() int traversal_count = 0; while (_worklist.is_nonempty()) { // Bail out if the graph is too complex. - if (traversal_count++ > 1000) { return MemPointerDecomposedForm(pointer); } + if (traversal_count++ > 1000) { return MemPointerDecomposedForm::make_trivial(pointer); } parse_sub_expression(_worklist.pop()); } // Bail out if there is a constant overflow. - if (_con.is_NaN()) { return MemPointerDecomposedForm(pointer); } + if (_con.is_NaN()) { return MemPointerDecomposedForm::make_trivial(pointer); } // Sorting by variable idx means that all summands with the same variable are consecutive. 
// This simplifies the combining of summands with the same variable below. @@ -67,7 +67,7 @@ MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() } // Bail out if scale is NaN. if (scale.is_NaN()) { - return MemPointerDecomposedForm(pointer); + return MemPointerDecomposedForm::make_trivial(pointer); } // Keep summands with non-zero scale. if (!scale.is_zero()) { @@ -299,7 +299,9 @@ bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, cons } // Compute the aliasing between two MemPointerDecomposedForm. We use the "MemPointer Lemma" to -// prove that the computed aliasing also applies for the underlying pointers. +// prove that the computed aliasing also applies for the underlying pointers. Note that the +// condition (S0) is already given, because the MemPointerDecomposedForm is always constructed +// using only safe decompositions. // // Pre-Condition: // We assume that both pointers are in-bounds of their respective memory object. If this does @@ -349,7 +351,7 @@ MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerD // same memory object. With the Pre-Condition, we know that both pointers are in // bounds of that same memory object. 
- // Hence, all 3 conditions of the "MemoryPointer Lemma" are established, and hence + // Hence, all 4 conditions of the "MemPointer Lemma" are established, and hence // we know that the distance between the underlying pointers is equal to the distance // we computed for the MemPointers: // p_other - p_this = distance = other.con - this.con diff --git a/src/hotspot/share/opto/mempointer.hpp b/src/hotspot/share/opto/mempointer.hpp index 77274ad14e472..1e5b2c00b8822 100644 --- a/src/hotspot/share/opto/mempointer.hpp +++ b/src/hotspot/share/opto/mempointer.hpp @@ -488,13 +488,14 @@ class MemPointerDecomposedForm : public StackObj { public: // Empty MemPointerDecomposedForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {} + +private: // Default / trivial: pointer = 0 + 1 * pointer MemPointerDecomposedForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) { assert(pointer != nullptr, "pointer must be non-null"); _summands[0] = MemPointerSummand(pointer, NoOverflowInt(1)); } -private: MemPointerDecomposedForm(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) : _pointer(pointer), _con(con) { assert(!_con.is_NaN(), "non-NaN constant"); @@ -508,11 +509,15 @@ class MemPointerDecomposedForm : public StackObj { public: + static MemPointerDecomposedForm make_trivial(Node* pointer) { + return MemPointerDecomposedForm(pointer); + } + static MemPointerDecomposedForm make(Node* pointer, const GrowableArray& summands, const NoOverflowInt& con) { if (summands.length() <= SUMMANDS_SIZE) { return MemPointerDecomposedForm(pointer, summands, con); } else { - return MemPointerDecomposedForm(pointer); + return MemPointerDecomposedForm::make_trivial(pointer); } }