Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
bd77da8
C2: optimize constant addends in masked sums
mernst-github Dec 21, 2024
2b8da62
review:
mernst-github Dec 24, 2024
bc3c4f0
undo method reordering
mernst-github Dec 24, 2024
ba2f82b
T != X
mernst-github Dec 24, 2024
cabc0d8
counter
mernst-github Dec 24, 2024
df79a96
--more null checks
mernst-github Dec 24, 2024
2a79c24
consistently use bt
mernst-github Dec 24, 2024
1555846
comment readability
mernst-github Jan 2, 2025
e0f44b4
naming, comment
mernst-github Jan 8, 2025
6df13ff
randomize, test "(j + const) & nonconst_mask".
mernst-github Jan 8, 2025
9d3ca06
addConstNonConstMaskLong
mernst-github Jan 8, 2025
cbe6165
avoid redundant comment
mernst-github Jan 8, 2025
35afeb0
"element".
mernst-github Jan 8, 2025
541e44c
Assert that MulNode::Ideal already masks constant shift amounts for us.
mernst-github Jan 19, 2025
f839e5f
(c)
mernst-github Jan 20, 2025
c9a6545
(c)
mernst-github Jan 23, 2025
ff10ac2
JLS: only the lower bits of the shift are taken into account (aka we …
mernst-github Jan 23, 2025
4849477
fully randomized
mernst-github Jan 23, 2025
272267c
make the check more clear: shift >= mask_width
mernst-github Jan 27, 2025
490cc2f
Merge branch 'openjdk:master' into mernst/JDK-8346664
mernst-github Jan 29, 2025
a8c3a6b
undo assertion and mask shift amount ourselves.
mernst-github Jan 30, 2025
442e6c8
TestEquivalentInvariants: split scenarios that differ by 'AlignVector'
mernst-github Jan 30, 2025
419292b
expect vectorization of i|7 instead of i&7
mernst-github Jan 30, 2025
bcdd6f8
You can have two @IR rules with different applyIfs.
mernst-github Jan 30, 2025
2ce808d
completely disable IR rule
mernst-github Jan 30, 2025
a421aec
"should never vectorize" only holds for long[] input.
mernst-github Jan 30, 2025
cd1c8a1
disable `|` . comments.
mernst-github Jan 31, 2025
ed21918
Apply suggestions from code review
mernst-github Jan 31, 2025
e5beda4
consistently label failing cases due to Align requirements.
mernst-github Jan 31, 2025
397cf15
indent
mernst-github Jan 31, 2025
0e9de51
dropped bug ref.
mernst-github Jan 31, 2025
98da0e0
Merge branch 'openjdk:master' into mernst/JDK-8346664
mernst-github Feb 2, 2025
5837558
jlong, not long
mernst-github Feb 2, 2025
c39d223
Apply suggestions from code review
mernst-github Feb 5, 2025
1d23c1a
Comments, "Proof", order of checks.
mernst-github Feb 5, 2025
2b9ef02
Reword correctness.
mernst-github Feb 8, 2025
a754444
Reword correctness.
mernst-github Feb 8, 2025
9783ce4
Merge branch 'openjdk:master' into mernst/JDK-8346664
mernst-github Feb 8, 2025
09f01e8
Reword correctness (fixes).
mernst-github Feb 8, 2025
c1ea957
Merge branch 'openjdk:master' into mernst/JDK-8346664
mernst-github Feb 14, 2025
b7a16a1
incorporate @eme64's comment suggestions
mernst-github Feb 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 104 additions & 94 deletions src/hotspot/share/opto/mulnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,9 +692,11 @@ const Type *AndINode::mul_ring( const Type *t0, const Type *t1 ) const {
return and_value<TypeInt>(r0, r1);
}

static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you split declaration and definition? Could the body just be moved up here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of the details wrt ADD the body feels out of context here. I find it easier to read when it's close to AndIL_sum_and_mask below.


const Type* AndINode::Value(PhaseGVN* phase) const {
// patterns similar to (v << 2) & 3
if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_INT, true)) {
if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_INT) ||
AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_INT)) {
return TypeInt::ZERO;
}

Expand Down Expand Up @@ -740,8 +742,8 @@ Node* AndINode::Identity(PhaseGVN* phase) {

//------------------------------Ideal------------------------------------------
Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
// pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3
Node* progress = AndIL_add_shift_and_mask(phase, T_INT);
// Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible.
Node* progress = AndIL_sum_and_mask(phase, T_INT);
if (progress != nullptr) {
return progress;
}
Expand Down Expand Up @@ -824,8 +826,8 @@ const Type *AndLNode::mul_ring( const Type *t0, const Type *t1 ) const {
}

const Type* AndLNode::Value(PhaseGVN* phase) const {
// patterns similar to (v << 2) & 3
if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_LONG, true)) {
if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_LONG) ||
AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_LONG)) {
return TypeLong::ZERO;
}

Expand Down Expand Up @@ -872,8 +874,8 @@ Node* AndLNode::Identity(PhaseGVN* phase) {

//------------------------------Ideal------------------------------------------
Node *AndLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3
Node* progress = AndIL_add_shift_and_mask(phase, T_LONG);
// Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible.
Node* progress = AndIL_sum_and_mask(phase, T_LONG);
if (progress != nullptr) {
return progress;
}
Expand Down Expand Up @@ -2096,99 +2098,109 @@ const Type* RotateRightNode::Value(PhaseGVN* phase) const {
}
}

// Given an expression (AndX shift mask) or (AndX mask shift),
// determine if the AndX must always produce zero, because the
// shift (x<<N) is bitwise disjoint from the mask #M.
// The X in AndX must be I or L, depending on bt.
// Specifically, the following cases fold to zero,
// when the shift value N is large enough to zero out
// all the set positions of the and-mask M.
// (AndI (LShiftI _ #N) #M) => #0
// (AndL (LShiftL _ #N) #M) => #0
// (AndL (ConvI2L (LShiftI _ #N)) #M) => #0
// The M and N values must satisfy ((-1 << N) & M) == 0.
// Because the optimization might work for a non-constant
// mask M, we check the AndX for both operand orders.
bool MulNode::AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse) {
if (mask == nullptr || shift == nullptr) {
return false;
}
const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt);
if (mask_t == nullptr || phase->type(shift)->isa_integer(bt) == nullptr) {
return false;
}
shift = shift->uncast();
if (shift == nullptr) {
return false;
//------------------------------ Sum & Mask ------------------------------

// Returns a lower bound on the number of trailing zeros in expr.
static jint AndIL_min_trailing_zeros(const PhaseGVN* phase, const Node* expr, BasicType bt) {
Comment on lines +2103 to +2104
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this method restricted to use in AndIL? Because it looks like it is doing something more generic: trying to figure out a lower bound on the trailing zeros of an expression.

If that is the case: Why not put it in Node::get_trailing_zeros_lower_bound(phase, bt), so it can be used elsewhere too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would argue that while this might be incidentally reusable outside of the scope of "And" nodes, as long as there is no actual demand to reuse this, I would rather not add it to the rather prominent Node class to avoid api bloat.

Iff the notion of "is known to be a multiple of a certain power of two" is really of general interest, I would expect it to become part of TypeInteger.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, just leave it where it is for now. I'm ok with it.

expr = expr->uncast();
const TypeInteger* type = phase->type(expr)->isa_integer(bt);
if (type == nullptr) {
return 0;
}
if (phase->type(shift)->isa_integer(bt) == nullptr) {
return false;

if (type->is_con()) {
jlong con = type->get_con_as_long(bt);
return con == 0L ? (type2aelembytes(bt) * BitsPerByte) : count_trailing_zeros(con);
}
BasicType shift_bt = bt;
if (bt == T_LONG && shift->Opcode() == Op_ConvI2L) {

if (expr->Opcode() == Op_ConvI2L) {
expr = expr->in(1)->uncast();
bt = T_INT;
Node* val = shift->in(1);
if (val == nullptr) {
return false;
}
val = val->uncast();
if (val == nullptr) {
return false;
}
if (val->Opcode() == Op_LShiftI) {
shift_bt = T_INT;
shift = val;
if (phase->type(shift)->isa_integer(bt) == nullptr) {
return false;
}
}
type = phase->type(expr)->isa_int();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are trying to look through a ConvI2L, I think for the sake of consistency, you can reassign bt to T_INT at this point.

Copy link
Contributor Author

@mernst-github mernst-github Dec 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}
if (shift->Opcode() != Op_LShift(shift_bt)) {
if (check_reverse &&
(mask->Opcode() == Op_LShift(bt) ||
(bt == T_LONG && mask->Opcode() == Op_ConvI2L))) {
// try it the other way around
return AndIL_shift_and_mask_is_always_zero(phase, mask, shift, bt, false);

// Pattern: expr = (x << shift)
if (expr->Opcode() == Op_LShift(bt)) {
const TypeInt* shift_t = phase->type(expr->in(2))->isa_int();
if (shift_t == nullptr || !shift_t->is_con()) {
return 0;
}
return false;
}
Node* shift2 = shift->in(2);
if (shift2 == nullptr) {
return false;
// We need to truncate the shift, as it may not have been canonicalized yet.
// T_INT: 0..31 -> shift_mask = 4 * 8 - 1 = 31
// T_LONG: 0..63 -> shift_mask = 8 * 8 - 1 = 63
// (JLS: "Shift Operators")
jint shift_mask = type2aelembytes(bt) * BitsPerByte - 1;
return shift_t->get_con() & shift_mask;
}
const Type* shift2_t = phase->type(shift2);
if (!shift2_t->isa_int() || !shift2_t->is_int()->is_con()) {

return 0;
}

// Checks whether expr is neutral additive element (zero) under mask,
// i.e. whether an expression of the form:
// (AndX (AddX expr addend) mask)
// (expr + addend) & mask
// is equivalent to
// (AndX addend mask)
// addend & mask
// for any addend.
// (The X in AndX must be I or L, depending on bt).
//
// We check for the sufficient condition when the lowest set bit in expr is higher than
// the highest set bit in mask, i.e.:
// expr: eeeeee0000000000000
// mask: 000000mmmmmmmmmmmmm
// <--w bits--->
// We do not test for other cases.
//
// Correctness:
// Given "expr" with at least "w" trailing zeros,
// let "mod = 2^w", "suffix_mask = mod - 1"
//
// Since "mask" only has bits set where "suffix_mask" does, we have:
// mask = suffix_mask & mask (SUFFIX_MASK)
//
// And since expr only has bits set above w, and suffix_mask only below:
// expr & suffix_mask == 0 (NO_BIT_OVERLAP)
//
// From unsigned modular arithmetic (with unsigned modulo %), and since mod is
// a power of 2, and we are computing in a ring of powers of 2, we know that
// (x + y) % mod = (x % mod + y) % mod
// (x + y) & suffix_mask = (x & suffix_mask + y) & suffix_mask (MOD_ARITH)
//
// We can now prove the equality:
// (expr + addend) & mask
// = (expr + addend) & suffix_mask & mask (SUFFIX_MASK)
// = (expr & suffix_mask + addend) & suffix_mask & mask (MOD_ARITH)
// = (0 + addend) & suffix_mask & mask (NO_BIT_OVERLAP)
// = addend & mask (SUFFIX_MASK)
//
// Hence, an expr with at least w trailing zeros is a neutral additive element under any mask with bit width w.
static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt) {
// When the mask is negative, it has the most significant bit set.
const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt);
if (mask_t == nullptr || mask_t->lo_as_long() < 0) {
return false;
}

jint shift_con = shift2_t->is_int()->get_con() & ((shift_bt == T_INT ? BitsPerJavaInteger : BitsPerJavaLong) - 1);
if ((((jlong)1) << shift_con) > mask_t->hi_as_long() && mask_t->lo_as_long() >= 0) {
return true;
// When the mask is constant zero, we defer to MulNode::Value to eliminate the entire AndX operation.
if (mask_t->hi_as_long() == 0) {
assert(mask_t->lo_as_long() == 0, "checked earlier");
return false;
}

return false;
jint mask_bit_width = BitsPerLong - count_leading_zeros(mask_t->hi_as_long());
jint expr_trailing_zeros = AndIL_min_trailing_zeros(phase, expr, bt);
return expr_trailing_zeros >= mask_bit_width;
}

// Given an expression (AndX (AddX v1 (LShiftX v2 #N)) #M)
// determine if the AndX must always produce (AndX v1 #M),
// because the shift (v2<<N) is bitwise disjoint from the mask #M.
// The X in AndX will be I or L, depending on bt.
// Specifically, the following cases fold,
// when the shift value N is large enough to zero out
// all the set positions of the and-mask M.
// (AndI (AddI v1 (LShiftI _ #N)) #M) => (AndI v1 #M)
// (AndL (AddI v1 (LShiftL _ #N)) #M) => (AndL v1 #M)
// (AndL (AddL v1 (ConvI2L (LShiftI _ #N))) #M) => (AndL v1 #M)
// The M and N values must satisfy ((-1 << N) & M) == 0.
// Because the optimization might work for a non-constant
// mask M, and because the AddX operands can come in either
// order, we check for every operand order.
Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) {
// Reduces the pattern:
// (AndX (AddX add1 add2) mask)
// to
// (AndX add1 mask), if add2 is neutral wrt mask (see above), and vice versa.
Node* MulNode::AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt) {
Node* add = in(1);
Node* mask = in(2);
if (add == nullptr || mask == nullptr) {
return nullptr;
}
int addidx = 0;
if (add->Opcode() == Op_Add(bt)) {
addidx = 1;
Expand All @@ -2200,14 +2212,12 @@ Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) {
if (addidx > 0) {
Node* add1 = add->in(1);
Node* add2 = add->in(2);
if (add1 != nullptr && add2 != nullptr) {
if (AndIL_shift_and_mask_is_always_zero(phase, add1, mask, bt, false)) {
set_req_X(addidx, add2, phase);
return this;
} else if (AndIL_shift_and_mask_is_always_zero(phase, add2, mask, bt, false)) {
set_req_X(addidx, add1, phase);
return this;
}
if (AndIL_is_zero_element_under_mask(phase, add1, mask, bt)) {
set_req_X(addidx, add2, phase);
return this;
} else if (AndIL_is_zero_element_under_mask(phase, add2, mask, bt)) {
set_req_X(addidx, add1, phase);
return this;
}
}
return nullptr;
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/share/opto/mulnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ class MulNode : public Node {

static MulNode* make(Node* in1, Node* in2, BasicType bt);

static bool AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse);
Node* AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt);
protected:
Node* AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please update the copyright from 2024 -> 2025?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfect!

};

//------------------------------MulINode---------------------------------------
Expand Down
Loading