8346664: C2: Optimize mask check with constant offset #22856
Changes from all commits
@@ -692,9 +692,11 @@ const Type *AndINode::mul_ring( const Type *t0, const Type *t1 ) const {
   return and_value<TypeInt>(r0, r1);
 }
 
+static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt);
+
 const Type* AndINode::Value(PhaseGVN* phase) const {
-  // patterns similar to (v << 2) & 3
-  if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_INT, true)) {
+  if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_INT) ||
+      AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_INT)) {
     return TypeInt::ZERO;
   }
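Reader's note (not part of the patch): this Value() change lets C2 type the AndI node of expressions like the following as a constant zero, with the shift in either operand position. A minimal Java sketch with illustrative names:

```java
public class MaskFoldExample {
    // (v << 2) has at least 2 trailing zero bits, and the mask 3 only has
    // bits below bit 2 set, so the AND is always 0. The new check is tried
    // for both operand orders of the AND.
    static int shiftThenMask(int v) {
        return (v << 2) & 3;   // always 0
    }

    static int maskThenShift(int v) {
        return 3 & (v << 2);   // always 0, operands in the other order
    }

    public static void main(String[] args) {
        for (int v : new int[] { -7, 0, 1, 123_456_789 }) {
            assert shiftThenMask(v) == 0 && maskThenShift(v) == 0;
        }
        System.out.println("both expressions are always 0");
    }
}
```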
@@ -740,8 +742,8 @@ Node* AndINode::Identity(PhaseGVN* phase) {
 
 //------------------------------Ideal------------------------------------------
 Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  // pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3
-  Node* progress = AndIL_add_shift_and_mask(phase, T_INT);
+  // Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible.
+  Node* progress = AndIL_sum_and_mask(phase, T_INT);
   if (progress != nullptr) {
     return progress;
   }
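Reader's note: the renamed AndIL_sum_and_mask covers both the old shifted-addend shape and, per the PR title, addends that are plain constants with enough trailing zeros. A small illustrative Java sketch (class and method names are made up):

```java
public class SumAndMaskExample {
    // Classic shape: (v2 << 2) contributes nothing below bit 2,
    // so (v1 + (v2 << 2)) & 3 is the same as v1 & 3.
    static int shiftedAddend(int v1, int v2) {
        return (v1 + (v2 << 2)) & 3;
    }

    // Constant-offset shape: 8 has 3 trailing zero bits and the mask 7 is
    // only 3 bits wide, so (v1 + 8) & 7 is the same as v1 & 7.
    static int constantAddend(int v1) {
        return (v1 + 8) & 7;
    }

    public static void main(String[] args) {
        for (int v1 : new int[] { -5, 0, 42 }) {
            assert shiftedAddend(v1, 13) == (v1 & 3);
            assert constantAddend(v1) == (v1 & 7);
        }
        System.out.println("addends with enough trailing zeros vanish under the mask");
    }
}
```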
@@ -824,8 +826,8 @@ const Type *AndLNode::mul_ring( const Type *t0, const Type *t1 ) const {
 }
 
 const Type* AndLNode::Value(PhaseGVN* phase) const {
-  // patterns similar to (v << 2) & 3
-  if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_LONG, true)) {
+  if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_LONG) ||
+      AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_LONG)) {
     return TypeLong::ZERO;
   }
@@ -872,8 +874,8 @@ Node* AndLNode::Identity(PhaseGVN* phase) {
 
 //------------------------------Ideal------------------------------------------
 Node *AndLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  // pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3
-  Node* progress = AndIL_add_shift_and_mask(phase, T_LONG);
+  // Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible.
+  Node* progress = AndIL_sum_and_mask(phase, T_LONG);
   if (progress != nullptr) {
     return progress;
   }
@@ -2096,99 +2098,109 @@ const Type* RotateRightNode::Value(PhaseGVN* phase) const {
   }
 }
 
-// Given an expression (AndX shift mask) or (AndX mask shift),
-// determine if the AndX must always produce zero, because the
-// the shift (x<<N) is bitwise disjoint from the mask #M.
-// The X in AndX must be I or L, depending on bt.
-// Specifically, the following cases fold to zero,
-// when the shift value N is large enough to zero out
-// all the set positions of the and-mask M.
-//   (AndI (LShiftI _ #N) #M) => #0
-//   (AndL (LShiftL _ #N) #M) => #0
-//   (AndL (ConvI2L (LShiftI _ #N)) #M) => #0
-// The M and N values must satisfy ((-1 << N) & M) == 0.
-// Because the optimization might work for a non-constant
-// mask M, we check the AndX for both operand orders.
-bool MulNode::AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse) {
-  if (mask == nullptr || shift == nullptr) {
-    return false;
-  }
-  const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt);
-  if (mask_t == nullptr || phase->type(shift)->isa_integer(bt) == nullptr) {
-    return false;
-  }
-  shift = shift->uncast();
-  if (shift == nullptr) {
-    return false;
+//------------------------------ Sum & Mask ------------------------------
+
+// Returns a lower bound on the number of trailing zeros in expr.
+static jint AndIL_min_trailing_zeros(const PhaseGVN* phase, const Node* expr, BasicType bt) {
Comment on lines +2103 to +2104

Contributor: Is this method restricted to use in AndIL? Because it looks like it is doing something more generic: trying to figure out a lower bound on the trailing zeros of an expression. If that is the case: why not put it in Node?

Contributor (Author): I would argue that while this might be incidentally reusable outside of the scope of "And" nodes, as long as there is no actual demand to reuse this, I would rather not add it to the rather prominent Node class to avoid API bloat. Iff the notion of "is known to be a multiple of a certain power of two" is really of general interest, I would expect it to become part of […].

Contributor: Sure, just leave it where it is for now. I'm ok with it.
-  }
-  if (phase->type(shift)->isa_integer(bt) == nullptr) {
-    return false;
-  }
-  BasicType shift_bt = bt;
-  if (bt == T_LONG && shift->Opcode() == Op_ConvI2L) {
-    Node* val = shift->in(1);
-    if (val == nullptr) {
-      return false;
-    }
-    val = val->uncast();
-    if (val == nullptr) {
-      return false;
-    }
-    if (val->Opcode() == Op_LShiftI) {
-      shift_bt = T_INT;
-      shift = val;
-      if (phase->type(shift)->isa_integer(bt) == nullptr) {
-        return false;
-      }
-    }
+  expr = expr->uncast();
+  const TypeInteger* type = phase->type(expr)->isa_integer(bt);
+  if (type == nullptr) {
+    return 0;
+  }
+
+  if (type->is_con()) {
+    jlong con = type->get_con_as_long(bt);
+    return con == 0L ? (type2aelembytes(bt) * BitsPerByte) : count_trailing_zeros(con);
+  }
+
+  if (expr->Opcode() == Op_ConvI2L) {
+    expr = expr->in(1)->uncast();
+    bt = T_INT;
+    type = phase->type(expr)->isa_int();
Member: You are trying to look through a […]

Contributor (Author): Done.
-  }
-  if (shift->Opcode() != Op_LShift(shift_bt)) {
-    if (check_reverse &&
-        (mask->Opcode() == Op_LShift(bt) ||
-         (bt == T_LONG && mask->Opcode() == Op_ConvI2L))) {
-      // try it the other way around
-      return AndIL_shift_and_mask_is_always_zero(phase, mask, shift, bt, false);
-    }
-    return false;
-  }
-  Node* shift2 = shift->in(2);
-  if (shift2 == nullptr) {
-    return false;
-  }
-  const Type* shift2_t = phase->type(shift2);
-  if (!shift2_t->isa_int() || !shift2_t->is_int()->is_con()) {
+  }
+
+  // Pattern: expr = (x << shift)
+  if (expr->Opcode() == Op_LShift(bt)) {
+    const TypeInt* shift_t = phase->type(expr->in(2))->isa_int();
+    if (shift_t == nullptr || !shift_t->is_con()) {
+      return 0;
+    }
+    // We need to truncate the shift, as it may not have been canonicalized yet.
+    // T_INT:  0..31 -> shift_mask = 4 * 8 - 1 = 31
+    // T_LONG: 0..63 -> shift_mask = 8 * 8 - 1 = 63
+    // (JLS: "Shift Operators")
+    jint shift_mask = type2aelembytes(bt) * BitsPerByte - 1;
+    return shift_t->get_con() & shift_mask;
+  }
+
+  return 0;
+}
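Reader's note: a hedged Java sketch of the same trailing-zeros bound, computed on plain values rather than on C2's type lattice (all names are illustrative). A constant contributes its exact trailing-zero count (a constant zero counts as the full bit width), and a shift by a constant contributes the shift amount after the JLS masking of the shift distance; anything else would contribute a bound of 0.

```java
public final class MinTrailingZeros {
    // Lower bound on the trailing zeros of a long-typed expression,
    // mirroring the constant and shift-by-constant cases above.

    static int forConstant(long con) {
        // A constant zero is "all zeros": report the full bit width (64).
        return con == 0L ? Long.SIZE : Long.numberOfTrailingZeros(con);
    }

    static int forShiftByConstant(int shiftCon) {
        // x << shiftCon has at least (shiftCon & 63) trailing zeros,
        // because Java masks long shift distances to 0..63 (JLS 15.19).
        return shiftCon & (Long.SIZE - 1);
    }

    public static void main(String[] args) {
        assert forConstant(8) == 3;          // 0b1000
        assert forConstant(0) == 64;         // constant zero: full width
        assert forShiftByConstant(68) == 4;  // 68 & 63 == 4, like x << 68 == x << 4
        System.out.println("trailing-zero lower bounds check out");
    }
}
```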
+// Checks whether expr is neutral additive element (zero) under mask,
+// i.e. whether an expression of the form:
+//   (AndX (AddX (expr addend) mask)
+//   (expr + addend) & mask
+// is equivalent to
+//   (AndX addend mask)
+//   addend & mask
+// for any addend.
+// (The X in AndX must be I or L, depending on bt).
+//
+// We check for the sufficient condition when the lowest set bit in expr is higher than
+// the highest set bit in mask, i.e.:
+//   expr: eeeeee0000000000000
+//   mask: 000000mmmmmmmmmmmmm
+//               <--w bits--->
+// We do not test for other cases.
+//
+// Correctness:
+//   Given "expr" with at least "w" trailing zeros,
+//   let "mod = 2^w", "suffix_mask = mod - 1"
+//
+//   Since "mask" only has bits set where "suffix_mask" does, we have:
+//     mask = suffix_mask & mask                                      (SUFFIX_MASK)
+//
+//   And since expr only has bits set above w, and suffix_mask only below:
+//     expr & suffix_mask == 0                                        (NO_BIT_OVERLAP)
+//
+//   From unsigned modular arithmetic (with unsigned modulo %), and since mod is
+//   a power of 2, and we are computing in a ring of powers of 2, we know that
+//     (x + y) % mod         = (x % mod         + y) % mod
+//     (x + y) & suffix_mask = (x & suffix_mask + y) & suffix_mask    (MOD_ARITH)
+//
+//   We can now prove the equality:
+//     (expr + addend) & mask
+//     = (expr + addend) & suffix_mask & mask                         (SUFFIX_MASK)
+//     = (expr & suffix_mask + addend) & suffix_mask & mask           (MOD_ARITH)
+//     = (0 + addend) & suffix_mask & mask                            (NO_BIT_OVERLAP)
+//     = addend & mask                                                (SUFFIX_MASK)
+//
+// Hence, an expr with at least w trailing zeros is a neutral additive element under any mask with bit width w.
+static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt) {
+  // When the mask is negative, it has the most significant bit set.
+  const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt);
+  if (mask_t == nullptr || mask_t->lo_as_long() < 0) {
+    return false;
+  }
-  jint shift_con = shift2_t->is_int()->get_con() & ((shift_bt == T_INT ? BitsPerJavaInteger : BitsPerJavaLong) - 1);
-  if ((((jlong)1) << shift_con) > mask_t->hi_as_long() && mask_t->lo_as_long() >= 0) {
-    return true;
-  }
-
-  return false;
-}
+
+  // When the mask is constant zero, we defer to MulNode::Value to eliminate the entire AndX operation.
+  if (mask_t->hi_as_long() == 0) {
+    assert(mask_t->lo_as_long() == 0, "checked earlier");
+    return false;
+  }
+
+  jint mask_bit_width = BitsPerLong - count_leading_zeros(mask_t->hi_as_long());
+  jint expr_trailing_zeros = AndIL_min_trailing_zeros(phase, expr, bt);
+  return expr_trailing_zeros >= mask_bit_width;
+}
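Reader's note: the identity the comment proves can be spot-checked with concrete numbers. A self-contained Java check (illustrative only), including addends that overflow:

```java
public class ZeroElementUnderMaskCheck {
    public static void main(String[] args) {
        long mask = 0b0001_1111;          // bit width w = 5
        long expr = 123L << 5;            // at least 5 trailing zeros
        long[] addends = { 0L, 1L, 31L, -17L, Long.MAX_VALUE, Long.MIN_VALUE };
        for (long addend : addends) {
            // Two's-complement addition is modular, so the bits of
            // (expr + addend) below w depend only on addend's bits below w.
            assert ((expr + addend) & mask) == (addend & mask);
        }
        System.out.println("expr acts as an additive zero under the mask");
    }
}
```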
-// Given an expression (AndX (AddX v1 (LShiftX v2 #N)) #M)
-// determine if the AndX must always produce (AndX v1 #M),
-// because the shift (v2<<N) is bitwise disjoint from the mask #M.
-// The X in AndX will be I or L, depending on bt.
-// Specifically, the following cases fold,
-// when the shift value N is large enough to zero out
-// all the set positions of the and-mask M.
-//   (AndI (AddI v1 (LShiftI _ #N)) #M) => (AndI v1 #M)
-//   (AndL (AddI v1 (LShiftL _ #N)) #M) => (AndL v1 #M)
-//   (AndL (AddL v1 (ConvI2L (LShiftI _ #N))) #M) => (AndL v1 #M)
-// The M and N values must satisfy ((-1 << N) & M) == 0.
-// Because the optimization might work for a non-constant
-// mask M, and because the AddX operands can come in either
-// order, we check for every operand order.
-Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) {
+// Reduces the pattern:
+//   (AndX (AddX add1 add2) mask)
+// to
+//   (AndX add1 mask), if add2 is neutral wrt mask (see above), and vice versa.
+Node* MulNode::AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt) {
   Node* add = in(1);
   Node* mask = in(2);
   if (add == nullptr || mask == nullptr) {
     return nullptr;
   }
   int addidx = 0;
   if (add->Opcode() == Op_Add(bt)) {
     addidx = 1;
@@ -2200,14 +2212,12 @@ Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) {
   if (addidx > 0) {
     Node* add1 = add->in(1);
     Node* add2 = add->in(2);
-    if (add1 != nullptr && add2 != nullptr) {
-      if (AndIL_shift_and_mask_is_always_zero(phase, add1, mask, bt, false)) {
-        set_req_X(addidx, add2, phase);
-        return this;
-      } else if (AndIL_shift_and_mask_is_always_zero(phase, add2, mask, bt, false)) {
-        set_req_X(addidx, add1, phase);
-        return this;
-      }
+    if (AndIL_is_zero_element_under_mask(phase, add1, mask, bt)) {
+      set_req_X(addidx, add2, phase);
+      return this;
+    } else if (AndIL_is_zero_element_under_mask(phase, add2, mask, bt)) {
+      set_req_X(addidx, add1, phase);
+      return this;
     }
   }
   return nullptr;
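Reader's note: one shape from the removed comment that still folds after the rewrite is the widened int shift under a long mask, (AndL (AddL v1 (ConvI2L (LShiftI _ #N))) #M). An illustrative Java sketch (names are made up):

```java
public class LongMaskOffsetExample {
    // Mixed int/long shape: the int shift is widened to long (ConvI2L in C2),
    // and its 3 trailing zeros still cover the 3-bit mask, so
    // (base + (i << 3)) & 7 reduces to base & 7.
    static long alignmentBits(long base, int i) {
        return (base + (i << 3)) & 7L;
    }

    public static void main(String[] args) {
        for (int i = -2; i <= 2; i++) {
            for (long base : new long[] { -9L, 0L, 1_234_567L }) {
                assert alignmentBits(base, i) == (base & 7L);
            }
        }
        System.out.println("the widened shifted addend is dropped under the mask");
    }
}
```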
@@ -83,8 +83,8 @@ class MulNode : public Node {
 
   static MulNode* make(Node* in1, Node* in2, BasicType bt);
 
-  static bool AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse);
-  Node* AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt);
+ protected:
+  Node* AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt);
Contributor: Can you please update the copyright from 2024 -> 2025?

Contributor (Author): Done.

Contributor: Perfect!
 };
 
 //------------------------------MulINode---------------------------------------
Reviewer: Why do you split declaration and definition? Could the body just be moved up here?

Author: Because of the details wrt Add, the body feels out of context here. I find it easier to read when it's close to AndIL_sum_and_mask below.