Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 156 additions & 37 deletions src/hotspot/share/opto/intrinsicnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,58 +237,167 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg

jlong hi = bt == T_INT ? max_jint : max_jlong;
jlong lo = bt == T_INT ? min_jint : min_jlong;
assert(bt == T_INT || bt == T_LONG, "");

if(mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now you removed the condition mask_type->get_con_as_long(bt) != -1L. Do you know why it was there in the first place?

It seems to me that if mask_type->get_con_as_long(bt) == -1L, then we can just return the type of src, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bug-fix for CompressBitsNode::Value, but this change also has an effect on ExpandBitsNode::Value, and that makes me a little nervous. For example: do we have enough test coverage for expand? It seems we did not have enough tests for compress, so probably also not for expand...

Copy link
Member Author

@jatin-bhateja jatin-bhateja Jul 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now you removed the condition mask_type->get_con_as_long(bt) != -1L. Do you know why it was there in the first place?

It seems to me that if mask_type->get_con_as_long(bt) == -1L, then we can just return the type of src, right?

Correct, also for non-constant masks we can even find the maximum set bit count of entier value range and then estimate the bounds of the result.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>

uint64_t popcnt(int64_t val) {
  uint64_t res = 0;
  asm volatile("popcntq %1, %0 " : "=r"(res) : "r"(val) : "cc");
  return res;
}

typedef struct _result {
  int64_t value;
  int64_t mask;
  int64_t count;
} result;

result compute_max_mask(int64_t hi, int64_t lo) {
  assert(hi >= lo);
  result res;
  int64_t max_true_bits = 0;
  int64_t max_true_bits_val = 0;
  for (int64_t iter = lo; iter < hi; iter++) {
    int setbitscnt = popcnt(iter);
    if (max_true_bits < setbitscnt) {
      max_true_bits = setbitscnt;
      max_true_bits_val = iter;
    }
  }
  res.value = max_true_bits_val;
  res.mask = (1L << max_true_bits) - 1L;
  res.count = max_true_bits;
  return res;
}

int main(int argc, char* argv[]) {
  if (argc != 3) {
     return printf("Invalid arguments, <app> [lo] [hi]\n");
  }
  int64_t lo = atol(argv[1]);
  int64_t hi = atol(argv[2]);
  result mask = compute_max_mask(hi, lo);
  return printf("[lo] = %ld [hi] = %ld [set bits count] %ld [mask] = %ld [max bits count value] %ld\n",
                lo, hi, mask.count, mask.mask, mask.value);
}

For this bug fix patch I don't want to include this extension.

// Rule 1: Bit compression selects the source bits corresponding to true mask bits,
// packs them and places them contiguously at destination bit positions
// starting from least significant bit, remaining higher order bits are set
// to zero.

// Rule 2: Bit expansion is a reverse process, which sequentially reads source bits
// starting from LSB and places them at bit positions in result value where
// corresponding mask bits are 1. Thus, bit expansion for non-negative mask
// value will always generate a +ve value, this is because sign bit of result
// will never be set to 1 as corresponding mask bit is always 0.

// Case A) Constant mask
if (mask_type->is_con()) {
jlong maskcon = mask_type->get_con_as_long(bt);
int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
if (opc == Op_CompressBits) {
// Bit compression selects the source bits corresponding to true mask bits
// and lays them out contiguously at destination bit positions starting from
// LSB, remaining higher order bits are set to zero.
// Thus, it will always generate a +ve value i.e. sign bit set to 0 if
// any bit of constant mask value is zero.
lo = 0L;
hi = (1UL << bitcount) - 1;
// Case A.1 bit compression:-
// For an outlier mask value of -1 upper bound of the result equals
// maximum integral value, for any other mask value its computed using
// following formula
// Result.Hi = 1 << popcount(mask_bits) - 1
//
// For mask values other than -1, lower bound of the result is estimated
// as zero, by assuming at least one mask bit is zero and corresponding source
// bit will be masked, hence result of bit compression will always be
// non-negative value. For outlier mask value of -1, assume all source bits
// apart from most significant bit were set to 0, thereby resulting in
// a minimum integral value.
// e.g.
// src = 0xXXXXXXXX (non-constant source)
// mask = 0xEFFFFFFF (constant mask)
// result.hi = 0x7FFFFFFF
// result.lo = 0
if (maskcon != -1L) {
int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
hi = (1UL << bitcount) - 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace 1UL -> 1ULL for Windows.

lo = 0L;
} else {
// preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values
// for unknown source bits.
assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
}
} else {
// Case A.2 bit expansion:-
assert(opc == Op_ExpandBits, "");
// Expansion sequentially reads source bits starting from LSB
// and places them over destination at bit positions corresponding
// set mask bit. Thus bit expansion for non-negative mask value
// will always generate a +ve value.
hi = maskcon >= 0L ? maskcon : maskcon ^ lo;
lo = maskcon >= 0L ? 0L : lo;
if (maskcon >= 0L) {
// Case A.2.1 constant mask >= 0
// Result.Hi = mask, optimistically assuming all source bits
// read starting from least significant bit positions are 1.
// Result.Lo = 0, because at least one bit in mask is zero.
// e.g.
// src = 0xXXXXXXXX (non-constant source)
// mask = 0x7FFFFFFF (constant mask >= 0)
// result.hi = 0x7FFFFFFF
// result.lo = 0
hi = maskcon;
lo = 0L;
} else {
// Case A.2.2) mask < 0
// For constant mask strictly less than zero, the maximum result value will be
// the same as the mask value with its sign bit flipped, assuming all source bits
// except the MSB bit are set(one).
//
// To compute minimum result value we assume all but last read source bit as zero,
// this is because sign bit of result will always be set to 1 while other bit
// corresponding to set mask bit should be zero.
// e.g.
// src = 0xXXXXXXXX (non-constant source)
// mask = 0xEFFFFFFF (constant mask)
// result.hi = 0xEFFFFFFF ^ 0x80000000 = 0x6FFFFFFF
// result.lo = 0x80000000
//
hi = maskcon ^ lo;
// lo still retains MIN_INT/LONG.
assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
}
}
}

// Case B) Non-constant mask.
if (!mask_type->is_con()) {
int mask_max_bw;
int max_bw = bt == T_INT ? 32 : 64;
// Case 1) Mask value range includes -1.
if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
mask_max_bw = max_bw;
// Case 2) Mask value range is less than -1.
} else if (mask_type->hi_as_long() < -1L) {
mask_max_bw = max_bw - 1;
} else {
// Case 3) Mask value range only includes +ve values.
assert(mask_type->lo_as_long() >= 0, "");
jlong clz = count_leading_zeros(mask_type->hi_as_long());
clz = bt == T_INT ? clz - 32 : clz;
mask_max_bw = max_bw - clz;
}
if ( opc == Op_CompressBits) {
lo = mask_max_bw == max_bw ? lo : 0L;
// Compress operation is inherently an unsigned operation and
// result value range is primarily dependent on true count
// of participating mask value.
hi = mask_max_bw < max_bw ? (1L << mask_max_bw) - 1 : src_type->hi_as_long();
int result_bit_width;
int mask_bit_width = bt == T_INT ? 32 : 64;
if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
// Case B.1 The mask value range includes -1, hence we may use all bits,
// the result has the whole value range.
result_bit_width = mask_bit_width;
} else if (mask_type->hi_as_long() < -1L) {
// Case B.2 Mask value range is strictly less than -1, this indicates presence of at least
// one unset(zero) bit in mask value, thus as per Rule 1, bit compression will always
// result in a non-negative value. This guarantees that MSB bit of result value will
// always be set to zero.
result_bit_width = mask_bit_width - 1;
} else {
assert(mask_type->lo_as_long() >= 0, "");
// Case B.3 Mask value range only includes non-negative values. Since all integral
// types honours an invariant that TypeInteger._lo <= TypeInteger._hi, thus computing
// leading zero bits of upper bound of mask value will allow us to ascertain
// optimistic upper bound of result i.e. all the bits other than leading zero bits
// can be assumed holding 1 value.
jlong clz = count_leading_zeros(mask_type->hi_as_long());
// Here, result of clz is w.r.t to long argument, hence for integer argument
// we explicitly subtract 32 from the result.
clz = bt == T_INT ? clz - 32 : clz;
result_bit_width = mask_bit_width - clz;
}
// If the number of bits required to for the mask value range is less than the
// full bit width of the integral type, then the MSB bit is guaranteed to be zero,
// thus the compression result will never be a -ve value and we can safely set the
// lower bound of the bit compression to zero.
lo = result_bit_width == mask_bit_width ? lo : 0L;

assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
assert(lo == (bt == T_INT ? min_jint : min_jlong) || lo == 0, "");

if (src_type->lo_as_long() >= 0) {
// Lemma 1: For strictly non-negative src, the result of the compression will never be
// greater than src.
// Proof: Since src is a non-negative value, its most significant bit is always 0.
// Thus even if the corresponding MSB of the mask is one, the result will be a +ve
// value. There are three possible cases
// a. All the mask bits corresponding to set source bits are unset(zero).
// b. All the mask bits corresponding to set source bits are set(one)
// c. Some mask bits corresponding to set source bits are set(one) while others are unset(zero)
//
// Case a. results into an allzero result, while Case b. gives us the upper bound which is equals source
// value, while for Case c. the result will lie within [0, src]
//
hi = src_type->hi_as_long();
lo = 0L;
}

if (result_bit_width < mask_bit_width) {
// Rule 3:
// We can further constrain the upper bound of bit compression if the number of bits
// which can be set(one) is less than the maximum number of bits of integral type.
hi = MIN2((jlong)((1UL << result_bit_width) - 1L), hi);
Copy link
Contributor

@eme64 eme64 Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace 1UL -> 1ULL for Windows.

}
} else {
assert(opc == Op_ExpandBits, "");
jlong max_mask = mask_type->hi_as_long();
jlong min_mask = mask_type->lo_as_long();
// Since mask here a range and not a constant value, hence being
// conservative in determining the value range of result.
lo = mask_type->lo_as_long() >= 0L ? 0L : lo;
hi = mask_type->lo_as_long() >= 0L ? max_mask : hi;
if (min_mask >= 0L) {
// Lemma 2: Based on the integral type invariant ie. TypeInteger.lo <= TypeInteger.hi,
// if the lower bound of non-constant mask is a non-negative value then result can never
// be greater than the mask.
// Proof: Since lower bound of the mask is a non-negative value, hence most significant
// bit of its entire value must be unset(zero). If all the lower order 'n' source bits
// where n corresponds to popcount of mask are set(ones) then upper bound of the result equals
// mask. In order to compute the lower bound, we pssimistically assume all the lower order 'n'
// source bits are unset(zero) there by resuling into a zero value.
hi = max_mask;
lo = 0;
} else {
// preserve the lo and hi bounds estimated till now.
}
}
}

Expand Down Expand Up @@ -329,6 +438,11 @@ const Type* CompressBitsNode::Value(PhaseGVN* phase) const {
static_cast<const Type*>(TypeLong::make(res));
}

// Result is zero if src is zero irrespective of mask value.
if (src_type == TypeInteger::zero(bt)) {
return TypeInteger::zero(bt);
}

return bitshuffle_value(src_type, mask_type, Op_CompressBits, bt);
}

Expand Down Expand Up @@ -365,5 +479,10 @@ const Type* ExpandBitsNode::Value(PhaseGVN* phase) const {
static_cast<const Type*>(TypeLong::make(res));
}

// Result is zero if src is zero irrespective of mask value.
if (src_type == TypeInteger::zero(bt)) {
return TypeInteger::zero(bt);
}

return bitshuffle_value(src_type, mask_type, Op_ExpandBits, bt);
}
Loading