openjdk · jatin-bhateja · Mar 7, 2025 · Apr 2, 2025 · Apr 10, 2025 · Apr 21, 2025
diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -237,58 +237,167 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
 
   jlong hi = bt == T_INT ? max_jint : max_jlong;
   jlong lo = bt == T_INT ? min_jint : min_jlong;
+  assert(bt == T_INT || bt == T_LONG, "");
 
-  if(mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
+  // Rule 1: Bit compression selects the source bits corresponding to true mask bits,
+  // packs them and places them contiguously at destination bit positions
+  // starting from least significant bit, remaining higher order bits are set
+  // to zero.
+
+  // Rule 2: Bit expansion is a reverse process, which sequentially reads source bits
+  // starting from LSB and places them at bit positions in result value where
+  // corresponding mask bits are 1. Thus, bit expansion for non-negative mask
+  // value will always generate a +ve value, this is because sign bit of result
+  // will never be set to 1 as corresponding mask bit is always 0.
+
+  // Case A) Constant mask
+  if (mask_type->is_con()) {
     jlong maskcon = mask_type->get_con_as_long(bt);
-    int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
     if (opc == Op_CompressBits) {
-      // Bit compression selects the source bits corresponding to true mask bits
-      // and lays them out contiguously at destination bit positions starting from
-      // LSB, remaining higher order bits are set to zero.
-      // Thus, it will always generate a +ve value i.e. sign bit set to 0 if
-      // any bit of constant mask value is zero.
-      lo = 0L;
-      hi = (1UL << bitcount) - 1;
+      // Case A.1 bit compression:-
+      // For an outlier mask value of -1 upper bound of the result equals
+      // maximum integral value, for any other mask value its computed using
+      // following formula
+      //       Result.Hi = 1 << popcount(mask_bits) - 1
+      //
+      // For mask values other than -1, lower bound of the result is estimated
+      // as zero, by assuming at least one mask bit is zero and corresponding source
+      // bit will be masked, hence result of bit compression will always be
+      // non-negative value. For outlier mask value of -1, assume all source bits
+      // apart from most significant bit were set to 0, thereby resulting in
+      // a minimum integral value.
+      // e.g.
+      //  src = 0xXXXXXXXX (non-constant source)
+      //  mask = 0xEFFFFFFF (constant mask)
+      //  result.hi = 0x7FFFFFFF
+      //  result.lo = 0
+      if (maskcon != -1L) {
+        int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
+        hi = (1UL << bitcount) - 1;
+        lo = 0L;
+      } else {
+        // preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values
+        // for unknown source bits.
+        assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
+        assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
+      }
     } else {
+      // Case A.2 bit expansion:-
       assert(opc == Op_ExpandBits, "");
-      // Expansion sequentially reads source bits starting from LSB
-      // and places them over destination at bit positions corresponding
-      // set mask bit. Thus bit expansion for non-negative mask value
-      // will always generate a +ve value.
-      hi = maskcon >= 0L ? maskcon : maskcon ^ lo;
-      lo = maskcon >= 0L ? 0L : lo;
+      if (maskcon >= 0L) {
+        //   Case A.2.1 constant mask >= 0
+        //     Result.Hi = mask, optimistically assuming all source bits
+        //     read starting from least significant bit positions are 1.
+        //     Result.Lo = 0, because at least one bit in mask is zero.
+        //   e.g.
+        //    src = 0xXXXXXXXX (non-constant source)
+        //    mask = 0x7FFFFFFF (constant mask >= 0)
+        //    result.hi = 0x7FFFFFFF
+        //    result.lo = 0
+        hi = maskcon;
+        lo = 0L;
+      } else {
+        //   Case A.2.2) mask < 0
+        //     For constant mask strictly less than zero, the maximum result value will be
+        //     the same as the mask value with its sign bit flipped, assuming all source bits
+        //     except the MSB bit are set(one).
+        //
+        //     To compute minimum result value we assume all but last read source bit as zero,
+        //     this is because sign bit of result will always be set to 1 while other bit
+        //     corresponding to set mask bit should be zero.
+        //   e.g.
+        //    src = 0xXXXXXXXX (non-constant source)
+        //    mask = 0xEFFFFFFF (constant mask)
+        //    result.hi = 0xEFFFFFFF ^ 0x80000000 = 0x6FFFFFFF
+        //    result.lo = 0x80000000
+        //
+        hi = maskcon ^ lo;
+        // lo still retains MIN_INT/LONG.
+        assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
+      }
     }
   }
 
+  // Case B) Non-constant mask.
   if (!mask_type->is_con()) {
-    int mask_max_bw;
-    int max_bw = bt == T_INT ? 32 : 64;
-    // Case 1) Mask value range includes -1.
-    if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
-      mask_max_bw = max_bw;
-    // Case 2) Mask value range is less than -1.
-    } else if (mask_type->hi_as_long() < -1L) {
-      mask_max_bw = max_bw - 1;
-    } else {
-    // Case 3) Mask value range only includes +ve values.
-      assert(mask_type->lo_as_long() >= 0, "");
-      jlong clz = count_leading_zeros(mask_type->hi_as_long());
-      clz = bt == T_INT ? clz - 32 : clz;
-      mask_max_bw = max_bw - clz;
-    }
     if ( opc == Op_CompressBits) {
-      lo = mask_max_bw == max_bw ? lo : 0L;
-      // Compress operation is inherently an unsigned operation and
-      // result value range is primarily dependent on true count
-      // of participating mask value.
-      hi = mask_max_bw < max_bw ? (1L << mask_max_bw) - 1 : src_type->hi_as_long();
+      int result_bit_width;
+      int mask_bit_width = bt == T_INT ? 32 : 64;
+      if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
+        // Case B.1 The mask value range includes -1, hence we may use all bits,
+        //          the result has the whole value range.
+        result_bit_width = mask_bit_width;
+      } else if (mask_type->hi_as_long() < -1L) {
+        // Case B.2 Mask value range is strictly less than -1, this indicates presence of at least
+        // one unset(zero) bit in mask value, thus as per Rule 1, bit compression will always
+        // result in a non-negative value. This guarantees that MSB bit of result value will
+        // always be set to zero.
+        result_bit_width = mask_bit_width - 1;
+      } else {
+        assert(mask_type->lo_as_long() >= 0, "");
+        // Case B.3 Mask value range only includes non-negative values. Since all integral
+        // types honours an invariant that TypeInteger._lo <= TypeInteger._hi, thus computing
+        // leading zero bits of upper bound of mask value will allow us to ascertain
+        // optimistic upper bound of result i.e. all the bits other than leading zero bits
+        // can be assumed holding 1 value.
+        jlong clz = count_leading_zeros(mask_type->hi_as_long());
+        // Here, result of clz is w.r.t to long argument, hence for integer argument
+        // we explicitly subtract 32 from the result.
+        clz = bt == T_INT ? clz - 32 : clz;
+        result_bit_width = mask_bit_width - clz;
+      }
+      // If the number of bits required to for the mask value range is less than the
+      // full bit width of the integral type, then the MSB bit is guaranteed to be zero,
+      // thus the compression result will never be a -ve value and we can safely set the
+      // lower bound of the bit compression to zero.
+      lo = result_bit_width == mask_bit_width ? lo : 0L;
+
+      assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
+      assert(lo == (bt == T_INT ? min_jint : min_jlong) || lo == 0, "");
+
+      if (src_type->lo_as_long() >= 0) {
+        // Lemma 1: For strictly non-negative src, the result of the compression will never be
+        // greater than src.
+        // Proof: Since src is a non-negative value, its most significant bit is always 0.
+        // Thus even if the corresponding MSB of the mask is one, the result will be a +ve
+        // value. There are three possible cases
+        //   a. All the mask bits corresponding to set source bits are unset(zero).
+        //   b. All the mask bits corresponding to set source bits are set(one)
+        //   c. Some mask bits corresponding to set source bits are set(one) while others are unset(zero)
+        //
+        // Case a. results into an allzero result, while Case b. gives us the upper bound which is equals source
+        // value, while for Case c. the result will lie within [0, src]
+        //
+        hi = src_type->hi_as_long();
+        lo = 0L;
+      }
+
+      if (result_bit_width < mask_bit_width) {
+        // Rule 3:
+        // We can further constrain the upper bound of bit compression if the number of bits
+        // which can be set(one) is less than the maximum number of bits of integral type.
+        hi = MIN2((jlong)((1UL << result_bit_width) - 1L), hi);
+      }
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
+      jlong min_mask = mask_type->lo_as_long();
       // Since mask here a range and not a constant value, hence being
       // conservative in determining the value range of result.
-      lo = mask_type->lo_as_long() >= 0L ? 0L : lo;
-      hi = mask_type->lo_as_long() >= 0L ? max_mask : hi;
+      if (min_mask >= 0L) {
+        // Lemma 2: Based on the integral type invariant ie. TypeInteger.lo <= TypeInteger.hi,
+        // if the lower bound of non-constant mask is a non-negative value then result can never
+        // be greater than the mask.
+        // Proof: Since lower bound of the mask is a non-negative value, hence most significant
+        // bit of its entire value must be unset(zero). If all the lower order 'n' source bits
+        // where n corresponds to popcount of mask are set(ones) then upper bound of the result equals
+        // mask. In order to compute the lower bound, we pssimistically assume all the lower order 'n'
+        // source bits are unset(zero) there by resuling into a zero value.
+        hi = max_mask;
+        lo = 0;
+      } else {
+        // preserve the lo and hi bounds estimated till now.
+      }
     }
   }
 
@@ -329,6 +438,11 @@ const Type* CompressBitsNode::Value(PhaseGVN* phase) const {
                          static_cast<const Type*>(TypeLong::make(res));
   }
 
+  // Result is zero if src is zero irrespective of mask value.
+  if (src_type == TypeInteger::zero(bt)) {
+     return TypeInteger::zero(bt);
+  }
+
   return bitshuffle_value(src_type, mask_type, Op_CompressBits, bt);
 }
 
@@ -365,5 +479,10 @@ const Type* ExpandBitsNode::Value(PhaseGVN* phase) const {
                           static_cast<const Type*>(TypeLong::make(res));
   }
 
+  // Result is zero if src is zero irrespective of mask value.
+  if (src_type == TypeInteger::zero(bt)) {
+     return TypeInteger::zero(bt);
+  }
+
   return bitshuffle_value(src_type, mask_type, Op_ExpandBits, bt);
 }