From ee67ee2214e154d676438ebc80b3a62d2a7db8b1 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 7 Mar 2025 22:11:09 +0530
Subject: [PATCH 01/20] 8350896: Integer/Long.compress gets wrong type from
 CompressBitsNode::Value

---
 src/hotspot/share/opto/intrinsicnode.cpp      |  46 ++++---
 .../c2/TestBitCompressValueTransform.java     | 119 ++++++++++++++++++
 .../ir_framework/test/IREncodingPrinter.java  |   1 +
 3 files changed, 148 insertions(+), 18 deletions(-)
 create mode 100644 test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index e67352d85bdcc..90ad46763f542 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -238,7 +238,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
   jlong hi = bt == T_INT ? max_jint : max_jlong;
   jlong lo = bt == T_INT ? min_jint : min_jlong;
 
-  if(mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
+  if (mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
     jlong maskcon = mask_type->get_con_as_long(bt);
     int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
     if (opc == Op_CompressBits) {
@@ -261,27 +261,37 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
   }
 
   if (!mask_type->is_con()) {
-    int mask_max_bw;
-    int max_bw = bt == T_INT ? 32 : 64;
-    // Case 1) Mask value range includes -1.
-    if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
-      mask_max_bw = max_bw;
-    // Case 2) Mask value range is less than -1.
-    } else if (mask_type->hi_as_long() < -1L) {
-      mask_max_bw = max_bw - 1;
-    } else {
-    // Case 3) Mask value range only includes +ve values.
-      assert(mask_type->lo_as_long() >= 0, "");
-      jlong clz = count_leading_zeros(mask_type->hi_as_long());
-      clz = bt == T_INT ? clz - 32 : clz;
-      mask_max_bw = max_bw - clz;
-    }
     if ( opc == Op_CompressBits) {
+      int mask_max_bw;
+      int max_bw = bt == T_INT ? 32 : 64;
+      // Case 1) Mask value range includes -1, this negates the possibility of
+      // strictly non-negative result value range.
+      if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
+        mask_max_bw = max_bw;
+      // Case 2) Mask value range is less than -1, this indicates presence of at least
+      // one zero bit in the mask value, there by constraining the result of compression to
+      // a +ve value range.
+      } else if (mask_type->hi_as_long() < -1L) {
+        mask_max_bw = max_bw - 1;
+      } else {
+      // Case 3) Mask value range only includes +ve values, this can again be
+      // used to ascertain known Zero bits of resultant value.
+        assert(mask_type->lo_as_long() >= 0, "");
+        jlong clz = count_leading_zeros(mask_type->hi_as_long());
+        clz = bt == T_INT ? clz - 32 : clz;
+        mask_max_bw = max_bw - clz;
+      }
+
       lo = mask_max_bw == max_bw ? lo : 0L;
       // Compress operation is inherently an unsigned operation and
       // result value range is primarily dependent on true count
-      // of participating mask value.
-      hi = mask_max_bw < max_bw ? (1L << mask_max_bw) - 1 : src_type->hi_as_long();
+      // of participating mask value. Thus bit compression can never
+      // result into a value greater than original value.
+      // For constant input we pessimistically set the upper bound
+      // of result to max_int to prevent incorrect constant value in case
+      // input equals lower bound of mask value range.
+      hi = src_type->hi_as_long() == lo ? hi : src_type->hi_as_long();
+      hi = mask_max_bw < max_bw ? (1L << mask_max_bw) - 1 : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
diff --git a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
new file mode 100644
index 0000000000000..9e5399f51426e
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key stress randomness
+ * @requires vm.compiler2.enabled & os.simpleArch == "x64"
+ * @bug 8350896
+ * @library /test/lib /
+ * @summary C2: wrong result: Integer/Long.compress gets wrong type from CompressBitsNode::Value.
+ * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileThresholdScaling=0.3 compiler.c2.TestBitCompressValueTransform
+ */
+package compiler.c2;
+
+import jdk.test.lib.Asserts;
+import compiler.lib.ir_framework.*;
+
+public class TestBitCompressValueTransform {
+
+    public static final int  field_I = 0x400_0000;
+    public static final long field_L = 0x400_0000_0000_0000L;
+    public static final int  gold_I = Integer.valueOf(Integer.compress(0x8000_0000, field_I));
+    public static final long gold_L = Long.valueOf(Long.compress(0x8000_0000_0000_0000L, field_L));
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test1(long value) {
+        return Long.compress(0x8000_0000_0000_0000L, value);
+    }
+
+    @Run(test = "test1")
+    public void run1(RunInfo info) {
+        long res = 0;
+        for (int i = 0; i < 100000; i++) {
+            res |= test1(field_L);
+        }
+        Asserts.assertEQ(res, gold_L);
+    }
+
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test2(int value) {
+        return Integer.compress(0x8000_0000, value);
+    }
+
+    @Run(test = "test2")
+    public void run2(RunInfo info) {
+        int res = 0;
+        for (int i = 0; i < 100000; i++) {
+            res |= test2(field_I);
+        }
+        Asserts.assertEQ(res, gold_I);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 "} , failOn = { IRNode.UNSTABLE_IF_TRAP }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test3(int value) {
+        int filter_bits = value & 0xF;
+        int compress_bits = Integer.compress(15, filter_bits);
+        if (compress_bits > 15) {
+            value = -1;
+        }
+        return value;
+    }
+
+    @Run(test = "test3")
+    public void run3(RunInfo info) {
+        int res = 0;
+        for (int i = 1; i < 100000; i++) {
+            res |= test3(i);
+        }
+        Asserts.assertGT(res, 0);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 "} , failOn = { IRNode.UNSTABLE_IF_TRAP }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test4(long value) {
+        long filter_bits = value & 0xF;
+        long compress_bits = Long.compress(15, filter_bits);
+        if (compress_bits > 15) {
+            value = -1;
+        }
+        return value;
+    }
+
+    @Run(test = "test4")
+    public void run4(RunInfo info) {
+        long res = 0;
+        for (long i = 1; i < 100000; i++) {
+            res |= test4(i);
+        }
+        Asserts.assertGT(res, 0L);
+    }
+
+    public static void main(String[] args) {
+        TestFramework.run(TestBitCompressValueTransform.class);
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
index 0b7dcbae9d9b0..93129e54f9a66 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
@@ -104,6 +104,7 @@ public class IREncodingPrinter {
         "avx512f",
         "avx512_fp16",
         "avx512_vnni",
+        "bmi2",
         // AArch64
         "sha3",
         "asimd",

From ae48895b0266cb3bf3708da55be8720f558162d4 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Wed, 2 Apr 2025 12:29:48 +0530
Subject: [PATCH 02/20] Review comments resolutions

---
 src/hotspot/share/opto/intrinsicnode.cpp      |  58 ++++---
 .../c2/TestBitCompressValueTransform.java     | 156 +++++++++++++++++-
 2 files changed, 185 insertions(+), 29 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 90ad46763f542..52d783c1f379b 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -237,7 +237,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
 
   jlong hi = bt == T_INT ? max_jint : max_jlong;
   jlong lo = bt == T_INT ? min_jint : min_jlong;
-
+  assert(bt == T_INT || bt == T_LONG, "");
   if (mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
     jlong maskcon = mask_type->get_con_as_long(bt);
     int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
@@ -262,36 +262,40 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
 
   if (!mask_type->is_con()) {
     if ( opc == Op_CompressBits) {
-      int mask_max_bw;
-      int max_bw = bt == T_INT ? 32 : 64;
-      // Case 1) Mask value range includes -1, this negates the possibility of
-      // strictly non-negative result value range.
+      // Pattern: Integer/Long.compress(src_type, mask_type)
+      int max_mask_bit_width;
+      int mask_bit_width = bt == T_INT ? 32 : 64;
       if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
-        mask_max_bw = max_bw;
-      // Case 2) Mask value range is less than -1, this indicates presence of at least
-      // one zero bit in the mask value, there by constraining the result of compression to
-      // a +ve value range.
+        // Case 1) Mask value range includes -1, this negates the possibility of
+        // strictly non-negative result value range.
+        max_mask_bit_width = mask_bit_width;
       } else if (mask_type->hi_as_long() < -1L) {
-        mask_max_bw = max_bw - 1;
+        // Case 2) Mask value range is less than -1, this indicates presence of at least
+        // one zero bit in the mask value, there by constraining the result of compression
+        // to a +ve value range.
+        max_mask_bit_width = mask_bit_width - 1;
       } else {
-      // Case 3) Mask value range only includes +ve values, this can again be
-      // used to ascertain known Zero bits of resultant value.
+        // Case 3) Mask value range only includes +ve values, this can again be
+        // used to ascertain known Zero bits of resultant value.
         assert(mask_type->lo_as_long() >= 0, "");
+        // Here, result of clz is w.r.t to long argument, hence for integer argument
+        // we explicitly subtract 32 from the result.
         jlong clz = count_leading_zeros(mask_type->hi_as_long());
         clz = bt == T_INT ? clz - 32 : clz;
-        mask_max_bw = max_bw - clz;
+        max_mask_bit_width = mask_bit_width - clz;
       }
 
-      lo = mask_max_bw == max_bw ? lo : 0L;
-      // Compress operation is inherently an unsigned operation and
-      // result value range is primarily dependent on true count
-      // of participating mask value. Thus bit compression can never
-      // result into a value greater than original value.
-      // For constant input we pessimistically set the upper bound
-      // of result to max_int to prevent incorrect constant value in case
-      // input equals lower bound of mask value range.
+      // If number of bits required to accommodated mask value range is less than the
+      // full bit width of integral type, then MSB bit is guaranteed to be zero, thus
+      // compression result will never be a -ve value and we can safely set the
+      // lower bound of the result value range to zero.
+      lo = max_mask_bit_width == mask_bit_width ? lo : 0L;
+
+      // For upper bound estimation of result value range for a constant input we
+      // pessimistically pick max_int value to prevent incorrect constant folding
+      // in case input equals above estimated lower bound.
       hi = src_type->hi_as_long() == lo ? hi : src_type->hi_as_long();
-      hi = mask_max_bw < max_bw ? (1L << mask_max_bw) - 1 : hi;
+      hi = max_mask_bit_width < mask_bit_width ? (1L << max_mask_bit_width) - 1 : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
@@ -339,6 +343,11 @@ const Type* CompressBitsNode::Value(PhaseGVN* phase) const {
                          static_cast<const Type*>(TypeLong::make(res));
   }
 
+  // Result is zero if src is zero irrespective of mask value.
+  if (src_type == TypeInteger::zero(bt)) {
+     return TypeInteger::zero(bt);
+  }
+
   return bitshuffle_value(src_type, mask_type, Op_CompressBits, bt);
 }
 
@@ -375,5 +384,10 @@ const Type* ExpandBitsNode::Value(PhaseGVN* phase) const {
                           static_cast<const Type*>(TypeLong::make(res));
   }
 
+  // Result is zero if src is zero irrespective of mask value.
+  if (src_type == TypeInteger::zero(bt)) {
+     return TypeInteger::zero(bt);
+  }
+
   return bitshuffle_value(src_type, mask_type, Op_ExpandBits, bt);
 }
diff --git a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
index 9e5399f51426e..f91b108ae8654 100644
--- a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
@@ -23,8 +23,6 @@
 
 /*
  * @test
- * @key stress randomness
- * @requires vm.compiler2.enabled & os.simpleArch == "x64"
  * @bug 8350896
  * @library /test/lib /
  * @summary C2: wrong result: Integer/Long.compress gets wrong type from CompressBitsNode::Value.
@@ -90,15 +88,15 @@ public void run3(RunInfo info) {
         for (int i = 1; i < 100000; i++) {
             res |= test3(i);
         }
-        Asserts.assertGT(res, 0);
+        Asserts.assertLTE(0, res);
     }
 
     @Test
     @IR (counts = { IRNode.COMPRESS_BITS, " 0 "} , failOn = { IRNode.UNSTABLE_IF_TRAP }, applyIfCPUFeature = { "bmi2", "true" })
     public long test4(long value) {
-        long filter_bits = value & 0xF;
-        long compress_bits = Long.compress(15, filter_bits);
-        if (compress_bits > 15) {
+        long filter_bits = value & 0xFL;
+        long compress_bits = Long.compress(15L, filter_bits);
+        if (compress_bits > 15L) {
             value = -1;
         }
         return value;
@@ -110,7 +108,151 @@ public void run4(RunInfo info) {
         for (long i = 1; i < 100000; i++) {
             res |= test4(i);
         }
-        Asserts.assertGT(res, 0L);
+        Asserts.assertLTE(0L, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test5(long value) {
+        // Since value range includes -1 hence with mask
+        // and value as -1 all the result bits will be set.
+        long mask = Long.min(10000L, Long.max(-10000L, value));
+        return Long.compress(value, mask);
+    }
+
+    @Run(test = "test5")
+    public void run5(RunInfo info) {
+        long res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test5((long)i);
+        }
+        Asserts.assertEQ(-1L, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test6(long value) {
+        // For mask within a strictly -ve value range less than -1,
+        // result of compression will always be a +ve value.
+        long mask = Long.min(-2L, Long.max(-10000L, value));
+        return Long.compress(value, mask);
+    }
+
+    @Run(test = "test6")
+    public void run6(RunInfo info) {
+        long res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test6((long)i);
+        }
+        Asserts.assertLTE(0L, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test7(long value) {
+        // For mask within a strictly +ve value range,
+        // result of compression will always be a +ve value with
+        // upper bound capped at max mask value.
+        long mask = Long.min(10000L, Long.max(0L, value));
+        return Long.compress(value, mask);
+    }
+
+    @Run(test = "test7")
+    public void run7(RunInfo info) {
+        long res = Long.MIN_VALUE;
+        for (int i = -10000; i < 100000; i++) {
+            res = Long.max(test7((long)i), res);
+        }
+        Asserts.assertGTE(10000L, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test8(int value) {
+        // Since value range includes -1 hence with mask
+        // and value as -1 all the result bits will be set.
+        int mask = Integer.min(10000, Integer.max(-10000, value));
+        return Integer.compress(value, mask);
+    }
+
+    @Run(test = "test8")
+    public void run8(RunInfo info) {
+        int res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test8(i);
+        }
+        Asserts.assertEQ(-1, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test9(int value) {
+        // For mask within a strictly -ve value range less than -1,
+        // result of compression will always be a +ve value.
+        int mask = Integer.min(-2, Integer.max(-10000, value));
+        return Integer.compress(value, mask);
+    }
+
+    @Run(test = "test9")
+    public void run9(RunInfo info) {
+        int res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test9(i);
+        }
+        Asserts.assertLTE(0, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test10(int value) {
+        // For mask within a strictly +ve value range,
+        // result of compression will always be a +ve value with
+        // upper bound capped at max mask value.
+        int mask = Integer.min(10000, Integer.max(0, value));
+        return Integer.compress(value, mask);
+    }
+
+    @Run(test = "test10")
+    public void run10(RunInfo info) {
+        int res = Integer.MIN_VALUE;
+        for (int i = -10000; i < 100000; i++) {
+            res = Integer.max(test10(i), res);
+        }
+        Asserts.assertGTE(10000, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public int test11(int value) {
+        // For constant zero input, compress is folded to zero
+        int mask = Integer.min(10000, Integer.max(0, value));
+        return Integer.compress(0, mask);
+    }
+
+    @Run(test = "test11")
+    public void run11(RunInfo info) {
+        int res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test11(i);
+        }
+        Asserts.assertEQ(0, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    public long test12(long value) {
+        // For constant zero input, compress is folded to zero
+        long mask = Long.min(10000L, Long.max(0L, value));
+        return Long.compress(0L, mask);
+    }
+
+    @Run(test = "test12")
+    public void run12(RunInfo info) {
+        long res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test12(i);
+        }
+        Asserts.assertEQ(0L, res);
     }
 
     public static void main(String[] args) {

From 73e1118ede2f31dcf737ba5b57eeec89e04ab6a7 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Thu, 10 Apr 2025 11:47:17 +0530
Subject: [PATCH 03/20] Review comments resolutions

---
 src/hotspot/share/opto/intrinsicnode.cpp      |  9 +++--
 .../c2/TestBitCompressValueTransform.java     | 40 +++++++++++++++++--
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 52d783c1f379b..9c6e794d3fcc0 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -271,12 +271,13 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         max_mask_bit_width = mask_bit_width;
       } else if (mask_type->hi_as_long() < -1L) {
         // Case 2) Mask value range is less than -1, this indicates presence of at least
-        // one zero bit in the mask value, there by constraining the result of compression
+        // one zero bit in the mask value, thereby constraining the result of compression
         // to a +ve value range.
         max_mask_bit_width = mask_bit_width - 1;
       } else {
-        // Case 3) Mask value range only includes +ve values, this can again be
-        // used to ascertain known Zero bits of resultant value.
+        // Case 3) Mask value range only includes +ve values, thus we can
+        // identify leading known zero bits of mask value and use this to
+        // constrain upper bound of result value range.
         assert(mask_type->lo_as_long() >= 0, "");
         // Here, result of clz is w.r.t to long argument, hence for integer argument
         // we explicitly subtract 32 from the result.
@@ -291,7 +292,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       // lower bound of the result value range to zero.
       lo = max_mask_bit_width == mask_bit_width ? lo : 0L;
 
-      // For upper bound estimation of result value range for a constant input we
+      // For upper bound estimation of result value range with a constant input we
       // pessimistically pick max_int value to prevent incorrect constant folding
       // in case input equals above estimated lower bound.
       hi = src_type->hi_as_long() == lo ? hi : src_type->hi_as_long();
diff --git a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
index f91b108ae8654..d87a75ea6ca1a 100644
--- a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
@@ -26,7 +26,7 @@
  * @bug 8350896
  * @library /test/lib /
  * @summary C2: wrong result: Integer/Long.compress gets wrong type from CompressBitsNode::Value.
- * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileThresholdScaling=0.3 compiler.c2.TestBitCompressValueTransform
+ * @run main/othervm compiler.c2.TestBitCompressValueTransform
  */
 package compiler.c2;
 
@@ -222,7 +222,7 @@ public void run10(RunInfo info) {
     }
 
     @Test
-    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " })
     public int test11(int value) {
         // For constant zero input, compress is folded to zero
         int mask = Integer.min(10000, Integer.max(0, value));
@@ -239,7 +239,7 @@ public void run11(RunInfo info) {
     }
 
     @Test
-    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " }, applyIfCPUFeature = { "bmi2", "true" })
+    @IR (counts = { IRNode.COMPRESS_BITS, " 0 " })
     public long test12(long value) {
         // For constant zero input, compress is folded to zero
         long mask = Long.min(10000L, Long.max(0L, value));
@@ -255,6 +255,40 @@ public void run12(RunInfo info) {
         Asserts.assertEQ(0L, res);
     }
 
+    @Test
+    @IR (counts = { IRNode.EXPAND_BITS, " 0 " })
+    public int test13(int value) {
+        // For constant zero input, expand is folded to zero
+        int mask = Integer.min(10000, Integer.max(0, value));
+        return Integer.expand(0, mask);
+    }
+
+    @Run(test = "test13")
+    public void run13(RunInfo info) {
+        int res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test13(i);
+        }
+        Asserts.assertEQ(0, res);
+    }
+
+    @Test
+    @IR (counts = { IRNode.EXPAND_BITS, " 0 " })
+    public long test14(long value) {
+        // For constant zero input, compress is folded to zero
+        long mask = Long.min(10000L, Long.max(0L, value));
+        return Long.expand(0L, mask);
+    }
+
+    @Run(test = "test14")
+    public void run14(RunInfo info) {
+        long res = 0;
+        for (int i = -10000; i < 100000; i++) {
+            res |= test14(i);
+        }
+        Asserts.assertEQ(0L, res);
+    }
+
     public static void main(String[] args) {
         TestFramework.run(TestBitCompressValueTransform.class);
     }

From 18b5c23944346455d6a5717d16c853c7c1422a0c Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Wed, 23 Apr 2025 13:26:14 +0530
Subject: [PATCH 04/20] Review comments resoultions

---
 src/hotspot/share/opto/intrinsicnode.cpp        | 17 +++++++++--------
 .../TestBitCompressValueTransform.java          |  4 ++--
 2 files changed, 11 insertions(+), 10 deletions(-)
 rename test/hotspot/jtreg/compiler/c2/{ => gvn}/TestBitCompressValueTransform.java (99%)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 9c6e794d3fcc0..88a224b7b55f9 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -263,40 +263,41 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
   if (!mask_type->is_con()) {
     if ( opc == Op_CompressBits) {
       // Pattern: Integer/Long.compress(src_type, mask_type)
-      int max_mask_bit_width;
+      int result_bit_width;
       int mask_bit_width = bt == T_INT ? 32 : 64;
       if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
         // Case 1) Mask value range includes -1, this negates the possibility of
         // strictly non-negative result value range.
-        max_mask_bit_width = mask_bit_width;
+        result_bit_width = mask_bit_width;
       } else if (mask_type->hi_as_long() < -1L) {
         // Case 2) Mask value range is less than -1, this indicates presence of at least
         // one zero bit in the mask value, thereby constraining the result of compression
         // to a +ve value range.
-        max_mask_bit_width = mask_bit_width - 1;
+        result_bit_width = mask_bit_width - 1;
       } else {
         // Case 3) Mask value range only includes +ve values, thus we can
         // identify leading known zero bits of mask value and use this to
         // constrain upper bound of result value range.
         assert(mask_type->lo_as_long() >= 0, "");
+        jlong clz = count_leading_zeros(mask_type->hi_as_long());
         // Here, result of clz is w.r.t to long argument, hence for integer argument
         // we explicitly subtract 32 from the result.
-        jlong clz = count_leading_zeros(mask_type->hi_as_long());
         clz = bt == T_INT ? clz - 32 : clz;
-        max_mask_bit_width = mask_bit_width - clz;
+        result_bit_width = mask_bit_width - clz;
       }
-
       // If number of bits required to accommodated mask value range is less than the
       // full bit width of integral type, then MSB bit is guaranteed to be zero, thus
       // compression result will never be a -ve value and we can safely set the
       // lower bound of the result value range to zero.
-      lo = max_mask_bit_width == mask_bit_width ? lo : 0L;
+      lo = result_bit_width == mask_bit_width ? lo : 0L;
 
+      assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");
+      assert((lo == (bt == T_INT) ? min_jint : min_jlong) || lo == 0, "");
       // For upper bound estimation of result value range with a constant input we
       // pessimistically pick max_int value to prevent incorrect constant folding
       // in case input equals above estimated lower bound.
       hi = src_type->hi_as_long() == lo ? hi : src_type->hi_as_long();
-      hi = max_mask_bit_width < mask_bit_width ? (1L << max_mask_bit_width) - 1 : hi;
+      hi = result_bit_width < mask_bit_width ? (1L << result_bit_width) - 1 : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
diff --git a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
similarity index 99%
rename from test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
rename to test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
index d87a75ea6ca1a..0030f72e66547 100644
--- a/test/hotspot/jtreg/compiler/c2/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
@@ -26,9 +26,9 @@
  * @bug 8350896
  * @library /test/lib /
  * @summary C2: wrong result: Integer/Long.compress gets wrong type from CompressBitsNode::Value.
- * @run main/othervm compiler.c2.TestBitCompressValueTransform
+ * @run driver compiler.c2.gvn.TestBitCompressValueTransform
  */
-package compiler.c2;
+package compiler.c2.gvn;
 
 import jdk.test.lib.Asserts;
 import compiler.lib.ir_framework.*;

From 73f9749a5b47693ac2a1d31add4b14d1124733fd Mon Sep 17 00:00:00 2001
From: Jatin Bhateaja <jatin.bhateja@intel.com>
Date: Mon, 5 May 2025 08:27:23 -0400
Subject: [PATCH 05/20] Fine tuning the hi bound computation

---
 src/hotspot/share/opto/intrinsicnode.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 88a224b7b55f9..6e6c927c58379 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -293,11 +293,13 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
 
       assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");
       assert((lo == (bt == T_INT) ? min_jint : min_jlong) || lo == 0, "");
-      // For upper bound estimation of result value range with a constant input we
-      // pessimistically pick max_int value to prevent incorrect constant folding
-      // in case input equals above estimated lower bound.
-      hi = src_type->hi_as_long() == lo ? hi : src_type->hi_as_long();
-      hi = result_bit_width < mask_bit_width ? (1L << result_bit_width) - 1 : hi;
+
+      // Following rules applies to upper bound estimation of results value range
+      // res.hi = src.hi iff src.hi > 0 else max_value
+      // if result_bit_width < mask_bit_width, then we can further constrain res.hi as follows.
+      // res.hi = MIN(res.hi, (1L << result_bit_width) - 1)
+      hi = src_type->hi_as_long() >= 0 ? src_type->hi_as_long() : hi;
+      hi = result_bit_width < mask_bit_width ? MIN2((1L << result_bit_width) - 1, hi) : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();

From 4c4d168802d08f4d7f1e93581e946cf77c3a85dd Mon Sep 17 00:00:00 2001
From: Jatin Bhateaja <jatin.bhateja@intel.com>
Date: Mon, 5 May 2025 10:17:50 -0400
Subject: [PATCH 06/20] Adding additional test

---
 src/hotspot/share/opto/intrinsicnode.cpp        |  2 +-
 .../c2/gvn/TestBitCompressValueTransform.java   | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 6e6c927c58379..d43ab95ad7019 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -299,7 +299,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       // if result_bit_width < mask_bit_width, then we can further constrain res.hi as follows.
       // res.hi = MIN(res.hi, (1L << result_bit_width) - 1)
       hi = src_type->hi_as_long() >= 0 ? src_type->hi_as_long() : hi;
-      hi = result_bit_width < mask_bit_width ? MIN2((1L << result_bit_width) - 1, hi) : hi;
+      hi = result_bit_width < mask_bit_width ? MIN2((jlong)((1L << result_bit_width) - 1L), hi) : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
diff --git a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
index 0030f72e66547..eb04e575d770f 100644
--- a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
@@ -289,6 +289,23 @@ public void run14(RunInfo info) {
         Asserts.assertEQ(0L, res);
     }
 
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " 1 " })
+    public int test15(int src, int mask) {
+        // src_type = [min_int + 1, -1]
+        src = Math.max(Integer.MIN_VALUE + 1, Math.min(src, -1));
+        return Integer.compress(src, mask);
+    }
+
+    @Run (test = "test15")
+    public void run15(RunInfo info) {
+        int res = 0;
+        for (int i = 0; i < 100000; i++) {
+            res |= test15(0, 0);
+        }
+        Asserts.assertEQ(0, res);
+    }
+
     public static void main(String[] args) {
         TestFramework.run(TestBitCompressValueTransform.class);
     }

From 4065fb9c89ae9661ead0e6a95fe2cbbbfd2b666b Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 30 May 2025 22:25:47 +0530
Subject: [PATCH 07/20] Review comments resolutions

---
 src/hotspot/share/opto/intrinsicnode.cpp | 123 +++++++++++++++++------
 1 file changed, 95 insertions(+), 28 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index d43ab95ad7019..f865c6dce89ac 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -238,46 +238,93 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
   jlong hi = bt == T_INT ? max_jint : max_jlong;
   jlong lo = bt == T_INT ? min_jint : min_jlong;
   assert(bt == T_INT || bt == T_LONG, "");
-  if (mask_type->is_con() && mask_type->get_con_as_long(bt) != -1L) {
+
+  // Rule 1: Bit compression selects the source bits corresponding to true mask bits,
+  // packs them and places them contiguously at destination bit positions
+  // starting from least significant bit, remaining higher order bits are set
+  // to zero.
+
+  // Rule 2: Bit expansion is a reverse process, which sequentially reads source bits
+  // starting from LSB and places them at bit positions in result value where
+  // corresponding mask bits are 1. Thus, bit expansion for non-negative mask
+  // value will always generate a +ve value, this is because sign bit of result
+  // will never be set to 1 as corresponding mask bit is always 0.
+
+  // Case A) Constant mask
+  if (mask_type->is_con()) {
+    // Case A.1 bit compression:-
+    // Result.Hi = popcount(1 << mask_bits - 1)
+    // Result.Lo = min iff mask == -1 assuming all source bits apart from most
+    //                                significant bit were set to 0
+    //       else
+    // Result.Lo = 0 iff atleast one mask bit is zero, corresponding source
+    // bit will be masked, hence result of bit compression will be a +ve
+    // value.
+    // e.g.
+    //  src = 0xXXXXXXXX (non-constant source)
+    //  mask = 0xEFFFFFFF (constant mask)
+    //  result.hi = 0x7FFFFFFF
+    //  result.lo = 0
     jlong maskcon = mask_type->get_con_as_long(bt);
-    int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
     if (opc == Op_CompressBits) {
-      // Bit compression selects the source bits corresponding to true mask bits
-      // and lays them out contiguously at destination bit positions starting from
-      // LSB, remaining higher order bits are set to zero.
-      // Thus, it will always generate a +ve value i.e. sign bit set to 0 if
-      // any bit of constant mask value is zero.
-      lo = 0L;
-      hi = (1UL << bitcount) - 1;
+      int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
+      hi = maskcon == -1L ? hi : (1UL << bitcount) - 1;
+      lo = maskcon == -1L ? lo : 0L;
     } else {
+    // Case A.2 bit expansion:-
+    //   Case A.2.1 constant mask >= 0
+    //     Result.Hi = mask, optimistically assuming all source bits
+    //     read starting from least significant bit positions are 1.
+    //     Result.Lo = 0
+    //   e.g.
+    //    src = 0xXXXXXXXX (non-constant source)
+    //    mask = 0x7FFFFFFF (constant mask >= 0)
+    //    result.hi = 0x7FFFFFFF
+    //    result.lo = 0
+
+    //   Case A.2.2) mask < 0
+    //     For constant mask strictly less than zero, maximum result value will be
+    //     same as mask value with its sign bit flipped, assuming all but last read
+    //     source bits are set to 1.
+    //
+    //     To compute minimum result value we assume all but last read source bit as zero,
+    //     this is because sign bit of result will always be set to 1 while other bit
+    //     corresponding to set mask bit should be zero.
+    //   e.g.
+    //    src = 0xXXXXXXXX (non-constant source)
+    //    mask = 0xEFFFFFFF (constant mask)
+    //    result.hi = 0xEFFFFFFF ^ 0x80000000 = 0x6FFFFFFF
+    //    result.lo = 0x80000000
+    //
       assert(opc == Op_ExpandBits, "");
-      // Expansion sequentially reads source bits starting from LSB
-      // and places them over destination at bit positions corresponding
-      // set mask bit. Thus bit expansion for non-negative mask value
-      // will always generate a +ve value.
       hi = maskcon >= 0L ? maskcon : maskcon ^ lo;
       lo = maskcon >= 0L ? 0L : lo;
     }
   }
 
+  // Case B) Non-constant mask.
   if (!mask_type->is_con()) {
     if ( opc == Op_CompressBits) {
-      // Pattern: Integer/Long.compress(src_type, mask_type)
       int result_bit_width;
       int mask_bit_width = bt == T_INT ? 32 : 64;
       if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
-        // Case 1) Mask value range includes -1, this negates the possibility of
-        // strictly non-negative result value range.
+        // Case B.1 Since mask value range includes -1 value, hence there is atleast one
+        // mask value for which iff all corresponding input bits are set then bit compression
+        // will result in a -ve value, therefore this case negates the possibility of
+        // strictly non-negative bit compression result.
         result_bit_width = mask_bit_width;
       } else if (mask_type->hi_as_long() < -1L) {
-        // Case 2) Mask value range is less than -1, this indicates presence of at least
-        // one zero bit in the mask value, thereby constraining the result of compression
-        // to a +ve value range.
+        // Case B.2 Mask value range is strictly less than -1, this indicates presence of at least
+        // one unset(zero) bit in mask value, thus as per Rule 1, bit compression will always
+        // result in a non-negative value. This guarantees that MSB bit of result value will
+        // always be set to zero.
         result_bit_width = mask_bit_width - 1;
       } else {
-        // Case 3) Mask value range only includes +ve values, thus we can
-        // identify leading known zero bits of mask value and use this to
-        // constrain upper bound of result value range.
+        // Case B.3 Mask value range only includes non-negative values. Since all integral
+        // types honours an invariant that TypeInteger._lo <= TypeInteger._hi, thus computing
+        // leading zero bits of upper bound of mask value will allow us to ascertain
+        // optimistic upper bound of result i.e. all the bits other than leading zero bits
+        // can be assumed holding 1 value.
         assert(mask_type->lo_as_long() >= 0, "");
         jlong clz = count_leading_zeros(mask_type->hi_as_long());
         // Here, result of clz is w.r.t to long argument, hence for integer argument
@@ -288,18 +335,38 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       // If number of bits required to accommodated mask value range is less than the
       // full bit width of integral type, then MSB bit is guaranteed to be zero, thus
       // compression result will never be a -ve value and we can safely set the
-      // lower bound of the result value range to zero.
+      // lower bound of bit compression to zero.
       lo = result_bit_width == mask_bit_width ? lo : 0L;
 
       assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");
       assert((lo == (bt == T_INT) ? min_jint : min_jlong) || lo == 0, "");
 
-      // Following rules applies to upper bound estimation of results value range
-      // res.hi = src.hi iff src.hi > 0 else max_value
-      // if result_bit_width < mask_bit_width, then we can further constrain res.hi as follows.
-      // res.hi = MIN(res.hi, (1L << result_bit_width) - 1)
+      // As per Rule 1, bit compression packs the source bits corresponding to
+      // set mask bits, hence for a non-negative input, result of compression will
+      // always be less that equal to input.
+      // e.g.
+      //    input = 0x0000F0F0
+      //    mask  = 0xFFFFFF00
+      // Lemma 1: For strictly non-negative input, result of compression will never be greater
+      // than input.
+      // Proof: Since input is a non-negative value, hence, its most significant bit will
+      // always be 0, thus even if corresponding MSB of mask is one results will be a +ve
+      // value. Bit compression discards the input bits corresponding zero mask bits, hence
+      // in order to consider all the set input bits, corresponding mask bits must also be
+      // set. If a mask bit corresponding to set input bit is zero then that input bit will
+      // not take part in bit compression, which means that maximum possible result value
+      // can never be greater than non-negative input.
+      //
+      // Rule 3:
+      // We can further constrain the upper bound of bit compression if number of bits which
+      // can be set to 1 is less than the maximum number of bits of integral type.
+      // by using following equation.
+      // res.hi = MIN(res.hi, (1UL << result_bit_width) - 1)
+
+      // Using Lemma 1, for non-negative input, upper bound of bit compression is equal to input.
       hi = src_type->hi_as_long() >= 0 ? src_type->hi_as_long() : hi;
-      hi = result_bit_width < mask_bit_width ? MIN2((jlong)((1L << result_bit_width) - 1L), hi) : hi;
+      // Tightening upper bound of bit compression as per Rule 3.
+      hi = result_bit_width < mask_bit_width ? MIN2((jlong)((1UL << result_bit_width) - 1L), hi) : hi;
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();

From 96ecbac1fa66d22cf95e294cda50aa4f4c3ff6be Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Mon, 2 Jun 2025 22:43:35 +0530
Subject: [PATCH 08/20] Fix aarch64 failure

---
 .../jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
index eb04e575d770f..ab575089a0f44 100644
--- a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
@@ -290,7 +290,7 @@ public void run14(RunInfo info) {
     }
 
     @Test
-    @IR (counts = { IRNode.COMPRESS_BITS, " 1 " })
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = {"bmi2" , "true"})
     public int test15(int src, int mask) {
         // src_type = [min_int + 1, -1]
         src = Math.max(Integer.MIN_VALUE + 1, Math.min(src, -1));

From 86d0a972fd8dfca708476d790a6e714777f46167 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:26:28 +0530
Subject: [PATCH 09/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 41 +++++++++---------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index f865c6dce89ac..e7b3de5f1df7e 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -341,32 +341,21 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");
       assert((lo == (bt == T_INT) ? min_jint : min_jlong) || lo == 0, "");
 
-      // As per Rule 1, bit compression packs the source bits corresponding to
-      // set mask bits, hence for a non-negative input, result of compression will
-      // always be less that equal to input.
-      // e.g.
-      //    input = 0x0000F0F0
-      //    mask  = 0xFFFFFF00
-      // Lemma 1: For strictly non-negative input, result of compression will never be greater
-      // than input.
-      // Proof: Since input is a non-negative value, hence, its most significant bit will
-      // always be 0, thus even if corresponding MSB of mask is one results will be a +ve
-      // value. Bit compression discards the input bits corresponding zero mask bits, hence
-      // in order to consider all the set input bits, corresponding mask bits must also be
-      // set. If a mask bit corresponding to set input bit is zero then that input bit will
-      // not take part in bit compression, which means that maximum possible result value
-      // can never be greater than non-negative input.
-      //
-      // Rule 3:
-      // We can further constrain the upper bound of bit compression if number of bits which
-      // can be set to 1 is less than the maximum number of bits of integral type.
-      // by using following equation.
-      // res.hi = MIN(res.hi, (1UL << result_bit_width) - 1)
-
-      // Using Lemma 1, for non-negative input, upper bound of bit compression is equal to input.
-      hi = src_type->hi_as_long() >= 0 ? src_type->hi_as_long() : hi;
-      // Tightening upper bound of bit compression as per Rule 3.
-      hi = result_bit_width < mask_bit_width ? MIN2((jlong)((1UL << result_bit_width) - 1L), hi) : hi;
+      if (src_type->hi_as_long() >= 0) {
+        // Lemma 1: For strictly non-negative src, the result of the compression will never be
+        // greater than src.
+        // Proof: Since src is a non-negative value, its most significant bit is always 0.
+        // Thus even if the corresponding MSB of the mask is one, the result will be a +ve
+        // value. <the actual proof>
+        hi = src_type->hi_as_long();
+      }
+
+      if (result_bit_width < mask_bit_width) {
+        // Rule 3:
+        // We can further constrain the upper bound of bit compression if the number of bits
+        // which can be set to 1 is less than the maximum number of bits of integral type.
+        hi = MIN2((jlong)((1UL << result_bit_width) - 1L), hi);
+      }
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();

From 89ec8f74b204a09450bef7eae8dec61aab074943 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:26:43 +0530
Subject: [PATCH 10/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index e7b3de5f1df7e..3bd03c933a2a7 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -332,10 +332,10 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         clz = bt == T_INT ? clz - 32 : clz;
         result_bit_width = mask_bit_width - clz;
       }
-      // If number of bits required to accommodated mask value range is less than the
-      // full bit width of integral type, then MSB bit is guaranteed to be zero, thus
-      // compression result will never be a -ve value and we can safely set the
-      // lower bound of bit compression to zero.
+      // If the number of bits required to for the mask value range is less than the
+      // full bit width of the integral type, then the MSB bit is guaranteed to be zero,
+      // thus the compression result will never be a -ve value and we can safely set the
+      // lower bound of the bit compression to zero.
       lo = result_bit_width == mask_bit_width ? lo : 0L;
 
       assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");

From ca6c770cd7431002f2c81f9dafdcc2ca2292cb13 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:26:53 +0530
Subject: [PATCH 11/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 3bd03c933a2a7..7cb48e2055ffb 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -320,12 +320,12 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         // always be set to zero.
         result_bit_width = mask_bit_width - 1;
       } else {
+        assert(mask_type->lo_as_long() >= 0, "");
         // Case B.3 Mask value range only includes non-negative values. Since all integral
         // types honours an invariant that TypeInteger._lo <= TypeInteger._hi, thus computing
         // leading zero bits of upper bound of mask value will allow us to ascertain
         // optimistic upper bound of result i.e. all the bits other than leading zero bits
         // can be assumed holding 1 value.
-        assert(mask_type->lo_as_long() >= 0, "");
         jlong clz = count_leading_zeros(mask_type->hi_as_long());
         // Here, result of clz is w.r.t to long argument, hence for integer argument
         // we explicitly subtract 32 from the result.

From b00b855f1d8fe8425781f085a4e34ee2a0be696c Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:27:05 +0530
Subject: [PATCH 12/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 7cb48e2055ffb..437cbc07f40d4 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -308,10 +308,8 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       int result_bit_width;
       int mask_bit_width = bt == T_INT ? 32 : 64;
       if ((mask_type->lo_as_long() < 0L && mask_type->hi_as_long() >= -1L)) {
-        // Case B.1 Since mask value range includes -1 value, hence there is atleast one
-        // mask value for which iff all corresponding input bits are set then bit compression
-        // will result in a -ve value, therefore this case negates the possibility of
-        // strictly non-negative bit compression result.
+        // Case B.1 The mask value range includes -1, hence we may use all bits,
+        //          the result has the whole value range.
         result_bit_width = mask_bit_width;
       } else if (mask_type->hi_as_long() < -1L) {
         // Case B.2 Mask value range is strictly less than -1, this indicates presence of at least

From 5674605be09f94b221704655af3954839812ef0d Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:27:57 +0530
Subject: [PATCH 13/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 437cbc07f40d4..d4c774c7fc357 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -283,9 +283,9 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
     //    result.lo = 0
 
     //   Case A.2.2) mask < 0
-    //     For constant mask strictly less than zero, maximum result value will be
-    //     same as mask value with its sign bit flipped, assuming all but last read
-    //     source bits are set to 1.
+    //     For constant mask strictly less than zero, the maximum result value will be
+    //     the same as the mask value with its sign bit flipped, assuming all source bits but the last
+    //     are set to 1.
     //
     //     To compute minimum result value we assume all but last read source bit as zero,
     //     this is because sign bit of result will always be set to 1 while other bit

From f00634a40499f9bab678ad74ea7171d716f01fef Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 18:28:13 +0530
Subject: [PATCH 14/20] Update src/hotspot/share/opto/intrinsicnode.cpp

Co-authored-by: Emanuel Peter <emanuel.peter@oracle.com>
---
 src/hotspot/share/opto/intrinsicnode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index d4c774c7fc357..305b3ea4d1328 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -275,7 +275,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
     //   Case A.2.1 constant mask >= 0
     //     Result.Hi = mask, optimistically assuming all source bits
     //     read starting from least significant bit positions are 1.
-    //     Result.Lo = 0
+    //     Result.Lo = 0, because at least one bit in mask is zero.
     //   e.g.
     //    src = 0xXXXXXXXX (non-constant source)
     //    mask = 0x7FFFFFFF (constant mask >= 0)

From c79efe092a48e199fb3b2cc8d7ca2e841f6b7d3a Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Fri, 11 Jul 2025 07:11:03 -0700
Subject: [PATCH 15/20] Update test

---
 .../c2/gvn/TestBitCompressValueTransform.java | 364 ++++++++++++++++++
 1 file changed, 364 insertions(+)

diff --git a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
index ab575089a0f44..1154b65f224e7 100644
--- a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
@@ -32,6 +32,7 @@
 
 import jdk.test.lib.Asserts;
 import compiler.lib.ir_framework.*;
+import compiler.lib.generators.*;
 
 public class TestBitCompressValueTransform {
 
@@ -40,6 +41,33 @@ public class TestBitCompressValueTransform {
     public static final int  gold_I = Integer.valueOf(Integer.compress(0x8000_0000, field_I));
     public static final long gold_L = Long.valueOf(Long.compress(0x8000_0000_0000_0000L, field_L));
 
+    public static RestrictableGenerator<Integer> GEN_I = Generators.G.ints();
+    public static RestrictableGenerator<Long> GEN_L = Generators.G.longs();
+
+    public final int LIMIT_I1 = GEN_I.next();
+    public final int LIMIT_I2 = GEN_I.next();
+    public final int LIMIT_I3 = GEN_I.next();
+    public final int LIMIT_I4 = GEN_I.next();
+    public final int LIMIT_I5 = GEN_I.next();
+    public final int LIMIT_I6 = GEN_I.next();
+    public final int LIMIT_I7 = GEN_I.next();
+    public final int LIMIT_I8 = GEN_I.next();
+
+    public final long LIMIT_L1 = GEN_L.next();
+    public final long LIMIT_L2 = GEN_L.next();
+    public final long LIMIT_L3 = GEN_L.next();
+    public final long LIMIT_L4 = GEN_L.next();
+    public final long LIMIT_L5 = GEN_L.next();
+    public final long LIMIT_L6 = GEN_L.next();
+    public final long LIMIT_L7 = GEN_L.next();
+    public final long LIMIT_L8 = GEN_L.next();
+
+    public final int BOUND_LO_I = GEN_I.next();
+    public final int BOUND_HI_I = GEN_I.next();
+
+    public final long BOUND_LO_L = GEN_L.next();
+    public final long BOUND_HI_L = GEN_L.next();
+
     @Test
     @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = { "bmi2", "true" })
     public long test1(long value) {
@@ -306,6 +334,342 @@ public void run15(RunInfo info) {
         Asserts.assertEQ(0, res);
     }
 
+    @DontCompile
+    public int test16_interpreted(int src, int mask) {
+        src = Math.max(BOUND_LO_I, Math.min(src, BOUND_HI_I));
+        int res = Integer.compress(src, mask);
+
+        if (res > LIMIT_I1) {
+            res += 1;
+        }
+        if (res > LIMIT_I2) {
+            res += 2;
+        }
+        if (res > LIMIT_I3) {
+            res += 4;
+        }
+        if (res > LIMIT_I4) {
+            res += 8;
+        }
+        if (res > LIMIT_I5) {
+            res += 16;
+        }
+        if (res > LIMIT_I6) {
+            res += 32;
+        }
+        if (res > LIMIT_I7) {
+            res += 64;
+        }
+        if (res > LIMIT_I8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = {"bmi2" , "true"})
+    public int test16(int src, int mask) {
+        src = Math.max(BOUND_LO_I, Math.min(src, BOUND_HI_I));
+        int res = Integer.compress(src, mask);
+
+        // Check the result with some random value ranges, if any of the
+        // following conditions incorrectly constant folds the result will
+        // not comply with the interpreter.
+
+        if (res > LIMIT_I1) {
+            res += 1;
+        }
+        if (res > LIMIT_I2) {
+            res += 2;
+        }
+        if (res > LIMIT_I3) {
+            res += 4;
+        }
+        if (res > LIMIT_I4) {
+            res += 8;
+        }
+        if (res > LIMIT_I5) {
+            res += 16;
+        }
+        if (res > LIMIT_I6) {
+            res += 32;
+        }
+        if (res > LIMIT_I7) {
+            res += 64;
+        }
+        if (res > LIMIT_I8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Run (test = "test16")
+    public void run16(RunInfo info) {
+        int actual = 0;
+        int expected = 0;
+
+        for (int i = 0; i < 10000; i++) {
+            int arg1 = GEN_I.next();
+            int arg2 = GEN_I.next();
+
+            actual += test16(arg1, arg2);
+            expected += test16_interpreted(arg1, arg2);
+        }
+        Asserts.assertEQ(actual, expected);
+    }
+
+    @DontCompile
+    public int test17_interpreted(int src, int mask) {
+        src = Math.max(BOUND_LO_I, Math.min(src, BOUND_HI_I));
+        int res = Integer.expand(src, mask);
+
+        if (res > LIMIT_I1) {
+            res += 1;
+        }
+        if (res > LIMIT_I2) {
+            res += 2;
+        }
+        if (res > LIMIT_I3) {
+            res += 4;
+        }
+        if (res > LIMIT_I4) {
+            res += 8;
+        }
+        if (res > LIMIT_I5) {
+            res += 16;
+        }
+        if (res > LIMIT_I6) {
+            res += 32;
+        }
+        if (res > LIMIT_I7) {
+            res += 64;
+        }
+        if (res > LIMIT_I8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Test
+    @IR (counts = { IRNode.EXPAND_BITS, " >0 " }, applyIfCPUFeature = {"bmi2" , "true"})
+    public int test17(int src, int mask) {
+        src = Math.max(BOUND_LO_I, Math.min(src, BOUND_HI_I));
+        int res = Integer.expand(src, mask);
+
+        // Check the result with some random value ranges, if any of the
+        // following conditions incorrectly constant folds the result will
+        // not comply with the interpreter.
+
+        if (res > LIMIT_I1) {
+            res += 1;
+        }
+        if (res > LIMIT_I2) {
+            res += 2;
+        }
+        if (res > LIMIT_I3) {
+            res += 4;
+        }
+        if (res > LIMIT_I4) {
+            res += 8;
+        }
+        if (res > LIMIT_I5) {
+            res += 16;
+        }
+        if (res > LIMIT_I6) {
+            res += 32;
+        }
+        if (res > LIMIT_I7) {
+            res += 64;
+        }
+        if (res > LIMIT_I8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Run (test = "test17")
+    public void run17(RunInfo info) {
+        int actual = 0;
+        int expected = 0;
+
+        for (int i = 0; i < 10000; i++) {
+            int arg1 = GEN_I.next();
+            int arg2 = GEN_I.next();
+
+            actual += test16(arg1, arg2);
+            expected += test16_interpreted(arg1, arg2);
+        }
+        Asserts.assertEQ(actual, expected);
+    }
+
+    @DontCompile
+    public long test18_interpreted(long src, long mask) {
+        src = Math.max(BOUND_LO_L, Math.min(src, BOUND_HI_L));
+        long res = Long.compress(src, mask);
+
+        if (res > LIMIT_L1) {
+            res += 1;
+        }
+        if (res > LIMIT_L2) {
+            res += 2;
+        }
+        if (res > LIMIT_L3) {
+            res += 4;
+        }
+        if (res > LIMIT_L4) {
+            res += 8;
+        }
+        if (res > LIMIT_L5) {
+            res += 16;
+        }
+        if (res > LIMIT_L6) {
+            res += 32;
+        }
+        if (res > LIMIT_L7) {
+            res += 64;
+        }
+        if (res > LIMIT_L8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Test
+    @IR (counts = { IRNode.COMPRESS_BITS, " >0 " }, applyIfCPUFeature = {"bmi2" , "true"})
+    public long test18(long src, long mask) {
+        src = Math.max(BOUND_LO_L, Math.min(src, BOUND_HI_L));
+        long res = Long.compress(src, mask);
+
+        // Check the result with some random value ranges, if any of the
+        // following conditions incorrectly constant folds the result will
+        // not comply with the interpreter.
+
+        if (res > LIMIT_L1) {
+            res += 1;
+        }
+        if (res > LIMIT_L2) {
+            res += 2;
+        }
+        if (res > LIMIT_L3) {
+            res += 4;
+        }
+        if (res > LIMIT_L4) {
+            res += 8;
+        }
+        if (res > LIMIT_L5) {
+            res += 16;
+        }
+        if (res > LIMIT_L6) {
+            res += 32;
+        }
+        if (res > LIMIT_L7) {
+            res += 64;
+        }
+        if (res > LIMIT_L8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Run (test = "test18")
+    public void run18(RunInfo info) {
+        long actual = 0;
+        long expected = 0;
+
+        for (int i = 0; i < 10000; i++) {
+            long arg1 = GEN_L.next();
+            long arg2 = GEN_L.next();
+
+            actual += test18(arg1, arg2);
+            expected += test18_interpreted(arg1, arg2);
+        }
+        Asserts.assertEQ(actual, expected);
+    }
+
+    @DontCompile
+    public long test19_interpreted(long src, long mask) {
+        src = Math.max(BOUND_LO_L, Math.min(src, BOUND_HI_L));
+        long res = Long.expand(src, mask);
+
+        if (res > LIMIT_L1) {
+            res += 1;
+        }
+        if (res > LIMIT_L2) {
+            res += 2;
+        }
+        if (res > LIMIT_L3) {
+            res += 4;
+        }
+        if (res > LIMIT_L4) {
+            res += 8;
+        }
+        if (res > LIMIT_L5) {
+            res += 16;
+        }
+        if (res > LIMIT_L6) {
+            res += 32;
+        }
+        if (res > LIMIT_L7) {
+            res += 64;
+        }
+        if (res > LIMIT_L8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Test
+    @IR (counts = { IRNode.EXPAND_BITS, " >0 " }, applyIfCPUFeature = {"bmi2" , "true"})
+    public long test19(long src, long mask) {
+        src = Math.max(BOUND_LO_L, Math.min(src, BOUND_HI_L));
+        long res = Long.expand(src, mask);
+
+        // Check the result with some random value ranges, if any of the
+        // following conditions incorrectly constant folds the result will
+        // not comply with the interpreter.
+
+        if (res > LIMIT_L1) {
+            res += 1;
+        }
+        if (res > LIMIT_L2) {
+            res += 2;
+        }
+        if (res > LIMIT_L3) {
+            res += 4;
+        }
+        if (res > LIMIT_L4) {
+            res += 8;
+        }
+        if (res > LIMIT_L5) {
+            res += 16;
+        }
+        if (res > LIMIT_L6) {
+            res += 32;
+        }
+        if (res > LIMIT_L7) {
+            res += 64;
+        }
+        if (res > LIMIT_L8) {
+            res += 128;
+        }
+        return res;
+    }
+
+    @Run (test = "test19")
+    public void run19(RunInfo info) {
+        long actual = 0;
+        long expected = 0;
+
+        for (int i = 0; i < 10000; i++) {
+            long arg1 = GEN_L.next();
+            long arg2 = GEN_L.next();
+
+            actual += test19(arg1, arg2);
+            expected += test19_interpreted(arg1, arg2);
+        }
+        Asserts.assertEQ(actual, expected);
+    }
+
     public static void main(String[] args) {
         TestFramework.run(TestBitCompressValueTransform.class);
     }

From 7be678b555b8d6c52b0716d93977eed566808d9e Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Mon, 14 Jul 2025 17:22:27 +0530
Subject: [PATCH 16/20] Review comments resolutions

---
 src/hotspot/share/opto/intrinsicnode.cpp      | 125 +++++++++++-------
 .../c2/gvn/TestBitCompressValueTransform.java |  30 ++---
 2 files changed, 94 insertions(+), 61 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 305b3ea4d1328..ff26edd571869 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -252,53 +252,68 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
 
   // Case A) Constant mask
   if (mask_type->is_con()) {
-    // Case A.1 bit compression:-
-    // Result.Hi = popcount(1 << mask_bits - 1)
-    // Result.Lo = min iff mask == -1 assuming all source bits apart from most
-    //                                significant bit were set to 0
-    //       else
-    // Result.Lo = 0 iff atleast one mask bit is zero, corresponding source
-    // bit will be masked, hence result of bit compression will be a +ve
-    // value.
-    // e.g.
-    //  src = 0xXXXXXXXX (non-constant source)
-    //  mask = 0xEFFFFFFF (constant mask)
-    //  result.hi = 0x7FFFFFFF
-    //  result.lo = 0
     jlong maskcon = mask_type->get_con_as_long(bt);
     if (opc == Op_CompressBits) {
+      // Case A.1 bit compression:-
+      // For an outlier mask value of -1 upper bound of the result equals
+      // maximum integral value, for any other mask value its computed using
+      // following formula
+      //       Result.Hi = 1 << popcount(mask_bits) - 1
+      //
+      // For mask values other than -1, lower bound of the result is estimated
+      // as zero, by assuming at least one mask bit is zero and corresponding source
+      // bit will be masked, hence result of bit compression will always be
+      // non-negative value. For outlier mask value of -1, assume all source bits
+      // apart from most significant bit were set to 0, thereby resulting in
+      // a minimum integral value.
+      // e.g.
+      //  src = 0xXXXXXXXX (non-constant source)
+      //  mask = 0xEFFFFFFF (constant mask)
+      //  result.hi = 0x7FFFFFFF
+      //  result.lo = 0
       int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
-      hi = maskcon == -1L ? hi : (1UL << bitcount) - 1;
-      lo = maskcon == -1L ? lo : 0L;
+      if (maskcon != -1L) {
+        hi = (1UL << bitcount) - 1;
+        lo = 0L;
+      } else {
+        // preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values.
+        assert(hi == (T_INT ? max_jint : max_jlong), "");
+        assert(lo == (T_INT ? min_jint : min_jlong), "");
+      }
     } else {
-    // Case A.2 bit expansion:-
-    //   Case A.2.1 constant mask >= 0
-    //     Result.Hi = mask, optimistically assuming all source bits
-    //     read starting from least significant bit positions are 1.
-    //     Result.Lo = 0, because at least one bit in mask is zero.
-    //   e.g.
-    //    src = 0xXXXXXXXX (non-constant source)
-    //    mask = 0x7FFFFFFF (constant mask >= 0)
-    //    result.hi = 0x7FFFFFFF
-    //    result.lo = 0
-
-    //   Case A.2.2) mask < 0
-    //     For constant mask strictly less than zero, the maximum result value will be
-    //     the same as the mask value with its sign bit flipped, assuming all source bits but the last
-    //     are set to 1.
-    //
-    //     To compute minimum result value we assume all but last read source bit as zero,
-    //     this is because sign bit of result will always be set to 1 while other bit
-    //     corresponding to set mask bit should be zero.
-    //   e.g.
-    //    src = 0xXXXXXXXX (non-constant source)
-    //    mask = 0xEFFFFFFF (constant mask)
-    //    result.hi = 0xEFFFFFFF ^ 0x80000000 = 0x6FFFFFFF
-    //    result.lo = 0x80000000
-    //
+      // Case A.2 bit expansion:-
       assert(opc == Op_ExpandBits, "");
-      hi = maskcon >= 0L ? maskcon : maskcon ^ lo;
-      lo = maskcon >= 0L ? 0L : lo;
+      if (maskcon >= 0L) {
+        //   Case A.2.1 constant mask >= 0
+        //     Result.Hi = mask, optimistically assuming all source bits
+        //     read starting from least significant bit positions are 1.
+        //     Result.Lo = 0, because at least one bit in mask is zero.
+        //   e.g.
+        //    src = 0xXXXXXXXX (non-constant source)
+        //    mask = 0x7FFFFFFF (constant mask >= 0)
+        //    result.hi = 0x7FFFFFFF
+        //    result.lo = 0
+        hi = maskcon;
+        lo = 0L;
+      } else {
+        //   Case A.2.2) mask < 0
+        //     For constant mask strictly less than zero, the maximum result value will be
+        //     the same as the mask value with its sign bit flipped, assuming all source bits
+        //     except the MSB bit are set(one).
+        //
+        //     To compute minimum result value we assume all but last read source bit as zero,
+        //     this is because sign bit of result will always be set to 1 while other bit
+        //     corresponding to set mask bit should be zero.
+        //   e.g.
+        //    src = 0xXXXXXXXX (non-constant source)
+        //    mask = 0xEFFFFFFF (constant mask)
+        //    result.hi = 0xEFFFFFFF ^ 0x80000000 = 0x6FFFFFFF
+        //    result.lo = 0x80000000
+        //
+        hi = maskcon ^ lo;
+        // lo still retains MIN_INT/LONG.
+        assert(lo == (T_INT ? min_jint : min_jlong), "");
+      }
     }
   }
 
@@ -344,23 +359,41 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         // greater than src.
         // Proof: Since src is a non-negative value, its most significant bit is always 0.
         // Thus even if the corresponding MSB of the mask is one, the result will be a +ve
-        // value. <the actual proof>
+        // value. There are three possible cases
+        //   a. All the mask bits corresponding to set source bits are unset(zero).
+        //   b. All the mask bits corresponding to set source bits are set(one)
+        //   c. Some mask bits corresponding to set source bits are set(one) while others are unset(zero)
+        //
+        // Case a. results into an allzero result, while Case b. gives us the upper bound which is equals source
+        // value, while for Case c. the result will lie within [0, src]
+        //
         hi = src_type->hi_as_long();
       }
 
       if (result_bit_width < mask_bit_width) {
         // Rule 3:
         // We can further constrain the upper bound of bit compression if the number of bits
-        // which can be set to 1 is less than the maximum number of bits of integral type.
+        // which can be set(one) is less than the maximum number of bits of integral type.
         hi = MIN2((jlong)((1UL << result_bit_width) - 1L), hi);
       }
     } else {
       assert(opc == Op_ExpandBits, "");
       jlong max_mask = mask_type->hi_as_long();
+      jlong min_mask = mask_type->lo_as_long();
       // Since mask here a range and not a constant value, hence being
       // conservative in determining the value range of result.
-      lo = mask_type->lo_as_long() >= 0L ? 0L : lo;
-      hi = mask_type->lo_as_long() >= 0L ? max_mask : hi;
+      if (min_mask >= 0L) {
+        // Lemma 2: Based on the integral type invariant ie. TypeInteger.lo <= TypeInteger.hi,
+        // if the lower bound of non-constant mask is a non-negative value then result can never
+        // be greater than the mask.
+        // Proof: Since lower bound of the mask is a non-negative value, hence most significant
+        // bit of its entire value must be unset(zero). Similar to the proof of Lemma1, upper and
+        // lower bounds of result will always match the bounds of the mask value range.
+        hi = max_mask;
+        lo = min_mask;
+      } else {
+        // preserve the lo and hi bounds estimated till now.
+      }
     }
   }
 
diff --git a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
index 1154b65f224e7..98f26f120b191 100644
--- a/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
+++ b/test/hotspot/jtreg/compiler/c2/gvn/TestBitCompressValueTransform.java
@@ -77,7 +77,7 @@ public long test1(long value) {
     @Run(test = "test1")
     public void run1(RunInfo info) {
         long res = 0;
-        for (int i = 0; i < 100000; i++) {
+        for (int i = 0; i < 10000; i++) {
             res |= test1(field_L);
         }
         Asserts.assertEQ(res, gold_L);
@@ -93,7 +93,7 @@ public int test2(int value) {
     @Run(test = "test2")
     public void run2(RunInfo info) {
         int res = 0;
-        for (int i = 0; i < 100000; i++) {
+        for (int i = 0; i < 10000; i++) {
             res |= test2(field_I);
         }
         Asserts.assertEQ(res, gold_I);
@@ -113,7 +113,7 @@ public int test3(int value) {
     @Run(test = "test3")
     public void run3(RunInfo info) {
         int res = 0;
-        for (int i = 1; i < 100000; i++) {
+        for (int i = 1; i < 10000; i++) {
             res |= test3(i);
         }
         Asserts.assertLTE(0, res);
@@ -133,7 +133,7 @@ public long test4(long value) {
     @Run(test = "test4")
     public void run4(RunInfo info) {
         long res = 0;
-        for (long i = 1; i < 100000; i++) {
+        for (long i = 1; i < 10000; i++) {
             res |= test4(i);
         }
         Asserts.assertLTE(0L, res);
@@ -151,7 +151,7 @@ public long test5(long value) {
     @Run(test = "test5")
     public void run5(RunInfo info) {
         long res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test5((long)i);
         }
         Asserts.assertEQ(-1L, res);
@@ -169,7 +169,7 @@ public long test6(long value) {
     @Run(test = "test6")
     public void run6(RunInfo info) {
         long res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test6((long)i);
         }
         Asserts.assertLTE(0L, res);
@@ -188,7 +188,7 @@ public long test7(long value) {
     @Run(test = "test7")
     public void run7(RunInfo info) {
         long res = Long.MIN_VALUE;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res = Long.max(test7((long)i), res);
         }
         Asserts.assertGTE(10000L, res);
@@ -206,7 +206,7 @@ public int test8(int value) {
     @Run(test = "test8")
     public void run8(RunInfo info) {
         int res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test8(i);
         }
         Asserts.assertEQ(-1, res);
@@ -224,7 +224,7 @@ public int test9(int value) {
     @Run(test = "test9")
     public void run9(RunInfo info) {
         int res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test9(i);
         }
         Asserts.assertLTE(0, res);
@@ -243,7 +243,7 @@ public int test10(int value) {
     @Run(test = "test10")
     public void run10(RunInfo info) {
         int res = Integer.MIN_VALUE;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res = Integer.max(test10(i), res);
         }
         Asserts.assertGTE(10000, res);
@@ -260,7 +260,7 @@ public int test11(int value) {
     @Run(test = "test11")
     public void run11(RunInfo info) {
         int res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test11(i);
         }
         Asserts.assertEQ(0, res);
@@ -277,7 +277,7 @@ public long test12(long value) {
     @Run(test = "test12")
     public void run12(RunInfo info) {
         long res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test12(i);
         }
         Asserts.assertEQ(0L, res);
@@ -294,7 +294,7 @@ public int test13(int value) {
     @Run(test = "test13")
     public void run13(RunInfo info) {
         int res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test13(i);
         }
         Asserts.assertEQ(0, res);
@@ -311,7 +311,7 @@ public long test14(long value) {
     @Run(test = "test14")
     public void run14(RunInfo info) {
         long res = 0;
-        for (int i = -10000; i < 100000; i++) {
+        for (int i = -10000; i < 10000; i++) {
             res |= test14(i);
         }
         Asserts.assertEQ(0L, res);
@@ -328,7 +328,7 @@ public int test15(int src, int mask) {
     @Run (test = "test15")
     public void run15(RunInfo info) {
         int res = 0;
-        for (int i = 0; i < 100000; i++) {
+        for (int i = 0; i < 10000; i++) {
             res |= test15(0, 0);
         }
         Asserts.assertEQ(0, res);

From 06eafe7712833d830bbd60cdb729ad261eca59b8 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Mon, 14 Jul 2025 18:49:25 +0530
Subject: [PATCH 17/20] Broken assertions fix

---
 src/hotspot/share/opto/intrinsicnode.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index ff26edd571869..a9f983ef37f24 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -277,8 +277,8 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         lo = 0L;
       } else {
         // preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values.
-        assert(hi == (T_INT ? max_jint : max_jlong), "");
-        assert(lo == (T_INT ? min_jint : min_jlong), "");
+        assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
+        assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
       }
     } else {
       // Case A.2 bit expansion:-
@@ -312,7 +312,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         //
         hi = maskcon ^ lo;
         // lo still retains MIN_INT/LONG.
-        assert(lo == (T_INT ? min_jint : min_jlong), "");
+        assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
       }
     }
   }
@@ -351,8 +351,8 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       // lower bound of the bit compression to zero.
       lo = result_bit_width == mask_bit_width ? lo : 0L;
 
-      assert(hi == (bt == T_INT) ? max_jint : max_jlong, "");
-      assert((lo == (bt == T_INT) ? min_jint : min_jlong) || lo == 0, "");
+      assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
+      assert(lo == (bt == T_INT ? min_jint : min_jlong) || lo == 0, "");
 
       if (src_type->hi_as_long() >= 0) {
         // Lemma 1: For strictly non-negative src, the result of the compression will never be

From 4f33d4b46d41e10486f3ee3783009eb4ddce07cd Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@gmail.com>
Date: Wed, 16 Jul 2025 15:50:21 +0530
Subject: [PATCH 18/20] Refine lower bound computation

---
 src/hotspot/share/opto/intrinsicnode.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index a9f983ef37f24..3444d408b4d54 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -271,12 +271,13 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       //  mask = 0xEFFFFFFF (constant mask)
       //  result.hi = 0x7FFFFFFF
       //  result.lo = 0
-      int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
       if (maskcon != -1L) {
+        int bitcount = population_count(static_cast<julong>(bt == T_INT ? maskcon & 0xFFFFFFFFL : maskcon));
         hi = (1UL << bitcount) - 1;
         lo = 0L;
       } else {
-        // preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values.
+        // preserve originally assigned hi (MAX_INT/LONG) and lo (MIN_INT/LONG) values
+        // for unknown source bits.
         assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
         assert(lo == (bt == T_INT ? min_jint : min_jlong), "");
       }
@@ -387,10 +388,12 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         // if the lower bound of non-constant mask is a non-negative value then result can never
         // be greater than the mask.
         // Proof: Since lower bound of the mask is a non-negative value, hence most significant
-        // bit of its entire value must be unset(zero). Similar to the proof of Lemma1, upper and
-        // lower bounds of result will always match the bounds of the mask value range.
+        // bit of its entire value must be unset(zero). If all the lower order 'n' source bits
+        // where n corresponds to popcount of mask are set(ones) then upper bound of the result equals
+        // mask. In order to compute the lower bound, we pssimistically assume all the lower order 'n'
+        // source bits are unset(zero) there by resuling into a zero value.
         hi = max_mask;
-        lo = min_mask;
+        lo = 0;
       } else {
         // preserve the lo and hi bounds estimated till now.
       }

From 161487e692b1ffed1ba40332a599c0780e030304 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatinbha.cloud@gmail.com>
Date: Tue, 22 Jul 2025 15:50:40 +0530
Subject: [PATCH 19/20] Update intrinsicnode.cpp

---
 src/hotspot/share/opto/intrinsicnode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 3444d408b4d54..24f53b749d7ec 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -355,7 +355,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
       assert(hi == (bt == T_INT ? max_jint : max_jlong), "");
       assert(lo == (bt == T_INT ? min_jint : min_jlong) || lo == 0, "");
 
-      if (src_type->hi_as_long() >= 0) {
+      if (src_type->lo_as_long() >= 0) {
         // Lemma 1: For strictly non-negative src, the result of the compression will never be
         // greater than src.
         // Proof: Since src is a non-negative value, its most significant bit is always 0.

From 68e24cf835331889bd010dbf1361f5e3ed1225b5 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatinbha.cloud@gmail.com>
Date: Wed, 23 Jul 2025 07:19:21 +0530
Subject: [PATCH 20/20] Update intrinsicnode.cpp

---
 src/hotspot/share/opto/intrinsicnode.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/hotspot/share/opto/intrinsicnode.cpp b/src/hotspot/share/opto/intrinsicnode.cpp
index 24f53b749d7ec..febff7b740bf0 100644
--- a/src/hotspot/share/opto/intrinsicnode.cpp
+++ b/src/hotspot/share/opto/intrinsicnode.cpp
@@ -369,6 +369,7 @@ static const Type* bitshuffle_value(const TypeInteger* src_type, const TypeInteg
         // value, while for Case c. the result will lie within [0, src]
         //
         hi = src_type->hi_as_long();
+        lo = 0L;
       }
 
       if (result_bit_width < mask_bit_width) {