8333583: Crypto-XDH.generateSecret regression after JDK-8329538

vpaprotsk · Sandhya Viswanathan · commit f101e153cee6 · 2024-06-25T22:31:39.000Z
Reviewed-by: sviswanathan, kvn, ascarpino
diff --git a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
@@ -778,7 +778,7 @@ private String generate(FieldParams params) throws IOException {
         result.appendLine("}");
 
         result.appendLine("@Override");
-        result.appendLine("protected int mult(long[] a, long[] b, long[] r) {");
+        result.appendLine("protected void mult(long[] a, long[] b, long[] r) {");
         result.incrIndent();
         for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
             result.appendIndent();
@@ -804,9 +804,6 @@ private String generate(FieldParams params) throws IOException {
             }
         }
         result.append(");\n");
-        result.appendIndent();
-        result.append("return 0;");
-        result.appendLine();
         result.decrIndent();
         result.appendLine("}");
 
@@ -836,7 +833,7 @@ private String generate(FieldParams params) throws IOException {
         //      }
         //  }
         result.appendLine("@Override");
-        result.appendLine("protected int square(long[] a, long[] r) {");
+        result.appendLine("protected void square(long[] a, long[] r) {");
         result.incrIndent();
         for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
             result.appendIndent();
@@ -877,9 +874,6 @@ private String generate(FieldParams params) throws IOException {
             }
         }
         result.append(");\n");
-        result.appendIndent();
-        result.append("return 0;");
-        result.appendLine();
         result.decrIndent();
         result.appendLine("}");
 
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
@@ -249,7 +249,6 @@ address StubGenerator::generate_intpoly_montgomeryMult_P256() {
   const Register tmp     = r9;
 
   montgomeryMultiply(aLimbs, bLimbs, rLimbs, tmp, _masm);
-  __ mov64(rax, 0x1); // Return 1 (Fig. 5, Step 6 [1] skipped in montgomeryMultiply)
 
   __ leave();
   __ ret(0);
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -529,8 +529,8 @@ class methodHandle;
   /* support for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256 */                                      \
   do_class(sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, "sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256")  \
   do_intrinsic(_intpoly_montgomeryMult_P256, sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, intPolyMult_name, intPolyMult_signature, F_R) \
-  do_name(intPolyMult_name, "mult")                                                                                     \
-  do_signature(intPolyMult_signature, "([J[J[J)I")                                                                      \
+  do_name(intPolyMult_name, "multImpl")                                                                                     \
+  do_signature(intPolyMult_signature, "([J[J[J)V")                                                                      \
                                                                                                                         \
   do_class(sun_security_util_math_intpoly_IntegerPolynomial, "sun/security/util/math/intpoly/IntegerPolynomial")        \
   do_intrinsic(_intpoly_assign, sun_security_util_math_intpoly_IntegerPolynomial, intPolyAssign_name, intPolyAssign_signature, F_S) \
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
@@ -7580,8 +7580,6 @@ bool LibraryCallKit::inline_intpoly_montgomeryMult_P256() {
                                  OptoRuntime::intpoly_montgomeryMult_P256_Type(),
                                  stubAddr, stubName, TypePtr::BOTTOM,
                                  a_start, b_start, r_start);
-  Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
-  set_result(result);
   return true;
 }
 
diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp
@@ -1435,8 +1435,8 @@ const TypeFunc* OptoRuntime::intpoly_montgomeryMult_P256_Type() {
 
   // result type needed
   fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms + 0] = TypeInt::INT; // carry bits in output
-  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+  fields[TypeFunc::Parms + 0] = nullptr; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
   return TypeFunc::make(domain, range);
 }
 
@@ -1455,7 +1455,7 @@ const TypeFunc* OptoRuntime::intpoly_assign_Type() {
 
   // result type needed
   fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms + 0] = NULL; // void
+  fields[TypeFunc::Parms + 0] = nullptr; // void
   const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
   return TypeFunc::make(domain, range);
 }
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
@@ -90,12 +90,11 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
      * store the result in an IntegerPolynomial representation in a. Requires
      * that a.length == numLimbs.
      */
-    protected int multByInt(long[] a, long b) {
+    protected void multByInt(long[] a, long b) {
         for (int i = 0; i < a.length; i++) {
             a[i] *= b;
         }
         reduce(a);
-        return 0;
     }
 
     /**
@@ -104,15 +103,15 @@ protected int multByInt(long[] a, long b) {
      * a.length == b.length == r.length == numLimbs. It is allowed for a and r
      * to be the same array.
      */
-    protected abstract int mult(long[] a, long[] b, long[] r);
+    protected abstract void mult(long[] a, long[] b, long[] r);
 
     /**
      * Multiply an IntegerPolynomial representation (a) with itself and store
      * the result in an IntegerPolynomialRepresentation (r). Requires that
      * a.length == r.length == numLimbs. It is allowed for a and r
      * to be the same array.
      */
-    protected abstract int square(long[] a, long[] r);
+    protected abstract void square(long[] a, long[] r);
 
     IntegerPolynomial(int bitsPerLimb,
                       int numLimbs,
@@ -622,8 +621,8 @@ public ImmutableElement multiply(IntegerModuloP genB) {
             }
 
             long[] newLimbs = new long[limbs.length];
-            int numAdds = mult(limbs, b.limbs, newLimbs);
-            return new ImmutableElement(newLimbs, numAdds);
+            mult(limbs, b.limbs, newLimbs);
+            return new ImmutableElement(newLimbs, 0);
         }
 
         @Override
@@ -635,8 +634,8 @@ public ImmutableElement square() {
             }
 
             long[] newLimbs = new long[limbs.length];
-            int numAdds = IntegerPolynomial.this.square(limbs, newLimbs);
-            return new ImmutableElement(newLimbs, numAdds);
+            IntegerPolynomial.this.square(limbs, newLimbs);
+            return new ImmutableElement(newLimbs, 0);
         }
 
         public void addModPowerTwo(IntegerModuloP arg, byte[] result) {
@@ -751,7 +750,8 @@ public MutableElement setProduct(IntegerModuloP genB) {
                 b.numAdds = 0;
             }
 
-            numAdds = mult(limbs, b.limbs, limbs);
+            mult(limbs, b.limbs, limbs);
+            numAdds = 0;
             return this;
         }
 
@@ -764,7 +764,8 @@ public MutableElement setProduct(SmallValue v) {
             }
 
             int value = ((Limb)v).value;
-            numAdds += multByInt(limbs, value);
+            multByInt(limbs, value);
+            numAdds = 0;
             return this;
         }
 
@@ -824,7 +825,8 @@ public MutableElement setSquare() {
                 numAdds = 0;
             }
 
-            numAdds = IntegerPolynomial.this.square(limbs, limbs);
+            IntegerPolynomial.this.square(limbs, limbs);
+            numAdds = 0;
             return this;
         }
 
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
@@ -50,7 +50,7 @@ private IntegerPolynomial1305() {
         super(BITS_PER_LIMB, NUM_LIMBS, 1, MODULUS);
     }
 
-    protected int mult(long[] a, long[] b, long[] r) {
+    protected void mult(long[] a, long[] b, long[] r) {
 
         // Use grade-school multiplication into primitives to avoid the
         // temporary array allocation. This is equivalent to the following
@@ -73,7 +73,6 @@ protected int mult(long[] a, long[] b, long[] r) {
         long c8 = (a[4] * b[4]);
 
         carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
-        return 0;
     }
 
     private void carryReduce(long[] r, long c0, long c1, long c2, long c3,
@@ -100,7 +99,7 @@ private void carryReduce(long[] r, long c0, long c1, long c2, long c3,
     }
 
     @Override
-    protected int square(long[] a, long[] r) {
+    protected void square(long[] a, long[] r) {
         // Use grade-school multiplication with a simple squaring optimization.
         // Multiply into primitives to avoid the temporary array allocation.
         // This is equivalent to the following code:
@@ -123,7 +122,6 @@ protected int square(long[] a, long[] r) {
         long c8 = (a[4] * a[4]);
 
         carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
-        return 0;
     }
 
     @Override
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
@@ -131,12 +131,11 @@ private void multOnly(long[] a, long[] b, long[] c) {
     }
 
     @Override
-    protected int mult(long[] a, long[] b, long[] r) {
+    protected void mult(long[] a, long[] b, long[] r) {
 
         long[] c = new long[2 * numLimbs];
         multOnly(a, b, c);
         carryReduce(c, r);
-        return 0;
     }
 
     private void modReduceInBits(long[] limbs, int index, int bits, long x) {
@@ -189,7 +188,7 @@ protected void reduce(long[] a) {
     }
 
     @Override
-    protected int square(long[] a, long[] r) {
+    protected void square(long[] a, long[] r) {
 
         long[] c = new long[2 * numLimbs];
         for (int i = 0; i < numLimbs; i++) {
@@ -200,7 +199,6 @@ protected int square(long[] a, long[] r) {
         }
 
         carryReduce(c, r);
-        return 0;
     }
 
     /**
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
@@ -31,6 +31,7 @@
 import sun.security.util.math.IntegerFieldModuloP;
 import java.math.BigInteger;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
+import jdk.internal.vm.annotation.ForceInline;
 
 // Reference:
 // - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve
@@ -103,8 +104,8 @@ public ImmutableElement getElement(BigInteger v) {
         setLimbsValuePositive(v, vLimbs);
 
         // Convert to Montgomery domain
-        int numAdds = mult(vLimbs, h, montLimbs);
-        return new ImmutableElement(montLimbs, numAdds);
+        mult(vLimbs, h, montLimbs);
+        return new ImmutableElement(montLimbs, 0);
     }
 
     @Override
@@ -114,24 +115,6 @@ public SmallValue getSmallValue(int value) {
         return super.getSmallValue(value);
     }
 
-    /*
-     * This function is used by IntegerPolynomial.setProduct(SmallValue v) to
-     * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a
-     * montgomery conversion followed by a montgomery multiplication, just use
-     * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1]
-     * Section 4 )
-     *
-     * Will return an unreduced value
-     */
-    @Override
-    protected int multByInt(long[] a, long b) {
-        assert (b < (1 << BITS_PER_LIMB));
-        for (int i = 0; i < a.length; i++) {
-            a[i] *= b;
-        }
-        return (int) (b - 1);
-    }
-
     @Override
     public ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP n) {
         assert n.getField() == MontgomeryIntegerPolynomialP256.ONE;
@@ -163,19 +146,27 @@ private void halfLimbs(long[] a, long[] r) {
     }
 
     @Override
-    protected int square(long[] a, long[] r) {
-        return mult(a, a, r);
+    protected void square(long[] a, long[] r) {
+        mult(a, a, r);
     }
 
+
     /**
      * Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P)
      *
      * See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication
      * for a Montgomery Friendly modulus p". Note: Step 6. Skipped; Instead use
      * numAdds to reuse existing overflow logic.
      */
+    @Override
+    protected void mult(long[] a, long[] b, long[] r) {
+        multImpl(a, b, r);   // intrinsic candidate; stores its limbs into r without the final subtraction
+        reducePositive(r);   // carries, masks and conditionally subtracts the modulus, in place
+    }
+
+    @ForceInline
     @IntrinsicCandidate
-    protected int mult(long[] a, long[] b, long[] r) {
+    private void multImpl(long[] a, long[] b, long[] r) {
         long aa0 = a[0];
         long aa1 = a[1];
         long aa2 = a[2];
@@ -408,36 +399,16 @@ protected int mult(long[] a, long[] b, long[] r) {
         d4 += n4 & LIMB_MASK;
 
         c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
-        c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB);
-        c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB);
-        c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB);
-        c9 = dd4 + (c8 >>> BITS_PER_LIMB);
-
-        c5 &= LIMB_MASK;
-        c6 &= LIMB_MASK;
-        c7 &= LIMB_MASK;
-        c8 &= LIMB_MASK;
-
-        // At this point, the result could overflow by one modulus.
-        c0 = c5 - modulus[0];
-        c1 = c6 - modulus[1] + (c0 >> BITS_PER_LIMB);
-        c0 &= LIMB_MASK;
-        c2 = c7 - modulus[2] + (c1 >> BITS_PER_LIMB);
-        c1 &= LIMB_MASK;
-        c3 = c8 - modulus[3] + (c2 >> BITS_PER_LIMB);
-        c2 &= LIMB_MASK;
-        c4 = c9 - modulus[4] + (c3 >> BITS_PER_LIMB);
-        c3 &= LIMB_MASK;
-
-        long mask = c4 >> BITS_PER_LIMB; // Signed shift!
-
-        r[0] = ((c5 & mask) | (c0 & ~mask));
-        r[1] = ((c6 & mask) | (c1 & ~mask));
-        r[2] = ((c7 & mask) | (c2 & ~mask));
-        r[3] = ((c8 & mask) | (c3 & ~mask));
-        r[4] = ((c9 & mask) | (c4 & ~mask));
-
-        return 0;
+        c6 += d2 + dd1;
+        c7 += d3 + dd2;
+        c8 += d4 + dd3;
+        c9 = dd4;
+
+        r[0] = c5;
+        r[1] = c6;
+        r[2] = c7;
+        r[3] = c8;
+        r[4] = c9;
     }
 
     @Override
@@ -516,8 +487,8 @@ public ImmutableElement getElement(byte[] v, int offset, int length,
         super.encode(v, offset, length, highByte, vLimbs);
 
         // Convert to Montgomery domain
-        int numAdds = mult(vLimbs, h, montLimbs);
-        return new ImmutableElement(montLimbs, numAdds);
+        mult(vLimbs, h, montLimbs);
+        return new ImmutableElement(montLimbs, 0);
     }
 
     /*
@@ -556,4 +527,27 @@ protected void reduceIn(long[] limbs, long v, int i) {
         limbs[i - 5] += (v << 4) & LIMB_MASK;
         limbs[i - 4] += v >> 48;
     }
+
+    // Reduce a in place; used when limbs a could overflow by one modulus.
+    @ForceInline
+    protected void reducePositive(long[] a) {
+        long aa0 = a[0]; // aa*: the input limbs with carries propagated upward
+        long aa1 = a[1] + (aa0>>BITS_PER_LIMB);
+        long aa2 = a[2] + (aa1>>BITS_PER_LIMB);
+        long aa3 = a[3] + (aa2>>BITS_PER_LIMB);
+        long aa4 = a[4] + (aa3>>BITS_PER_LIMB);
+
+        long c0 = a[0] - modulus[0]; // c*: a - modulus, borrows propagated upward
+        long c1 = a[1] - modulus[1] + (c0 >> BITS_PER_LIMB);
+        long c2 = a[2] - modulus[2] + (c1 >> BITS_PER_LIMB);
+        long c3 = a[3] - modulus[3] + (c2 >> BITS_PER_LIMB);
+        long c4 = a[4] - modulus[4] + (c3 >> BITS_PER_LIMB);
+        long mask = c4 >> BITS_PER_LIMB; // Signed shift! Presumably -1 if a < modulus, else 0 (TODO confirm range)
+
+        a[0] = ((aa0 & mask) | (c0 & ~mask)) & LIMB_MASK; // mask bits set: keep aa*; clear: take c*
+        a[1] = ((aa1 & mask) | (c1 & ~mask)) & LIMB_MASK;
+        a[2] = ((aa2 & mask) | (c2 & ~mask)) & LIMB_MASK;
+        a[3] = ((aa3 & mask) | (c3 & ~mask)) & LIMB_MASK;
+        a[4] = ((aa4 & mask) | (c4 & ~mask)); // top limb is stored without LIMB_MASK
+    }
 }

Original file line number	Diff line number	Diff line change
`@@ -7580,8 +7580,6 @@ bool LibraryCallKit::inline_intpoly_montgomeryMult_P256() {`
`7580`	`7580`	`OptoRuntime::intpoly_montgomeryMult_P256_Type(),`
`7581`	`7581`	`stubAddr, stubName, TypePtr::BOTTOM,`
`7582`	`7582`	`a_start, b_start, r_start);`
`7583`		`- Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));`
`7584`		`- set_result(result);`
`7585`	`7583`	`return true;`
`7586`	`7584`	`}`
`7587`	`7585`
Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@ private IntegerPolynomial1305() {`
`50`	`50`	`super(BITS_PER_LIMB, NUM_LIMBS, 1, MODULUS);`
`51`	`51`	`}`
`52`	`52`
`53`		`- protected int mult(long[] a, long[] b, long[] r) {`
	`53`	`+ protected void mult(long[] a, long[] b, long[] r) {`
`54`	`54`
`55`	`55`	`// Use grade-school multiplication into primitives to avoid the`
`56`	`56`	`// temporary array allocation. This is equivalent to the following`
`@@ -73,7 +73,6 @@ protected int mult(long[] a, long[] b, long[] r) {`
`73`	`73`	`long c8 = (a[4] * b[4]);`
`74`	`74`
`75`	`75`	`carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);`
`76`		`- return 0;`
`77`	`76`	`}`
`78`	`77`
`79`	`78`	`private void carryReduce(long[] r, long c0, long c1, long c2, long c3,`
`@@ -100,7 +99,7 @@ private void carryReduce(long[] r, long c0, long c1, long c2, long c3,`
`100`	`99`	`}`
`101`	`100`
`102`	`101`	`@Override`
`103`		`- protected int square(long[] a, long[] r) {`
	`102`	`+ protected void square(long[] a, long[] r) {`
`104`	`103`	`// Use grade-school multiplication with a simple squaring optimization.`
`105`	`104`	`// Multiply into primitives to avoid the temporary array allocation.`
`106`	`105`	`// This is equivalent to the following code:`
`@@ -123,7 +122,6 @@ protected int square(long[] a, long[] r) {`
`123`	`122`	`long c8 = (a[4] * a[4]);`
`124`	`123`
`125`	`124`	`carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);`
`126`		`- return 0;`
`127`	`125`	`}`
`128`	`126`
`129`	`127`	`@Override`
Original file line number	Diff line number	Diff line change
`@@ -131,12 +131,11 @@ private void multOnly(long[] a, long[] b, long[] c) {`
`131`	`131`	`}`
`132`	`132`
`133`	`133`	`@Override`
`134`		`- protected int mult(long[] a, long[] b, long[] r) {`
	`134`	`+ protected void mult(long[] a, long[] b, long[] r) {`
`135`	`135`
`136`	`136`	`long[] c = new long[2 * numLimbs];`
`137`	`137`	`multOnly(a, b, c);`
`138`	`138`	`carryReduce(c, r);`
`139`		`- return 0;`
`140`	`139`	`}`
`141`	`140`
`142`	`141`	`private void modReduceInBits(long[] limbs, int index, int bits, long x) {`
`@@ -189,7 +188,7 @@ protected void reduce(long[] a) {`
`189`	`188`	`}`
`190`	`189`
`191`	`190`	`@Override`
`192`		`- protected int square(long[] a, long[] r) {`
	`191`	`+ protected void square(long[] a, long[] r) {`
`193`	`192`
`194`	`193`	`long[] c = new long[2 * numLimbs];`
`195`	`194`	`for (int i = 0; i < numLimbs; i++) {`
`@@ -200,7 +199,6 @@ protected int square(long[] a, long[] r) {`
`200`	`199`	`}`
`201`	`200`
`202`	`201`	`carryReduce(c, r);`
`203`		`- return 0;`
`204`	`202`	`}`
`205`	`203`
`206`	`204`	`/**`