3131import sun .security .util .math .IntegerFieldModuloP ;
3232import java .math .BigInteger ;
3333import jdk .internal .vm .annotation .IntrinsicCandidate ;
34+ import jdk .internal .vm .annotation .ForceInline ;
3435
3536// Reference:
3637// - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve
@@ -103,8 +104,8 @@ public ImmutableElement getElement(BigInteger v) {
103104 setLimbsValuePositive (v , vLimbs );
104105
105106 // Convert to Montgomery domain
106- int numAdds = mult (vLimbs , h , montLimbs );
107- return new ImmutableElement (montLimbs , numAdds );
107+ mult (vLimbs , h , montLimbs );
108+ return new ImmutableElement (montLimbs , 0 );
108109 }
109110
110111 @ Override
@@ -114,24 +115,6 @@ public SmallValue getSmallValue(int value) {
114115 return super .getSmallValue (value );
115116 }
116117
117- /*
118- * This function is used by IntegerPolynomial.setProduct(SmallValue v) to
119- * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a
120- * montgomery conversion followed by a montgomery multiplication, just use
121- * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1]
122- * Section 4 )
123- *
124- * Will return an unreduced value
125- */
126- @ Override
127- protected int multByInt (long [] a , long b ) {
128- assert (b < (1 << BITS_PER_LIMB ));
129- for (int i = 0 ; i < a .length ; i ++) {
130- a [i ] *= b ;
131- }
132- return (int ) (b - 1 );
133- }
134-
135118 @ Override
136119 public ImmutableIntegerModuloP fromMontgomery (ImmutableIntegerModuloP n ) {
137120 assert n .getField () == MontgomeryIntegerPolynomialP256 .ONE ;
@@ -163,19 +146,27 @@ private void halfLimbs(long[] a, long[] r) {
163146 }
164147
165148 @ Override
166- protected int square (long [] a , long [] r ) {
167- return mult (a , a , r );
149+ protected void square (long [] a , long [] r ) {
150+ mult (a , a , r );
168151 }
169152
153+
170154 /**
171155 * Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P)
172156 *
173157 * See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication
174158 * for a Montgomery Friendly modulus p". Note: Step 6. Skipped; Instead use
175159 * numAdds to reuse existing overflow logic.
176160 */
161+ @ Override
162+ protected void mult (long [] a , long [] b , long [] r ) {
163+ multImpl (a , b , r );
164+ reducePositive (r );
165+ }
166+
167+ @ ForceInline
177168 @ IntrinsicCandidate
178- protected int mult (long [] a , long [] b , long [] r ) {
169+ private void multImpl (long [] a , long [] b , long [] r ) {
179170 long aa0 = a [0 ];
180171 long aa1 = a [1 ];
181172 long aa2 = a [2 ];
@@ -408,36 +399,16 @@ protected int mult(long[] a, long[] b, long[] r) {
408399 d4 += n4 & LIMB_MASK ;
409400
410401 c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB );
411- c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB );
412- c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB );
413- c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB );
414- c9 = dd4 + (c8 >>> BITS_PER_LIMB );
415-
416- c5 &= LIMB_MASK ;
417- c6 &= LIMB_MASK ;
418- c7 &= LIMB_MASK ;
419- c8 &= LIMB_MASK ;
420-
421- // At this point, the result could overflow by one modulus.
422- c0 = c5 - modulus [0 ];
423- c1 = c6 - modulus [1 ] + (c0 >> BITS_PER_LIMB );
424- c0 &= LIMB_MASK ;
425- c2 = c7 - modulus [2 ] + (c1 >> BITS_PER_LIMB );
426- c1 &= LIMB_MASK ;
427- c3 = c8 - modulus [3 ] + (c2 >> BITS_PER_LIMB );
428- c2 &= LIMB_MASK ;
429- c4 = c9 - modulus [4 ] + (c3 >> BITS_PER_LIMB );
430- c3 &= LIMB_MASK ;
431-
432- long mask = c4 >> BITS_PER_LIMB ; // Signed shift!
433-
434- r [0 ] = ((c5 & mask ) | (c0 & ~mask ));
435- r [1 ] = ((c6 & mask ) | (c1 & ~mask ));
436- r [2 ] = ((c7 & mask ) | (c2 & ~mask ));
437- r [3 ] = ((c8 & mask ) | (c3 & ~mask ));
438- r [4 ] = ((c9 & mask ) | (c4 & ~mask ));
439-
440- return 0 ;
402+ c6 += d2 + dd1 ;
403+ c7 += d3 + dd2 ;
404+ c8 += d4 + dd3 ;
405+ c9 = dd4 ;
406+
407+ r [0 ] = c5 ;
408+ r [1 ] = c6 ;
409+ r [2 ] = c7 ;
410+ r [3 ] = c8 ;
411+ r [4 ] = c9 ;
441412 }
442413
443414 @ Override
@@ -516,8 +487,8 @@ public ImmutableElement getElement(byte[] v, int offset, int length,
516487 super .encode (v , offset , length , highByte , vLimbs );
517488
518489 // Convert to Montgomery domain
519- int numAdds = mult (vLimbs , h , montLimbs );
520- return new ImmutableElement (montLimbs , numAdds );
490+ mult (vLimbs , h , montLimbs );
491+ return new ImmutableElement (montLimbs , 0 );
521492 }
522493
523494 /*
@@ -556,4 +527,27 @@ protected void reduceIn(long[] limbs, long v, int i) {
556527 limbs [i - 5 ] += (v << 4 ) & LIMB_MASK ;
557528 limbs [i - 4 ] += v >> 48 ;
558529 }
530+
531+ // Used when limbs a could overflow by one modulus.
532+ @ ForceInline
533+ protected void reducePositive (long [] a ) {
534+ long aa0 = a [0 ];
535+ long aa1 = a [1 ] + (aa0 >>BITS_PER_LIMB );
536+ long aa2 = a [2 ] + (aa1 >>BITS_PER_LIMB );
537+ long aa3 = a [3 ] + (aa2 >>BITS_PER_LIMB );
538+ long aa4 = a [4 ] + (aa3 >>BITS_PER_LIMB );
539+
540+ long c0 = a [0 ] - modulus [0 ];
541+ long c1 = a [1 ] - modulus [1 ] + (c0 >> BITS_PER_LIMB );
542+ long c2 = a [2 ] - modulus [2 ] + (c1 >> BITS_PER_LIMB );
543+ long c3 = a [3 ] - modulus [3 ] + (c2 >> BITS_PER_LIMB );
544+ long c4 = a [4 ] - modulus [4 ] + (c3 >> BITS_PER_LIMB );
545+ long mask = c4 >> BITS_PER_LIMB ; // Signed shift!
546+
547+ a [0 ] = ((aa0 & mask ) | (c0 & ~mask )) & LIMB_MASK ;
548+ a [1 ] = ((aa1 & mask ) | (c1 & ~mask )) & LIMB_MASK ;
549+ a [2 ] = ((aa2 & mask ) | (c2 & ~mask )) & LIMB_MASK ;
550+ a [3 ] = ((aa3 & mask ) | (c3 & ~mask )) & LIMB_MASK ;
551+ a [4 ] = ((aa4 & mask ) | (c4 & ~mask ));
552+ }
559553}
0 commit comments