Skip to content

Commit 2033fb9

Browse files
authored
Merge pull request #206 from czurnieden/bn_mul_balance
Multiplication balancing for the Toom-Cook algorithms
2 parents 7f42ce0 + 27ca25c commit 2033fb9

12 files changed

+1992
-286
lines changed

bn_mp_balance_mul.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#include "tommath_private.h"
2+
#ifdef BN_MP_BALANCE_MUL_C
3+
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
4+
/* SPDX-License-Identifier: Unlicense */
5+
6+
/* single-digit multiplication with the smaller number as the single-digit */
7+
int mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c)
8+
{
9+
int e, count, len_a, len_b, nblocks, i, j, bsize;
10+
mp_int a0, tmp, A, B, r;
11+
12+
len_a = a->used;
13+
len_b = b->used;
14+
15+
nblocks = MAX(a->used, b->used) / MIN(a->used, b->used);
16+
bsize = MIN(a->used, b->used) ;
17+
e = MP_OKAY;
18+
19+
if ((e = mp_init_size(&a0, bsize + 2)) != MP_OKAY) {
20+
return e;
21+
}
22+
if ((e = mp_init_multi(&tmp, &r, NULL)) != MP_OKAY) {
23+
mp_clear(&a0);
24+
return e;
25+
}
26+
27+
/* Make sure that A is the larger one*/
28+
if (len_a < len_b) {
29+
B = *a;
30+
A = *b;
31+
} else {
32+
A = *a;
33+
B = *b;
34+
}
35+
36+
for (i = 0, j=0; i < nblocks; i++) {
37+
/* Cut a slice off of a */
38+
a0.used = 0;
39+
for (count = 0; count < bsize; count++) {
40+
a0.dp[count] = A.dp[ j++ ];
41+
a0.used++;
42+
}
43+
/* Multiply with b */
44+
if ((e = mp_mul(&a0, &B, &tmp)) != MP_OKAY) {
45+
goto LBL_ERR;
46+
}
47+
/* Shift tmp to the correct position */
48+
if ((e = mp_lshd(&tmp, bsize * i)) != MP_OKAY) {
49+
goto LBL_ERR;
50+
}
51+
/* Add to output. No carry needed */
52+
if ((e = mp_add(&r, &tmp, &r)) != MP_OKAY) {
53+
goto LBL_ERR;
54+
}
55+
}
56+
/* The left-overs; there are always left-overs */
57+
if (j < A.used) {
58+
a0.used = 0;
59+
for (count = 0; j < A.used; count++) {
60+
a0.dp[count] = A.dp[ j++ ];
61+
a0.used++;
62+
}
63+
if ((e = mp_mul(&a0, &B, &tmp)) != MP_OKAY) {
64+
goto LBL_ERR;
65+
}
66+
if ((e = mp_lshd(&tmp, bsize * i)) != MP_OKAY) {
67+
goto LBL_ERR;
68+
}
69+
if ((e = mp_add(&r, &tmp, &r)) != MP_OKAY) {
70+
goto LBL_ERR;
71+
}
72+
}
73+
74+
mp_exch(&r,c);
75+
LBL_ERR:
76+
mp_clear_multi(&a0, &tmp, &r,NULL);
77+
return e;
78+
}
79+
#endif

bn_mp_mul.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,41 @@
77
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
88
{
99
int res, neg;
10+
#ifdef BN_MP_BALANCE_MUL_C
11+
int len_b, len_a;
12+
#endif
1013
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
14+
#ifdef BN_MP_BALANCE_MUL_C
15+
len_a = a->used;
16+
len_b = b->used;
17+
18+
if (len_a == len_b) {
19+
goto GO_ON;
20+
}
21+
/*
22+
* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off.
23+
* The bigger one needs to be at least about one KARATSUBA_MUL_CUTOFF bigger
24+
* to make some sense, but it depends on architecture, OS, position of the
25+
* stars... so YMMV.
26+
* Using it to cut the input into slices small enough for fast_s_mp_mul_digs
27+
* was actually slower on the author's machine, but YMMV.
28+
*/
29+
if ((MIN(len_a, len_b) < KARATSUBA_MUL_CUTOFF)
30+
|| ((MAX(len_a, len_b)) / 2 < KARATSUBA_MUL_CUTOFF)) {
31+
goto GO_ON;
32+
}
33+
/*
34+
* Not much effect was observed below a ratio of 1:2, but again: YMMV.
35+
*/
36+
if ((MAX(len_a, len_b) / MIN(len_a, len_b)) < 2) {
37+
goto GO_ON;
38+
}
39+
40+
res = mp_balance_mul(a,b,c);
41+
goto END;
42+
43+
GO_ON:
44+
#endif
1145

1246
/* use Toom-Cook? */
1347
#ifdef BN_MP_TOOM_MUL_C
@@ -45,7 +79,9 @@ int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
4579
#endif
4680
}
4781
}
82+
END:
4883
c->sign = (c->used > 0) ? neg : MP_ZPOS;
4984
return res;
5085
}
5186
#endif
87+

0 commit comments

Comments
 (0)