diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 3b3b696f99d46..a30325503262c 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -33,220 +33,414 @@ #include #include #include +#include #include "private.h" /* For _bc_rm_leading_zeros() */ #include "zend_alloc.h" -/* Recursive vs non-recursive multiply crossover ranges. */ -#if defined(MULDIGITS) -#include "muldigits.h" + +#if SIZEOF_SIZE_T >= 8 +# define BC_MUL_UINT_DIGITS 8 +# define BC_MUL_UINT_OVERFLOW 100000000 #else -#define MUL_BASE_DIGITS 80 +# define BC_MUL_UINT_DIGITS 4 +# define BC_MUL_UINT_OVERFLOW 10000 #endif -int mul_base_digits = MUL_BASE_DIGITS; -#define MUL_SMALL_DIGITS mul_base_digits/4 +#define BC_UINT_TO_BCD_ONE_DIGIT(num, bcd) do { \ + bcd = num % 10; \ + num /= 10; \ +} while (0) + /* Multiply utility routines */ -static bc_num new_sub_num(size_t length, size_t scale, char *value) +/* + * Converts BCD to uint, going backwards from pointer n by the number of + * characters specified by len. + * + * Since the upper limit of len is known, open the loop to save loop cost. + */ +static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) { - bc_num temp = (bc_num) emalloc(sizeof(bc_struct)); - - temp->n_sign = PLUS; - temp->n_len = length; - temp->n_scale = scale; - temp->n_refs = 1; - temp->n_value = value; - return temp; + BC_UINT_T num = n[0]; + + switch (len) { + case 1: + return num; + case 2: + num += n[-1] * 10; + return num; + case 3: + num += n[-1] * 10; + num += n[-2] * 100; + return num; + case 4: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + return num; +#if BC_MUL_UINT_DIGITS == 8 + case 5: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + return num; + case 6: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + return num; + case 7: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + return num; + case 8: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + num += n[-7] * 10000000; + return num; +#endif + } + + return num; } -static void _bc_simp_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) +/* + * Since the number of digits is fixed, open the loop and saves cost. + */ +static inline BC_UINT_T bc_partial_convert_to_uint_fixed_digits(const char *n) { - char *n1ptr, *n2ptr, *pvptr; - char *n1end, *n2end; /* To the end of n1 and n2. */ - int sum = 0; - - int prodlen = n1len + n2len + 1; + BC_UINT_T num = n[0]; + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; +#if BC_MUL_UINT_DIGITS == 8 + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + num += n[-7] * 10000000; +#endif - *prod = bc_new_num_nonzeroed(prodlen, 0); + return num; +} - n1end = (char *) (n1->n_value + n1len - 1); - n2end = (char *) (n2->n_value + n2len - 1); - pvptr = (char *) ((*prod)->n_value + prodlen - 1); - - /* Here is the loop... */ - for (int index = 0; index < prodlen - 1; index++) { - n1ptr = (char *) (n1end - MAX(0, index - n2len + 1)); - n2ptr = (char *) (n2end - MIN(index, n2len - 1)); - while ((n1ptr >= n1->n_value) && (n2ptr <= n2end)) { - sum += *n1ptr * *n2ptr; - n1ptr--; - n2ptr++; - } - *pvptr-- = sum % BASE; - sum = sum / BASE; +/* + * Convert HEX to BCD. The number of digits can be up to 16 decimal digits (64-bit). + */ +static inline void bc_uint_to_bcd(BC_UINT_T num, char *bcd, size_t len) +{ + switch (len) { + case 0: + return; + case 1: + *bcd = num % 10; + return; + case 2: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + bcd[-1] = num % 10; + return; + case 3: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + bcd[-2] = num % 10; + return; + case 4: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + bcd[-3] = num % 10; + return; + case 5: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + bcd[-4] = num % 10; + return; + case 6: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + bcd[-5] = num % 10; + return; + case 7: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + bcd[-6] = num % 10; + return; + case 8: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + bcd[-7] = num % 10; + return; +#if BC_MUL_UINT_DIGITS == 8 + case 9: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + bcd[-8] = num % 10; + return; + case 10: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + bcd[-9] = num % 10; + return; + case 11: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + bcd[-10] = num % 10; + return; + case 12: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + bcd[-11] = num % 10; + return; + case 13: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + bcd[-12] = num % 10; + return; + case 14: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + bcd[-13] = num % 10; + return; + case 15: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-13]); + bcd[-14] = num % 10; + return; + case 16: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-13]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-14]); + bcd[-15] = num % 10; + return; +#endif } - *pvptr = sum; } - -/* A special adder/subtractor for the recursive divide and conquer - multiply algorithm. Note: if sub is called, accum must - be larger that what is being subtracted. Also, accum and val - must have n_scale = 0. (e.g. they must look like integers. *) */ -static void _bc_shift_addsub(bc_num accum, bc_num val, int shift, bool sub) +/* + * Converts 8 decimal digits of HEX to 8 digits of BCD (64-bit). + * Since the number of digits is fixed, open the loop and saves cost. + */ +static inline BC_UINT_T bc_uint_to_bcd_fixed_digits(BC_UINT_T num, char *bcd) { - signed char *accp, *valp; - unsigned int carry = 0; - size_t count = val->n_len; + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); +#if BC_MUL_UINT_DIGITS == 8 + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); +#endif - if (val->n_value[0] == 0) { - count--; - } - assert(accum->n_len + accum->n_scale >= shift + count); - - /* Set up pointers and others */ - accp = (signed char *) (accum->n_value + accum->n_len + accum->n_scale - shift - 1); - valp = (signed char *) (val->n_value + val->n_len - 1); - - if (sub) { - /* Subtraction, carry is really borrow. */ - while (count--) { - *accp -= *valp-- + carry; - if (*accp < 0) { - carry = 1; - *accp-- += BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp -= carry; - if (*accp < 0) { - *accp-- += BASE; - } else { - carry = 0; - } - } - } else { - /* Addition */ - while (count--) { - *accp += *valp-- + carry; - if (*accp > (BASE - 1)) { - carry = 1; - *accp-- -= BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp += carry; - if (*accp > (BASE - 1)) { - *accp-- -= BASE; - } else { - carry = 0; - } - } - } + return num; } -/* Recursive divide and conquer multiply algorithm. - Based on - Let u = u0 + u1*(b^n) - Let v = v0 + v1*(b^n) - Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0 - - B is the base of storage, number of digits in u1,u0 close to equal. -*/ -static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *prod) +/* + * If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, + * the calculation will be performed at high speed without using an array. + */ +static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - bc_num u0, u1, v0, v1; - bc_num m1, m2, m3; - size_t n; - bool m1zero; - - /* Base case? */ - if ((ulen + vlen) < mul_base_digits - || ulen < MUL_SMALL_DIGITS - || vlen < MUL_SMALL_DIGITS - ) { - _bc_simp_mul(u, ulen, v, vlen, prod); - return; - } + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; - /* Calculate n -- the u and v split point in digits. */ - n = (MAX(ulen, vlen) + 1) / 2; + BC_UINT_T n1_uint = bc_partial_convert_to_uint(n1end, n1len); + BC_UINT_T n2_uint = bc_partial_convert_to_uint(n2end, n2len); + BC_UINT_T prod_uint = n1_uint * n2_uint; - /* Split u and v. */ - if (ulen < n) { - u1 = bc_copy_num(BCG(_zero_)); - u0 = new_sub_num(ulen, 0, u->n_value); - } else { - u1 = new_sub_num(ulen - n, 0, u->n_value); - u0 = new_sub_num(n, 0, u->n_value + ulen - n); - } - if (vlen < n) { - v1 = bc_copy_num(BCG(_zero_)); - v0 = new_sub_num(vlen, 0, v->n_value); - } else { - v1 = new_sub_num(vlen - n, 0, v->n_value); - v0 = new_sub_num(n, 0, v->n_value + vlen - n); - } - _bc_rm_leading_zeros(u1); - _bc_rm_leading_zeros(u0); - _bc_rm_leading_zeros(v1); - _bc_rm_leading_zeros(v0); + size_t prodlen = n1len + n2len; + *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pend = (*prod)->n_value + prodlen - 1; - m1zero = bc_is_zero(u1) || bc_is_zero(v1); + bc_uint_to_bcd(prod_uint, pend, prodlen); +} + +/* + * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_UINT_Ts. + * The array is generated starting with the smaller digits. + * e.g. 12345678901234567890 => {34567890, 56789012, 1234} + * + * Multiply and add these groups of numbers to perform multiplication fast. + * How much to shift the digits when adding values ​​can be calculated from the index of the array. + */ +static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) +{ + size_t i; + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; + size_t prodlen = n1len + n2len; - /* Calculate sub results ... */ + size_t n1_arr_size = n1len / BC_MUL_UINT_DIGITS + (n1len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t n2_arr_size = n2len / BC_MUL_UINT_DIGITS + (n2len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - bc_num d1 = bc_sub(u1, u0, 0); - bc_num d2 = bc_sub(v0, v1, 0); + BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); + BC_UINT_T *n1_uint = buf; + BC_UINT_T *n2_uint = buf + n1_arr_size; + BC_UINT_T *prod_uint = n2_uint + n2_arr_size; - /* Do recursive multiplies and shifted adds. */ - if (m1zero) { - m1 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u1, u1->n_len, v1, v1->n_len, &m1); + for (i = 0; i < prod_arr_size; i++) { + prod_uint[i] = 0; } - if (bc_is_zero(d1) || bc_is_zero(d2)) { - m2 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(d1, d1->n_len, d2, d2->n_len, &m2); + /* Convert n1 to uint[] */ + i = 0; + while (n1len >= BC_MUL_UINT_DIGITS) { + n1_uint[i] = bc_partial_convert_to_uint_fixed_digits(n1end); + n1end -= BC_MUL_UINT_DIGITS; + n1len -= BC_MUL_UINT_DIGITS; + i++; + } + if (n1len > 0) { + n1_uint[i] = bc_partial_convert_to_uint(n1end, n1len); } - if (bc_is_zero(u0) || bc_is_zero(v0)) { - m3 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u0, u0->n_len, v0, v0->n_len, &m3); + /* Convert n2 to uint[] */ + i = 0; + while (n2len >= BC_MUL_UINT_DIGITS) { + n2_uint[i] = bc_partial_convert_to_uint_fixed_digits(n2end); + n2end -= BC_MUL_UINT_DIGITS; + n2len -= BC_MUL_UINT_DIGITS; + i++; + } + if (n2len > 0) { + n2_uint[i] = bc_partial_convert_to_uint(n2end, n2len); + } + + /* Multiplication and addition */ + for (i = 0; i < n1_arr_size; i++) { + for (size_t j = 0; j < n2_arr_size; j++) { + prod_uint[i + j] += n1_uint[i] * n2_uint[j]; + } } - /* Initialize product */ - *prod = bc_new_num(ulen + vlen + 1, 0); + /* + * Move a value exceeding 4/8 digits by carrying to the next digit. + * However, the last digit does nothing. + */ + for (i = 0; i < prod_arr_size - 1; i++) { + prod_uint[i + 1] += prod_uint[i] / BC_MUL_UINT_OVERFLOW; + prod_uint[i] %= BC_MUL_UINT_OVERFLOW; + } - if (!m1zero) { - _bc_shift_addsub(*prod, m1, 2 * n, false); - _bc_shift_addsub(*prod, m1, n, false); + /* Convert to bc_num */ + *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pend = (*prod)->n_value + prodlen - 1; + for (i = 0; i < prod_arr_size - 1; i++) { + prod_uint[i] = bc_uint_to_bcd_fixed_digits(prod_uint[i], pend); + pend -= BC_MUL_UINT_DIGITS; + prodlen -= BC_MUL_UINT_DIGITS; } - _bc_shift_addsub(*prod, m3, n, false); - _bc_shift_addsub(*prod, m3, 0, false); - _bc_shift_addsub(*prod, m2, n, d1->n_sign != d2->n_sign); - - /* Now clean up! */ - bc_free_num (&u1); - bc_free_num (&u0); - bc_free_num (&v1); - bc_free_num (&m1); - bc_free_num (&v0); - bc_free_num (&m2); - bc_free_num (&m3); - bc_free_num (&d1); - bc_free_num (&d2); + + /* + * The last digit may carry over. + * Also need to fill it to the end with zeros, so loop until the end of the string. + */ + bc_uint_to_bcd(prod_uint[i], pend, prodlen); + + efree(buf); } /* The multiply routine. N2 times N1 is put int PROD with the scale of @@ -255,26 +449,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) { - bc_num pval; - size_t len1, len2; - size_t full_scale, prod_scale; + bc_num prod; /* Initialize things. */ - len1 = n1->n_len + n1->n_scale; - len2 = n2->n_len + n2->n_scale; - full_scale = n1->n_scale + n2->n_scale; - prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); + size_t len1 = n1->n_len + n1->n_scale; + size_t len2 = n2->n_len + n2->n_scale; + size_t full_scale = n1->n_scale + n2->n_scale; + size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - _bc_rec_mul(n1, len1, n2, len2, &pval); + if (EXPECTED(len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS)) { + bc_fast_mul(n1, len1, n2, len2, &prod); + } else { + bc_standard_mul(n1, len1, n2, len2, &prod); + } /* Assign to prod and clean up the number. */ - pval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); - pval->n_len = len2 + len1 + 1 - full_scale; - pval->n_scale = prod_scale; - _bc_rm_leading_zeros(pval); - if (bc_is_zero(pval)) { - pval->n_sign = PLUS; + prod->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); + prod->n_len -= full_scale; + prod->n_scale = prod_scale; + _bc_rm_leading_zeros(prod); + if (bc_is_zero(prod)) { + prod->n_sign = PLUS; } - return pval; + return prod; }