From 91b93433bc8e20f655bd307bfe0664876d9672a3 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 17:40:23 +0900 Subject: [PATCH 1/9] Changed the bcmul calculation method Multiplication is performed after converting to unsigned long, resulting in faster calculations. --- ext/bcmath/libbcmath/src/private.h | 4 + ext/bcmath/libbcmath/src/recmul.c | 313 +++++++++++------------------ 2 files changed, 120 insertions(+), 197 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 1403baad0c257..fa538150ef73a 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,9 +84,13 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t +# define BC_LONGABLE_DIGITS 8 +# define BC_LONGABLE_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t +# define BC_LONGABLE_DIGITS 4 +# define BC_LONGABLE_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 3b3b696f99d46..75ace8a88355a 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -36,217 +36,134 @@ #include "private.h" /* For _bc_rm_leading_zeros() */ #include "zend_alloc.h" -/* Recursive vs non-recursive multiply crossover ranges. */ -#if defined(MULDIGITS) -#include "muldigits.h" -#else -#define MUL_BASE_DIGITS 80 -#endif - -int mul_base_digits = MUL_BASE_DIGITS; -#define MUL_SMALL_DIGITS mul_base_digits/4 /* Multiply utility routines */ -static bc_num new_sub_num(size_t length, size_t scale, char *value) +/* + * Converts BCD to long, going backwards from pointer n by the number of + * characters specified by len. + */ +static inline unsigned long bc_partial_convert_to_long(const char *n, size_t len) { - bc_num temp = (bc_num) emalloc(sizeof(bc_struct)); + unsigned long num = 0; + unsigned long base = 1; - temp->n_sign = PLUS; - temp->n_len = length; - temp->n_scale = scale; - temp->n_refs = 1; - temp->n_value = value; - return temp; + for (size_t i = 0; i < len; i++) { + num += *n * base; + base *= BASE; + n--; + } + + return num; } -static void _bc_simp_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) +/* + * If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, + * the calculation will be performed at high speed without using an array. + */ +static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - char *n1ptr, *n2ptr, *pvptr; - char *n1end, *n2end; /* To the end of n1 and n2. */ - int sum = 0; + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; - int prodlen = n1len + n2len + 1; + unsigned long n1_l = bc_partial_convert_to_long(n1end, n1len); + unsigned long n2_l = bc_partial_convert_to_long(n2end, n2len); + unsigned long prod_l = n1_l * n2_l; + size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pptr = (*prod)->n_value; + char *pend = pptr + prodlen - 1; - n1end = (char *) (n1->n_value + n1len - 1); - n2end = (char *) (n2->n_value + n2len - 1); - pvptr = (char *) ((*prod)->n_value + prodlen - 1); - - /* Here is the loop... */ - for (int index = 0; index < prodlen - 1; index++) { - n1ptr = (char *) (n1end - MAX(0, index - n2len + 1)); - n2ptr = (char *) (n2end - MIN(index, n2len - 1)); - while ((n1ptr >= n1->n_value) && (n2ptr <= n2end)) { - sum += *n1ptr * *n2ptr; - n1ptr--; - n2ptr++; - } - *pvptr-- = sum % BASE; - sum = sum / BASE; + while (pend >= pptr) { + *pend-- = prod_l % BASE; + prod_l /= BASE; } - *pvptr = sum; } - -/* A special adder/subtractor for the recursive divide and conquer - multiply algorithm. Note: if sub is called, accum must - be larger that what is being subtracted. Also, accum and val - must have n_scale = 0. (e.g. they must look like integers. *) */ -static void _bc_shift_addsub(bc_num accum, bc_num val, int shift, bool sub) +/* + * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs. + * The array is generated starting with the smaller digits. + * e.g. 12345678901234567890 => {34567890, 56789012, 1234} + * + * Multiply and add these groups of numbers to perform multiplication fast. + * How much to shift the digits when adding values ​​can be calculated from the index of the array. + */ +static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - signed char *accp, *valp; - unsigned int carry = 0; - size_t count = val->n_len; - - if (val->n_value[0] == 0) { - count--; + size_t i; + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; + size_t prodlen = n1len + n2len; + + size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; + + unsigned long n1_l[n1_arr_size]; + unsigned long n2_l[n2_arr_size]; + unsigned long prod_l[prod_arr_size]; + for (i = 0; i < prod_arr_size; i++) { + prod_l[i] = 0; } - assert(accum->n_len + accum->n_scale >= shift + count); - - /* Set up pointers and others */ - accp = (signed char *) (accum->n_value + accum->n_len + accum->n_scale - shift - 1); - valp = (signed char *) (val->n_value + val->n_len - 1); - - if (sub) { - /* Subtraction, carry is really borrow. */ - while (count--) { - *accp -= *valp-- + carry; - if (*accp < 0) { - carry = 1; - *accp-- += BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp -= carry; - if (*accp < 0) { - *accp-- += BASE; - } else { - carry = 0; - } - } - } else { - /* Addition */ - while (count--) { - *accp += *valp-- + carry; - if (*accp > (BASE - 1)) { - carry = 1; - *accp-- -= BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp += carry; - if (*accp > (BASE - 1)) { - *accp-- -= BASE; - } else { - carry = 0; - } - } - } -} - -/* Recursive divide and conquer multiply algorithm. - Based on - Let u = u0 + u1*(b^n) - Let v = v0 + v1*(b^n) - Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0 - B is the base of storage, number of digits in u1,u0 close to equal. -*/ -static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *prod) -{ - bc_num u0, u1, v0, v1; - bc_num m1, m2, m3; - size_t n; - bool m1zero; - - /* Base case? */ - if ((ulen + vlen) < mul_base_digits - || ulen < MUL_SMALL_DIGITS - || vlen < MUL_SMALL_DIGITS - ) { - _bc_simp_mul(u, ulen, v, vlen, prod); - return; + /* Convert n1 to long[] */ + i = 0; + while (n1len > 0) { + size_t len = MIN(BC_LONGABLE_DIGITS, n1len); + n1_l[i] = bc_partial_convert_to_long(n1end, len); + n1end -= len; + n1len -= len; + i++; } - /* Calculate n -- the u and v split point in digits. */ - n = (MAX(ulen, vlen) + 1) / 2; - - /* Split u and v. */ - if (ulen < n) { - u1 = bc_copy_num(BCG(_zero_)); - u0 = new_sub_num(ulen, 0, u->n_value); - } else { - u1 = new_sub_num(ulen - n, 0, u->n_value); - u0 = new_sub_num(n, 0, u->n_value + ulen - n); - } - if (vlen < n) { - v1 = bc_copy_num(BCG(_zero_)); - v0 = new_sub_num(vlen, 0, v->n_value); - } else { - v1 = new_sub_num(vlen - n, 0, v->n_value); - v0 = new_sub_num(n, 0, v->n_value + vlen - n); + /* Convert n2 to long[] */ + i = 0; + while (n2len > 0) { + size_t len = MIN(BC_LONGABLE_DIGITS, n2len); + n2_l[i] = bc_partial_convert_to_long(n2end, len); + n2end -= len; + n2len -= len; + i++; } - _bc_rm_leading_zeros(u1); - _bc_rm_leading_zeros(u0); - _bc_rm_leading_zeros(v1); - _bc_rm_leading_zeros(v0); - - m1zero = bc_is_zero(u1) || bc_is_zero(v1); - - /* Calculate sub results ... */ - - bc_num d1 = bc_sub(u1, u0, 0); - bc_num d2 = bc_sub(v0, v1, 0); - - /* Do recursive multiplies and shifted adds. */ - if (m1zero) { - m1 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u1, u1->n_len, v1, v1->n_len, &m1); + /* Multiplication and addition */ + for (i = 0; i < n1_arr_size; i++) { + for (size_t j = 0; j < n2_arr_size; j++) { + prod_l[i + j] += n1_l[i] * n2_l[j]; + } } - if (bc_is_zero(d1) || bc_is_zero(d2)) { - m2 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(d1, d1->n_len, d2, d2->n_len, &m2); + /* + * Move a value exceeding 8 digits by carrying to the next digit. + * However, the last digit does nothing. + */ + for (i = 0; i < prod_arr_size - 1; i++) { + prod_l[i + 1] += prod_l[i] / BC_LONGABLE_OVERFLOW; + prod_l[i] %= BC_LONGABLE_OVERFLOW; } - if (bc_is_zero(u0) || bc_is_zero(v0)) { - m3 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u0, u0->n_len, v0, v0->n_len, &m3); + /* Convert to bc_num */ + *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pptr = (*prod)->n_value; + char *pend = pptr + prodlen - 1; + i = 0; + while (i < prod_arr_size - 1) { + for (size_t j = 0; j < BC_LONGABLE_DIGITS; j++) { + *pend-- = prod_l[i] % BASE; + prod_l[i] /= BASE; + } + i++; } - /* Initialize product */ - *prod = bc_new_num(ulen + vlen + 1, 0); - - if (!m1zero) { - _bc_shift_addsub(*prod, m1, 2 * n, false); - _bc_shift_addsub(*prod, m1, n, false); + /* + * The last digit may carry over. + * Also need to fill it to the end with zeros, so loop until the end of the string. + */ + while (pend >= pptr) { + *pend-- = prod_l[i] % BASE; + prod_l[i] /= BASE; } - _bc_shift_addsub(*prod, m3, n, false); - _bc_shift_addsub(*prod, m3, 0, false); - _bc_shift_addsub(*prod, m2, n, d1->n_sign != d2->n_sign); - - /* Now clean up! */ - bc_free_num (&u1); - bc_free_num (&u0); - bc_free_num (&v1); - bc_free_num (&m1); - bc_free_num (&v0); - bc_free_num (&m2); - bc_free_num (&m3); - bc_free_num (&d1); - bc_free_num (&d2); } /* The multiply routine. N2 times N1 is put int PROD with the scale of @@ -255,26 +172,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) { - bc_num pval; - size_t len1, len2; - size_t full_scale, prod_scale; + bc_num prod; /* Initialize things. */ - len1 = n1->n_len + n1->n_scale; - len2 = n2->n_len + n2->n_scale; - full_scale = n1->n_scale + n2->n_scale; - prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); + size_t len1 = n1->n_len + n1->n_scale; + size_t len2 = n2->n_len + n2->n_scale; + size_t full_scale = n1->n_scale + n2->n_scale; + size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - _bc_rec_mul(n1, len1, n2, len2, &pval); + if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS) { + bc_fast_mul(n1, len1, n2, len2, &prod); + } else { + bc_standard_mul(n1, len1, n2, len2, &prod); + } /* Assign to prod and clean up the number. */ - pval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); - pval->n_len = len2 + len1 + 1 - full_scale; - pval->n_scale = prod_scale; - _bc_rm_leading_zeros(pval); - if (bc_is_zero(pval)) { - pval->n_sign = PLUS; + prod->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); + prod->n_len -= full_scale; + prod->n_scale = prod_scale; + _bc_rm_leading_zeros(prod); + if (bc_is_zero(prod)) { + prod->n_sign = PLUS; } - return pval; + return prod; } From ec9f8dfe5a9be43fee345ff00355e54e73f81d49 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 18:44:16 +0900 Subject: [PATCH 2/9] Fixed array allocation --- ext/bcmath/libbcmath/src/recmul.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 75ace8a88355a..f676ab4f672f6 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -100,12 +100,9 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long n1_l[n1_arr_size]; - unsigned long n2_l[n2_arr_size]; - unsigned long prod_l[prod_arr_size]; - for (i = 0; i < prod_arr_size; i++) { - prod_l[i] = 0; - } + unsigned long *n1_l = emalloc(n1_arr_size * sizeof(unsigned long)); + unsigned long *n2_l = emalloc(n2_arr_size * sizeof(unsigned long)); + unsigned long *prod_l = ecalloc(prod_arr_size, sizeof(unsigned long)); /* Convert n1 to long[] */ i = 0; @@ -164,6 +161,10 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu *pend-- = prod_l[i] % BASE; prod_l[i] /= BASE; } + + efree(n1_l); + efree(n2_l); + efree(prod_l); } /* The multiply routine. N2 times N1 is put int PROD with the scale of From ac9342df8f5d2cd16d8b214fa8861059b4bdb955 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 19:28:57 +0900 Subject: [PATCH 3/9] Fixed array allocation --- ext/bcmath/libbcmath/src/recmul.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index f676ab4f672f6..6edf856b5a2df 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -100,9 +100,15 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long *n1_l = emalloc(n1_arr_size * sizeof(unsigned long)); - unsigned long *n2_l = emalloc(n2_arr_size * sizeof(unsigned long)); - unsigned long *prod_l = ecalloc(prod_arr_size, sizeof(unsigned long)); + unsigned long *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(unsigned long)); + + unsigned long *n1_l = buf; + unsigned long *n2_l = buf + n1_arr_size; + unsigned long *prod_l = n2_l + n2_arr_size; + + for (i = 0; i < prod_arr_size; i++) { + prod_l[i] = 0; + } /* Convert n1 to long[] */ i = 0; @@ -162,9 +168,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu prod_l[i] /= BASE; } - efree(n1_l); - efree(n2_l); - efree(prod_l); + efree(buf); } /* The multiply routine. N2 times N1 is put int PROD with the scale of From 899bb39ee2f209d348fe47cf40dea10d4261e6ba Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 19:32:10 +0900 Subject: [PATCH 4/9] Changed unsigned long to BC_UINT_T --- ext/bcmath/libbcmath/src/recmul.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 6edf856b5a2df..e457a7333e132 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -43,10 +43,10 @@ * Converts BCD to long, going backwards from pointer n by the number of * characters specified by len. */ -static inline unsigned long bc_partial_convert_to_long(const char *n, size_t len) +static inline BC_UINT_T bc_partial_convert_to_long(const char *n, size_t len) { - unsigned long num = 0; - unsigned long base = 1; + BC_UINT_T num = 0; + BC_UINT_T base = 1; for (size_t i = 0; i < len; i++) { num += *n * base; @@ -66,9 +66,9 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - unsigned long n1_l = bc_partial_convert_to_long(n1end, n1len); - unsigned long n2_l = bc_partial_convert_to_long(n2end, n2len); - unsigned long prod_l = n1_l * n2_l; + BC_UINT_T n1_l = bc_partial_convert_to_long(n1end, n1len); + BC_UINT_T n2_l = bc_partial_convert_to_long(n2end, n2len); + BC_UINT_T prod_l = n1_l * n2_l; size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); @@ -82,7 +82,7 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc } /* - * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs. + * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_UINT_Ts. * The array is generated starting with the smaller digits. * e.g. 12345678901234567890 => {34567890, 56789012, 1234} * @@ -100,11 +100,11 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(unsigned long)); + BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); - unsigned long *n1_l = buf; - unsigned long *n2_l = buf + n1_arr_size; - unsigned long *prod_l = n2_l + n2_arr_size; + BC_UINT_T *n1_l = buf; + BC_UINT_T *n2_l = buf + n1_arr_size; + BC_UINT_T *prod_l = n2_l + n2_arr_size; for (i = 0; i < prod_arr_size; i++) { prod_l[i] = 0; From ea57a9f3c8a94372065aa3d82cb06cdd727c415f Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 08:21:32 +0900 Subject: [PATCH 5/9] Fixed comments and constant names --- ext/bcmath/libbcmath/src/private.h | 8 ++++---- ext/bcmath/libbcmath/src/recmul.c | 32 +++++++++++++++--------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index fa538150ef73a..7e367441292ee 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,13 +84,13 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t -# define BC_LONGABLE_DIGITS 8 -# define BC_LONGABLE_OVERFLOW 100000000 +# define BC_MUL_UINT_DIGITS 8 +# define BC_MUL_UINT_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t -# define BC_LONGABLE_DIGITS 4 -# define BC_LONGABLE_OVERFLOW 10000 +# define BC_MUL_UINT_DIGITS 4 +# define BC_MUL_UINT_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index e457a7333e132..0d916747e0e37 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -40,10 +40,10 @@ /* Multiply utility routines */ /* - * Converts BCD to long, going backwards from pointer n by the number of + * Converts BCD to uint, going backwards from pointer n by the number of * characters specified by len. */ -static inline BC_UINT_T bc_partial_convert_to_long(const char *n, size_t len) +static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) { BC_UINT_T num = 0; BC_UINT_T base = 1; @@ -66,8 +66,8 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - BC_UINT_T n1_l = bc_partial_convert_to_long(n1end, n1len); - BC_UINT_T n2_l = bc_partial_convert_to_long(n2end, n2len); + BC_UINT_T n1_l = bc_partial_convert_to_uint(n1end, n1len); + BC_UINT_T n2_l = bc_partial_convert_to_uint(n2end, n2len); BC_UINT_T prod_l = n1_l * n2_l; size_t prodlen = n1len + n2len; @@ -96,8 +96,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu char *n2end = n2->n_value + n2len - 1; size_t prodlen = n1len + n2len; - size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0); - size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t n1_arr_size = n1len / BC_MUL_UINT_DIGITS + (n1len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t n2_arr_size = n2len / BC_MUL_UINT_DIGITS + (n2len % BC_MUL_UINT_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); @@ -110,21 +110,21 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu prod_l[i] = 0; } - /* Convert n1 to long[] */ + /* Convert n1 to uint[] */ i = 0; while (n1len > 0) { - size_t len = MIN(BC_LONGABLE_DIGITS, n1len); - n1_l[i] = bc_partial_convert_to_long(n1end, len); + size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); + n1_l[i] = bc_partial_convert_to_uint(n1end, len); n1end -= len; n1len -= len; i++; } - /* Convert n2 to long[] */ + /* Convert n2 to uint[] */ i = 0; while (n2len > 0) { - size_t len = MIN(BC_LONGABLE_DIGITS, n2len); - n2_l[i] = bc_partial_convert_to_long(n2end, len); + size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); + n2_l[i] = bc_partial_convert_to_uint(n2end, len); n2end -= len; n2len -= len; i++; @@ -142,8 +142,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu * However, the last digit does nothing. */ for (i = 0; i < prod_arr_size - 1; i++) { - prod_l[i + 1] += prod_l[i] / BC_LONGABLE_OVERFLOW; - prod_l[i] %= BC_LONGABLE_OVERFLOW; + prod_l[i + 1] += prod_l[i] / BC_MUL_UINT_OVERFLOW; + prod_l[i] %= BC_MUL_UINT_OVERFLOW; } /* Convert to bc_num */ @@ -152,7 +152,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu char *pend = pptr + prodlen - 1; i = 0; while (i < prod_arr_size - 1) { - for (size_t j = 0; j < BC_LONGABLE_DIGITS; j++) { + for (size_t j = 0; j < BC_MUL_UINT_DIGITS; j++) { *pend-- = prod_l[i] % BASE; prod_l[i] /= BASE; } @@ -186,7 +186,7 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS) { + if (len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS) { bc_fast_mul(n1, len1, n2, len2, &prod); } else { bc_standard_mul(n1, len1, n2, len2, &prod); From b23e0a392978fd37658a06cbc37feededc1e19fb Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 08:22:55 +0900 Subject: [PATCH 6/9] Moved the constant --- ext/bcmath/libbcmath/src/private.h | 4 ---- ext/bcmath/libbcmath/src/recmul.c | 9 +++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 7e367441292ee..1403baad0c257 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,13 +84,9 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t -# define BC_MUL_UINT_DIGITS 8 -# define BC_MUL_UINT_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t -# define BC_MUL_UINT_DIGITS 4 -# define BC_MUL_UINT_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 0d916747e0e37..247e2a238cc53 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -37,6 +37,15 @@ #include "zend_alloc.h" +#if SIZEOF_SIZE_T >= 8 +# define BC_MUL_UINT_DIGITS 8 +# define BC_MUL_UINT_OVERFLOW 100000000 +#else +# define BC_MUL_UINT_DIGITS 4 +# define BC_MUL_UINT_OVERFLOW 10000 +#endif + + /* Multiply utility routines */ /* From ec51d7664e8516828851b8a312436f87245b24d9 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 09:12:55 +0900 Subject: [PATCH 7/9] Fixed variable names and comments --- ext/bcmath/libbcmath/src/recmul.c | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 247e2a238cc53..9a832d754c459 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -75,9 +75,9 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - BC_UINT_T n1_l = bc_partial_convert_to_uint(n1end, n1len); - BC_UINT_T n2_l = bc_partial_convert_to_uint(n2end, n2len); - BC_UINT_T prod_l = n1_l * n2_l; + BC_UINT_T n1_uint = bc_partial_convert_to_uint(n1end, n1len); + BC_UINT_T n2_uint = bc_partial_convert_to_uint(n2end, n2len); + BC_UINT_T prod_uint = n1_uint * n2_uint; size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); @@ -85,8 +85,8 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *pend = pptr + prodlen - 1; while (pend >= pptr) { - *pend-- = prod_l % BASE; - prod_l /= BASE; + *pend-- = prod_uint % BASE; + prod_uint /= BASE; } } @@ -111,19 +111,19 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); - BC_UINT_T *n1_l = buf; - BC_UINT_T *n2_l = buf + n1_arr_size; - BC_UINT_T *prod_l = n2_l + n2_arr_size; + BC_UINT_T *n1_uint = buf; + BC_UINT_T *n2_uint = buf + n1_arr_size; + BC_UINT_T *prod_uint = n2_uint + n2_arr_size; for (i = 0; i < prod_arr_size; i++) { - prod_l[i] = 0; + prod_uint[i] = 0; } /* Convert n1 to uint[] */ i = 0; while (n1len > 0) { size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); - n1_l[i] = bc_partial_convert_to_uint(n1end, len); + n1_uint[i] = bc_partial_convert_to_uint(n1end, len); n1end -= len; n1len -= len; i++; @@ -133,7 +133,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu i = 0; while (n2len > 0) { size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); - n2_l[i] = bc_partial_convert_to_uint(n2end, len); + n2_uint[i] = bc_partial_convert_to_uint(n2end, len); n2end -= len; n2len -= len; i++; @@ -142,17 +142,17 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu /* Multiplication and addition */ for (i = 0; i < n1_arr_size; i++) { for (size_t j = 0; j < n2_arr_size; j++) { - prod_l[i + j] += n1_l[i] * n2_l[j]; + prod_uint[i + j] += n1_uint[i] * n2_uint[j]; } } /* - * Move a value exceeding 8 digits by carrying to the next digit. + * Move a value exceeding 4/8 digits by carrying to the next digit. * However, the last digit does nothing. */ for (i = 0; i < prod_arr_size - 1; i++) { - prod_l[i + 1] += prod_l[i] / BC_MUL_UINT_OVERFLOW; - prod_l[i] %= BC_MUL_UINT_OVERFLOW; + prod_uint[i + 1] += prod_uint[i] / BC_MUL_UINT_OVERFLOW; + prod_uint[i] %= BC_MUL_UINT_OVERFLOW; } /* Convert to bc_num */ @@ -162,8 +162,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu i = 0; while (i < prod_arr_size - 1) { for (size_t j = 0; j < BC_MUL_UINT_DIGITS; j++) { - *pend-- = prod_l[i] % BASE; - prod_l[i] /= BASE; + *pend-- = prod_uint[i] % BASE; + prod_uint[i] /= BASE; } i++; } @@ -173,8 +173,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu * Also need to fill it to the end with zeros, so loop until the end of the string. */ while (pend >= pptr) { - *pend-- = prod_l[i] % BASE; - prod_l[i] /= BASE; + *pend-- = prod_uint[i] % BASE; + prod_uint[i] /= BASE; } efree(buf); From 079f31c2772c8449d7d4168ba3b550f530e1f785 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 21:56:20 +0900 Subject: [PATCH 8/9] save loop cost --- ext/bcmath/libbcmath/src/recmul.c | 333 ++++++++++++++++++++++++++---- 1 file changed, 298 insertions(+), 35 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 9a832d754c459..b635a20fe7a81 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "private.h" /* For _bc_rm_leading_zeros() */ #include "zend_alloc.h" @@ -45,23 +46,292 @@ # define BC_MUL_UINT_OVERFLOW 10000 #endif +#define BC_UINT_TO_BCD_ONE_DIGIT(num, bcd) do { \ + bcd = num % 10; \ + num /= 10; \ +} while (0) + /* Multiply utility routines */ /* * Converts BCD to uint, going backwards from pointer n by the number of * characters specified by len. + * + * Since the upper limit of len is known, open the loop to save loop cost. */ static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) { - BC_UINT_T num = 0; - BC_UINT_T base = 1; + BC_UINT_T num = n[0]; + + switch (len) { + case 1: + return num; + case 2: + num += n[-1] * 10; + return num; + case 3: + num += n[-1] * 10; + num += n[-2] * 100; + return num; + case 4: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + return num; +#if BC_MUL_UINT_DIGITS == 8 + case 5: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + return num; + case 6: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + return num; + case 7: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + return num; + case 8: + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + num += n[-7] * 10000000; + return num; +#endif + } - for (size_t i = 0; i < len; i++) { - num += *n * base; - base *= BASE; - n--; + return num; +} + +/* + * Since the number of digits is fixed, open the loop and saves cost. + */ +static inline BC_UINT_T bc_partial_convert_to_uint_fixed_digits(const char *n) +{ + BC_UINT_T num = n[0]; + num += n[-1] * 10; + num += n[-2] * 100; + num += n[-3] * 1000; +#if BC_MUL_UINT_DIGITS == 8 + num += n[-4] * 10000; + num += n[-5] * 100000; + num += n[-6] * 1000000; + num += n[-7] * 10000000; +#endif + + return num; +} + +/* + * Convert HEX to BCD. The number of digits can be up to 16 decimal digits (64-bit). + */ +static inline void bc_uint_to_bcd(BC_UINT_T num, char *bcd, size_t len) +{ + switch (len) { + case 0: + return; + case 1: + *bcd = num % 10; + return; + case 2: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + bcd[-1] = num % 10; + return; + case 3: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + bcd[-2] = num % 10; + return; + case 4: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + bcd[-3] = num % 10; + return; + case 5: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + bcd[-4] = num % 10; + return; + case 6: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + bcd[-5] = num % 10; + return; + case 7: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + bcd[-6] = num % 10; + return; + case 8: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + bcd[-7] = num % 10; + return; +#if BC_MUL_UINT_DIGITS == 8 + case 9: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + bcd[-8] = num % 10; + return; + case 10: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + bcd[-9] = num % 10; + return; + case 11: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + bcd[-10] = num % 10; + return; + case 12: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + bcd[-11] = num % 10; + return; + case 13: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + bcd[-12] = num % 10; + return; + case 14: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + bcd[-13] = num % 10; + return; + case 15: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-13]); + bcd[-14] = num % 10; + return; + case 16: + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-8]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-9]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-10]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-11]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-12]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-13]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-14]); + bcd[-15] = num % 10; + return; +#endif } +} + +/* + * Converts 8 decimal digits of HEX to 8 digits of BCD (64-bit). + * Since the number of digits is fixed, open the loop and saves cost. + */ +static inline BC_UINT_T bc_uint_to_bcd_fixed_digits(BC_UINT_T num, char *bcd) +{ + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[0]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-1]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-2]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-3]); +#if BC_MUL_UINT_DIGITS == 8 + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-4]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-5]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-6]); + BC_UINT_TO_BCD_ONE_DIGIT(num, bcd[-7]); +#endif return num; } @@ -81,13 +351,9 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); - char *pptr = (*prod)->n_value; - char *pend = pptr + prodlen - 1; + char *pend = (*prod)->n_value + prodlen - 1; - while (pend >= pptr) { - *pend-- = prod_uint % BASE; - prod_uint /= BASE; - } + bc_uint_to_bcd(prod_uint, pend, prodlen); } /* @@ -121,23 +387,27 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu /* Convert n1 to uint[] */ i = 0; - while (n1len > 0) { - size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); - n1_uint[i] = bc_partial_convert_to_uint(n1end, len); - n1end -= len; - n1len -= len; + while (n1len >= BC_MUL_UINT_DIGITS) { + n1_uint[i] = bc_partial_convert_to_uint_fixed_digits(n1end); + n1end -= BC_MUL_UINT_DIGITS; + n1len -= BC_MUL_UINT_DIGITS; i++; } + if (n1len > 0) { + n1_uint[i] = bc_partial_convert_to_uint(n1end, n1len); + } /* Convert n2 to uint[] */ i = 0; - while (n2len > 0) { - size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); - n2_uint[i] = bc_partial_convert_to_uint(n2end, len); - n2end -= len; - n2len -= len; + while (n2len >= BC_MUL_UINT_DIGITS) { + n2_uint[i] = bc_partial_convert_to_uint_fixed_digits(n2end); + n2end -= BC_MUL_UINT_DIGITS; + n2len -= BC_MUL_UINT_DIGITS; i++; } + if (n2len > 0) { + n2_uint[i] = bc_partial_convert_to_uint(n2end, n2len); + } /* Multiplication and addition */ for (i = 0; i < n1_arr_size; i++) { @@ -157,25 +427,18 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu /* Convert to bc_num */ *prod = bc_new_num_nonzeroed(prodlen, 0); - char *pptr = (*prod)->n_value; - char *pend = pptr + prodlen - 1; - i = 0; - while (i < prod_arr_size - 1) { - for (size_t j = 0; j < BC_MUL_UINT_DIGITS; j++) { - *pend-- = prod_uint[i] % BASE; - prod_uint[i] /= BASE; - } - i++; + char *pend = (*prod)->n_value + prodlen - 1; + for (i = 0; i < prod_arr_size - 1; i++) { + prod_uint[i] = bc_uint_to_bcd_fixed_digits(prod_uint[i], pend); + pend -= BC_MUL_UINT_DIGITS; + prodlen -= BC_MUL_UINT_DIGITS; } /* * The last digit may carry over. * Also need to fill it to the end with zeros, so loop until the end of the string. */ - while (pend >= pptr) { - *pend-- = prod_uint[i] % BASE; - prod_uint[i] /= BASE; - } + bc_uint_to_bcd(prod_uint[i], pend, prodlen); efree(buf); } From c2ffd4aab7316d8156f3302cce490ceda128ab04 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 21:56:30 +0900 Subject: [PATCH 9/9] use EXPECTED --- ext/bcmath/libbcmath/src/recmul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index b635a20fe7a81..a30325503262c 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -458,7 +458,7 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - if (len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS) { + if (EXPECTED(len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS)) { bc_fast_mul(n1, len1, n2, len2, &prod); } else { bc_standard_mul(n1, len1, n2, len2, &prod);