@@ -92,6 +92,16 @@ get_math_module_state(PyObject *module)
     return (math_module_state *)state;
 }

+/*
+   sin(pi*x), giving accurate results for all finite x (especially x
+   integral or close to an integer). This is here for use in the
+   reflection formula for the gamma function. It conforms to IEEE
+   754-2008 for finite arguments, but not for infinities or nans.
+*/
+
+static const double pi = 3.141592653589793238462643383279502884197;
+static const double logpi = 1.144729885849400174143427351353058711647;
+
 /* Version of PyFloat_AsDouble() with in-line fast paths
    for exact floats and integers. Gives a substantial
    speed improvement for extracting float arguments.
@@ -114,6 +124,162 @@ get_math_module_state(PyObject *module)
     } \
 }

+static double
+m_sinpi(double x)
+{
+    double y, r;
+    int n;
+    /* this function should only ever be called for finite arguments */
+    assert(Py_IS_FINITE(x));
+    y = fmod(fabs(x), 2.0);
+    n = (int)round(2.0*y);
+    assert(0 <= n && n <= 4);
+    switch (n) {
+    case 0:
+        r = sin(pi*y);
+        break;
+    case 1:
+        r = cos(pi*(y-0.5));
+        break;
+    case 2:
+        /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
+           -0.0 instead of 0.0 when y == 1.0. */
+        r = sin(pi*(1.0-y));
+        break;
+    case 3:
+        r = -cos(pi*(y-1.5));
+        break;
+    case 4:
+        r = sin(pi*(y-2.0));
+        break;
+    default:
+        Py_UNREACHABLE();
+    }
+    return copysign(1.0, x)*r;
+}
+
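Not part of the patch: a minimal, self-contained sketch of why the reduction above matters. A naive sin(pi*x) works with a rounded value of pi, so for a large integer x it lands far from zero, while reducing x modulo 2 first gives an exact zero. The helper name sinpi_sketch and the test value are illustrative only; it assumes a C99 libm.

    #include <math.h>
    #include <stdio.h>

    static const double PI_APPROX = 3.141592653589793238462643383279502884197;

    /* same reduction idea as m_sinpi above, restated for self-containment */
    static double sinpi_sketch(double x)
    {
        double y = fmod(fabs(x), 2.0), r;
        switch ((int)round(2.0*y)) {
        case 0: r = sin(PI_APPROX*y); break;
        case 1: r = cos(PI_APPROX*(y-0.5)); break;
        case 2: r = sin(PI_APPROX*(1.0-y)); break;
        case 3: r = -cos(PI_APPROX*(y-1.5)); break;
        default: r = sin(PI_APPROX*(y-2.0)); break;   /* n == 4 */
        }
        return copysign(1.0, x)*r;
    }

    int main(void)
    {
        double x = 1e15;                                    /* a large integer */
        printf("naive sin(pi*x): %g\n", sin(PI_APPROX*x));  /* generally far from 0 */
        printf("reduced:         %g\n", sinpi_sketch(x));   /* exactly 0 */
        return 0;
    }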
+/* Implementation of the real gamma function. In extensive but non-exhaustive
+   random tests, this function proved accurate to within <= 10 ulps across the
+   entire float domain. Note that accuracy may depend on the quality of the
+   system math functions, the pow function in particular. Special cases
+   follow C99 annex F. The parameters and method are tailored to platforms
+   whose double format is the IEEE 754 binary64 format.
+
+   Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
+   and g=6.024680040776729583740234375; these parameters are amongst those
+   used by the Boost library. Following Boost (again), we re-express the
+   Lanczos sum as a rational function, and compute it that way. The
+   coefficients below were computed independently using MPFR, and have been
+   double-checked against the coefficients in the Boost source code.
+
+   For x < 0.0 we use the reflection formula.
+
+   There's one minor tweak that deserves explanation: Lanczos' formula for
+   Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
+   values, x+g-0.5 can be represented exactly. However, in cases where it
+   can't be represented exactly the small error in x+g-0.5 can be magnified
+   significantly by the pow and exp calls, especially for large x. A cheap
+   correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
+   involved in the computation of x+g-0.5 (that is, e = computed value of
+   x+g-0.5 - exact value of x+g-0.5). Here's the proof:
+
+   Correction factor
+   -----------------
+   Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
+   double, and e is tiny. Then:
+
+     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
+                                     = pow(y, x-0.5)/exp(y) * C,
+
+   where the correction factor C is given by
+
+     C = pow(1-e/y, x-0.5) * exp(e)
+
+   Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(e) ~ 1+e, so:
+
+     C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
+
+   But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
+
+     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y).
+
+   Note that for accuracy, when computing r*C it's better to do
+
+     r + e*g/y*r;
+
+   than
+
+     r * (1 + e*g/y);
+
+   since the addition in the latter throws away most of the bits of
+   information in e*g/y.
+*/
+
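Not part of the patch: a self-contained sketch of the correction described above. It recovers e with the usual two-step (Fast2Sum-style) trick, subtracting the larger addend first, and then folds the correction in as r += (e*g/y)*r, mirroring the z computation and the r += z*r update in m_tgamma below. The names are illustrative; it assumes round-to-nearest IEEE 754 doubles and a compiler that does not re-associate floating-point expressions.

    #include <math.h>
    #include <stdio.h>

    static const double g = 6.024680040776729583740234375;   /* Lanczos g */

    int main(void)
    {
        double x = 123.456;            /* some positive argument */
        double y = x + (g - 0.5);      /* rounded value of x+g-0.5 */

        /* e = computed y minus the exact x+g-0.5, recovered exactly by
           subtracting the larger addend first */
        double e = (x > g - 0.5) ? (y - x) - (g - 0.5) : (y - (g - 0.5)) - x;

        /* apply C ~ 1 + e*g/y as r += (e*g/y)*r rather than r *= 1 + e*g/y,
           so the low-order bits of e*g/y are not discarded */
        double r = pow(y, x - 0.5) / exp(y);   /* uncorrected factor */
        r += (e * g / y) * r;
        printf("corrected pow(x+g-0.5, x-0.5)/exp(x+g-0.5) ~ %.17g\n", r);
        return 0;
    }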
+#define LANCZOS_N 13
+static const double lanczos_g = 6.024680040776729583740234375;
+static const double lanczos_g_minus_half = 5.524680040776729583740234375;
+static const double lanczos_num_coeffs[LANCZOS_N] = {
+    23531376880.410759688572007674451636754734846804940,
+    42919803642.649098768957899047001988850926355848959,
+    35711959237.355668049440185451547166705960488635843,
+    17921034426.037209699919755754458931112671403265390,
+    6039542586.3520280050642916443072979210699388420708,
+    1439720407.3117216736632230727949123939715485786772,
+    248874557.86205415651146038641322942321632125127801,
+    31426415.585400194380614231628318205362874684987640,
+    2876370.6289353724412254090516208496135991145378768,
+    186056.26539522349504029498971604569928220784236328,
+    8071.6720023658162106380029022722506138218516325024,
+    210.82427775157934587250973392071336271166969580291,
+    2.5066282746310002701649081771338373386264310793408
+};
+
+/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
+static const double lanczos_den_coeffs[LANCZOS_N] = {
+    0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
+    13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
+
+/* gamma values for small positive integers, 1 through NGAMMA_INTEGRAL */
+#define NGAMMA_INTEGRAL 23
+static const double gamma_integral[NGAMMA_INTEGRAL] = {
+    1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
+    3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
+    1307674368000.0, 20922789888000.0, 355687428096000.0,
+    6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
+    51090942171709440000.0, 1124000727777607680000.0,
+};
+
+/* Lanczos' sum L_g(x), for positive x */
+
+static double
+lanczos_sum(double x)
+{
+    double num = 0.0, den = 0.0;
+    int i;
+    assert(x > 0.0);
+    /* evaluate the rational function lanczos_sum(x). For large
+       x, the obvious algorithm risks overflow, so we instead
+       rescale the denominator and numerator of the rational
+       function by x**(1-LANCZOS_N) and treat this as a
+       rational function in 1/x. This also reduces the error for
+       larger x values. The choice of cutoff point (5.0 below) is
+       somewhat arbitrary; in tests, smaller cutoff values than
+       this resulted in lower accuracy. */
+    if (x < 5.0) {
+        for (i = LANCZOS_N; --i >= 0; ) {
+            num = num * x + lanczos_num_coeffs[i];
+            den = den * x + lanczos_den_coeffs[i];
+        }
+    }
+    else {
+        for (i = 0; i < LANCZOS_N; i++) {
+            num = num / x + lanczos_num_coeffs[i];
+            den = den / x + lanczos_den_coeffs[i];
+        }
+    }
+    return num/den;
+}
+
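Not part of the patch: a rough sketch of how the pieces above combine. With y = x + lanczos_g - 0.5, Lanczos' approximation reads Gamma(x) ~ lanczos_sum(x) * pow(y, x - 0.5) / exp(y); this is the uncorrected form, without the error compensation applied in m_tgamma below. The helper name is illustrative, and the snippet assumes it is appended to a file that already contains the lanczos_sum(), lanczos_g and coefficient definitions shown above.

    #include <math.h>
    #include <stdio.h>

    static double gamma_lanczos_sketch(double x)   /* modest positive x */
    {
        double y = x + lanczos_g - 0.5;
        return lanczos_sum(x) * pow(y, x - 0.5) / exp(y);
    }

    int main(void)
    {
        /* Gamma(5) = 4! = 24; even uncorrected, the result lands very
           close to 24 */
        printf("%.17g\n", gamma_lanczos_sketch(5.0));
        return 0;
    }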
 /* Constant for +infinity, generated in the same way as float('inf'). */

 static double
@@ -143,46 +309,113 @@ m_nan(void)

 #endif

-/*
-   gamma: the real gamma function.
-*/
-
 static double
-m_gamma(double x)
+m_tgamma(double x)
 {
+    double absx, r, y, z, sqrtpow;
+
     /* special cases */
     if (!Py_IS_FINITE(x)) {
         if (Py_IS_NAN(x) || x > 0.0)
-            return x;  /* gamma(nan) = nan, gamma(inf) = inf */
+            return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
         else {
             errno = EDOM;
-            return Py_NAN;  /* gamma(-inf) = nan, invalid */
+            return Py_NAN;  /* tgamma(-inf) = nan, invalid */
         }
     }
     if (x == 0.0) {
         errno = EDOM;
-        /* gamma(+-0.0) = +-inf, divide-by-zero */
+        /* tgamma(+-0.0) = +-inf, divide-by-zero */
         return copysign(Py_HUGE_VAL, x);
     }

     /* integer arguments */
     if (x == floor(x)) {
         if (x < 0.0) {
-            errno = EDOM;  /* gamma(n) = nan, invalid for */
+            errno = EDOM;  /* tgamma(n) = nan, invalid for */
             return Py_NAN; /* negative integers n */
         }
+        if (x <= NGAMMA_INTEGRAL)
+            return gamma_integral[(int)x - 1];
+    }
+    absx = fabs(x);
+
+    /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
+    if (absx < 1e-20) {
+        r = 1.0/x;
+        if (Py_IS_INFINITY(r))
+            errno = ERANGE;
+        return r;
+    }
+
+    /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
+       x > 200, and underflows to +-0.0 for x < -200, not a negative
+       integer. */
+    if (absx > 200.0) {
+        if (x < 0.0) {
+            return 0.0/m_sinpi(x);
+        }
+        else {
+            errno = ERANGE;
+            return Py_HUGE_VAL;
+        }
     }

-    return tgamma(x);
+    y = absx + lanczos_g_minus_half;
+    /* compute error in sum */
+    if (absx > lanczos_g_minus_half) {
+        /* note: the correction can be foiled by an optimizing
+           compiler that (incorrectly) thinks that an expression like
+           a + b - a - b can be optimized to 0.0. This shouldn't
+           happen in a standards-conforming compiler. */
+        double q = y - absx;
+        z = q - lanczos_g_minus_half;
+    }
+    else {
+        double q = y - lanczos_g_minus_half;
+        z = q - absx;
+    }
+    z = z * lanczos_g / y;
+    if (x < 0.0) {
+        r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
+        r -= z * r;
+        if (absx < 140.0) {
+            r /= pow(y, absx - 0.5);
+        }
+        else {
+            sqrtpow = pow(y, absx / 2.0 - 0.25);
+            r /= sqrtpow;
+            r /= sqrtpow;
+        }
+    }
+    else {
+        r = lanczos_sum(absx) / exp(y);
+        r += z * r;
+        if (absx < 140.0) {
+            r *= pow(y, absx - 0.5);
+        }
+        else {
+            sqrtpow = pow(y, absx / 2.0 - 0.25);
+            r *= sqrtpow;
+            r *= sqrtpow;
+        }
+    }
+    if (Py_IS_INFINITY(r))
+        errno = ERANGE;
+    return r;
 }
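Not part of the patch: a quick look, via the C library's tgamma, at the cutoffs the code above relies on. For IEEE 754 doubles the actual overflow point is near x = 171.62, so 200 is a comfortably safe threshold, and large negative non-integer arguments underflow to a signed zero. Whether errno is set on overflow depends on the platform's math_errhandling.

    #include <errno.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        errno = 0;
        double big = tgamma(201.0);
        printf("tgamma(201.0)  = %g (errno %d)\n", big, errno);
        /* expected: inf, with errno == ERANGE on errno-reporting platforms */

        printf("tgamma(-200.5) = %g\n", tgamma(-200.5));
        /* expected: a signed zero (here -0); the sign alternates between poles */

        printf("tgamma(6.0)    = %g\n", tgamma(6.0));
        /* expected: 120, the value the patch serves from gamma_integral */
        return 0;
    }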

 /*
    lgamma: natural log of the absolute value of the Gamma function.
+   For large arguments, Lanczos' formula works extremely well here.
 */

 static double
 m_lgamma(double x)
 {
+    double r;
+    double absx;
+
     /* special cases */
     if (!Py_IS_FINITE(x)) {
         if (Py_IS_NAN(x))
@@ -197,9 +430,28 @@ m_lgamma(double x)
             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
             return Py_HUGE_VAL; /* integers n <= 0 */
         }
+        else {
+            return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
+        }
     }

-    return lgamma(x);
+    absx = fabs(x);
+    /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
+    if (absx < 1e-20)
+        return -log(absx);
+
+    /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
+       having a second set of numerator coefficients for lanczos_sum that
+       absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
+       subtraction below; it's probably not worth it. */
+    r = log(lanczos_sum(absx)) - lanczos_g;
+    r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
+    if (x < 0.0)
+        /* Use reflection formula to get value for negative x. */
+        r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
+    if (Py_IS_INFINITY(r))
+        errno = ERANGE;
+    return r;
 }
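Not part of the patch: a small numerical check of the reflection identity used above for non-integer x < 0, lgamma(x) = log(pi) - log|sin(pi*x)| - log|x| - lgamma(|x|), written against the C library's lgamma. The naive sin(pi*x) is good enough for a rough check like this; the patch uses m_sinpi precisely because that form loses accuracy near integers.

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const double PI = 3.141592653589793;
        double x = -2.5;
        double direct    = lgamma(x);
        double reflected = log(PI) - log(fabs(sin(PI * x)))
                           - log(fabs(x)) - lgamma(fabs(x));
        /* both print roughly -0.0562437 */
        printf("direct:    %.17g\n", direct);
        printf("reflected: %.17g\n", reflected);
        return 0;
    }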

 /*
@@ -907,7 +1159,7 @@ math_floor(PyObject *module, PyObject *number)
     return PyLong_FromDouble(floor(x));
 }

-FUNC1A(gamma, m_gamma,
+FUNC1A(gamma, m_tgamma,
       "gamma($module, x, /)\n--\n\n"
       "Gamma function at x.")
 FUNC1A(lgamma, m_lgamma,