-
-
Notifications
You must be signed in to change notification settings - Fork 10
Description
Summary
When taking a large number of samples from Binomial::new(n, p)
where n
is a power of two ≥ 2^54, and p
is 0.5, the samples appear rounded to multiples of powers of 2.
Expected behavior: about half of the sample values should be odd, and more generally the distribution of values mod k for any k ≪ sqrt(n) should be roughly uniform.
The distribution may also be quantized for other values of p
and n
, but I have not tested this. n<2^53
is probably safe since (as noted below) this issue may result from floating point quantization. I have also not tested other distributions.
Code sample
fn test_binomial_rounding() {
for n_bits in 30..62 {
let n = 1u64 << n_bits;
let p = 0.5;
let bin = Binomial::new(n, p).unwrap();
let mut rng = ChaCha12Rng::seed_from_u64(0);
let nsamples = 1u64 << 20;
let mut tzcnt = vec![0u64; 65];
for _ in 0..nsamples {
let v = bin.sample(&mut rng);
tzcnt[v.trailing_zeros() as usize] += 1;
}
println!("n=2^{}, p={}, tzcnt dist: {:?}", n_bits, p, tzcnt);
}
}
Running this sample, I get the following output:
n=2^30, p=0.5, tzcnt dist: [525016, 262161, 130650, 65601, 32548, 16203, 8225, 4092, 2087, 958, 520, 277, 121, 55, 27, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^31, p=0.5, tzcnt dist: [523470, 263139, 130636, 65616, 32942, 16555, 8101, 4035, 2076, 988, 480, 268, 131, 63, 44, 13, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^32, p=0.5, tzcnt dist: [525218, 262496, 130065, 65533, 32459, 16494, 8123, 4112, 2030, 1054, 491, 244, 125, 61, 38, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^33, p=0.5, tzcnt dist: [524664, 261701, 131444, 65115, 32776, 16458, 8230, 4005, 2083, 1028, 529, 275, 129, 82, 23, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^34, p=0.5, tzcnt dist: [524492, 262392, 130859, 65466, 32743, 16315, 8188, 4077, 2037, 1009, 512, 233, 118, 69, 27, 22, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^35, p=0.5, tzcnt dist: [523512, 262697, 130561, 66020, 32635, 16626, 8333, 4159, 2008, 996, 510, 244, 138, 69, 38, 12, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^36, p=0.5, tzcnt dist: [524355, 262344, 131434, 65300, 32467, 16376, 8197, 4117, 2022, 987, 497, 239, 109, 60, 34, 18, 11, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^37, p=0.5, tzcnt dist: [524592, 262290, 130847, 65194, 32896, 16373, 8177, 4151, 2052, 1016, 492, 245, 109, 62, 40, 22, 10, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^38, p=0.5, tzcnt dist: [525177, 261487, 131236, 65407, 32767, 16113, 8183, 4052, 2073, 1071, 499, 253, 152, 46, 24, 20, 8, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^39, p=0.5, tzcnt dist: [524355, 261949, 131233, 65501, 32624, 16450, 8295, 4124, 2017, 1044, 510, 239, 115, 58, 23, 19, 11, 5, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^40, p=0.5, tzcnt dist: [523643, 262597, 131269, 65549, 32815, 16423, 8096, 4158, 2012, 1007, 498, 258, 123, 64, 32, 10, 13, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^41, p=0.5, tzcnt dist: [523999, 262491, 130854, 65615, 32655, 16450, 8378, 4049, 2053, 1041, 503, 241, 132, 63, 31, 9, 6, 3, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^42, p=0.5, tzcnt dist: [523496, 262461, 131616, 65488, 32560, 16618, 8159, 4098, 2044, 1013, 492, 243, 151, 69, 27, 19, 9, 6, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^43, p=0.5, tzcnt dist: [524973, 261898, 131086, 65273, 32672, 16200, 8251, 4135, 2014, 1048, 486, 252, 147, 66, 32, 23, 12, 2, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^44, p=0.5, tzcnt dist: [524359, 262049, 131095, 65556, 32738, 16301, 8292, 4103, 2037, 1048, 506, 234, 122, 71, 34, 11, 5, 8, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^45, p=0.5, tzcnt dist: [524949, 261943, 131127, 65457, 32535, 16322, 8066, 4061, 2059, 1000, 549, 240, 131, 72, 27, 12, 12, 7, 2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^46, p=0.5, tzcnt dist: [524620, 262508, 130855, 65527, 32439, 16308, 8196, 4035, 2022, 1039, 512, 267, 115, 66, 37, 14, 5, 5, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^47, p=0.5, tzcnt dist: [523160, 262718, 131653, 65676, 32636, 16267, 8336, 4039, 2020, 1042, 516, 260, 120, 63, 36, 13, 6, 8, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^48, p=0.5, tzcnt dist: [524515, 262220, 131102, 65360, 32656, 16320, 8197, 4048, 2039, 1055, 521, 268, 133, 75, 30, 17, 13, 5, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^49, p=0.5, tzcnt dist: [523577, 262379, 130861, 66024, 32865, 16443, 8213, 4145, 2022, 1032, 494, 249, 148, 57, 30, 17, 6, 6, 4, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^50, p=0.5, tzcnt dist: [524659, 262212, 131130, 65308, 32494, 16423, 8153, 4103, 2038, 1054, 507, 249, 134, 54, 29, 12, 5, 8, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^51, p=0.5, tzcnt dist: [524720, 261945, 130437, 65553, 32872, 16525, 8287, 4167, 2028, 1012, 509, 266, 127, 60, 33, 15, 9, 2, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^52, p=0.5, tzcnt dist: [524498, 261377, 131525, 65845, 32440, 16364, 8202, 4213, 2043, 1062, 523, 239, 125, 59, 31, 15, 10, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^53, p=0.5, tzcnt dist: [524090, 262050, 131099, 65734, 32785, 16386, 8122, 4092, 2097, 1021, 555, 277, 149, 53, 34, 12, 10, 3, 2, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^54, p=0.5, tzcnt dist: [262070, 393606, 196324, 98075, 49382, 24591, 12198, 6071, 3151, 1547, 800, 379, 190, 96, 57, 14, 9, 8, 2, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^55, p=0.5, tzcnt dist: [0, 262167, 392940, 196928, 98379, 49045, 24554, 12150, 6134, 3133, 1541, 785, 413, 207, 95, 49, 21, 15, 7, 4, 4, 3, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^56, p=0.5, tzcnt dist: [0, 0, 262121, 392592, 196924, 98526, 49166, 24493, 12377, 6231, 3052, 1550, 773, 382, 182, 115, 43, 29, 10, 6, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^57, p=0.5, tzcnt dist: [0, 0, 0, 262174, 393487, 196484, 98134, 48943, 24591, 12328, 6256, 3043, 1579, 806, 385, 170, 92, 42, 24, 15, 9, 5, 7, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^58, p=0.5, tzcnt dist: [0, 0, 0, 0, 262107, 392540, 197262, 98151, 49108, 24752, 12277, 6138, 3077, 1569, 808, 382, 212, 90, 57, 21, 14, 6, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^59, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 262459, 392898, 196042, 98569, 49134, 24707, 12352, 6162, 3163, 1570, 765, 376, 190, 87, 46, 21, 15, 8, 8, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^60, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 0, 261472, 393552, 196811, 98258, 49260, 24697, 12215, 6071, 3137, 1552, 761, 401, 181, 101, 57, 28, 15, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^61, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 0, 0, 262367, 392711, 196764, 98297, 49140, 24674, 12415, 6158, 3053, 1539, 733, 351, 190, 86, 45, 26, 10, 11, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Investigation
The BTPE sampling method's output derives from floating point values (which have a ~52+1 bit significand) and (while I have not looked deeply at the algorithm itself) does not appear to have any mechanism to increase precision from this:
Lines 247 to 254 in 2e2dc45
let gen_u = Uniform::new(0., p4).unwrap(); | |
let gen_v = Uniform::new(0., 1.).unwrap(); | |
loop { | |
// Step 1: Generate `u` for selecting the region. If region 1 is | |
// selected, generate a triangularly distributed variate. | |
let u = gen_u.sample(rng); | |
let mut v = gen_v.sample(rng); |
I don't think the current distribution tests (https://github.com/rust-random/rand_distr/blob/2e2dc4583a70466bf86419d14bba649cf7a9051b/distr_test/tests/ks/mod.rs) can efficiently catch quantization issues for large n
without making a huge number of samples, but testing that transformed distribution mod k
for various small values of k
is correct should be doable (although approximately calculating the ideal distribution may be tedious).