Skip to content

Binomial distribution sampling quantized by floating point rounding #22

@mstoeckl

Description

@mstoeckl

Summary

When taking a large number of samples from Binomial::new(n, p) where n is a power of two ≥ 2^54, and p is 0.5, the samples appear rounded to multiples of powers of 2.

Expected behavior: about half of the sample values should be odd, and more generally the distribution of values mod k for any k ≪ sqrt(n) should be roughly uniform.

The distribution may also be quantized for other values of p and n, but I have not tested this. n<2^53 is probably safe since (as noted below) this issue may result from floating point quantization. I have also not tested other distributions.

Code sample

fn test_binomial_rounding() {
    for n_bits in 30..62 {
        let n = 1u64 << n_bits;
        let p = 0.5;
        let bin = Binomial::new(n, p).unwrap();
        let mut rng = ChaCha12Rng::seed_from_u64(0);
        let nsamples = 1u64 << 20;
        let mut tzcnt = vec![0u64; 65];
        for _ in 0..nsamples {
            let v = bin.sample(&mut rng);
            tzcnt[v.trailing_zeros() as usize] += 1;
        }
        println!("n=2^{}, p={}, tzcnt dist: {:?}", n_bits, p, tzcnt);
    }
}

Running this sample, I get the following output:

n=2^30, p=0.5, tzcnt dist: [525016, 262161, 130650, 65601, 32548, 16203, 8225, 4092, 2087, 958, 520, 277, 121, 55, 27, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^31, p=0.5, tzcnt dist: [523470, 263139, 130636, 65616, 32942, 16555, 8101, 4035, 2076, 988, 480, 268, 131, 63, 44, 13, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^32, p=0.5, tzcnt dist: [525218, 262496, 130065, 65533, 32459, 16494, 8123, 4112, 2030, 1054, 491, 244, 125, 61, 38, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^33, p=0.5, tzcnt dist: [524664, 261701, 131444, 65115, 32776, 16458, 8230, 4005, 2083, 1028, 529, 275, 129, 82, 23, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^34, p=0.5, tzcnt dist: [524492, 262392, 130859, 65466, 32743, 16315, 8188, 4077, 2037, 1009, 512, 233, 118, 69, 27, 22, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^35, p=0.5, tzcnt dist: [523512, 262697, 130561, 66020, 32635, 16626, 8333, 4159, 2008, 996, 510, 244, 138, 69, 38, 12, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^36, p=0.5, tzcnt dist: [524355, 262344, 131434, 65300, 32467, 16376, 8197, 4117, 2022, 987, 497, 239, 109, 60, 34, 18, 11, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^37, p=0.5, tzcnt dist: [524592, 262290, 130847, 65194, 32896, 16373, 8177, 4151, 2052, 1016, 492, 245, 109, 62, 40, 22, 10, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^38, p=0.5, tzcnt dist: [525177, 261487, 131236, 65407, 32767, 16113, 8183, 4052, 2073, 1071, 499, 253, 152, 46, 24, 20, 8, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^39, p=0.5, tzcnt dist: [524355, 261949, 131233, 65501, 32624, 16450, 8295, 4124, 2017, 1044, 510, 239, 115, 58, 23, 19, 11, 5, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^40, p=0.5, tzcnt dist: [523643, 262597, 131269, 65549, 32815, 16423, 8096, 4158, 2012, 1007, 498, 258, 123, 64, 32, 10, 13, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^41, p=0.5, tzcnt dist: [523999, 262491, 130854, 65615, 32655, 16450, 8378, 4049, 2053, 1041, 503, 241, 132, 63, 31, 9, 6, 3, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^42, p=0.5, tzcnt dist: [523496, 262461, 131616, 65488, 32560, 16618, 8159, 4098, 2044, 1013, 492, 243, 151, 69, 27, 19, 9, 6, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^43, p=0.5, tzcnt dist: [524973, 261898, 131086, 65273, 32672, 16200, 8251, 4135, 2014, 1048, 486, 252, 147, 66, 32, 23, 12, 2, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^44, p=0.5, tzcnt dist: [524359, 262049, 131095, 65556, 32738, 16301, 8292, 4103, 2037, 1048, 506, 234, 122, 71, 34, 11, 5, 8, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^45, p=0.5, tzcnt dist: [524949, 261943, 131127, 65457, 32535, 16322, 8066, 4061, 2059, 1000, 549, 240, 131, 72, 27, 12, 12, 7, 2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^46, p=0.5, tzcnt dist: [524620, 262508, 130855, 65527, 32439, 16308, 8196, 4035, 2022, 1039, 512, 267, 115, 66, 37, 14, 5, 5, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^47, p=0.5, tzcnt dist: [523160, 262718, 131653, 65676, 32636, 16267, 8336, 4039, 2020, 1042, 516, 260, 120, 63, 36, 13, 6, 8, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^48, p=0.5, tzcnt dist: [524515, 262220, 131102, 65360, 32656, 16320, 8197, 4048, 2039, 1055, 521, 268, 133, 75, 30, 17, 13, 5, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^49, p=0.5, tzcnt dist: [523577, 262379, 130861, 66024, 32865, 16443, 8213, 4145, 2022, 1032, 494, 249, 148, 57, 30, 17, 6, 6, 4, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^50, p=0.5, tzcnt dist: [524659, 262212, 131130, 65308, 32494, 16423, 8153, 4103, 2038, 1054, 507, 249, 134, 54, 29, 12, 5, 8, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^51, p=0.5, tzcnt dist: [524720, 261945, 130437, 65553, 32872, 16525, 8287, 4167, 2028, 1012, 509, 266, 127, 60, 33, 15, 9, 2, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^52, p=0.5, tzcnt dist: [524498, 261377, 131525, 65845, 32440, 16364, 8202, 4213, 2043, 1062, 523, 239, 125, 59, 31, 15, 10, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^53, p=0.5, tzcnt dist: [524090, 262050, 131099, 65734, 32785, 16386, 8122, 4092, 2097, 1021, 555, 277, 149, 53, 34, 12, 10, 3, 2, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^54, p=0.5, tzcnt dist: [262070, 393606, 196324, 98075, 49382, 24591, 12198, 6071, 3151, 1547, 800, 379, 190, 96, 57, 14, 9, 8, 2, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^55, p=0.5, tzcnt dist: [0, 262167, 392940, 196928, 98379, 49045, 24554, 12150, 6134, 3133, 1541, 785, 413, 207, 95, 49, 21, 15, 7, 4, 4, 3, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^56, p=0.5, tzcnt dist: [0, 0, 262121, 392592, 196924, 98526, 49166, 24493, 12377, 6231, 3052, 1550, 773, 382, 182, 115, 43, 29, 10, 6, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^57, p=0.5, tzcnt dist: [0, 0, 0, 262174, 393487, 196484, 98134, 48943, 24591, 12328, 6256, 3043, 1579, 806, 385, 170, 92, 42, 24, 15, 9, 5, 7, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^58, p=0.5, tzcnt dist: [0, 0, 0, 0, 262107, 392540, 197262, 98151, 49108, 24752, 12277, 6138, 3077, 1569, 808, 382, 212, 90, 57, 21, 14, 6, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^59, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 262459, 392898, 196042, 98569, 49134, 24707, 12352, 6162, 3163, 1570, 765, 376, 190, 87, 46, 21, 15, 8, 8, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^60, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 0, 261472, 393552, 196811, 98258, 49260, 24697, 12215, 6071, 3137, 1552, 761, 401, 181, 101, 57, 28, 15, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
n=2^61, p=0.5, tzcnt dist: [0, 0, 0, 0, 0, 0, 0, 262367, 392711, 196764, 98297, 49140, 24674, 12415, 6158, 3053, 1539, 733, 351, 190, 86, 45, 26, 10, 11, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Investigation

The BTPE sampling method's output derives from floating point values (which have a ~52+1 bit significand) and (while I have not looked deeply at the algorithm itself) does not appear to have any mechanism to increase precision from this:

rand_distr/src/binomial.rs

Lines 247 to 254 in 2e2dc45

let gen_u = Uniform::new(0., p4).unwrap();
let gen_v = Uniform::new(0., 1.).unwrap();
loop {
// Step 1: Generate `u` for selecting the region. If region 1 is
// selected, generate a triangularly distributed variate.
let u = gen_u.sample(rng);
let mut v = gen_v.sample(rng);

I don't think the current distribution tests (https://github.com/rust-random/rand_distr/blob/2e2dc4583a70466bf86419d14bba649cf7a9051b/distr_test/tests/ks/mod.rs) can efficiently catch quantization issues for large n without making a huge number of samples, but testing that transformed distribution mod k for various small values of k is correct should be doable (although approximately calculating the ideal distribution may be tedious).

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions