@@ -11,25 +11,27 @@ use std::arch::x86_64::*;
1111use std:: collections:: HashSet ;
1212
1313fn main ( ) {
14+ let a = unsafe { _mm_setr_ps ( 4.0 , 4.0 , 4.0 , 4.0 ) } ;
15+ let exact = 0.5 ;
16+ // max error: 2^-12.
17+ let rel_error_bound = 1.0 / ( 1 << 12 ) as f32 ;
18+
1419 let mut vals = HashSet :: new ( ) ;
1520 for _ in 0 ..50 {
16- unsafe {
17- // Compute the inverse square root of 4.0, four times.
18- let a = _mm_setr_ps ( 4.0 , 4.0 , 4.0 , 4.0 ) ;
19- let exact = 0.5 ;
20- let r = _mm_rsqrt_ps ( a) ;
21- let r: [ f32 ; 4 ] = std:: mem:: transmute ( r) ;
22- // Check the results.
23- for r in r {
24- vals. insert ( r. to_bits ( ) ) ;
25- // Ensure the relative error is less than 2^-12.
26- let rel_error = ( r - exact) / exact;
27- let log_error = rel_error. abs ( ) . log2 ( ) ;
28- assert ! (
29- rel_error == 0.0 || log_error < -12.0 ,
30- "got an error of {rel_error} = 2^{log_error}"
31- ) ;
32- }
21+ // Compute the inverse square root of 4.0, four times.
22+ let r = unsafe { _mm_rsqrt_ps ( a) } ;
23+ let r: [ f32 ; 4 ] = unsafe { std:: mem:: transmute ( r) } ;
24+ // Check the results.
25+ for r in r {
26+ vals. insert ( r. to_bits ( ) ) ;
27+ // Ensure the relative error is no more than 2^-12.
28+ let rel_error = ( r - exact) / exact;
29+ assert ! (
30+ rel_error. abs( ) <= rel_error_bound,
31+ "correct result: {exact}, got: {r}\n \
32+ that's a relative error of {rel_error} (= 2^{log_error})",
33+ log_error = rel_error. abs( ) . log2( )
34+ ) ;
3335 }
3436 }
3537 // Ensure we saw a bunch of different results.
0 commit comments