Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit ab778fb

Browse files
committed
fully generic
1 parent a668d27 commit ab778fb

File tree

3 files changed

+43
-26
lines changed

3 files changed

+43
-26
lines changed

src/math/generic/fma.rs

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@ use super::super::{CastFrom, Float, Int, MinInt};
55

66
const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
77

8-
type F = f64;
9-
108
/// Fused multiply-add.
119
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
12-
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
10+
pub fn fma<F>(x: F, y: F, z: F) -> F
11+
where
12+
F: Float + Helper,
13+
F: CastFrom<F::SignedInt>,
14+
F: CastFrom<i8>,
15+
F::Int: HInt,
16+
{
1317
// let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
1418
let one = IntTy::<F>::ONE;
1519
let zero = IntTy::<F>::ZERO;
@@ -32,8 +36,8 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
3236
}
3337

3438
/* mul: r = x*y */
35-
let zhi: u64;
36-
let zlo: u64;
39+
let zhi: F::Int;
40+
let zlo: F::Int;
3741
let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
3842

3943
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
@@ -55,8 +59,9 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
5559
d -= sbits;
5660
if d == 0 {
5761
} else if d < sbits {
58-
rlo =
59-
(rhi << (sbits - d)) | (rlo >> d) | IntTy::<F>::from((rlo << (sbits - d)) != 0);
62+
rlo = (rhi << (sbits - d))
63+
| (rlo >> d)
64+
| IntTy::<F>::from((rlo << (sbits - d)) != zero);
6065
rhi = rhi >> d;
6166
} else {
6267
rlo = one;
@@ -69,7 +74,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
6974
if d == 0 {
7075
zlo = nz.m;
7176
} else if d < sbits {
72-
zlo = (nz.m >> d) | IntTy::<F>::from((nz.m << (sbits - d)) != 0);
77+
zlo = (nz.m >> d) | IntTy::<F>::from((nz.m << (sbits - d)) != zero);
7378
} else {
7479
zlo = one;
7580
}
@@ -88,25 +93,24 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
8893
let (res, borrow) = rlo.overflowing_sub(zlo);
8994
rlo = res;
9095
rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
91-
if (rhi >> (F::BITS - 1)) != 0 {
96+
if (rhi >> (F::BITS - 1)) != zero {
9297
rlo = rlo.signed().wrapping_neg().unsigned();
93-
rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != 0);
98+
rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
9499
neg = !neg;
95-
// sign = (sign == 0) as i32;
96100
}
97-
nonzero = (rhi != 0) as i32;
101+
nonzero = (rhi != zero) as i32;
98102
}
99103

100104
/* set rhi to top 63 bits of the result (last bit is sticky) */
101105
if nonzero != 0 {
102106
e += sbits;
103107
d = rhi.leading_zeros() as i32 - 1;
104108
/* note: d > 0 */
105-
rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::<F>::from((rlo << d) != 0);
106-
} else if rlo != 0 {
109+
rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::<F>::from((rlo << d) != zero);
110+
} else if rlo != zero {
107111
d = rlo.leading_zeros() as i32 - 1;
108112
if d < 0 {
109-
rhi = (rlo >> 1) | (rlo & 1);
113+
rhi = (rlo >> 1) | (rlo & one);
110114
} else {
111115
rhi = rlo << d;
112116
}
@@ -117,17 +121,17 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
117121
e -= d;
118122

119123
/* convert to double */
120-
let mut i: i64 = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
124+
let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
121125
if neg {
122126
i = -i;
123127
}
124128

125-
let mut r: f64 = f64::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */
129+
let mut r: F = F::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */
126130

127131
if e < -(F::EXP_BIAS as i32 - 1) - (sbits - 2) {
128132
/* result is subnormal before rounding */
129133
if e == -(F::EXP_BIAS as i32 - 1) - (sbits - 1) {
130-
let mut c: f64 = magic;
134+
let mut c: F = magic;
131135
if neg {
132136
c = -c;
133137
}
@@ -139,13 +143,14 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
139143
}
140144
/* one bit is lost when scaled, add another top bit to
141145
* only round once at conversion if it is inexact */
142-
if (rhi << F::SIG_BITS) != 0 {
143-
i = ((rhi >> 1) | (rhi & 1) | (1 << 62)).signed();
146+
if (rhi << F::SIG_BITS) != zero {
147+
let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << 62);
148+
i = iu.signed();
144149
if neg {
145150
i = -i;
146151
}
147152
r = F::cast_from(i);
148-
r = 2.0 * r - c; /* remove top bit */
153+
r = F::cast_from(2i8) * r - c; /* remove top bit */
149154

150155
/* raise underflow portably, such that it
151156
* cannot be optimized away */
@@ -154,11 +159,12 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
154159
} else {
155160
/* only round once when scaled */
156161
d = 10;
157-
i = (((rhi >> d) | IntTy::<F>::from(rhi << (F::BITS as i32 - d) != 0)) << d).signed();
162+
i = (((rhi >> d) | IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero)) << d)
163+
.signed();
158164
if neg {
159165
i = -i;
160166
}
161-
r = f64::cast_from(i);
167+
r = F::cast_from(i);
162168
}
163169
}
164170

@@ -197,13 +203,13 @@ impl<F: Float> Norm<F> {
197203
}
198204

199205
// Need to figure out how to do this better.
200-
trait RaiseUnderflow {
206+
pub trait Helper {
201207
fn raise_underflow(self) -> Self;
202208
fn raise_underflow2(self) -> Self;
203209
fn scalbn(self, n: i32) -> Self;
204210
}
205211

206-
impl RaiseUnderflow for f64 {
212+
impl Helper for f64 {
207213
fn raise_underflow(self) -> Self {
208214
let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
209215
let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * self) as f32;

src/math/support/float_traits.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ pub trait Float:
2323
type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
2424

2525
/// An int of the same width as the float
26-
type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
26+
type SignedInt: Int
27+
+ MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
28+
+ ops::Neg<Output = Self::SignedInt>;
2729

2830
const ZERO: Self;
2931
const NEG_ZERO: Self;

src/math/support/int_traits.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ pub trait Int:
5252
+ ops::Sub<Output = Self>
5353
+ ops::Mul<Output = Self>
5454
+ ops::Div<Output = Self>
55+
+ ops::Shl<i32, Output = Self>
56+
+ ops::Shl<u32, Output = Self>
57+
+ ops::Shr<i32, Output = Self>
5558
+ ops::Shr<u32, Output = Self>
5659
+ ops::BitXor<Output = Self>
5760
+ ops::BitAnd<Output = Self>
@@ -93,6 +96,7 @@ pub trait Int:
9396
fn wrapping_shr(self, other: u32) -> Self;
9497
fn rotate_left(self, other: u32) -> Self;
9598
fn overflowing_add(self, other: Self) -> (Self, bool);
99+
fn overflowing_sub(self, other: Self) -> (Self, bool);
96100
fn leading_zeros(self) -> u32;
97101
fn ilog2(self) -> u32;
98102
}
@@ -151,6 +155,10 @@ macro_rules! int_impl_common {
151155
<Self>::overflowing_add(self, other)
152156
}
153157

158+
fn overflowing_sub(self, other: Self) -> (Self, bool) {
159+
<Self>::overflowing_sub(self, other)
160+
}
161+
154162
fn leading_zeros(self) -> u32 {
155163
<Self>::leading_zeros(self)
156164
}
@@ -437,6 +445,7 @@ cast_into!(i64);
437445
cast_into!(u128);
438446
cast_into!(i128);
439447

448+
cast_into_float!(i8);
440449
cast_into_float!(i16);
441450
cast_into_float!(i32);
442451
cast_into_float!(i64);

0 commit comments

Comments
 (0)