@@ -17,11 +17,15 @@ import
1717 cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
1818 setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
1919 isone, big, _string_n, decompose, minmax,
20- sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand
20+ sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
21+ uinttype, exponent_max, exponent_min, ieee754_representation,
22+ RawBigIntRoundingIncrementHelper, truncated, RawBigInt
2123
2224
2325using . Base. Libc
24- import .. Rounding: rounding_raw, setrounding_raw
26+ import .. Rounding:
27+ rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
28+ tie_breaker_is_to_even, correct_rounding_requires_increment
2529
2630import .. GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp
2731
@@ -89,6 +93,21 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode)
8993 end
9094end
9195
96+ rounds_to_nearest (m:: MPFRRoundingMode ) = m == MPFRRoundNearest
97+ function rounds_away_from_zero (m:: MPFRRoundingMode , sign_bit:: Bool )
98+ if m == MPFRRoundToZero
99+ false
100+ elseif m == MPFRRoundUp
101+ ! sign_bit
102+ elseif m == MPFRRoundDown
103+ sign_bit
104+ else
105+ # Assuming `m == MPFRRoundFromZero`
106+ true
107+ end
108+ end
109+ tie_breaker_is_to_even (:: MPFRRoundingMode ) = true
110+
92111const ROUNDING_MODE = Ref {MPFRRoundingMode} (MPFRRoundNearest)
93112const DEFAULT_PRECISION = Ref {Clong} (256 )
94113
@@ -130,6 +149,9 @@ mutable struct BigFloat <: AbstractFloat
130149 end
131150end
132151
152+ # The rounding mode here shouldn't matter.
153+ significand_limb_count (x:: BigFloat ) = div (sizeof (x. _d), sizeof (Limb), RoundToZero)
154+
133155rounding_raw (:: Type{BigFloat} ) = ROUNDING_MODE[]
134156setrounding_raw (:: Type{BigFloat} , r:: MPFRRoundingMode ) = ROUNDING_MODE[]= r
135157
@@ -380,35 +402,56 @@ function (::Type{T})(x::BigFloat) where T<:Integer
380402 trunc (T,x)
381403end
382404
383- # # BigFloat -> AbstractFloat
384- _cpynansgn (x:: AbstractFloat , y:: BigFloat ) = isnan (x) && signbit (x) != signbit (y) ? - x : x
385-
386- Float64 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
387- _cpynansgn (ccall ((:mpfr_get_d ,libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
388- Float64 (x:: BigFloat , r:: RoundingMode ) = Float64 (x, convert (MPFRRoundingMode, r))
389-
390- Float32 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
391- _cpynansgn (ccall ((:mpfr_get_flt ,libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
392- Float32 (x:: BigFloat , r:: RoundingMode ) = Float32 (x, convert (MPFRRoundingMode, r))
393-
394- function Float16 (x:: BigFloat ) :: Float16
395- res = Float32 (x)
396- resi = reinterpret (UInt32, res)
397- if (resi& 0x7fffffff ) < 0x38800000 # if Float16(res) is subnormal
398- # shift so that the mantissa lines up where it would for normal Float16
399- shift = 113 - ((resi & 0x7f800000 )>> 23 )
400- if shift< 23
401- resi |= 0x0080_0000 # set implicit bit
402- resi >>= shift
405+ function to_ieee754 (:: Type{T} , x:: BigFloat , rm) where {T<: AbstractFloat }
406+ sb = signbit (x)
407+ is_zero = iszero (x)
408+ is_inf = isinf (x)
409+ is_nan = isnan (x)
410+ is_regular = ! is_zero & ! is_inf & ! is_nan
411+ ieee_exp = Int (x. exp) - 1
412+ ieee_precision = precision (T)
413+ ieee_exp_max = exponent_max (T)
414+ ieee_exp_min = exponent_min (T)
415+ ieee_exp_min_subnormal = ieee_exp_min - ieee_precision + 1
416+ exp_diff = ieee_exp - ieee_exp_min
417+ is_normal = 0 ≤ exp_diff
418+
419+ # Note: sufficient but not necessary, depending on the rounding mode
420+ rounds_to_inf = is_inf | (is_regular & (ieee_exp_max < ieee_exp))
421+ rounds_to_zero = is_zero | (is_regular & (ieee_exp < ieee_exp_min_subnormal))
422+ rounds_to_regular = ! is_nan & ! rounds_to_inf & ! rounds_to_zero
423+
424+ U = uinttype (T)
425+
426+ ret_u = if rounds_to_regular
427+ v = RawBigInt (x. d, significand_limb_count (x))
428+ len = max (ieee_precision + min (exp_diff, 0 ), 0 ):: Int
429+ signif = truncated (U, v, len)
430+ c = 8 * sizeof (U) - len + 1
431+ is_normal && (signif = (signif << c) >> c) # implicit bit convention
432+ rh = RawBigIntRoundingIncrementHelper (v, len)
433+ incr = correct_rounding_requires_increment (rh, rm, sb)
434+ exp_field = max (exp_diff, 0 ) + is_normal
435+ ieee754_representation (T, sb, exp_field, signif) + incr
436+ else
437+ if rounds_to_zero
438+ ieee754_representation (T, sb, Val (:zero ))
439+ elseif rounds_to_inf
440+ ieee754_representation (T, sb, Val (:inf ))
441+ else
442+ ieee754_representation (T, sb, Val (:nan ))
403443 end
404- end
405- if (resi & 0x1fff == 0x1000 ) # if we are halfway between 2 Float16 values
406- # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
407- res = nextfloat (res, cmp (x, res))
408- end
409- return res
444+ end :: U
445+ reinterpret (T, ret_u)
410446end
411447
448+ Float16 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float16, x, r)
449+ Float32 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float32, x, r)
450+ Float64 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float64, x, r)
451+ Float16 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float16, x, r)
452+ Float32 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float32, x, r)
453+ Float64 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float64, x, r)
454+
412455promote_rule (:: Type{BigFloat} , :: Type{<:Real} ) = BigFloat
413456promote_rule (:: Type{BigInt} , :: Type{<:AbstractFloat} ) = BigFloat
414457promote_rule (:: Type{BigFloat} , :: Type{<:AbstractFloat} ) = BigFloat
0 commit comments