diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 3c3bd338..25a6ee43 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -106,36 +106,62 @@ floatmax(::Type{T}) where {T <: FixedPoint} = typemax(T) """ - floattype(::Type{T}) + floattype(::Type{T})::Type{<:AbstractFloat} -Return the minimum float type that represents `T` without overflow to `Inf`. +Return a minimal type suitable for performing computations with instances of type `T` without integer overflow. -# Example +The fallback definition of `floattype(T)` applies only to `T<:AbstractFloat`. +However, it is permissible to extend `floattype` to return types that are not subtypes of +`AbstractFloat`; the key characteristic is that the return type should support computation without integer overflow. + +In general the returned type should have the minimum bitwidth needed to encode the full precision of the input type. +However, a priority should be placed on computational efficiency; consequently, types like `Float16` should be avoided +except in scenarios where they are guaranteed to have hardware support. + +# Examples A classic usage is to avoid overflow behavior by promoting `FixedPoint` to `AbstractFloat` -```julia +```jldoctest julia> x = N0f8(1.0) 1.0N0f8 julia> x + x # overflow 0.996N0f8 -julia> float_x = floattype(eltype(x))(x) -1.0f0 +julia> T = floattype(x) +Float32 -julia> float_x + float_x +julia> T(x) + T(x) 2.0f0 ``` + +The following represents a valid extension of `floattype` to non-AbstractFloats: + +```julia +julia> using FixedPointNumbers, ColorTypes + +julia> floattype(RGB{N0f8}) +RGB{Float32} +``` + +`RGB` itself is not a subtype of `AbstractFloat`, but unlike `RGB{N0f8}`, operations with `RGB{Float32}` are not subject to integer overflow. 
""" -floattype(::Type{T}) where {T <: Real} = T # fallback +floattype(::Type{T}) where {T <: AbstractFloat} = T # fallback (we want a MethodError if no method producing AbstractFloat is defined) floattype(::Type{T}) where {T <: Union{ShortInts, Bool}} = Float32 floattype(::Type{T}) where {T <: Integer} = Float64 floattype(::Type{T}) where {T <: LongInts} = BigFloat +floattype(::Type{T}) where {I <: Integer, T <: Rational{I}} = typeof(zero(I)/oneunit(I)) +floattype(::Type{<:AbstractIrrational}) = Float64 floattype(::Type{X}) where {T <: ShortInts, X <: FixedPoint{T}} = Float32 floattype(::Type{X}) where {T <: Integer, X <: FixedPoint{T}} = Float64 floattype(::Type{X}) where {T <: LongInts, X <: FixedPoint{T}} = BigFloat +# Non-Real types +floattype(::Type{Complex{T}}) where T = Complex{floattype(T)} +floattype(::Type{Base.TwicePrecision{Float64}}) = Float64 # wider would be nice, but hardware support is paramount +floattype(::Type{Base.TwicePrecision{T}}) where T<:Union{Float16,Float32} = widen(T) + float(x::FixedPoint) = convert(floattype(x), x) function minmax(x::X, y::X) where {X <: FixedPoint} diff --git a/src/deprecations.jl b/src/deprecations.jl index b768d486..30d586a8 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -1 +1,9 @@ import Base.@deprecate_binding + +function floattype(::Type{T}) where {T <: Real} + Base.depwarn(""" + In a future release, the fallback definition of `floattype` will throw a MethodError if it cannot return a type `<:AbstractFloat`. + See the documentation on `floattype` for guidance on whether to define a custom `floattype(::Type{$T})` method. 
+ """, :floattype) + return T +end diff --git a/test/traits.jl b/test/traits.jl index 874a47b4..9aee60b0 100644 --- a/test/traits.jl +++ b/test/traits.jl @@ -1,3 +1,7 @@ +using FixedPointNumbers, Test + +struct MyReal <: Real end + @testset "floattype" begin function _is_fixed_type(x::Symbol) try @@ -16,4 +20,13 @@ for T in exact_types @test typemax(T) <= maxintfloat(floattype(T)) end + @test floattype(Rational{Int}) === Float64 + @test floattype(Complex{Int16}) === Complex{Float32} + @test floattype(Complex{Float32}) === Complex{Float32} + @test floattype(Base.TwicePrecision{Float16}) === Float32 + @test floattype(Base.TwicePrecision{Float32}) === Float64 + @test floattype(Base.TwicePrecision{Float64}) === Float64 + @test floattype(typeof(π)) === Float64 + + @test_skip(@test_throws MethodError floattype(MyReal)) # TODO: eliminate `@test_skip` when depwarn is eliminated. See #177. end