Skip to content

Commit 2984b75

Browse files
committed
Implement Float16 conversions using integer arithmetic.
1 parent 0025d97 commit 2984b75

File tree

2 files changed

+135
-83
lines changed

2 files changed

+135
-83
lines changed

base/runtime/runtime.jl

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ isapple() = (KERNEL === :Apple || KERNEL === :Darwin)
2929

3030
## Float16 intrinsics
3131

32+
# note that we can't actually use Float16 in these implementations, as LLVM will happily
33+
# lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2`
34+
# because it wants to store the `half` in a single-precision register. this causes recursion
35+
# when compiling these intrinsics. LLVM's compiler-rt similarly returns plain integers.
36+
3237
# Float32 -> Float16 algorithm from:
3338
# "Fast Half Float Conversion" by Jeroen van der Zijp
3439
# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
@@ -71,11 +76,10 @@ let _basetable = Vector{UInt16}(undef, 512),
7176
end
7277

7378
# truncation
74-
function truncsfhf2(val::Float32)
75-
f = reinterpret(UInt32, val)
76-
if isnan(val)
79+
function truncsfhf2(f::UInt32)
80+
if f&0x7fffffff > 0x7f800000 # isnan without reinterpreting as Float32
7781
t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16))
78-
return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16))
82+
return t ⊻ ((f >> 0xd) % UInt16)
7983
end
8084
i = ((f & ~Base.significand_mask(Float32)) >> Base.significand_bits(Float32)) + 1
8185
@inbounds sh = shifttable[i]
@@ -95,17 +99,18 @@ function truncsfhf2(val::Float32)
9599
h += UInt16(1)
96100
end
97101
end
98-
reinterpret(Float16, h)
102+
h
99103
end
100-
if !Sys.isapple()
101-
@ccallable Float16 __truncsfhf2(val::Float32) = truncsfhf2(val)
102-
@ccallable Float16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
103-
@ccallable Float16 __truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
104+
truncdfhf2(x::UInt64) = truncsfhf2(reinterpret(UInt32, Float32(reinterpret(Float64, x))))
105+
if !isapple()
106+
@ccallable UInt16 __truncsfhf2(val::UInt32) = truncsfhf2(val)
107+
@ccallable UInt16 __gnu_f2h_ieee(val::UInt32) = truncsfhf2(val)
108+
@ccallable UInt16 __truncdfhf2(val::UInt64) = truncdfhf2(val)
104109
end
105110

106111
# extension
107-
function extendhfsf2(val::Float16)
108-
local ival::UInt32 = reinterpret(UInt16, val)
112+
function extendhfsf2(val::UInt16)
113+
local ival::UInt32 = val
109114
local sign::UInt32 = (ival & 0x8000) >> 15
110115
local exp::UInt32 = (ival & 0x7c00) >> 10
111116
local sig::UInt32 = (ival & 0x3ff) >> 0
@@ -143,12 +148,13 @@ function extendhfsf2(val::Float16)
143148
sig = sig << (23 - 10)
144149
ret = sign | exp | sig
145150
end
146-
return reinterpret(Float32, ret)
151+
ret
147152
end
148-
if !Sys.isapple()
149-
@ccallable Float32 __extendhfsf2(val::Float16) = extendhfsf2(val)
150-
@ccallable Float32 __gnu_h2f_ieee(val::Float16) = extendhfsf2(val)
153+
extendhfdf2(x::UInt16) = reinterpret(UInt64, Float64(reinterpret(Float32, extendhfsf2(x))))
154+
if !isapple()
155+
@ccallable UInt32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
156+
@ccallable UInt32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
151157
end
152-
@ccallable Float64 __extendhfdf2(x::Float16) = Float64(extendhfdf2(x))
158+
@ccallable UInt64 __extendhfdf2(val::UInt16) = extendhfdf2(val)
153159

154160
end

test/runtime.jl

Lines changed: 113 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3,98 +3,144 @@
33
using Base: Runtime
44

55
@testset "truncdfhf2" begin
6-
@test Runtime.__truncdfhf2(NaN) === NaN16
7-
@test Runtime.__truncdfhf2(Inf) === Inf16
8-
@test Runtime.__truncdfhf2(-Inf) === -Inf16
9-
@test Runtime.__truncdfhf2(0.0) === reinterpret(Float16, 0x0000)
10-
@test Runtime.__truncdfhf2(-0.0) === reinterpret(Float16, 0x8000)
11-
@test Runtime.__truncdfhf2(3.1415926535) === reinterpret(Float16, 0x4248)
12-
@test Runtime.__truncdfhf2(-3.1415926535) === reinterpret(Float16, 0xc248)
13-
@test Runtime.__truncdfhf2(0x1.987124876876324p+1000) === reinterpret(Float16, 0x7c00)
14-
@test Runtime.__truncdfhf2(0x1.987124876876324p+12) === reinterpret(Float16, 0x6e62)
15-
@test Runtime.__truncdfhf2(0x1.0p+0) === reinterpret(Float16, 0x3c00)
16-
@test Runtime.__truncdfhf2(0x1.0p-14) === reinterpret(Float16, 0x0400)
6+
test_truncdfhf2(a, expected) =
7+
@test Runtime.truncdfhf2(reinterpret(UInt64, Float64(a))) === reinterpret(UInt16, expected)
8+
# NaN
9+
test_truncdfhf2(NaN, NaN16)
10+
# inf
11+
test_truncdfhf2(Inf, Inf16)
12+
test_truncdfhf2(-Inf, -Inf16)
13+
# zero
14+
test_truncdfhf2(0.0, 0x0000)
15+
test_truncdfhf2(-0.0, 0x8000)
16+
test_truncdfhf2(3.1415926535, 0x4248)
17+
test_truncdfhf2(-3.1415926535, 0xc248)
18+
test_truncdfhf2(0x1.987124876876324p+1000, 0x7c00)
19+
test_truncdfhf2(0x1.987124876876324p+12, 0x6e62)
20+
test_truncdfhf2(0x1.0p+0, 0x3c00)
21+
test_truncdfhf2(0x1.0p-14, 0x0400)
1722
# denormal
18-
@test Runtime.__truncdfhf2(0x1.0p-20) === reinterpret(Float16, 0x0010)
19-
@test Runtime.__truncdfhf2(0x1.0p-24) === reinterpret(Float16, 0x0001)
20-
@test Runtime.__truncdfhf2(-0x1.0p-24) === reinterpret(Float16, 0x8001)
21-
@test Runtime.__truncdfhf2(0x1.5p-25) === reinterpret(Float16, 0x0001)
23+
test_truncdfhf2(0x1.0p-20, 0x0010)
24+
test_truncdfhf2(0x1.0p-24, 0x0001)
25+
test_truncdfhf2(-0x1.0p-24, 0x8001)
26+
test_truncdfhf2(0x1.5p-25, 0x0001)
2227
# and back to zero
23-
@test Runtime.__truncdfhf2(0x1.0p-25) === reinterpret(Float16, 0x0000)
24-
@test Runtime.__truncdfhf2(-0x1.0p-25) === reinterpret(Float16, 0x8000)
28+
test_truncdfhf2(0x1.0p-25, 0x0000)
29+
test_truncdfhf2(-0x1.0p-25, 0x8000)
2530
# max (precise)
26-
@test Runtime.__truncdfhf2(65504.0) === reinterpret(Float16, 0x7bff)
31+
test_truncdfhf2(65504.0, 0x7bff)
2732
# max (rounded)
28-
@test Runtime.__truncdfhf2(65519.0) === reinterpret(Float16, 0x7bff)
33+
test_truncdfhf2(65519.0, 0x7bff)
2934
# max (to +inf)
30-
@test Runtime.__truncdfhf2(65520.0) === reinterpret(Float16, 0x7c00)
31-
@test Runtime.__truncdfhf2(-65520.0) === reinterpret(Float16, 0xfc00)
32-
@test Runtime.__truncdfhf2(65536.0) === reinterpret(Float16, 0x7c00)
35+
test_truncdfhf2(65520.0, 0x7c00)
36+
test_truncdfhf2(-65520.0, 0xfc00)
37+
test_truncdfhf2(65536.0, 0x7c00)
3338
end
3439

3540
@testset "truncsfhf2" begin
41+
test_truncsfhf2(a, expected) =
42+
@test Runtime.truncsfhf2(reinterpret(UInt32, Float32(a))) === reinterpret(UInt16, expected)
3643
# NaN
37-
@test Runtime.__truncsfhf2(NaN32) === reinterpret(Float16, 0x7e00)
44+
test_truncsfhf2(NaN32, NaN16)
3845
# inf
39-
@test Runtime.__truncsfhf2(Inf32) === reinterpret(Float16, 0x7c00)
40-
@test Runtime.__truncsfhf2(-Inf32) === reinterpret(Float16, 0xfc00)
46+
test_truncsfhf2(Inf32, Inf16)
47+
test_truncsfhf2(-Inf32, -Inf16)
4148
# zero
42-
@test Runtime.__truncsfhf2(0.0f0) === reinterpret(Float16, 0x0000)
43-
@test Runtime.__truncsfhf2(-0.0f0) === reinterpret(Float16, 0x8000)
44-
@test Runtime.__truncsfhf2(3.1415926535f0) === reinterpret(Float16, 0x4248)
45-
@test Runtime.__truncsfhf2(-3.1415926535f0) === reinterpret(Float16, 0xc248)
46-
@test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+100)) === reinterpret(Float16, 0x7c00)
47-
@test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+12)) === reinterpret(Float16, 0x6e62)
48-
@test Runtime.__truncsfhf2(Float32(0x1.0p+0)) === reinterpret(Float16, 0x3c00)
49-
@test Runtime.__truncsfhf2(Float32(0x1.0p-14)) === reinterpret(Float16, 0x0400)
49+
test_truncsfhf2(0.0f0, 0x0000)
50+
test_truncsfhf2(-0.0f0, 0x8000)
51+
test_truncsfhf2(3.1415926535f0, 0x4248)
52+
test_truncsfhf2(-3.1415926535f0, 0xc248)
53+
test_truncsfhf2(0x1.987124876876324p+100, 0x7c00)
54+
test_truncsfhf2(0x1.987124876876324p+12, 0x6e62)
55+
test_truncsfhf2(0x1.0p+0, 0x3c00)
56+
test_truncsfhf2(0x1.0p-14, 0x0400)
5057
# denormal
51-
@test Runtime.__truncsfhf2(Float32(0x1.0p-20)) === reinterpret(Float16, 0x0010)
52-
@test Runtime.__truncsfhf2(Float32(0x1.0p-24)) === reinterpret(Float16, 0x0001)
53-
@test Runtime.__truncsfhf2(Float32(-0x1.0p-24)) === reinterpret(Float16, 0x8001)
54-
@test Runtime.__truncsfhf2(Float32(0x1.5p-25)) === reinterpret(Float16, 0x0001)
58+
test_truncsfhf2(0x1.0p-20, 0x0010)
59+
test_truncsfhf2(0x1.0p-24, 0x0001)
60+
test_truncsfhf2(-0x1.0p-24, 0x8001)
61+
test_truncsfhf2(0x1.5p-25, 0x0001)
5562
# and back to zero
56-
@test Runtime.__truncsfhf2(Float32(0x1.0p-25)) === reinterpret(Float16, 0x0000)
57-
@test Runtime.__truncsfhf2(Float32(-0x1.0p-25)) === reinterpret(Float16, 0x8000)
63+
test_truncsfhf2(0x1.0p-25, 0x0000)
64+
test_truncsfhf2(-0x1.0p-25, 0x8000)
5865
# max (precise)
59-
@test Runtime.__truncsfhf2(65504.0f0) === reinterpret(Float16, 0x7bff)
66+
test_truncsfhf2(65504.0f0, 0x7bff)
6067
# max (rounded)
61-
@test Runtime.__truncsfhf2(65519.0f0) === reinterpret(Float16, 0x7bff)
68+
test_truncsfhf2(65519.0f0, 0x7bff)
6269
# max (to +inf)
63-
@test Runtime.__truncsfhf2(65520.0f0) === reinterpret(Float16, 0x7c00)
64-
@test Runtime.__truncsfhf2(65536.0f0) === reinterpret(Float16, 0x7c00)
65-
@test Runtime.__truncsfhf2(-65520.0f0) === reinterpret(Float16, 0xfc00)
70+
test_truncsfhf2(65520.0f0, 0x7c00)
71+
test_truncsfhf2(65536.0f0, 0x7c00)
72+
test_truncsfhf2(-65520.0f0, 0xfc00)
6673
end
6774

6875
@testset "extendhfsf2" begin
69-
# These tests are taken fromt the compiler-rt testsuite. Were as of 3.9.0
70-
# the test are done with compareResultH (so with after casting to UInt16)
71-
# Tests that are marked broken fail as === Float32 comparisons.
76+
function test_extendhfsf2(a::UInt16, expected::Float32)
77+
b = Runtime.extendhfsf2(a)
78+
b16 = Float16(reinterpret(Float32, b))
79+
expected16 = Float16(expected)
80+
@test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
81+
end
82+
# NaN
83+
test_extendhfsf2(0x7e00, NaN32)
84+
# inf
85+
test_extendhfsf2(0x7c00, Inf32)
86+
test_extendhfsf2(0xfc00, -Inf32)
87+
# zero
88+
test_extendhfsf2(0x0000, 0.0f0)
89+
test_extendhfsf2(0x8000, -0.0f0)
90+
test_extendhfsf2(0x4248, Float32(π))
91+
test_extendhfsf2(0xc248, Float32(-π))
92+
test_extendhfsf2(0x7c00, Float32(0x1.987124876876324p+100))
93+
test_extendhfsf2(0x6e62, Float32(0x1.988p+12))
94+
test_extendhfsf2(0x3c00, Float32(0x1.0p+0))
95+
test_extendhfsf2(0x0400, Float32(0x1.0p-14))
96+
# denormal
97+
test_extendhfsf2(0x0010, Float32(0x1.0p-20))
98+
test_extendhfsf2(0x0001, Float32(0x1.0p-24))
99+
test_extendhfsf2(0x8001, Float32(-0x1.0p-24))
100+
test_extendhfsf2(0x0001, Float32(0x1.5p-25))
101+
# and back to zero
102+
test_extendhfsf2(0x0000, Float32(0x1.0p-25))
103+
test_extendhfsf2(0x8000, Float32(-0x1.0p-25))
104+
# max (precise)
105+
test_extendhfsf2(0x7bff, 65504.0f0)
106+
# max (rounded)
107+
test_extendhfsf2(0x7bff, 65504.0f0)
108+
109+
# BROKEN: once this works, remove the calls to Float32 in these tests
110+
@test Float16(Float32(0x1.0p-14)) == Float16(0x1.0p-14)
111+
end
72112

73-
##
113+
@testset "extendhfdf2" begin
114+
function test_extendhfdf2(a::UInt16, expected::Float64)
115+
b = Runtime.extendhfdf2(a)
116+
b16 = Float16(reinterpret(Float64, b))
117+
expected16 = Float16(Float32(expected)) # see BROKEN above
118+
@test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
119+
end
74120
# NaN
75-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7e00)) === NaN32
121+
test_extendhfdf2(0x7e00, NaN64)
76122
# inf
77-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Inf32
78-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0xfc00)) === -Inf32
123+
test_extendhfdf2(0x7c00, Inf64)
124+
test_extendhfdf2(0xfc00, -Inf64)
79125
# zero
80-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === 0.0f0
81-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === -0.0f0
82-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x4248)) ≈ Float32(π)
83-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0xc248)) ≈ Float32(-π)
84-
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Float32(0x1.987124876876324p+100)
85-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x6e62)) === Float32(0x1.988p+12)
86-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x3c00)) === Float32(0x1.0p+0)
87-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0400)) === Float32(0x1.0p-14)
126+
test_extendhfdf2(0x0000, 0.0)
127+
test_extendhfdf2(0x8000, -0.0)
128+
test_extendhfdf2(0x4248, Float64(π))
129+
test_extendhfdf2(0xc248, Float64(-π))
130+
test_extendhfdf2(0x7c00, 0x1.987124876876324p+100)
131+
test_extendhfdf2(0x6e62, 0x1.988p+12)
132+
test_extendhfdf2(0x3c00, 0x1.0p+0)
133+
test_extendhfdf2(0x0400, 0x1.0p-14)
88134
# denormal
89-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0010)) === Float32(0x1.0p-20)
90-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.0p-24)
91-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x8001)) === Float32(-0x1.0p-24)
92-
@test_broken Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.5p-25)
135+
test_extendhfdf2(0x0010, 0x1.0p-20)
136+
test_extendhfdf2(0x0001, 0x1.0p-24)
137+
test_extendhfdf2(0x8001, -0x1.0p-24)
138+
test_extendhfdf2(0x0001, 0x1.5p-25)
93139
# and back to zero
94-
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === Float32(0x1.0p-25)
95-
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === Float32(-0x1.0p-25)
140+
test_extendhfdf2(0x0000, 0x1.0p-25)
141+
test_extendhfdf2(0x8000, -0x1.0p-25)
96142
# max (precise)
97-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
143+
test_extendhfdf2(0x7bff, 65504.0)
98144
# max (rounded)
99-
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
145+
test_extendhfdf2(0x7bff, 65504.0)
100146
end

0 commit comments

Comments
 (0)