Skip to content

Commit a69a0d2

Browse files
author
Milan K
authored
Merge pull request #8 from milankl/performance
Performance
2 parents aa8c95f + 2dc90a2 commit a69a0d2

File tree

3 files changed

+209
-14
lines changed

3 files changed

+209
-14
lines changed

src/Float8s.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ module Float8s
88
(+), (-), (*), (/), (\), (^),
99
sin,cos,tan,asin,acos,atan,sinh,cosh,tanh,asinh,acosh,
1010
atanh,exp,exp2,exp10,log,log2,log10,sqrt,log1p,
11-
atan,hypot
11+
atan,hypot,
12+
round
1213

1314
export Float8, Float8_4, NaN8, Inf8, NaN8_4, Inf8_4
1415

src/float8.jl

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -149,41 +149,66 @@ end
149149
const basetable8, shifttable8 = create_base_shifttable(Float8)
150150
const basetable8_4, shifttable8_4 = create_base_shifttable(Float8_4)
151151

152-
basetable(::Type{Float8},i::Int) = @inbounds basetable8[i]
153-
basetable(::Type{Float8_4},i::Int) = @inbounds basetable8_4[i]
152+
"""
    Float8(val::Float32)

Convert `val` to `Float8` via the `basetable8` / `shifttable8` lookup tables,
rounding halfway cases to even. NaN inputs return `nan8(Float8)`.
"""
function Float8(val::Float32)
    # TODO retain the significant bits for NaN?
    isnan(val) && return nan8(Float8)

    bits = reinterpret(UInt32, val)

    # Table index: the bits of `val` above the Float32 significand field
    # (as given by `n_significant_bits`), plus one for the table lookup.
    idx = (bits >> n_significant_bits(Float32)) + 1
    @inbounds shift = shifttable8[idx]

    # Keep only the significand and set the implicit leading `1`.
    # If `val` is subnormal, the tables are set up to force the result to 0,
    # so assuming an implicit `1` is fine in the cases we care about.
    mant = (bits & significand_mask(Float32)) | (significand_mask(Float32) + 0x1)

    @inbounds h = (basetable8[idx] + ((mant >> shift) & significand_mask(Float8))) % UInt8

    # Rounding: inspect the first bit that was shifted out. No rounding is
    # applied when all exponent bits of `h` are already set.
    roundbit = (mant >> (shift - 1)) & 1
    saturated = (h & exponent_mask(Float8)) == exponent_mask(Float8)
    if roundbit != 0 && !saturated
        # Round up when the result is odd (halfway to even) or when any of
        # the remaining shifted-out bits is set.
        if isodd(h) || (mant & ((1 << (shift - 1)) - 1)) != 0
            h += one(UInt8)
        end
    end

    return reinterpret(Float8, h)
end
182+
183+
"""
    Float8_4(val::Float32)

Convert `val` to `Float8_4` via the `basetable8_4` / `shifttable8_4` lookup
tables, rounding halfway cases to even. NaN inputs return `nan8(Float8_4)`.
"""
function Float8_4(val::Float32)
    # TODO retain the significant bits for NaN?
    isnan(val) && return nan8(Float8_4)

    bits = reinterpret(UInt32, val)

    # Table index: the bits of `val` above the Float32 significand field
    # (as given by `n_significant_bits`), plus one for the table lookup.
    idx = (bits >> n_significant_bits(Float32)) + 1
    @inbounds shift = shifttable8_4[idx]

    # Keep only the significand and set the implicit leading `1`.
    # If `val` is subnormal, the tables are set up to force the result to 0,
    # so assuming an implicit `1` is fine in the cases we care about.
    mant = (bits & significand_mask(Float32)) | (significand_mask(Float32) + 0x1)

    @inbounds h = (basetable8_4[idx] + ((mant >> shift) & significand_mask(Float8_4))) % UInt8

    # Rounding: inspect the first bit that was shifted out. No rounding is
    # applied when all exponent bits of `h` are already set.
    roundbit = (mant >> (shift - 1)) & 1
    saturated = (h & exponent_mask(Float8_4)) == exponent_mask(Float8_4)
    if roundbit != 0 && !saturated
        # Round up when the result is odd (halfway to even) or when any of
        # the remaining shifted-out bits is set.
        if isodd(h) || (mant & ((1 << (shift - 1)) - 1)) != 0
            h += one(UInt8)
        end
    end

    return reinterpret(Float8_4, h)
end
188213

189214
first_sig_bit_mask(::Type{Float8}) = 0x00000008
@@ -265,7 +290,7 @@ function ==(x::AbstractFloat8, y::AbstractFloat8)
265290
if iszero(x) && iszero(y) # For Float16: (ix|iy)&0x7fff == 0x0000
266291
return true
267292
end
268-
return x == y
293+
return reinterpret(UInt8,x) == reinterpret(UInt8,y)
269294
end
270295

271296
for op in (:<, :<=, :isless)
@@ -274,6 +299,7 @@ end
274299

275300
# Binary arithmetic for both 8-bit float types: widen the operands to Float32,
# apply the operator there, then convert the result back to the 8-bit type.
for op in (:+, :-, :*, :/, :\, :^), T in (:Float8, :Float8_4)
    @eval ($op)(a::$T, b::$T) = $T(($op)(Float32(a), Float32(b)))
end
278304

279305
for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
@@ -290,3 +316,29 @@ for func in (:atan,:hypot)
290316
$func(a::Float8_4,b::Float8_4) = Float8_4($func(Float32(a),Float32(b)))
291317
end
292318
end
319+
320+
# Print a `Float8` as `NaN8`, `Inf8`, or `Float8(<value>)`, where `<value>` is
# the text that `print` produces for the value converted to `Float32`.
# NOTE(review): this extends `Base.show` only if `show` is imported from Base
# elsewhere in the module — confirm.
function show(io::IO, x::Float8)
    isnan(x) && return print(io, "NaN8")
    isinf(x) && return print(io, "Inf8")

    # Render through a fresh buffer so the text does not depend on `io`.
    txt = sprint(print, Float32(x))
    return print(io, "Float8(", txt, ")")
end
332+
333+
# Print a `Float8_4` as `NaN8_4`, `Inf8_4`, or `Float8_4(<value>)`, where
# `<value>` is the text that `print` produces for the value converted to
# `Float32`.
# NOTE(review): this extends `Base.show` only if `show` is imported from Base
# elsewhere in the module — confirm.
function show(io::IO, x::Float8_4)
    isnan(x) && return print(io, "NaN8_4")
    isinf(x) && return print(io, "Inf8_4")

    # Render through a fresh buffer so the text does not depend on `io`.
    txt = sprint(print, Float32(x))
    return print(io, "Float8_4(", txt, ")")
end

test/runtests.jl

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using Test
33

44
@testset "Conversion Float8 <-> Float32" begin
55

6-
for i in 0x00:0xff
6+
@testset for i in 0x00:0xff
77
if ~isnan(Float8(i))
88
@test i == reinterpret(UInt8,Float8(Float32(Float8(i))))
99
end
@@ -12,9 +12,151 @@ end
1212

1313
# Every non-NaN bit pattern must survive a Float8_4 -> Float32 -> Float8_4
# round trip unchanged.
@testset "Conversion Float8_4 <-> Float32" begin
    @testset for i in 0x00:0xff
        x = Float8_4(i)
        if !isnan(x)
            roundtrip = Float8_4(Float32(x))
            @test i == reinterpret(UInt8, roundtrip)
        end
    end
end
21+
22+
# Double negation must give back the original value for every non-NaN bit
# pattern of both 8-bit types.
@testset "Negation" begin
    @testset for i in 0x00:0xff
        for T in (Float8, Float8_4)
            x = T(i)
            if !isnan(x)
                @test x == -(-x)
            end
        end
    end
end
37+
38+
# For every non-NaN bit pattern, `floor` must not exceed the value and `ceil`
# must not fall below it.
@testset "Rounding" begin
    @testset for i in 0x00:0xff
        for T in (Float8, Float8_4)
            x = T(i)
            if !isnan(x)
                @test x >= floor(x)
                @test x <= ceil(x)
            end
        end
    end
end
55+
56+
# Exhaustively check triangle-type inequalities over all pairs of finite,
# non-NaN Float8 values.
@testset "Triangle inequality Float8" begin
    @testset for i in 0x00:0xff
        f1 = Float8(i)
        for j in 0x00:0xff
            f2 = Float8(j)

            skip = isnan(f1) || isnan(f2) || !isfinite(f1) || !isfinite(f2)
            if !skip
                a1, a2 = abs(f1), abs(f2)
                @test a1 + a2 >= abs(f1 + f2)
                @test a1 - a2 <= abs(f1 - f2)
                @test a1 * a2 >= f1 * f2
            end
        end
    end
end
72+
73+
# Exhaustively check triangle-type inequalities over all pairs of finite,
# non-NaN Float8_4 values.
@testset "Triangle inequality Float8_4" begin
    @testset for i in 0x00:0xff
        f1 = Float8_4(i)
        for j in 0x00:0xff
            f2 = Float8_4(j)

            skip = isnan(f1) || isnan(f2) || !isfinite(f1) || !isfinite(f2)
            if !skip
                a1, a2 = abs(f1), abs(f2)
                @test a1 + a2 >= abs(f1 + f2)
                @test a1 - a2 <= abs(f1 - f2)
                @test a1 * a2 >= f1 * f2
            end
        end
    end
end
89+
90+
# `f` is the larger and `g` the smaller operand used by the comparisons below.
f = Float8(2.0)
g = Float8(1.0)

@testset "Comparison Float8" begin
    # Scalar orderings.
    @test f >= g
    @test f > g
    @test g < f
    @test g <= g

    # The same orderings, broadcast element-wise over 1x2 matrices.
    small, large = [g g], [f f]
    @test all(small .< large)
    @test all(small .<= large)
    @test all(large .> small)
    @test all(large .>= small)

    @test isless(g, f)
    @test !isless(f, g)

    # Equality and inequality of freshly converted values.
    @test Float8(2.5) == Float8(2.5)
    @test Float8(2.5) != Float8(2.6)
end
108+
109+
# `f` is the larger and `g` the smaller operand used by the comparisons below.
f = Float8_4(2.0)
g = Float8_4(1.0)

@testset "Comparison Float8_4" begin
    # Scalar orderings.
    @test f >= g
    @test f > g
    @test g < f
    @test g <= g

    # The same orderings, broadcast element-wise over 1x2 matrices.
    small, large = [g g], [f f]
    @test all(small .< large)
    @test all(small .<= large)
    @test all(large .> small)
    @test all(large .>= small)

    @test isless(g, f)
    @test !isless(f, g)

    # Equality and inequality of freshly converted values.
    @test Float8_4(2.5) == Float8_4(2.5)
    @test Float8_4(2.5) != Float8_4(2.7)
end
127+
128+
# Classification and comparison behaviour of the Float8 special values.
@testset "NaN8 and Inf8" begin
    # isnan: true for both signs of NaN, false for infinities and finite values.
    for x in (NaN8, -NaN8)
        @test isnan(x)
    end
    for x in (Inf8, -Inf8, Float8(2.6))
        @test !isnan(x)
    end
    @test NaN8 != NaN8

    # isinf: true for both signs of infinity, false for NaNs and finite values.
    for x in (Inf8, -Inf8)
        @test isinf(x)
    end
    for x in (NaN8, -NaN8, Float8(2.6))
        @test !isinf(x)
    end
    @test Inf8 == Inf8
    @test Inf8 != -Inf8
    @test -Inf8 < Inf8
end
145+
146+
# Classification and comparison behaviour of the Float8_4 special values.
# The finite-value checks use `Float8_4(2.6)`; they previously used
# `Float8(2.6)`, so the Float8_4 type was never exercised on a finite input.
@testset "NaN8_4 and Inf8_4" begin
    # isnan: true for both signs of NaN, false for infinities and finite values.
    @test isnan(NaN8_4)
    @test isnan(-NaN8_4)
    @test !isnan(Inf8_4)
    @test !isnan(-Inf8_4)
    @test !isnan(Float8_4(2.6))
    @test NaN8_4 != NaN8_4

    # isinf: true for both signs of infinity, false for NaNs and finite values.
    @test isinf(Inf8_4)
    @test isinf(-Inf8_4)
    @test !isinf(NaN8_4)
    @test !isinf(-NaN8_4)
    @test !isinf(Float8_4(2.6))
    @test Inf8_4 == Inf8_4
    @test Inf8_4 != -Inf8_4
    @test -Inf8_4 < Inf8_4
end

0 commit comments

Comments
 (0)