Merge pull request #8 from milankl/performance

Milan K · web-flow · commit a69a0d22cf4b · 2020-02-13T13:50:36.000Z
Performance
diff --git a/src/Float8s.jl b/src/Float8s.jl
@@ -8,7 +8,8 @@ module Float8s
                 (+), (-), (*), (/), (\), (^),
                 sin,cos,tan,asin,acos,atan,sinh,cosh,tanh,asinh,acosh,
                 atanh,exp,exp2,exp10,log,log2,log10,sqrt,log1p,
-                atan,hypot
+                atan,hypot,
+                round
 
     export Float8, Float8_4, NaN8, Inf8, NaN8_4, Inf8_4
 
diff --git a/src/float8.jl b/src/float8.jl
@@ -149,41 +149,66 @@ end
 const basetable8, shifttable8 = create_base_shifttable(Float8)
 const basetable8_4, shifttable8_4 = create_base_shifttable(Float8_4)
 
-basetable(::Type{Float8},i::Int) = @inbounds basetable8[i]
-basetable(::Type{Float8_4},i::Int) = @inbounds basetable8_4[i]
+function Float8(val::Float32)    # Float32 -> Float8 using the precomputed base/shift tables
 
-shifttable(::Type{Float8},i::Int) = @inbounds shifttable8[i]
-shifttable(::Type{Float8_4},i::Int) = @inbounds shifttable8_4[i]
+    f = reinterpret(UInt32, val)    # raw bit pattern of val
+
+    if isnan(val)       #TODO retain the significant bits for NaN?
+        return nan8(Float8)
+    end
+
+    # exponent as Int64
+    i = f >> n_significant_bits(Float32) + 1    # >> binds tighter than +; the +1 gives a 1-based table index
+    @inbounds sh = shifttable8[i]    # NOTE(review): @inbounds relies on i staying within the table - confirm its length
+    f &= significand_mask(Float32)
 
-function (::Type{T})(val::Float32) where {T<:AbstractFloat8}
+    # If `val` is subnormal, the tables are set up to force the
+    # result to 0, so the significand has an implicit `1` in the
+    # cases we care about.
+
+    f |= significand_mask(Float32) + 0x1
+    @inbounds h = (basetable8[i] + (f >> sh) & significand_mask(Float8)) % UInt8    # & binds tighter than +
+
+    # rounding
+    nextbit = (f >> (sh-1)) & 1    # most significant bit discarded by `f >> sh`
+    if nextbit != 0 && (h & exponent_mask(Float8)) != exponent_mask(Float8)    # no rounding once every exponent bit of h is set
+        # Round halfway to even or check lower bits
+        if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
+            h += one(UInt8)
+        end
+    end
+    return reinterpret(Float8, h)
+end
+
+function Float8_4(val::Float32)    # Float32 -> Float8_4 using the precomputed base/shift tables
 
     f = reinterpret(UInt32, val)
 
     if isnan(val)       #TODO retain the significant bits for NaN?
-        return nan8(T)
+        return nan8(Float8_4)
     end
 
     # exponent as Int64
     i = f >> n_significant_bits(Float32) + 1
-    sh = shifttable(T,i)
+    @inbounds sh = shifttable8_4[i]    # NOTE(review): @inbounds relies on i staying within the table - confirm its length
     f &= significand_mask(Float32)
 
     # If `val` is subnormal, the tables are set up to force the
     # result to 0, so the significand has an implicit `1` in the
     # cases we care about.
 
     f |= significand_mask(Float32) + 0x1
-    h = (basetable(T,i) + (f >> sh) & significand_mask(T)) % UInt8
+    @inbounds h = (basetable8_4[i] + (f >> sh) & significand_mask(Float8_4)) % UInt8    # & binds tighter than +
 
     # rounding
     nextbit = (f >> (sh-1)) & 1
-    if nextbit != 0 && (h & exponent_mask(T)) != exponent_mask(T)
+    if nextbit != 0 && (h & exponent_mask(Float8_4)) != exponent_mask(Float8_4)    # no rounding once every exponent bit of h is set
         # Round halfway to even or check lower bits
         if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
             h += one(UInt8)
         end
     end
-    return reinterpret(T, h)
+    return reinterpret(Float8_4, h)
 end
 
 first_sig_bit_mask(::Type{Float8}) = 0x00000008
@@ -265,7 +290,7 @@ function ==(x::AbstractFloat8, y::AbstractFloat8)
     if iszero(x) && iszero(y)   # For Float16: (ix|iy)&0x7fff == 0x0000
         return true
     end
-    return x == y
+    return reinterpret(UInt8,x) == reinterpret(UInt8,y)
 end
 
 for op in (:<, :<=, :isless)
@@ -274,6 +299,7 @@ end
 
 for op in (:+, :-, :*, :/, :\, :^)
     @eval ($op)(a::Float8, b::Float8) = Float8(($op)(Float32(a), Float32(b)))
+    @eval ($op)(a::Float8_4, b::Float8_4) = Float8_4(($op)(Float32(a), Float32(b)))    # evaluate in Float32, convert the result back
 end
 
 for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
@@ -290,3 +316,29 @@ for func in (:atan,:hypot)
         $func(a::Float8_4,b::Float8_4) = Float8_4($func(Float32(a),Float32(b)))
     end
 end
+
+function show(io::IO,x::Float8)    # prints NaN8, Inf8 / -Inf8, or Float8(<value>)
+    if isnan(x)
+        print(io,"NaN8")
+    elseif isinf(x)
+        print(io,x == Inf8 ? "Inf8" : "-Inf8")    # keep the sign so -Inf8 is not displayed as Inf8
+    else
+        io2 = IOBuffer()    # separate buffer: digits come from a plain print of the Float32 value
+        print(io2,Float32(x))
+        f = String(take!(io2))
+        print(io,"Float8("*f*")")
+    end
+end
+
+function show(io::IO,x::Float8_4)    # prints NaN8_4, Inf8_4 / -Inf8_4, or Float8_4(<value>)
+    if isnan(x)
+        print(io,"NaN8_4")
+    elseif isinf(x)
+        print(io,x == Inf8_4 ? "Inf8_4" : "-Inf8_4")    # keep the sign so -Inf8_4 is not displayed as Inf8_4
+    else
+        io2 = IOBuffer()    # separate buffer: digits come from a plain print of the Float32 value
+        print(io2,Float32(x))
+        f = String(take!(io2))
+        print(io,"Float8_4("*f*")")
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -3,7 +3,7 @@ using Test
 
 @testset "Conversion Float8 <-> Float32" begin
 
-    for i in 0x00:0xff
+    @testset for i in 0x00:0xff
         if ~isnan(Float8(i))
             @test i == reinterpret(UInt8,Float8(Float32(Float8(i))))
         end
@@ -12,9 +12,151 @@ end
 
 @testset "Conversion Float8_4 <-> Float32" begin
 
-    for i in 0x00:0xff
+    @testset for i in 0x00:0xff    # one nested testset per bit pattern
         if ~isnan(Float8_4(i))
             @test i == reinterpret(UInt8,Float8_4(Float32(Float8_4(i))))
         end
     end
 end
+
+@testset "Negation" begin
+    # Negating twice must give back the original value for every non-NaN bit pattern.
+    @testset for i in 0x00:0xff
+        a = Float8(i)
+        b = Float8_4(i)
+
+        if !isnan(a)
+            @test a == -(-a)
+        end
+
+        if !isnan(b)
+            @test b == -(-b)
+        end
+    end
+end
+
+@testset "Rounding" begin
+    # Every non-NaN value must lie between its floor and its ceil.
+    @testset for i in 0x00:0xff
+        a = Float8(i)
+        b = Float8_4(i)
+
+        if !isnan(a)
+            @test a >= floor(a)
+            @test a <= ceil(a)
+        end
+
+        if !isnan(b)
+            @test b >= floor(b)
+            @test b <= ceil(b)
+        end
+    end
+end
+
+@testset "Triangle inequality Float8" begin
+    # All pairs of bit patterns; pairs containing NaN or a non-finite value are skipped.
+    @testset for i in 0x00:0xff
+        for j in 0x00:0xff
+            a = Float8(i)
+            b = Float8(j)
+
+            usable = !isnan(a) && !isnan(b) && isfinite(a) && isfinite(b)
+            usable || continue
+
+            @test abs(a) + abs(b) >= abs(a+b)
+            @test abs(a) - abs(b) <= abs(a-b)
+            @test abs(a) * abs(b) >= a*b
+        end
+    end
+end
+
+@testset "Triangle inequality Float8_4" begin
+    # All pairs of bit patterns; pairs containing NaN or a non-finite value are skipped.
+    @testset for i in 0x00:0xff
+        for j in 0x00:0xff
+            a = Float8_4(i)
+            b = Float8_4(j)
+
+            usable = !isnan(a) && !isnan(b) && isfinite(a) && isfinite(b)
+            usable || continue
+
+            @test abs(a) + abs(b) >= abs(a+b)
+            @test abs(a) - abs(b) <= abs(a-b)
+            @test abs(a) * abs(b) >= a*b
+        end
+    end
+end
+
+f = Float8(2.)    # larger operand used by the comparison tests below
+g = Float8(1.)    # smaller operand used by the comparison tests below
+
+@testset "Comparison Float8" begin
+    @test f >= g
+    @test f > g
+    @test g < f
+    @test g <= g    # reflexive case
+    @test all([g g] .< [f f])    # same comparisons, broadcast elementwise
+    @test all([g g] .<= [f f])
+    @test all([f f] .> [g g])
+    @test all([f f] .>= [g g])
+    @test isless(g, f)
+    @test !isless(f, g)
+
+    @test Float8(2.5) == Float8(2.5)
+    @test Float8(2.5) != Float8(2.6)    # 2.5 and 2.6 are expected to convert to distinct Float8 values
+end
+
+f = Float8_4(2.)    # larger operand used by the comparison tests below
+g = Float8_4(1.)    # smaller operand used by the comparison tests below
+
+@testset "Comparison Float8_4" begin
+    @test f >= g
+    @test f > g
+    @test g < f
+    @test g <= g    # reflexive case
+    @test all([g g] .< [f f])    # same comparisons, broadcast elementwise
+    @test all([g g] .<= [f f])
+    @test all([f f] .> [g g])
+    @test all([f f] .>= [g g])
+    @test isless(g, f)
+    @test !isless(f, g)
+
+    @test Float8_4(2.5) == Float8_4(2.5)
+    @test Float8_4(2.5) != Float8_4(2.7)    # 2.5 and 2.7 are expected to convert to distinct Float8_4 values
+end
+
+@testset "NaN8 and Inf8" begin
+    @test isnan(NaN8)
+    @test isnan(-NaN8)    # negation keeps NaN
+    @test !isnan(Inf8)
+    @test !isnan(-Inf8)
+    @test !isnan(Float8(2.6))    # ordinary finite value
+    @test NaN8 != NaN8    # NaN is unequal to itself
+
+    @test isinf(Inf8)
+    @test isinf(-Inf8)    # negation keeps infinity
+    @test !isinf(NaN8)
+    @test !isinf(-NaN8)
+    @test !isinf(Float8(2.6))    # ordinary finite value
+    @test Inf8 == Inf8
+    @test Inf8 != -Inf8    # the two infinities are distinct
+    @test -Inf8 < Inf8
+end
+
+@testset "NaN8_4 and Inf8_4" begin
+    @test isnan(NaN8_4)
+    @test isnan(-NaN8_4)    # negation keeps NaN
+    @test !isnan(Inf8_4)
+    @test !isnan(-Inf8_4)
+    @test !isnan(Float8_4(2.6))    # ordinary finite value of the type under test
+    @test NaN8_4 != NaN8_4    # NaN is unequal to itself
+
+    @test isinf(Inf8_4)
+    @test isinf(-Inf8_4)    # negation keeps infinity
+    @test !isinf(NaN8_4)
+    @test !isinf(-NaN8_4)
+    @test !isinf(Float8_4(2.6))    # ordinary finite value of the type under test
+    @test Inf8_4 == Inf8_4
+    @test Inf8_4 != -Inf8_4    # the two infinities are distinct
+    @test -Inf8_4 < Inf8_4
+end