@@ -149,41 +149,66 @@ end
# Lookup tables used by the Float32 -> 8-bit float constructors, one
# (base, shift) pair per format. Both tables of a pair are indexed with the
# same index: the Float32 bit pattern shifted right by the number of
# significand bits, plus one.
const basetable8, shifttable8 = create_base_shifttable(Float8)
const basetable8_4, shifttable8_4 = create_base_shifttable(Float8_4)
151151
152- basetable (:: Type{Float8} ,i:: Int ) = @inbounds basetable8[i]
153- basetable (:: Type{Float8_4} ,i:: Int ) = @inbounds basetable8_4[i]
"""
    Float8(val::Float32)

Convert `val` to `Float8` using the precomputed `basetable8` / `shifttable8`
lookup tables, rounding to nearest with ties going to even.

Any NaN input returns `nan8(Float8)`; the NaN payload is not preserved.
"""
function Float8(val::Float32)
    # TODO retain the significant bits for NaN?
    isnan(val) && return nan8(Float8)

    bits = reinterpret(UInt32, val)

    # Table index: everything above the significand (sign and exponent
    # bits), shifted down, plus one for 1-based indexing.
    idx = (bits >> n_significant_bits(Float32)) + 1
    @inbounds shift = shifttable8[idx]

    # Keep only the significand and make the implicit leading `1` explicit.
    # If `val` is subnormal, the tables are set up to force the result to 0,
    # so assuming an implicit `1` is fine in the cases we care about.
    sig = (bits & significand_mask(Float32)) | (significand_mask(Float32) + 0x1)

    # Truncated result: table base plus the retained significand bits.
    @inbounds h = (basetable8[idx] + (sig >> shift) & significand_mask(Float8)) % UInt8

    # Rounding: look at the first discarded bit, but leave results whose
    # exponent field is already all ones untouched.
    expmask = exponent_mask(Float8)
    roundbit = (sig >> (shift - 1)) & 1
    if roundbit != 0 && (h & expmask) != expmask
        # Round halfway to even, or up if any lower discarded bit is set.
        if h & 1 == 1 || (sig & ((1 << (shift - 1)) - 1)) != 0
            h += one(UInt8)
        end
    end

    return reinterpret(Float8, h)
end
182+
"""
    Float8_4(val::Float32)

Convert `val` to `Float8_4` using the precomputed `basetable8_4` /
`shifttable8_4` lookup tables, rounding to nearest with ties going to even.

Any NaN input returns `nan8(Float8_4)`; the NaN payload is not preserved.
"""
function Float8_4(val::Float32)
    # TODO retain the significant bits for NaN?
    isnan(val) && return nan8(Float8_4)

    bits = reinterpret(UInt32, val)

    # Table index: everything above the significand (sign and exponent
    # bits), shifted down, plus one for 1-based indexing.
    idx = (bits >> n_significant_bits(Float32)) + 1
    @inbounds shift = shifttable8_4[idx]

    # Keep only the significand and make the implicit leading `1` explicit.
    # If `val` is subnormal, the tables are set up to force the result to 0,
    # so assuming an implicit `1` is fine in the cases we care about.
    sig = (bits & significand_mask(Float32)) | (significand_mask(Float32) + 0x1)

    # Truncated result: table base plus the retained significand bits.
    @inbounds h = (basetable8_4[idx] + (sig >> shift) & significand_mask(Float8_4)) % UInt8

    # Rounding: look at the first discarded bit, but leave results whose
    # exponent field is already all ones untouched.
    expmask = exponent_mask(Float8_4)
    roundbit = (sig >> (shift - 1)) & 1
    if roundbit != 0 && (h & expmask) != expmask
        # Round halfway to even, or up if any lower discarded bit is set.
        if h & 1 == 1 || (sig & ((1 << (shift - 1)) - 1)) != 0
            h += one(UInt8)
        end
    end

    return reinterpret(Float8_4, h)
end
188213
# Return the constant mask 0x00000008 (only bit 3 set) for `Float8`.
# NOTE(review): presumably this selects the first significand bit of the
# format inside a wider word — confirm against the callers.
function first_sig_bit_mask(::Type{Float8})
    return 0x00000008
end
@@ -265,7 +290,7 @@ function ==(x::AbstractFloat8, y::AbstractFloat8)
265290 if iszero (x) && iszero (y) # For Float16: (ix|iy)&0x7fff == 0x0000
266291 return true
267292 end
268- return x == y
293+ return reinterpret (UInt8,x) == reinterpret (UInt8,y)
269294end
270295
271296for op in (:< , :<= , :isless )
274299
# Binary arithmetic for both 8-bit formats: promote the operands to Float32,
# apply the operator there, and convert the result back to the operand type.
for T in (:Float8, :Float8_4), op in (:+, :-, :*, :/, :\, :^)
    @eval ($op)(a::$T, b::$T) = $T(($op)(Float32(a), Float32(b)))
end
278304
279305for func in (:sin ,:cos ,:tan ,:asin ,:acos ,:atan ,:sinh ,:cosh ,:tanh ,:asinh ,:acosh ,
@@ -290,3 +316,29 @@ for func in (:atan,:hypot)
290316 $ func (a:: Float8_4 ,b:: Float8_4 ) = Float8_4 ($ func (Float32 (a),Float32 (b)))
291317 end
292318end
319+
# Print a `Float8` to `io`.
#
# - NaN is printed as `NaN8`.
# - Infinities are printed as `Inf8` / `-Inf8`.
# - Every other value is printed as `Float8(<value>)`, where `<value>` is the
#   plain `print` representation of the number converted to Float32.
function show(io::IO, x::Float8)
    if isnan(x)
        print(io, "NaN8")
    elseif isinf(x)
        # Keep the sign: previously both infinities were printed as "Inf8",
        # which made -Inf and +Inf indistinguishable in the output.
        print(io, signbit(Float32(x)) ? "-Inf8" : "Inf8")
    else
        # `string` renders into its own buffer, so display properties
        # attached to `io` (e.g. `:compact`) do not alter the digits.
        print(io, "Float8(", string(Float32(x)), ")")
    end
    return nothing
end
332+
# Print a `Float8_4` to `io`.
#
# - NaN is printed as `NaN8_4`.
# - Infinities are printed as `Inf8_4` / `-Inf8_4`.
# - Every other value is printed as `Float8_4(<value>)`, where `<value>` is
#   the plain `print` representation of the number converted to Float32.
function show(io::IO, x::Float8_4)
    if isnan(x)
        print(io, "NaN8_4")
    elseif isinf(x)
        # Keep the sign: previously both infinities were printed as "Inf8_4",
        # which made -Inf and +Inf indistinguishable in the output.
        print(io, signbit(Float32(x)) ? "-Inf8_4" : "Inf8_4")
    else
        # `string` renders into its own buffer, so display properties
        # attached to `io` (e.g. `:compact`) do not alter the digits.
        print(io, "Float8_4(", string(Float32(x)), ")")
    end
    return nothing
end
0 commit comments