@@ -75,12 +75,17 @@ between x y z = x >= y && x <= z
7575-- | otherwise = 4
7676-- Implementation suggested by Alex Mason.
7777
78- -- | @since 2.0
78+ -- | Measure byte length of UTF-8 encoding for a given character.
79+ --
80+ -- @since 2.0
utf8Length :: Char -> Int
utf8Length (C# ch#) = I# (1# +# atLeast2# +# atLeast3# +# atLeast4#)
  where
    -- 'geChar#' yields 1# when the comparison holds and 0# otherwise, so each
    -- threshold the character reaches contributes one extra byte on top of
    -- the single byte every character needs.
    atLeast2# = geChar# ch# (chr# 0x80#)
    atLeast3# = geChar# ch# (chr# 0x800#)
    atLeast4# = geChar# ch# (chr# 0x10000#)
{-# INLINE utf8Length #-}
8284
83- -- | @since 2.0
85+ -- | Measure byte length of UTF-8 encoding for characters,
86+ -- starting with a given byte.
87+ --
88+ -- @since 2.0
8489utf8LengthByLeader :: Word8 -> Int
8590utf8LengthByLeader w
8691 | w < 0x80 = 1
@@ -89,6 +94,10 @@ utf8LengthByLeader w
8994 | otherwise = 4
9095{-# INLINE utf8LengthByLeader #-}
9196
97+ -- | Encode a character as UTF-8 bytes assuming that exactly 2 are needed.
98+ -- This precondition is not checked.
99+ --
100+ -- @since 1.1.0.0
92101ord2 ::
93102#if defined(ASSERTS)
94103 HasCallStack =>
@@ -105,6 +114,10 @@ ord2 c =
105114 x2 = intToWord8 $ (n .&. 0x3F ) + 0x80
106115{-# INLINE ord2 #-}
107116
117+ -- | Encode a character as UTF-8 bytes assuming that exactly 3 are needed.
118+ -- This precondition is not checked.
119+ --
120+ -- @since 1.1.0.0
108121ord3 ::
109122#if defined(ASSERTS)
110123 HasCallStack =>
@@ -122,6 +135,10 @@ ord3 c =
122135 x3 = intToWord8 $ (n .&. 0x3F ) + 0x80
123136{-# INLINE ord3 #-}
124137
138+ -- | Encode a character as UTF-8 bytes assuming that exactly 4 are needed.
139+ -- This precondition is not checked.
140+ --
141+ -- @since 1.1.0.0
125142ord4 ::
126143#if defined(ASSERTS)
127144 HasCallStack =>
@@ -140,6 +157,7 @@ ord4 c =
140157 x4 = intToWord8 $ (n .&. 0x3F ) + 0x80
141158{-# INLINE ord4 #-}
142159
160+ -- | @since 1.1.0.0
143161chr2 :: Word8 -> Word8 -> Char
144162chr2 (W8 # x1# ) (W8 # x2# ) = C # (chr# (z1# +# z2# ))
145163 where
@@ -149,6 +167,7 @@ chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
149167 ! z2# = y2# -# 0x80 #
150168{-# INLINE chr2 #-}
151169
170+ -- | @since 1.1.0.0
152171chr3 :: Word8 -> Word8 -> Word8 -> Char
153172chr3 (W8 # x1# ) (W8 # x2# ) (W8 # x3# ) = C # (chr# (z1# +# z2# +# z3# ))
154173 where
@@ -160,6 +179,7 @@ chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
160179 ! z3# = y3# -# 0x80 #
161180{-# INLINE chr3 #-}
162181
182+ -- | @since 1.1.0.0
163183chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
164184chr4 (W8 # x1# ) (W8 # x2# ) (W8 # x3# ) (W8 # x4# ) =
165185 C # (chr# (z1# +# z2# +# z3# +# z4# ))
@@ -174,14 +194,17 @@ chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
174194 ! z4# = y4# -# 0x80 #
175195{-# INLINE chr4 #-}
176196
197+ -- | @since 1.1.0.0
validate1 :: Word8 -> Bool
-- Accepts exactly the bytes 0x00..0x7F.
validate1 byte = byte < 0x80
{-# INLINE validate1 #-}
180201
202+ -- | @since 1.1.0.0
validate2 :: Word8 -> Word8 -> Bool
validate2 lead cont = leadInRange && contInRange
  where
    -- First byte must lie in 0xC2..0xDF (inclusive on both ends).
    leadInRange = lead >= 0xC2 && lead <= 0xDF
    -- Second byte must lie in 0x80..0xBF (inclusive on both ends).
    contInRange = cont >= 0x80 && cont <= 0xBF
{-# INLINE validate2 #-}
184206
207+ -- | @since 1.1.0.0
185208validate3 :: Word8 -> Word8 -> Word8 -> Bool
186209{-# INLINE validate3 #-}
187210validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
@@ -199,6 +222,7 @@ validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
199222 between x2 0x80 0xBF &&
200223 between x3 0x80 0xBF
201224
225+ -- | @since 1.1.0.0
202226validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
203227{-# INLINE validate4 #-}
204228validate4 x1 x2 x3 x4 = validate4_1 || validate4_2 || validate4_3
@@ -237,12 +261,15 @@ byteToClass n = ByteClass (W8# el#)
237261 table# :: Addr #
238262 table# = " \NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\b\b\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\n\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\EOT\ETX\ETX\v\ACK\ACK\ACK\ENQ\b\b\b\b\b\b\b\b\b\b\b " #
239263
264+ -- | @since 2.0
-- Wraps the 'Word8' state value that the decoder's transition function
-- takes and returns.
newtype DecoderState
  = DecoderState Word8
  deriving (Show, Eq)
242267
268+ -- | @since 2.0.2
utf8AcceptState :: DecoderState
-- The decoder state whose underlying value is 0.
utf8AcceptState = DecoderState 0x00
245271
272+ -- | @since 2.0.2
utf8RejectState :: DecoderState
-- The decoder state whose underlying value is 12 (0x0C).
utf8RejectState = DecoderState 0x0C
248275
@@ -255,9 +282,11 @@ updateState (ByteClass c) (DecoderState s) = DecoderState (W8# el#)
255282 table# :: Addr #
256283 table# = " \NUL\f\CAN $<`T\f\f\f 0H\f\f\f\f\f\f\f\f\f\f\f\f\f\NUL\f\f\f\f\f\NUL\f\NUL\f\f\f\CAN\f\f\f\f\f\CAN\f\CAN\f\f\f\f\f\f\f\f\f\CAN\f\f\f\f\f\CAN\f\f\f\f\f\f\f\CAN\f\f\f\f\f\f\f\f\f $\f $\f\f\f $\f\f\f\f\f $\f $\f\f\f $\f\f\f\f\f\f\f\f\f\f " #
257284
285+ -- | @since 2.0.2
updateDecoderState :: Word8 -> DecoderState -> DecoderState
updateDecoderState byte current = updateState byteClass current
  where
    -- Map the raw byte to its class first; 'updateState' consumes the class,
    -- not the byte itself.
    byteClass = byteToClass byte
260288
289+ -- | @since 2.0
-- Wrapper around a plain 'Int'; no instances are derived here.
-- NOTE(review): presumably holds a Unicode code point value - confirm
-- against the functions that construct and consume it.
newtype CodePoint = CodePoint Int
262291
263292-- | @since 2.0
0 commit comments