Skip to content

Commit 68e2e27

Browse files
committed
A bit more documentation for Data.Text.Internal.Encoding.Utf8
1 parent d81c75e commit 68e2e27

1 file changed

Lines changed: 31 additions & 2 deletions

File tree

  • src/Data/Text/Internal/Encoding

src/Data/Text/Internal/Encoding/Utf8.hs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,17 @@ between x y z = x >= y && x <= z
7575
-- | otherwise = 4
7676
-- Implementation suggested by Alex Mason.
7777

78-
-- | @since 2.0
78+
-- | Measure byte length of UTF-8 encoding for a given character.
79+
--
80+
-- @since 2.0
7981
utf8Length :: Char -> Int
8082
-- Branch-free: the result starts at 1# and each 'geChar#' comparison against
-- a threshold (0x80, 0x800, 0x10000) adds its Int# result to the sum.
utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800#) +# geChar# c (chr# 0x10000#)))
8183
{-# INLINE utf8Length #-}
8284

83-
-- | @since 2.0
85+
-- | Measure byte length of UTF-8 encoding for characters,
86+
-- starting with a given byte.
87+
--
88+
-- @since 2.0
8489
utf8LengthByLeader :: Word8 -> Int
8590
utf8LengthByLeader w
8691
| w < 0x80 = 1
@@ -89,6 +94,10 @@ utf8LengthByLeader w
8994
| otherwise = 4
9095
{-# INLINE utf8LengthByLeader #-}
9196

97+
-- | Encode a character as UTF-8 bytes assuming that exactly 2 are needed.
98+
-- This precondition is not checked.
99+
--
100+
-- @since 1.1.0.0
92101
ord2 ::
93102
#if defined(ASSERTS)
94103
HasCallStack =>
@@ -105,6 +114,10 @@ ord2 c =
105114
x2 = intToWord8 $ (n .&. 0x3F) + 0x80
106115
{-# INLINE ord2 #-}
107116

117+
-- | Encode a character as UTF-8 bytes assuming that exactly 3 are needed.
118+
-- This precondition is not checked.
119+
--
120+
-- @since 1.1.0.0
108121
ord3 ::
109122
#if defined(ASSERTS)
110123
HasCallStack =>
@@ -122,6 +135,10 @@ ord3 c =
122135
x3 = intToWord8 $ (n .&. 0x3F) + 0x80
123136
{-# INLINE ord3 #-}
124137

138+
-- | Encode a character as UTF-8 bytes assuming that exactly 4 are needed.
139+
-- This precondition is not checked.
140+
--
141+
-- @since 1.1.0.0
125142
ord4 ::
126143
#if defined(ASSERTS)
127144
HasCallStack =>
@@ -140,6 +157,7 @@ ord4 c =
140157
x4 = intToWord8 $ (n .&. 0x3F) + 0x80
141158
{-# INLINE ord4 #-}
142159

160+
-- | @since 1.1.0.0
143161
chr2 :: Word8 -> Word8 -> Char
144162
chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
145163
where
@@ -149,6 +167,7 @@ chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
149167
!z2# = y2# -# 0x80#
150168
{-# INLINE chr2 #-}
151169

170+
-- | @since 1.1.0.0
152171
chr3 :: Word8 -> Word8 -> Word8 -> Char
153172
chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
154173
where
@@ -160,6 +179,7 @@ chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
160179
!z3# = y3# -# 0x80#
161180
{-# INLINE chr3 #-}
162181

182+
-- | @since 1.1.0.0
163183
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
164184
chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
165185
C# (chr# (z1# +# z2# +# z3# +# z4#))
@@ -174,14 +194,17 @@ chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
174194
!z4# = y4# -# 0x80#
175195
{-# INLINE chr4 #-}
176196

197+
-- | Test a single byte: the result is 'True' exactly when the byte is at
-- most @0x7F@ (i.e. it lies in the ASCII range).
--
-- @since 1.1.0.0
177198
validate1 :: Word8 -> Bool
178199
validate1 x1 = x1 <= 0x7F
179200
{-# INLINE validate1 #-}
180201

202+
-- | Test a pair of bytes: the result is 'True' exactly when the first byte
-- lies in @0xC2..0xDF@ and the second in @0x80..0xBF@ (both ranges
-- inclusive).
--
-- @since 1.1.0.0
181203
validate2 :: Word8 -> Word8 -> Bool
182204
validate2 x1 x2 = between x1 0xC2 0xDF && between x2 0x80 0xBF
183205
{-# INLINE validate2 #-}
184206

207+
-- | @since 1.1.0.0
185208
validate3 :: Word8 -> Word8 -> Word8 -> Bool
186209
{-# INLINE validate3 #-}
187210
validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
@@ -199,6 +222,7 @@ validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
199222
between x2 0x80 0xBF &&
200223
between x3 0x80 0xBF
201224

225+
-- | @since 1.1.0.0
202226
validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
203227
{-# INLINE validate4 #-}
204228
validate4 x1 x2 x3 x4 = validate4_1 || validate4_2 || validate4_3
@@ -237,12 +261,15 @@ byteToClass n = ByteClass (W8# el#)
237261
table# :: Addr#
238262
table# = "\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\b\b\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\n\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\EOT\ETX\ETX\v\ACK\ACK\ACK\ENQ\b\b\b\b\b\b\b\b\b\b\b"#
239263

264+
-- | State of the UTF-8 decoder, represented as a single 'Word8'.
-- 'utf8AcceptState' and 'utf8RejectState' are two distinguished values;
-- 'updateDecoderState' computes the state that follows an input byte.
--
-- @since 2.0
240265
newtype DecoderState = DecoderState Word8
241266
deriving (Eq, Show)
242267

268+
-- | The decoder state with the underlying value @0@.
--
-- NOTE(review): presumably the state reached when the bytes consumed so far
-- form complete, valid UTF-8 -- confirm against the transition table used
-- by 'updateDecoderState'.
--
-- @since 2.0.2
243269
utf8AcceptState :: DecoderState
244270
utf8AcceptState = DecoderState 0
245271

272+
-- | The decoder state with the underlying value @12@.
--
-- NOTE(review): presumably the state reached once the input is known to be
-- invalid UTF-8 -- confirm against the transition table used by
-- 'updateDecoderState'.
--
-- @since 2.0.2
246273
utf8RejectState :: DecoderState
247274
utf8RejectState = DecoderState 12
248275

@@ -255,9 +282,11 @@ updateState (ByteClass c) (DecoderState s) = DecoderState (W8# el#)
255282
table# :: Addr#
256283
table# = "\NUL\f\CAN$<`T\f\f\f0H\f\f\f\f\f\f\f\f\f\f\f\f\f\NUL\f\f\f\f\f\NUL\f\NUL\f\f\f\CAN\f\f\f\f\f\CAN\f\CAN\f\f\f\f\f\f\f\f\f\CAN\f\f\f\f\f\CAN\f\f\f\f\f\f\f\CAN\f\f\f\f\f\f\f\f\f$\f$\f\f\f$\f\f\f\f\f$\f$\f\f\f$\f\f\f\f\f\f\f\f\f\f"#
257284

285+
-- | Advance the decoder by one input byte: the byte is first mapped to its
-- class with @byteToClass@, then that class and the current state are
-- passed to @updateState@ to obtain the next state.
--
-- @since 2.0.2
258286
updateDecoderState :: Word8 -> DecoderState -> DecoderState
259287
updateDecoderState b s = updateState (byteToClass b) s
260288

289+
-- | A code point, stored as an 'Int'.
--
-- NOTE(review): presumably a Unicode code point produced while decoding;
-- the valid range is not visible here -- confirm against the users of this
-- type.
--
-- @since 2.0
261290
newtype CodePoint = CodePoint Int
262291

263292
-- | @since 2.0

0 commit comments

Comments
 (0)