Skip to content

Commit 45515c1

Browse files
committed
chore: use modules
1 parent 7c99f20 commit 45515c1

File tree

11 files changed

+161
-147
lines changed

11 files changed

+161
-147
lines changed

UnicodeBasic.lean

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6-
import UnicodeBasic.Types
7-
import UnicodeBasic.TableLookup
6+
module
7+
8+
public import UnicodeBasic.TableLookup
9+
public import UnicodeBasic.Types
810

911
/-!
1012
# General API #
@@ -40,7 +42,7 @@ import UnicodeBasic.TableLookup
4042
non-titlecase letter or a separator.
4143
-/
4244

43-
namespace Unicode
45+
public section namespace Unicode
4446

4547
/-!
4648
## Name ##

UnicodeBasic/CharacterDatabase.lean

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ Copyright © 2023 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6+
module
7+
68
namespace Unicode
79

810
/-- Stream type for Unicode Character Database (UCD) files
@@ -12,24 +14,24 @@ namespace Unicode
1214
around field values are not significant. Line comments are prefixed with a
1315
number sign `#` (U+0023).
1416
-/
15-
structure UCDStream extends Substring.Raw where
17+
public structure UCDStream extends Substring.Raw where
1618
/-- `isUnihan` is true if the records are tab separated -/
1719
isUnihan := false
1820
deriving Inhabited
1921

2022
namespace UCDStream
2123

2224
/-- Make a `UCDStream` from a substring -/
23-
def ofSubstring (str : Substring.Raw) : UCDStream := { str with }
25+
public def ofSubstring (str : Substring.Raw) : UCDStream := { str with }
2426

2527
/-- Make a `UCDStream` from a string -/
26-
def ofString (str : String) : UCDStream where
28+
public def ofString (str : String) : UCDStream where
2729
str := str
2830
startPos := 0
2931
stopPos := str.rawEndPos
3032

3133
/-- Make a `UCDStream` from a file -/
32-
def ofFile (path : System.FilePath) : IO UCDStream :=
34+
public def ofFile (path : System.FilePath) : IO UCDStream :=
3335
ofString <$> IO.FS.readFile path
3436

3537
/-- Get the next line from the `UCDStream`
@@ -60,7 +62,3 @@ protected def next? (stream : UCDStream) : Option (Array Substring.Raw × UCDStr
6062

6163
instance : Std.Stream UCDStream (Array Substring.Raw) where
6264
next? := UCDStream.next?
63-
64-
end UCDStream
65-
66-
end Unicode

UnicodeBasic/Hangul.lean

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,34 @@ Copyright © 2024 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6+
module
7+
68
namespace Unicode.Hangul
79

8-
private def shortNameL : Array String :=
10+
def shortNameL : Array String :=
911
#["G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S",
1012
"SS", "", "J", "JJ", "C", "K", "T", "P", "H"]
1113

12-
private def shortNameV : Array String :=
14+
def shortNameV : Array String :=
1315
#["A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
1416
"WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
1517
"I"]
1618

17-
private def shortNameT : Array String :=
19+
def shortNameT : Array String :=
1820
#["", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG",
1921
"LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S",
2022
"SS", "NG", "J", "C", "K", "T", "P", "H"]
2123

22-
private abbrev sizeL := shortNameL.size -- 19
23-
private abbrev sizeV := shortNameV.size -- 21
24-
private abbrev sizeT := shortNameT.size -- 28
25-
private abbrev sizeLV := sizeL * sizeV -- 399
26-
private abbrev sizeVT := sizeV * sizeT -- 588
27-
private abbrev sizeLVT := sizeL * sizeV * sizeT -- 11172
24+
abbrev sizeL := shortNameL.size -- 19
25+
abbrev sizeV := shortNameV.size -- 21
26+
abbrev sizeT := shortNameT.size -- 28
27+
abbrev sizeLV := sizeL * sizeV -- 399
28+
abbrev sizeVT := sizeV * sizeT -- 588
29+
abbrev sizeLVT := sizeL * sizeV * sizeT -- 11172
2830

29-
private abbrev baseL : UInt32 := 0x1100
30-
private abbrev baseV : UInt32 := 0x1161
31-
private abbrev baseT : UInt32 := 0x11A7
31+
abbrev baseL : UInt32 := 0x1100
32+
abbrev baseV : UInt32 := 0x1161
33+
abbrev baseT : UInt32 := 0x11A7
3234

3335
/-- Number of Hangul syllables -/
3436
def Syllable.size := sizeLVT
@@ -112,5 +114,3 @@ def getSyllable! (code : UInt32) : Syllable :=
112114
match getSyllable? code with
113115
| some s => s
114116
| none => panic! "not a Hangul syllable"
115-
116-
end Unicode.Hangul

UnicodeBasic/TableLookup.lean

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,28 @@ Copyright © 2024-2025 François G. Dorais. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
-/
55

6-
import UnicodeBasic.CharacterDatabase
7-
import UnicodeBasic.Hangul
8-
import UnicodeBasic.Types
6+
module
7+
8+
import all UnicodeBasic.CharacterDatabase
9+
import all UnicodeBasic.Hangul
10+
import all UnicodeBasic.Types
11+
12+
public import UnicodeBasic.Types
913

1014
namespace Unicode
1115

1216
namespace CLib
1317

1418
@[extern "unicode_case_lookup"]
15-
protected opaque lookupCase (c : UInt32) : UInt64
19+
public protected opaque lookupCase (c : UInt32) : UInt64
1620

1721
protected abbrev oUpper : UInt64 := 0x100000000
1822
protected abbrev oLower : UInt64 := 0x200000000
1923
protected abbrev oAlpha : UInt64 := 0x400000000
2024
protected abbrev oMath : UInt64 := 0x800000000
2125

2226
@[extern "unicode_prop_lookup"]
23-
protected opaque lookupProp (c : UInt32) : UInt64
27+
public protected opaque lookupProp (c : UInt32) : UInt64
2428

2529
end CLib
2630

@@ -69,7 +73,7 @@ private def parsePropTable (s : String) : Thunk <| Array (UInt32 × UInt32) := I
6973
/-- Get bidirectional class using lookup table
7074
7175
Unicode property: `Bidi_Class` -/
72-
def lookupBidiClass (c : UInt32) : BidiClass :=
76+
public def lookupBidiClass (c : UInt32) : BidiClass :=
7377
let table := table.get
7478
if c < table[0]!.1 then .BN else
7579
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
@@ -82,7 +86,7 @@ where
8286
/-- Get canonical combining class using lookup table
8387
8488
Unicode property: `Canonical_Combining_Class` -/
85-
def lookupCanonicalCombiningClass (c : UInt32) : Nat :=
89+
public def lookupCanonicalCombiningClass (c : UInt32) : Nat :=
8690
let t := table.get
8791
if c < t[0]!.1 then 0 else
8892
match t[find c (fun i => t[i]!.1) 0 t.size.toUSize]! with
@@ -97,7 +101,7 @@ where
97101
Unicode properties:
98102
`Decomposition_Mapping`
99103
`Decomposition_Type=Canonical` -/
100-
def lookupCanonicalDecompositionMapping (c : UInt32) : List UInt32 :=
104+
public def lookupCanonicalDecompositionMapping (c : UInt32) : List UInt32 :=
101105
-- Hangul syllables
102106
if Hangul.Syllable.base ≤ c && c ≤ Hangul.Syllable.last then
103107
let s := Hangul.getSyllable! c
@@ -120,7 +124,7 @@ where
120124
`Simple_Lowercase_Mapping`
121125
`Simple_Uppercase_Mapping`
122126
`Simple_Titlecase_Mapping` -/
123-
def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 :=
127+
public def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 :=
124128
let v : UInt64 := CLib.lookupCase c
125129
if v == 0 then (c, c, c) else
126130
let cu : UInt32 := v.toUInt32 &&& 0x001FFFFF
@@ -133,7 +137,7 @@ def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 :=
133137
Unicode properties:
134138
`Decomposition_Mapping`
135139
`Decomposition_Type` -/
136-
def lookupDecompositionMapping? (c : UInt32) : Option DecompositionMapping :=
140+
public def lookupDecompositionMapping? (c : UInt32) : Option DecompositionMapping :=
137141
-- Hangul syllables
138142
if Hangul.Syllable.base ≤ c && c ≤ Hangul.Syllable.last then
139143
let s := Hangul.getSyllable! c
@@ -178,12 +182,12 @@ where
178182
179183
Unicode property: `General_Category` -/
180184
@[inline]
181-
def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32
185+
public def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32
182186

183187
/-- Get name of a code point using lookup table
184188
185189
Unicode property: `Name` -/
186-
def lookupName (c : UInt32) : String :=
190+
public def lookupName (c : UInt32) : String :=
187191
let table := table.get
188192
if c < table[0]!.1 then unreachable! else
189193
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
@@ -235,7 +239,7 @@ where
235239
Unicode properties:
236240
`Numeric_Type`
237241
`Numeric_Value` -/
238-
def lookupNumericValue (c : UInt32) : Option NumericType :=
242+
public def lookupNumericValue (c : UInt32) : Option NumericType :=
239243
let table := table.get
240244
if c < table[0]!.1 then none else
241245
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
@@ -292,15 +296,16 @@ def lookupOther (c : UInt32) : UInt32 :=
292296
293297
Unicode property: `Alphabetic` -/
294298
@[inline]
295-
def lookupAlphabetic (c : UInt32) : Bool :=
299+
public def lookupAlphabetic (c : UInt32) : Bool :=
296300
let m := CLib.oAlpha ||| (GC.L ||| GC.Nl).toUInt64
297301
CLib.lookupProp c &&& m != 0
298302

299303
/-- Check if code point is bidi mirrored using lookup table
300304
301305
Unicode property: `Bidi_Mirrored`
302306
-/
303-
def lookupBidiMirrored (c : UInt32) : Bool :=
307+
@[inline]
308+
public def lookupBidiMirrored (c : UInt32) : Bool :=
304309
let table := table.get
305310
if c < table[0]!.1 then false else
306311
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
@@ -313,15 +318,15 @@ where
313318
314319
Unicode property: `Cased` -/
315320
@[inline]
316-
def lookupCased (c : UInt32 ) : Bool :=
321+
public def lookupCased (c : UInt32 ) : Bool :=
317322
let m := CLib.oUpper ||| CLib.oLower ||| GC.LC.toUInt64
318323
CLib.lookupProp c &&& m != 0
319324

320325
/-- Check if code point is a lowercase letter using lookup table
321326
322327
Unicode property: `Lowercase` -/
323328
@[inline]
324-
def lookupLowercase (c : UInt32) : Bool :=
329+
public def lookupLowercase (c : UInt32) : Bool :=
325330
let m := CLib.oLower ||| GC.Ll.toUInt64
326331
CLib.lookupProp c &&& m != 0
327332

@@ -330,35 +335,33 @@ def lookupLowercase (c : UInt32) : Bool :=
330335
331336
Unicode property: `Math` -/
332337
@[inline]
333-
def lookupMath (c : UInt32) : Bool :=
338+
public def lookupMath (c : UInt32) : Bool :=
334339
let m := CLib.oMath ||| GC.Sm.toUInt64
335340
CLib.lookupProp c &&& m != 0
336341

337342
/-- Check if code point is a titlecase letter using lookup table
338343
339344
Unicode property: `Titlecase` -/
340345
@[inline]
341-
def lookupTitlecase (c : UInt32) : Bool :=
346+
public def lookupTitlecase (c : UInt32) : Bool :=
342347
lookupGC c == GC.Lt
343348

344349
/-- Check if code point is a uppercase letter using lookup table
345350
346351
Unicode property: `Uppercase` -/
347352
@[inline]
348-
def lookupUppercase (c : UInt32) : Bool :=
353+
public def lookupUppercase (c : UInt32) : Bool :=
349354
let m := CLib.oUpper ||| GC.Lu.toUInt64
350355
CLib.lookupProp c &&& m != 0
351356

352357
/-- Check if code point is a white space character using lookup table
353358
354359
Unicode property: `White_Space` -/
355-
def lookupWhiteSpace (c : UInt32) : Bool :=
360+
public def lookupWhiteSpace (c : UInt32) : Bool :=
356361
let table := table.get
357362
if c < table[0]!.1 then false else
358363
match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with
359364
| (_, v) => c ≤ v
360365
where
361366
str : String := include_str "../data/table/White_Space.txt"
362367
table : Thunk <| Array (UInt32 × UInt32) := parsePropTable str
363-
364-
end Unicode

0 commit comments

Comments
 (0)