-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest.py
More file actions
64 lines (52 loc) · 1.55 KB
/
test.py
File metadata and controls
64 lines (52 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# coding: utf-8
import os
import sys
import igo.tagger
# Python 2/3 compatibility shim.
# ``u`` converts a source-code string literal into text, and on Python 2
# stdout is wrapped in a UTF-8 stream writer so text written to it is encoded
# as UTF-8.
if sys.version_info[0] < 3:
    import codecs

    def u(s):
        """Return *s* as text, decoding UTF-8 byte strings.

        Values that are already text are returned unchanged, so applying
        ``u`` to an already-decoded value does not trigger an implicit
        ASCII encode.
        """
        if isinstance(s, bytes):
            return s.decode('utf-8')
        return s

    sys.stdout = codecs.lookup('utf-8').streamwriter(sys.stdout)
else:
    # On Python 3 string literals are already text.
    u = str
def pp(sf, ft, st):
    """Write one result line, ``<sf>: <ft> at <st>``, to ``sys.stdout``.

    The callers in this script pass a morpheme's ``surface``, ``feature``
    and ``start`` attributes. ``st`` is formatted with ``%d``.
    """
    template = u("%s: %s at %d\n")
    line = template % (sf, ft, st)
    sys.stdout.write(line)
# Create the tagger with its default arguments. The commented-out lines are
# the alternative constructions kept from the original script.
t = igo.tagger.Tagger()
#t = igo.tagger.Tagger('ipadic_gae', gae=True)
# t = igo.tagger.Tagger('ipadic')

# Parse each sample sentence, print every morpheme, then a separator.
for sentence in ('私の名前は中野です。', 'こんにちは世界'):
    for morpheme in t.parse(u(sentence)):
        pp(morpheme.surface, morpheme.feature, morpheme.start)
    print('\n')
# Test if the dictionary exists: expose ./ipadic as igo/dic through a
# temporary symlink, and if the link resolves, build a fresh tagger and parse
# a sample sentence.
# NOTE(review): presumably Tagger() with no arguments picks up igo/dic --
# confirm against igo.tagger.
try:
    os.symlink(os.path.join(os.getcwd(), 'ipadic'), 'igo/dic')
    try:
        # os.path.exists() is False for a broken link, i.e. when there is no
        # 'ipadic' entry in the current working directory.
        if os.path.exists('igo/dic'):
            t = igo.tagger.Tagger()
            for m in t.parse(u('こんにちは世界')):
                pp(m.surface, m.feature, m.start)
            print('\n')
    finally:
        # Always remove the link created above, even when tagging failed, so
        # a failed run does not leave igo/dic behind.
        os.remove('igo/dic')
except Exception as exc:
    # Best effort: this check is optional (symlinks may be unsupported or the
    # link may already exist), so report the reason and carry on.
    # KeyboardInterrupt and SystemExit are deliberately not caught.
    sys.stderr.write('symlinked dictionary check skipped: %r\n' % (exc,))
# Sample inputs containing a character that needs a surrogate pair, placed at
# different positions in the text.
surrogate_pair_cases = (
    'おはようー😳こんにちはー美味しいご飯だよ',  # contains a surrogate pair char
    '😳',  # only surrogate pair char
    '😳😳',  # multiple surrogate pair chars
    '😳おはよう',  # starts with a surrogate pair char
    'おはよう😳',  # ends with a surrogate pair char
)

# Parse each case in order, print its morphemes, then a separator.
for text in surrogate_pair_cases:
    for m in t.parse(u(text)):
        pp(m.surface, m.feature, m.start)
    print('\n')