forked from elinorbgr/ai-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlt.py
More file actions
83 lines (70 loc) · 2.59 KB
/
lt.py
File metadata and controls
83 lines (70 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
from ctypes import byref, CDLL, c_char_p, c_wchar_p, c_void_p, POINTER
class FST(object):
    """Thin ctypes wrapper around the native transducer library.

    The shared library is expected to export ``init``, ``terminate``,
    ``analyse``, ``generate`` and ``free_analyses``.  One instance owns one
    native handle, which is released when the instance is finalised.
    """

    # Class-level defaults keep __del__ safe when __init__ fails before the
    # instance attributes are assigned (for example when CDLL raises OSError).
    __handle = 0
    __lib = None

    def __init__(self, libpath, fst_a_path, fst_g_path):
        """Load the shared library and create a native handle.

        Args:
            libpath: Path of the shared library, handed to ``ctypes.CDLL``.
            fst_a_path: Byte-string path passed to ``init`` as its second
                argument (presumably the analyser transducer).
            fst_g_path: Byte-string path passed to ``init`` as its third
                argument (presumably the generator transducer).

        Raises:
            OSError: If the shared library cannot be loaded.
            Exception: If ``init`` reports an error through its out-parameter.
        """
        lib = CDLL(libpath)
        lib.init.argtypes = [POINTER(c_char_p), c_char_p, c_char_p]
        lib.init.restype = c_void_p
        lib.terminate.argtypes = [c_void_p]
        lib.terminate.restype = None
        lib.analyse.argtypes = [c_void_p, c_wchar_p]
        lib.analyse.restype = POINTER(c_wchar_p)
        lib.generate.argtypes = [c_void_p, c_wchar_p]
        lib.generate.restype = POINTER(c_wchar_p)
        # Declare the prototype explicitly instead of relying on ctypes'
        # default argument conversion and int return type.
        lib.free_analyses.argtypes = [POINTER(c_wchar_p)]
        lib.free_analyses.restype = None
        self.__lib = lib

        error = c_char_p()
        handle = lib.init(byref(error), fst_a_path, fst_g_path)
        if error.value is not None:
            # No usable handle exists, so __del__ must not try to release it.
            self.__handle = 0
            # bytes.decode works on both Python 2 and 3 (``unicode`` does not).
            raise Exception(u"Initialisation of fst failed: "
                            + error.value.decode("UTF-8", "replace"))
        self.__handle = handle

    def __del__(self):
        """Release the native handle and block further use of the library."""
        if self.__handle:
            self.__lib.terminate(self.__handle)
            self.__handle = 0

            class DummyLib:
                def __getattr__(obj, name):
                    raise Exception("Attempt to use library after terminate() was called")

            self.__lib = DummyLib()

    def analyse(self, word):
        """Return the library's analysis string for ``word``.

        Returns an empty string when the library returns a NULL result or a
        NULL first entry.
        """
        ana_p = self.__lib.analyse(self.__handle, word)
        try:
            # Copy the text out before the native buffer is released.
            return (ana_p.contents.value or "") if ana_p else ""
        finally:
            self.__lib.free_analyses(ana_p)

    def generate(self, word):
        """Return the surface form the library generates for ``word``.

        A result starting with ``@`` is treated as "could not generate" and
        ``word`` is returned unchanged.  An empty string is returned when the
        library returns a NULL result or a NULL first entry.
        """
        gen_p = self.__lib.generate(self.__handle, word)
        try:
            # Copy the text out before the native buffer is released.
            generated = (gen_p.contents.value or "") if gen_p else ""
        finally:
            self.__lib.free_analyses(gen_p)
        # startswith() is safe on an empty result, unlike indexing [0].
        if generated.startswith("@"):
            return word
        return generated
class Analyser(object):
    """Convenience front end that turns FST strings into (lemma, tags) pairs."""

    def __init__(self, libpath=b"./libltpy.so",
                 analyser_path=b"en.analyser.bin",
                 generator_path=b"en.generator.bin"):
        """Create the underlying FST.

        Args:
            libpath: Path of the shared library to load.
            analyser_path: Path handed to ``FST`` as the analyser transducer.
            generator_path: Path handed to ``FST`` as the generator transducer.

        The defaults are the values that used to be hard-coded here.
        """
        self.fst = FST(libpath, analyser_path, generator_path)

    def analyse(self, word):
        """Return every reading of ``word`` as a list of ``(lemma, tags)``.

        Use:
            analyser.analyse("cats")
            -> [("cat", ["n", "pl"])]

        When the FST returns nothing, or a string starting with ``@``, the
        result is ``[(word, ["?"])]``.
        """
        analysed = self.fst.analyse(word)
        if not analysed or analysed.startswith("@"):
            return [(word, ["?"])]
        readings = []
        # Readings are separated by "/"; each looks like "lemma<tag1><tag2>".
        for reading in analysed.split("/"):
            parts = [part.rstrip(">") for part in reading.split("<")]
            readings.append((parts[0], parts[1:]))
        return readings

    def generate(self, base, tokens):
        """Return the surface form for ``base`` carrying the tags ``tokens``.

        Use:
            analyser.generate("cat", ["n", "pl"])
            -> "cats"

        If the FST output still contains a ``<``, generation is considered to
        have failed and ``base`` is returned unchanged.
        """
        arg = base + "".join("<" + tag + ">" for tag in tokens)
        out = self.fst.generate(arg)
        return base if "<" in out else out