-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_sim.py
More file actions
executable file
·51 lines (46 loc) · 1.69 KB
/
word_sim.py
File metadata and controls
executable file
·51 lines (46 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# coding: utf-8
import jp_wordnet as JPWN
"""
類似度計算モジュール
"""
class WordSim(JPWN.JapaneseWordNetCorpusReader):
    """Word-similarity helper layered on the Japanese WordNet corpus reader.

    Scores computed by :meth:`similarity` are memoised per instance in
    ``self.cache`` so that repeated queries for the same pair are cheap.
    """

    def __init__(self):
        JPWN.JapaneseWordNetCorpusReader.__init__(self)
        # Maps an ordered (word, word) tuple to the score computed for it
        # (None included), so a pair is only ever looked up once.
        self.cache = {}

    def similarity(self, a, b):
        """Return the path similarity between the words *a* and *b*.

        Arguments that are not ``str`` are converted with ``str()``. The pair
        is put into a canonical order before use, so ``similarity(a, b)`` and
        ``similarity(b, a)`` share one cache entry.

        Returns whatever ``path_similarity`` yields for the two synsets, or
        ``None`` when ``self.synset`` gives a falsy result for either word.
        """
        if not isinstance(a, str):
            a = str(a)
        if not isinstance(b, str):
            b = str(b)

        # Canonical ordering: the smaller string always comes first in the key.
        key = (b, a) if a > b else (a, b)

        # A cached value may legitimately be None, so test membership rather
        # than relying on dict.get().
        if key in self.cache:
            return self.cache[key]

        first, second = key
        jsyn_a = self.synset(first)
        jsyn_b = self.synset(second)
        if jsyn_a and jsyn_b:
            sim = jsyn_a.path_similarity(jsyn_b)
        else:
            sim = None

        self.cache[key] = sim  # remember the result, including None
        return sim

    def printSimilarity(self, a, b):
        """Print the similarity of *a* and *b*, or a notice when it is unknown.

        The words are rendered with ``str.format`` so that non-string
        arguments (which :meth:`similarity` accepts) do not raise a
        ``TypeError`` while the message is being built.
        """
        sim = self.similarity(a, b)
        if sim is not None:
            print("「{}」と「{}」の類似度:".format(a, b), sim)
        else:
            print("「{}」と「{}」:辞書に無い単語を含みます".format(a, b))
if __name__ == "__main__":
    # Demo: print the similarity for a handful of sample word pairs,
    # in the order listed below.
    reader = WordSim()
    sample_pairs = [
        (u"うどん", u"そば"),
        (u"うどん", u"りんご"),
        (u"うどん", u"くじら"),
        (u"みかん", u"りんご"),
        (u"メロン", u"りんご"),
        (u"自動車", u"りんご"),
        (u"ほあ", u"りんご"),
    ]
    for left_word, right_word in sample_pairs:
        reader.printSimilarity(left_word, right_word)