-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsenryu_detector.rb
More file actions
160 lines (130 loc) · 3.49 KB
/
senryu_detector.rb
File metadata and controls
160 lines (130 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
require 'nameko'
# Core extension: adds a membership predicate to every Array.
class Array
  # Tells whether +item+ is present in the array.
  #
  # @param item [Object] value compared against each element with ==
  # @return [Boolean] true when at least one element equals +item+
  def has?(item)
    include?(item)
  end
end
# An Array of hashes shaped like { parsed: <token>, yomi: <String> } that can
# render itself either as surface text or as its concatenated reading.
class SenryuArray < Array
  # Concatenates the surface form of every stored token.
  #
  # @return [String] the surfaces joined in order ('' when empty)
  def text
    map { |entry| entry[:parsed].surface }.join
  end

  # Concatenates the :yomi value of every stored entry.
  #
  # @return [String] the readings joined in order ('' when empty)
  def yomi
    map { |entry| entry[:yomi] }.join
  end
end
# Scans text for a run of tokens whose readings split into three lines of
# 5 / 7 / 5 characters (an 8-character middle line is also accepted).
#
# Tokens come from the MeCab wrapper stored in @mecab. Each token is expected
# to respond to #surface, #posid and #feature (a Hash with a :pronunciation
# key) -- that is the only part of the tokenizer interface this class uses.
class SenryuDetector
  attr_accessor :ignore_words, :delete_words, :permission_posids

  # Closing symbols that get attached to the line that has just been completed
  # instead of opening the next one.
  CLOSING_SYMBOLS = [']', '」', '>', '}'].freeze

  # Line that follows each line; nil after the last one.
  NEXT_LINE = { kami: :naka, naka: :shimo, shimo: nil }.freeze

  # Characters stripped from a reading before its length is counted.
  UNCOUNTED_CHARS = 'ャュョァィゥェォ、'

  # Loads the word lists and creates the tokenizer.
  #
  # The first column of every TSV row is dropped and the remaining columns are
  # used: row 1 of +exclude_file+ becomes delete_words, row 2 becomes
  # ignore_words, and row 1 of +permission_file+ becomes permission_posids
  # (converted with to_i). Missing rows fall back to empty lists.
  #
  # @param exclude_file [String] path of the delete/ignore word list
  # @param permission_file [String] path of the permitted posid list
  # @param mecab_options [String] option string handed to Nameko::Mecab.new
  def initialize(exclude_file: 'exclude_word.tsv',
                 permission_file: 'permission.tsv',
                 mecab_options: '-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
    delete_words, ignore_words = read_file(exclude_file)
    @delete_words = delete_words || []
    @ignore_words = ignore_words || []
    @permission_posids = (read_file(permission_file).first || []).map(&:to_i)
    @mecab = Nameko::Mecab.new(mecab_options)
  end

  # Looks for the first senryu in +text+.
  #
  # Tokens are pushed into a sliding window; the window is trimmed from the
  # front while its reading is longer than 18 characters and is reset whenever
  # a token has no pronunciation. After every push the whole window, then the
  # window without its first token, is tested.
  #
  # @param text [String] text to scan
  # @return [Array<String>, false] the three lines (surface text) of the first
  #   match, or false when nothing matches or the first match has a line that
  #   ends with 'っ'
  def senryu?(text)
    window = SenryuArray.new
    pronunciations(delete_excludes(text)).each do |parsed|
      reading = parsed.feature[:pronunciation]
      if reading.nil?
        window = SenryuArray.new
        next
      end

      window << {
        parsed: parsed,
        yomi: ignore?(parsed.surface) ? '' : remove_not_pronucation(reading)
      }
      window.shift while window.yomi.length > 18

      verse = _senryu?(window) || senryu_without_head(window)
      next unless verse

      # The first structural match decides the outcome: a bad line ending
      # stops the search instead of moving on to later candidates.
      return correct_end?(verse) ? verse : false
    end
    false
  end

  private

  # Tests the window with its first token removed.
  #
  # The tail is rebuilt explicitly as a SenryuArray: since Ruby 3.0 slicing an
  # Array subclass yields a plain Array (which has no #yomi), and a range
  # slice of an empty array is nil. Array#drop always returns an array.
  #
  # @param window [SenryuArray]
  # @return [Array<String>, false]
  def senryu_without_head(window)
    rest = SenryuArray.new(window.drop(1))
    rest.yomi.length == 17 && _senryu?(rest)
  end

  # Tries to split +elements+ into three lines.
  #
  # @param elements [SenryuArray] candidate tokens
  # @return [Array<String>, false] surface text of each line, or false when
  #   the total reading is not 17/18 characters, a line boundary falls inside
  #   a token, or a line starts with a token whose posid is not permitted
  def _senryu?(elements)
    total = elements.yomi.length
    return false unless [17, 18].include?(total)

    checking = :kami
    checking_length = { kami: 5, naka: total - 10, shimo: 5 }
    # Line that receives a closing symbol found at the start of each line.
    pre_checking = { kami: :kami, naka: :kami, shimo: :naka }
    result = Hash.new('')
    yomi = ''
    elements.each do |elm|
      surface = elm[:parsed].surface
      if special_ignore?(surface) && yomi.empty?
        result[pre_checking[checking]] += surface
        next
      end
      return false if yomi.empty? && !be_permission?(elm[:parsed].posid)

      result[checking] += surface
      yomi += elm[:yomi]
      tmp = check_format(yomi, checking_length[checking], checking)
      return false unless tmp

      # NOTE: when the last line completes, check_format returns bare `true`,
      # which destructures to yomi = true, checking = nil.
      yomi, checking = tmp
    end
    result.values
  end

  # @param senryu [Array<String>] the lines of a candidate
  # @return [Boolean] false when any line ends with 'っ'
  def correct_end?(senryu)
    senryu.none? { |line| line.end_with?('っ') }
  end

  # Compares the reading accumulated for the current line with its target.
  #
  # @param yomi [String] reading accumulated so far for the current line
  # @param length [Integer] target length of the current line
  # @param checking [Symbol] :kami, :naka or :shimo
  # @return [false] when the reading overshoots the target
  # @return [true] when the last line (:shimo) is exactly filled
  # @return [Array(String, Symbol)] the reading and line to continue with
  #   (reset to '' and advanced to the next line when exactly filled)
  def check_format(yomi, length, checking)
    return false if yomi.length > length
    return [yomi, checking] if yomi.length < length
    return true if checking == :shimo

    ['', NEXT_LINE[checking]]
  end

  # Reads a whitespace-separated file, dropping the first column of each row.
  #
  # @param filename [String] path to read
  # @return [Array<Array<String>>] one array per line ([] for blank lines)
  def read_file(filename)
    File.foreach(filename).map { |line| line.split.drop(1) }
  end

  # Returns a copy of +text+ with the delete_words removed.
  #
  # NOTE(review): String#delete! treats each entry as a character set, so
  # every character of a multi-character entry is removed individually (and
  # '^' / '-' have set semantics). Confirm this matches exclude_word.tsv.
  #
  # @param text [String]
  # @return [String] a new string; +text+ itself is not modified
  def delete_excludes(text)
    @delete_words.each_with_object(text.dup) { |chars, cleaned| cleaned.delete!(chars) }
  end

  # @return [Boolean] whether +word+ is listed in ignore_words (such tokens
  #   contribute an empty reading)
  def ignore?(word)
    @ignore_words.include?(word)
  end

  # Terminating symbols behave specially.
  #
  # @return [Boolean] whether +word+ is one of the closing symbols
  def special_ignore?(word)
    CLOSING_SYMBOLS.include?(word)
  end

  # @return [Boolean] whether a token with this posid may start a line
  def be_permission?(posid)
    @permission_posids.include?(posid)
  end

  # @return [Enumerable] tokens produced by the tokenizer for +text+
  def pronunciations(text)
    @mecab.parse(text)
  end

  # Strips the characters that are not counted towards a line's length.
  #
  # @param text [String] pronunciation of one token
  # @return [String] +text+ without small kana and '、'
  def remove_not_pronucation(text)
    text.delete(UNCOUNTED_CHARS)
  end
end
# Detector = SenryuDetector.new
# loop do
# text = gets.chomp
# p Detector.senryu?(text)
# end