-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathmy_mecab.py
More file actions
29 lines (23 loc) · 773 Bytes
/
my_mecab.py
File metadata and controls
29 lines (23 loc) · 773 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# -*- coding: utf-8 -*-
import MeCab
# Option string handed to MeCab.Tagger. To analyze with the NEologd
# dictionary instead, use the commented-out value below (the directory
# location is installation-specific).
# path = '-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd'
# An empty string passes no extra command-line options to MeCab.
path = ''
# Module-level tagger, created once at import time and used by tokens().
tagger = MeCab.Tagger(path)
def tokens(text, pos=('名詞', '形容詞', '動詞')):
    """Tokenize ``text`` with the module-level MeCab tagger.

    All whitespace is removed from ``text`` before parsing. Every morpheme
    with a non-empty surface whose first feature field (the part of speech)
    is listed in ``pos`` contributes one term to the result. The term is
    feature field 6 when that field exists and is not ``'*'`` (the base
    form in IPADIC-style dictionaries -- NOTE(review): confirm for the
    dictionary actually in use); otherwise the surface string is used.

    Args:
        text: String to analyze.
        pos: Collection of part-of-speech labels to keep. An empty
            collection (or ``None``) disables filtering and keeps every
            morpheme.

    Returns:
        list: The selected terms, in order of appearance.
    """
    # Collapse every run of whitespace (spaces, tabs, newlines) to nothing.
    text = ''.join(text.split())
    node = tagger.parseToNode(text)
    word_list = []
    while node:
        surface = node.surface
        # Nodes with an empty surface carry no token text, so skip them
        # (presumably MeCab's sentence-boundary nodes).
        if surface != '':
            elem = node.feature.split(',')
            if not pos or elem[0] in pos:
                # Feature strings may have fewer than seven fields (e.g.
                # unknown words or other dictionaries), so check the length
                # before indexing instead of raising IndexError.
                has_base_form = len(elem) > 6 and elem[6] != '*'
                word_list.append(elem[6] if has_base_form else surface)
        # Advance unconditionally; otherwise skipped nodes would loop forever.
        node = node.next
    return word_list
if __name__ == '__main__':
    # Manual smoke test: tokenize a sample sentence and print the terms.
    print(tokens("今日の午後は八宝菜を食べました。"))