-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathentity_extraction.py
More file actions
29 lines (25 loc) · 988 Bytes
/
entity_extraction.py
File metadata and controls
29 lines (25 loc) · 988 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import nltk
from flair.nn import Classifier
from flair.data import Sentence
# NLTK data needed by word_tokenize, pos_tag and ne_chunk.
# The first four ids are the ones this module has always fetched. The
# remaining ids are the renamed variants that newer NLTK releases
# (reportedly 3.9+) look up instead of the pickled originals; fetching both
# sets keeps the module usable on either side of that change.
# NOTE(review): confirm the newer ids against the installed NLTK version.
_NLTK_RESOURCES = (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
)
for _resource in _NLTK_RESOURCES:
    nltk.download(_resource)
class EntityExtractor:
    """Extract named entities from text with either Flair or NLTK.

    Both extraction methods return a list of ``(entity_text, label)``
    tuples and return an empty list for empty or whitespace-only input.
    """

    def __init__(self):
        """Load the Flair classifier registered under the name ``'ner'``."""
        self.flair_ner_model = Classifier.load('ner')

    def extract_entities_flair(self, text: str) -> list:
        """Return ``(text, tag)`` pairs for each 'ner' span Flair predicts.

        Args:
            text: Raw input text.

        Returns:
            A list of ``(span_text, span_tag)`` tuples in the order given by
            ``sentence.get_spans('ner')``; ``[]`` when *text* is falsy or
            contains only whitespace.
        """
        # Nothing to tag: skip building a Sentence and running the model.
        if not text or not text.strip():
            return []
        sentence = Sentence(text)
        # predict() annotates the sentence in place.
        self.flair_ner_model.predict(sentence)
        return [(span.text, span.tag) for span in sentence.get_spans('ner')]

    def extract_entities_nltk(self, text: str) -> list:
        """Return ``(text, label)`` pairs for each chunk NLTK's ne_chunk finds.

        The text is tokenized, POS-tagged and passed to ``nltk.ne_chunk``;
        every top-level subtree of the result is reported as one entity.

        Args:
            text: Raw input text.

        Returns:
            A list of ``(entity_text, entity_label)`` tuples, where
            ``entity_text`` is the subtree's words joined by single spaces;
            ``[]`` when *text* is falsy or contains only whitespace.
        """
        # Nothing to chunk: skip tokenizing and tagging.
        if not text or not text.strip():
            return []
        tokens = nltk.word_tokenize(text)
        pos_tags = nltk.pos_tag(tokens)
        tree = nltk.ne_chunk(pos_tags)
        # Children that are Trees are entity chunks; the other children are
        # plain (word, tag) tuples for tokens outside any entity.
        return [
            (' '.join(word for word, _tag in chunk.leaves()), chunk.label())
            for chunk in tree
            if isinstance(chunk, nltk.Tree)
        ]