-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathglove.py
More file actions
34 lines (27 loc) · 1.24 KB
/
glove.py
File metadata and controls
34 lines (27 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models.keyedvectors import KeyedVectors
# embeddings_dict = {}
class Glove:
    """Query expander backed by word vectors loaded through gensim.

    The vectors come from the file ``glove.twitter.27B.25d.txt``, converted
    once to word2vec text format and then loaded as ``KeyedVectors``.
    """

    # DO NOT MODIFY THIS SIGNATURE
    # You can change the internal implementation as you see fit.
    # NOTE(review): the original template comment here described a ``model``
    # parameter for passing in a precomputed model and warned against loading
    # a model into memory in this method because it runs at query time. This
    # signature takes no such parameter and the body below DOES load the
    # vectors -- confirm that instances are built once, outside the query path.
    def __init__(self):
        """Load the word vectors, converting the raw GloVe file if needed.

        The conversion is skipped when the converted file already exists.
        Both paths are relative, so they resolve against the current working
        directory of the process.
        """
        path_server = '../../../../glove.twitter.27B.25d.txt'
        word2vec_output_file = 'glove.twitter.27B.25d.txt.word2vec'

        # Convert only once; later runs reuse the converted file.
        if not os.path.isfile(word2vec_output_file):
            glove2word2vec(path_server, word2vec_output_file)

        self._model = KeyedVectors.load_word2vec_format(
            word2vec_output_file, binary=False
        )

    def expand_query(self, term):
        """Return expansion candidates for *term*.

        Args:
            term: The query term passed to the model's ``most_similar``.

        Returns:
            A list holding the first element of every result produced by
            ``most_similar`` (in the same order), or an empty list when the
            lookup fails.
        """
        try:
            matches = self._model.most_similar(term)
        except (KeyError, ValueError, TypeError):
            # Best-effort expansion: a term the model cannot look up simply
            # yields no suggestions. Only lookup-style failures are absorbed,
            # so KeyboardInterrupt/SystemExit and unrelated bugs still surface.
            return []
        return [match[0] for match in matches]