wordvec-api/main.py at master · Christopher-06/wordvec-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import argparse
import base64

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from typing import List
import numpy as np

from config import *
import helper

def _build_parser():
    '''Create the command line parser holding the model source options.'''
    cli = argparse.ArgumentParser(description='Startup Arguments for word2vec-api')

    # (flag, help text) pairs; every option takes a single optional string value
    options = (
        ("-model_url", "URL to download the word2vec model"),
        ("-model_filepath", "Filepath to the word2vec model"),
        ("-gensim_model", "Name of pretrained gensim model"),
        ("-fasttext_model", "Lang of pretrained fasttext model"),
    )
    for flag, help_text in options:
        cli.add_argument(flag, help=help_text)

    return cli


parser = _build_parser()

# Application object; the metadata below is passed straight to FastAPI
app = FastAPI(
    version="0.2.6",
    title="Word2Vec API",
    description="Simple API Interface to get word-vectors by yours model!",
)

# Enable CORS without restrictions: every origin, method and header is allowed
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)


@app.on_event("startup")
async def startup_event():
    '''Apply the command line options and make sure a model is loaded.'''
    cli = parser.parse_args()

    # A given filepath only overrides the filename stored in INFO
    model_path = cli.model_filepath
    if model_path:
        INFO["model"]["filename"] = model_path

    # Every source that was passed is handed to its helper, in this fixed order
    sources = (
        (cli.model_url, helper.download_word2vec_model),
        (cli.gensim_model, helper.download_pretrained_gensim_model),
        (cli.fasttext_model, helper.download_pretrained_fasttext_model),
    )
    for source, fetch in sources:
        if source:
            await fetch(source)

    # Load model explicitly when the model slot is still empty at this point
    if OBJECTS["WORD2VEC_MODEL"] is None:
        await helper.load_word2vec_model()


@app.get("/")
async def root():
    '''Return the shared INFO object as the response body.'''
    # NOTE(review): INFO is not defined in this file - presumably it comes
    # from `config`; startup_event writes INFO["model"]["filename"] into it.
    return INFO

# ***   GET METHODS   ***
@app.get("/vector")
async def get_vector(w : str):
    '''Get the vector of one word.

    On success the response holds `status: ok` and `vector`, the float64
    bytes of the word vector encoded as base64. If no model is loaded or
    the word is unknown, `status: failed` and a `msg` are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    try:
        raw_vector = model.get_vector(w)
    except KeyError:
        # Only a missing key means "not present"; any other error is a real
        # failure and must not be reported as an unknown word
        return {"status" : "failed", "msg" : f"Key {w} is not present"}

    # Normalise to float64 so clients can decode with a fixed element size
    vector = np.array(raw_vector, dtype=np.float64)
    return {
        "status" : "ok",
        "vector" : base64.b64encode(vector)
        }

@app.get("/most-similar")
async def get_most_similar(w : str):
    '''Get similar words of the input w.

    On success the response holds `status: ok` and `similar`, the result of
    the model's `most_similar` lookup for `w`. If no model is loaded or the
    word is unknown, `status: failed` and a `msg` are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    try:
        similar = model.most_similar(w)
    except KeyError:
        # Only a missing key means "not present"; other errors should surface
        return {"status" : "failed", "msg" : f"Key {w} is not present"}

    return {
        "status" : "ok",
        "similar" : similar
        }

@app.get("/similarity")
async def get_similarity(w1 : str, w2 : str):
    '''Calculate how similar two individual words are.

    On success the response holds `status: ok` and `similarity` as a plain
    float. If no model is loaded or one of the words is unknown,
    `status: failed` and a `msg` are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    try:
        similarity = model.similarity(w1, w2)
    except KeyError:
        # Only a missing key means "not present"; other errors should surface
        return {"status" : "failed", "msg" : f"Key {w1} or {w2} is not present"}

    # float() accepts numpy scalars as well as plain Python floats, so the
    # conversion cannot fail just because of the scalar type returned
    return {"status" : "ok", "similarity" : float(similarity)}


# ***   POST METHODS   ***
@app.post("/vector")
async def post_vector(words : List[str]):
    '''Get the vectors of many words (one vector per word).

    The response holds `status: ok` and `vectors`, a mapping from each known
    word to the base64 encoded float64 bytes of its vector. Words the model
    does not know are left out. If no model is loaded, `status: failed` and
    a `msg` are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    vectors = {}
    for w in words:
        try:
            raw_vector = model.get_vector(w)
        except KeyError:
            continue # Not Found: unknown words are skipped on purpose

        # Normalise to float64 so clients can decode with a fixed element size
        arr = np.array(raw_vector, dtype=np.float64)
        vectors[w] = base64.b64encode(arr)

    return {"status" : "ok", "vectors" : vectors}

@app.post("/most-similar")
async def post_most_similar(words : List[str]):
    '''Get similar words of the input words.

    The response holds `status: ok` and `similar`, a mapping from each known
    word to the result of the model's `most_similar` lookup. Words the model
    does not know are left out. If no model is loaded, `status: failed` and
    a `msg` are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    similars = {}
    for w in words:
        try:
            similars[w] = model.most_similar(w)
        except KeyError:
            continue # Not Found: unknown words are skipped on purpose

    return {"status" : "ok", "similar" : similars}

@app.post("/similar-vector")
async def get_similar_vector(vs : List[str], n : int = 7):
    '''Find similar vectors like the given ones (Return them as words).

    Every entry of `vs` must be the base64 encoded float64 bytes of one
    vector; `n` is passed to the model as the number of results per vector.
    The response holds `status: ok` and `similar`, a mapping from the
    position of each input vector to its `most_similar` result. If no model
    is loaded or an entry cannot be decoded, `status: failed` and a `msg`
    are returned instead.
    '''
    model = OBJECTS["WORD2VEC_MODEL"]
    if model is None:
        # Model is not in_memory (same guard as the other endpoints)
        return {"status" : "failed", "msg" : "No word2vec model is loaded"}

    similars = {}

    for index, v in enumerate(vs):
        # Parse vector, find similar and append words to lists
        try:
            d_bytes = base64.b64decode(v)
            vector = np.frombuffer(d_bytes, dtype=np.float64)
        except ValueError:
            # binascii.Error (bad base64) is a ValueError subclass, and
            # frombuffer raises ValueError when the byte count is not a
            # multiple of the float64 size
            return {
                "status" : "failed",
                "msg" : f"Vector at index {index} is not valid base64 encoded float64 data"
                }

        similars[index] = model.most_similar([vector], [], n)

    return {"status" : "ok", "similar" : similars}


if __name__ == "__main__":
    # Run directly as a script: serve the app on all interfaces, port 8000
    uvicorn.run(app, port=8000, host="0.0.0.0")