-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
72 lines (60 loc) · 1.84 KB
/
app.py
File metadata and controls
72 lines (60 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
## install requirements
## python -m pip install nltk==3.5 joblib==1.0.1 scikit-learn numpy fastapi uvicorn[standard]
## run with:
## uvicorn app:app --reload
## or on azure app service:
## gunicorn -w 2 -k uvicorn.workers.UvicornWorker app:app
from asyncio import Lock
from functools import lru_cache

import joblib
import numpy as np
from fastapi import FastAPI
import nltk
# NLTK data packages this service depends on, addressed the way
# nltk.data.find() expects them: "<category>/<package>".
nltk_resources = [
    'tokenizers/punkt',
    'taggers/averaged_perceptron_tagger',
    'corpora/wordnet',
]

# Fetch any package that is missing from the local NLTK data path.
for resource_path in nltk_resources:
    try:
        nltk.data.find(resource_path)
    except LookupError:
        # download() takes only the bare package name, not the
        # category-qualified path used by find().
        nltk.download(resource_path.split('/')[1])

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

# Force the lazy WordNet corpus reader to load at startup rather than on
# the first request.
wn.ensure_loaded()
app = FastAPI()

# Locations of the artifacts produced at training time.
# NOTE(review): loaded elsewhere — presumably relative to the working
# directory the server is launched from; confirm on deploy.
clf_path = 'models/classifier.joblib'
vec_path = 'models/TfidfVectorizer.joblib'

wnl = WordNetLemmatizer()

# Map the first letter of a Penn Treebank POS tag onto the WordNet POS
# constant that WordNetLemmatizer.lemmatize() accepts.
simplify = dict(zip('NVJR', (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV)))

# Shared asyncio lock used to serialize calls into the NLTK pipeline.
lock = Lock()
async def lemmatize(wnl, text, tagfilter='NVJR'):
    """POS-tag *text* and return its filtered, lemmatized tokens as one string.

    Parameters:
        wnl: a WordNetLemmatizer (or compatible object with ``lemmatize``).
        text: raw input text to tokenize, tag, and lemmatize.
        tagfilter: iterable of Penn Treebank tag initials to keep
            (N=noun, V=verb, J=adjective, R=adverb). The default is an
            immutable string rather than the previous mutable
            ``list('NVJR')`` — ``in`` behaves identically for single
            characters, and mutable default arguments are a known hazard.

    Returns:
        Space-joined, lowercased lemmas of the tokens whose tag initial
        is in *tagfilter*.
    """
    newtext = []
    # Serialize tagging across concurrent requests; the shared module-level
    # lock suggests the NLTK pipeline is not safe for concurrent use
    # (NOTE(review): confirm — pos_tag/word_tokenize share loaded models).
    async with lock:
        for token, tag in nltk.pos_tag(nltk.word_tokenize(text)):
            if tag[0] in tagfilter:
                try:
                    lemma = wnl.lemmatize(token, simplify[tag[0]])
                    newtext.append(lemma.lower())
                except KeyError:
                    # Caller allowed a tag initial that has no WordNet POS
                    # mapping in `simplify` — skip that token silently,
                    # matching the original behavior.
                    pass
    return ' '.join(newtext)
@lru_cache(maxsize=None)
def _load_artifact(path):
    """Load a joblib artifact from *path* once and memoize it.

    The original handler re-read both the classifier and the vectorizer
    from disk on every request; caching them keeps the first request's
    lazy load but serves all later requests from memory.
    """
    return joblib.load(path)


@app.get("/predict/")
async def predict(query: str = ''):
    """Classify *query* and return per-class probabilities, highest first.

    Returns:
        dict mapping class label -> probability rounded to 3 decimals,
        insertion-ordered from most to least likely.
    """
    # Trained artifacts: loaded lazily on the first request, cached after.
    model = _load_artifact(clf_path)
    vec = _load_artifact(vec_path)

    # Keep only nouns, verbs, and adjectives, mirroring the tag filter the
    # original handler passed, then vectorize the lemmatized query.
    text = await lemmatize(wnl, query, list('NVJ'))
    X = vec.transform(np.array([text]))

    # predict_proba yields one probability row for our single sample.
    yp = model.predict_proba(X)[0]
    return {
        cls: round(prob, 3)
        for cls, prob in sorted(
            zip(model.classes_, yp),
            key=lambda pair: pair[1],
            reverse=True,
        )
    }