-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
50 lines (40 loc) · 1.36 KB
/
main.py
File metadata and controls
50 lines (40 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from fastapi import FastAPI, Form, HTTPException, UploadFile, Query
from marshal import loads
from polars import read_csv
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from types import FunctionType
# Module-level setup: everything below runs once, at import time.

# Load the repository CSV from disk.
df = read_csv("temp/repository_pnj_20212023clean.csv")
# Fit a TF-IDF vectorizer on the values of column "f" and keep the fitted
# matrix around as a module global.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["f"].to_list())
# `model` holds the raw bytes of the marshalled payload. The route handlers
# unmarshal it on each request and run it as a function whose globals are
# this module's globals().
# NOTE(review): presumably that code looks up df / vectorizer / tfidf_matrix
# by name - confirm before renaming any module-level name here.
# NOTE(review): marshal payloads are executable code and the format is not
# guaranteed to be portable across Python versions; this file must be trusted.
with open("temp/model.marshall", "rb") as file:
    model = file.read()
# ASGI application object that the route decorators below attach to.
app = FastAPI()
@app.get("/")
async def get_root():
    """Return a static banner message identifying the API."""
    banner = {"message": "Scientia-API by Kelompok 4 TI 6A"}
    return banner
@app.get("/find_similar")
async def get_find_similar(
    title: str = Query(...),
    abstract: str = Query(...),
    top_n: int = Query(...),
):
    """Run the marshalled similarity function on a title and abstract.

    The code object stored in ``model`` is unmarshalled and wrapped in a
    function that uses this module's globals, then called with the three
    query parameters in order.

    Args:
        title: Required query parameter, passed as the first argument.
        abstract: Required query parameter, passed as the second argument.
        top_n: Required query parameter, passed as the third argument
            (presumably the number of matches to return - confirm against
            the marshalled code).

    Returns:
        The value of ``.to_dicts()`` called on the function's result.

    Raises:
        HTTPException: Status 500 carrying the error text when anything in
            the lookup raises.
    """
    try:
        # Rebuilt on every request from the bytes loaded at import time.
        find_similar = FunctionType(loads(model), globals())
        matches = find_similar(title, abstract, top_n)
        return matches.to_dicts()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/find_similar_pdf")
async def post_find_similar_pdf(file: UploadFile, top_n: int = Form(...)):
    """Run the marshalled similarity function on an uploaded PDF.

    The text of the first page is passed as the first argument and the text
    of all remaining pages, joined by single spaces, as the second argument
    of the function rebuilt from the marshalled code in ``model``.

    Args:
        file: Uploaded PDF document.
        top_n: Required form field, passed as the third argument (presumably
            the number of matches to return - confirm against the marshalled
            code).

    Returns:
        The value of ``.to_dicts()`` called on the function's result.

    Raises:
        HTTPException: Status 400 when the upload cannot be parsed as a PDF
            or contains no pages; status 500 when the similarity function
            itself fails.
    """
    # Parse and extract in their own guarded step: a broken upload is the
    # client's problem (4xx) and must not escape as an unhandled exception.
    try:
        pages = PdfReader(file.file).pages
        # `or ""` keeps the join below safe should a page yield no text.
        page_texts = [page.extract_text() or "" for page in pages]
    except Exception as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Could not read PDF: {exc}",
        ) from exc

    # Raised outside any try block so the handler below cannot rewrap it.
    if not page_texts:
        raise HTTPException(status_code=400, detail="PDF contains no pages")

    first_page, *remaining = page_texts
    try:
        return FunctionType(loads(model), globals())(
            first_page,
            " ".join(remaining),
            top_n,
        ).to_dicts()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc