Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
4 changes: 0 additions & 4 deletions .bentoignore

This file was deleted.

2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.github
.coveragerc
82 changes: 47 additions & 35 deletions app/api/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from openprompt.plms import get_model_class
from openprompt.prompts import ManualTemplate, ManualVerbalizer
from sentence_transformers import SentenceTransformer
from torch.types import Device

from app.config import (
CONTENT_LOCAL_MODEL_PATH,
Expand All @@ -19,63 +20,74 @@
)
from app.controller.content import ContentController
from app.controller.keyeword import KeywordController
from app.decorator import singleton
from app.utils.aws_s3 import AwsS3Downloader
from app.utils.utils import get_template_text

log = logging.getLogger("__main__")
s3 = AwsS3Downloader()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
keyword_model: Optional[SentenceTransformer] = None
content_model: Optional[PromptForClassification] = None
keyword_controller: Optional[KeywordController] = None
content_controller: Optional[ContentController] = None


def init_model() -> None:
global keyword_model, content_model

if os.path.exists(KEYWORD_LOCAL_MODEL_PATH):
keyword_model = torch.load(KEYWORD_LOCAL_MODEL_PATH)
else:
@singleton
class ApplicationContext:
_s3: AwsS3Downloader
_device: Device
_keyword_model: Optional[SentenceTransformer] = None
_content_model: Optional[PromptForClassification] = None
_keyword_controller: Optional[KeywordController] = None
_content_controller: Optional[ContentController] = None

def __init__(self):
    """Build every heavyweight resource (models, controllers) exactly once.

    The class is decorated with @singleton, so this constructor runs once
    per process. Construction order matters: models must be loaded before
    the controllers that wrap them.
    """
    self._s3 = AwsS3Downloader()
    # Prefer GPU when available; _device is also used for map_location on load.
    self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Load (or download-and-cache) both models and freeze them for inference.
    self._keyword_model = self._load_or_pull_keyword_model()
    self._keyword_model.eval()
    self._content_model = self._load_or_pull_content_model()
    self._content_model.eval()
    self._keyword_controller = KeywordController(self._keyword_model)
    self._content_controller = ContentController(self._content_model)

def _load_or_pull_keyword_model(self):
    """Return the keyword SentenceTransformer, preferring the local cache.

    On a cache miss the model is built from KEYWORD_MODEL_PATH (presumably a
    hub model id — confirm), saved to KEYWORD_LOCAL_MODEL_PATH for the next
    start-up, and put into eval mode.
    """
    if os.path.exists(KEYWORD_LOCAL_MODEL_PATH):
        # NOTE(review): the cached branch skips .eval(); caller compensates.
        return torch.load(KEYWORD_LOCAL_MODEL_PATH)
    keyword_model = SentenceTransformer(KEYWORD_MODEL_PATH)
    torch.save(keyword_model, KEYWORD_LOCAL_MODEL_PATH)
    keyword_model.eval()
    return keyword_model

def _load_or_pull_content_model(self):
    """Return the content PromptForClassification model.

    Uses the locally cached serialized model when present; otherwise
    rebuilds it from its pretrained parts and the S3 state dict.
    """
    if os.path.exists(CONTENT_LOCAL_MODEL_PATH):
        return torch.load(CONTENT_LOCAL_MODEL_PATH)
    return self.pull_content_model_from_s3()

if os.path.exists(CONTENT_LOCAL_MODEL_PATH):
content_model = torch.load(CONTENT_LOCAL_MODEL_PATH)
else:
def pull_content_model_from_s3(self):
model_class = get_model_class(plm_type=CONTENT_MODEL_NAME)
plm = model_class.model.from_pretrained(CONTENT_MODEL_PATH)
tokenizer = model_class.tokenizer.from_pretrained(CONTENT_MODEL_PATH)
template_text = get_template_text()

template = ManualTemplate(tokenizer=tokenizer, text=template_text)
verbalizer = ManualVerbalizer(tokenizer=tokenizer, num_classes=2, label_words=[["yes"], ["no"]])

content_model = PromptForClassification(plm=plm, template=template, verbalizer=verbalizer)
model_path = self._s3.download(url=CONTENT_MODEL_S3_PATH, local_dir=".cache")
content_model.load_state_dict(torch.load(model_path, map_location=self._device))

model_path = s3.download(url=CONTENT_MODEL_S3_PATH, local_dir=".cache")
content_model.load_state_dict(torch.load(model_path, map_location=device))
torch.save(content_model, CONTENT_LOCAL_MODEL_PATH)
content_model.eval()

return content_model

def get_keyword_grading_model() -> SentenceTransformer:
return keyword_model
def get_keyword_model(self) -> SentenceTransformer:
    """Accessor for the keyword grading model built in __init__."""
    return self._keyword_model

def get_content_model(self) -> PromptForClassification:
    """Accessor for the content grading model built in __init__."""
    return self._content_model

def get_content_grading_model() -> PromptForClassification:
return content_model
def get_keyword_controller(self) -> KeywordController:
    """Accessor for the keyword controller; usable as a FastAPI dependency."""
    return self._keyword_controller

def get_content_controller(self) -> ContentController:
    """Accessor for the content controller; usable as a FastAPI dependency."""
    return self._content_controller

def get_keyword_controller(model: SentenceTransformer = Depends(get_keyword_grading_model)) -> KeywordController:
global keyword_controller
if keyword_controller is None:
keyword_controller = KeywordController(model)
return keyword_controller
def get_device(self) -> Device:
    """Accessor for the torch device selected at construction (cuda or cpu)."""
    return self._device


def get_content_controller(model: PromptForClassification = Depends(get_content_grading_model)) -> ContentController:
global content_controller
if content_controller is None:
content_controller = ContentController(model)
return content_controller
def load_application_context(context: ApplicationContext = Depends()):
    """Dependency hook intended to force construction of the ApplicationContext.

    The body is intentionally empty: declaring the context as a dependency is
    what triggers its (singleton) construction.

    NOTE(review): the startup handler calls this function directly, outside
    FastAPI's dependency resolution; in a direct call `Depends()` is just a
    default marker object and the context is NOT constructed — confirm models
    actually load eagerly at startup.
    """
    ...
8 changes: 4 additions & 4 deletions app/api/v1/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi import APIRouter, Body, Depends

from app import schemas
from app.api.dependency import get_content_controller, get_keyword_controller
from app.api.dependency import ApplicationContext
from app.controller.content import ContentController
from app.controller.keyeword import KeywordController
from app.schemas import ContentGradingRequest, IntegratedGradingResponse, KeywordGradingRequest, KeywordGradingResponse
Expand All @@ -14,16 +14,16 @@
@router.post("/keyword_predict", status_code=200, response_model=KeywordGradingResponse)
async def keyword_predict(
keyword_grading_req: KeywordGradingRequest = Body(...),
keyword_controller: KeywordController = Depends(get_keyword_controller),
keyword_controller: KeywordController = Depends(ApplicationContext.get_keyword_controller),
) -> KeywordGradingResponse:
return await keyword_controller.grading(keyword_grading_req)


@router.post("/integrate_predict", status_code=200, response_model=IntegratedGradingResponse)
async def integrate_predict(
integrated_grading_req: schemas.IntegratedGradingRequest = Body(...),
keyword_controller: KeywordController = Depends(get_keyword_controller),
content_controller: ContentController = Depends(get_content_controller),
keyword_controller: KeywordController = Depends(ApplicationContext.get_keyword_controller),
content_controller: ContentController = Depends(ApplicationContext.get_content_controller),
) -> schemas.IntegratedGradingResponse:
keyword_predict_input = KeywordGradingRequest(
problem_id=integrated_grading_req.problem_id,
Expand Down
1 change: 0 additions & 1 deletion app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def get_secret():
KEYWORD_MODEL_PATH = "Huffon/sentence-klue-roberta-base"
KEYWORD_LOCAL_MODEL_PATH = os.path.join(root, f"app/static/{STAGE}_keyword_model")
CONTENT_LOCAL_MODEL_PATH = os.path.join(root, f"app/static/{STAGE}_content_model")
KEYWORD_MODEL_S3_PATH = os.getenv("KEYWORD_MODEL_S3_PATH")
CONTENT_MODEL_S3_PATH = os.getenv("CONTENT_MODEL_S3_PATH")
STOPWORD_FILE_PATH = os.path.join(root, "app/static/stopwords.txt")
OS = platform.system()
Expand Down
27 changes: 16 additions & 11 deletions app/controller/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from openprompt.plms import T5TokenizerWrapper

from app.controller.base import BaseController
from app.decorator import singleton
from app.schemas import ContentGradingRequest, ContentGradingResponse, ContentResponse

log = logging.getLogger("__main__")


@singleton
class ContentController(BaseController):
def __init__(self, model: PromptForClassification):
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Expand All @@ -37,17 +39,7 @@ async def grading(self, input_data: ContentGradingRequest) -> ContentGradingResp
for content_standard in input_data.content_standards
]

data_loader = PromptDataLoader(
dataset=input_data_list,
template=self.template,
tokenizer=self.model.tokenizer,
tokenizer_wrapper_class=T5TokenizerWrapper,
max_seq_length=256,
decoder_max_length=3,
predict_eos_token=False,
truncate_method="head",
batch_size=len(input_data_list),
)
data_loader = await self.transform_loader(input_data_list)
correct_contents = []
with torch.no_grad():
for model_inputs in data_loader:
Expand All @@ -65,3 +57,16 @@ async def grading(self, input_data: ContentGradingRequest) -> ContentGradingResp
response_data = ContentGradingResponse(problem_id=input_data.problem_id, correct_contents=correct_contents)
log.info(pformat(response_data.__dict__))
return response_data

async def transform_loader(self, input_data_list):
    """Wrap the prompt examples in a single-batch PromptDataLoader.

    batch_size equals len(input_data_list), so one forward pass grades the
    whole request.

    NOTE(review): declared async but performs no awaiting — kept async because
    grading() awaits it. Also assumes input_data_list is non-empty; a zero
    batch_size would presumably fail — confirm callers guarantee this.
    """
    return PromptDataLoader(
        dataset=input_data_list,
        template=self.template,
        tokenizer=self.model.tokenizer,
        tokenizer_wrapper_class=T5TokenizerWrapper,
        max_seq_length=256,
        decoder_max_length=3,
        predict_eos_token=False,
        truncate_method="head",
        batch_size=len(input_data_list),
    )
30 changes: 16 additions & 14 deletions app/controller/keyeword.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from typing import List, Optional, Tuple

import torch.cuda
from konlpy.tag import Mecab
from konlpy.tag import Kkma
from numpy.typing import NDArray
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from app.config import MECAB_DIC_PATH, OS
from app.controller.base import BaseController
from app.decorator import singleton
from app.schemas import (
KeywordGradingRequest,
KeywordGradingResponse,
Expand All @@ -24,23 +24,21 @@
log = logging.getLogger("__main__")


@singleton
class KeywordController(BaseController):
def __init__(self, model: SentenceTransformer, problem_dict: Optional[dict] = None):
self.problem_dict = problem_dict if problem_dict else {}
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log.info(f"keyword predict model is running on {self.device}")
self.model = model.to(self.device)
self.tokenizer = Mecab(MECAB_DIC_PATH) if OS == "Windows" else Mecab()
self.tokenizer = Kkma()
self.stopwords = get_stopwords()
self.threshold = 0.7
self.word_concat_size = 2

def create_problem(self, input_data: KeywordGradingRequest) -> None:
log.info(f"problem id [{input_data.problem_id}] : create problem")
keyword_standards = []
for keyword_standard in input_data.keyword_standards:
for content in keyword_standard.content.split(", "):
keyword_standards.append(KeywordStandard(id=keyword_standard.id, content=content.strip()))
keyword_standards: List[KeywordStandard] = input_data.keyword_standards

self.problem_dict[input_data.problem_id] = Problem(
keyword_standards=keyword_standards,
Expand All @@ -49,14 +47,18 @@ def create_problem(self, input_data: KeywordGradingRequest) -> None:

def synchronize_keywords(self, input_data: KeywordGradingRequest) -> None:
problem_id = input_data.problem_id
if problem_id not in self.problem_dict: # 새로운 문제
if self._is_new_problem(problem_id): # 새로운 문제
self.create_problem(input_data)
else: # 기존에 존재하던 문제
pre_keyword_id_set = set(keyword.id for keyword in self.problem_dict[problem_id].keyword_standards)
new_keyword_id_set = set(keyword.id for keyword in input_data.keyword_standards)
if pre_keyword_id_set != new_keyword_id_set:
self.problem_dict.pop(problem_id)
self.create_problem(input_data)
return

pre_keyword_id_set = set(keyword.id for keyword in self.problem_dict[problem_id].keyword_standards)
new_keyword_id_set = set(keyword.id for keyword in input_data.keyword_standards)
if pre_keyword_id_set != new_keyword_id_set:
self.problem_dict.pop(problem_id)
self.create_problem(input_data)

def _is_new_problem(self, problem_id: int) -> bool:
    """Return True when no cached Problem exists for this problem id."""
    return problem_id not in self.problem_dict

def get_tokenized_answer(self, user_answer: str) -> List[str]:
regex_filter = r"[^\uAC00-\uD7A3a-zA-Z\s]"
Expand Down
9 changes: 9 additions & 0 deletions app/decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

def singleton(class_):
    """Class decorator that makes ``class_`` a process-wide singleton.

    The first call constructs the instance with the arguments given; every
    later call returns that same instance and silently ignores its arguments.

    The decorated name is rebound to a factory function, so
    ``isinstance(obj, DecoratedName)`` no longer works on the decorated name;
    ``functools.wraps`` is applied so ``__name__``/``__qualname__``/``__doc__``
    still report the original class for logging and debugging.

    Note: not thread-safe on first construction (matches original behavior).
    """
    import functools

    instances = {}

    # updated=[] prevents wraps() from merging the class __dict__ into the
    # factory function's __dict__.
    @functools.wraps(class_, updated=[])
    def get_instance(*args, **kwargs):
        # Construct lazily on first use; cache keyed by the class object.
        if class_ not in instances:
            instances[class_] = class_(*args, **kwargs)
        return instances[class_]

    return get_instance
4 changes: 2 additions & 2 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from starlette.responses import JSONResponse

from app import config as settings
from app.api.dependency import init_model
from app.api.dependency import load_application_context
from app.api.v1.endpoint import router
from app.exceptions import APIException, APIExceptionErrorCodes, APIExceptionTypes

Expand All @@ -14,7 +14,7 @@

@app.on_event("startup")
async def startup_event():
init_model()
load_application_context()


@app.exception_handler(RequestValidationError)
Expand Down
27 changes: 13 additions & 14 deletions app/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
from openprompt import PromptForClassification
from sentence_transformers import SentenceTransformer

from app.api.dependency import (
get_content_controller,
get_content_grading_model,
get_keyword_controller,
get_keyword_grading_model,
)
from app.api.dependency import ApplicationContext
from app.controller.content import ContentController
from app.controller.keyeword import KeywordController
from app.schemas import ContentGradingRequest, KeywordGradingRequest
Expand All @@ -21,23 +16,27 @@ def user_answer_df(path: str = "app/static/changed_user_answer.csv") -> pd.DataF


@pytest.fixture(scope="session")
def keyword_model() -> SentenceTransformer:
return get_keyword_grading_model()
def context() -> ApplicationContext:
    """Session-scoped ApplicationContext; singleton, so cheap after first build."""
    return ApplicationContext()

@pytest.fixture(scope="session")
def keyword_model(context: ApplicationContext) -> SentenceTransformer:
    """Keyword grading model taken from the shared application context."""
    return context.get_keyword_model()


@pytest.fixture(scope="session")
def content_model() -> PromptForClassification:
return get_content_grading_model()
def content_model(context: ApplicationContext) -> PromptForClassification:
    """Content grading model taken from the shared application context."""
    return context.get_content_model()


@pytest.fixture(scope="session")
def keyword_controller(keyword_model: SentenceTransformer) -> KeywordController:
return get_keyword_controller(keyword_model)
def keyword_controller(context: ApplicationContext) -> KeywordController:
    """Keyword controller taken from the shared application context."""
    return context.get_keyword_controller()


@pytest.fixture(scope="session")
def content_controller(content_model: PromptForClassification) -> ContentController:
return get_content_controller(content_model)
def content_controller(context: ApplicationContext) -> ContentController:
    """Content controller taken from the shared application context."""
    return context.get_content_controller()


@pytest.fixture(scope="session")
Expand Down
2 changes: 1 addition & 1 deletion dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.8
FROM theeluwin/ubuntu-konlpy:latest

RUN apt-get update

Expand Down
Binary file added script/.DS_Store
Binary file not shown.
Empty file modified script/mecab_install.sh
100644 → 100755
Empty file.
Empty file modified script/run.sh
100644 → 100755
Empty file.