diff --git a/ocr_service/api/process.py b/ocr_service/api/process.py index c0a7ba1..c2a4d6f 100644 --- a/ocr_service/api/process.py +++ b/ocr_service/api/process.py @@ -10,8 +10,8 @@ from fastapi.responses import ORJSONResponse, Response from starlette.datastructures import FormData -from ocr_service.settings import settings from ocr_service.processor.processor import Processor +from ocr_service.settings import settings from ocr_service.utils.utils import build_response, setup_logging process_api = APIRouter(prefix="/api") diff --git a/ocr_service/app/app.py b/ocr_service/app/app.py index e2b7c24..bf95cef 100644 --- a/ocr_service/app/app.py +++ b/ocr_service/app/app.py @@ -41,8 +41,7 @@ def start_office_server(port_num: str) -> dict[str, Any]: "--executable", settings.LIBRE_OFFICE_EXEC_PATH, "--port", port_num, "--uno-port", uno_port, - "--user-installation", user_installation, - # "--logfile", f"loffice_{port_num}.log" + "--user-installation", user_installation ], cwd=settings.TMP_FILE_DIR, close_fds=True, diff --git a/ocr_service/processor/converter.py b/ocr_service/processor/converter.py index 5d5d1cf..ac4df2f 100644 --- a/ocr_service/processor/converter.py +++ b/ocr_service/processor/converter.py @@ -16,8 +16,8 @@ from PIL import Image from striprtf.striprtf import rtf_to_text -from ocr_service.settings import settings from ocr_service.dto.process_context import ProcessContext +from ocr_service.settings import settings from ocr_service.utils.utils import INPUT_FILTERS, delete_tmp_files, terminate_hanging_process diff --git a/ocr_service/processor/processor.py b/ocr_service/processor/processor.py index 844ca12..ca9ecf2 100644 --- a/ocr_service/processor/processor.py +++ b/ocr_service/processor/processor.py @@ -5,10 +5,10 @@ import traceback from typing import Any -from ocr_service.settings import settings from ocr_service.dto.process_context import ProcessContext from ocr_service.processor.converter import DocumentConverter from ocr_service.processor.ocr_engine import OcrEngine +from ocr_service.settings import settings from ocr_service.utils.utils import detect_file_type, normalise_file_name_with_ext, setup_logging diff --git a/ocr_service/settings.py b/ocr_service/settings.py index 75b8abe..b20ad2e 100644 --- a/ocr_service/settings.py +++ b/ocr_service/settings.py @@ -30,8 +30,8 @@ class Settings(BaseSettings): OCR_WEB_SERVICE_THREADS: int = Field(1, ge=1) OCR_WEB_SERVICE_WORKERS: int = Field(1, ge=1) - OCR_SERVICE_CPU_THREADS: int | None = Field(None, ge=1) - OCR_SERVICE_CONVERTER_THREADS: int | None = Field(None, ge=1) + OCR_SERVICE_CPU_THREADS: int = Field(1, ge=1) + OCR_SERVICE_CONVERTER_THREADS: int = Field(1, ge=1) OCR_SERVICE_IMAGE_DPI: int = Field(200, gt=0) OCR_CONVERT_GRAYSCALE_IMAGES: bool = Field(True) @@ -41,8 +41,14 @@ class Settings(BaseSettings): LIBRE_OFFICE_NETWORK_INTERFACE: str = Field("localhost", min_length=1) LIBRE_OFFICE_PROCESSES_LISTENER_INTERVAL: int = Field(10, gt=0) - LIBRE_OFFICE_PYTHON_PATH: str | None = Field(None, min_length=1) - LIBRE_OFFICE_EXEC_PATH: str | None = Field(None, min_length=1) + LIBRE_OFFICE_PYTHON_PATH: str = Field( + "/Applications/LibreOffice.app/Contents/Resources/python", + min_length=1, + ) + LIBRE_OFFICE_EXEC_PATH: str = Field( + "/Applications/LibreOffice.app/Contents/MacOS/soffice", + min_length=1, + ) @field_validator("OCR_SERVICE_OPERATION_MODE", mode="before") @classmethod @@ -104,83 +110,83 @@ def model_post_init(self, __context: Any) -> None: if "LIBRE_OFFICE_EXEC_PATH" not in self.model_fields_set: self.LIBRE_OFFICE_EXEC_PATH = default_lo_exec - @computed_field + @computed_field # type: ignore[prop-decorator] @property def LOG_LEVEL(self) -> int: # 50 - CRITICAL, 40 - ERROR, 30 - WARNING, 20 - INFO, 10 - DEBUG, 0 - NOTSET return self.OCR_SERVICE_LOG_LEVEL - @computed_field + @computed_field # type: ignore[prop-decorator] @property def DEBUG_MODE(self) -> bool: return self.OCR_SERVICE_DEBUG_MODE - @computed_field + @computed_field # type: ignore[prop-decorator] @property def ROOT_DIR(self) -> str: return str(Path(__file__).resolve().parents[1]) - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TMP_FILE_DIR(self) -> str: return self.OCR_TMP_DIR or os.path.join(self.ROOT_DIR, "tmp") - @computed_field + @computed_field # type: ignore[prop-decorator] @property def WORKER_PORT_MAP_FILE_PATH(self) -> str: return os.path.join(self.TMP_FILE_DIR, "./worker_process_data.txt") - @computed_field + @computed_field # type: ignore[prop-decorator] @property def OPERATION_MODE(self) -> str: # possible vals : "OCR", "NO_OCR" return self.OCR_SERVICE_OPERATION_MODE - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TESSDATA_PREFIX(self) -> str: return self.OCR_TESSDATA_PREFIX - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TESSERACT_TIMEOUT(self) -> int: return self.OCR_SERVICE_TESSERACT_TIMEOUT - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TESSERACT_LANGUAGE(self) -> str: return self.OCR_SERVICE_TESSERACT_LANG - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TESSERACT_NICE(self) -> int: return self.OCR_SERVICE_TESSERACT_NICE - @computed_field + @computed_field # type: ignore[prop-decorator] @property def TESSERACT_CUSTOM_CONFIG_FLAGS(self) -> str: return self.OCR_SERVICE_TESSERACT_CUSTOM_CONFIG_FLAGS - @computed_field + @computed_field # type: ignore[prop-decorator] @property def CPU_THREADS(self) -> int: if self.OCR_SERVICE_CPU_THREADS is not None: return int(self.OCR_SERVICE_CPU_THREADS) return int(multiprocessing.cpu_count() / self.OCR_WEB_SERVICE_WORKERS) - @computed_field + @computed_field # type: ignore[prop-decorator] @property def CONVERTER_THREAD_NUM(self) -> int: if self.OCR_SERVICE_CONVERTER_THREADS is not None: return int(self.OCR_SERVICE_CONVERTER_THREADS) return int(multiprocessing.cpu_count() / self.OCR_WEB_SERVICE_WORKERS) - @computed_field + @computed_field # type: ignore[prop-decorator] @property def LIBRE_OFFICE_PROCESS_TIMEOUT(self) -> int: return self.OCR_SERVICE_LIBRE_OFFICE_PROCESS_TIMEOUT - @computed_field + @computed_field # type: ignore[prop-decorator] @property def LIBRE_OFFICE_PORT_CAP(self) -> int: port_cap = self.DEFAULT_LIBRE_OFFICE_SERVER_PORT + 1 @@ -190,7 +196,7 @@ def LIBRE_OFFICE_PORT_CAP(self) -> int: port_cap = self.DEFAULT_LIBRE_OFFICE_SERVER_PORT + self.OCR_WEB_SERVICE_WORKERS return port_cap - @computed_field + @computed_field # type: ignore[prop-decorator] @property def LIBRE_OFFICE_LISTENER_PORT_RANGE(self) -> range: if self.OCR_SERVICE_LIBRE_OFFICE_LISTENER_PORT_RANGE: @@ -198,4 +204,4 @@ def LIBRE_OFFICE_LISTENER_PORT_RANGE(self) -> range: return range(start, end) return range(self.DEFAULT_LIBRE_OFFICE_SERVER_PORT, self.LIBRE_OFFICE_PORT_CAP) -settings = Settings() +settings = Settings() # type: ignore[call-arg]