From 76986d7b7331f34d35198068107b58e3dce4d18b Mon Sep 17 00:00:00 2001
From: Oliver Baer <75138893+mrwind-up-bird@users.noreply.github.com>
Date: Thu, 26 Feb 2026 15:18:01 +0100
Subject: [PATCH] fix(autofix): Unhandled encoding errors in text extraction

Decode .txt/.md/.csv content as UTF-8 and fall back to Latin-1 when the
bytes are not valid UTF-8. The fallback lives inside the plain-text
branch so the code after that branch, including the documented
ValueError for unsupported extensions, stays reachable.
---
Reviewer note: the hunk below was revised by hand. The commit id on the
"From" line and the post-image blob id on the "index" line still refer
to the earlier revision of this patch.

 app/services/extract.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/services/extract.py b/app/services/extract.py
index c606d24..27a48dc 100644
--- a/app/services/extract.py
+++ b/app/services/extract.py
@@ -25,5 +25,10 @@ def extract_text(filename: str, content: bytes) -> str:
     ext = Path(filename).suffix.lower()
 
     if ext in {".txt", ".md", ".csv"}:
-        return content.decode("utf-8")
+        try:
+            return content.decode("utf-8")
+        except UnicodeDecodeError:
+            # Latin-1 maps every byte to a code point, so this fallback
+            # never raises; non-UTF-8 text may still decode as mojibake.
+            return content.decode("latin-1")
 