diff --git a/app/services/chunking.py b/app/services/chunking.py index bb3d84e..ccf3dea 100644 --- a/app/services/chunking.py +++ b/app/services/chunking.py @@ -5,6 +5,11 @@ import re import unicodedata from dataclasses import dataclass +# Default chunking configuration constants +DEFAULT_CHUNK_SIZE = 512 +DEFAULT_CHUNK_OVERLAP = 64 + + @dataclass @@ -42,8 +47,8 @@ def normalize_text(text: str) -> str: " ", # word boundaries "", # character-level fallback ] - - + chunk_size: int = DEFAULT_CHUNK_SIZE, + chunk_overlap: int = DEFAULT_CHUNK_OVERLAP, def chunk_text( text: str, chunk_size: int = 512,