@@ -82,7 +82,12 @@ def dump(v): return orjson.dumps(v).decode("utf-8")
8282 def _split_plain (self , text : str ) -> List [str ]:
8383 """
8484 Split plain text by max length, preferring punctuation boundaries.
85- Avoid cutting inside escape sequences (e.g., don't cut after a lone backslash).
85+
86+ Args:
87+ text: Input text
88+
89+ Returns:
90+ List of text chunks
8691 """
8792 out : List [str ] = []
8893 all_punct = set (string .punctuation )
@@ -118,7 +123,12 @@ def _split_plain(self, text: str) -> List[str]:
118123 def _split_json_text (self , text : str ) -> List [str ]:
119124 """
120125 Split JSON-derived text while preserving top-level key-value integrity.
121- Falls back to plain splitting if no safe top-level boundary is found.
126+
127+ Args:
128+ text: JSON-derived string
129+
130+ Returns:
131+ List of text chunks
122132 """
123133 out : List [str ] = []
124134 cur = text
@@ -140,10 +150,14 @@ def _split_json_text(self, text: str) -> List[str]:
140150
141151 def _find_last_top_kv (self , text : str , max_len : int ) -> int | None :
142152 """
143- Find the rightmost position <= max_len where a top-level key-value pair ends .
153+ Find the split position of the last top-level key-value pair.
144154
145- A top-level KV ends at a comma when depth == 1 and outside any string.
146- Additionally, ensures the cut does not leave an unescaped backslash at end.
155+ Args:
156+ text: JSON substring (prefix)
157+
158+ Returns:
159+ Index after the last complete top-level KV pair,
160+ or None if no safe split point exists.
147161 """
148162 depth = 0
149163 in_str = False
0 commit comments