MinishLab · Pringled · Jun 18, 2026 · Jun 19, 2026
diff --git a/src/semble/cli.py b/src/semble/cli.py
@@ -115,7 +115,7 @@ def _load_index(path: str, content: list[ContentType]) -> SembleIndex:
 def _run_search(path: str, query: str, top_k: int, content: list[ContentType], max_snippet_lines: int | None) -> None:
     """Handle the `search` subcommand."""
     index = _load_index(path, content)
-    results = index.search(query, top_k=top_k)
+    results = index.search(query, top_k=top_k, max_snippet_lines=max_snippet_lines)
     out = format_results(query, results, max_snippet_lines) if results else {"error": "No results found."}
     print(json.dumps(out))
     _maybe_save_index(index, path)
@@ -130,7 +130,7 @@ def _run_find_related(
     if chunk is None:
         print(f"No chunk found at {file_path}:{line}.", file=sys.stderr)
         sys.exit(1)
-    results = index.find_related(chunk, top_k=top_k)
+    results = index.find_related(chunk, top_k=top_k, max_snippet_lines=max_snippet_lines)
     label = f"Chunks related to {file_path}:{line}"
     out = (
         format_results(label, results, max_snippet_lines)

diff --git a/src/semble/index/index.py b/src/semble/index/index.py
@@ -224,18 +224,21 @@ def from_git(
                 content=normalized,
             )
 
-    def find_related(self, source: Chunk | SearchResult, *, top_k: int = 5) -> list[SearchResult]:
+    def find_related(
+        self, source: Chunk | SearchResult, *, top_k: int = 5, max_snippet_lines: int | None = None
+    ) -> list[SearchResult]:
         """Return chunks semantically similar to the given chunk or search result.
 
         :param source: A SearchResult or Chunk to use as the seed.
         :param top_k: Number of similar chunks to return.
+        :param max_snippet_lines: Leading lines per chunk counted in savings stats. 0 = none, None = full chunk.
         :return: Ranked list of SearchResult objects, most similar first.
         """
         target = source.chunk if isinstance(source, SearchResult) else source
         selector = self._get_selector_vector(filter_languages=[target.language]) if target.language else None
         results = _search_semantic(target.content, self.model, self._semantic_index, self.chunks, top_k + 1, selector)
         results = [r for r in results if r.chunk != target][:top_k]
-        save_search_stats(results, CallType.FIND_RELATED, self._file_sizes)
+        save_search_stats(results, CallType.FIND_RELATED, self._file_sizes, max_snippet_lines)
         return results
 
     def _get_selector_vector(
@@ -258,6 +261,7 @@ def search(
         filter_languages: list[str] | None = None,
         filter_paths: list[str] | None = None,
         rerank: bool | None = None,
+        max_snippet_lines: int | None = None,
     ) -> list[SearchResult]:
         """Search the index and return the top-k most relevant chunks.
 
@@ -271,6 +275,7 @@ def search(
             chunks from these files are returned.
         :param rerank: Apply code-tuned reranking (file boost, identifier boost, path penalties).
             Defaults to True when ContentType.CODE was indexed.
+        :param max_snippet_lines: Leading lines per chunk counted in savings stats. 0 = none, None = full chunk.
         :return: Ranked list of SearchResult objects, best match first.
         """
         if not self.chunks or not query.strip():
@@ -290,7 +295,7 @@ def search(
             selector=selector,
             rerank=resolved_rerank,
         )
-        save_search_stats(results, CallType.SEARCH, self._file_sizes)
+        save_search_stats(results, CallType.SEARCH, self._file_sizes, max_snippet_lines)
         return results
 
     @classmethod

diff --git a/src/semble/mcp.py b/src/semble/mcp.py
@@ -78,6 +78,7 @@ async def search(
                     "If the snippet does not contain enough context to confirm you have the right location, "
                     "call again with max_snippet_lines=None."
                 ),
+                ge=0,
             ),
         ] = 10,
     ) -> str:
@@ -91,7 +92,7 @@ async def search(
             index = await _get_index(repo, default_source, cache)
         except ValueError as exc:
             return str(exc)
-        results = index.search(query, top_k=top_k)
+        results = index.search(query, top_k=top_k, max_snippet_lines=max_snippet_lines)
         if not results:
             return json.dumps({"error": "No results found."})
         return json.dumps(format_results(query, results, max_snippet_lines))
@@ -111,7 +112,8 @@ async def find_related(
                 description=(
                     "Lines of source per result. "
                     "Default 10 = signature + first body lines. 0 = location only. None = full chunk."
-                )
+                ),
+                ge=0,
             ),
         ] = 10,
     ) -> str:
@@ -131,7 +133,7 @@ async def find_related(
                 f"No chunk found at {file_path}:{line}. "
                 "Make sure the file is indexed and the line number is within a known chunk."
             )
-        results = index.find_related(chunk, top_k=top_k)
+        results = index.find_related(chunk, top_k=top_k, max_snippet_lines=max_snippet_lines)
         if not results:
             return json.dumps({"error": f"No related chunks found for {file_path}:{line}."})
         label = f"Chunks related to {file_path}:{line}"

diff --git a/src/semble/stats.py b/src/semble/stats.py
@@ -65,10 +65,18 @@ def save_search_stats(
     results: list[SearchResult],
     call_type: CallType,
     file_sizes: dict[str, int],
+    max_snippet_lines: int | None = None,
 ) -> None:
     """Save stats about a search or find_related call to the stats file."""
     try:
-        snippet_chars = sum(len(result.chunk.content) for result in results)
+        snippet_chars = sum(
+            len("\n".join(result.chunk.content.splitlines()[:max_snippet_lines]))
+            if max_snippet_lines and max_snippet_lines > 0
+            else 0
+            if max_snippet_lines == 0
+            else len(result.chunk.content)
+            for result in results
+        )
         file_chars = sum(
             file_sizes[path] for path in {result.chunk.file_path for result in results} if path in file_sizes
         )