From b0e3dac9bef25424159df93a921533227c844849 Mon Sep 17 00:00:00 2001 From: avfirsov Date: Wed, 17 Jun 2026 23:15:58 +0300 Subject: [PATCH] =?UTF-8?q?fix(mcp):=20cap=20per=5Frepo=20in=20graph=5Fsta?= =?UTF-8?q?ts=20=E2=80=94=20monorepo=20context=20overflow=20made=20MCP=20u?= =?UTF-8?q?nusable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gortex://stats resource (and graph_stats tool) inlined a full GraphStats for EVERY tracked repo when multi-repo. On a large monorepo gortex decomposes into hundreds of sub-repos, and the resource is advertised "read at session start to orient", so an agent reads it on connect — dumping a per-repo rollup for ~hundreds of repos into the context window and overflowing it before any user turn (even a "42" prompt died). Small repos were fine because IsMultiRepo()==false skips the block entirely. cappedRepoStats bounds per_repo to the top-N (25) repos by node count + a _truncated marker pointing at graph_stats repo= for the rest. Bounds both the resource and the tool. Co-Authored-By: Claude Opus 4.8 (1M context) (cherry picked from commit f276e74324cc8f903019381d067519dab248a4b8) --- internal/mcp/capstats_test.go | 51 +++++++++++++++++++++++++++++++++++ internal/mcp/tools_core.go | 50 +++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 internal/mcp/capstats_test.go diff --git a/internal/mcp/capstats_test.go b/internal/mcp/capstats_test.go new file mode 100644 index 00000000..33e7a5f9 --- /dev/null +++ b/internal/mcp/capstats_test.go @@ -0,0 +1,51 @@ +package mcp + +import ( + "fmt" + "testing" + + "github.com/zzet/gortex/internal/graph" +) + +func gsNodes(n int) graph.GraphStats { return graph.GraphStats{TotalNodes: n} } + +// TestCappedRepoStats_VerbatimWhenSmall: within the cap → every repo passes +// through and no _truncated marker is added. +func TestCappedRepoStats_VerbatimWhenSmall(t *testing.T) { + in := map[string]graph.GraphStats{"a": gsNodes(10), "b": gsNodes(20)} + out := cappedRepoStats(in, 25) + if len(out) != 2 { + t.Fatalf("want 2 verbatim entries, got %d", len(out)) + } + if _, truncated := out["_truncated"]; truncated { + t.Fatal("must not truncate within cap") + } +} + +// TestCappedRepoStats_TopNWhenLarge: above the cap (the monorepo case) → only +// the top-N repos by node count survive, plus a _truncated marker carrying the +// real counts. This is the bound that keeps gortex://stats from overflowing an +// agent's context on a many-repo monorepo. +func TestCappedRepoStats_TopNWhenLarge(t *testing.T) { + in := map[string]graph.GraphStats{} + for i := 0; i < 100; i++ { + in[fmt.Sprintf("repo%02d", i)] = gsNodes(i) // node counts 0..99 + } + out := cappedRepoStats(in, 25) + if len(out) != 26 { // 25 repos + 1 _truncated marker + t.Fatalf("want 25 top repos + _truncated = 26 keys, got %d", len(out)) + } + tr, ok := out["_truncated"].(map[string]any) + if !ok { + t.Fatal("missing _truncated marker") + } + if tr["total_repos"] != 100 || tr["shown"] != 25 { + t.Fatalf("bad truncation marker: %+v", tr) + } + if _, ok := out["repo99"]; !ok { + t.Error("top repo by nodes (repo99) must be retained") + } + if _, ok := out["repo00"]; ok { + t.Error("smallest repo (repo00) must be dropped") + } +} diff --git a/internal/mcp/tools_core.go b/internal/mcp/tools_core.go index 5625be76..516af84a 100644 --- a/internal/mcp/tools_core.go +++ b/internal/mcp/tools_core.go @@ -2401,7 +2401,14 @@ func (s *Server) buildGraphStatsPayload(ctx context.Context) map[string]any { result["edge_identity_revisions"] = s.readerFor(ctx).EdgeIdentityRevisions() if s.multiIndexer != nil && s.multiIndexer.IsMultiRepo() { - result["per_repo"] = s.readerFor(ctx).RepoStats() + // BUG_FIX_CONTEXT: an unbounded per-repo dump here made the MCP unusable on large + // monorepos. The gortex://stats resource is advertised "read at session start to + // orient", so an agent reads it on connect — and a full GraphStats for every one of + // the hundreds of tracked sub-repos a monorepo decomposes into overflowed the agent's + // context window before any user turn (small repos: IsMultiRepo()==false → no dump → + // fine). Cap to the top-N repos by node count + a truncation marker; per-repo detail + // for one repo stays available via graph_stats repo=. + result["per_repo"] = cappedRepoStats(s.readerFor(ctx).RepoStats(), graphStatsPerRepoCap) } result["token_savings"] = s.tokenStatsFor(ctx).snapshot() @@ -2440,6 +2447,47 @@ func (s *Server) buildGraphStatsPayload(ctx context.Context) map[string]any { return result } +// graphStatsPerRepoCap bounds how many per-repo GraphStats entries the +// gortex://stats resource / graph_stats tool inlines. On a large monorepo +// gortex tracks hundreds of sub-repos; dumping a full GraphStats per repo +// into a resource that is read "at session start" overflows an agent's +// context window — the bug that made the MCP unusable on big monorepos. +const graphStatsPerRepoCap = 25 + +// cappedRepoStats returns the per_repo rollup verbatim when the repo count is +// within the cap, otherwise the top-`limit` repos by TotalNodes plus a +// `_truncated` marker pointing at graph_stats repo= for the rest. +// Keeps the stats payload bounded regardless of how many repos are tracked. +func cappedRepoStats(stats map[string]graph.GraphStats, limit int) map[string]any { + out := make(map[string]any, len(stats)+1) + if len(stats) <= limit { + for k, v := range stats { + out[k] = v + } + return out + } + type kv struct { + name string + st graph.GraphStats + } + arr := make([]kv, 0, len(stats)) + for k, v := range stats { + arr = append(arr, kv{name: k, st: v}) + } + sort.Slice(arr, func(i, j int) bool { return arr[i].st.TotalNodes > arr[j].st.TotalNodes }) + for i := 0; i < limit; i++ { + out[arr[i].name] = arr[i].st + } + out["_truncated"] = map[string]any{ + "shown": limit, + "total_repos": len(stats), + "note": fmt.Sprintf("per_repo capped to the top %d of %d tracked repos by node count "+ + "(context-frugal on monorepos); call graph_stats with repo= for a specific repo.", + limit, len(stats)), + } + return out +} + // notificationsStatus reports each push-notification channel's live // subscriber count and last-published payload. nil when no broadcaster // is wired (single-shot CLI modes). Consumed by graph_stats /