Skip to content

Commit ee5b271

Browse files
RedthCopilotPureWeen
authored
fix: skip provider sessions during SDK reconnect (#569)
## Summary - skip provider and other virtual sessions when recreating the shared Copilot SDK client after a JSON-RPC disconnect - avoid invalid `session.resume` calls against synthetic provider session IDs like `provider:papilot:leader` - add a regression guard covering the sibling reconnect path ## Why When the Copilot CLI transport dropped, PolyPilot tried to re-resume Papilot provider sessions using their persistence-only IDs. Those are not real CLI session IDs, so reconnect recovery could cascade into more errors and leave provider chats looking broken. ## Testing - /opt/homebrew/bin/dotnet test PolyPilot.Tests/PolyPilot.Tests.csproj --no-restore --filter "ConnectionRecoveryTests" --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Shane Neuville <shneuvil@microsoft.com>
1 parent 3523b45 commit ee5b271

2 files changed

Lines changed: 34 additions & 0 deletions

File tree

PolyPilot.Tests/ConnectionRecoveryTests.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,31 @@ public void SendPromptAsync_ReconnectPath_UsesRefreshedClientForCreateSession()
264264
"client.CreateSessionAsync (Session not found fallback) must be after client = _client refresh");
265265
}
266266

267+
[Fact]
268+
public void SendPromptAsync_SiblingReconnect_SkipsProviderAndVirtualSessions()
269+
{
270+
// STRUCTURAL REGRESSION GUARD: when the shared SDK client is recreated after a
271+
// JSON-RPC transport loss, the sibling re-resume loop must skip provider sessions.
272+
// Their synthetic SessionId values are for persistence only and are invalid for
273+
// session.resume, which otherwise causes Papilot sessions to break after reconnect.
274+
var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));
275+
276+
var loopIndex = source.IndexOf("foreach (var kvp in _sessions)", StringComparison.Ordinal);
277+
Assert.True(loopIndex > 0, "Could not find sibling reconnect loop");
278+
279+
var skipIndex = source.IndexOf("if (otherState.Session == null || IsProviderSession(kvp.Key))", loopIndex, StringComparison.Ordinal);
280+
Assert.True(skipIndex > loopIndex,
281+
"Sibling reconnect loop must skip provider/virtual sessions before attempting ResumeSessionAsync");
282+
283+
var forceCompleteIndex = source.IndexOf("ForceCompleteProcessingAsync(kvp.Key", loopIndex, StringComparison.Ordinal);
284+
Assert.True(forceCompleteIndex > skipIndex,
285+
"Provider-session skip must appear before ForceCompleteProcessingAsync in the sibling reconnect path");
286+
287+
var resumeIndex = source.IndexOf("ResumeSessionAsync(", loopIndex, StringComparison.Ordinal);
288+
Assert.True(resumeIndex > skipIndex,
289+
"Provider-session skip must appear before ResumeSessionAsync in the sibling reconnect path");
290+
}
291+
267292
// ===== Regression: Fresh session after "Session not found" must include MCP servers & skills =====
268293
// When the JSON-RPC connection is lost and the server-side session has expired,
269294
// SendPromptAsync falls back to creating a fresh session via CreateSessionAsync.

PolyPilot/Services/CopilotService.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3640,6 +3640,15 @@ public async Task<string> SendPromptAsync(string sessionName, string prompt, Lis
36403640
if (kvp.Key == sessionName) continue;
36413641
var otherState = kvp.Value;
36423642
if (string.IsNullOrEmpty(otherState.Info.SessionId)) continue;
3643+
// Provider/virtual sessions are not backed by the SDK client that
3644+
// was just recreated. Their SessionId values are persistence keys,
3645+
// not CLI-resumable session IDs, so trying to ResumeSessionAsync
3646+
// them produces invalid-session errors and can destabilize recovery.
3647+
if (otherState.Session == null || IsProviderSession(kvp.Key))
3648+
{
3649+
Debug($"[RECONNECT] Skipping non-SDK sibling '{kvp.Key}' during client recreation");
3650+
continue;
3651+
}
36433652
// INV-O14: IsProcessing siblings have dead event streams —
36443653
// their CopilotSession was tied to the old client which was
36453654
// just disposed. Force-abort so the orchestrator retries

0 commit comments

Comments
 (0)