From ad171b3f42e19a510bd9d24f1ef6234f5870bf31 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 03:26:48 -0700 Subject: [PATCH 01/11] Add workflow run visibility: live agent tree, script view, transcripts Surface Claude Agent SDK workflow runs (the Workflow orchestration tool) end to end in the desktop app: - Contracts: WorkflowProgressEntry schemas, run handles, workflow inspection RPCs, thread.task.stop command + task-stop-requested event - ClaudeAdapter: normalize the SDK's undocumented workflow_progress snapshot (size-capped, tolerant), forward workflow identity on task events, emit task.workflowMeta from Workflow tool results, stopTask - Ingestion: upsert workflow snapshots under a stable per-task activity id so projections and reconnect payloads stay one row per run - WorkflowInspectionService: path-validated, size-capped reads of the run's script, journal, and per-agent transcripts (realpath containment under ~/.claude/projects) - Web: WorkflowRunCard inline in the chat timeline, a workflow right- panel surface with Run/Script/Logs tabs, cursor-paged transcript polling, stop + resume affordances; remote runs link to their cloud session Co-Authored-By: Claude Fable 5 --- apps/mobile/src/lib/threadActivity.ts | 4 + .../Layers/CheckpointReactor.test.ts | 1 + .../Layers/ProviderCommandReactor.test.ts | 79 ++ .../Layers/ProviderCommandReactor.ts | 35 +- .../Layers/ProviderRuntimeIngestion.test.ts | 183 +++++ .../Layers/ProviderRuntimeIngestion.ts | 66 +- apps/server/src/orchestration/decider.ts | 22 + .../decider.workflowTaskStop.test.ts | 121 +++ .../src/provider/Layers/ClaudeAdapter.test.ts | 226 ++++++ .../src/provider/Layers/ClaudeAdapter.ts | 269 ++++++- .../src/provider/Layers/ProviderService.ts | 32 + .../Layers/ProviderSessionReaper.test.ts | 1 + .../src/provider/Services/ProviderAdapter.ts | 7 + .../src/provider/Services/ProviderService.ts | 6 + apps/server/src/server.test.ts | 2 + apps/server/src/server.ts | 3 +- 
.../WorkflowInspectionService.test.ts | 309 ++++++++ .../src/workflow/WorkflowInspectionService.ts | 309 ++++++++ apps/server/src/ws.ts | 19 + apps/web/src/components/ChatView.tsx | 56 +- apps/web/src/components/RightPanelTabs.tsx | 15 +- .../components/chat/MessagesTimeline.logic.ts | 28 + .../components/chat/MessagesTimeline.test.tsx | 2 + .../src/components/chat/MessagesTimeline.tsx | 30 + .../src/components/workflow/WorkflowPanel.tsx | 702 ++++++++++++++++++ .../components/workflow/WorkflowRunCard.tsx | 146 ++++ .../src/components/workflow/workflowUi.tsx | 181 +++++ apps/web/src/rightPanelStore.ts | 38 +- apps/web/src/session-logic.test.ts | 115 +++ apps/web/src/session-logic.ts | 37 +- apps/web/src/state/workflow.ts | 5 + apps/web/src/workflow-logic.test.ts | 317 ++++++++ apps/web/src/workflow-logic.ts | 466 ++++++++++++ packages/client-runtime/package.json | 4 + .../client-runtime/src/operations/commands.ts | 13 + .../src/state/threadCommands.ts | 9 + packages/client-runtime/src/state/workflow.ts | 34 + packages/contracts/src/index.ts | 1 + packages/contracts/src/orchestration.ts | 26 + packages/contracts/src/provider.ts | 6 + packages/contracts/src/providerRuntime.ts | 27 + packages/contracts/src/rpc.ts | 35 + packages/contracts/src/workflow.test.ts | 145 ++++ packages/contracts/src/workflow.ts | 161 ++++ 44 files changed, 4274 insertions(+), 19 deletions(-) create mode 100644 apps/server/src/orchestration/decider.workflowTaskStop.test.ts create mode 100644 apps/server/src/workflow/WorkflowInspectionService.test.ts create mode 100644 apps/server/src/workflow/WorkflowInspectionService.ts create mode 100644 apps/web/src/components/workflow/WorkflowPanel.tsx create mode 100644 apps/web/src/components/workflow/WorkflowRunCard.tsx create mode 100644 apps/web/src/components/workflow/workflowUi.tsx create mode 100644 apps/web/src/state/workflow.ts create mode 100644 apps/web/src/workflow-logic.test.ts create mode 100644 apps/web/src/workflow-logic.ts create mode 
100644 packages/client-runtime/src/state/workflow.ts create mode 100644 packages/contracts/src/workflow.test.ts create mode 100644 packages/contracts/src/workflow.ts diff --git a/apps/mobile/src/lib/threadActivity.ts b/apps/mobile/src/lib/threadActivity.ts index 9f79a90550d..1c1582ef3ef 100644 --- a/apps/mobile/src/lib/threadActivity.ts +++ b/apps/mobile/src/lib/threadActivity.ts @@ -237,6 +237,10 @@ function deriveWorkLogEntries( for (const activity of ordered) { if (activity.kind === "tool.started") continue; if (activity.kind === "task.started") continue; + // Workflow snapshot/meta activities back the desktop workflow card; on + // mobile they would render as ever-mutating raw rows, so skip them. + if (activity.kind === "task.workflow-updated") continue; + if (activity.kind === "task.workflow-meta") continue; if (activity.kind === "context-window.updated") continue; if (activity.summary === "Checkpoint captured") continue; if (isPlanBoundaryToolActivity(activity)) continue; diff --git a/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts b/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts index 707c87c43c9..80c192b7ce5 100644 --- a/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts +++ b/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts @@ -106,6 +106,7 @@ function createProviderServiceHarness( startSession: () => unsupported(), sendTurn: () => unsupported(), interruptTurn: () => unsupported(), + stopTask: () => unsupported(), respondToRequest: () => unsupported(), respondToUserInput: () => unsupported(), stopSession: () => unsupported(), diff --git a/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts b/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts index ce464565dc5..2f3a0b6a5b5 100644 --- a/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts @@ -239,6 +239,7 @@ 
describe("ProviderCommandReactor", () => { } }), ); + const stopTask = vi.fn(() => Effect.void); const renameBranch = vi.fn((input: unknown) => Effect.succeed({ branch: @@ -301,6 +302,7 @@ describe("ProviderCommandReactor", () => { respondToRequest: respondToRequest as ProviderServiceShape["respondToRequest"], respondToUserInput: respondToUserInput as ProviderServiceShape["respondToUserInput"], stopSession: stopSession as ProviderServiceShape["stopSession"], + stopTask: stopTask as ProviderServiceShape["stopTask"], listSessions: () => Effect.succeed(runtimeSessions), getCapabilities: (_provider) => Effect.succeed({ @@ -417,6 +419,7 @@ describe("ProviderCommandReactor", () => { respondToRequest, respondToUserInput, stopSession, + stopTask, renameBranch, refreshStatus, generateBranchName, @@ -2094,4 +2097,80 @@ describe("ProviderCommandReactor", () => { expect(thread?.session?.providerInstanceId).toBe(ProviderInstanceId.make("codex_work")); expect(thread?.session?.activeTurnId).toBeNull(); }); + + effectIt.effect( + "reacts to thread.task.stop by stopping the background task on the active session", + () => + Effect.gen(function* () { + const harness = yield* Effect.promise(() => createHarness()); + const now = "2026-01-01T00:00:00.000Z"; + + yield* harness.engine.dispatch({ + type: "thread.session.set", + commandId: CommandId.make("cmd-session-set-for-task-stop"), + threadId: ThreadId.make("thread-1"), + session: { + threadId: ThreadId.make("thread-1"), + status: "running", + providerName: "claudeAgent", + runtimeMode: "approval-required", + activeTurnId: asTurnId("turn-1"), + lastError: null, + updatedAt: now, + }, + createdAt: now, + }); + + yield* harness.engine.dispatch({ + type: "thread.task.stop", + commandId: CommandId.make("cmd-task-stop"), + threadId: ThreadId.make("thread-1"), + taskId: "task-9", + createdAt: now, + }); + + yield* Effect.promise(() => waitFor(() => harness.stopTask.mock.calls.length === 1)); + 
expect(harness.stopTask.mock.calls[0]?.[0]).toEqual({ + threadId: "thread-1", + taskId: "task-9", + }); + }), + ); + + effectIt.effect("appends a task-stop failure activity when no active session is bound", () => + Effect.gen(function* () { + const harness = yield* Effect.promise(() => createHarness()); + const now = "2026-01-01T00:00:00.000Z"; + + yield* harness.engine.dispatch({ + type: "thread.task.stop", + commandId: CommandId.make("cmd-task-stop-no-session"), + threadId: ThreadId.make("thread-1"), + taskId: "task-9", + createdAt: now, + }); + + yield* Effect.promise(() => + waitFor(async () => { + const readModel = await harness.readModel(); + const thread = readModel.threads.find((entry) => entry.id === ThreadId.make("thread-1")); + return ( + thread?.activities.some((activity) => activity.kind === "provider.task.stop.failed") ?? + false + ); + }), + ); + + expect(harness.stopTask).not.toHaveBeenCalled(); + const readModel = yield* Effect.promise(() => harness.readModel()); + const thread = readModel.threads.find((entry) => entry.id === ThreadId.make("thread-1")); + expect( + thread?.activities.find((activity) => activity.kind === "provider.task.stop.failed"), + ).toMatchObject({ + payload: { + detail: expect.stringContaining("No active provider session"), + }, + }); + }), + ); }); diff --git a/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts b/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts index 9c7a7c94bb1..60df8c225c4 100644 --- a/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts +++ b/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts @@ -53,7 +53,8 @@ type ProviderIntentEvent = Extract< | "thread.turn-interrupt-requested" | "thread.approval-response-requested" | "thread.user-input-response-requested" - | "thread.session-stop-requested"; + | "thread.session-stop-requested" + | "thread.task-stop-requested"; } >; @@ -219,6 +220,7 @@ const make = Effect.gen(function* () { readonly kind: | 
"provider.turn.start.failed" | "provider.turn.interrupt.failed" + | "provider.task.stop.failed" | "provider.approval.respond.failed" | "provider.user-input.respond.failed" | "provider.session.stop.failed"; @@ -1002,6 +1004,31 @@ const make = Effect.gen(function* () { }); }); + const processTaskStopRequested = Effect.fn("processTaskStopRequested")(function* ( + event: Extract, + ) { + const thread = yield* resolveThread(event.payload.threadId); + if (!thread) { + return; + } + const hasSession = thread.session && thread.session.status !== "stopped"; + if (!hasSession) { + return yield* appendProviderFailureActivity({ + threadId: event.payload.threadId, + kind: "provider.task.stop.failed", + summary: "Background task stop failed", + detail: "No active provider session is bound to this thread.", + turnId: null, + createdAt: event.payload.createdAt, + }); + } + + yield* providerService.stopTask({ + threadId: event.payload.threadId, + taskId: event.payload.taskId, + }); + }); + const processDomainEvent = Effect.fn("processDomainEvent")(function* ( event: ProviderIntentEvent, ) { @@ -1042,6 +1069,9 @@ const make = Effect.gen(function* () { case "thread.session-stop-requested": yield* processSessionStopRequested(event); return; + case "thread.task-stop-requested": + yield* processTaskStopRequested(event); + return; } }); @@ -1068,7 +1098,8 @@ const make = Effect.gen(function* () { event.type === "thread.turn-interrupt-requested" || event.type === "thread.approval-response-requested" || event.type === "thread.user-input-response-requested" || - event.type === "thread.session-stop-requested" + event.type === "thread.session-stop-requested" || + event.type === "thread.task-stop-requested" ) { return yield* worker.enqueue(event); } diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index 001ba388949..7130b396bb5 100644 --- 
a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -101,6 +101,7 @@ function createProviderServiceHarness() { startSession: () => unsupported(), sendTurn: () => unsupported(), interruptTurn: () => unsupported(), + stopTask: () => unsupported(), respondToRequest: () => unsupported(), respondToUserInput: () => unsupported(), stopSession: () => unsupported(), @@ -3060,4 +3061,186 @@ describe("ProviderRuntimeIngestion", () => { expect(thread.session?.status).toBe("error"); expect(thread.session?.lastError).toBe("runtime still processed"); }); + + it("projects a plain task.progress (no workflowProgress) into a single activity", async () => { + const harness = await createHarness(); + const now = "2026-01-01T00:00:00.000Z"; + + harness.emit({ + type: "task.progress", + eventId: asEventId("evt-plain-progress"), + provider: ProviderDriverKind.make("claudeAgent"), + createdAt: now, + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-plain-progress"), + payload: { + taskId: "task-plain-1", + description: "thinking through the patch", + summary: "thinking through the patch", + }, + }); + + const thread = await waitForThread(harness.readModel, (entry) => + entry.activities.some( + (activity: ProviderRuntimeTestActivity) => activity.id === "evt-plain-progress", + ), + ); + const progress = thread.activities.find( + (activity: ProviderRuntimeTestActivity) => activity.id === "evt-plain-progress", + ); + expect(progress?.kind).toBe("task.progress"); + expect( + thread.activities.filter((activity: ProviderRuntimeTestActivity) => + activity.id.startsWith("workflow:"), + ), + ).toHaveLength(0); + }); + + it("projects a workflow task.progress into a per-tick row plus a stable workflow snapshot", async () => { + const harness = await createHarness(); + const now = "2026-01-01T00:00:00.000Z"; + const workflowProgress = [ + { type: "workflow_phase", index: 0, title: "Plan" }, + { 
type: "workflow_agent", index: 0, state: "start", phaseIndex: 0 }, + { type: "workflow_log", message: "kicked off" }, + ]; + + harness.emit({ + type: "task.progress", + eventId: asEventId("evt-workflow-progress"), + provider: ProviderDriverKind.make("claudeAgent"), + createdAt: now, + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-workflow-progress"), + payload: { + taskId: "task-wf-1", + description: "spec workflow", + workflowProgress, + usage: { total_tokens: 100 }, + }, + }); + + const thread = await waitForThread(harness.readModel, (entry) => + entry.activities.some( + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-1", + ), + ); + + const perTick = thread.activities.find( + (activity: ProviderRuntimeTestActivity) => activity.id === "evt-workflow-progress", + ); + expect(perTick?.kind).toBe("task.progress"); + + const snapshot = thread.activities.find( + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-1", + ); + expect(snapshot?.kind).toBe("task.workflow-updated"); + const payload = + snapshot?.payload && typeof snapshot.payload === "object" + ? 
(snapshot.payload as Record) + : undefined; + expect(payload?.taskId).toBe("task-wf-1"); + expect(payload?.workflowProgress).toEqual(workflowProgress); + }); + + it("projects task.workflowMeta into a stable workflow-meta activity", async () => { + const harness = await createHarness(); + const now = "2026-01-01T00:00:00.000Z"; + + harness.emit({ + type: "task.workflowMeta", + eventId: asEventId("evt-workflow-meta"), + provider: ProviderDriverKind.make("claudeAgent"), + createdAt: now, + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-workflow-meta"), + payload: { + taskId: "task-wf-meta", + runId: "wf_abc", + workflowName: "spec", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + }, + }); + + const thread = await waitForThread(harness.readModel, (entry) => + entry.activities.some( + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:task-wf-meta", + ), + ); + const meta = thread.activities.find( + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:task-wf-meta", + ); + expect(meta?.kind).toBe("task.workflow-meta"); + const payload = + meta?.payload && typeof meta.payload === "object" + ? 
(meta.payload as Record) + : undefined; + expect(payload).toMatchObject({ + taskId: "task-wf-meta", + runId: "wf_abc", + workflowName: "spec", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + }); + }); + + it("upserts successive workflow snapshots under the same stable activity id", async () => { + const harness = await createHarness(); + const now = "2026-01-01T00:00:00.000Z"; + + harness.emit({ + type: "task.progress", + eventId: asEventId("evt-workflow-progress-1"), + provider: ProviderDriverKind.make("claudeAgent"), + createdAt: now, + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-workflow-upsert"), + payload: { + taskId: "task-wf-2", + description: "spec workflow", + workflowProgress: [{ type: "workflow_agent", index: 0, state: "start" }], + }, + }); + harness.emit({ + type: "task.progress", + eventId: asEventId("evt-workflow-progress-2"), + provider: ProviderDriverKind.make("claudeAgent"), + createdAt: now, + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-workflow-upsert"), + payload: { + taskId: "task-wf-2", + description: "spec workflow", + workflowProgress: [{ type: "workflow_agent", index: 0, state: "done" }], + }, + }); + + const thread = await waitForThread(harness.readModel, (entry) => + entry.activities.some( + (activity: ProviderRuntimeTestActivity) => activity.id === "evt-workflow-progress-2", + ), + ); + await harness.drain(); + const drained = await harness.readModel(); + const drainedThread = drained.threads.find((entry) => entry.id === ThreadId.make("thread-1")); + + const snapshots = drainedThread?.activities.filter( + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-2", + ); + expect(snapshots).toHaveLength(1); + const payload = + snapshots?.[0]?.payload && typeof snapshots[0].payload === "object" + ? 
(snapshots[0].payload as Record) + : undefined; + expect(payload?.workflowProgress).toEqual([ + { type: "workflow_agent", index: 0, state: "done" }, + ]); + // Per-tick rows remain distinct (one per progress event). + expect( + thread.activities.filter( + (activity: ProviderRuntimeTestActivity) => activity.kind === "task.progress", + ).length, + ).toBeGreaterThanOrEqual(2); + }); }); diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts index 3e5978f4846..8df21f579b3 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts @@ -8,6 +8,7 @@ import { type OrchestrationProposedPlanId, CheckpointRef, isToolLifecycleItemType, + EventId, ThreadId, type ThreadTokenUsageSnapshot, TurnId, @@ -162,6 +163,18 @@ function maxCheckpointTurnCount( return maxTurnCount; } +/** + * Stable per-task activity ids: workflow snapshot/meta activities are + * upserted (one projection row per run), not appended per progress tick. + */ +function workflowActivityId(taskId: string): EventId { + return EventId.make(`workflow:${taskId}`); +} + +function workflowMetaActivityId(taskId: string): EventId { + return EventId.make(`workflow-meta:${taskId}`); +} + function truncateDetail(value: string, limit = 180): string { return value.length > limit ? `${value.slice(0, limit - 3)}...` : value; } @@ -456,6 +469,8 @@ function runtimeEventToActivities( payload: { taskId: event.payload.taskId, ...(event.payload.taskType ? { taskType: event.payload.taskType } : {}), + ...(event.payload.toolUseId ? { toolUseId: event.payload.toolUseId } : {}), + ...(event.payload.workflowName ? { workflowName: event.payload.workflowName } : {}), ...(event.payload.description ? 
{ detail: truncateDetail(event.payload.description) } : {}), @@ -467,18 +482,40 @@ function runtimeEventToActivities( } case "task.progress": { + const progressActivity: OrchestrationThreadActivity = { + id: event.eventId, + createdAt: event.createdAt, + tone: "info", + kind: "task.progress", + summary: "Reasoning update", + payload: { + taskId: event.payload.taskId, + detail: truncateDetail(event.payload.summary ?? event.payload.description), + ...(event.payload.summary ? { summary: truncateDetail(event.payload.summary) } : {}), + ...(event.payload.lastToolName ? { lastToolName: event.payload.lastToolName } : {}), + ...(event.payload.usage !== undefined ? { usage: event.payload.usage } : {}), + }, + turnId: toTurnId(event.turnId) ?? null, + ...maybeSequence, + }; + if (event.payload.workflowProgress === undefined) { + return [progressActivity]; + } + // Workflow snapshots are cumulative state, not timeline entries: reuse a + // stable activity id per task so the projection and client upsert one + // row per run instead of accumulating one per progress tick. return [ + progressActivity, { - id: event.eventId, + id: workflowActivityId(event.payload.taskId), createdAt: event.createdAt, tone: "info", - kind: "task.progress", - summary: "Reasoning update", + kind: "task.workflow-updated", + summary: truncateDetail(event.payload.description), payload: { taskId: event.payload.taskId, - detail: truncateDetail(event.payload.summary ?? event.payload.description), - ...(event.payload.summary ? { summary: truncateDetail(event.payload.summary) } : {}), - ...(event.payload.lastToolName ? { lastToolName: event.payload.lastToolName } : {}), + description: truncateDetail(event.payload.description), + workflowProgress: event.payload.workflowProgress, ...(event.payload.usage !== undefined ? { usage: event.payload.usage } : {}), }, turnId: toTurnId(event.turnId) ?? 
null, @@ -487,6 +524,23 @@ function runtimeEventToActivities( ]; } + case "task.workflowMeta": { + return [ + { + id: workflowMetaActivityId(event.payload.taskId), + createdAt: event.createdAt, + tone: "info", + kind: "task.workflow-meta", + summary: event.payload.workflowName + ? `Workflow "${event.payload.workflowName}" launched` + : "Workflow launched", + payload: { ...event.payload }, + turnId: toTurnId(event.turnId) ?? null, + ...maybeSequence, + }, + ]; + } + case "task.completed": { return [ { diff --git a/apps/server/src/orchestration/decider.ts b/apps/server/src/orchestration/decider.ts index 0d4af771ca8..058456f0995 100644 --- a/apps/server/src/orchestration/decider.ts +++ b/apps/server/src/orchestration/decider.ts @@ -483,6 +483,28 @@ export const decideOrchestrationCommand = Effect.fn("decideOrchestrationCommand" }; } + case "thread.task.stop": { + yield* requireThread({ + readModel, + command, + threadId: command.threadId, + }); + return { + ...(yield* withEventBase({ + aggregateKind: "thread", + aggregateId: command.threadId, + occurredAt: command.createdAt, + commandId: command.commandId, + })), + type: "thread.task-stop-requested", + payload: { + threadId: command.threadId, + taskId: command.taskId, + createdAt: command.createdAt, + }, + }; + } + case "thread.approval.respond": { yield* requireThread({ readModel, diff --git a/apps/server/src/orchestration/decider.workflowTaskStop.test.ts b/apps/server/src/orchestration/decider.workflowTaskStop.test.ts new file mode 100644 index 00000000000..f4d816a10e5 --- /dev/null +++ b/apps/server/src/orchestration/decider.workflowTaskStop.test.ts @@ -0,0 +1,121 @@ +import { + CommandId, + DEFAULT_PROVIDER_INTERACTION_MODE, + EventId, + ProjectId, + ThreadId, + ProviderInstanceId, +} from "@t3tools/contracts"; +import * as Effect from "effect/Effect"; +import * as NodeServices from "@effect/platform-node/NodeServices"; +import { expect, it } from "@effect/vitest"; + +import { decideOrchestrationCommand } from 
"./decider.ts"; +import { createEmptyReadModel, projectEvent } from "./projector.ts"; + +const asCommandId = (value: string): CommandId => CommandId.make(value); +const asEventId = (value: string): EventId => EventId.make(value); +const asProjectId = (value: string): ProjectId => ProjectId.make(value); +const asThreadId = (value: string): ThreadId => ThreadId.make(value); + +const now = "2026-01-01T00:00:00.000Z"; + +const seedReadModel = Effect.gen(function* () { + const initial = createEmptyReadModel(now); + const withProject = yield* projectEvent(initial, { + sequence: 1, + eventId: asEventId("evt-project-create"), + aggregateKind: "project", + aggregateId: asProjectId("project-workflow"), + type: "project.created", + occurredAt: now, + commandId: asCommandId("cmd-project-create"), + causationEventId: null, + correlationId: asCommandId("cmd-project-create"), + metadata: {}, + payload: { + projectId: asProjectId("project-workflow"), + title: "Project Workflow", + workspaceRoot: "/tmp/project-workflow", + defaultModelSelection: null, + scripts: [], + createdAt: now, + updatedAt: now, + }, + }); + + return yield* projectEvent(withProject, { + sequence: 2, + eventId: asEventId("evt-thread-create"), + aggregateKind: "thread", + aggregateId: asThreadId("thread-workflow"), + type: "thread.created", + occurredAt: now, + commandId: asCommandId("cmd-thread-create"), + causationEventId: null, + correlationId: asCommandId("cmd-thread-create"), + metadata: {}, + payload: { + threadId: asThreadId("thread-workflow"), + projectId: asProjectId("project-workflow"), + title: "Thread Workflow", + modelSelection: { + instanceId: ProviderInstanceId.make("codex"), + model: "gpt-5-codex", + }, + interactionMode: DEFAULT_PROVIDER_INTERACTION_MODE, + runtimeMode: "approval-required", + branch: null, + worktreePath: null, + createdAt: now, + updatedAt: now, + }, + }); +}); + +it.layer(NodeServices.layer)("decider thread.task.stop", (it) => { + it.effect("emits a thread.task-stop-requested 
event carrying the task id", () => + Effect.gen(function* () { + const readModel = yield* seedReadModel; + const result = yield* decideOrchestrationCommand({ + command: { + type: "thread.task.stop", + commandId: asCommandId("cmd-task-stop"), + threadId: asThreadId("thread-workflow"), + taskId: "task-9", + createdAt: now, + }, + readModel, + }); + const events = Array.isArray(result) ? result : [result]; + expect(events).toHaveLength(1); + const event = events[0]!; + expect(event.type).toBe("thread.task-stop-requested"); + expect(event.payload).toMatchObject({ + threadId: asThreadId("thread-workflow"), + taskId: "task-9", + createdAt: now, + }); + }), + ); + + it.effect("rejects a task stop for a thread that does not exist", () => + Effect.gen(function* () { + const readModel = yield* seedReadModel; + const error = yield* Effect.flip( + decideOrchestrationCommand({ + command: { + type: "thread.task.stop", + commandId: asCommandId("cmd-task-stop-missing"), + threadId: asThreadId("thread-missing"), + taskId: "task-9", + createdAt: now, + }, + readModel, + }), + ); + expect(error.message).toContain("thread-missing"); + expect(error.message).toContain("does not exist"); + }), + ); +}); diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts index 191bf8e27db..eef9d661d96 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts @@ -58,6 +58,7 @@ class FakeClaudeQuery implements AsyncIterable { public readonly setModelCalls: Array = []; public readonly setPermissionModeCalls: Array = []; public readonly setMaxThinkingTokensCalls: Array = []; + public readonly stopTaskCalls: Array = []; public closeCalls = 0; emit(message: SDKMessage): void { @@ -110,6 +111,10 @@ class FakeClaudeQuery implements AsyncIterable { this.setMaxThinkingTokensCalls.push(maxThinkingTokens); }; + readonly stopTask = async (taskId: string): Promise => { + 
this.stopTaskCalls.push(taskId); + }; + readonly close = (): void => { this.closeCalls += 1; this.finish(); @@ -3786,4 +3791,225 @@ describe("ClaudeAdapterLive", () => { Effect.provide(harness.layer), ); }); + + it.effect( + "normalizes workflow_progress on task progress, clipping previews and dropping malformed entries", + () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.takeUntil( + adapter.streamEvents, + (event) => event.type === "task.progress", + ).pipe(Stream.runCollect, Effect.forkChild); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: ProviderDriverKind.make("claudeAgent"), + runtimeMode: "full-access", + }); + + const longPreview = "x".repeat(300); + harness.query.emit({ + type: "system", + subtype: "task_progress", + task_id: "task-wf-1", + description: "spec workflow", + workflow_progress: [ + { type: "workflow_phase", index: 0, title: "Plan" }, + { type: "workflow_agent", index: 0, state: "start", promptPreview: longPreview }, + { type: "workflow_log", message: "kicked off" }, + { type: "workflow_agent", state: "start" }, // missing index -> dropped + { type: "workflow_mystery", index: 9 }, // unknown type -> dropped + ], + session_id: "sdk-session-workflow-progress", + uuid: "task-workflow-progress-1", + } as unknown as SDKMessage); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const progress = runtimeEvents.find((event) => event.type === "task.progress"); + assert.equal(progress?.type, "task.progress"); + if (progress?.type === "task.progress") { + const workflowProgress = progress.payload.workflowProgress; + // Only the three well-formed entries survive; order is phases, agents, logs. 
+ assert.equal(workflowProgress?.length, 3); + assert.equal(workflowProgress?.[0]?.type, "workflow_phase"); + assert.equal(workflowProgress?.[1]?.type, "workflow_agent"); + assert.equal(workflowProgress?.[2]?.type, "workflow_log"); + const agent = workflowProgress?.[1]; + if (agent?.type === "workflow_agent") { + // 240-char clip + a trailing ellipsis. + assert.equal(agent.promptPreview?.length, 241); + assert.equal(agent.promptPreview?.endsWith("…"), true); + } + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }, + ); + + it.effect("forwards workflow tool_use_id and workflow_name on task started", () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.takeUntil( + adapter.streamEvents, + (event) => event.type === "task.started", + ).pipe(Stream.runCollect, Effect.forkChild); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: ProviderDriverKind.make("claudeAgent"), + runtimeMode: "full-access", + }); + + harness.query.emit({ + type: "system", + subtype: "task_started", + task_id: "task-wf-1", + description: "spec workflow", + task_type: "local_workflow", + tool_use_id: "tool-wf-1", + workflow_name: "spec", + session_id: "sdk-session-workflow-started", + uuid: "task-workflow-started-1", + } as unknown as SDKMessage); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const started = runtimeEvents.find((event) => event.type === "task.started"); + assert.equal(started?.type, "task.started"); + if (started?.type === "task.started") { + assert.equal(started.payload.toolUseId, "tool-wf-1"); + assert.equal(started.payload.workflowName, "spec"); + assert.equal(started.payload.taskType, "local_workflow"); + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }); + + 
it.effect("emits task.workflowMeta from a Workflow tool result", () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.takeUntil( + adapter.streamEvents, + (event) => event.type === "task.workflowMeta", + ).pipe(Stream.runCollect, Effect.forkChild); + + const session = yield* adapter.startSession({ + threadId: THREAD_ID, + provider: ProviderDriverKind.make("claudeAgent"), + runtimeMode: "full-access", + }); + + yield* adapter.sendTurn({ + threadId: session.threadId, + input: "run the workflow", + attachments: [], + }); + + harness.query.emit({ + type: "stream_event", + session_id: "sdk-session-workflow-tool", + uuid: "stream-workflow-start", + parent_tool_use_id: null, + event: { + type: "content_block_start", + index: 1, + content_block: { + type: "tool_use", + id: "tool-wf-1", + name: "Workflow", + input: {}, + }, + }, + } as unknown as SDKMessage); + + harness.query.emit({ + type: "stream_event", + session_id: "sdk-session-workflow-tool", + uuid: "stream-workflow-stop", + parent_tool_use_id: null, + event: { + type: "content_block_stop", + index: 1, + }, + } as unknown as SDKMessage); + + harness.query.emit({ + type: "user", + session_id: "sdk-session-workflow-tool", + uuid: "user-workflow-result", + parent_tool_use_id: null, + message: { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-wf-1", + content: "workflow launched", + }, + ], + }, + tool_use_result: { + taskId: "task-1", + runId: "wf_abc", + workflowName: "spec", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + taskType: "local_workflow", + }, + } as unknown as SDKMessage); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const meta = runtimeEvents.find((event) => event.type === "task.workflowMeta"); + assert.equal(meta?.type, "task.workflowMeta"); + if (meta?.type === "task.workflowMeta") { + assert.deepEqual(meta.payload, { + taskId: 
"task-1", + runId: "wf_abc", + workflowName: "spec", + taskType: "local_workflow", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + }); + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }); + + it.effect("forwards stopTask to the Claude query runtime", () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + yield* adapter.streamEvents.pipe(Stream.runDrain, Effect.forkChild); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: ProviderDriverKind.make("claudeAgent"), + runtimeMode: "full-access", + }); + + const stopTask = adapter.stopTask; + if (stopTask === undefined) { + throw new Error("Expected the Claude adapter to expose stopTask."); + } + yield* stopTask(THREAD_ID, "task-9"); + + assert.deepEqual(harness.query.stopTaskCalls, ["task-9"]); + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }); }); diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index 97a93f85829..f62a33068ce 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -45,6 +45,11 @@ import { ThreadId, TurnId, type UserInputQuestion, + type WorkflowAgentProgressEntry, + type WorkflowLogProgressEntry, + type WorkflowPhaseProgressEntry, + type WorkflowProgressEntry, + type WorkflowRunHandles, } from "@t3tools/contracts"; import { applyClaudePromptEffortPrefix, @@ -204,6 +209,7 @@ interface ClaudeSessionContext { interface ClaudeQueryRuntime extends AsyncIterable { readonly interrupt: () => Promise; + readonly stopTask?: (taskId: string) => Promise; readonly setModel: (model?: string) => Promise; readonly setPermissionMode: (mode: PermissionMode) => Promise; readonly setMaxThinkingTokens: (maxThinkingTokens: number | null) => Promise; @@ 
-714,6 +720,212 @@ function readStringArray(value: unknown): Array { : []; } +const WORKFLOW_TOOL_NAME = "Workflow"; +const MAX_WORKFLOW_AGENT_ENTRIES = 300; +const MAX_WORKFLOW_PHASE_ENTRIES = 50; +const MAX_WORKFLOW_LOG_ENTRIES = 40; +const MAX_WORKFLOW_PREVIEW_CHARS = 240; +const MAX_WORKFLOW_RESULT_PREVIEW_CHARS = 400; + +function workflowString(value: unknown): string | undefined { + return typeof value === "string" && value.trim().length > 0 ? value : undefined; +} + +function workflowClippedString(value: unknown, limit: number): string | undefined { + const text = workflowString(value); + if (text === undefined) { + return undefined; + } + return text.length > limit ? `${text.slice(0, limit)}\u2026` : text; +} + +function workflowFiniteNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +function normalizeWorkflowAgentEntry( + entry: Record, +): WorkflowAgentProgressEntry | undefined { + const index = workflowFiniteNumber(entry.index); + const state = workflowString(entry.state); + if (index === undefined || state === undefined) { + return undefined; + } + const isolation = + entry.isolation === "worktree" || entry.isolation === "remote" ? entry.isolation : undefined; + return { + type: "workflow_agent", + index, + state, + ...(workflowString(entry.label) !== undefined ? { label: workflowString(entry.label) } : {}), + ...(workflowFiniteNumber(entry.phaseIndex) !== undefined + ? { phaseIndex: workflowFiniteNumber(entry.phaseIndex) } + : {}), + ...(workflowString(entry.phaseTitle) !== undefined + ? { phaseTitle: workflowString(entry.phaseTitle) } + : {}), + ...(workflowString(entry.agentId) !== undefined + ? { agentId: workflowString(entry.agentId) } + : {}), + ...(workflowString(entry.agentType) !== undefined + ? { agentType: workflowString(entry.agentType) } + : {}), + ...(workflowString(entry.model) !== undefined ? 
{ model: workflowString(entry.model) } : {}), + ...(workflowString(entry.fallbackModel) !== undefined + ? { fallbackModel: workflowString(entry.fallbackModel) } + : {}), + ...(isolation !== undefined ? { isolation } : {}), + ...(workflowFiniteNumber(entry.attempt) !== undefined + ? { attempt: workflowFiniteNumber(entry.attempt) } + : {}), + ...(workflowFiniteNumber(entry.queuedAt) !== undefined + ? { queuedAt: workflowFiniteNumber(entry.queuedAt) } + : {}), + ...(workflowFiniteNumber(entry.startedAt) !== undefined + ? { startedAt: workflowFiniteNumber(entry.startedAt) } + : {}), + ...(workflowFiniteNumber(entry.lastProgressAt) !== undefined + ? { lastProgressAt: workflowFiniteNumber(entry.lastProgressAt) } + : {}), + ...(entry.cached === true ? { cached: true } : {}), + ...(workflowString(entry.remoteSessionId) !== undefined + ? { remoteSessionId: workflowString(entry.remoteSessionId) } + : {}), + ...(workflowString(entry.lastToolName) !== undefined + ? { lastToolName: workflowString(entry.lastToolName) } + : {}), + ...(workflowClippedString(entry.lastToolSummary, MAX_WORKFLOW_PREVIEW_CHARS) !== undefined + ? { + lastToolSummary: workflowClippedString(entry.lastToolSummary, MAX_WORKFLOW_PREVIEW_CHARS), + } + : {}), + ...(workflowClippedString(entry.promptPreview, MAX_WORKFLOW_PREVIEW_CHARS) !== undefined + ? { promptPreview: workflowClippedString(entry.promptPreview, MAX_WORKFLOW_PREVIEW_CHARS) } + : {}), + ...(workflowClippedString(entry.resultPreview, MAX_WORKFLOW_RESULT_PREVIEW_CHARS) !== undefined + ? { + resultPreview: workflowClippedString( + entry.resultPreview, + MAX_WORKFLOW_RESULT_PREVIEW_CHARS, + ), + } + : {}), + ...(workflowClippedString(entry.error, MAX_WORKFLOW_PREVIEW_CHARS) !== undefined + ? { error: workflowClippedString(entry.error, MAX_WORKFLOW_PREVIEW_CHARS) } + : {}), + }; +} + +/** + * Normalize the Claude Agent SDK's `workflow_progress` snapshot. 
+ * + * The field is deliberate wire surface (the CLI's own /workflows view renders + * it) but is absent from the published SDK types, so every read is defensive: + * malformed entries and unknown entry types are dropped, previews are + * clipped, and entry counts are capped before the snapshot enters the + * runtime-event contract. + */ +function normalizeWorkflowProgress( + value: unknown, +): ReadonlyArray | undefined { + if (!Array.isArray(value) || value.length === 0) { + return undefined; + } + const agents: Array = []; + const phases: Array = []; + const logs: Array = []; + for (const raw of value) { + if (raw === null || typeof raw !== "object" || Array.isArray(raw)) { + continue; + } + const entry = raw as Record; + switch (entry.type) { + case "workflow_agent": { + const agent = normalizeWorkflowAgentEntry(entry); + if (agent && agents.length < MAX_WORKFLOW_AGENT_ENTRIES) { + agents.push(agent); + } + break; + } + case "workflow_phase": { + const index = workflowFiniteNumber(entry.index); + const title = workflowString(entry.title); + if ( + index !== undefined && + title !== undefined && + phases.length < MAX_WORKFLOW_PHASE_ENTRIES + ) { + phases.push({ + type: "workflow_phase", + index, + title, + ...(workflowString(entry.kind) !== undefined + ? { kind: workflowString(entry.kind) } + : {}), + }); + } + break; + } + case "workflow_log": { + const logMessage = workflowClippedString(entry.message, MAX_WORKFLOW_RESULT_PREVIEW_CHARS); + if (logMessage !== undefined) { + logs.push({ type: "workflow_log", message: logMessage }); + } + break; + } + default: + break; + } + } + // Narration is append-only upstream; keep the newest lines when clipping. + const clippedLogs = + logs.length > MAX_WORKFLOW_LOG_ENTRIES ? logs.slice(-MAX_WORKFLOW_LOG_ENTRIES) : logs; + const entries = [...phases, ...agents, ...clippedLogs]; + return entries.length > 0 ? 
entries : undefined; +} + +function readClaudeWorkflowProgress( + message: SDKMessage, +): ReadonlyArray | undefined { + // `workflow_progress` is not yet declared on SDKTaskProgressMessage — this + // cast is the single place the undocumented field is read. + const raw = (message as { readonly workflow_progress?: unknown }).workflow_progress; + return normalizeWorkflowProgress(raw); +} + +function normalizeWorkflowRunHandles( + toolUseResult: Record, +): WorkflowRunHandles | undefined { + const taskId = workflowString(toolUseResult.taskId); + if (taskId === undefined) { + return undefined; + } + return { + taskId, + ...(workflowString(toolUseResult.runId) !== undefined + ? { runId: workflowString(toolUseResult.runId) } + : {}), + ...(workflowString(toolUseResult.workflowName) !== undefined + ? { workflowName: workflowString(toolUseResult.workflowName) } + : {}), + ...(workflowString(toolUseResult.taskType) !== undefined + ? { taskType: workflowString(toolUseResult.taskType) } + : {}), + ...(workflowString(toolUseResult.scriptPath) !== undefined + ? { scriptPath: workflowString(toolUseResult.scriptPath) } + : {}), + ...(workflowString(toolUseResult.transcriptDir) !== undefined + ? { transcriptDir: workflowString(toolUseResult.transcriptDir) } + : {}), + ...(workflowString(toolUseResult.sessionUrl) !== undefined + ? { sessionUrl: workflowString(toolUseResult.sessionUrl) } + : {}), + ...(workflowString(toolUseResult.warning) !== undefined + ? 
{ warning: workflowString(toolUseResult.warning) } + : {}), + }; +} + function readClaudeToolUseResult(message: SDKMessage): Record | undefined { if (message.type !== "user") { return undefined; @@ -2451,6 +2663,30 @@ export const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( }); } + if (!toolResult.isError && tool.toolName === WORKFLOW_TOOL_NAME && toolUseResult) { + const workflowHandles = normalizeWorkflowRunHandles(toolUseResult); + if (workflowHandles) { + const workflowMetaStamp = yield* makeEventStamp(); + yield* offerRuntimeEvent({ + type: "task.workflowMeta", + eventId: workflowMetaStamp.eventId, + provider: PROVIDER, + createdAt: workflowMetaStamp.createdAt, + threadId: context.session.threadId, + ...(context.turnState ? { turnId: asCanonicalTurnId(context.turnState.turnId) } : {}), + payload: workflowHandles, + providerRefs: nativeProviderRefs(context, { + providerItemId: tool.itemId, + }), + raw: { + source: "claude.sdk.message", + method: "claude/user", + payload: message, + }, + }); + } + } + context.inFlightTools.delete(index); } }); @@ -2673,10 +2909,13 @@ export const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( taskId: RuntimeTaskId.make(message.task_id), description: message.description, ...(message.task_type ? { taskType: message.task_type } : {}), + ...(message.tool_use_id ? { toolUseId: message.tool_use_id } : {}), + ...(message.workflow_name ? { workflowName: message.workflow_name } : {}), }, }); return; - case "task_progress": + case "task_progress": { + const workflowProgress = readClaudeWorkflowProgress(message); yield* emitThreadTokenUsage( context, normalizeClaudeTaskProgressTokenUsage(message.usage, context), @@ -2694,9 +2933,18 @@ export const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( ...(message.summary ? { summary: message.summary } : {}), ...(message.usage ? { usage: message.usage } : {}), ...(message.last_tool_name ? 
{ lastToolName: message.last_tool_name } : {}), + ...(workflowProgress !== undefined ? { workflowProgress } : {}), }, }); return; + } + case "task_updated": + // Task status patches (pause/background/description edits). The + // canonical lifecycle events above already carry everything the + // runtime-event model consumes; swallow these instead of routing + // them to the unknown-subtype warning path, which would fire on + // every workflow status transition. + return; case "task_notification": yield* emitThreadTokenUsage( context, @@ -3751,6 +3999,24 @@ export const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( }, ); + const stopTask: NonNullable = Effect.fn("stopTask")( + function* (threadId, taskId) { + const context = yield* requireSession(threadId); + const stop = context.query.stopTask; + if (stop === undefined) { + return yield* toRequestError( + threadId, + "task/stop", + new Error("The Claude SDK runtime for this session does not expose stopTask."), + ); + } + yield* Effect.tryPromise({ + try: () => stop(taskId), + catch: (cause) => toRequestError(threadId, "task/stop", cause), + }); + }, + ); + const readThread: ClaudeAdapterShape["readThread"] = Effect.fn("readThread")( function* (threadId) { const context = yield* requireSession(threadId); @@ -3854,6 +4120,7 @@ export const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( startSession, sendTurn, interruptTurn, + stopTask, readThread, rollbackThread, respondToRequest, diff --git a/apps/server/src/provider/Layers/ProviderService.ts b/apps/server/src/provider/Layers/ProviderService.ts index 2eaaeb8ce3c..20546ac5296 100644 --- a/apps/server/src/provider/Layers/ProviderService.ts +++ b/apps/server/src/provider/Layers/ProviderService.ts @@ -19,6 +19,7 @@ import { ProviderSendTurnInput, ProviderSessionStartInput, ProviderStopSessionInput, + ProviderStopTaskInput, type ProviderInstanceId, type ProviderDriverKind, type ProviderRuntimeEvent, @@ -754,6 +755,36 @@ const 
makeProviderService = Effect.fn("makeProviderService")(function* ( }, ); + const stopTask: ProviderServiceMethod<"stopTask"> = Effect.fn("stopTask")(function* (rawInput) { + const input = yield* decodeInputOrValidationError({ + operation: "ProviderService.stopTask", + schema: ProviderStopTaskInput, + payload: rawInput, + }); + const routed = yield* resolveRoutableSession({ + threadId: input.threadId, + operation: "ProviderService.stopTask", + allowRecovery: true, + }); + yield* Effect.annotateCurrentSpan({ + "provider.operation": "stop-task", + "provider.kind": routed.adapter.provider, + "provider.thread_id": input.threadId, + "provider.task_id": input.taskId, + }); + const adapterStopTask = routed.adapter.stopTask; + if (adapterStopTask === undefined) { + return yield* toValidationError( + "ProviderService.stopTask", + `Provider '${routed.adapter.provider}' does not support stopping background tasks.`, + ); + } + yield* adapterStopTask(routed.threadId, input.taskId); + yield* analytics.record("provider.task.stopped", { + provider: routed.adapter.provider, + }); + }); + const respondToRequest: ProviderServiceMethod<"respondToRequest"> = Effect.fn("respondToRequest")( function* (rawInput) { const input = yield* decodeInputOrValidationError({ @@ -1075,6 +1106,7 @@ const makeProviderService = Effect.fn("makeProviderService")(function* ( respondToRequest, respondToUserInput, stopSession, + stopTask, listSessions, getCapabilities, getInstanceInfo, diff --git a/apps/server/src/provider/Layers/ProviderSessionReaper.test.ts b/apps/server/src/provider/Layers/ProviderSessionReaper.test.ts index e976c183a43..72ad4e39010 100644 --- a/apps/server/src/provider/Layers/ProviderSessionReaper.test.ts +++ b/apps/server/src/provider/Layers/ProviderSessionReaper.test.ts @@ -153,6 +153,7 @@ describe("ProviderSessionReaper", () => { startSession: () => unsupported(), sendTurn: () => unsupported(), interruptTurn: () => unsupported(), + stopTask: () => unsupported(), respondToRequest: () 
=> unsupported(), respondToUserInput: () => unsupported(), stopSession, diff --git a/apps/server/src/provider/Services/ProviderAdapter.ts b/apps/server/src/provider/Services/ProviderAdapter.ts index 01eeae7b7bd..6f2e124b84f 100644 --- a/apps/server/src/provider/Services/ProviderAdapter.ts +++ b/apps/server/src/provider/Services/ProviderAdapter.ts @@ -68,6 +68,13 @@ export interface ProviderAdapterShape { */ readonly interruptTurn: (threadId: ThreadId, turnId?: TurnId) => Effect.Effect; + /** + * Stop one background task (e.g. a running workflow) inside an active + * session. Optional: adapters whose provider has no background-task + * runtime omit it, and callers surface an "unsupported" error. + */ + readonly stopTask?: (threadId: ThreadId, taskId: string) => Effect.Effect; + /** * Respond to an interactive approval request. */ diff --git a/apps/server/src/provider/Services/ProviderService.ts b/apps/server/src/provider/Services/ProviderService.ts index 4d4cb4fa01a..c9474014012 100644 --- a/apps/server/src/provider/Services/ProviderService.ts +++ b/apps/server/src/provider/Services/ProviderService.ts @@ -13,6 +13,7 @@ */ import type { ProviderInterruptTurnInput, + ProviderStopTaskInput, ProviderInstanceId, ProviderRespondToRequestInput, ProviderRespondToUserInputInput, @@ -72,6 +73,11 @@ export interface ProviderServiceShape { input: ProviderRespondToUserInputInput, ) => Effect.Effect; + /** + * Stop one background task inside an active provider session. + */ + readonly stopTask: (input: ProviderStopTaskInput) => Effect.Effect; + /** * Stop a provider session. 
*/ diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index 26528c84d34..2ef3dfd61d4 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -103,6 +103,7 @@ import * as VcsStatusBroadcaster from "./vcs/VcsStatusBroadcaster.ts"; import * as VcsDriverRegistry from "./vcs/VcsDriverRegistry.ts"; import * as VcsProvisioningService from "./vcs/VcsProvisioningService.ts"; import * as GitWorkflowService from "./git/GitWorkflowService.ts"; +import * as WorkflowInspection from "./workflow/WorkflowInspectionService.ts"; import * as ReviewService from "./review/ReviewService.ts"; import * as SourceControlRepositoryService from "./sourceControl/SourceControlRepositoryService.ts"; import * as ServerSecretStore from "./auth/ServerSecretStore.ts"; @@ -670,6 +671,7 @@ const buildAppUnderTest = (options?: { registerTerminalProcesses: () => Effect.void, unregisterTerminal: () => Effect.void, }), + WorkflowInspection.layer, ), ), Layer.provide( diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 0c632d8486c..b473099e8a2 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -65,6 +65,7 @@ import * as VcsProcess from "./vcs/VcsProcess.ts"; import * as VcsProvisioningService from "./vcs/VcsProvisioningService.ts"; import * as VcsStatusBroadcaster from "./vcs/VcsStatusBroadcaster.ts"; import * as GitWorkflowService from "./git/GitWorkflowService.ts"; +import * as WorkflowInspection from "./workflow/WorkflowInspectionService.ts"; import * as ReviewService from "./review/ReviewService.ts"; import * as SourceControlProviderRegistry from "./sourceControl/SourceControlProviderRegistry.ts"; import * as SourceControlRepositoryService from "./sourceControl/SourceControlRepositoryService.ts"; @@ -291,7 +292,7 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( Layer.provideMerge(GitLayerLive), Layer.provideMerge(VcsLayerLive), Layer.provideMerge(ProviderRuntimeLayerLive), - 
Layer.provideMerge(Layer.mergeAll(TerminalLayerLive, PreviewLayerLive)), + Layer.provideMerge(Layer.mergeAll(TerminalLayerLive, PreviewLayerLive, WorkflowInspection.layer)), Layer.provideMerge(PersistenceLayerLive), Layer.provideMerge(Keybindings.layer), Layer.provideMerge(ProviderRegistryLive), diff --git a/apps/server/src/workflow/WorkflowInspectionService.test.ts b/apps/server/src/workflow/WorkflowInspectionService.test.ts new file mode 100644 index 00000000000..5dcc4884a46 --- /dev/null +++ b/apps/server/src/workflow/WorkflowInspectionService.test.ts @@ -0,0 +1,309 @@ +// @effect-diagnostics nodeBuiltinImport:off - test builds fixtures via Node fs/path directly. +// @effect-diagnostics preferSchemaOverJson:off - fixtures serialize plain JSON journal records. +import * as NodeFSP from "node:fs/promises"; +import * as NodePath from "node:path"; + +import { assert, describe, it } from "@effect/vitest"; +import * as NodeServices from "@effect/platform-node/NodeServices"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; + +import * as WorkflowInspectionService from "./WorkflowInspectionService.ts"; + +interface Layout { + readonly root: string; + readonly transcriptDir: string; + readonly scriptsDir: string; + readonly scriptPath: string; +} + +const makeLayout = (fs: FileSystem.FileSystem, root: string) => + Effect.gen(function* () { + const sessionDir = NodePath.join(root, "proj", "sess"); + const transcriptDir = NodePath.join(sessionDir, "subagents", "workflows", "wf_abc"); + const scriptsDir = NodePath.join(sessionDir, "workflows", "scripts"); + yield* fs.makeDirectory(transcriptDir, { recursive: true }); + yield* fs.makeDirectory(scriptsDir, { recursive: true }); + return { + root, + transcriptDir, + scriptsDir, + scriptPath: NodePath.join(scriptsDir, "spec.js"), + } satisfies Layout; + }); + +/** Build a service instance whose projects root is an isolated temp dir. 
*/ +const setup = Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + const root = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-inspection-" }); + const layout = yield* makeLayout(fs, root); + const service = yield* WorkflowInspectionService.make({ projectsRoot: root }); + return { fs, service, layout }; +}); + +describe("WorkflowInspectionService", () => { + describe("readScript", () => { + it.effect("reads a contained script and reports it untruncated", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + yield* fs.writeFileString(layout.scriptPath, "export const run = () => 1;\n"); + + const result = yield* service.readScript({ scriptPath: layout.scriptPath }); + assert.equal(result.source, "export const run = () => 1;\n"); + assert.isFalse(result.truncated); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("clips scripts larger than the cap and marks them truncated", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const big = "a".repeat(512 * 1024 + 128); + yield* fs.writeFileString(layout.scriptPath, big); + + const result = yield* service.readScript({ scriptPath: layout.scriptPath }); + assert.isTrue(result.truncated); + assert.equal(result.source.length, 512 * 1024); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a relative path as invalid-path", () => + Effect.gen(function* () { + const { service } = yield* setup; + const error = yield* service + .readScript({ scriptPath: "relative/spec.js" }) + .pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a path outside the projects root as invalid-path", () => + Effect.gen(function* () { + const { fs, service } = yield* setup; + const outside = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-outside-" }); + const 
outsideScript = NodePath.join(outside, "escape.js"); + yield* fs.writeFileString(outsideScript, "export const x = 1;"); + + const error = yield* service.readScript({ scriptPath: outsideScript }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a symlink inside the root that escapes it as invalid-path", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const outside = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-outside-" }); + const outsideScript = NodePath.join(outside, "real.js"); + yield* fs.writeFileString(outsideScript, "export const x = 1;"); + + const linkPath = NodePath.join(layout.scriptsDir, "link.js"); + yield* Effect.promise(() => NodeFSP.symlink(outsideScript, linkPath)); + + const error = yield* service.readScript({ scriptPath: linkPath }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a non-script extension as invalid-path", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const badPath = NodePath.join(layout.scriptsDir, "spec.txt"); + yield* fs.writeFileString(badPath, "not a script"); + + const error = yield* service.readScript({ scriptPath: badPath }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("reports a missing script as not-found", () => + Effect.gen(function* () { + const { service, layout } = yield* setup; + const missing = NodePath.join(layout.scriptsDir, "missing.js"); + const error = yield* service.readScript({ scriptPath: missing }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + 
assert.equal(error.reason, "not-found"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + }); + + describe("readJournal", () => { + it.effect("summarizes started and result records with clipping", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const bigResult = "z".repeat(64 * 1024); + const lines = [ + JSON.stringify({ type: "started", key: "k1", agentId: "a1" }), + JSON.stringify({ + type: "result", + key: "k1", + agentId: "a1", + result: { ok: true, value: 42 }, + }), + JSON.stringify({ type: "started", key: "k2", agentId: "a2" }), + "this-is-not-json{", + JSON.stringify({ type: "result", key: "k3", agentId: "a3", result: bigResult }), + ]; + yield* fs.writeFileString( + NodePath.join(layout.transcriptDir, "journal.jsonl"), + `${lines.join("\n")}\n`, + ); + + const result = yield* service.readJournal({ transcriptDir: layout.transcriptDir }); + assert.isFalse(result.truncated); + assert.deepEqual( + result.entries.map((entry) => entry.agentId), + ["a1", "a2", "a3"], + ); + + const a1 = result.entries[0]; + assert.isTrue(a1?.hasResult); + assert.equal(a1?.resultJson, JSON.stringify({ ok: true, value: 42 })); + assert.isUndefined(a1?.resultTruncated); + + const a2 = result.entries[1]; + assert.isFalse(a2?.hasResult); + assert.isUndefined(a2?.resultJson); + + const a3 = result.entries[2]; + assert.isTrue(a3?.hasResult); + assert.isTrue(a3?.resultTruncated); + assert.equal(a3?.resultJson?.length, 32 * 1024); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("reports a missing journal as not-found", () => + Effect.gen(function* () { + const { service, layout } = yield* setup; + const error = yield* service + .readJournal({ transcriptDir: layout.transcriptDir }) + .pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "not-found"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a transcript dir outside the root as invalid-path", () => + 
Effect.gen(function* () { + const { fs, service } = yield* setup; + const outside = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-outside-" }); + yield* fs.writeFileString(NodePath.join(outside, "journal.jsonl"), ""); + + const error = yield* service.readJournal({ transcriptDir: outside }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + }); + + describe("readAgentTranscript", () => { + it.effect("reads the full transcript and reports completion", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + yield* fs.writeFileString( + NodePath.join(layout.transcriptDir, "agent-a1.jsonl"), + "l0\nl1\nl2\n", + ); + + const result = yield* service.readAgentTranscript({ + transcriptDir: layout.transcriptDir, + agentId: "a1", + }); + assert.deepEqual(result.lines, ["l0", "l1", "l2"]); + assert.equal(result.nextLine, 3); + assert.isTrue(result.complete); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("returns the remainder from a mid-file cursor", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + yield* fs.writeFileString( + NodePath.join(layout.transcriptDir, "agent-a1.jsonl"), + "l0\nl1\nl2\n", + ); + + const result = yield* service.readAgentTranscript({ + transcriptDir: layout.transcriptDir, + agentId: "a1", + afterLine: 1, + }); + assert.deepEqual(result.lines, ["l1", "l2"]); + assert.equal(result.nextLine, 3); + assert.isTrue(result.complete); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("returns empty and complete when the cursor is past end-of-file", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + yield* fs.writeFileString( + NodePath.join(layout.transcriptDir, "agent-a1.jsonl"), + "l0\nl1\nl2\n", + ); + + const result = yield* service.readAgentTranscript({ + transcriptDir: layout.transcriptDir, + 
agentId: "a1", + afterLine: 10, + }); + assert.deepEqual(result.lines, []); + assert.equal(result.nextLine, 10); + assert.isTrue(result.complete); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a traversal agent id as invalid-path", () => + Effect.gen(function* () { + const { service, layout } = yield* setup; + const error = yield* service + .readAgentTranscript({ transcriptDir: layout.transcriptDir, agentId: "../journal" }) + .pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("pages a long transcript at the line cap", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const lines = Array.from({ length: 500 }, (_unused, index) => `line-${index}`); + yield* fs.writeFileString( + NodePath.join(layout.transcriptDir, "agent-a1.jsonl"), + lines.join("\n"), + ); + + const first = yield* service.readAgentTranscript({ + transcriptDir: layout.transcriptDir, + agentId: "a1", + }); + assert.equal(first.lines.length, 400); + assert.equal(first.nextLine, 400); + assert.isFalse(first.complete); + + const second = yield* service.readAgentTranscript({ + transcriptDir: layout.transcriptDir, + agentId: "a1", + afterLine: first.nextLine, + }); + assert.equal(second.lines.length, 100); + assert.equal(second.nextLine, 500); + assert.isTrue(second.complete); + assert.equal(second.lines[99], "line-499"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + + it.effect("reports a missing transcript as not-found", () => + Effect.gen(function* () { + const { service, layout } = yield* setup; + const error = yield* service + .readAgentTranscript({ transcriptDir: layout.transcriptDir, agentId: "missing" }) + .pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "not-found"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + }); +}); diff --git 
a/apps/server/src/workflow/WorkflowInspectionService.ts b/apps/server/src/workflow/WorkflowInspectionService.ts new file mode 100644 index 00000000000..c771278c49e --- /dev/null +++ b/apps/server/src/workflow/WorkflowInspectionService.ts @@ -0,0 +1,309 @@ +// @effect-diagnostics nodeBuiltinImport:off - realpath containment must use Node's fs/path directly. +import * as NodeFSP from "node:fs/promises"; +import * as NodeOS from "node:os"; +import * as NodePath from "node:path"; + +import { + WorkflowInspectionError, + type WorkflowReadAgentTranscriptInput, + type WorkflowReadAgentTranscriptResult, + type WorkflowReadJournalInput, + type WorkflowReadJournalResult, + type WorkflowReadScriptInput, + type WorkflowReadScriptResult, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; +import * as Layer from "effect/Layer"; + +/** + * Read-only inspection of Claude Agent SDK workflow-run artifacts on local + * disk. Clients echo server-local paths back over RPC, so every path is + * validated structurally (absolute + realpath-contained inside the projects + * root) before any disk access — the service must never become an + * arbitrary-file-read oracle. + */ + +/** `readScript`: clip source text past this many characters. */ +const SCRIPT_MAX_CHARS = 512 * 1024; +/** `readJournal`: clip each serialized result past this many characters. */ +const JOURNAL_RESULT_MAX_CHARS = 32 * 1024; +/** `readJournal`: cap the number of distinct agents reported. */ +const JOURNAL_MAX_ENTRIES = 512; +/** `readAgentTranscript`: cap the number of lines returned per page. */ +const TRANSCRIPT_MAX_LINES = 400; +/** `readAgentTranscript`: stop a page once accumulated chars exceed this. */ +const TRANSCRIPT_MAX_CHARS = 768 * 1024; + +/** Only these agent id shapes are accepted before touching the filesystem. 
*/ +const AGENT_ID_PATTERN = /^[A-Za-z0-9_-]+$/; + +/** Mutable while parsing the journal; frozen into the readonly contract shape. */ +interface MutableJournalEntry { + agentId: string; + hasResult: boolean; + resultJson?: string; + resultTruncated?: boolean; +} + +/** Parse one JSONL line defensively; unparseable lines return `undefined`. */ +const parseJsonLine = (text: string): unknown => { + try { + return JSON.parse(text) as unknown; + } catch { + return undefined; + } +}; + +const isEnoent = (cause: unknown): boolean => + typeof cause === "object" && + cause !== null && + "code" in cause && + (cause as { code?: unknown }).code === "ENOENT"; + +const isNotFoundPlatformError = (cause: unknown): boolean => + typeof cause === "object" && + cause !== null && + "reason" in cause && + typeof (cause as { reason?: unknown }).reason === "object" && + (cause as { reason: { _tag?: unknown } }).reason !== null && + (cause as { reason: { _tag?: unknown } }).reason._tag === "NotFound"; + +export class WorkflowInspectionService extends Context.Service< + WorkflowInspectionService, + { + readonly readScript: ( + input: WorkflowReadScriptInput, + ) => Effect.Effect; + readonly readJournal: ( + input: WorkflowReadJournalInput, + ) => Effect.Effect; + readonly readAgentTranscript: ( + input: WorkflowReadAgentTranscriptInput, + ) => Effect.Effect; + } +>()("t3/workflow/WorkflowInspectionService") {} + +export const make = (options?: { readonly projectsRoot?: string }) => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + const projectsRoot = + options?.projectsRoot ?? NodePath.join(NodeOS.homedir(), ".claude", "projects"); + + /** + * Resolve the real path of `target` and prove it is contained within the + * real projects root using a path-segment-safe prefix comparison. ENOENT + * during either realpath maps to `not-found`; escape maps to + * `invalid-path`. 
+ */ + const resolveContained = Effect.fn("WorkflowInspectionService.resolveContained")(function* ( + operation: string, + target: string, + ) { + if (!NodePath.isAbsolute(target)) { + return yield* new WorkflowInspectionError({ + operation, + reason: "invalid-path", + detail: "Path must be absolute.", + }); + } + + const realRoot = yield* Effect.tryPromise({ + try: () => NodeFSP.realpath(projectsRoot), + catch: (cause) => + new WorkflowInspectionError({ + operation, + reason: isEnoent(cause) ? "not-found" : "read-failed", + detail: "Failed to resolve the workflow projects root.", + cause, + }), + }); + + const realTarget = yield* Effect.tryPromise({ + try: () => NodeFSP.realpath(target), + catch: (cause) => + new WorkflowInspectionError({ + operation, + reason: isEnoent(cause) ? "not-found" : "read-failed", + detail: "Failed to resolve the requested path.", + cause, + }), + }); + + if (realTarget !== realRoot && !realTarget.startsWith(realRoot + NodePath.sep)) { + return yield* new WorkflowInspectionError({ + operation, + reason: "invalid-path", + detail: "Path escapes the workflow projects root.", + }); + } + + return realTarget; + }); + + const readScript = Effect.fn("WorkflowInspectionService.readScript")(function* ( + input: WorkflowReadScriptInput, + ) { + const operation = "WorkflowInspectionService.readScript"; + if (!input.scriptPath.endsWith(".js") && !input.scriptPath.endsWith(".mjs")) { + return yield* new WorkflowInspectionError({ + operation, + reason: "invalid-path", + detail: "Script path must end with .js or .mjs.", + }); + } + + const realPath = yield* resolveContained(operation, input.scriptPath); + const source = yield* fs.readFileString(realPath).pipe( + Effect.mapError( + (cause) => + new WorkflowInspectionError({ + operation, + reason: isNotFoundPlatformError(cause) ? 
"not-found" : "read-failed", + detail: "Failed to read the workflow script.", + cause, + }), + ), + ); + + const truncated = source.length > SCRIPT_MAX_CHARS; + return { + source: truncated ? source.slice(0, SCRIPT_MAX_CHARS) : source, + truncated, + } satisfies WorkflowReadScriptResult; + }); + + const readJournal = Effect.fn("WorkflowInspectionService.readJournal")(function* ( + input: WorkflowReadJournalInput, + ) { + const operation = "WorkflowInspectionService.readJournal"; + const realDir = yield* resolveContained(operation, input.transcriptDir); + const journalPath = NodePath.join(realDir, "journal.jsonl"); + const raw = yield* fs.readFileString(journalPath).pipe( + Effect.mapError( + (cause) => + new WorkflowInspectionError({ + operation, + reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", + detail: "Failed to read the workflow journal.", + cause, + }), + ), + ); + + // Preserve first-seen agent order via an insertion-ordered Map. + const entries = new Map(); + let truncated = false; + + const ensureEntry = (agentId: string): MutableJournalEntry | undefined => { + const existing = entries.get(agentId); + if (existing !== undefined) return existing; + if (entries.size >= JOURNAL_MAX_ENTRIES) { + truncated = true; + return undefined; + } + const created: MutableJournalEntry = { agentId, hasResult: false }; + entries.set(agentId, created); + return created; + }; + + for (const line of raw.split("\n")) { + const text = line.trim(); + if (text.length === 0) continue; + const record = parseJsonLine(text); + if (record === undefined) continue; + if (typeof record !== "object" || record === null) continue; + const parsed = record as { + type?: unknown; + agentId?: unknown; + result?: unknown; + }; + if (typeof parsed.agentId !== "string" || parsed.agentId.length === 0) continue; + + if (parsed.type === "started") { + ensureEntry(parsed.agentId); + continue; + } + if (parsed.type === "result") { + const entry = ensureEntry(parsed.agentId); + if 
(entry === undefined) continue; + entry.hasResult = true; + // @effect-diagnostics-next-line preferSchemaOverJson:off - result is arbitrary JSON re-serialized verbatim. + const serialized = JSON.stringify(parsed.result); + if (serialized !== undefined) { + const resultTruncated = serialized.length > JOURNAL_RESULT_MAX_CHARS; + entry.resultJson = resultTruncated + ? serialized.slice(0, JOURNAL_RESULT_MAX_CHARS) + : serialized; + if (resultTruncated) entry.resultTruncated = true; + } + } + } + + return { + entries: Array.from(entries.values()), + truncated, + } satisfies WorkflowReadJournalResult; + }); + + const readAgentTranscript = Effect.fn("WorkflowInspectionService.readAgentTranscript")( + function* (input: WorkflowReadAgentTranscriptInput) { + const operation = "WorkflowInspectionService.readAgentTranscript"; + if (!AGENT_ID_PATTERN.test(input.agentId)) { + return yield* new WorkflowInspectionError({ + operation, + reason: "invalid-path", + detail: "Agent id contains unsupported characters.", + }); + } + + const realDir = yield* resolveContained(operation, input.transcriptDir); + const transcriptPath = NodePath.join(realDir, `agent-${input.agentId}.jsonl`); + // v1 reads the whole file per page; acceptable for current transcript + // sizes. Revisit with a streaming/seek reader if transcripts grow large. + const raw = yield* fs.readFileString(transcriptPath).pipe( + Effect.mapError( + (cause) => + new WorkflowInspectionError({ + operation, + reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", + detail: "Failed to read the agent transcript.", + cause, + }), + ), + ); + + const allLines = raw.split("\n"); + if (allLines.length > 0 && allLines[allLines.length - 1] === "") { + allLines.pop(); + } + const total = allLines.length; + const afterLine = Math.max(0, input.afterLine ?? 
0); + + const lines: string[] = []; + let accumulated = 0; + for (let index = afterLine; index < total && lines.length < TRANSCRIPT_MAX_LINES; index++) { + const current = allLines[index] ?? ""; + lines.push(current); + accumulated += current.length; + if (accumulated > TRANSCRIPT_MAX_CHARS) break; + } + + const nextLine = afterLine + lines.length; + return { + lines, + nextLine, + complete: nextLine >= total, + } satisfies WorkflowReadAgentTranscriptResult; + }, + ); + + return WorkflowInspectionService.of({ + readScript, + readJournal, + readAgentTranscript, + }); + }); + +export const layer = Layer.effect(WorkflowInspectionService, make()); diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 9020e99f670..32355654947 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -89,6 +89,7 @@ import * as WorkspacePaths from "./workspace/WorkspacePaths.ts"; import * as VcsStatusBroadcaster from "./vcs/VcsStatusBroadcaster.ts"; import * as VcsProvisioningService from "./vcs/VcsProvisioningService.ts"; import * as GitWorkflowService from "./git/GitWorkflowService.ts"; +import * as WorkflowInspection from "./workflow/WorkflowInspectionService.ts"; import * as ReviewService from "./review/ReviewService.ts"; import * as ProjectSetupScriptRunner from "./project/ProjectSetupScriptRunner.ts"; import * as RepositoryIdentityResolver from "./project/RepositoryIdentityResolver.ts"; @@ -313,6 +314,9 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.gitResolvePullRequest, AuthOrchestrationOperateScope], [WS_METHODS.gitPreparePullRequestThread, AuthOrchestrationOperateScope], [WS_METHODS.vcsListRefs, AuthOrchestrationReadScope], + [WS_METHODS.workflowReadScript, AuthOrchestrationReadScope], + [WS_METHODS.workflowReadJournal, AuthOrchestrationReadScope], + [WS_METHODS.workflowReadAgentTranscript, AuthOrchestrationReadScope], [WS_METHODS.vcsCreateWorktree, AuthOrchestrationOperateScope], [WS_METHODS.vcsRemoveWorktree, AuthOrchestrationOperateScope], 
[WS_METHODS.vcsCreateRef, AuthOrchestrationOperateScope], @@ -399,6 +403,7 @@ const makeWsRpcLayer = ( const keybindings = yield* Keybindings.Keybindings; const externalLauncher = yield* ExternalLauncher.ExternalLauncher; const gitWorkflow = yield* GitWorkflowService.GitWorkflowService; + const workflowInspection = yield* WorkflowInspection.WorkflowInspectionService; const review = yield* ReviewService.ReviewService; const vcsProvisioning = yield* VcsProvisioningService.VcsProvisioningService; const vcsStatusBroadcaster = yield* VcsStatusBroadcaster.VcsStatusBroadcaster; @@ -1559,6 +1564,20 @@ const makeWsRpcLayer = ( observeRpcEffect(WS_METHODS.reviewGetDiffPreview, review.getDiffPreview(input), { "rpc.aggregate": "review", }), + [WS_METHODS.workflowReadScript]: (input) => + observeRpcEffect(WS_METHODS.workflowReadScript, workflowInspection.readScript(input), { + "rpc.aggregate": "workflow", + }), + [WS_METHODS.workflowReadJournal]: (input) => + observeRpcEffect(WS_METHODS.workflowReadJournal, workflowInspection.readJournal(input), { + "rpc.aggregate": "workflow", + }), + [WS_METHODS.workflowReadAgentTranscript]: (input) => + observeRpcEffect( + WS_METHODS.workflowReadAgentTranscript, + workflowInspection.readAgentTranscript(input), + { "rpc.aggregate": "workflow" }, + ), [WS_METHODS.terminalOpen]: (input) => observeRpcEffect(WS_METHODS.terminalOpen, terminalManager.open(input), { "rpc.aggregate": "terminal", diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx index f5ea5bb1eba..e89b8b43333 100644 --- a/apps/web/src/components/ChatView.tsx +++ b/apps/web/src/components/ChatView.tsx @@ -249,6 +249,8 @@ import { resolveServerConfigVersionMismatch, } from "../versionSkew"; import { useAssetUrls } from "../assets/assetUrls"; +import { deriveWorkflowRuns } from "../workflow-logic"; +import { WorkflowPanel } from "./workflow/WorkflowPanel"; const IMAGE_ONLY_BOOTSTRAP_PROMPT = "[User attached one or more images without additional 
text. Respond using the conversation context and the attached image(s).]"; @@ -1012,6 +1014,7 @@ function ChatViewContent(props: ChatViewProps) { reportFailure: false, }); const startThreadTurn = useAtomCommand(threadEnvironment.startTurn, { reportFailure: false }); + const stopThreadTask = useAtomCommand(threadEnvironment.stopTask, { reportFailure: true }); const interruptThreadTurn = useAtomCommand(threadEnvironment.interruptTurn, { reportFailure: false, }); @@ -1727,6 +1730,38 @@ function ChatViewContent(props: ChatViewProps) { const phase = derivePhase(activeThread?.session ?? null); const threadActivities = activeThread?.activities ?? EMPTY_ACTIVITIES; const workLogEntries = useMemo(() => deriveWorkLogEntries(threadActivities), [threadActivities]); + const workflowRuns = useMemo(() => deriveWorkflowRuns(threadActivities), [threadActivities]); + const activeWorkflowSurface = + activeRightPanelSurface?.kind === "workflow" ? activeRightPanelSurface : null; + const activeWorkflowRun = useMemo( + () => + activeWorkflowSurface + ? (workflowRuns.find((run) => run.taskId === activeWorkflowSurface.taskId) ?? 
null) + : null, + [activeWorkflowSurface, workflowRuns], + ); + const onOpenWorkflowDetails = useCallback( + (taskId: string) => { + if (!activeThreadRef) { + return; + } + useRightPanelStore.getState().openWorkflow(activeThreadRef, taskId); + }, + [activeThreadRef], + ); + const onStopWorkflowTask = useMemo(() => { + if (!activeThread || activeThread.session?.status === "stopped") { + return null; + } + const threadId = activeThread.id; + const threadEnvironmentId = activeThread.environmentId; + return (taskId: string) => { + void stopThreadTask({ + environmentId: threadEnvironmentId, + input: { threadId, taskId }, + }); + }; + }, [activeThread, stopThreadTask]); const pendingApprovals = useMemo( () => derivePendingApprovals(threadActivities), [threadActivities], @@ -2060,8 +2095,13 @@ function ChatViewContent(props: ChatViewProps) { }, [attachmentPreviewHandoffByMessageId, displayServerMessages, optimisticUserMessages]); const timelineEntries = useMemo( () => - deriveTimelineEntries(timelineMessages, activeThread?.proposedPlans ?? [], workLogEntries), - [activeThread?.proposedPlans, timelineMessages, workLogEntries], + deriveTimelineEntries( + timelineMessages, + activeThread?.proposedPlans ?? [], + workLogEntries, + workflowRuns, + ), + [activeThread?.proposedPlans, timelineMessages, workLogEntries, workflowRuns], ); const { turnDiffSummaries, inferredCheckpointTurnCountByTurnId } = useTurnDiffSummaries(activeThread); @@ -4970,6 +5010,16 @@ function ChatViewContent(props: ChatViewProps) { + ) : activeRightPanelSurface?.kind === "workflow" ? ( + onStopWorkflowTask(activeWorkflowRun.taskId) + : undefined + } + /> ) : activeRightPanelSurface?.kind === "plan" ? 
( ; case "plan": return ; + case "workflow": + return ; } } diff --git a/apps/web/src/components/chat/MessagesTimeline.logic.ts b/apps/web/src/components/chat/MessagesTimeline.logic.ts index c6e277cce08..fcc57defa5a 100644 --- a/apps/web/src/components/chat/MessagesTimeline.logic.ts +++ b/apps/web/src/components/chat/MessagesTimeline.logic.ts @@ -6,6 +6,7 @@ import { type TimelineEntry, type WorkLogEntry, } from "../../session-logic"; +import type { WorkflowRun } from "../../workflow-logic"; import { type ChatMessage, type ProposedPlan, type TurnDiffSummary } from "../../types"; import { type MessageId, type OrchestrationLatestTurn, type TurnId } from "@t3tools/contracts"; @@ -136,6 +137,12 @@ export type MessagesTimelineRow = createdAt: string; proposedPlan: ProposedPlan; } + | { + kind: "workflow"; + id: string; + createdAt: string; + workflowRun: WorkflowRun; + } | { kind: "working"; id: string; createdAt: string | null }; export interface StableMessagesTimelineRowsState { @@ -488,6 +495,16 @@ export function deriveMessagesTimelineRows(input: { continue; } + if (timelineEntry.kind === "workflow") { + nextRows.push({ + kind: "workflow", + id: timelineEntry.id, + createdAt: timelineEntry.createdAt, + workflowRun: timelineEntry.workflowRun, + }); + continue; + } + const assistantTurnStillInProgress = timelineEntry.message.role === "assistant" && unsettledTurnId !== null && @@ -571,6 +588,17 @@ function isRowUnchanged(a: MessagesTimelineRow, b: MessagesTimelineRow): boolean case "proposed-plan": return a.proposedPlan === (b as typeof a).proposedPlan; + case "workflow": { + const bw = b as typeof a; + // WorkflowRun view models are rebuilt per derivation; compare the + // fields that drive rendering so unchanged runs keep row identity. 
+ return ( + a.createdAt === bw.createdAt && + a.workflowRun.status === bw.workflowRun.status && + a.workflowRun.updatedAt === bw.workflowRun.updatedAt + ); + } + case "work": return Equal.equals(a.groupedEntries, (b as typeof a).groupedEntries); diff --git a/apps/web/src/components/chat/MessagesTimeline.test.tsx b/apps/web/src/components/chat/MessagesTimeline.test.tsx index 0957e025311..84006a95106 100644 --- a/apps/web/src/components/chat/MessagesTimeline.test.tsx +++ b/apps/web/src/components/chat/MessagesTimeline.test.tsx @@ -177,6 +177,8 @@ function buildProps() { turnDiffSummaryByAssistantMessageId: new Map(), routeThreadKey: "environment-local:thread-1", onOpenTurnDiff: () => {}, + onOpenWorkflowDetails: () => {}, + onStopWorkflowTask: null, revertTurnCountByUserMessageId: new Map(), onRevertUserMessage: () => {}, isRevertingCheckpoint: false, diff --git a/apps/web/src/components/chat/MessagesTimeline.tsx b/apps/web/src/components/chat/MessagesTimeline.tsx index 1a4dc6b6895..e07ee8a178c 100644 --- a/apps/web/src/components/chat/MessagesTimeline.tsx +++ b/apps/web/src/components/chat/MessagesTimeline.tsx @@ -112,6 +112,7 @@ import { parseReviewCommentMessageSegments, type ReviewCommentContext, } from "../../reviewCommentContext"; +import { WorkflowRunCard } from "../workflow/WorkflowRunCard"; // --------------------------------------------------------------------------- // Context — shared state consumed by every row component via Context. 
@@ -134,6 +135,8 @@ interface TimelineRowSharedState { onOpenTurnDiff: (turnId: TurnId, filePath?: string) => void; onToggleTurnFold: (turnId: TurnId) => void; onToggleWorkGroup: (groupId: string, anchorElement?: HTMLElement) => void; + onOpenWorkflowDetails: (taskId: string) => void; + onStopWorkflowTask: ((taskId: string) => void) | null; } interface TimelineRowActivityState { @@ -173,6 +176,8 @@ interface MessagesTimelineProps { timestampFormat: TimestampFormat; workspaceRoot: string | undefined; skills?: ReadonlyArray>; + onOpenWorkflowDetails: (taskId: string) => void; + onStopWorkflowTask: ((taskId: string) => void) | null; anchorMessageId: MessageId | null; onAnchorReady: (messageId: MessageId, anchorIndex: number) => void; onAnchorSizeChanged: (messageId: MessageId, size: number) => void; @@ -206,6 +211,8 @@ export const MessagesTimeline = memo(function MessagesTimeline({ timestampFormat, workspaceRoot, skills = EMPTY_TIMELINE_SKILLS, + onOpenWorkflowDetails, + onStopWorkflowTask, anchorMessageId, onAnchorReady, onAnchorSizeChanged, @@ -421,6 +428,8 @@ export const MessagesTimeline = memo(function MessagesTimeline({ onOpenTurnDiff, onToggleTurnFold, onToggleWorkGroup, + onOpenWorkflowDetails, + onStopWorkflowTask, }), [ timestampFormat, @@ -435,6 +444,8 @@ export const MessagesTimeline = memo(function MessagesTimeline({ onOpenTurnDiff, onToggleTurnFold, onToggleWorkGroup, + onOpenWorkflowDetails, + onStopWorkflowTask, ], ); const activityState = useMemo( @@ -820,11 +831,30 @@ const TimelineRowContent = memo(function TimelineRowContent({ row }: { row: Time ) : null} {row.kind === "proposed-plan" ? : null} + {row.kind === "workflow" ? : null} {row.kind === "working" ? 
: null} ); }); +function WorkflowTimelineRow({ row }: { row: Extract }) { + const ctx = use(TimelineRowCtx); + const run = row.workflowRun; + const onOpenDetails = useCallback(() => { + ctx.onOpenWorkflowDetails(run.taskId); + }, [ctx, run.taskId]); + const stopHandler = ctx.onStopWorkflowTask; + const onStop = useMemo(() => { + if (run.status !== "running" || stopHandler === null) { + return undefined; + } + return () => { + stopHandler(run.taskId); + }; + }, [run.status, run.taskId, stopHandler]); + return ; +} + function UserTimelineRow({ row }: { row: Extract }) { const ctx = use(TimelineRowCtx); const userImages = row.message.attachments ?? []; diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx new file mode 100644 index 00000000000..2f0545aeb43 --- /dev/null +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -0,0 +1,702 @@ +import { DiffsHighlighter, getSharedHighlighter, SupportedLanguages } from "@pierre/diffs"; +import type { EnvironmentId } from "@t3tools/contracts"; +import { CheckIcon, ChevronRightIcon, CopyIcon, ExternalLinkIcon, NetworkIcon } from "lucide-react"; +import { + Component, + type KeyboardEvent as ReactKeyboardEvent, + type MouseEvent as ReactMouseEvent, + type ReactElement, + type ReactNode, + Suspense, + use, + useCallback, + useEffect, + useMemo, + useRef, + useState, +} from "react"; + +import { useTheme } from "~/hooks/useTheme"; +import { type DiffThemeName, resolveDiffThemeName } from "~/lib/diffRendering"; +import { cn } from "~/lib/utils"; +import { useEnvironmentQuery } from "~/state/query"; +import { useAtomCommand } from "~/state/use-atom-command"; +import { workflowEnvironment } from "~/state/workflow"; +import { + isRemoteWorkflowRun, + type WorkflowRun, + type WorkflowRunAgent, + type WorkflowRunStatus, + workflowRunTitle, +} from "~/workflow-logic"; +import { Button } from "../ui/button"; +import { AgentRowContent, PhaseHeader, WorkflowStatusChip } 
from "./workflowUi"; + +type WorkflowTabId = "run" | "script" | "logs"; + +const WORKFLOW_TABS: ReadonlyArray<{ id: WorkflowTabId; label: string }> = [ + { id: "run", label: "Run" }, + { id: "script", label: "Script" }, + { id: "logs", label: "Logs" }, +]; + +// --------------------------------------------------------------------------- +// Root — handles the not-found empty state, then delegates to the inner panel +// so every data hook runs unconditionally. +// --------------------------------------------------------------------------- + +export function WorkflowPanel(props: { + workflowRun: WorkflowRun | null; + environmentId: EnvironmentId; + onStop?: (() => void) | undefined; +}): ReactElement { + if (props.workflowRun === null) { + return ( +
+ Workflow not found +
+ ); + } + return ( + + ); +} + +function WorkflowPanelInner({ + run, + environmentId, + onStop, +}: { + run: WorkflowRun; + environmentId: EnvironmentId; + onStop?: (() => void) | undefined; +}): ReactElement { + const [tab, setTab] = useState("run"); + const { resolvedTheme } = useTheme(); + const themeName = resolveDiffThemeName(resolvedTheme); + + const scriptPath = run.handles?.scriptPath; + const transcriptDir = run.handles?.transcriptDir; + const remote = isRemoteWorkflowRun(run); + const isTerminal = run.status !== "running"; + + const scriptQuery = useEnvironmentQuery( + tab === "script" && scriptPath !== undefined + ? workflowEnvironment.readScript({ environmentId, input: { scriptPath } }) + : null, + ); + const journalQuery = useEnvironmentQuery( + tab === "logs" && transcriptDir !== undefined + ? workflowEnvironment.readJournal({ environmentId, input: { transcriptDir } }) + : null, + ); + + return ( +
+
+ + + {workflowRunTitle(run)} + + +
+ {isTerminal && run.handles?.runId !== undefined && scriptPath !== undefined && ( + + )} + {onStop && ( + + )} +
+
+ +
+ {WORKFLOW_TABS.map((entry) => ( + + ))} +
+ +
+ {tab === "run" && ( + + )} + {tab === "script" && ( + + )} + {tab === "logs" && ( + + )} +
+
+ ); +} + +// --------------------------------------------------------------------------- +// Header copy button +// --------------------------------------------------------------------------- + +function CopyResumeButton({ + scriptPath, + runId, +}: { + scriptPath: string; + runId: string; +}): ReactElement { + const [copied, setCopied] = useState(false); + const timerRef = useRef | null>(null); + + useEffect( + () => () => { + if (timerRef.current !== null) { + clearTimeout(timerRef.current); + } + }, + [], + ); + + const handleCopy = useCallback(() => { + const command = `Workflow({ scriptPath: "${scriptPath}", resumeFromRunId: "${runId}" })`; + void navigator.clipboard.writeText(command).then(() => { + setCopied(true); + if (timerRef.current !== null) { + clearTimeout(timerRef.current); + } + timerRef.current = setTimeout(() => setCopied(false), 1200); + }); + }, [runId, scriptPath]); + + return ( + + ); +} + +// --------------------------------------------------------------------------- +// Run tab +// --------------------------------------------------------------------------- + +function RunTab({ + run, + environmentId, + transcriptDir, + remote, +}: { + run: WorkflowRun; + environmentId: EnvironmentId; + transcriptDir: string | undefined; + remote: boolean; +}): ReactElement { + if (remote) { + const sessionUrl = run.handles?.sessionUrl; + if (sessionUrl === undefined) { + return ; + } + return ( + + + Running in the cloud — open session + + ); + } + + if (run.phases.length === 0) { + return ; + } + + return ( +
+ {run.phases.map((phase) => ( +
+ + {phase.agents.map((agent) => ( + + ))} +
+ ))} +
+ ); +} + +const stopPropagation = (event: ReactMouseEvent) => event.stopPropagation(); + +function ExpandableAgentRow({ + agent, + environmentId, + transcriptDir, + runStatus, +}: { + agent: WorkflowRunAgent; + environmentId: EnvironmentId; + transcriptDir: string | undefined; + runStatus: WorkflowRunStatus; +}): ReactElement { + const [expanded, setExpanded] = useState(false); + const agentId = agent.agentId; + const canExpand = + agentId !== undefined && transcriptDir !== undefined && agent.isolation !== "remote"; + + const toggle = useCallback(() => setExpanded((value) => !value), []); + const onKeyDown = useCallback( + (event: ReactKeyboardEvent) => { + if (event.key === "Enter" || event.key === " ") { + event.preventDefault(); + toggle(); + } + }, + [toggle], + ); + + const leading = canExpand ? ( + + ) : ( + + ); + + return ( +
+ + {canExpand && expanded && agentId !== undefined && transcriptDir !== undefined && ( + + )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Transcript view (cursor-paged, polled while running) +// --------------------------------------------------------------------------- + +function extractAssistantText(parsed: unknown): string | null { + if (typeof parsed !== "object" || parsed === null) { + return null; + } + const record = parsed as Record; + const message = + typeof record.message === "object" && record.message !== null + ? (record.message as Record) + : record; + const role = record.type ?? message.role; + if (role !== "assistant") { + return null; + } + const content = message.content; + if (typeof content === "string") { + return content; + } + if (Array.isArray(content)) { + const parts: string[] = []; + for (const block of content) { + if (typeof block === "object" && block !== null) { + const record2 = block as Record; + if (record2.type === "text" && typeof record2.text === "string") { + parts.push(record2.text); + } + } + } + if (parts.length > 0) { + return parts.join("\n"); + } + } + return null; +} + +function renderTranscriptLine(raw: string): { text: string; dim: boolean } { + try { + const parsed: unknown = JSON.parse(raw); + const text = extractAssistantText(parsed); + if (text !== null && text.trim().length > 0) { + return { text, dim: false }; + } + const type = + typeof parsed === "object" && parsed !== null && "type" in parsed + ? 
String((parsed as { type: unknown }).type) + : "event"; + return { text: type, dim: true }; + } catch { + return { text: raw, dim: true }; + } +} + +function AgentTranscriptView({ + environmentId, + transcriptDir, + agentId, + runStatus, +}: { + environmentId: EnvironmentId; + transcriptDir: string; + agentId: string; + runStatus: WorkflowRunStatus; +}): ReactElement { + const runTranscript = useAtomCommand( + workflowEnvironment.readAgentTranscript, + "workflow read transcript", + ); + const [lines, setLines] = useState([]); + const [failed, setFailed] = useState(false); + const [loading, setLoading] = useState(false); + const nextLineRef = useRef(0); + const completeRef = useRef(false); + const loadingRef = useRef(false); + + const loadMore = useCallback(async () => { + if (loadingRef.current || completeRef.current) { + return; + } + loadingRef.current = true; + setLoading(true); + const result = await runTranscript({ + environmentId, + input: { transcriptDir, agentId, afterLine: nextLineRef.current }, + }); + loadingRef.current = false; + setLoading(false); + if (result._tag !== "Success") { + setFailed(true); + return; + } + setFailed(false); + nextLineRef.current = result.value.nextLine; + completeRef.current = result.value.complete; + if (result.value.lines.length > 0) { + setLines((prev) => [...prev, ...result.value.lines]); + } + }, [agentId, environmentId, runTranscript, transcriptDir]); + + // Drain all currently-available pages when the row opens. + useEffect(() => { + const control = { cancelled: false }; + const drain = async () => { + while (!control.cancelled && !completeRef.current) { + const before = nextLineRef.current; + await loadMore(); + if (control.cancelled || nextLineRef.current === before) { + break; + } + } + }; + void drain(); + return () => { + control.cancelled = true; + }; + }, [loadMore]); + + // Keep polling for new lines while the run is live. 
+ useEffect(() => { + if (runStatus !== "running") { + return; + } + const id = setInterval(() => { + void loadMore(); + }, 2000); + return () => clearInterval(id); + }, [loadMore, runStatus]); + + return ( +
+ {lines.length === 0 ? ( + failed ? ( +

Failed to load transcript.

+ ) : loading ? ( +

Loading transcript…

+ ) : ( +

No transcript output.

+ ) + ) : ( + lines.map((line, index) => { + const parsed = renderTranscriptLine(line); + return ( +
+ {parsed.text} +
+ ); + }) + )} + {failed && lines.length > 0 && ( +

Failed to load more transcript.

+ )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Script tab +// --------------------------------------------------------------------------- + +let cachedScriptHighlighter: Promise | undefined; + +function getScriptHighlighter(): Promise { + cachedScriptHighlighter ??= getSharedHighlighter({ + themes: [resolveDiffThemeName("dark"), resolveDiffThemeName("light")], + langs: ["javascript" as SupportedLanguages], + preferredHighlighter: "shiki-js", + }); + return cachedScriptHighlighter; +} + +class WorkflowCodeErrorBoundary extends Component< + { fallback: ReactNode; children: ReactNode }, + { hasError: boolean } +> { + constructor(props: { fallback: ReactNode; children: ReactNode }) { + super(props); + this.state = { hasError: false }; + } + + static getDerivedStateFromError() { + return { hasError: true }; + } + + override render() { + return this.state.hasError ? this.props.fallback : this.props.children; + } +} + +function ScriptHighlight({ + source, + themeName, +}: { + source: string; + themeName: DiffThemeName; +}): ReactElement { + const highlighter = use(getScriptHighlighter()); + const html = useMemo( + () => highlighter.codeToHtml(source, { lang: "javascript", theme: themeName }), + [highlighter, source, themeName], + ); + return ( +
+ ); +} + +function ScriptTab({ + scriptPath, + query, + themeName, +}: { + scriptPath: string | undefined; + query: { + data: { source: string; truncated: boolean } | null; + error: string | null; + isPending: boolean; + }; + themeName: DiffThemeName; +}): ReactElement { + if (scriptPath === undefined) { + return ; + } + if (query.error !== null) { + return

{query.error}

; + } + if (query.data === null) { + return ; + } + const { source, truncated } = query.data; + const fallback = ( +
+      {source}
+    
+ ); + return ( +
+ {truncated && ( +

Script truncated for display.

+ )} + + + + + +
+ ); +} + +// --------------------------------------------------------------------------- +// Logs tab +// --------------------------------------------------------------------------- + +interface JournalEntry { + agentId: string; + hasResult: boolean; + resultJson?: string | undefined; + resultTruncated?: boolean | undefined; +} + +function LogsTab({ + logs, + transcriptDir, + query, +}: { + logs: string[]; + transcriptDir: string | undefined; + query: { + data: { entries: readonly JournalEntry[]; truncated: boolean } | null; + error: string | null; + isPending: boolean; + }; +}): ReactElement { + return ( +
+ {logs.length === 0 ? ( + + ) : ( +
+ {logs.map((log, index) => ( + // oxlint-disable-next-line no-array-index-key -- logs are append-only, index is stable +
+ {log} +
+ ))} +
+ )} + + {transcriptDir !== undefined && ( +
+

+ Results +

+ {query.error !== null ? ( +

{query.error}

+ ) : query.data === null ? ( + query.isPending ? ( + + ) : null + ) : query.data.entries.length === 0 ? ( + + ) : ( +
+ {query.data.entries.map((entry) => ( + + ))} +
+ )} + {query.data?.truncated && ( +

Results truncated.

+ )} +
+ )} +
+ ); +} + +function JournalResultRow({ entry }: { entry: JournalEntry }): ReactElement { + const [expanded, setExpanded] = useState(false); + const resultJson = entry.resultJson; + return ( +
+
+ {entry.agentId} + {!entry.hasResult && pending} +
+ {resultJson !== undefined && + (expanded ? ( +
setExpanded(false)} + onKeyDown={(event) => { + if (event.key === "Enter" || event.key === " ") { + event.preventDefault(); + setExpanded(false); + } + }} + > +
+              {resultJson}
+            
+ {entry.resultTruncated && ( +

Result truncated.

+ )} +
+ ) : ( + + ))} +
+ ); +} + +// --------------------------------------------------------------------------- +// Shared muted body +// --------------------------------------------------------------------------- + +function MutedBody({ text }: { text: string }): ReactElement { + return

{text}

; +} diff --git a/apps/web/src/components/workflow/WorkflowRunCard.tsx b/apps/web/src/components/workflow/WorkflowRunCard.tsx new file mode 100644 index 00000000000..bcb8f3bf6e3 --- /dev/null +++ b/apps/web/src/components/workflow/WorkflowRunCard.tsx @@ -0,0 +1,146 @@ +import { ExternalLinkIcon, NetworkIcon } from "lucide-react"; +import { type ReactElement } from "react"; + +import { cn } from "~/lib/utils"; +import { + formatWorkflowDuration, + formatWorkflowTokens, + isRemoteWorkflowRun, + type WorkflowRun, + type WorkflowRunAgent, + workflowRunTitle, +} from "~/workflow-logic"; +import { Button } from "../ui/button"; +import { AgentRowContent, PhaseHeader, WorkflowStatusChip, agentRollupLabel } from "./workflowUi"; + +const MAX_CARD_AGENT_ROWS = 8; + +function agentRecency(agent: WorkflowRunAgent): number { + return agent.lastProgressAt ?? agent.startedAt ?? agent.queuedAt ?? 0; +} + +/** Choose which agent indices survive the card cap: running+error first, then most recent. */ +function selectVisibleAgentIndices(agents: WorkflowRunAgent[], cap: number): Set { + const prioritized = [...agents].sort((a, b) => { + const aUrgent = a.status === "running" || a.status === "error" ? 0 : 1; + const bUrgent = b.status === "running" || b.status === "error" ? 
0 : 1; + if (aUrgent !== bUrgent) { + return aUrgent - bUrgent; + } + return agentRecency(b) - agentRecency(a); + }); + return new Set(prioritized.slice(0, cap).map((agent) => agent.index)); +} + +export function WorkflowRunCard(props: { + workflowRun: WorkflowRun; + onOpenDetails?: (() => void) | undefined; + onStop?: (() => void) | undefined; +}): ReactElement { + const { workflowRun: run, onOpenDetails, onStop } = props; + const title = workflowRunTitle(run); + const remote = isRemoteWorkflowRun(run); + const tokens = run.usage?.totalTokens; + const durationMs = run.usage?.durationMs; + + const allAgents = run.phases.flatMap((phase) => phase.agents); + const overCap = allAgents.length > MAX_CARD_AGENT_ROWS; + const visibleIndices = overCap ? selectVisibleAgentIndices(allAgents, MAX_CARD_AGENT_ROWS) : null; + const hiddenCount = overCap ? allAgents.length - MAX_CARD_AGENT_ROWS : 0; + + return ( +
+
+ + {title} + +
+
+ {agentRollupLabel(run.agentCounts)} + {tokens !== undefined && {formatWorkflowTokens(tokens)}} + {durationMs !== undefined && {formatWorkflowDuration(durationMs)}} +
+ {(onStop || onOpenDetails) && ( +
+ {onStop && ( + + )} + {onOpenDetails && ( + + )} +
+ )} +
+
+ + {remote ? ( + + ) : allAgents.length > 0 ? ( +
+ {run.phases.map((phase) => { + const rows = phase.agents.filter( + (agent) => visibleIndices === null || visibleIndices.has(agent.index), + ); + if (rows.length === 0) { + return null; + } + return ( +
+ + {rows.map((agent) => ( +
+ +
+ ))} +
+ ); + })} + {hiddenCount > 0 && ( + + )} +
+ ) : null} + + {run.handles?.warning !== undefined && ( +

{run.handles.warning}

+ )} +
+ ); +} + +function RemoteRunBody({ run }: { run: WorkflowRun }): ReactElement { + const sessionUrl = run.handles?.sessionUrl; + if (sessionUrl === undefined) { + return ( +

+ Running in the cloud +

+ ); + } + return ( + + + Running in the cloud — open session + + ); +} diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx new file mode 100644 index 00000000000..aa7f951911b --- /dev/null +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -0,0 +1,181 @@ +import type { ReactElement, ReactNode } from "react"; + +import { cn } from "~/lib/utils"; +import type { + WorkflowAgentStatus, + WorkflowRun, + WorkflowRunAgent, + WorkflowRunPhase, + WorkflowRunStatus, +} from "~/workflow-logic"; + +// --------------------------------------------------------------------------- +// Run-level status chip +// --------------------------------------------------------------------------- + +interface RunStatusVisual { + label: string; + dotClass: string; + textClass: string; + pulse: boolean; +} + +const RUN_STATUS_VISUALS: Record = { + running: { label: "Running", dotClass: "bg-info", textClass: "text-info", pulse: true }, + completed: { + label: "Completed", + dotClass: "bg-success", + textClass: "text-success", + pulse: false, + }, + failed: { + label: "Failed", + dotClass: "bg-destructive", + textClass: "text-destructive", + pulse: false, + }, + stopped: { + label: "Stopped", + dotClass: "bg-muted-foreground", + textClass: "text-muted-foreground", + pulse: false, + }, +}; + +export function WorkflowStatusChip({ status }: { status: WorkflowRunStatus }): ReactElement { + const visual = RUN_STATUS_VISUALS[status]; + return ( + + + {visual.pulse && ( + + )} + + + {visual.label} + + ); +} + +// --------------------------------------------------------------------------- +// Agent-level presentation +// --------------------------------------------------------------------------- + +const AGENT_STATUS_DOT: Record = { + queued: "bg-muted-foreground/50", + running: "bg-info animate-pulse", + done: "bg-success", + error: "bg-destructive", +}; + +export function AgentStatusDot({ status }: { status: WorkflowAgentStatus }): 
ReactElement { + return ; +} + +export function agentDisplayLabel(agent: WorkflowRunAgent): string { + return agent.label ?? agent.agentType ?? `agent ${agent.index}`; +} + +export function agentPreviewText(agent: WorkflowRunAgent): string | undefined { + switch (agent.status) { + case "error": + return agent.error ?? agent.resultPreview; + case "done": + return agent.resultPreview; + case "running": + return agent.lastToolSummary ?? agent.promptPreview; + default: + return agent.promptPreview; + } +} + +function AgentMetaBadges({ agent }: { agent: WorkflowRunAgent }): ReactElement | null { + const badges: string[] = []; + if (agent.cached) { + badges.push("cached"); + } + if (agent.attempt !== undefined && agent.attempt > 1) { + badges.push(`retry ${agent.attempt}`); + } + if (badges.length === 0) { + return null; + } + return ( + <> + {badges.map((badge) => ( + + {badge} + + ))} + + ); +} + +/** The shared inner content of an agent row: dot, label, badges, dimmed preview. */ +export function AgentRowContent({ + agent, + leading, +}: { + agent: WorkflowRunAgent; + leading?: ReactNode; +}): ReactElement { + const preview = agentPreviewText(agent); + return ( +
+ {leading} + + + {agentDisplayLabel(agent)} + + + {preview !== undefined && ( + {preview} + )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Phase header + rollup helpers +// --------------------------------------------------------------------------- + +export function phaseDoneCount(phase: WorkflowRunPhase): number { + return phase.agents.filter((agent) => agent.status === "done").length; +} + +export function PhaseHeader({ phase }: { phase: WorkflowRunPhase }): ReactElement { + return ( +
+ + {phase.title} + + {phase.agents.length > 0 && ( + + {phaseDoneCount(phase)}/{phase.agents.length} + + )} +
+ ); +} + +export function agentRollupLabel(counts: WorkflowRun["agentCounts"]): string { + return `${counts.done + counts.error}/${counts.total} agents`; +} diff --git a/apps/web/src/rightPanelStore.ts b/apps/web/src/rightPanelStore.ts index 70d163306cc..e10d2dd26bd 100644 --- a/apps/web/src/rightPanelStore.ts +++ b/apps/web/src/rightPanelStore.ts @@ -14,7 +14,15 @@ import { createJSONStorage, persist } from "zustand/middleware"; import { resolveStorage } from "./lib/storage"; -export const RIGHT_PANEL_KINDS = ["plan", "diff", "files", "file", "preview", "terminal"] as const; +export const RIGHT_PANEL_KINDS = [ + "plan", + "diff", + "files", + "file", + "preview", + "terminal", + "workflow", +] as const; export type RightPanelKind = (typeof RIGHT_PANEL_KINDS)[number]; export type RightPanelSurface = @@ -37,7 +45,8 @@ export type RightPanelSurface = revealLine: number | null; revealRequestId: number; } - | { id: "plan"; kind: "plan" }; + | { id: "plan"; kind: "plan" } + | { id: `workflow:${string}`; kind: "workflow"; taskId: string }; const RIGHT_PANEL_STORAGE_KEY = "t3code:right-panel-state:v2"; const RIGHT_PANEL_STORAGE_VERSION = 7; @@ -50,10 +59,14 @@ export interface ThreadRightPanelState { interface RightPanelStoreState { byThreadKey: Record; - open: (ref: ScopedThreadRef, kind: Exclude) => void; + open: ( + ref: ScopedThreadRef, + kind: Exclude, + ) => void; openBrowser: (ref: ScopedThreadRef, tabId: string | null) => void; openFile: (ref: ScopedThreadRef, relativePath: string, line?: number) => void; openTerminal: (ref: ScopedThreadRef, terminalId: string) => void; + openWorkflow: (ref: ScopedThreadRef, taskId: string) => void; splitTerminal: ( ref: ScopedThreadRef, surfaceId: string, @@ -72,7 +85,10 @@ interface RightPanelStoreState { show: (ref: ScopedThreadRef) => void; close: (ref: ScopedThreadRef) => void; toggleVisibility: (ref: ScopedThreadRef) => void; - toggle: (ref: ScopedThreadRef, kind: Exclude) => void; + toggle: ( + ref: ScopedThreadRef, + 
kind: Exclude, + ) => void; removeThread: (ref: ScopedThreadRef) => void; } @@ -83,7 +99,7 @@ const EMPTY_THREAD_STATE: ThreadRightPanelState = { }; const singletonSurface = ( - kind: Exclude, + kind: Exclude, ): RightPanelSurface => { switch (kind) { case "diff": @@ -112,6 +128,12 @@ const fileSurface = ( revealRequestId, }); +const workflowSurface = (taskId: string): RightPanelSurface => ({ + id: `workflow:${taskId}`, + kind: "workflow", + taskId, +}); + const terminalSurface = (terminalId: string): RightPanelSurface => ({ id: `terminal:${terminalId}`, kind: "terminal", @@ -286,6 +308,12 @@ export const useRightPanelStore = create()( }; }), })), + openWorkflow: (ref, taskId) => + set((state) => ({ + byThreadKey: updateThread(state.byThreadKey, scopedThreadKey(ref), (current) => + upsertSurface(current, workflowSurface(taskId)), + ), + })), openTerminal: (ref, terminalId) => set((state) => ({ byThreadKey: updateThread(state.byThreadKey, scopedThreadKey(ref), (current) => diff --git a/apps/web/src/session-logic.test.ts b/apps/web/src/session-logic.test.ts index 0f12e672f66..c77a830fc48 100644 --- a/apps/web/src/session-logic.test.ts +++ b/apps/web/src/session-logic.test.ts @@ -22,6 +22,7 @@ import { workEntryIndicatesToolNeutralStatus, workEntryIndicatesToolSuccess, } from "./session-logic"; +import { deriveWorkflowRuns } from "./workflow-logic.ts"; let nextActivityId = 0; @@ -1490,6 +1491,72 @@ describe("deriveWorkLogEntries", () => { expect(entries).toHaveLength(1); expect(entries[0]?.id).toBe("a-complete-same-timestamp"); }); + + it("suppresses workflow task rows and snapshot activities while keeping plain task rows", () => { + const activities: OrchestrationThreadActivity[] = [ + // Plain task: its progress/completed rows must survive. 
+ makeActivity({ + id: "plain-progress", + createdAt: "2026-02-23T00:00:01.000Z", + kind: "task.progress", + summary: "Reasoning update", + tone: "info", + payload: { taskId: "plain-1", summary: "thinking about it" }, + }), + makeActivity({ + id: "plain-complete", + createdAt: "2026-02-23T00:00:02.000Z", + kind: "task.completed", + summary: "Task completed", + tone: "info", + payload: { taskId: "plain-1", status: "completed", detail: "plain done" }, + }), + // Workflow task: every row below belongs to the dedicated workflow card. + makeActivity({ + id: "wf-start", + createdAt: "2026-02-23T00:00:03.000Z", + kind: "task.started", + summary: "local_workflow task started", + tone: "info", + payload: { taskId: "wf-1", taskType: "local_workflow", workflowName: "spec" }, + }), + makeActivity({ + id: "wf-progress", + createdAt: "2026-02-23T00:00:04.000Z", + kind: "task.progress", + summary: "Reasoning update", + tone: "info", + payload: { taskId: "wf-1", summary: "workflow ticking" }, + }), + makeActivity({ + id: "wf-updated", + createdAt: "2026-02-23T00:00:05.000Z", + kind: "task.workflow-updated", + summary: "spec workflow", + tone: "info", + payload: { taskId: "wf-1", description: "spec", workflowProgress: [] }, + }), + makeActivity({ + id: "wf-meta", + createdAt: "2026-02-23T00:00:06.000Z", + kind: "task.workflow-meta", + summary: "Workflow launched", + tone: "info", + payload: { taskId: "wf-1", runId: "wf_abc" }, + }), + makeActivity({ + id: "wf-complete", + createdAt: "2026-02-23T00:00:07.000Z", + kind: "task.completed", + summary: "Task completed", + tone: "info", + payload: { taskId: "wf-1", status: "completed" }, + }), + ]; + + const entries = deriveWorkLogEntries(activities); + expect(entries.map((entry) => entry.id)).toEqual(["plain-progress", "plain-complete"]); + }); }); describe("deriveTimelineEntries", () => { @@ -1537,6 +1604,54 @@ describe("deriveTimelineEntries", () => { }, }); }); + + it("emits a workflow entry sorted chronologically among messages", () 
=> { + const workflowRuns = deriveWorkflowRuns([ + makeActivity({ + id: "wf-start", + createdAt: "2026-02-23T00:00:02.000Z", + kind: "task.started", + summary: "local_workflow task started", + tone: "info", + payload: { taskId: "task-wf", taskType: "local_workflow", workflowName: "spec" }, + }), + ]); + expect(workflowRuns).toHaveLength(1); + + const entries = deriveTimelineEntries( + [ + { + id: MessageId.make("message-before"), + role: "user", + text: "kick it off", + createdAt: "2026-02-23T00:00:01.000Z", + turnId: null, + updatedAt: "2026-02-23T00:00:01.000Z", + streaming: false, + }, + { + id: MessageId.make("message-after"), + role: "assistant", + text: "done", + createdAt: "2026-02-23T00:00:03.000Z", + turnId: null, + updatedAt: "2026-02-23T00:00:03.000Z", + streaming: false, + }, + ], + [], + [], + workflowRuns, + ); + + expect(entries.map((entry) => entry.kind)).toEqual(["message", "workflow", "message"]); + const workflowEntry = entries[1]; + expect(workflowEntry).toMatchObject({ + kind: "workflow", + id: "workflow:task-wf", + workflowRun: { taskId: "task-wf", name: "spec" }, + }); + }); }); describe("deriveWorkLogEntries context window handling", () => { diff --git a/apps/web/src/session-logic.ts b/apps/web/src/session-logic.ts index 5d5051f748e..5c24a7aae96 100644 --- a/apps/web/src/session-logic.ts +++ b/apps/web/src/session-logic.ts @@ -21,6 +21,7 @@ import type { ThreadSession, TurnDiffSummary, } from "./types"; +import { collectWorkflowTaskIds, type WorkflowRun } from "./workflow-logic.ts"; export type ProviderPickerKind = ProviderDriverKind; @@ -137,6 +138,12 @@ export type TimelineEntry = kind: "work"; createdAt: string; entry: WorkLogEntry; + } + | { + id: string; + kind: "workflow"; + createdAt: string; + workflowRun: WorkflowRun; }; export function workLogEntryIsToolLike(entry: WorkLogEntry): boolean { @@ -624,15 +631,36 @@ export function hasActionableProposedPlan( return proposedPlan !== null && proposedPlan.implementedAt === null; } 
+function activityBelongsToWorkflow( + activity: OrchestrationThreadActivity, + workflowTaskIds: ReadonlySet, +): boolean { + const payload = activity.payload as Record | null | undefined; + const taskId = payload && typeof payload === "object" ? payload["taskId"] : undefined; + return typeof taskId === "string" && workflowTaskIds.has(taskId); +} + export function deriveWorkLogEntries( activities: ReadonlyArray, ): WorkLogEntry[] { const ordered = [...activities].toSorted(compareActivitiesByOrder); + const workflowTaskIds = collectWorkflowTaskIds(activities); const entries: DerivedWorkLogEntry[] = []; for (const activity of ordered) { if (activity.kind === "tool.started") continue; if (activity.kind === "task.started") continue; if (activity.kind === "context-window.updated") continue; + // Workflow runs render as dedicated timeline cards; their snapshot + // activities and per-tick task rows would duplicate that surface. + if (activity.kind === "task.workflow-updated") continue; + if (activity.kind === "task.workflow-meta") continue; + if ( + (activity.kind === "task.progress" || activity.kind === "task.completed") && + workflowTaskIds.size > 0 && + activityBelongsToWorkflow(activity, workflowTaskIds) + ) { + continue; + } if (activity.summary === "Checkpoint captured") continue; if (isPlanBoundaryToolActivity(activity)) continue; entries.push(toDerivedWorkLogEntry(activity)); @@ -1341,6 +1369,7 @@ export function deriveTimelineEntries( messages: ReadonlyArray, proposedPlans: ReadonlyArray, workEntries: ReadonlyArray, + workflowRuns: ReadonlyArray = [], ): TimelineEntry[] { const messageRows: TimelineEntry[] = messages.map((message) => ({ id: message.id, @@ -1360,7 +1389,13 @@ export function deriveTimelineEntries( createdAt: entry.createdAt, entry, })); - return [...messageRows, ...proposedPlanRows, ...workRows].toSorted((a, b) => + const workflowRows: TimelineEntry[] = workflowRuns.map((workflowRun) => ({ + id: `workflow:${workflowRun.taskId}`, + kind: 
"workflow", + createdAt: workflowRun.createdAt, + workflowRun, + })); + return [...messageRows, ...proposedPlanRows, ...workRows, ...workflowRows].toSorted((a, b) => a.createdAt.localeCompare(b.createdAt), ); } diff --git a/apps/web/src/state/workflow.ts b/apps/web/src/state/workflow.ts new file mode 100644 index 00000000000..276773c555d --- /dev/null +++ b/apps/web/src/state/workflow.ts @@ -0,0 +1,5 @@ +import { createWorkflowEnvironmentAtoms } from "@t3tools/client-runtime/state/workflow"; + +import { connectionAtomRuntime } from "../connection/runtime"; + +export const workflowEnvironment = createWorkflowEnvironmentAtoms(connectionAtomRuntime); diff --git a/apps/web/src/workflow-logic.test.ts b/apps/web/src/workflow-logic.test.ts new file mode 100644 index 00000000000..ecbee190885 --- /dev/null +++ b/apps/web/src/workflow-logic.test.ts @@ -0,0 +1,317 @@ +import { EventId, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts"; +import { describe, expect, it } from "vite-plus/test"; + +import { + collectWorkflowTaskIds, + deriveWorkflowAgentStatus, + deriveWorkflowRuns, + groupWorkflowAgentsByPhase, + isRemoteWorkflowRun, + type WorkflowRunAgent, +} from "./workflow-logic.ts"; + +let nextActivityId = 0; + +function buildActivity(overrides: { + id?: string; + createdAt?: string; + kind?: string; + summary?: string; + tone?: OrchestrationThreadActivity["tone"]; + payload?: Record; + turnId?: string; +}): OrchestrationThreadActivity { + return { + id: EventId.make(overrides.id ?? `activity-${nextActivityId++}`), + createdAt: overrides.createdAt ?? "2026-02-23T00:00:00.000Z", + kind: overrides.kind ?? "task.started", + // summary/kind must be trimmed non-empty branded strings. + summary: overrides.summary ?? "Workflow", + tone: overrides.tone ?? "info", + payload: overrides.payload ?? {}, + turnId: overrides.turnId ? 
TurnId.make(overrides.turnId) : null, + }; +} + +describe("deriveWorkflowAgentStatus", () => { + it("maps terminal states directly", () => { + expect(deriveWorkflowAgentStatus({ state: "done" })).toBe("done"); + expect(deriveWorkflowAgentStatus({ state: "error" })).toBe("error"); + }); + + it("treats start without startedAt as queued and with startedAt as running", () => { + expect(deriveWorkflowAgentStatus({ state: "start" })).toBe("queued"); + expect(deriveWorkflowAgentStatus({ state: "start", startedAt: 123 })).toBe("running"); + }); + + it("renders unknown future states as running once startedAt is present", () => { + expect(deriveWorkflowAgentStatus({ state: "reticulating", startedAt: 1 })).toBe("running"); + expect(deriveWorkflowAgentStatus({ state: "reticulating" })).toBe("queued"); + }); +}); + +describe("groupWorkflowAgentsByPhase", () => { + const agent = (over: Partial & { index: number }): WorkflowRunAgent => ({ + state: "start", + status: "queued", + ...over, + }); + + it("groups agents under their phase and synthesizes an Agents phase for unphased agents", () => { + const phases = groupWorkflowAgentsByPhase({ + phases: [{ index: 0, title: "Plan" }], + agents: [ + agent({ index: 0, phaseIndex: 0 }), + agent({ index: 1 }), // no phaseIndex -> synthetic "Agents" phase (index -1) + ], + }); + expect(phases.map((phase) => phase.title)).toEqual(["Agents", "Plan"]); + const synthetic = phases.find((phase) => phase.title === "Agents"); + expect(synthetic?.index).toBe(-1); + expect(synthetic?.agents.map((entry) => entry.index)).toEqual([1]); + }); + + it("falls back to a Phase title when the phase is unknown but an agent references it", () => { + const phases = groupWorkflowAgentsByPhase({ + phases: [], + agents: [agent({ index: 0, phaseIndex: 2 })], + }); + expect(phases).toHaveLength(1); + expect(phases[0]?.title).toBe("Phase 2"); + }); + + it("prefers an agent-supplied phaseTitle for an otherwise-unknown phase", () => { + const phases = 
groupWorkflowAgentsByPhase({ + phases: [], + agents: [agent({ index: 0, phaseIndex: 5, phaseTitle: "Custom" })], + }); + expect(phases[0]?.title).toBe("Custom"); + }); +}); + +function workflowStartedActivity(taskId: string, extra?: Record) { + return buildActivity({ + id: `start-${taskId}`, + kind: "task.started", + createdAt: "2026-02-23T00:00:01.000Z", + turnId: "turn-1", + payload: { taskId, taskType: "local_workflow", workflowName: "spec", ...extra }, + }); +} + +function workflowUpdatedActivity( + taskId: string, + workflowProgress: unknown[], + extra?: Record, +) { + return buildActivity({ + id: `updated-${taskId}`, + kind: "task.workflow-updated", + createdAt: "2026-02-23T00:00:02.000Z", + payload: { taskId, description: "spec workflow", workflowProgress, ...extra }, + }); +} + +describe("deriveWorkflowRuns", () => { + it("derives a single running->completed lifecycle from started + updated + meta + completed", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_phase", index: 0, title: "Plan" }, + { type: "workflow_agent", index: 0, state: "done", phaseIndex: 0 }, + { type: "workflow_log", message: "kicked off" }, + ]), + buildActivity({ + id: "meta-task-1", + kind: "task.workflow-meta", + createdAt: "2026-02-23T00:00:03.000Z", + payload: { + taskId: "task-1", + runId: "wf_abc", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + }, + }), + buildActivity({ + id: "complete-task-1", + kind: "task.completed", + createdAt: "2026-02-23T00:00:04.000Z", + payload: { taskId: "task-1", status: "completed", detail: "all done" }, + }), + ]); + + expect(runs).toHaveLength(1); + const run = runs[0]!; + expect(run.taskId).toBe("task-1"); + expect(run.status).toBe("completed"); + expect(run.name).toBe("spec"); + expect(run.completionSummary).toBe("all done"); + expect(run.handles?.runId).toBe("wf_abc"); + expect(run.logs).toEqual(["kicked off"]); + expect(run.agentCounts).toEqual({ 
total: 1, queued: 0, running: 0, done: 1, error: 0 }); + expect(run.turnId).toBe(TurnId.make("turn-1")); + }); + + it("maps failed and stopped completion statuses", () => { + const failed = deriveWorkflowRuns([ + workflowStartedActivity("task-f"), + buildActivity({ + id: "complete-task-f", + kind: "task.completed", + createdAt: "2026-02-23T00:00:04.000Z", + payload: { taskId: "task-f", status: "failed" }, + }), + ]); + expect(failed[0]?.status).toBe("failed"); + + const stopped = deriveWorkflowRuns([ + workflowStartedActivity("task-s"), + buildActivity({ + id: "complete-task-s", + kind: "task.completed", + createdAt: "2026-02-23T00:00:04.000Z", + payload: { taskId: "task-s", status: "stopped" }, + }), + ]); + expect(stopped[0]?.status).toBe("stopped"); + }); + + it("derives agent status per entry (queued/running/done/error/unknown-with-startedAt)", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_agent", index: 0, state: "start" }, + { type: "workflow_agent", index: 1, state: "start", startedAt: 100 }, + { type: "workflow_agent", index: 2, state: "done" }, + { type: "workflow_agent", index: 3, state: "error" }, + { type: "workflow_agent", index: 4, state: "reticulating", startedAt: 5 }, + ]), + ]); + expect(runs[0]?.agentCounts).toEqual({ + total: 5, + queued: 1, + running: 2, + done: 1, + error: 1, + }); + }); + + it("lets a later agent entry with the same index win", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_agent", index: 0, state: "start" }, + { type: "workflow_agent", index: 0, state: "done" }, + ]), + ]); + expect(runs[0]?.agentCounts.total).toBe(1); + expect(runs[0]?.agentCounts.done).toBe(1); + expect(runs[0]?.agentCounts.queued).toBe(0); + }); + + it("drops malformed progress entries without throwing", () => { + const runs = deriveWorkflowRuns([ + 
workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_agent", state: "start" }, // missing index + { type: "workflow_agent", index: 1 }, // missing state + "not an object", + null, + { type: "workflow_mystery", index: 9 }, + { type: "workflow_agent", index: 2, state: "done" }, + ]), + ]); + expect(runs[0]?.agentCounts.total).toBe(1); + expect(runs[0]?.agentCounts.done).toBe(1); + }); + + it("parses snake_case usage from the updated snapshot", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [{ type: "workflow_agent", index: 0, state: "done" }], { + usage: { total_tokens: 1200, tool_uses: 3, duration_ms: 4500 }, + }), + ]); + expect(runs[0]?.usage).toEqual({ totalTokens: 1200, toolUses: 3, durationMs: 4500 }); + }); + + it("ignores plain (non-workflow) tasks entirely", () => { + const runs = deriveWorkflowRuns([ + buildActivity({ + id: "plain-start", + kind: "task.started", + payload: { taskId: "plain-1", taskType: "plan" }, + }), + buildActivity({ + id: "plain-progress", + kind: "task.progress", + payload: { taskId: "plain-1", summary: "thinking" }, + }), + buildActivity({ + id: "plain-complete", + kind: "task.completed", + payload: { taskId: "plain-1", status: "completed" }, + }), + ]); + expect(runs).toEqual([]); + }); + + it("detects remote runs from session handles", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-remote"), + buildActivity({ + id: "meta-remote", + kind: "task.workflow-meta", + createdAt: "2026-02-23T00:00:03.000Z", + payload: { taskId: "task-remote", sessionUrl: "https://example.com/run" }, + }), + ]); + expect(runs).toHaveLength(1); + expect(isRemoteWorkflowRun(runs[0]!)).toBe(true); + }); +}); + +describe("collectWorkflowTaskIds", () => { + it("collects workflow task ids via workflowName, local_workflow task type, and workflow kinds", () => { + const ids = collectWorkflowTaskIds([ + buildActivity({ + id: "s1", 
+ kind: "task.started", + payload: { taskId: "by-name", workflowName: "spec" }, + }), + buildActivity({ + id: "s2", + kind: "task.started", + payload: { taskId: "by-type", taskType: "local_workflow" }, + }), + buildActivity({ + id: "u1", + kind: "task.workflow-updated", + payload: { taskId: "by-updated", workflowProgress: [] }, + }), + buildActivity({ + id: "m1", + kind: "task.workflow-meta", + payload: { taskId: "by-meta" }, + }), + ]); + expect([...ids].sort()).toEqual(["by-meta", "by-name", "by-type", "by-updated"]); + }); + + it("does not collect plain task ids", () => { + const ids = collectWorkflowTaskIds([ + buildActivity({ + id: "plain", + kind: "task.started", + payload: { taskId: "plain-1", taskType: "plan" }, + }), + buildActivity({ + id: "plain-progress", + kind: "task.progress", + payload: { taskId: "plain-1" }, + }), + ]); + expect(ids.has("plain-1")).toBe(false); + expect(ids.size).toBe(0); + }); +}); diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts new file mode 100644 index 00000000000..9f78fa89884 --- /dev/null +++ b/apps/web/src/workflow-logic.ts @@ -0,0 +1,466 @@ +import type { OrchestrationThreadActivity, TurnId } from "@t3tools/contracts"; + +/** + * Derivation of workflow-run view models from thread activities. + * + * Workflow state arrives as three activity kinds emitted by the server: + * - `task.started` / `task.completed` — lifecycle (shared with plain tasks) + * - `task.workflow-updated` — cumulative snapshot (phases, agents, logs), + * upserted under a stable activity id per task + * - `task.workflow-meta` — run handles (script path, transcript dir, run id) + * + * Everything here parses `activity.payload` defensively: payloads are + * `unknown` end-to-end and originate from an undocumented SDK surface, so a + * malformed field must degrade to less detail, never throw. 
+ */ + +export type WorkflowAgentStatus = "queued" | "running" | "done" | "error"; + +export interface WorkflowRunAgent { + index: number; + status: WorkflowAgentStatus; + state: string; + label?: string | undefined; + phaseIndex?: number | undefined; + phaseTitle?: string | undefined; + agentId?: string | undefined; + agentType?: string | undefined; + model?: string | undefined; + isolation?: "worktree" | "remote" | undefined; + attempt?: number | undefined; + queuedAt?: number | undefined; + startedAt?: number | undefined; + lastProgressAt?: number | undefined; + cached?: boolean | undefined; + remoteSessionId?: string | undefined; + lastToolName?: string | undefined; + lastToolSummary?: string | undefined; + promptPreview?: string | undefined; + resultPreview?: string | undefined; + error?: string | undefined; +} + +export interface WorkflowRunPhase { + index: number; + title: string; + kind?: string | undefined; + agents: WorkflowRunAgent[]; +} + +export interface WorkflowRunUsage { + totalTokens?: number | undefined; + toolUses?: number | undefined; + durationMs?: number | undefined; +} + +export interface WorkflowRunHandlesView { + runId?: string | undefined; + taskType?: string | undefined; + scriptPath?: string | undefined; + transcriptDir?: string | undefined; + sessionUrl?: string | undefined; + warning?: string | undefined; +} + +export type WorkflowRunStatus = "running" | "completed" | "failed" | "stopped"; + +export interface WorkflowRun { + taskId: string; + status: WorkflowRunStatus; + createdAt: string; + updatedAt: string; + turnId: TurnId | null; + name?: string | undefined; + description?: string | undefined; + completionSummary?: string | undefined; + phases: WorkflowRunPhase[]; + logs: string[]; + usage?: WorkflowRunUsage | undefined; + handles?: WorkflowRunHandlesView | undefined; + agentCounts: { + total: number; + queued: number; + running: number; + done: number; + error: number; + }; +} + +function asRecord(value: unknown): Record | 
undefined { + return value !== null && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function asString(value: unknown): string | undefined { + return typeof value === "string" && value.trim().length > 0 ? value : undefined; +} + +function asNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +export function deriveWorkflowAgentStatus(input: { + state: string; + startedAt?: number; +}): WorkflowAgentStatus { + if (input.state === "done") { + return "done"; + } + if (input.state === "error") { + return "error"; + } + // "start" plus any state a future SDK adds: running once work has begun. + return input.startedAt !== undefined ? "running" : "queued"; +} + +function parseAgentEntry(entry: Record): WorkflowRunAgent | undefined { + const index = asNumber(entry.index); + const state = asString(entry.state); + if (index === undefined || state === undefined) { + return undefined; + } + const startedAt = asNumber(entry.startedAt); + const isolation = + entry.isolation === "worktree" || entry.isolation === "remote" ? entry.isolation : undefined; + return { + index, + state, + status: deriveWorkflowAgentStatus({ state, ...(startedAt !== undefined ? { startedAt } : {}) }), + ...(asString(entry.label) !== undefined ? { label: asString(entry.label) } : {}), + ...(asNumber(entry.phaseIndex) !== undefined ? { phaseIndex: asNumber(entry.phaseIndex) } : {}), + ...(asString(entry.phaseTitle) !== undefined ? { phaseTitle: asString(entry.phaseTitle) } : {}), + ...(asString(entry.agentId) !== undefined ? { agentId: asString(entry.agentId) } : {}), + ...(asString(entry.agentType) !== undefined ? { agentType: asString(entry.agentType) } : {}), + ...(asString(entry.model) !== undefined ? { model: asString(entry.model) } : {}), + ...(isolation !== undefined ? { isolation } : {}), + ...(asNumber(entry.attempt) !== undefined ? 
{ attempt: asNumber(entry.attempt) } : {}), + ...(asNumber(entry.queuedAt) !== undefined ? { queuedAt: asNumber(entry.queuedAt) } : {}), + ...(startedAt !== undefined ? { startedAt } : {}), + ...(asNumber(entry.lastProgressAt) !== undefined + ? { lastProgressAt: asNumber(entry.lastProgressAt) } + : {}), + ...(entry.cached === true ? { cached: true } : {}), + ...(asString(entry.remoteSessionId) !== undefined + ? { remoteSessionId: asString(entry.remoteSessionId) } + : {}), + ...(asString(entry.lastToolName) !== undefined + ? { lastToolName: asString(entry.lastToolName) } + : {}), + ...(asString(entry.lastToolSummary) !== undefined + ? { lastToolSummary: asString(entry.lastToolSummary) } + : {}), + ...(asString(entry.promptPreview) !== undefined + ? { promptPreview: asString(entry.promptPreview) } + : {}), + ...(asString(entry.resultPreview) !== undefined + ? { resultPreview: asString(entry.resultPreview) } + : {}), + ...(asString(entry.error) !== undefined ? { error: asString(entry.error) } : {}), + }; +} + +interface ParsedWorkflowProgress { + phases: Array<{ index: number; title: string; kind?: string | undefined }>; + agents: WorkflowRunAgent[]; + logs: string[]; +} + +function parseWorkflowProgress(value: unknown): ParsedWorkflowProgress { + const parsed: ParsedWorkflowProgress = { phases: [], agents: [], logs: [] }; + if (!Array.isArray(value)) { + return parsed; + } + // Later entries for the same index win: snapshots are cumulative and the + // runner may re-emit an agent slot on retry. 
+ const agentsByIndex = new Map(); + const phasesByIndex = new Map< + number, + { index: number; title: string; kind?: string | undefined } + >(); + for (const raw of value) { + const entry = asRecord(raw); + if (!entry) { + continue; + } + switch (entry.type) { + case "workflow_agent": { + const agent = parseAgentEntry(entry); + if (agent) { + agentsByIndex.set(agent.index, agent); + } + break; + } + case "workflow_phase": { + const index = asNumber(entry.index); + const title = asString(entry.title); + if (index !== undefined && title !== undefined) { + phasesByIndex.set(index, { + index, + title, + ...(asString(entry.kind) !== undefined ? { kind: asString(entry.kind) } : {}), + }); + } + break; + } + case "workflow_log": { + const message = asString(entry.message); + if (message !== undefined) { + parsed.logs.push(message); + } + break; + } + default: + break; + } + } + parsed.agents = [...agentsByIndex.values()].toSorted((a, b) => a.index - b.index); + parsed.phases = [...phasesByIndex.values()].toSorted((a, b) => a.index - b.index); + return parsed; +} + +function parseUsage(value: unknown): WorkflowRunUsage | undefined { + const record = asRecord(value); + if (!record) { + return undefined; + } + const totalTokens = asNumber(record.total_tokens); + const toolUses = asNumber(record.tool_uses); + const durationMs = asNumber(record.duration_ms); + if (totalTokens === undefined && toolUses === undefined && durationMs === undefined) { + return undefined; + } + return { + ...(totalTokens !== undefined ? { totalTokens } : {}), + ...(toolUses !== undefined ? { toolUses } : {}), + ...(durationMs !== undefined ? 
{ durationMs } : {}), + }; +} + +export function groupWorkflowAgentsByPhase(parsed: { + phases: ReadonlyArray<{ index: number; title: string; kind?: string | undefined }>; + agents: ReadonlyArray; +}): WorkflowRunPhase[] { + const phases = new Map(); + for (const phase of parsed.phases) { + phases.set(phase.index, { ...phase, agents: [] }); + } + const UNPHASED = -1; + for (const agent of parsed.agents) { + const phaseIndex = agent.phaseIndex ?? UNPHASED; + let phase = phases.get(phaseIndex); + if (!phase) { + phase = { + index: phaseIndex, + title: agent.phaseTitle ?? (phaseIndex === UNPHASED ? "Agents" : `Phase ${phaseIndex}`), + agents: [], + }; + phases.set(phaseIndex, phase); + } + phase.agents.push(agent); + } + return [...phases.values()] + .filter( + (phase) => phase.agents.length > 0 || parsed.phases.some((p) => p.index === phase.index), + ) + .toSorted((a, b) => a.index - b.index); +} + +interface MutableWorkflowRun extends WorkflowRun { + hasStartedActivity: boolean; +} + +function isWorkflowTaskStartedPayload(payload: Record): boolean { + return payload.taskType === "local_workflow" || asString(payload.workflowName) !== undefined; +} + +/** Task ids owned by a workflow run — used to suppress duplicate work-log rows. */ +export function collectWorkflowTaskIds( + activities: ReadonlyArray, +): Set { + const taskIds = new Set(); + for (const activity of activities) { + const payload = asRecord(activity.payload); + const taskId = payload ? 
asString(payload.taskId) : undefined; + if (!taskId) { + continue; + } + if ( + activity.kind === "task.workflow-updated" || + activity.kind === "task.workflow-meta" || + (activity.kind === "task.started" && + payload !== undefined && + isWorkflowTaskStartedPayload(payload)) + ) { + taskIds.add(taskId); + } + } + return taskIds; +} + +export function deriveWorkflowRuns( + activities: ReadonlyArray, +): WorkflowRun[] { + const ordered = [...activities].toSorted( + (a, b) => a.createdAt.localeCompare(b.createdAt) || a.id.localeCompare(b.id), + ); + const runs = new Map(); + + const ensureRun = (taskId: string, activity: OrchestrationThreadActivity): MutableWorkflowRun => { + const existing = runs.get(taskId); + if (existing) { + return existing; + } + const run: MutableWorkflowRun = { + taskId, + status: "running", + createdAt: activity.createdAt, + updatedAt: activity.createdAt, + turnId: activity.turnId, + phases: [], + logs: [], + agentCounts: { total: 0, queued: 0, running: 0, done: 0, error: 0 }, + hasStartedActivity: false, + }; + runs.set(taskId, run); + return run; + }; + + for (const activity of ordered) { + const payload = asRecord(activity.payload); + if (!payload) { + continue; + } + const taskId = asString(payload.taskId); + if (!taskId) { + continue; + } + + switch (activity.kind) { + case "task.started": { + if (!isWorkflowTaskStartedPayload(payload) && !runs.has(taskId)) { + break; + } + const run = ensureRun(taskId, activity); + run.hasStartedActivity = true; + run.createdAt = activity.createdAt; + run.turnId = activity.turnId; + const name = asString(payload.workflowName); + if (name !== undefined) { + run.name = name; + } + const detail = asString(payload.detail); + if (detail !== undefined) { + run.description = detail; + } + break; + } + case "task.workflow-updated": { + const run = ensureRun(taskId, activity); + run.updatedAt = activity.createdAt; + const description = asString(payload.description); + if (description !== undefined && 
run.description === undefined) { + run.description = description; + } + const parsed = parseWorkflowProgress(payload.workflowProgress); + run.phases = groupWorkflowAgentsByPhase(parsed); + run.logs = parsed.logs; + run.agentCounts = { + total: parsed.agents.length, + queued: parsed.agents.filter((agent) => agent.status === "queued").length, + running: parsed.agents.filter((agent) => agent.status === "running").length, + done: parsed.agents.filter((agent) => agent.status === "done").length, + error: parsed.agents.filter((agent) => agent.status === "error").length, + }; + const usage = parseUsage(payload.usage); + if (usage !== undefined) { + run.usage = usage; + } + break; + } + case "task.workflow-meta": { + const run = ensureRun(taskId, activity); + run.updatedAt = activity.createdAt; + const name = asString(payload.workflowName); + if (name !== undefined) { + run.name = name; + } + run.handles = { + ...(asString(payload.runId) !== undefined ? { runId: asString(payload.runId) } : {}), + ...(asString(payload.taskType) !== undefined + ? { taskType: asString(payload.taskType) } + : {}), + ...(asString(payload.scriptPath) !== undefined + ? { scriptPath: asString(payload.scriptPath) } + : {}), + ...(asString(payload.transcriptDir) !== undefined + ? { transcriptDir: asString(payload.transcriptDir) } + : {}), + ...(asString(payload.sessionUrl) !== undefined + ? { sessionUrl: asString(payload.sessionUrl) } + : {}), + ...(asString(payload.warning) !== undefined + ? { warning: asString(payload.warning) } + : {}), + }; + break; + } + case "task.completed": { + const run = runs.get(taskId); + if (!run) { + break; + } + run.updatedAt = activity.createdAt; + run.status = + payload.status === "failed" + ? "failed" + : payload.status === "stopped" + ? 
"stopped" + : "completed"; + const detail = asString(payload.detail); + if (detail !== undefined) { + run.completionSummary = detail; + } + break; + } + default: + break; + } + } + + return [...runs.values()] + .map(({ hasStartedActivity: _hasStartedActivity, ...run }) => run) + .toSorted((a, b) => a.createdAt.localeCompare(b.createdAt) || a.taskId.localeCompare(b.taskId)); +} + +export function isRemoteWorkflowRun(run: WorkflowRun): boolean { + return run.handles?.taskType === "remote_agent" || run.handles?.sessionUrl !== undefined; +} + +export function workflowRunTitle(run: WorkflowRun): string { + return run.name ?? run.description ?? "Workflow"; +} + +export function formatWorkflowDuration(durationMs: number): string { + const totalSeconds = Math.max(0, Math.round(durationMs / 1000)); + const minutes = Math.floor(totalSeconds / 60); + const seconds = totalSeconds % 60; + if (minutes === 0) { + return `${seconds}s`; + } + const hours = Math.floor(minutes / 60); + if (hours === 0) { + return `${minutes}m ${seconds.toString().padStart(2, "0")}s`; + } + return `${hours}h ${(minutes % 60).toString().padStart(2, "0")}m`; +} + +export function formatWorkflowTokens(totalTokens: number): string { + if (totalTokens < 1000) { + return `${totalTokens}`; + } + if (totalTokens < 1_000_000) { + return `${(totalTokens / 1000).toFixed(totalTokens < 10_000 ? 
1 : 0)}k`; + } + return `${(totalTokens / 1_000_000).toFixed(1)}M`; +} diff --git a/packages/client-runtime/package.json b/packages/client-runtime/package.json index d9e19889721..e3f1b4d3509 100644 --- a/packages/client-runtime/package.json +++ b/packages/client-runtime/package.json @@ -130,6 +130,10 @@ "./state/vcs": { "types": "./src/state/vcs.ts", "default": "./src/state/vcs.ts" + }, + "./state/workflow": { + "types": "./src/state/workflow.ts", + "default": "./src/state/workflow.ts" } }, "scripts": { diff --git a/packages/client-runtime/src/operations/commands.ts b/packages/client-runtime/src/operations/commands.ts index a0c3cbe771f..bcea86e845c 100644 --- a/packages/client-runtime/src/operations/commands.ts +++ b/packages/client-runtime/src/operations/commands.ts @@ -44,6 +44,7 @@ export type RespondToThreadApprovalInput = CommandInput<"thread.approval.respond export type RespondToThreadUserInputInput = CommandInput<"thread.user-input.respond">; export type RevertThreadCheckpointInput = CommandInput<"thread.checkpoint.revert">; export type StopThreadSessionInput = CommandInput<"thread.session.stop">; +export type StopThreadTaskInput = CommandInput<"thread.task.stop">; type DispatchTag = typeof ORCHESTRATION_WS_METHODS.dispatchCommand; type CommandEffect = Effect.Effect< @@ -254,3 +255,15 @@ export const stopThreadSession: (input: StopThreadSessionInput) => CommandEffect createdAt: metadata.createdAt, }); }); + +export const stopThreadTask: (input: StopThreadTaskInput) => CommandEffect = Effect.fn( + "EnvironmentCommands.stopThreadTask", +)(function* (input) { + const metadata = yield* timestampedCommandMetadata(input); + return yield* dispatch({ + ...input, + type: "thread.task.stop", + commandId: metadata.commandId, + createdAt: metadata.createdAt, + }); +}); diff --git a/packages/client-runtime/src/state/threadCommands.ts b/packages/client-runtime/src/state/threadCommands.ts index aab5110e9cf..20158750ac4 100644 --- 
a/packages/client-runtime/src/state/threadCommands.ts +++ b/packages/client-runtime/src/state/threadCommands.ts @@ -14,12 +14,14 @@ import { type SetThreadRuntimeModeInput, type StartThreadTurnInput, type StopThreadSessionInput, + type StopThreadTaskInput, type UnarchiveThreadInput, type UpdateThreadMetadataInput, archiveThread, createThread, deleteThread, interruptThreadTurn, + stopThreadTask, respondToThreadApproval, respondToThreadUserInput, revertThreadCheckpoint, @@ -44,6 +46,7 @@ export type { SetThreadRuntimeModeInput, StartThreadTurnInput, StopThreadSessionInput, + StopThreadTaskInput, UnarchiveThreadInput, UpdateThreadMetadataInput, } from "../operations/commands.ts"; @@ -136,5 +139,11 @@ export function createThreadEnvironmentAtoms( scheduler, concurrency, }), + stopTask: createEnvironmentCommand(runtime, { + label: "environment-data:commands:thread:stop-task", + execute: (input: StopThreadTaskInput) => stopThreadTask(input), + scheduler, + concurrency, + }), }; } diff --git a/packages/client-runtime/src/state/workflow.ts b/packages/client-runtime/src/state/workflow.ts new file mode 100644 index 00000000000..cf169623e57 --- /dev/null +++ b/packages/client-runtime/src/state/workflow.ts @@ -0,0 +1,34 @@ +import { WS_METHODS } from "@t3tools/contracts"; +import { Atom } from "effect/unstable/reactivity"; + +import { createEnvironmentRpcCommand, createEnvironmentRpcQueryAtomFamily } from "./runtime.ts"; +import type { EnvironmentRegistry } from "../connection/registry.ts"; + +/** + * Workflow-run inspection atoms (Claude Agent SDK workflow artifacts). + * + * `readScript` and `readJournal` are cached queries — the script never + * changes for a given path within a run, and journal reads are refreshed by + * re-query. `readAgentTranscript` is an imperative command because the + * caller drives cursor-paged polling while a transcript pane is open. 
+ */ +export function createWorkflowEnvironmentAtoms( + runtime: Atom.AtomRuntime, +) { + return { + readScript: createEnvironmentRpcQueryAtomFamily(runtime, { + label: "environment-data:workflow:read-script", + tag: WS_METHODS.workflowReadScript, + staleTimeMs: 30_000, + }), + readJournal: createEnvironmentRpcQueryAtomFamily(runtime, { + label: "environment-data:workflow:read-journal", + tag: WS_METHODS.workflowReadJournal, + staleTimeMs: 5_000, + }), + readAgentTranscript: createEnvironmentRpcCommand(runtime, { + label: "environment-data:workflow:read-agent-transcript", + tag: WS_METHODS.workflowReadAgentTranscript, + }), + }; +} diff --git a/packages/contracts/src/index.ts b/packages/contracts/src/index.ts index 43270efdec7..645fe2273ea 100644 --- a/packages/contracts/src/index.ts +++ b/packages/contracts/src/index.ts @@ -16,6 +16,7 @@ export * from "./server.ts"; export * from "./settings.ts"; export * from "./git.ts"; export * from "./vcs.ts"; +export * from "./workflow.ts"; export * from "./sourceControl.ts"; export * from "./orchestration.ts"; export * from "./editor.ts"; diff --git a/packages/contracts/src/orchestration.ts b/packages/contracts/src/orchestration.ts index 623fed0917b..522eeb5d206 100644 --- a/packages/contracts/src/orchestration.ts +++ b/packages/contracts/src/orchestration.ts @@ -657,6 +657,18 @@ const ThreadSessionStopCommand = Schema.Struct({ createdAt: IsoDateTime, }); +/** + * Stop one background task (e.g. a running workflow) inside an active + * provider session, without interrupting the session itself. 
+ */ +const ThreadTaskStopCommand = Schema.Struct({ + type: Schema.Literal("thread.task.stop"), + commandId: CommandId, + threadId: ThreadId, + taskId: TrimmedNonEmptyString, + createdAt: IsoDateTime, +}); + const DispatchableClientOrchestrationCommand = Schema.Union([ ProjectCreateCommand, ProjectMetaUpdateCommand, @@ -674,6 +686,7 @@ const DispatchableClientOrchestrationCommand = Schema.Union([ ThreadUserInputRespondCommand, ThreadCheckpointRevertCommand, ThreadSessionStopCommand, + ThreadTaskStopCommand, ]); export type DispatchableClientOrchestrationCommand = typeof DispatchableClientOrchestrationCommand.Type; @@ -695,6 +708,7 @@ export const ClientOrchestrationCommand = Schema.Union([ ThreadUserInputRespondCommand, ThreadCheckpointRevertCommand, ThreadSessionStopCommand, + ThreadTaskStopCommand, ]); export type ClientOrchestrationCommand = typeof ClientOrchestrationCommand.Type; @@ -799,6 +813,7 @@ export const OrchestrationEventType = Schema.Literals([ "thread.checkpoint-revert-requested", "thread.reverted", "thread.session-stop-requested", + "thread.task-stop-requested", "thread.session-set", "thread.proposed-plan-upserted", "thread.turn-diff-completed", @@ -951,6 +966,12 @@ export const ThreadSessionStopRequestedPayload = Schema.Struct({ createdAt: IsoDateTime, }); +export const ThreadTaskStopRequestedPayload = Schema.Struct({ + threadId: ThreadId, + taskId: TrimmedNonEmptyString, + createdAt: IsoDateTime, +}); + export const ThreadSessionSetPayload = Schema.Struct({ threadId: ThreadId, session: OrchestrationSession, @@ -1089,6 +1110,11 @@ export const OrchestrationEvent = Schema.Union([ type: Schema.Literal("thread.session-stop-requested"), payload: ThreadSessionStopRequestedPayload, }), + Schema.Struct({ + ...EventBaseFields, + type: Schema.Literal("thread.task-stop-requested"), + payload: ThreadTaskStopRequestedPayload, + }), Schema.Struct({ ...EventBaseFields, type: Schema.Literal("thread.session-set"), diff --git a/packages/contracts/src/provider.ts 
b/packages/contracts/src/provider.ts index 94fb007a7bc..ec51fb8e645 100644 --- a/packages/contracts/src/provider.ts +++ b/packages/contracts/src/provider.ts @@ -95,6 +95,12 @@ export const ProviderStopSessionInput = Schema.Struct({ }); export type ProviderStopSessionInput = typeof ProviderStopSessionInput.Type; +export const ProviderStopTaskInput = Schema.Struct({ + threadId: ThreadId, + taskId: TrimmedNonEmptyString, +}); +export type ProviderStopTaskInput = typeof ProviderStopTaskInput.Type; + export const ProviderRespondToRequestInput = Schema.Struct({ threadId: ThreadId, requestId: ApprovalRequestId, diff --git a/packages/contracts/src/providerRuntime.ts b/packages/contracts/src/providerRuntime.ts index eb2563eff00..745733d98d3 100644 --- a/packages/contracts/src/providerRuntime.ts +++ b/packages/contracts/src/providerRuntime.ts @@ -14,6 +14,7 @@ import { TurnId, } from "./baseSchemas.ts"; import { ProviderInstanceId, ProviderDriverKind } from "./providerInstance.ts"; +import { WorkflowProgressEntry, WorkflowRunHandles } from "./workflow.ts"; const TrimmedNonEmptyStringSchema = TrimmedNonEmptyString; const UnknownRecordSchema = Schema.Record(Schema.String, Schema.Unknown); @@ -177,6 +178,7 @@ const ProviderRuntimeEventType = Schema.Literals([ "task.started", "task.progress", "task.completed", + "task.workflowMeta", "hook.started", "hook.progress", "hook.completed", @@ -227,6 +229,7 @@ const UserInputResolvedType = Schema.Literal("user-input.resolved"); const TaskStartedType = Schema.Literal("task.started"); const TaskProgressType = Schema.Literal("task.progress"); const TaskCompletedType = Schema.Literal("task.completed"); +const TaskWorkflowMetaType = Schema.Literal("task.workflowMeta"); const HookStartedType = Schema.Literal("hook.started"); const HookProgressType = Schema.Literal("hook.progress"); const HookCompletedType = Schema.Literal("hook.completed"); @@ -463,6 +466,9 @@ const TaskStartedPayload = Schema.Struct({ taskId: RuntimeTaskId, description: 
Schema.optional(TrimmedNonEmptyStringSchema), taskType: Schema.optional(TrimmedNonEmptyStringSchema), + toolUseId: Schema.optional(TrimmedNonEmptyStringSchema), + /** meta.name from the workflow script; set when taskType is "local_workflow". */ + workflowName: Schema.optional(TrimmedNonEmptyStringSchema), }); export type TaskStartedPayload = typeof TaskStartedPayload.Type; @@ -472,9 +478,22 @@ const TaskProgressPayload = Schema.Struct({ summary: Schema.optional(TrimmedNonEmptyStringSchema), usage: Schema.optional(Schema.Unknown), lastToolName: Schema.optional(TrimmedNonEmptyStringSchema), + /** + * Cumulative workflow snapshot (phases, agents, narration) for + * `local_workflow` tasks. Normalized and size-capped by the adapter. + */ + workflowProgress: Schema.optional(Schema.Array(WorkflowProgressEntry)), }); export type TaskProgressPayload = typeof TaskProgressPayload.Type; +/** + * Emitted once per workflow run when the Workflow tool result is observed — + * carries the run handles (script path, transcript dir, run id) that the + * progress stream does not repeat. 
+ */ +const TaskWorkflowMetaPayload = WorkflowRunHandles; +export type TaskWorkflowMetaPayload = WorkflowRunHandles; + const TaskCompletedPayload = Schema.Struct({ taskId: RuntimeTaskId, status: Schema.Literals(["completed", "failed", "stopped"]), @@ -842,6 +861,13 @@ const ProviderRuntimeTaskCompletedEvent = Schema.Struct({ }); export type ProviderRuntimeTaskCompletedEvent = typeof ProviderRuntimeTaskCompletedEvent.Type; +const ProviderRuntimeTaskWorkflowMetaEvent = Schema.Struct({ + ...ProviderRuntimeEventBase.fields, + type: TaskWorkflowMetaType, + payload: TaskWorkflowMetaPayload, +}); +export type ProviderRuntimeTaskWorkflowMetaEvent = typeof ProviderRuntimeTaskWorkflowMetaEvent.Type; + const ProviderRuntimeHookStartedEvent = Schema.Struct({ ...ProviderRuntimeEventBase.fields, type: HookStartedType, @@ -996,6 +1022,7 @@ export const ProviderRuntimeEventV2 = Schema.Union([ ProviderRuntimeTaskStartedEvent, ProviderRuntimeTaskProgressEvent, ProviderRuntimeTaskCompletedEvent, + ProviderRuntimeTaskWorkflowMetaEvent, ProviderRuntimeHookStartedEvent, ProviderRuntimeHookProgressEvent, ProviderRuntimeHookCompletedEvent, diff --git a/packages/contracts/src/rpc.ts b/packages/contracts/src/rpc.ts index 48c5d9a774d..4fb7f707262 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -143,6 +143,15 @@ import { SourceControlRepositoryLookupInput, } from "./sourceControl.ts"; import { VcsError } from "./vcs.ts"; +import { + WorkflowInspectionError, + WorkflowReadAgentTranscriptInput, + WorkflowReadAgentTranscriptResult, + WorkflowReadJournalInput, + WorkflowReadJournalResult, + WorkflowReadScriptInput, + WorkflowReadScriptResult, +} from "./workflow.ts"; export const WS_METHODS = { // Project registry methods @@ -179,6 +188,11 @@ export const WS_METHODS = { // Review methods reviewGetDiffPreview: "review.getDiffPreview", + // Workflow inspection methods (Claude Agent SDK workflow runs) + workflowReadScript: "workflow.readScript", + 
workflowReadJournal: "workflow.readJournal", + workflowReadAgentTranscript: "workflow.readAgentTranscript", + // Terminal methods terminalOpen: "terminal.open", terminalAttach: "terminal.attach", @@ -478,6 +492,24 @@ export const WsReviewGetDiffPreviewRpc = Rpc.make(WS_METHODS.reviewGetDiffPrevie error: Schema.Union([ReviewDiffPreviewError, EnvironmentAuthorizationError]), }); +export const WsWorkflowReadScriptRpc = Rpc.make(WS_METHODS.workflowReadScript, { + payload: WorkflowReadScriptInput, + success: WorkflowReadScriptResult, + error: Schema.Union([WorkflowInspectionError, EnvironmentAuthorizationError]), +}); + +export const WsWorkflowReadJournalRpc = Rpc.make(WS_METHODS.workflowReadJournal, { + payload: WorkflowReadJournalInput, + success: WorkflowReadJournalResult, + error: Schema.Union([WorkflowInspectionError, EnvironmentAuthorizationError]), +}); + +export const WsWorkflowReadAgentTranscriptRpc = Rpc.make(WS_METHODS.workflowReadAgentTranscript, { + payload: WorkflowReadAgentTranscriptInput, + success: WorkflowReadAgentTranscriptResult, + error: Schema.Union([WorkflowInspectionError, EnvironmentAuthorizationError]), +}); + export const WsTerminalOpenRpc = Rpc.make(WS_METHODS.terminalOpen, { payload: TerminalOpenInput, success: TerminalSessionSnapshot, @@ -719,6 +751,9 @@ export const WsRpcGroup = RpcGroup.make( WsVcsSwitchRefRpc, WsVcsInitRpc, WsReviewGetDiffPreviewRpc, + WsWorkflowReadScriptRpc, + WsWorkflowReadJournalRpc, + WsWorkflowReadAgentTranscriptRpc, WsTerminalOpenRpc, WsTerminalAttachRpc, WsTerminalWriteRpc, diff --git a/packages/contracts/src/workflow.test.ts b/packages/contracts/src/workflow.test.ts new file mode 100644 index 00000000000..5ee8cacc09d --- /dev/null +++ b/packages/contracts/src/workflow.test.ts @@ -0,0 +1,145 @@ +import * as Schema from "effect/Schema"; +import { describe, expect, it } from "vite-plus/test"; + +import { + WorkflowAgentProgressEntry, + WorkflowInspectionError, + WorkflowLogProgressEntry, + 
WorkflowPhaseProgressEntry, + WorkflowProgressEntry, + WorkflowRunHandles, +} from "./workflow.ts"; + +const decodeAgent = Schema.decodeUnknownSync(WorkflowAgentProgressEntry); +const decodePhase = Schema.decodeUnknownSync(WorkflowPhaseProgressEntry); +const decodeLog = Schema.decodeUnknownSync(WorkflowLogProgressEntry); +const decodeEntry = Schema.decodeUnknownSync(WorkflowProgressEntry); +const decodeHandles = Schema.decodeUnknownSync(WorkflowRunHandles); + +describe("WorkflowProgressEntry variants", () => { + it("decodes a minimal workflow_agent entry and leaves optional fields absent", () => { + const agent = decodeAgent({ type: "workflow_agent", index: 0, state: "start" }); + expect(agent).toEqual({ type: "workflow_agent", index: 0, state: "start" }); + expect(agent.label).toBeUndefined(); + expect(agent.phaseIndex).toBeUndefined(); + expect(agent.startedAt).toBeUndefined(); + }); + + it("decodes a workflow_agent with the full optional surface, including isolation literals", () => { + const agent = decodeAgent({ + type: "workflow_agent", + index: 3, + state: "done", + label: "reviewer", + phaseIndex: 1, + phaseTitle: "Review", + agentId: "agent-3", + agentType: "code-reviewer", + model: "claude", + fallbackModel: "haiku", + isolation: "worktree", + attempt: 2, + queuedAt: 10, + startedAt: 20, + lastProgressAt: 30, + cached: true, + remoteSessionId: "remote-1", + lastToolName: "Bash", + lastToolSummary: "ran tests", + promptPreview: "do the thing", + resultPreview: "done", + error: "none", + }); + expect(agent.isolation).toBe("worktree"); + expect(agent.phaseIndex).toBe(1); + expect(agent.cached).toBe(true); + }); + + it("rejects an unknown isolation literal", () => { + expect(() => + decodeAgent({ type: "workflow_agent", index: 0, state: "start", isolation: "cloud" }), + ).toThrow(); + }); + + // Effect's Schema.Struct defaults to onExcessProperty: "ignore", so unknown + // extra keys DECODE SUCCESSFULLY and are stripped rather than rejected. 
+ it("accepts and strips unknown extra keys on a workflow_agent", () => { + const agent = decodeAgent({ + type: "workflow_agent", + index: 0, + state: "start", + somethingNew: "from a future SDK", + }); + expect(agent).toEqual({ type: "workflow_agent", index: 0, state: "start" }); + expect(agent).not.toHaveProperty("somethingNew"); + }); + + it("decodes a minimal workflow_phase and keeps optional kind absent when omitted", () => { + const phase = decodePhase({ type: "workflow_phase", index: 0, title: "Plan" }); + expect(phase).toEqual({ type: "workflow_phase", index: 0, title: "Plan" }); + expect(phase.kind).toBeUndefined(); + }); + + it("decodes a workflow_log entry", () => { + const log = decodeLog({ type: "workflow_log", message: "starting up" }); + expect(log).toEqual({ type: "workflow_log", message: "starting up" }); + }); + + it("decodes each variant through the union by its discriminant", () => { + expect(decodeEntry({ type: "workflow_agent", index: 0, state: "start" }).type).toBe( + "workflow_agent", + ); + expect(decodeEntry({ type: "workflow_phase", index: 0, title: "Plan" }).type).toBe( + "workflow_phase", + ); + expect(decodeEntry({ type: "workflow_log", message: "hi" }).type).toBe("workflow_log"); + }); + + it("rejects an unknown entry type in the union", () => { + expect(() => decodeEntry({ type: "workflow_mystery", index: 0 })).toThrow(); + }); +}); + +describe("WorkflowRunHandles", () => { + it("decodes with only the required taskId", () => { + const handles = decodeHandles({ taskId: "task-1" }); + expect(handles).toEqual({ taskId: "task-1" }); + expect(handles.runId).toBeUndefined(); + expect(handles.sessionUrl).toBeUndefined(); + }); + + it("requires taskId", () => { + expect(() => decodeHandles({ runId: "wf_abc" })).toThrow(); + }); + + it("rejects a blank (untrimmed-empty) taskId", () => { + expect(() => decodeHandles({ taskId: " " })).toThrow(); + }); + + it("decodes the full remote handle surface", () => { + const handles = decodeHandles({ + 
taskId: "task-1", + runId: "wf_abc", + workflowName: "spec", + taskType: "remote_agent", + scriptPath: "/x/s.js", + transcriptDir: "/x/t", + sessionUrl: "https://example.com/run", + warning: "degraded", + }); + expect(handles.sessionUrl).toBe("https://example.com/run"); + expect(handles.taskType).toBe("remote_agent"); + }); +}); + +describe("WorkflowInspectionError", () => { + it("derives a stable message from operation and detail", () => { + const error = new WorkflowInspectionError({ + operation: "readScript", + reason: "not-found", + detail: "no such file", + }); + expect(error.message).toBe("Workflow inspection failed in readScript: no such file"); + expect(error.reason).toBe("not-found"); + }); +}); diff --git a/packages/contracts/src/workflow.ts b/packages/contracts/src/workflow.ts new file mode 100644 index 00000000000..470b920d9c3 --- /dev/null +++ b/packages/contracts/src/workflow.ts @@ -0,0 +1,161 @@ +import * as Schema from "effect/Schema"; + +import { TrimmedNonEmptyString } from "./baseSchemas.ts"; + +/** + * Contracts for Claude Agent SDK workflow-run visibility. + * + * A "workflow" is a background orchestration task the Claude Agent SDK runs + * in-process (the `Workflow` tool). The SDK streams a cumulative progress + * snapshot on every `task_progress` message via the (currently undocumented) + * `workflow_progress` field, and writes per-agent transcripts plus a result + * journal to a transcript directory on disk. These schemas model the subset + * the server forwards to clients. + * + * Every field that originates from the undocumented SDK surface is optional: + * the adapter normalizes entries defensively, and clients must tolerate + * absent fields so an SDK upgrade degrades to less detail, never to a + * decode failure. + */ + +/** + * One `agent()` call inside a workflow run. `index` is the SDK's stable + * per-run agent ordinal; snapshots are merged last-write-wins by `index`. 
+ * `state` is an open string ("start" | "done" | "error" today) — clients + * must render unknown states as "running". + */ +export const WorkflowAgentProgressEntry = Schema.Struct({ + type: Schema.Literal("workflow_agent"), + index: Schema.Number, + state: Schema.String, + label: Schema.optional(Schema.String), + phaseIndex: Schema.optional(Schema.Number), + phaseTitle: Schema.optional(Schema.String), + agentId: Schema.optional(Schema.String), + agentType: Schema.optional(Schema.String), + model: Schema.optional(Schema.String), + fallbackModel: Schema.optional(Schema.String), + isolation: Schema.optional(Schema.Literals(["worktree", "remote"])), + attempt: Schema.optional(Schema.Number), + queuedAt: Schema.optional(Schema.Number), + startedAt: Schema.optional(Schema.Number), + lastProgressAt: Schema.optional(Schema.Number), + cached: Schema.optional(Schema.Boolean), + remoteSessionId: Schema.optional(Schema.String), + lastToolName: Schema.optional(Schema.String), + lastToolSummary: Schema.optional(Schema.String), + promptPreview: Schema.optional(Schema.String), + resultPreview: Schema.optional(Schema.String), + error: Schema.optional(Schema.String), +}); +export type WorkflowAgentProgressEntry = typeof WorkflowAgentProgressEntry.Type; + +export const WorkflowPhaseProgressEntry = Schema.Struct({ + type: Schema.Literal("workflow_phase"), + index: Schema.Number, + title: Schema.String, + kind: Schema.optional(Schema.String), +}); +export type WorkflowPhaseProgressEntry = typeof WorkflowPhaseProgressEntry.Type; + +/** A `log()` narration line emitted by the workflow script. 
*/ +export const WorkflowLogProgressEntry = Schema.Struct({ + type: Schema.Literal("workflow_log"), + message: Schema.String, +}); +export type WorkflowLogProgressEntry = typeof WorkflowLogProgressEntry.Type; + +export const WorkflowProgressEntry = Schema.Union([ + WorkflowAgentProgressEntry, + WorkflowPhaseProgressEntry, + WorkflowLogProgressEntry, +]); +export type WorkflowProgressEntry = typeof WorkflowProgressEntry.Type; + +/** + * Handles returned by the Workflow tool result. `transcriptDir` and + * `scriptPath` are server-local paths — clients echo them back to the + * workflow inspection RPCs, which re-validate them structurally before + * touching disk. `sessionUrl` replaces the local handles for remote runs. + */ +export const WorkflowRunHandles = Schema.Struct({ + taskId: TrimmedNonEmptyString, + runId: Schema.optional(TrimmedNonEmptyString), + workflowName: Schema.optional(TrimmedNonEmptyString), + taskType: Schema.optional(TrimmedNonEmptyString), + scriptPath: Schema.optional(TrimmedNonEmptyString), + transcriptDir: Schema.optional(TrimmedNonEmptyString), + sessionUrl: Schema.optional(TrimmedNonEmptyString), + warning: Schema.optional(TrimmedNonEmptyString), +}); +export type WorkflowRunHandles = typeof WorkflowRunHandles.Type; + +export class WorkflowInspectionError extends Schema.TaggedErrorClass()( + "WorkflowInspectionError", + { + operation: Schema.String, + reason: Schema.Literals(["invalid-path", "not-found", "read-failed", "unsupported"]), + detail: Schema.String, + cause: Schema.optional(Schema.Defect()), + }, +) { + override get message(): string { + return `Workflow inspection failed in ${this.operation}: ${this.detail}`; + } +} + +export const WorkflowReadScriptInput = Schema.Struct({ + scriptPath: TrimmedNonEmptyString, +}); +export type WorkflowReadScriptInput = typeof WorkflowReadScriptInput.Type; + +export const WorkflowReadScriptResult = Schema.Struct({ + source: Schema.String, + truncated: Schema.Boolean, +}); +export type 
WorkflowReadScriptResult = typeof WorkflowReadScriptResult.Type; + +export const WorkflowReadJournalInput = Schema.Struct({ + transcriptDir: TrimmedNonEmptyString, +}); +export type WorkflowReadJournalInput = typeof WorkflowReadJournalInput.Type; + +/** + * One journal record per agent. `resultJson` is the agent's return value + * re-serialized as JSON, truncated server-side; `resultTruncated` marks the + * clip. Agents with a `started` record but no `result` yet report + * `hasResult: false`. + */ +export const WorkflowJournalEntry = Schema.Struct({ + agentId: Schema.String, + hasResult: Schema.Boolean, + resultJson: Schema.optional(Schema.String), + resultTruncated: Schema.optional(Schema.Boolean), +}); +export type WorkflowJournalEntry = typeof WorkflowJournalEntry.Type; + +export const WorkflowReadJournalResult = Schema.Struct({ + entries: Schema.Array(WorkflowJournalEntry), + truncated: Schema.Boolean, +}); +export type WorkflowReadJournalResult = typeof WorkflowReadJournalResult.Type; + +export const WorkflowReadAgentTranscriptInput = Schema.Struct({ + transcriptDir: TrimmedNonEmptyString, + agentId: TrimmedNonEmptyString, + /** Zero-based line cursor; omit to read from the start. */ + afterLine: Schema.optional(Schema.Int), +}); +export type WorkflowReadAgentTranscriptInput = typeof WorkflowReadAgentTranscriptInput.Type; + +/** + * Raw transcript JSONL lines starting after the cursor. `nextLine` is the + * cursor for the next page; `complete` means the read reached end-of-file + * (more lines may still be appended while the agent runs — poll again). 
+ */ +export const WorkflowReadAgentTranscriptResult = Schema.Struct({ + lines: Schema.Array(Schema.String), + nextLine: Schema.Int, + complete: Schema.Boolean, +}); +export type WorkflowReadAgentTranscriptResult = typeof WorkflowReadAgentTranscriptResult.Type; From e5b721efc657a610b47f164071907192e31807f2 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 04:57:52 -0700 Subject: [PATCH 02/11] Address review findings from multi-agent review pass - WorkflowInspectionService: re-contain joined leaf files so a symlink named journal.jsonl / agent-<agentId>.jsonl cannot escape the projects root (readScript already did this); add regression tests for both - Stop serializing raw fs error causes in WorkflowInspectionError - ProviderCommandReactor: surface stopTask failures as a provider.task.stop.failed activity instead of a silent log warning - ProviderService.stopTask: don't resurrect a stopped session via recovery just to stop a task - Ingestion: scope stable workflow activity ids by threadId so SDK task id reuse across threads cannot collide in the projection table - WorkflowPanel: keep polling transcripts past prior EOF ("complete" means caught-up, not finished); reuse useCopyToClipboard for the resume-command button - workflow-logic: terminalize runs left "running" when the provider session is gone; add a revision counter so timeline rows re-render on content changes even when timestamps collide Co-Authored-By: Claude Fable 5 --- .../Layers/ProviderCommandReactor.ts | 21 ++++++++-- .../Layers/ProviderRuntimeIngestion.test.ts | 10 ++--- .../Layers/ProviderRuntimeIngestion.ts | 12 +++--- .../src/provider/Layers/ProviderService.ts | 11 ++++- .../WorkflowInspectionService.test.ts | 40 +++++++++++++++++++ .../src/workflow/WorkflowInspectionService.ts | 19 +++++---- apps/web/src/components/ChatView.tsx | 10 ++++- .../components/chat/MessagesTimeline.logic.ts | 8 ++-- .../src/components/workflow/WorkflowPanel.tsx | 33 +++++---------- apps/web/src/workflow-logic.ts | 17 
++++++++ packages/contracts/src/workflow.ts | 1 - 11 files changed, 132 insertions(+), 50 deletions(-) diff --git a/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts b/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts index 60df8c225c4..f433080749f 100644 --- a/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts +++ b/apps/server/src/orchestration/Layers/ProviderCommandReactor.ts @@ -1023,10 +1023,23 @@ const make = Effect.gen(function* () { }); } - yield* providerService.stopTask({ - threadId: event.payload.threadId, - taskId: event.payload.taskId, - }); + yield* providerService + .stopTask({ + threadId: event.payload.threadId, + taskId: event.payload.taskId, + }) + .pipe( + Effect.catchCause((cause) => + appendProviderFailureActivity({ + threadId: event.payload.threadId, + kind: "provider.task.stop.failed", + summary: "Background task stop failed", + detail: Cause.pretty(cause), + turnId: null, + createdAt: event.payload.createdAt, + }), + ), + ); }); const processDomainEvent = Effect.fn("processDomainEvent")(function* ( diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index 7130b396bb5..f11fd8ed349 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -3122,7 +3122,7 @@ describe("ProviderRuntimeIngestion", () => { const thread = await waitForThread(harness.readModel, (entry) => entry.activities.some( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-1", + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-1", ), ); @@ -3132,7 +3132,7 @@ describe("ProviderRuntimeIngestion", () => { expect(perTick?.kind).toBe("task.progress"); const snapshot = thread.activities.find( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-1", + 
(activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-1", ); expect(snapshot?.kind).toBe("task.workflow-updated"); const payload = @@ -3165,11 +3165,11 @@ describe("ProviderRuntimeIngestion", () => { const thread = await waitForThread(harness.readModel, (entry) => entry.activities.some( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:task-wf-meta", + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:thread-1:task-wf-meta", ), ); const meta = thread.activities.find( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:task-wf-meta", + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:thread-1:task-wf-meta", ); expect(meta?.kind).toBe("task.workflow-meta"); const payload = @@ -3226,7 +3226,7 @@ describe("ProviderRuntimeIngestion", () => { const drainedThread = drained.threads.find((entry) => entry.id === ThreadId.make("thread-1")); const snapshots = drainedThread?.activities.filter( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:task-wf-2", + (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-2", ); expect(snapshots).toHaveLength(1); const payload = diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts index 8df21f579b3..4af42f924cb 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts @@ -167,12 +167,12 @@ function maxCheckpointTurnCount( * Stable per-task activity ids: workflow snapshot/meta activities are * upserted (one projection row per run), not appended per progress tick. 
*/ -function workflowActivityId(taskId: string): EventId { - return EventId.make(`workflow:${taskId}`); +function workflowActivityId(threadId: ThreadId, taskId: string): EventId { + return EventId.make(`workflow:${threadId}:${taskId}`); } -function workflowMetaActivityId(taskId: string): EventId { - return EventId.make(`workflow-meta:${taskId}`); +function workflowMetaActivityId(threadId: ThreadId, taskId: string): EventId { + return EventId.make(`workflow-meta:${threadId}:${taskId}`); } function truncateDetail(value: string, limit = 180): string { @@ -507,7 +507,7 @@ function runtimeEventToActivities( return [ progressActivity, { - id: workflowActivityId(event.payload.taskId), + id: workflowActivityId(event.threadId, event.payload.taskId), createdAt: event.createdAt, tone: "info", kind: "task.workflow-updated", @@ -527,7 +527,7 @@ function runtimeEventToActivities( case "task.workflowMeta": { return [ { - id: workflowMetaActivityId(event.payload.taskId), + id: workflowMetaActivityId(event.threadId, event.payload.taskId), createdAt: event.createdAt, tone: "info", kind: "task.workflow-meta", diff --git a/apps/server/src/provider/Layers/ProviderService.ts b/apps/server/src/provider/Layers/ProviderService.ts index 20546ac5296..03df91c07d6 100644 --- a/apps/server/src/provider/Layers/ProviderService.ts +++ b/apps/server/src/provider/Layers/ProviderService.ts @@ -761,10 +761,13 @@ const makeProviderService = Effect.fn("makeProviderService")(function* ( schema: ProviderStopTaskInput, payload: rawInput, }); + // No recovery: a background task cannot be running without a live + // in-process session, so resurrecting one just to stop a task would be + // wasted work (and would resume the session as a side effect). 
const routed = yield* resolveRoutableSession({ threadId: input.threadId, operation: "ProviderService.stopTask", - allowRecovery: true, + allowRecovery: false, }); yield* Effect.annotateCurrentSpan({ "provider.operation": "stop-task", @@ -772,6 +775,12 @@ const makeProviderService = Effect.fn("makeProviderService")(function* ( "provider.thread_id": input.threadId, "provider.task_id": input.taskId, }); + if (!routed.isActive) { + return yield* toValidationError( + "ProviderService.stopTask", + "No active provider session is running for this thread.", + ); + } const adapterStopTask = routed.adapter.stopTask; if (adapterStopTask === undefined) { return yield* toValidationError( diff --git a/apps/server/src/workflow/WorkflowInspectionService.test.ts b/apps/server/src/workflow/WorkflowInspectionService.test.ts index 5dcc4884a46..410ac39412c 100644 --- a/apps/server/src/workflow/WorkflowInspectionService.test.ts +++ b/apps/server/src/workflow/WorkflowInspectionService.test.ts @@ -185,6 +185,25 @@ describe("WorkflowInspectionService", () => { }).pipe(Effect.provide(NodeServices.layer)), ); + it.effect("rejects a journal.jsonl symlink that escapes the root as invalid-path", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const outside = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-outside-" }); + const secret = NodePath.join(outside, "secret.jsonl"); + yield* fs.writeFileString(secret, JSON.stringify({ type: "started", agentId: "leak" })); + + const escapeDir = NodePath.join(layout.transcriptDir, "..", "wf_journal_escape"); + yield* fs.makeDirectory(escapeDir, { recursive: true }); + yield* Effect.promise(() => + NodeFSP.symlink(secret, NodePath.join(escapeDir, "journal.jsonl")), + ); + + const error = yield* service.readJournal({ transcriptDir: escapeDir }).pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); 
+ it.effect("rejects a transcript dir outside the root as invalid-path", () => Effect.gen(function* () { const { fs, service } = yield* setup; @@ -295,6 +314,27 @@ describe("WorkflowInspectionService", () => { }).pipe(Effect.provide(NodeServices.layer)), ); + it.effect("rejects an agent transcript symlink that escapes the root as invalid-path", () => + Effect.gen(function* () { + const { fs, service, layout } = yield* setup; + const outside = yield* fs.makeTempDirectoryScoped({ prefix: "t3-workflow-outside-" }); + const secret = NodePath.join(outside, "secret.txt"); + yield* fs.writeFileString(secret, "root:x:0:0::/root:/bin/bash"); + + const escapeDir = NodePath.join(layout.transcriptDir, "..", "wf_transcript_escape"); + yield* fs.makeDirectory(escapeDir, { recursive: true }); + yield* Effect.promise(() => + NodeFSP.symlink(secret, NodePath.join(escapeDir, "agent-leak.jsonl")), + ); + + const error = yield* service + .readAgentTranscript({ transcriptDir: escapeDir, agentId: "leak" }) + .pipe(Effect.flip); + assert.equal(error._tag, "WorkflowInspectionError"); + assert.equal(error.reason, "invalid-path"); + }).pipe(Effect.provide(NodeServices.layer)), + ); + it.effect("reports a missing transcript as not-found", () => Effect.gen(function* () { const { service, layout } = yield* setup; diff --git a/apps/server/src/workflow/WorkflowInspectionService.ts b/apps/server/src/workflow/WorkflowInspectionService.ts index c771278c49e..bad1f2c39fb 100644 --- a/apps/server/src/workflow/WorkflowInspectionService.ts +++ b/apps/server/src/workflow/WorkflowInspectionService.ts @@ -116,7 +116,6 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isEnoent(cause) ? "not-found" : "read-failed", detail: "Failed to resolve the workflow projects root.", - cause, }), }); @@ -127,7 +126,6 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isEnoent(cause) ? 
"not-found" : "read-failed", detail: "Failed to resolve the requested path.", - cause, }), }); @@ -162,7 +160,6 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", detail: "Failed to read the workflow script.", - cause, }), ), ); @@ -179,7 +176,12 @@ export const make = (options?: { readonly projectsRoot?: string }) => ) { const operation = "WorkflowInspectionService.readJournal"; const realDir = yield* resolveContained(operation, input.transcriptDir); - const journalPath = NodePath.join(realDir, "journal.jsonl"); + // Re-contain the joined leaf: a symlink named journal.jsonl inside a + // valid directory must not escape the projects root. + const journalPath = yield* resolveContained( + operation, + NodePath.join(realDir, "journal.jsonl"), + ); const raw = yield* fs.readFileString(journalPath).pipe( Effect.mapError( (cause) => @@ -187,7 +189,6 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", detail: "Failed to read the workflow journal.", - cause, }), ), ); @@ -259,7 +260,12 @@ export const make = (options?: { readonly projectsRoot?: string }) => } const realDir = yield* resolveContained(operation, input.transcriptDir); - const transcriptPath = NodePath.join(realDir, `agent-${input.agentId}.jsonl`); + // Re-contain the joined leaf: a symlink named agent-.jsonl inside + // a valid directory must not escape the projects root. + const transcriptPath = yield* resolveContained( + operation, + NodePath.join(realDir, `agent-${input.agentId}.jsonl`), + ); // v1 reads the whole file per page; acceptable for current transcript // sizes. Revisit with a streaming/seek reader if transcripts grow large. 
const raw = yield* fs.readFileString(transcriptPath).pipe( @@ -269,7 +275,6 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", detail: "Failed to read the agent transcript.", - cause, }), ), ); diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx index e89b8b43333..4bc0c538400 100644 --- a/apps/web/src/components/ChatView.tsx +++ b/apps/web/src/components/ChatView.tsx @@ -1730,7 +1730,15 @@ function ChatViewContent(props: ChatViewProps) { const phase = derivePhase(activeThread?.session ?? null); const threadActivities = activeThread?.activities ?? EMPTY_ACTIVITIES; const workLogEntries = useMemo(() => deriveWorkLogEntries(threadActivities), [threadActivities]); - const workflowRuns = useMemo(() => deriveWorkflowRuns(threadActivities), [threadActivities]); + const workflowSessionActive = + activeThread?.session !== null && + activeThread?.session !== undefined && + activeThread.session.status !== "stopped" && + activeThread.session.status !== "error"; + const workflowRuns = useMemo( + () => deriveWorkflowRuns(threadActivities, { sessionActive: workflowSessionActive }), + [threadActivities, workflowSessionActive], + ); const activeWorkflowSurface = activeRightPanelSurface?.kind === "workflow" ? activeRightPanelSurface : null; const activeWorkflowRun = useMemo( diff --git a/apps/web/src/components/chat/MessagesTimeline.logic.ts b/apps/web/src/components/chat/MessagesTimeline.logic.ts index fcc57defa5a..40adf7f18d3 100644 --- a/apps/web/src/components/chat/MessagesTimeline.logic.ts +++ b/apps/web/src/components/chat/MessagesTimeline.logic.ts @@ -590,12 +590,14 @@ function isRowUnchanged(a: MessagesTimelineRow, b: MessagesTimelineRow): boolean case "workflow": { const bw = b as typeof a; - // WorkflowRun view models are rebuilt per derivation; compare the - // fields that drive rendering so unchanged runs keep row identity. 
+ // WorkflowRun view models are rebuilt per derivation; `revision` is a + // deterministic per-run change counter, so equal revisions (and status, + // which sessionActive can flip without a new activity) mean identical + // content. return ( a.createdAt === bw.createdAt && a.workflowRun.status === bw.workflowRun.status && - a.workflowRun.updatedAt === bw.workflowRun.updatedAt + a.workflowRun.revision === bw.workflowRun.revision ); } diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx index 2f0545aeb43..e191399373c 100644 --- a/apps/web/src/components/workflow/WorkflowPanel.tsx +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -31,6 +31,7 @@ import { } from "~/workflow-logic"; import { Button } from "../ui/button"; import { AgentRowContent, PhaseHeader, WorkflowStatusChip } from "./workflowUi"; +import { useCopyToClipboard } from "../../hooks/useCopyToClipboard"; type WorkflowTabId = "run" | "script" | "logs"; @@ -165,32 +166,18 @@ function CopyResumeButton({ scriptPath: string; runId: string; }): ReactElement { - const [copied, setCopied] = useState(false); - const timerRef = useRef | null>(null); - - useEffect( - () => () => { - if (timerRef.current !== null) { - clearTimeout(timerRef.current); - } - }, - [], - ); + const { copyToClipboard, isCopied } = useCopyToClipboard({ + timeout: 1200, + target: "workflow resume command", + }); const handleCopy = useCallback(() => { - const command = `Workflow({ scriptPath: "${scriptPath}", resumeFromRunId: "${runId}" })`; - void navigator.clipboard.writeText(command).then(() => { - setCopied(true); - if (timerRef.current !== null) { - clearTimeout(timerRef.current); - } - timerRef.current = setTimeout(() => setCopied(false), 1200); - }); - }, [runId, scriptPath]); + copyToClipboard(`Workflow({ scriptPath: "${scriptPath}", resumeFromRunId: "${runId}" })`); + }, [copyToClipboard, runId, scriptPath]); return ( ); @@ -395,7 +382,9 @@ function 
AgentTranscriptView({ const loadingRef = useRef(false); const loadMore = useCallback(async () => { - if (loadingRef.current || completeRef.current) { + // `complete` only means the last read caught up to end-of-file; a live + // run keeps appending, so polling must keep re-reading past prior EOF. + if (loadingRef.current) { return; } loadingRef.current = true; diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts index 9f78fa89884..1744f9b283d 100644 --- a/apps/web/src/workflow-logic.ts +++ b/apps/web/src/workflow-logic.ts @@ -69,6 +69,10 @@ export interface WorkflowRun { status: WorkflowRunStatus; createdAt: string; updatedAt: string; + /** Monotonic per-derivation change counter — bumped on every applied + * workflow activity so renderers can cheaply detect content changes even + * when timestamps collide at millisecond precision. */ + revision: number; turnId: TurnId | null; name?: string | undefined; description?: string | undefined; @@ -300,6 +304,7 @@ export function collectWorkflowTaskIds( export function deriveWorkflowRuns( activities: ReadonlyArray, + options?: { readonly sessionActive?: boolean | undefined }, ): WorkflowRun[] { const ordered = [...activities].toSorted( (a, b) => a.createdAt.localeCompare(b.createdAt) || a.id.localeCompare(b.id), @@ -317,6 +322,7 @@ export function deriveWorkflowRuns( createdAt: activity.createdAt, updatedAt: activity.createdAt, turnId: activity.turnId, + revision: 0, phases: [], logs: [], agentCounts: { total: 0, queued: 0, running: 0, done: 0, error: 0 }, @@ -342,6 +348,7 @@ export function deriveWorkflowRuns( break; } const run = ensureRun(taskId, activity); + run.revision += 1; run.hasStartedActivity = true; run.createdAt = activity.createdAt; run.turnId = activity.turnId; @@ -357,6 +364,7 @@ export function deriveWorkflowRuns( } case "task.workflow-updated": { const run = ensureRun(taskId, activity); + run.revision += 1; run.updatedAt = activity.createdAt; const description = 
asString(payload.description); if (description !== undefined && run.description === undefined) { @@ -380,6 +388,7 @@ export function deriveWorkflowRuns( } case "task.workflow-meta": { const run = ensureRun(taskId, activity); + run.revision += 1; run.updatedAt = activity.createdAt; const name = asString(payload.workflowName); if (name !== undefined) { @@ -410,6 +419,7 @@ export function deriveWorkflowRuns( if (!run) { break; } + run.revision += 1; run.updatedAt = activity.createdAt; run.status = payload.status === "failed" @@ -428,8 +438,15 @@ export function deriveWorkflowRuns( } } + // A workflow cannot outlive its provider session: when the session is gone + // and no task_notification ever arrived (crash, interrupt, app restart), + // surface the run as stopped instead of running forever. + const sessionActive = options?.sessionActive ?? true; return [...runs.values()] .map(({ hasStartedActivity: _hasStartedActivity, ...run }) => run) + .map((run) => + run.status === "running" && !sessionActive ? 
{ ...run, status: "stopped" as const } : run, + ) .toSorted((a, b) => a.createdAt.localeCompare(b.createdAt) || a.taskId.localeCompare(b.taskId)); } diff --git a/packages/contracts/src/workflow.ts b/packages/contracts/src/workflow.ts index 470b920d9c3..07bdd89545e 100644 --- a/packages/contracts/src/workflow.ts +++ b/packages/contracts/src/workflow.ts @@ -96,7 +96,6 @@ export class WorkflowInspectionError extends Schema.TaggedErrorClass Date: Thu, 2 Jul 2026 05:07:28 -0700 Subject: [PATCH 03/11] Address cursor bot review: session-gate, transcript tail, mobile noise - workflow-logic: settle in-flight agents to "error" when terminalizing a run whose session died, so a stopped chip never sits above pulsing "running" agents; drop the dead hasStartedActivity field (snapshot-only runs render intentionally after history trims) - ChatView: gate workflow liveness on derivePhase so interrupted sessions also terminalize runs, matching disconnected-session UX - WorkflowPanel: fetch the transcript once more when a run leaves "running" so lines appended after the last poll tick are not lost - mobile: suppress workflow-owned task.progress/task.completed rows in the work log, mirroring desktop (mobile has no workflow card yet) Co-Authored-By: Claude Fable 5 --- apps/mobile/src/lib/threadActivity.ts | 52 ++++++++++++++++++- apps/web/src/components/ChatView.tsx | 8 ++- .../src/components/workflow/WorkflowPanel.tsx | 5 +- apps/web/src/workflow-logic.test.ts | 34 ++++++++++++ apps/web/src/workflow-logic.ts | 39 +++++++++++--- 5 files changed, 123 insertions(+), 15 deletions(-) diff --git a/apps/mobile/src/lib/threadActivity.ts b/apps/mobile/src/lib/threadActivity.ts index 1c1582ef3ef..2a2437a9580 100644 --- a/apps/mobile/src/lib/threadActivity.ts +++ b/apps/mobile/src/lib/threadActivity.ts @@ -229,18 +229,68 @@ function resolvePendingUserInputAnswer( return normalizeDraftAnswer(draft?.selectedOptionLabel); } +/** + * Task ids owned by a workflow run. 
Mirrors the desktop derivation in + * apps/web/src/workflow-logic.ts (collectWorkflowTaskIds) — mobile has no + * workflow card yet, so all rows for those tasks are suppressed rather than + * rendered as per-tick noise. + */ +function collectWorkflowTaskIds( + activities: ReadonlyArray, +): Set { + const taskIds = new Set(); + for (const activity of activities) { + const payload = activity.payload as Record | null | undefined; + const taskId = + payload && typeof payload === "object" && typeof payload["taskId"] === "string" + ? payload["taskId"] + : undefined; + if (!taskId) continue; + const workflowName = + payload && typeof payload === "object" ? payload["workflowName"] : undefined; + const taskType = payload && typeof payload === "object" ? payload["taskType"] : undefined; + if ( + activity.kind === "task.workflow-updated" || + activity.kind === "task.workflow-meta" || + (activity.kind === "task.started" && + (taskType === "local_workflow" || typeof workflowName === "string")) + ) { + taskIds.add(taskId); + } + } + return taskIds; +} + +function activityBelongsToWorkflow( + activity: OrchestrationThreadActivity, + workflowTaskIds: ReadonlySet, +): boolean { + const payload = activity.payload as Record | null | undefined; + const taskId = payload && typeof payload === "object" ? payload["taskId"] : undefined; + return typeof taskId === "string" && workflowTaskIds.has(taskId); +} + function deriveWorkLogEntries( activities: ReadonlyArray, ): DerivedWorkLogEntry[] { const ordered = Arr.sort(activities, activityOrder); + const workflowTaskIds = collectWorkflowTaskIds(activities); const entries: DerivedWorkLogEntry[] = []; for (const activity of ordered) { if (activity.kind === "tool.started") continue; if (activity.kind === "task.started") continue; // Workflow snapshot/meta activities back the desktop workflow card; on - // mobile they would render as ever-mutating raw rows, so skip them. 
+ // mobile they would render as ever-mutating raw rows, so skip them — + // along with the per-tick task rows the workflow owns. if (activity.kind === "task.workflow-updated") continue; if (activity.kind === "task.workflow-meta") continue; + if ( + (activity.kind === "task.progress" || activity.kind === "task.completed") && + workflowTaskIds.size > 0 && + activityBelongsToWorkflow(activity, workflowTaskIds) + ) { + continue; + } if (activity.kind === "context-window.updated") continue; if (activity.summary === "Checkpoint captured") continue; if (isPlanBoundaryToolActivity(activity)) continue; diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx index 4bc0c538400..b77ccc3848b 100644 --- a/apps/web/src/components/ChatView.tsx +++ b/apps/web/src/components/ChatView.tsx @@ -1730,11 +1730,9 @@ function ChatViewContent(props: ChatViewProps) { const phase = derivePhase(activeThread?.session ?? null); const threadActivities = activeThread?.activities ?? EMPTY_ACTIVITIES; const workLogEntries = useMemo(() => deriveWorkLogEntries(threadActivities), [threadActivities]); - const workflowSessionActive = - activeThread?.session !== null && - activeThread?.session !== undefined && - activeThread.session.status !== "stopped" && - activeThread.session.status !== "error"; + // Mirrors derivePhase: interrupted/stopped/error sessions are all + // disconnected, and a workflow cannot still be running under any of them. 
+ const workflowSessionActive = phase !== "disconnected"; const workflowRuns = useMemo( () => deriveWorkflowRuns(threadActivities, { sessionActive: workflowSessionActive }), [threadActivities, workflowSessionActive], diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx index e191399373c..3a5aded319e 100644 --- a/apps/web/src/components/workflow/WorkflowPanel.tsx +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -425,9 +425,12 @@ function AgentTranscriptView({ }; }, [loadMore]); - // Keep polling for new lines while the run is live. + // Keep polling for new lines while the run is live; when the run settles + // (or the row opens on an already-terminal run), fetch once more so lines + // appended after the last poll tick are not lost. useEffect(() => { if (runStatus !== "running") { + void loadMore(); return; } const id = setInterval(() => { diff --git a/apps/web/src/workflow-logic.test.ts b/apps/web/src/workflow-logic.test.ts index ecbee190885..006677293a9 100644 --- a/apps/web/src/workflow-logic.test.ts +++ b/apps/web/src/workflow-logic.test.ts @@ -256,6 +256,40 @@ describe("deriveWorkflowRuns", () => { expect(runs).toEqual([]); }); + it("terminalizes a still-running run and settles in-flight agents when the session is gone", () => { + const runs = deriveWorkflowRuns( + [ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_agent", index: 0, state: "done" }, + { type: "workflow_agent", index: 1, state: "start", startedAt: 1000 }, + { type: "workflow_agent", index: 2, state: "start" }, + ]), + ], + { sessionActive: false }, + ); + const run = runs[0]; + expect(run?.status).toBe("stopped"); + const agents = run?.phases.flatMap((phase) => phase.agents) ?? 
[]; + expect(agents.map((agent) => agent.status)).toEqual(["done", "error", "error"]); + expect(agents[1]?.error).toBe("Interrupted before completion"); + expect(run?.agentCounts).toEqual({ total: 3, queued: 0, running: 0, done: 1, error: 2 }); + }); + + it("keeps a running run untouched while the session is active", () => { + const runs = deriveWorkflowRuns( + [ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { type: "workflow_agent", index: 0, state: "start", startedAt: 1000 }, + ]), + ], + { sessionActive: true }, + ); + expect(runs[0]?.status).toBe("running"); + expect(runs[0]?.phases.flatMap((phase) => phase.agents)[0]?.status).toBe("running"); + }); + it("detects remote runs from session handles", () => { const runs = deriveWorkflowRuns([ workflowStartedActivity("task-remote"), diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts index 1744f9b283d..8e6cb90dee0 100644 --- a/apps/web/src/workflow-logic.ts +++ b/apps/web/src/workflow-logic.ts @@ -270,9 +270,7 @@ export function groupWorkflowAgentsByPhase(parsed: { .toSorted((a, b) => a.index - b.index); } -interface MutableWorkflowRun extends WorkflowRun { - hasStartedActivity: boolean; -} +type MutableWorkflowRun = WorkflowRun; function isWorkflowTaskStartedPayload(payload: Record): boolean { return payload.taskType === "local_workflow" || asString(payload.workflowName) !== undefined; @@ -326,7 +324,6 @@ export function deriveWorkflowRuns( phases: [], logs: [], agentCounts: { total: 0, queued: 0, running: 0, done: 0, error: 0 }, - hasStartedActivity: false, }; runs.set(taskId, run); return run; @@ -349,7 +346,6 @@ export function deriveWorkflowRuns( } const run = ensureRun(taskId, activity); run.revision += 1; - run.hasStartedActivity = true; run.createdAt = activity.createdAt; run.turnId = activity.turnId; const name = asString(payload.workflowName); @@ -440,16 +436,43 @@ export function deriveWorkflowRuns( // A workflow cannot outlive its provider session: 
when the session is gone // and no task_notification ever arrived (crash, interrupt, app restart), - // surface the run as stopped instead of running forever. + // surface the run as stopped instead of running forever. Runs derived only + // from snapshot/meta activities (no task.started — e.g. after a checkpoint + // revert trimmed it) are kept intentionally: partial history still renders. const sessionActive = options?.sessionActive ?? true; return [...runs.values()] - .map(({ hasStartedActivity: _hasStartedActivity, ...run }) => run) .map((run) => - run.status === "running" && !sessionActive ? { ...run, status: "stopped" as const } : run, + run.status === "running" && !sessionActive ? terminalizeInterruptedRun(run) : run, ) .toSorted((a, b) => a.createdAt.localeCompare(b.createdAt) || a.taskId.localeCompare(b.taskId)); } +/** + * Settle a run whose session died before a terminal task notification: + * the run becomes "stopped" and its in-flight agents settle to "error" so + * nothing keeps rendering (or polling) as live work. + */ +function terminalizeInterruptedRun(run: WorkflowRun): WorkflowRun { + const settleAgent = (agent: WorkflowRunAgent): WorkflowRunAgent => + agent.status === "running" || agent.status === "queued" + ? { ...agent, status: "error", error: agent.error ?? 
"Interrupted before completion" } + : agent; + const phases = run.phases.map((phase) => ({ ...phase, agents: phase.agents.map(settleAgent) })); + const agents = phases.flatMap((phase) => phase.agents); + return { + ...run, + status: "stopped", + phases, + agentCounts: { + total: agents.length, + queued: agents.filter((agent) => agent.status === "queued").length, + running: agents.filter((agent) => agent.status === "running").length, + done: agents.filter((agent) => agent.status === "done").length, + error: agents.filter((agent) => agent.status === "error").length, + }, + }; +} + export function isRemoteWorkflowRun(run: WorkflowRun): boolean { return run.handles?.taskType === "remote_agent" || run.handles?.sessionUrl !== undefined; } From 0eaeb94564d857d2875754e7ae775d43f12fa8eb Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 05:14:52 -0700 Subject: [PATCH 04/11] Address macroscope + cursor follow-up review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ClaudeAdapter: dedupe workflow agents/phases by index (last write wins) before applying entry caps, so repeated slot updates cannot exhaust the cap and freeze later agents stale; restrict sessionUrl to http(s) so a hostile tool result cannot smuggle a javascript: href - workflowUi: guard sessionUrl scheme again at both anchor render sites (defense in depth for payloads persisted before the adapter filter) - Ingestion: JSON-encode the (threadId, taskId) tuple in stable workflow activity ids to remove delimiter-ambiguity collisions - workflow-logic: order same-timestamp activities by provider sequence then lifecycle rank so task.completed can never apply before its task.started - WorkflowPanel/Card: scope phase and agent row keys by taskId so switching runs remounts rows instead of leaking expanded transcript state and cursors across runs - workflowUi: phase progress counter counts settled (done + error) agents instead of reporting 0/1 for an 
errored terminal phase - client-runtime: readJournal query polls every 4s while mounted so the Logs tab picks up new results during a live run - mobile: keep workflow task.completed rows (only per-tick progress is suppressed) — with no workflow card on mobile it is the only signal a workflow finished or failed Co-Authored-By: Claude Fable 5 --- apps/mobile/src/lib/threadActivity.ts | 6 ++- .../Layers/ProviderRuntimeIngestion.test.ts | 15 ++++--- .../Layers/ProviderRuntimeIngestion.ts | 6 ++- .../src/provider/Layers/ClaudeAdapter.ts | 39 ++++++++++++++----- .../src/components/workflow/WorkflowPanel.tsx | 13 +++++-- .../components/workflow/WorkflowRunCard.tsx | 14 +++++-- .../src/components/workflow/workflowUi.tsx | 21 +++++++++- apps/web/src/workflow-logic.ts | 34 ++++++++++++++-- packages/client-runtime/src/state/workflow.ts | 4 ++ 9 files changed, 122 insertions(+), 30 deletions(-) diff --git a/apps/mobile/src/lib/threadActivity.ts b/apps/mobile/src/lib/threadActivity.ts index 2a2437a9580..8aae399ba81 100644 --- a/apps/mobile/src/lib/threadActivity.ts +++ b/apps/mobile/src/lib/threadActivity.ts @@ -281,11 +281,13 @@ function deriveWorkLogEntries( if (activity.kind === "task.started") continue; // Workflow snapshot/meta activities back the desktop workflow card; on // mobile they would render as ever-mutating raw rows, so skip them — - // along with the per-tick task rows the workflow owns. + // along with the per-tick progress rows the workflow owns. task.completed + // stays: with no workflow card here it is mobile's only signal that a + // workflow finished, failed, or was stopped. 
if (activity.kind === "task.workflow-updated") continue; if (activity.kind === "task.workflow-meta") continue; if ( - (activity.kind === "task.progress" || activity.kind === "task.completed") && + activity.kind === "task.progress" && workflowTaskIds.size > 0 && activityBelongsToWorkflow(activity, workflowTaskIds) ) { diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index f11fd8ed349..a997565c155 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -3122,7 +3122,8 @@ describe("ProviderRuntimeIngestion", () => { const thread = await waitForThread(harness.readModel, (entry) => entry.activities.some( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-1", + (activity: ProviderRuntimeTestActivity) => + activity.id === `workflow:${JSON.stringify(["thread-1", "task-wf-1"])}`, ), ); @@ -3132,7 +3133,8 @@ describe("ProviderRuntimeIngestion", () => { expect(perTick?.kind).toBe("task.progress"); const snapshot = thread.activities.find( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-1", + (activity: ProviderRuntimeTestActivity) => + activity.id === `workflow:${JSON.stringify(["thread-1", "task-wf-1"])}`, ); expect(snapshot?.kind).toBe("task.workflow-updated"); const payload = @@ -3165,11 +3167,13 @@ describe("ProviderRuntimeIngestion", () => { const thread = await waitForThread(harness.readModel, (entry) => entry.activities.some( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow-meta:thread-1:task-wf-meta", + (activity: ProviderRuntimeTestActivity) => + activity.id === `workflow-meta:${JSON.stringify(["thread-1", "task-wf-meta"])}`, ), ); const meta = thread.activities.find( - (activity: ProviderRuntimeTestActivity) => activity.id === 
"workflow-meta:thread-1:task-wf-meta", + (activity: ProviderRuntimeTestActivity) => + activity.id === `workflow-meta:${JSON.stringify(["thread-1", "task-wf-meta"])}`, ); expect(meta?.kind).toBe("task.workflow-meta"); const payload = @@ -3226,7 +3230,8 @@ describe("ProviderRuntimeIngestion", () => { const drainedThread = drained.threads.find((entry) => entry.id === ThreadId.make("thread-1")); const snapshots = drainedThread?.activities.filter( - (activity: ProviderRuntimeTestActivity) => activity.id === "workflow:thread-1:task-wf-2", + (activity: ProviderRuntimeTestActivity) => + activity.id === `workflow:${JSON.stringify(["thread-1", "task-wf-2"])}`, ); expect(snapshots).toHaveLength(1); const payload = diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts index 4af42f924cb..5cb6c3561f7 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts @@ -167,12 +167,14 @@ function maxCheckpointTurnCount( * Stable per-task activity ids: workflow snapshot/meta activities are * upserted (one projection row per run), not appended per progress tick. */ +// JSON-encode the (threadId, taskId) tuple: both are free-form strings, so a +// bare `:`-joined key would let distinct pairs collide on the upsert id. 
function workflowActivityId(threadId: ThreadId, taskId: string): EventId { - return EventId.make(`workflow:${threadId}:${taskId}`); + return EventId.make(`workflow:${JSON.stringify([threadId, taskId])}`); } function workflowMetaActivityId(threadId: ThreadId, taskId: string): EventId { - return EventId.make(`workflow-meta:${threadId}:${taskId}`); + return EventId.make(`workflow-meta:${JSON.stringify([threadId, taskId])}`); } function truncateDetail(value: string, limit = 180): string { diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index f62a33068ce..d1eb5bba0c8 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -831,8 +831,11 @@ function normalizeWorkflowProgress( if (!Array.isArray(value) || value.length === 0) { return undefined; } - const agents: Array = []; - const phases: Array = []; + // Last write wins per index: the snapshot may re-emit an agent/phase slot + // (retries, later progress), and the cap must count unique slots, not raw + // entries, or repeats would freeze later agents in a stale state. 
+ const agentsByIndex = new Map(); + const phasesByIndex = new Map(); const logs: Array = []; for (const raw of value) { if (raw === null || typeof raw !== "object" || Array.isArray(raw)) { @@ -842,8 +845,11 @@ function normalizeWorkflowProgress( switch (entry.type) { case "workflow_agent": { const agent = normalizeWorkflowAgentEntry(entry); - if (agent && agents.length < MAX_WORKFLOW_AGENT_ENTRIES) { - agents.push(agent); + if ( + agent && + (agentsByIndex.has(agent.index) || agentsByIndex.size < MAX_WORKFLOW_AGENT_ENTRIES) + ) { + agentsByIndex.set(agent.index, agent); } break; } @@ -853,9 +859,9 @@ function normalizeWorkflowProgress( if ( index !== undefined && title !== undefined && - phases.length < MAX_WORKFLOW_PHASE_ENTRIES + (phasesByIndex.has(index) || phasesByIndex.size < MAX_WORKFLOW_PHASE_ENTRIES) ) { - phases.push({ + phasesByIndex.set(index, { type: "workflow_phase", index, title, @@ -880,7 +886,7 @@ function normalizeWorkflowProgress( // Narration is append-only upstream; keep the newest lines when clipping. const clippedLogs = logs.length > MAX_WORKFLOW_LOG_ENTRIES ? logs.slice(-MAX_WORKFLOW_LOG_ENTRIES) : logs; - const entries = [...phases, ...agents, ...clippedLogs]; + const entries = [...phasesByIndex.values(), ...agentsByIndex.values(), ...clippedLogs]; return entries.length > 0 ? entries : undefined; } @@ -893,6 +899,21 @@ function readClaudeWorkflowProgress( return normalizeWorkflowProgress(raw); } +function workflowHttpUrl(value: unknown): string | undefined { + const text = workflowString(value); + if (text === undefined) { + return undefined; + } + // Clients render this into an anchor href — restrict to web URLs so a + // hostile tool result cannot smuggle a javascript:/file: scheme through. + try { + const parsed = new URL(text); + return parsed.protocol === "https:" || parsed.protocol === "http:" ? 
text : undefined; + } catch { + return undefined; + } +} + function normalizeWorkflowRunHandles( toolUseResult: Record, ): WorkflowRunHandles | undefined { @@ -917,8 +938,8 @@ function normalizeWorkflowRunHandles( ...(workflowString(toolUseResult.transcriptDir) !== undefined ? { transcriptDir: workflowString(toolUseResult.transcriptDir) } : {}), - ...(workflowString(toolUseResult.sessionUrl) !== undefined - ? { sessionUrl: workflowString(toolUseResult.sessionUrl) } + ...(workflowHttpUrl(toolUseResult.sessionUrl) !== undefined + ? { sessionUrl: workflowHttpUrl(toolUseResult.sessionUrl) } : {}), ...(workflowString(toolUseResult.warning) !== undefined ? { warning: workflowString(toolUseResult.warning) } diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx index 3a5aded319e..b8ddca7bb00 100644 --- a/apps/web/src/components/workflow/WorkflowPanel.tsx +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -30,7 +30,12 @@ import { workflowRunTitle, } from "~/workflow-logic"; import { Button } from "../ui/button"; -import { AgentRowContent, PhaseHeader, WorkflowStatusChip } from "./workflowUi"; +import { + AgentRowContent, + PhaseHeader, + WorkflowStatusChip, + safeWorkflowSessionUrl, +} from "./workflowUi"; import { useCopyToClipboard } from "../../hooks/useCopyToClipboard"; type WorkflowTabId = "run" | "script" | "logs"; @@ -199,7 +204,7 @@ function RunTab({ remote: boolean; }): ReactElement { if (remote) { - const sessionUrl = run.handles?.sessionUrl; + const sessionUrl = safeWorkflowSessionUrl(run.handles?.sessionUrl); if (sessionUrl === undefined) { return ; } @@ -223,11 +228,11 @@ function RunTab({ return (
{run.phases.map((phase) => ( -
+
{phase.agents.map((agent) => ( +
{rows.map((agent) => ( -
+
))} @@ -124,7 +130,7 @@ export function WorkflowRunCard(props: { } function RemoteRunBody({ run }: { run: WorkflowRun }): ReactElement { - const sessionUrl = run.handles?.sessionUrl; + const sessionUrl = safeWorkflowSessionUrl(run.handles?.sessionUrl); if (sessionUrl === undefined) { return (

diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx index aa7f951911b..836bdb000d6 100644 --- a/apps/web/src/components/workflow/workflowUi.tsx +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -157,8 +157,27 @@ export function AgentRowContent({ // Phase header + rollup helpers // --------------------------------------------------------------------------- +/** + * Only web URLs may reach an anchor href. The server already filters the + * scheme at ingestion; this guards payloads persisted before that filter + * (and any other producer) as defense in depth. + */ +export function safeWorkflowSessionUrl(sessionUrl: string | undefined): string | undefined { + if (sessionUrl === undefined) { + return undefined; + } + try { + const parsed = new URL(sessionUrl); + return parsed.protocol === "https:" || parsed.protocol === "http:" ? sessionUrl : undefined; + } catch { + return undefined; + } +} + +/** Settled agents (done or error) — the x/y header is a progress counter, + * and an errored agent has no work remaining. */ export function phaseDoneCount(phase: WorkflowRunPhase): number { - return phase.agents.filter((agent) => agent.status === "done").length; + return phase.agents.filter((agent) => agent.status === "done" || agent.status === "error").length; } export function PhaseHeader({ phase }: { phase: WorkflowRunPhase }): ReactElement { diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts index 8e6cb90dee0..253db4edd31 100644 --- a/apps/web/src/workflow-logic.ts +++ b/apps/web/src/workflow-logic.ts @@ -300,13 +300,41 @@ export function collectWorkflowTaskIds( return taskIds; } +/** + * Millisecond timestamps collide, so equal-time activities are ordered by + * provider sequence when present, then by lifecycle rank — a task.completed + * must never be applied before the task.started that creates its run. 
+ */ +const WORKFLOW_ACTIVITY_RANK: Record = { + "task.started": 0, + "task.workflow-meta": 1, + "task.workflow-updated": 1, + "task.completed": 2, +}; + +function compareWorkflowActivityOrder( + a: OrchestrationThreadActivity, + b: OrchestrationThreadActivity, +): number { + if (a.sequence !== undefined && b.sequence !== undefined && a.sequence !== b.sequence) { + return a.sequence - b.sequence; + } + const byTime = a.createdAt.localeCompare(b.createdAt); + if (byTime !== 0) { + return byTime; + } + const byRank = (WORKFLOW_ACTIVITY_RANK[a.kind] ?? 1) - (WORKFLOW_ACTIVITY_RANK[b.kind] ?? 1); + if (byRank !== 0) { + return byRank; + } + return a.id.localeCompare(b.id); +} + export function deriveWorkflowRuns( activities: ReadonlyArray, options?: { readonly sessionActive?: boolean | undefined }, ): WorkflowRun[] { - const ordered = [...activities].toSorted( - (a, b) => a.createdAt.localeCompare(b.createdAt) || a.id.localeCompare(b.id), - ); + const ordered = [...activities].toSorted(compareWorkflowActivityOrder); const runs = new Map(); const ensureRun = (taskId: string, activity: OrchestrationThreadActivity): MutableWorkflowRun => { diff --git a/packages/client-runtime/src/state/workflow.ts b/packages/client-runtime/src/state/workflow.ts index cf169623e57..4c679919fda 100644 --- a/packages/client-runtime/src/state/workflow.ts +++ b/packages/client-runtime/src/state/workflow.ts @@ -25,6 +25,10 @@ export function createWorkflowEnvironmentAtoms( label: "environment-data:workflow:read-journal", tag: WS_METHODS.workflowReadJournal, staleTimeMs: 5_000, + // The journal grows while a run is live and the query only mounts + // while the Logs tab is open — poll so new results appear without a + // manual refresh. 
+ refreshIntervalMs: 4_000, }), readAgentTranscript: createEnvironmentRpcCommand(runtime, { label: "environment-data:workflow:read-agent-transcript", From cb5a39a68cd7aa1de19cb5790d8f179412b519c7 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 05:17:33 -0700 Subject: [PATCH 05/11] Restore cause on WorkflowInspectionError per service error conventions Real filesystem failures (read-failed / not-found wraps) forward their underlying error as an optional cause, matching the GitCommandError convention; pure validation reasons stay cause-less. The paths a cause can carry are ones the client supplied in the request, so this does not reintroduce the earlier information-exposure concern. Co-Authored-By: Claude Fable 5 --- apps/server/src/workflow/WorkflowInspectionService.ts | 5 +++++ packages/contracts/src/workflow.ts | 3 +++ 2 files changed, 8 insertions(+) diff --git a/apps/server/src/workflow/WorkflowInspectionService.ts b/apps/server/src/workflow/WorkflowInspectionService.ts index bad1f2c39fb..06ef488523c 100644 --- a/apps/server/src/workflow/WorkflowInspectionService.ts +++ b/apps/server/src/workflow/WorkflowInspectionService.ts @@ -116,6 +116,7 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isEnoent(cause) ? "not-found" : "read-failed", detail: "Failed to resolve the workflow projects root.", + cause, }), }); @@ -126,6 +127,7 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isEnoent(cause) ? "not-found" : "read-failed", detail: "Failed to resolve the requested path.", + cause, }), }); @@ -160,6 +162,7 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", detail: "Failed to read the workflow script.", + cause, }), ), ); @@ -189,6 +192,7 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? 
"not-found" : "read-failed", detail: "Failed to read the workflow journal.", + cause, }), ), ); @@ -275,6 +279,7 @@ export const make = (options?: { readonly projectsRoot?: string }) => operation, reason: isNotFoundPlatformError(cause) ? "not-found" : "read-failed", detail: "Failed to read the agent transcript.", + cause, }), ), ); diff --git a/packages/contracts/src/workflow.ts b/packages/contracts/src/workflow.ts index 07bdd89545e..63a59bc2e70 100644 --- a/packages/contracts/src/workflow.ts +++ b/packages/contracts/src/workflow.ts @@ -96,6 +96,9 @@ export class WorkflowInspectionError extends Schema.TaggedErrorClass Date: Thu, 2 Jul 2026 05:29:04 -0700 Subject: [PATCH 06/11] Make workflow completion order-robust against inverted sequences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A task.completed for a known workflow task now creates the run when its task.started has not been applied yet — adopted runs can carry reset provider sequences across CLI restarts, letting the completion sort first. The later-applied started only fills metadata and can never resurrect a terminal status, so derivation correctness no longer depends on the comparator at all. 
Co-Authored-By: Claude Fable 5 --- apps/web/src/workflow-logic.test.ts | 28 +++++++++++++++++++++++++++- apps/web/src/workflow-logic.ts | 10 ++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/apps/web/src/workflow-logic.test.ts b/apps/web/src/workflow-logic.test.ts index 006677293a9..ee19347e4f7 100644 --- a/apps/web/src/workflow-logic.test.ts +++ b/apps/web/src/workflow-logic.test.ts @@ -1,4 +1,9 @@ -import { EventId, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts"; +import { + NonNegativeInt, + EventId, + type OrchestrationThreadActivity, + TurnId, +} from "@t3tools/contracts"; import { describe, expect, it } from "vite-plus/test"; import { @@ -20,6 +25,7 @@ function buildActivity(overrides: { tone?: OrchestrationThreadActivity["tone"]; payload?: Record; turnId?: string; + sequence?: number; }): OrchestrationThreadActivity { return { id: EventId.make(overrides.id ?? `activity-${nextActivityId++}`), @@ -30,6 +36,9 @@ function buildActivity(overrides: { tone: overrides.tone ?? "info", payload: overrides.payload ?? {}, turnId: overrides.turnId ? TurnId.make(overrides.turnId) : null, + ...(overrides.sequence !== undefined + ? { sequence: NonNegativeInt.make(overrides.sequence) } + : {}), }; } @@ -276,6 +285,23 @@ describe("deriveWorkflowRuns", () => { expect(run?.agentCounts).toEqual({ total: 3, queued: 0, running: 0, done: 1, error: 2 }); }); + it("applies a completion even when it sorts before its task.started", () => { + const completed = buildActivity({ + id: "completed-task-1", + kind: "task.completed", + createdAt: "2026-02-23T00:00:00.500Z", + sequence: 1, + payload: { taskId: "task-1", status: "completed", detail: "done" }, + }); + // Same-timestamp + inverted sequence (adopted runs can reset provider + // sequence): the started activity sorts after the completion. 
+ const started = { ...workflowStartedActivity("task-1"), sequence: NonNegativeInt.make(5) }; + const runs = deriveWorkflowRuns([completed, started]); + expect(runs).toHaveLength(1); + expect(runs[0]?.status).toBe("completed"); + expect(runs[0]?.name).toBe("spec"); + }); + it("keeps a running run untouched while the session is active", () => { const runs = deriveWorkflowRuns( [ diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts index 253db4edd31..c710a6977f9 100644 --- a/apps/web/src/workflow-logic.ts +++ b/apps/web/src/workflow-logic.ts @@ -335,6 +335,7 @@ export function deriveWorkflowRuns( options?: { readonly sessionActive?: boolean | undefined }, ): WorkflowRun[] { const ordered = [...activities].toSorted(compareWorkflowActivityOrder); + const workflowTaskIds = collectWorkflowTaskIds(activities); const runs = new Map(); const ensureRun = (taskId: string, activity: OrchestrationThreadActivity): MutableWorkflowRun => { @@ -439,10 +440,15 @@ export function deriveWorkflowRuns( break; } case "task.completed": { - const run = runs.get(taskId); - if (!run) { + // Order-robust terminal handling: a completion for a known workflow + // task creates the run if its task.started has not been applied yet + // (adopted runs can carry inverted provider sequences across CLI + // restarts); the later-applied started only fills metadata and can + // never resurrect a terminal status. 
+ if (!runs.has(taskId) && !workflowTaskIds.has(taskId)) { break; } + const run = ensureRun(taskId, activity); run.revision += 1; run.updatedAt = activity.createdAt; run.status = From 93cd36a9a0f94af0c79acaf8cdb5d9010bff7e9c Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 12:30:46 -0700 Subject: [PATCH 07/11] Show per-agent stats and clean up the transcript view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Forward the SDK snapshot's per-agent tokens / toolCalls / durationMs through the contract, adapter normalization, and view model; agent rows now show model plus "94.2k tok · 47 tools · 7m 03s" (duration once the agent settles), mirroring the Claude Code TUI - Rewrite the transcript renderer: only assistant text and tool calls render (tool rows as "→ Name input-preview"); user turns, tool results, attachments, and thinking are skipped instead of printing raw type names ("user attachment attachment") - Bound transcript memory: retain a 600-line tail with an "earlier activity trimmed" notice so million-token agent threads cannot grow client memory unbounded Co-Authored-By: Claude Fable 5 --- .../src/provider/Layers/ClaudeAdapter.ts | 9 ++ .../src/components/workflow/WorkflowPanel.tsx | 143 ++++++++++++------ .../src/components/workflow/workflowUi.tsx | 47 +++++- apps/web/src/workflow-logic.test.ts | 20 +++ apps/web/src/workflow-logic.ts | 6 + packages/contracts/src/workflow.ts | 5 + 6 files changed, 175 insertions(+), 55 deletions(-) diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index d1eb5bba0c8..93aaae9bce5 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -813,6 +813,15 @@ function normalizeWorkflowAgentEntry( ...(workflowClippedString(entry.error, MAX_WORKFLOW_PREVIEW_CHARS) !== undefined ? 
{ error: workflowClippedString(entry.error, MAX_WORKFLOW_PREVIEW_CHARS) } : {}), + ...(workflowFiniteNumber(entry.tokens) !== undefined + ? { tokens: workflowFiniteNumber(entry.tokens) } + : {}), + ...(workflowFiniteNumber(entry.toolCalls) !== undefined + ? { toolCalls: workflowFiniteNumber(entry.toolCalls) } + : {}), + ...(workflowFiniteNumber(entry.durationMs) !== undefined + ? { durationMs: workflowFiniteNumber(entry.durationMs) } + : {}), }; } diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx index b8ddca7bb00..a80ac4a8f01 100644 --- a/apps/web/src/components/workflow/WorkflowPanel.tsx +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -313,55 +313,86 @@ function ExpandableAgentRow({ // Transcript view (cursor-paged, polled while running) // --------------------------------------------------------------------------- -function extractAssistantText(parsed: unknown): string | null { +interface TranscriptEntry { + kind: "text" | "tool"; + text: string; +} + +const TRANSCRIPT_MAX_RETAINED_LINES = 600; +const TRANSCRIPT_TEXT_MAX_CHARS = 600; +const TRANSCRIPT_TOOL_PREVIEW_CHARS = 120; + +function clipTranscriptText(text: string, limit: number): string { + const trimmed = text.trim(); + return trimmed.length > limit ? `${trimmed.slice(0, limit)}\u2026` : trimmed; +} + +function toolInputPreview(input: unknown): string | undefined { + if (typeof input !== "object" || input === null) { + return undefined; + } + const record = input as Record; + // The most informative single field per common tool, else the first string. + const preferred = record.command ?? record.file_path ?? record.pattern ?? record.prompt; + const value = + typeof preferred === "string" + ? preferred + : Object.values(record).find((entry): entry is string => typeof entry === "string"); + return value !== undefined ? 
clipTranscriptText(value, TRANSCRIPT_TOOL_PREVIEW_CHARS) : undefined; +} + +/** + * Distill one transcript JSONL line into displayable entries. Only assistant + * text and tool calls render; user turns, tool results, attachments, thinking, + * and harness metadata are skipped — they dominate line counts on long runs + * and read as noise ("user", "attachment", ...) when printed raw. + */ +function parseTranscriptEntries(raw: string): TranscriptEntry[] { + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + return []; + } if (typeof parsed !== "object" || parsed === null) { - return null; + return []; } const record = parsed as Record; + if (record.type !== "assistant") { + return []; + } const message = typeof record.message === "object" && record.message !== null ? (record.message as Record) : record; - const role = record.type ?? message.role; - if (role !== "assistant") { - return null; - } const content = message.content; if (typeof content === "string") { - return content; + const text = clipTranscriptText(content, TRANSCRIPT_TEXT_MAX_CHARS); + return text.length > 0 ? 
[{ kind: "text", text }] : []; } - if (Array.isArray(content)) { - const parts: string[] = []; - for (const block of content) { - if (typeof block === "object" && block !== null) { - const record2 = block as Record; - if (record2.type === "text" && typeof record2.text === "string") { - parts.push(record2.text); - } - } - } - if (parts.length > 0) { - return parts.join("\n"); - } + if (!Array.isArray(content)) { + return []; } - return null; -} - -function renderTranscriptLine(raw: string): { text: string; dim: boolean } { - try { - const parsed: unknown = JSON.parse(raw); - const text = extractAssistantText(parsed); - if (text !== null && text.trim().length > 0) { - return { text, dim: false }; + const entries: TranscriptEntry[] = []; + for (const block of content) { + if (typeof block !== "object" || block === null) { + continue; + } + const blockRecord = block as Record; + if (blockRecord.type === "text" && typeof blockRecord.text === "string") { + const text = clipTranscriptText(blockRecord.text, TRANSCRIPT_TEXT_MAX_CHARS); + if (text.length > 0) { + entries.push({ kind: "text", text }); + } + } else if (blockRecord.type === "tool_use" && typeof blockRecord.name === "string") { + const preview = toolInputPreview(blockRecord.input); + entries.push({ + kind: "tool", + text: preview !== undefined ? `${blockRecord.name} ${preview}` : blockRecord.name, + }); } - const type = - typeof parsed === "object" && parsed !== null && "type" in parsed - ? 
String((parsed as { type: unknown }).type) - : "event"; - return { text: type, dim: true }; - } catch { - return { text: raw, dim: true }; } + return entries; } function AgentTranscriptView({ @@ -380,8 +411,10 @@ function AgentTranscriptView({ "workflow read transcript", ); const [lines, setLines] = useState([]); + const [trimmed, setTrimmed] = useState(false); const [failed, setFailed] = useState(false); const [loading, setLoading] = useState(false); + const transcriptEntries = useMemo(() => lines.flatMap(parseTranscriptEntries), [lines]); const nextLineRef = useRef(0); const completeRef = useRef(false); const loadingRef = useRef(false); @@ -408,7 +441,17 @@ function AgentTranscriptView({ nextLineRef.current = result.value.nextLine; completeRef.current = result.value.complete; if (result.value.lines.length > 0) { - setLines((prev) => [...prev, ...result.value.lines]); + // Long-running agents can produce transcripts far larger than a view + // needs — retain a bounded tail so memory stays flat on million-token + // threads. + setLines((prev) => { + const merged = [...prev, ...result.value.lines]; + if (merged.length > TRANSCRIPT_MAX_RETAINED_LINES) { + setTrimmed(true); + return merged.slice(-TRANSCRIPT_MAX_RETAINED_LINES); + } + return merged; + }); } }, [agentId, environmentId, runTranscript, transcriptDir]); @@ -449,31 +492,35 @@ function AgentTranscriptView({ className="mt-1 ml-4 max-h-72 overflow-y-auto rounded-md border border-border/60 bg-muted/30 p-1.5 font-mono text-[11px] leading-4" onClick={stopPropagation} > - {lines.length === 0 ? ( + {transcriptEntries.length === 0 ? ( failed ? (

Failed to load transcript.

) : loading ? (

Loading transcript…

) : ( -

No transcript output.

+

No assistant output yet.

) ) : ( - lines.map((line, index) => { - const parsed = renderTranscriptLine(line); - return ( + <> + {trimmed && ( +

+ Earlier activity trimmed — showing the latest entries. +

+ )} + {transcriptEntries.map((entry, index) => (
- {parsed.text} + {entry.kind === "tool" ? `→ ${entry.text}` : entry.text}
- ); - }) + ))} + )} {failed && lines.length > 0 && (

Failed to load more transcript.

diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx index 836bdb000d6..d47261a6a51 100644 --- a/apps/web/src/components/workflow/workflowUi.tsx +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -1,12 +1,14 @@ import type { ReactElement, ReactNode } from "react"; import { cn } from "~/lib/utils"; -import type { - WorkflowAgentStatus, - WorkflowRun, - WorkflowRunAgent, - WorkflowRunPhase, - WorkflowRunStatus, +import { + formatWorkflowDuration, + formatWorkflowTokens, + type WorkflowAgentStatus, + type WorkflowRun, + type WorkflowRunAgent, + type WorkflowRunPhase, + type WorkflowRunStatus, } from "~/workflow-logic"; // --------------------------------------------------------------------------- @@ -124,7 +126,28 @@ function AgentMetaBadges({ agent }: { agent: WorkflowRunAgent }): ReactElement | ); } -/** The shared inner content of an agent row: dot, label, badges, dimmed preview. */ +/** "94.2k tok · 47 tools · 7m 03s" — cumulative per-agent stats from the + * SDK snapshot; duration is shown once the agent settles. */ +export function agentStatsLabel(agent: WorkflowRunAgent): string | undefined { + const parts: string[] = []; + if (agent.tokens !== undefined && agent.tokens > 0) { + parts.push(`${formatWorkflowTokens(agent.tokens)} tok`); + } + if (agent.toolCalls !== undefined && agent.toolCalls > 0) { + parts.push(`${agent.toolCalls} ${agent.toolCalls === 1 ? "tool" : "tools"}`); + } + if ( + agent.durationMs !== undefined && + agent.durationMs > 0 && + (agent.status === "done" || agent.status === "error") + ) { + parts.push(formatWorkflowDuration(agent.durationMs)); + } + return parts.length > 0 ? parts.join(" · ") : undefined; +} + +/** The shared inner content of an agent row: dot, label, badges, dimmed + * preview, right-aligned model + stats. 
*/ export function AgentRowContent({ agent, leading, @@ -133,6 +156,7 @@ export function AgentRowContent({ leading?: ReactNode; }): ReactElement { const preview = agentPreviewText(agent); + const stats = agentStatsLabel(agent); return (
{leading} @@ -149,6 +173,15 @@ export function AgentRowContent({ {preview !== undefined && ( {preview} )} + {preview === undefined && } + {agent.model !== undefined && ( + + {agent.model} + + )} + {stats !== undefined && ( + {stats} + )}
); } diff --git a/apps/web/src/workflow-logic.test.ts b/apps/web/src/workflow-logic.test.ts index ee19347e4f7..df44f91c7ce 100644 --- a/apps/web/src/workflow-logic.test.ts +++ b/apps/web/src/workflow-logic.test.ts @@ -234,6 +234,26 @@ describe("deriveWorkflowRuns", () => { expect(runs[0]?.agentCounts.done).toBe(1); }); + it("parses per-agent tokens, tool calls, and duration from the snapshot", () => { + const runs = deriveWorkflowRuns([ + workflowStartedActivity("task-1"), + workflowUpdatedActivity("task-1", [ + { + type: "workflow_agent", + index: 0, + state: "done", + tokens: 94_200, + toolCalls: 47, + durationMs: 423_000, + }, + ]), + ]); + const agent = runs[0]?.phases.flatMap((phase) => phase.agents)[0]; + expect(agent?.tokens).toBe(94_200); + expect(agent?.toolCalls).toBe(47); + expect(agent?.durationMs).toBe(423_000); + }); + it("parses snake_case usage from the updated snapshot", () => { const runs = deriveWorkflowRuns([ workflowStartedActivity("task-1"), diff --git a/apps/web/src/workflow-logic.ts b/apps/web/src/workflow-logic.ts index c710a6977f9..d51561ec38e 100644 --- a/apps/web/src/workflow-logic.ts +++ b/apps/web/src/workflow-logic.ts @@ -38,6 +38,9 @@ export interface WorkflowRunAgent { promptPreview?: string | undefined; resultPreview?: string | undefined; error?: string | undefined; + tokens?: number | undefined; + toolCalls?: number | undefined; + durationMs?: number | undefined; } export interface WorkflowRunPhase { @@ -161,6 +164,9 @@ function parseAgentEntry(entry: Record): WorkflowRunAgent | und ? { resultPreview: asString(entry.resultPreview) } : {}), ...(asString(entry.error) !== undefined ? { error: asString(entry.error) } : {}), + ...(asNumber(entry.tokens) !== undefined ? { tokens: asNumber(entry.tokens) } : {}), + ...(asNumber(entry.toolCalls) !== undefined ? { toolCalls: asNumber(entry.toolCalls) } : {}), + ...(asNumber(entry.durationMs) !== undefined ? 
{ durationMs: asNumber(entry.durationMs) } : {}), }; } diff --git a/packages/contracts/src/workflow.ts b/packages/contracts/src/workflow.ts index 63a59bc2e70..d58f13fcce9 100644 --- a/packages/contracts/src/workflow.ts +++ b/packages/contracts/src/workflow.ts @@ -47,6 +47,11 @@ export const WorkflowAgentProgressEntry = Schema.Struct({ promptPreview: Schema.optional(Schema.String), resultPreview: Schema.optional(Schema.String), error: Schema.optional(Schema.String), + /** Cumulative output tokens, tool calls, and wall-clock duration for this + * agent, as reported by the SDK snapshot. */ + tokens: Schema.optional(Schema.Number), + toolCalls: Schema.optional(Schema.Number), + durationMs: Schema.optional(Schema.Number), }); export type WorkflowAgentProgressEntry = typeof WorkflowAgentProgressEntry.Type; From bc9ab83e29919fdd0f2665aa635b322b40a56142 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 12:42:15 -0700 Subject: [PATCH 08/11] Show live per-agent stats while running; drop inline row previews MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SDK emits tokens/toolCalls on every progress tick (verified against the bundled runtime), so agent rows now show them live; elapsed time for a running agent derives from lastProgressAt - startedAt, which advances tick-by-tick without a client timer. Settled agents keep the reported total duration. Agent rows no longer render the routine inline preview text (result and last-tool snippets) — the expandable transcript owns that detail. Error text stays inline since it explains a red row at a glance. 
Co-Authored-By: Claude Fable 5 --- .../src/components/workflow/workflowUi.tsx | 46 +++++++++---------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx index d47261a6a51..5fcb25b77b6 100644 --- a/apps/web/src/components/workflow/workflowUi.tsx +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -88,19 +88,6 @@ export function agentDisplayLabel(agent: WorkflowRunAgent): string { return agent.label ?? agent.agentType ?? `agent ${agent.index}`; } -export function agentPreviewText(agent: WorkflowRunAgent): string | undefined { - switch (agent.status) { - case "error": - return agent.error ?? agent.resultPreview; - case "done": - return agent.resultPreview; - case "running": - return agent.lastToolSummary ?? agent.promptPreview; - default: - return agent.promptPreview; - } -} - function AgentMetaBadges({ agent }: { agent: WorkflowRunAgent }): ReactElement | null { const badges: string[] = []; if (agent.cached) { @@ -127,7 +114,10 @@ function AgentMetaBadges({ agent }: { agent: WorkflowRunAgent }): ReactElement | } /** "94.2k tok · 47 tools · 7m 03s" — cumulative per-agent stats from the - * SDK snapshot; duration is shown once the agent settles. */ + * SDK snapshot. Tokens and tool counts update on every progress tick; the + * duration is the reported total once the agent settles, and the elapsed + * time between start and the latest tick while it runs (tick-driven, so no + * client timer is needed). */ export function agentStatsLabel(agent: WorkflowRunAgent): string | undefined { const parts: string[] = []; if (agent.tokens !== undefined && agent.tokens > 0) { @@ -136,18 +126,23 @@ export function agentStatsLabel(agent: WorkflowRunAgent): string | undefined { if (agent.toolCalls !== undefined && agent.toolCalls > 0) { parts.push(`${agent.toolCalls} ${agent.toolCalls === 1 ? 
"tool" : "tools"}`); } - if ( - agent.durationMs !== undefined && - agent.durationMs > 0 && - (agent.status === "done" || agent.status === "error") - ) { + const settled = agent.status === "done" || agent.status === "error"; + if (settled && agent.durationMs !== undefined && agent.durationMs > 0) { parts.push(formatWorkflowDuration(agent.durationMs)); + } else if ( + !settled && + agent.startedAt !== undefined && + agent.lastProgressAt !== undefined && + agent.lastProgressAt > agent.startedAt + ) { + parts.push(formatWorkflowDuration(agent.lastProgressAt - agent.startedAt)); } return parts.length > 0 ? parts.join(" · ") : undefined; } -/** The shared inner content of an agent row: dot, label, badges, dimmed - * preview, right-aligned model + stats. */ +/** The shared inner content of an agent row: dot, label, badges, + * right-aligned model + stats. Error text is the only inline content — + * routine previews live in the expandable transcript, not the row. */ export function AgentRowContent({ agent, leading, @@ -155,8 +150,8 @@ export function AgentRowContent({ agent: WorkflowRunAgent; leading?: ReactNode; }): ReactElement { - const preview = agentPreviewText(agent); const stats = agentStatsLabel(agent); + const errorText = agent.status === "error" ? agent.error : undefined; return (
{leading} @@ -170,10 +165,11 @@ export function AgentRowContent({ {agentDisplayLabel(agent)} - {preview !== undefined && ( - {preview} + {errorText !== undefined ? ( + {errorText} + ) : ( + )} - {preview === undefined && } {agent.model !== undefined && ( {agent.model} From a8ee943c92b0043d2b9d05bc80cc1a3021bb8e70 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 12:47:23 -0700 Subject: [PATCH 09/11] Two-line agent rows: wrapping labels, meta line beneath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long agent labels were shrink-0 and pushed the model/stats readout off the clipped right edge. Rows now stack: the label owns the first line and wraps freely, and model · stats · badges · error text sit on a muted meta line under it — nothing competes for horizontal space, so nothing clips regardless of label length. The chevron and status dot center against the first text line via fixed line-height boxes. Co-Authored-By: Claude Fable 5 --- .../src/components/workflow/workflowUi.tsx | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx index 5fcb25b77b6..92ebf9a764a 100644 --- a/apps/web/src/components/workflow/workflowUi.tsx +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -140,9 +140,12 @@ export function agentStatsLabel(agent: WorkflowRunAgent): string | undefined { return parts.length > 0 ? parts.join(" · ") : undefined; } -/** The shared inner content of an agent row: dot, label, badges, - * right-aligned model + stats. Error text is the only inline content — - * routine previews live in the expandable transcript, not the row. */ +/** + * The shared inner content of an agent row. 
Two-line layout: the label owns + * the first line and wraps freely (long workflow labels are common), and + * model / stats / badges / error text sit on a muted meta line beneath — + * nothing competes for horizontal space, so nothing clips. + */ export function AgentRowContent({ agent, leading, @@ -152,32 +155,34 @@ export function AgentRowContent({ }): ReactElement { const stats = agentStatsLabel(agent); const errorText = agent.status === "error" ? agent.error : undefined; + const metaLabel = [agent.model, stats].filter((part) => part !== undefined).join(" · "); + const hasBadges = agent.cached === true || (agent.attempt !== undefined && agent.attempt > 1); return ( -
- {leading} - - - {agentDisplayLabel(agent)} +
+ {/* Fixed line-height boxes center the affordances on the first text line. */} + {leading !== undefined && {leading}} + + - - {errorText !== undefined ? ( - {errorText} - ) : ( - - )} - {agent.model !== undefined && ( - - {agent.model} - - )} - {stats !== undefined && ( - {stats} - )} +
+
+ {agentDisplayLabel(agent)} +
+ {(metaLabel.length > 0 || hasBadges || errorText !== undefined) && ( +
+ {metaLabel.length > 0 && {metaLabel}} + + {errorText !== undefined && ( + {errorText} + )} +
+ )} +
); } From 3adf7567f7fdd5088e84fbea93c619d4e337910e Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 13:09:44 -0700 Subject: [PATCH 10/11] Address bugbot follow-ups: paging empty-state, error resultPreview MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - WorkflowPanel: track a render-visible caught-up flag so the transcript shows "Loading transcript…" until the first read reaches EOF — an absence of parsed entries mid-drain means still paging, not that the agent produced no output - workflowUi: error rows fall back to resultPreview when the snapshot carries no error field, so red rows always explain the failure inline Co-Authored-By: Claude Fable 5 --- apps/web/src/components/workflow/WorkflowPanel.tsx | 9 ++++++++- apps/web/src/components/workflow/workflowUi.tsx | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/apps/web/src/components/workflow/WorkflowPanel.tsx b/apps/web/src/components/workflow/WorkflowPanel.tsx index a80ac4a8f01..ad4daac8f4b 100644 --- a/apps/web/src/components/workflow/WorkflowPanel.tsx +++ b/apps/web/src/components/workflow/WorkflowPanel.tsx @@ -414,6 +414,10 @@ function AgentTranscriptView({ const [trimmed, setTrimmed] = useState(false); const [failed, setFailed] = useState(false); const [loading, setLoading] = useState(false); + // Render-visible mirror of completeRef: "have we caught up to EOF at least + // once" — before that, an absence of parsed entries just means we are + // still paging, not that the agent produced no output. 
+ const [caughtUp, setCaughtUp] = useState(false); const transcriptEntries = useMemo(() => lines.flatMap(parseTranscriptEntries), [lines]); const nextLineRef = useRef(0); const completeRef = useRef(false); @@ -440,6 +444,9 @@ function AgentTranscriptView({ setFailed(false); nextLineRef.current = result.value.nextLine; completeRef.current = result.value.complete; + if (result.value.complete) { + setCaughtUp(true); + } if (result.value.lines.length > 0) { // Long-running agents can produce transcripts far larger than a view // needs — retain a bounded tail so memory stays flat on million-token @@ -495,7 +502,7 @@ function AgentTranscriptView({ {transcriptEntries.length === 0 ? ( failed ? (

Failed to load transcript.

- ) : loading ? ( + ) : loading || !caughtUp ? (

Loading transcript…

) : (

No assistant output yet.

diff --git a/apps/web/src/components/workflow/workflowUi.tsx b/apps/web/src/components/workflow/workflowUi.tsx index 92ebf9a764a..f01b1593996 100644 --- a/apps/web/src/components/workflow/workflowUi.tsx +++ b/apps/web/src/components/workflow/workflowUi.tsx @@ -154,7 +154,9 @@ export function AgentRowContent({ leading?: ReactNode; }): ReactElement { const stats = agentStatsLabel(agent); - const errorText = agent.status === "error" ? agent.error : undefined; + // Failures often surface only in resultPreview (e.g. a thrown value the + // runner stringified) — fall back to it so red rows always explain why. + const errorText = agent.status === "error" ? (agent.error ?? agent.resultPreview) : undefined; const metaLabel = [agent.model, stats].filter((part) => part !== undefined).join(" · "); const hasBadges = agent.cached === true || (agent.attempt !== undefined && agent.attempt > 1); return ( From 0f16c5e6d90781d8073b0c0769ae7569f89066f1 Mon Sep 17 00:00:00 2001 From: Theo Browne Date: Thu, 2 Jul 2026 13:18:24 -0700 Subject: [PATCH 11/11] Deflake CursorAdapter interrupt test cross-snapshot assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test asserted the session/cancel request against the log snapshot returned by the *second* waitForJsonLogMatch, but only the first wait guaranteed that line — the two log appends are independent, so under CI load the second snapshot can miss (or tear) the cancel line and the assertion fails spuriously. Each assertion now checks the snapshot its own wait resolved on. This flake predates the branch (zero Cursor files are touched here) but kept failing this PR's Test job. 
Co-Authored-By: Claude Fable 5 --- apps/server/src/provider/Layers/CursorAdapter.test.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/server/src/provider/Layers/CursorAdapter.test.ts b/apps/server/src/provider/Layers/CursorAdapter.test.ts index 89e9c56eb8a..6b4e39dfc3a 100644 --- a/apps/server/src/provider/Layers/CursorAdapter.test.ts +++ b/apps/server/src/provider/Layers/CursorAdapter.test.ts @@ -1030,9 +1030,15 @@ cursorAdapterTestLayer("CursorAdapterLive", (it) => { entry.result.outcome !== null && "outcome" in entry.result.outcome && entry.result.outcome.outcome === "cancelled"; - yield* waitForJsonLogMatch(requestLogPath, (entry) => entry.method === "session/cancel"); + // Assert each condition against the snapshot its own wait resolved on: + // the two log appends are independent, so a snapshot that satisfies one + // predicate can miss (or tear) the other line under load. + const cancelRequests = yield* waitForJsonLogMatch( + requestLogPath, + (entry) => entry.method === "session/cancel", + ); + assert.isTrue(cancelRequests.some((entry) => entry.method === "session/cancel")); const requests = yield* waitForJsonLogMatch(requestLogPath, isCancelledApprovalResponse); - assert.isTrue(requests.some((entry) => entry.method === "session/cancel")); assert.isTrue(requests.some(isCancelledApprovalResponse)); yield* adapter.stopSession(threadId);