Skip to content

Commit fd33210

Browse files
committed
Add provider transport debug diagnostics
1 parent 0eef912 commit fd33210

1 file changed

Lines changed: 153 additions & 4 deletions

File tree

apps/web/src/components/ChatView.tsx

Lines changed: 153 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ const IMAGE_ONLY_BOOTSTRAP_PROMPT =
180180
"[User attached one or more images without additional text. Respond using the conversation context and the attached image(s).]";
181181
const EMPTY_ACTIVITIES: OrchestrationThreadActivity[] = [];
182182
const EMPTY_PROPOSED_PLANS: Thread["proposedPlans"] = [];
183-
const DEBUG_SNAPSHOT_VERSION = 4;
183+
const DEBUG_SNAPSHOT_VERSION = 5;
184184
const DEBUG_TEXT_PREVIEW_LIMIT = 240;
185185
const DEBUG_JSON_PREVIEW_LIMIT = 2_000;
186186
const DEBUG_RECENT_MESSAGE_LIMIT = 20;
@@ -191,12 +191,29 @@ const DEBUG_THREAD_DETAIL_MESSAGE_LIMIT = 2_000;
191191
const DEBUG_THREAD_DETAIL_ACTIVITY_LIMIT = 500;
192192
const DEBUG_LARGE_THREAD_TEXT_CHARS = 1_000_000;
193193
const DEBUG_LARGE_ACTIVITY_PAYLOAD_CHARS = 1_000_000;
194+
/**
 * Ordered `[pattern, replacement]` pairs used to scrub credential-shaped
 * substrings out of debug text before it is placed in a debug snapshot.
 *
 * Every pattern carries the `g` flag so that all occurrences in a string are
 * replaced, not just the first.
 *
 * Patterns whose character class contains non-word characters (`-`, `=`, `/`,
 * ...) deliberately have NO trailing `\b`: a trailing word boundary makes the
 * engine backtrack off a final `=` or `-`, which leaves the tail of the token
 * in the output. The greedy quantifier already consumes the whole token.
 */
const DEBUG_SECRET_REDACTIONS: ReadonlyArray<readonly [RegExp, string]> = [
  [/\bnpm_[A-Za-z0-9]{8,}\b/g, "npm_[redacted]"],
  [/\bsk-[A-Za-z0-9_-]{16,}/g, "sk-[redacted]"],
  [/\bgithub_pat_[A-Za-z0-9_]{20,}\b/g, "github_pat_[redacted]"],
  [/\bgh[pousr]_[A-Za-z0-9_]{16,}\b/g, "gh[redacted]"],
  [/\bxox[baprs]-[A-Za-z0-9-]{16,}/g, "xox[redacted]"],
  // `_` is part of the bearer token alphabet (base64url / JWT). Without it a
  // token containing an underscore could not satisfy the pattern and was left
  // completely unredacted. The auth scheme name is matched case-insensitively.
  [/\bBearer\s+[A-Za-z0-9._~+/=_-]{16,}/gi, "Bearer [redacted]"],
];

/**
 * Replaces every credential-shaped substring of `value` with a fixed
 * placeholder, applying each entry of `DEBUG_SECRET_REDACTIONS` in order.
 *
 * @param value Arbitrary text that may embed tokens or API keys.
 * @returns The text with every match replaced; unchanged when nothing matches.
 */
function redactDebugSecrets(value: string): string {
  let redacted = value;
  for (const [pattern, replacement] of DEBUG_SECRET_REDACTIONS) {
    // String.prototype.replace with a global regex starts from index 0 on
    // every call, so sharing the module-level RegExp objects is safe.
    redacted = redacted.replace(pattern, replacement);
  }
  return redacted;
}
194210

195211
/**
 * Redacts secrets from `value` and shortens the result for a debug preview.
 *
 * Redaction runs before truncation so a credential cannot be cut in half and
 * thereby escape the redaction patterns.
 *
 * @param value Raw text to include in a debug snapshot.
 * @param limit Maximum length in UTF-16 code units, ellipsis included.
 *   Defaults to `DEBUG_TEXT_PREVIEW_LIMIT`.
 * @returns The redacted text when it fits within `limit`; otherwise its
 *   leading part followed by a single `…`.
 */
function truncateDebugText(value: string, limit = DEBUG_TEXT_PREVIEW_LIMIT): string {
  const redacted = redactDebugSecrets(value);
  if (redacted.length <= limit) {
    return redacted;
  }
  let end = Math.max(0, limit - 1);
  if (end > 0) {
    // Never cut between the two halves of a surrogate pair: a kept prefix
    // ending in a high surrogate would leave a lone, unpaired code unit
    // (rendered as U+FFFD and invalid when encoded as UTF-8).
    const lastKept = redacted.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      end -= 1;
    }
  }
  return `${redacted.slice(0, end)}…`;
}
201218

202219
function stringifyDebugPreview(value: unknown, limit = DEBUG_JSON_PREVIEW_LIMIT): string {
@@ -224,6 +241,14 @@ function readDebugNumber(value: unknown): number | null {
224241
return typeof value === "number" && Number.isFinite(value) ? value : null;
225242
}
226243

244+
/**
 * Narrows an unknown value to a boolean.
 *
 * @returns `value` itself when it is a primitive boolean, otherwise `null`.
 */
function readDebugBoolean(value: unknown): boolean | null {
  if (typeof value !== "boolean") {
    return null;
  }
  return value;
}
247+
248+
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @returns `value` itself when it is a string with at least one character;
 *   `null` for the empty string and for every non-string value.
 */
function readDebugString(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  if (value.length === 0) {
    return null;
  }
  return value;
}
251+
227252
function summarizeDebugContextWindowActivity(activity: OrchestrationThreadActivity | null) {
228253
if (activity === null) {
229254
return null;
@@ -324,6 +349,118 @@ function summarizeDebugActivity(activity: OrchestrationThreadActivity) {
324349
};
325350
}
326351

352+
/**
 * Extracts the `attempt/limit` counters from a message containing
 * `Reconnecting... <attempt>/<limit>`.
 *
 * @param message Text to inspect, or `null` when no message is available.
 * @returns Both counters as numbers when the pattern is present. Both are
 *   `null` when `message` is `null` or does not match; a counter is also
 *   `null` when its digits do not parse to a finite number.
 */
function parseDebugRetryProgress(message: string | null): {
  readonly retryAttempt: number | null;
  readonly retryLimit: number | null;
} {
  // Converts one captured digit group to a finite number, or null.
  const toFiniteOrNull = (digits: string | undefined): number | null => {
    const parsed = Number.parseInt(digits ?? "", 10);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const found =
    message === null
      ? null
      : /(?:^|\b)Reconnecting\.\.\.\s+(\d+)\/(\d+)(?:\b|$)/.exec(message);

  if (found === null) {
    return { retryAttempt: null, retryLimit: null };
  }
  return {
    retryAttempt: toFiniteOrNull(found[1]),
    retryLimit: toFiniteOrNull(found[2]),
  };
}
370+
371+
/**
 * Builds a provider-transport debug entry from a single thread activity.
 *
 * Only `runtime.warning` and `runtime.error` activities are considered. Of
 * those, an entry is produced only when the activity shows at least one of:
 * a retry signal, a response-stream disconnect, or a mention of "websocket".
 *
 * Both free-text fields in the result (`message`, `additionalDetails`) go
 * through `truncateDebugText`, which redacts secrets and bounds the length.
 * All detection below runs on the raw text, before that sanitising step.
 *
 * @param activity The activity to inspect.
 * @returns The summary entry, or `null` when the activity is not a
 *   transport-related runtime event.
 */
function summarizeDebugProviderTransportActivity(activity: OrchestrationThreadActivity) {
  if (activity.kind !== "runtime.warning" && activity.kind !== "runtime.error") {
    return null;
  }

  // Walk payload.detail.error.codexErrorInfo.responseStreamDisconnected;
  // the optional chaining below tolerates a missing level at any depth.
  const payload = readDebugRecord(activity.payload);
  const detail = readDebugRecord(payload?.detail);
  const error = readDebugRecord(detail?.error);
  const codexErrorInfo = readDebugRecord(error?.codexErrorInfo);
  const responseStreamDisconnected = readDebugRecord(codexErrorInfo?.responseStreamDisconnected);

  // First non-empty string wins: payload message, then error message, then
  // the activity summary.
  const rawMessage =
    readDebugString(payload?.message) ??
    readDebugString(error?.message) ??
    readDebugString(activity.summary);
  const additionalDetails = readDebugString(error?.additionalDetails);
  const retryProgress = parseDebugRetryProgress(rawMessage);
  const retrying = readDebugBoolean(payload?.retrying) ?? false;
  const willRetry = readDebugBoolean(detail?.willRetry);

  const isResponseStreamDisconnected =
    responseStreamDisconnected !== null ||
    additionalDetails?.includes("stream disconnected before completion") === true;
  const isWebsocketTransportIssue =
    additionalDetails?.toLowerCase().includes("websocket") === true ||
    rawMessage?.toLowerCase().includes("websocket") === true;
  const isRetryEvent =
    retrying ||
    willRetry === true ||
    retryProgress.retryAttempt !== null ||
    rawMessage?.startsWith("Reconnecting...") === true;

  if (!isRetryEvent && !isResponseStreamDisconnected && !isWebsocketTransportIssue) {
    return null;
  }

  return {
    id: activity.id,
    kind: activity.kind,
    createdAt: activity.createdAt,
    turnId: activity.turnId,
    // Provider error text can embed credentials and be arbitrarily long, so it
    // is sanitised the same way as `additionalDetails`. Truncation keeps the
    // leading text, so prefix checks on `message` by consumers still work.
    message: rawMessage === null ? null : truncateDebugText(rawMessage, 500),
    retrying,
    willRetry,
    retryAttempt: retryProgress.retryAttempt,
    retryLimit: retryProgress.retryLimit,
    atRetryLimit:
      retryProgress.retryAttempt !== null &&
      retryProgress.retryLimit !== null &&
      retryProgress.retryAttempt >= retryProgress.retryLimit,
    responseStreamDisconnected: isResponseStreamDisconnected,
    httpStatusCode: readDebugNumber(responseStreamDisconnected?.httpStatusCode),
    additionalDetails:
      additionalDetails === null ? null : truncateDebugText(additionalDetails, 500),
  };
}
425+
426+
/**
 * Aggregates provider-transport diagnostics over a list of activities.
 *
 * Each activity is passed to `summarizeDebugProviderTransportActivity`;
 * activities for which it returns `null` are ignored. Counters and maxima are
 * accumulated in one pass, which avoids spreading an array into `Math.max`
 * (that can throw `RangeError` when the array is very large).
 *
 * @param activities Activities to scan, in the order they should be reported.
 * @param nowMs Current time in milliseconds, forwarded to `elapsedDebugMs`
 *   together with the latest event's `createdAt`.
 * @returns Event and retry counters, the highest retry attempt and retry limit
 *   seen (`null` when none was parsed), whether any event reached its retry
 *   limit, the last matching event, its age, and the trailing
 *   `DEBUG_RECENT_RUNTIME_EVENT_LIMIT` events.
 */
function summarizeDebugProviderTransport(
  activities: readonly OrchestrationThreadActivity[],
  nowMs: number,
) {
  type TransportEvent = NonNullable<ReturnType<typeof summarizeDebugProviderTransportActivity>>;

  const events: TransportEvent[] = [];
  let retryEventCount = 0;
  let responseStreamDisconnectedCount = 0;
  let maxRetryAttempt: number | null = null;
  let retryLimit: number | null = null;
  let atRetryLimit = false;

  for (const activity of activities) {
    const event = summarizeDebugProviderTransportActivity(activity);
    if (event === null) {
      continue;
    }
    events.push(event);

    if (event.responseStreamDisconnected) {
      responseStreamDisconnectedCount += 1;
    }
    if (
      event.retrying ||
      event.willRetry === true ||
      event.retryAttempt !== null ||
      event.message?.startsWith("Reconnecting...") === true
    ) {
      retryEventCount += 1;
    }
    if (event.retryAttempt !== null) {
      maxRetryAttempt =
        maxRetryAttempt === null
          ? event.retryAttempt
          : Math.max(maxRetryAttempt, event.retryAttempt);
    }
    if (event.retryLimit !== null) {
      retryLimit = retryLimit === null ? event.retryLimit : Math.max(retryLimit, event.retryLimit);
    }
    if (event.atRetryLimit) {
      atRetryLimit = true;
    }
  }

  const latest = events.at(-1) ?? null;

  return {
    eventCount: events.length,
    retryEventCount,
    responseStreamDisconnectedCount,
    maxRetryAttempt,
    retryLimit,
    atRetryLimit,
    latest,
    latestEventAgeMs: elapsedDebugMs(nowMs, latest?.createdAt),
    events: events.slice(-DEBUG_RECENT_RUNTIME_EVENT_LIMIT),
  };
}
463+
327464
function summarizeDebugTurnDiff(diff: TurnDiffSummary) {
328465
return {
329466
turnId: diff.turnId,
@@ -494,6 +631,7 @@ function summarizeDebugThreadPerformance(thread: Thread, nowMs: number) {
494631
(activity) => activity.kind === "runtime.warning" || activity.kind === "runtime.error",
495632
);
496633
const latestRuntimeActivity = latestTurnRuntimeActivities.at(-1) ?? null;
634+
const providerTransport = summarizeDebugProviderTransport(latestTurnRuntimeActivities, nowMs);
497635
const contextWindowActivities = thread.activities.filter(
498636
(activity) => activity.kind === "context-window.updated",
499637
);
@@ -544,6 +682,16 @@ function summarizeDebugThreadPerformance(thread: Thread, nowMs: number) {
544682
? "streaming-message-without-running-latest-turn"
545683
: null,
546684
latestTurnRuntimeActivities.length > 0 ? "latest-turn-runtime-warnings" : null,
685+
providerTransport.retryEventCount > 0 ? "provider-transport-retries" : null,
686+
providerTransport.responseStreamDisconnectedCount > 0
687+
? "provider-response-stream-disconnects"
688+
: null,
689+
providerTransport.atRetryLimit ? "provider-transport-at-retry-limit" : null,
690+
latestTurn?.state === "running" &&
691+
providerTransport.latestEventAgeMs !== null &&
692+
providerTransport.latestEventAgeMs >= 60_000
693+
? "running-turn-stalled-after-provider-transport-warning"
694+
: null,
547695
latestContextInputTokens !== null && latestContextInputTokens >= 100_000
548696
? "large-context-input-token-count"
549697
: null,
@@ -588,6 +736,7 @@ function summarizeDebugThreadPerformance(thread: Thread, nowMs: number) {
588736
latestActivity: latestActivity === null ? null : summarizeDebugActivity(latestActivity),
589737
latestRuntimeActivity:
590738
latestRuntimeActivity === null ? null : summarizeDebugActivity(latestRuntimeActivity),
739+
providerTransport,
591740
latestContextWindowActivity: summarizeDebugContextWindowActivity(latestContextWindowActivity),
592741
pressureFlags,
593742
};

0 commit comments

Comments
 (0)