Skip to content

Commit bc05705

Browse files
committed
feat(supervisor): add COMPUTE_TRACE_OTLP_ENDPOINT override and demote remaining logs
Add optional COMPUTE_TRACE_OTLP_ENDPOINT env var to override the OTLP endpoint for supervisor-emitted spans (defaults to TRIGGER_API_URL/otel). Useful for sending spans to an OTel collector instead of the webapp. Also demotes remaining per-run logs in compute workload manager and workload server to debug/verbose.
1 parent f70be68 commit bc05705

File tree

6 files changed

+99
-91
lines changed

6 files changed

+99
-91
lines changed

apps/supervisor/src/env.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ const Env = z
8484
COMPUTE_GATEWAY_TIMEOUT_MS: z.coerce.number().int().default(30_000),
8585
COMPUTE_SNAPSHOTS_ENABLED: BoolEnv.default(false),
8686
COMPUTE_TRACE_SPANS_ENABLED: BoolEnv.default(true),
87+
COMPUTE_TRACE_OTLP_ENDPOINT: z.string().url().optional(), // Override for span export (derived from TRIGGER_API_URL if unset)
8788
COMPUTE_SNAPSHOT_DELAY_MS: z.coerce.number().int().min(0).max(60_000).default(5_000),
8889

8990
// Kubernetes settings
@@ -168,6 +169,10 @@ const Env = z
168169
path: ["TRIGGER_WORKLOAD_API_DOMAIN"],
169170
});
170171
}
171-
});
172+
})
173+
.transform((data) => ({
174+
...data,
175+
COMPUTE_TRACE_OTLP_ENDPOINT: data.COMPUTE_TRACE_OTLP_ENDPOINT ?? `${data.TRIGGER_API_URL}/otel`,
176+
}));
172177

173178
export const env = Env.parse(stdEnv);

apps/supervisor/src/otlpPayload.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { randomBytes } from "crypto";
2+
3+
export interface OtlpTraceOptions {
4+
traceId: string;
5+
parentSpanId?: string;
6+
spanName: string;
7+
startTimeMs: number;
8+
endTimeMs: number;
9+
resourceAttributes: Record<string, string | number | boolean>;
10+
spanAttributes: Record<string, string | number | boolean>;
11+
}
12+
13+
/** Build an OTLP JSON ExportTraceServiceRequest payload */
14+
export function buildOtlpTracePayload(opts: OtlpTraceOptions) {
15+
const spanId = randomBytes(8).toString("hex");
16+
17+
return {
18+
resourceSpans: [
19+
{
20+
resource: {
21+
attributes: [
22+
{ key: "$trigger", value: { boolValue: true } },
23+
...toOtlpAttributes(opts.resourceAttributes),
24+
],
25+
},
26+
scopeSpans: [
27+
{
28+
scope: { name: "supervisor.compute" },
29+
spans: [
30+
{
31+
traceId: opts.traceId,
32+
spanId,
33+
parentSpanId: opts.parentSpanId,
34+
name: opts.spanName,
35+
kind: 3, // SPAN_KIND_CLIENT
36+
startTimeUnixNano: String(opts.startTimeMs * 1_000_000),
37+
endTimeUnixNano: String(opts.endTimeMs * 1_000_000),
38+
attributes: toOtlpAttributes(opts.spanAttributes),
39+
status: { code: 1 }, // STATUS_CODE_OK
40+
},
41+
],
42+
},
43+
],
44+
},
45+
],
46+
};
47+
}
48+
49+
function toOtlpAttributes(
50+
attrs: Record<string, string | number | boolean>
51+
): Array<{ key: string; value: Record<string, unknown> }> {
52+
return Object.entries(attrs).map(([key, value]) => ({
53+
key,
54+
value: toOtlpValue(value),
55+
}));
56+
}
57+
58+
function toOtlpValue(value: string | number | boolean): Record<string, unknown> {
59+
if (typeof value === "string") return { stringValue: value };
60+
if (typeof value === "boolean") return { boolValue: value };
61+
if (Number.isInteger(value)) return { intValue: value };
62+
return { doubleValue: value };
63+
}

apps/supervisor/src/otlpTrace.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, it, expect } from "vitest";
2-
import { buildOtlpTracePayload } from "./otlpTrace.js";
2+
import { buildOtlpTracePayload } from "./otlpPayload.js";
33

44
describe("buildOtlpTracePayload", () => {
55
it("builds valid OTLP JSON with timing attributes", () => {

apps/supervisor/src/otlpTrace.ts

Lines changed: 6 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,19 @@
1-
import { randomBytes } from "crypto";
21
import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger";
2+
import { env } from "./env.js";
3+
import type { buildOtlpTracePayload } from "./otlpPayload.js";
34

45
const logger = new SimpleStructuredLogger("otlp-trace");
56

6-
export interface OtlpTraceOptions {
7-
traceId: string;
8-
parentSpanId?: string;
9-
spanName: string;
10-
startTimeMs: number;
11-
endTimeMs: number;
12-
resourceAttributes: Record<string, string | number | boolean>;
13-
spanAttributes: Record<string, string | number | boolean>;
14-
}
15-
16-
/** Build an OTLP JSON ExportTraceServiceRequest payload */
17-
export function buildOtlpTracePayload(opts: OtlpTraceOptions) {
18-
const spanId = randomBytes(8).toString("hex");
19-
20-
return {
21-
resourceSpans: [
22-
{
23-
resource: {
24-
attributes: [
25-
{ key: "$trigger", value: { boolValue: true } },
26-
...toOtlpAttributes(opts.resourceAttributes),
27-
],
28-
},
29-
scopeSpans: [
30-
{
31-
scope: { name: "supervisor.compute" },
32-
spans: [
33-
{
34-
traceId: opts.traceId,
35-
spanId,
36-
parentSpanId: opts.parentSpanId,
37-
name: opts.spanName,
38-
kind: 3, // SPAN_KIND_CLIENT
39-
startTimeUnixNano: String(opts.startTimeMs * 1_000_000),
40-
endTimeUnixNano: String(opts.endTimeMs * 1_000_000),
41-
attributes: toOtlpAttributes(opts.spanAttributes),
42-
status: { code: 1 }, // STATUS_CODE_OK
43-
},
44-
],
45-
},
46-
],
47-
},
48-
],
49-
};
50-
}
51-
52-
/** Fire-and-forget: send an OTLP trace payload to the collector */
53-
export function sendOtlpTrace(
54-
endpoint: string,
55-
payload: ReturnType<typeof buildOtlpTracePayload>
56-
) {
57-
fetch(`${endpoint}/v1/traces`, {
7+
/** Fire-and-forget: send an OTLP trace payload to the configured endpoint */
8+
export function sendOtlpTrace(payload: ReturnType<typeof buildOtlpTracePayload>) {
9+
fetch(`${env.COMPUTE_TRACE_OTLP_ENDPOINT}/v1/traces`, {
5810
method: "POST",
5911
headers: { "Content-Type": "application/json" },
6012
body: JSON.stringify(payload),
6113
signal: AbortSignal.timeout(5_000),
6214
}).catch((err) => {
63-
logger.warn("failed to send compute provision span", {
15+
logger.warn("failed to send compute trace span", {
6416
error: err instanceof Error ? err.message : String(err),
6517
});
6618
});
6719
}
68-
69-
function toOtlpAttributes(
70-
attrs: Record<string, string | number | boolean>
71-
): Array<{ key: string; value: Record<string, unknown> }> {
72-
return Object.entries(attrs).map(([key, value]) => ({
73-
key,
74-
value: toOtlpValue(value),
75-
}));
76-
}
77-
78-
function toOtlpValue(value: string | number | boolean): Record<string, unknown> {
79-
if (typeof value === "string") return { stringValue: value };
80-
if (typeof value === "boolean") return { boolValue: value };
81-
if (Number.isInteger(value)) return { intValue: value };
82-
return { doubleValue: value };
83-
}

apps/supervisor/src/workloadManager/compute.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import {
88
} from "./types.js";
99
import { env } from "../env.js";
1010
import { getRunnerId } from "../util.js";
11-
import { buildOtlpTracePayload, sendOtlpTrace } from "../otlpTrace.js";
11+
import { buildOtlpTracePayload } from "../otlpPayload.js";
12+
import { sendOtlpTrace } from "../otlpTrace.js";
1213
import { tryCatch } from "@trigger.dev/core";
1314

1415
type ComputeWorkloadManagerOptions = WorkloadManagerOptions & {
@@ -171,7 +172,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
171172
} finally {
172173
event.durationMs = Math.round(performance.now() - startMs);
173174
event.ok ??= false;
174-
this.logger.info("create instance", event);
175+
this.logger.debug("create instance", event);
175176
}
176177
}
177178

@@ -222,7 +223,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
222223
return false;
223224
}
224225

225-
this.logger.info("snapshot request accepted", { runnerId: opts.runnerId });
226+
this.logger.debug("snapshot request accepted", { runnerId: opts.runnerId });
226227
return true;
227228
}
228229

@@ -253,7 +254,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
253254
return false;
254255
}
255256

256-
this.logger.info("delete instance success", { runnerId });
257+
this.logger.debug("delete instance success", { runnerId });
257258
return true;
258259
}
259260

@@ -312,7 +313,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
312313
});
313314

314315
// Use the platform API URL, not the runner OTLP endpoint (which may be a VM gateway IP)
315-
sendOtlpTrace(`${env.TRIGGER_API_URL}/otel`, payload);
316+
sendOtlpTrace(payload);
316317
}
317318

318319
async restore(opts: {
@@ -347,7 +348,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
347348
memory_mb: opts.machine.memory * 1024,
348349
};
349350

350-
this.logger.debug("restore request body", { url, body });
351+
this.logger.verbose("restore request body", { url, body });
351352

352353
const startMs = performance.now();
353354

@@ -382,7 +383,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
382383
return false;
383384
}
384385

385-
this.logger.info("restore request success", {
386+
this.logger.debug("restore request success", {
386387
snapshotId: opts.snapshotId,
387388
runnerId: opts.runnerId,
388389
durationMs,
@@ -444,7 +445,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
444445
},
445446
});
446447

447-
sendOtlpTrace(`${env.TRIGGER_API_URL}/otel`, payload);
448+
sendOtlpTrace(payload);
448449
}
449450
}
450451

apps/supervisor/src/workloadServer/index.ts

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ import { env } from "../env.js";
2727
import type { ComputeWorkloadManager } from "../workloadManager/compute.js";
2828
import { TimerWheel } from "../services/timerWheel.js";
2929
import { parseTraceparent } from "@trigger.dev/core/v3/isomorphic";
30-
import { buildOtlpTracePayload, sendOtlpTrace } from "../otlpTrace.js";
30+
import { buildOtlpTracePayload } from "../otlpPayload.js";
31+
import { sendOtlpTrace } from "../otlpTrace.js";
3132

3233
// Use the official export when upgrading to socket.io@4.8.0
3334
interface DefaultEventsMap {
@@ -469,7 +470,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
469470
httpServer.route("/api/v1/compute/snapshot-complete", "POST", {
470471
bodySchema: ComputeSnapshotCallbackBody,
471472
handler: async ({ reply, body }) => {
472-
this.logger.info("Compute snapshot callback", {
473+
this.logger.debug("Compute snapshot callback", {
473474
snapshotId: body.snapshot_id,
474475
instanceId: body.instance_id,
475476
status: body.status,
@@ -504,7 +505,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
504505
});
505506

506507
if (result.success) {
507-
this.logger.info("Suspend completion submitted", {
508+
this.logger.debug("Suspend completion submitted", {
508509
runId,
509510
instanceId: body.instance_id,
510511
snapshotId: body.snapshot_id,
@@ -553,7 +554,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
553554
> = io.of("/workload");
554555

555556
websocketServer.on("disconnect", (socket) => {
556-
this.logger.log("[WS] disconnect", socket.id);
557+
this.logger.verbose("[WS] disconnect", socket.id);
557558
});
558559
websocketServer.use(async (socket, next) => {
559560
const setSocketDataFromHeader = (
@@ -635,23 +636,23 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
635636
socket.data.runFriendlyId = undefined;
636637
};
637638

638-
socketLogger.log("wsServer socket connected", { ...getSocketMetadata() });
639+
socketLogger.debug("wsServer socket connected", { ...getSocketMetadata() });
639640

640641
// FIXME: where does this get set?
641642
if (socket.data.runFriendlyId) {
642643
runConnected(socket.data.runFriendlyId);
643644
}
644645

645646
socket.on("disconnecting", (reason, description) => {
646-
socketLogger.log("Socket disconnecting", { ...getSocketMetadata(), reason, description });
647+
socketLogger.verbose("Socket disconnecting", { ...getSocketMetadata(), reason, description });
647648

648649
if (socket.data.runFriendlyId) {
649650
runDisconnected(socket.data.runFriendlyId);
650651
}
651652
});
652653

653654
socket.on("disconnect", (reason, description) => {
654-
socketLogger.log("Socket disconnected", { ...getSocketMetadata(), reason, description });
655+
socketLogger.debug("Socket disconnected", { ...getSocketMetadata(), reason, description });
655656
});
656657

657658
socket.on("error", (error) => {
@@ -672,7 +673,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
672673
...message,
673674
});
674675

675-
log.log("Handling run:start");
676+
log.debug("Handling run:start");
676677

677678
try {
678679
runConnected(message.run.friendlyId);
@@ -688,11 +689,13 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
688689
...message,
689690
});
690691

691-
log.log("Handling run:stop");
692+
log.debug("Handling run:stop");
692693

693694
try {
694695
runDisconnected(message.run.friendlyId);
695-
this.runTraceContexts.delete(message.run.friendlyId);
696+
// Don't delete trace context here - run:stop fires after each snapshot/shutdown
697+
// but the run may be restored on a new VM and snapshot again. Trace context is
698+
// re-populated on dequeue, and entries are small (4 strings per run).
696699
} catch (error) {
697700
log.error("run:stop error", { error });
698701
}
@@ -799,7 +802,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
799802
spanAttributes,
800803
});
801804

802-
sendOtlpTrace(`${env.TRIGGER_API_URL}/otel`, payload);
805+
sendOtlpTrace(payload);
803806
}
804807

805808
registerRunTraceContext(runFriendlyId: string, ctx: RunTraceContext) {

0 commit comments

Comments
 (0)