diff --git a/actions/setup/js/send_otlp_span.cjs b/actions/setup/js/send_otlp_span.cjs index b6f9b712029..7e22ea8dc26 100644 --- a/actions/setup/js/send_otlp_span.cjs +++ b/actions/setup/js/send_otlp_span.cjs @@ -593,6 +593,111 @@ function buildOTLPPayload({ traceId, spanId, parentSpanId, spanName, startMs, en }); } +// --------------------------------------------------------------------------- +// OTLP metrics payload builder and sender +// --------------------------------------------------------------------------- + +/** + * Build an OTLP/HTTP JSON metrics payload holding one metric with a single Sum data point. + * + * Produces a `resourceMetrics` payload ready to POST to `/v1/metrics`; `value` is written as `asDouble`. + * The Sum is marked `isMonotonic: true` with cumulative temporality (`aggregationTemporality: 2`), so + * `value` is expected to be a non-negative per-run total; this builder does not validate it. + * + * @param {{ + * name: string, + * description: string, + * unit: string, + * value: number, + * startMs: number, + * endMs: number, + * attributes: Array<{key: string, value: object}>, + * serviceName: string, + * scopeVersion?: string, + * resourceAttributes?: Array<{key: string, value: object}>, + * }} opts + * @returns {object} - Ready to be serialised as JSON and POSTed to `/v1/metrics` + */ +function buildOTLPMetricsPayload({ name, description, unit, value, startMs, endMs, attributes, serviceName, scopeVersion, resourceAttributes }) { + const resourceAttrs = buildOTLPResourceAttributes(serviceName, scopeVersion, resourceAttributes); + return { + resourceMetrics: [ + { + resource: { attributes: resourceAttrs }, + scopeMetrics: [ + { + scope: { name: "gh-aw", ...(scopeVersion ?
{ version: scopeVersion } : {}) }, + metrics: [ + { + name, + description, + unit, + sum: { + dataPoints: [ + { + attributes, + startTimeUnixNano: toNanoString(startMs), + timeUnixNano: toNanoString(endMs), + asDouble: value, + }, + ], + aggregationTemporality: 2, // AGGREGATION_TEMPORALITY_CUMULATIVE + isMonotonic: true, + }, + }, + ], + }, + ], + }, + ], + }; +} + +/** + * POST an OTLP metrics payload to `{endpoint}/v1/metrics` (one trailing slash on `endpoint` is stripped first). + * + * Non-OK responses and errors raised by the request are reported via `console.warn` and not rethrown — metric export + * failures must not break the workflow. URL, header and body preparation run before the `try`, so an error there still rejects. NOTE(review): the proxy branch uses the `sendOTLPViaCurl` result without `await` — confirm that helper is synchronous. + * + * @param {string} endpoint - Base OTLP endpoint URL. + * @param {object} payload - Metrics payload; serialised with `JSON.stringify`. + * @param {{ headersOverride?: string }} [opts] - `headersOverride`, when not `undefined`, replaces `OTEL_EXPORTER_OTLP_HEADERS` as the raw header string. + * @returns {Promise<void>} + */ +async function sendOTLPMetric(endpoint, payload, { headersOverride = undefined } = {}) { + const url = endpoint.replace(/\/$/, "") + "/v1/metrics"; + const rawHeaders = headersOverride !== undefined ? headersOverride : process.env.OTEL_EXPORTER_OTLP_HEADERS || ""; + const extraHeaders = parseOTLPHeaders(rawHeaders); + const headers = { "Content-Type": "application/json", ...extraHeaders }; + const body = JSON.stringify(payload); + try { + const response = hasProxyConfigured(endpoint) ? sendOTLPViaCurl(url, headers, body) : await fetch(url, { method: "POST", headers, body }); + if (!response.ok) { + console.warn(`OTLP metrics export failed: ${response.status} ${response.statusText}`); + } + } catch (err) { + console.warn(`OTLP metrics export error: ${err instanceof Error ? err.message : String(err)}`); + } +} + +/** + * Send one OTLP metrics payload to every configured endpoint concurrently; resolves once all sends have settled and is a no-op for an empty list. + * + * @param {OTLPEndpointEntry[]} endpoints - Each entry's `url` is the base endpoint; an entry without `headers` is sent with an empty header override, so `OTEL_EXPORTER_OTLP_HEADERS` is not consulted for it. + * @param {object} payload - Payload forwarded unchanged to `sendOTLPMetric`. + * @returns {Promise<void>} + */ +async function sendOTLPMetricToAllEndpoints(endpoints, payload) { + if (endpoints.length === 0) return; + await Promise.allSettled( + endpoints.map(ep => + sendOTLPMetric(ep.url, payload, { + headersOverride: ep.headers !== undefined ?
ep.headers : "", + }) + ) + ); +} + // --------------------------------------------------------------------------- // Local JSONL mirror // --------------------------------------------------------------------------- @@ -2335,6 +2440,27 @@ async function sendJobConclusionSpan(spanName, options = {}) { // Pass skipJSONL: true so sendOTLPToAllEndpoints/sendOTLPSpan don't double-write the mirror. await sendOTLPToAllEndpoints(endpoints, payload, { skipJSONL: true }); + + // Emit AI credits as the `gh_aw.aic` OTLP metric (Sum, cumulative) so backends can + // aggregate, alert on, and dashboard AIC without custom field mappings. + // Only emitted when aiCredits is a number > 0 and this job owns token usage, to avoid double-counting. + if (typeof aiCredits === "number" && aiCredits > 0 && jobEmitsOwnTokenUsage) { + const metricAttributes = [buildAttr("gh-aw.workflow.name", workflowName), buildAttr("gh-aw.run.id", runId), buildAttr("gh-aw.run.status", runStatus), buildAttr("gh-aw.job.name", jobName)]; + if (engineId) metricAttributes.push(buildAttr("gh-aw.engine.id", engineId)); + const aicMetricsPayload = buildOTLPMetricsPayload({ + name: "gh_aw.aic", + description: "AI Credits consumed by this workflow run", + unit: "AIC", + value: aiCredits, + startMs, + endMs, + attributes: metricAttributes, + serviceName, + scopeVersion: version, + resourceAttributes, + }); + await sendOTLPMetricToAllEndpoints(endpoints, aicMetricsPayload); + } } module.exports = { @@ -2374,4 +2500,7 @@ module.exports = { buildExperimentAttributes, parseOTLPCustomAttributes, buildCustomOTLPAttributes, + buildOTLPMetricsPayload, + sendOTLPMetric, + sendOTLPMetricToAllEndpoints, }; diff --git a/actions/setup/js/send_otlp_span.test.cjs b/actions/setup/js/send_otlp_span.test.cjs index 40c3a0af0d2..fcd89d57d6a 100644 --- a/actions/setup/js/send_otlp_span.test.cjs +++ b/actions/setup/js/send_otlp_span.test.cjs @@ -38,6 +38,7 @@ const { resolveEngineId, parseOTLPCustomAttributes, buildCustomOTLPAttributes, + buildOTLPMetricsPayload, } = await
import("./send_otlp_span.cjs"); const { readExperimentAssignments, EXPERIMENT_ASSIGNMENTS_PATH } = await import("./experiment_helpers.cjs"); @@ -6493,3 +6494,158 @@ describe("sendJobConclusionSpan custom attributes", () => { expect(attrMap["langfuse.user.id"]).toBe("my-user-id"); }); }); + +// --------------------------------------------------------------------------- +// buildOTLPMetricsPayload +// --------------------------------------------------------------------------- + +describe("buildOTLPMetricsPayload", () => { + it("produces a valid OTLP resourceMetrics payload for a Sum metric", () => { + const payload = buildOTLPMetricsPayload({ + name: "gh_aw.aic", + description: "AI Credits consumed by this workflow run", + unit: "AIC", + value: 0.125, + startMs: 1_700_000_000_000, + endMs: 1_700_000_060_000, + attributes: [buildAttr("gh-aw.workflow.name", "my-workflow")], + serviceName: "gh-aw", + scopeVersion: "1.2.3", + }); + + expect(payload).toHaveProperty("resourceMetrics"); + const rm = payload.resourceMetrics[0]; + // the resource attributes must carry service.name taken from serviceName + const serviceNameAttr = rm.resource.attributes.find(a => a.key === "service.name"); + expect(serviceNameAttr).toBeDefined(); + expect(serviceNameAttr.value.stringValue).toBe("gh-aw"); + + // instrumentation scope: fixed name plus the supplied scopeVersion + const sm = rm.scopeMetrics[0]; + expect(sm.scope.name).toBe("gh-aw"); + expect(sm.scope.version).toBe("1.2.3"); + + // metric identity and Sum settings + const metric = sm.metrics[0]; + expect(metric.name).toBe("gh_aw.aic"); + expect(metric.unit).toBe("AIC"); + expect(metric).toHaveProperty("sum"); + expect(metric.sum.aggregationTemporality).toBe(2); // 2 = AGGREGATION_TEMPORALITY_CUMULATIVE + expect(metric.sum.isMonotonic).toBe(true); + + // the single data point: value as asDouble, start/end converted with toNanoString + const dp = metric.sum.dataPoints[0]; + expect(dp.asDouble).toBe(0.125); + expect(dp.startTimeUnixNano).toBe(toNanoString(1_700_000_000_000)); + expect(dp.timeUnixNano).toBe(toNanoString(1_700_000_060_000)); + + // caller-supplied attributes are forwarded onto the data point + const wfAttr = dp.attributes.find(a => a.key
=== "gh-aw.workflow.name"); + expect(wfAttr).toBeDefined(); + expect(wfAttr.value.stringValue).toBe("my-workflow"); + }); + + it("omits scope version when not provided", () => { + const payload = buildOTLPMetricsPayload({ + name: "gh_aw.aic", + description: "AIC", + unit: "AIC", + value: 1, + startMs: 0, + endMs: 1000, + attributes: [], + serviceName: "gh-aw", + }); + const sm = payload.resourceMetrics[0].scopeMetrics[0]; + expect(sm.scope).not.toHaveProperty("version"); + }); +}); + +// --------------------------------------------------------------------------- +// sendJobConclusionSpan — gh_aw.aic OTLP metric emission +// --------------------------------------------------------------------------- + +describe("sendJobConclusionSpan gh-aw.aic metric", () => { + let readFileSpy; + let statSpy; + + beforeEach(() => { + vi.resetModules(); + process.env.INPUT_JOB_NAME = "agent"; + process.env.GH_AW_AGENT_CONCLUSION = "success"; + readFileSpy = vi.spyOn(fs, "readFileSync"); + statSpy = vi.spyOn(fs, "statSync").mockImplementation(() => { + throw Object.assign(new Error("ENOENT"), { code: "ENOENT" }); + }); + readFileSpy.mockImplementation(filePath => { + if (filePath === "/tmp/gh-aw/agent_usage.json") { + return JSON.stringify({ input_tokens: 1000, output_tokens: 200, ai_credits: 0.5 }); + } + throw Object.assign(new Error("ENOENT"), { code: "ENOENT" }); + }); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + readFileSpy.mockRestore(); + statSpy.mockRestore(); + delete process.env.INPUT_JOB_NAME; + delete process.env.GH_AW_AGENT_CONCLUSION; + delete process.env.GH_AW_AIC; + delete process.env.GH_AW_OTLP_ENDPOINTS; + }); + + it("sends a /v1/metrics POST with gh_aw.aic when aiCredits > 0", async () => { + const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); + vi.stubGlobal("fetch", mockFetch); + process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]); + + await
sendJobConclusionSpan("gh-aw.agent.conclusion", { startMs: 1_700_000_000_000 }); + + // Keep only the fetch calls whose URL contains /v1/metrics (separate from /v1/traces calls) + const metricsCalls = mockFetch.mock.calls.filter(([url]) => url.includes("/v1/metrics")); + expect(metricsCalls.length).toBe(1); + + const metricsBody = JSON.parse(metricsCalls[0][1].body); + expect(metricsBody).toHaveProperty("resourceMetrics"); + const metric = metricsBody.resourceMetrics[0].scopeMetrics[0].metrics[0]; + expect(metric.name).toBe("gh_aw.aic"); + expect(metric.unit).toBe("AIC"); + expect(metric.sum.isMonotonic).toBe(true); + expect(metric.sum.dataPoints[0].asDouble).toBe(0.5); + + // Data point attributes, flattened to key → stringValue (or doubleValue) + const dpAttrMap = Object.fromEntries(metric.sum.dataPoints[0].attributes.map(a => [a.key, a.value.stringValue ?? a.value.doubleValue])); + expect(dpAttrMap["gh-aw.job.name"]).toBe("agent"); + }); + + it("does not send a /v1/metrics POST when aiCredits is 0", async () => { + const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); + vi.stubGlobal("fetch", mockFetch); + process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]); + readFileSpy.mockImplementation(filePath => { + if (filePath === "/tmp/gh-aw/agent_usage.json") { + return JSON.stringify({ input_tokens: 0, output_tokens: 0, ai_credits: 0 }); + } + throw Object.assign(new Error("ENOENT"), { code: "ENOENT" }); + }); + + await sendJobConclusionSpan("gh-aw.agent.conclusion", { startMs: 1_700_000_000_000 }); + + const metricsCalls = mockFetch.mock.calls.filter(([url]) => url.includes("/v1/metrics")); + expect(metricsCalls.length).toBe(0); + }); + + it("does not send a /v1/metrics POST for non-agent jobs", async () => { + process.env.INPUT_JOB_NAME = "conclusion"; + const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); + vi.stubGlobal("fetch", mockFetch); + process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url:
"https://traces.example.com" }]); + process.env.GH_AW_AIC = "0.5"; + + await sendJobConclusionSpan("gh-aw.conclusion", { startMs: 1_700_000_000_000 }); + + const metricsCalls = mockFetch.mock.calls.filter(([url]) => url.includes("/v1/metrics")); + expect(metricsCalls.length).toBe(0); + }); +}); diff --git a/docs/src/content/docs/reference/open-telemetry.mdx b/docs/src/content/docs/reference/open-telemetry.mdx index 198df938a65..6814fcf4ec8 100644 --- a/docs/src/content/docs/reference/open-telemetry.mdx +++ b/docs/src/content/docs/reference/open-telemetry.mdx @@ -87,7 +87,7 @@ These attributes appear on built-in workflow setup, agent, and conclusion spans gh-aw.steering_event_countCount of steering events recorded during the run. gh-aw.action_minutesElapsed runtime converted to minutes. gh-aw.tracker.idTracker identifier when present. - gh-aw.aicAI credits consumed for the run when available. + gh-aw.aicAI credits consumed for the run when available. Also emitted as a gh_aw.aic OTLP metric (see below). gh-aw.turnsTotal agent turns recorded for the run. gh-aw.agent.conclusionNormalized agent conclusion. gh-aw.detection.conclusionDetection subsystem conclusion when present. @@ -249,6 +249,26 @@ These attributes are emitted when experiments are active for a run. +## OTLP metrics + +In addition to span attributes, gh-aw emits a native OTLP metric to `/v1/metrics` for AIC so backends can aggregate, alert, and dashboard without needing to extract values from span attributes. + + + + + + + + + + + + + +
+| Metric name | Type | Unit | Description |
+|---|---|---|---|
+| `gh_aw.aic` | Sum (cumulative, monotonic) | AIC | AI Credits consumed by the workflow run. Emitted once per run from the job that owns token usage (`agent` or `detection`). |
+ +Data point attributes: `gh-aw.workflow.name`, `gh-aw.run.id`, `gh-aw.run.status`, `gh-aw.job.name`, `gh-aw.engine.id` (when available). + ## Trace files and artifacts When observability is enabled, trace data is also mirrored to local JSONL files and uploaded in the agent artifact: diff --git a/specs/otel-observability-spec.md b/specs/otel-observability-spec.md index 17c57e77e75..33296970606 100644 --- a/specs/otel-observability-spec.md +++ b/specs/otel-observability-spec.md @@ -454,7 +454,7 @@ This section defines the attributes each span type MUST or MAY carry. | `gh-aw.staged` | boolean | Staging flag | | `gh-aw.trigger.*` | string | Trigger context (same fields as setup span) | | `gh-aw.frontmatter.*` | string | Frontmatter metadata (same fields as setup span) | -| `gh-aw.aic` | double | AI credits consumed (AIC); emitted only when known and > 0 | +| `gh-aw.aic` | double | AI credits consumed (AIC); emitted only when known and > 0. Also emitted as OTLP metric `gh_aw.aic` (see §10.6). | | `gh-aw.turns` | int | Number of agent turns | | `gh-aw.agent.conclusion` | string | Agent job outcome | | `gh-aw.detection.conclusion` | string | Threat detection outcome | @@ -553,6 +553,32 @@ The fleet summary span (`gh-aw.outcome.summary`) aggregates all evaluated outcom | `gh-aw.outcome.workflows` | string | Comma-separated distinct workflow names | | `gh-aw.outcome.types` | string | Comma-separated distinct outcome types | +### 10.6 OTLP Metrics Signal + +In addition to span attributes, gh-aw emits a native OTLP metric payload to `/v1/metrics` so that AIC is a first-class consumable metric for backends that support dashboarding and alerting on OTel metrics (e.g. Grafana, Datadog, Honeycomb). + +#### Metrics emitted + +| Metric name | Type | Unit | Description | +|---|---|---|---| +| `gh_aw.aic` | Sum (cumulative, monotonic) | AIC | AI Credits consumed by the workflow run. Emitted once from the job that owns token usage (`agent` or `detection`). 
| + +#### Data point attributes + +| Attribute | Type | Description | +|---|---|---| +| `gh-aw.workflow.name` | string | Workflow name | +| `gh-aw.run.id` | string | GitHub Actions run ID | +| `gh-aw.run.status` | string | Final run status (`success`, `failure`, `timeout`, `cancelled`) | +| `gh-aw.job.name` | string | Job name (`agent` or `detection`) | +| `gh-aw.engine.id` | string | Engine identifier (when available) | + +Resource attributes mirror those on conclusion spans (§10.2). + +#### Aggregation temporality + +`AGGREGATION_TEMPORALITY_CUMULATIVE` — each data point represents the total AIC for that single workflow run. Backends should **sum** across runs to compute fleet totals, or average to track per-run cost trends. + ### 10.7 MCP Gateway Span Attribute Contract This section defines the attributes emitted by the MCP gateway (`gh-aw-mcpg`). These spans are emitted under the `mcp-gateway` service but share the workflow's trace ID (linked via `GITHUB_AW_OTEL_TRACE_ID` and `GITHUB_AW_OTEL_PARENT_SPAN_ID` passed to the gateway container per §6.3).