Skip to content
129 changes: 129 additions & 0 deletions actions/setup/js/send_otlp_span.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,111 @@ function buildOTLPPayload({ traceId, spanId, parentSpanId, spanName, startMs, en
});
}

// ---------------------------------------------------------------------------
// OTLP metrics payload builder and sender
// ---------------------------------------------------------------------------

/**
 * Assemble the OTLP/HTTP JSON body for one metric that carries a single Sum
 * data point.
 *
 * The result uses the `resourceMetrics` → `scopeMetrics` → `metrics` nesting
 * and is meant to be POSTed to `/v1/metrics`. The Sum is flagged monotonic
 * with cumulative aggregation temporality, and the value is written as
 * `asDouble`.
 *
 * @param {{
 *   name: string,
 *   description: string,
 *   unit: string,
 *   value: number,
 *   startMs: number,
 *   endMs: number,
 *   attributes: Array<{key: string, value: object}>,
 *   serviceName: string,
 *   scopeVersion?: string,
 *   resourceAttributes?: Array<{key: string, value: object}>,
 * }} opts
 *   - `startMs` / `endMs` become `startTimeUnixNano` / `timeUnixNano` via `toNanoString`.
 *   - `attributes` is attached to the data point as-is.
 *   - `serviceName`, `scopeVersion` and `resourceAttributes` are handed to
 *     `buildOTLPResourceAttributes` to produce the resource attributes.
 *   - `scopeVersion` additionally sets `scope.version`; the key is left out when it is falsy.
 * @returns {object} - Plain object to serialise as JSON and POST to `/v1/metrics`
 */
function buildOTLPMetricsPayload({ name, description, unit, value, startMs, endMs, attributes, serviceName, scopeVersion, resourceAttributes }) {
  const resource = {
    attributes: buildOTLPResourceAttributes(serviceName, scopeVersion, resourceAttributes),
  };

  // Instrumentation scope: the version key is only present when a version was supplied.
  const scope = { name: "gh-aw" };
  if (scopeVersion) {
    scope.version = scopeVersion;
  }

  const dataPoint = {
    attributes,
    startTimeUnixNano: toNanoString(startMs),
    timeUnixNano: toNanoString(endMs),
    asDouble: value,
  };

  const metric = {
    name,
    description,
    unit,
    sum: {
      dataPoints: [dataPoint],
      aggregationTemporality: 2, // AGGREGATION_TEMPORALITY_CUMULATIVE
      isMonotonic: true,
    },
  };

  return {
    resourceMetrics: [
      {
        resource,
        scopeMetrics: [{ scope, metrics: [metric] }],
      },
    ],
  };
}

/**
 * POST an OTLP metrics payload to `{endpoint}/v1/metrics`.
 *
 * Best-effort: every failure is reported through `console.warn` and never
 * thrown, because a metric export failure must not break the workflow. That
 * covers an endpoint that cannot be turned into a URL, headers that fail to
 * parse, a payload that cannot be serialised, a transport error, and a
 * response whose `ok` flag is false.
 *
 * @param {string} endpoint - Base URL; one trailing `/` is stripped before `/v1/metrics` is appended.
 * @param {object} payload - Body to serialise with `JSON.stringify`.
 * @param {{ headersOverride?: string }} [opts] - When `headersOverride` is
 *   `undefined`, the raw header string falls back to
 *   `process.env.OTEL_EXPORTER_OTLP_HEADERS` (or `""`); any other value is used as given.
 * @returns {Promise<void>}
 */
async function sendOTLPMetric(endpoint, payload, { headersOverride = undefined } = {}) {
  try {
    // URL building, header parsing and serialisation can all throw (non-string
    // endpoint, circular payload, ...). They run inside the try so the
    // "never throws" contract holds for direct callers as well.
    const url = endpoint.replace(/\/$/, "") + "/v1/metrics";
    const rawHeaders = headersOverride !== undefined ? headersOverride : process.env.OTEL_EXPORTER_OTLP_HEADERS || "";
    const extraHeaders = parseOTLPHeaders(rawHeaders);
    // Parsed headers are spread last, so they can override the default Content-Type.
    const headers = { "Content-Type": "application/json", ...extraHeaders };
    const body = JSON.stringify(payload);

    // When a proxy is configured for this endpoint the request goes through
    // the curl helper instead of fetch.
    const response = hasProxyConfigured(endpoint) ? sendOTLPViaCurl(url, headers, body) : await fetch(url, { method: "POST", headers, body });
    if (!response.ok) {
      console.warn(`OTLP metrics export failed: ${response.status} ${response.statusText}`);
    }
  } catch (err) {
    console.warn(`OTLP metrics export error: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Fan a single OTLP metrics payload out to every configured endpoint.
 *
 * All exports are started together and awaited with `Promise.allSettled`, so
 * one endpoint's outcome does not affect the others. An empty endpoint list is
 * a no-op.
 *
 * @param {OTLPEndpointEntry[]} endpoints - Each entry supplies `url` and optionally `headers`.
 * @param {object} payload - Forwarded unchanged to `sendOTLPMetric` for each endpoint.
 * @returns {Promise<void>}
 */
async function sendOTLPMetricToAllEndpoints(endpoints, payload) {
  if (endpoints.length === 0) {
    return;
  }

  // An entry without `headers` is sent with an explicit empty-string override,
  // so `headersOverride` is never `undefined` for per-endpoint exports.
  const exportTo = ({ url, headers }) => {
    const headersOverride = headers !== undefined ? headers : "";
    return sendOTLPMetric(url, payload, { headersOverride });
  };

  const pendingExports = endpoints.map(entry => exportTo(entry));
  await Promise.allSettled(pendingExports);
}

// ---------------------------------------------------------------------------
// Local JSONL mirror
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -2335,6 +2440,27 @@ async function sendJobConclusionSpan(spanName, options = {}) {

// Pass skipJSONL: true so sendOTLPToAllEndpoints/sendOTLPSpan don't double-write the mirror.
await sendOTLPToAllEndpoints(endpoints, payload, { skipJSONL: true });

// Emit AI credits as a native OTLP metric named gh_aw.aic (Sum, cumulative) so
// backends can aggregate, alert on, and dashboard AIC without custom field mappings.
// Only emitted from the job that owns token usage to avoid double-counting.
if (typeof aiCredits === "number" && aiCredits > 0 && jobEmitsOwnTokenUsage) {
const metricAttributes = [buildAttr("gh-aw.workflow.name", workflowName), buildAttr("gh-aw.run.id", runId), buildAttr("gh-aw.run.status", runStatus), buildAttr("gh-aw.job.name", jobName)];
if (engineId) metricAttributes.push(buildAttr("gh-aw.engine.id", engineId));
const aicMetricsPayload = buildOTLPMetricsPayload({
name: "gh_aw.aic",
description: "AI Credits consumed by this workflow run",
unit: "AIC",
value: aiCredits,
startMs,
endMs,
attributes: metricAttributes,
serviceName,
scopeVersion: version,
resourceAttributes,
});
await sendOTLPMetricToAllEndpoints(endpoints, aicMetricsPayload);
}
}

module.exports = {
Expand Down Expand Up @@ -2374,4 +2500,7 @@ module.exports = {
buildExperimentAttributes,
parseOTLPCustomAttributes,
buildCustomOTLPAttributes,
buildOTLPMetricsPayload,
sendOTLPMetric,
sendOTLPMetricToAllEndpoints,
};
156 changes: 156 additions & 0 deletions actions/setup/js/send_otlp_span.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const {
resolveEngineId,
parseOTLPCustomAttributes,
buildCustomOTLPAttributes,
buildOTLPMetricsPayload,
} = await import("./send_otlp_span.cjs");

const { readExperimentAssignments, EXPERIMENT_ASSIGNMENTS_PATH } = await import("./experiment_helpers.cjs");
Expand Down Expand Up @@ -6493,3 +6494,158 @@ describe("sendJobConclusionSpan custom attributes", () => {
expect(attrMap["langfuse.user.id"]).toBe("my-user-id");
});
});

// ---------------------------------------------------------------------------
// buildOTLPMetricsPayload
// ---------------------------------------------------------------------------

describe("buildOTLPMetricsPayload", () => {
  it("produces a valid OTLP resourceMetrics payload for a Sum metric", () => {
    const windowStartMs = 1_700_000_000_000;
    const windowEndMs = 1_700_000_060_000;

    const payload = buildOTLPMetricsPayload({
      name: "gh_aw.aic",
      description: "AI Credits consumed by this workflow run",
      unit: "AIC",
      value: 0.125,
      startMs: windowStartMs,
      endMs: windowEndMs,
      attributes: [buildAttr("gh-aw.workflow.name", "my-workflow")],
      serviceName: "gh-aw",
      scopeVersion: "1.2.3",
    });

    expect(payload).toHaveProperty("resourceMetrics");
    const resourceMetric = payload.resourceMetrics[0];

    // Resource level: service.name must be present and match the input.
    const serviceNameEntry = resourceMetric.resource.attributes.find(({ key }) => key === "service.name");
    expect(serviceNameEntry).toBeDefined();
    expect(serviceNameEntry.value.stringValue).toBe("gh-aw");

    // Scope level: fixed name plus the supplied version.
    const scopeMetric = resourceMetric.scopeMetrics[0];
    expect(scopeMetric.scope.name).toBe("gh-aw");
    expect(scopeMetric.scope.version).toBe("1.2.3");

    // Metric level: a monotonic, cumulative Sum.
    const aicMetric = scopeMetric.metrics[0];
    expect(aicMetric.name).toBe("gh_aw.aic");
    expect(aicMetric.unit).toBe("AIC");
    expect(aicMetric).toHaveProperty("sum");
    expect(aicMetric.sum.aggregationTemporality).toBe(2); // CUMULATIVE
    expect(aicMetric.sum.isMonotonic).toBe(true);

    // Data point: value and the nanosecond time window.
    const point = aicMetric.sum.dataPoints[0];
    expect(point.asDouble).toBe(0.125);
    expect(point.startTimeUnixNano).toBe(toNanoString(windowStartMs));
    expect(point.timeUnixNano).toBe(toNanoString(windowEndMs));

    // Data point: caller-supplied attributes are passed through.
    const workflowNameEntry = point.attributes.find(({ key }) => key === "gh-aw.workflow.name");
    expect(workflowNameEntry).toBeDefined();
    expect(workflowNameEntry.value.stringValue).toBe("my-workflow");
  });

  it("omits scope version when not provided", () => {
    const optsWithoutVersion = {
      name: "gh_aw.aic",
      description: "AIC",
      unit: "AIC",
      value: 1,
      startMs: 0,
      endMs: 1000,
      attributes: [],
      serviceName: "gh-aw",
    };

    const { scope } = buildOTLPMetricsPayload(optsWithoutVersion).resourceMetrics[0].scopeMetrics[0];
    expect(scope).not.toHaveProperty("version");
  });
});

// ---------------------------------------------------------------------------
// sendJobConclusionSpan — gh-aw.aic metric emission
// ---------------------------------------------------------------------------

describe("sendJobConclusionSpan gh-aw.aic metric", () => {
  // Single OTLP endpoint used by every test in this suite.
  const ENDPOINTS_JSON = JSON.stringify([{ url: "https://traces.example.com" }]);

  let readFileSpy;
  let statSpy;

  /** Build a Node-style "file not found" error for the fs mocks. */
  const enoent = () => Object.assign(new Error("ENOENT"), { code: "ENOENT" });

  /**
   * Make `fs.readFileSync` return `usage` (as JSON) for the agent usage file
   * and throw ENOENT for every other path.
   */
  const mockAgentUsage = usage => {
    readFileSpy.mockImplementation(filePath => {
      if (filePath === "/tmp/gh-aw/agent_usage.json") {
        return JSON.stringify(usage);
      }
      throw enoent();
    });
  };

  /** Replace global `fetch` with a mock that always resolves to a 200 OK response. */
  const stubFetchOk = () => {
    const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
    vi.stubGlobal("fetch", mockFetch);
    return mockFetch;
  };

  /** Select only the fetch calls aimed at `/v1/metrics` (as opposed to `/v1/traces`). */
  const metricsCallsOf = mockFetch => mockFetch.mock.calls.filter(([url]) => url.includes("/v1/metrics"));

  beforeEach(() => {
    vi.resetModules();
    process.env.INPUT_JOB_NAME = "agent";
    process.env.GH_AW_AGENT_CONCLUSION = "success";
    readFileSpy = vi.spyOn(fs, "readFileSync");
    statSpy = vi.spyOn(fs, "statSync").mockImplementation(() => {
      throw enoent();
    });
    // Default fixture: a run that consumed 0.5 AI credits.
    mockAgentUsage({ input_tokens: 1000, output_tokens: 200, ai_credits: 0.5 });
  });

  afterEach(() => {
    vi.unstubAllGlobals();
    readFileSpy.mockRestore();
    statSpy.mockRestore();
    delete process.env.INPUT_JOB_NAME;
    delete process.env.GH_AW_AGENT_CONCLUSION;
    delete process.env.GH_AW_AIC;
    delete process.env.GH_AW_OTLP_ENDPOINTS;
  });

  it("sends a /v1/metrics POST with gh_aw.aic when aiCredits > 0", async () => {
    const mockFetch = stubFetchOk();
    process.env.GH_AW_OTLP_ENDPOINTS = ENDPOINTS_JSON;

    await sendJobConclusionSpan("gh-aw.agent.conclusion", { startMs: 1_700_000_000_000 });

    // Identify the /v1/metrics call (separate from /v1/traces calls)
    const metricsCalls = metricsCallsOf(mockFetch);
    expect(metricsCalls.length).toBe(1);

    const metricsBody = JSON.parse(metricsCalls[0][1].body);
    expect(metricsBody).toHaveProperty("resourceMetrics");
    const metric = metricsBody.resourceMetrics[0].scopeMetrics[0].metrics[0];
    expect(metric.name).toBe("gh_aw.aic");
    expect(metric.unit).toBe("AIC");
    expect(metric.sum.isMonotonic).toBe(true);
    expect(metric.sum.dataPoints[0].asDouble).toBe(0.5);

    // Dimension attributes on the data point
    const dpAttrMap = Object.fromEntries(metric.sum.dataPoints[0].attributes.map(a => [a.key, a.value.stringValue ?? a.value.doubleValue]));
    expect(dpAttrMap["gh-aw.job.name"]).toBe("agent");
  });

  it("does not send a /v1/metrics POST when aiCredits is 0", async () => {
    const mockFetch = stubFetchOk();
    process.env.GH_AW_OTLP_ENDPOINTS = ENDPOINTS_JSON;
    mockAgentUsage({ input_tokens: 0, output_tokens: 0, ai_credits: 0 });

    await sendJobConclusionSpan("gh-aw.agent.conclusion", { startMs: 1_700_000_000_000 });

    expect(metricsCallsOf(mockFetch).length).toBe(0);
  });

  it("does not send a /v1/metrics POST for non-agent jobs", async () => {
    process.env.INPUT_JOB_NAME = "conclusion";
    const mockFetch = stubFetchOk();
    process.env.GH_AW_OTLP_ENDPOINTS = ENDPOINTS_JSON;
    // AIC is known via the environment, but this job does not own token usage.
    process.env.GH_AW_AIC = "0.5";

    await sendJobConclusionSpan("gh-aw.conclusion", { startMs: 1_700_000_000_000 });

    expect(metricsCallsOf(mockFetch).length).toBe(0);
  });
});
22 changes: 21 additions & 1 deletion docs/src/content/docs/reference/open-telemetry.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ These attributes appear on built-in workflow setup, agent, and conclusion spans
<tr><td><code>gh-aw.steering_event_count</code></td><td>Count of steering events recorded during the run.</td></tr>
<tr><td><code>gh-aw.action_minutes</code></td><td>Elapsed runtime converted to minutes.</td></tr>
<tr><td><code>gh-aw.tracker.id</code></td><td>Tracker identifier when present.</td></tr>
<tr><td><code>gh-aw.aic</code></td><td>AI credits consumed for the run when available.</td></tr>
<tr><td><code>gh-aw.aic</code></td><td>AI credits consumed for the run when available. Also emitted as a <code>gh_aw.aic</code> OTLP metric (see below).</td></tr>
<tr><td><code>gh-aw.turns</code></td><td>Total agent turns recorded for the run.</td></tr>
<tr><td><code>gh-aw.agent.conclusion</code></td><td>Normalized agent conclusion.</td></tr>
<tr><td><code>gh-aw.detection.conclusion</code></td><td>Detection subsystem conclusion when present.</td></tr>
Expand Down Expand Up @@ -249,6 +249,26 @@ These attributes are emitted when experiments are active for a run.
</tbody>
</table>

## OTLP metrics

In addition to span attributes, gh-aw emits a native OTLP metric to `/v1/metrics` for AIC, so backends can aggregate, alert on, and dashboard AIC without extracting values from span attributes.

<table>
<thead>
<tr>
<th>Metric name</th>
<th>Type</th>
<th>Unit</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr><td><code>gh_aw.aic</code></td><td>Sum (cumulative, monotonic)</td><td>AIC</td><td>AI Credits consumed by the workflow run. Emitted once per run from the job that owns token usage (<code>agent</code> or <code>detection</code>), and only when the AIC value is known and greater than 0.</td></tr>
</tbody>
</table>

Data point attributes: `gh-aw.workflow.name`, `gh-aw.run.id`, `gh-aw.run.status`, `gh-aw.job.name`, `gh-aw.engine.id` (when available).

## Trace files and artifacts

When observability is enabled, trace data is also mirrored to local JSONL files and uploaded in the <code>agent</code> artifact:
Expand Down
28 changes: 27 additions & 1 deletion specs/otel-observability-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ This section defines the attributes each span type MUST or MAY carry.
| `gh-aw.staged` | boolean | Staging flag |
| `gh-aw.trigger.*` | string | Trigger context (same fields as setup span) |
| `gh-aw.frontmatter.*` | string | Frontmatter metadata (same fields as setup span) |
| `gh-aw.aic` | double | AI credits consumed (AIC); emitted only when known and > 0 |
| `gh-aw.aic` | double | AI credits consumed (AIC); emitted only when known and > 0. Also emitted as OTLP metric `gh_aw.aic` (see §10.6). |
| `gh-aw.turns` | int | Number of agent turns |
| `gh-aw.agent.conclusion` | string | Agent job outcome |
| `gh-aw.detection.conclusion` | string | Threat detection outcome |
Expand Down Expand Up @@ -553,6 +553,32 @@ The fleet summary span (`gh-aw.outcome.summary`) aggregates all evaluated outcom
| `gh-aw.outcome.workflows` | string | Comma-separated distinct workflow names |
| `gh-aw.outcome.types` | string | Comma-separated distinct outcome types |

### 10.6 OTLP Metrics Signal

In addition to span attributes, gh-aw emits a native OTLP metric payload to `/v1/metrics` so that AIC is a first-class consumable metric for backends that support dashboarding and alerting on OTel metrics (e.g. Grafana, Datadog, Honeycomb).

#### Metrics emitted

| Metric name | Type | Unit | Description |
|---|---|---|---|
| `gh_aw.aic` | Sum (cumulative, monotonic) | AIC | AI Credits consumed by the workflow run. Emitted once from the job that owns token usage (`agent` or `detection`), and only when the AIC value is known and > 0. |

#### Data point attributes

| Attribute | Type | Description |
|---|---|---|
| `gh-aw.workflow.name` | string | Workflow name |
| `gh-aw.run.id` | string | GitHub Actions run ID |
| `gh-aw.run.status` | string | Final run status (`success`, `failure`, `timeout`, `cancelled`) |
| `gh-aw.job.name` | string | Job name (`agent` or `detection`) |
| `gh-aw.engine.id` | string | Engine identifier (when available) |

Resource attributes mirror those on conclusion spans (§10.2).

#### Aggregation temporality

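`AGGREGATION_TEMPORALITY_CUMULATIVE` — each data point represents the total AIC for a single workflow run. Because `gh-aw.run.id` and `gh-aw.job.name` are data point attributes, each emitting job of each run forms its own series. Backends should **sum** across runs to compute fleet totals, or average across runs to track per-run cost trends.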

### 10.7 MCP Gateway Span Attribute Contract

This section defines the attributes emitted by the MCP gateway (`gh-aw-mcpg`). These spans are emitted under the `mcp-gateway` service but share the workflow's trace ID (linked via `GITHUB_AW_OTEL_TRACE_ID` and `GITHUB_AW_OTEL_PARENT_SPAN_ID` passed to the gateway container per §6.3).
Expand Down
Loading