diff --git a/package.json b/package.json index cf3eb61..77dcf89 100644 --- a/package.json +++ b/package.json @@ -93,6 +93,7 @@ "fastify-type-provider-zod": "^6.1.0", "ioredis": "^5.10.1", "pino": "^10.3.1", + "prom-client": "^15.1.3", "zod": "^4.4.3" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 528e3ee..f707789 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -62,6 +62,9 @@ importers: pino: specifier: ^10.3.1 version: 10.3.1 + prom-client: + specifier: ^15.1.3 + version: 15.1.3 zod: specifier: ^4.4.3 version: 4.4.3 @@ -1685,6 +1688,9 @@ packages: bignumber.js@9.3.1: resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} + bintrees@1.0.2: + resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==} + bip39@3.1.0: resolution: {integrity: sha512-c9kiwdk45Do5GL0vJMe7tS95VjCii65mYAH7DfWl3uW8AVzXKQVUm64i3hzVybBDMp9r7j9iNxR85+ul8MdN/A==} @@ -3243,6 +3249,10 @@ packages: progress-events@1.0.1: resolution: {integrity: sha512-MOzLIwhpt64KIVN64h1MwdKWiyKFNc/S6BoYKPIVUHFg0/eIEyBulhWCgn678v/4c0ri3FdGuzXymNCv02MUIw==} + prom-client@15.1.3: + resolution: {integrity: sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==} + engines: {node: ^16 || ^18 || >=20} + prop-types@15.8.1: resolution: {integrity: sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==} @@ -3587,6 +3597,9 @@ packages: resolution: {integrity: sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==} engines: {node: '>=6'} + tdigest@0.1.2: + resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==} + thenify-all@1.6.0: resolution: {integrity: sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==} engines: {node: '>=0.8'} @@ -5652,6 
+5665,8 @@ snapshots: bignumber.js@9.3.1: {} + bintrees@1.0.2: {} + bip39@3.1.0: dependencies: '@noble/hashes': 1.8.0 @@ -7385,6 +7400,11 @@ snapshots: progress-events@1.0.1: {} + prom-client@15.1.3: + dependencies: + '@opentelemetry/api': 1.9.1 + tdigest: 0.1.2 + prop-types@15.8.1: dependencies: loose-envify: 1.4.0 @@ -7794,6 +7814,10 @@ snapshots: tapable@2.3.0: {} + tdigest@0.1.2: + dependencies: + bintrees: 1.0.2 + thenify-all@1.6.0: dependencies: thenify: 3.3.1 diff --git a/src/routes/agent-discovery.ts b/src/routes/agent-discovery.ts index e3e044d..68525d7 100644 --- a/src/routes/agent-discovery.ts +++ b/src/routes/agent-discovery.ts @@ -22,6 +22,7 @@ Disallow: /verify Disallow: /settle Disallow: /upload Disallow: /files/ +Disallow: /metrics Sitemap: /sitemap.xml `; diff --git a/src/routes/metrics.ts b/src/routes/metrics.ts new file mode 100644 index 0000000..389125f --- /dev/null +++ b/src/routes/metrics.ts @@ -0,0 +1,61 @@ +// Prometheus metrics endpoint. +// +// Exposes default Node.js process/runtime metrics (heap, GC, event loop) +// plus per-route HTTP request count + duration histogram, scrape-able by +// any Prometheus-compatible system. Mounted at GET /metrics. +// +// /metrics itself and /health are excluded from request tracking -- the +// former to avoid recursive accounting, the latter to keep liveness-probe +// noise out of latency percentiles. 
+
+import type { FastifyPluginCallback } from 'fastify';
+import fp from 'fastify-plugin';
+import { Counter, Histogram, Registry, collectDefaultMetrics } from 'prom-client';
+
+// Route patterns that are served normally but never recorded in the HTTP metrics.
+const SKIP_ROUTES = new Set(['/metrics', '/health']);
+
+// Fixed `route` label shared by every request that matched no registered route.
+// The raw URL must never be used as a label value: it is chosen by the caller,
+// so each distinct 404 path or query string would create a new time series on
+// both metrics and grow the registry without bound.
+const UNMATCHED_ROUTE = '__unmatched__';
+
+/**
+ * Registers Prometheus instrumentation on the given Fastify instance.
+ *
+ * - Creates a dedicated `Registry` with a `service="cardano402"` default label
+ *   and attaches prom-client's default metrics to it.
+ * - Adds an `onResponse` hook that records one counter increment and one
+ *   duration observation per completed request, labeled by HTTP method,
+ *   route pattern, and status code. Routes listed in `SKIP_ROUTES` are ignored.
+ * - Serves the registry contents at `GET /metrics`.
+ *
+ * Each registration builds its own registry, so separate server instances
+ * (for example one per test case) do not share metric state.
+ */
+const metricsPlugin: FastifyPluginCallback = (fastify, _options, done) => {
+  const registry = new Registry();
+  registry.setDefaultLabels({ service: 'cardano402' });
+  collectDefaultMetrics({ register: registry });
+
+  const httpDuration = new Histogram({
+    name: 'http_request_duration_seconds',
+    help: 'HTTP request duration in seconds, labeled by method, route, and status code',
+    labelNames: ['method', 'route', 'status_code'],
+    // Bucket upper bounds in seconds, spanning 5 ms to 10 s.
+    buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
+    registers: [registry],
+  });
+
+  const httpTotal = new Counter({
+    name: 'http_requests_total',
+    help: 'Total HTTP requests, labeled by method, route, and status code',
+    labelNames: ['method', 'route', 'status_code'],
+    registers: [registry],
+  });
+
+  fastify.addHook('onResponse', async (request, reply) => {
+    // Label by the registered route pattern (e.g. "/files/:cid") so that
+    // cardinality stays bounded. Requests that matched no route collapse onto
+    // a single fixed label instead of their raw URL.
+    const route = request.routeOptions?.url ?? UNMATCHED_ROUTE;
+    if (SKIP_ROUTES.has(route)) return;
+    const method = request.method;
+    const statusCode = String(reply.statusCode);
+    // reply.elapsedTime is in milliseconds; the histogram is in seconds.
+    const elapsedMs = reply.elapsedTime;
+    httpDuration.labels(method, route, statusCode).observe(elapsedMs / 1000);
+    httpTotal.labels(method, route, statusCode).inc();
+  });
+
+  fastify.get('/metrics', async (_req, reply) => {
+    const body = await registry.metrics();
+    return reply.type(registry.contentType).status(200).send(body);
+  });
+
+  done();
+};
+
+/**
+ * `metricsPlugin` wrapped with fastify-plugin, so the `onResponse` hook also
+ * observes routes registered outside this plugin on the same instance.
+ */
+export const metricsRoutesPlugin = fp(metricsPlugin, {
+  name: 'metrics-routes',
+  fastify: '5.x',
+});
diff --git a/src/server.ts b/src/server.ts
index 7f643f5..6100c1e 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -25,6 +25,7 @@ import { agentDiscoveryRoutesPlugin } from './routes/agent-discovery.js';
 import { demoRoutesPlugin } from './routes/demo.js';
 import { downloadRoutesPlugin } from './routes/download.js';
 import { healthRoutesPlugin } from './routes/health.js';
+import { metricsRoutesPlugin } from './routes/metrics.js';
 import { settleRoutesPlugin } from './routes/settle.js';
 import { statusRoutesPlugin } from './routes/status.js';
 import { supportedRoutesPlugin } from './routes/supported.js';
@@ -218,6 +219,10 @@ export async function createServer(options: CreateServerOptions): Promise {
+  let server: FastifyInstance;
+
+  beforeEach(async () => {
+    server = fastify({ logger: false });
+    await server.register(metricsRoutesPlugin);
+    // Sample routes for traffic that should be tracked
+    server.get('/sample', async () => ({ ok: true }));
+    server.get('/files/:cid', async (req) => ({ cid: (req.params as { cid: string }).cid }));
+    server.get('/health', async () => ({ status: 'ok' }));
+    await server.ready();
+  });
+
+  afterEach(async () => {
+    if (server) await server.close();
+  });
+
+  describe('GET /metrics', () => {
+    it('returns 200 with Prometheus text/plain content type', async () => {
+      const res = await server.inject({ method: 'GET', url: '/metrics' }); +
expect(res.statusCode).toBe(200); + expect(res.headers['content-type']).toContain('text/plain'); + expect(res.headers['content-type']).toContain('version=0.0.4'); + }); + + it('exposes default Node.js process metrics', async () => { + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).toMatch(/# HELP process_cpu_user_seconds_total/); + expect(res.body).toMatch(/# HELP nodejs_heap_size_total_bytes/); + expect(res.body).toMatch(/# HELP nodejs_eventloop_lag_seconds/); + }); + + it('exposes the http_requests_total counter and http_request_duration_seconds histogram', async () => { + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).toMatch(/# HELP http_requests_total/); + expect(res.body).toMatch(/# HELP http_request_duration_seconds/); + }); + + it('attaches a service="cardano402" default label', async () => { + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).toMatch(/service="cardano402"/); + }); + }); + + describe('HTTP request tracking', () => { + it('tracks the request count for tracked routes', async () => { + await server.inject({ method: 'GET', url: '/sample' }); + await server.inject({ method: 'GET', url: '/sample' }); + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).toMatch(/http_requests_total\{[^}]*route="\/sample"[^}]*\}\s+2/); + expect(res.body).toMatch( + /http_request_duration_seconds_count\{[^}]*route="\/sample"[^}]*\}\s+2/ + ); + }); + + it('uses the route pattern not the raw URL (bounded cardinality)', async () => { + await server.inject({ method: 'GET', url: '/files/abc123' }); + await server.inject({ method: 'GET', url: '/files/xyz789' }); + const res = await server.inject({ method: 'GET', url: '/metrics' }); + // Both calls collapse onto a single time series for the templated route + expect(res.body).toMatch(/http_requests_total\{[^}]*route="\/files\/:cid"[^}]*\}\s+2/); + // The raw cids are 
NOT present as labels (would explode cardinality) + expect(res.body).not.toMatch(/route="\/files\/abc123"/); + expect(res.body).not.toMatch(/route="\/files\/xyz789"/); + }); + + it('labels by method and status_code', async () => { + await server.inject({ method: 'GET', url: '/sample' }); + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).toMatch(/method="GET"/); + expect(res.body).toMatch(/status_code="200"/); + }); + }); + + describe('Excluded routes', () => { + it('does NOT track requests to /metrics (avoid recursive accounting)', async () => { + await server.inject({ method: 'GET', url: '/metrics' }); + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).not.toMatch(/http_requests_total\{[^}]*route="\/metrics"[^}]*\}/); + }); + + it('does NOT track requests to /health (liveness-probe noise)', async () => { + await server.inject({ method: 'GET', url: '/health' }); + await server.inject({ method: 'GET', url: '/health' }); + const res = await server.inject({ method: 'GET', url: '/metrics' }); + expect(res.body).not.toMatch(/http_requests_total\{[^}]*route="\/health"[^}]*\}/); + }); + }); +});