From 60d2dc4e83550ddc8e0f11dd7eba8d837f946309 Mon Sep 17 00:00:00 2001 From: alexchenai Date: Thu, 7 May 2026 00:43:43 +0200 Subject: [PATCH] Add canonicalization-gap check + fix guide (check #24) Adds a probe check that fetches /.well-known/agent.json and /.well-known/agent-card.json, computes raw_sha256 vs canonical_sha256 (lex-sort + no whitespace + UTF-8) for each, and surfaces: fail -> the two paths canonicalize to different digests (clients reading different paths produce divergent on-chain commitments) warn -> a single path serves bytes that are not their own canonical form (raw_sha256 != canonical_sha256) pass -> manifest is already canonical, hash is stable for on-chain commitment Wiring: - new checkCanonicalizationGap() helper next to checkAgent() - added to onRequestPost Promise.all - added to FIX_GUIDES with Express drop-in recipe - bumped CHECK_SET to default-24@2026-05-06 Why this matters: the KYA framework scores agents on identity primitives including hash-stable manifests, but if the discovery surface emits two semantically equivalent but byte-different JSONs that two clients can hash to different digests, the downstream KYA score is anchored to a moving target. Same bug shape as ERC-8004 RegistrationFile feedback_hash: a verifier and an indexer must agree on which bytes hash to which digest, or trust collapses. Self-test against asterpay.io shows the live finding (issue #3): /.well-known/agent.json canonical_sha256 = a3e3b1a7... /.well-known/agent-card.json canonical_sha256 = 5570d70f... 
/**
 * Computes the SHA-256 digest of a byte buffer and returns it as lowercase hex.
 *
 * @param {ArrayBuffer|ArrayBufferView} bytes - Data to hash.
 * @returns {Promise<string>} 64-character hexadecimal digest.
 */
async function sha256Hex(bytes) {
  const digest = await crypto.subtle.digest('SHA-256', bytes);
  return Array.from(new Uint8Array(digest), (b) => b.toString(16).padStart(2, '0')).join('');
}

/**
 * Serializes a JSON-compatible value into a canonical string: object keys
 * sorted with the default `Array.prototype.sort` order, no insignificant
 * whitespace, primitives encoded by `JSON.stringify`.
 *
 * Values that JSON cannot represent (`undefined`, functions, symbols) follow
 * `JSON.stringify` semantics: they are omitted from objects and encoded as
 * `null` inside arrays, so the output is always valid JSON.
 *
 * @param {*} obj - Value to serialize (typically the result of `JSON.parse`).
 * @returns {string|undefined} Canonical JSON text, or `undefined` when the
 *   top-level value itself has no JSON representation.
 */
function canonicalizeJson(obj) {
  if (obj === null || typeof obj !== 'object') return JSON.stringify(obj);

  if (Array.isArray(obj)) {
    // Array.from (unlike map) also visits holes, so sparse arrays get `null` slots.
    const items = Array.from(obj, (item) => {
      const encoded = canonicalizeJson(item);
      return encoded === undefined ? 'null' : encoded;
    });
    return `[${items.join(',')}]`;
  }

  const members = [];
  for (const key of Object.keys(obj).sort()) {
    const encoded = canonicalizeJson(obj[key]);
    if (encoded !== undefined) members.push(`${JSON.stringify(key)}:${encoded}`);
  }
  return `{${members.join(',')}}`;
}

/**
 * Probes the two well-known agent manifest paths and reports whether the
 * bytes each one serves are already in canonical form, and whether both
 * paths canonicalize to the same digest.
 *
 * Result status:
 *  - `fail`: both paths returned a JSON manifest but their canonical digests differ.
 *  - `warn`: no manifest was found, or at least one manifest's served bytes
 *            hash differently from its canonical form.
 *  - `pass`: every manifest found is served in canonical form.
 *
 * `raw_sha256` is computed over the exact response bytes (not over re-encoded
 * decoded text), so it matches what an external tool hashing the HTTP body
 * sees, including a leading byte-order mark.
 *
 * @param {string} base - Origin to probe; the well-known paths are appended to it.
 * @returns {Promise<object>} Check result with `id`, `name`, `status`, `value`,
 *   `score`, `maxScore`, and (when at least one manifest was read) `latency`
 *   and `samples`.
 */
async function checkCanonicalizationGap(base) {
  const paths = ['/.well-known/agent.json', '/.well-known/agent-card.json'];

  // The two probes are independent, so run them concurrently; Promise.all
  // keeps the results in `paths` order.
  const probes = await Promise.all(paths.map(async (p) => {
    const { res, ok, latency } = await timedFetch(`${base}${p}`);
    if (!ok || !res || res.status !== 200) return { latency: 0, sample: null };

    // Latency is counted for every 200 response, even if the body is rejected below.
    const skipped = { latency: latency || 0, sample: null };

    const ct = (res.headers.get('content-type') || '').toLowerCase();
    if (!ct.includes('json')) return skipped;

    let rawBytes;
    try {
      rawBytes = new Uint8Array(await res.arrayBuffer());
    } catch {
      return skipped;
    }
    if (rawBytes.length === 0) return skipped;

    let canonical;
    try {
      // TextDecoder defaults to UTF-8 and drops a leading BOM, so parsing
      // still succeeds while the raw hash below keeps the BOM bytes.
      const parsed = JSON.parse(new TextDecoder().decode(rawBytes));
      if (parsed === null || typeof parsed !== 'object') return skipped;
      // Canonicalization recurses; a pathologically nested manifest can
      // overflow the stack. Treat that like an unparseable body instead of
      // letting it reject the whole audit.
      canonical = canonicalizeJson(parsed);
    } catch {
      return skipped;
    }

    const canonicalBytes = new TextEncoder().encode(canonical);
    const rawHash = await sha256Hex(rawBytes);
    const canonicalHash = await sha256Hex(canonicalBytes);
    return {
      latency: latency || 0,
      sample: {
        path: p,
        raw_bytes: rawBytes.length,
        canonical_bytes: canonicalBytes.length,
        raw_sha256: rawHash,
        canonical_sha256: canonicalHash,
        gap: rawHash !== canonicalHash,
      },
    };
  }));

  const totalLatency = probes.reduce((sum, probe) => sum + probe.latency, 0);
  const samples = probes.map((probe) => probe.sample).filter((sample) => sample !== null);

  if (samples.length === 0) {
    return { id: 'canonicalization', name: 'Manifest canonicalization', status: 'warn', value: 'No JSON agent manifest found at /.well-known/agent.json or /.well-known/agent-card.json', score: 0, maxScore: 10 };
  }

  if (samples.length >= 2) {
    const [a, b] = samples;
    if (a.canonical_sha256 !== b.canonical_sha256) {
      return {
        id: 'canonicalization',
        name: 'Manifest canonicalization',
        status: 'fail',
        value: `${a.path} and ${b.path} canonicalize to different digests (${a.canonical_sha256.slice(0,8)}… vs ${b.canonical_sha256.slice(0,8)}…) — clients reading different paths produce divergent on-chain commitments`,
        score: 0,
        maxScore: 10,
        latency: totalLatency,
        samples,
      };
    }
  }

  const gaps = samples.filter((s) => s.gap);
  if (gaps.length > 0) {
    const detail = gaps.map((s) => `${s.path}: raw_sha256=${s.raw_sha256.slice(0,8)}… ≠ canonical_sha256=${s.canonical_sha256.slice(0,8)}…`).join('; ');
    return {
      id: 'canonicalization',
      name: 'Manifest canonicalization',
      status: 'warn',
      value: `${detail} — two clients with different canonicalization recipes will hash this manifest to different digests`,
      score: 4,
      maxScore: 10,
      latency: totalLatency,
      samples,
    };
  }

  return {
    id: 'canonicalization',
    name: 'Manifest canonicalization',
    status: 'pass',
    value: `${samples.length} manifest(s) raw bytes already canonical (sorted keys, no whitespace) — stable hash for on-chain commitment`,
    score: 10,
    maxScore: 10,
    latency: totalLatency,
    samples,
  };
}
.join(',') + '}'; +} + +// Verify locally: +// curl -s https://your-agent.com/.well-known/agent.json | shasum -a 256 +// should match the sha256 of the canonical form your verifier computes`, + docs: 'https://chenecosystem.com/desk/canonicalization-gap-erc8004-may-5-2026', + }, wallet_trust: { title: 'Add a verifiable wallet with on-chain history', steps: [ @@ -1156,6 +1281,7 @@ export async function onRequestPost(context) { checkTravelRule(base), checkA2AProtocol(base), checkWalletTrust(base), + checkCanonicalizationGap(base), ]); const maxPossible = results.reduce((sum, c) => sum + (c.maxScore || 10), 0);