diff --git a/.prettierignore b/.prettierignore index 88af386..67a4f0f 100644 --- a/.prettierignore +++ b/.prettierignore @@ -5,3 +5,8 @@ apps/web/vendor/ apps/mcp/server.bundle.js apps/web/.vercel/ package-lock.json +# Plan/spec documents contain embedded TypeScript pseudo-code that prettier +# mis-parses into semantically-broken output (e.g. `{ a: 'x' | 'y', b }` is +# parsed as a comma expression). Keep them author-formatted. +docs/superpowers/plans/ +docs/superpowers/specs/ diff --git a/apps/api/app.test.ts b/apps/api/app.test.ts index b1625d3..be5f920 100644 --- a/apps/api/app.test.ts +++ b/apps/api/app.test.ts @@ -45,6 +45,7 @@ function fakePage( overrides: Partial<{ id: string; spec: unknown; + format: 'a2ui' | 'html'; state: 'open' | 'submitted' | 'received'; result: unknown; }> = {}, @@ -52,6 +53,7 @@ function fakePage( return { id: overrides.id ?? 'aabbccddeeff00112233445566778899', spec: overrides.spec ?? { anything: 1 }, + format: overrides.format ?? ('a2ui' as const), state: overrides.state ?? 'open', result: overrides.result ?? 
null, createdAt: Date.now(), @@ -103,7 +105,7 @@ describe('POST /new', () => { expect(calledPage.state).toBe('open'); }); - it('rejects bodies over 256 KB with 413', async () => { + it('rejects A2UI bodies over 256 KB with 413 (post-parse cap)', async () => { const body = JSON.stringify({ spec: 'x'.repeat(300_000) }); const res = await app.fetch( new Request(`${BASE}/new`, { @@ -115,13 +117,13 @@ describe('POST /new', () => { expect(res.status).toBe(413); const resBody = await json(res); expect(resBody.error).toBe('payload_too_large'); - expect(resBody.max_bytes).toBe(MAX_BODY_BYTES); + expect(resBody.max_bytes).toBe(256_000); + expect(resBody.format).toBe('a2ui'); expect(typeof resBody.message).toBe('string'); - expect(resBody.message as string).toContain(String(MAX_BODY_BYTES)); expect(db.insertPage).not.toHaveBeenCalled(); }); - it('accepts a body just under 256 KB', async () => { + it('accepts an A2UI body just under 256 KB', async () => { const body = JSON.stringify({ spec: 'x'.repeat(250_000) }); const res = await app.fetch( new Request(`${BASE}/new`, { @@ -133,7 +135,7 @@ describe('POST /new', () => { expect(res.status).toBe(201); }); - it('413 body.message references the byte limit', async () => { + it('413 body.message references the A2UI byte limit', async () => { const body = JSON.stringify({ spec: 'x'.repeat(300_000) }); const res = await app.fetch( new Request(`${BASE}/new`, { @@ -145,7 +147,102 @@ describe('POST /new', () => { expect(res.status).toBe(413); const resBody = await json(res); expect(typeof resBody.message).toBe('string'); - expect(resBody.message as string).toContain(String(MAX_BODY_BYTES)); + expect(resBody.message as string).toContain('256'); + }); + + it('bodyLimit middleware rejects any body > 1 MB with 413', async () => { + // 1 MB is the absolute bodyLimit cap (per spec, HTML's true ceiling). + // Stuff in a JSON string that pushes the wire body past 1 MB. 
+ const body = JSON.stringify({ spec: 'x'.repeat(1_050_000) }); + const res = await app.fetch( + new Request(`${BASE}/new`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body, + }), + ); + expect(res.status).toBe(413); + const resBody = await json(res); + expect(resBody.error).toBe('payload_too_large'); + expect(resBody.max_bytes).toBe(MAX_BODY_BYTES); + expect(MAX_BODY_BYTES).toBe(1_000_000); + }); +}); + +// --------------------------------------------------------------------------- +// POST /new with format=html +// --------------------------------------------------------------------------- + +describe('POST /new with format=html', () => { + it('accepts an HTML payload and returns 201 with { id, url, expires_at }', async () => { + const res = await app.fetch( + req('POST', '/new', { + format: 'html', + spec: '

Hello

World

', + }), + ); + expect(res.status).toBe(201); + const body = await json(res); + expect((body.id as string) ?? '').toMatch(/^[a-f0-9]{32}$/); + expect(typeof body.url).toBe('string'); + expect(typeof body.expires_at).toBe('number'); + }); + + it('passes format=html and a sanitized spec to db.insertPage', async () => { + await app.fetch( + req('POST', '/new', { + format: 'html', + spec: '
safe
', + }), + ); + expect(db.insertPage).toHaveBeenCalledOnce(); + const [page] = vi.mocked(db.insertPage).mock.calls[0]; + expect(page.format).toBe('html'); + expect(typeof page.spec).toBe('string'); + expect(page.spec as string).not.toContain('safe'); + }); + + it('rejects HTML payloads > 1 MB with 413 payload_too_large', async () => { + const big = 'a'.repeat(1_000_001); + const res = await app.fetch(req('POST', '/new', { format: 'html', spec: big })); + // The wire body (with JSON wrapping) exceeds the 1 MB bodyLimit, so the + // bodyLimit middleware fires before Zod parsing and returns 413. + expect(res.status).toBe(413); + const body = await json(res); + expect(body.error).toBe('payload_too_large'); + }); + + it('accepts A2UI payloads with implicit default format (backwards compat)', async () => { + const res = await app.fetch( + req('POST', '/new', { spec: [{ createSurface: { surfaceId: 'm' } }] }), + ); + expect(res.status).toBe(201); + }); + + it('rejects A2UI payloads > 256 KB even when body limit allows up to 1 MB', async () => { + // Use a value below the 1 MB bodyLimit but above the 256 KB A2UI cap. + const big = 'x'.repeat(300_000); + const res = await app.fetch(req('POST', '/new', { spec: big })); + expect(res.status).toBe(413); + const body = await json(res); + expect(body.error).toBe('payload_too_large'); + expect(body.format).toBe('a2ui'); + }); + + it('returns 400 sanitized_empty when sanitization yields empty output', async () => { + // Pure forbidden tags — DOMPurify strips everything, leaving an empty + // string. The handler must reject with a clear error rather than store + // an empty HTML page. 
+ const res = await app.fetch( + req('POST', '/new', { format: 'html', spec: '' }), + ); + expect(res.status).toBe(400); + const body = await json(res); + expect(body.error).toBe('sanitized_empty'); + expect(body.format).toBe('html'); + expect(typeof body.message).toBe('string'); + expect(db.insertPage).not.toHaveBeenCalled(); }); }); @@ -196,6 +293,10 @@ describe('POST /:id/result', () => { // returned 409 "already submitted" for a page that was merely expired. // The fixed SELECT adds `expires_at > now()`, making expired rows return // 'not_found' → 404, which is the correct user-facing response. + // The handler reads the page first via getActivePage for the format guard; + // mock it to return an active a2ui page so submitPage's 'not_found' outcome + // is what we exercise here (e.g. row expired between the two reads). + (db.getActivePage as ReturnType).mockResolvedValueOnce(fakePage()); (db.submitPage as ReturnType).mockResolvedValueOnce({ kind: 'not_found' }); const res = await app.fetch(req('POST', `/${UNKNOWN_ID}/result`, validAction)); expect(res.status).toBe(404); @@ -205,6 +306,7 @@ describe('POST /:id/result', () => { it('returns 200 and calls db.submitPage when page is open', async () => { const page = fakePage(); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); (db.submitPage as ReturnType).mockResolvedValueOnce({ kind: 'ok', createdAt: new Date(), @@ -218,6 +320,7 @@ describe('POST /:id/result', () => { it('returns 409 on conflict (already submitted)', async () => { const page = fakePage({ state: 'submitted' }); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); (db.submitPage as ReturnType).mockResolvedValueOnce({ kind: 'conflict' }); const res = await app.fetch(req('POST', `/${page.id}/result`, validAction)); expect(res.status).toBe(409); @@ -229,6 +332,7 @@ describe('POST /:id/result', () => { it('409 conflict body.message mentions creating a new page', async () => { const page = fakePage({ state: 'submitted' }); + 
(db.getActivePage as ReturnType).mockResolvedValueOnce(page); (db.submitPage as ReturnType).mockResolvedValueOnce({ kind: 'conflict' }); const res = await app.fetch(req('POST', `/${page.id}/result`, validAction)); const body = await json(res); @@ -236,7 +340,8 @@ describe('POST /:id/result', () => { }); it('returns 400 for result body with name: "" (empty name)', async () => { - // Validation happens before db call, so no need to stub submitPage + // Page must exist (a2ui) so we reach the body-parse stage. + (db.getActivePage as ReturnType).mockResolvedValueOnce(fakePage()); const res = await app.fetch(req('POST', `/${UNKNOWN_ID}/result`, { name: '', surfaceId: 'x' })); expect(res.status).toBe(400); const body = await json(res); @@ -253,18 +358,21 @@ describe('GET /:id/result', () => { (db.fetchAndAdvanceResult as ReturnType).mockResolvedValueOnce({ stateAtRead: 'open', result: null, + format: 'a2ui', }); const res = await app.fetch(req('GET', `/${UNKNOWN_ID}/result`)); expect(res.status).toBe(200); const body = await json(res); expect(body.state).toBe('open'); expect(body.result).toBeNull(); + expect(body.format).toBe('a2ui'); }); it('returns submitted result after POST /:id/result', async () => { (db.fetchAndAdvanceResult as ReturnType).mockResolvedValueOnce({ stateAtRead: 'submitted', result: validAction, + format: 'a2ui', }); const res = await app.fetch(req('GET', `/${UNKNOWN_ID}/result`)); expect(res.status).toBe(200); @@ -279,6 +387,7 @@ describe('GET /:id/result', () => { (db.fetchAndAdvanceResult as ReturnType).mockResolvedValueOnce({ stateAtRead: 'received', result: validAction, + format: 'a2ui', }); const res = await app.fetch(req('GET', `/${UNKNOWN_ID}/result`)); expect(res.status).toBe(200); @@ -293,6 +402,72 @@ describe('GET /:id/result', () => { }); }); +// --------------------------------------------------------------------------- +// format echo + HTML result handling +// --------------------------------------------------------------------------- + 
+describe('format echo and HTML result handling', () => { + it('GET /:id echoes format=html for an HTML page', async () => { + const page = fakePage({ format: 'html', spec: '

x

' }); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); + const res = await app.fetch(req('GET', `/${page.id}`)); + expect(res.status).toBe(200); + const body = await json(res); + expect(body.format).toBe('html'); + }); + + it('GET /:id echoes format=a2ui for an A2UI page', async () => { + const page = fakePage({ format: 'a2ui' }); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); + const res = await app.fetch(req('GET', `/${page.id}`)); + expect(res.status).toBe(200); + const body = await json(res); + expect(body.format).toBe('a2ui'); + }); + + it('GET /:id/result includes format on every response', async () => { + (db.fetchAndAdvanceResult as ReturnType).mockResolvedValueOnce({ + stateAtRead: 'open', + result: null, + format: 'html', + }); + const res = await app.fetch(req('GET', `/${UNKNOWN_ID}/result`)); + expect(res.status).toBe(200); + const body = await json(res); + expect(body.format).toBe('html'); + expect(body.state).toBe('open'); + expect(body.result).toBeNull(); + }); + + it('POST /:id/result rejects HTML pages with 400 invalid_for_format', async () => { + const page = fakePage({ format: 'html', spec: '

x

' }); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); + const res = await app.fetch( + req('POST', `/${page.id}/result`, { name: 'submitted', surfaceId: 'main' }), + ); + expect(res.status).toBe(400); + const body = await json(res); + expect(body.error).toBe('invalid_for_format'); + expect(body.format).toBe('html'); + expect(db.submitPage).not.toHaveBeenCalled(); + }); + + it('POST /:id/result still works for A2UI pages (regression)', async () => { + const page = fakePage({ format: 'a2ui' }); + (db.getActivePage as ReturnType).mockResolvedValueOnce(page); + (db.submitPage as ReturnType).mockResolvedValueOnce({ + kind: 'ok', + createdAt: new Date(), + }); + const res = await app.fetch( + req('POST', `/${page.id}/result`, { name: 'submitted', surfaceId: 'main' }), + ); + expect(res.status).toBe(200); + const body = await json(res); + expect(body.ok).toBe(true); + }); +}); + // --------------------------------------------------------------------------- // GET /health // --------------------------------------------------------------------------- @@ -479,6 +654,9 @@ describe('error handler', () => { }); it('submitPage throw on POST /:id/result returns 500', async () => { + // Page-existence gate runs first; mock it to return an a2ui page so + // the throw on submitPage is what we actually exercise. 
+ (db.getActivePage as ReturnType).mockResolvedValueOnce(fakePage()); (db.submitPage as ReturnType).mockRejectedValueOnce(new Error('db write failed')); const res = await app.fetch(req('POST', `/${UNKNOWN_ID}/result`, validAction)); expect(res.status).toBe(500); diff --git a/apps/api/app.ts b/apps/api/app.ts index 5e86404..96a310d 100644 --- a/apps/api/app.ts +++ b/apps/api/app.ts @@ -12,6 +12,7 @@ import { trace } from '@opentelemetry/api'; import * as db from './db.ts'; import * as store from './store.ts'; import { clientKey } from './client-key.ts'; +import { HTML_MAX_BYTES } from './limits.ts'; import { env, pageIdSchema, newPageBodySchema, resultBodySchema } from './schemas.ts'; import { logger } from './logger.ts'; import { metrics, statusClassFor } from './metrics.ts'; @@ -38,7 +39,12 @@ export const PUBLIC_URL = env.PUBLIC_URL ?? `http://localhost:${PORT}`; export const PAGE_TTL_MS = env.PAGE_TTL_MS; export const ALLOWED_ORIGINS = env.ALLOWED_ORIGINS; -export const MAX_BODY_BYTES = 256 * 1024; // 256 KB +// The absolute body cap matches HTML_MAX_BYTES — the bodyLimit middleware +// enforces it on the wire body so HTML payloads at the spec'd 1 MB ceiling +// pass through cleanly. The historical 256 KB cap for A2UI specs is enforced +// post-parse in newPageHandler. Re-exported for tests / external callers. +export const MAX_BODY_BYTES = HTML_MAX_BYTES; +export const A2UI_MAX_SPEC_BYTES = 256_000; const newPageLimiter = rateLimiter({ windowMs: env.RATE_LIMIT_WINDOW_MS, @@ -210,7 +216,50 @@ const newPageHandler = async (c: Context) => { 400, ); } - const created = await store.createPage(result.data.spec, { + const { format, spec } = result.data; + + if (format === 'a2ui') { + // Enforce the historical 256 KB cap on A2UI specs; HTML uses the full 1 MB. + // The bodyLimit middleware lets us inspect the parsed value here without + // double-paying for the read. + const serialized = JSON.stringify(spec ?? 
null); + if (Buffer.byteLength(serialized, 'utf8') > A2UI_MAX_SPEC_BYTES) { + return c.json( + { + error: 'payload_too_large', + format: 'a2ui', + max_bytes: A2UI_MAX_SPEC_BYTES, + message: `A2UI spec exceeds the ${A2UI_MAX_SPEC_BYTES}-byte limit`, + }, + 413, + ); + } + } + + if (format === 'html') { + try { + const created = await store.createHtmlPage( + spec as string, + { publicUrl: PUBLIC_URL, pageTtlMs: PAGE_TTL_MS }, + getLog(c), + ); + return c.json(created, 201); + } catch (err) { + if (err instanceof store.SanitizedEmptyError) { + return c.json( + { + error: 'sanitized_empty', + format: 'html', + message: err.message, + }, + 400, + ); + } + throw err; + } + } + + const created = await store.createPage(spec, format, { publicUrl: PUBLIC_URL, pageTtlMs: PAGE_TTL_MS, }); @@ -225,6 +274,7 @@ const getPageHandler = async (c: Context) => { if (!p) return c.json({ error: 'not_found', message: 'Page not found or expired' }, 404); return c.json({ spec: p.spec, + format: p.format, state: p.state, result: p.result, expires_at: p.expiresAt, @@ -235,6 +285,27 @@ const submitResultHandler = async (c: Context) => { const idResult = pageIdSchema.safeParse(c.req.param('id')); if (!idResult.success) return c.json({ error: 'not_found', message: 'Page not found or expired' }, 404); + + // Format check happens before body parse to fail fast on HTML pages. HTML + // pages are view-only — there is no submit pipeline for them. 
+ const page = await db.getActivePage(idResult.data); + if (!page) return c.json({ error: 'not_found', message: 'Page not found or expired' }, 404); + if (page.format === 'html') { + return c.json( + { + error: 'invalid_for_format', + format: page.format, + message: 'POST /:id/result is not supported for format=html; HTML pages are view-only', + }, + 400, + ); + } + // Future formats: TypeScript exhaustiveness check — if PageFormat grows a new + // variant, this assignment fails to typecheck and forces maintainers to + // either handle the format above or remove it from the discriminated union. + const _exhaustive: 'a2ui' = page.format; + void _exhaustive; + const raw = await c.req.json().catch(() => null); const bodyResult = resultBodySchema.safeParse(raw); if (!bodyResult.success) { @@ -271,7 +342,11 @@ const getResultHandler = async (c: Context) => { const outcome = await store.advanceResult(idResult.data); if (outcome.kind === 'not_found') return c.json({ error: 'not_found', message: 'Page not found or expired' }, 404); - return c.json({ state: outcome.state, result: outcome.result }); + return c.json({ + state: outcome.state, + result: outcome.result, + format: outcome.format, + }); }; // --- Routes ------------------------------------------------------------------ diff --git a/apps/api/db.test.ts b/apps/api/db.test.ts index 6a26b6e..0184e1a 100644 --- a/apps/api/db.test.ts +++ b/apps/api/db.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { withRetry, getActivePage } from './db'; +import type { Page, PageFormat } from './db'; describe('withRetry', () => { beforeEach(() => { @@ -122,6 +123,7 @@ describe('getActivePage retry semantics', () => { const fakeRow = { id: 'test-id', spec: { type: 'test' }, + format: 'a2ui' as const, state: 'open' as const, result: null, created_at: new Date(1000), @@ -152,6 +154,7 @@ describe('getActivePage retry semantics', () => { return { id: r.id, spec: r.spec, + format: 
r.format, state: r.state, result: r.result, createdAt: r.created_at.getTime(), @@ -175,3 +178,77 @@ describe('getActivePage retry semantics', () => { expect(typeof getActivePage).toBe('function'); }); }); + +// --------------------------------------------------------------------------- +// Page format column — structural tests (no live DB) +// --------------------------------------------------------------------------- +// Real DB round-trips are out of scope for the unit suite (postgres URL is a +// placeholder in vitest.config.ts). Instead, we assert that the type surface +// carries `format`, that a row projection including `format` produces a Page +// with the expected discriminator, and that PageFormat is the closed union. + +// Type test: PageFormat is a closed union — assigning anything outside +// 'a2ui' | 'html' must fail to typecheck. The `@ts-expect-error` directive +// does the verification at compile time; npm run typecheck will fail if the +// union ever loosens. No runtime assertion is needed (and would be misleading +// since 'pdf' === 'pdf' is trivially true at runtime). +// @ts-expect-error — only 'a2ui' | 'html' should typecheck. +const _pageFormatClosedUnion: PageFormat = 'pdf'; +void _pageFormatClosedUnion; + +describe('Page format column (structural)', () => { + it('PageFormat accepts the two allowed string literals', () => { + const a: PageFormat = 'a2ui'; + const h: PageFormat = 'html'; + expect(a).toBe('a2ui'); + expect(h).toBe('html'); + }); + + it('Page accepts format and exposes it on the read shape', () => { + const p: Page = { + id: 'aabbccddeeff00112233445566778899', + spec: { foo: 1 }, + format: 'html', + state: 'open', + result: null, + createdAt: 1, + expiresAt: 2, + }; + expect(p.format).toBe('html'); + }); + + it('row projection from a select including `format` maps to Page.format', async () => { + // Mirrors getActivePage's mapping over the retry path. 
If the SELECT + // were to drop `format`, the projection would lose it; this regression + // test ensures the mapping is wired both ways. + type Row = { + id: string; + spec: unknown; + format: PageFormat; + state: 'open'; + result: unknown; + created_at: Date; + expires_at: Date; + }; + const fakeRow: Row = { + id: 'test-id', + spec: '
hi
', + format: 'html', + state: 'open', + result: null, + created_at: new Date(1000), + expires_at: new Date(2000), + }; + const projected: Page = { + id: fakeRow.id, + spec: fakeRow.spec, + format: fakeRow.format, + state: fakeRow.state, + result: fakeRow.result, + createdAt: fakeRow.created_at.getTime(), + expiresAt: fakeRow.expires_at.getTime(), + }; + expect(projected.format).toBe('html'); + expect(projected.spec).toBe('
hi
'); + }); +}); diff --git a/apps/api/db.ts b/apps/api/db.ts index 1108046..4e4377e 100644 --- a/apps/api/db.ts +++ b/apps/api/db.ts @@ -25,9 +25,12 @@ export async function withRetry(fn: () => Promise, opts: RetryOptions = {} export type PageState = 'open' | 'submitted' | 'received'; +export type PageFormat = 'a2ui' | 'html'; + export type Page = { id: string; spec: unknown; + format: PageFormat; state: PageState; result: unknown; createdAt: number; @@ -45,6 +48,7 @@ export async function init(connectionString: string): Promise { create table if not exists pages ( id text primary key, spec jsonb not null, + format text not null default 'a2ui' check (format in ('a2ui','html')), state text not null check (state in ('open','submitted','received')), result jsonb, created_at timestamptz not null default now(), @@ -53,6 +57,14 @@ export async function init(connectionString: string): Promise { received_at timestamptz ) `; + // Pick up the column on pre-existing deployments. Idempotent — safe to run + // on every boot. Backfill is implicit via the default. 
+ await sql` + alter table pages + add column if not exists format text + not null default 'a2ui' + check (format in ('a2ui','html')) + `; await sql`create index if not exists pages_expires_at_idx on pages (expires_at)`; } @@ -75,6 +87,7 @@ function client(): ReturnType { type PageRow = { id: string; spec: unknown; + format: PageFormat; state: PageState; result: unknown; created_at: Date; @@ -85,7 +98,7 @@ export async function getActivePage(id: string): Promise { return withRetry(async () => { const c = client(); const rows = await c` - select id, spec, state, result, created_at, expires_at + select id, spec, format, state, result, created_at, expires_at from pages where id = ${id} and expires_at > now() `; @@ -94,6 +107,7 @@ export async function getActivePage(id: string): Promise { return { id: r.id, spec: r.spec, + format: r.format, state: r.state, result: r.result, createdAt: r.created_at.getTime(), @@ -150,13 +164,13 @@ export async function submitPage(id: string, action: unknown): Promise { +): Promise<{ stateAtRead: PageState; result: unknown; format: PageFormat } | null> { const c = client(); - const rows = await c<{ state: PageState; result: unknown }[]>` - select state, result from pages where id = ${id} and expires_at > now() + const rows = await c<{ state: PageState; result: unknown; format: PageFormat }[]>` + select state, result, format from pages where id = ${id} and expires_at > now() `; if (rows.length === 0) return null; - const { state, result } = rows[0]; + const { state, result, format } = rows[0]; const stateAtRead = state; if (state === 'submitted') { await c` @@ -165,14 +179,20 @@ export async function fetchAndAdvanceResult( where id = ${id} and state = 'submitted' `; } - return { stateAtRead, result }; + return { stateAtRead, result, format }; } export async function insertPage(p: Page): Promise { await withRetry(async () => { const c = client(); - await c`insert into pages (id, spec, state, expires_at) - values (${p.id}, ${c.json(p.spec 
as Parameters[0])}, 'open', to_timestamp(${p.expiresAt} / 1000.0))`; + await c`insert into pages (id, spec, format, state, expires_at) + values ( + ${p.id}, + ${c.json(p.spec as Parameters[0])}, + ${p.format}, + 'open', + to_timestamp(${p.expiresAt} / 1000.0) + )`; }); } diff --git a/apps/api/limits.ts b/apps/api/limits.ts new file mode 100644 index 0000000..f7b6f78 --- /dev/null +++ b/apps/api/limits.ts @@ -0,0 +1,12 @@ +/** + * Size limits shared across the API, MCP, and schema layers. + * + * Kept in a dedicated module to avoid import cycles: schemas.ts and + * mcp/tools.ts both need HTML_MAX_BYTES, but app.ts already imports from + * schemas.ts — so we can't host the constant there. + */ + +// 1 MB is the HTML payload cap (per spec). The bodyLimit middleware enforces +// this on the wire body; the format=html branch of newPageBodySchema and the +// show_html MCP tool both enforce it again post-parse for clear error shapes. +export const HTML_MAX_BYTES = 1_000_000; diff --git a/apps/api/mcp/http.test.ts b/apps/api/mcp/http.test.ts index adb12fc..df0f436 100644 --- a/apps/api/mcp/http.test.ts +++ b/apps/api/mcp/http.test.ts @@ -120,10 +120,14 @@ function postMcp( // --------------------------------------------------------------------------- describe('SDK client', () => { - it('lists both tools', async () => { + it('lists all three tools', async () => { const client = await newSdkClient(); const result = await client.listTools(); - expect(result.tools.map((t) => t.name).sort()).toEqual(['check_result', 'show_ui']); + expect(result.tools.map((t) => t.name).sort()).toEqual([ + 'check_result', + 'show_html', + 'show_ui', + ]); await client.close(); }); @@ -157,10 +161,47 @@ describe('SDK client', () => { await client.close(); }); + it('show_html sanitizes the input before storage and returns id/url/expires_at', async () => { + const client = await newSdkClient(); + const result = await client.callTool({ + name: 'show_html', + arguments: { html: '

safe

' }, + }); + const sc = result.structuredContent as Record; + expect(typeof sc.page_id).toBe('string'); + expect((sc.page_id as string).length).toBe(32); + expect(sc.url).toMatch(/^http:\/\/test\.local\//); + expect(typeof sc.expires_at).toBe('number'); + // db.insertPage receives the sanitized spec — ' }, + }); + expect(result.isError).toBe(true); + const text = (result.content as Array<{ type: string; text: string }>)[0]?.text ?? ''; + expect(text).toMatch(/stripped/i); + expect(db.insertPage).not.toHaveBeenCalled(); + await client.close(); + }); + it('check_result returns the open state for an existing page', async () => { (db.fetchAndAdvanceResult as ReturnType).mockResolvedValueOnce({ stateAtRead: 'open', result: null, + format: 'a2ui', }); const client = await newSdkClient(); const result = await client.callTool({ @@ -170,6 +211,7 @@ describe('SDK client', () => { const sc = result.structuredContent as Record; expect(sc.state).toBe('open'); expect(sc.result).toBe(null); + expect(sc.format).toBe('a2ui'); await client.close(); }); diff --git a/apps/api/mcp/http.ts b/apps/api/mcp/http.ts index 83415aa..9467bed 100644 --- a/apps/api/mcp/http.ts +++ b/apps/api/mcp/http.ts @@ -75,11 +75,26 @@ function applyBaseHeaders(req: IncomingMessage, res: ServerResponse, requestId: export function buildInProcessOps(cfg: McpHttpConfig): PageOps { return { async showUi(spec) { - return store.createPage(spec, { + return store.createPage(spec, 'a2ui', { publicUrl: cfg.publicUrl, pageTtlMs: cfg.pageTtlMs, }); }, + async showHtml(html) { + // No request context here — log at the module logger level. The REST + // POST /new path passes a request-scoped child logger; this is the MCP + // path. store.createHtmlPage handles sanitize+log+store in one ritual + // and throws SanitizedEmptyError if the input was stripped to empty — + // the MCP transport surfaces the throw to the client as an error. 
+ return store.createHtmlPage( + html, + { + publicUrl: cfg.publicUrl, + pageTtlMs: cfg.pageTtlMs, + }, + logger, + ); + }, async checkResult(page_id) { return store.advanceResult(page_id); }, diff --git a/apps/api/mcp/tools.test.ts b/apps/api/mcp/tools.test.ts new file mode 100644 index 0000000..3095fcb --- /dev/null +++ b/apps/api/mcp/tools.test.ts @@ -0,0 +1,135 @@ +/** + * Unit tests for shared MCP tool registration. + * + * Exercises registerPagentTools against a stub server so we can assert + * the tools' shapes, descriptions, and handler behavior without booting + * a transport. The HTTP MCP integration (apps/api/mcp/http.test.ts) + * covers end-to-end client flows; this file pins the contract the model + * sees and the per-tool handler logic. + */ +import { describe, it, expect } from 'vitest'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { registerPagentTools, type PageOps } from './tools.ts'; + +type RegisteredTool = { + description: string; + inputSchema: unknown; + handler: (...args: unknown[]) => unknown; +}; + +function makeServer(): { + server: McpServer; + tools: Map; +} { + const tools = new Map(); + const server = { + registerTool( + name: string, + def: { description: string; inputSchema: unknown }, + handler: (...args: unknown[]) => unknown, + ) { + tools.set(name, { ...def, handler }); + }, + } as unknown as McpServer; + return { server, tools }; +} + +// Default no-op PageOps. Tests that exercise a specific handler call +// makeOps({ ... }) to override one or more methods. 
+const defaultOps: PageOps = { + showUi: async () => ({ id: 'a'.repeat(32), url: 'http://x/a', expires_at: 0 }), + showHtml: async () => ({ id: 'b'.repeat(32), url: 'http://x/b', expires_at: 0 }), + checkResult: async () => ({ kind: 'state', state: 'open', result: null, format: 'a2ui' }), +}; + +function makeOps(overrides: Partial = {}): PageOps { + return { ...defaultOps, ...overrides }; +} + +describe('registerPagentTools', () => { + it('registers three tools: show_ui, show_html, check_result', () => { + const { server, tools } = makeServer(); + registerPagentTools(server, makeOps()); + expect(tools.has('show_ui')).toBe(true); + expect(tools.has('show_html')).toBe(true); + expect(tools.has('check_result')).toBe(true); + }); + + it('show_html description mentions view-only and no scripts', () => { + const { server, tools } = makeServer(); + registerPagentTools(server, makeOps()); + const desc = tools.get('show_html')!.description; + expect(desc).toMatch(/view-only/i); + expect(desc).toMatch(/script/i); + expect(desc).toMatch(/JavaScript/i); + }); + + it('show_ui description distinguishes itself from show_html', () => { + const { server, tools } = makeServer(); + registerPagentTools(server, makeOps()); + const desc = tools.get('show_ui')!.description; + expect(desc).toMatch(/show_html/); + }); + + it('check_result structuredContent includes format', async () => { + const { server, tools } = makeServer(); + registerPagentTools(server, makeOps()); + const handler = tools.get('check_result')!.handler; + const out = (await handler({ page_id: 'a'.repeat(32) })) as { + structuredContent: { state: string; result: unknown; page_id: string; format: string }; + }; + expect(out.structuredContent.format).toBe('a2ui'); + }); + + it('show_html handler returns structuredContent matching showHtml + "do not poll" text', async () => { + const { server, tools } = makeServer(); + const expectedId = 'c'.repeat(32); + const expectedUrl = 'http://test.local/' + expectedId; + const 
expectedExpires = 1700000000000; + registerPagentTools( + server, + makeOps({ + showHtml: async (html) => { + // Sanity check: handler must forward the html argument. + expect(html).toBe('

x

'); + return { id: expectedId, url: expectedUrl, expires_at: expectedExpires }; + }, + }), + ); + const handler = tools.get('show_html')!.handler; + const out = (await handler({ html: '

x

' })) as { + structuredContent: { page_id: string; url: string; expires_at: number }; + content: Array<{ type: string; text: string }>; + }; + expect(out.structuredContent.page_id).toBe(expectedId); + expect(out.structuredContent.url).toBe(expectedUrl); + expect(out.structuredContent.expires_at).toBe(expectedExpires); + // Per show_html handler text (tools.ts), the LLM-facing string tells the + // model the page is view-only and not to poll. Match on "do not poll". + expect(out.content[0]?.text).toMatch(/do not poll/i); + }); + + it('check_result handler on an HTML page surfaces "stop polling" guidance', async () => { + const { server, tools } = makeServer(); + registerPagentTools( + server, + makeOps({ + checkResult: async () => ({ + kind: 'state', + state: 'open', + result: null, + format: 'html', + }), + }), + ); + const handler = tools.get('check_result')!.handler; + const out = (await handler({ page_id: 'd'.repeat(32) })) as { + structuredContent: { state: string; result: unknown; format: string; page_id: string }; + content: Array<{ type: string; text: string }>; + }; + expect(out.structuredContent.format).toBe('html'); + expect(out.structuredContent.state).toBe('open'); + expect(out.structuredContent.result).toBe(null); + expect(out.content[0]?.text).toMatch(/stop polling/i); + }); +}); diff --git a/apps/api/mcp/tools.ts b/apps/api/mcp/tools.ts index cece50d..091637e 100644 --- a/apps/api/mcp/tools.ts +++ b/apps/api/mcp/tools.ts @@ -10,23 +10,30 @@ */ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; +import { HTML_MAX_BYTES } from '../limits.ts'; // --- Operations contract ----------------------------------------------------- export type PageState = 'open' | 'submitted' | 'received'; +export type PageFormat = 'a2ui' | 'html'; + export type ShowUiResult = { id: string; url: string; expires_at: number; }; +// `format` is required on responses from both the in-process API path (which +// knows the format from 
the DB row) and the stdio adapter (which reads it +// from the REST API's GET /:id/result response). export type CheckResultOutcome = | { kind: 'not_found' } - | { kind: 'state'; state: PageState; result: unknown }; + | { kind: 'state'; state: PageState; result: unknown; format: PageFormat }; export interface PageOps { showUi(spec: unknown): Promise; + showHtml(html: string): Promise; checkResult(page_id: string): Promise; } @@ -36,10 +43,11 @@ export interface PageOps { // skill (Codex, OpenCode, Cursor, Cline, etc.) still get the guidance. const SHOW_UI_DESCRIPTION = [ - "Render an interactive UI in the user's browser — forms, pickers, dashboards, confirmations, multi-step wizards, surveys.", + 'Ask the user a question that needs a structured answer back. Forms, pickers, confirmations, multi-step wizards, surveys, dashboards-as-input.', 'Returns { page_id, url, expires_at }. PRINT the URL so the user can open it. The agent never sees the user typing — only the final submitted result.', 'Each page is single-shot: one spec, one result. For a follow-up question, call show_ui again with a fresh spec — there is no surface-replace mechanism.', 'After this call, poll check_result on your own cadence to read the user response (start at 2-3s, back off exponentially up to ~30s; do other useful work between polls rather than blocking).', + 'If you only want to SHOW something — a report, a chart, an infographic — use show_html instead. show_ui is for input.', ].join('\n\n'); const SHOW_UI_INPUT_DESCRIPTION = [ @@ -50,11 +58,29 @@ const SHOW_UI_INPUT_DESCRIPTION = [ 'Keep specs small — one screen, one purpose.', ].join(' '); +const SHOW_HTML_DESCRIPTION = [ + 'Show the user a rich visualization: a styled report, dashboard, chart, infographic, comparison table, slide, or other view-only artifact.', + 'Returns { page_id, url, expires_at }. PRINT the URL so the user can open it. 
The page is one-way — the user looks at it; nothing comes back.', + 'Do NOT poll check_result for HTML pages; they never produce a result. If you need a follow-up decision, call show_ui after with a fresh spec.', + 'Constraints (enforced — violations are stripped or rejected):', + 'No JavaScript: no ').output; + expect(out).not.toContain('safe'); + }); + + it('strips ').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize( + '', + ).output; + expect(out).not.toContain(' { + const out = sanitize('').output; + expect(out).not.toContain('onclick'); + expect(out).not.toContain('alert(1)'); + }); + + it('strips onerror on ', () => { + const out = sanitize('').output; + expect(out).not.toContain('onerror'); + }); + + it('strips javascript: URLs in href', () => { + const out = sanitize('click').output; + expect(out).not.toMatch(/javascript:/i); + }); + + it('strips vbscript: URLs', () => { + const out = sanitize('click').output; + expect(out).not.toMatch(/vbscript:/i); + }); + + it('strips data:text/html (executable data URL)', () => { + const out = sanitize('click').output; + expect(out).not.toContain('data:text/html'); + }); + + it('strips data:application/javascript URLs', () => { + const out = sanitize('click').output; + expect(out).not.toContain('data:application/javascript'); + }); + + it('strips ', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain('', () => { + const out = sanitize('').output; + expect(out).not.toContain(' { + const out = sanitize('x').output; + expect(out).not.toContain('onload'); + }); + + it('strips formaction (form override attack)', () 
=> { + const out = sanitize('').output; + expect(out).not.toContain('formaction'); + }); + + it('strips srcdoc on any element', () => { + const out = sanitize('').output; + expect(out).not.toContain('srcdoc'); + }); + + it('preserves inline ', () => { + const input = ''; + const out = sanitize(input).output; + expect(out).toContain(' { + const out = sanitize('').output; + expect(out).not.toMatch(/xlink:href/i); + }); + + it('reports counts of removed tags and attrs', () => { + const r = sanitize(''); + expect(r.removedTags + r.removedAttrs).toBeGreaterThan(0); + }); +}); diff --git a/apps/api/sanitize.ts b/apps/api/sanitize.ts new file mode 100644 index 0000000..671d14b --- /dev/null +++ b/apps/api/sanitize.ts @@ -0,0 +1,78 @@ +/** + * Server-side HTML sanitization for the html page format. + * + * Runs once on POST /new before storage. Returns the cleaned HTML plus + * dropped-tag and dropped-attr counts (logged as forensic signal). + * + * Strict denylist, not allowlist — we accept arbitrary HTML/CSS/SVG and + * remove the dangerous parts. Combined with the iframe sandbox + meta-CSP + * in the renderer this is layer one of three (sanitizer -> CSP -> sandbox). + */ +import DOMPurify from 'isomorphic-dompurify'; + +const FORBID_TAGS = [ + 'script', + 'iframe', + 'frame', + 'frameset', + 'embed', + 'object', + 'applet', + 'link', // no external stylesheets + 'base', // we inject our own in the renderer scaffold + 'meta', // no ; renderer injects its own meta-CSP +]; + +const FORBID_ATTR = ['formaction', 'srcdoc', 'xlink:href']; + +// Allow https links, mailto, in-page anchors, and inline image data URIs only. +// Explicitly blocks javascript:, vbscript:, data:text/html, data:application/*. +const ALLOWED_URI_REGEXP = + /^(?:https:|mailto:|#|data:image\/(?:png|jpe?g|gif|webp|svg\+xml);base64,)/i; + +// Must remain synchronous — DOMPurify hook state is module-global. 
+export function sanitize(html: string): { + output: string; + removedTags: number; + removedAttrs: number; +} { + let removedTags = 0; + let removedAttrs = 0; + + // Hooks are global per DOMPurify instance. Reset and re-register on each + // call so the counters start clean. The try/finally ensures hooks always + // clear even if sanitize() throws partway, leaving the instance pristine + // for the next caller. + DOMPurify.removeAllHooks(); + DOMPurify.addHook('uponSanitizeElement', (_node, data) => { + if (data.allowedTags[data.tagName] === false) removedTags++; + }); + DOMPurify.addHook('uponSanitizeAttribute', (_node, data) => { + if (!data.allowedAttributes[data.attrName]) removedAttrs++; + }); + + try { + const output = DOMPurify.sanitize(html, { + USE_PROFILES: { html: true, svg: true }, + //
hi
'; + expect(sanitize(input).output).toContain('

Hello, World

This is a Pagent HTML page.

"}' +``` + +Open both returned URLs in the browser. The A2UI URL renders an empty surface; the HTML URL renders the styled `Hello, World` page with the chrome bar above it. Stop `npm run dev` when verified. + +- [ ] **Step 5: Defer commit (Phase 3)** + +--- + +## Task 13: Update the skill prose + +**Files:** +- Modify: `skills/pagent/SKILL.md` + +Top of the file gets a "Picking a tool" section. The description front-matter is broadened to cover both tools. + +- [ ] **Step 1: Update the description front-matter** + +Replace lines 1–4 (the YAML front matter) with: + +```yaml +--- +name: pagent +description: Render UI in the user's browser. Two tools — show_ui for asking the user a question that needs a structured answer back (forms, pickers, confirmations, dashboards-as-input, multi-step wizards), and show_html for showing a rich visualization the user just looks at (reports, dashboards, charts, infographics, comparison tables, slides). Trigger on "show me", "ask me", "let me pick", "confirm before you", "give me a dashboard", "render a chart", "show a report" — anything that beats plain text in chat. Rule: if anything has to come back from the user, show_ui. If the user just looks, show_html. +--- +``` + +- [ ] **Step 2: Add the "Picking a tool" section** + +Insert a new section after line 4 (right after the front matter, before `# Showing UI to your user`): + +```markdown +# Showing UI to your user + +## Picking a tool + +Two tools, one rule: **`show_html` to show; `show_ui` to ask.** + +If anything has to come back from the user — a value, a selection, a click — use `show_ui`. If the user just looks at it — a report, a chart, a dashboard, a comparison — use `show_html`. When in doubt, ask yourself: "Do I need to read the user's response?" Yes → `show_ui`. No → `show_html`. + +Multi-step flows work the same way: each step is a separate page. Show a dashboard with `show_html`, then ask "what next?" with `show_ui`. 
Don't try to bundle visualization and input into one page. + +Only A2UI pages (created by `show_ui`) have a result. HTML pages (`show_html`) never transition out of `open` — do not poll `check_result` on them. + +## When to use show_ui +``` + +Then make the existing section header `## When to use this skill` become `## When to use show_ui` to match the new structure. The rest of the file is unchanged. + +- [ ] **Step 3: Add a "When to use show_html" section** + +After the `## Worked example: "What's your name?"` section (line ~64), insert: + +```markdown +## When to use show_html + +Reach for `show_html` whenever you want to show the user something rich and visual that text in chat can't do justice to: + +- "Show me a styled dashboard of the test results." +- "Render the last quarter's metrics as an infographic." +- "Build me a side-by-side comparison table of these three options, with logos." +- "Give me a one-page report on the open issues, grouped by severity." +- "Mock up a landing page for this idea." +- "Show me a chart of the response times." + +Indirect cues: "show me", "render", "make me a", "design a", "lay this out as". + +NOT for input: if the next message you'd send the user contains a question they need to answer, use `show_ui` instead. + +## How show_html works + +Call `show_html(html)` with a single string containing the page body. The string can be a full document (`…`) or a fragment (`
`) — the renderer wraps it in a sandboxed scaffold either way. The tool returns `{ page_id, url, expires_at }`. **Print the URL** so the user can open it. Do NOT poll `check_result` on this page — HTML is one-way. + +Rules (enforced — violations are stripped or rejected server-side): + +- **No JavaScript.** ``, + - `
`, + - ``, + - ``, + - ``, + - `