diff --git a/CHANGELOG.md b/CHANGELOG.md index 719bb2f..629f435 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +### Added +- `ALLOWLIST_FILE` env var: when set, the contents of the referenced JSON file replace the built-in `sanitize-html` configuration. Lets different consumers run with different policies without forking. Malformed input fails fast at startup. +- `lib/allowlist.js` module exporting `DEFAULT_ALLOWLIST` (the previous hardcoded config) and `loadAllowlist({ path })` for tests and programmatic use. + ## [2.2.0] - 2026-05-10 ### Added diff --git a/README.md b/README.md index be39956..53e61f2 100644 --- a/README.md +++ b/README.md @@ -82,13 +82,29 @@ The image is built on `node:24-alpine`, runs as the unprivileged `node` user, an ## Configuration -| Env var | Default | Description | -|-------------|---------------|-------------| -| `PORT` | `5001` | TCP port the HTTP server binds to. | -| `LOG_LEVEL` | `info` | `pino` log level (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `silent`). Forced to `silent` under `NODE_ENV=test`. | +| Env var | Default | Description | +|------------------|---------|-------------| +| `PORT` | `5001` | TCP port the HTTP server binds to. | +| `LOG_LEVEL` | `info` | `pino` log level (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `silent`). Forced to `silent` under `NODE_ENV=test`. | +| `ALLOWLIST_FILE` | _unset_ | Path to a JSON file with a custom `sanitize-html` configuration. When set, replaces the built-in allowlist wholesale. See [Customising the allowlist](#customising-the-allowlist). | The JSON body limit is fixed at `256kb`. Markdown larger than that is rejected by Express with a `413` before reaching the handler. Adjust `express.json({ limit: ... })` in `server.js` if you need more. 
+### Customising the allowlist + +Set `ALLOWLIST_FILE` to a JSON file whose contents are passed straight to `sanitize-html`. Useful when different consumers need different policies (e.g. a strict subset for user-generated content, a relaxed superset for trusted authoring tools). + +```json +{ + "allowedTags": ["p", "em", "strong", "a"], + "allowedAttributes": { "a": ["href"] }, + "allowedSchemes": ["https"], + "disallowedTagsMode": "discard" +} +``` + +The file is loaded once at startup. The process exits immediately, rather than silently falling back to the default, if the file is missing, is not valid JSON, is not a JSON object at the top level, or has a non-array `allowedTags`, a non-object `allowedAttributes`, or a non-array `allowedSchemes`. The default allowlist lives in [`lib/allowlist.js`](lib/allowlist.js) and is exported as `DEFAULT_ALLOWLIST` for reference. + ### Logging and request correlation Every request is logged as a single JSON line on stdout via [`pino-http`](https://github.com/pinojs/pino-http). Each request is tagged with an id surfaced in the `x-request-id` response header and included in every log line. If the caller sends an `x-request-id` header that matches `^[a-zA-Z0-9_.-]{1,128}$`, the service reuses it; otherwise a fresh UUID is generated. Use this id to correlate a client trace with the server log for a given request. @@ -108,12 +124,14 @@ Every request is logged as a single JSON line on stdout via [`pino-http`](https: ## Project layout ``` -server.js Express app + /validate, /health and /openapi.json handlers. Single source of truth. +server.js Express app + /validate, /health and /openapi.json handlers. +lib/allowlist.js Default sanitize-html allowlist and ALLOWLIST_FILE loader. openapi.json OpenAPI 3.1 contract served by /openapi.json. tests/validation.test.js Jest + Supertest suite covering happy path and rejection cases. tests/fuzzing.test.js Property-based tests (fast-check) for sanitizer invariants. tests/request-id.test.js Coverage for the x-request-id middleware. tests/openapi.test.js Coverage for the OpenAPI endpoint and contract. 
+tests/allowlist.test.js Unit + integration coverage for the allowlist loader.
 Dockerfile, .dockerignore Container build.
 ```
 
diff --git a/lib/allowlist.js b/lib/allowlist.js
new file mode 100644
index 0000000..41e5f22
--- /dev/null
+++ b/lib/allowlist.js
@@ -0,0 +1,102 @@
+const fs = require('node:fs');
+
+/**
+ * Built-in `sanitize-html` options, returned by `loadAllowlist` when no file
+ * path is given. Frozen, including the nested arrays and objects.
+ */
+const DEFAULT_ALLOWLIST = Object.freeze({
+  allowedTags: Object.freeze([
+    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote', 'ul', 'ol', 'li', 'br', 'hr',
+    'strong', 'em', 'u', 's', 'b', 'i', 'mark', 'sub', 'sup',
+    'pre', 'code', 'kbd', 'samp',
+    'table', 'thead', 'tbody', 'tr', 'td', 'th',
+    'a', 'img',
+    'dl', 'dt', 'dd',
+  ]),
+  allowedAttributes: Object.freeze({
+    a: Object.freeze(['href', 'title', 'target']),
+    img: Object.freeze(['src', 'alt', 'width', 'height']),
+    code: Object.freeze(['class']),
+  }),
+  allowedSchemes: Object.freeze(['http', 'https', 'mailto']),
+  disallowedTagsMode: 'discard',
+});
+
+// True for JSON objects only: rejects null, arrays and primitives.
+const isPlainObject = (value) =>
+  value !== null && typeof value === 'object' && !Array.isArray(value);
+
+// True when every entry of an array is a string.
+const allStrings = (list) => list.every((item) => typeof item === 'string');
+
+/**
+ * Resolve the `sanitize-html` options to run with.
+ *
+ * Returns `DEFAULT_ALLOWLIST` when `path` is empty or unset. Otherwise reads
+ * the file synchronously, parses it as JSON and shape-checks it, so that a
+ * malformed policy is rejected when it is loaded rather than when it is
+ * first used.
+ *
+ * @param {object} [options]
+ * @param {string} [options.path=process.env.ALLOWLIST_FILE] JSON file to load.
+ * @returns {object} `DEFAULT_ALLOWLIST`, or the parsed contents of the file.
+ * @throws {Error} If the file cannot be read or parsed (the underlying error
+ *   is attached as `cause`), or if the parsed value fails a shape check.
+ */
+function loadAllowlist({ path = process.env.ALLOWLIST_FILE } = {}) {
+  if (!path) return DEFAULT_ALLOWLIST;
+
+  let raw;
+  try {
+    raw = fs.readFileSync(path, 'utf8');
+  } catch (err) {
+    throw new Error(`ALLOWLIST_FILE: cannot read "${path}": ${err.message}`, {
+      cause: err,
+    });
+  }
+
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    throw new Error(`ALLOWLIST_FILE: invalid JSON in "${path}": ${err.message}`, {
+      cause: err,
+    });
+  }
+
+  if (!isPlainObject(parsed)) {
+    throw new Error('ALLOWLIST_FILE: top-level must be a JSON object');
+  }
+  if (!Array.isArray(parsed.allowedTags)) {
+    throw new Error('ALLOWLIST_FILE: "allowedTags" must be an array');
+  }
+  if (!allStrings(parsed.allowedTags)) {
+    throw new Error('ALLOWLIST_FILE: "allowedTags" entries must be strings');
+  }
+
+  if (parsed.allowedAttributes !== undefined) {
+    if (!isPlainObject(parsed.allowedAttributes)) {
+      throw new Error('ALLOWLIST_FILE: "allowedAttributes" must be an object');
+    }
+    // Entries may be strings or sanitize-html attribute objects, so only the
+    // per-tag container is checked.
+    for (const [tag, attrs] of Object.entries(parsed.allowedAttributes)) {
+      if (!Array.isArray(attrs)) {
+        throw new Error(`ALLOWLIST_FILE: "allowedAttributes.${tag}" must be an array`);
+      }
+    }
+  }
+
+  if (parsed.allowedSchemes !== undefined) {
+    if (!Array.isArray(parsed.allowedSchemes)) {
+      throw new Error('ALLOWLIST_FILE: "allowedSchemes" must be an array');
+    }
+    if (!allStrings(parsed.allowedSchemes)) {
+      throw new Error('ALLOWLIST_FILE: "allowedSchemes" entries must be strings');
+    }
+  }
+
+  return parsed;
+}
+
+module.exports = { DEFAULT_ALLOWLIST, loadAllowlist };
diff --git a/server.js b/server.js
index e1a19e0..f9fd3ed 100644
--- a/server.js
+++ b/server.js
@@ -5,6 +5,9 @@ const pino = require('pino');
 const pinoHttp = require('pino-http');
 const Ajv = require('ajv');
 const openapi = require('./openapi.json');
+const { loadAllowlist } = require('./lib/allowlist');
+
+const allowlist = loadAllowlist();
 
 const ajv = new Ajv({ strict: false });
 const validateRequest = ajv.compile(openapi.components.schemas.ValidateRequest);
@@ -43,24 +46,7 @@ app.use((req, res, next) => {
 app.use(express.json({ limit: '256kb' }));
 
 const validateBody = (body) => {
-  const sanitized = sanitizeHtml(body, {
-    allowedTags: [
-      'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote', 'ul', 'ol', 'li', 'br', 'hr',
-      'strong', 'em', 'u', 's', 'b', 'i', 'mark', 'sub', 'sup',
-      'pre', 'code', 'kbd', 'samp',
-      'table', 'thead', 'tbody', 'tr', 'td', 'th',
-      'a', 'img',
-      'dl', 'dt', 'dd'
-    ],
-    allowedAttributes: {
-      'a': ['href', 'title', 'target'],
-      'img': ['src', 'alt', 'width', 'height'],
-      'code': ['class']
-    },
-    allowedSchemes: ['http', 'https', 'mailto'],
-    disallowedTagsMode: 'discard'
-  });
-
+  const sanitized = sanitizeHtml(body, allowlist);
   return { safe: body.trim() === sanitized.trim(), sanitized };
 };
 
diff --git a/tests/allowlist.test.js b/tests/allowlist.test.js
new file mode 100644
index 0000000..41e32c1
--- /dev/null
+++ b/tests/allowlist.test.js
@@ -0,0 +1,119 @@
+const fs = require("node:fs");
+const os = require("node:os");
+const path = require("node:path");
+const request = require("supertest");
+const { loadAllowlist, DEFAULT_ALLOWLIST } = require("../lib/allowlist");
+
+const writeFixture = (name, content) => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "allowlist-test-"));
+  const file = path.join(dir, `${name}.json`);
+  fs.writeFileSync(file, typeof content === "string" ?
content : JSON.stringify(content)); + return file; +}; + +const removeFixture = (file) => { + fs.rmSync(path.dirname(file), { recursive: true, force: true }); +}; + +describe("loadAllowlist", () => { + it("returns the default allowlist when no path is provided", () => { + expect(loadAllowlist({ path: undefined })).toBe(DEFAULT_ALLOWLIST); + expect(DEFAULT_ALLOWLIST.allowedTags).toContain("p"); + expect(DEFAULT_ALLOWLIST.allowedTags).not.toContain("script"); + }); + + it("reads a JSON file when a path is provided", () => { + const file = writeFixture("allowlist", { + allowedTags: ["p", "em"], + allowedAttributes: {}, + allowedSchemes: ["http"], + disallowedTagsMode: "escape", + }); + + try { + const loaded = loadAllowlist({ path: file }); + expect(loaded.allowedTags).toEqual(["p", "em"]); + expect(loaded.disallowedTagsMode).toBe("escape"); + } finally { + removeFixture(file); + } + }); + + it("throws when the file cannot be read", () => { + expect(() => + loadAllowlist({ path: "/definitely/does/not/exist.json" }) + ).toThrow(/cannot read/); + }); + + it("throws on malformed JSON", () => { + const file = writeFixture("bad", "not json"); + try { + expect(() => loadAllowlist({ path: file })).toThrow(/invalid JSON/); + } finally { + removeFixture(file); + } + }); + + it("throws when allowedTags is not an array", () => { + const file = writeFixture("wrong-tags", { allowedTags: "not an array" }); + try { + expect(() => loadAllowlist({ path: file })).toThrow( + /allowedTags.*array/ + ); + } finally { + removeFixture(file); + } + }); + + it("throws when the top-level is not an object", () => { + const file = writeFixture("array-top", ["p", "em"]); + try { + expect(() => loadAllowlist({ path: file })).toThrow( + /top-level must be a JSON object/ + ); + } finally { + removeFixture(file); + } + }); +}); + +describe("ALLOWLIST_FILE integration", () => { + let originalEnv; + + beforeEach(() => { + originalEnv = process.env.ALLOWLIST_FILE; + }); + + afterEach(() => { + if 
(originalEnv === undefined) delete process.env.ALLOWLIST_FILE; + else process.env.ALLOWLIST_FILE = originalEnv; + jest.resetModules(); + }); + + it("a custom allowlist relaxes sanitization when set via env", async () => { + const file = writeFixture("relaxed", { + allowedTags: ["iframe"], + allowedAttributes: { iframe: ["src"] }, + allowedSchemes: ["https"], + disallowedTagsMode: "discard", + }); + process.env.ALLOWLIST_FILE = file; + + let app; + jest.isolateModules(() => { + app = require("../server"); + }); + + try { + const res = await request(app) + .post("/validate") + .send({ markdown: '' }) + .set("Content-Type", "application/json"); + + expect(res.body.safe).toBe(true); + expect(res.body.sanitized).toContain("