From ea34df35decec041d3ee34cde8cf6c3bbb9456e2 Mon Sep 17 00:00:00 2001
From: scops <2014109+scops@users.noreply.github.com>
Date: Sun, 10 May 2026 21:20:50 +0200
Subject: [PATCH 1/2] refactor: extract allowlist to lib/ with ALLOWLIST_FILE
override
---
CHANGELOG.md | 4 ++
README.md | 28 ++++++++--
lib/allowlist.js | 59 +++++++++++++++++++++
server.js | 22 ++------
tests/allowlist.test.js | 114 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 204 insertions(+), 23 deletions(-)
create mode 100644 lib/allowlist.js
create mode 100644 tests/allowlist.test.js
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 719bb2f..629f435 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
## [Unreleased]
+### Added
+- `ALLOWLIST_FILE` env var: when set, the contents of the referenced JSON file replace the built-in `sanitize-html` configuration. Lets different consumers run with different policies without forking. Malformed input fails fast at startup.
+- `lib/allowlist.js` module exporting `DEFAULT_ALLOWLIST` (the previous hardcoded config) and `loadAllowlist({ path })` for tests and programmatic use.
+
## [2.2.0] - 2026-05-10
### Added
diff --git a/README.md b/README.md
index be39956..53e61f2 100644
--- a/README.md
+++ b/README.md
@@ -82,13 +82,29 @@ The image is built on `node:24-alpine`, runs as the unprivileged `node` user, an
## Configuration
-| Env var | Default | Description |
-|-------------|---------------|-------------|
-| `PORT` | `5001` | TCP port the HTTP server binds to. |
-| `LOG_LEVEL` | `info` | `pino` log level (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `silent`). Forced to `silent` under `NODE_ENV=test`. |
+| Env var | Default | Description |
+|------------------|---------|-------------|
+| `PORT` | `5001` | TCP port the HTTP server binds to. |
+| `LOG_LEVEL` | `info` | `pino` log level (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `silent`). Forced to `silent` under `NODE_ENV=test`. |
+| `ALLOWLIST_FILE` | _unset_ | Path to a JSON file with a custom `sanitize-html` configuration. When set, replaces the built-in allowlist wholesale. See [Customising the allowlist](#customising-the-allowlist). |
The JSON body limit is fixed at `256kb`. Markdown larger than that is rejected by Express with a `413` before reaching the handler. Adjust `express.json({ limit: ... })` in `server.js` if you need more.
+### Customising the allowlist
+
+Set `ALLOWLIST_FILE` to the path of a JSON file whose contents are passed to `sanitize-html` as its options object. This is useful when different consumers need different policies (e.g. a strict subset for user-generated content, a relaxed superset for trusted authoring tools).
+
+```json
+{
+ "allowedTags": ["p", "em", "strong", "a"],
+ "allowedAttributes": { "a": ["href"] },
+ "allowedSchemes": ["https"],
+ "disallowedTagsMode": "discard"
+}
+```
+
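+The file is loaded once at startup. `allowedTags` is required and must be an array; `allowedAttributes` (an object) and `allowedSchemes` (an array) are type-checked when present. A missing or unreadable file, malformed JSON, a top level that is not a JSON object, or a failed check causes the process to exit immediately rather than silently falling back to the default. The default allowlist lives in [`lib/allowlist.js`](lib/allowlist.js) and is exported as `DEFAULT_ALLOWLIST` for reference.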
+
### Logging and request correlation
Every request is logged as a single JSON line on stdout via [`pino-http`](https://github.com/pinojs/pino-http). Each request is tagged with an id surfaced in the `x-request-id` response header and included in every log line. If the caller sends an `x-request-id` header that matches `^[a-zA-Z0-9_.-]{1,128}$`, the service reuses it; otherwise a fresh UUID is generated. Use this id to correlate a client trace with the server log for a given request.
@@ -108,12 +124,14 @@ Every request is logged as a single JSON line on stdout via [`pino-http`](https:
## Project layout
```
-server.js Express app + /validate, /health and /openapi.json handlers. Single source of truth.
+server.js Express app + /validate, /health and /openapi.json handlers.
+lib/allowlist.js Default sanitize-html allowlist and ALLOWLIST_FILE loader.
openapi.json OpenAPI 3.1 contract served by /openapi.json.
tests/validation.test.js Jest + Supertest suite covering happy path and rejection cases.
tests/fuzzing.test.js Property-based tests (fast-check) for sanitizer invariants.
tests/request-id.test.js Coverage for the x-request-id middleware.
tests/openapi.test.js Coverage for the OpenAPI endpoint and contract.
+tests/allowlist.test.js Unit + integration coverage for the allowlist loader.
Dockerfile, .dockerignore Container build.
```
diff --git a/lib/allowlist.js b/lib/allowlist.js
new file mode 100644
index 0000000..41e5f22
--- /dev/null
+++ b/lib/allowlist.js
@@ -0,0 +1,59 @@
+const fs = require('node:fs');
+
+const DEFAULT_ALLOWLIST = Object.freeze({ // built-in sanitize-html options; the object and every nested array/object are frozen
+ allowedTags: Object.freeze([
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote', 'ul', 'ol', 'li', 'br', 'hr', // headings, paragraphs, quotes, lists, breaks/rules
+ 'strong', 'em', 'u', 's', 'b', 'i', 'mark', 'sub', 'sup', // inline text formatting
+ 'pre', 'code', 'kbd', 'samp', // preformatted and code-like text
+ 'table', 'thead', 'tbody', 'tr', 'td', 'th', // tables
+ 'a', 'img', // links and images
+ 'dl', 'dt', 'dd', // description lists
+ ]),
+ allowedAttributes: Object.freeze({ // attributes permitted per tag; tags not listed here get none from this map
+ a: Object.freeze(['href', 'title', 'target']),
+ img: Object.freeze(['src', 'alt', 'width', 'height']),
+ code: Object.freeze(['class']),
+ }),
+ allowedSchemes: Object.freeze(['http', 'https', 'mailto']), // URL schemes handed to sanitize-html's `allowedSchemes`
+ disallowedTagsMode: 'discard',
+});
+
+function loadAllowlist({ path = process.env.ALLOWLIST_FILE } = {}) {
+ if (!path) return DEFAULT_ALLOWLIST;
+
+ let raw;
+ try {
+ raw = fs.readFileSync(path, 'utf8');
+ } catch (err) {
+ throw new Error(`ALLOWLIST_FILE: cannot read "${path}": ${err.message}`);
+ }
+
+ let parsed;
+ try {
+ parsed = JSON.parse(raw);
+ } catch (err) {
+ throw new Error(`ALLOWLIST_FILE: invalid JSON in "${path}": ${err.message}`);
+ }
+
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+ throw new Error('ALLOWLIST_FILE: top-level must be a JSON object');
+ }
+ if (!Array.isArray(parsed.allowedTags)) {
+ throw new Error('ALLOWLIST_FILE: "allowedTags" must be an array');
+ }
+ if (
+ parsed.allowedAttributes !== undefined &&
+ (typeof parsed.allowedAttributes !== 'object' ||
+ Array.isArray(parsed.allowedAttributes) ||
+ parsed.allowedAttributes === null)
+ ) {
+ throw new Error('ALLOWLIST_FILE: "allowedAttributes" must be an object');
+ }
+ if (parsed.allowedSchemes !== undefined && !Array.isArray(parsed.allowedSchemes)) {
+ throw new Error('ALLOWLIST_FILE: "allowedSchemes" must be an array');
+ }
+
+ return parsed;
+}
+
+module.exports = { DEFAULT_ALLOWLIST, loadAllowlist };
diff --git a/server.js b/server.js
index e1a19e0..f9fd3ed 100644
--- a/server.js
+++ b/server.js
@@ -5,6 +5,9 @@ const pino = require('pino');
const pinoHttp = require('pino-http');
const Ajv = require('ajv');
const openapi = require('./openapi.json');
+const { loadAllowlist } = require('./lib/allowlist');
+
+const allowlist = loadAllowlist(); // resolved once at module load; an invalid ALLOWLIST_FILE throws here
const ajv = new Ajv({ strict: false });
const validateRequest = ajv.compile(openapi.components.schemas.ValidateRequest);
@@ -43,24 +46,7 @@ app.use((req, res, next) => {
app.use(express.json({ limit: '256kb' }));
const validateBody = (body) => {
- const sanitized = sanitizeHtml(body, {
- allowedTags: [
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote', 'ul', 'ol', 'li', 'br', 'hr',
- 'strong', 'em', 'u', 's', 'b', 'i', 'mark', 'sub', 'sup',
- 'pre', 'code', 'kbd', 'samp',
- 'table', 'thead', 'tbody', 'tr', 'td', 'th',
- 'a', 'img',
- 'dl', 'dt', 'dd'
- ],
- allowedAttributes: {
- 'a': ['href', 'title', 'target'],
- 'img': ['src', 'alt', 'width', 'height'],
- 'code': ['class']
- },
- allowedSchemes: ['http', 'https', 'mailto'],
- disallowedTagsMode: 'discard'
- });
-
+ const sanitized = sanitizeHtml(body, allowlist); // options come from the module-level `allowlist` (built-in default or ALLOWLIST_FILE)
return { safe: body.trim() === sanitized.trim(), sanitized };
};
diff --git a/tests/allowlist.test.js b/tests/allowlist.test.js
new file mode 100644
index 0000000..bf33e83
--- /dev/null
+++ b/tests/allowlist.test.js
@@ -0,0 +1,114 @@
+const fs = require("node:fs");
+const os = require("node:os");
+const path = require("node:path");
+const request = require("supertest");
+const { loadAllowlist, DEFAULT_ALLOWLIST } = require("../lib/allowlist");
+
+const writeFixture = (name, content) => {
+ const file = path.join(os.tmpdir(), `${name}-${Date.now()}-${Math.random().toString(36).slice(2)}.json`);
+ fs.writeFileSync(file, typeof content === "string" ? content : JSON.stringify(content));
+ return file;
+};
+
+describe("loadAllowlist", () => {
+ it("returns the default allowlist when no path is provided", () => {
+ expect(loadAllowlist({ path: undefined })).toBe(DEFAULT_ALLOWLIST);
+ expect(DEFAULT_ALLOWLIST.allowedTags).toContain("p");
+ expect(DEFAULT_ALLOWLIST.allowedTags).not.toContain("script");
+ });
+
+ it("reads a JSON file when a path is provided", () => {
+ const file = writeFixture("allowlist", {
+ allowedTags: ["p", "em"],
+ allowedAttributes: {},
+ allowedSchemes: ["http"],
+ disallowedTagsMode: "escape",
+ });
+
+ try {
+ const loaded = loadAllowlist({ path: file });
+ expect(loaded.allowedTags).toEqual(["p", "em"]);
+ expect(loaded.disallowedTagsMode).toBe("escape");
+ } finally {
+ fs.unlinkSync(file);
+ }
+ });
+
+ it("throws when the file cannot be read", () => {
+ expect(() =>
+ loadAllowlist({ path: "/definitely/does/not/exist.json" })
+ ).toThrow(/cannot read/);
+ });
+
+ it("throws on malformed JSON", () => {
+ const file = writeFixture("bad", "not json");
+ try {
+ expect(() => loadAllowlist({ path: file })).toThrow(/invalid JSON/);
+ } finally {
+ fs.unlinkSync(file);
+ }
+ });
+
+ it("throws when allowedTags is not an array", () => {
+ const file = writeFixture("wrong-tags", { allowedTags: "not an array" });
+ try {
+ expect(() => loadAllowlist({ path: file })).toThrow(
+ /allowedTags.*array/
+ );
+ } finally {
+ fs.unlinkSync(file);
+ }
+ });
+
+ it("throws when the top-level is not an object", () => {
+ const file = writeFixture("array-top", ["p", "em"]);
+ try {
+ expect(() => loadAllowlist({ path: file })).toThrow(
+ /top-level must be a JSON object/
+ );
+ } finally {
+ fs.unlinkSync(file);
+ }
+ });
+});
+
+describe("ALLOWLIST_FILE integration", () => {
+ let originalEnv;
+
+ beforeEach(() => {
+ originalEnv = process.env.ALLOWLIST_FILE;
+ });
+
+ afterEach(() => {
+ if (originalEnv === undefined) delete process.env.ALLOWLIST_FILE;
+ else process.env.ALLOWLIST_FILE = originalEnv;
+ jest.resetModules();
+ });
+
+ it("a custom allowlist relaxes sanitization when set via env", async () => {
+ const file = writeFixture("relaxed", {
+ allowedTags: ["iframe"],
+ allowedAttributes: { iframe: ["src"] },
+ allowedSchemes: ["https"],
+ disallowedTagsMode: "discard",
+ });
+ process.env.ALLOWLIST_FILE = file;
+
+ let app;
+ jest.isolateModules(() => {
+ app = require("../server");
+ });
+
+ try {
+ const res = await request(app)
+ .post("/validate")
+ .send({ markdown: '' })
+ .set("Content-Type", "application/json");
+
+ expect(res.body.safe).toBe(true);
+ expect(res.body.sanitized).toContain("