From 6422c374b00b67430a44ceb46a5011d808c2421a Mon Sep 17 00:00:00 2001 From: jacaudi <47005674+jacaudi@users.noreply.github.com> Date: Fri, 10 Apr 2026 23:57:39 -0700 Subject: [PATCH] fix: string-literal-aware comment stripper in build.js (#45, #38) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the regex-based comment stripper with a hand-written state-machine tokenizer in scripts/strip-comments.js. The old regex (/\/\/.*$/gm) would silently truncate any line containing a `//`, including URL string literals like "https://example.com". It also didn't handle /* block */ comments at all. The new stripper is aware of: - double- and single-quoted strings with backslash escapes - template literals with nested ${} interpolations - best-effort regex literal disambiguation - both // line comments and /* block */ comments outside strings New test file app/tests/test-comment-stripper.js (20 assertions) asserts URL preservation, string literal handling, escape sequences, block comments, and edge cases. Built app/index.html is byte-identical to the baseline because no current worker source hits the bug — this is a latent-defect fix verified by unit tests, not output diff. Closes #45 Closes #38 (build-script portion; Go portion addressed separately) Co-Authored-By: Claude Opus 4.6 --- app/tests/test-comment-stripper.js | 170 ++++++++++++++++++++++++++++ scripts/build.js | 8 +- scripts/strip-comments.js | 176 +++++++++++++++++++++++++++++ 3 files changed, 351 insertions(+), 3 deletions(-) create mode 100644 app/tests/test-comment-stripper.js create mode 100644 scripts/strip-comments.js diff --git a/app/tests/test-comment-stripper.js b/app/tests/test-comment-stripper.js new file mode 100644 index 0000000..e20a6e7 --- /dev/null +++ b/app/tests/test-comment-stripper.js @@ -0,0 +1,170 @@ +'use strict'; + +// Tests for the string-literal-aware comment stripper used by scripts/build.js. +// See issues #45 (URL literal mangling) and #38 (block comments untouched). + +const { test } = require('node:test'); +const assert = require('node:assert/strict'); +const path = require('node:path'); + +const { stripComments } = require(path.join(__dirname, '..', '..', 'scripts', 'strip-comments.js')); + +test('empty input returns empty string', () => { + assert.equal(stripComments(''), ''); +}); + +test('plain code with no comments is unchanged', () => { + const src = 'const x = 1;\nconst y = 2;'; + assert.equal(stripComments(src), src); +}); + +test('strips end-of-line // comment', () => { + const src = 'const x = 1; // a comment\nconst y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('a comment'), 'comment text should be stripped'); + assert.ok(out.includes('const x = 1;'), 'code before comment preserved'); + assert.ok(out.includes('const y = 2;'), 'following line preserved'); +}); + +test('strips /* block */ comment on one line', () => { + const src = 'const x = 1; /* block comment */ const y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('block comment'), 'block comment text should be stripped'); + assert.ok(out.includes('const x = 1;'), 'code before comment preserved'); + assert.ok(out.includes('const y = 2;'), 'code after comment preserved'); +}); + +test('strips multi-line /* ... */ block comment', () => { + const src = 'const x = 1;\n/* line one\n line two\n line three */\nconst y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('line one'), 'block comment line 1 stripped'); + assert.ok(!out.includes('line two'), 'block comment line 2 stripped'); + assert.ok(!out.includes('line three'), 'block comment line 3 stripped'); + assert.ok(out.includes('const x = 1;')); + assert.ok(out.includes('const y = 2;')); +}); + +test('URL inside double-quoted string survives (issue #45)', () => { + const src = 'const url = "https://example.com/path"; // real comment'; + const out = stripComments(src); + assert.ok(out.includes('"https://example.com/path"'), + 'URL literal with // must not be mangled'); + assert.ok(!out.includes('real comment'), 'eol comment after the string is stripped'); +}); + +test('URL inside single-quoted string survives', () => { + const src = "const url = 'https://example.com'; // trailing"; + const out = stripComments(src); + assert.ok(out.includes("'https://example.com'"), + 'single-quoted URL must survive'); + assert.ok(!out.includes('trailing')); +}); + +test('URL inside template literal survives', () => { + const src = 'const url = `https://example.com/${path}`; // tail'; + const out = stripComments(src); + assert.ok(out.includes('`https://example.com/${path}`'), + 'template literal URL must survive'); + assert.ok(!out.includes('tail')); +}); + +test('escaped quote inside string does not end the string', () => { + const src = 'const s = "a\\"b//c"; // real'; + const out = stripComments(src); + assert.ok(out.includes('"a\\"b//c"'), + 'escaped-quote string contents must survive verbatim'); + assert.ok(!out.includes('// real'), 'trailing eol comment stripped'); +}); + +test('escaped backslash before quote still closes the string', () => { + // "a\\" is a valid closed string (backslash then closing quote). + // What follows ( // b ) IS a comment and should be stripped. + const src = 'const s = "a\\\\"; // b'; + const out = stripComments(src); + assert.ok(out.includes('"a\\\\"'), 'string with trailing \\\\ preserved'); + assert.ok(!out.includes('// b'), 'trailing comment stripped'); +}); + +test('// inside a block comment does not escape it', () => { + const src = 'const x = 1; /* contains // inside */ const y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('contains'), 'block comment stripped entirely'); + assert.ok(out.includes('const x = 1;')); + assert.ok(out.includes('const y = 2;')); +}); + +test('/* inside a // comment is not a block opener', () => { + const src = 'const x = 1; // has /* inside\nconst y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('has'), 'line comment stripped'); + assert.ok(out.includes('const x = 1;')); + assert.ok(out.includes('const y = 2;')); +}); + +test('// inside single-quoted string survives', () => { + const src = "const s = 'a//b'; const y = 2;"; + const out = stripComments(src); + assert.ok(out.includes("'a//b'"), 'string containing // survives'); +}); + +test('/* inside double-quoted string is not a block opener', () => { + const src = 'const s = "a/*b*/c"; const y = 2;'; + const out = stripComments(src); + assert.ok(out.includes('"a/*b*/c"'), 'string containing /* ... */ survives'); +}); + +test('regex literal with // does not crash and is preserved', () => { + // A division-like expression containing // must not be interpreted as a comment. + // This is conservative: we only require the builder not to crash and not to + // strip the trailing identifier. + const src = 'const r = /a\\/b/; const y = 2;'; + assert.doesNotThrow(() => stripComments(src)); + const out = stripComments(src); + assert.ok(out.includes('const y = 2;'), + 'code after regex literal survives'); +}); + +test('unterminated string does not crash', () => { + // Pathological input: don't hang or throw. + const src = 'const s = "unterminated\nconst y = 2;'; + assert.doesNotThrow(() => stripComments(src)); +}); + +test('unterminated block comment does not crash', () => { + const src = 'const x = 1; /* never closed'; + assert.doesNotThrow(() => stripComments(src)); +}); + +test('consecutive // comments on separate lines stripped', () => { + const src = '// one\n// two\nconst x = 1;'; + const out = stripComments(src); + assert.ok(!out.includes('one')); + assert.ok(!out.includes('two')); + assert.ok(out.includes('const x = 1;')); +}); + +test('http:// at start of comment is still stripped as comment', () => { + // Not inside a string -- it's just two slashes, so it's a line comment. + const src = 'const x = 1; // see http://example.com\nconst y = 2;'; + const out = stripComments(src); + assert.ok(!out.includes('see')); + assert.ok(!out.includes('example.com')); + assert.ok(out.includes('const x = 1;')); + assert.ok(out.includes('const y = 2;')); +}); + +test('real-world worker prologue: /* */ after string with URL', () => { + const src = [ + 'const API = "https://api.example.com/v1";', + '/* legal block header', + ' spanning multiple lines */', + 'self.onmessage = (e) => { /* inner */ postMessage(e.data); }; // done', + ].join('\n'); + const out = stripComments(src); + assert.ok(out.includes('"https://api.example.com/v1"'), 'URL survives'); + assert.ok(!out.includes('legal block header'), 'block header stripped'); + assert.ok(!out.includes('spanning'), 'block body stripped'); + assert.ok(!out.includes('inner'), 'inline block stripped'); + assert.ok(!out.includes('done'), 'trailing eol comment stripped'); + assert.ok(out.includes('postMessage(e.data)'), 'real code preserved'); +}); diff --git a/scripts/build.js b/scripts/build.js index d58ed88..ee462ae 100644 --- a/scripts/build.js +++ b/scripts/build.js @@ -3,6 +3,7 @@ const fs = require('node:fs'); const path = require('node:path'); +const { stripComments } = require('./strip-comments.js'); const ROOT = path.join(__dirname, '..'); const SRC = path.join(ROOT, 'src'); @@ -30,9 +31,10 @@ const workerParts = WORKER_FILES.map(f => fs.readFileSync(path.join(SRC, 'js', 'worker', f), 'utf8') ); -// Minify worker: strip // comments (they break single-line output), then collapse whitespace -const workerBlob = workerParts.join('\n') - .replace(/\/\/.*$/gm, '') // remove single-line comments before collapsing newlines +// Minify worker: strip // and /* */ comments in a string-literal-aware way so +// URL literals and other strings containing `//` are preserved (issues #45, #38). +// Then collapse whitespace for the single-line worker blob. +const workerBlob = stripComments(workerParts.join('\n')) .replace(/\s+/g, ' ') .trim(); diff --git a/scripts/strip-comments.js b/scripts/strip-comments.js new file mode 100644 index 0000000..dca117f --- /dev/null +++ b/scripts/strip-comments.js @@ -0,0 +1,176 @@ +'use strict'; + +// String-literal-aware JavaScript comment stripper. +// +// Removes `// line` and `/* block */` comments while leaving string literals +// (double, single, and template) untouched -- including URLs that contain `//`. +// This replaces a naive `/\/\/.*$/gm` regex that would silently mangle any line +// containing `//` inside a string (see issues #45 and #38). +// +// Not a full JS parser. It handles the shapes that actually appear in +// src/js/worker/*.js today: +// - // line comments +// - /* block comments */ (single- and multi-line) +// - "..." and '...' strings with \-escapes +// - `...` template literals (${...} interpolations with nested braces) +// - regex literals /.../flags (best-effort disambiguation from division) +// Pathological inputs (unterminated string or block comment) do not throw -- +// they consume to EOF. + +/** + * @param {string} src + * @returns {string} + */ +function stripComments(src) { + if (typeof src !== 'string') { + throw new TypeError('stripComments: src must be a string'); + } + const n = src.length; + let out = ''; + let i = 0; + // `prevSignificant` tracks the last non-whitespace, non-comment character + // we emitted. We use it to decide whether a `/` begins a regex literal or + // a division operator. Rough heuristic: after an operator/keyword-like + // position, `/` starts a regex; after an identifier/number/)/], it's + // division. We start in "regex position". + let prevSignificant = ''; + + const isRegexContext = () => { + // Regex is allowed at the start of input or after these characters. + // Division is expected after identifiers, numbers, `)`, `]`, `}` (in + // expression position) and string/template closers. We conservatively + // treat `}` as division context; that's fine for our worker sources. + if (prevSignificant === '') return true; + return !/[A-Za-z0-9_$)\]}'"`]/.test(prevSignificant); + }; + + while (i < n) { + const c = src[i]; + const c2 = i + 1 < n ? src[i + 1] : ''; + + // Line comment + if (c === '/' && c2 === '/') { + i += 2; + while (i < n && src[i] !== '\n') i++; + // Leave the newline (if any) for the outer loop to emit, so line + // structure is preserved. + continue; + } + + // Block comment + if (c === '/' && c2 === '*') { + i += 2; + while (i < n) { + if (src[i] === '*' && i + 1 < n && src[i + 1] === '/') { + i += 2; + break; + } + i++; + } + // Emit a single space so tokens on either side don't fuse. + out += ' '; + prevSignificant = ' '; + continue; + } + + // Double- or single-quoted string + if (c === '"' || c === "'") { + const quote = c; + out += c; + i++; + while (i < n) { + const ch = src[i]; + if (ch === '\\' && i + 1 < n) { + // Copy escape sequence verbatim (covers \", \', \\, \n, etc.). + out += ch + src[i + 1]; + i += 2; + continue; + } + out += ch; + i++; + if (ch === quote) break; + if (ch === '\n') break; // unterminated string: bail out of the loop + } + prevSignificant = quote; + continue; + } + + // Template literal + if (c === '`') { + out += c; + i++; + let braceDepth = 0; + while (i < n) { + const ch = src[i]; + if (ch === '\\' && i + 1 < n) { + out += ch + src[i + 1]; + i += 2; + continue; + } + if (braceDepth === 0 && ch === '`') { + out += ch; + i++; + break; + } + if (braceDepth === 0 && ch === '$' && i + 1 < n && src[i + 1] === '{') { + out += '${'; + i += 2; + braceDepth = 1; + continue; + } + if (braceDepth > 0) { + if (ch === '{') braceDepth++; + else if (ch === '}') { + braceDepth--; + out += ch; + i++; + continue; + } + } + out += ch; + i++; + } + prevSignificant = '`'; + continue; + } + + // Regex literal (best-effort) + if (c === '/' && isRegexContext()) { + out += c; + i++; + let inClass = false; + while (i < n) { + const ch = src[i]; + if (ch === '\\' && i + 1 < n) { + out += ch + src[i + 1]; + i += 2; + continue; + } + if (ch === '[') inClass = true; + else if (ch === ']') inClass = false; + out += ch; + i++; + if (ch === '/' && !inClass) break; + if (ch === '\n') break; // unterminated: bail + } + // Flags + while (i < n && /[a-z]/i.test(src[i])) { + out += src[i]; + i++; + } + prevSignificant = '/'; + continue; + } + + // Default: copy character, update prevSignificant if non-whitespace. + out += c; + if (c !== ' ' && c !== '\t' && c !== '\n' && c !== '\r') { + prevSignificant = c; + } + i++; + } + + return out; +} + +module.exports = { stripComments };