From 6422c374b00b67430a44ceb46a5011d808c2421a Mon Sep 17 00:00:00 2001
From: jacaudi <47005674+jacaudi@users.noreply.github.com>
Date: Fri, 10 Apr 2026 23:57:39 -0700
Subject: [PATCH] fix: string-literal-aware comment stripper in build.js (#45,
 #38)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the regex-based comment stripper with a hand-written
state-machine tokenizer in scripts/strip-comments.js. The old regex
(/\/\/.*$/gm) would silently truncate any line containing a `//`,
including URL string literals like "https://example.com". It also
didn't handle /* block */ comments at all.

The new stripper is aware of:
- double- and single-quoted strings with backslash escapes
- template literals with nested ${} interpolations
- best-effort regex literal disambiguation
- both // line comments and /* block */ comments outside strings

New test file app/tests/test-comment-stripper.js (20 assertions) asserts
URL preservation, string literal handling, escape sequences, block
comments, and edge cases. Built app/index.html is byte-identical to
the baseline because no current worker source hits the bug — this
is a latent-defect fix verified by unit tests, not output diff.

Closes #45
Closes #38 (build-script portion; Go portion addressed separately)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 app/tests/test-comment-stripper.js | 170 ++++++++++++++++++++++++++++
 scripts/build.js                   |   8 +-
 scripts/strip-comments.js          | 176 +++++++++++++++++++++++++++++
 3 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 app/tests/test-comment-stripper.js
 create mode 100644 scripts/strip-comments.js

diff --git a/app/tests/test-comment-stripper.js b/app/tests/test-comment-stripper.js
new file mode 100644
index 0000000..e20a6e7
--- /dev/null
+++ b/app/tests/test-comment-stripper.js
@@ -0,0 +1,170 @@
+'use strict';
+
+// Tests for the string-literal-aware comment stripper used by scripts/build.js.
+// See issues #45 (URL literal mangling) and #38 (block comments untouched).
+
+const { test } = require('node:test');
+const assert = require('node:assert/strict');
+const path = require('node:path');
+
+const { stripComments } = require(path.join(__dirname, '..', '..', 'scripts', 'strip-comments.js'));
+
+test('empty input returns empty string', () => {
+    assert.equal(stripComments(''), '');
+});
+
+test('plain code with no comments is unchanged', () => {
+    const src = 'const x = 1;\nconst y = 2;';
+    assert.equal(stripComments(src), src);
+});
+
+test('strips end-of-line // comment', () => {
+    const src = 'const x = 1; // a comment\nconst y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('a comment'), 'comment text should be stripped');
+    assert.ok(out.includes('const x = 1;'), 'code before comment preserved');
+    assert.ok(out.includes('const y = 2;'), 'following line preserved');
+});
+
+test('strips /* block */ comment on one line', () => {
+    const src = 'const x = 1; /* block comment */ const y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('block comment'), 'block comment text should be stripped');
+    assert.ok(out.includes('const x = 1;'), 'code before comment preserved');
+    assert.ok(out.includes('const y = 2;'), 'code after comment preserved');
+});
+
+test('strips multi-line /* ... */ block comment', () => {
+    const src = 'const x = 1;\n/* line one\n   line two\n   line three */\nconst y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('line one'), 'block comment line 1 stripped');
+    assert.ok(!out.includes('line two'), 'block comment line 2 stripped');
+    assert.ok(!out.includes('line three'), 'block comment line 3 stripped');
+    assert.ok(out.includes('const x = 1;'));
+    assert.ok(out.includes('const y = 2;'));
+});
+
+test('URL inside double-quoted string survives (issue #45)', () => {
+    const src = 'const url = "https://example.com/path"; // real comment';
+    const out = stripComments(src);
+    assert.ok(out.includes('"https://example.com/path"'),
+        'URL literal with // must not be mangled');
+    assert.ok(!out.includes('real comment'), 'eol comment after the string is stripped');
+});
+
+test('URL inside single-quoted string survives', () => {
+    const src = "const url = 'https://example.com'; // trailing";
+    const out = stripComments(src);
+    assert.ok(out.includes("'https://example.com'"),
+        'single-quoted URL must survive');
+    assert.ok(!out.includes('trailing'));
+});
+
+test('URL inside template literal survives', () => {
+    const src = 'const url = `https://example.com/${path}`; // tail';
+    const out = stripComments(src);
+    assert.ok(out.includes('`https://example.com/${path}`'),
+        'template literal URL must survive');
+    assert.ok(!out.includes('tail'));
+});
+
+test('escaped quote inside string does not end the string', () => {
+    const src = 'const s = "a\\"b//c"; // real';
+    const out = stripComments(src);
+    assert.ok(out.includes('"a\\"b//c"'),
+        'escaped-quote string contents must survive verbatim');
+    assert.ok(!out.includes('// real'), 'trailing eol comment stripped');
+});
+
+test('escaped backslash before quote still closes the string', () => {
+    // "a\\" is a valid closed string (backslash then closing quote).
+    // What follows ( // b ) IS a comment and should be stripped.
+    const src = 'const s = "a\\\\"; // b';
+    const out = stripComments(src);
+    assert.ok(out.includes('"a\\\\"'), 'string with trailing \\\\ preserved');
+    assert.ok(!out.includes('// b'), 'trailing comment stripped');
+});
+
+test('// inside a block comment does not escape it', () => {
+    const src = 'const x = 1; /* contains // inside */ const y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('contains'), 'block comment stripped entirely');
+    assert.ok(out.includes('const x = 1;'));
+    assert.ok(out.includes('const y = 2;'));
+});
+
+test('/* inside a // comment is not a block opener', () => {
+    const src = 'const x = 1; // has /* inside\nconst y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('has'), 'line comment stripped');
+    assert.ok(out.includes('const x = 1;'));
+    assert.ok(out.includes('const y = 2;'));
+});
+
+test('// inside single-quoted string survives', () => {
+    const src = "const s = 'a//b'; const y = 2;";
+    const out = stripComments(src);
+    assert.ok(out.includes("'a//b'"), 'string containing // survives');
+});
+
+test('/* inside double-quoted string is not a block opener', () => {
+    const src = 'const s = "a/*b*/c"; const y = 2;';
+    const out = stripComments(src);
+    assert.ok(out.includes('"a/*b*/c"'), 'string containing /* ... */ survives');
+});
+
+test('regex literal with // does not crash and is preserved', () => {
+    // A division-like expression containing // must not be interpreted as a comment.
+    // This is conservative: we only require the builder not to crash and not to
+    // strip the trailing identifier.
+    const src = 'const r = /a\\/b/; const y = 2;';
+    assert.doesNotThrow(() => stripComments(src));
+    const out = stripComments(src);
+    assert.ok(out.includes('const y = 2;'),
+        'code after regex literal survives');
+});
+
+test('unterminated string does not crash', () => {
+    // Pathological input: don't hang or throw.
+    const src = 'const s = "unterminated\nconst y = 2;';
+    assert.doesNotThrow(() => stripComments(src));
+});
+
+test('unterminated block comment does not crash', () => {
+    const src = 'const x = 1; /* never closed';
+    assert.doesNotThrow(() => stripComments(src));
+});
+
+test('consecutive // comments on separate lines stripped', () => {
+    const src = '// one\n// two\nconst x = 1;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('one'));
+    assert.ok(!out.includes('two'));
+    assert.ok(out.includes('const x = 1;'));
+});
+
+test('http:// at start of comment is still stripped as comment', () => {
+    // Not inside a string -- it's just two slashes, so it's a line comment.
+    const src = 'const x = 1; // see http://example.com\nconst y = 2;';
+    const out = stripComments(src);
+    assert.ok(!out.includes('see'));
+    assert.ok(!out.includes('example.com'));
+    assert.ok(out.includes('const x = 1;'));
+    assert.ok(out.includes('const y = 2;'));
+});
+
+test('real-world worker prologue: /* */ after string with URL', () => {
+    const src = [
+        'const API = "https://api.example.com/v1";',
+        '/* legal block header',
+        '   spanning multiple lines */',
+        'self.onmessage = (e) => { /* inner */ postMessage(e.data); }; // done',
+    ].join('\n');
+    const out = stripComments(src);
+    assert.ok(out.includes('"https://api.example.com/v1"'), 'URL survives');
+    assert.ok(!out.includes('legal block header'), 'block header stripped');
+    assert.ok(!out.includes('spanning'), 'block body stripped');
+    assert.ok(!out.includes('inner'), 'inline block stripped');
+    assert.ok(!out.includes('done'), 'trailing eol comment stripped');
+    assert.ok(out.includes('postMessage(e.data)'), 'real code preserved');
+});
diff --git a/scripts/build.js b/scripts/build.js
index d58ed88..ee462ae 100644
--- a/scripts/build.js
+++ b/scripts/build.js
@@ -3,6 +3,7 @@
 
 const fs = require('node:fs');
 const path = require('node:path');
+const { stripComments } = require('./strip-comments.js');
 
 const ROOT = path.join(__dirname, '..');
 const SRC = path.join(ROOT, 'src');
@@ -30,9 +31,10 @@ const workerParts = WORKER_FILES.map(f =>
     fs.readFileSync(path.join(SRC, 'js', 'worker', f), 'utf8')
 );
 
-// Minify worker: strip // comments (they break single-line output), then collapse whitespace
-const workerBlob = workerParts.join('\n')
-    .replace(/\/\/.*$/gm, '')   // remove single-line comments before collapsing newlines
+// Minify worker: strip // and /* */ comments in a string-literal-aware way so
+// URL literals and other strings containing `//` are preserved (issues #45, #38).
+// Then collapse whitespace for the single-line worker blob.
+const workerBlob = stripComments(workerParts.join('\n'))
     .replace(/\s+/g, ' ')
     .trim();
 
diff --git a/scripts/strip-comments.js b/scripts/strip-comments.js
new file mode 100644
index 0000000..dca117f
--- /dev/null
+++ b/scripts/strip-comments.js
@@ -0,0 +1,176 @@
+'use strict';
+
+// String-literal-aware JavaScript comment stripper.
+//
+// Removes `// line` and `/* block */` comments while leaving string literals
+// (double, single, and template) untouched -- including URLs that contain `//`.
+// This replaces a naive `/\/\/.*$/gm` regex that would silently mangle any line
+// containing `//` inside a string (see issues #45 and #38).
+//
+// Not a full JS parser. It handles the shapes that actually appear in
+// src/js/worker/*.js today:
+//   - // line comments
+//   - /* block comments */ (single- and multi-line)
+//   - "..." and '...' strings with \-escapes
+//   - `...` template literals (${...} interpolations with nested braces)
+//   - regex literals /.../flags (best-effort disambiguation from division)
+// Pathological inputs (unterminated string or block comment) do not throw --
+// they consume to EOF.
+
+/**
+ * @param {string} src
+ * @returns {string}
+ */
+function stripComments(src) {
+    if (typeof src !== 'string') {
+        throw new TypeError('stripComments: src must be a string');
+    }
+    const n = src.length;
+    let out = '';
+    let i = 0;
+    // `prevSignificant` tracks the last non-whitespace, non-comment character
+    // we emitted. We use it to decide whether a `/` begins a regex literal or
+    // a division operator. Rough heuristic: after an operator/keyword-like
+    // position, `/` starts a regex; after an identifier/number/)/], it's
+    // division. We start in "regex position".
+    let prevSignificant = '';
+
+    const isRegexContext = () => {
+        // Regex is allowed at the start of input or after these characters.
+        // Division is expected after identifiers, numbers, `)`, `]`, `}` (in
+        // expression position) and string/template closers. We conservatively
+        // treat `}` as division context; that's fine for our worker sources.
+        if (prevSignificant === '') return true;
+        return !/[A-Za-z0-9_$)\]}'"`]/.test(prevSignificant);
+    };
+
+    while (i < n) {
+        const c = src[i];
+        const c2 = i + 1 < n ? src[i + 1] : '';
+
+        // Line comment
+        if (c === '/' && c2 === '/') {
+            i += 2;
+            while (i < n && src[i] !== '\n') i++;
+            // Leave the newline (if any) for the outer loop to emit, so line
+            // structure is preserved.
+            continue;
+        }
+
+        // Block comment
+        if (c === '/' && c2 === '*') {
+            i += 2;
+            while (i < n) {
+                if (src[i] === '*' && i + 1 < n && src[i + 1] === '/') {
+                    i += 2;
+                    break;
+                }
+                i++;
+            }
+            // Emit a single space so tokens on either side don't fuse.
+            out += ' ';
+            prevSignificant = ' ';
+            continue;
+        }
+
+        // Double- or single-quoted string
+        if (c === '"' || c === "'") {
+            const quote = c;
+            out += c;
+            i++;
+            while (i < n) {
+                const ch = src[i];
+                if (ch === '\\' && i + 1 < n) {
+                    // Copy escape sequence verbatim (covers \", \', \\, \n, etc.).
+                    out += ch + src[i + 1];
+                    i += 2;
+                    continue;
+                }
+                out += ch;
+                i++;
+                if (ch === quote) break;
+                if (ch === '\n') break; // unterminated string: bail out of the loop
+            }
+            prevSignificant = quote;
+            continue;
+        }
+
+        // Template literal
+        if (c === '`') {
+            out += c;
+            i++;
+            let braceDepth = 0;
+            while (i < n) {
+                const ch = src[i];
+                if (ch === '\\' && i + 1 < n) {
+                    out += ch + src[i + 1];
+                    i += 2;
+                    continue;
+                }
+                if (braceDepth === 0 && ch === '`') {
+                    out += ch;
+                    i++;
+                    break;
+                }
+                if (braceDepth === 0 && ch === '$' && i + 1 < n && src[i + 1] === '{') {
+                    out += '${';
+                    i += 2;
+                    braceDepth = 1;
+                    continue;
+                }
+                if (braceDepth > 0) {
+                    if (ch === '{') braceDepth++;
+                    else if (ch === '}') {
+                        braceDepth--;
+                        out += ch;
+                        i++;
+                        continue;
+                    }
+                }
+                out += ch;
+                i++;
+            }
+            prevSignificant = '`';
+            continue;
+        }
+
+        // Regex literal (best-effort)
+        if (c === '/' && isRegexContext()) {
+            out += c;
+            i++;
+            let inClass = false;
+            while (i < n) {
+                const ch = src[i];
+                if (ch === '\\' && i + 1 < n) {
+                    out += ch + src[i + 1];
+                    i += 2;
+                    continue;
+                }
+                if (ch === '[') inClass = true;
+                else if (ch === ']') inClass = false;
+                out += ch;
+                i++;
+                if (ch === '/' && !inClass) break;
+                if (ch === '\n') break; // unterminated: bail
+            }
+            // Flags
+            while (i < n && /[a-z]/i.test(src[i])) {
+                out += src[i];
+                i++;
+            }
+            prevSignificant = '/';
+            continue;
+        }
+
+        // Default: copy character, update prevSignificant if non-whitespace.
+        out += c;
+        if (c !== ' ' && c !== '\t' && c !== '\n' && c !== '\r') {
+            prevSignificant = c;
+        }
+        i++;
+    }
+
+    return out;
+}
+
+module.exports = { stripComments };