Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions app/tests/test-comment-stripper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
'use strict';

// Tests for the string-literal-aware comment stripper used by scripts/build.js.
// See issues #45 (URL literal mangling) and #38 (block comments untouched).

const { test } = require('node:test');
const assert = require('node:assert/strict');
const path = require('node:path');

const { stripComments } = require(path.join(__dirname, '..', '..', 'scripts', 'strip-comments.js'));

test('empty input returns empty string', () => {
assert.equal(stripComments(''), '');
});

test('plain code with no comments is unchanged', () => {
const src = 'const x = 1;\nconst y = 2;';
assert.equal(stripComments(src), src);
});

test('strips end-of-line // comment', () => {
const src = 'const x = 1; // a comment\nconst y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('a comment'), 'comment text should be stripped');
assert.ok(out.includes('const x = 1;'), 'code before comment preserved');
assert.ok(out.includes('const y = 2;'), 'following line preserved');
});

test('strips /* block */ comment on one line', () => {
const src = 'const x = 1; /* block comment */ const y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('block comment'), 'block comment text should be stripped');
assert.ok(out.includes('const x = 1;'), 'code before comment preserved');
assert.ok(out.includes('const y = 2;'), 'code after comment preserved');
});

test('strips multi-line /* ... */ block comment', () => {
const src = 'const x = 1;\n/* line one\n line two\n line three */\nconst y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('line one'), 'block comment line 1 stripped');
assert.ok(!out.includes('line two'), 'block comment line 2 stripped');
assert.ok(!out.includes('line three'), 'block comment line 3 stripped');
assert.ok(out.includes('const x = 1;'));
assert.ok(out.includes('const y = 2;'));
});

test('URL inside double-quoted string survives (issue #45)', () => {
const src = 'const url = "https://example.com/path"; // real comment';
const out = stripComments(src);
assert.ok(out.includes('"https://example.com/path"'),
'URL literal with // must not be mangled');
assert.ok(!out.includes('real comment'), 'eol comment after the string is stripped');
});

test('URL inside single-quoted string survives', () => {
const src = "const url = 'https://example.com'; // trailing";
const out = stripComments(src);
assert.ok(out.includes("'https://example.com'"),
'single-quoted URL must survive');
assert.ok(!out.includes('trailing'));
});

test('URL inside template literal survives', () => {
const src = 'const url = `https://example.com/${path}`; // tail';
const out = stripComments(src);
assert.ok(out.includes('`https://example.com/${path}`'),
'template literal URL must survive');
assert.ok(!out.includes('tail'));
});

test('escaped quote inside string does not end the string', () => {
const src = 'const s = "a\\"b//c"; // real';
const out = stripComments(src);
assert.ok(out.includes('"a\\"b//c"'),
'escaped-quote string contents must survive verbatim');
assert.ok(!out.includes('// real'), 'trailing eol comment stripped');
});

test('escaped backslash before quote still closes the string', () => {
// "a\\" is a valid closed string (backslash then closing quote).
// What follows ( // b ) IS a comment and should be stripped.
const src = 'const s = "a\\\\"; // b';
const out = stripComments(src);
assert.ok(out.includes('"a\\\\"'), 'string with trailing \\\\ preserved');
assert.ok(!out.includes('// b'), 'trailing comment stripped');
});

test('// inside a block comment does not escape it', () => {
const src = 'const x = 1; /* contains // inside */ const y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('contains'), 'block comment stripped entirely');
assert.ok(out.includes('const x = 1;'));
assert.ok(out.includes('const y = 2;'));
});

test('/* inside a // comment is not a block opener', () => {
const src = 'const x = 1; // has /* inside\nconst y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('has'), 'line comment stripped');
assert.ok(out.includes('const x = 1;'));
assert.ok(out.includes('const y = 2;'));
});

test('// inside single-quoted string survives', () => {
const src = "const s = 'a//b'; const y = 2;";
const out = stripComments(src);
assert.ok(out.includes("'a//b'"), 'string containing // survives');
});

test('/* inside double-quoted string is not a block opener', () => {
const src = 'const s = "a/*b*/c"; const y = 2;';
const out = stripComments(src);
assert.ok(out.includes('"a/*b*/c"'), 'string containing /* ... */ survives');
});

test('regex literal with // does not crash and is preserved', () => {
// A division-like expression containing // must not be interpreted as a comment.
// This is conservative: we only require the builder not to crash and not to
// strip the trailing identifier.
const src = 'const r = /a\\/b/; const y = 2;';
assert.doesNotThrow(() => stripComments(src));
const out = stripComments(src);
assert.ok(out.includes('const y = 2;'),
'code after regex literal survives');
});

test('unterminated string does not crash', () => {
// Pathological input: don't hang or throw.
const src = 'const s = "unterminated\nconst y = 2;';
assert.doesNotThrow(() => stripComments(src));
});

test('unterminated block comment does not crash', () => {
const src = 'const x = 1; /* never closed';
assert.doesNotThrow(() => stripComments(src));
});

test('consecutive // comments on separate lines stripped', () => {
const src = '// one\n// two\nconst x = 1;';
const out = stripComments(src);
assert.ok(!out.includes('one'));
assert.ok(!out.includes('two'));
assert.ok(out.includes('const x = 1;'));
});

test('http:// at start of comment is still stripped as comment', () => {
// Not inside a string -- it's just two slashes, so it's a line comment.
const src = 'const x = 1; // see http://example.com\nconst y = 2;';
const out = stripComments(src);
assert.ok(!out.includes('see'));
assert.ok(!out.includes('example.com'));
assert.ok(out.includes('const x = 1;'));
assert.ok(out.includes('const y = 2;'));
});

test('real-world worker prologue: /* */ after string with URL', () => {
const src = [
'const API = "https://api.example.com/v1";',
'/* legal block header',
' spanning multiple lines */',
'self.onmessage = (e) => { /* inner */ postMessage(e.data); }; // done',
].join('\n');
const out = stripComments(src);
assert.ok(out.includes('"https://api.example.com/v1"'), 'URL survives');
assert.ok(!out.includes('legal block header'), 'block header stripped');
assert.ok(!out.includes('spanning'), 'block body stripped');
assert.ok(!out.includes('inner'), 'inline block stripped');
assert.ok(!out.includes('done'), 'trailing eol comment stripped');
assert.ok(out.includes('postMessage(e.data)'), 'real code preserved');
});
8 changes: 5 additions & 3 deletions scripts/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

const fs = require('node:fs');
const path = require('node:path');
const { stripComments } = require('./strip-comments.js');

const ROOT = path.join(__dirname, '..');
const SRC = path.join(ROOT, 'src');
Expand Down Expand Up @@ -30,9 +31,10 @@ const workerParts = WORKER_FILES.map(f =>
fs.readFileSync(path.join(SRC, 'js', 'worker', f), 'utf8')
);

// Minify worker: strip // comments (they break single-line output), then collapse whitespace
const workerBlob = workerParts.join('\n')
.replace(/\/\/.*$/gm, '') // remove single-line comments before collapsing newlines
// Minify worker: strip // and /* */ comments in a string-literal-aware way so
// URL literals and other strings containing `//` are preserved (issues #45, #38).
// Then collapse whitespace for the single-line worker blob.
const workerBlob = stripComments(workerParts.join('\n'))
.replace(/\s+/g, ' ')
.trim();

Expand Down
176 changes: 176 additions & 0 deletions scripts/strip-comments.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
'use strict';

// String-literal-aware JavaScript comment stripper.
//
// Removes `// line` and `/* block */` comments while leaving string literals
// (double, single, and template) untouched -- including URLs that contain `//`.
// This replaces a naive `/\/\/.*$/gm` regex that would silently mangle any line
// containing `//` inside a string (see issues #45 and #38).
//
// Not a full JS parser. It handles the shapes that actually appear in
// src/js/worker/*.js today:
// - // line comments
// - /* block comments */ (single- and multi-line)
// - "..." and '...' strings with \-escapes
// - `...` template literals (${...} interpolations with nested braces)
// - regex literals /.../flags (best-effort disambiguation from division)
// Pathological inputs (unterminated string or block comment) do not throw --
// they consume to EOF.

/**
* @param {string} src
* @returns {string}
*/
function stripComments(src) {
if (typeof src !== 'string') {
throw new TypeError('stripComments: src must be a string');
}
const n = src.length;
let out = '';
let i = 0;
// `prevSignificant` tracks the last non-whitespace, non-comment character
// we emitted. We use it to decide whether a `/` begins a regex literal or
// a division operator. Rough heuristic: after an operator/keyword-like
// position, `/` starts a regex; after an identifier/number/)/], it's
// division. We start in "regex position".
let prevSignificant = '';

const isRegexContext = () => {
// Regex is allowed at the start of input or after these characters.
// Division is expected after identifiers, numbers, `)`, `]`, `}` (in
// expression position) and string/template closers. We conservatively
// treat `}` as division context; that's fine for our worker sources.
if (prevSignificant === '') return true;
return !/[A-Za-z0-9_$)\]}'"`]/.test(prevSignificant);
};

while (i < n) {
const c = src[i];
const c2 = i + 1 < n ? src[i + 1] : '';

// Line comment
if (c === '/' && c2 === '/') {
i += 2;
while (i < n && src[i] !== '\n') i++;
// Leave the newline (if any) for the outer loop to emit, so line
// structure is preserved.
continue;
}

// Block comment
if (c === '/' && c2 === '*') {
i += 2;
while (i < n) {
if (src[i] === '*' && i + 1 < n && src[i + 1] === '/') {
i += 2;
break;
}
i++;
}
// Emit a single space so tokens on either side don't fuse.
out += ' ';
prevSignificant = ' ';
continue;
}

// Double- or single-quoted string
if (c === '"' || c === "'") {
const quote = c;
out += c;
i++;
while (i < n) {
const ch = src[i];
if (ch === '\\' && i + 1 < n) {
// Copy escape sequence verbatim (covers \", \', \\, \n, etc.).
out += ch + src[i + 1];
i += 2;
continue;
}
out += ch;
i++;
if (ch === quote) break;
if (ch === '\n') break; // unterminated string: bail out of the loop
}
prevSignificant = quote;
continue;
}

// Template literal
if (c === '`') {
out += c;
i++;
let braceDepth = 0;
while (i < n) {
const ch = src[i];
if (ch === '\\' && i + 1 < n) {
out += ch + src[i + 1];
i += 2;
continue;
}
if (braceDepth === 0 && ch === '`') {
out += ch;
i++;
break;
}
if (braceDepth === 0 && ch === '$' && i + 1 < n && src[i + 1] === '{') {
out += '${';
i += 2;
braceDepth = 1;
continue;
}
if (braceDepth > 0) {
if (ch === '{') braceDepth++;
else if (ch === '}') {
braceDepth--;
out += ch;
i++;
continue;
}
}
out += ch;
i++;
}
prevSignificant = '`';
continue;
}

// Regex literal (best-effort)
if (c === '/' && isRegexContext()) {
out += c;
i++;
let inClass = false;
while (i < n) {
const ch = src[i];
if (ch === '\\' && i + 1 < n) {
out += ch + src[i + 1];
i += 2;
continue;
}
if (ch === '[') inClass = true;
else if (ch === ']') inClass = false;
out += ch;
i++;
if (ch === '/' && !inClass) break;
if (ch === '\n') break; // unterminated: bail
}
// Flags
while (i < n && /[a-z]/i.test(src[i])) {
out += src[i];
i++;
}
prevSignificant = '/';
continue;
}

// Default: copy character, update prevSignificant if non-whitespace.
out += c;
if (c !== ' ' && c !== '\t' && c !== '\n' && c !== '\r') {
prevSignificant = c;
}
i++;
}

return out;
}

module.exports = { stripComments };
Loading