Skip to content

Commit 6dfed31

Browse files
committed
fix: Preserve XML structural entities during decode
1 parent 058da08 commit 6dfed31

1 file changed

Lines changed: 30 additions & 13 deletions

File tree

src/snapshot/tosvg.js

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,39 @@ var Color = require('../components/color');
99
var xmlnsNamespaces = require('../constants/xmlns_namespaces');
1010
var DOUBLEQUOTE_REGEX = /"/g;
1111
var DUMMY_SUB = 'TOBESTRIPPED';
12-
var DUMMY_REGEX = new RegExp('("' + DUMMY_SUB + ')|(' + DUMMY_SUB + '")', 'g');
13-
12+
// Match TOBESTRIPPED adjacent to either a literal " or its entity form &quot;.
13+
// XMLSerializer escapes inner double-quotes to &quot; inside "-delimited
14+
// attributes, and htmlEntityDecode now preserves that entity for safety.
15+
// Strips the TOBESTRIPPED placeholder together with its adjacent quote, whether
// that quote appears as a literal " or as the &quot; entity.
const DUMMY_REGEX = new RegExp(`("${DUMMY_SUB})|(${DUMMY_SUB}")|(&quot;${DUMMY_SUB})|(${DUMMY_SUB}&quot;)`, 'g');
16+
17+
// Entities for & " ' - decoding these in attribute context is an XSS vector,
18+
// so preserve them as-is. List includes named, decimal, and hex numeric forms.
19+
// Named, decimal, and hex entity forms of & " ' (lowercase, since lookups are
// done against a lowercased match).
const PRESERVED_ENTITIES = ['&amp;', '&#38;', '&#x26;', '&quot;', '&#34;', '&#x22;', '&apos;', '&#39;', '&#x27;'];
20+
// Entities for < and > - normalize to numeric so downstream passes treat them
21+
// uniformly regardless of which form the serializer emitted.
22+
const LESS_THAN_ENTITIES = ['&lt;', '&#60;', '&#x3c;'];
23+
const GREATER_THAN_ENTITIES = ['&gt;', '&#62;', '&#x3e;'];
24+
25+
/**
26+
* Decode non-structural entities to Unicode for non-browser SVG renderers,
27+
* keeping & " ' < > entity-encoded to prevent attribute-context escape (XSS).
28+
*
29+
* @param s - serialized SVG string
30+
* @returns entity-normalized SVG string
31+
*/
1432
function htmlEntityDecode(s) {
15-
var hiddenDiv = d3.select('body').append('div').style({ display: 'none' }).html('');
16-
var replaced = s.replace(/(&[^;]*;)/gi, function (d) {
17-
if (d === '&lt;') {
18-
return '&#60;';
19-
} // special handling for brackets
20-
if (d === '&rt;') {
21-
return '&#62;';
22-
}
23-
if (d.indexOf('<') !== -1 || d.indexOf('>') !== -1) {
24-
return '';
25-
}
33+
const hiddenDiv = d3.select('body').append('div').style({ display: 'none' }).html('');
34+
const replaced = s.replace(/(&[^;]*;)/gi, (d) => {
35+
const lower = d.toLowerCase();
36+
if (PRESERVED_ENTITIES.includes(lower)) return d;
37+
if (LESS_THAN_ENTITIES.includes(lower)) return '&#60;';
38+
if (GREATER_THAN_ENTITIES.includes(lower)) return '&#62;';
39+
if (d.includes('<') || d.includes('>')) return '';
40+
2641
return hiddenDiv.html(d).text(); // everything else, let the browser decode it to unicode
2742
});
2843
hiddenDiv.remove();
44+
2945
return replaced;
3046
}
3147

@@ -156,6 +172,7 @@ module.exports = function toSVG(gd, format, scale) {
156172
}
157173

158174
var s = new window.XMLSerializer().serializeToString(svg.node());
175+
// Decode numeric refs to Unicode so non-browser renderers (Batik, Illustrator) render them correctly.
159176
s = htmlEntityDecode(s);
160177
s = xmlEntityEncode(s);
161178

0 commit comments

Comments
 (0)