From f5d3dede6b6aab06a5758ec847f9b29cda95f652 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Sat, 6 Dec 2025 18:19:44 +0100 Subject: [PATCH 1/5] node: textContent must ignore comments for elements --- src/browser/tests/node/text_content.html | 17 +++++++++++++---- src/browser/webapi/Node.zig | 17 +++++++++++++++-- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/browser/tests/node/text_content.html b/src/browser/tests/node/text_content.html index d6250320e..fc6a0de9a 100644 --- a/src/browser/tests/node/text_content.html +++ b/src/browser/tests/node/text_content.html @@ -1,6 +1,12 @@
d1

hello

+
+ + + This is a
+ text +
diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig index 596dc5549..2cb0d323d 100644 --- a/src/browser/webapi/Node.zig +++ b/src/browser/webapi/Node.zig @@ -171,7 +171,20 @@ pub fn childNodes(self: *const Node, page: *Page) !*collections.ChildNodes { pub fn getTextContent(self: *Node, writer: *std.Io.Writer) error{WriteFailed}!void { switch (self._type) { - .element => |el| return el.getInnerText(writer), + .element => { + var it = self.childrenIterator(); + while (it.next()) |child| { + // ignore comments and TODO processing instructions. + switch (child._type) { + .cdata => |c| switch (c._type) { + .comment => continue, + .text => {}, + }, + else => {}, + } + try child.getTextContent(writer); + } + }, .cdata => |c| try writer.writeAll(c.getData()), .document => {}, .document_type => {}, @@ -719,7 +732,7 @@ pub const JsApi = struct { switch (self._type) { .element => |el| { var buf = std.Io.Writer.Allocating.init(page.call_arena); - try el.getInnerText(&buf.writer); + try el.asNode().getTextContent(&buf.writer); return buf.written(); }, .cdata => |cdata| return cdata.getData(), From a673eb89b658ca7071c01e74bec868c7debf2dd4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Sat, 6 Dec 2025 19:09:20 +0100 Subject: [PATCH 2/5] element: innerText which must return rendered text --- src/browser/tests/element/inner.html | 13 ++++++++++ src/browser/webapi/Element.zig | 36 +++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/browser/tests/element/inner.html b/src/browser/tests/element/inner.html index c9bb08946..2c93a7178 100644 --- a/src/browser/tests/element/inner.html +++ b/src/browser/tests/element/inner.html @@ -1,6 +1,12 @@
hello world
+
+ + + This is a
+ text +
diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig index 758bb1f24..83f971a6d 100644 --- a/src/browser/webapi/Element.zig +++ b/src/browser/webapi/Element.zig @@ -223,10 +223,44 @@ pub fn getNamespaceURI(self: *const Element) []const u8 { return self._namespace.toUri(); } +// innerText represents the **rendered** text content of a node and its +// descendants. pub fn getInnerText(self: *Element, writer: *std.Io.Writer) !void { var it = self.asNode().childrenIterator(); while (it.next()) |child| { - try child.getTextContent(writer); + switch (child._type) { + .element => |e| switch (e._type) { + .html => |he| switch (he._type) { + .br => try writer.writeByte('\n'), + .script, .style, .template => continue, + else => try e.getInnerText(writer), // TODO check if elt is hidden. + }, + .svg => {}, + }, + .cdata => |c| switch (c._type) { + .comment => continue, + .text => { + const data = c.getData(); + if (std.mem.trim(u8, data, &std.ascii.whitespace).len != 0) { + // Trim all whitespaces except spaces. + // TODO this is not the correct way to render text, this is + // a temp approximation. + const text = std.mem.trim(u8, data, &[_]u8{ + '\t', + '\n', + '\r', + std.ascii.control_code.vt, + std.ascii.control_code.ff, + }); + try writer.writeAll(text); + } + }, + }, + .document => {}, + .document_type => {}, + .document_fragment => {}, + .attribute => |attr| try writer.writeAll(attr._value), + } } } From 240e8b35022570926fd004815b5eebe1ca07a39d Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Sun, 7 Dec 2025 09:52:59 +0100 Subject: [PATCH 3/5] use a better comparison to detect comment --- src/browser/webapi/Node.zig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig index 2cb0d323d..ef6de4a7d 100644 --- a/src/browser/webapi/Node.zig +++ b/src/browser/webapi/Node.zig @@ -175,12 +175,8 @@ pub fn getTextContent(self: *Node, writer: *std.Io.Writer) error{WriteFailed}!vo var it = self.childrenIterator(); while (it.next()) |child| { // ignore comments and TODO processing instructions. - switch (child._type) { - .cdata => |c| switch (c._type) { - .comment => continue, - .text => {}, - }, - else => {}, + if (child.is(CData.Comment) != null) { + continue; } try child.getTextContent(writer); } From aac35ae868ff618872ee227e10e5b4a6dfd45f17 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Dec 2025 09:41:06 +0100 Subject: [PATCH 4/5] implement CData.render --- src/browser/webapi/CData.zig | 90 ++++++++++++++++++++++++++++++++++ src/browser/webapi/Element.zig | 17 +------ 2 files changed, 91 insertions(+), 16 deletions(-) diff --git a/src/browser/webapi/CData.zig b/src/browser/webapi/CData.zig index f02d51cad..507031af2 100644 --- a/src/browser/webapi/CData.zig +++ b/src/browser/webapi/CData.zig @@ -60,6 +60,57 @@ pub fn getData(self: *const CData) []const u8 { return self._data; } +pub const RenderOpts = struct { + trim_left: bool = true, + trim_right: bool = true, +}; +// Replace successives whitespaces with one withespace. +// Trims left and right according to the options. +pub fn render(self: *const CData, writer: *std.io.Writer, opts: RenderOpts) !void { + var start: usize = 0; + var prev_w: ?bool = null; + var is_w: bool = undefined; + const s = self._data; + + for (s, 0..) |c, i| { + is_w = std.ascii.isWhitespace(c); + + // Detect the first char type. + if (prev_w == null) { + prev_w = is_w; + } + // The current char is the same kind of char, the chunk continues. + if (prev_w.? == is_w) { + continue; + } + + // Starting here, the chunk changed. + if (is_w) { + // We have a chunk of non-whitespaces, we write it as it. + try writer.writeAll(s[start..i]); + } else { + // We have a chunk of whitespaces, replace with one space, + // depending the position. + if (start > 0 or !opts.trim_left) { + try writer.writeByte(' '); + } + } + // Start the new chunk. + prev_w = is_w; + start = i; + } + // Write the reminder chunk. + if (is_w) { + // Last chunk is whitespaces. + if (opts.trim_right == false) { + try writer.writeByte(' '); + } + } else { + // last chunk is non whitespaces. + try writer.writeAll(s[start..]); + } +} + pub fn setData(self: *CData, value: ?[]const u8, page: *Page) !void { const old_value = self._data; @@ -223,3 +274,42 @@ const testing = @import("../../testing.zig"); test "WebApi: CData" { try testing.htmlRunner("cdata", .{}); } + +test "WebApi: CData.render" { + const allocator = std.testing.allocator; + + const TestCase = struct { + value: []const u8, + expected: []const u8, + opts: RenderOpts = .{}, + }; + + const test_cases = [_]TestCase{ + .{ .value = "foo bar", .expected = "foo bar" }, + .{ .value = "foo bar", .expected = "foo bar" }, + .{ .value = " foo bar", .expected = "foo bar" }, + .{ .value = "foo bar ", .expected = "foo bar" }, + .{ .value = " foo bar ", .expected = "foo bar" }, + .{ .value = "foo\n\tbar", .expected = "foo bar" }, + .{ .value = "\tfoo bar baz \t\n yeah\r\n", .expected = "foo bar baz yeah" }, + .{ .value = " foo bar", .expected = " foo bar", .opts = .{ .trim_left = false } }, + .{ .value = "foo bar ", .expected = "foo bar ", .opts = .{ .trim_right = false } }, + .{ .value = " foo bar ", .expected = " foo bar ", .opts = .{ .trim_left = false, .trim_right = false } }, + }; + + var buffer = std.io.Writer.Allocating.init(allocator); + defer buffer.deinit(); + for (test_cases) |test_case| { + buffer.clearRetainingCapacity(); + + const cdata = CData{ + ._type = .{ .text = undefined }, + ._proto = undefined, + ._data = test_case.value, + }; + + try cdata.render(&buffer.writer, test_case.opts); + + try std.testing.expectEqualStrings(test_case.expected, buffer.written()); + } +} diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig index 83f971a6d..b40791b5f 100644 --- a/src/browser/webapi/Element.zig +++ b/src/browser/webapi/Element.zig @@ -239,22 +239,7 @@ pub fn getInnerText(self: *Element, writer: *std.Io.Writer) !void { }, .cdata => |c| switch (c._type) { .comment => continue, - .text => { - const data = c.getData(); - if (std.mem.trim(u8, data, &std.ascii.whitespace).len != 0) { - // Trim all whitespaces except spaces. - // TODO this is not the correct way to render text, this is - // a temp approximation. - const text = std.mem.trim(u8, data, &[_]u8{ - '\t', - '\n', - '\r', - std.ascii.control_code.vt, - std.ascii.control_code.ff, - }); - try writer.writeAll(text); - } - }, + .text => try c.render(writer, .{}), }, .document => {}, .document_type => {}, From 5284d75cb7b77aa71552928127c7b893920f94fe Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Dec 2025 09:55:31 +0100 Subject: [PATCH 5/5] use CData.render for innerText --- src/browser/tests/element/inner.html | 2 +- src/browser/webapi/CData.zig | 5 ++++- src/browser/webapi/Element.zig | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/browser/tests/element/inner.html b/src/browser/tests/element/inner.html index 2c93a7178..ada3f7c17 100644 --- a/src/browser/tests/element/inner.html +++ b/src/browser/tests/element/inner.html @@ -172,5 +172,5 @@ // TODO innerText is not rendered correctly for now. //testing.expectEqual("This is a\ntext", d2.innerText); - testing.expectEqual(" This is a \n text", d2.innerText); + testing.expectEqual(" This is a \n text ", d2.innerText); diff --git a/src/browser/webapi/CData.zig b/src/browser/webapi/CData.zig index 507031af2..b1c78b038 100644 --- a/src/browser/webapi/CData.zig +++ b/src/browser/webapi/CData.zig @@ -102,7 +102,8 @@ pub fn render(self: *const CData, writer: *std.io.Writer, opts: RenderOpts) !voi // Write the reminder chunk. if (is_w) { // Last chunk is whitespaces. - if (opts.trim_right == false) { + // If the string contains only whitespaces, don't write it. + if (start > 0 and opts.trim_right == false) { try writer.writeByte(' '); } } else { @@ -285,6 +286,8 @@ test "WebApi: CData.render" { }; const test_cases = [_]TestCase{ + .{ .value = " ", .expected = "" }, + .{ .value = " ", .expected = "", .opts = .{ .trim_left = false, .trim_right = false } }, .{ .value = "foo bar", .expected = "foo bar" }, .{ .value = "foo bar", .expected = "foo bar" }, .{ .value = " foo bar", .expected = "foo bar" }, diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig index b40791b5f..4d50051dd 100644 --- a/src/browser/webapi/Element.zig +++ b/src/browser/webapi/Element.zig @@ -239,7 +239,7 @@ pub fn getInnerText(self: *Element, writer: *std.Io.Writer) !void { }, .cdata => |c| switch (c._type) { .comment => continue, - .text => try c.render(writer, .{}), + .text => try c.render(writer, .{ .trim_right = false, .trim_left = false }), }, .document => {}, .document_type => {},