Skip to content

Commit 2a70173

Browse files
yonrannicholasio
andauthored
Fix blocks containing character references (#921)
Co-authored-by: Nícholas Oliveira <nicholas.andre@10up.com>
1 parent fc8a39e commit 2a70173

4 files changed

Lines changed: 254 additions & 17 deletions

File tree

.changeset/weak-cloths-lose.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@headstartwp/headstartwp": patch
3+
---
4+
5+
Fix how data-wp-block attribute is set to avoid generating incorrect/insecure markup

docs/documentation/06-WordPress Integration/gutenberg.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ This filter is not as useful as the previous one but it allows you to filter the
4040
/**
4141
* Filter's out the block's attributes after serialization
4242
*
43-
* @param string $encoded_attrs The serialized block's Attributes
43+
* @param string $encoded_attrs The block attributes serialized to a JSON string
4444
* @param array $attrs The Block's Attributes
4545
* @param array $block The Block's schema
4646
* @param \WP_Block $block_instance The block's instance
4747
*/
4848
$block_attrs_serialized = apply_filters(
4949
'tenup_headless_wp_render_blocks_attrs_serialized',
50-
esc_attr( wp_json_encode( $block_attrs ) ),
50+
wp_json_encode( $block_attrs ),
5151
$block_attrs,
5252
$block,
5353
$block_instance

wp/headless-wp/includes/classes/Integrations/Gutenberg.php

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,37 @@ public function process_block_with_dom_document_api( $html, $block_name, $block_
304304
}
305305
}
306306

307+
/**
308+
* Set the block attributes in the HTML
309+
*
310+
* This is a workaround to avoid the issue with the HTML_Tag_Processor API not handling JSON with HTML in attributes.
311+
*
312+
* @see https://github.com/10up/headstartwp/pull/921
313+
*
314+
* @param string $placeholder The placeholder for the block attributes
315+
* @param string $html The block markup
316+
* @param string $block_attrs_serialized The block attributes serialized to a JSON string
317+
*
318+
* @return string The processed html
319+
*/
320+
public function set_block_attributes_tag_api( $placeholder, $html, $block_attrs_serialized ) {
321+
$search = sprintf( '/data-wp-block="%s"/', preg_quote( $placeholder, '/' ) );
322+
$replace = sprintf( 'data-wp-block="%s"', htmlspecialchars( $block_attrs_serialized ) );
323+
324+
// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
325+
return preg_replace(
326+
$search,
327+
$replace,
328+
$html
329+
);
330+
}
331+
307332
/**
308333
* Process the block with the WP_HTML_Tag_Processor
309334
*
310335
* @param string $html The block markup
311336
* @param string $block_name The block name
312-
* @param string $block_attrs_serialized The serialized block attributes
337+
* @param string $block_attrs_serialized The block attributes serialized to a JSON string
313338
* @param array $block The block schema
314339
* @param WP_Block $block_instance The block instance
315340
*
@@ -321,7 +346,14 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr
321346

322347
if ( ! $this->bypass_block_attributes( $block_name, $block_instance ) && $doc->next_tag() ) {
323348
$doc->set_attribute( 'data-wp-block-name', $block_name );
324-
$doc->set_attribute( 'data-wp-block', $block_attrs_serialized );
349+
$placeholder = '___HEADSTARTWP_BLOCK_ATTRS___';
350+
$doc->set_attribute( 'data-wp-block', $placeholder );
351+
352+
$intermediate_html = $doc->get_updated_html();
353+
$intermediate_html = $this->set_block_attributes_tag_api( $placeholder, $intermediate_html, $block_attrs_serialized );
354+
355+
$doc = new WP_HTML_Tag_Processor( $intermediate_html );
356+
$doc->next_tag();
325357

326358
/**
327359
* Filter the block before rendering
@@ -347,7 +379,7 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr
347379
*
348380
* @param string $html The block markup
349381
* @param string $block_name The block name
350-
* @param string $serialized_attributes Serialized attributes
382+
* @param string $serialized_attributes The block attributes serialized to a JSON string
351383
* @param array $block The block array
352384
* @param WP_Block $block_instance The block instance
353385
*
@@ -365,14 +397,8 @@ public function process_dom_document_block(
365397
// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
366398
$root_node = $document->documentElement;
367399

368-
$attrs = $document->createAttribute( 'data-wp-block' );
369-
$attrs->value = $serialized_attributes;
370-
371-
$block_name_obj = $document->createAttribute( 'data-wp-block-name' );
372-
$block_name_obj->value = $block_name;
373-
374-
$root_node->appendChild( $attrs );
375-
$root_node->appendChild( $block_name_obj );
400+
$root_node->setAttribute( 'data-wp-block-name', $block_name );
401+
$root_node->setAttribute( 'data-wp-block', $serialized_attributes );
376402

377403
/**
378404
* Filter the block's DOMElement before rendering
@@ -476,14 +502,14 @@ public function render_block( $html, $block, $block_instance ) {
476502
/**
477503
* Filter out the block attributes after serialization
478504
*
479-
* @param string $encoded_attrs The serialized block attributes
505+
* @param string $encoded_attrs The block attributes serialized to a JSON string
480506
* @param array $attrs The block attributes
481507
* @param array $block The block schema
482508
* @param WP_Block $block_instance The block instance
483509
*/
484510
$block_attrs_serialized = apply_filters(
485511
'tenup_headless_wp_render_blocks_attrs_serialized',
486-
esc_attr( wp_json_encode( $block_attrs ) ),
512+
wp_json_encode( $block_attrs ),
487513
$block_attrs,
488514
$block,
489515
$block_instance

wp/headless-wp/tests/php/tests/TestGutenbergIntegration.php

Lines changed: 208 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,10 @@ public function test_handle_multi_byte_html_encoding() {
167167
MARKUP
168168
);
169169
$dom_expected = <<<RESULT
170-
<p data-wp-block='{"dropCap":false}' data-wp-block-name="core/paragraph">The temperature is 23&deg;C &#9728;&#65039; (sun emoji) and &copy; (copyright symbol). HTML entity for Degrees: &deg;.</p>
170+
<p data-wp-block-name="core/paragraph" data-wp-block='{"dropCap":false}'>The temperature is 23&deg;C &#9728;&#65039; (sun emoji) and &copy; (copyright symbol). HTML entity for Degrees: &deg;.</p>
171171
RESULT;
172172
$html_tag_api_expected = <<<RESULT
173-
<p data-wp-block="{&quot;dropCap&quot;:false}" data-wp-block-name="core/paragraph">The temperature is 23&deg;C &#9728;&#65039; (sun emoji) and &copy; (copyright symbol). HTML entity for Degrees: &deg;.</p>
173+
<p data-wp-block-name="core/paragraph" data-wp-block="{&quot;dropCap&quot;:false}">The temperature is 23&deg;C &#9728;&#65039; (sun emoji) and &copy; (copyright symbol). HTML entity for Degrees: &deg;.</p>
174174
RESULT;
175175

176176
$dom_output = $this->parser->render_block( $html, $block, $instance );
@@ -309,6 +309,212 @@ public function test_render_html_tag_api( array $incoming, array $block_structur
309309
remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' );
310310
}
311311

312+
/**
313+
* Tests that HTML entities in block attributes are preserved correctly with tag processor
314+
*
315+
* @return void
316+
*/
317+
public function test_html_entities_are_double_encoded() {
318+
// Test with content containing HTML entities
319+
// (and a ' to ensure that it is not serialized as a single-quote string
320+
// by WP_HTML_Tag_Processor)
321+
$markup = '<!-- wp:heading {"content":"&lt;script&gt;alert(&#039;xss&#039;)&lt;/script&gt;\'","level":2} -->content<!-- /wp:heading -->';
322+
$block = $this->core_render_block_from_markup( $markup );
323+
$enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] );
324+
325+
// Any HTML entities in JSON strings should be double-encoded
326+
$this->assertStringContainsString(
327+
'data-wp-block="{&quot;content&quot;:&quot;&amp;lt;script&amp;gt;alert(&amp;#039;xss&amp;#039;)&amp;lt;\/script&amp;gt;',
328+
$enhanced_block
329+
);
330+
}
331+
332+
/**
333+
* Tests that HTML entities in block attributes are preserved correctly with tag processor
334+
*
335+
* @return void
336+
*/
337+
public function test_html_entities_are_double_encoded_using_WP_HTML_Tag_Processor() {
338+
add_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' );
339+
$this->test_html_entities_are_double_encoded();
340+
remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', '__return_true' );
341+
}
342+
343+
/**
344+
* Data provider for block roundtrip tests
345+
*
346+
* @return array
347+
*/
348+
public function block_roundtrip_data_provider() {
349+
$test_cases = [
350+
'block value containing no special characters' => [
351+
'core/heading',
352+
[
353+
'x' => 'hi',
354+
'level' => 2,
355+
],
356+
'<!-- wp:heading {"x":"hi"} --> <h2></h2> <!-- /wp:heading -->',
357+
],
358+
'block value containing named character reference &apos;' => [
359+
'core/heading',
360+
[
361+
'x' => '&apos;',
362+
'level' => 2,
363+
],
364+
'<!-- wp:heading {"x":"&apos;"} --> <h2></h2> <!-- /wp:heading -->',
365+
],
366+
'block value containing lone apostrophe \' (from ENT_HTML5)' => [
367+
'core/heading',
368+
[
369+
'x' => '\'',
370+
'level' => 2,
371+
],
372+
'<!-- wp:heading {"x":"\'"} --> <h2></h2> <!-- /wp:heading -->',
373+
],
374+
'block value containing lone quote " (from ENT_COMPAT)' => [
375+
'core/heading',
376+
[
377+
'x' => '"',
378+
'level' => 2,
379+
],
380+
'<!-- wp:heading {"x":"\\""} --> <h2></h2> <!-- /wp:heading -->',
381+
],
382+
'block value containing named character reference &quot;' => [
383+
'core/heading',
384+
[
385+
'x' => '&quot;',
386+
'level' => 2,
387+
],
388+
'<!-- wp:heading {"x":"&quot;"} --> <h2></h2> <!-- /wp:heading -->',
389+
],
390+
'block value containing lone ampersand &' => [
391+
'core/heading',
392+
[
393+
'x' => '&',
394+
'level' => 2,
395+
],
396+
'<!-- wp:heading {"x":"&"} --> <h2></h2> <!-- /wp:heading -->',
397+
],
398+
'block value containing named character reference &amp;' => [
399+
'core/heading',
400+
[
401+
'x' => '&amp;',
402+
'level' => 2,
403+
],
404+
'<!-- wp:heading {"x":"&amp;"} --> <h2></h2> <!-- /wp:heading -->',
405+
],
406+
'block value containing hexadecimal numeric character reference &#x26; (should not be converted to &amp;)' => [
407+
'core/heading',
408+
[
409+
'x' => '&#x26;',
410+
'level' => 2,
411+
],
412+
'<!-- wp:heading {"x":"&#x26;"} --> <h2></h2> <!-- /wp:heading -->',
413+
],
414+
'block value containing leading zero hexadecimal numeric character reference &#x026; (should not be converted to &amp;)' => [
415+
'core/heading',
416+
[
417+
'x' => '&#x026;',
418+
'level' => 2,
419+
],
420+
'<!-- wp:heading {"x":"&#x026;"} --> <h2></h2> <!-- /wp:heading -->',
421+
],
422+
'block value containing decimal numeric character reference &#38; (should not be converted to &amp;)' => [
423+
'core/heading',
424+
[
425+
'x' => '&#38;',
426+
'level' => 2,
427+
],
428+
'<!-- wp:heading {"x":"&#38;"} --> <h2></h2> <!-- /wp:heading -->',
429+
],
430+
'block value containing leading zero decimal numeric character reference &#038; (should not be converted to &amp;)' => [
431+
'core/heading',
432+
[
433+
'x' => '&#038;',
434+
'level' => 2,
435+
],
436+
'<!-- wp:heading {"x":"&#038;"} --> <h2></h2> <!-- /wp:heading -->',
437+
],
438+
'html_entities' => [
439+
'core/heading',
440+
[
441+
'content' => '&lt;script&gt;alert(&#039;xss&#039;)&lt;/script&gt;',
442+
'level' => 2,
443+
],
444+
'<!-- wp:heading {"content":"&lt;script&gt;alert(&#039;xss&#039;)&lt;/script&gt;","level":2} --> <h2>&lt;script&gt;alert(\'xss\')&lt;/script&gt;</h2><!-- /wp:heading -->',
445+
],
446+
'complex_attributes' => [
447+
'core/image',
448+
[
449+
'id' => 28,
450+
'sizeSlug' => 'large',
451+
'linkDestination' => 'none',
452+
'alt' => '',
453+
],
454+
'<!-- wp:image {"id":28,"sizeSlug":"large","linkDestination":"none"} --> <figure class="wp-block-image size-large"><img src="http://example.com/image.jpg" alt="" class="wp-image-28"/></figure><!-- /wp:image -->',
455+
],
456+
'special_characters' => [
457+
'core/quote',
458+
[
459+
'citation' => 'Author "Name" & Co.',
460+
'value' => '<p>Quote with "quotes" & ampersands</p>',
461+
],
462+
'<!-- wp:quote {"citation":"Author \"Name\" & Co.","value":"<p>Quote with \"quotes\" & ampersands</p>"} --> <blockquote><p>Quote with "quotes" & ampersands</p><cite>Author "Name" & Co.</cite></blockquote><!-- /wp:quote -->',
463+
],
464+
];
465+
$test_cases_with_or_without_tag_processor = [];
466+
foreach ( $test_cases as $name => $case ) {
467+
$test_cases_with_or_without_tag_processor[ "$name with WP_HTML_Tag_Processor" ] = array_merge( $case, [ true ] );
468+
$test_cases_with_or_without_tag_processor[ "$name with DomDocument" ] = array_merge( $case, [ false ] );
469+
}
470+
return $test_cases_with_or_without_tag_processor;
471+
}
472+
473+
/**
474+
* Tests that block attributes can be round-tripped correctly
475+
*
476+
* @dataProvider block_roundtrip_data_provider
477+
*
478+
* @param string $expected_block_name The expected block name
479+
* @param array $expected_attributes The expected block attributes
480+
* @param string $markup The block markup to test
481+
* @param bool $use_tag_processor Whether to use the tag processor
482+
* @return void
483+
*/
484+
public function test_block_attributes_roundtrip( $expected_block_name, $expected_attributes, $markup, $use_tag_processor ) {
485+
$block = $this->core_render_block_from_markup( $markup );
486+
$tag_processor_function = $use_tag_processor ? '__return_true' : '__return_false';
487+
add_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function );
488+
try {
489+
$enhanced_block = $this->parser->render_block( $block['html'], $block['parsed_block'], $block['instance'] );
490+
} finally {
491+
remove_filter( 'tenup_headless_wp_render_block_use_tag_processor', $tag_processor_function );
492+
}
493+
494+
// Parse the enhanced block using DOMDocument to extract data-wp-block and data-wp-block-name
495+
$doc = new \DOMDocument();
496+
$success = $doc->loadHTML( $enhanced_block, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD );
497+
498+
$this->assertTrue( $success, 'DOMDocument should successfully parse the enhanced block HTML' );
499+
500+
// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
501+
$root_element = $doc->documentElement;
502+
$this->assertNotNull( $root_element, 'Should have a root element' );
503+
504+
$block_name_attr = $root_element->getAttribute( 'data-wp-block-name' );
505+
$block_data_attr = $root_element->getAttribute( 'data-wp-block' );
506+
507+
$this->assertNotEmpty( $block_name_attr, 'data-wp-block-name attribute should be present' );
508+
$this->assertNotEmpty( $block_data_attr, 'data-wp-block attribute should be present' );
509+
510+
// Parse JSON - DOMDocument should have already handled HTML entity decoding
511+
$parsed_attributes = json_decode( $block_data_attr, true );
512+
513+
$this->assertIsArray( $parsed_attributes, 'Block data should decode to valid JSON array' );
514+
$this->assertEquals( $expected_block_name, $block_name_attr, 'Block name should match expected' );
515+
$this->assertEquals( $expected_attributes, $parsed_attributes, 'Block attributes should match expected (encoded: ' . $enhanced_block . ')' );
516+
}
517+
312518
/**
313519
* Tests block's rendering Synced Patterns which use another post to store the patterns content
314520
* - Run separate to hook the Parser filter on all render_block processing, required for nested blocks

0 commit comments

Comments
 (0)