Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 166 additions & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,115 @@ public static function create_full_parser( $html, $known_definite_encoding = 'UT
return $processor;
}

/**
 * Replaces the inner HTML of the currently-matched element.
 *
 * The replacement is only attempted when the processor is positioned on a
 * real (non-virtual) opening tag which expects a closer. Non-empty input is
 * first run through a fragment parser spawned at the current node and
 * serialized, and the serialized markup is what gets enqueued as the
 * replacement for everything between the opening tag and its matching closer.
 *
 * On every exit path the processor is returned to the opening tag (when it
 * had been moved) and the temporary bookmarks created here are released.
 *
 * @param string|null $html New inner HTML. `null` is treated like an empty string.
 * @return bool Whether the inner HTML replacement was enqueued.
 */
public function set_inner_html( ?string $html ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	if ( $this->get_token_type() !== '#tag' ) {
		return false;
	}

	if ( $this->is_tag_closer() ) {
		return false;
	}

	if ( ! $this->expects_closer() ) {
		return false;
	}

	// Self-closing foreign elements have no inner content to replace.
	if (
		'html' !== $this->state->current_token->namespace &&
		$this->state->current_token->has_self_closing_flag
	) {
		return false;
	}

	if ( null === $html ) {
		$html = '';
	}

	if ( '' !== $html ) {
		$fragment_parser = $this->spawn_fragment_parser( $html );
		if ( null === $fragment_parser ) {
			return false;
		}

		try {
			$html = $fragment_parser->serialize();
		} catch ( Exception $e ) {
			return false;
		}

		/*
		 * NOTE(review): guards against serialize() signalling failure through a
		 * non-string return value instead of an exception — confirm against its
		 * contract. Without this, a non-string would reach the text replacement.
		 */
		if ( ! is_string( $html ) ) {
			return false;
		}
	}

	// @todo apply modifications if there are any???

	$opener_bookmark = 'SET_INNER_HTML: opener';
	$closer_bookmark = 'SET_INNER_HTML: closer';

	if ( ! parent::set_bookmark( $opener_bookmark ) ) {
		return false;
	}

	if ( ! $this->seek_to_matching_closer() ) {
		// Return to where we started and avoid leaking the temporary bookmark.
		parent::seek( $opener_bookmark );
		parent::release_bookmark( $opener_bookmark );
		return false;
	}

	if ( ! parent::set_bookmark( $closer_bookmark ) ) {
		// The cursor sits on the closer at this point: restore it before bailing.
		parent::seek( $opener_bookmark );
		parent::release_bookmark( $opener_bookmark );
		return false;
	}

	// The inner HTML spans from the end of the opener to the start of the closer.
	$opener            = $this->bookmarks[ $opener_bookmark ];
	$closer            = $this->bookmarks[ $closer_bookmark ];
	$inner_html_start  = $opener->start + $opener->length;
	$inner_html_length = $closer->start - $inner_html_start;

	$this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement(
		$inner_html_start,
		$inner_html_length,
		$html
	);

	parent::seek( $opener_bookmark );
	parent::release_bookmark( $opener_bookmark );
	parent::release_bookmark( $closer_bookmark );

	// @todo check for whether that html will make a mess!
	// Will it break out of tags?

	return true;
}

/**
 * Advances the processor to the closing tag paired with the current opening tag.
 *
 * Applies only when positioned on an opening tag which expects a closer.
 * The search visits tags (closers included) with the current tag name and
 * stops at the first one whose breadcrumbs equal a copy of the starting
 * breadcrumbs with the last entry removed.
 *
 * The position is not restored here when no match is found; callers which
 * need that must seek back themselves.
 *
 * @return bool Whether a matching closer was found.
 */
public function seek_to_matching_closer(): bool {
	$tag_name = $this->get_tag();

	$is_opener_expecting_closer = null !== $tag_name && ! $this->is_tag_closer() && $this->expects_closer();
	if ( ! $is_opener_expecting_closer ) {
		return false;
	}

	// Breadcrumbs as they should read once the current element has been closed.
	$target_breadcrumbs = $this->breadcrumbs;
	array_pop( $target_breadcrumbs );

	// @todo Can't use these queries together
	while ( true ) {
		$query = array(
			'tag_name'    => $this->get_tag(),
			'tag_closers' => 'visit',
		);

		if ( ! $this->next_tag( $query ) ) {
			return false;
		}

		if ( $target_breadcrumbs === $this->get_breadcrumbs() ) {
			return true;
		}
	}
}


/**
* Constructor.
*
Expand Down Expand Up @@ -424,6 +533,61 @@ function ( WP_HTML_Token $token ): void {
};
}

/**
 * Creates a fragment processor with the current node as its context element.
 *
 * The processor must be positioned on a tag. Fragments cannot be created at
 * "self-contained" HTML elements (IFRAME, NOEMBED, NOFRAMES, SCRIPT, STYLE,
 * TEXTAREA, TITLE, XMP); in those cases `null` is returned.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
 *
 * @param string $html Input HTML fragment to process.
 * @return static|null The created processor if successful, otherwise null.
 */
public function spawn_fragment_parser( string $html ): ?self {
	if ( $this->get_token_type() !== '#tag' ) {
		return null;
	}

	$namespace = $this->get_namespace();

	/*
	 * Prevent creating fragments at "self-contained" nodes.
	 *
	 * @see https://github.com/WordPress/wordpress-develop/pull/7141
	 * @see https://github.com/WordPress/wordpress-develop/pull/7198
	 */
	if (
		'html' === $namespace &&
		in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
	) {
		return null;
	}

	$fragment_processor = self::create_fragment( $html );

	// Creating the fragment can fail; don't dereference a missing processor.
	if ( null === $fragment_processor ) {
		return null;
	}

	$fragment_processor->compat_mode = $this->compat_mode;

	// The context node is a detached copy of the current token.
	$fragment_processor->context_node                = clone $this->state->current_token;
	$fragment_processor->context_node->bookmark_name = 'context-node';
	$fragment_processor->context_node->on_destroy    = null;

	/*
	 * Describe the context element as a pair of tag name and attributes.
	 * Only the tag name is consulted below.
	 *
	 * The attribute names arrive as a list (or null when unavailable), so
	 * each value has to be looked up by name.
	 */
	$context_element = array( $fragment_processor->context_node->node_name, array() );
	$attribute_names = $this->get_attribute_names_with_prefix( '' );
	if ( is_array( $attribute_names ) ) {
		foreach ( $attribute_names as $attribute_name ) {
			$context_element[1][ $attribute_name ] = $this->get_attribute( $attribute_name );
		}
	}

	$fragment_processor->breadcrumbs = array();

	if ( 'TEMPLATE' === $context_element[0] ) {
		$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
	}

	$fragment_processor->reset_insertion_mode_appropriately();

	// @todo Set the parser's form element pointer.

	$fragment_processor->state->encoding_confidence = 'irrelevant';

	return $fragment_processor;
}

/**
* Stops the parser and terminates its execution when encountering unsupported markup.
*
Expand Down Expand Up @@ -522,6 +686,7 @@ public function get_unsupported_exception() {
* 1 for "first" tag, 3 for "third," etc.
* Defaults to first tag.
* @type string|null $class_name Tag must contain this whole class name to match.
* @type string $tag_name Tag name to match.
* @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
* May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
* }
Expand All @@ -545,7 +710,7 @@ public function next_tag( $query = null ): bool {
}

if ( is_string( $query ) ) {
$query = array( 'breadcrumbs' => array( $query ) );
$query = array( 'tag_name' => $query );
}

if ( ! is_array( $query ) ) {
Expand Down
97 changes: 97 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -863,4 +863,101 @@ public function test_adjusts_for_mathml_integration_points() {
'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
);
}

/**
 * Ensures the inner HTML of the element carrying a `target` attribute is replaced.
 *
 * @ticket TBD
 *
 * @dataProvider data_set_inner_html
 *
 * @param string      $html        Input document containing the target element.
 * @param string|null $replacement New inner HTML for the target element.
 * @param string      $expected    Expected document after the update.
 */
public function test_set_inner_html( string $html, ?string $replacement, string $expected ) {
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Walk tags until one with a truthy `target` attribute is found, or the input runs out.
	do {
		$found_tag = $processor->next_tag();
	} while ( $found_tag && ! $processor->get_attribute( 'target' ) );

	$was_set = $processor->set_inner_html( $replacement );
	$this->assertTrue( $was_set );

	$updated_html = $processor->get_updated_html();
	$this->assertSame( $expected, $updated_html );
}

/**
 * Data provider.
 *
 * Each dataset lists the input HTML, the replacement inner HTML,
 * and the expected updated HTML, in that order.
 *
 * @return array[]
 */
public static function data_set_inner_html() {
	$datasets = array();

	// Plain text content is swapped out.
	$datasets[] = array(
		'<div target>replace me</div>',
		'with me!',
		'<div target>with me!</div>',
	);

	// Nested content inside the target is replaced as a whole.
	$datasets[] = array(
		'<div target><div><p><a>replace me</div></div>',
		'with me!',
		'<div target>with me!</div>',
	);

	// The replacement is expected in a serialized form within a TABLE.
	$datasets[] = array(
		'<table target><td>replace me</table>',
		'<td>with me!',
		'<table target><tbody><tr><td>with me!</td></tr></tbody></table>',
	);

	return $datasets;
}

/**
 * Ensures that disallowed inner HTML updates are rejected and leave the document untouched.
 *
 * @ticket TBD
 *
 * @dataProvider data_set_inner_html_not_allowed
 *
 * @param string $html        Input document containing the target element.
 * @param string $replacement Inner HTML which must be refused.
 */
public function test_set_inner_html_not_allowed( string $html, string $replacement ) {
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Walk tags until one with a truthy `target` attribute is found, or the input runs out.
	do {
		$found_tag = $processor->next_tag();
	} while ( $found_tag && ! $processor->get_attribute( 'target' ) );

	$was_set = $processor->set_inner_html( $replacement );
	$this->assertFalse( $was_set, "Should have failed but produced: {$processor->get_updated_html()}" );

	$updated_html = $processor->get_updated_html();
	$this->assertSame( $html, $updated_html );
}

/**
 * Data provider.
 *
 * Each named dataset lists the input HTML followed by the
 * replacement inner HTML which is expected to be refused.
 *
 * @return array[]
 */
public static function data_set_inner_html_not_allowed(): array {
	$any_replacement  = 'anything';
	$text_replacement = 'text triggers forstering - not allowed';

	$datasets = array();

	// Elements which cannot hold replaceable inner content.
	$datasets['not allowed in void tags']         = array( '<br target>', $any_replacement );
	$datasets['not allowed in self-closing tags'] = array( '<svg><text target />', $any_replacement );
	$datasets['must have closing tag']            = array( '<body><div target></body>', $any_replacement );

	// Replacements placing an A element inside of an A element.
	$datasets['a in a']        = array( '<a target></a>', '<a>' );
	$datasets['a nested in a'] = array( '<a><i><em><strong target></a>', '<a>A cannot nest inside a' );

	// Text replacements inside of table structures.
	$datasets['text in table'] = array( '<table target><td>hello</table>', $text_replacement );
	$datasets['text in thead'] = array( '<table><thead target><td>hello</thead>', $text_replacement );
	$datasets['text in tr']    = array( '<table><tr target>hello</tr>', $text_replacement );

	return $datasets;
}
}
80 changes: 71 additions & 9 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,6 @@ public function data_external_html5lib_tests() {
* @return bool True if the test case should be skipped. False otherwise.
*/
private static function should_skip_test( ?string $test_context_element, string $test_name ): bool {
if ( null !== $test_context_element && 'body' !== $test_context_element ) {
return true;
}

if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
return true;
}
Expand All @@ -157,11 +153,77 @@ private static function should_skip_test( ?string $test_context_element, string
* @return string|null Tree structure of parsed HTML, if supported, else null.
*/
private static function build_tree_representation( ?string $fragment_context, string $html ) {
$processor = $fragment_context
? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
: WP_HTML_Processor::create_full_parser( $html );
if ( null === $processor ) {
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
$processor = null;
if ( $fragment_context ) {
if ( 'body' === $fragment_context ) {
$processor = WP_HTML_Processor::create_fragment( $html );
} else {

/*
* If the string of characters starts with "svg ", the context
* element is in the SVG namespace and the substring after
* "svg " is the local name. If the string of characters starts
* with "math ", the context element is in the MathML namespace
* and the substring after "math " is the local name.
* Otherwise, the context element is in the HTML namespace and
* the string is the local name.
*/
if ( str_starts_with( $fragment_context, 'svg ' ) ) {
$tag_name = substr( $fragment_context, 4 );
if ( 'svg' === $tag_name ) {
$parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><svg>' );
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><svg><{$tag_name}>" );
}
$parent_processor->next_tag( $tag_name );
} elseif ( str_starts_with( $fragment_context, 'math ' ) ) {
$tag_name = substr( $fragment_context, 5 );
if ( 'math' === $tag_name ) {
$parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><math>' );
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><math><{$tag_name}>" );
}
$parent_processor->next_tag( $tag_name );
} else {
if ( in_array(
$fragment_context,
array(
'caption',
'col',
'colgroup',
'tbody',
'td',
'tfoot',
'th',
'thead',
'tr',
),
true
) ) {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><table><{$fragment_context}>" );
$parent_processor->next_tag();
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><{$fragment_context}>" );
}
$parent_processor->next_tag( $fragment_context );
}
if ( null !== $parent_processor->get_unsupported_exception() ) {
throw $parent_processor->get_unsupported_exception();
}
if ( null !== $parent_processor->get_last_error() ) {
throw new Exception( $parent_processor->get_last_error() );
}
$processor = $parent_processor->spawn_fragment_parser( $html );
}

if ( null === $processor ) {
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
}
} else {
$processor = WP_HTML_Processor::create_full_parser( $html );
if ( null === $processor ) {
throw new Exception( 'Could not create a full parser.' );
}
}

/*
Expand Down