diff --git a/debug_dom.php b/debug_dom.php new file mode 100644 index 00000000..6adde54a --- /dev/null +++ b/debug_dom.php @@ -0,0 +1,15 @@ +'; +$wrapped_buffer = '' . $buffer . ''; + +$dom = new DOMDocument(); +$encoded_buffer = mb_encode_numericentity( $wrapped_buffer, array( 0x80, 0x10FFFF, 0, 0x1FFFFF ), 'UTF-8' ); +$dom->loadHTML( $encoded_buffer, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); + +$dummy = $dom->getElementsByTagName('dummy')->item(0); + +echo "Child Nodes: " . $dummy->childNodes->length . "\n"; +foreach ($dummy->childNodes as $node) { + echo "Node: " . $node->nodeName . "\n"; + echo "Content: " . $dom->saveHTML($node) . "\n"; +} diff --git a/src/lib/helper.php b/src/lib/helper.php index 64501cfb..176b4a41 100644 --- a/src/lib/helper.php +++ b/src/lib/helper.php @@ -6,6 +6,8 @@ use Exception; use InvalidArgumentException; use RuntimeException; + use DOMDocument; + use DOMElement; /** * @param string $type @@ -99,96 +101,79 @@ function cookiebot_addons_remove_class_action( $action, $class, $method, $priori * @since 1.2.0 */ function cookiebot_addons_manipulate_script( $buffer, $keywords ) { - /** - * normalize potential self-closing script tags - */ + if ( empty( $buffer ) ) { + return $buffer; + } + + // Use DOMDocument to safely parse and modify the script tag + $dom = new DOMDocument(); + + // Suppress errors for partial HTML + $libxml_previous_state = libxml_use_internal_errors( true ); + + // Wrap buffer in a custom tag to ensure correct parsing of fragments (e.g. multiple siblings at root) + // This prevents DOMDocument from trying to fix structure by nesting siblings + $wrapped_buffer = '' . $buffer . ''; - $normalized_buffer = preg_replace( '/()/is', '', $buffer ); + // Load HTML with UTF-8 encoding hack + // The mb_convert_encoding is to ensure we don't have encoding issues + // We use LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD to avoid adding wrappers automatically + // Replacement for deprecated mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8') + $encoded_buffer = mb_encode_numericentity( $wrapped_buffer, array( 0x80, 0x10FFFF, 0, 0x1FFFFF ), 'UTF-8' ); + $dom->loadHTML( $encoded_buffer, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD ); - if ( $normalized_buffer !== null ) { - $buffer = $normalized_buffer; + libxml_use_internal_errors( $libxml_previous_state ); + + $scripts = $dom->getElementsByTagName( 'script' ); + $modified = false; + + // Convert DOMNodeList to array to avoid modification issues during iteration + $script_nodes = array(); + foreach ( $scripts as $script ) { + $script_nodes[] = $script; } - /** - * Pattern to get all scripts - * - * @version 2.0.4 - * @since 1.2.0 - */ - $pattern = '/(]*+>)(.*?)(<\/script>)/is'; + foreach ( $script_nodes as $script ) { + /** @var DOMElement $script */ + // Get the full script HTML to check for keywords + // We check the outer HTML to include attributes in the search, matching the regex behavior + $full_script_content = $dom->saveHTML( $script ); - /** - * Get all scripts and add cookieconsent if it does match with the criterion - */ - $updated_scripts = preg_replace_callback( - $pattern, - function ( $matches ) use ( $keywords ) { - $script = $matches[0]; // the full script html - $script_tag_open = $matches[1]; // only the script open tag with all attributes - $script_tag_inner = $matches[2]; // only the script's innerText - $script_tag_close = $matches[3]; // only the script closing tag - - /** - * Check if the script contains the keywords, checks keywords one by one - * - * If one match, then the rest of the keywords will be skipped. - */ - foreach ( $keywords as $needle => $cookie_type ) { - /** - * The script contains the needle - */ - if ( strpos( $script, $needle ) !== false ) { - /** - * replace all single quotes with double quotes in the open tag - * remove previously set data-cookieconsent attribute - * remove type attribute - */ - $script_tag_open = str_replace( '\'', '"', $script_tag_open ); - $script_tag_open = preg_replace( '/\sdata-cookieconsent=\"[^"]*+\"/', '', $script_tag_open ); - $script_tag_open = preg_replace( '/\stype=\"[^"]*+\"/', '', $script_tag_open ); - - /** - * set the type attribute to text/plain to prevent javascript execution - * add data-cookieconsent attribute - */ - $cookie_types = cookiebot_addons_output_cookie_types( $cookie_type ); - - $script_tag_open = str_replace( - ' $cookie_type ) { + if ( strpos( $full_script_content, $needle ) !== false ) { + // Match found - /** - * return the reconstructed script - */ - return $script; - }, - $buffer - ); + // Remove existing attributes + $script->removeAttribute( 'type' ); + $script->removeAttribute( 'data-cookieconsent' ); - /** - * Fallback when the regex fails to work due to PCRE_ERROR_JIT_STACKLIMIT - * - * @version 2.0.4 - * @since 2.0.4 - */ - if ( $updated_scripts === null ) { - $updated_scripts = $buffer; + // Add new attributes + $script->setAttribute( 'type', 'text/plain' ); + $script->setAttribute( 'data-cookieconsent', cookiebot_addons_output_cookie_types( $cookie_type ) ); - if ( get_option( 'cookiebot_regex_stacklimit' ) === false ) { - update_option( 'cookiebot_regex_stacklimit', 1 ); + $modified = true; + // Break inner loop (keywords) as we found a match + break; + } + } + } + + if ( $modified ) { + // Save HTML + // We extract children of our wrapper + $wrapper = $dom->getElementsByTagName( 'cookiebot-wrapper' )->item( 0 ); + if ( $wrapper ) { + $output = ''; + foreach ( $wrapper->childNodes as $node ) { + $output .= $dom->saveHTML( $node ); + } + return $output; } + // Fallback if wrapper is missing (should not happen) + return $dom->saveHTML(); } - return $updated_scripts; + return $buffer; } /** diff --git a/src/lib/script_loader_tag/Script_Loader_Tag.php b/src/lib/script_loader_tag/Script_Loader_Tag.php index 876f9b73..a5b222b2 100644 --- a/src/lib/script_loader_tag/Script_Loader_Tag.php +++ b/src/lib/script_loader_tag/Script_Loader_Tag.php @@ -2,6 +2,9 @@ namespace cybot\cookiebot\lib\script_loader_tag; +use DOMDocument; +use DOMElement; + class Script_Loader_Tag implements Script_Loader_Tag_Interface { @@ -74,27 +77,51 @@ public function ignore_script( $script ) { * @since 1.2.0 */ public function cookiebot_add_consent_attribute_to_tag( $tag, $handle, $src ) { + // Check if the handle is in our list of tags to modify if ( array_key_exists( $handle, $this->tags ) && ! empty( $this->tags[ $handle ] ) ) { + // If we have a match, we completely replace the tag with our own constructed one + // This is safer than parsing for this specific case as we know exactly what we want //phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript - return ''; + return ''; } + // Check if the script should be ignored if ( $this->check_ignore_script( $src ) ) { - return preg_replace_callback( - '/[^>]*)>/', - function ( $tag ) use ( $handle ) { - // Prevent modification of the script tags inside the other script tag by validating the ID of the - // script and checking if we already set the consent status for the script. This will fix the issue - // on Gutenberg editor pages. - if ( ! self::validate_attributes_for_consent_ignore( $handle, $tag['atts'] ) ) { - return $tag[0]; - } - - //phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript - return str_replace( ') + $html = preg_replace('/]*)>\s*<\/script>/', '', $html); + // Remove spaces around = + $html = preg_replace('/\s*=\s*/', '=', $html); + + // Sort attributes to handle order differences + // This is a naive implementation but should work for simple cases + $html = preg_replace_callback('/]*)>/', function($matches) { + $attrs = $matches[1]; + preg_match_all('/(\w+)(?:="([^"]*)")?/', $attrs, $attr_matches, PREG_SET_ORDER); + $sorted_attrs = []; + foreach ($attr_matches as $m) { + $name = $m[1]; + $value = isset($m[2]) ? $m[2] : ''; + $sorted_attrs[$name] = $value; + } + ksort($sorted_attrs); + $new_attrs = ''; + foreach ($sorted_attrs as $k => $v) { + if ($v === '') { + $new_attrs .= " $k"; + } else { + $new_attrs .= " $k=\"$v\""; + } + } + return ""; + }, $html); + + return $html; +} + +function assert_equivalent($new, $old, $test_name) { + global $passes, $fails; + + $new_norm = normalize_html($new); + $old_norm = normalize_html($old); + + if ($new_norm === $old_norm) { + echo "[PASS] $test_name\n"; + $passes++; + } else { + echo "[FAIL] $test_name\n"; + echo " Old (Norm): $old_norm\n"; + echo " New (Norm): $new_norm\n"; + echo " Old (Raw): $old\n"; + echo " New (Raw): $new\n"; + $fails++; + } +} + +// --- Test Suite 1: helper.php cookiebot_addons_manipulate_script --- + +echo "Testing cookiebot_addons_manipulate_script (helper.php)...\n"; + +$keywords = ['tracking.js' => 'marketing']; + +// Case 1: Simple Script +$input = ''; +$new = cookiebot_addons_manipulate_script($input, $keywords); +$old = cookiebot_addons_manipulate_script_old($input, $keywords); +assert_equivalent($new, $old, 'Simple Script'); + +// Case 2: Script with existing attributes +$input = ''; +$new = cookiebot_addons_manipulate_script($input, $keywords); +$old = cookiebot_addons_manipulate_script_old($input, $keywords); +assert_equivalent($new, $old, 'Attributes'); + +// Case 3: Script with existing type +$input = ''; +$new = cookiebot_addons_manipulate_script($input, $keywords); +$old = cookiebot_addons_manipulate_script_old($input, $keywords); +assert_equivalent($new, $old, 'Existing Type'); + +// Case 4: Multiple scripts in buffer +$input = '
'; +$new = cookiebot_addons_manipulate_script($input, $keywords); +$old = cookiebot_addons_manipulate_script_old($input, $keywords); +assert_equivalent($new, $old, 'Multiple Scripts'); + +// Case 5: Fragment (no html/body) +$input = ''; +$new = cookiebot_addons_manipulate_script($input, $keywords); +$old = cookiebot_addons_manipulate_script_old($input, $keywords); +assert_equivalent($new, $old, 'Fragment'); + +// --- Test Suite 2: Script_Loader_Tag --- + +echo "\nTesting Script_Loader_Tag...\n"; + +$loader_new = new Script_Loader_Tag(); +$loader_new->ignore_script('ignore-me.js'); + +$loader_old = new Script_Loader_Tag_Old(); +$loader_old->ignore_script('ignore-me.js'); + +// Case 6: Ignored Script +$tag = ''; +$handle = 'ignore-me'; +$src = 'https://example.com/ignore-me.js'; + +$new = $loader_new->cookiebot_add_consent_attribute_to_tag($tag, $handle, $src); +$old = $loader_old->cookiebot_add_consent_attribute_to_tag($tag, $handle, $src); +assert_equivalent($new, $old, 'Ignored Script'); + +// Case 7: Non-ignored script +$tag = ''; +$handle = 'normal'; +$src = 'https://example.com/normal.js'; + +$new = $loader_new->cookiebot_add_consent_attribute_to_tag($tag, $handle, $src); +$old = $loader_old->cookiebot_add_consent_attribute_to_tag($tag, $handle, $src); +assert_equivalent($new, $old, 'Non-ignored script'); + +echo "\nSummary: $passes Passed, $fails Failed.\n"; +if ($fails > 0) { + exit(1); +} diff --git a/tests/mock_wp_functions.php b/tests/mock_wp_functions.php new file mode 100644 index 00000000..983a8770 --- /dev/null +++ b/tests/mock_wp_functions.php @@ -0,0 +1,59 @@ + []]; } +} +if (!function_exists('is_multisite')) { + function is_multisite() { return false; } +} +if (!function_exists('get_site_url')) { + function get_site_url() { return 'http://example.com'; } +} +if (!function_exists('get_site_option')) { + function get_site_option($option, $default = false) { return $default; } +} +if (!function_exists('get_locale')) { + function get_locale() { return 'en_US'; } +} +if (!function_exists('plugin_dir_url')) { + function plugin_dir_url($file) { return 'http://example.com/wp-content/plugins/cookiebot/'; } +} +if (!function_exists('plugin_dir_path')) { + function plugin_dir_path($file) { return '/path/to/plugin/'; } +} + +// Constants +if (!defined('ABSPATH')) define('ABSPATH', '/tmp/'); +if (!defined('CYBOT_COOKIEBOT_PLUGIN_DIR')) define('CYBOT_COOKIEBOT_PLUGIN_DIR', __DIR__ . '/../'); +if (!defined('CYBOT_COOKIEBOT_PLUGIN_ASSETS_DIR')) define('CYBOT_COOKIEBOT_PLUGIN_ASSETS_DIR', 'assets/'); diff --git a/tests/old_logic.php b/tests/old_logic.php new file mode 100644 index 00000000..d1b397c5 --- /dev/null +++ b/tests/old_logic.php @@ -0,0 +1,101 @@ +)/is', '', $buffer ); + if ( $normalized_buffer !== null ) { + $buffer = $normalized_buffer; + } + $pattern = '/(]*+>)(.*?)(<\/script>)/is'; + $updated_scripts = preg_replace_callback( + $pattern, + function ( $matches ) use ( $keywords ) { + $script = $matches[0]; + $script_tag_open = $matches[1]; + $script_tag_inner = $matches[2]; + $script_tag_close = $matches[3]; + + foreach ( $keywords as $needle => $cookie_type ) { + if ( strpos( $script, $needle ) !== false ) { + $script_tag_open = str_replace( '\'', '"', $script_tag_open ); + $script_tag_open = preg_replace( '/\sdata-cookieconsent=\"[^"]*+\"/', '', $script_tag_open ); + $script_tag_open = preg_replace( '/\stype=\"[^"]*+\"/', '', $script_tag_open ); + + $cookie_types = cookiebot_addons_output_cookie_types( $cookie_type ); + + $script_tag_open = str_replace( + 'tags[ $tag ] = $type; + } + + public function ignore_script( $script ) { + array_push( $this->ignore_scripts, $script ); + } + + public function cookiebot_add_consent_attribute_to_tag( $tag, $handle, $src ) { + if ( array_key_exists( $handle, $this->tags ) && ! empty( $this->tags[ $handle ] ) ) { + return ''; + } + + if ( $this->check_ignore_script( $src ) ) { + return preg_replace_callback( + '/[^>]*)>/', + function ( $tag ) use ( $handle ) { + if ( ! self::validate_attributes_for_consent_ignore( $handle, $tag['atts'] ) ) { + return $tag[0]; + } + return str_replace( '