Skip to content

Commit f3280f2

Browse files
committed
Force UTF-8 encoding for better support for non-Latin character sets
1 parent 388aeba commit f3280f2

File tree

3 files changed

+57
-3
lines changed

3 files changed

+57
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
tests/coverage
22
vendor
33
.phpunit.result.cache
4+
.vscode
45

56
# The composer.lock file is not needed, as this is a library whose dependencies
67
# will depend on the version of PHP being used.

src/MarkupAssertionsTrait.php

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,11 @@ public function assertElementNotRegExp($regexp, $selector = '', $markup = '', $m
222222
*/
223223
protected function executeDomQuery($markup, $query)
224224
{
225-
return Query::execute($query, new Document($markup), Query::TYPE_CSS);
225+
return Query::execute(
226+
$query,
227+
new Document('<?xml encoding="UTF-8">' . $markup, Document::DOC_HTML, 'UTF-8'),
228+
Query::TYPE_CSS
229+
);
226230
}
227231

228232
/**
@@ -274,7 +278,7 @@ protected function getInnerHtmlOfMatchedElements($markup, $query)
274278
$document = new DOMDocument();
275279
$document->appendChild($document->importNode($result->firstChild, true));
276280

277-
$contents[] = trim($document->saveHTML());
281+
$contents[] = trim(html_entity_decode($document->saveHTML()));
278282
}
279283

280284
return implode(PHP_EOL, $contents);

tests/MarkupAssertionsTraitTest.php

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,21 @@ public function assertElementContains_should_scope_matches_to_selector()
153153
);
154154
}
155155

156+
/**
157+
* @test
158+
* @testdox assertElementContains() should handle various character sets
159+
* @dataProvider provideGreetingsInDifferentLanguages
160+
* @ticket https://github.com/stevegrunwell/phpunit-markup-assertions/issues/31
161+
*/
162+
public function assertElementContains_should_handle_various_character_sets($greeting)
163+
{
164+
$this->assertElementContains(
165+
$greeting,
166+
'h1',
167+
sprintf('<div><h1>%s</h1></div>', $greeting)
168+
);
169+
}
170+
156171
/**
157172
* @test
158173
* @testdox assertElementNotContains() should be able to search for a selector
@@ -162,7 +177,22 @@ public function assertElementNotContains_can_match_a_selector()
162177
$this->assertElementNotContains(
163178
'ipsum',
164179
'#main',
165-
'<header>Foo bar baz</header><div id="main">Some string</div>'
180+
'<div>Foo bar baz</div><div id="main">Some string</div>'
181+
);
182+
}
183+
184+
/**
185+
* @test
186+
* @testdox assertElementNotContains() should handle various character sets
187+
* @dataProvider provideGreetingsInDifferentLanguages
188+
* @ticket https://github.com/stevegrunwell/phpunit-markup-assertions/issues/31
189+
*/
190+
public function assertElementNotContains_should_handle_various_character_sets($greeting)
191+
{
192+
$this->assertElementNotContains(
193+
$greeting,
194+
'h1',
195+
sprintf('<h1>Translation</h1><p>%s</p>', $greeting)
166196
);
167197
}
168198

@@ -328,4 +358,23 @@ public function provideSelectorVariants()
328358
'Tag with href attribute' => ['a[href="https://example.com"]'],
329359
];
330360
}
361+
362+
/**
363+
* Provide a list of strings in various languages.
364+
*
365+
* @return array<string,array<string>>
366+
*/
367+
public function provideGreetingsInDifferentLanguages()
368+
{
369+
return [
370+
'Arabic' => ['مرحبا!'],
371+
'Chinese' => ['你好'],
372+
'English' => ['Hello'],
373+
'Hebrew' => ['שלום'],
374+
'Japanese' => ['こんにちは'],
375+
'Korean' => ['안녕하십니까'],
376+
'Punjabi' => ['ਸਤ ਸ੍ਰੀ ਅਕਾਲ'],
377+
'Ukrainian' => ['Привіт'],
378+
];
379+
}
331380
}

0 commit comments

Comments
 (0)