Skip to content

Commit 0a2dfc2

Browse files
committed
Fixes
1 parent 3767975 commit 0a2dfc2

9 files changed

Lines changed: 46 additions & 31 deletions

File tree

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222

2323
strategy:
2424
matrix:
25-
php: ['8.1', '8.2', '8.3']
25+
php: ['8.1', '8.2', '8.3', '8.4']
2626
libxml: ['2.9.14']
2727

2828
# Steps represent a sequence of tasks that will be executed as part of the job

Makefile

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.PHONY: test-all
22

3-
test-all: start test-8.1 test-8.2 test-8.3 stop
3+
test-all: start test-8.1 test-8.2 test-8.3 test-8.4 stop
44

55
test-8.1:
66
docker-compose exec php-8.1-libxml-2.9.13 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
@@ -11,15 +11,18 @@ test-8.2:
1111
test-8.3:
1212
docker-compose exec php-8.3-libxml-2.9.14 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
1313

14+
test-8.4:
15+
docker-compose exec php-8.4-libxml-2.9.14 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
16+
1417
start:
15-
docker-compose up -d php-8.1-libxml-2.9.13 php-8.2-libxml-2.9.14 php-8.3-libxml-2.9.14
18+
docker-compose up -d php-8.1-libxml-2.9.13 php-8.2-libxml-2.9.14 php-8.3-libxml-2.9.14 php-8.4-libxml-2.9.14
1619

1720
stop:
1821
docker-compose stop
1922

2023
test-all-versions:
21-
for php_version in 8.1 8.2 8.3; do \
22-
for libxml_version in 2.9.13 2.9.14; do \
24+
for php_version in 8.1 8.2 8.3 8.4; do \
25+
for libxml_version in 2.9.14; do \
2326
docker-compose up -d php-$$php_version-libxml-$$libxml_version; \
2427
docker-compose exec php-$$php_version-libxml-$$libxml_version php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml; \
2528
done \

docker-compose.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
version: '3'
2-
31
services:
42
php-8.1-libxml-2.9.13: &template
53
build:
@@ -33,4 +31,12 @@ services:
3331
context: ./docker/php
3432
args:
3533
LIBXML_VERSION: 2.9.14
36-
PHP_VERSION: 8.3
34+
PHP_VERSION: 8.3
35+
36+
php-8.4-libxml-2.9.14:
37+
<<: *template
38+
build:
39+
context: ./docker/php
40+
args:
41+
LIBXML_VERSION: 2.9.14
42+
PHP_VERSION: 8.4

docker/php/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# Ubuntu 23.04 - php 8.1, libxml2 2.9.14
1111
# Ubuntu 23.10 - php 8.2, libxml2 2.9.14
1212
# Ubuntu 24.04 - php 8.3, libxml2 2.9.14
13+
# Ubuntu 24.10 - php 8.3, libxml2 2.12.7
1314

1415
ARG PHP_VERSION
1516
ARG LIBXML_VERSION

docker/php/build.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
# Use this file to build a Docker image using the versions of PHP and Libxml specified.
22
#
3-
# We have pre-built images at https://hub.docker.com/r/fivefilters/php-libxml which are
3+
# We have pre-built images at https://hub.docker.com/r/fivefilters/php-libxml which are
44
# faster to load than building from this file.
55
#
6-
# To build using this file, type the following command from the root project folder
6+
# To build using this file, type the following command from the root project folder
77
# (replace version of PHP/Libxml with the ones you want to use):
88
#
99
# docker build --build-arg PHP_VERSION=7.4 --build-arg LIBXML_VERSION=2.9.12 -t php-libxml -f ./docker/php/build.Dockerfile .
1010

1111
# To upload the image to Docker Hub, the tag (-t) value should be something like org/repo:tag, e.g. for us, fivefilters/php-libxml:php-8-libxml-2.9.12
1212
# The tag can be applied afterwards too, e.g. docker tag php-libxml org/repo:tag
1313

14-
ARG PHP_VERSION=8.1
14+
ARG PHP_VERSION=8.4
1515
FROM php:${PHP_VERSION}-cli
1616

1717
# Install sqlite and libonig-dev (required for building PHP 7.4), libreadline-dev for php 8.1

src/Nodes/DOM/DOMNodeList.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public function __get($name)
4242
/**
4343
* Add node to the list.
4444
*/
45-
public function add(DOMNode|DOMElement|DOMComment $node): DOMNodeList
45+
public function add(DOMNode|DOMElement|DOMText|DOMComment $node): DOMNodeList
4646
{
4747
$this->items[] = $node;
4848
$this->length++;
@@ -53,7 +53,7 @@ public function add(DOMNode|DOMElement|DOMComment $node): DOMNodeList
5353
/**
5454
* Get node.
5555
*/
56-
public function item(int $offset): DOMNode|DOMElement|DOMComment
56+
public function item(int $offset): DOMNode|DOMElement|DOMText|DOMComment
5757
{
5858
return $this->items[$offset];
5959
}

src/Nodes/NodeTrait.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public function getAllLinks(): array
221221
* Get the density of links as a percentage of the content
222222
* This is the amount of text that is inside a link divided by the total text in the node.
223223
*/
224-
public function getLinkDensity(): int
224+
public function getLinkDensity(): float
225225
{
226226
$textLength = mb_strlen($this->getTextContent(true));
227227
if ($textLength === 0) {

src/Nodes/NodeUtility.php

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
use fivefilters\Readability\Nodes\DOM\DOMDocument;
66
use fivefilters\Readability\Nodes\DOM\DOMElement;
77
use fivefilters\Readability\Nodes\DOM\DOMNode;
8+
use fivefilters\Readability\Nodes\DOM\DOMText;
9+
use fivefilters\Readability\Nodes\DOM\DOMComment;
810
use fivefilters\Readability\Nodes\DOM\DOMNodeList;
911

1012
/**
@@ -50,7 +52,7 @@ class NodeUtility
5052
*
5153
* Imported from the Element class on league\html-to-markdown.
5254
*/
53-
public static function nextNode(DOMNode $node): DOMNode
55+
public static function nextNode(DOMNode|DOMComment|DOMText|DOMElement|null $node): DOMNode|DOMComment|DOMText|DOMElement|null
5456
{
5557
$next = $node;
5658
while ($next
@@ -66,7 +68,7 @@ public static function nextNode(DOMNode $node): DOMNode
6668
* Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
6769
* element with the new tag name and importing it to the main DOMDocument.
6870
*/
69-
public static function setNodeTag(DOMNode|DOMElement $node, string $value, bool $importAttributes = true): DOMNode
71+
public static function setNodeTag(DOMNode|DOMElement $node, string $value, bool $importAttributes = true): DOMNode|DOMElement
7072
{
7173
$new = new DOMDocument('1.0', 'utf-8');
7274
$new->appendChild($new->createElement($value));
@@ -95,7 +97,7 @@ public static function setNodeTag(DOMNode|DOMElement $node, string $value, bool
9597
/**
9698
* Removes the current node and returns the next node to be parsed (child, sibling or parent).
9799
*/
98-
public static function removeAndGetNext(DOMNode|DOMElement $node): DOMNode
100+
public static function removeAndGetNext(DOMNode|DOMComment|DOMText|DOMElement $node): DOMNode|DOMComment|DOMText|DOMElement|null
99101
{
100102
$nextNode = self::getNextNode($node, true);
101103
$node->parentNode->removeChild($node);
@@ -106,7 +108,7 @@ public static function removeAndGetNext(DOMNode|DOMElement $node): DOMNode
106108
/**
107109
* Remove the selected node.
108110
*/
109-
public static function removeNode(DOMElement $node): void
111+
public static function removeNode(DOMNode|DOMComment|DOMText|DOMElement $node): void
110112
{
111113
$parent = $node->parentNode;
112114
if ($parent) {
@@ -118,7 +120,7 @@ public static function removeNode(DOMElement $node): void
118120
* Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
119121
* for parents.
120122
*/
121-
public static function getNextNode(DOMNode|DOMElement|DOMDocument $originalNode, bool $ignoreSelfAndKids = false): DOMNode
123+
public static function getNextNode(DOMNode|DOMComment|DOMText|DOMElement|DOMDocument $originalNode, bool $ignoreSelfAndKids = false): DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|null
122124
{
123125
/*
124126
* Traverse the DOM from node to node, starting at the node passed in.

src/Readability.php

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
use fivefilters\Readability\Nodes\DOM\DOMElement;
77
use fivefilters\Readability\Nodes\DOM\DOMNode;
88
use fivefilters\Readability\Nodes\DOM\DOMText;
9+
use fivefilters\Readability\Nodes\DOM\DOMComment;
910
use fivefilters\Readability\Nodes\NodeUtility;
1011
use Psr\Log\LoggerInterface;
1112
use Masterminds\HTML5;
@@ -794,9 +795,12 @@ public function getPathInfo(string $url): array
794795
/**
795796
* Gets nodes from the root element.
796797
*/
797-
private function getNodes(DOMNode|DOMText $node): array
798+
private function getNodes(DOMNode|DOMComment|DOMText|DOMElement|null $node): array
798799
{
799800
$this->logger->info('[Get Nodes] Retrieving nodes...');
801+
if ($node === null) {
802+
return [];
803+
}
800804

801805
$stripUnlikelyCandidates = $this->configuration->getStripUnlikelyCandidates();
802806

@@ -939,7 +943,7 @@ private function getNodes(DOMNode|DOMText $node): array
939943
*
940944
* @return float 1 = same text, 0 = completely different text
941945
*/
942-
private function textSimilarity(string $textA, string $textB): int
946+
private function textSimilarity(string $textA, string $textB): float
943947
{
944948
$tokensA = array_filter(preg_split(NodeUtility::$regexps['tokenize'], mb_strtolower($textA)));
945949
$tokensB = array_filter(preg_split(NodeUtility::$regexps['tokenize'], mb_strtolower($textB)));
@@ -956,7 +960,7 @@ private function textSimilarity(string $textA, string $textB): int
956960
/**
957961
* Checks if the node is a byline.
958962
*/
959-
private function checkByline(DOMNode $node, string $matchString): bool
963+
private function checkByline(DOMNode|DOMText|DOMElement $node, string $matchString): bool
960964
{
961965
if (!$this->configuration->getArticleByLine()) {
962966
return false;
@@ -999,7 +1003,7 @@ private function isValidByline(string $text): bool
9991003
/**
10001004
* Converts some of the common HTML entities in string to their corresponding characters.
10011005
*/
1002-
private function unescapeHtmlEntities(string $str): string
1006+
private function unescapeHtmlEntities(?string $str): ?string
10031007
{
10041008
if (!$str) {
10051009
return $str;
@@ -1026,7 +1030,7 @@ private function unescapeHtmlEntities(string $str): string
10261030
* Check if node is image, or if node contains exactly only one image
10271031
* whether as a direct child or as its descendants.
10281032
*/
1029-
private function isSingleImage(DOMElement $node): bool
1033+
private function isSingleImage(DOMElement|DOMNode|DOMText $node): bool
10301034
{
10311035
if ($node->tagName === 'img') {
10321036
return true;
@@ -1749,7 +1753,7 @@ public function _fixLazyImages(DOMDocument $article): void
17491753
/**
17501754
* Remove the style attribute from the given node and everything under it.
17511755
**/
1752-
public function _cleanStyles(DOMDocument|DOMNode $node): void
1756+
public function _cleanStyles(DOMDocument|DOMNode|DOMElement|DOMText $node): void
17531757
{
17541758
if (property_exists($node, 'tagName') && $node->tagName === 'svg') {
17551759
return;
@@ -2038,7 +2042,7 @@ public function _cleanHeaders(DOMDocument $article): void
20382042
* @param DOMNode the node to check.
20392043
* @return boolean indicating whether this is a title-like header.
20402044
*/
2041-
private function headerDuplicatesTitle(DOMNode $node): bool
2045+
private function headerDuplicatesTitle(DOMNode|DOMText|DOMElement $node): bool
20422046
{
20432047
if ($node->nodeName !== 'h1' && $node->nodeName !== 'h2') {
20442048
return false;
@@ -2058,7 +2062,7 @@ private function headerDuplicatesTitle(DOMNode $node): bool
20582062
* Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes
20592063
* here so no need to filter those.
20602064
**/
2061-
public function _cleanClasses(DOMDocument|DOMNode|DOMElement $node): void
2065+
public function _cleanClasses(DOMDocument|DOMText|DOMNode|DOMElement $node): void
20622066
{
20632067
if ($node->getAttribute('class') !== '') {
20642068
$node->removeAttribute('class');
@@ -2176,9 +2180,8 @@ public function postProcessContent(DOMDocument $article): DOMDocument
21762180
*
21772181
* @param array nodeList The NodeList.
21782182
* @param callable fn The test function.
2179-
* @return DOMNode|null
21802183
*/
2181-
private function findNode(array $nodeList, callable $fn): ?DOMNode
2184+
private function findNode(array $nodeList, callable $fn): DOMNode|DOMText|DOMElement|null
21822185
{
21832186
foreach ($nodeList as $node) {
21842187
if ($fn($node)) {
@@ -2207,7 +2210,7 @@ public function getTitle(): ?string
22072210
/**
22082211
* Set title.
22092212
*/
2210-
protected function setTitle(string $title): void
2213+
protected function setTitle(?string $title): void
22112214
{
22122215
$this->title = $title;
22132216
}
@@ -2258,7 +2261,7 @@ public function getExcerpt(): ?string
22582261
/**
22592262
* Set excerpt.
22602263
*/
2261-
public function setExcerpt(string $excerpt): void
2264+
public function setExcerpt(?string $excerpt): void
22622265
{
22632266
$this->excerpt = $excerpt;
22642267
}

0 commit comments

Comments
 (0)