Skip to content

Commit f131fcb

Browse files
authored
fix: allow DOMProcessingInstruction in Readability and NodeUtility (#36)
Signed-off-by: Arthur Schiwon <blizzz@arthur-schiwon.de>
1 parent 4613c22 commit f131fcb

2 files changed

Lines changed: 7 additions & 5 deletions

File tree

src/Nodes/NodeUtility.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use fivefilters\Readability\Nodes\DOM\DOMDocument;
66
use fivefilters\Readability\Nodes\DOM\DOMElement;
77
use fivefilters\Readability\Nodes\DOM\DOMNode;
8+
use fivefilters\Readability\Nodes\DOM\DOMProcessingInstruction;
89
use fivefilters\Readability\Nodes\DOM\DOMText;
910
use fivefilters\Readability\Nodes\DOM\DOMComment;
1011
use fivefilters\Readability\Nodes\DOM\DOMNodeList;
@@ -120,7 +121,7 @@ public static function removeNode(DOMNode|DOMComment|DOMText|DOMElement $node):
120121
* Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
121122
* for parents.
122123
*/
123-
public static function getNextNode(DOMNode|DOMComment|DOMText|DOMElement|DOMDocument $originalNode, bool $ignoreSelfAndKids = false): DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|null
124+
public static function getNextNode(DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|DOMProcessingInstruction $originalNode, bool $ignoreSelfAndKids = false): DOMNode|DOMComment|DOMText|DOMElement|DOMDocument|DOMProcessingInstruction|null
124125
{
125126
/*
126127
* Traverse the DOM from node to node, starting at the node passed in.

src/Readability.php

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use fivefilters\Readability\Nodes\DOM\DOMDocument;
66
use fivefilters\Readability\Nodes\DOM\DOMElement;
77
use fivefilters\Readability\Nodes\DOM\DOMNode;
8+
use fivefilters\Readability\Nodes\DOM\DOMProcessingInstruction;
89
use fivefilters\Readability\Nodes\DOM\DOMText;
910
use fivefilters\Readability\Nodes\DOM\DOMComment;
1011
use fivefilters\Readability\Nodes\NodeUtility;
@@ -391,7 +392,7 @@ private function getJSONLD(DOMDocument $dom): array
391392
return $metadata;
392393
} catch (\Exception $err) {
393394
// The try-catch blocks are from the JS version. Not sure if there's anything
394-
// here in the PHP version that would trigger an error or exception, so perhaps we can
395+
// here in the PHP version that would trigger an error or exception, so perhaps we can
395396
// remove the try-catch blocks here (or at least translate errors to exceptions for this bit)
396397
$this->logger->debug('[JSON-LD] Error parsing: ' . $err->getMessage());
397398
}
@@ -418,7 +419,7 @@ private function getMetadata(): void
418419
/* @var DOMNode $meta */
419420
$elementName = $meta->getAttribute('name');
420421
$elementProperty = $meta->getAttribute('property');
421-
$content = $meta->getAttribute('content');
422+
$content = $meta->getAttribute('content');
422423
$matches = null;
423424
$name = null;
424425

@@ -960,7 +961,7 @@ private function textSimilarity(string $textA, string $textB): float
960961
/**
961962
* Checks if the node is a byline.
962963
*/
963-
private function checkByline(DOMNode|DOMText|DOMElement $node, string $matchString): bool
964+
private function checkByline(DOMNode|DOMText|DOMElement|DOMProcessingInstruction $node, string $matchString): bool
964965
{
965966
if (!$this->configuration->getArticleByline()) {
966967
return false;
@@ -2043,7 +2044,7 @@ public function _cleanHeaders(DOMDocument $article): void
20432044
* @param DOMNode the node to check.
20442045
* @return boolean indicating whether this is a title-like header.
20452046
*/
2046-
private function headerDuplicatesTitle(DOMNode|DOMText|DOMElement $node): bool
2047+
private function headerDuplicatesTitle(DOMNode|DOMText|DOMElement|DOMProcessingInstruction $node): bool
20472048
{
20482049
if ($node->nodeName !== 'h1' && $node->nodeName !== 'h2') {
20492050
return false;

0 commit comments

Comments
 (0)