66use fivefilters \Readability \Nodes \DOM \DOMElement ;
77use fivefilters \Readability \Nodes \DOM \DOMNode ;
88use fivefilters \Readability \Nodes \DOM \DOMText ;
9+ use fivefilters \Readability \Nodes \DOM \DOMComment ;
910use fivefilters \Readability \Nodes \NodeUtility ;
1011use Psr \Log \LoggerInterface ;
1112use Masterminds \HTML5 ;
@@ -794,9 +795,12 @@ public function getPathInfo(string $url): array
794795 /**
795796 * Gets nodes from the root element.
796797 */
797- private function getNodes (DOMNode |DOMText $ node ): array
798+ private function getNodes (DOMNode |DOMComment | DOMText | DOMElement | null $ node ): array
798799 {
799800 $ this ->logger ->info ('[Get Nodes] Retrieving nodes... ' );
801+ if ($ node === null ) {
802+ return [];
803+ }
800804
801805 $ stripUnlikelyCandidates = $ this ->configuration ->getStripUnlikelyCandidates ();
802806
@@ -939,7 +943,7 @@ private function getNodes(DOMNode|DOMText $node): array
939943 *
940944 * @return float 1 = same text, 0 = completely different text
941945 */
942- private function textSimilarity (string $ textA , string $ textB ): int
946+ private function textSimilarity (string $ textA , string $ textB ): float
943947 {
944948 $ tokensA = array_filter (preg_split (NodeUtility::$ regexps ['tokenize ' ], mb_strtolower ($ textA )));
945949 $ tokensB = array_filter (preg_split (NodeUtility::$ regexps ['tokenize ' ], mb_strtolower ($ textB )));
@@ -956,7 +960,7 @@ private function textSimilarity(string $textA, string $textB): int
956960 /**
957961 * Checks if the node is a byline.
958962 */
959- private function checkByline (DOMNode $ node , string $ matchString ): bool
963+ private function checkByline (DOMNode | DOMText | DOMElement $ node , string $ matchString ): bool
960964 {
961965 if (!$ this ->configuration ->getArticleByLine ()) {
962966 return false ;
@@ -999,7 +1003,7 @@ private function isValidByline(string $text): bool
9991003 /**
10001004 * Converts some of the common HTML entities in string to their corresponding characters.
10011005 */
1002- private function unescapeHtmlEntities (string $ str ): string
1006+ private function unescapeHtmlEntities (? string $ str ): ? string
10031007 {
10041008 if (!$ str ) {
10051009 return $ str ;
@@ -1026,7 +1030,7 @@ private function unescapeHtmlEntities(string $str): string
10261030 * Check whether the node is an image, or contains exactly one image,
10271031 * either as a direct child or as a deeper descendant.
10281032 */
1029- private function isSingleImage (DOMElement $ node ): bool
1033+ private function isSingleImage (DOMElement | DOMNode | DOMText $ node ): bool
10301034 {
10311035 if ($ node ->tagName === 'img ' ) {
10321036 return true ;
@@ -1749,7 +1753,7 @@ public function _fixLazyImages(DOMDocument $article): void
17491753 /**
17501754 * Remove the style attribute from the given node and every node under it.
17511755 **/
1752- public function _cleanStyles (DOMDocument |DOMNode $ node ): void
1756+ public function _cleanStyles (DOMDocument |DOMNode | DOMElement | DOMText $ node ): void
17531757 {
17541758 if (property_exists ($ node , 'tagName ' ) && $ node ->tagName === 'svg ' ) {
17551759 return ;
@@ -2038,7 +2042,7 @@ public function _cleanHeaders(DOMDocument $article): void
20382042 * @param DOMNode|DOMText|DOMElement $node the node to check.
20392043 * @return boolean indicating whether this is a title-like header.
20402044 */
2041- private function headerDuplicatesTitle (DOMNode $ node ): bool
2045+ private function headerDuplicatesTitle (DOMNode | DOMText | DOMElement $ node ): bool
20422046 {
20432047 if ($ node ->nodeName !== 'h1 ' && $ node ->nodeName !== 'h2 ' ) {
20442048 return false ;
@@ -2058,7 +2062,7 @@ private function headerDuplicatesTitle(DOMNode $node): bool
20582062 * Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes
20592063 * here so no need to filter those.
20602064 **/
2061- public function _cleanClasses (DOMDocument |DOMNode |DOMElement $ node ): void
2065+ public function _cleanClasses (DOMDocument |DOMText | DOMNode |DOMElement $ node ): void
20622066 {
20632067 if ($ node ->getAttribute ('class ' ) !== '' ) {
20642068 $ node ->removeAttribute ('class ' );
@@ -2176,9 +2180,8 @@ public function postProcessContent(DOMDocument $article): DOMDocument
21762180 *
21772181 * @param array $nodeList The NodeList.
21782182 * @param callable $fn The test function.
2179- * @return DOMNode|null
21802183 */
2181- private function findNode (array $ nodeList , callable $ fn ): ? DOMNode
2184+ private function findNode (array $ nodeList , callable $ fn ): DOMNode | DOMText | DOMElement | null
21822185 {
21832186 foreach ($ nodeList as $ node ) {
21842187 if ($ fn ($ node )) {
@@ -2207,7 +2210,7 @@ public function getTitle(): ?string
22072210 /**
22082211 * Set title.
22092212 */
2210- protected function setTitle (string $ title ): void
2213+ protected function setTitle (? string $ title ): void
22112214 {
22122215 $ this ->title = $ title ;
22132216 }
@@ -2258,7 +2261,7 @@ public function getExcerpt(): ?string
22582261 /**
22592262 * Set excerpt.
22602263 */
2261- public function setExcerpt (string $ excerpt ): void
2264+ public function setExcerpt (? string $ excerpt ): void
22622265 {
22632266 $ this ->excerpt = $ excerpt ;
22642267 }
0 commit comments