diff --git a/Readability.js b/Readability.js index 5cff4540..437b6344 100644 --- a/Readability.js +++ b/Readability.js @@ -1475,7 +1475,7 @@ Readability.prototype = { } else { var contentBonus = 0; - // Give a bonus if sibling nodes and top candidates have the example same classname + // Give a bonus if sibling nodes and top candidates have the same classname if ( sibling.className === topCandidate.className && topCandidate.className !== "" @@ -1508,24 +1508,13 @@ Readability.prototype = { } if (append) { - this.log("Appending node:", sibling); - if (!this.ALTER_TO_DIV_EXCEPTIONS.includes(sibling.nodeName)) { - // We have a node that isn't a common block level element, like a form or td tag. - // Turn it into a div so it doesn't get filtered out later by accident. - this.log("Altering sibling:", sibling, "to div."); - sibling = this._setNodeTag(sibling, "DIV"); } articleContent.appendChild(sibling); - // Fetch children again to make it compatible - // with DOM parsers without live collection support. + // Re-fetch children (for DOM parsers without live collections); appendChild() removed sibling, so revisit this index siblings = parentOfTopCandidate.children; - // siblings is a reference to the children array, and - // sibling is removed from the array when we call appendChild(). - // As a result, we must revisit this index since the nodes - // have been shifted. 
s -= 1; sl -= 1; } diff --git a/test/test-pages/bbc-reader-bug/expected-metadata.json b/test/test-pages/bbc-reader-bug/expected-metadata.json new file mode 100644 index 00000000..4123193e --- /dev/null +++ b/test/test-pages/bbc-reader-bug/expected-metadata.json @@ -0,0 +1,9 @@ +{ + "title": "Motorhead guitarist Phil Campbell honoured", + "byline": null, + "dir": null, + "lang": null, + "excerpt": "Earlier paragraph 1 (should NOT be skipped)", + "siteName": null, + "readerable": false +} \ No newline at end of file diff --git a/test/test-pages/bbc-reader-bug/expected.html b/test/test-pages/bbc-reader-bug/expected.html new file mode 100644 index 00000000..d01fd02a --- /dev/null +++ b/test/test-pages/bbc-reader-bug/expected.html @@ -0,0 +1,9 @@ +
+
+

Motorhead paid tribute to Campbell...

+

Paragraph 2 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 3 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 4 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 5 with more text to score well, adding commas like this, and this, for points.

+
+
\ No newline at end of file diff --git a/test/test-pages/bbc-reader-bug/source.html b/test/test-pages/bbc-reader-bug/source.html new file mode 100644 index 00000000..9a7c579b --- /dev/null +++ b/test/test-pages/bbc-reader-bug/source.html @@ -0,0 +1,24 @@ + + + Motorhead guitarist Phil Campbell honoured + + + +
+
+

Earlier paragraph 1 (should NOT be skipped)

+

Earlier paragraph 2

+
+ +
+
+

Motorhead paid tribute to Campbell...

+

Paragraph 2 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 3 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 4 with more text to score well, adding commas like this, and this, for points.

+

Paragraph 5 with more text to score well, adding commas like this, and this, for points.

+
+
+
+ + \ No newline at end of file