From 5b1e69bdf2c3d4e669a5d73dbb48fa263068118f Mon Sep 17 00:00:00 2001 From: andrei-ch Date: Sat, 19 Nov 2016 20:04:15 -0800 Subject: [PATCH] Bug fix: still not grabbing full content from nytimes.com articles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Solution: strip one level of empty <div>
elements so they don’t obstruct merging adjacent content downstream. --- Readability.js | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Readability.js b/Readability.js index b68739c..2f64fa8 100644 --- a/Readability.js +++ b/Readability.js @@ -727,12 +727,14 @@ Readability.prototype = { } else { // EXPERIMENTAL this._forEachNode(node.childNodes, function(childNode) { - if (childNode.nodeType === Node.TEXT_NODE && childNode.textContent.trim() != "") { + if (childNode.nodeType === Node.TEXT_NODE && childNode.textContent.trim().length > 0) { var p = doc.createElement('p'); p.textContent = childNode.textContent; p.style.display = 'inline'; p.className = 'readability-styled'; node.replaceChild(p, childNode); + } else if (this._isEmptyDivElement(childNode)) { + node.replaceChild(doc.createTextNode(childNode.textContent), childNode); } }); } @@ -1158,6 +1160,16 @@ Readability.prototype = { }); }, + /** + * Determine whether node is a DIV element with no child elements and only whitespace text. + */ + _isEmptyDivElement: function(node) { + return node.nodeType === Node.ELEMENT_NODE && + node.tagName === "DIV" && + node.children.length === 0 && + node.textContent.trim().length === 0; + }, + /** * Determine whether element has any children block level elements. *