diff --git a/Readability.js b/Readability.js index 4c8789e..6414216 100644 --- a/Readability.js +++ b/Readability.js @@ -915,7 +915,7 @@ Readability.prototype = { _hasChildBlockElement: function (element) { return this._someNode(element.childNodes, function(node) { return this.DIV_TO_P_ELEMS.indexOf(node.tagName) !== -1 || - this._hasChildBlockElement(node) + this._hasChildBlockElement(node); }); }, @@ -928,9 +928,10 @@ Readability.prototype = { * @return string **/ _getInnerText: function(e, normalizeSpaces) { + normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces; var textContent = e.textContent.trim(); - if (!Boolean(normalizeSpaces)) { + if (normalizeSpaces) { return textContent.replace(this.REGEXPS.normalize, " "); } else { return textContent; @@ -989,6 +990,9 @@ Readability.prototype = { **/ _getLinkDensity: function(element) { var textLength = this._getInnerText(element).length; + if (textLength === 0) + return; + var linkLength = 0; // XXX implement _reduceNodeList? @@ -996,7 +1000,7 @@ Readability.prototype = { linkLength += this._getInnerText(linkNode).length; }); - return textLength !== 0 ? linkLength / textLength : 0; + return linkLength / textLength; }, /** diff --git a/test/test-pages/normalize-spaces/expected-metadata.json b/test/test-pages/normalize-spaces/expected-metadata.json new file mode 100644 index 0000000..7300185 --- /dev/null +++ b/test/test-pages/normalize-spaces/expected-metadata.json @@ -0,0 +1,5 @@ +{ + "title": "Normalize space test", + "byline": null, + "excerpt": "Lorem\n ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n\ttab here\n incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum." +} diff --git a/test/test-pages/normalize-spaces/expected.html b/test/test-pages/normalize-spaces/expected.html new file mode 100644 index 0000000..55e5350 --- /dev/null +++ b/test/test-pages/normalize-spaces/expected.html @@ -0,0 +1,16 @@ +
+
+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tab here incididunt ut labore et dolore magna aliqua. Ut enim ad minim + veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea + commodo consequat. Duis aute irure dolor in reprehenderit in voluptate + velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat + cupidatat non proident, sunt in culpa qui officia deserunt mollit anim + id est laborum.

+

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat + non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

+
+
\ No newline at end of file diff --git a/test/test-pages/normalize-spaces/source.html b/test/test-pages/normalize-spaces/source.html new file mode 100644 index 0000000..f19992b --- /dev/null +++ b/test/test-pages/normalize-spaces/source.html @@ -0,0 +1,35 @@ + + + + + Normalize space test + + +
+

Lorem

+
+ Lorem + ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tab here + incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +
+

Foo

+
+ Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation + + + + + ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +
+
+ + diff --git a/test/test-pages/style-tags-removal/source.html b/test/test-pages/style-tags-removal/source.html index 8a26266..4c6426d 100644 --- a/test/test-pages/style-tags-removal/source.html +++ b/test/test-pages/style-tags-removal/source.html @@ -35,7 +35,7 @@