Don't remove elements containing figures or having them as a parent.

pull/82/head
Nicolas Perriault 9 years ago
parent b6730703a1
commit f8d37e4276

@ -1441,6 +1441,29 @@ Readability.prototype = {
});
},
/**
* Check if a given node has one of its ancestor tag name matching the
* provided one.
* @param HTMLElement node
* @param String tagName
* @param Number maxDepth
* @return Boolean
*/
_hasAncestorTag: function(node, tagName, maxDepth) {
maxDepth = maxDepth || 3;
tagName = tagName.toUpperCase();
var depth = 0;
while (node.parentNode) {
if (depth > maxDepth)
return false;
if (node.parentNode.tagName === tagName)
return true;
node = node.parentNode;
depth++;
}
return false;
},
/**
* Clean an element of all tags of type "tag" if they look fishy.
* "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
@ -1486,8 +1509,9 @@ Readability.prototype = {
var linkDensity = this._getLinkDensity(tagsList[i]);
var contentLength = this._getInnerText(tagsList[i]).length;
var toRemove = false;
if (li > p && tag !== "ul" && tag !== "ol") {
if (img > p && !this._hasAncestorTag(tagsList[i], "figure")) {
toRemove = true;
} else if (li > p && tag !== "ul" && tag !== "ol") {
toRemove = true;
} else if ( input > Math.floor(p/3) ) {
toRemove = true;
@ -1501,8 +1525,9 @@ Readability.prototype = {
toRemove = true;
}
if (toRemove)
if (toRemove) {
tagsList[i].parentNode.removeChild(tagsList[i]);
}
}
}
},

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save