Don't remove elements containing figures or having them as a parent.

9 years ago · f8d37e4276
parent b6730703a1
commit f8d37e4276
2 changed files with 516 additions and 594 deletions
--- a/Readability.js
+++ b/Readability.js
@@ -1441,6 +1441,29 @@ Readability.prototype = {
    });
  },

+  /**
+   * Report whether any nearby ancestor of `node` has the given tag name.
+   * Walks up the parentNode chain and gives up once depth exceeds maxDepth.
+   * @param  HTMLElement node     Node whose ancestors are inspected
+   * @param  String      tagName  Tag to look for; upper-cased before comparing
+   * @param  Number      maxDepth Search limit; any falsy value (0 too) means 3
+   * @return Boolean     true on the first matching ancestor, otherwise false
+   */
+  _hasAncestorTag: function(node, tagName, maxDepth) {
+    maxDepth = maxDepth || 3;
+    tagName = tagName.toUpperCase();
+    var depth = 0;
+    while (node.parentNode) {
+      if (depth > maxDepth) // depth starts at 0, so up to maxDepth + 1 ancestors are checked
+        return false;
+      if (node.parentNode.tagName === tagName)
+        return true;
+      node = node.parentNode;
+      depth++;
+    }
+    return false; // ran out of ancestors without finding a match
+  },
+
  /**
   * Clean an element of all tags of type "tag" if they look fishy.
   * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
@@ -1486,8 +1509,9 @@ Readability.prototype = {
        var linkDensity = this._getLinkDensity(tagsList[i]);
        var contentLength = this._getInnerText(tagsList[i]).length;
        var toRemove = false;
-
-        if (li > p && tag !== "ul" && tag !== "ol") {
+        if (img > p && !this._hasAncestorTag(tagsList[i], "figure")) {
+          toRemove = true;
+        } else if (li > p && tag !== "ul" && tag !== "ol") {
          toRemove = true;
        } else if ( input > Math.floor(p/3) ) {
          toRemove = true;
@@ -1501,8 +1525,9 @@ Readability.prototype = {
          toRemove = true;
        }

-        if (toRemove)
+        if (toRemove) {
          tagsList[i].parentNode.removeChild(tagsList[i]);
+        }
      }
    }
  },
--- a/test/test-pages/keep-images/expected.html
+++ b/test/test-pages/keep-images/expected.html