Merge pull request #52 from mozilla/forEach-loops

Use forEach when it makes sense.
This commit is contained in:
Gijs 2015-03-22 09:25:50 -07:00
commit 6ad9dd9952
25 changed files with 573 additions and 91 deletions

View File

@ -118,6 +118,36 @@ Readability.prototype = {
this._fixRelativeUris(articleContent); this._fixRelativeUris(articleContent);
}, },
/**
* Iterate over a NodeList, which doesn't natively fully implement the Array
* interface.
*
* For convenience, the current object context is applied to the provided
* iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @return void
*/
_forEachNode: function(nodeList, fn) {
return Array.prototype.forEach.call(nodeList, fn, this);
},
/**
* Iterate over a NodeList and return true if any call to the provided
* iterate function returns a truthy value, false otherwise.
*
* For convenience, the current object context is applied to the
* provided iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @return Boolean
*/
_someNode: function(nodeList, fn) {
return Array.prototype.some.call(nodeList, fn, this);
},
/** /**
* Converts each <a> and <img> uri in the given element to an absolute URI. * Converts each <a> and <img> uri in the given element to an absolute URI.
* *
@ -149,19 +179,18 @@ Readability.prototype = {
function convertRelativeURIs(tagName, propName) { function convertRelativeURIs(tagName, propName) {
var elems = articleContent.getElementsByTagName(tagName); var elems = articleContent.getElementsByTagName(tagName);
for (var i = elems.length; --i >= 0;) { this._forEachNode(elems, function(elem) {
var elem = elems[i];
var relativeURI = elem.getAttribute(propName); var relativeURI = elem.getAttribute(propName);
if (relativeURI != null) if (relativeURI != null)
elems[i].setAttribute(propName, toAbsoluteURI(relativeURI)); elem.setAttribute(propName, toAbsoluteURI(relativeURI));
} });
} }
// Fix links. // Fix links.
convertRelativeURIs("a", "href"); convertRelativeURIs.call(this, "a", "href");
// Fix images. // Fix images.
convertRelativeURIs("img", "src"); convertRelativeURIs.call(this, "img", "src");
}, },
/** /**
@ -217,19 +246,17 @@ Readability.prototype = {
var doc = this._doc; var doc = this._doc;
// Remove all style tags in head // Remove all style tags in head
var styleTags = doc.getElementsByTagName("style"); this._forEachNode(doc.getElementsByTagName("style"), function(styleNode) {
for (var st = styleTags.length - 1; st >= 0; st -= 1) { styleNode.parentNode.removeChild(styleNode);
styleTags[st].parentNode.removeChild(styleTags[st]); });
}
if (doc.body) { if (doc.body) {
this._replaceBrs(doc.body); this._replaceBrs(doc.body);
} }
var fonts = doc.getElementsByTagName("FONT"); this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
for (var i = fonts.length; --i >=0;) { this._setNodeTag(fontNode, "SPAN");
this._setNodeTag(fonts[i], "SPAN"); });
}
}, },
/** /**
@ -255,9 +282,7 @@ Readability.prototype = {
* <div>foo<br>bar<p>abc</p></div> * <div>foo<br>bar<p>abc</p></div>
*/ */
_replaceBrs: function (elem) { _replaceBrs: function (elem) {
var brs = elem.getElementsByTagName("br"); this._forEachNode(elem.getElementsByTagName("br"), function(br) {
for (var i = 0; i < brs.length; i++) {
var br = brs[i];
var next = br.nextSibling; var next = br.nextSibling;
// Whether 2 or more <br> elements have been found and replaced with a // Whether 2 or more <br> elements have been found and replaced with a
@ -296,7 +321,7 @@ Readability.prototype = {
next = sibling; next = sibling;
} }
} }
} });
}, },
_setNodeTag: function (node, tag) { _setNodeTag: function (node, tag) {
@ -336,26 +361,21 @@ Readability.prototype = {
this._cleanConditionally(articleContent, "div"); this._cleanConditionally(articleContent, "div");
// Remove extra paragraphs // Remove extra paragraphs
var articleParagraphs = articleContent.getElementsByTagName('p'); this._forEachNode(articleContent.getElementsByTagName('p'), function(paragraph) {
for (var i = articleParagraphs.length - 1; i >= 0; i -= 1) { var imgCount = paragraph.getElementsByTagName('img').length;
var imgCount = articleParagraphs[i].getElementsByTagName('img').length; var embedCount = paragraph.getElementsByTagName('embed').length;
var embedCount = articleParagraphs[i].getElementsByTagName('embed').length; var objectCount = paragraph.getElementsByTagName('object').length;
var objectCount = articleParagraphs[i].getElementsByTagName('object').length; var totalCount = imgCount + embedCount + objectCount;
if (imgCount === 0 && if (totalCount === 0 && !this._getInnerText(paragraph, false))
embedCount === 0 && paragraph.parentNode.removeChild(paragraph);
objectCount === 0 && });
this._getInnerText(articleParagraphs[i], false) === '')
articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
}
var brs = articleContent.getElementsByTagName("BR"); this._forEachNode(articleContent.getElementsByTagName("br"), function(br) {
for (var i = brs.length; --i >= 0;) {
var br = brs[i];
var next = this._nextElement(br.nextSibling); var next = this._nextElement(br.nextSibling);
if (next && next.tagName == "P") if (next && next.tagName == "P")
br.parentNode.removeChild(br); br.parentNode.removeChild(br);
} });
}, },
/** /**
@ -522,8 +542,7 @@ Readability.prototype = {
elementsToScore.push(node); elementsToScore.push(node);
} else { } else {
// EXPERIMENTAL // EXPERIMENTAL
for (var i = 0, il = node.childNodes.length; i < il; i += 1) { this._forEachNode(node.childNodes, function(childNode) {
var childNode = node.childNodes[i];
if (childNode.nodeType === Node.TEXT_NODE) { if (childNode.nodeType === Node.TEXT_NODE) {
var p = doc.createElement('p'); var p = doc.createElement('p');
p.textContent = childNode.textContent; p.textContent = childNode.textContent;
@ -531,7 +550,7 @@ Readability.prototype = {
p.className = 'readability-styled'; p.className = 'readability-styled';
node.replaceChild(p, childNode); node.replaceChild(p, childNode);
} }
} });
} }
} }
node = this._getNextNode(node); node = this._getNextNode(node);
@ -544,17 +563,17 @@ Readability.prototype = {
* A score is determined by things like number of commas, class names, etc. Maybe eventually link density. * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
**/ **/
var candidates = []; var candidates = [];
for (var pt = 0; pt < elementsToScore.length; pt += 1) { this._forEachNode(elementsToScore, function(elementToScore) {
var parentNode = elementsToScore[pt].parentNode; var parentNode = elementToScore.parentNode;
var grandParentNode = parentNode ? parentNode.parentNode : null; var grandParentNode = parentNode ? parentNode.parentNode : null;
var innerText = this._getInnerText(elementsToScore[pt]); var innerText = this._getInnerText(elementToScore);
if (!parentNode || typeof(parentNode.tagName) === 'undefined') if (!parentNode || typeof(parentNode.tagName) === 'undefined')
continue; return;
// If this paragraph is less than 25 characters, don't even count it. // If this paragraph is less than 25 characters, don't even count it.
if (innerText.length < 25) if (innerText.length < 25)
continue; return;
// Initialize readability data for the parent. // Initialize readability data for the parent.
if (typeof parentNode.readability === 'undefined') { if (typeof parentNode.readability === 'undefined') {
@ -586,7 +605,7 @@ Readability.prototype = {
if (grandParentNode) if (grandParentNode)
grandParentNode.readability.contentScore += contentScore / 2; grandParentNode.readability.contentScore += contentScore / 2;
} });
// After we've calculated scores, loop through all of the possible // After we've calculated scores, loop through all of the possible
// candidate nodes we found and find the one with the highest score. // candidate nodes we found and find the one with the highest score.
@ -813,14 +832,13 @@ Readability.prototype = {
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi; var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
// Find description tags. // Find description tags.
for (var i = 0; i < metaElements.length; i++) { this._forEachNode(metaElements, function(element) {
var element = metaElements[i];
var elementName = element.getAttribute("name"); var elementName = element.getAttribute("name");
var elementProperty = element.getAttribute("property"); var elementProperty = element.getAttribute("property");
if (elementName === "author") { if (elementName === "author") {
metadata.byline = element.getAttribute("content"); metadata.byline = element.getAttribute("content");
continue; return;
} }
var name = null; var name = null;
@ -839,7 +857,7 @@ Readability.prototype = {
values[name] = content.trim(); values[name] = content.trim();
} }
} }
} });
if ("description" in values) { if ("description" in values) {
metadata.excerpt = values["description"]; metadata.excerpt = values["description"];
@ -860,14 +878,13 @@ Readability.prototype = {
* @param Element * @param Element
**/ **/
_removeScripts: function(doc) { _removeScripts: function(doc) {
var scripts = doc.getElementsByTagName('script'); this._forEachNode(doc.getElementsByTagName('script'), function(scriptNode) {
for (var i = scripts.length - 1; i >= 0; i -= 1) { scriptNode.nodeValue = "";
scripts[i].nodeValue=""; scriptNode.removeAttribute('src');
scripts[i].removeAttribute('src');
if (scripts[i].parentNode) if (scriptNode.parentNode)
scripts[i].parentNode.removeChild(scripts[i]); scriptNode.parentNode.removeChild(scriptNode);
} });
}, },
/** /**
@ -877,22 +894,17 @@ Readability.prototype = {
* *
* @param Element * @param Element
**/ **/
_hasSinglePInsideElement: function(e) { _hasSinglePInsideElement: function(element) {
// There should be exactly 1 element child which is a P: // There should be exactly 1 element child which is a P:
if (e.children.length != 1 || e.firstElementChild.tagName !== "P") { if (element.children.length != 1 || element.firstElementChild.tagName !== "P") {
return false; return false;
} }
// And there should be no text nodes with real content
var childNodes = e.childNodes;
for (var i = childNodes.length; --i >= 0;) {
var node = childNodes[i];
if (node.nodeType == Node.TEXT_NODE &&
this.REGEXPS.hasContent.test(node.textContent)) {
return false;
}
}
return true; // And there should be no text nodes with real content
return !this._someNode(element.childNodes, function(node) {
return node.nodeType === Node.TEXT_NODE &&
this.REGEXPS.hasContent.test(node.textContent);
});
}, },
/** /**
@ -900,14 +912,11 @@ Readability.prototype = {
* *
* @param Element * @param Element
*/ */
_hasChildBlockElement: function (e) { _hasChildBlockElement: function (element) {
var length = e.children.length; return this._someNode(element.childNodes, function(node) {
for (var i = 0; i < length; i++) { return this.DIV_TO_P_ELEMS.indexOf(node.tagName) !== -1 ||
var child = e.children[i]; this._hasChildBlockElement(node);
if (this.DIV_TO_P_ELEMS.indexOf(child.tagName) !== -1 || this._hasChildBlockElement(child)) });
return true;
}
return false;
}, },
/** /**
@ -915,11 +924,12 @@ Readability.prototype = {
* This also strips out any excess whitespace to be found. * This also strips out any excess whitespace to be found.
* *
* @param Element * @param Element
* @param Boolean normalizeSpaces (default: true)
* @return string * @return string
**/ **/
_getInnerText: function(e, normalizeSpaces) { _getInnerText: function(e, normalizeSpaces) {
var textContent = e.textContent.trim();
normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces; normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
var textContent = e.textContent.trim();
if (normalizeSpaces) { if (normalizeSpaces) {
return textContent.replace(this.REGEXPS.normalize, " "); return textContent.replace(this.REGEXPS.normalize, " ");
@ -978,14 +988,17 @@ Readability.prototype = {
* @param Element * @param Element
* @return number (float) * @return number (float)
**/ **/
_getLinkDensity: function(e) { _getLinkDensity: function(element) {
var links = e.getElementsByTagName("a"); var textLength = this._getInnerText(element).length;
var textLength = this._getInnerText(e).length; if (textLength === 0)
return;
var linkLength = 0; var linkLength = 0;
for (var i = 0, il = links.length; i < il; i += 1) { // XXX implement _reduceNodeList?
linkLength += this._getInnerText(links[i]).length; this._forEachNode(element.getElementsByTagName("a"), function(linkNode) {
} linkLength += this._getInnerText(linkNode).length;
});
return linkLength / textLength; return linkLength / textLength;
}, },
@ -1398,28 +1411,27 @@ Readability.prototype = {
* @return void * @return void
**/ **/
_clean: function(e, tag) { _clean: function(e, tag) {
var targetList = e.getElementsByTagName(tag);
var isEmbed = (tag === 'object' || tag === 'embed'); var isEmbed = (tag === 'object' || tag === 'embed');
for (var y = targetList.length - 1; y >= 0; y -= 1) { this._forEachNode(e.getElementsByTagName(tag), function(element) {
// Allow youtube and vimeo videos through as people usually want to see those. // Allow youtube and vimeo videos through as people usually want to see those.
if (isEmbed) { if (isEmbed) {
var attributeValues = ""; var attributeValues = "";
for (var i = 0, il = targetList[y].attributes.length; i < il; i += 1) { for (var i = 0, il = element.attributes.length; i < il; i += 1) {
attributeValues += targetList[y].attributes[i].value + '|'; attributeValues += element.attributes[i].value + '|';
} }
// First, check the elements attributes to see if any of them contain youtube or vimeo // First, check the elements attributes to see if any of them contain youtube or vimeo
if (this.REGEXPS.videos.test(attributeValues)) if (this.REGEXPS.videos.test(attributeValues))
continue; return;
// Then check the elements inside this element for the same. // Then check the elements inside this element for the same.
if (this.REGEXPS.videos.test(targetList[y].innerHTML)) if (this.REGEXPS.videos.test(element.innerHTML))
continue; return;
} }
targetList[y].parentNode.removeChild(targetList[y]); element.parentNode.removeChild(element);
} });
}, },
/** /**
@ -1571,7 +1583,7 @@ Readability.prototype = {
if (!metadata.excerpt) { if (!metadata.excerpt) {
var paragraphs = articleContent.getElementsByTagName("p"); var paragraphs = articleContent.getElementsByTagName("p");
if (paragraphs.length > 0) { if (paragraphs.length > 0) {
metadata.excerpt = paragraphs[0].textContent; metadata.excerpt = paragraphs[0].textContent.trim();
} }
} }

View File

@ -0,0 +1,5 @@
{
"title": "Basic tag cleaning test",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
}

View File

@ -0,0 +1,19 @@
<div id="readability-page-1" class="page">
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
ut aliquip ex ea commodo consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</div>

View File

@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Basic tag cleaning test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<iframe src="about:blank">Iframe fallback test</iframe>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<object data="foo.swf" type="application/x-shockwave-flash" width="88" height="31">
<param movie="foo.swf" />
</object>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Normalize space test",
"byline": null,
"excerpt": "Lorem\n ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n\ttab here\n incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
}

View File

@ -0,0 +1,16 @@
<div id="readability-page-1" class="page">
<article>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tab here incididunt ut labore et dolore magna aliqua. Ut enim ad minim
veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
cupidatat non proident, sunt in culpa qui officia deserunt mollit anim
id est laborum.</p>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</article>
</div>

View File

@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Normalize space test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tab here
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation
ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Remove trailing brs test",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
}

View File

@ -0,0 +1,21 @@
<div id="readability-page-1" class="page">
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
ut aliquip ex ea commodo consequat.</p>
</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</div>

View File

@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Remove trailing brs test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<br>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<br><br><p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p><br>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Replace font tags test",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
}

View File

@ -0,0 +1,19 @@
<div id="readability-page-1" class="page">
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
ut aliquip ex ea commodo consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</div>

View File

@ -0,0 +1,41 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Replace font tags test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p></p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p></p>
<p></p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<p></p>
</div>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>
</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<p>
</p>
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Remove script tags test",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
}

View File

@ -0,0 +1,19 @@
<div id="readability-page-1" class="page">
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
ut aliquip ex ea commodo consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</div>

View File

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Remove script tags test</title>
</head>
<body>
<script type="text/javascript1.8">alert('wrong')</script>
<article>
<h1>Lorem</h1>
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<script>
alert('wrong')
</script>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<script type="text/javascript">
alert('wrong')
</script>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<script type="text/javascript1.8">alert('wrong')</script>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur.
<script type="text/vbscript" language="vbscript">
document.write("super wrong.")
</script>
Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Replace brs test",
"byline": null,
"excerpt": "Lorem ipsum"
}

View File

@ -0,0 +1,20 @@
<div id="readability-page-1" class="page">
<div>
<p style="display: inline;" class="readability-styled">Lorem ipsum</p>
<p style="display: inline;" class="readability-styled">dolor sit</p>
<p>amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut
labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure
dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<div>
<p style="display: inline;" class="readability-styled">Tempor</p>
<p>incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</div>

View File

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Replace brs test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum<br>dolor sit<br> <br><br>amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<h2>Foo</h2>
<div>
Tempor<br><br>incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Replace font tags test",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
}

View File

@ -0,0 +1,17 @@
<div id="readability-page-1" class="page">
<article>
<p> <span face="Arial" size="2">Lorem ipsum dolor</span> sit amet, consectetur
adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore
magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco
laboris nisi ut aliquip ex ea commodo consequat. <span face="Arial" size="2">Duis</span> aute
irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.</p>
<p>Tempor incididunt ut labore et <span face="Arial" size="2">dolore</span> magna
aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit
in voluptate velit esse cillum dolore eu fugiat nulla pariatur. <span face="Arial"
size="2">Excepteur sint occaecat</span> cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.</p>
</article>
</div>

View File

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Replace font tags test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<font face="Arial" size="2">Lorem ipsum dolor</font> sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. <font face="Arial" size="2">Duis</font> aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et <font face="Arial" size="2">dolore</font> magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. <font face="Arial" size="2">Excepteur sint occaecat</font> cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -0,0 +1,5 @@
{
"title": "Style tags removal",
"byline": null,
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
}

View File

@ -0,0 +1,15 @@
<div id="readability-page-1" class="page">
<article>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</article>
</div>

View File

@ -0,0 +1,42 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Style tags removal</title>
<style>h1{font-weight:normal}</style>
</head>
<body>
<article>
<h1>Lorem</h1>
<style>
div{font-weight:bold}
</style>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<style>
h2 {
color: red;
}
</style>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
<style>
* {
color: yellow;
}
</style>
</body>
</html>