Merge pull request #52 from mozilla/forEach-loops
Use forEach when it makes sense.
This commit is contained in:
commit
6ad9dd9952
192
Readability.js
192
Readability.js
@ -118,6 +118,36 @@ Readability.prototype = {
|
|||||||
this._fixRelativeUris(articleContent);
|
this._fixRelativeUris(articleContent);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate over a NodeList, which doesn't natively fully implement the Array
|
||||||
|
* interface.
|
||||||
|
*
|
||||||
|
* For convenience, the current object context is applied to the provided
|
||||||
|
* iterate function.
|
||||||
|
*
|
||||||
|
* @param NodeList nodeList The NodeList.
|
||||||
|
* @param Function fn The iterate function.
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
_forEachNode: function(nodeList, fn) {
|
||||||
|
return Array.prototype.forEach.call(nodeList, fn, this);
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate over a NodeList, return true if any of the provided iterate
|
||||||
|
* function calls returns true, false otherwise.
|
||||||
|
*
|
||||||
|
* For convenience, the current object context is applied to the
|
||||||
|
* provided iterate function.
|
||||||
|
*
|
||||||
|
* @param NodeList nodeList The NodeList.
|
||||||
|
* @param Function fn The iterate function.
|
||||||
|
* @return Boolean
|
||||||
|
*/
|
||||||
|
_someNode: function(nodeList, fn) {
|
||||||
|
return Array.prototype.some.call(nodeList, fn, this);
|
||||||
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts each <a> and <img> uri in the given element to an absolute URI.
|
* Converts each <a> and <img> uri in the given element to an absolute URI.
|
||||||
*
|
*
|
||||||
@ -149,19 +179,18 @@ Readability.prototype = {
|
|||||||
|
|
||||||
function convertRelativeURIs(tagName, propName) {
|
function convertRelativeURIs(tagName, propName) {
|
||||||
var elems = articleContent.getElementsByTagName(tagName);
|
var elems = articleContent.getElementsByTagName(tagName);
|
||||||
for (var i = elems.length; --i >= 0;) {
|
this._forEachNode(elems, function(elem) {
|
||||||
var elem = elems[i];
|
|
||||||
var relativeURI = elem.getAttribute(propName);
|
var relativeURI = elem.getAttribute(propName);
|
||||||
if (relativeURI != null)
|
if (relativeURI != null)
|
||||||
elems[i].setAttribute(propName, toAbsoluteURI(relativeURI));
|
elem.setAttribute(propName, toAbsoluteURI(relativeURI));
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fix links.
|
// Fix links.
|
||||||
convertRelativeURIs("a", "href");
|
convertRelativeURIs.call(this, "a", "href");
|
||||||
|
|
||||||
// Fix images.
|
// Fix images.
|
||||||
convertRelativeURIs("img", "src");
|
convertRelativeURIs.call(this, "img", "src");
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -217,19 +246,17 @@ Readability.prototype = {
|
|||||||
var doc = this._doc;
|
var doc = this._doc;
|
||||||
|
|
||||||
// Remove all style tags in head
|
// Remove all style tags in head
|
||||||
var styleTags = doc.getElementsByTagName("style");
|
this._forEachNode(doc.getElementsByTagName("style"), function(styleNode) {
|
||||||
for (var st = styleTags.length - 1; st >= 0; st -= 1) {
|
styleNode.parentNode.removeChild(styleNode);
|
||||||
styleTags[st].parentNode.removeChild(styleTags[st]);
|
});
|
||||||
}
|
|
||||||
|
|
||||||
if (doc.body) {
|
if (doc.body) {
|
||||||
this._replaceBrs(doc.body);
|
this._replaceBrs(doc.body);
|
||||||
}
|
}
|
||||||
|
|
||||||
var fonts = doc.getElementsByTagName("FONT");
|
this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
|
||||||
for (var i = fonts.length; --i >=0;) {
|
this._setNodeTag(fontNode, "SPAN");
|
||||||
this._setNodeTag(fonts[i], "SPAN");
|
});
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -255,9 +282,7 @@ Readability.prototype = {
|
|||||||
* <div>foo<br>bar<p>abc</p></div>
|
* <div>foo<br>bar<p>abc</p></div>
|
||||||
*/
|
*/
|
||||||
_replaceBrs: function (elem) {
|
_replaceBrs: function (elem) {
|
||||||
var brs = elem.getElementsByTagName("br");
|
this._forEachNode(elem.getElementsByTagName("br"), function(br) {
|
||||||
for (var i = 0; i < brs.length; i++) {
|
|
||||||
var br = brs[i];
|
|
||||||
var next = br.nextSibling;
|
var next = br.nextSibling;
|
||||||
|
|
||||||
// Whether 2 or more <br> elements have been found and replaced with a
|
// Whether 2 or more <br> elements have been found and replaced with a
|
||||||
@ -296,7 +321,7 @@ Readability.prototype = {
|
|||||||
next = sibling;
|
next = sibling;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
_setNodeTag: function (node, tag) {
|
_setNodeTag: function (node, tag) {
|
||||||
@ -336,26 +361,21 @@ Readability.prototype = {
|
|||||||
this._cleanConditionally(articleContent, "div");
|
this._cleanConditionally(articleContent, "div");
|
||||||
|
|
||||||
// Remove extra paragraphs
|
// Remove extra paragraphs
|
||||||
var articleParagraphs = articleContent.getElementsByTagName('p');
|
this._forEachNode(articleContent.getElementsByTagName('p'), function(paragraph) {
|
||||||
for (var i = articleParagraphs.length - 1; i >= 0; i -= 1) {
|
var imgCount = paragraph.getElementsByTagName('img').length;
|
||||||
var imgCount = articleParagraphs[i].getElementsByTagName('img').length;
|
var embedCount = paragraph.getElementsByTagName('embed').length;
|
||||||
var embedCount = articleParagraphs[i].getElementsByTagName('embed').length;
|
var objectCount = paragraph.getElementsByTagName('object').length;
|
||||||
var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
|
var totalCount = imgCount + embedCount + objectCount;
|
||||||
|
|
||||||
if (imgCount === 0 &&
|
if (totalCount === 0 && !this._getInnerText(paragraph, false))
|
||||||
embedCount === 0 &&
|
paragraph.parentNode.removeChild(paragraph);
|
||||||
objectCount === 0 &&
|
});
|
||||||
this._getInnerText(articleParagraphs[i], false) === '')
|
|
||||||
articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
var brs = articleContent.getElementsByTagName("BR");
|
this._forEachNode(articleContent.getElementsByTagName("br"), function(br) {
|
||||||
for (var i = brs.length; --i >= 0;) {
|
|
||||||
var br = brs[i];
|
|
||||||
var next = this._nextElement(br.nextSibling);
|
var next = this._nextElement(br.nextSibling);
|
||||||
if (next && next.tagName == "P")
|
if (next && next.tagName == "P")
|
||||||
br.parentNode.removeChild(br);
|
br.parentNode.removeChild(br);
|
||||||
}
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -522,8 +542,7 @@ Readability.prototype = {
|
|||||||
elementsToScore.push(node);
|
elementsToScore.push(node);
|
||||||
} else {
|
} else {
|
||||||
// EXPERIMENTAL
|
// EXPERIMENTAL
|
||||||
for (var i = 0, il = node.childNodes.length; i < il; i += 1) {
|
this._forEachNode(node.childNodes, function(childNode) {
|
||||||
var childNode = node.childNodes[i];
|
|
||||||
if (childNode.nodeType === Node.TEXT_NODE) {
|
if (childNode.nodeType === Node.TEXT_NODE) {
|
||||||
var p = doc.createElement('p');
|
var p = doc.createElement('p');
|
||||||
p.textContent = childNode.textContent;
|
p.textContent = childNode.textContent;
|
||||||
@ -531,7 +550,7 @@ Readability.prototype = {
|
|||||||
p.className = 'readability-styled';
|
p.className = 'readability-styled';
|
||||||
node.replaceChild(p, childNode);
|
node.replaceChild(p, childNode);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
node = this._getNextNode(node);
|
node = this._getNextNode(node);
|
||||||
@ -544,17 +563,17 @@ Readability.prototype = {
|
|||||||
* A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
|
* A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
|
||||||
**/
|
**/
|
||||||
var candidates = [];
|
var candidates = [];
|
||||||
for (var pt = 0; pt < elementsToScore.length; pt += 1) {
|
this._forEachNode(elementsToScore, function(elementToScore) {
|
||||||
var parentNode = elementsToScore[pt].parentNode;
|
var parentNode = elementToScore.parentNode;
|
||||||
var grandParentNode = parentNode ? parentNode.parentNode : null;
|
var grandParentNode = parentNode ? parentNode.parentNode : null;
|
||||||
var innerText = this._getInnerText(elementsToScore[pt]);
|
var innerText = this._getInnerText(elementToScore);
|
||||||
|
|
||||||
if (!parentNode || typeof(parentNode.tagName) === 'undefined')
|
if (!parentNode || typeof(parentNode.tagName) === 'undefined')
|
||||||
continue;
|
return;
|
||||||
|
|
||||||
// If this paragraph is less than 25 characters, don't even count it.
|
// If this paragraph is less than 25 characters, don't even count it.
|
||||||
if (innerText.length < 25)
|
if (innerText.length < 25)
|
||||||
continue;
|
return;
|
||||||
|
|
||||||
// Initialize readability data for the parent.
|
// Initialize readability data for the parent.
|
||||||
if (typeof parentNode.readability === 'undefined') {
|
if (typeof parentNode.readability === 'undefined') {
|
||||||
@ -586,7 +605,7 @@ Readability.prototype = {
|
|||||||
|
|
||||||
if (grandParentNode)
|
if (grandParentNode)
|
||||||
grandParentNode.readability.contentScore += contentScore / 2;
|
grandParentNode.readability.contentScore += contentScore / 2;
|
||||||
}
|
});
|
||||||
|
|
||||||
// After we've calculated scores, loop through all of the possible
|
// After we've calculated scores, loop through all of the possible
|
||||||
// candidate nodes we found and find the one with the highest score.
|
// candidate nodes we found and find the one with the highest score.
|
||||||
@ -813,14 +832,13 @@ Readability.prototype = {
|
|||||||
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
|
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi;
|
||||||
|
|
||||||
// Find description tags.
|
// Find description tags.
|
||||||
for (var i = 0; i < metaElements.length; i++) {
|
this._forEachNode(metaElements, function(element) {
|
||||||
var element = metaElements[i];
|
|
||||||
var elementName = element.getAttribute("name");
|
var elementName = element.getAttribute("name");
|
||||||
var elementProperty = element.getAttribute("property");
|
var elementProperty = element.getAttribute("property");
|
||||||
|
|
||||||
if (elementName === "author") {
|
if (elementName === "author") {
|
||||||
metadata.byline = element.getAttribute("content");
|
metadata.byline = element.getAttribute("content");
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var name = null;
|
var name = null;
|
||||||
@ -839,7 +857,7 @@ Readability.prototype = {
|
|||||||
values[name] = content.trim();
|
values[name] = content.trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
if ("description" in values) {
|
if ("description" in values) {
|
||||||
metadata.excerpt = values["description"];
|
metadata.excerpt = values["description"];
|
||||||
@ -860,14 +878,13 @@ Readability.prototype = {
|
|||||||
* @param Element
|
* @param Element
|
||||||
**/
|
**/
|
||||||
_removeScripts: function(doc) {
|
_removeScripts: function(doc) {
|
||||||
var scripts = doc.getElementsByTagName('script');
|
this._forEachNode(doc.getElementsByTagName('script'), function(scriptNode) {
|
||||||
for (var i = scripts.length - 1; i >= 0; i -= 1) {
|
scriptNode.nodeValue = "";
|
||||||
scripts[i].nodeValue="";
|
scriptNode.removeAttribute('src');
|
||||||
scripts[i].removeAttribute('src');
|
|
||||||
|
|
||||||
if (scripts[i].parentNode)
|
if (scriptNode.parentNode)
|
||||||
scripts[i].parentNode.removeChild(scripts[i]);
|
scriptNode.parentNode.removeChild(scriptNode);
|
||||||
}
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -877,22 +894,17 @@ Readability.prototype = {
|
|||||||
*
|
*
|
||||||
* @param Element
|
* @param Element
|
||||||
**/
|
**/
|
||||||
_hasSinglePInsideElement: function(e) {
|
_hasSinglePInsideElement: function(element) {
|
||||||
// There should be exactly 1 element child which is a P:
|
// There should be exactly 1 element child which is a P:
|
||||||
if (e.children.length != 1 || e.firstElementChild.tagName !== "P") {
|
if (element.children.length != 1 || element.firstElementChild.tagName !== "P") {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// And there should be no text nodes with real content
|
|
||||||
var childNodes = e.childNodes;
|
|
||||||
for (var i = childNodes.length; --i >= 0;) {
|
|
||||||
var node = childNodes[i];
|
|
||||||
if (node.nodeType == Node.TEXT_NODE &&
|
|
||||||
this.REGEXPS.hasContent.test(node.textContent)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
// And there should be no text nodes with real content
|
||||||
|
return !this._someNode(element.childNodes, function(node) {
|
||||||
|
return node.nodeType === Node.TEXT_NODE &&
|
||||||
|
this.REGEXPS.hasContent.test(node.textContent);
|
||||||
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -900,14 +912,11 @@ Readability.prototype = {
|
|||||||
*
|
*
|
||||||
* @param Element
|
* @param Element
|
||||||
*/
|
*/
|
||||||
_hasChildBlockElement: function (e) {
|
_hasChildBlockElement: function (element) {
|
||||||
var length = e.children.length;
|
return this._someNode(element.childNodes, function(node) {
|
||||||
for (var i = 0; i < length; i++) {
|
return this.DIV_TO_P_ELEMS.indexOf(node.tagName) !== -1 ||
|
||||||
var child = e.children[i];
|
this._hasChildBlockElement(node);
|
||||||
if (this.DIV_TO_P_ELEMS.indexOf(child.tagName) !== -1 || this._hasChildBlockElement(child))
|
});
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -915,11 +924,12 @@ Readability.prototype = {
|
|||||||
* This also strips out any excess whitespace to be found.
|
* This also strips out any excess whitespace to be found.
|
||||||
*
|
*
|
||||||
* @param Element
|
* @param Element
|
||||||
|
* @param Boolean normalizeSpaces (default: true)
|
||||||
* @return string
|
* @return string
|
||||||
**/
|
**/
|
||||||
_getInnerText: function(e, normalizeSpaces) {
|
_getInnerText: function(e, normalizeSpaces) {
|
||||||
var textContent = e.textContent.trim();
|
|
||||||
normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
|
normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
|
||||||
|
var textContent = e.textContent.trim();
|
||||||
|
|
||||||
if (normalizeSpaces) {
|
if (normalizeSpaces) {
|
||||||
return textContent.replace(this.REGEXPS.normalize, " ");
|
return textContent.replace(this.REGEXPS.normalize, " ");
|
||||||
@ -978,14 +988,17 @@ Readability.prototype = {
|
|||||||
* @param Element
|
* @param Element
|
||||||
* @return number (float)
|
* @return number (float)
|
||||||
**/
|
**/
|
||||||
_getLinkDensity: function(e) {
|
_getLinkDensity: function(element) {
|
||||||
var links = e.getElementsByTagName("a");
|
var textLength = this._getInnerText(element).length;
|
||||||
var textLength = this._getInnerText(e).length;
|
if (textLength === 0)
|
||||||
|
return;
|
||||||
|
|
||||||
var linkLength = 0;
|
var linkLength = 0;
|
||||||
|
|
||||||
for (var i = 0, il = links.length; i < il; i += 1) {
|
// XXX implement _reduceNodeList?
|
||||||
linkLength += this._getInnerText(links[i]).length;
|
this._forEachNode(element.getElementsByTagName("a"), function(linkNode) {
|
||||||
}
|
linkLength += this._getInnerText(linkNode).length;
|
||||||
|
});
|
||||||
|
|
||||||
return linkLength / textLength;
|
return linkLength / textLength;
|
||||||
},
|
},
|
||||||
@ -1398,28 +1411,27 @@ Readability.prototype = {
|
|||||||
* @return void
|
* @return void
|
||||||
**/
|
**/
|
||||||
_clean: function(e, tag) {
|
_clean: function(e, tag) {
|
||||||
var targetList = e.getElementsByTagName(tag);
|
|
||||||
var isEmbed = (tag === 'object' || tag === 'embed');
|
var isEmbed = (tag === 'object' || tag === 'embed');
|
||||||
|
|
||||||
for (var y = targetList.length - 1; y >= 0; y -= 1) {
|
this._forEachNode(e.getElementsByTagName(tag), function(element) {
|
||||||
// Allow youtube and vimeo videos through as people usually want to see those.
|
// Allow youtube and vimeo videos through as people usually want to see those.
|
||||||
if (isEmbed) {
|
if (isEmbed) {
|
||||||
var attributeValues = "";
|
var attributeValues = "";
|
||||||
for (var i = 0, il = targetList[y].attributes.length; i < il; i += 1) {
|
for (var i = 0, il = element.attributes.length; i < il; i += 1) {
|
||||||
attributeValues += targetList[y].attributes[i].value + '|';
|
attributeValues += element.attributes[i].value + '|';
|
||||||
}
|
}
|
||||||
|
|
||||||
// First, check the elements attributes to see if any of them contain youtube or vimeo
|
// First, check the elements attributes to see if any of them contain youtube or vimeo
|
||||||
if (this.REGEXPS.videos.test(attributeValues))
|
if (this.REGEXPS.videos.test(attributeValues))
|
||||||
continue;
|
return;
|
||||||
|
|
||||||
// Then check the elements inside this element for the same.
|
// Then check the elements inside this element for the same.
|
||||||
if (this.REGEXPS.videos.test(targetList[y].innerHTML))
|
if (this.REGEXPS.videos.test(element.innerHTML))
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
targetList[y].parentNode.removeChild(targetList[y]);
|
element.parentNode.removeChild(element);
|
||||||
}
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1571,7 +1583,7 @@ Readability.prototype = {
|
|||||||
if (!metadata.excerpt) {
|
if (!metadata.excerpt) {
|
||||||
var paragraphs = articleContent.getElementsByTagName("p");
|
var paragraphs = articleContent.getElementsByTagName("p");
|
||||||
if (paragraphs.length > 0) {
|
if (paragraphs.length > 0) {
|
||||||
metadata.excerpt = paragraphs[0].textContent;
|
metadata.excerpt = paragraphs[0].textContent.trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Basic tag cleaning test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
|
||||||
|
}
|
19
test/test-pages/basic-tags-cleaning/expected.html
Normal file
19
test/test-pages/basic-tags-cleaning/expected.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
|
||||||
|
ut aliquip ex ea commodo consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
35
test/test-pages/basic-tags-cleaning/source.html
Normal file
35
test/test-pages/basic-tags-cleaning/source.html
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Basic tag cleaning test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<iframe src="about:blank">Iframe fallback test</iframe>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<object data="foo.swf" type="application/x-shockwave-flash" width="88" height="31">
|
||||||
|
<param movie="foo.swf" />
|
||||||
|
</object>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
5
test/test-pages/normalize-spaces/expected-metadata.json
Normal file
5
test/test-pages/normalize-spaces/expected-metadata.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Normalize space test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem\n ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n\ttab here\n incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
|
||||||
|
}
|
16
test/test-pages/normalize-spaces/expected.html
Normal file
16
test/test-pages/normalize-spaces/expected.html
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<article>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tab here incididunt ut labore et dolore magna aliqua. Ut enim ad minim
|
||||||
|
veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
|
||||||
|
commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
|
||||||
|
velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
|
||||||
|
cupidatat non proident, sunt in culpa qui officia deserunt mollit anim
|
||||||
|
id est laborum.</p>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
|
||||||
|
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</article>
|
||||||
|
</div>
|
35
test/test-pages/normalize-spaces/source.html
Normal file
35
test/test-pages/normalize-spaces/source.html
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Normalize space test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
Lorem
|
||||||
|
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tab here
|
||||||
|
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
5
test/test-pages/remove-extra-brs/expected-metadata.json
Normal file
5
test/test-pages/remove-extra-brs/expected-metadata.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Remove trailing brs test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
|
||||||
|
}
|
21
test/test-pages/remove-extra-brs/expected.html
Normal file
21
test/test-pages/remove-extra-brs/expected.html
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>
|
||||||
|
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
|
||||||
|
ut aliquip ex ea commodo consequat.</p>
|
||||||
|
</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
32
test/test-pages/remove-extra-brs/source.html
Normal file
32
test/test-pages/remove-extra-brs/source.html
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Remove trailing brs test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<br>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<br><br><p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p><br>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Replace font tags test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
|
||||||
|
}
|
19
test/test-pages/remove-extra-paragraphs/expected.html
Normal file
19
test/test-pages/remove-extra-paragraphs/expected.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
|
||||||
|
ut aliquip ex ea commodo consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
41
test/test-pages/remove-extra-paragraphs/source.html
Normal file
41
test/test-pages/remove-extra-paragraphs/source.html
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Replace font tags test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p></p>
|
||||||
|
<p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p></p>
|
||||||
|
<p></p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p></p>
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>
|
||||||
|
</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>
|
||||||
|
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Remove script tags test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua."
|
||||||
|
}
|
19
test/test-pages/remove-script-tags/expected.html
Normal file
19
test/test-pages/remove-script-tags/expected.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
|
||||||
|
ut aliquip ex ea commodo consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
|
||||||
|
dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
43
test/test-pages/remove-script-tags/source.html
Normal file
43
test/test-pages/remove-script-tags/source.html
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Remove script tags test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<script type="text/javascript1.8">alert('wrong')</script>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<script>
|
||||||
|
alert('wrong')
|
||||||
|
</script>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<script type="text/javascript">
|
||||||
|
alert('wrong')
|
||||||
|
</script>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<script type="text/javascript1.8">alert('wrong')</script>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur.
|
||||||
|
<script type="text/vbscript" language="vbscript">
|
||||||
|
document.write("super wrong.")
|
||||||
|
</script>
|
||||||
|
Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
5
test/test-pages/replace-brs/expected-metadata.json
Normal file
5
test/test-pages/replace-brs/expected-metadata.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Replace brs test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum"
|
||||||
|
}
|
20
test/test-pages/replace-brs/expected.html
Normal file
20
test/test-pages/replace-brs/expected.html
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<div>
|
||||||
|
<p style="display: inline;" class="readability-styled">Lorem ipsum</p>
|
||||||
|
<p style="display: inline;" class="readability-styled">dolor sit</p>
|
||||||
|
<p>amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut
|
||||||
|
labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation
|
||||||
|
ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure
|
||||||
|
dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
|
||||||
|
nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
||||||
|
culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p style="display: inline;" class="readability-styled">Tempor</p>
|
||||||
|
<p>incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
|
||||||
|
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
28
test/test-pages/replace-brs/source.html
Normal file
28
test/test-pages/replace-brs/source.html
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Replace brs test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum<br>dolor sit<br> <br><br>amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor<br><br>incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
5
test/test-pages/replace-font-tags/expected-metadata.json
Normal file
5
test/test-pages/replace-font-tags/expected-metadata.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Replace font tags test",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
|
||||||
|
}
|
17
test/test-pages/replace-font-tags/expected.html
Normal file
17
test/test-pages/replace-font-tags/expected.html
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<article>
|
||||||
|
<p> <span face="Arial" size="2">Lorem ipsum dolor</span> sit amet, consectetur
|
||||||
|
adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore
|
||||||
|
magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco
|
||||||
|
laboris nisi ut aliquip ex ea commodo consequat. <span face="Arial" size="2">Duis</span> aute
|
||||||
|
irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
|
||||||
|
nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
|
||||||
|
culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Tempor incididunt ut labore et <span face="Arial" size="2">dolore</span> magna
|
||||||
|
aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
|
||||||
|
nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit
|
||||||
|
in voluptate velit esse cillum dolore eu fugiat nulla pariatur. <span face="Arial"
|
||||||
|
size="2">Excepteur sint occaecat</span> cupidatat non proident, sunt in
|
||||||
|
culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</article>
|
||||||
|
</div>
|
28
test/test-pages/replace-font-tags/source.html
Normal file
28
test/test-pages/replace-font-tags/source.html
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Replace font tags test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<font face="Arial" size="2">Lorem ipsum dolor</font> sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. <font face="Arial" size="2">Duis</font> aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et <font face="Arial" size="2">dolore</font> magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. <font face="Arial" size="2">Excepteur sint occaecat</font> cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"title": "Style tags removal",
|
||||||
|
"byline": null,
|
||||||
|
"excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
|
||||||
|
}
|
15
test/test-pages/style-tags-removal/expected.html
Normal file
15
test/test-pages/style-tags-removal/expected.html
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
<div id="readability-page-1" class="page">
|
||||||
|
<article>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
|
||||||
|
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
|
||||||
|
non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</article>
|
||||||
|
</div>
|
42
test/test-pages/style-tags-removal/source.html
Normal file
42
test/test-pages/style-tags-removal/source.html
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Style tags removal</title>
|
||||||
|
<style>h1{font-weight:normal}</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<style>
|
||||||
|
div{font-weight:bold}
|
||||||
|
</style>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<style>
|
||||||
|
h2 {
|
||||||
|
color: red;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
<style>
|
||||||
|
* {
|
||||||
|
color: yellow;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Reference in New Issue
Block a user