diff --git a/Readability.js b/Readability.js index 1e2bdce..03817b0 100644 --- a/Readability.js +++ b/Readability.js @@ -94,7 +94,7 @@ Readability.prototype = { * @return void **/ _postProcessContent: function(articleContent) { - // Readability cannot open relative uris so we convert them to absolute uris. + // Readability cannot open relative uris so we convert them to absolute uris. this._fixRelativeUris(articleContent); }, @@ -105,9 +105,9 @@ Readability.prototype = { * @return void */ _fixRelativeUris: function(articleContent) { - let scheme = this._uri.scheme; - let prePath = this._uri.prePath; - let pathBase = this._uri.pathBase; + var scheme = this._uri.scheme; + var prePath = this._uri.prePath; + var pathBase = this._uri.pathBase; function toAbsoluteURI(uri) { // If this is already an absolute URI, return it. @@ -128,10 +128,10 @@ Readability.prototype = { } function convertRelativeURIs(tagName, propName) { - let elems = articleContent.getElementsByTagName(tagName); - for (let i = elems.length; --i >= 0;) { - let elem = elems[i]; - let relativeURI = elem.getAttribute(propName); + var elems = articleContent.getElementsByTagName(tagName); + for (var i = elems.length; --i >= 0;) { + var elem = elems[i]; + var relativeURI = elem.getAttribute(propName); if (relativeURI != null) elems[i].setAttribute(propName, toAbsoluteURI(relativeURI)); } @@ -150,9 +150,9 @@ Readability.prototype = { * @return void **/ _getArticleTitle: function() { - let doc = this._doc; - let curTitle = ""; - let origTitle = ""; + var doc = this._doc; + var curTitle = ""; + var origTitle = ""; try { curTitle = origTitle = doc.title; @@ -173,7 +173,7 @@ Readability.prototype = { if (curTitle.split(' ').length < 3) curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1'); } else if (curTitle.length > 150 || curTitle.length < 15) { - let hOnes = doc.getElementsByTagName('h1'); + var hOnes = doc.getElementsByTagName('h1'); if (hOnes.length === 1) curTitle = this._getInnerText(hOnes[0]); @@ -194,13 
+194,13 @@ Readability.prototype = { * @return void **/ _prepDocument: function() { - let doc = this._doc; + var doc = this._doc; // In some cases a body element can't be found (if the HTML is // totally hosed for example) so we create a new body node and // append it to the document. if (!doc.body) { - let body = doc.createElement("body"); + var body = doc.createElement("body"); try { doc.body = body; @@ -211,15 +211,15 @@ Readability.prototype = { } // Remove all style tags in head - let styleTags = doc.getElementsByTagName("style"); - for (let st = 0; st < styleTags.length; st += 1) { + var styleTags = doc.getElementsByTagName("style"); + for (var st = 0; st < styleTags.length; st += 1) { styleTags[st].textContent = ""; } this._replaceBrs(doc.body); - let fonts = doc.getElementsByTagName("FONT"); - for (let i = fonts.length; --i >=0;) { + var fonts = doc.getElementsByTagName("FONT"); + for (var i = fonts.length; --i >=0;) { this._setNodeTag(fonts[i], "SPAN"); } }, @@ -230,7 +230,7 @@ Readability.prototype = { * returned. */ _nextElement: function (node) { - let next = node; + var next = node; while (next && (next.nodeType != Node.ELEMENT_NODE) && this.REGEXPS.whitespace.test(next.textContent)) { @@ -247,21 +247,21 @@ Readability.prototype = { *
foo
bar

abc

*/ _replaceBrs: function (elem) { - let brs = elem.getElementsByTagName("br"); - for (let i = 0; i < brs.length; i++) { - let br = brs[i]; - let next = br.nextSibling; + var brs = elem.getElementsByTagName("br"); + for (var i = 0; i < brs.length; i++) { + var br = brs[i]; + var next = br.nextSibling; // Whether 2 or more
elements have been found and replaced with a //

block. - let replaced = false; + var replaced = false; // If we find a
chain, remove the
s until we hit another element // or non-whitespace. This leaves behind the first
in the chain // (which will be replaced with a

later). while ((next = this._nextElement(next)) && (next.tagName == "BR")) { replaced = true; - let sibling = next.nextSibling; + var sibling = next.nextSibling; next.parentNode.removeChild(next); next = sibling; } @@ -270,20 +270,20 @@ Readability.prototype = { // all sibling nodes as children of the

until we hit another
// chain. if (replaced) { - let p = this._doc.createElement("p"); + var p = this._doc.createElement("p"); br.parentNode.replaceChild(p, br); next = p.nextSibling; while (next) { // If we've hit another

, we're done adding children to this

. if (next.tagName == "BR") { - let nextElem = this._nextElement(next); + var nextElem = this._nextElement(next); if (nextElem && nextElem.tagName == "BR") break; } - + // Otherwise, make this node a child of the new

. - let sibling = next.nextSibling; + var sibling = next.nextSibling; p.appendChild(next); next = sibling; } @@ -326,11 +326,11 @@ Readability.prototype = { this._cleanConditionally(articleContent, "div"); // Remove extra paragraphs - let articleParagraphs = articleContent.getElementsByTagName('p'); - for (let i = articleParagraphs.length - 1; i >= 0; i -= 1) { - let imgCount = articleParagraphs[i].getElementsByTagName('img').length; - let embedCount = articleParagraphs[i].getElementsByTagName('embed').length; - let objectCount = articleParagraphs[i].getElementsByTagName('object').length; + var articleParagraphs = articleContent.getElementsByTagName('p'); + for (var i = articleParagraphs.length - 1; i >= 0; i -= 1) { + var imgCount = articleParagraphs[i].getElementsByTagName('img').length; + var embedCount = articleParagraphs[i].getElementsByTagName('embed').length; + var objectCount = articleParagraphs[i].getElementsByTagName('object').length; if (imgCount === 0 && embedCount === 0 && @@ -339,10 +339,10 @@ Readability.prototype = { articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]); } - let brs = articleContent.getElementsByTagName("BR"); - for (let i = brs.length; --i >= 0;) { - let br = brs[i]; - let next = this._nextElement(br.nextSibling); + var brs = articleContent.getElementsByTagName("BR"); + for (var i = brs.length; --i >= 0;) { + var br = brs[i]; + var next = this._nextElement(br.nextSibling); if (next && next.tagName == "P") br.parentNode.removeChild(br); } @@ -402,17 +402,17 @@ Readability.prototype = { * @return Element **/ _grabArticle: function (page) { - let doc = this._doc; - let isPaging = (page !== null ? true: false); + var doc = this._doc; + var isPaging = (page !== null ? true: false); page = page ? 
page : this._doc.body; - let pageCacheHtml = page.innerHTML; + var pageCacheHtml = page.innerHTML; // Check if any "dir" is set on the toplevel document element this._articleDir = doc.documentElement.getAttribute("dir"); while (true) { - let stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS); - let allElements = page.getElementsByTagName('*'); + var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS); + var allElements = page.getElementsByTagName('*'); // First, node prepping. Trash nodes that look cruddy (like ones with the // class name "comment", etc), and turn divs into P tags where they have been @@ -420,11 +420,11 @@ Readability.prototype = { // // Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 // TODO: Shouldn't this be a reverse traversal? - let node = null; - let nodesToScore = []; + var node = null; + var nodesToScore = []; - // Let each node know its index in the allElements array. - for (let i = allElements.length; --i >= 0;) { + // Let each node know its index in the allElements array. + for (var i = allElements.length; --i >= 0;) { allElements[i]._index = i; } @@ -434,18 +434,18 @@ Readability.prototype = { * of its children - from the allElements array. 
*/ function purgeNode(node) { - for (let i = node.childNodes.length; --i >= 0;) { + for (var i = node.childNodes.length; --i >= 0;) { purgeNode(node.childNodes[i]); } if (node._index !== undefined && allElements[node._index] == node) delete allElements[node._index]; } - for (let nodeIndex = 0; nodeIndex < allElements.length; nodeIndex++) { + for (var nodeIndex = 0; nodeIndex < allElements.length; nodeIndex++) { if (!(node = allElements[nodeIndex])) continue; - let matchString = node.className + node.id; + var matchString = node.className + node.id; if (matchString.search(this.REGEXPS.byline) !== -1 && !this._articleByline) { if (this._isValidByline(node.textContent)) { this._articleByline = node.textContent.trim(); @@ -476,11 +476,11 @@ Readability.prototype = { // element. DIVs with only a P element inside and no text content can be // safely converted into plain P elements to avoid confusing the scoring // algorithm with DIVs with are, in practice, paragraphs. - let pIndex = this._getSinglePIndexInsideDiv(node); + var pIndex = this._getSinglePIndexInsideDiv(node); if (pIndex >= 0 || !this._hasChildBlockElement(node)) { if (pIndex >= 0) { - let newNode = node.childNodes[pIndex]; + var newNode = node.childNodes[pIndex]; node.parentNode.replaceChild(newNode, node); purgeNode(node); } else { @@ -489,13 +489,13 @@ Readability.prototype = { } } else { // EXPERIMENTAL - for (let i = 0, il = node.childNodes.length; i < il; i += 1) { - let childNode = node.childNodes[i]; + for (var i = 0, il = node.childNodes.length; i < il; i += 1) { + var childNode = node.childNodes[i]; if (!childNode) continue; if (childNode.nodeType === 3) { // Node.TEXT_NODE - let p = doc.createElement('p'); + var p = doc.createElement('p'); p.textContent = childNode.textContent; p.style.display = 'inline'; p.className = 'readability-styled'; @@ -512,11 +512,11 @@ Readability.prototype = { * * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. 
**/ - let candidates = []; - for (let pt = 0; pt < nodesToScore.length; pt += 1) { - let parentNode = nodesToScore[pt].parentNode; - let grandParentNode = parentNode ? parentNode.parentNode : null; - let innerText = this._getInnerText(nodesToScore[pt]); + var candidates = []; + for (var pt = 0; pt < nodesToScore.length; pt += 1) { + var parentNode = nodesToScore[pt].parentNode; + var grandParentNode = parentNode ? parentNode.parentNode : null; + var innerText = this._getInnerText(nodesToScore[pt]); if (!parentNode || typeof(parentNode.tagName) === 'undefined') continue; @@ -539,7 +539,7 @@ Readability.prototype = { candidates.push(grandParentNode); } - let contentScore = 0; + var contentScore = 0; // Add a point for the paragraph itself as a base. contentScore += 1; @@ -559,21 +559,21 @@ Readability.prototype = { // After we've calculated scores, loop through all of the possible // candidate nodes we found and find the one with the highest score. - let topCandidates = []; - for (let c = 0, cl = candidates.length; c < cl; c += 1) { - let candidate = candidates[c]; + var topCandidates = []; + for (var c = 0, cl = candidates.length; c < cl; c += 1) { + var candidate = candidates[c]; // Scale the final candidates score based on link density. Good content // should have a relatively small link density (5% or less) and be mostly // unaffected by this operation. 
- let candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); + var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); candidate.readability.contentScore = candidateScore; this.log('Candidate: ' + candidate + " (" + candidate.className + ":" + candidate.id + ") with score " + candidateScore); - for (let t = 0; t < this.N_TOP_CANDIDATES; t++) { - let aTopCandidate = topCandidates[t]; + for (var t = 0; t < this.N_TOP_CANDIDATES; t++) { + var aTopCandidate = topCandidates[t]; if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) { topCandidates.splice(t, 0, candidate); @@ -584,15 +584,15 @@ Readability.prototype = { } } - let topCandidate = topCandidates[0] || null; + var topCandidate = topCandidates[0] || null; // If we still have no top candidate, just use the body as a last resort. // We also have to copy the body node so it is something we can modify. if (topCandidate === null || topCandidate.tagName === "BODY") { // Move all of the page's children into topCandidate topCandidate = doc.createElement("DIV"); - let children = page.childNodes; - for (let i = 0; i < children.length; ++i) { + var children = page.childNodes; + for (var i = 0; i < children.length; ++i) { topCandidate.appendChild(children[i]); } @@ -604,16 +604,16 @@ Readability.prototype = { // Now that we have the top candidate, look through its siblings for content // that might also be related. Things like preambles, content split by ads // that we removed, etc. 
- let articleContent = doc.createElement("DIV"); + var articleContent = doc.createElement("DIV"); if (isPaging) articleContent.id = "readability-content"; - let siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); - let siblingNodes = topCandidate.parentNode.childNodes; + var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); + var siblingNodes = topCandidate.parentNode.childNodes; - for (let s = 0, sl = siblingNodes.length; s < sl; s += 1) { - let siblingNode = siblingNodes[s]; - let append = false; + for (var s = 0, sl = siblingNodes.length; s < sl; s += 1) { + var siblingNode = siblingNodes[s]; + var append = false; this.log("Looking at sibling node: " + siblingNode + " (" + siblingNode.className + ":" + siblingNode.id + ")" + ((typeof siblingNode.readability !== 'undefined') ? (" with score " + siblingNode.readability.contentScore) : '')); this.log("Sibling has score " + (siblingNode.readability ? siblingNode.readability.contentScore : 'Unknown')); @@ -621,7 +621,7 @@ Readability.prototype = { if (siblingNode === topCandidate) append = true; - let contentBonus = 0; + var contentBonus = 0; // Give a bonus if sibling nodes and top candidates have the example same classname if (siblingNode.className === topCandidate.className && topCandidate.className !== "") @@ -632,9 +632,9 @@ Readability.prototype = { append = true; if (siblingNode.nodeName === "P") { - let linkDensity = this._getLinkDensity(siblingNode); - let nodeContent = this._getInnerText(siblingNode); - let nodeLength = nodeContent.length; + var linkDensity = this._getLinkDensity(siblingNode); + var nodeContent = this._getInnerText(siblingNode); + var nodeLength = nodeContent.length; if (nodeLength > 80 && linkDensity < 0.25) { append = true; @@ -675,11 +675,11 @@ Readability.prototype = { this._prepArticle(articleContent); if (this._curPageNum === 1) { - let div = doc.createElement("DIV"); + var div = doc.createElement("DIV"); div.id = 
"readability-page-1"; div.className = "page"; - let children = articleContent.childNodes; - for (let i = 0; i < children.length; ++i) { + var children = articleContent.childNodes; + for (var i = 0; i < children.length; ++i) { div.appendChild(children[i]); } articleContent.appendChild(div); @@ -738,23 +738,23 @@ Readability.prototype = { * @return String - excerpt of the article **/ _getExcerpt: function(articleContent) { - let values = {}; - let metaElements = this._doc.getElementsByTagName("meta"); + var values = {}; + var metaElements = this._doc.getElementsByTagName("meta"); // Match "description", or Twitter's "twitter:description" (Cards) // in name attribute. - let namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi; + var namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi; // Match Facebook's og:description (Open Graph) in property attribute. - let propertyPattern = /^\s*og\s*:\s*description\s*$/gi; + var propertyPattern = /^\s*og\s*:\s*description\s*$/gi; // Find description tags. - for (let i = 0; i < metaElements.length; i++) { - let element = metaElements[i]; - let elementName = element.getAttribute("name"); - let elementProperty = element.getAttribute("property"); + for (var i = 0; i < metaElements.length; i++) { + var element = metaElements[i]; + var elementName = element.getAttribute("name"); + var elementProperty = element.getAttribute("property"); - let name; + var name; if (namePattern.test(elementName)) { name = elementName; } else if (propertyPattern.test(elementProperty)) { @@ -762,7 +762,7 @@ Readability.prototype = { } if (name) { - let content = element.getAttribute("content"); + var content = element.getAttribute("content"); if (content) { // Convert to lowercase and remove any whitespace // so we can match below. @@ -787,7 +787,7 @@ Readability.prototype = { } // No description meta tags, use the article's first paragraph. 
- let paragraphs = articleContent.getElementsByTagName("p"); + var paragraphs = articleContent.getElementsByTagName("p"); if (paragraphs.length > 0) { return paragraphs[0].textContent; } @@ -801,8 +801,8 @@ Readability.prototype = { * @param Element **/ _removeScripts: function(doc) { - let scripts = doc.getElementsByTagName('script'); - for (let i = scripts.length - 1; i >= 0; i -= 1) { + var scripts = doc.getElementsByTagName('script'); + for (var i = scripts.length - 1; i >= 0; i -= 1) { scripts[i].nodeValue=""; scripts[i].removeAttribute('src'); @@ -819,11 +819,11 @@ Readability.prototype = { * @param Element **/ _getSinglePIndexInsideDiv: function(e) { - let childNodes = e.childNodes; - let pIndex = -1; + var childNodes = e.childNodes; + var pIndex = -1; - for (let i = childNodes.length; --i >= 0;) { - let node = childNodes[i]; + for (var i = childNodes.length; --i >= 0;) { + var node = childNodes[i]; if (node.nodeType === Node.ELEMENT_NODE) { if (node.tagName !== "P") @@ -843,13 +843,13 @@ Readability.prototype = { /** * Determine whether element has any children block level elements. - * + * * @param Element */ _hasChildBlockElement: function (e) { - let length = e.childNodes.length; - for (let i = 0; i < length; i++) { - let child = e.childNodes[i]; + var length = e.childNodes.length; + for (var i = 0; i < length; i++) { + var child = e.childNodes[i]; if (child.nodeType != 1) continue; @@ -867,7 +867,7 @@ Readability.prototype = { * @return string **/ _getInnerText: function(e, normalizeSpaces) { - let textContent = e.textContent.replace(this.REGEXPS.trim, ""); + var textContent = e.textContent.replace(this.REGEXPS.trim, ""); normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? 
true : normalizeSpaces; if (normalizeSpaces) { @@ -898,7 +898,7 @@ Readability.prototype = { **/ _cleanStyles: function(e) { e = e || this._doc; - let cur = e.firstChild; + var cur = e.firstChild; if (!e) return; @@ -929,11 +929,11 @@ Readability.prototype = { * @return number (float) **/ _getLinkDensity: function(e) { - let links = e.getElementsByTagName("a"); - let textLength = this._getInnerText(e).length; - let linkLength = 0; + var links = e.getElementsByTagName("a"); + var textLength = this._getInnerText(e).length; + var linkLength = 0; - for (let i = 0, il = links.length; i < il; i += 1) { + for (var i = 0, il = links.length; i < il; i += 1) { linkLength += this._getInnerText(links[i]).length; } @@ -947,14 +947,14 @@ Readability.prototype = { * @return string the base url **/ _findBaseUrl: function() { - let uri = this._uri; - let noUrlParams = uri.path.split("?")[0]; - let urlSlashes = noUrlParams.split("/").reverse(); - let cleanedSegments = []; - let possibleType = ""; + var uri = this._uri; + var noUrlParams = uri.path.split("?")[0]; + var urlSlashes = noUrlParams.split("/").reverse(); + var cleanedSegments = []; + var possibleType = ""; - for (let i = 0, slashLen = urlSlashes.length; i < slashLen; i += 1) { - let segment = urlSlashes[i]; + for (var i = 0, slashLen = urlSlashes.length; i < slashLen; i += 1) { + var segment = urlSlashes[i]; // Split off and save anything that looks like a file type. if (segment.indexOf(".") !== -1) { @@ -974,7 +974,7 @@ Readability.prototype = { if (segment.match(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i) && ((i === 1) || (i === 0))) segment = segment.replace(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i, ""); - let del = false; + var del = false; // If this is purely a number, and it's the first or second segment, // it's probably a page number. Remove it. 
@@ -1006,10 +1006,10 @@ Readability.prototype = { * @return object (array) **/ _findNextPageLink: function(elem) { - let uri = this._uri; - let possiblePages = {}; - let allLinks = elem.getElementsByTagName('a'); - let articleBaseUrl = this._findBaseUrl(); + var uri = this._uri; + var possiblePages = {}; + var allLinks = elem.getElementsByTagName('a'); + var articleBaseUrl = this._findBaseUrl(); // Loop through all links, looking for hints that they may be next-page links. // Things like having "page" in their textContent, className or id, or being a child @@ -1018,9 +1018,9 @@ Readability.prototype = { // Also possible: levenshtein distance? longest common subsequence? // // After we do that, assign each page a score, and - for (let i = 0, il = allLinks.length; i < il; i += 1) { - let link = allLinks[i]; - let linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, ''); + for (var i = 0, il = allLinks.length; i < il; i += 1) { + var link = allLinks[i]; + var linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, ''); // If we've already seen this page, ignore it. if (linkHref === "" || @@ -1034,7 +1034,7 @@ Readability.prototype = { if (uri.host !== linkHref.split(/\/+/g)[1]) continue; - let linkText = this._getInnerText(link); + var linkText = this._getInnerText(link); // If the linkText looks like it's not the next page, skip it. if (linkText.match(this.REGEXPS.extraneous) || linkText.length > 25) @@ -1042,7 +1042,7 @@ Readability.prototype = { // If the leftovers of the URL after removing the base URL don't contain // any digits, it's certainly not a next page link. 
- let linkHrefLeftover = linkHref.replace(articleBaseUrl, ''); + var linkHrefLeftover = linkHref.replace(articleBaseUrl, ''); if (!linkHrefLeftover.match(/\d/)) continue; @@ -1052,7 +1052,7 @@ Readability.prototype = { possiblePages[linkHref].linkText += ' | ' + linkText; } - let linkObj = possiblePages[linkHref]; + var linkObj = possiblePages[linkHref]; // If the articleBaseUrl isn't part of this URL, penalize this link. It could // still be the link, but the odds are lower. @@ -1060,7 +1060,7 @@ Readability.prototype = { if (linkHref.indexOf(articleBaseUrl) !== 0) linkObj.score -= 25; - let linkData = linkText + ' ' + link.className + ' ' + link.id; + var linkData = linkText + ' ' + link.className + ' ' + link.id; if (linkData.match(this.REGEXPS.nextLink)) linkObj.score += 50; @@ -1082,12 +1082,12 @@ Readability.prototype = { linkObj.score -= 200; // If a parentNode contains page or paging or paginat - let parentNode = link.parentNode; - let positiveNodeMatch = false; - let negativeNodeMatch = false; + var parentNode = link.parentNode; + var positiveNodeMatch = false; + var negativeNodeMatch = false; while (parentNode) { - let parentNodeClassAndId = parentNode.className + ' ' + parentNode.id; + var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id; if (!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) { positiveNodeMatch = true; @@ -1127,7 +1127,7 @@ Readability.prototype = { // If the link text can be parsed as a number, give it a minor bonus, with a slight // bias towards lower numbered pages. This is so that pages that might not have 'next' // in their text can still get scored, and sorted properly by score. - let linkTextAsNumber = parseInt(linkText, 10); + var linkTextAsNumber = parseInt(linkText, 10); if (linkTextAsNumber) { // Punish 1 since we're either already there, or it's probably // before what we want anyways. 
@@ -1142,8 +1142,8 @@ Readability.prototype = { // Loop thrugh all of our possible pages from above and find our top // candidate for the next page URL. Require at least a score of 50, which // is a relatively high confidence that this page is the next link. - let topPage = null; - for (let page in possiblePages) { + var topPage = null; + for (var page in possiblePages) { if (possiblePages.hasOwnProperty(page)) { if (possiblePages[page].score >= 50 && (!topPage || topPage.score < possiblePages[page].score)) @@ -1152,7 +1152,7 @@ Readability.prototype = { } if (topPage) { - let nextHref = topPage.href.replace(/\/$/,''); + var nextHref = topPage.href.replace(/\/$/,''); this.log('NEXT PAGE IS ' + nextHref); this._parsedPages[nextHref] = true; @@ -1169,7 +1169,7 @@ Readability.prototype = { }, _ajax: function(url, options) { - let request = new XMLHttpRequest(); + var request = new XMLHttpRequest(); function respondToReadyState(readyState) { if (request.readyState === 4) { @@ -1202,10 +1202,10 @@ Readability.prototype = { }, _appendNextPage: function(nextPageLink) { - let doc = this._doc; + var doc = this._doc; this._curPageNum += 1; - let articlePage = doc.createElement("DIV"); + var articlePage = doc.createElement("DIV"); articlePage.id = 'readability-page-' + this._curPageNum; articlePage.className = 'page'; articlePage.innerHTML = '

§

'; @@ -1213,7 +1213,7 @@ Readability.prototype = { doc.getElementById("readability-content").appendChild(articlePage); if (this._curPageNum > this.MAX_PAGES) { - let nextPageMarkup = "
View Next Page
"; + var nextPageMarkup = "
View Next Page
"; articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup; return; } @@ -1225,7 +1225,7 @@ Readability.prototype = { success: function(r) { // First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. - let eTag = r.getResponseHeader('ETag'); + var eTag = r.getResponseHeader('ETag'); if (eTag) { if (eTag in this._pageETags) { this.log("Exact duplicate page found via ETag. Aborting."); @@ -1237,7 +1237,7 @@ Readability.prototype = { } // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. - let page = doc.createElement("DIV"); + var page = doc.createElement("DIV"); // Do some preprocessing to our HTML to make it ready for appending. // - Remove any script tags. Swap and reswap newlines with a unicode @@ -1247,7 +1247,7 @@ Readability.prototype = { // - Turn all double br's into p's - was handled by prepDocument in the original view. // Maybe in the future abstract out prepDocument to work for both the original document // and AJAX-added pages. - let responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, ''); + var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, ''); responseHtml = responseHtml.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, ''); responseHtml = responseHtml.replace(/\uffff/g,'\n').replace(/<(\/?)noscript/gi, '<$1div'); responseHtml = responseHtml.replace(this.REGEXPS.replaceFonts, '<$1span>'); @@ -1259,11 +1259,11 @@ Readability.prototype = { // disable as necessary at the end of grabArticle. this._flags = 0x1 | 0x2 | 0x4; - let nextPageLink = this._findNextPageLink(page); - + var nextPageLink = this._findNextPageLink(page); + // NOTE: if we end up supporting _appendNextPage(), we'll need to // change this call to be async - let content = this._grabArticle(page); + var content = this._grabArticle(page); if (!content) { this.log("No content found in page to append. 
Aborting."); @@ -1273,10 +1273,10 @@ Readability.prototype = { // Anti-duplicate mechanism. Essentially, get the first paragraph of our new page. // Compare it against all of the the previous document's we've gotten. If the previous // document contains exactly the innerHTML of this first paragraph, it's probably a duplicate. - let firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null; + var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null; if (firstP && firstP.innerHTML.length > 100) { - for (let i = 1; i <= this._curPageNum; i += 1) { - let rPage = doc.getElementById('readability-page-' + i); + for (var i = 1; i <= this._curPageNum; i += 1) { + var rPage = doc.getElementById('readability-page-' + i); if (rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) { this.log('Duplicate of page ' + i + ' - skipping.'); articlePage.style.display = 'none'; @@ -1306,7 +1306,7 @@ Readability.prototype = { }, /** - * Get an elements class/id weight. Uses regular expressions to tell if this + * Get an elements class/id weight. Uses regular expressions to tell if this * element looks good or bad. * * @param Element @@ -1316,7 +1316,7 @@ Readability.prototype = { if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) return 0; - let weight = 0; + var weight = 0; // Look for a special classname if (typeof(e.className) === 'string' && e.className !== '') { @@ -1348,14 +1348,14 @@ Readability.prototype = { * @return void **/ _clean: function(e, tag) { - let targetList = e.getElementsByTagName(tag); - let isEmbed = (tag === 'object' || tag === 'embed'); + var targetList = e.getElementsByTagName(tag); + var isEmbed = (tag === 'object' || tag === 'embed'); - for (let y = targetList.length - 1; y >= 0; y -= 1) { + for (var y = targetList.length - 1; y >= 0; y -= 1) { // Allow youtube and vimeo videos through as people usually want to see those. 
if (isEmbed) { - let attributeValues = ""; - for (let i = 0, il = targetList[y].attributes.length; i < il; i += 1) { + var attributeValues = ""; + for (var i = 0, il = targetList[y].attributes.length; i < il; i += 1) { attributeValues += targetList[y].attributes[i].value + '|'; } @@ -1382,17 +1382,17 @@ Readability.prototype = { if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) return; - let tagsList = e.getElementsByTagName(tag); - let curTagsLength = tagsList.length; + var tagsList = e.getElementsByTagName(tag); + var curTagsLength = tagsList.length; // Gather counts for other typical elements embedded within. // Traverse backwards so we can remove nodes at the same time // without effecting the traversal. // // TODO: Consider taking into account original contentScore here. - for (let i = curTagsLength-1; i >= 0; i -= 1) { - let weight = this._getClassWeight(tagsList[i]); - let contentScore = 0; + for (var i = curTagsLength-1; i >= 0; i -= 1) { + var weight = this._getClassWeight(tagsList[i]); + var contentScore = 0; this.log("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")"); @@ -1402,21 +1402,21 @@ Readability.prototype = { // If there are not very many commas, and the number of // non-paragraph elements is more than paragraphs or other // ominous signs, remove the element. 
- let p = tagsList[i].getElementsByTagName("p").length; - let img = tagsList[i].getElementsByTagName("img").length; - let li = tagsList[i].getElementsByTagName("li").length-100; - let input = tagsList[i].getElementsByTagName("input").length; - - let embedCount = 0; - let embeds = tagsList[i].getElementsByTagName("embed"); - for (let ei = 0, il = embeds.length; ei < il; ei += 1) { + var p = tagsList[i].getElementsByTagName("p").length; + var img = tagsList[i].getElementsByTagName("img").length; + var li = tagsList[i].getElementsByTagName("li").length-100; + var input = tagsList[i].getElementsByTagName("input").length; + + var embedCount = 0; + var embeds = tagsList[i].getElementsByTagName("embed"); + for (var ei = 0, il = embeds.length; ei < il; ei += 1) { if (embeds[ei].src.search(this.REGEXPS.videos) === -1) embedCount += 1; } - let linkDensity = this._getLinkDensity(tagsList[i]); - let contentLength = this._getInnerText(tagsList[i]).length; - let toRemove = false; + var linkDensity = this._getLinkDensity(tagsList[i]); + var contentLength = this._getInnerText(tagsList[i]).length; + var toRemove = false; if (img > p) { toRemove = true; @@ -1447,9 +1447,9 @@ Readability.prototype = { * @return void **/ _cleanHeaders: function(e) { - for (let headerIndex = 1; headerIndex < 3; headerIndex += 1) { - let headers = e.getElementsByTagName('h' + headerIndex); - for (let i = headers.length - 1; i >= 0; i -= 1) { + for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) { + var headers = e.getElementsByTagName('h' + headerIndex); + for (var i = headers.length - 1; i >= 0; i -= 1) { if (this._getClassWeight(headers[i]) < 0 || this._getLinkDensity(headers[i]) > 0.33) headers[i].parentNode.removeChild(headers[i]); } @@ -1492,12 +1492,12 @@ Readability.prototype = { // this._parsedPages[uri.spec.replace(/\/$/, '')] = true; // Pull out any possible next page link first. 
- // let nextPageLink = this._findNextPageLink(doc.body); + // var nextPageLink = this._findNextPageLink(doc.body); this._prepDocument(); - let articleTitle = this._getArticleTitle(); - let articleContent = this._grabArticle(); + var articleTitle = this._getArticleTitle(); + var articleContent = this._grabArticle(); if (!articleContent) return null; @@ -1511,7 +1511,7 @@ Readability.prototype = { // }).bind(this), 500); // } - let excerpt = this._getExcerpt(articleContent); + var excerpt = this._getExcerpt(articleContent); return { title: articleTitle, byline: this._articleByline,