From d9f7bb29653fef05ad08b0f82d245e4aedcc46d8 Mon Sep 17 00:00:00 2001 From: Gijs Kruitbosch Date: Sat, 14 Jul 2018 14:28:41 +0100 Subject: [PATCH 1/4] Fix quotes --- JSDOMParser.js | 4 +- Readability.js | 116 ++++++++++++++++++++++++------------------------- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/JSDOMParser.js b/JSDOMParser.js index cf15bc0..8a761d5 100644 --- a/JSDOMParser.js +++ b/JSDOMParser.js @@ -684,7 +684,7 @@ // the attribute value will be HTML escaped. var val = attr.value; var quote = (val.indexOf('"') === -1 ? '"' : "'"); - arr.push(" " + attr.name + '=' + quote + val + quote); + arr.push(" " + attr.name + "=" + quote + val + quote); } if (child.localName in voidElems && !child.childNodes.length) { @@ -963,7 +963,7 @@ strBuf.push(c); c = this.nextChar(); } - var tag = strBuf.join(''); + var tag = strBuf.join(""); if (!tag) return false; diff --git a/Readability.js b/Readability.js index 1d26000..c34f67c 100644 --- a/Readability.js +++ b/Readability.js @@ -260,7 +260,7 @@ Readability.prototype = { _getAllNodesWithTag: function(node, tagNames) { if (node.querySelectorAll) { - return node.querySelectorAll(tagNames.join(',')); + return node.querySelectorAll(tagNames.join(",")); } return [].concat.apply([], tagNames.map(function(tag) { var collection = node.getElementsByTagName(tag); @@ -359,7 +359,7 @@ Readability.prototype = { // If they had an element with id "title" in their HTML if (typeof curTitle !== "string") - curTitle = origTitle = this._getInnerText(doc.getElementsByTagName('title')[0]); + curTitle = origTitle = this._getInnerText(doc.getElementsByTagName("title")[0]); } catch (e) {/* ignore exceptions setting the title. */} var titleHadHierarchicalSeparators = false; @@ -370,18 +370,18 @@ Readability.prototype = { // If there's a separator in the title, first remove the final part if ((/ [\|\-\\\/>»] /).test(curTitle)) { titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle); - curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, '$1'); + curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, "$1"); // If the resulting title is too short (3 words or fewer), remove // the first part instead: if (wordCount(curTitle) < 3) - curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, '$1'); - } else if (curTitle.indexOf(': ') !== -1) { + curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, "$1"); + } else if (curTitle.indexOf(": ") !== -1) { // Check if we have an heading containing this exact string, so we // could assume it's the full title. var headings = this._concatNodeLists( - doc.getElementsByTagName('h1'), - doc.getElementsByTagName('h2') + doc.getElementsByTagName("h1"), + doc.getElementsByTagName("h2") ); var trimmedTitle = curTitle.trim(); var match = this._someNode(headings, function(heading) { @@ -390,19 +390,19 @@ Readability.prototype = { // If we don't, let's extract the title out of the original title string. if (!match) { - curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1); + curTitle = origTitle.substring(origTitle.lastIndexOf(":") + 1); // If the title is now too short, try the first colon instead: if (wordCount(curTitle) < 3) { - curTitle = origTitle.substring(origTitle.indexOf(':') + 1); + curTitle = origTitle.substring(origTitle.indexOf(":") + 1); // But if we have too many words before the colon there's something weird // with the titles and the H tags so let's just use the original title instead - } else if (wordCount(origTitle.substr(0, origTitle.indexOf(':'))) > 5) { + } else if (wordCount(origTitle.substr(0, origTitle.indexOf(":"))) > 5) { curTitle = origTitle; } } } else if (curTitle.length > 150 || curTitle.length < 15) { - var hOnes = doc.getElementsByTagName('h1'); + var hOnes = doc.getElementsByTagName("h1"); if (hOnes.length === 1) curTitle = this._getInnerText(hOnes[0]); @@ -569,7 +569,7 @@ Readability.prototype = { // If there is only one h2 and its text content substantially equals article title, // they are probably using it as a header and not a subheader, // so remove it since we already extract the title separately. - var h2 = articleContent.getElementsByTagName('h2'); + var h2 = articleContent.getElementsByTagName("h2"); if (h2.length === 1) { var lengthSimilarRate = (h2[0].textContent.length - this._articleTitle.length) / this._articleTitle.length; if (Math.abs(lengthSimilarRate) < 0.5) { @@ -599,12 +599,12 @@ Readability.prototype = { this._cleanConditionally(articleContent, "div"); // Remove extra paragraphs - this._removeNodes(articleContent.getElementsByTagName('p'), function (paragraph) { - var imgCount = paragraph.getElementsByTagName('img').length; - var embedCount = paragraph.getElementsByTagName('embed').length; - var objectCount = paragraph.getElementsByTagName('object').length; + this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) { + var imgCount = paragraph.getElementsByTagName("img").length; + var embedCount = paragraph.getElementsByTagName("embed").length; + var objectCount = paragraph.getElementsByTagName("object").length; // At this point, nasty iframes have been removed, only remain embedded video ones. - var iframeCount = paragraph.getElementsByTagName('iframe').length; + var iframeCount = paragraph.getElementsByTagName("iframe").length; var totalCount = imgCount + embedCount + objectCount + iframeCount; return totalCount === 0 && !this._getInnerText(paragraph, false); @@ -641,34 +641,34 @@ Readability.prototype = { node.readability = {"contentScore": 0}; switch (node.tagName) { - case 'DIV': + case "DIV": node.readability.contentScore += 5; break; - case 'PRE': - case 'TD': - case 'BLOCKQUOTE': + case "PRE": + case "TD": + case "BLOCKQUOTE": node.readability.contentScore += 3; break; - case 'ADDRESS': - case 'OL': - case 'UL': - case 'DL': - case 'DD': - case 'DT': - case 'LI': - case 'FORM': + case "ADDRESS": + case "OL": + case "UL": + case "DL": + case "DD": + case "DT": + case "LI": + case "FORM": node.readability.contentScore -= 3; break; - case 'H1': - case 'H2': - case 'H3': - case 'H4': - case 'H5': - case 'H6': - case 'TH': + case "H1": + case "H2": + case "H3": + case "H4": + case "H5": + case "H6": + case "TH": node.readability.contentScore -= 5; break; } @@ -817,7 +817,7 @@ Readability.prototype = { if (p !== null) { p.appendChild(childNode); } else if (!this._isWhitespace(childNode)) { - p = doc.createElement('p'); + p = doc.createElement("p"); node.replaceChild(p, childNode); p.appendChild(childNode); } @@ -853,7 +853,7 @@ Readability.prototype = { **/ var candidates = []; this._forEachNode(elementsToScore, function(elementToScore) { - if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === 'undefined') + if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === "undefined") return; // If this paragraph is less than 25 characters, don't even count it. @@ -872,17 +872,17 @@ Readability.prototype = { contentScore += 1; // Add points for any commas within this paragraph. - contentScore += innerText.split(',').length; + contentScore += innerText.split(",").length; // For every 100 characters in this paragraph, add another point. Up to 3 points. contentScore += Math.min(Math.floor(innerText.length / 100), 3); // Initialize and score ancestors. this._forEachNode(ancestors, function(ancestor, level) { - if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === 'undefined') + if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === "undefined") return; - if (typeof(ancestor.readability) === 'undefined') { + if (typeof(ancestor.readability) === "undefined") { this._initializeNode(ancestor); candidates.push(ancestor); } @@ -913,7 +913,7 @@ Readability.prototype = { var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); candidate.readability.contentScore = candidateScore; - this.log('Candidate:', candidate, "with score " + candidateScore); + this.log("Candidate:", candidate, "with score " + candidateScore); for (var t = 0; t < this._nbTopCandidates; t++) { var aTopCandidate = topCandidates[t]; @@ -1032,8 +1032,8 @@ Readability.prototype = { var sibling = siblings[s]; var append = false; - this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : ''); - this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : 'Unknown'); + this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : ""); + this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : "Unknown"); if (sibling === topCandidate) { append = true; @@ -1067,7 +1067,7 @@ Readability.prototype = { if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) { // We have a node that isn't a common block level element, like a form or td tag. // Turn it into a div so it doesn't get filtered out later by accident. - this.log("Altering sibling:", sibling, 'to div.'); + this.log("Altering sibling:", sibling, "to div."); sibling = this._setNodeTag(sibling, "DIV"); } @@ -1175,7 +1175,7 @@ Readability.prototype = { * @return Boolean - whether the input string is a byline. */ _isValidByline: function(byline) { - if (typeof byline == 'string' || byline instanceof String) { + if (typeof byline == "string" || byline instanceof String) { byline = byline.trim(); return (byline.length > 0) && (byline.length < 100); } @@ -1221,7 +1221,7 @@ Readability.prototype = { if (content) { // Convert to lowercase and remove any whitespace // so we can match below. - name = name.toLowerCase().replace(/\s/g, ''); + name = name.toLowerCase().replace(/\s/g, ""); values[name] = content.trim(); } } @@ -1257,12 +1257,12 @@ Readability.prototype = { * @param Element **/ _removeScripts: function(doc) { - this._removeNodes(doc.getElementsByTagName('script'), function(scriptNode) { + this._removeNodes(doc.getElementsByTagName("script"), function(scriptNode) { scriptNode.nodeValue = ""; - scriptNode.removeAttribute('src'); + scriptNode.removeAttribute("src"); return true; }); - this._removeNodes(doc.getElementsByTagName('noscript')); + this._removeNodes(doc.getElementsByTagName("noscript")); }, /** @@ -1329,7 +1329,7 @@ Readability.prototype = { * @return string **/ _getInnerText: function(e, normalizeSpaces) { - normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces; + normalizeSpaces = (typeof normalizeSpaces === "undefined") ? true : normalizeSpaces; var textContent = e.textContent.trim(); if (normalizeSpaces) { @@ -1358,7 +1358,7 @@ Readability.prototype = { * @return void **/ _cleanStyles: function(e) { - if (!e || e.tagName.toLowerCase() === 'svg') + if (!e || e.tagName.toLowerCase() === "svg") return; // Remove `style` and deprecated presentational attributes @@ -1367,8 +1367,8 @@ Readability.prototype = { } if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e.tagName) !== -1) { - e.removeAttribute('width'); - e.removeAttribute('height'); + e.removeAttribute("width"); + e.removeAttribute("height"); } var cur = e.firstElementChild; @@ -1414,7 +1414,7 @@ Readability.prototype = { var weight = 0; // Look for a special classname - if (typeof(e.className) === 'string' && e.className !== '') { + if (typeof(e.className) === "string" && e.className !== "") { if (this.REGEXPS.negative.test(e.className)) weight -= 25; @@ -1423,7 +1423,7 @@ Readability.prototype = { } // Look for a special ID - if (typeof(e.id) === 'string' && e.id !== '') { + if (typeof(e.id) === "string" && e.id !== "") { if (this.REGEXPS.negative.test(e.id)) weight -= 25; @@ -1612,7 +1612,7 @@ Readability.prototype = { return true; } - if (this._getCharCount(node, ',') < 10) { + if (this._getCharCount(node, ",") < 10) { // If there are not very many commas, and the number of // non-paragraph elements is more than paragraphs or other // ominous signs, remove the element. @@ -1672,7 +1672,7 @@ Readability.prototype = { **/ _cleanHeaders: function(e) { for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) { - this._removeNodes(e.getElementsByTagName('h' + headerIndex), function (header) { + this._removeNodes(e.getElementsByTagName("h" + headerIndex), function (header) { return this._getClassWeight(header) < 0; }); } From 7cf95bd427a3ebb8877e00c25a80423f8fbcd7cd Mon Sep 17 00:00:00 2001 From: Gijs Kruitbosch Date: Sat, 14 Jul 2018 22:09:00 +0100 Subject: [PATCH 2/4] Fix same-line loops and if statements --- JSDOMParser.js | 4 +++- Readability.js | 14 ++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/JSDOMParser.js b/JSDOMParser.js index 8a761d5..bbf8ac9 100644 --- a/JSDOMParser.js +++ b/JSDOMParser.js @@ -974,7 +974,9 @@ while (c !== "/" && c !== ">") { if (c === undefined) return false; - while (whitespace.indexOf(this.html[this.currentChar++]) != -1); + while (whitespace.indexOf(this.html[this.currentChar++]) != -1) { + // Advance cursor to first non-whitespace char. + } this.currentChar--; c = this.nextChar(); if (c !== "/" && c !== ">") { diff --git a/Readability.js b/Readability.js index c34f67c..daf3bdb 100644 --- a/Readability.js +++ b/Readability.js @@ -498,7 +498,8 @@ Readability.prototype = { break; } - if (!this._isPhrasingContent(next)) break; + if (!this._isPhrasingContent(next)) + break; // Otherwise, make this node a child of the new

. var sibling = next.nextSibling; @@ -506,9 +507,12 @@ Readability.prototype = { next = sibling; } - while (p.lastChild && this._isWhitespace(p.lastChild)) p.removeChild(p.lastChild); + while (p.lastChild && this._isWhitespace(p.lastChild)) { + p.removeChild(p.lastChild); + } - if (p.parentNode.tagName === "P") this._setNodeTag(p.parentNode, "DIV"); + if (p.parentNode.tagName === "P") + this._setNodeTag(p.parentNode, "DIV"); } }); }, @@ -822,7 +826,9 @@ Readability.prototype = { p.appendChild(childNode); } } else if (p !== null) { - while (p.lastChild && this._isWhitespace(p.lastChild)) p.removeChild(p.lastChild); + while (p.lastChild && this._isWhitespace(p.lastChild)) { + p.removeChild(p.lastChild); + } p = null; } childNode = nextSibling; From f511d1aa2b34e49fee16872e1ee26745f962c2a5 Mon Sep 17 00:00:00 2001 From: Gijs Kruitbosch Date: Sat, 14 Jul 2018 22:09:14 +0100 Subject: [PATCH 3/4] Enable eslint checks for quotes and single-line loops/conditionals --- .eslintrc.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.eslintrc.js b/.eslintrc.js index 671a991..7824556 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -163,12 +163,18 @@ module.exports = { // No using with "no-with": 2, + // No if/while/for blocks on the same line as the if/while/for statement: + "nonblock-statement-body-position": [2, "below"], + // Always require semicolon at end of statement "semi": [2, "always"], // Require space after keywords "keyword-spacing": 2, + // Always use double quotes + "quotes": [2, "double", {"avoidEscape": true}], + // Require space before blocks "space-before-blocks": 2, From 30611cc57ffdad248a7f1aca8dd5d8c5cb308e9a Mon Sep 17 00:00:00 2001 From: Gijs Kruitbosch Date: Sun, 15 Jul 2018 15:43:50 +0100 Subject: [PATCH 4/4] Fix quotes issues in test and benchmark files --- benchmarks/benchmark-reporter.js | 12 ++++++------ test/generate-testcase.js | 2 +- test/test-jsdomparser.js | 2 +- test/test-readability.js | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/benchmarks/benchmark-reporter.js b/benchmarks/benchmark-reporter.js index f58278d..4467123 100644 --- a/benchmarks/benchmark-reporter.js +++ b/benchmarks/benchmark-reporter.js @@ -1,4 +1,4 @@ -var clean = require('matcha/lib/matcha/reporters/clean'); +var clean = require("matcha/lib/matcha/reporters/clean"); function average(list) { if (!list.length) @@ -18,20 +18,20 @@ module.exports = function(runner, utils) { var color = utils.color; var results = {}; var currentResults = []; - runner.on('bench end', function(benchResults) { + runner.on("bench end", function(benchResults) { currentResults.push(benchResults.ops); }); - runner.on('suite end', function(suite) { + runner.on("suite end", function(suite) { var avg = humanize(average(currentResults)); - console.log(padBefore(avg + ' op/s', 22) + ' » ' + suite.title); + console.log(padBefore(avg + " op/s", 22) + " » " + suite.title); console.log(); results[suite.title] = avg; currentResults = []; }); - runner.on('end', function() { + runner.on("end", function() { for (var k in results) { - console.log(color(padBefore(k, 30) + ': ', 'gray') + results[k] + ' op/s'); + console.log(color(padBefore(k, 30) + ": ", "gray") + results[k] + " op/s"); } console.log(); }); diff --git a/test/generate-testcase.js b/test/generate-testcase.js index f12e73b..b97cb27 100644 --- a/test/generate-testcase.js +++ b/test/generate-testcase.js @@ -59,7 +59,7 @@ function fetchSource(url, callbackFn) { client = require("https"); } var options = urlparse(url); - options.headers = {'User-Agent': FFX_UA}; + options.headers = {"User-Agent": FFX_UA}; client.get(options, function(response) { if (debug) { diff --git a/test/test-jsdomparser.js b/test/test-jsdomparser.js index ac1b11d..b52d546 100644 --- a/test/test-jsdomparser.js +++ b/test/test-jsdomparser.js @@ -115,7 +115,7 @@ describe("Test JSDOM functionality", function() { }); it("should have a working replaceChild", function() { - var parent = baseDoc.getElementsByTagName('div')[0]; + var parent = baseDoc.getElementsByTagName("div")[0]; var p = baseDoc.createElement("p"); p.setAttribute("id", "my-replaced-kid"); var childCount = parent.childNodes.length; diff --git a/test/test-readability.js b/test/test-readability.js index f73d089..6998865 100644 --- a/test/test-readability.js +++ b/test/test-readability.js @@ -95,10 +95,10 @@ function runTestsWithItems(label, domGenerationFn, source, expectedContent, expe function genPath(node) { if (node.id) { - return '#' + node.id; + return "#" + node.id; } if (node.tagName == "BODY") { - return 'body'; + return "body"; } var parent = node.parentNode; var parentPath = genPath(parent);