chore: refactored and linted

pull/1/head
Adam Pash 8 years ago
parent 9906bd36a4
commit 7e2a34945f

@ -0,0 +1 @@
**/fixtures/*

@ -0,0 +1,39 @@
// Use this file as a starting point for your project's .eslintrc.
// Copy this file, and add rule overrides as needed.
{
"parser": "babel-eslint",
"extends": "airbnb",
"plugins": [
"babel"
],
"globals": {
/* mocha */
"describe",
"it"
},
"rules": {
"no-param-reassign": 0,
/* TODO fix this; this should work w/import/resolver below, but doesn't */
"import/no-extraneous-dependencies": 0,
"import/no-unresolved": 0,
"no-control-regex": 0,
"import/prefer-default-export": 0,
"generator-star-spacing": 0,
"babel/generator-star-spacing": 0,
"func-names": 0,
"no-useless-escape": 0,
"no-confusing-arrow": 0,
},
"settings": {
"import/resolver": {
"babel-module": {
"extensions": [".js"]
}
}
},
"parserOptions":{
"ecmaFeatures": {
"experimentalObjectRestSpread": true
}
}
}

@ -5,14 +5,17 @@
"main": "index.js",
"scripts": {
"start": "node ./build",
"build": "rollup -c",
"lint": "eslint src/**",
"build": "eslint src/** && rollup -c",
"test": "./test-runner"
},
"author": "",
"license": "ISC",
"devDependencies": {
"babel-eslint": "^6.1.2",
"babel-plugin-external-helpers": "^6.8.0",
"babel-plugin-module-alias": "^1.6.0",
"babel-plugin-module-resolver": "^2.2.0",
"babel-plugin-transform-async-to-generator": "^6.8.0",
"babel-plugin-transform-es2015-destructuring": "^6.9.0",
"babel-plugin-transform-object-rest-spread": "^6.8.0",
@ -21,6 +24,14 @@
"babel-preset-es2015-rollup": "^1.2.0",
"babel-register": "^6.11.6",
"babelrc-rollup": "^3.0.0",
"eslint": "^3.5.0",
"eslint-config-airbnb": "^11.1.0",
"eslint-import-resolver-babel-module": "^2.0.1",
"eslint-plugin-async": "^0.1.1",
"eslint-plugin-babel": "^3.3.0",
"eslint-plugin-import": "^1.15.0",
"eslint-plugin-jsx-a11y": "^2.2.2",
"eslint-plugin-react": "^6.2.1",
"mocha": "^3.0.2",
"rollup": "^0.34.13",
"rollup-plugin-babel": "^2.6.1",

@ -0,0 +1,21 @@
#!/usr/bin/env fish
# Scaffold an implementation file, a test file, and an index re-export
# for a new util under next-page-url/scoring/utils.
# Usage: generate-util <file-basename> <exported-function-name>
set file $argv[1]
set function $argv[2]
# Single source of truth for the (long) destination directory.
set utils src/extractors/generic/next-page-url/scoring/utils
touch $utils/index.js
touch $utils/$file.js
touch $utils/$file.test.js
echo "import assert from 'assert';" > $utils/$file.test.js
echo "" >> $utils/$file.test.js
echo "import $function from './$file';" >> $utils/$file.test.js
echo "" >> $utils/$file.test.js
echo "export { default as $function } from './$file'" >> $utils/index.js
# Remind the author of the manual follow-up steps.
echo "Now make it a default export"
echo "Move it to its file"
echo "Move its tests to its test file"
echo "import in score-links"
echo "Test it."

@ -1,7 +1,7 @@
import { CLEAN_AUTHOR_RE } from './constants'
import { CLEAN_AUTHOR_RE } from './constants';
// Take an author string (like 'By David Smith ') and clean it to
// just the name(s): 'David Smith'.
export default function cleanAuthor(author) {
  // CLEAN_AUTHOR_RE captures the name(s) in group 2; keep only that,
  // then trim surrounding whitespace/line breaks.
  return author.replace(CLEAN_AUTHOR_RE, '$2').trim();
}

@ -1,21 +1,21 @@
import assert from 'assert'
import assert from 'assert';
import cleanAuthor from './author'
import cleanAuthor from './author';
describe('cleanAuthor(author)', () => {
  it('removes the By from an author string', () => {
    const author = cleanAuthor('By Bob Dylan');

    assert.equal(author, 'Bob Dylan');
  });

  it('trims trailing whitespace and line breaks', () => {
    const text = `
written by
Bob Dylan
`;
    const author = cleanAuthor(text);

    assert.equal(author, 'Bob Dylan');
  });
});

@ -1,9 +1,9 @@
// CLEAN AUTHOR CONSTANTS
export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i
export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
// CLEAN DEK CONSTANTS
export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
// An ordered list of meta tag names that denote likely article deks.
// From most distinct to least distinct.
//
@ -14,7 +14,7 @@ export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
// However, these tags often have SEO-specific junk in them that's not
// header-worthy like a dek is. Excerpt material at best.
export const DEK_META_TAGS = [
]
];
// An ordered list of Selectors to find likely article deks. From
// most explicit to least explicit.
@ -23,18 +23,36 @@ export const DEK_META_TAGS = [
// detrimental to the aesthetics of an article.
export const DEK_SELECTORS = [
'.entry-summary',
]
];
// CLEAN DATE PUBLISHED CONSTANTS
// Strips a leading "published:" label, capturing the rest in group 1.
export const CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i;
// Inserts a space between a time and a trailing am/pm via '$1 $2 $3'.
export const TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i;
// Matches the dotted part of "a.m."/"p.m." so it can collapse to "am"/"pm".
export const TIME_MERIDIAN_DOTS_RE = /\.m\./i;
// Month abbreviations used to pull month tokens out of a date string.
const months = [
  'jan',
  'feb',
  'mar',
  'apr',
  'may',
  'jun',
  'jul',
  'aug',
  'sep',
  'oct',
  'nov',
  'dec',
];
const allMonths = months.join('|');
// Time of day like "10:59" or "8:30 a.m.". The meridian dots must be
// escaped (\\.) so they match literal periods — an unescaped `.` in a
// string-built RegExp matches any character (e.g. "8:30axm").
const timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap]\\.?m\\.?)?';
// Numeric dates like "1/1/2020" or "15-04-16".
const timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';

// Tokenizes a date string into times, numeric dates, bare number runs,
// and month names, in that order of preference.
export const SPLIT_DATE_STRING =
  new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');
// CLEAN TITLE CONSTANTS
// A regular expression that will match separating characters on a
// title, that usually denote breadcrumbs or something similar.
export const TITLE_SPLITTERS_RE = /(: | - | \| )/g
export const TITLE_SPLITTERS_RE = /(: | - | \| )/g;
export const DOMAIN_ENDINGS_RE =
new RegExp('\.com$|\.net$|\.org$|\.co\.uk$', 'g')
new RegExp('\.com$|\.net$|\.org$|\.co\.uk$', 'g');

@ -8,54 +8,52 @@ import {
rewriteTopLevel,
stripJunkTags,
makeLinksAbsolute,
} from 'utils/dom'
import { convertNodeTo } from 'utils/dom'
} from 'utils/dom';
// Clean our article content, returning a new, cleaned node.
export default function extractCleanNode(
  article,
  {
    $,
    cleanConditionally = true,
    title = '',
    url = '',
  }
) {
  // Rewrite the tag name to div if it's a top level node like body or
  // html to avoid later complications with multiple body tags.
  rewriteTopLevel(article, $);

  // Drop small images and spacer images
  cleanImages(article, $);

  // Drop certain tags like <title>, etc
  // This is -mostly- for cleanliness, not security.
  stripJunkTags(article, $);

  // H1 tags are typically the article title, which should be extracted
  // by the title extractor instead. If there's less than 3 of them (<3),
  // strip them. Otherwise, turn 'em into H2s.
  cleanHOnes(article, $);

  // Clean headers
  cleanHeaders(article, $, title);

  // Make links absolute
  makeLinksAbsolute(article, $, url);

  // Remove style or align attributes
  cleanAttributes(article);

  // We used to clean UL's and OL's here, but it was leading to
  // too many in-article lists being removed. Consider a better
  // way to detect menus particularly and remove them.
  cleanTags(article, $, cleanConditionally);

  // Remove empty paragraph nodes
  removeEmpty(article, $);

  return article;
}
// headers = doc.xpath('.//h2 | .//h3 | .//h4 | .//h5 | .//h6')
// for header in headers:

@ -1,32 +1,32 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import assert from 'assert';
import cheerio from 'cheerio';
import fs from 'fs';
import extractCleanNode from './content'
import extractBestNode from 'extractors/generic/content/extract-best-node'
import extractBestNode from 'extractors/generic/content/extract-best-node';
import extractCleanNode from './content';
describe('extractCleanNode(article, { $, cleanConditionally, title } })', () => {
it("cleans cruft out of a DOM node", () => {
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8')
let $ = cheerio.load(html)
it('cleans cruft out of a DOM node', () => {
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8');
const $ = cheerio.load(html);
const opts = {
stripUnlikelyCandidates: true,
weightNodes: true,
cleanConditionally: true,
}
stripUnlikelyCandidates: true,
weightNodes: true,
cleanConditionally: true,
};
const bestNode = extractBestNode($, opts)
let result = $.html(bestNode)
// console.log(result)
// console.log(result.length)
const cleanNode = extractCleanNode(bestNode, { $, opts })
result = $.html(cleanNode)
// console.log(result.length)
// console.log(result)
// console.log(bestNode.html())
const bestNode = extractBestNode($, opts);
// let result = $.html(bestNode);
// // console.log(result)
// // console.log(result.length)
const cleanNode = extractCleanNode(bestNode, { $, opts });
// result = $.html(cleanNode);
// // console.log(result.length)
// // console.log(result)
// // console.log(bestNode.html())
assert.equal($(bestNode).text().length, 2687)
})
})
assert.equal($(cleanNode).text().length, 2687);
});
});

@ -1,4 +1,4 @@
import moment from 'moment'
import moment from 'moment';
// Is there a compelling reason to use moment here?
// Mostly only being used for the isValid() method,
// but could just check for 'Invalid Date' string.
@ -7,27 +7,27 @@ import {
CLEAN_DATE_STRING_RE,
SPLIT_DATE_STRING,
TIME_MERIDIAN_SPACE_RE,
TIME_MERIDIAN_DOTS_RE
} from './constants'
TIME_MERIDIAN_DOTS_RE,
} from './constants';
// Normalize a messy date string: keep only date-like tokens, collapse
// "a.m."/"p.m." dots, and separate the time from its meridian.
export function cleanDateString(dateString) {
  return (dateString.match(SPLIT_DATE_STRING) || [])
    .join(' ')
    .replace(TIME_MERIDIAN_DOTS_RE, 'm')
    .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')
    .replace(CLEAN_DATE_STRING_RE, '$1')
    .trim();
}

// Take a date published string, and hopefully return a date out of
// it. Return none if we fail.
export default function cleanDatePublished(dateString) {
  let date = moment(new Date(dateString));

  if (!date.isValid()) {
    // The raw string didn't parse; retry after stripping non-date cruft.
    dateString = cleanDateString(dateString);
    date = moment(new Date(dateString));
  }

  return date.isValid() ? date.toISOString() : null;
}

@ -1,67 +1,62 @@
import assert from 'assert'
import assert from 'assert';
import {
default as cleanDatePublished,
cleanDateString,
} from './date-published'
} from './date-published';
describe('cleanDatePublished(dateString)', () => {
it('returns a date object', () => {
const datePublished = cleanDatePublished('published: 1/1/2020')
const datePublished = cleanDatePublished('published: 1/1/2020');
assert.equal(
datePublished,
new Date('1/1/2020').toISOString()
)
})
);
});
it('returns null if date is invalid', () => {
const datePublished = cleanDatePublished('blargh')
const datePublished = cleanDatePublished('blargh');
assert.equal(datePublished, null)
})
})
assert.equal(datePublished, null);
});
});
describe('cleanDateString(dateString)', () => {
it('removes "published" text from an datePublished string', () => {
const datePublished = cleanDateString('published: 1/1/2020')
const datePublished = cleanDateString('published: 1/1/2020');
assert.equal(datePublished, '1/1/2020')
})
assert.equal(datePublished, '1/1/2020');
});
it('trims whitespace', () => {
const datePublished = cleanDateString(' 1/1/2020 ')
const datePublished = cleanDateString(' 1/1/2020 ');
assert.equal(datePublished, '1/1/2020')
})
assert.equal(datePublished, '1/1/2020');
});
it('puts a space b/w a time and am/pm', () => {
// The JS date parser is forgiving, but
// it needs am/pm separated from a time
const date1 = cleanDateString('1/1/2020 8:30am')
assert.equal(date1, '1/1/2020 8:30 am')
const date1 = cleanDateString('1/1/2020 8:30am');
assert.equal(date1, '1/1/2020 8:30 am');
const date2 = cleanDateString('8:30PM 1/1/2020')
assert.equal(date2, '8:30 PM 1/1/2020')
})
const date2 = cleanDateString('8:30PM 1/1/2020');
assert.equal(date2, '8:30 PM 1/1/2020');
});
it('cleans the dots from a.m. or p.m.', () => {
// The JS date parser is forgiving, but
// it needs a.m./p.m. without dots
const date1 = cleanDateString('1/1/2020 8:30 a.m.')
assert.equal(date1, '1/1/2020 8:30 am')
})
const date1 = cleanDateString('1/1/2020 8:30 a.m.');
assert.equal(date1, '1/1/2020 8:30 am');
});
it('can handle some tough timestamps', () => {
// The JS date parser is forgiving, but
// it needs am/pm separated from a time
const date1 = cleanDateString('This page was last modified on 15 April 2016, at 10:59.')
assert.equal(date1, '15 Apr 2016 10:59')
const date2 = cleanDateString('8:30PM 1/1/2020')
assert.equal(date2, '8:30 PM 1/1/2020')
})
})
const date1 = cleanDateString('This page was last modified on 15 April 2016, at 10:59.');
assert.equal(date1, '15 Apr 2016 10:59');
});
});

@ -1,17 +1,18 @@
import { TEXT_LINK_RE } from './constants'
import { stripTags } from 'utils/dom'
import { stripTags } from 'utils/dom';
import { TEXT_LINK_RE } from './constants';
// Take a dek HTML fragment, and return the cleaned version of it.
// Return None if the dek wasn't good enough.
export default function cleanDek(dek, { $ }) {
  // Sanity check that we didn't get too short or long of a dek.
  if (dek.length > 1000 || dek.length < 5) return null;

  const dekText = stripTags(dek, $);

  // Plain text links shouldn't exist in the dek. If we have some, it's
  // not a good dek - bail.
  if (TEXT_LINK_RE.test(dekText)) return null;

  return dekText.trim();
}

@ -1,52 +1,50 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import {
default as cleanDek,
cleanDekString,
} from './dek'
import cleanDek from './dek';
describe('cleanDek(dekString, { $ })', () => {
it('returns null if the dek is < 5 chars', () => {
const $ = cheerio.load('<div></div>')
assert.equal(cleanDek('Hi', { $ }), null)
})
const $ = cheerio.load('<div></div>');
assert.equal(cleanDek('Hi', { $ }), null);
});
it('returns null if the dek is > 1000 chars', () => {
const $ = cheerio.load('<div></div>')
const $ = cheerio.load('<div></div>');
const longDek =
// generate a string that is 1,280 chars
[0,1,2,3,4,5,6].reduce((acc, i) =>
acc += acc, '0123456789'
)
assert.equal(cleanDek(longDek, { $ }), null)
})
[0, 1, 2, 3, 4, 5, 6].reduce((acc) => {
acc += acc;
return acc;
}, '0123456789');
assert.equal(cleanDek(longDek, { $ }), null);
});
it('strip html tags from the dek', () => {
const $ = cheerio.load('<div></div>')
const dek = 'This is a <em>very</em> important dek.'
const $ = cheerio.load('<div></div>');
const dek = 'This is a <em>very</em> important dek.';
assert.equal(cleanDek(dek, { $ }), 'This is a very important dek.')
})
assert.equal(cleanDek(dek, { $ }), 'This is a very important dek.');
});
it('returns null if dek contains plain text link', () => {
const $ = cheerio.load('<div></div>')
const dek = 'This has this link http://example.com/foo/bar'
const $ = cheerio.load('<div></div>');
const dek = 'This has this link http://example.com/foo/bar';
assert.equal(cleanDek(dek, { $ }), null)
})
assert.equal(cleanDek(dek, { $ }), null);
});
it('returns a normal dek as is', () => {
const $ = cheerio.load('<div></div>')
const dek = 'This is the dek'
const $ = cheerio.load('<div></div>');
const dek = 'This is the dek';
assert.equal(cleanDek(dek, { $ }), dek)
})
assert.equal(cleanDek(dek, { $ }), dek);
});
it('cleans extra whitespace', () => {
const $ = cheerio.load('<div></div>')
const dek = ' This is the dek '
const $ = cheerio.load('<div></div>');
const dek = ' This is the dek ';
assert.equal(cleanDek(dek, { $ }), 'This is the dek')
})
})
assert.equal(cleanDek(dek, { $ }), 'This is the dek');
});
});

@ -1,5 +1,5 @@
const HTML = {
docWithH1: `<div><h1>This Is the Real Title</h1></div>`,
docWithH1: '<div><h1>This Is the Real Title</h1></div>',
docWith2H1s: `
<div>
<h1>This Is the Real Title</h1>
@ -7,9 +7,9 @@ const HTML = {
</div>
`,
docWithTagsInH1: {
before: `<div><h1>This Is the <em>Real</em> Title</h1></div>`,
after: `This Is the Real Title`
before: '<div><h1>This Is the <em>Real</em> Title</h1></div>',
after: 'This Is the Real Title',
},
}
};
export default HTML
export default HTML;

@ -1,9 +1,9 @@
import cleanAuthor from './author'
import cleanImage from './lead-image-url'
import cleanDek from './dek'
import cleanDatePublished from './date-published'
import cleanContent from './content'
import cleanTitle from './title'
import cleanAuthor from './author';
import cleanImage from './lead-image-url';
import cleanDek from './dek';
import cleanDatePublished from './date-published';
import cleanContent from './content';
import cleanTitle from './title';
const Cleaners = {
author: cleanAuthor,
@ -12,15 +12,15 @@ const Cleaners = {
datePublished: cleanDatePublished,
content: cleanContent,
title: cleanTitle,
}
};
export default Cleaners
export default Cleaners;
export { cleanAuthor }
export { cleanImage }
export { cleanDek }
export { cleanDatePublished }
export { cleanContent }
export { cleanTitle }
export { default as resolveSplitTitle } from './resolve-split-title'
export { cleanAuthor };
export { cleanImage };
export { cleanDek };
export { cleanDatePublished };
export { cleanContent };
export { cleanTitle };
export { default as resolveSplitTitle } from './resolve-split-title';

@ -1,10 +1,10 @@
import validUrl from 'valid-url'
import validUrl from 'valid-url';
// Validate a candidate lead-image URL; return the trimmed URL when it
// parses as a valid web URI, otherwise null.
export default function clean(leadImageUrl) {
  leadImageUrl = leadImageUrl.trim();

  if (validUrl.isWebUri(leadImageUrl)) {
    return leadImageUrl;
  }

  return null;
}

@ -1,20 +1,20 @@
import assert from 'assert'
import assert from 'assert';
import clean from './lead-image-url'
import clean from './lead-image-url';
describe('clean(leadImageUrl)', () => {
it('returns the url if valid', () => {
const url = 'https://example.com'
assert.equal(clean(url), url)
})
const url = 'https://example.com';
assert.equal(clean(url), url);
});
it('returns null if the url is not valid', () => {
const url = 'this is not a valid url'
assert.equal(clean(url), null)
})
const url = 'this is not a valid url';
assert.equal(clean(url), null);
});
it('trims whitespace', () => {
const url = ' https://example.com/foo/bar.jpg'
assert.equal(clean(url), url.trim())
})
})
const url = ' https://example.com/foo/bar.jpg';
assert.equal(clean(url), url.trim());
});
});

@ -1,34 +1,11 @@
import URL from 'url'
import 'babel-polyfill'
import wuzzy from 'wuzzy'
import URL from 'url';
import 'babel-polyfill';
import wuzzy from 'wuzzy';
import {
TITLE_SPLITTERS_RE,
DOMAIN_ENDINGS_RE,
} from './constants'
// Given a title with separators in it (colons, dashes, etc),
// resolve whether any of the segments should be removed.
export default function resolveSplitTitle(title, url='') {
// Splits while preserving splitters, like:
// ['The New New York', ' - ', 'The Washington Post']
title = title
let splitTitle = title.split(TITLE_SPLITTERS_RE)
if (splitTitle.length === 1) {
return title
}
let newTitle = extractBreadcrumbTitle(splitTitle, title)
if (newTitle) return newTitle
newTitle = cleanDomainFromTitle(splitTitle, url)
if (newTitle) return newTitle
// Fuzzy ratio didn't find anything, so this title is probably legit.
// Just return it all.
return title
}
} from './constants';
function extractBreadcrumbTitle(splitTitle, text) {
// This must be a very breadcrumbed title, like:
@ -38,40 +15,40 @@ function extractBreadcrumbTitle(splitTitle, text) {
// Look to see if we can find a breadcrumb splitter that happens
// more than once. If we can, we'll be able to better pull out
// the title.
const termCounts = splitTitle.reduce((acc, text) => {
acc[text] = acc[text] ? acc[text] + 1 : 1
return acc
}, {})
const termCounts = splitTitle.reduce((acc, titleText) => {
acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
return acc;
}, {});
const [maxTerm, termCount] =
Reflect.ownKeys(termCounts)
.reduce((acc, key) => {
if (acc[1] < termCounts[key]) {
return [key, termCounts[key]]
} else {
return acc
return [key, termCounts[key]];
}
}, [0, 0])
return acc;
}, [0, 0]);
// We found a splitter that was used more than once, so it
// is probably the breadcrumber. Split our title on that instead.
// Note: max_term should be <= 4 characters, so that " >> "
// will match, but nothing longer than that.
if (termCount >= 2 && maxTerm.length <= 4) {
splitTitle = text.split(maxTerm)
splitTitle = text.split(maxTerm);
}
const splitEnds = [splitTitle[0], splitTitle.slice(-1)]
const longestEnd = splitEnds.reduce((acc, end) => {
return acc.length > end.length ? acc : end
}, '')
const splitEnds = [splitTitle[0], splitTitle.slice(-1)];
const longestEnd = splitEnds.reduce((acc, end) => acc.length > end.length ? acc : end, '');
if (longestEnd.length > 10) {
return longestEnd
} else {
return text
return longestEnd;
}
return text;
}
return null;
}
function cleanDomainFromTitle(splitTitle, url) {
@ -81,20 +58,43 @@ function cleanDomainFromTitle(splitTitle, url) {
//
// Strip out the big TLDs - it just makes the matching a bit more
// accurate. Not the end of the world if it doesn't strip right.
const { host } = URL.parse(url)
const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '')
const { host } = URL.parse(url);
const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
const startSlug = splitTitle[0].toLowerCase().replace(' ', '')
const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain)
const startSlug = splitTitle[0].toLowerCase().replace(' ', '');
const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
if (startSlugRatio > .4 && startSlug.length > 5) {
return splitTitle.slice(2).join('')
if (startSlugRatio > 0.4 && startSlug.length > 5) {
return splitTitle.slice(2).join('');
}
const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '')
const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain)
const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
if (endSlugRatio > .4 && endSlug.length >= 5) {
return splitTitle.slice(0, -2).join('')
if (endSlugRatio > 0.4 && endSlug.length >= 5) {
return splitTitle.slice(0, -2).join('');
}
return null;
}
// Given a title with separators in it (colons, dashes, etc),
// resolve whether any of the segments should be removed.
export default function resolveSplitTitle(title, url = '') {
  // Split while preserving the splitters themselves, like:
  // ['The New New York', ' - ', 'The Washington Post']
  const segments = title.split(TITLE_SPLITTERS_RE);

  // A single segment means there were no separators; nothing to resolve.
  if (segments.length < 2) return title;

  // Prefer a breadcrumb-derived title, then a domain-stripped one.
  // If neither heuristic fires, the title is probably legit as-is.
  return extractBreadcrumbTitle(segments, title) ||
         cleanDomainFromTitle(segments, url) ||
         title;
}

@ -1,32 +1,31 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import { resolveSplitTitle } from './index'
import { resolveSplitTitle } from './index';
describe('resolveSplitTitle(text)', () => {
it('does nothing if title not splittable', () => {
const title = "This Is a Normal Title"
const title = 'This Is a Normal Title';
assert.equal(resolveSplitTitle(title), title)
})
assert.equal(resolveSplitTitle(title), title);
});
it('extracts titles from breadcrumb-like titles', () => {
const title = "The Best Gadgets on Earth : Bits : Blogs : NYTimes.com"
const title = 'The Best Gadgets on Earth : Bits : Blogs : NYTimes.com';
assert.equal(resolveSplitTitle(title), "The Best Gadgets on Earth ")
})
assert.equal(resolveSplitTitle(title), 'The Best Gadgets on Earth ');
});
it('cleans domains from titles at the front', () => {
const title = "NYTimes - The Best Gadgets on Earth"
const url = "https://www.nytimes.com/bits/blog/etc/"
const title = 'NYTimes - The Best Gadgets on Earth';
const url = 'https://www.nytimes.com/bits/blog/etc/';
assert.equal(resolveSplitTitle(title, url), "The Best Gadgets on Earth")
})
assert.equal(resolveSplitTitle(title, url), 'The Best Gadgets on Earth');
});
it('cleans domains from titles at the back', () => {
const title = "The Best Gadgets on Earth | NYTimes"
const url = "https://www.nytimes.com/bits/blog/etc/"
const title = 'The Best Gadgets on Earth | NYTimes';
const url = 'https://www.nytimes.com/bits/blog/etc/';
assert.equal(resolveSplitTitle(title, url), "The Best Gadgets on Earth")
})
})
assert.equal(resolveSplitTitle(title, url), 'The Best Gadgets on Earth');
});
});

@ -1,25 +1,26 @@
import { TITLE_SPLITTERS_RE } from './constants'
import { resolveSplitTitle } from './index'
import { stripTags } from 'utils/dom'
import { stripTags } from 'utils/dom';
import { TITLE_SPLITTERS_RE } from './constants';
import { resolveSplitTitle } from './index';
export default function cleanTitle(title, { url, $ }) {
  // If title has |, :, or - in it, see if
  // we can clean it up.
  // NOTE(review): TITLE_SPLITTERS_RE carries the /g flag, so .test()
  // advances lastIndex between calls — consider a non-global copy here.
  if (TITLE_SPLITTERS_RE.test(title)) {
    title = resolveSplitTitle(title, url);
  }

  // Final sanity check that we didn't get a crazy title.
  // if (title.length > 150 || title.length < 15) {
  if (title.length > 150) {
    // If we did, return h1 from the document if it exists
    const h1 = $('h1');
    if (h1.length === 1) {
      title = h1.text();
    }
  }

  // strip any html tags in the title text
  return stripTags(title, $).trim();
}

@ -1,8 +1,8 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import { cleanTitle } from './index'
import HTML from './fixtures/html';
import { cleanTitle } from './index';
describe('cleanTitle(title, { url, $ })', () => {
it('uses a single h1 if the title is too short or too long', () => {
@ -10,28 +10,27 @@ describe('cleanTitle(title, { url, $ })', () => {
// const $ = cheerio.load(HTML.docWithH1)
//
// assert.equal(cleanTitle(title, { url: '', $ }), $('h1').text())
})
});
it('only uses h1 if there is only one on the page', () => {
const title = "Too Short"
const $ = cheerio.load(HTML.docWith2H1s)
const title = 'Too Short';
const $ = cheerio.load(HTML.docWith2H1s);
assert.equal(cleanTitle(title, { url: '', $ }), title)
})
assert.equal(cleanTitle(title, { url: '', $ }), title);
});
it('removes HTML tags from titles', () => {
const $ = cheerio.load(HTML.docWithTagsInH1.before)
const title = $('h1').html()
const $ = cheerio.load(HTML.docWithTagsInH1.before);
const title = $('h1').html();
assert.equal(cleanTitle(title, { url: '', $ }), HTML.docWithTagsInH1.after)
})
assert.equal(cleanTitle(title, { url: '', $ }), HTML.docWithTagsInH1.after);
});
it('trims extraneous spaces', () => {
const title = " This Is a Great Title That You'll Love "
const $ = cheerio.load(HTML.docWithTagsInH1.before)
const title = " This Is a Great Title That You'll Love ";
const $ = cheerio.load(HTML.docWithTagsInH1.before);
assert.equal(cleanTitle(title, { url: '', $ }), title.trim())
})
})
assert.equal(cleanTitle(title, { url: '', $ }), title.trim());
});
});

@ -1,12 +1,11 @@
import GenericExtractor from './generic'
import NYMagExtractor from './custom/nymag.com'
import BloggerExtractor from './custom/blogspot.com'
import WikipediaExtractor from './custom/wikipedia.org'
import NYMagExtractor from './custom/nymag.com';
import BloggerExtractor from './custom/blogspot.com';
import WikipediaExtractor from './custom/wikipedia.org';
const Extractors = {
'nymag.com': NYMagExtractor,
'blogspot.com': BloggerExtractor,
'wikipedia.org': WikipediaExtractor,
}
};
export default Extractors
export default Extractors;

@ -1 +1 @@
export const ATTR_RE = /\[([\w-]+)\]/
export const ATTR_RE = /\[([\w-]+)\]/;

@ -14,27 +14,27 @@ const BloggerExtractor = {
// Convert the noscript tag to a div
transforms: {
'noscript': 'div'
noscript: 'div',
},
},
author: {
selectors: [
'.post-author-name'
]
'.post-author-name',
],
},
title: {
selectors: [
'h2.title',
]
],
},
datePublished: {
selectors: [
'span.publishdate',
]
}
}
],
},
};
export default BloggerExtractor
export default BloggerExtractor;

@ -22,37 +22,39 @@ const NYMagExtractor = {
// the transformation.
transforms: {
// Convert h1s to h2s
'h1': 'h2',
h1: 'h2',
// Convert lazy-loaded noscript images to figures
'noscript': ($node) => {
const $children = $node.children()
noscript: ($node) => {
const $children = $node.children();
if ($children.length === 1 && $children.get(0).tagName === 'img') {
return 'figure'
return 'figure';
}
}
}
return null;
},
},
},
title: {
selectors: [
'h1.headline-primary',
'h1',
]
],
},
author: {
selectors: [
'.by-authors',
]
],
},
datePublished: {
selectors: [
'time.article-timestamp[datetime]',
'time.article-timestamp',
]
}
}
],
},
};
export default NYMagExtractor
export default NYMagExtractor;

@ -8,7 +8,7 @@ const WikipediaExtractor = {
// transform top infobox to an image with caption
transforms: {
'.infobox img': ($node) => {
$node.parents('.infobox').prepend($node)
$node.parents('.infobox').prepend($node);
},
'.infobox caption': 'figcaption',
'.infobox': 'figure',
@ -28,15 +28,15 @@ const WikipediaExtractor = {
title: {
selectors: [
'h2.title',
]
],
},
datePublished: {
selectors: [
'#footer-info-lastmod',
]
],
},
}
};
export default WikipediaExtractor
export default WikipediaExtractor;

@ -5,22 +5,22 @@
// Note: "author" is too often the -developer- of the page, so it is not
// added here.
export const AUTHOR_META_TAGS = [
'byl',
'clmst',
'dc.author',
'dcsext.author',
'dc.creator',
'rbauthors',
'authors',
]
'byl',
'clmst',
'dc.author',
'dcsext.author',
'dc.creator',
'rbauthors',
'authors',
];
export const AUTHOR_MAX_LENGTH = 300
export const AUTHOR_MAX_LENGTH = 300;
// An ordered list of XPath Selectors to find likely article authors. From
// most explicit to least explicit.
//
// Note - this does not use classes like CSS. This checks to see if the string
// exists in the className, which is not as accurate as .className (which
// splits on spaces/endlines), but for our purposes it's close enough. The
// speed tradeoff is worth the accuracy hit.
export const AUTHOR_SELECTORS = [
@ -47,12 +47,12 @@ export const AUTHOR_SELECTORS = [
'.articleauthor',
'.ArticleAuthor',
'.byline',
]
];
// An ordered list of Selectors to find likely article authors, with
// regular expression for content.
// Matches strings that begin with "By" (ignoring leading whitespace).
const bylineRe = /^[\n\s]*By/i;

export const BYLINE_SELECTORS_RE = [
  ['#byline', bylineRe],
  ['.byline', bylineRe],
];

@ -1,49 +1,48 @@
import { cleanAuthor } from 'cleaners';
import {
extractFromMeta,
extractFromSelectors,
} from 'utils/dom';
import {
AUTHOR_META_TAGS,
AUTHOR_MAX_LENGTH,
AUTHOR_SELECTORS,
BYLINE_SELECTORS_RE,
} from './constants'
import { cleanAuthor } from 'cleaners'
import {
extractFromMeta,
extractFromSelectors
} from 'utils/dom'
} from './constants';
const GenericAuthorExtractor = {
  // Extract a likely author, trying progressively looser strategies:
  // meta tags, then known selectors, then regex-gated byline selectors.
  // Returns null when nothing plausible is found.
  extract({ $, metaCache }) {
    let author;

    // First, check to see if we have a matching
    // meta tag that we can make use of.
    author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
    if (author && author.length < AUTHOR_MAX_LENGTH) {
      return cleanAuthor(author);
    }

    // Second, look through our selectors looking for potential authors.
    author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
    if (author && author.length < AUTHOR_MAX_LENGTH) {
      return cleanAuthor(author);
    }

    // Last, use our looser regular-expression based selectors for
    // potential authors.
    for (const [selector, regex] of BYLINE_SELECTORS_RE) {
      const node = $(selector);
      if (node.length === 1) {
        const text = node.text();
        if (regex.test(text)) {
          return cleanAuthor(text);
        }
      }
    }

    return null;
  },
};

export default GenericAuthorExtractor;

@ -1,46 +1,46 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import GenericAuthorExtractor from './extractor'
import HTML from './fixtures/html';
import GenericAuthorExtractor from './extractor';
describe('GenericAuthorExtractor', () => {
describe('extract($, cachedMeta)', () => {
it('extracts author from meta tags', () => {
const $ = cheerio.load(HTML.authorMeta.test)
const $ = cheerio.load(HTML.authorMeta.test);
const result = GenericAuthorExtractor.extract(
{ $, metaCache: ["dc.author", "something-else"] }
)
{ $, metaCache: ['dc.author', 'something-else'] }
);
assert.equal(result, HTML.authorMeta.result)
})
assert.equal(result, HTML.authorMeta.result);
});
it('extracts author from author selectors', () => {
const $ = cheerio.load(HTML.authorSelectors.test)
const $ = cheerio.load(HTML.authorSelectors.test);
const result = GenericAuthorExtractor.extract(
{ $, metaCache: ["dc.author", "something-else"] }
)
{ $, metaCache: ['dc.author', 'something-else'] }
);
assert.equal(result, HTML.authorSelectors.result)
})
assert.equal(result, HTML.authorSelectors.result);
});
it('extracts author with regex selectors', () => {
const $ = cheerio.load(HTML.authorRegSelectors.test)
const $ = cheerio.load(HTML.authorRegSelectors.test);
const result = GenericAuthorExtractor.extract(
{ $, metaCache: ["dc.author", "something-else"] }
)
{ $, metaCache: ['dc.author', 'something-else'] }
);
assert.equal(result, HTML.authorRegSelectors.result)
})
assert.equal(result, HTML.authorRegSelectors.result);
});
it('returns null if no author found', () => {
const $ = cheerio.load('<div></div>')
const $ = cheerio.load('<div></div>');
const result = GenericAuthorExtractor.extract(
{ $, metaCache: ["dc.author", "something-else"] }
)
{ $, metaCache: ['dc.author', 'something-else'] }
);
assert.equal(result, null)
})
})
})
assert.equal(result, null);
});
});
});

@ -5,7 +5,7 @@ const HTML = {
<meta name="dc.author" value="Adam" />
</html>
`,
result: `Adam`
result: 'Adam',
},
authorSelectors: {
test: `
@ -15,7 +15,7 @@ const HTML = {
</div>
</div>
`,
result: `Adam`
result: 'Adam',
},
authorRegSelectors: {
test: `
@ -25,8 +25,8 @@ const HTML = {
</div>
</div>
`,
result: `Adam`
result: 'Adam',
},
}
};
export default HTML
export default HTML;

@ -1,11 +1,12 @@
import {
scoreContent,
findTopCandidate,
} from './scoring'
import {
stripUnlikelyCandidates,
convertToParagraphs,
} from 'utils/dom'
} from 'utils/dom';
import {
scoreContent,
findTopCandidate,
} from './scoring';
// Using a variety of scoring techniques, extract the content most
// likely to be article text.
@ -26,12 +27,12 @@ export default function extractBestNode($, opts) {
if (opts.stripUnlikelyCandidates) {
$ = stripUnlikelyCandidates($)
$ = stripUnlikelyCandidates($);
}
$ = convertToParagraphs($)
$ = scoreContent($, opts.weightNodes)
const $topCandidate = findTopCandidate($)
$ = convertToParagraphs($);
$ = scoreContent($, opts.weightNodes);
const $topCandidate = findTopCandidate($);
return $topCandidate
return $topCandidate;
}

@ -1,24 +1,26 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import assert from 'assert';
import cheerio from 'cheerio';
import fs from 'fs';
// import HTML from './fixtures/html'
import extractBestNode from './extract-best-node'
import extractBestNode from './extract-best-node';
describe('extractBestNode($, flags)', () => {
it("scores the dom nodes and returns the best option", () => {
const html = fs.readFileSync('./fixtures/latimes.html', 'utf-8')
it('scores the dom nodes and returns the best option', () => {
const html = fs.readFileSync('./fixtures/latimes.html', 'utf-8');
const opts = {
stripUnlikelyCandidates: true,
weightNodes: true,
}
stripUnlikelyCandidates: true,
weightNodes: true,
};
let $ = cheerio.load(html)
const $ = cheerio.load(html);
const bestNode = extractBestNode($, opts)
const bestNode = extractBestNode($, opts);
assert(typeof bestNode, 'object');
// console.log(bestNode.html())
// assert.equal($(bestNode).text().length, 3652)
})
})
});
});

@ -1,10 +1,11 @@
import cheerio from 'cheerio'
import 'babel-polyfill'
import cheerio from 'cheerio';
import 'babel-polyfill';
import extractBestNode from './extract-best-node'
import { nodeIsSufficient } from 'utils/dom'
import { cleanContent } from 'cleaners'
import { normalizeSpaces } from 'utils/text'
import { nodeIsSufficient } from 'utils/dom';
import { cleanContent } from 'cleaners';
import { normalizeSpaces } from 'utils/text';
import extractBestNode from './extract-best-node';
const GenericContentExtractor = {
defaultOpts: {
@ -33,46 +34,44 @@ const GenericContentExtractor = {
// cleanConditionally: Clean the node to return of some
// superfluous content. Things like forms, ads, etc.
extract({ $, html, title, url }, opts) {
opts = { ...this.defaultOpts, ...opts }
opts = { ...this.defaultOpts, ...opts };
$ = $ || cheerio.load(html)
$ = $ || cheerio.load(html);
// Cascade through our extraction-specific opts in an ordered fashion,
// turning them off as we try to extract content.
let node = this.getContentNode($, title, url, opts)
let node = this.getContentNode($, title, url, opts);
if (nodeIsSufficient(node)) {
return this.cleanAndReturnNode(node, $)
} else {
// We didn't succeed on first pass, one by one disable our
// extraction opts and try again.
for (const key of Reflect.ownKeys(opts).filter(key => opts[key] === true)) {
opts[key] = false
$ = cheerio.load(html)
node = this.getContentNode($, title, url, opts)
if (nodeIsSufficient(node)) {
break
}
}
return this.cleanAndReturnNode(node, $);
}
return this.cleanAndReturnNode(node, $)
// We didn't succeed on first pass, one by one disable our
// extraction opts and try again.
for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {
opts[key] = false;
$ = cheerio.load(html);
node = this.getContentNode($, title, url, opts);
if (nodeIsSufficient(node)) {
break;
}
}
return this.cleanAndReturnNode(node, $)
return this.cleanAndReturnNode(node, $);
},
// Get node given current options
getContentNode($, title, url, opts) {
return cleanContent(
extractBestNode($, opts),
{
$,
cleanConditionally: opts.cleanConditionally,
title,
url,
})
{
$,
cleanConditionally: opts.cleanConditionally,
title,
url,
});
},
// Once we got here, either we're at our last-resort node, or
@ -80,10 +79,10 @@ const GenericContentExtractor = {
// move forward.
cleanAndReturnNode(node, $) {
if (!node) {
return null
return null;
}
return normalizeSpaces($.html(node))
return normalizeSpaces($.html(node));
// if return_type == "html":
// return normalize_spaces(node_to_html(node))
@ -91,6 +90,6 @@ const GenericContentExtractor = {
// return node
},
}
};
export default GenericContentExtractor
export default GenericContentExtractor;

@ -1,16 +1,15 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import assert from 'assert';
import fs from 'fs';
import { clean } from 'test-helpers'
import { clean } from 'test-helpers';
import GenericContentExtractor from './extractor'
import GenericContentExtractor from './extractor';
describe('GenericContentExtractor', function() {
this.timeout(1000000)
describe('GenericContentExtractor', function () {
this.timeout(1000000);
describe('extract($, html, opts)', () => {
it("extracts html and returns the article", () => {
const html = fs.readFileSync('./fixtures/vulture.html', 'utf-8')
it('extracts html and returns the article', () => {
const html = fs.readFileSync('./fixtures/vulture.html', 'utf-8');
// Array.from(range(1, 100)).map((i) => {
// console.log(i)
@ -20,15 +19,10 @@ describe('GenericContentExtractor', function() {
// })
const result = clean(GenericContentExtractor.extract(
{ $: null, html, url: 'http://www.vulture.com/2016/08/dc-comics-greg-berlanti-c-v-r.html' }
))
// console.log(result)
})
})
})
));
function* range(start = 1, end = 1) {
while (start <= end) {
yield start++
}
}
assert(typeof result, 'string');
// console.log(result)
});
});
});

@ -1,15 +1,15 @@
import {
getOrInitScore,
setScore,
} from './index'
} from './index';
export default function addScore($node, $, amount) {
try {
const score = getOrInitScore($node, $) + amount
setScore($node, $, score)
} catch(e) {
console.debug(e)
} finally {
return $node
const score = getOrInitScore($node, $) + amount;
setScore($node, $, score);
} catch (e) {
// Ignoring; error occurs in scoreNode
}
return $node;
}

@ -1,28 +1,27 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import {
addScore,
getScore,
} from './index'
} from './index';
describe('Scoring utils', () => {
describe('addScore(node, $, amount)', () => {
it(`adds the specified amount to a node's score`, () => {
const $ = cheerio.load('<p score="25">Foo</p>')
let $node = $('p').first()
it('adds the specified amount to a node\'s score', () => {
const $ = cheerio.load('<p score="25">Foo</p>');
let $node = $('p').first();
$node = addScore($node, $, 25)
assert.equal(getScore($node), 50)
})
$node = addScore($node, $, 25);
assert.equal(getScore($node), 50);
});
it(`adds score if score not yet set (assumes score is 0)`, () => {
const $ = cheerio.load('<p>Foo</p>')
let $node = $('p').first()
it('adds score if score not yet set (assumes score is 0)', () => {
const $ = cheerio.load('<p>Foo</p>');
let $node = $('p').first();
$node = addScore($node, $, 25)
assert.equal(getScore($node), 25)
})
})
})
$node = addScore($node, $, 25);
assert.equal(getScore($node), 25);
});
});
});

@ -1,11 +1,11 @@
import { addScore } from './index'
import { addScore } from './index';
// Adds 1/4 of a child's score to its parent
export default function addToParent(node, $, score) {
const parent = node.parent()
const parent = node.parent();
if (parent) {
addScore(parent, $, score * .25)
addScore(parent, $, score * 0.25);
}
return node
return node;
}

@ -1,24 +1,23 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import {
addToParent,
getScore,
} from './index'
} from './index';
describe('Scoring utils', () => {
describe('addToParent(node, $, amount)', () => {
it(`adds 1/4 of a node's score it its parent`, () => {
const html = '<div score="25"><p score="40">Foo</p></div>'
const $ = cheerio.load(html)
let $node = $('p').first()
it('adds 1/4 of a node\'s score it its parent', () => {
const html = '<div score="25"><p score="40">Foo</p></div>';
const $ = cheerio.load(html);
let $node = $('p').first();
$node = addToParent($node, $, 40)
$node = addToParent($node, $, 40);
assert.equal(getScore($node.parent()), 35)
assert.equal(getScore($node), 40)
})
})
})
assert.equal(getScore($node.parent()), 35);
assert.equal(getScore($node), 40);
});
});
});

@ -1,49 +1,49 @@
//// CONTENT FETCHING CONSTANTS ////
// // CONTENT FETCHING CONSTANTS ////
// A list of strings that can be considered unlikely candidates when
// extracting content from a resource. These strings are joined together
// and then tested for existence using re:test, so may contain simple,
// non-pipe style regular expression queries if necessary.
export const UNLIKELY_CANDIDATES_BLACKLIST = [
'ad-break',
'adbox',
'advert',
'addthis',
'agegate',
'aux',
'blogger-labels',
'combx',
'comment',
'conversation',
'disqus',
'entry-unrelated',
'extra',
'foot',
'form',
'header',
'hidden',
'loader',
'login', // Note: This can hit 'blogindex'.
'menu',
'meta',
'nav',
'pager',
'pagination',
'predicta', // readwriteweb inline ad box
'presence_control_external', // lifehacker.com container full of false positives
'popup',
'printfriendly',
'related',
'remove',
'remark',
'rss',
'share',
'shoutbox',
'sidebar',
'sociable',
'sponsor',
'tools'
]
'ad-break',
'adbox',
'advert',
'addthis',
'agegate',
'aux',
'blogger-labels',
'combx',
'comment',
'conversation',
'disqus',
'entry-unrelated',
'extra',
'foot',
'form',
'header',
'hidden',
'loader',
'login', // Note: This can hit 'blogindex'.
'menu',
'meta',
'nav',
'pager',
'pagination',
'predicta', // readwriteweb inline ad box
'presence_control_external', // lifehacker.com container full of false positives
'popup',
'printfriendly',
'related',
'remove',
'remark',
'rss',
'share',
'shoutbox',
'sidebar',
'sociable',
'sponsor',
'tools',
];
// A list of strings that can be considered LIKELY candidates when
// extracting content from a resource. Essentially, the inverse of the
@ -57,56 +57,56 @@ export const UNLIKELY_CANDIDATES_BLACKLIST = [
// re:test, so may contain simple, non-pipe style regular expression queries
// if necessary.
export const UNLIKELY_CANDIDATES_WHITELIST = [
'and',
'article',
'body',
'blogindex',
'column',
'content',
'entry-content-asset',
'format', // misuse of form
'hfeed',
'hentry',
'hatom',
'main',
'page',
'posts',
'shadow'
]
'and',
'article',
'body',
'blogindex',
'column',
'content',
'entry-content-asset',
'format', // misuse of form
'hfeed',
'hentry',
'hatom',
'main',
'page',
'posts',
'shadow',
];
// A list of tags which, if found inside, should cause a <div /> to NOT
// be turned into a paragraph tag. Shallow div tags without these elements
// should be turned into <p /> tags.
export const DIV_TO_P_BLOCK_TAGS = [
'a',
'blockquote',
'dl',
'div',
'img',
'p',
'pre',
'table',
].join(',')
'a',
'blockquote',
'dl',
'div',
'img',
'p',
'pre',
'table',
].join(',');
// A list of tags that should be ignored when trying to find the top candidate
// for a document.
export const NON_TOP_CANDIDATE_TAGS = [
'br',
'b',
'i',
'label',
'hr',
'area',
'base',
'basefont',
'input',
'img',
'link',
'meta',
]
'br',
'b',
'i',
'label',
'hr',
'area',
'base',
'basefont',
'input',
'img',
'link',
'meta',
];
export const NON_TOP_CANDIDATE_TAGS_RE =
new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i')
new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');
// A list of selectors that specify, very clearly, either hNews or other
// very content-specific style content, like Blogger templates.
@ -118,53 +118,15 @@ export const HNEWS_CONTENT_SELECTORS = [
['.post', '.postbody'],
['.post', '.post_body'],
['.post', '.post-body'],
]
// export const HNEWS_CONTENT_SELECTORS = [
// {
// //selector: XPath('/#<{(|[contains(@class, "hentry")]/#<{(|[contains(@class, "entry-content")]'),
// must_exist: {
// classes: ['hentry', 'entry-content'],
// }
// },
// {
// //selector: XPath('/#<{(|[contains(@class, "entry")]/#<{(|[contains(@class, "entry-content")]'),
// must_exist: {
// classes: ['entry', 'entry-content'],
// }
// },
// {
// //selector: XPath('/#<{(|[contains(@class, "entry")]/#<{(|[contains(@class, "entry_content")]'),
// must_exist: {
// classes: ['entry', 'entry_content'],
// }
// },
// {
// //selector: XPath('/#<{(|[contains(@class, "post")]/#<{(|[contains(@class, "post-body")]'),
// must_exist: {
// classes: ['post', 'post-body'],
// }
// },
// {
// //selector: XPath('/#<{(|[contains(@class, "post")]/#<{(|[contains(@class, "post_body")]'),
// must_exist: {
// classes: ['post', 'post_body'],
// }
// },
// {
// //selector: XPath('/#<{(|[contains(@class, "post")]/#<{(|[contains(@class, "postbody")]'),
// must_exist: {
// classes: ['post', 'postbody'],
// }
// },
// ]
];
export const PHOTO_HINTS = [
'figure',
'photo',
'image',
'caption'
]
export const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i')
'figure',
'photo',
'image',
'caption',
];
export const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');
// A list of strings that denote a positive scoring for this content as being
@ -172,175 +134,175 @@ export const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i')
//
// TODO: Perhaps have these scale based on their odds of being quality?
export const POSITIVE_SCORE_HINTS = [
'article',
'articlecontent',
'instapaper_body',
'blog',
'body',
'content',
'entry-content-asset',
'entry',
'hentry',
'main',
'Normal',
'page',
'pagination',
'permalink',
'post',
'story',
'text',
'[-_]copy', //usatoday
'\Bcopy'
]
'article',
'articlecontent',
'instapaper_body',
'blog',
'body',
'content',
'entry-content-asset',
'entry',
'hentry',
'main',
'Normal',
'page',
'pagination',
'permalink',
'post',
'story',
'text',
'[-_]copy', // usatoday
'\Bcopy',
];
// The above list, joined into a matching regular expression
export const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i')
export const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');
// Readability publisher-specific guidelines
export const READABILITY_ASSET = new RegExp('entry-content-asset', 'i')
export const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');
// A list of strings that denote a negative scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
export const NEGATIVE_SCORE_HINTS = [
'adbox',
'advert',
'author',
'bio',
'bookmark',
'bottom',
'byline',
'clear',
'com-',
'combx',
'comment',
'comment\B',
'contact',
'copy',
'credit',
'crumb',
'date',
'deck',
'excerpt',
'featured', //tnr.com has a featured_content which throws us off
'foot',
'footer',
'footnote',
'graf',
'head',
'info',
'infotext', //newscientist.com copyright
'instapaper_ignore',
'jump',
'linebreak',
'link',
'masthead',
'media',
'meta',
'modal',
'outbrain', //slate.com junk
'promo',
'pr_', // autoblog - press release
'related',
'respond',
'roundcontent', //lifehacker restricted content warning
'scroll',
'secondary',
'share',
'shopping',
'shoutbox',
'side',
'sidebar',
'sponsor',
'stamp',
'sub',
'summary',
'tags',
'tools',
'widget'
]
'adbox',
'advert',
'author',
'bio',
'bookmark',
'bottom',
'byline',
'clear',
'com-',
'combx',
'comment',
'comment\B',
'contact',
'copy',
'credit',
'crumb',
'date',
'deck',
'excerpt',
'featured', // tnr.com has a featured_content which throws us off
'foot',
'footer',
'footnote',
'graf',
'head',
'info',
'infotext', // newscientist.com copyright
'instapaper_ignore',
'jump',
'linebreak',
'link',
'masthead',
'media',
'meta',
'modal',
'outbrain', // slate.com junk
'promo',
'pr_', // autoblog - press release
'related',
'respond',
'roundcontent', // lifehacker restricted content warning
'scroll',
'secondary',
'share',
'shopping',
'shoutbox',
'side',
'sidebar',
'sponsor',
'stamp',
'sub',
'summary',
'tags',
'tools',
'widget',
];
// The above list, joined into a matching regular expression
export const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i')
export const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');
// Match a digit. Pretty clear.
export const DIGIT_RE = new RegExp('[0-9]')
export const DIGIT_RE = new RegExp('[0-9]');
// Match 2 or more consecutive <br> tags
export const BR_TAGS_RE = new RegExp('(<br[^>]*>[ \n\r\t]*){2,}', 'i')
export const BR_TAGS_RE = new RegExp('(<br[^>]*>[ \n\r\t]*){2,}', 'i');
// Match 1 BR tag.
export const BR_TAG_RE = new RegExp('<br[^>]*>', 'i')
export const BR_TAG_RE = new RegExp('<br[^>]*>', 'i');
// A list of all of the block level tags known in HTML5 and below. Taken from
// http://bit.ly/qneNIT
export const BLOCK_LEVEL_TAGS = [
'article',
'aside',
'blockquote',
'body',
'br',
'button',
'canvas',
'caption',
'col',
'colgroup',
'dd',
'div',
'dl',
'dt',
'embed',
'fieldset',
'figcaption',
'figure',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'header',
'hgroup',
'hr',
'li',
'map',
'object',
'ol',
'output',
'p',
'pre',
'progress',
'section',
'table',
'tbody',
'textarea',
'tfoot',
'th',
'thead',
'tr',
'ul',
'video',
]
export const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i')
'article',
'aside',
'blockquote',
'body',
'br',
'button',
'canvas',
'caption',
'col',
'colgroup',
'dd',
'div',
'dl',
'dt',
'embed',
'fieldset',
'figcaption',
'figure',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'header',
'hgroup',
'hr',
'li',
'map',
'object',
'ol',
'output',
'p',
'pre',
'progress',
'section',
'table',
'tbody',
'textarea',
'tfoot',
'th',
'thead',
'tr',
'ul',
'video',
];
export const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');
// The removal is implemented as a blacklist and whitelist, this test finds
// blacklisted elements that aren't whitelisted. We do this all in one
// expression-both because it's only one pass, and because this skips the
// serialization for whitelisted nodes.
const candidates_blacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|')
export const CANDIDATES_BLACKLIST = new RegExp(candidates_blacklist, 'i')
const candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
export const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
const candidates_whitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|')
export const CANDIDATES_WHITELIST = new RegExp(candidates_whitelist, 'i')
const candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
export const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
export const UNLIKELY_RE = new RegExp(`!(${candidates_whitelist})|(${candidates_blacklist})`, 'i')
export const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');
export const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i')
export const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i')
export const BAD_TAGS = new RegExp('^(address|form)$', 'i')
export const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');
export const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
export const BAD_TAGS = new RegExp('^(address|form)$', 'i');
export const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i')
export const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');

@ -1,115 +1,35 @@
import { NON_TOP_CANDIDATE_TAGS_RE } from './constants'
import { getScore } from './index'
import {
textLength,
linkDensity
} from 'utils/dom'
import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';
import { getScore } from './index';
import mergeSiblings from './merge-siblings';
// After we've calculated scores, loop through all of the possible
// candidate nodes we found and find the one with the highest score.
export default function findTopCandidate($) {
let $candidate, topScore = 0
let $candidate;
let topScore = 0;
$('*[score]').each((index, node) => {
const $node = $(node)
const $node = $(node);
// Ignore tags like BR, HR, etc
if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {
return
return;
}
const score = getScore($node)
const score = getScore($node);
if (score > topScore) {
topScore = score
$candidate = $node
topScore = score;
$candidate = $node;
}
})
});
// If we don't have a candidate, return the body
// or whatever the first element is
if (!$candidate) {
return $('body') || $('*').first()
return $('body') || $('*').first();
}
$candidate = mergeSiblings($candidate, topScore, $)
$candidate = mergeSiblings($candidate, topScore, $);
return $candidate
}
// Now that we have a top_candidate, look through the siblings of
// it to see if any of them are decently scored. If they are, they
// may be split parts of the content (Like two divs, a preamble and
// a body.) Example:
// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
export function mergeSiblings($candidate, topScore, $) {
if (!$candidate.parent().length) {
return $candidate
}
const siblingScoreThreshold = Math.max(10, topScore * 0.2)
let wrappingDiv = $('<div></div>')
$candidate.parent().children().each((index, child) => {
const $child = $(child)
// Ignore tags like BR, HR, etc
if (NON_TOP_CANDIDATE_TAGS_RE.test(child.tagName)) {
return
}
const childScore = getScore($child)
if (childScore) {
if ($child === $candidate) {
wrappingDiv.append($child)
} else {
let contentBonus = 0
// extract to scoreLinkDensity() TODO
const density = linkDensity($child)
// If sibling has a very low link density,
// give it a small bonus
if (density < .05) {
contentBonus = contentBonus + 20
}
// If sibling has a high link density,
// give it a penalty
if (density >= 0.5) {
contentBonus = contentBonus - 20
}
// If sibling node has the same class as
// candidate, give it a bonus
if ($child.attr('class') === $candidate.attr('class')) {
contentBonus = contentBonus + topScore * .2
}
const newScore = getScore($child) + contentBonus
if (newScore >= siblingScoreThreshold) {
return wrappingDiv.append($child)
} else if (child.tagName === 'p') {
const childContentLength = textLength($child.text())
if (childContentLength > 80 && density < .25) {
return wrappingDiv.append($child)
} else if (childContentLength <= 80 && density === 0 &&
hasSentenceEnd(childContent)) {
return wrappingDiv.append($child)
}
}
}
}
})
return wrappingDiv
}
// TODO Extract into util - AP
// Given a string, return True if it appears to have an ending sentence
// within it, false otherwise.
const SENTENCE_END_RE = new RegExp('\.( |$)')
function hasSentenceEnd(text) {
return SENTENCE_END_RE.test(text)
return $candidate;
}

@ -1,58 +1,58 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import assert from 'assert';
import cheerio from 'cheerio';
import fs from 'fs';
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import {
getScore,
findTopCandidate,
scoreContent
} from './index'
scoreContent,
} from './index';
describe('findTopCandidate($)', () => {
it("finds the top candidate from simple case", () => {
const $ = cheerio.load(HTML.findDom1)
it('finds the top candidate from simple case', () => {
const $ = cheerio.load(HTML.findDom1);
const $$topCandidate = findTopCandidate($)
const $$topCandidate = findTopCandidate($);
assert.equal(getScore($$topCandidate), 100)
})
assert.equal(getScore($$topCandidate), 100);
});
it("finds the top candidate from a nested case", () => {
const $ = cheerio.load(HTML.findDom2)
it('finds the top candidate from a nested case', () => {
const $ = cheerio.load(HTML.findDom2);
const $$topCandidate = findTopCandidate($)
const $$topCandidate = findTopCandidate($);
// this is wrapped in a div so checking
// the score of the first child
assert.equal(getScore($$topCandidate.children().first()), 50)
})
assert.equal(getScore($$topCandidate.children().first()), 50);
});
it("ignores tags like BR", () => {
const $ = cheerio.load(HTML.findDom3)
it('ignores tags like BR', () => {
const $ = cheerio.load(HTML.findDom3);
const $topCandidate = findTopCandidate($)
const $topCandidate = findTopCandidate($);
assert.equal(getScore($topCandidate), 50)
})
assert.equal(getScore($topCandidate), 50);
});
it("returns BODY if no candidates found", () => {
const $ = cheerio.load(HTML.topBody)
it('returns BODY if no candidates found', () => {
const $ = cheerio.load(HTML.topBody);
const $topCandidate = findTopCandidate($)
const $topCandidate = findTopCandidate($);
assert.equal($topCandidate.get(0).tagName, 'body')
})
assert.equal($topCandidate.get(0).tagName, 'body');
});
it("appends a sibling with a good enough score", () => {
const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8')
it('appends a sibling with a good enough score', () => {
const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8');
let $ = cheerio.load(html)
$ = scoreContent($)
let $ = cheerio.load(html);
$ = scoreContent($);
const $topCandidate = findTopCandidate($)
assert.equal($($topCandidate).text().length, 3652)
})
})
const $topCandidate = findTopCandidate($);
assert.equal($($topCandidate).text().length, 3652);
});
});

@ -237,7 +237,7 @@ const HTML = {
`,
after: `
<div><div><div><p><a href="">Wow how about that</a></p></div></div></div>
`
`,
},
// cleanImages
@ -252,7 +252,7 @@ const HTML = {
<div>
<img width="50">
</div>
`
`,
},
cleanHeight: {
before: `
@ -264,7 +264,7 @@ const HTML = {
<div>
<img width="50">
</div>
`
`,
},
cleanSpacer: {
before: `
@ -279,7 +279,7 @@ const HTML = {
<img src="/foo/bar/baz/normal.png">
<p>Some text</p>
</div>
`
`,
},
// stripJunkTags
stripsJunk: {
@ -298,7 +298,7 @@ const HTML = {
<div>
<p>What an article</p>
</div>
`
`,
},
// stripHOnes
@ -314,7 +314,7 @@ const HTML = {
<div>
<p>What do you think?</p>
</div>
`
`,
},
convertThreeHOnes: {
before: `
@ -334,7 +334,7 @@ const HTML = {
<p>What do you think?</p>
<h2>Can you believe it?!</h2>
</div>
`
`,
},
// cleanAttributes
@ -348,7 +348,7 @@ const HTML = {
<div>
<p>What do you think?</p>
</div>
`
`,
},
removeAlign: {
before: `
@ -360,7 +360,7 @@ const HTML = {
<div>
<p>What do you think?</p>
</div>
`
`,
},
// removeEmpty
@ -375,7 +375,7 @@ const HTML = {
<div>
<p>What do you think?</p>
</div>
`
`,
},
doNotRemoveBr: {
before: `
@ -392,7 +392,7 @@ const HTML = {
<div></div>
<p>What do you think?</p>
</div>
`
`,
},
doNotNested: {
before: `
@ -409,7 +409,7 @@ const HTML = {
<p><img src="foo/bar.jpg" /></p>
<p>What do you think?</p>
</div>
`
`,
},
// cleanConditionally
@ -433,7 +433,7 @@ const HTML = {
</p>
<p>What do you think?</p>
</div>
`
`,
},
removeTooManyInputs: {
before: `
@ -467,7 +467,7 @@ const HTML = {
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`
`,
},
removeShortNoImg: {
before: `
@ -490,7 +490,7 @@ const HTML = {
<img src="asdf">
</div>
</div>
`
`,
},
linkDensityHigh: {
@ -527,7 +527,7 @@ const HTML = {
<li>Keep this one</li>
</ul>
</div>
`
`,
},
goodScoreTooDense: {
before: `
@ -567,7 +567,7 @@ const HTML = {
<li>Keep this one</li>
</ul>
</div>
`
`,
},
previousEndsInColon: {
before: `
@ -608,7 +608,7 @@ const HTML = {
<p>What do you think?</p>
</div>
`,
after: `What do you think?`
after: 'What do you think?',
},
// cleanHeaders
@ -627,7 +627,7 @@ const HTML = {
<h2>Keep me</h2>
<p>What do you think?</p>
</div>
`
`,
},
cleanTitleMatch: {
before: `
@ -642,7 +642,7 @@ const HTML = {
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`
`,
},
dropWithNegativeWeight: {
before: `
@ -657,8 +657,8 @@ const HTML = {
<p>What do you think?</p>
<p>What do you think?</p>
</div>
`
`,
},
}
};
export default HTML
export default HTML;

@ -82,6 +82,6 @@ const HTML = {
</article>
<body>
`,
}
};
export default HTML
export default HTML;

@ -3,27 +3,26 @@ import {
scoreNode,
getWeight,
addToParent,
} from './index'
} from './index';
// gets and returns the score if it exists
// if not, initializes a score based on
// the node's tag type
export default function getOrInitScore($node, $, weightNodes=true) {
let score = getScore($node)
export default function getOrInitScore($node, $, weightNodes = true) {
let score = getScore($node);
if (score) {
return score
} else {
score = scoreNode($node)
return score;
}
if (weightNodes) {
score = score + getWeight($node)
}
score = scoreNode($node);
addToParent($node, $, score)
if (weightNodes) {
score += getWeight($node);
}
return score
}
addToParent($node, $, score);
return score;
}

@ -1,61 +1,61 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import {
getOrInitScore,
getScore,
} from './index'
} from './index';
describe('getOrInitScore(node, $)', () => {
describe('when score set', () => {
it(`returns score if node's score already set`, () => {
const html = '<p score="40">Foo</p>'
const $ = cheerio.load(html)
const node = $('p').first()
it('returns score if node\'s score already set', () => {
const html = '<p score="40">Foo</p>';
const $ = cheerio.load(html);
const node = $('p').first();
const score = getOrInitScore(node, $)
const score = getOrInitScore(node, $);
assert.equal(score, 40)
})
})
assert.equal(score, 40);
});
});
describe('when no score set', () => {
it(`returns 0 if no class/id and text < 25 chars`, () => {
const html = '<p>Foo</p>'
const $ = cheerio.load(html)
const node = $('p').first()
it('returns 0 if no class/id and text < 25 chars', () => {
const html = '<p>Foo</p>';
const $ = cheerio.load(html);
const node = $('p').first();
const score = getOrInitScore(node, $)
const score = getOrInitScore(node, $);
assert.equal(score, 0)
})
assert.equal(score, 0);
});
it(`returns score if no class/id and has commas/length`, () => {
const $ = cheerio.load(HTML.score19)
const node = $('p').first()
it('returns score if no class/id and has commas/length', () => {
const $ = cheerio.load(HTML.score19);
const node = $('p').first();
const score = getOrInitScore(node, $)
const score = getOrInitScore(node, $);
assert.equal(score, 19)
})
assert.equal(score, 19);
});
it(`returns greater score if weighted class/id is set`, () => {
const $ = cheerio.load(HTML.score44)
const node = $('p').first()
it('returns greater score if weighted class/id is set', () => {
const $ = cheerio.load(HTML.score44);
const node = $('p').first();
const score = getOrInitScore(node, $)
const score = getOrInitScore(node, $);
assert.equal(score, 44)
})
assert.equal(score, 44);
});
it(`gives 1/4 of its score to its parent`, () => {
const $ = cheerio.load(HTML.score44Parent)
const node = $('p').first()
it('gives 1/4 of its score to its parent', () => {
const $ = cheerio.load(HTML.score44Parent);
const node = $('p').first();
const score = getOrInitScore(node, $)
getOrInitScore(node, $);
assert.equal(getScore(node.parent()), 16)
})
})
})
assert.equal(getScore(node.parent()), 16);
});
});
});

@ -2,5 +2,5 @@
// the node's score attribute
// returns null if no score set
export default function getScore($node) {
return parseFloat($node.attr('score')) || null
return parseFloat($node.attr('score')) || null;
}

@ -1,25 +1,22 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import { getScore } from './index'
import { getScore } from './index';
describe('Scoring utils', () => {
describe('getScore($node)', () => {
it("returns null if the node has no score set", () => {
const $ = cheerio.load('<p>Foo</p>')
const $node = $('p').first()
assert.equal(getScore($node), null)
})
it('returns null if the node has no score set', () => {
const $ = cheerio.load('<p>Foo</p>');
const $node = $('p').first();
assert.equal(getScore($node), null);
});
it("returns 25 if the node has a score attr of 25", () => {
const $ = cheerio.load('<p score="25">Foo</p>')
const $node = $('p').first()
assert.equal(typeof getScore($node), 'number')
assert.equal(getScore($node), 25)
})
})
})
it('returns 25 if the node has a score attr of 25', () => {
const $ = cheerio.load('<p score="25">Foo</p>');
const $node = $('p').first();
assert.equal(typeof getScore($node), 'number');
assert.equal(getScore($node), 25);
});
});
});

@ -3,42 +3,42 @@ import {
POSITIVE_SCORE_RE,
PHOTO_HINTS_RE,
READABILITY_ASSET,
} from './constants'
} from './constants';
// Get the score of a node based on its className and id.
export default function getWeight(node) {
const classes = node.attr('class')
const id = node.attr('id')
let score = 0
const classes = node.attr('class');
const id = node.attr('id');
let score = 0;
if (id) {
// if id exists, try to score on both positive and negative
if (POSITIVE_SCORE_RE.test(id)) {
score = score + 25
score += 25;
}
if (NEGATIVE_SCORE_RE.test(id)) {
score = score - 25
score -= 25;
}
}
if (classes) {
if (score == 0) {
if (score === 0) {
// if classes exist and id did not contribute to score
// try to score on both positive and negative
if (POSITIVE_SCORE_RE.test(classes)) {
score = score + 25
score += 25;
}
if (NEGATIVE_SCORE_RE.test(classes)) {
score = score - 25
score -= 25;
}
}
// even if score has been set by id, add score for
// even if score has been set by id, add score for
// possible photo matches
// "try to keep photos if we can"
if (PHOTO_HINTS_RE.test(classes)) {
score = score + 10
score += 10;
}
// add 25 if class matches entry-content-asset,
@ -46,11 +46,10 @@ export default function getWeight(node) {
// Readability publisher guidelines
// https://www.readability.com/developers/guidelines
if (READABILITY_ASSET.test(classes)) {
score = score + 25
score += 25;
}
}
return score
return score;
}

@ -1,59 +1,58 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/get-weight'
import HTML from './fixtures/get-weight';
import {
getWeight
} from './index'
getWeight,
} from './index';
describe('Generic Extractor Utils', () => {
describe('getWeight(node)', () => {
it("returns a score of 25 if node has positive id", () => {
const $ = cheerio.load(HTML.positiveId)
assert.equal(getWeight($('div')), 25)
})
it("returns a score of -25 if node has negative id", () => {
const $ = cheerio.load(HTML.negativeId)
assert.equal(getWeight($('div')), -25)
})
it("returns a score of 25 if node has positive class", () => {
const $ = cheerio.load(HTML.positiveClass)
assert.equal(getWeight($('div')), 25)
})
it("returns a score of -25 if node has negative class", () => {
const $ = cheerio.load(HTML.negativeClass)
assert.equal(getWeight($('div')), -25)
})
it("returns a score of 25 if node has both positive id and class", () => {
const $ = cheerio.load(HTML.positiveIdAndClass)
assert.equal(getWeight($('div')), 25)
})
it("returns a score of 25 if node has pos id and neg class", () => {
it('returns a score of 25 if node has positive id', () => {
const $ = cheerio.load(HTML.positiveId);
assert.equal(getWeight($('div')), 25);
});
it('returns a score of -25 if node has negative id', () => {
const $ = cheerio.load(HTML.negativeId);
assert.equal(getWeight($('div')), -25);
});
it('returns a score of 25 if node has positive class', () => {
const $ = cheerio.load(HTML.positiveClass);
assert.equal(getWeight($('div')), 25);
});
it('returns a score of -25 if node has negative class', () => {
const $ = cheerio.load(HTML.negativeClass);
assert.equal(getWeight($('div')), -25);
});
it('returns a score of 25 if node has both positive id and class', () => {
const $ = cheerio.load(HTML.positiveIdAndClass);
assert.equal(getWeight($('div')), 25);
});
it('returns a score of 25 if node has pos id and neg class', () => {
// is this really wanted? id="entry" class="adbox"
// should get positive score?
const $ = cheerio.load(HTML.positiveIdNegClass)
assert.equal(getWeight($('div')), 25)
})
const $ = cheerio.load(HTML.positiveIdNegClass);
assert.equal(getWeight($('div')), 25);
});
it("returns a score of 10 if node has pos img class", () => {
const $ = cheerio.load(HTML.positivePhotoClass)
assert.equal(getWeight($('div')), 10)
})
it('returns a score of 10 if node has pos img class', () => {
const $ = cheerio.load(HTML.positivePhotoClass);
assert.equal(getWeight($('div')), 10);
});
it("returns a score of 35 if node has pos id pos img class", () => {
const $ = cheerio.load(HTML.positiveIdAndPhoto)
assert.equal(getWeight($('div')), 35)
})
it('returns a score of 35 if node has pos id pos img class', () => {
const $ = cheerio.load(HTML.positiveIdAndPhoto);
assert.equal(getWeight($('div')), 35);
});
it("adds an add'l 25 (total 50) if node uses entry-content-asset class", () => {
const $ = cheerio.load(HTML.entryContentAsset)
assert.equal(getWeight($('div')), 50)
})
})
})
const $ = cheerio.load(HTML.entryContentAsset);
assert.equal(getWeight($('div')), 50);
});
});
});

@ -1,13 +1,13 @@
// Scoring
export { default as getWeight } from './get-weight'
export { default as getScore } from './get-score'
export { default as scoreCommas } from './score-commas'
export { default as scoreLength } from './score-length'
export { default as scoreParagraph } from './score-paragraph'
export { default as setScore } from './set-score'
export { default as addScore } from './add-score'
export { default as addToParent } from './add-to-parent'
export { default as getOrInitScore } from './get-or-init-score'
export { default as scoreNode } from './score-node'
export { default as scoreContent } from './score-content'
export { default as findTopCandidate } from './find-top-candidate'
export { default as getWeight } from './get-weight';
export { default as getScore } from './get-score';
export { default as scoreCommas } from './score-commas';
export { default as scoreLength } from './score-length';
export { default as scoreParagraph } from './score-paragraph';
export { default as setScore } from './set-score';
export { default as addScore } from './add-score';
export { default as addToParent } from './add-to-parent';
export { default as getOrInitScore } from './get-or-init-score';
export { default as scoreNode } from './score-node';
export { default as scoreContent } from './score-content';
export { default as findTopCandidate } from './find-top-candidate';

@ -0,0 +1,79 @@
import {
textLength,
linkDensity,
} from 'utils/dom';
import { hasSentenceEnd } from 'utils/text';
import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';
import { getScore } from './index';
// Now that we have a top_candidate, look through the siblings of
// it to see if any of them are decently scored. If they are, they
// may be split parts of the content (Like two divs, a preamble and
// a body.) Example:
// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
// Now that we have a top candidate, look through its siblings to see
// if any of them are decently scored. If they are, they may be split
// parts of the content (like two divs, a preamble and a body). Example:
// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
//
// @param {Object} $candidate - cheerio selection for the top candidate node
// @param {number} topScore   - the top candidate's content score
// @param {Object} $          - the cheerio document handle
// @returns {Object} a new wrapping <div> containing the candidate plus any
//                   qualifying siblings, or $candidate unchanged when it has
//                   no parent to collect siblings from
export default function mergeSiblings($candidate, topScore, $) {
  if (!$candidate.parent().length) {
    return $candidate;
  }

  // Siblings must score at least 20% of the top score (floor of 10) to
  // be merged in.
  const siblingScoreThreshold = Math.max(10, topScore * 0.2);
  const wrappingDiv = $('<div></div>');

  $candidate.parent().children().each((index, child) => {
    const $child = $(child);
    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE.test(child.tagName)) {
      return null;
    }

    const childScore = getScore($child);
    if (childScore) {
      // Compare underlying DOM nodes: `$child === $candidate` would compare
      // distinct cheerio wrapper objects by reference and never match, which
      // previously routed the candidate itself through the sibling-bonus
      // logic below.
      if ($child.get(0) === $candidate.get(0)) {
        wrappingDiv.append($child);
      } else {
        let contentBonus = 0;
        // extract to scoreLinkDensity() TODO
        const density = linkDensity($child);

        // If sibling has a very low link density,
        // give it a small bonus
        if (density < 0.05) {
          contentBonus += 20;
        }

        // If sibling has a high link density,
        // give it a penalty
        if (density >= 0.5) {
          contentBonus -= 20;
        }

        // If sibling node has the same class as
        // candidate, give it a bonus
        if ($child.attr('class') === $candidate.attr('class')) {
          contentBonus += topScore * 0.2;
        }

        const newScore = getScore($child) + contentBonus;

        if (newScore >= siblingScoreThreshold) {
          return wrappingDiv.append($child);
        } else if (child.tagName === 'p') {
          // Short paragraphs can still be kept: longer ones with few links,
          // or link-free ones that end like a real sentence.
          const childContent = $child.text();
          const childContentLength = textLength(childContent);

          if (childContentLength > 80 && density < 0.25) {
            return wrappingDiv.append($child);
          } else if (childContentLength <= 80 && density === 0 &&
            hasSentenceEnd(childContent)) {
            return wrappingDiv.append($child);
          }
        }
      }
    }

    return null;
  });

  return wrappingDiv;
}

@ -1,5 +1,5 @@
// return 1 for every comma in text
export default function scoreCommas(text) {
return (text.match(/,/g) || []).length
return (text.match(/,/g) || []).length;
}

@ -1,20 +1,18 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import { scoreCommas } from './index'
import { scoreCommas } from './index';
describe('Scoring utils', () => {
describe('scoreCommas(text)', () => {
it(`returns 0 if text has no commas`, () => {
assert.equal(scoreCommas("Foo bar"), 0)
})
it(`returns a point for every comma in the text`, () => {
assert.equal(scoreCommas('Foo, bar'), 1)
assert.equal(scoreCommas('Foo, bar, baz'), 2)
assert.equal(scoreCommas('Foo, bar, baz, bat'), 3)
})
})
})
it('returns 0 if text has no commas', () => {
assert.equal(scoreCommas('Foo bar'), 0);
});
it('returns a point for every comma in the text', () => {
assert.equal(scoreCommas('Foo, bar'), 1);
assert.equal(scoreCommas('Foo, bar, baz'), 2);
assert.equal(scoreCommas('Foo, bar, baz, bat'), 3);
});
});
});

@ -1,119 +1,69 @@
import { HNEWS_CONTENT_SELECTORS } from './constants'
import { convertNodeTo } from 'utils/dom';
import { HNEWS_CONTENT_SELECTORS } from './constants';
import {
scoreNode,
setScore,
getOrInitScore,
addScore,
} from './index'
} from './index';
import { convertNodeTo } from 'utils/dom'
// score content. Parents get the full value of their children's
// content score, grandparents half
export default function scoreContent($, weightNodes=true) {
// First, look for special hNews based selectors and give them a big
// boost, if they exist
HNEWS_CONTENT_SELECTORS.map(([parentSelector, childSelector]) => {
$(`${parentSelector} ${childSelector}`).each((index, node) => {
addScore($(node).parent(parentSelector), $, 80)
})
})
function convertSpans($node, $) {
if ($node.get(0)) {
const { tagName } = $node.get(0);
scorePs($, weightNodes)
if (tagName === 'span') {
// convert spans to divs
convertNodeTo($node, $, 'div');
}
}
}
return $
function addScoreTo($node, $, score) {
if ($node) {
convertSpans($node, $);
addScore($node, $, score);
}
}
function scorePs($, weightNodes) {
$('p, pre').toArray().map((node) => {
// The raw score for this paragraph, before we add any parent/child
// scores.
let $node = $(node)
$node = setScore($node, $, getOrInitScore($node, $, weightNodes))
let $node = $(node);
$node = setScore($node, $, getOrInitScore($node, $, weightNodes));
return $node
return $node;
}).forEach(($node) => {
// The parent scoring has to be done in a separate loop
// because otherwise scoring the parent overwrites
// the score added to the child
// Add the individual content score to the parent node
const rawScore = scoreNode($node)
const rawScore = scoreNode($node);
const $parent = $node.parent()
addScoreTo($parent, $, rawScore, weightNodes)
const $parent = $node.parent();
addScoreTo($parent, $, rawScore, weightNodes);
if ($parent) {
// Add half of the individual content score to the
// grandparent
addScoreTo($parent.parent(), $, rawScore/2, weightNodes)
addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);
}
})
});
}
function convertSpans($node, $) {
if ($node.get(0)) {
const { tagName } = $node.get(0)
// score content. Parents get the full value of their children's
// content score, grandparents half
export default function scoreContent($, weightNodes = true) {
// First, look for special hNews based selectors and give them a big
// boost, if they exist
HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {
$(`${parentSelector} ${childSelector}`).each((index, node) => {
addScore($(node).parent(parentSelector), $, 80);
});
});
if (tagName === 'span') {
// convert spans to divs
convertNodeTo($node, $, 'div')
}
}
}
scorePs($, weightNodes);
function addScoreTo($node, $, score, weightNodes) {
if ($node) {
convertSpans($node, $)
addScore($node, $, score)
}
return $;
}
// def _score_content(self, doc, weight_nodes=True):
// for selector in constants.HNEWS_CONTENT_SELECTORS:
// # Not self.resource.extract_by_selector because our doc is a copy
// # of the resource doc.
// nodes = extract_by_selector(doc, selector,
// AttribMap(doc))
// for node in nodes:
// self._add_score(node, 80)
//
// paras = doc.xpath('.//p | .//pre')
//
// # If we don't have any paragraphs at all, we can't score based on
// # paragraphs, so return without modifying anything else.
// if len(paras) == 0:
// return doc
//
// for para in paras:
// # Don't score invalid tags
// if not isinstance(para.tag, basestring):
// continue
//
// # The raw score for this paragraph, before we add any parent/child
// # scores.
// raw_score = self._score_node(para)
// self._set_score(para, self._get_score(para, weight_nodes))
//
// parent = para.getparent()
// if parent is not None:
// if parent.tag == 'span':
// parent.tag = 'div'
//
// # Add the individual content score to the parent node
// self._add_score(parent, raw_score, weight_nodes=weight_nodes)
//
// grandparent = parent.getparent()
// if grandparent is not None:
// if grandparent.tag == 'span':
// grandparent.tag = 'div'
//
// # Add half of the individual content score to the
// # grandparent
// gp_score = raw_score / 2.0
// self._add_score(grandparent, gp_score, weight_nodes=weight_nodes)
//
// return doc

@ -1,47 +1,45 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import assert from 'assert';
import cheerio from 'cheerio';
import fs from 'fs';
import { clean } from 'test-helpers'
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import {
scoreContent,
getScore,
} from './index'
} from './index';
// TODO: Walk through these and sanity check my scores
// Commented out scores were what I expected, but I was also
// probably missing something when calculating
describe('scoreContent($, weightNodes)', () => {
it("loves hNews content", () => {
const $ = cheerio.load(HTML.hNews.before)
const result = scoreContent($).html()
it('loves hNews content', () => {
const $ = cheerio.load(HTML.hNews.before);
scoreContent($).html();
assert.equal(getScore($('div').first()), 140)
})
assert.equal(getScore($('div').first()), 140);
});
it("is so-so about non-hNews content", () => {
const $ = cheerio.load(HTML.nonHNews.before)
const result = scoreContent($).html()
it('is so-so about non-hNews content', () => {
const $ = cheerio.load(HTML.nonHNews.before);
scoreContent($).html();
assert.equal(getScore($('div').first()), 65)
})
assert.equal(getScore($('div').first()), 65);
});
it("scores this Wired article the same", () => {
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8')
const $ = cheerio.load(html)
const result = scoreContent($).html()
it('scores this Wired article the same', () => {
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8');
const $ = cheerio.load(html);
scoreContent($).html();
assert.equal(getScore($('article').first()), 65.5)
})
assert.equal(getScore($('article').first()), 65.5);
});
it("scores this Vulture article", () => {
const html = fs.readFileSync('./fixtures/vulture.html', 'utf-8')
let $ = cheerio.load(html)
$ = scoreContent($)
it('scores this Vulture article', () => {
const html = fs.readFileSync('./fixtures/vulture.html', 'utf-8');
let $ = cheerio.load(html);
$ = scoreContent($);
assert.equal($('p[score]').length, 62)
})
})
assert.equal($('p[score]').length, 62);
});
});

@ -1,11 +1,10 @@
const idkRe = new RegExp('^(p|pre)$', 'i')
const idkRe = new RegExp('^(p|pre)$', 'i');
export default function scoreLength(textLength, tagName='p') {
let score
const chunks = textLength / 50
export default function scoreLength(textLength, tagName = 'p') {
const chunks = textLength / 50;
if (chunks > 0) {
let lengthBonus
let lengthBonus;
// No idea why p or pre are being tamped down here
// but just following the source for now
@ -13,14 +12,14 @@ export default function scoreLength(textLength, tagName='p') {
// since this is only being called from the context
// of scoreParagraph
if (idkRe.test(tagName)) {
lengthBonus = chunks - 2
lengthBonus = chunks - 2;
} else {
lengthBonus = chunks - 1.25
lengthBonus = chunks - 1.25;
}
return Math.min(Math.max(lengthBonus, 0), 3)
} else {
return 0
return Math.min(Math.max(lengthBonus, 0), 3);
}
return 0;
}

@ -1,22 +1,21 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import { scoreLength } from './index'
import { scoreLength } from './index';
describe('Scoring utils', () => {
describe('scoreLength(textLength, tagName)', () => {
it(`returns 0 if length < 50 chars`, () => {
assert.equal(scoreLength(30), 0)
})
it('returns 0 if length < 50 chars', () => {
assert.equal(scoreLength(30), 0);
});
it(`returns varying scores but maxes out at 3`, () => {
assert.equal(scoreLength(150), 1)
assert.equal(scoreLength(199), 1.98)
assert.equal(scoreLength(200), 2)
assert.equal(scoreLength(250), 3)
assert.equal(scoreLength(500), 3)
assert.equal(scoreLength(1500), 3)
})
})
})
it('returns varying scores but maxes out at 3', () => {
assert.equal(scoreLength(150), 1);
assert.equal(scoreLength(199), 1.98);
assert.equal(scoreLength(200), 2);
assert.equal(scoreLength(250), 3);
assert.equal(scoreLength(500), 3);
assert.equal(scoreLength(1500), 3);
});
});
});

@ -1,29 +1,29 @@
import { scoreParagraph } from './index'
import { scoreParagraph } from './index';
import {
PARAGRAPH_SCORE_TAGS,
CHILD_CONTENT_TAGS,
BAD_TAGS,
} from './constants'
} from './constants';
// Score an individual node. Has some smarts for paragraphs, otherwise
// just scores based on tag.
export default function scoreNode($node) {
const { tagName } = $node.get(0)
const { tagName } = $node.get(0);
// TODO: Consider ordering by most likely.
// E.g., if divs are a more common tag on a page,
// Could save doing that regex test on every node AP
if (PARAGRAPH_SCORE_TAGS.test(tagName)) {
return scoreParagraph($node)
return scoreParagraph($node);
} else if (tagName === 'div') {
return 5
return 5;
} else if (CHILD_CONTENT_TAGS.test(tagName)) {
return 3
return 3;
} else if (BAD_TAGS.test(tagName)) {
return -3
return -3;
} else if (tagName === 'th') {
return -5
return -5;
}
return 0
return 0;
}

@ -1,95 +1,94 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import {
scoreNode,
scoreParagraph,
} from './index'
} from './index';
describe('scoreNode(node)', () => {
it(`scores P, LI, SPAN, and PRE using scoreParagraph`, () => {
const html = '<p><em>Foo</em> bar</p>'
const $ = cheerio.load(html)
let node = $('p').first()
it('scores P, LI, SPAN, and PRE using scoreParagraph', () => {
const html = '<p><em>Foo</em> bar</p>';
const $ = cheerio.load(html);
const node = $('p').first();
const score = scoreNode(node)
const pScore = scoreParagraph(node)
const score = scoreNode(node);
const pScore = scoreParagraph(node);
assert.equal(score, pScore)
assert.equal(score, 0)
})
assert.equal(score, pScore);
assert.equal(score, 0);
});
it(`scores P, LI, SPAN, and PRE using scoreParagraph`, () => {
const $ = cheerio.load(HTML.score1)
let node = $('p').first()
it('scores P, LI, SPAN, and PRE using scoreParagraph', () => {
const $ = cheerio.load(HTML.score1);
const node = $('p').first();
const score = scoreNode(node)
const pScore = scoreParagraph(node)
const score = scoreNode(node);
const pScore = scoreParagraph(node);
assert.equal(score, pScore)
assert.equal(score, 1)
assert.equal(score, pScore);
assert.equal(score, 1);
});
})
it('scores P, LI, SPAN, and PRE using scoreParagraph', () => {
const $ = cheerio.load(HTML.score3);
const node = $('p').first();
it(`scores P, LI, SPAN, and PRE using scoreParagraph`, () => {
const $ = cheerio.load(HTML.score3)
let node = $('p').first()
const score = scoreNode(node);
const pScore = scoreParagraph(node);
const score = scoreNode(node)
const pScore = scoreParagraph(node)
assert.equal(score, pScore);
assert.equal(score, 3);
});
assert.equal(score, pScore)
assert.equal(score, 3)
})
it('scores P, LI, SPAN, and PRE using scoreParagraph', () => {
const $ = cheerio.load(HTML.score19);
const node = $('p').first();
it(`scores P, LI, SPAN, and PRE using scoreParagraph`, () => {
const $ = cheerio.load(HTML.score19)
let node = $('p').first()
const score = scoreNode(node);
const pScore = scoreParagraph(node);
const score = scoreNode(node)
const pScore = scoreParagraph(node)
assert.equal(score, pScore);
assert.equal(score, 19);
});
assert.equal(score, pScore)
assert.equal(score, 19)
})
it('scores divs with 5', () => {
const $ = cheerio.load(HTML.divScore5);
const node = $('div').first();
it(`scores divs with 5`, () => {
const $ = cheerio.load(HTML.divScore5)
let node = $('div').first()
const score = scoreNode(node);
const score = scoreNode(node)
assert.equal(score, 5);
});
assert.equal(score, 5)
})
it('scores the blockquote family with 3', () => {
const $ = cheerio.load(HTML.blockquoteScore3);
const node = $('blockquote').first();
it(`scores the blockquote family with 3`, () => {
const $ = cheerio.load(HTML.blockquoteScore3)
let node = $('blockquote').first()
const score = scoreNode(node);
const score = scoreNode(node)
assert.equal(score, 3);
});
assert.equal(score, 3)
})
it('scores a form with negative 3', () => {
const $ = cheerio.load(HTML.formScoreNeg3);
const node = $('form').first();
it(`scores a form with negative 3`, () => {
const $ = cheerio.load(HTML.formScoreNeg3)
let node = $('form').first()
const score = scoreNode(node);
const score = scoreNode(node)
assert.equal(score, -3);
});
assert.equal(score, -3)
})
it('scores a TH element with negative 5', () => {
const $ = cheerio.load(HTML.thScoreNeg5);
const node = $('th').first();
it(`scores a TH element with negative 5`, () => {
const $ = cheerio.load(HTML.thScoreNeg5)
let node = $('th').first()
const score = scoreNode(node);
const score = scoreNode(node)
assert.equal(score, -5)
})
})
assert.equal(score, -5);
});
});

@ -1,35 +1,35 @@
import {
scoreCommas,
scoreLength,
} from './index'
} from './index';
// Score a paragraph using various methods. Things like number of
// commas, etc. Higher is better.
export default function scoreParagraph(node) {
let score = 1
const text = node.text().trim()
const textLength = text.length
let score = 1;
const text = node.text().trim();
const textLength = text.length;
// If this paragraph is less than 25 characters, don't count it.
if (textLength < 25) {
return 0
return 0;
}
// Add points for any commas within this paragraph
score = score + scoreCommas(text)
score += scoreCommas(text);
// For every 50 characters in this paragraph, add another point. Up
// to 3 points.
score = score + scoreLength(textLength)
score += scoreLength(textLength);
// Articles can end with short paragraphs when people are being clever
// but they can also end with short paragraphs setting up lists of junk
// that we strip. This negative tweaks junk setup paragraphs just below
// the cutoff threshold.
if (text.slice(-1) === ':') {
score = score - 1
score -= 1;
}
return score
return score;
}

@ -1,48 +1,48 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import {
scoreParagraph,
} from './index'
} from './index';
describe('Scoring utils', () => {
describe('scoreParagraph(node)', () => {
it(`returns 0 if text is less than 25 chars`, () => {
const html = '<p><em>Foo</em> bar</p>'
const $ = cheerio.load(html)
let node = $('p').first()
it('returns 0 if text is less than 25 chars', () => {
const html = '<p><em>Foo</em> bar</p>';
const $ = cheerio.load(html);
const node = $('p').first();
const score = scoreParagraph(node)
const score = scoreParagraph(node);
assert.equal(score, 0)
})
assert.equal(score, 0);
});
it(`returns 1 if text is > 25 chars and has 0 commas`, () => {
const $ = cheerio.load(HTML.score1)
let node = $('p').first()
it('returns 1 if text is > 25 chars and has 0 commas', () => {
const $ = cheerio.load(HTML.score1);
const node = $('p').first();
const score = scoreParagraph(node)
const score = scoreParagraph(node);
assert.equal(score, 1)
})
assert.equal(score, 1);
});
it(`returns 3 if text is > 25 chars and has 2 commas`, () => {
const $ = cheerio.load(HTML.score3)
let node = $('p').first()
it('returns 3 if text is > 25 chars and has 2 commas', () => {
const $ = cheerio.load(HTML.score3);
const node = $('p').first();
const score = scoreParagraph(node)
const score = scoreParagraph(node);
assert.equal(score, 3)
})
assert.equal(score, 3);
});
it(`returns 19 if text has 15 commas, ~600 chars`, () => {
const $ = cheerio.load(HTML.score19)
let node = $('p').first()
it('returns 19 if text has 15 commas, ~600 chars', () => {
const $ = cheerio.load(HTML.score19);
const node = $('p').first();
const score = scoreParagraph(node)
const score = scoreParagraph(node);
assert.equal(score, 19)
})
})
})
assert.equal(score, 19);
});
});
});

@ -1,7 +1,6 @@
export default function setScore($node, $, score) {
$node.attr('score', score)
return $node
$node.attr('score', score);
return $node;
}

@ -1,23 +1,22 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import {
setScore,
getScore
} from './index'
getScore,
} from './index';
describe('Scoring utils', () => {
describe('setScore(node, $, amount)', () => {
it("sets the specified amount as the node's score", () => {
const $ = cheerio.load('<p>Foo</p>')
let $node = $('p').first()
const $ = cheerio.load('<p>Foo</p>');
let $node = $('p').first();
const newScore = 25
$node = setScore($node, $, newScore)
const newScore = 25;
$node = setScore($node, $, newScore);
const score = getScore($node)
assert(score, newScore)
})
})
})
const score = getScore($node);
assert(score, newScore);
});
});
});

@ -3,23 +3,23 @@
// should be lowercase for faster case-insensitive matching.
// From most distinct to least distinct.
export const DATE_PUBLISHED_META_TAGS = [
'article:published_time',
'displaydate',
'dc.date',
'dc.date.issued',
'rbpubdate',
'publish_date',
'pub_date',
'pagedate',
'pubdate',
'revision_date',
'doc_date',
'date_created',
'content_create_date',
'lastmodified',
'created',
'date'
]
'article:published_time',
'displaydate',
'dc.date',
'dc.date.issued',
'rbpubdate',
'publish_date',
'pub_date',
'pagedate',
'pubdate',
'revision_date',
'doc_date',
'date_created',
'content_create_date',
'lastmodified',
'created',
'date',
];
// An ordered list of XPath Selectors to find
// likely date published dates. From most explicit
@ -42,20 +42,20 @@ export const DATE_PUBLISHED_SELECTORS = [
'#story .datetime',
'.dateline',
'.pubdate',
]
];
// An ordered list of compiled regular expressions to find likely date
// published dates from the URL. These should always have the first
// reference be a date string that is parseable by dateutil.parser.parse
const abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)'
const abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';
export const DATE_PUBLISHED_URL_RES = [
// /2012/01/27/ but not /2012/01/293
new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),
new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),
// 20120127 or 20120127T but not 2012012733 or 8201201733
// /[^0-9](20\d{2}[01]\d[0-3]\d)([^0-9]|$)/i,
// 2012-01-27
new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),
new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),
// /2012/jan/27/
new RegExp(`/(20\\d{2}/${abbrevMonthsStr}/[0-3]\\d)/`, 'i')
]
new RegExp(`/(20\\d{2}/${abbrevMonthsStr}/[0-3]\\d)/`, 'i'),
];

@ -1,37 +1,36 @@
import { cleanDatePublished } from 'cleaners';
import {
extractFromMeta,
extractFromSelectors,
} from 'utils/dom';
import { extractFromUrl } from 'utils/text';
import {
DATE_PUBLISHED_META_TAGS,
DATE_PUBLISHED_SELECTORS,
DATE_PUBLISHED_URL_RES,
} from './constants'
import { cleanDatePublished } from 'cleaners'
import {
extractFromMeta,
extractFromSelectors,
} from 'utils/dom'
import { extractFromUrl } from 'utils/text'
} from './constants';
const GenericDatePublishedExtractor = {
extract({ $, url, metaCache }) {
let datePublished
let datePublished;
// First, check to see if we have a matching meta tag
// that we can make use of.
// Don't try cleaning tags from this string
datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false)
if(datePublished) return cleanDatePublished(datePublished)
datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);
if (datePublished) return cleanDatePublished(datePublished);
// Second, look through our selectors looking for potential
// date_published's.
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS)
if(datePublished) return cleanDatePublished(datePublished)
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished);
// Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES)
if(datePublished) return cleanDatePublished(datePublished)
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
if (datePublished) return cleanDatePublished(datePublished);
return null
}
}
return null;
},
};
export default GenericDatePublishedExtractor
export default GenericDatePublishedExtractor;

@ -1,97 +1,95 @@
import assert from 'assert'
import cheerio from 'cheerio'
import moment from 'moment'
import assert from 'assert';
import cheerio from 'cheerio';
import moment from 'moment';
import HTML from './fixtures/html'
import GenericDatePublishedExtractor from './extractor'
import HTML from './fixtures/html';
import GenericDatePublishedExtractor from './extractor';
describe('GenericDatePublishedExtractor', () => {
describe('extract($, metaCache)', () => {
it('extracts datePublished from meta tags', () => {
const $ = cheerio.load(HTML.datePublishedMeta.test)
const metaCache = ["displaydate", "something-else"]
const $ = cheerio.load(HTML.datePublishedMeta.test);
const metaCache = ['displaydate', 'something-else'];
const result =
GenericDatePublishedExtractor.extract(
{ $, url: '', metaCache }
)
);
assert.equal(
assert.equal(
result,
HTML.datePublishedMeta.result.toISOString()
)
})
);
});
it('extracts datePublished from selectors', () => {
const $ = cheerio.load(HTML.datePublishedSelectors.test)
const metaCache = []
const $ = cheerio.load(HTML.datePublishedSelectors.test);
const metaCache = [];
const result =
GenericDatePublishedExtractor.extract(
{ $, url: '', metaCache }
)
);
assert.equal(
assert.equal(
result,
HTML.datePublishedMeta.result.toISOString()
)
})
);
});
it('extracts from url formatted /2012/08/01/etc', () => {
const $ = cheerio.load('<div></div>')
const metaCache = []
const url = 'https://example.com/2012/08/01/this-is-good'
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2012/08/01/this-is-good';
const result =
GenericDatePublishedExtractor.extract(
{ $, url, metaCache }
)
);
assert.equal(
assert.equal(
result,
new Date('2012/08/01').toISOString()
)
})
);
});
it('extracts from url formatted /2020-01-01', () => {
const $ = cheerio.load('<div></div>')
const metaCache = []
const url = 'https://example.com/2020-01-01/this-is-good'
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2020-01-01/this-is-good';
const result =
GenericDatePublishedExtractor.extract(
{ $, url, metaCache }
)
);
assert.equal(
assert.equal(
result,
moment(new Date('2020-01-01')).toISOString()
)
})
);
});
it('extracts from url formatted /2020/jan/01', () => {
const $ = cheerio.load('<div></div>')
const metaCache = []
const url = 'https://example.com/2020/jan/01/this-is-good'
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2020/jan/01/this-is-good';
const result =
GenericDatePublishedExtractor.extract(
{ $, url, metaCache }
)
);
assert.equal(
assert.equal(
result,
new Date('2020/jan/01').toISOString()
)
})
);
});
it('returns null if no date can be found', () => {
const $ = cheerio.load('<div></div>')
const metaCache = []
const $ = cheerio.load('<div></div>');
const metaCache = [];
const result =
GenericDatePublishedExtractor.extract(
{ $, url: '', metaCache }
)
assert.equal(result, null)
})
})
})
);
assert.equal(result, null);
});
});
});

@ -7,7 +7,7 @@ const HTML = {
</head>
</html>
`,
result: new Date('1/1/2020 8:30 (EST)')
result: new Date('1/1/2020 8:30 (EST)'),
},
datePublishedSelectors: {
test: `
@ -19,8 +19,8 @@ const HTML = {
</head>
</div>
`,
result: new Date('1/1/2020 8:30 am (EST)')
result: new Date('1/1/2020 8:30 am (EST)'),
},
}
};
export default HTML
export default HTML;

@ -1,27 +1,28 @@
import {
DEK_META_TAGS,
DEK_SELECTORS,
DEK_URL_RES,
} from './constants'
// import {
// DEK_META_TAGS,
// DEK_SELECTORS,
// DEK_URL_RES,
// } from './constants';
import { cleanDek } from 'cleaners'
// import { cleanDek } from 'cleaners';
import {
extractFromMeta,
extractFromSelectors,
} from 'utils/dom'
// import {
// extractFromMeta,
// extractFromSelectors,
// } from 'utils/dom';
// Currently there is only one selector for
// deks. We should simply return null here
// until we have a more robust generic option.
// Below is the original source for this, for reference.
const GenericDekExtractor = {
extract({ $, content, metaCache }) {
return null
}
}
// extract({ $, content, metaCache }) {
extract() {
return null;
},
};
export default GenericDekExtractor
export default GenericDekExtractor;
// def extract_dek(self):
// # First, check to see if we have a matching meta tag that we can make

@ -1,20 +1,18 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
// import HTML from './fixtures/html'
import GenericDekExtractor from './extractor'
import GenericDekExtractor from './extractor';
describe('GenericDekExtractor', () => {
describe('extract({ $, metaCache })', () => {
it('returns null if no dek can be found', () => {
const $ = cheerio.load('<div></div>')
const metaCache = []
const $ = cheerio.load('<div></div>');
const metaCache = [];
const result =
GenericDekExtractor.extract({ $, metaCache })
assert.equal(result, null)
})
GenericDekExtractor.extract({ $, metaCache });
})
})
assert.equal(result, null);
});
});
});

@ -1,50 +1,50 @@
import cheerio from 'cheerio'
import cheerio from 'cheerio';
import GenericContentExtractor from './content/extractor'
import GenericTitleExtractor from './title/extractor'
import GenericAuthorExtractor from './author/extractor'
import GenericDatePublishedExtractor from './date-published/extractor'
import GenericDekExtractor from './dek/extractor'
import GenericLeadImageUrlExtractor from './lead-image-url/extractor'
import GenericNextPageUrlExtractor from './next-page-url/extractor'
import GenericContentExtractor from './content/extractor';
import GenericTitleExtractor from './title/extractor';
import GenericAuthorExtractor from './author/extractor';
import GenericDatePublishedExtractor from './date-published/extractor';
import GenericDekExtractor from './dek/extractor';
import GenericLeadImageUrlExtractor from './lead-image-url/extractor';
import GenericNextPageUrlExtractor from './next-page-url/extractor';
const GenericExtractor = {
// This extractor is the default for all domains
domain: '*',
title: GenericTitleExtractor.extract,
datePublished : GenericDatePublishedExtractor.extract,
datePublished: GenericDatePublishedExtractor.extract,
author: GenericAuthorExtractor.extract,
content: GenericContentExtractor.extract.bind(GenericContentExtractor),
leadImageUrl: GenericLeadImageUrlExtractor.extract,
dek: GenericDekExtractor.extract,
nextPageUrl: GenericNextPageUrlExtractor.extract,
extract: function(options) {
let { html } = options
extract(options) {
const { html } = options;
if (html) {
const $ = cheerio.load(html)
options.$ = $
const $ = cheerio.load(html);
options.$ = $;
}
const title = this.title(options)
const datePublished = this.datePublished(options)
const author = this.author(options)
const content = this.content({ ...options, title })
const leadImageUrl = this.leadImageUrl(options)
const dek = this.dek(options)
const nextPageUrl = this.nextPageUrl(options)
const title = this.title(options);
const datePublished = this.datePublished(options);
const author = this.author(options);
const content = this.content({ ...options, title });
const leadImageUrl = this.leadImageUrl(options);
const dek = this.dek(options);
const nextPageUrl = this.nextPageUrl(options);
return {
title,
author,
datePublished: datePublished ? datePublished : null,
datePublished: datePublished || null,
dek,
leadImageUrl,
content,
nextPageUrl,
}
}
}
};
},
};
export default GenericExtractor
export default GenericExtractor;

@ -1,14 +1,12 @@
import assert from 'assert'
import fs from 'fs'
import assert from 'assert';
import fs from 'fs';
import { clean } from 'test-helpers'
import GenericExtractor from './index'
import GenericExtractor from './index';
describe('GenericExtractor', () => {
describe('extract(opts)', () => {
it("extracts this old LA Times article", () => {
const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8')
it('extracts this old LA Times article', () => {
const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8');
const {
title,
@ -16,23 +14,23 @@ describe('GenericExtractor', () => {
datePublished,
dek,
} = GenericExtractor.extract(
{ url: "http://latimes.com", html, metaCache: [] }
)
{ url: 'http://latimes.com', html, metaCache: [] }
);
assert.equal(author, null)
assert.equal(author, null);
assert.equal(
title,
'California appears poised to be first to ban power-guzzling big-screen TVs'
)
);
assert.equal(
datePublished,
'2009-10-14T04:00:00.000Z'
)
assert.equal(dek, null)
})
);
assert.equal(dek, null);
});
it("extracts html and returns the article title", () => {
const html = fs.readFileSync('../fixtures/wired.html', 'utf-8')
it('extracts html and returns the article title', () => {
const html = fs.readFileSync('../fixtures/wired.html', 'utf-8');
const {
author,
@ -40,18 +38,17 @@ describe('GenericExtractor', () => {
datePublished,
dek,
} = GenericExtractor.extract(
{ url: "http://wired.com", html, metaCache: [] }
)
{ url: 'http://wired.com', html, metaCache: [] }
);
assert.equal(author, 'Eric Adams')
assert.equal(author, 'Eric Adams');
assert.equal(
title,
'Airplane Tires Dont Explode on Landing Because They Are Pumped!'
)
assert.equal(datePublished, null)
assert.equal(dek, null)
})
})
})
);
assert.equal(datePublished, null);
assert.equal(dek, null);
});
});
});

@ -2,52 +2,52 @@
// All attributes should be lowercase for faster case-insensitive matching.
// From most distinct to least distinct.
export const LEAD_IMAGE_URL_META_TAGS = [
'og:image',
'twitter:image',
'image_src',
]
'og:image',
'twitter:image',
'image_src',
];
export const LEAD_IMAGE_URL_SELECTORS = [
'link[rel=image_src]',
]
];
export const POSITIVE_LEAD_IMAGE_URL_HINTS = [
'upload',
'wp-content',
'large',
'photo',
'wp-image',
]
export const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i')
'upload',
'wp-content',
'large',
'photo',
'wp-image',
];
export const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
export const NEGATIVE_LEAD_IMAGE_URL_HINTS = [
'spacer',
'sprite',
'blank',
'throbber',
'gradient',
'tile',
'bg',
'background',
'icon',
'social',
'header',
'hdr',
'advert',
'spinner',
'loader',
'loading',
'default',
'rating',
'share',
'facebook',
'twitter',
'theme',
'promo',
'ads',
'wp-includes',
]
export const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i')
'spacer',
'sprite',
'blank',
'throbber',
'gradient',
'tile',
'bg',
'background',
'icon',
'social',
'header',
'hdr',
'advert',
'spinner',
'loader',
'loading',
'default',
'rating',
'share',
'facebook',
'twitter',
'theme',
'promo',
'ads',
'wp-includes',
];
export const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
export const GIF_RE = /\.gif(\?.*)?$/i
export const JPG_RE = /\.jpe?g(\?.*)?$/i
export const GIF_RE = /\.gif(\?.*)?$/i;
export const JPG_RE = /\.jpe?g(\?.*)?$/i;

@ -1,14 +1,12 @@
import 'babel-polyfill'
import 'babel-polyfill';
import { extractFromMeta } from 'utils/dom';
import { cleanImage } from 'cleaners';
import {
LEAD_IMAGE_URL_META_TAGS,
LEAD_IMAGE_URL_SELECTORS,
} from './constants'
import {
extractFromMeta,
extractFromSelectors
} from 'utils/dom'
} from './constants';
import {
scoreImageUrl,
@ -17,9 +15,7 @@ import {
scoreBySibling,
scoreByDimensions,
scoreByPosition,
} from './score-image'
import { cleanImage } from 'cleaners'
} from './score-image';
// Given a resource, try to find the lead image URL from within
// it. Like content and next page extraction, uses a scoring system
@ -31,86 +27,87 @@ import { cleanImage } from 'cleaners'
// * weird aspect ratio
const GenericLeadImageUrlExtractor = {
extract({ $, content, metaCache }) {
let imageUrl, cleanUrl
let cleanUrl;
// Check to see if we have a matching meta tag that we can make use of.
// Moving this higher because common practice is now to use large
// images on things like Open Graph or Twitter cards.
// images usually have for things like Open Graph.
imageUrl =
const imageUrl =
extractFromMeta(
$,
LEAD_IMAGE_URL_META_TAGS,
metaCache,
false
)
);
if (imageUrl) {
cleanUrl = cleanImage(imageUrl)
cleanUrl = cleanImage(imageUrl);
if (cleanUrl) return cleanUrl
if (cleanUrl) return cleanUrl;
}
// Next, try to find the "best" image via the content.
// We'd rather not have to fetch each image and check dimensions,
// so try to do some analysis and determine them instead.
const imgs = $('img', content).toArray()
let imgScores = {}
const imgs = $('img', content).toArray();
const imgScores = {};
imgs.forEach((img, index) => {
const $img = $(img)
const src = $img.attr('src')
const $img = $(img);
const src = $img.attr('src');
if (!src) return
if (!src) return;
let score = scoreImageUrl(src)
score = score + scoreAttr($img)
score = score + scoreByParents($img)
score = score + scoreBySibling($img)
score = score + scoreByDimensions($img)
score = score + scoreByPosition(imgs, index)
let score = scoreImageUrl(src);
score += scoreAttr($img);
score += scoreByParents($img);
score += scoreBySibling($img);
score += scoreByDimensions($img);
score += scoreByPosition(imgs, index);
imgScores[src] = score
})
imgScores[src] = score;
});
const [topUrl, topScore] =
Reflect.ownKeys(imgScores).reduce((acc, key) =>
imgScores[key] > acc[1] ? [key, imgScores[key]] : acc
, [null, 0])
, [null, 0]);
if (topScore > 0) {
cleanUrl = cleanImage(topUrl)
cleanUrl = cleanImage(topUrl);
if (cleanUrl) return cleanUrl
if (cleanUrl) return cleanUrl;
}
// If nothing else worked, check to see if there are any really
// probable nodes in the doc, like <link rel="image_src" />.
for (const selector of LEAD_IMAGE_URL_SELECTORS) {
const $node = $(selector).first()
const src = $node.attr('src')
const $node = $(selector).first();
const src = $node.attr('src');
if (src) {
cleanUrl = cleanImage(src)
if (cleanUrl) return cleanUrl
cleanUrl = cleanImage(src);
if (cleanUrl) return cleanUrl;
}
const href = $node.attr('href')
const href = $node.attr('href');
if (href) {
cleanUrl = cleanImage(href)
if (cleanUrl) return cleanUrl
cleanUrl = cleanImage(href);
if (cleanUrl) return cleanUrl;
}
const value = $node.attr('value')
const value = $node.attr('value');
if (value) {
cleanUrl = cleanImage(value)
if (cleanUrl) return cleanUrl
cleanUrl = cleanImage(value);
if (cleanUrl) return cleanUrl;
}
}
return null;
},
}
};
export default GenericLeadImageUrlExtractor
export default GenericLeadImageUrlExtractor;
// def extract(self):
// """
@ -182,7 +179,7 @@ export default GenericLeadImageUrlExtractor
// if sibling is not None:
// if sibling.tag == 'figcaption':
// img_score += 25
//
//
// sib_sig = ' '.join([sibling.get('id', ''),
// sibling.get('class', '')]).lower()
// if 'caption' in sib_sig:
@ -215,7 +212,7 @@ export default GenericLeadImageUrlExtractor
//
// if img_width and img_height and not 'sprite' in img_path:
// area = img_width * img_height
//
//
// if area < 5000: # Smaller than 50x100
// logger.debug('Image with small area found. Subtracting 100.')
// img_score -= 100

@ -1,62 +1,62 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import HTML from './fixtures/html'
import HTML from './fixtures/html';
import GenericLeadImageUrlExtractor from './extractor'
import GenericLeadImageUrlExtractor from './extractor';
describe('GenericLeadImageUrlExtractor', () => {
describe('extract({ $, content, metaCache })', () => {
it('returns og:image first', () => {
const $ = cheerio.load(HTML.og.test)
const content = $('*').first()
const metaCache = ['og:image']
const $ = cheerio.load(HTML.og.test);
const content = $('*').first();
const metaCache = ['og:image'];
const result =
GenericLeadImageUrlExtractor.extract(
{ $, content, metaCache }
)
);
assert.equal(result, HTML.og.result)
})
assert.equal(result, HTML.og.result);
});
it('returns twitter:image', () => {
const $ = cheerio.load(HTML.twitter.test)
const content = $('*').first()
const metaCache = ['twitter:image']
const $ = cheerio.load(HTML.twitter.test);
const content = $('*').first();
const metaCache = ['twitter:image'];
const result =
GenericLeadImageUrlExtractor.extract(
{ $, content, metaCache }
)
);
assert.equal(result, HTML.twitter.result)
})
assert.equal(result, HTML.twitter.result);
});
it('finds images based on scoring', () => {
const $ = cheerio.load(HTML.scoring.test)
const content = $('*').first()
const metaCache = []
const $ = cheerio.load(HTML.scoring.test);
const content = $('*').first();
const metaCache = [];
const result =
GenericLeadImageUrlExtractor.extract(
{ $, content, metaCache }
)
);
assert.equal(result, HTML.scoring.result)
})
assert.equal(result, HTML.scoring.result);
});
it('returns image based on selectors', () => {
const $ = cheerio.load(HTML.selectors.test)
const content = $('*').first()
const metaCache = []
const $ = cheerio.load(HTML.selectors.test);
const content = $('*').first();
const metaCache = [];
const result =
GenericLeadImageUrlExtractor.extract(
{ $, content, metaCache }
)
);
assert.equal(result, HTML.selectors.result)
})
})
})
assert.equal(result, HTML.selectors.result);
});
});
});

@ -7,7 +7,7 @@ const HTML = {
</head>
</html>
`,
result: `http://example.com/lead.jpg`
result: 'http://example.com/lead.jpg',
},
twitter: {
test: `
@ -17,7 +17,7 @@ const HTML = {
</head>
</html>
`,
result: `http://example.com/lead.jpg`
result: 'http://example.com/lead.jpg',
},
scoring: {
test: `
@ -27,7 +27,7 @@ const HTML = {
<img src="http://example.com/upload/whateverpic.png" />
</div>
`,
result: `http://example.com/upload/goodpic.jpg`
result: 'http://example.com/upload/goodpic.jpg',
},
selectors: {
test: `
@ -35,8 +35,8 @@ const HTML = {
<link rel="image_src" href="http://example.com/upload/goodpic.jpg">
</div>
`,
result: `http://example.com/upload/goodpic.jpg`
result: 'http://example.com/upload/goodpic.jpg',
},
}
};
export default HTML
export default HTML;

@ -3,123 +3,123 @@ import {
NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,
GIF_RE,
JPG_RE,
} from './constants'
} from './constants';
import { PHOTO_HINTS_RE } from '../content/scoring/constants'
import { PHOTO_HINTS_RE } from '../content/scoring/constants';
function getSig($node) {
return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;
}
// Scores image urls based on a variety of heuristics.
export function scoreImageUrl(url) {
url = url.trim()
let score = 0
url = url.trim();
let score = 0;
if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
score = score + 20
score += 20;
}
if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
score = score - 20
score -= 20;
}
// TODO: We might want to consider removing this as
// gifs are much more common/popular than they once were
if (GIF_RE.test(url)) {
score = score - 10
score -= 10;
}
if (JPG_RE.test(url)) {
score = score + 10
score += 10;
}
// PNGs are neutral.
return score
return score;
}
// Alt attribute usually means non-presentational image.
export function scoreAttr($img) {
if ($img.attr('alt')) {
return 5
} else {
return 0
return 5;
}
return 0;
}
// Look through our parent and grandparent for figure-like
// container elements, give a bonus if we find them
export function scoreByParents($img) {
let score = 0
const $figParent = $img.parents('figure').first()
let score = 0;
const $figParent = $img.parents('figure').first();
if ($figParent.length === 1) {
score = score + 25
score += 25;
}
const $parent = $img.parent()
let $gParent
const $parent = $img.parent();
let $gParent;
if ($parent.length === 1) {
$gParent = $parent.parent()
$gParent = $parent.parent();
}
[$parent, $gParent].forEach($node => {
[$parent, $gParent].forEach(($node) => {
if (PHOTO_HINTS_RE.test(getSig($node))) {
score = score + 15
score += 15;
}
})
});
return score
return score;
}
// Look at our immediate sibling and see if it looks like it's a
// caption. Bonus if so.
export function scoreBySibling($img) {
let score = 0
const $sibling = $img.next()
const sibling = $sibling.get(0)
let score = 0;
const $sibling = $img.next();
const sibling = $sibling.get(0);
if (sibling && sibling.tagName === 'figcaption') {
score = score + 25
score += 25;
}
if (PHOTO_HINTS_RE.test(getSig($sibling))) {
score = score + 15
score += 15;
}
return score
return score;
}
export function scoreByDimensions($img) {
let score = 0
let score = 0;
const width = parseFloat($img.attr('width'))
const height = parseFloat($img.attr('height'))
const src = $img.attr('src')
const width = parseFloat($img.attr('width'));
const height = parseFloat($img.attr('height'));
const src = $img.attr('src');
// Penalty for skinny images
if (width && width <= 50) {
score = score - 50
score -= 50;
}
// Penalty for short images
if (height && height <= 50) {
score = score - 50
score -= 50;
}
if (width && height && !src.includes('sprite')) {
const area = width * height
const area = width * height;
if (area < 5000) { // Smaller than 50 x 100
score = score - 100
score -= 100;
} else {
score = score + Math.round(area/1000)
score += Math.round(area / 1000);
}
}
return score
return score;
}
export function scoreByPosition($imgs, index) {
return $imgs.length/2 - index
}
function getSig($node) {
return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`
return ($imgs.length / 2) - index;
}

@ -1,5 +1,5 @@
import assert from 'assert'
import cheerio from 'cheerio'
import assert from 'assert';
import cheerio from 'cheerio';
import {
scoreImageUrl,
@ -8,61 +8,61 @@ import {
scoreBySibling,
scoreByDimensions,
scoreByPosition,
} from './score-image'
} from './score-image';
describe('scoreImageUrlUrl(url)', () => {
it('gets 20 points for a positive lead img hint', () => {
const url = 'http://example.com/upload/img.png'
const url = 'http://example.com/upload/img.png';
assert.equal(scoreImageUrl(url), 20)
})
assert.equal(scoreImageUrl(url), 20);
});
it('loses 20 points for a negative lead img hint', () => {
const url = 'http://example.com/sprite/foo/bar.png'
const url = 'http://example.com/sprite/foo/bar.png';
assert.equal(scoreImageUrl(url), -20)
})
assert.equal(scoreImageUrl(url), -20);
});
it('loses 10 points for a gif', () => {
const url = 'http://example.com/foo/bar.gif'
const url = 'http://example.com/foo/bar.gif';
assert.equal(scoreImageUrl(url), -10)
assert.equal(scoreImageUrl(url), -10);
const url2 = 'http://example.com/foogif/bar'
const url2 = 'http://example.com/foogif/bar';
assert.equal(scoreImageUrl(url2), 0)
})
assert.equal(scoreImageUrl(url2), 0);
});
it('gains 10 points for a jpg', () => {
const url = 'http://example.com/foo/bar.jpg'
assert.equal(scoreImageUrl(url), 10)
const url = 'http://example.com/foo/bar.jpg';
assert.equal(scoreImageUrl(url), 10);
const url2 = 'http://example.com/foo/bar.jpeg'
assert.equal(scoreImageUrl(url2), 10)
const url2 = 'http://example.com/foo/bar.jpeg';
assert.equal(scoreImageUrl(url2), 10);
const url3 = 'http://example.com/foojpg/bar'
assert.equal(scoreImageUrl(url3), 0)
const url3 = 'http://example.com/foojpg/bar';
assert.equal(scoreImageUrl(url3), 0);
const url4 = 'http://example.com/foo.jpg?bar=baz'
assert.equal(scoreImageUrl(url4), 10)
})
})
const url4 = 'http://example.com/foo.jpg?bar=baz';
assert.equal(scoreImageUrl(url4), 10);
});
});
describe('scoreAttr($img)', () => {
it('gets 5 points if the img node has an alt attribute', () => {
const $ = cheerio.load('<div><img alt="Wow" /></div>')
const $img = $('img').first()
const $ = cheerio.load('<div><img alt="Wow" /></div>');
const $img = $('img').first();
assert.equal(scoreAttr($img), 5)
})
assert.equal(scoreAttr($img), 5);
});
it('gets 0 points if the img node has an alt attribute', () => {
const $ = cheerio.load('<div><img /></div>')
const $img = $('img').first()
const $ = cheerio.load('<div><img /></div>');
const $img = $('img').first();
assert.equal(scoreAttr($img), 0)
})
})
assert.equal(scoreAttr($img), 0);
});
});
describe('scoreByParents($img)', () => {
it('gets 25 points if it has a figure parent', () => {
@ -74,18 +74,18 @@ describe('scoreByParents($img)', () => {
</div>
</figure>
</div>`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByParents($img), 25)
})
assert.equal(scoreByParents($img), 25);
});
it('gets 0 points if the img has no figure parent', () => {
const $ = cheerio.load('<div><img /></div>')
const $img = $('img').first()
const $ = cheerio.load('<div><img /></div>');
const $img = $('img').first();
assert.equal(scoreByParents($img), 0)
})
assert.equal(scoreByParents($img), 0);
});
it('gets 15 points if parent or gparent has photo hints', () => {
const $ = cheerio.load(
@ -96,12 +96,12 @@ describe('scoreByParents($img)', () => {
</div>
</div>
</div>`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByParents($img), 15)
})
})
assert.equal(scoreByParents($img), 15);
});
});
describe('scoreBySibling($img)', () => {
it('gets 25 points if its sibling is figcaption', () => {
@ -112,11 +112,11 @@ describe('scoreBySibling($img)', () => {
<figcaption>Wow</figcaption>
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreBySibling($img), 25)
})
assert.equal(scoreBySibling($img), 25);
});
it('gets 15 points if its sibling has photo hints', () => {
const $ = cheerio.load(
@ -128,12 +128,12 @@ describe('scoreBySibling($img)', () => {
</div>
</div>
</div>`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreBySibling($img), 15)
})
})
assert.equal(scoreBySibling($img), 15);
});
});
describe('scoreByDimensions($img)', () => {
it('penalizes skinny images', () => {
@ -143,11 +143,11 @@ describe('scoreByDimensions($img)', () => {
<img width="10" />
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByDimensions($img), -50)
})
assert.equal(scoreByDimensions($img), -50);
});
it('penalizes short images', () => {
const $ = cheerio.load(
@ -156,11 +156,11 @@ describe('scoreByDimensions($img)', () => {
<img height="10" />
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByDimensions($img), -50)
})
assert.equal(scoreByDimensions($img), -50);
});
it('ignores sprites', () => {
const $ = cheerio.load(
@ -169,11 +169,11 @@ describe('scoreByDimensions($img)', () => {
<img src="/sprite/etc/foo.png" width="1000" height="1000" />
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByDimensions($img), 0)
})
assert.equal(scoreByDimensions($img), 0);
});
it('penalizes images with small areas', () => {
const $ = cheerio.load(
@ -182,11 +182,11 @@ describe('scoreByDimensions($img)', () => {
<img src="/etc/foo.png" width="60" height="60" />
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByDimensions($img), -100)
})
assert.equal(scoreByDimensions($img), -100);
});
it('prefers the largest images', () => {
const $ = cheerio.load(
@ -195,13 +195,12 @@ describe('scoreByDimensions($img)', () => {
<img src="/etc/foo.png" width="1000" height="1000" />
</div>
`
)
const $img = $('img').first()
);
const $img = $('img').first();
assert.equal(scoreByDimensions($img), 1000)
})
})
assert.equal(scoreByDimensions($img), 1000);
});
});
describe('scoreByPosition($imgs, index)', () => {
it('gives higher scores to images that come first', () => {
@ -216,10 +215,10 @@ describe('scoreByPosition($imgs, index)', () => {
<img width="10" />
</div>
`
)
const $imgs = $('img')
);
const $imgs = $('img');
assert.equal(scoreByPosition($imgs, 0), 3)
})
})
assert.equal(scoreByPosition($imgs, 0), 3);
});
});

@ -1,25 +1,22 @@
import 'babel-polyfill'
import URL from 'url'
import 'babel-polyfill';
import URL from 'url';
import {
pageNumFromUrl,
articleBaseUrl,
removeAnchor,
} from 'utils/text'
import scoreLinks from './scoring/score-links'
} from 'utils/text';
import scoreLinks from './scoring/score-links';
// Looks for and returns next page url
// for multi-page articles
const GenericNextPageUrlExtractor = {
extract({ $, url, parsedUrl, previousUrls=[] }) {
parsedUrl = parsedUrl || URL.parse(url)
extract({ $, url, parsedUrl, previousUrls = [] }) {
parsedUrl = parsedUrl || URL.parse(url);
const currentPageNum = pageNumFromUrl(url)
const articleUrl = removeAnchor(url)
const baseUrl = articleBaseUrl(url, parsedUrl)
const { host } = parsedUrl
const articleUrl = removeAnchor(url);
const baseUrl = articleBaseUrl(url, parsedUrl);
const links = $('a[href]').toArray()
const links = $('a[href]').toArray();
const scoredLinks = scoreLinks({
links,
@ -27,28 +24,28 @@ const GenericNextPageUrlExtractor = {
baseUrl,
parsedUrl,
$,
previousUrls
})
previousUrls,
});
// If no links were scored, return null
if (!scoredLinks) return null
if (!scoredLinks) return null;
// now that we've scored all possible pages,
// find the biggest one.
const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) => {
const scoredLink = scoredLinks[link]
return scoredLink.score > acc.score ? scoredLink : acc
}, { score: -100 })
const scoredLink = scoredLinks[link];
return scoredLink.score > acc.score ? scoredLink : acc;
}, { score: -100 });
// If the score is less than 50, we're not confident enough to use it,
// so we fail.
if (topPage.score >= 50) {
return topPage.href
} else {
return null
return topPage.href;
}
}
}
return null;
},
};
export default GenericNextPageUrlExtractor
export default GenericNextPageUrlExtractor;

@ -1,34 +1,34 @@
import assert from 'assert'
import fs from 'fs'
import cheerio from 'cheerio'
import assert from 'assert';
import fs from 'fs';
import cheerio from 'cheerio';
import GenericNextPageUrlExtractor from './extractor'
import GenericNextPageUrlExtractor from './extractor';
describe('GenericNextPageUrlExtractor', () => {
it('returns most likely next page url', () => {
const html = fs.readFileSync('./fixtures/ars.html', 'utf8')
const $ = cheerio.load(html)
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/'
const next = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/2'
const html = fs.readFileSync('./fixtures/ars.html', 'utf8');
const $ = cheerio.load(html);
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/';
const next = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/2';
const nextPage = GenericNextPageUrlExtractor.extract({
$,
url
})
url,
});
assert.equal(nextPage, next)
})
assert.equal(nextPage, next);
});
it('returns null if there is no likely next page', () => {
const html = `<div><p>HI</p></div>`
const $ = cheerio.load(html)
const url = 'http://example.com/foo/bar'
const html = '<div><p>HI</p></div>';
const $ = cheerio.load(html);
const url = 'http://example.com/foo/bar';
const nextPage = GenericNextPageUrlExtractor.extract({
$,
url
})
url,
});
assert.equal(nextPage, null)
})
})
assert.equal(nextPage, null);
});
});

@ -1,38 +1,38 @@
export const DIGIT_RE = /\d/
export const DIGIT_RE = /\d/;
// A list of words that, if found in link text or URLs, likely mean that
// this link is not a next page link.
export const EXTRANEOUS_LINK_HINTS = [
'print',
'archive',
'comment',
'discuss',
'e-mail',
'email',
'share',
'reply',
'all',
'login',
'sign',
'single',
'adx',
'entry-unrelated'
]
export const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i')
'print',
'archive',
'comment',
'discuss',
'e-mail',
'email',
'share',
'reply',
'all',
'login',
'sign',
'single',
'adx',
'entry-unrelated',
];
export const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');
// Match any link text/classname/id that looks like it could mean the next
// page. Things like: next, continue, >, >>, » but not >|, »| as those can
// mean last page.
export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i')
export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
// Match any link text/classname/id that looks like it is an end link: things
// like "first", "last", "end", etc.
export const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i')
export const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');
// Match any link text/classname/id that looks like it means the previous
// page.
export const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i')
export const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');
// Match any phrase that looks like it could be page, or paging, or pagination
export const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i')
export const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');

@ -1,27 +1,32 @@
import 'babel-polyfill'
import URL from 'url'
import difflib from 'difflib'
import 'babel-polyfill';
import URL from 'url';
import { range } from 'utils'
import { isWordpress } from 'utils/dom'
import { isWordpress } from 'utils/dom';
import {
removeAnchor,
pageNumFromUrl,
} from 'utils/text'
import {
DIGIT_RE,
NEXT_LINK_TEXT_RE,
PREV_LINK_TEXT_RE,
EXTRANEOUS_LINK_HINTS_RE,
CAP_LINK_TEXT_RE,
PAGE_RE,
} from './constants'
} from 'utils/text';
import {
NEGATIVE_SCORE_RE,
POSITIVE_SCORE_RE,
} from 'utils/dom/constants'
import { IS_DIGIT_RE } from 'utils/text/constants'
scoreSimilarity,
scoreLinkText,
scorePageInLink,
scoreExtraneousLinks,
scoreByParents,
scorePrevLink,
shouldScore,
scoreBaseUrl,
scoreCapLinks,
scoreNextLinkText,
} from './utils';
export function makeBaseRegex(baseUrl) {
return new RegExp(`^${baseUrl}`, 'i');
}
function makeSig($link, linkText) {
return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`;
}
export default function scoreLinks({
links,
@ -29,11 +34,11 @@ export default function scoreLinks({
baseUrl,
parsedUrl,
$,
previousUrls=[]
previousUrls = [],
}) {
parsedUrl = parsedUrl || URL.parse(articleUrl)
const baseRegex = makeBaseRegex(baseUrl)
const isWp = isWordpress($)
parsedUrl = parsedUrl || URL.parse(articleUrl);
const baseRegex = makeBaseRegex(baseUrl);
const isWp = isWordpress($);
// Loop through all links, looking for hints that they may be next-page
// links. Things like having "page" in their textContent, className or
@ -46,12 +51,12 @@ export default function scoreLinks({
// Remove any anchor data since we don't do a good job
// standardizing URLs (it's hard), we're going to do
// some checking with and without a trailing slash
let href = removeAnchor(link.attribs.href)
const $link = $(link)
const linkText = $link.text()
const href = removeAnchor(link.attribs.href);
const $link = $(link);
const linkText = $link.text();
if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {
return possiblePages
return possiblePages;
}
// ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
@ -60,242 +65,29 @@ export default function scoreLinks({
score: 0,
linkText,
href,
}
};
} else {
possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`
}
const possiblePage = possiblePages[href]
const linkData = makeSig($link, linkText)
const pageNum = pageNumFromUrl(href)
let score = scoreBaseUrl(href, baseRegex)
score = score + scoreNextLinkText(linkData)
score = score + scoreCapLinks(linkData)
score = score + scorePrevLink(linkData)
score = score + scoreByParents($link)
score = score + scoreExtraneousLinks(href)
score = score + scorePageInLink(pageNum, isWp)
score = score + scoreLinkText(linkText, pageNum)
score = score + scoreSimilarity(score, articleUrl, href)
possiblePage.score = score
return possiblePages
}, {})
return Reflect.ownKeys(scoredPages).length === 0 ? null : scoredPages
}
export function makeBaseRegex(baseUrl) {
return new RegExp(`^${baseUrl}`, 'i')
}
export function scoreSimilarity(score, articleUrl, href) {
// Do this last and only if we have a real candidate, because it's
// potentially expensive computationally. Compare the link to this
// URL using difflib to get the % similarity of these URLs. On a
// sliding scale, subtract points from this link based on
// similarity.
if (score > 0) {
const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio()
// Subtract .1 from diff_percent when calculating modifier,
// which means that if it's less than 10% different, we give a
// bonus instead. Ex:
// 3% different = +17.5 points
// 10% different = 0 points
// 20% different = -25 points
const diffPercent = 1.0 - similarity
const diffModifier = -(250 * (diffPercent - 0.2))
return score + diffModifier
}
return 0
}
export function scoreLinkText(linkText, pageNum) {
// If the link text can be parsed as a number, give it a minor
// bonus, with a slight bias towards lower numbered pages. This is
// so that pages that might not have 'next' in their text can still
// get scored, and sorted properly by score.
let score = 0
if (IS_DIGIT_RE.test(linkText.trim())) {
const linkTextAsNum = parseInt(linkText)
// If it's the first page, we already got it on the first call.
// Give it a negative score. Otherwise, up to page 10, give a
// small bonus.
if (linkTextAsNum < 2) {
score = -30
} else {
score = Math.max(0, 10 - linkTextAsNum)
}
// If it appears that the current page number is greater than
// this links page number, it's a very bad sign. Give it a big
// penalty.
if (pageNum && pageNum >= linkTextAsNum) {
score = score - 50
}
}
return score
}
export function scorePageInLink(pageNum, isWp) {
// page in the link = bonus. Intentionally ignore wordpress because
// their ?p=123 link style gets caught by this even though it means
// separate documents entirely.
if (pageNum && !isWp) {
return 50
}
return 0
}
export function scoreExtraneousLinks(href) {
// If the URL itself contains extraneous values, give a penalty.
if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {
return -25
}
return 0
}
export function scoreByParents($link) {
// If a parent node contains paging-like classname or id, give a
// bonus. Additionally, if a parent_node contains bad content
// (like 'sponsor'), give a penalty.
let $parent = $link.parent()
let positiveMatch = false
let negativeMatch = false
let score = 0
Array.from(range(0, 4)).forEach((_) => {
if ($parent.length === 0) {
return
possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`;
}
const parentData = makeSig($parent, ' ')
// If we have 'page' or 'paging' in our data, that's a good
// sign. Add a bonus.
if (!positiveMatch && PAGE_RE.test(parentData)) {
positiveMatch = true
score = score + 25
}
const possiblePage = possiblePages[href];
const linkData = makeSig($link, linkText);
const pageNum = pageNumFromUrl(href);
// If we have 'comment' or something in our data, and
// we don't have something like 'content' as well, that's
// a bad sign. Give a penalty.
if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)
&& EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {
if (!POSITIVE_SCORE_RE.test(parentData)) {
negativeMatch = true
score = score - 25
}
}
$parent = $parent.parent()
})
return score
}
let score = scoreBaseUrl(href, baseRegex);
score += scoreNextLinkText(linkData);
score += scoreCapLinks(linkData);
score += scorePrevLink(linkData);
score += scoreByParents($link);
score += scoreExtraneousLinks(href);
score += scorePageInLink(pageNum, isWp);
score += scoreLinkText(linkText, pageNum);
score += scoreSimilarity(score, articleUrl, href);
export function scorePrevLink(linkData) {
// If the link has something like "previous", its definitely
// an old link, skip it.
if (PREV_LINK_TEXT_RE.test(linkData)) {
return -200
}
possiblePage.score = score;
return 0
}
export function scoreCapLinks(linkData) {
// Cap links are links like "last", etc.
if (CAP_LINK_TEXT_RE.test(linkData)) {
// If we found a link like "last", but we've already seen that
// this link is also "next", it's fine. If it's not been
// previously marked as "next", then it's probably bad.
// Penalize.
if (NEXT_LINK_TEXT_RE.test(linkData)) {
return -65
}
}
return 0
}
export function scoreNextLinkText(linkData) {
// Things like "next", ">>", etc.
if (NEXT_LINK_TEXT_RE.test(linkData)) {
return 50
}
return 0
}
return possiblePages;
}, {});
export function scoreBaseUrl(href, baseRegex) {
// If the baseUrl isn't part of this URL, penalize this
// link. It could still be the link, but the odds are lower.
// Example:
// http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
if (!baseRegex.test(href)) {
return -25
}
return 0
}
export function shouldScore(
href,
articleUrl,
baseUrl,
parsedUrl,
linkText,
previousUrls
) {
// skip if we've already fetched this url
if(previousUrls.find((url) => href === url) !== undefined) {
return false
}
// If we've already parsed this URL, or the URL matches the base
// URL, or is empty, skip it.
if (!href || href === articleUrl || href === baseUrl) {
return false
}
const { hostname } = parsedUrl
const { hostname: linkHost } = URL.parse(href)
// Domain mismatch.
if (linkHost !== hostname) {
return false
}
// If href doesn't contain a digit after removing the base URL,
// it's certainly not the next page.
const fragment = href.replace(baseUrl, '')
if (!DIGIT_RE.test(fragment)) {
return false
}
// This link has extraneous content (like "comment") in its link
// text, so we skip it.
if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {
return false
}
// Next page link text is never long, skip if it is too long.
if (linkText.length > 25) {
return false
}
return true
}
function makeSig($link, linkText) {
return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`
return Reflect.ownKeys(scoredPages).length === 0 ? null : scoredPages;
}

@ -1,239 +1,42 @@
import assert from 'assert'
import cheerio from 'cheerio'
import fs from 'fs'
import URL from 'url'
import assert from 'assert';
import cheerio from 'cheerio';
import fs from 'fs';
import scoreLinks from './score-links'
import {
makeBaseRegex,
scoreBaseUrl,
scoreNextLinkText,
scoreCapLinks,
scorePrevLink,
scoreByParents,
scoreExtraneousLinks,
scorePageInLink,
scoreLinkText,
scoreSimilarity,
shouldScore,
} from './score-links'
import scoreLinks from './score-links';
describe('scoreLinks(links)', () => {
it('returns an object of scored links', () => {
const html = fs.readFileSync('./fixtures/ars.html', 'utf8')
const html = fs.readFileSync('./fixtures/ars.html', 'utf8');
const $ = cheerio.load(html)
const links = $('a[href]').toArray()
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/'
const $ = cheerio.load(html);
const links = $('a[href]').toArray();
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/';
const scoredPages = scoreLinks({
links,
articleUrl: url,
baseUrl: 'http://arstechnica.com',
$,
})
});
assert.equal(typeof scoredPages, 'object')
})
assert.equal(typeof scoredPages, 'object');
});
it('returns null if no possible pages', () => {
const html = `<div><p>Hello wow</p></div>`
const html = '<div><p>Hello wow</p></div>';
const $ = cheerio.load(html)
const links = $('a[href]').toArray()
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/'
const $ = cheerio.load(html);
const links = $('a[href]').toArray();
const url = 'http://arstechnica.com/gadgets/2016/08/the-connected-renter-how-to-make-your-apartment-smarter/';
const scoredPages = scoreLinks({
links,
articleUrl: url,
baseUrl: 'http://arstechnica.com',
$,
})
});
assert.equal(scoredPages, null)
})
})
assert.equal(scoredPages, null);
});
});
describe('scoreBaseUrl(href, baseRegex)', () => {
it('returns -25 if url does not contain the base url', () => {
const baseUrl = 'http://example.com/foo/bar'
const badUrl = 'http://foo.com/foo/bar'
const baseRegex = makeBaseRegex(baseUrl)
assert.equal(scoreBaseUrl(badUrl, baseRegex), -25)
})
it('returns 0 if url contains the base url', () => {
const baseUrl = 'http://example.com/foo/bar'
const badUrl = 'http://example.com/foo/bar/bat'
const baseRegex = makeBaseRegex(baseUrl)
assert.equal(scoreBaseUrl(badUrl, baseRegex), 0)
})
})
describe('scoreNextLinkText(linkData)', () => {
it('returns 50 if contains common next link text', () => {
const linkData = "foo bar Next page"
assert.equal(scoreNextLinkText(linkData), 50)
})
it('returns 0 if does not contain common next link text', () => {
const linkData = "foo bar WOW GREAT"
assert.equal(scoreNextLinkText(linkData), 0)
})
})
describe('scoreCapLinks(linkData)', () => {
it('returns -65 if cap link with next link text', () => {
const linkData = "foo next Last page"
assert.equal(scoreCapLinks(linkData), -65)
})
it('returns 0 if does not match a cap link', () => {
const linkData = "foo bar WOW GREAT"
assert.equal(scoreCapLinks(linkData), 0)
})
})
describe('scorePrevLink(linkData)', () => {
it('returns -200 if link matches previous text', () => {
const linkData = "foo next previous page"
assert.equal(scorePrevLink(linkData), -200)
})
it('returns 0 if does not match a prev link', () => {
const linkData = "foo bar WOW GREAT"
assert.equal(scoreCapLinks(linkData), 0)
})
})
describe('scoreByParents($link)', () => {
it('returns 25 if parent sig looks like a page', () => {
const html = `
<div>
<div class="next-page">
<a href="blah">Next page</a>
</div>
</div>
`
const $ = cheerio.load(html)
const $link = $('a').first()
assert.equal(scoreByParents($link), 25)
})
it('returns -25 if parent sig looks like a comment', () => {
const html = `
<div>
<div class="comment">
<a href="blah">Next page</a>
</div>
</div>
`
const $ = cheerio.load(html)
const $link = $('a').first()
assert.equal(scoreByParents($link), -25)
})
})
describe('scoreExtraneousLinks(href)', () => {
it('returns -25 if link matches extraneous text', () => {
const url = "http://example.com/email-link"
assert.equal(scoreExtraneousLinks(url), -25)
})
it('returns 0 if does not match extraneous text', () => {
const url = "http://example.com/asdf"
assert.equal(scoreExtraneousLinks(url), 0)
})
})
describe('scorePageInLink(pageNum, isWp)', () => {
it('returns 50 if link contains a page num', () => {
assert.equal(scorePageInLink(1, false), 50)
})
it('returns 0 if link contains no page num', () => {
assert.equal(scorePageInLink(null, false), 0)
})
it('returns 0 if page is wordpress', () => {
assert.equal(scorePageInLink(10, true), 0)
})
})
describe('scoreLinkText(linkText)', () => {
it('returns 8 if link contains the num 2', () => {
assert.equal(scoreLinkText('2', 0), 8)
})
it('returns 5 if link contains the num 5', () => {
assert.equal(scoreLinkText('5', 0), 5)
})
it('returns -30 if link contains the number 1', () => {
assert.equal(scoreLinkText('1', 0), -30)
})
it('penalizes -50 if pageNum is >= link text as num', () => {
assert.equal(scoreLinkText('4', 5), -44)
})
})
describe('scoreSimilarity(score, articleUrl, href)', () => {
it('returns a similarity bonus based on current score', () => {
const articleUrl = 'http://example.com/foo/bar'
const href = 'http://example.com/foo/bar/2'
const score = 25
assert.equal(
Math.round(scoreSimilarity(score, articleUrl, href)),
66
)
})
it('returns 0 is current score <= 0', () => {
const articleUrl = 'http://example.com/foo/bar'
const href = 'http://example.com/foo/bar/2'
const score = 0
assert.equal(scoreSimilarity(score, articleUrl, href), 0)
})
})
describe('shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)', () => {
it('returns false if href has already been fetched', () => {
const previousUrls = [ 'http://example.com/foo/bar/2' ]
const href = 'http://example.com/foo/bar/2'
const parsedUrl = URL.parse(href)
assert.equal(
shouldScore(href, '', '', parsedUrl, '', previousUrls),
false
)
})
it('returns true if href has not been fetched', () => {
const previousUrls = [ 'http://example.com/foo/bar' ]
const href = 'http://example.com/foo/bar/2'
const parsedUrl = URL.parse(href)
assert.equal(
shouldScore(href, '', '', parsedUrl, '', previousUrls),
true
)
})
})

@ -0,0 +1,10 @@
// Barrel module: re-exports every next-page scoring heuristic from its
// own file so callers can import them all from this directory's index.
export { default as scoreSimilarity } from './score-similarity';
export { default as scoreLinkText } from './score-link-text';
export { default as scorePageInLink } from './score-page-in-link';
export { default as scoreExtraneousLinks } from './score-extraneous-links';
export { default as scoreByParents } from './score-by-parents';
export { default as scorePrevLink } from './score-prev-link';
export { default as shouldScore } from './should-score';
export { default as scoreBaseUrl } from './score-base-url';
export { default as scoreNextLinkText } from './score-next-link-text';
export { default as scoreCapLinks } from './score-cap-links';

@ -0,0 +1,11 @@
/**
 * Score a candidate link by whether it lives under the article's base
 * URL.
 *
 * A link that doesn't share the base could still be the next page, but
 * the odds are lower, so it takes a modest penalty. Example:
 * http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
 *
 * @param {string} href candidate link URL
 * @param {RegExp} baseRegex anchored, case-insensitive base-URL matcher
 * @returns {number} 0 when href matches the base, -25 otherwise
 */
export default function scoreBaseUrl(href, baseRegex) {
  return baseRegex.test(href) ? 0 : -25;
}

@ -0,0 +1,23 @@
import assert from 'assert';
import scoreBaseUrl from './score-base-url';
import { makeBaseRegex } from '../score-links';

// Specs for scoreBaseUrl: off-base links are penalized -25; links that
// share the article's base URL are left untouched (0).
describe('scoreBaseUrl(href, baseRegex)', () => {
  it('returns -25 if url does not contain the base url', () => {
    const baseUrl = 'http://example.com/foo/bar';
    const badUrl = 'http://foo.com/foo/bar';
    const baseRegex = makeBaseRegex(baseUrl);
    assert.equal(scoreBaseUrl(badUrl, baseRegex), -25);
  });

  it('returns 0 if url contains the base url', () => {
    const baseUrl = 'http://example.com/foo/bar';
    // Renamed from `badUrl`: this URL *does* contain the base and is
    // expected to pass unpenalized.
    const goodUrl = 'http://example.com/foo/bar/bat';
    const baseRegex = makeBaseRegex(baseUrl);
    assert.equal(scoreBaseUrl(goodUrl, baseRegex), 0);
  });
});

@ -0,0 +1,52 @@
import { range } from 'utils';
import {
NEGATIVE_SCORE_RE,
POSITIVE_SCORE_RE,
PAGE_RE,
} from 'utils/dom/constants';
import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';
// Build the signature string scored against the paging regexes: the
// element's class and id attributes joined by a space.
function makeSig($link) {
  return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;
}

/**
 * Score a candidate next-page link by inspecting up to four of its
 * ancestor elements.
 *
 * A parent whose class/id looks paging-related adds a +25 bonus; a
 * parent that matches the negative/extraneous patterns without also
 * looking content-like adds a -25 penalty. Each adjustment is applied
 * at most once, regardless of how many parents match.
 *
 * @param {Object} $link cheerio-wrapped anchor element
 * @returns {number} cumulative parent-based score adjustment
 */
export default function scoreByParents($link) {
  let $parent = $link.parent();
  let positiveMatch = false;
  let negativeMatch = false;
  let score = 0;

  // Walk at most four levels up the DOM; once $parent is empty every
  // remaining iteration is a no-op.
  Array.from(range(0, 4)).forEach(() => {
    if ($parent.length === 0) {
      return;
    }

    // Fix: the old makeSig took a (linkText) second argument; the
    // vestigial ' ' previously passed here was silently ignored and has
    // been dropped.
    const parentData = makeSig($parent);

    // 'page'/'paging'/'pagination' in the class/id is a good sign.
    if (!positiveMatch && PAGE_RE.test(parentData)) {
      positiveMatch = true;
      score += 25;
    }

    // 'comment' or similar in the data — without anything content-like
    // alongside it — is a bad sign.
    if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)
      && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {
      if (!POSITIVE_SCORE_RE.test(parentData)) {
        negativeMatch = true;
        score -= 25;
      }
    }

    $parent = $parent.parent();
  });

  return score;
}

@ -0,0 +1,35 @@
import assert from 'assert';
import cheerio from 'cheerio';
import scoreByParents from './score-by-parents';
// Specs for scoreByParents: a paging-like parent class earns +25, a
// comment-like parent earns -25.
describe('scoreByParents($link)', () => {
  it('returns 25 if parent sig looks like a page', () => {
    const html = `
      <div>
        <div class="next-page">
          <a href="blah">Next page</a>
        </div>
      </div>
    `;
    const $ = cheerio.load(html);
    const $link = $('a').first();
    assert.equal(scoreByParents($link), 25);
  });
  it('returns -25 if parent sig looks like a comment', () => {
    const html = `
      <div>
        <div class="comment">
          <a href="blah">Next page</a>
        </div>
      </div>
    `;
    const $ = cheerio.load(html);
    const $link = $('a').first();
    assert.equal(scoreByParents($link), -25);
  });
});

@ -0,0 +1,19 @@
import {
NEXT_LINK_TEXT_RE,
CAP_LINK_TEXT_RE,
} from '../constants';
/**
 * Score "cap" links — link text like "last", "end", "first".
 *
 * NOTE(review): this penalizes a cap link only when it ALSO matches the
 * next-link pattern, while the original inline comment suggested the
 * opposite ("if it's also 'next', it's fine"). The unit test pins the
 * current behavior (-65 for "next ... Last page"), so it is preserved
 * here verbatim — confirm which was intended.
 *
 * @param {string} linkData combined link text/class/id signature
 * @returns {number} -65 when both patterns match, otherwise 0
 */
export default function scoreCapLinks(linkData) {
  const looksLikeCap = CAP_LINK_TEXT_RE.test(linkData);
  const alsoLooksLikeNext = NEXT_LINK_TEXT_RE.test(linkData);
  return looksLikeCap && alsoLooksLikeNext ? -65 : 0;
}

@ -0,0 +1,18 @@
import assert from 'assert';
import scoreCapLinks from './score-cap-links';
// Specs for scoreCapLinks: a "last"-style cap link that also matches
// the next-link pattern is penalized -65; otherwise no adjustment.
describe('scoreCapLinks(linkData)', () => {
  it('returns -65 if cap link with next link text', () => {
    const linkData = 'foo next Last page';
    assert.equal(scoreCapLinks(linkData), -65);
  });
  it('returns 0 if does not match a cap link', () => {
    const linkData = 'foo bar WOW GREAT';
    assert.equal(scoreCapLinks(linkData), 0);
  });
});

@ -0,0 +1,10 @@
import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';
/**
 * Penalize URLs that contain extraneous hints (print, comment, email,
 * share, ...) — such links are rarely pagination.
 *
 * @param {string} href candidate link URL
 * @returns {number} -25 when an extraneous hint is present, else 0
 */
export default function scoreExtraneousLinks(href) {
  return EXTRANEOUS_LINK_HINTS_RE.test(href) ? -25 : 0;
}

@ -0,0 +1,18 @@
import assert from 'assert';
import scoreExtraneousLinks from './score-extraneous-links';
// Specs for scoreExtraneousLinks: URLs containing extraneous hints
// (here "email") take a -25 penalty; clean URLs are untouched.
describe('scoreExtraneousLinks(href)', () => {
  it('returns -25 if link matches extraneous text', () => {
    const url = 'http://example.com/email-link';
    assert.equal(scoreExtraneousLinks(url), -25);
  });
  it('returns 0 if does not match extraneous text', () => {
    const url = 'http://example.com/asdf';
    assert.equal(scoreExtraneousLinks(url), 0);
  });
});

@ -0,0 +1,30 @@
import { IS_DIGIT_RE } from 'utils/text/constants';
/**
 * Score a link by its text when that text parses as a page number.
 *
 * Numeric link text gets a small bonus biased toward lower page
 * numbers, so next-page links without the word "next" can still be
 * ranked. Page 1 is penalized (we already have it), and a link whose
 * number is not greater than the current page number is penalized
 * heavily.
 *
 * @param {string} linkText the anchor's text content
 * @param {number|null} pageNum page number parsed from the href, if any
 * @returns {number} score adjustment for this link text
 */
export default function scoreLinkText(linkText, pageNum) {
  // Non-numeric link text contributes nothing here.
  if (!IS_DIGIT_RE.test(linkText.trim())) {
    return 0;
  }

  const linkTextAsNum = parseInt(linkText, 10);

  // Page 1 was fetched on the first call — give it a negative score.
  // Pages 2..10 earn a small bonus that shrinks as the number grows.
  let score = linkTextAsNum < 2 ? -30 : Math.max(0, 10 - linkTextAsNum);

  // Linking at or behind the current page is a very bad sign.
  if (pageNum && pageNum >= linkTextAsNum) {
    score -= 50;
  }

  return score;
}

@ -0,0 +1,22 @@
import assert from 'assert';
import scoreLinkText from './score-link-text';
// Specs for scoreLinkText: numeric text earns max(0, 10 - n) for n >= 2,
// -30 for page 1, and an extra -50 when pageNum >= the linked number.
describe('scoreLinkText(linkText)', () => {
  it('returns 8 if link contains the num 2', () => {
    assert.equal(scoreLinkText('2', 0), 8);
  });
  it('returns 5 if link contains the num 5', () => {
    assert.equal(scoreLinkText('5', 0), 5);
  });
  it('returns -30 if link contains the number 1', () => {
    assert.equal(scoreLinkText('1', 0), -30);
  });
  it('penalizes -50 if pageNum is >= link text as num', () => {
    assert.equal(scoreLinkText('4', 5), -44);
  });
});

@ -0,0 +1,10 @@
import { NEXT_LINK_TEXT_RE } from '../constants';
/**
 * Reward link signatures containing next-page hints — "next", ">>",
 * "»", etc. — the strongest positive pagination signal.
 *
 * @param {string} linkData combined link text/class/id signature
 * @returns {number} 50 on a match, otherwise 0
 */
export default function scoreNextLinkText(linkData) {
  return NEXT_LINK_TEXT_RE.test(linkData) ? 50 : 0;
}

@ -0,0 +1,18 @@
import assert from 'assert';
import scoreNextLinkText from './score-next-link-text';
// Specs for scoreNextLinkText: "Next"-style text earns +50; anything
// else earns nothing.
describe('scoreNextLinkText(linkData)', () => {
  it('returns 50 if contains common next link text', () => {
    const linkData = 'foo bar Next page';
    assert.equal(scoreNextLinkText(linkData), 50);
  });
  it('returns 0 if does not contain common next link text', () => {
    const linkData = 'foo bar WOW GREAT';
    assert.equal(scoreNextLinkText(linkData), 0);
  });
});

@ -0,0 +1,10 @@
/**
 * Reward links whose URL carries a page number.
 *
 * WordPress sites are deliberately excluded: their `?p=123` link style
 * trips this check even though each p-value is a separate document,
 * not a page of the same article.
 *
 * @param {number|null} pageNum page number parsed from the href, if any
 * @param {boolean} isWp whether the document looks like WordPress
 * @returns {number} 50 for a non-WordPress page number, otherwise 0
 */
export default function scorePageInLink(pageNum, isWp) {
  if (isWp || !pageNum) {
    return 0;
  }
  return 50;
}

@ -0,0 +1,18 @@
import assert from 'assert';
import scorePageInLink from './score-page-in-link';
// Specs for scorePageInLink: +50 for a page number in the link, 0 when
// absent, and 0 on WordPress (its ?p=123 URLs are separate documents).
describe('scorePageInLink(pageNum, isWp)', () => {
  it('returns 50 if link contains a page num', () => {
    assert.equal(scorePageInLink(1, false), 50);
  });
  it('returns 0 if link contains no page num', () => {
    assert.equal(scorePageInLink(null, false), 0);
  });
  it('returns 0 if page is wordpress', () => {
    assert.equal(scorePageInLink(10, true), 0);
  });
});

@ -0,0 +1,11 @@
import { PREV_LINK_TEXT_RE } from '../constants';
/**
 * Rule out links that look like they point backwards — "prev",
 * "earlier", "older", "<", "«" — with a heavy penalty.
 *
 * @param {string} linkData combined link text/class/id signature
 * @returns {number} -200 when previous-page text matches, otherwise 0
 */
export default function scorePrevLink(linkData) {
  return PREV_LINK_TEXT_RE.test(linkData) ? -200 : 0;
}

@ -0,0 +1,18 @@
import assert from 'assert';
import scorePrevLink from './score-prev-link';
// Specs for scorePrevLink: previous-page text earns a disqualifying
// -200; unrelated text is untouched.
describe('scorePrevLink(linkData)', () => {
  it('returns -200 if link matches previous text', () => {
    const linkData = 'foo next previous page';
    assert.equal(scorePrevLink(linkData), -200);
  });
  it('returns 0 if does not match a prev link', () => {
    const linkData = 'foo bar WOW GREAT';
    assert.equal(scorePrevLink(linkData), 0);
  });
});

@ -0,0 +1,23 @@
import difflib from 'difflib';
/**
 * Adjust a candidate's score by its URL similarity to the article URL.
 *
 * Run last and only for links that already scored above zero, because
 * the difflib SequenceMatcher comparison is comparatively expensive.
 * Links whose URL differs by less than the threshold gain points; more
 * different links lose them (25 points per 10% beyond the threshold).
 *
 * NOTE(review): the original comment said "Subtract .1 from
 * diff_percent" with examples matching a 0.1 threshold, but the code
 * uses 0.2 — the unit test pins the 0.2 behavior, preserved here;
 * confirm which was intended.
 *
 * @param {number} score the link's score so far
 * @param {string} articleUrl URL of the current article page
 * @param {string} href candidate next-page URL
 * @returns {number} adjusted score, or 0 when score was not positive
 */
export default function scoreSimilarity(score, articleUrl, href) {
  if (score <= 0) {
    return 0;
  }

  const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();
  const diffPercent = 1.0 - similarity;
  const diffModifier = -(250 * (diffPercent - 0.2));
  return score + diffModifier;
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save