refactor: limiting calls to $ function

Squashed commit of the following:

commit c72da261cb5319d1eef207bff63b3c9cd49018df
Author: Adam Pash <adam.pash@gmail.com>
Date:   Fri Sep 9 15:28:43 2016 -0400

    refactor: limiting calls to $ function

commit eeae88247d844d5c6acbc529dbc3ce4d14e04191
Author: Adam Pash <adam.pash@gmail.com>
Date:   Fri Sep 9 15:14:33 2016 -0400

    refactor: convertNodeTo; requires a cheerio object
8 years ago · 47ac7e9803
parent 81e9e7a317
commit 47ac7e9803
28 changed files with 148 additions and 142 deletions
--- a/TODO.md
+++ b/TODO.md
@ -3,12 +3,12 @@ TODO:
 - Rename all cleaners from cleanThing to clean
 - Make sure weightNodes flag is being passed properly
 - Get a better sense of when cheerio returns a raw node and when it returns a cheerio object
-  - Remove $ from function calls to getScore
  - Remove $ whenever possible
 - Test if .is method is faster than regex methods
 - Separate constants into activity-specific folders (dom, scoring)

 DONE:
+x Remove $ from function calls to getScore
 x remove all but attributes whitelist. research what attributes are important beyond SRC and href
 x remove logic for fetching meta attrs with custom props
 x cleaning embed and object nodes
--- a/src/extractors/generic/content/extractor.test.js
+++ b/src/extractors/generic/content/extractor.test.js
@ -10,11 +10,13 @@ describe('GenericContentExtractor', function() {
  this.timeout(1000000)
  describe('extract($, html, opts)', () => {
    it("extracts html and returns the article", () => {
-      const html = fs.readFileSync('./fixtures/latimes.html', 'utf-8')
+      const html = fs.readFileSync('./fixtures/wired.html', 'utf-8')

      // Array.from(range(1, 100)).map((i) => {
      //   console.log(i)
-      //   clean(GenericContentExtractor.extract(null, html))
+      //   clean(GenericContentExtractor.extract(
+      //     { $: null, html, url: 'http://example.com' }
+      //   ))
      // })
      const result = clean(GenericContentExtractor.extract(
        { $: null, html, url: 'http://example.com' }
--- a/src/extractors/generic/content/utils/scoring/add-score.js
+++ b/src/extractors/generic/content/utils/scoring/add-score.js
@ -1,5 +1,4 @@
 import {
-  getScore,
  getOrInitScore,
  setScore,
 } from './index'
--- a/src/extractors/generic/content/utils/scoring/add-score.test.js
+++ b/src/extractors/generic/content/utils/scoring/add-score.test.js
@ -10,18 +10,18 @@ describe('Scoring utils', () => {
  describe('addScore(node, $, amount)', () => {
    it(`adds the specified amount to a node's score`, () => {
      const $ = cheerio.load('<p score="25">Foo</p>')
-      let node = $('p').first()
+      let $node = $('p').first()

-      node = addScore(node, $, 25)
-      assert.equal(getScore(node, $), 50)
+      $node = addScore($node, $, 25)
+      assert.equal(getScore($node), 50)
    })

    it(`adds score if score not yet set (assumes score is 0)`, () => {
      const $ = cheerio.load('<p>Foo</p>')
-      let node = $('p').first()
+      let $node = $('p').first()

-      node = addScore(node, $, 25)
-      assert.equal(getScore(node, $), 25)
+      $node = addScore($node, $, 25)
+      assert.equal(getScore($node), 25)
    })

  })
--- a/src/extractors/generic/content/utils/scoring/add-to-parent.test.js
+++ b/src/extractors/generic/content/utils/scoring/add-to-parent.test.js
@ -11,12 +11,12 @@ describe('Scoring utils', () => {
    it(`adds 1/4 of a node's score it its parent`, () => {
      const html = '<div score="25"><p score="40">Foo</p></div>'
      const $ = cheerio.load(html)
-      let node = $('p').first()
+      let $node = $('p').first()

-      node = addToParent(node, $, 40)
+      $node = addToParent($node, $, 40)

-      assert.equal(getScore(node.parent(), $), 35)
-      assert.equal(getScore(node, $), 40)
+      assert.equal(getScore($node.parent()), 35)
+      assert.equal(getScore($node), 40)
    })
  })

--- a/src/extractors/generic/content/utils/scoring/find-top-candidate.test.js
+++ b/src/extractors/generic/content/utils/scoring/find-top-candidate.test.js
@ -14,35 +14,35 @@ describe('findTopCandidate($)', () => {
  it("finds the top candidate from simple case", () => {
    const $ = cheerio.load(HTML.findDom1)

-    const topCandidate = findTopCandidate($)
+    const $$topCandidate = findTopCandidate($)

-    assert.equal(getScore(topCandidate), 100)
+    assert.equal(getScore($$topCandidate), 100)
  })

  it("finds the top candidate from a nested case", () => {
    const $ = cheerio.load(HTML.findDom2)

-    const topCandidate = findTopCandidate($)
+    const $$topCandidate = findTopCandidate($)

    // this is wrapped in a div so checking
    // the score of the first child
-    assert.equal(getScore(topCandidate.children().first()), 50)
+    assert.equal(getScore($$topCandidate.children().first()), 50)
  })

  it("ignores tags like BR", () => {
    const $ = cheerio.load(HTML.findDom3)

-    const topCandidate = findTopCandidate($)
+    const $topCandidate = findTopCandidate($)

-    assert.equal(getScore(topCandidate), 50)
+    assert.equal(getScore($topCandidate), 50)
  })

  it("returns BODY if no candidates found", () => {
    const $ = cheerio.load(HTML.topBody)

-    const topCandidate = findTopCandidate($)
+    const $topCandidate = findTopCandidate($)

-    assert.equal(topCandidate.get(0).tagName, 'body')
+    assert.equal($topCandidate.get(0).tagName, 'body')
  })

  it("appends a sibling with a good enough score", () => {
@ -51,8 +51,8 @@ describe('findTopCandidate($)', () => {
    let $ = cheerio.load(html)
    $ = scoreContent($)

-    const topCandidate = findTopCandidate($)
-    assert.equal($(topCandidate).text().length, 3652)
+    const $topCandidate = findTopCandidate($)
+    assert.equal($($topCandidate).text().length, 3652)
  })
 })

--- a/src/extractors/generic/content/utils/scoring/get-or-init-score.js
+++ b/src/extractors/generic/content/utils/scoring/get-or-init-score.js
@ -8,19 +8,19 @@ import {
 // gets and returns the score if it exists
 // if not, initializes a score based on
 // the node's tag type
-export default function getOrInitScore(node, $, weightNodes=true) {
-  let score = getScore(node, $)
+export default function getOrInitScore($node, $, weightNodes=true) {
+  let score = getScore($node)

  if (score) {
    return score
  } else {
-    score = scoreNode(node)
+    score = scoreNode($node)

    if (weightNodes) {
-      score = score + getWeight(node)
+      score = score + getWeight($node)
    }

-    addToParent(node, $, score)
+    addToParent($node, $, score)
  }

  return score
--- a/src/extractors/generic/content/utils/scoring/get-or-init-score.test.js
+++ b/src/extractors/generic/content/utils/scoring/get-or-init-score.test.js
@ -55,7 +55,7 @@ describe('getOrInitScore(node, $)', () => {

      const score = getOrInitScore(node, $)

-      assert.equal(getScore(node.parent(), $), 16)
+      assert.equal(getScore(node.parent()), 16)
    })
  })
 })
--- a/src/extractors/generic/content/utils/scoring/get-score.js
+++ b/src/extractors/generic/content/utils/scoring/get-score.js
@ -1,6 +1,6 @@
 // returns the score of a node based on
 // the node's score attribute
 // returns null if no score set
-export default function getScore(node, $) {
-  return parseFloat(node.attr('score')) || null
+export default function getScore($node) {
+  return parseFloat($node.attr('score')) || null
 }
--- a/src/extractors/generic/content/utils/scoring/get-score.test.js
+++ b/src/extractors/generic/content/utils/scoring/get-score.test.js
@ -4,18 +4,18 @@ import cheerio from 'cheerio'
 import { getScore } from './index'

 describe('Scoring utils', () => {
-  describe('getScore(node, $)', () => {
+  describe('getScore($node)', () => {
    it("returns null if the node has no score set", () => {
      const $ = cheerio.load('<p>Foo</p>')
-      const node = $('p').first()
-      assert.equal(getScore(node, $), null)
+      const $node = $('p').first()
+      assert.equal(getScore($node), null)
    })

    it("returns 25 if the node has a score attr of 25", () => {
      const $ = cheerio.load('<p score="25">Foo</p>')
-      const node = $('p').first()
-      assert.equal(typeof getScore(node, $), 'number')
-      assert.equal(getScore(node, $), 25)
+      const $node = $('p').first()
+      assert.equal(typeof getScore($node), 'number')
+      assert.equal(getScore($node), 25)
    })

  })
--- a/src/extractors/generic/content/utils/scoring/score-content.js
+++ b/src/extractors/generic/content/utils/scoring/score-content.js
@ -3,7 +3,6 @@ import { HNEWS_CONTENT_SELECTORS } from '../constants'
 import {
  scoreNode,
  setScore,
-  getScore,
  getOrInitScore,
  addScore,
 } from './index'
@ -17,7 +16,7 @@ export default function scoreContent($, weightNodes=true) {
  // First, look for special hNews based selectors and give them a big
  // boost, if they exist
  HNEWS_CONTENT_SELECTORS.map(([parentSelector, childSelector]) => {
-    $(parentSelector).find(childSelector).each((index, node) => {
+    $(`${parentSelector} ${childSelector}`).each((index, node) => {
      addScore($(node).parent(parentSelector), $, 80)
    })
  })
@ -25,37 +24,38 @@ export default function scoreContent($, weightNodes=true) {
  $('p, pre').each((index, node) => {
    // The raw score for this paragraph, before we add any parent/child
    // scores.
-    const rawScore = scoreNode($(node))
-    node = setScore(node, $, getOrInitScore($(node), $, weightNodes))
+    let $node = $(node)
+    const rawScore = scoreNode($node)
+    $node = setScore($node, $, getOrInitScore($node, $, weightNodes))

    // Add the individual content score to the parent node
-    const parent = $(node).parent()
-    addScoreTo(parent, $, rawScore, weightNodes)
-    if (parent) {
+    const $parent = $node.parent()
+    addScoreTo($parent, $, rawScore, weightNodes)
+    if ($parent) {
      // Add half of the individual content score to the
      // grandparent
-      addScoreTo(parent.parent(), $, rawScore/2, weightNodes)
+      addScoreTo($parent.parent(), $, rawScore/2, weightNodes)
    }
  })

  return $
 }

-function convertSpans(node, $) {
-  if (node.get(0)) {
-    const { tagName } = node.get(0)
+function convertSpans($node, $) {
+  if ($node.get(0)) {
+    const { tagName } = $node.get(0)

    if (tagName === 'span') {
      // convert spans to divs
-      convertNodeTo(node, $, 'div')
+      convertNodeTo($node, $, 'div')
    }
  }
 }

-function addScoreTo(node, $, score, weightNodes) {
-  if (node) {
-    convertSpans(node, $)
-    addScore(node, $, score)
+function addScoreTo($node, $, score, weightNodes) {
+  if ($node) {
+    convertSpans($node, $)
+    addScore($node, $, score)
  }
 }

--- a/src/extractors/generic/content/utils/scoring/score-content.test.js
+++ b/src/extractors/generic/content/utils/scoring/score-content.test.js
@ -18,16 +18,14 @@ describe('scoreContent($, weightNodes)', () => {
    const $ = cheerio.load(HTML.hNews.before)
    const result = scoreContent($).html()

-    assert.equal(getScore($('div').first(), $), 140)
-    // assert.equal(getScore($('div').first(), $), 99)
+    assert.equal(getScore($('div').first()), 140)
  })

  it("is so-so about non-hNews content", () => {
    const $ = cheerio.load(HTML.nonHNews.before)
    const result = scoreContent($).html()

-    // assert.equal(getScore($('div').first(), $), 38)
-    assert.equal(getScore($('div').first(), $), 65)
+    assert.equal(getScore($('div').first()), 65)
  })

  it("scores this Wired article the same", () => {
@ -35,8 +33,7 @@ describe('scoreContent($, weightNodes)', () => {
    const $ = cheerio.load(html)
    const result = scoreContent($).html()

-    // assert.equal(getScore($('article').first(), $), 63.75)
-    assert.equal(getScore($('article').first(), $), 65.5)
+    assert.equal(getScore($('article').first()), 65.5)
  })

 })
--- a/src/extractors/generic/content/utils/scoring/score-node.js
+++ b/src/extractors/generic/content/utils/scoring/score-node.js
@ -7,14 +7,14 @@ import {

 // Score an individual node. Has some smarts for paragraphs, otherwise
 // just scores based on tag.
-export default function scoreNode(node) {
-  const { tagName } = node.get(0)
+export default function scoreNode($node) {
+  const { tagName } = $node.get(0)

  // TODO: Consider ordering by most likely.
  // E.g., if divs are a more common tag on a page,
  // Could save doing that regex test on every node – AP
  if (PARAGRAPH_SCORE_TAGS.test(tagName)) {
-    return scoreParagraph(node)
+    return scoreParagraph($node)
  } else if (tagName === 'div') {
    return 5
  } else if (CHILD_CONTENT_TAGS.test(tagName)) {
--- a/src/extractors/generic/content/utils/scoring/set-score.js
+++ b/src/extractors/generic/content/utils/scoring/set-score.js
@ -1,7 +1,7 @@

-export default function setScore(node, $, score) {
-  $(node).attr('score', score)
-  return node
+export default function setScore($node, $, score) {
+  $node.attr('score', score)
+  return $node
 }


--- a/src/extractors/generic/content/utils/scoring/set-score.test.js
+++ b/src/extractors/generic/content/utils/scoring/set-score.test.js
@ -11,12 +11,12 @@ describe('Scoring utils', () => {
  describe('setScore(node, $, amount)', () => {
    it("sets the specified amount as the node's score", () => {
      const $ = cheerio.load('<p>Foo</p>')
-      let node = $('p').first()
+      let $node = $('p').first()

      const newScore = 25
-      node = setScore(node, $, newScore)
+      $node = setScore($node, $, newScore)

-      const score = getScore(node, $)
+      const score = getScore($node)
      assert(score, newScore)
    })
  })
--- a/src/extractors/root-extractor.js
+++ b/src/extractors/root-extractor.js
@ -117,7 +117,7 @@ export function transformElements($content, $, { transforms }) {
    // If value is a string, convert directly
    if (typeof value === 'string') {
      $matches.each((index, node) => {
-        convertNodeTo(node, $, transforms[key])
+        convertNodeTo($(node), $, transforms[key])
      })
    } else if (typeof value === 'function') {
      // If value is function, apply function to node
@ -125,7 +125,7 @@ export function transformElements($content, $, { transforms }) {
        const result = value($(node), $)
        // If function returns a string, convert node to that value
        if (typeof result === 'string') {
-          convertNodeTo(node, $, result)
+          convertNodeTo($(node), $, result)
        }
      })
    }
--- a/src/utils/dom/clean-h-ones.js
+++ b/src/utils/dom/clean-h-ones.js
@ -5,12 +5,12 @@ import { convertNodeTo } from 'utils/dom'
 // strip them. Otherwise, turn 'em into H2s.
 export default function cleanHOnes(article, $) {
  // const hOnes = $.find('h1')
-  const hOnes = $('h1', article)
-  if (hOnes.length < 3) {
-    hOnes.each((index, node) => $(node).remove())
+  const $hOnes = $('h1', article)
+  if ($hOnes.length < 3) {
+    $hOnes.each((index, node) => $(node).remove())
  } else {
-    hOnes.each((index, node) => {
-      convertNodeTo(node, $, 'h2')
+    $hOnes.each((index, node) => {
+      convertNodeTo($(node), $, 'h2')
    })
  }

--- a/src/utils/dom/clean-headers.js
+++ b/src/utils/dom/clean-headers.js
@ -2,25 +2,26 @@ import { HEADER_TAG_LIST } from './constants'
 import { normalizeSpaces } from '../text'
 import { getWeight } from 'extractors/generic/content/utils/scoring'

-export default function cleanHeaders(article, $, title='') {
-  $(HEADER_TAG_LIST, article).each((index, header) => {
+export default function cleanHeaders($article, $, title='') {
+  $(HEADER_TAG_LIST, $article).each((index, header) => {
+    const $header = $(header)
    // Remove any headers that appear before all other p tags in the
    // document. This probably means that it was part of the title, a
    // subtitle or something else extraneous like a datestamp or byline,
    // all of which should be handled by other metadata handling.
-    if ($(header, article).prevAll('p').length === 0) {
-      return $(header).remove()
+    if ($($header, $article).prevAll('p').length === 0) {
+      return $header.remove()
    }

    // Remove any headers that match the title exactly.
    if (normalizeSpaces($(header).text()) === title) {
-      return $(header).remove()
+      return $header.remove()
    }

    // If this header has a negative weight, it's probably junk.
    // Get rid of it.
    if (getWeight($(header)) < 0) {
-      return $(header).remove()
+      return $header.remove()
    }
  })
  return $
--- a/src/utils/dom/clean-images.js
+++ b/src/utils/dom/clean-images.js
@ -1,30 +1,30 @@
 import { SPACER_RE } from './constants'

-export default function cleanImages(article, $) {
-  $(article).find('img').each((index, img) => {
-    img = $(img)
+export default function cleanImages($article, $) {
+  $article.find('img').each((index, img) => {
+    const $img = $(img)

-    cleanForHeight(img, $)
-    removeSpacers(img, $)
+    cleanForHeight($img, $)
+    removeSpacers($img, $)
  })

  return $
 }

-function cleanForHeight(img, $) {
-  const height = parseInt(img.attr('height'))
-  const width = parseInt(img.attr('width')) || 20
+function cleanForHeight($img, $) {
+  const height = parseInt($img.attr('height'))
+  const width = parseInt($img.attr('width')) || 20

  // Remove images that explicitly have very small heights or
  // widths, because they are most likely shims or icons,
  // which aren't very useful for reading.
  if ((height || 20) < 10 || width < 10) {
-    $(img).remove()
+    $img.remove()
  } else if (height) {
    // Don't ever specify a height on images, so that we can
    // scale with respect to width without screwing up the
    // aspect ratio.
-    img.removeAttr('height')
+    $img.removeAttr('height')
  }

  return $
@ -32,9 +32,9 @@ function cleanForHeight(img, $) {

 // Cleans out images where the source string matches transparent/spacer/etc
 // TODO This seems very aggressive - AP
-function removeSpacers(img, $) {
-  if (SPACER_RE.test(img.attr('src'))) {
-    $(img).remove()
+function removeSpacers($img, $) {
+  if (SPACER_RE.test($img.attr('src'))) {
+    $img.remove()
  }

  return $
--- a/src/utils/dom/clean-tags.js
+++ b/src/utils/dom/clean-tags.js
@ -17,62 +17,63 @@ import { linkDensity } from './index'
 // etc)
 //
 // Return this same doc.
-export default function cleanTags(article, $) {
-  $(CLEAN_CONDITIONALLY_TAGS, article).each((index, node) => {
-    let weight = getScore($(node))
+export default function cleanTags($article, $) {
+  $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {
+    const $node = $(node)
+    let weight = getScore($node)
    if (!weight) {
-      weight = getOrInitScore($(node), $)
-      setScore(weight, $)
+      weight = getOrInitScore($node, $)
+      setScore($node, $, weight)
    }

    // drop node if its weight is < 0
    if (weight < 0) {
-      $(node).remove()
+      $node.remove()
    } else {
      // determine if node seems like content
-      removeUnlessContent(node, $, weight)
+      removeUnlessContent($node, $, weight)
    }
  })

  return $
 }

-function removeUnlessContent(node, $, weight) {
+function removeUnlessContent($node, $, weight) {
    // Explicitly save entry-content-asset tags, which are
    // noted as valuable in the Publisher guidelines. For now
    // this works everywhere. We may want to consider making
    // this less of a sure-thing later.
-    if ($(node).hasClass('entry-content-asset')) {
+    if ($node.hasClass('entry-content-asset')) {
      return
    }

-    const content = normalizeSpaces($(node).text())
+    const content = normalizeSpaces($node.text())

    if (scoreCommas(content) < 10) {
-      const pCount = $('p', node).length
-      const inputCount = $('input', node).length
+      const pCount = $('p', $node).length
+      const inputCount = $('input', $node).length

      // Looks like a form, too many inputs.
      if (inputCount > (pCount / 3)) {
-        return $(node).remove()
+        return $node.remove()
      }

      const contentLength = content.length
-      const imgCount = $('img', node).length
+      const imgCount = $('img', $node).length

      // Content is too short, and there are no images, so
      // this is probably junk content.
      if (contentLength < 25 && imgCount === 0) {
-        return $(node).remove()
+        return $node.remove()
      }

-      const density = linkDensity($(node))
+      const density = linkDensity($node)

      // Too high of link density, is probably a menu or
      // something similar.
      // console.log(weight, density, contentLength)
      if (weight < 25 && density > 0.2 && contentLength > 75) {
-        return $(node).remove()
+        return $node.remove()
      }

      // Too high of a link density, despite the score being
@ -81,22 +82,23 @@ function removeUnlessContent(node, $, weight) {
        // Don't remove the node if it's a list and the
        // previous sibling starts with a colon though. That
        // means it's probably content.
-        const nodeIsList = node.tagName === 'ol' || node.tagName === 'ul'
+        const tagName = $node.get(0).tagName
+        const nodeIsList = tagName === 'ol' || tagName === 'ul'
        if (nodeIsList) {
-          const previousNode = $(node).prev()
+          const previousNode = $node.prev()
          if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
            return
          }
        }

-        return $(node).remove()
+        return $node.remove()
      }

-      const scriptCount = $('script', node).length
+      const scriptCount = $('script', $node).length

      // Too many script tags, not enough content.
      if (scriptCount > 0 && contentLength < 150) {
-        return $(node).remove()
+        return $node.remove()
      }
    }
 }
--- a/src/utils/dom/convert-node-to.js
+++ b/src/utils/dom/convert-node-to.js
@ -1,4 +1,4 @@
-export default function convertNodeTo(node, $, tag='p') {
-  $(node).replaceWith(`<${tag}>${$(node).contents()}</${tag}>`)
+export default function convertNodeTo($node, $, tag='p') {
+  $node.replaceWith(`<${tag}>${$node.contents()}</${tag}>`)
  return $
 }
--- a/src/utils/dom/convert-to-paragraphs.js
+++ b/src/utils/dom/convert-to-paragraphs.js
@ -24,10 +24,11 @@ export default function convertToParagraphs($) {

 function convertDivs($) {
  $('div').each((index, div) => {
-    const convertable = $(div).children()
+    const $div = $(div)
+    const convertable = $div.children()
      .not(DIV_TO_P_BLOCK_TAGS).length == 0
    if (convertable) {
-      convertNodeTo(div, $, 'p')
+      convertNodeTo($div, $, 'p')
    }
  })

@ -36,9 +37,10 @@ function convertDivs($) {

 function convertSpans($) {
  $('span').each((index, span) => {
-    const convertable = $(span).parents('p, div').length == 0
+    const $span = $(span)
+    const convertable = $span.parents('p, div').length == 0
    if (convertable) {
-      convertNodeTo(span, $, 'p')
+      convertNodeTo($span, $, 'p')
    }
  })

--- a/src/utils/dom/extract-from-selectors.js
+++ b/src/utils/dom/extract-from-selectors.js
@ -14,23 +14,23 @@ export default function extractFromSelectors(
    // If we didn't get exactly one of this selector, this may be
    // a list of articles or comments. Skip it.
    if (nodes.length === 1) {
-      const node = nodes[0]
+      const $node = $(nodes[0])

      // If it has a number of children, it's more likely a container
      // element. Skip it.
-      if ($(node).children().length > maxChildren) {
+      if ($node.children().length > maxChildren) {
        continue
      }
      // If it looks to be within a comment, skip it.
-      if (withinComment(node, $)) {
+      if (withinComment($node, $)) {
        continue
      }

      let content
      if (textOnly) {
-        content = $(node).text()
+        content = $node.text()
      } else {
-        content = $(node).html()
+        content = $node.html()
      }

      if (content) {
--- a/src/utils/dom/link-density.js
+++ b/src/utils/dom/link-density.js
@ -1,10 +1,10 @@
 // Determines what percentage of the text
 // in a node is link text
 // Takes a node, returns a float
-export function linkDensity(node) {
-  const totalTextLength = textLength(node.text())
+export function linkDensity($node) {
+  const totalTextLength = textLength($node.text())

-  const linkText = node.find('a').text()
+  const linkText = $node.find('a').text()
  const linkLength = textLength(linkText)

  if (totalTextLength > 0) {
--- a/src/utils/dom/node-is-sufficient.js
+++ b/src/utils/dom/node-is-sufficient.js
@ -2,6 +2,6 @@
 // param: node (a cheerio node)
 // return: boolean

-export default function nodeIsSufficient(node) {
-  return node.text().trim().length >= 100
+export default function nodeIsSufficient($node) {
+  return $node.text().trim().length >= 100
 }
--- a/src/utils/dom/paragraphize.js
+++ b/src/utils/dom/paragraphize.js
@ -12,6 +12,8 @@ import { BLOCK_LEVEL_TAGS_RE } from './constants'
 // :param br: Whether or not the passed node is a br

 export default function paragraphize(node, $, br=false) {
+  const $node = $(node)
+
  if (br) {
    let sibling = node.nextSibling
    let p = $('<p></p>')
@ -28,8 +30,8 @@ export default function paragraphize(node, $, br=false) {
      sibling = nextSibling
    }

-    $(node).replaceWith(p)
-    $(node).remove()
+    $node.replaceWith(p)
+    $node.remove()
    return $
  } else {
    // Not currently implemented. May not need to; can leverage
--- a/src/utils/dom/strip-unlikely-candidates.js
+++ b/src/utils/dom/strip-unlikely-candidates.js
@ -18,9 +18,10 @@ export default function stripUnlikelyCandidates($) {
  //
  //  :param $: a cheerio object to strip nodes from
  //  :return $: the cleaned cheerio object
-  $('*').not('a').each(function(index, element) {
-    const classes = $(element).attr('class')
-    const id = $(element).attr('id')
+  $('*').not('a').each(function(index, node) {
+    const $node = $(node)
+    const classes = $node.attr('class')
+    const id = $node.attr('id')
    if (!id && !classes) {
      return
    } else {
@ -28,7 +29,7 @@ export default function stripUnlikelyCandidates($) {
      if (CANDIDATES_WHITELIST.test(classAndId)) {
        return
      } else if (CANDIDATES_BLACKLIST.test(classAndId)) {
-        return $(element).remove()
+        return $node.remove()
      }
    }
  })
--- a/src/utils/dom/within-comment.js
+++ b/src/utils/dom/within-comment.js
@ -1,7 +1,7 @@
-export default function withinComment(node, $) {
-  const parents = $(node).parents().toArray()
+export default function withinComment($node, $) {
+  const parents = $node.parents().toArray()
  const commentParent = parents.find((parent) => {
-    const classAndId = `${$(parent).attr('class')} ${$(parent).attr('id')}`
+    const classAndId = `${parent.attribs['class']} ${parent.attribs['id']}`
    return classAndId.includes('comment')
  })