From cc734c7e7d951e7e9d58d4200d1ac7bea7c8ea3b Mon Sep 17 00:00:00 2001 From: Adam Pash Date: Wed, 24 Aug 2016 15:50:39 -0400 Subject: [PATCH] chore: cleaned up repetitive testing for dom --- src/extractor/generic/utils/dom/brs-to-ps.js | 2 +- .../generic/utils/dom/brs-to-ps.test.js | 7 +------ .../utils/dom/convert-to-paragraphs.test.js | 16 +++++++++------- src/extractor/generic/utils/dom/paragraphize.js | 2 +- .../generic/utils/dom/paragraphize.test.js | 4 ++++ .../utils/dom/strip-unlikely-candidates.test.js | 14 ++++---------- src/extractor/generic/utils/dom/test-helpers.js | 14 ++++++++++++++ 7 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/extractor/generic/utils/dom/brs-to-ps.js b/src/extractor/generic/utils/dom/brs-to-ps.js index 30e4424a..1bcd6901 100644 --- a/src/extractor/generic/utils/dom/brs-to-ps.js +++ b/src/extractor/generic/utils/dom/brs-to-ps.js @@ -1,4 +1,4 @@ -import paragraphize from './paragraphize' +import { paragraphize } from './index' // ## NOTES: // Another good candidate for refactoring/optimizing. 
diff --git a/src/extractor/generic/utils/dom/brs-to-ps.test.js b/src/extractor/generic/utils/dom/brs-to-ps.test.js index bce2ec24..40e7af87 100644 --- a/src/extractor/generic/utils/dom/brs-to-ps.test.js +++ b/src/extractor/generic/utils/dom/brs-to-ps.test.js @@ -1,17 +1,12 @@ import assert from 'assert' import cheerio from 'cheerio' -import { clean } from './test-helpers' +import { assertBeforeAndAfter } from './test-helpers' import HTML from '../fixtures/html' import { brsToPs } from './index' -function assertBeforeAndAfter(key, fn) { - const $ = cheerio.load(HTML[key].before) - assert.equal(clean(fn($).html()), clean(HTML[key].after)) -} - describe('Generic Extractor Utils', () => { describe('brsToPs(node)', () => { diff --git a/src/extractor/generic/utils/dom/convert-to-paragraphs.test.js b/src/extractor/generic/utils/dom/convert-to-paragraphs.test.js index 568322c9..88df5e34 100644 --- a/src/extractor/generic/utils/dom/convert-to-paragraphs.test.js +++ b/src/extractor/generic/utils/dom/convert-to-paragraphs.test.js @@ -1,22 +1,23 @@ import assert from 'assert' import cheerio from 'cheerio' -import { clean } from './test-helpers' +import { + assertBeforeAndAfter, + assertClean +} from './test-helpers' import HTML from '../fixtures/html' + import { convertToParagraphs } from './index' + import { convertNodeToP } from './convert-to-paragraphs' describe('Generic Extractor Utils', () => { describe('convertToParagraphs($)', () => { it("performs all conversions", () => { - const $ = cheerio.load(HTML.convertToParagraphs.before) - // Note: Result is not valid html - // Cheerio's parser will fix this elsewhere - const result = convertToParagraphs($).html() - assert.equal(clean(result), clean(HTML.convertToParagraphs.after)) + assertBeforeAndAfter('convertToParagraphs', convertToParagraphs) }) }) @@ -25,9 +26,10 @@ describe('Generic Extractor Utils', () => { it('takes a node with any tag and turns it into a P tag', () => { const $ = 
cheerio.load(HTML.convertNodeToP.before) const node = $('div').first() + const result = convertNodeToP(node, $).html() - assert.equal(clean(result), clean(HTML.convertNodeToP.after)) + assertClean(result, HTML.convertNodeToP.after) }) }) diff --git a/src/extractor/generic/utils/dom/paragraphize.js b/src/extractor/generic/utils/dom/paragraphize.js index 8970577e..3865e5e4 100644 --- a/src/extractor/generic/utils/dom/paragraphize.js +++ b/src/extractor/generic/utils/dom/paragraphize.js @@ -7,7 +7,7 @@ import { BLOCK_LEVEL_TAGS_RE } from '../constants' // If the node is a
, it treats the following inline siblings // as if they were its children. // -// :param node: The node to paragraphize +// :param node: The node to paragraphize; this is a raw node // :param $: The cheerio object to handle dom manipulation // :param br: Whether or not the passed node is a br diff --git a/src/extractor/generic/utils/dom/paragraphize.test.js b/src/extractor/generic/utils/dom/paragraphize.test.js index dec3b0ee..5cae9f59 100644 --- a/src/extractor/generic/utils/dom/paragraphize.test.js +++ b/src/extractor/generic/utils/dom/paragraphize.test.js @@ -13,16 +13,20 @@ describe('Generic Extractor Utils', () => { it("conversts a BR into P and moves inline contents to P tag after current parent", () => { const $ = cheerio.load(HTML.paragraphize.before) let node = $('br').get(0) + // note: result here is not valid html; will handle elsewhere let result = paragraphize(node, $, true).html() + assert.equal(clean(result), clean(HTML.paragraphize.after)) }) it("conversts a BR into P and stops when block element hit", () => { const $ = cheerio.load(HTML.paragraphizeBlock.before) let node = $('br').get(0) + // note: result here is not valid html; will handle elsewhere let result = paragraphize(node, $, true).html() + assert.equal(clean(result), clean(HTML.paragraphizeBlock.after)) }) diff --git a/src/extractor/generic/utils/dom/strip-unlikely-candidates.test.js b/src/extractor/generic/utils/dom/strip-unlikely-candidates.test.js index 0c9984ab..bc494175 100644 --- a/src/extractor/generic/utils/dom/strip-unlikely-candidates.test.js +++ b/src/extractor/generic/utils/dom/strip-unlikely-candidates.test.js @@ -1,7 +1,7 @@ import assert from 'assert' import cheerio from 'cheerio' -import { clean } from './test-helpers' +import { assertBeforeAndAfter } from './test-helpers' import HTML from '../fixtures/html' import { stripUnlikelyCandidates @@ -16,21 +16,15 @@ describe('Generic Extractor Utils', () => { }) it("strips unlikely matches from the doc", () => { - const $ = 
cheerio.load(HTML.whitelistMatch.before) - const stripped = clean(stripUnlikelyCandidates($).html()) - assert.equal(stripped, clean(HTML.whitelistMatch.after)) + assertBeforeAndAfter('whitelistMatch', stripUnlikelyCandidates) }) it("keeps likely matches even when they also match the blacklist", () => { - const $ = cheerio.load(HTML.whiteAndBlack.before) - const stripped = clean(stripUnlikelyCandidates($).html()) - assert.equal(stripped, clean(HTML.whiteAndBlack.after)) + assertBeforeAndAfter('whiteAndBlack', stripUnlikelyCandidates) }) it("removed likely matches when inside blacklist node", () => { - const $ = cheerio.load(HTML.whiteInsideBlack.before) - const stripped = clean(stripUnlikelyCandidates($).html()) - assert.equal(stripped, clean(HTML.whiteInsideBlack.after)) + assertBeforeAndAfter('whiteInsideBlack', stripUnlikelyCandidates) }) diff --git a/src/extractor/generic/utils/dom/test-helpers.js b/src/extractor/generic/utils/dom/test-helpers.js index 53f3ce1d..247f9dea 100644 --- a/src/extractor/generic/utils/dom/test-helpers.js +++ b/src/extractor/generic/utils/dom/test-helpers.js @@ -1,3 +1,17 @@ +import assert from 'assert' +import cheerio from 'cheerio' +import HTML from '../fixtures/html' + export function clean(string) { return string.trim().replace(/\r?\n|\r/g, '').replace(/\s+/g, ' ') } + +export function assertBeforeAndAfter(key, fn) { + const $ = cheerio.load(HTML[key].before) + assert.equal(clean(fn($).html()), clean(HTML[key].after)) +} + +export function assertClean(a, b) { + assert.equal(clean(a), clean(b)) +} +