You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
891 B
JavaScript
30 lines
891 B
JavaScript
import assert from 'assert';
|
|
import cheerio from 'cheerio';
|
|
import fs from 'fs';
|
|
|
|
import extractBestNode from 'extractors/generic/content/extract-best-node';
|
|
import extractCleanNode from './content';
|
|
|
|
// Integration-style check: extract the best content node from the Wired
// fixture, run it through extractCleanNode, and verify how much text survives.
describe('extractCleanNode(article, { $, cleanConditionally, title } })', () => {
  it('cleans cruft out of a DOM node', () => {
    // NOTE: the fixture path is resolved against the process working
    // directory, not against this file's location.
    const html = fs.readFileSync('./fixtures/wired.html', 'utf-8');
    const $ = cheerio.load(html);

    const opts = {
      stripUnlikelyCandidates: true,
      weightNodes: true,
      cleanConditionally: true,
    };

    const bestNode = extractBestNode($, opts);

    // Pass `cleanConditionally` as a top-level option, matching the signature
    // named in the suite title. Nesting it under an `opts` key (as this test
    // previously did) meant the setting never reached the function.
    const cleanNode = extractCleanNode(bestNode, {
      $,
      cleanConditionally: opts.cleanConditionally,
    });

    // Newlines are removed outright (not replaced by a space) before the
    // remaining whitespace runs are collapsed; the expected lengths below
    // depend on exactly this normalization order.
    const text = $(cleanNode)
      .text()
      .replace(/\n/g, '')
      .replace(/\s+/g, ' ')
      .trim();

    // Either of two lengths is accepted, as in the original assertion.
    const hasExpectedLength = text.length === 2656 || text.length === 2657;
    assert.ok(
      hasExpectedLength,
      `expected cleaned text length to be 2656 or 2657, got ${text.length}`
    );
  });
});
|