import assert from 'assert'; import fs from 'fs'; import cheerio from 'cheerio'; import { assertClean } from 'test-helpers'; import { default as RootExtractor, select, cleanBySelectors, transformElements, } from './root-extractor'; import { NYMagExtractor } from './custom/nymag.com'; describe('RootExtractor', () => { it('only returns what the custom parser gives it if fallback is disabled', () => { const fullUrl = 'http://nymag.com/daily/intelligencer/2016/09/trump-discussed-usd25k-donation-with-florida-ag-not-fraud.html'; const html = fs.readFileSync('./src/extractors/custom/nymag.com/fixtures/test.html', 'utf8'); const $ = cheerio.load(html); const { url } = RootExtractor.extract( NYMagExtractor, { url: fullUrl, html, $, metaCache: [], fallback: false } ); assert.equal(url, null); }); }); describe('cleanBySelectors($content, $, { clean })', () => { it('removes provided selectors from the content', () => { const opts = { clean: ['.ad', '.share'] }; const html = `
This is some good content
Here are some words
Here are some words
Here are some words
Here are some words