You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mercury-parser/src/extractors/generic/date-published/extractor.test.js

96 lines
2.5 KiB
JavaScript

import assert from 'assert';
import cheerio from 'cheerio';
import moment from 'moment';
import HTML from './fixtures/html';
import GenericDatePublishedExtractor from './extractor';
// Test suite for GenericDatePublishedExtractor.extract: meta tags,
// selectors, dates embedded in the URL, and the "nothing found" case.
describe('GenericDatePublishedExtractor', () => {
  describe('extract($, metaCache)', () => {
    // Markup used when the document itself should contribute no date.
    const EMPTY_MARKUP = '<div></div>';

    /**
     * Loads `markup` with cheerio and passes the resulting `$`, together
     * with `url` and `metaCache`, to the extractor.
     *
     * @param {string} markup - HTML handed to `cheerio.load`.
     * @param {object} [options]
     * @param {string} [options.url=''] - URL forwarded to the extractor.
     * @param {string[]} [options.metaCache=[]] - Meta names forwarded to
     *   the extractor.
     * @returns {*} Whatever `GenericDatePublishedExtractor.extract` returns.
     */
    const runExtractor = (markup, { url = '', metaCache = [] } = {}) =>
      GenericDatePublishedExtractor.extract({
        $: cheerio.load(markup),
        url,
        metaCache,
      });

    it('extracts datePublished from meta tags', () => {
      const actual = runExtractor(HTML.datePublishedMeta.test, {
        metaCache: ['displaydate', 'something-else'],
      });
      const expected = HTML.datePublishedMeta.result.toISOString();

      assert.equal(actual, expected);
    });

    it('extracts datePublished from selectors', () => {
      const actual = runExtractor(HTML.datePublishedSelectors.test);
      // NOTE(review): the expected value is read from the datePublishedMeta
      // fixture, not from datePublishedSelectors — confirm this is intended.
      const expected = HTML.datePublishedMeta.result.toISOString();

      assert.equal(actual, expected);
    });

    // Cases where the date can only come from the URL. `expected` is a
    // thunk so it is evaluated after the extractor has run, inside the test.
    const urlCases = [
      {
        title: 'extracts from url formatted /2012/08/01/etc',
        url: 'https://example.com/2012/08/01/this-is-good',
        expected: () => new Date('2012/08/01').toISOString(),
      },
      {
        title: 'extracts from url formatted /2020-01-01',
        url: 'https://example.com/2020-01-01/this-is-good',
        expected: () => moment(new Date('2020-01-01')).toISOString(),
      },
      {
        title: 'extracts from url formatted /2020/jan/01',
        url: 'https://example.com/2020/jan/01/this-is-good',
        expected: () => new Date('2020/jan/01').toISOString(),
      },
    ];

    for (const { title, url, expected } of urlCases) {
      it(title, () => {
        const actual = runExtractor(EMPTY_MARKUP, { url });

        assert.equal(actual, expected());
      });
    }

    it('returns null if no date can be found', () => {
      const actual = runExtractor(EMPTY_MARKUP);

      assert.equal(actual, null);
    });
  });
});