You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/date-published/extractor.test.js

103 lines
2.8 KiB
JavaScript

import assert from 'assert';
import cheerio from 'cheerio';
import dayjs from 'dayjs';
import GenericDatePublishedExtractor from './extractor';
describe('GenericDatePublishedExtractor', () => {
describe('extract($, metaCache)', () => {
it('extracts datePublished from meta tags', () => {
const $ = cheerio.load(`
<html>
<head>
<meta name="displaydate" value="1/1/2020 8:30 (EST)" />
</head>
</html>
`);
const metaCache = ['displaydate', 'something-else'];
const result = GenericDatePublishedExtractor.extract({
$,
url: '',
metaCache,
});
assert.equal(result, new Date('1/1/2020 8:30 (EST)').toISOString());
});
it('extracts datePublished from selectors', () => {
const $ = cheerio.load(`
<div>
<div class="hentry">
<div class="updated">
1/1/2020 <span class="time">8:30am</span>
</div>
</head>
</div>
`);
const metaCache = [];
const result = GenericDatePublishedExtractor.extract({
$,
url: '',
metaCache,
});
assert.equal(result, new Date('1/1/2020 8:30 (EST)').toISOString());
});
it('extracts from url formatted /2012/08/01/etc', () => {
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2012/08/01/this-is-good';
const result = GenericDatePublishedExtractor.extract({
$,
url,
metaCache,
});
assert.equal(result, new Date('2012/08/01').toISOString());
});
it('extracts from url formatted /2020-01-01', () => {
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2020-01-01/this-is-good';
const result = GenericDatePublishedExtractor.extract({
$,
url,
metaCache,
});
assert.equal(result, dayjs('2020-01-01', 'YYYY-MM-DD').toISOString());
});
it('extracts from url formatted /2020/jan/01', () => {
// this works in Chrome, but not in PhantomJS, so disabling
// for browser testing
if (!cheerio.browser) {
const $ = cheerio.load('<div></div>');
const metaCache = [];
const url = 'https://example.com/2020/jan/01/this-is-good';
const result = GenericDatePublishedExtractor.extract({
$,
url,
metaCache,
});
assert.equal(result, dayjs(new Date('2020 jan 01')).toISOString());
}
});
it('returns null if no date can be found', () => {
const $ = cheerio.load('<div></div>');
const metaCache = [];
const result = GenericDatePublishedExtractor.extract({
$,
url: '',
metaCache,
});
assert.equal(result, null);
});
});
});