diff --git a/package.json b/package.json index f5df5be4..2c06cd91 100644 --- a/package.json +++ b/package.json @@ -132,7 +132,6 @@ }, "bundleDependencies": [ "jquery", - "moment-timezone", "browser-request" ], "browser": { @@ -140,8 +139,7 @@ "cheerio": "./src/shims/cheerio-query", "jquery": "./node_modules/jquery/dist/jquery.min.js", "postman-request": "browser-request", - "iconv-lite": "./src/shims/iconv-lite", - "moment-timezone": "./node_modules/moment-timezone/builds/moment-timezone-with-data-2012-2022.min.js" + "iconv-lite": "./src/shims/iconv-lite" }, "husky": { "hooks": { diff --git a/src/cleaners/date-published.js b/src/cleaners/date-published.js index 70017782..001f9e32 100644 --- a/src/cleaners/date-published.js +++ b/src/cleaners/date-published.js @@ -45,11 +45,17 @@ export function createDate(dateString, timezone, format) { } if (timezone) { - return format - ? dayjs.tz(dateString, format, timezone) - : dayjs.tz(new Date(dateString), timezone); + try { + return format + ? dayjs.tz(dateString, format, timezone) + : dayjs.tz(dayjs(dateString).format('YYYY-MM-DD HH:mm:ss'), timezone); + } catch (error) { + // return an intentionally invalid dayjs object, + // in case the input needs to be cleaned first + return dayjs(''); + } } - return format ? dayjs(dateString, format) : dayjs(new Date(dateString)); + return format ? dayjs(dateString, format) : dayjs(dateString); } // Take a date published string, and hopefully return a date out of @@ -70,7 +76,7 @@ export default function cleanDatePublished( if (!date.isValid()) { dateString = cleanDateString(dateString); - date = createDate(dateString, timezone, format); + date = createDate(dateString, timezone); } return date.isValid() ? date.toISOString() : null; diff --git a/src/extractors/custom/clinicaltrials.gov/index.test.js b/src/extractors/custom/clinicaltrials.gov/index.test.js index 73f881fb..ea16bb4a 100644 --- a/src/extractors/custom/clinicaltrials.gov/index.test.js +++ b/src/extractors/custom/clinicaltrials.gov/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -59,7 +58,7 @@ describe('ClinicaltrialsGovExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(moment(date_published).format('YYYY-MM-DD'), '2018-11-21'); + assert.equal(date_published, '2018-11-21T05:00:00.000Z'); }); it('returns the content', async () => { diff --git a/src/extractors/custom/fortune.com/index.js b/src/extractors/custom/fortune.com/index.js index 199cf24a..1fc08280 100644 --- a/src/extractors/custom/fortune.com/index.js +++ b/src/extractors/custom/fortune.com/index.js @@ -11,8 +11,6 @@ export const FortuneComExtractor = { date_published: { selectors: ['.MblGHNMJ'], - - timezone: 'UTC', }, lead_image_url: { diff --git a/src/extractors/custom/genius.com/index.test.js b/src/extractors/custom/genius.com/index.test.js index 0468f65f..ebefdd94 100644 --- a/src/extractors/custom/genius.com/index.test.js +++ b/src/extractors/custom/genius.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -51,11 +50,10 @@ describe('GeniusComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/genius.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '1984-06-25'); + assert.equal(date_published, '1984-06-25T04:00:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/news.nationalgeographic.com/index.js b/src/extractors/custom/news.nationalgeographic.com/index.js index 5c8ed807..b0e02d4a 100644 --- a/src/extractors/custom/news.nationalgeographic.com/index.js +++ b/src/extractors/custom/news.nationalgeographic.com/index.js @@ -11,8 +11,7 @@ export const NewsNationalgeographicComExtractor = { date_published: { selectors: [['meta[name="article:published_time"]', 'value']], - format: 'ddd MMM DD HH:mm:ss zz YYYY', - timezone: 'EST', + timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/news.nationalgeographic.com/index.test.js b/src/extractors/custom/news.nationalgeographic.com/index.test.js index 11b400d9..863cfd6c 100644 --- a/src/extractors/custom/news.nationalgeographic.com/index.test.js +++ b/src/extractors/custom/news.nationalgeographic.com/index.test.js @@ -49,7 +49,7 @@ describe('NewsNationalgeographicComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2015-08-03T17:45:00.000Z'); + assert.equal(date_published, '2015-08-03T16:45:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/people.com/index.test.js b/src/extractors/custom/people.com/index.test.js index 8559418c..4de9dae7 100644 --- a/src/extractors/custom/people.com/index.test.js +++ b/src/extractors/custom/people.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,9 @@ describe('PeopleComExtractor', () => { // in ./src/extractors/custom/people.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; - // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-12'); + assert.equal(date_published, '2016-12-12T14:22:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/pitchfork.com/index.test.js b/src/extractors/custom/pitchfork.com/index.test.js index 2da225f7..6e8c0cdb 100644 --- a/src/extractors/custom/pitchfork.com/index.test.js +++ b/src/extractors/custom/pitchfork.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -41,11 +40,8 @@ describe('PitchforkComExtractor', () => { it('returns the date_published', async () => { const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; - assert.equal(new_date_published, '2019-06-07'); + assert.equal(date_published, '2019-06-07T04:00:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/takagi-hiromitsu.jp/index.test.js b/src/extractors/custom/takagi-hiromitsu.jp/index.test.js index 5c6a008c..6207680c 100644 --- a/src/extractors/custom/takagi-hiromitsu.jp/index.test.js +++ b/src/extractors/custom/takagi-hiromitsu.jp/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -57,11 +56,10 @@ describe('TakagihiromitsuJpExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/takagi-hiromitsu.jp/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '2019-02-17'); + assert.equal(date_published, '2019-02-17T14:34:06.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.chicagotribune.com/index.js b/src/extractors/custom/www.chicagotribune.com/index.js index edc63d59..df4c944a 100644 --- a/src/extractors/custom/www.chicagotribune.com/index.js +++ b/src/extractors/custom/www.chicagotribune.com/index.js @@ -11,6 +11,7 @@ export const WwwChicagotribuneComExtractor = { date_published: { selectors: ['time'], + timezone: 'America/Chicago', }, lead_image_url: { diff --git a/src/extractors/custom/www.chicagotribune.com/index.test.js b/src/extractors/custom/www.chicagotribune.com/index.test.js index 4a44437d..303ece3f 100644 --- a/src/extractors/custom/www.chicagotribune.com/index.test.js +++ b/src/extractors/custom/www.chicagotribune.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('WwwChicagotribuneComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.chicagotribune.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-13'); + assert.equal(date_published, '2016-12-13T21:45:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.infoq.com/index.js b/src/extractors/custom/www.infoq.com/index.js index 710f9d0a..92053900 100644 --- a/src/extractors/custom/www.infoq.com/index.js +++ b/src/extractors/custom/www.infoq.com/index.js @@ -11,7 +11,7 @@ export const WwwInfoqComExtractor = { date_published: { selectors: ['.article__readTime.date'], - format: 'YYYY年MM月DD日', + format: 'YYYY[年]M[月]D[日]', timezone: 'Asia/Tokyo', }, diff --git a/src/extractors/custom/www.macrumors.com/index.js b/src/extractors/custom/www.macrumors.com/index.js index 4ad307af..6b7e27c2 100644 --- a/src/extractors/custom/www.macrumors.com/index.js +++ b/src/extractors/custom/www.macrumors.com/index.js @@ -11,8 +11,7 @@ export const WwwMacrumorsComExtractor = { date_published: { selectors: [['time', 'datetime']], - - timezone: 'America/Los_Angeles', + // timezone: 'America/Los_Angeles', }, dek: { diff --git a/src/extractors/custom/www.msn.com/index.test.js b/src/extractors/custom/www.msn.com/index.test.js index f73d6b7a..c1f30157 100644 --- a/src/extractors/custom/www.msn.com/index.test.js +++ b/src/extractors/custom/www.msn.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -58,11 +57,10 @@ describe('MSNExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.msn.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '2016-09-21'); + assert.equal(date_published.split('T')[0], '2016-09-21'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.nationalgeographic.com/index.test.js b/src/extractors/custom/www.nationalgeographic.com/index.test.js index a59fb788..96616001 100644 --- a/src/extractors/custom/www.nationalgeographic.com/index.test.js +++ b/src/extractors/custom/www.nationalgeographic.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -44,13 +43,10 @@ describe('WwwNationalgeographicComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.nationalgeographic.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-15'); + assert.equal(date_published.split('T')[0], '2016-12-15'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.nbcnews.com/index.js b/src/extractors/custom/www.nbcnews.com/index.js index 1b9f6c73..d79b19e9 100644 --- a/src/extractors/custom/www.nbcnews.com/index.js +++ b/src/extractors/custom/www.nbcnews.com/index.js @@ -19,7 +19,7 @@ export const WwwNbcnewsComExtractor = { '.flag_article-wrapper time', ], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, lead_image_url: { diff --git a/src/extractors/custom/www.nbcnews.com/index.test.js b/src/extractors/custom/www.nbcnews.com/index.test.js index 704e277e..988ba73a 100644 --- a/src/extractors/custom/www.nbcnews.com/index.test.js +++ b/src/extractors/custom/www.nbcnews.com/index.test.js @@ -53,7 +53,7 @@ describe('WwwNbcnewsComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2016-12-13T23:06:00.000Z'); + assert.equal(date_published, '2016-12-13T18:06:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.nydailynews.com/index.test.js b/src/extractors/custom/www.nydailynews.com/index.test.js index b87b204b..d3ea2446 100644 --- a/src/extractors/custom/www.nydailynews.com/index.test.js +++ b/src/extractors/custom/www.nydailynews.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,14 +54,11 @@ describe('WwwNydailynewsComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.nydailynews.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-16'); + assert.equal(date_published.split('T')[0], '2016-12-16'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.ossnews.jp/index.js b/src/extractors/custom/www.ossnews.jp/index.js index 1932f833..396ca348 100644 --- a/src/extractors/custom/www.ossnews.jp/index.js +++ b/src/extractors/custom/www.ossnews.jp/index.js @@ -9,7 +9,7 @@ export const WwwOssnewsJpExtractor = { date_published: { selectors: ['p.fs12'], - format: 'YYYY年MM月DD日 HH:mm', + format: 'YYYY[年]M[月]D[日] HH:mm', timezone: 'Asia/Tokyo', }, diff --git a/src/extractors/custom/www.phoronix.com/index.js b/src/extractors/custom/www.phoronix.com/index.js index 02179de1..2ac2a10e 100644 --- a/src/extractors/custom/www.phoronix.com/index.js +++ b/src/extractors/custom/www.phoronix.com/index.js @@ -12,7 +12,7 @@ export const WwwPhoronixComExtractor = { date_published: { selectors: ['.author'], // 1 June 2019 at 08:34 PM EDT - format: 'D MMMM YYYY at hh:mm', + format: 'D MMMM YYYY [at] hh:mm A', timezone: 'America/New_York', }, diff --git a/src/extractors/custom/www.phoronix.com/index.test.js b/src/extractors/custom/www.phoronix.com/index.test.js index 5e6abe6e..d4ca6494 100644 --- a/src/extractors/custom/www.phoronix.com/index.test.js +++ b/src/extractors/custom/www.phoronix.com/index.test.js @@ -57,7 +57,7 @@ describe('WwwPhoronixComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2019-06-01T12:34:00.000Z'); + assert.equal(date_published, '2019-06-02T00:34:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.politico.com/index.test.js b/src/extractors/custom/www.politico.com/index.test.js index 3d9e44fd..290288c7 100644 --- a/src/extractors/custom/www.politico.com/index.test.js +++ b/src/extractors/custom/www.politico.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('PoliticoExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.politico.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-10-04'); + assert.equal(date_published.split('T')[0], '2016-10-04'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.prospectmagazine.co.uk/index.js b/src/extractors/custom/www.prospectmagazine.co.uk/index.js index 4cd13c6f..2a66a8b4 100644 --- a/src/extractors/custom/www.prospectmagazine.co.uk/index.js +++ b/src/extractors/custom/www.prospectmagazine.co.uk/index.js @@ -11,8 +11,6 @@ export const WwwProspectmagazineCoUkExtractor = { date_published: { selectors: [['meta[name="article:published_time"]', 'value'], '.post-info'], - - timezone: 'Europe/London', }, dek: { diff --git a/src/extractors/custom/www.reddit.com/index.test.js b/src/extractors/custom/www.reddit.com/index.test.js index e845678d..7cd98d17 100644 --- a/src/extractors/custom/www.reddit.com/index.test.js +++ b/src/extractors/custom/www.reddit.com/index.test.js @@ -1,7 +1,7 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; +import dayjs from 'dayjs'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,17 +55,15 @@ describe('WwwRedditComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.reddit.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published) - .format() - .split('T')[0]; - const expectedDate = moment() + + const expectedDate = dayjs() .subtract(4, 'years') .format() .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(newDatePublished, expectedDate); + assert.equal(date_published.split('T')[0], expectedDate); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.spektrum.de/index.js b/src/extractors/custom/www.spektrum.de/index.js index 88fad8d0..f2bca07d 100644 --- a/src/extractors/custom/www.spektrum.de/index.js +++ b/src/extractors/custom/www.spektrum.de/index.js @@ -11,6 +11,7 @@ export const SpektrumExtractor = { date_published: { selectors: ['.content__meta__date'], + format: 'DD[.]MM[.]YYYY', timezone: 'Europe/Berlin', }, diff --git a/src/extractors/custom/www.today.com/index.test.js b/src/extractors/custom/www.today.com/index.test.js index beb390ac..f36dafd4 100644 --- a/src/extractors/custom/www.today.com/index.test.js +++ b/src/extractors/custom/www.today.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('WwwTodayComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.today.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-22'); + assert.equal(date_published.split('T')[0], '2016-12-22'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/generic/date-published/extractor.test.js b/src/extractors/generic/date-published/extractor.test.js index e20b6b97..4038e5e8 100644 --- a/src/extractors/generic/date-published/extractor.test.js +++ b/src/extractors/generic/date-published/extractor.test.js @@ -1,6 +1,6 @@ import assert from 'assert'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; +import dayjs from 'dayjs'; import GenericDatePublishedExtractor from './extractor'; @@ -67,7 +67,7 @@ describe('GenericDatePublishedExtractor', () => { metaCache, }); - assert.equal(result, moment('2020-01-01', 'YYYY-MM-DD').toISOString()); + assert.equal(result, dayjs('2020-01-01', 'YYYY-MM-DD').toISOString()); }); it('extracts from url formatted /2020/jan/01', () => { @@ -83,7 +83,7 @@ describe('GenericDatePublishedExtractor', () => { metaCache, }); - assert.equal(result, moment(new Date('2020 jan 01')).toISOString()); + assert.equal(result, dayjs(new Date('2020 jan 01')).toISOString()); } }); diff --git a/src/extractors/generic/index.test.js b/src/extractors/generic/index.test.js index fcb0bffb..13f37e01 100644 --- a/src/extractors/generic/index.test.js +++ b/src/extractors/generic/index.test.js @@ -1,5 +1,5 @@ import assert from 'assert'; -import moment from 'moment'; +import dayjs from 'dayjs'; import GenericExtractor from './index'; @@ -18,7 +18,7 @@ describe('GenericExtractor', () => { html, metaCache: [], }); - const newDatePublished = moment(date_published).format(); + const newDatePublished = dayjs(date_published).format(); assert.equal(author, null); assert.equal(