diff --git a/src/cleaners/date-published.js b/src/cleaners/date-published.js index 1bd639dd..7fcf8f36 100644 --- a/src/cleaners/date-published.js +++ b/src/cleaners/date-published.js @@ -23,29 +23,29 @@ export function cleanDateString(dateString) { .trim(); } -export function createDate(dateString, timezone) { +export function createDate(dateString, timezone, format) { if (TIME_WITH_OFFSET_RE.test(dateString)) { return moment(new Date(dateString)); } return timezone ? - moment.tz(dateString, parseFormat(dateString), timezone) : - moment(dateString, parseFormat(dateString)); + moment.tz(dateString, format || parseFormat(dateString), timezone) : + moment(dateString, format || parseFormat(dateString)); } // Take a date published string, and hopefully return a date out of // it. Return none if we fail. -export default function cleanDatePublished(dateString, { timezone } = {}) { +export default function cleanDatePublished(dateString, { timezone, format } = {}) { // If string is in milliseconds or seconds, convert to int and return if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) { return new Date(parseInt(dateString, 10)).toISOString(); } - let date = createDate(dateString, timezone); + let date = createDate(dateString, timezone, format); if (!date.isValid()) { dateString = cleanDateString(dateString); - date = createDate(dateString, timezone); + date = createDate(dateString, timezone, format); } return date.isValid() ? 
date.toISOString() : null; diff --git a/src/cleaners/date-published.test.js b/src/cleaners/date-published.test.js index ccceff6b..92994915 100644 --- a/src/cleaners/date-published.test.js +++ b/src/cleaners/date-published.test.js @@ -30,6 +30,14 @@ describe('cleanDatePublished(dateString)', () => { cleanDatePublished('November 29, 2016: 8:18 AM ET', { timezone: 'America/New_York' }); assert.equal(datePublished, '2016-11-29T13:18:00.000Z'); }); + + it('accepts a custom date format', () => { + // When a format is given, createDate parses with it + // instead of calling parseFormat on the date string + const datePublished = + cleanDatePublished('Mon Aug 03 12:45:00 EDT 2015', { timezone: 'America/New_York', format: 'ddd MMM DD HH:mm:ss zz YYYY' }); + assert.equal(datePublished, '2015-08-03T16:45:00.000Z'); + }); }); describe('cleanDateString(dateString)', () => { diff --git a/src/extractors/custom/www.macrumors.com/index.js b/src/extractors/custom/www.macrumors.com/index.js index b55fb367..1e76b3b2 100644 --- a/src/extractors/custom/www.macrumors.com/index.js +++ b/src/extractors/custom/www.macrumors.com/index.js @@ -19,15 +19,8 @@ export const WwwMacrumorsComExtractor = { '.article .byline', ], - transforms: { - '*': ($node, $) => { - const $parent = $node.parent(); - const text = $parent.text().split(' by')[0]; - $parent.empty().text(text); - - $('body').after($node); - }, - }, + // Sample date string matching the format below: Wednesday January 18, 2017 11:44 am PST + format: 'dddd MMMM D, YYYY h:mm A zz', timezone: 'America/Los_Angeles', },