You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.7 KiB
JavaScript
62 lines
1.7 KiB
JavaScript
// An ordered list of meta tag names that are likely to carry the
// date an article was published. Every name is written in lowercase
// to allow faster case-insensitive matching.
// Ordered from most distinct to least distinct.
export const DATE_PUBLISHED_META_TAGS = [
  'article:published_time',
  'displaydate',
  'dc.date',
  'dc.date.issued',
  'rbpubdate',
  'publish_date',
  'pub_date',
  'pagedate',
  'pubdate',
  'revision_date',
  'doc_date',
  'date_created',
  'content_create_date',
  'lastmodified',
  'created',
  'date',
];

// An ordered list of CSS selectors (class / id based) for elements
// that are likely to contain the published date.
// Ordered from most explicit to least explicit.
export const DATE_PUBLISHED_SELECTORS = [
  '.hentry .dtstamp.published',
  '.hentry .published',
  '.hentry .dtstamp.updated',
  '.hentry .updated',
  '.single .published',
  '.meta .published',
  '.meta .postDate',
  '.entry-date',
  '.byline .date',
  '.postmetadata .date',
  '.article_datetime',
  '.date-header',
  '.story-date',
  '.dateStamp',
  '#story .datetime',
  '.dateline',
  '.pubdate',
];

// An ordered list of compiled regular expressions used to find a
// likely published date inside a URL. In every pattern the first
// capture group is the date string itself, so consumers can hand
// match[1] to a date parser.
// NOTE(review): the original comment named Python's
// dateutil.parser.parse as that parser; confirm which JavaScript
// date parser actually consumes these matches.

// Alternation of three-letter month abbreviations. It is wrapped in
// its own parentheses, so it adds a second capture group (the month)
// to any pattern it is interpolated into.
const abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';

export const DATE_PUBLISHED_URL_RES = [
  // /2012/01/27/ but not /2012/01/293 (the trailing slash is required)
  new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),

  // Disabled pattern, kept for reference:
  // 20120127 or 20120127T but not 2012012733 or 8201201733
  // /[^0-9](20\d{2}[01]\d[0-3]\d)([^0-9]|$)/i,

  // 2012-01-27
  new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),

  // /2012/jan/27/ (the 'i' flag makes the month case-insensitive)
  new RegExp(`/(20\\d{2}/${abbrevMonthsStr}/[0-3]\\d)/`, 'i'),
];
