You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/author/constants.js

59 lines
1.5 KiB
JavaScript

// An ordered list of meta tag names that denote likely article authors. All
// attributes should be lowercase for faster case-insensitive matching. From
// most distinct to least distinct.
//
// Note: "author" is too often the -developer- of the page, so it is not
// added here.
export const AUTHOR_META_TAGS = [
'byl',
'clmst',
'dc.author',
'dcsext.author',
'dc.creator',
'rbauthors',
'authors',
];
export const AUTHOR_MAX_LENGTH = 300;
// An ordered list of XPath Selectors to find likely article authors. From
// most explicit to least explicit.
//
// Note - this does not use classes like CSS. This checks to see if the string
// exists in the className, which is not as accurate as .className (which
// splits on spaces/endlines), but for our purposes it's close enough. The
// speed tradeoff is worth the accuracy hit.
export const AUTHOR_SELECTORS = [
'.entry .entry-author',
'.author.vcard .fn',
'.author .vcard .fn',
'.byline.vcard .fn',
'.byline .vcard .fn',
'.byline .by .author',
'.byline .by',
'.byline .author',
'.post-author.vcard',
'.post-author .vcard',
'a[rel=author]',
'#by_author',
'.by_author',
'#entryAuthor',
'.entryAuthor',
'.byline a[href*=author]',
'#author .authorname',
'.author .authorname',
'#author',
'.author',
'.articleauthor',
'.ArticleAuthor',
'.byline',
];
// An ordered list of Selectors to find likely article authors, with
// regular expression for content.
const bylineRe = /^[\n\s]*By/i;
export const BYLINE_SELECTORS_RE = [
['#byline', bylineRe],
['.byline', bylineRe],
];