fix: better selector for nytimes authors

pull/14/head
Adam Pash 8 years ago
parent 3b87b557be
commit 5c7f2cd28e

5
dist/mercury.js vendored

@ -506,7 +506,7 @@ var NYTimesExtractor = {
},
author: {
selectors: ['.g-byline', '.byline']
selectors: [['meta[name="author"]', 'value'], '.g-byline', '.byline']
},
content: {
@ -1056,6 +1056,7 @@ var ApartmentTherapyExtractor = {
// Transform for elements matching the selector key below: swaps the matched
// node for a plain <img> whose `src` comes from the JSON stored in the node's
// `data-props` attribute.
//
// $node - the matched element; used here via .attr() and .replaceWith()
// $     - element factory; used here to build the replacement <img />
//
// NOTE(review): nothing here is guarded — JSON.parse throws if `data-props`
// is missing or is not valid JSON, and `data.sources[0].src` throws if
// `sources` is absent or empty. Confirm the caller tolerates that.
'div[data-render-react-id="images/LazyPicture"]': function divDataRenderReactIdImagesLazyPicture($node, $) {
// `data-props` is read as a JSON object that carries a `sources` array.
var data = JSON.parse($node.attr('data-props'));
// Only the first entry's URL is used; any further entries are ignored.
var src = data.sources[0].src;
var $img = $('<img />').attr('src', src);
// Replace the matched node in place with the newly built image element.
$node.replaceWith($img);
}
@ -1124,7 +1125,7 @@ var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
return '[' + selector + ']';
});
var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt', 'score'];
var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
// removeEmpty

File diff suppressed because one or more lines are too long

@ -8,6 +8,7 @@ export const NYTimesExtractor = {
author: {
selectors: [
['meta[name="author"]', 'value'],
'.g-byline',
'.byline',
],

@ -33,7 +33,7 @@ describe('NYTimesExtractor', () => {
.slice(0, 20);
assert.equal(title, 'Ahmad Khan Rahami Is Arrested in Manhattan and New Jersey Bombings');
assert.equal(author, 'MARC SANTORA, WILLIAM K. RASHBAUM, AL BAKER and ADAM GOLDMAN');
assert.equal(author, 'Marc Santora, William K. Rashbaum, Al Baker and Adam Goldman');
assert.equal(text, 'The man believed to ');
});
});

Loading…
Cancel
Save