fix: better selector for nytimes authors

pull/14/head
Adam Pash 8 years ago
parent 3b87b557be
commit 5c7f2cd28e

5
dist/mercury.js vendored

@@ -506,7 +506,7 @@ var NYTimesExtractor = {
}, },
author: { author: {
selectors: ['.g-byline', '.byline'] selectors: [['meta[name="author"]', 'value'], '.g-byline', '.byline']
}, },
content: { content: {
@@ -1056,6 +1056,7 @@ var ApartmentTherapyExtractor = {
'div[data-render-react-id="images/LazyPicture"]': function divDataRenderReactIdImagesLazyPicture($node, $) { 'div[data-render-react-id="images/LazyPicture"]': function divDataRenderReactIdImagesLazyPicture($node, $) {
var data = JSON.parse($node.attr('data-props')); var data = JSON.parse($node.attr('data-props'));
var src = data.sources[0].src; var src = data.sources[0].src;
var $img = $('<img />').attr('src', src); var $img = $('<img />').attr('src', src);
$node.replaceWith($img); $node.replaceWith($img);
} }
@@ -1124,7 +1125,7 @@ var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
return '[' + selector + ']'; return '[' + selector + ']';
}); });
var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(','); var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt', 'score']; var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i'); var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
// removeEmpty // removeEmpty

File diff suppressed because one or more lines are too long

@@ -8,6 +8,7 @@ export const NYTimesExtractor = {
author: { author: {
selectors: [ selectors: [
['meta[name="author"]', 'value'],
'.g-byline', '.g-byline',
'.byline', '.byline',
], ],

@@ -33,7 +33,7 @@ describe('NYTimesExtractor', () => {
.slice(0, 20); .slice(0, 20);
assert.equal(title, 'Ahmad Khan Rahami Is Arrested in Manhattan and New Jersey Bombings'); assert.equal(title, 'Ahmad Khan Rahami Is Arrested in Manhattan and New Jersey Bombings');
assert.equal(author, 'MARC SANTORA, WILLIAM K. RASHBAUM, AL BAKER and ADAM GOLDMAN'); assert.equal(author, 'Marc Santora, William K. Rashbaum, Al Baker and Adam Goldman');
assert.equal(text, 'The man believed to '); assert.equal(text, 'The man believed to ');
}); });
}); });

Loading…
Cancel
Save