You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/custom/www.nytimes.com/index.js

67 lines
1.2 KiB
JavaScript

feat: generator for custom parsers and some documentation Squashed commit of the following: commit deaf9e60d031d9ee06e74b8c0895495b187032a5 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Sep 20 10:31:09 2016 -0400 chore: README for custom parsers commit a8e8ad633e0d1576a52dbc90ce31b98fb2ec21ee Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 23:36:09 2016 -0400 draft of readme commit 4f0f463f821465c282ce006378e5d55f8f41df5f Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:56:34 2016 -0400 custom extractor used to build basic parser for theatlantic commit c5562a3cede41f56c4e723dcfa1181b49dcaae4d Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:20:13 2016 -0400 pre-commit to test custom parser generator commit 7d50d5b7ab780b79fae38afcb87a7d1da5d139b2 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:19:55 2016 -0400 feat: added nytimes parser commit 58b8d83a56927177984ddfdf70830bc4f328f200 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:17:28 2016 -0400 feat: can do fuzzy search or go straight to file commit c99add753723a8e2ac64d51d7379ac8e23125526 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 10:52:26 2016 -0400 refactored export for custom extractors for easier renames commit 22563413669651bb497f1bb2a92085b71f2ae324 Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 17:36:13 2016 -0400 feat: custom extractor generation in place commit 2285a29908a7f82a5de3c81f6b2b902ddec9bdaa Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 16:42:20 2016 -0400 good progress
8 years ago
export const NYTimesExtractor = {
domain: 'www.nytimes.com',
feat: generator for custom parsers and some documentation Squashed commit of the following: commit deaf9e60d031d9ee06e74b8c0895495b187032a5 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Sep 20 10:31:09 2016 -0400 chore: README for custom parsers commit a8e8ad633e0d1576a52dbc90ce31b98fb2ec21ee Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 23:36:09 2016 -0400 draft of readme commit 4f0f463f821465c282ce006378e5d55f8f41df5f Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:56:34 2016 -0400 custom extractor used to build basic parser for theatlantic commit c5562a3cede41f56c4e723dcfa1181b49dcaae4d Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:20:13 2016 -0400 pre-commit to test custom parser generator commit 7d50d5b7ab780b79fae38afcb87a7d1da5d139b2 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:19:55 2016 -0400 feat: added nytimes parser commit 58b8d83a56927177984ddfdf70830bc4f328f200 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:17:28 2016 -0400 feat: can do fuzzy search or go straight to file commit c99add753723a8e2ac64d51d7379ac8e23125526 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 10:52:26 2016 -0400 refactored export for custom extractors for easier renames commit 22563413669651bb497f1bb2a92085b71f2ae324 Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 17:36:13 2016 -0400 feat: custom extractor generation in place commit 2285a29908a7f82a5de3c81f6b2b902ddec9bdaa Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 16:42:20 2016 -0400 good progress
8 years ago
title: {
selectors: [
'.g-headline',
'h1.headline',
],
},
author: {
selectors: [
['meta[name="author"]', 'value'],
feat: generator for custom parsers and some documentation Squashed commit of the following: commit deaf9e60d031d9ee06e74b8c0895495b187032a5 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Sep 20 10:31:09 2016 -0400 chore: README for custom parsers commit a8e8ad633e0d1576a52dbc90ce31b98fb2ec21ee Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 23:36:09 2016 -0400 draft of readme commit 4f0f463f821465c282ce006378e5d55f8f41df5f Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:56:34 2016 -0400 custom extractor used to build basic parser for theatlantic commit c5562a3cede41f56c4e723dcfa1181b49dcaae4d Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:20:13 2016 -0400 pre-commit to test custom parser generator commit 7d50d5b7ab780b79fae38afcb87a7d1da5d139b2 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:19:55 2016 -0400 feat: added nytimes parser commit 58b8d83a56927177984ddfdf70830bc4f328f200 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:17:28 2016 -0400 feat: can do fuzzy search or go straight to file commit c99add753723a8e2ac64d51d7379ac8e23125526 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 10:52:26 2016 -0400 refactored export for custom extractors for easier renames commit 22563413669651bb497f1bb2a92085b71f2ae324 Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 17:36:13 2016 -0400 feat: custom extractor generation in place commit 2285a29908a7f82a5de3c81f6b2b902ddec9bdaa Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 16:42:20 2016 -0400 good progress
8 years ago
'.g-byline',
'.byline',
],
},
content: {
selectors: [
'div.g-blocks',
'article#story',
],
transforms: {
'img.g-lazy': ($node) => {
let src = $node.attr('src');
// const widths = $node.attr('data-widths')
// .slice(1)
// .slice(0, -1)
// .split(',');
// if (widths.length) {
// width = widths.slice(-1);
// } else {
// width = '900';
// }
const width = 640;
src = src.replace('{{size}}', width);
$node.attr('src', src);
},
},
clean: [
'.ad',
'header#story-header',
'.story-body-1 .lede.video',
'.visually-hidden',
'#newsletter-promo',
'.promo',
'.comments-button',
'.hidden',
'.comments',
feat: generator for custom parsers and some documentation Squashed commit of the following: commit deaf9e60d031d9ee06e74b8c0895495b187032a5 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Sep 20 10:31:09 2016 -0400 chore: README for custom parsers commit a8e8ad633e0d1576a52dbc90ce31b98fb2ec21ee Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 23:36:09 2016 -0400 draft of readme commit 4f0f463f821465c282ce006378e5d55f8f41df5f Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:56:34 2016 -0400 custom extractor used to build basic parser for theatlantic commit c5562a3cede41f56c4e723dcfa1181b49dcaae4d Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:20:13 2016 -0400 pre-commit to test custom parser generator commit 7d50d5b7ab780b79fae38afcb87a7d1da5d139b2 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:19:55 2016 -0400 feat: added nytimes parser commit 58b8d83a56927177984ddfdf70830bc4f328f200 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 17:17:28 2016 -0400 feat: can do fuzzy search or go straight to file commit c99add753723a8e2ac64d51d7379ac8e23125526 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Sep 19 10:52:26 2016 -0400 refactored export for custom extractors for easier renames commit 22563413669651bb497f1bb2a92085b71f2ae324 Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 17:36:13 2016 -0400 feat: custom extractor generation in place commit 2285a29908a7f82a5de3c81f6b2b902ddec9bdaa Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 16 16:42:20 2016 -0400 good progress
8 years ago
],
},
date_published: null,
lead_image_url: null,
dek: null,
next_page_url: null,
excerpt: null,
};