mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-15 06:12:48 +00:00
fix: cleaning up deks (#44)
We've solidified what we consider a dek. This PR removes the dek selectors that do not fit that mold.
This commit is contained in:
parent
b415d1d37c
commit
00f8965c1f
@ -51,7 +51,6 @@ export const WikiaExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -53,7 +53,6 @@ export const ApartmentTherapyExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name=description]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('CustomExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-10-13T21:00:00.000Z');
|
assert.equal(date_published, '2016-10-13T21:00:00.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, "Name: Ashley Location: Downtown — Los Angeles, California Welcome to our sunny and spacious downtown home located in the in the heart of Downtown LA's Historic Core. Inside you'll find a 1,300 square foot bi-level ground unit with loft (only three of its kind!) that offers an unparalleled, refined industrial, modern aesthetic.");
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
|
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
|
||||||
|
@ -47,7 +47,6 @@ export const BroadwayWorldExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('CustomExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-10-13T19:35:00.000Z');
|
assert.equal(date_published, '2016-10-13T19:35:00.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'The American Theatre Wing announced today that their Andrew Lloyd Webber Initiative has launched its second initiative program, the Training Scholarships, bridging the gap between talent and opportunity and creating a strong pipeline to the professional theatre for promising artists of all backgrounds.');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
|
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
|
||||||
|
@ -56,7 +56,6 @@ export const BuzzfeedExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('BuzzfeedExtractor', () => {
|
|||||||
// // assert.equal(date_published, 'hi');
|
// // assert.equal(date_published, 'hi');
|
||||||
// });
|
// });
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
|
|
||||||
const articleUrl =
|
|
||||||
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'Lovato said: "Is that how my boobs should look?"..');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
|
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
|
||||||
|
@ -20,8 +20,6 @@ export const WwwCnnComExtractor = {
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
dek: null,
|
|
||||||
|
|
||||||
lead_image_url: {
|
lead_image_url: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:image"]', 'value'],
|
['meta[name="og:image"]', 'value'],
|
||||||
|
@ -66,22 +66,6 @@ describe('WwwCnnComExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-11-29T10:39:35.000Z');
|
assert.equal(date_published, '2016-11-29T10:39:35.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.cnn.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, null);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.cnn.com/index.js.
|
// in ./src/extractors/custom/www.cnn.com/index.js.
|
||||||
|
@ -53,38 +53,6 @@ describe('LittleThingsExtractor', () => {
|
|||||||
assert.equal(author, 'Laura Caseley');
|
assert.equal(author, 'Laura Caseley');
|
||||||
});
|
});
|
||||||
|
|
||||||
// it('returns the date_published', async () => {
|
|
||||||
// // To pass this test, fill out the date_published selector
|
|
||||||
// // in ./src/extractors/custom/www.littlethings.com/index.js.
|
|
||||||
// const html =
|
|
||||||
// fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
|
|
||||||
// const articleUrl =
|
|
||||||
// 'http://www.littlethings.com/diy-pineapple-lamp/';
|
|
||||||
//
|
|
||||||
// const { date_published } =
|
|
||||||
// await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
//
|
|
||||||
// // Update these values with the expected values from
|
|
||||||
// // the article.
|
|
||||||
// assert.equal(date_published, '');
|
|
||||||
// });
|
|
||||||
|
|
||||||
// it('returns the dek', async () => {
|
|
||||||
// // To pass this test, fill out the dek selector
|
|
||||||
// // in ./src/extractors/custom/www.littlethings.com/index.js.
|
|
||||||
// const html =
|
|
||||||
// fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
|
|
||||||
// const articleUrl =
|
|
||||||
// 'http://www.littlethings.com/diy-pineapple-lamp/';
|
|
||||||
//
|
|
||||||
// const { dek } =
|
|
||||||
// await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
//
|
|
||||||
// // Update these values with the expected values from
|
|
||||||
// // the article.
|
|
||||||
// assert.equal(dek, '');
|
|
||||||
// });
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.littlethings.com/index.js.
|
// in ./src/extractors/custom/www.littlethings.com/index.js.
|
||||||
|
@ -51,7 +51,6 @@ export const MSNExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('MSNExtractor', () => {
|
|||||||
assert.equal(date_published.split('T')[0], '2016-09-21');
|
assert.equal(date_published.split('T')[0], '2016-09-21');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.msn.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'The psychological reason why we love to watch sad movies is linked to the release of endorphins.');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.msn.com/index.js.
|
// in ./src/extractors/custom/www.msn.com/index.js.
|
||||||
|
@ -48,7 +48,6 @@ export const NewYorkerExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -68,22 +68,6 @@ describe('NewYorkerExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-09-26T18:04:22.000Z');
|
assert.equal(date_published, '2016-09-26T18:04:22.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.newyorker.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'In a decade, events like the recent data breach at Yahoo could become much more common, driven by a new kind of machine.');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.newyorker.com/index.js.
|
// in ./src/extractors/custom/www.newyorker.com/index.js.
|
||||||
|
@ -54,7 +54,6 @@ export const PoliticoExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('PoliticoExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-10-04T09:07:00.000Z');
|
assert.equal(date_published, '2016-10-04T09:07:00.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.politico.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
|
|
||||||
const articleUrl =
|
|
||||||
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, '"Is it just me or are the two VP candidates infinitely more appealing than their running mates?" said a Pennsylvania Republican.');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.politico.com/index.js.
|
// in ./src/extractors/custom/www.politico.com/index.js.
|
||||||
|
@ -40,8 +40,6 @@ export const TheAtlanticExtractor = {
|
|||||||
|
|
||||||
lead_image_url: null,
|
lead_image_url: null,
|
||||||
|
|
||||||
dek: null,
|
|
||||||
|
|
||||||
next_page_url: null,
|
next_page_url: null,
|
||||||
|
|
||||||
excerpt: null,
|
excerpt: null,
|
||||||
|
@ -22,7 +22,6 @@ export const WwwWashingtonpostComExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -66,22 +66,6 @@ describe('WwwWashingtonpostComExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-11-22T13:57:00.000Z');
|
assert.equal(date_published, '2016-11-22T13:57:00.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.washingtonpost.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');
|
|
||||||
const articleUrl =
|
|
||||||
'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'The foundation checked “yes” on the form for 2015 when asked whether it had transferred “income or assets to a disqualified person.”');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.washingtonpost.com/index.js.
|
// in ./src/extractors/custom/www.washingtonpost.com/index.js.
|
||||||
|
@ -51,7 +51,6 @@ export const WiredExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -69,22 +69,6 @@ describe('WiredExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-09-30T07:00:12.000Z');
|
assert.equal(date_published, '2016-09-30T07:00:12.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.wired.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');
|
|
||||||
const articleUrl =
|
|
||||||
'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'Time to break out the tissues, space fans.');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.wired.com/index.js.
|
// in ./src/extractors/custom/www.wired.com/index.js.
|
||||||
|
@ -51,7 +51,6 @@ export const YahooExtractor = {
|
|||||||
|
|
||||||
dek: {
|
dek: {
|
||||||
selectors: [
|
selectors: [
|
||||||
['meta[name="og:description"]', 'value'],
|
|
||||||
// enter dek selectors
|
// enter dek selectors
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -69,22 +69,6 @@ describe('YahooExtractor', () => {
|
|||||||
assert.equal(date_published, '2016-10-03T05:00:00.000Z');
|
assert.equal(date_published, '2016-10-03T05:00:00.000Z');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns the dek', async () => {
|
|
||||||
// To pass this test, fill out the dek selector
|
|
||||||
// in ./src/extractors/custom/www.yahoo.com/index.js.
|
|
||||||
const html =
|
|
||||||
fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');
|
|
||||||
const articleUrl =
|
|
||||||
'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
|
|
||||||
|
|
||||||
const { dek } =
|
|
||||||
await Mercury.parse(articleUrl, html, { fallback: false });
|
|
||||||
|
|
||||||
// Update these values with the expected values from
|
|
||||||
// the article.
|
|
||||||
assert.equal(dek, 'The Hillary Clinton campaign has canceled joint appearances with former primary opponent Bernie Sanders after he admitted that "of course" it bothered him that Clinton seemed to be talking down to his supporters in hacked audio from a fundraiser. The two were set to have joint appearance together Monday. Instead, Sanders will appear in both Iowa and Wisconsin on Monday to boost her candidacy without her. Clinton is now scheduled to swing through Iowa later in the week, but possibly without Sanders, who was asked on CNN\'s "State of the Union" if it bothered him that Clinton had referred to his younger supporters as "the children of the great recession" who "live in their parents\' basement" to');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('returns the lead_image_url', async () => {
|
it('returns the lead_image_url', async () => {
|
||||||
// To pass this test, fill out the lead_image_url selector
|
// To pass this test, fill out the lead_image_url selector
|
||||||
// in ./src/extractors/custom/www.yahoo.com/index.js.
|
// in ./src/extractors/custom/www.yahoo.com/index.js.
|
||||||
|
Loading…
Reference in New Issue
Block a user