From 00f8965c1f433e6e7c8a9dd781a02214adf38666 Mon Sep 17 00:00:00 2001 From: Adam Pash Date: Fri, 2 Dec 2016 15:17:49 -0800 Subject: [PATCH] fix: cleaning up deks (#44) We've solidified what we consider a dek. This PR removes the dek selectors that do not fit that mold. --- .../custom/fandom.wikia.com/index.js | 1 - .../custom/www.apartmenttherapy.com/index.js | 1 - .../www.apartmenttherapy.com/index.test.js | 16 ---------- .../custom/www.broadwayworld.com/index.js | 1 - .../www.broadwayworld.com/index.test.js | 16 ---------- .../custom/www.buzzfeed.com/index.js | 1 - .../custom/www.buzzfeed.com/index.test.js | 16 ---------- src/extractors/custom/www.cnn.com/index.js | 2 -- .../custom/www.cnn.com/index.test.js | 16 ---------- .../custom/www.littlethings.com/index.test.js | 32 ------------------- src/extractors/custom/www.msn.com/index.js | 1 - .../custom/www.msn.com/index.test.js | 16 ---------- .../custom/www.newyorker.com/index.js | 1 - .../custom/www.newyorker.com/index.test.js | 16 ---------- .../custom/www.politico.com/index.js | 1 - .../custom/www.politico.com/index.test.js | 16 ---------- .../custom/www.theatlantic.com/index.js | 2 -- .../custom/www.washingtonpost.com/index.js | 1 - .../www.washingtonpost.com/index.test.js | 16 ---------- src/extractors/custom/www.wired.com/index.js | 1 - .../custom/www.wired.com/index.test.js | 16 ---------- src/extractors/custom/www.yahoo.com/index.js | 1 - .../custom/www.yahoo.com/index.test.js | 16 ---------- 23 files changed, 206 deletions(-) diff --git a/src/extractors/custom/fandom.wikia.com/index.js b/src/extractors/custom/fandom.wikia.com/index.js index c06801cc..be675653 100644 --- a/src/extractors/custom/fandom.wikia.com/index.js +++ b/src/extractors/custom/fandom.wikia.com/index.js @@ -51,7 +51,6 @@ export const WikiaExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.apartmenttherapy.com/index.js b/src/extractors/custom/www.apartmenttherapy.com/index.js index d491770e..e601814e 100644 --- a/src/extractors/custom/www.apartmenttherapy.com/index.js +++ b/src/extractors/custom/www.apartmenttherapy.com/index.js @@ -53,7 +53,6 @@ export const ApartmentTherapyExtractor = { dek: { selectors: [ - ['meta[name=description]', 'value'], ], }, diff --git a/src/extractors/custom/www.apartmenttherapy.com/index.test.js b/src/extractors/custom/www.apartmenttherapy.com/index.test.js index a9fbbe6b..b84b0bf7 100644 --- a/src/extractors/custom/www.apartmenttherapy.com/index.test.js +++ b/src/extractors/custom/www.apartmenttherapy.com/index.test.js @@ -69,22 +69,6 @@ describe('CustomExtractor', () => { assert.equal(date_published, '2016-10-13T21:00:00.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.apartmenttherapy.com/index.js. - const html = - fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html'); - const articleUrl = - 'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, "Name: Ashley Location: Downtown — Los Angeles, California Welcome to our sunny and spacious downtown home located in the in the heart of Downtown LA's Historic Core. Inside you'll find a 1,300 square foot bi-level ground unit with loft (only three of its kind!) that offers an unparalleled, refined industrial, modern aesthetic."); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.apartmenttherapy.com/index.js. diff --git a/src/extractors/custom/www.broadwayworld.com/index.js b/src/extractors/custom/www.broadwayworld.com/index.js index c5db73e2..01539794 100644 --- a/src/extractors/custom/www.broadwayworld.com/index.js +++ b/src/extractors/custom/www.broadwayworld.com/index.js @@ -47,7 +47,6 @@ export const BroadwayWorldExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.broadwayworld.com/index.test.js b/src/extractors/custom/www.broadwayworld.com/index.test.js index 672dff11..b8430adf 100644 --- a/src/extractors/custom/www.broadwayworld.com/index.test.js +++ b/src/extractors/custom/www.broadwayworld.com/index.test.js @@ -69,22 +69,6 @@ describe('CustomExtractor', () => { assert.equal(date_published, '2016-10-13T19:35:00.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.broadwayworld.com/index.js. - const html = - fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html'); - const articleUrl = - 'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'The American Theatre Wing announced today that their Andrew Lloyd Webber Initiative has launched its second initiative program, the Training Scholarships, bridging the gap between talent and opportunity and creating a strong pipeline to the professional theatre for promising artists of all backgrounds.'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.broadwayworld.com/index.js. diff --git a/src/extractors/custom/www.buzzfeed.com/index.js b/src/extractors/custom/www.buzzfeed.com/index.js index 4636f1ec..341b049b 100644 --- a/src/extractors/custom/www.buzzfeed.com/index.js +++ b/src/extractors/custom/www.buzzfeed.com/index.js @@ -56,7 +56,6 @@ export const BuzzfeedExtractor = { dek: { selectors: [ - ['meta[name="description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.buzzfeed.com/index.test.js b/src/extractors/custom/www.buzzfeed.com/index.test.js index a7c8f5db..ca3fe85b 100644 --- a/src/extractors/custom/www.buzzfeed.com/index.test.js +++ b/src/extractors/custom/www.buzzfeed.com/index.test.js @@ -69,22 +69,6 @@ describe('BuzzfeedExtractor', () => { // // assert.equal(date_published, 'hi'); // }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.buzzfeed.com/index.js. - const html = - fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html'); - const articleUrl = - 'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'Lovato said: "Is that how my boobs should look?"..'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.buzzfeed.com/index.js. diff --git a/src/extractors/custom/www.cnn.com/index.js b/src/extractors/custom/www.cnn.com/index.js index 87c3b62f..08ee0a71 100644 --- a/src/extractors/custom/www.cnn.com/index.js +++ b/src/extractors/custom/www.cnn.com/index.js @@ -20,8 +20,6 @@ export const WwwCnnComExtractor = { ], }, - dek: null, - lead_image_url: { selectors: [ ['meta[name="og:image"]', 'value'], diff --git a/src/extractors/custom/www.cnn.com/index.test.js b/src/extractors/custom/www.cnn.com/index.test.js index 7a10688a..613a61ea 100644 --- a/src/extractors/custom/www.cnn.com/index.test.js +++ b/src/extractors/custom/www.cnn.com/index.test.js @@ -66,22 +66,6 @@ describe('WwwCnnComExtractor', () => { assert.equal(date_published, '2016-11-29T10:39:35.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.cnn.com/index.js. - const html = - fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html'); - const articleUrl = - 'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, null); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.cnn.com/index.js. diff --git a/src/extractors/custom/www.littlethings.com/index.test.js b/src/extractors/custom/www.littlethings.com/index.test.js index 33df04ed..78fc75a8 100644 --- a/src/extractors/custom/www.littlethings.com/index.test.js +++ b/src/extractors/custom/www.littlethings.com/index.test.js @@ -53,38 +53,6 @@ describe('LittleThingsExtractor', () => { assert.equal(author, 'Laura Caseley'); }); - // it('returns the date_published', async () => { - // // To pass this test, fill out the date_published selector - // // in ./src/extractors/custom/www.littlethings.com/index.js. - // const html = - // fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html'); - // const articleUrl = - // 'http://www.littlethings.com/diy-pineapple-lamp/'; - // - // const { date_published } = - // await Mercury.parse(articleUrl, html, { fallback: false }); - // - // // Update these values with the expected values from - // // the article. - // assert.equal(date_published, ''); - // }); - - // it('returns the dek', async () => { - // // To pass this test, fill out the dek selector - // // in ./src/extractors/custom/www.littlethings.com/index.js. - // const html = - // fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html'); - // const articleUrl = - // 'http://www.littlethings.com/diy-pineapple-lamp/'; - // - // const { dek } = - // await Mercury.parse(articleUrl, html, { fallback: false }); - // - // // Update these values with the expected values from - // // the article. - // assert.equal(dek, ''); - // }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.littlethings.com/index.js. diff --git a/src/extractors/custom/www.msn.com/index.js b/src/extractors/custom/www.msn.com/index.js index 558db25c..2fc1de52 100644 --- a/src/extractors/custom/www.msn.com/index.js +++ b/src/extractors/custom/www.msn.com/index.js @@ -51,7 +51,6 @@ export const MSNExtractor = { dek: { selectors: [ - ['meta[name="description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.msn.com/index.test.js b/src/extractors/custom/www.msn.com/index.test.js index dda948fb..dbd0325c 100644 --- a/src/extractors/custom/www.msn.com/index.test.js +++ b/src/extractors/custom/www.msn.com/index.test.js @@ -69,22 +69,6 @@ describe('MSNExtractor', () => { assert.equal(date_published.split('T')[0], '2016-09-21'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.msn.com/index.js. - const html = - fs.readFileSync('./fixtures/www.msn.com/1475506925474.html'); - const articleUrl = - 'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'The psychological reason why we love to watch sad movies is linked to the release of endorphins.'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.msn.com/index.js. diff --git a/src/extractors/custom/www.newyorker.com/index.js b/src/extractors/custom/www.newyorker.com/index.js index 6aba8b3a..c1fc92e0 100644 --- a/src/extractors/custom/www.newyorker.com/index.js +++ b/src/extractors/custom/www.newyorker.com/index.js @@ -48,7 +48,6 @@ export const NewYorkerExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.newyorker.com/index.test.js b/src/extractors/custom/www.newyorker.com/index.test.js index fcef72d7..ab036f35 100644 --- a/src/extractors/custom/www.newyorker.com/index.test.js +++ b/src/extractors/custom/www.newyorker.com/index.test.js @@ -68,22 +68,6 @@ describe('NewYorkerExtractor', () => { assert.equal(date_published, '2016-09-26T18:04:22.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.newyorker.com/index.js. - const html = - fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html'); - const articleUrl = - 'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'In a decade, events like the recent data breach at Yahoo could become much more common, driven by a new kind of machine.'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.newyorker.com/index.js. diff --git a/src/extractors/custom/www.politico.com/index.js b/src/extractors/custom/www.politico.com/index.js index 4a49be97..c18699b3 100644 --- a/src/extractors/custom/www.politico.com/index.js +++ b/src/extractors/custom/www.politico.com/index.js @@ -54,7 +54,6 @@ export const PoliticoExtractor = { dek: { selectors: [ - ['meta[name="description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.politico.com/index.test.js b/src/extractors/custom/www.politico.com/index.test.js index e263f380..16f07c33 100644 --- a/src/extractors/custom/www.politico.com/index.test.js +++ b/src/extractors/custom/www.politico.com/index.test.js @@ -69,22 +69,6 @@ describe('PoliticoExtractor', () => { assert.equal(date_published, '2016-10-04T09:07:00.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.politico.com/index.js. - const html = - fs.readFileSync('./fixtures/www.politico.com/1475617690069.html'); - const articleUrl = - 'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, '"Is it just me or are the two VP candidates infinitely more appealing than their running mates?" said a Pennsylvania Republican.'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.politico.com/index.js. diff --git a/src/extractors/custom/www.theatlantic.com/index.js b/src/extractors/custom/www.theatlantic.com/index.js index 04770ea2..8fdeedd4 100644 --- a/src/extractors/custom/www.theatlantic.com/index.js +++ b/src/extractors/custom/www.theatlantic.com/index.js @@ -40,8 +40,6 @@ export const TheAtlanticExtractor = { lead_image_url: null, - dek: null, - next_page_url: null, excerpt: null, diff --git a/src/extractors/custom/www.washingtonpost.com/index.js b/src/extractors/custom/www.washingtonpost.com/index.js index a9b8828f..fd2d1f7e 100644 --- a/src/extractors/custom/www.washingtonpost.com/index.js +++ b/src/extractors/custom/www.washingtonpost.com/index.js @@ -22,7 +22,6 @@ export const WwwWashingtonpostComExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.washingtonpost.com/index.test.js b/src/extractors/custom/www.washingtonpost.com/index.test.js index bd1d1b45..8e986967 100644 --- a/src/extractors/custom/www.washingtonpost.com/index.test.js +++ b/src/extractors/custom/www.washingtonpost.com/index.test.js @@ -66,22 +66,6 @@ describe('WwwWashingtonpostComExtractor', () => { assert.equal(date_published, '2016-11-22T13:57:00.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.washingtonpost.com/index.js. - const html = - fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html'); - const articleUrl = - 'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'The foundation checked “yes” on the form for 2015 when asked whether it had transferred “income or assets to a disqualified person.”'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.washingtonpost.com/index.js. diff --git a/src/extractors/custom/www.wired.com/index.js b/src/extractors/custom/www.wired.com/index.js index 3a00e2de..10351072 100644 --- a/src/extractors/custom/www.wired.com/index.js +++ b/src/extractors/custom/www.wired.com/index.js @@ -51,7 +51,6 @@ export const WiredExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], ], }, diff --git a/src/extractors/custom/www.wired.com/index.test.js b/src/extractors/custom/www.wired.com/index.test.js index 27e91793..7ff6eac1 100644 --- a/src/extractors/custom/www.wired.com/index.test.js +++ b/src/extractors/custom/www.wired.com/index.test.js @@ -69,22 +69,6 @@ describe('WiredExtractor', () => { assert.equal(date_published, '2016-09-30T07:00:12.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.wired.com/index.js. - const html = - fs.readFileSync('./fixtures/www.wired.com/1475256747028.html'); - const articleUrl = - 'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'Time to break out the tissues, space fans.'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.wired.com/index.js. diff --git a/src/extractors/custom/www.yahoo.com/index.js b/src/extractors/custom/www.yahoo.com/index.js index 3fc61b8b..c9ba3918 100644 --- a/src/extractors/custom/www.yahoo.com/index.js +++ b/src/extractors/custom/www.yahoo.com/index.js @@ -51,7 +51,6 @@ export const YahooExtractor = { dek: { selectors: [ - ['meta[name="og:description"]', 'value'], // enter dek selectors ], }, diff --git a/src/extractors/custom/www.yahoo.com/index.test.js b/src/extractors/custom/www.yahoo.com/index.test.js index 45b1f760..599f9d54 100644 --- a/src/extractors/custom/www.yahoo.com/index.test.js +++ b/src/extractors/custom/www.yahoo.com/index.test.js @@ -69,22 +69,6 @@ describe('YahooExtractor', () => { assert.equal(date_published, '2016-10-03T05:00:00.000Z'); }); - it('returns the dek', async () => { - // To pass this test, fill out the dek selector - // in ./src/extractors/custom/www.yahoo.com/index.js. - const html = - fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html'); - const articleUrl = - 'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html'; - - const { dek } = - await Mercury.parse(articleUrl, html, { fallback: false }); - - // Update these values with the expected values from - // the article. - assert.equal(dek, 'The Hillary Clinton campaign has canceled joint appearances with former primary opponent Bernie Sanders after he admitted that "of course" it bothered him that Clinton seemed to be talking down to his supporters in hacked audio from a fundraiser. The two were set to have joint appearance together Monday. Instead, Sanders will appear in both Iowa and Wisconsin on Monday to boost her candidacy without her. Clinton is now scheduled to swing through Iowa later in the week, but possibly without Sanders, who was asked on CNN\'s "State of the Union" if it bothered him that Clinton had referred to his younger supporters as "the children of the great recession" who "live in their parents\' basement" to'); - }); - it('returns the lead_image_url', async () => { // To pass this test, fill out the lead_image_url selector // in ./src/extractors/custom/www.yahoo.com/index.js.