fix: cleaning up deks (#44)

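We've solidified what we consider a dek. This PR removes the dek selectors that do not fit that definition (the ones that read a page's `description` / `og:description` meta tag), along with the corresponding dek tests.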
This commit is contained in:
Adam Pash 2016-12-02 15:17:49 -08:00 committed by GitHub
parent b415d1d37c
commit 00f8965c1f
23 changed files with 0 additions and 206 deletions

View File

@ -51,7 +51,6 @@ export const WikiaExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
], ],
}, },

View File

@ -53,7 +53,6 @@ export const ApartmentTherapyExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name=description]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('CustomExtractor', () => {
assert.equal(date_published, '2016-10-13T21:00:00.000Z'); assert.equal(date_published, '2016-10-13T21:00:00.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js.
const html =
fs.readFileSync('./fixtures/www.apartmenttherapy.com/1476396697639.html');
const articleUrl =
'http://www.apartmenttherapy.com/a-light-filled-la-loft-236564';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, "Name: Ashley Location: Downtown — Los Angeles, California Welcome to our sunny and spacious downtown home located in the in the heart of Downtown LA's Historic Core. Inside you'll find a 1,300 square foot bi-level ground unit with loft (only three of its kind!) that offers an unparalleled, refined industrial, modern aesthetic.");
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.apartmenttherapy.com/index.js. // in ./src/extractors/custom/www.apartmenttherapy.com/index.js.

View File

@ -47,7 +47,6 @@ export const BroadwayWorldExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('CustomExtractor', () => {
assert.equal(date_published, '2016-10-13T19:35:00.000Z'); assert.equal(date_published, '2016-10-13T19:35:00.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js.
const html =
fs.readFileSync('./fixtures/www.broadwayworld.com/1476392567143.html');
const articleUrl =
'http://www.broadwayworld.com/article/American-Theatre-Wing-Launches-Andrew-Lloyd-Webber-Training-Scholarships-20161013';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The American Theatre Wing announced today that their Andrew Lloyd Webber Initiative has launched its second initiative program, the Training Scholarships, bridging the gap between talent and opportunity and creating a strong pipeline to the professional theatre for promising artists of all backgrounds.');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.broadwayworld.com/index.js. // in ./src/extractors/custom/www.broadwayworld.com/index.js.

View File

@ -56,7 +56,6 @@ export const BuzzfeedExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="description"]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('BuzzfeedExtractor', () => {
// // assert.equal(date_published, 'hi'); // // assert.equal(date_published, 'hi');
// }); // });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js.
const html =
fs.readFileSync('./fixtures/www.buzzfeed.com/1475531975121.html');
const articleUrl =
'https://www.buzzfeed.com/ikrd/people-are-calling-out-this-edited-picture-of-demi-lovato-fo';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'Lovato said: "Is that how my boobs should look?"..');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.buzzfeed.com/index.js. // in ./src/extractors/custom/www.buzzfeed.com/index.js.

View File

@ -20,8 +20,6 @@ export const WwwCnnComExtractor = {
], ],
}, },
dek: null,
lead_image_url: { lead_image_url: {
selectors: [ selectors: [
['meta[name="og:image"]', 'value'], ['meta[name="og:image"]', 'value'],

View File

@ -66,22 +66,6 @@ describe('WwwCnnComExtractor', () => {
assert.equal(date_published, '2016-11-29T10:39:35.000Z'); assert.equal(date_published, '2016-11-29T10:39:35.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.cnn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
const articleUrl =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, null);
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.cnn.com/index.js. // in ./src/extractors/custom/www.cnn.com/index.js.

View File

@ -53,38 +53,6 @@ describe('LittleThingsExtractor', () => {
assert.equal(author, 'Laura Caseley'); assert.equal(author, 'Laura Caseley');
}); });
// it('returns the date_published', async () => {
// // To pass this test, fill out the date_published selector
// // in ./src/extractors/custom/www.littlethings.com/index.js.
// const html =
// fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
// const articleUrl =
// 'http://www.littlethings.com/diy-pineapple-lamp/';
//
// const { date_published } =
// await Mercury.parse(articleUrl, html, { fallback: false });
//
// // Update these values with the expected values from
// // the article.
// assert.equal(date_published, '');
// });
// it('returns the dek', async () => {
// // To pass this test, fill out the dek selector
// // in ./src/extractors/custom/www.littlethings.com/index.js.
// const html =
// fs.readFileSync('./fixtures/www.littlethings.com/1475605036506.html');
// const articleUrl =
// 'http://www.littlethings.com/diy-pineapple-lamp/';
//
// const { dek } =
// await Mercury.parse(articleUrl, html, { fallback: false });
//
// // Update these values with the expected values from
// // the article.
// assert.equal(dek, '');
// });
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.littlethings.com/index.js. // in ./src/extractors/custom/www.littlethings.com/index.js.

View File

@ -51,7 +51,6 @@ export const MSNExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="description"]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('MSNExtractor', () => {
assert.equal(date_published.split('T')[0], '2016-09-21'); assert.equal(date_published.split('T')[0], '2016-09-21');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.msn.com/index.js.
const html =
fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
const articleUrl =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The psychological reason why we love to watch sad movies is linked to the release of endorphins.');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.msn.com/index.js. // in ./src/extractors/custom/www.msn.com/index.js.

View File

@ -48,7 +48,6 @@ export const NewYorkerExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
], ],
}, },

View File

@ -68,22 +68,6 @@ describe('NewYorkerExtractor', () => {
assert.equal(date_published, '2016-09-26T18:04:22.000Z'); assert.equal(date_published, '2016-09-26T18:04:22.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.newyorker.com/index.js.
const html =
fs.readFileSync('./fixtures/www.newyorker.com/1475248565793.html');
const articleUrl =
'http://www.newyorker.com/tech/elements/hacking-cryptography-and-the-countdown-to-quantum-computing';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'In a decade, events like the recent data breach at Yahoo could become much more common, driven by a new kind of machine.');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.newyorker.com/index.js. // in ./src/extractors/custom/www.newyorker.com/index.js.

View File

@ -54,7 +54,6 @@ export const PoliticoExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="description"]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('PoliticoExtractor', () => {
assert.equal(date_published, '2016-10-04T09:07:00.000Z'); assert.equal(date_published, '2016-10-04T09:07:00.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.politico.com/index.js.
const html =
fs.readFileSync('./fixtures/www.politico.com/1475617690069.html');
const articleUrl =
'http://www.politico.com/story/2016/10/who-will-win-the-vp-debate-229079?lo=ut_a1';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, '"Is it just me or are the two VP candidates infinitely more appealing than their running mates?" said a Pennsylvania Republican.');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.politico.com/index.js. // in ./src/extractors/custom/www.politico.com/index.js.

View File

@ -40,8 +40,6 @@ export const TheAtlanticExtractor = {
lead_image_url: null, lead_image_url: null,
dek: null,
next_page_url: null, next_page_url: null,
excerpt: null, excerpt: null,

View File

@ -22,7 +22,6 @@ export const WwwWashingtonpostComExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
], ],
}, },

View File

@ -66,22 +66,6 @@ describe('WwwWashingtonpostComExtractor', () => {
assert.equal(date_published, '2016-11-22T13:57:00.000Z'); assert.equal(date_published, '2016-11-22T13:57:00.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.washingtonpost.com/index.js.
const html =
fs.readFileSync('./fixtures/www.washingtonpost.com/1480364838420.html');
const articleUrl =
'https://www.washingtonpost.com/politics/trump-foundation-apparently-admits-to-violating-ban-on-self-dealing-new-filing-to-irs-shows/2016/11/22/893f6508-b0a9-11e6-8616-52b15787add0_story.html';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The foundation checked “yes” on the form for 2015 when asked whether it had transferred “income or assets to a disqualified person.”');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.washingtonpost.com/index.js. // in ./src/extractors/custom/www.washingtonpost.com/index.js.

View File

@ -51,7 +51,6 @@ export const WiredExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('WiredExtractor', () => {
assert.equal(date_published, '2016-09-30T07:00:12.000Z'); assert.equal(date_published, '2016-09-30T07:00:12.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.wired.com/index.js.
const html =
fs.readFileSync('./fixtures/www.wired.com/1475256747028.html');
const articleUrl =
'https://www.wired.com/2016/09/ode-rosetta-spacecraft-going-die-comet/';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'Time to break out the tissues, space fans.');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.wired.com/index.js. // in ./src/extractors/custom/www.wired.com/index.js.

View File

@ -51,7 +51,6 @@ export const YahooExtractor = {
dek: { dek: {
selectors: [ selectors: [
['meta[name="og:description"]', 'value'],
// enter dek selectors // enter dek selectors
], ],
}, },

View File

@ -69,22 +69,6 @@ describe('YahooExtractor', () => {
assert.equal(date_published, '2016-10-03T05:00:00.000Z'); assert.equal(date_published, '2016-10-03T05:00:00.000Z');
}); });
it('returns the dek', async () => {
// To pass this test, fill out the dek selector
// in ./src/extractors/custom/www.yahoo.com/index.js.
const html =
fs.readFileSync('./fixtures/www.yahoo.com/1475529982399.html');
const articleUrl =
'https://www.yahoo.com/news/m/1c621104-b0eb-3b4d-9b0a-7bb979f80d7d/ss_clinton-cancels-joint-events.html';
const { dek } =
await Mercury.parse(articleUrl, html, { fallback: false });
// Update these values with the expected values from
// the article.
assert.equal(dek, 'The Hillary Clinton campaign has canceled joint appearances with former primary opponent Bernie Sanders after he admitted that "of course" it bothered him that Clinton seemed to be talking down to his supporters in hacked audio from a fundraiser. The two were set to have joint appearance together Monday. Instead, Sanders will appear in both Iowa and Wisconsin on Monday to boost her candidacy without her. Clinton is now scheduled to swing through Iowa later in the week, but possibly without Sanders, who was asked on CNN\'s "State of the Union" if it bothered him that Clinton had referred to his younger supporters as "the children of the great recession" who "live in their parents\' basement" to');
});
it('returns the lead_image_url', async () => { it('returns the lead_image_url', async () => {
// To pass this test, fill out the lead_image_url selector // To pass this test, fill out the lead_image_url selector
// in ./src/extractors/custom/www.yahoo.com/index.js. // in ./src/extractors/custom/www.yahoo.com/index.js.