feat: add content format output options (#256)

pull/258/head
Adam Pash 5 years ago committed by GitHub
parent a57f29eec3
commit 9b0664bc91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,10 +2,14 @@
/* eslint-disable */
const Mercury = require('./dist/mercury');
const argv = require('yargs-parser')(process.argv.slice(2));
const [, , url] = process.argv;
(async urlToParse => {
const {
_: [url],
format,
f,
} = argv;
(async (urlToParse, contentType) => {
if (!urlToParse) {
console.log(
'\n\
@ -13,14 +17,23 @@ mercury-parser\n\n\
The Mercury Parser extracts semantic content from any url\n\n\
Usage:\n\
\n\
mercury-parser [url-to-parse]\n\
$ mercury-parser url-to-parse [--format=html|text|markdown]\n\
\n\
'
);
return;
}
try {
const result = await Mercury.parse(urlToParse);
const contentTypeMap = {
html: 'html',
markdown: 'markdown',
md: 'markdown',
text: 'text',
txt: 'text',
};
const result = await Mercury.parse(urlToParse, null, {
contentType: contentTypeMap[contentType],
});
console.log(JSON.stringify(result, null, 2));
} catch (e) {
if (e.message === 'ETIMEDOUT' && false) {
@ -38,4 +51,4 @@ Usage:\n\
console.error(`\n${reportBug}\n`);
process.exit(1);
}
})(url);
})(url, format || f);

4292
dist/mercury.js vendored

File diff suppressed because it is too large Load Diff

20973
dist/mercury.web.js vendored

File diff suppressed because one or more lines are too long

@ -116,9 +116,11 @@
"request": "github:czardoz/request",
"request-promise": "^4.2.2",
"string-direction": "^0.1.2",
"turndown": "^5.0.3",
"url": "^0.11.0",
"valid-url": "^1.0.9",
"wuzzy": "^0.1.3"
"wuzzy": "^0.1.3",
"yargs-parser": "^13.0.0"
},
"bundleDependencies": [
"jquery",

@ -25,7 +25,7 @@ var Mercury = require('./dist/mercury_test')
console.log(`Fetching link(s)`)
urls.map(url => {
Mercury.parse(url, null, { fallback: false }).then(function(result) {
Mercury.parse(url, { fallback: false }).then(function(result) {
var htmlFile = './preview.html'
var jsonFile = './preview.json'

@ -12,10 +12,10 @@ const run = () => {
const html = fs.readFileSync(`${fixture}`);
// first parse is just to get the url
Mercury.parse('http://example.com', html, { fallback: false }).then(
Mercury.parse('http://example.com', { html, fallback: false }).then(
({ url, domain, excerpt, word_count, direction }) => {
// with the url, second pass will test the correct parser
Mercury.parse(url, html, { fallback: false }).then(json => {
Mercury.parse(url, { html, fallback: false }).then(json => {
// removing excerpt b/c this comes from content, not necessary
delete json.excerpt;

@ -1,3 +1,5 @@
#!/bin/bash
echo $3
find $3 -exec sed -i '' "s%$1%$2%g" '{}' \;

@ -114,7 +114,7 @@ function savePage($, [url], newParser) {
fs.writeFileSync(file, html);
Mercury.parse(url, html).then(result => {
Mercury.parse(url, { html }).then(result => {
if (newParser) {
confirm(
generateScaffold,

@ -50,7 +50,7 @@ export default function(file, url, dir, result, name) {
const html =
fs.readFileSync('${file}');
result =
Mercury.parse(url, html, { fallback: false });
Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -40,7 +40,7 @@ execFile('find', ['fixtures', '-type', 'f'], (err, stdout) => {
Promise.all(
fixturesToUpdate.map((fixture, i) => {
const html = fs.readFileSync(fixture);
return Mercury.parse(`http://${baseDomains[i]}`, html);
return Mercury.parse(`http://${baseDomains[i]}`, { html });
})
).then(parsedFixture => {
const fixturesAndUrls = fixturesToUpdate.reduce(

@ -18,7 +18,7 @@ describe('twofortysevensportsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/247sports.com/1481309665090.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('AbcnewsGoComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/abcnews.go.com/1481922563840.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('DeadspinExtractor', () => {
const html = fs.readFileSync(
'./fixtures/deadspin.com/1476389931786.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WikiaExtractor', () => {
const html = fs.readFileSync(
'./fixtures/fandom.wikia.com/1475595373938.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -15,7 +15,7 @@ describe('FortuneComExtractor', () => {
beforeAll(() => {
url = 'http://fortune.com/2016/12/15/amazon-alexa-gadgets/';
const html = fs.readFileSync('./fixtures/fortune.com/1485216994169.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('ForwardComExtractor', () => {
url =
'http://forward.com/schmooze/358592/why-does-slack-want-me-to-say-hummus/';
const html = fs.readFileSync('./fixtures/forward.com/1488392273490.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('FusionNetExtractor', () => {
url =
'http://fusion.net/story/377467/la-la-land-oscar-hollywood-musicals-race/';
const html = fs.readFileSync('./fixtures/fusion.net/1482529202024.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('GothamistComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/gothamist.com/1489073770258.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('HellogigglesComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/hellogiggles.com/1482437663500.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('IciRadioCanadaCaExtractor', () => {
const html = fs.readFileSync(
'./fixtures/ici.radio-canada.ca/1489433621634.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('MashableComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/mashable.com/1481670648585.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('MediumExtractor', () => {
url =
'https://medium.com/the-wtf-economy/wtf-whats-the-future-e52ab9515573#.ilwrgwsks';
const html = fs.readFileSync('./fixtures/medium.com/1477523363921.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
@ -95,7 +95,7 @@ describe('MediumExtractor', () => {
url =
'https://medium.com/@JakobUlbrich/flag-attributes-in-android-how-to-use-them-ac4ec8aee7d1#.h949wjmyw';
const html = fs.readFileSync('./fixtures/medium.com/1485902752952.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('returns the content', async () => {

@ -18,7 +18,7 @@ describe('MoneyCnnComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/money.cnn.com/1480437611330.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
// This test should be passing by default.

@ -18,7 +18,7 @@ describe('NewrepublicComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/newrepublic.com/1480434805231.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
// This test should be passing by default.

@ -18,7 +18,7 @@ describe('NewsNationalgeographicComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/news.nationalgeographic.com/1481919545107.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('ObamawhitehouseArchivesGovExtractor', () => {
const html = fs.readFileSync(
'./fixtures/obamawhitehouse.archives.gov/1485905445365.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {
@ -125,7 +125,7 @@ describe('ObamawhitehouseArchivesGovExtractor', () => {
const html = fs.readFileSync(
'./fixtures/obamawhitehouse.archives.gov/1490209983872.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('includes this youtube video', async () => {
@ -146,7 +146,7 @@ describe('ObamawhitehouseArchivesGovExtractor', () => {
const html = fs.readFileSync(
'./fixtures/obamawhitehouse.archives.gov/1490227791307.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('gets the words and video', async () => {
@ -180,7 +180,7 @@ describe('ObamawhitehouseArchivesGovExtractor', () => {
const html = fs.readFileSync(
'./fixtures/obamawhitehouse.archives.gov/1490375478954.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('gets the content', async () => {

@ -18,7 +18,7 @@ describe('ObserverComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/observer.com/1481925269939.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('PagesixComExtractor', () => {
url =
'http://pagesix.com/2016/12/19/sofia-vergara-and-nick-loebs-embryo-drama-taking-a-detour/';
const html = fs.readFileSync('./fixtures/pagesix.com/1482254007534.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -15,7 +15,7 @@ describe('PeopleComExtractor', () => {
beforeAll(() => {
url = 'http://people.com/style/jennifer-aniston-coat-tags-jimmy-kimmel/';
const html = fs.readFileSync('./fixtures/people.com/1481580462922.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('QzComExtractor', () => {
url =
'http://qz.com/863015/uber-is-rolling-out-self-driving-cars-in-san-francisco-in-open-defiance-of-california-dmv/';
const html = fs.readFileSync('./fixtures/qz.com/1481758330660.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('ScienceflyComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/sciencefly.com/1482530492413.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('ThefederalistpapersOrgExtractor', () => {
const html = fs.readFileSync(
'./fixtures/thefederalistpapers.org/1482344359572.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('ThoughtcatalogComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/thoughtcatalog.com/1482426075702.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -9,7 +9,9 @@ describe('TwitterExtractor', () => {
const html = fs.readFileSync('./fixtures/twitter.com/tweet.html');
const uri = 'https://twitter.com/KingBeyonceStan/status/745276948213968896';
const { title, author, date_published } = await Mercury.parse(uri, html);
const { title, author, date_published } = await Mercury.parse(uri, {
html,
});
assert.equal(title, 'Lina Morgana on Twitter');
assert.equal(author, '@KingBeyonceStan');

@ -15,7 +15,7 @@ describe('UproxxComExtractor', () => {
beforeAll(() => {
url = 'http://uproxx.com/news/rudy-giuliani-not-secretary-of-state/';
const html = fs.readFileSync('./fixtures/uproxx.com/1481324633976.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('WwwAlComExtractor', () => {
url =
'http://www.al.com/news/birmingham/index.ssf/2016/12/two_arrested_in_multi-state_de.html#incart_river_home';
const html = fs.readFileSync('./fixtures/www.al.com/1482445422101.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwAmericanowComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.americanow.com/1482528557836.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwAndroidcentralComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.androidcentral.com/1484345154702.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('WwwAolComExtractor', () => {
url =
'http://www.aol.com/article/news/2016/12/01/son-of-slain-police-officer-given-teddy-bears-made-from-dads-un/21618553/';
const html = fs.readFileSync('./fixtures/www.aol.com/1480618816916.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
// This test should be passing by default.

@ -18,7 +18,7 @@ describe('ApartmentTherapyExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.apartmenttherapy.com/1476396697639.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
// To pass this test, rename your extractor in

@ -18,7 +18,7 @@ describe('WwwBloombergComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.bloomberg.com/1481135708958.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {
@ -108,7 +108,7 @@ describe('WwwBloombergComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.bloomberg.com/1481136509532.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {
@ -199,7 +199,7 @@ describe('WwwBloombergComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.bloomberg.com/1481138014494.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -19,7 +19,7 @@ describe('CustomExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.broadwayworld.com/1476392567143.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
// To pass this test, rename your extractor in

@ -18,7 +18,7 @@ describe('WwwBustleComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.bustle.com/1481129185239.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -19,7 +19,7 @@ describe('BuzzfeedExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.buzzfeed.com/1475531975121.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
@ -102,7 +102,7 @@ describe('BuzzfeedExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.buzzfeed.com/1480717502688.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('returns big header images in the content', async () => {

@ -18,7 +18,7 @@ describe('WwwCbssportsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.cbssports.com/1482254907948.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwChicagotribuneComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.chicagotribune.com/1481669367099.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwCinemablendComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.cinemablend.com/1482432215722.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwCnbcComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.cnbc.com/1482251664848.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwCnetComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.cnet.com/1482428196806.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('WwwCnnComExtractor', () => {
url =
'http://www.cnn.com/2016/11/29/politics/donald-trump-transition-presidency/index.html';
const html = fs.readFileSync('./fixtures/www.cnn.com/1480458253239.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwDmagazineComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.dmagazine.com/1481755804475.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwEonlineComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.eonline.com/1481567592820.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwFastcompanyComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.fastcompany.com/1547124373499.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwFoolComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.fool.com/1489175437362.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwFortinetComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.fortinet.com/1546954846985.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwHowtogeekComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.howtogeek.com/1482438125052.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwHuffingtonpostComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.huffingtonpost.com/1480454076105.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwInquisitrComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.inquisitr.com/1481665067498.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwLatimesComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.latimes.com/1481928636876.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwLinkedinComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.linkedin.com/1485452542218.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('LittleThingsExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.littlethings.com/1475605036506.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -17,7 +17,7 @@ describe('WwwMacrumorsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.macrumors.com/1484778558090.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwMentalflossComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.mentalfloss.com/1482186439659.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwMiamiheraldComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.miamiherald.com/1481571585318.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('MSNExtractor', () => {
url =
'http://www.msn.com/en-us/health/wellness/this-is-your-brain-on-sad-movies-plus-5-films-to-cry-to/ar-BBwsPWG?li=BBnb2gg';
const html = fs.readFileSync('./fixtures/www.msn.com/1475506925474.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwMsnbcComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.msnbc.com/1482261084088.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwNationalgeographicComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.nationalgeographic.com/1481921323654.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('WwwNbcnewsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.nbcnews.com/1481667763790.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('NewYorkerExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.newyorker.com/1475248565793.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
@ -111,7 +111,7 @@ describe('NewYorkerExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.newyorker.com/1480713300334.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('returns the dek when present', async () => {

@ -16,7 +16,7 @@ describe('WwwNjComExtractor', () => {
url =
'http://www.nj.com/essex/index.ssf/2016/12/man_sentenced_for_stealing_millions_from_nj_atms_i.html#incart_river_home';
const html = fs.readFileSync('./fixtures/www.nj.com/1481666201503.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -16,7 +16,7 @@ describe('WwwNprOrgExtractor', () => {
url =
'http://www.npr.org/sections/thetwo-way/2016/12/15/505723552/jury-finds-dylann-roof-guilty-in-s-c-church-shooting';
const html = fs.readFileSync('./fixtures/www.npr.org/1481842125199.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwNydailynewsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.nydailynews.com/1481917212613.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('NYTimesExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.nytimes.com/1474318141888.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {
@ -112,7 +112,7 @@ describe('NYTimesExtractor', () => {
const uri =
'http://www.nytimes.com/interactive/2016/09/15/arts/design/national-museum-of-african-american-history-and-culture.html';
const { content, title, author } = await Mercury.parse(uri, html);
const { content, title, author } = await Mercury.parse(uri, { html });
const $ = cheerio.load(content);
const text = $('*')
.first()
@ -132,7 +132,7 @@ describe('NYTimesExtractor', () => {
const uri =
'https://www.nytimes.com/2018/10/09/us/politics/nikki-haley-united-nations.html';
const { title } = await Mercury.parse(uri, html);
const { title } = await Mercury.parse(uri, { html });
assert.equal(
title,

@ -18,7 +18,7 @@ describe('WwwOpposingviewsComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.opposingviews.com/1482427531189.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -19,7 +19,7 @@ describe('PoliticoExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.politico.com/1475617690069.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwPopsugarComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.popsugar.com/1482182390796.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwProspectmagazineCoUkExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.prospectmagazine.co.uk/1488476298434.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('WwwQdailyComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.qdaily.com/1488417691505.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwRawstoryComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.rawstory.com/1482439337481.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwRecodeNetExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.recode.net/1481841159344.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwRefinery29ComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.refinery29.com/1481661863250.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('WwwReutersComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.reuters.com/1481754223635.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwRollingstoneComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.rollingstone.com/1482380017694.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwSbnationComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.sbnation.com/1481062909839.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {
// This test should be passing by default.

@ -16,7 +16,7 @@ describe('WwwSiComExtractor', () => {
url =
'http://www.si.com/nfl/2017/01/12/dallas-cowboys-stephen-jerry-jones';
const html = fs.readFileSync('./fixtures/www.si.com/1484253704408.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwSlateComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.slate.com/1489160616058.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('AtlanticExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.theatlantic.com/1474321707642.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwTheguardianComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.theguardian.com/1480457558008.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -17,7 +17,7 @@ describe('WwwThepennyhoarderComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.thepennyhoarder.com/1482509014877.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwThepoliticalinsiderComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.thepoliticalinsider.com/1482255981213.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwThevergeComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.theverge.com/1480520999617.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -15,7 +15,7 @@ describe('WwwTmzComExtractor', () => {
beforeAll(() => {
url = 'http://www.tmz.com/2016/11/28/prince-wife-estate-will/';
const html = fs.readFileSync('./fixtures/www.tmz.com/1480368537455.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwTodayComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.today.com/1482432737905.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -28,7 +28,8 @@ describe('WwwUsmagazineComExtractor', () => {
const articleUrl =
'http://www.usmagazine.com/celebrity-news/news/lady-gaga-shares-pic-of-ex-taylor-kinney-with-her-mom-w454419';
const { title } = await Mercury.parse(articleUrl, html, {
const { title } = await Mercury.parse(articleUrl, {
html,
fallback: false,
});
@ -67,7 +68,8 @@ describe('WwwUsmagazineComExtractor', () => {
const articleUrl =
'http://www.usmagazine.com/celebrity-news/news/lady-gaga-shares-pic-of-ex-taylor-kinney-with-her-mom-w454419';
const { date_published } = await Mercury.parse(articleUrl, html, {
const { date_published } = await Mercury.parse(articleUrl, {
html,
fallback: false,
});
@ -85,7 +87,8 @@ describe('WwwUsmagazineComExtractor', () => {
const articleUrl =
'http://www.usmagazine.com/celebrity-news/news/lady-gaga-shares-pic-of-ex-taylor-kinney-with-her-mom-w454419';
const { lead_image_url } = await Mercury.parse(articleUrl, html, {
const { lead_image_url } = await Mercury.parse(articleUrl, {
html,
fallback: false,
});
@ -108,7 +111,7 @@ describe('WwwUsmagazineComExtractor', () => {
const url =
'http://www.usmagazine.com/celebrity-news/news/lady-gaga-shares-pic-of-ex-taylor-kinney-with-her-mom-w454419';
const { content } = await Mercury.parse(url, html, { fallback: false });
const { content } = await Mercury.parse(url, { html, fallback: false });
const $ = cheerio.load(content || '');

@ -16,7 +16,7 @@ describe('WwwVoxComExtractor', () => {
url =
'http://www.vox.com/culture/2016/12/10/13898352/trump-twitter-harassment-policy-bannable';
const html = fs.readFileSync('./fixtures/www.vox.com/1481563623532.html');
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -18,7 +18,7 @@ describe('WwwWashingtonpostComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.washingtonpost.com/1546958901450.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -18,7 +18,7 @@ describe('WwwWesternjournalismComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.westernjournalism.com/1482520953825.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

@ -19,7 +19,7 @@ describe('WiredExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.wired.com/1475256747028.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -19,7 +19,7 @@ describe('YahooExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.yahoo.com/1475529982399.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', async () => {

@ -17,7 +17,7 @@ describe('WwwYoutubeComExtractor', () => {
const html = fs.readFileSync(
'./fixtures/www.youtube.com/1481042537359.html'
);
result = Mercury.parse(url, html, { fallback: false });
result = Mercury.parse(url, { html, fallback: false });
});
it('is selected properly', () => {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save