release: 1.0.2 (#54)

pull/56/head 1.0.2
Adam Pash 8 years ago committed by GitHub
parent 81aa89f2c1
commit 332f85928f

@ -1,8 +1,24 @@
* [[`19ed035382`](https://github.com/postlight/mercury-parser/commit/19ed035382)] - **release**: 1.0.1 (Adam Pash)
* [[`f9902cfa05`](https://github.com/postlight/mercury-parser/commit/f9902cfa05)] - **Fix**: extension bugs (#47) (Adam Pash)
* [[`16860f1d85`](https://github.com/postlight/mercury-parser/commit/16860f1d85)] - **feat**: improved nyt parser (#46) (Adam Pash)
* [[`d0453efbf8`](https://github.com/postlight/mercury-parser/commit/d0453efbf8)] - **feat**: improvements for nyer magazine articles (#45) (Adam Pash)
* [[`00f8965c1f`](https://github.com/postlight/mercury-parser/commit/00f8965c1f)] - **fix**: cleaning up deks (#44) (Adam Pash)
* [[`b415d1d37c`](https://github.com/postlight/mercury-parser/commit/b415d1d37c)] - **feat**: aol custom extractor (#42) (Janet)
* [[`4cc3b68b5e`](https://github.com/postlight/mercury-parser/commit/4cc3b68b5e)] - **feat**: remove footer links (#40) (Matt)
* [[`e9a36d6ebd`](https://github.com/postlight/mercury-parser/commit/e9a36d6ebd)] - **release**: 1.0.0 so we can start doing proper releases (#39) (Adam Pash)
# Mercury Parser Changelog
### 1.0.2 (Dec 6, 2016)
##### Commits
* [[`81aa89f2c1`](https://github.com/postlight/mercury-parser/commit/81aa89f2c1)] - **feat**: youtube custom extractor (#53) (Adam Pash)
* [[`2fb47640f2`](https://github.com/postlight/mercury-parser/commit/2fb47640f2)] - **Feat**: detect platforms (#52) (Adam Pash)
* [[`64c0fad2fd`](https://github.com/postlight/mercury-parser/commit/64c0fad2fd)] - **fix**: preserve whitespace (#51) (Adam Pash)
* [[`15656cb3e1`](https://github.com/postlight/mercury-parser/commit/15656cb3e1)] - **Refactor**: running tests more efficiently (#49) (Adam Pash)
* [[`edcb7295d1`](https://github.com/postlight/mercury-parser/commit/edcb7295d1)] - **release**: 1.0.1 (#48) (Adam Pash)
### 1.0.1 (Dec 2, 2016)
##### Commits
* [[`19ed035382`](https://github.com/postlight/mercury-parser/commit/19ed035382)] - **release**: 1.0.1 (Adam Pash)
* [[`f9902cfa05`](https://github.com/postlight/mercury-parser/commit/f9902cfa05)] - **Fix**: extension bugs (#47) (Adam Pash)
* [[`16860f1d85`](https://github.com/postlight/mercury-parser/commit/16860f1d85)] - **feat**: improved nyt parser (#46) (Adam Pash)
* [[`d0453efbf8`](https://github.com/postlight/mercury-parser/commit/d0453efbf8)] - **feat**: improvements for nyer magazine articles (#45) (Adam Pash)
* [[`00f8965c1f`](https://github.com/postlight/mercury-parser/commit/00f8965c1f)] - **fix**: cleaning up deks (#44) (Adam Pash)
* [[`b415d1d37c`](https://github.com/postlight/mercury-parser/commit/b415d1d37c)] - **feat**: aol custom extractor (#42) (Janet)
* [[`4cc3b68b5e`](https://github.com/postlight/mercury-parser/commit/4cc3b68b5e)] - **feat**: remove footer links (#40) (Matt)
* [[`e9a36d6ebd`](https://github.com/postlight/mercury-parser/commit/e9a36d6ebd)] - **release**: 1.0.0 so we can start doing proper releases (#39) (Adam Pash)

83
dist/mercury.js vendored

@ -1850,7 +1850,7 @@ var Resource = {
throw new Error('Content does not appear to be text.');
}
var $ = cheerio.load(content, { normalizeWhitespace: true });
var $ = cheerio.load(content);
if ($.root().children().length === 0) {
throw new Error('No children, likely a bad parse.');
@ -3080,6 +3080,54 @@ var WwwAolComExtractor = {
}
};
// Custom extractor definition for pages served from www.youtube.com.
// Every field carries the selectors used to locate it; `content` also
// carries a transform that rewrites the player container.
// NOTE(review): how the selector arrays are interpreted is decided by the
// extraction code outside this block — confirm there before editing them.
var WwwYoutubeComExtractor = {
  domain: 'www.youtube.com',

  title: {
    selectors: ['.watch-title', 'h1.watch-title-container']
  },

  author: {
    selectors: ['.yt-user-info']
  },

  date_published: {
    selectors: [['meta[itemProp="datePublished"]', 'value']],
    timezone: 'GMT'
  },

  // No dek selectors are defined for this extractor.
  dek: {
    selectors: []
  },

  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },

  content: {
    defaultCleaner: false,

    selectors: [['#player-api', '#eow-description']],

    transforms: {
      // Swap the player container's markup for an embeddable iframe built
      // from the page's videoId meta tag.
      '#player-api': function playerApi($node, $) {
        var videoId = $('meta[itemProp="videoId"]').attr('value');

        // Without an id the embed URL would end in "undefined"; leave the
        // node as it is rather than emitting a dead iframe.
        if (!videoId) {
          return;
        }

        // The id is read from page markup, so encode it before splicing it
        // into an HTML attribute.
        $node.html('\n <iframe src="https://www.youtube.com/embed/' + encodeURIComponent(videoId) + '" frameborder="0" allowfullscreen></iframe>');
      }
    },

    // Selectors listed here are meant to be removed from the result;
    // none are needed for this extractor.
    clean: []
  }
};
var CustomExtractors = Object.freeze({
@ -3108,7 +3156,8 @@ var CustomExtractors = Object.freeze({
MoneyCnnComExtractor: MoneyCnnComExtractor,
WwwThevergeComExtractor: WwwThevergeComExtractor,
WwwCnnComExtractor: WwwCnnComExtractor,
WwwAolComExtractor: WwwAolComExtractor
WwwAolComExtractor: WwwAolComExtractor,
WwwYoutubeComExtractor: WwwYoutubeComExtractor
});
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
@ -4716,14 +4765,27 @@ var GenericExtractor = {
}
};
function getExtractor(url, parsedUrl) {
// Maps a CSS selector to the extractor to use when a page matches it.
// detectByHtml walks these keys and returns the value for the first
// selector that matches at least one element, so key order matters.
var Detectors = {
'meta[name="al:ios:app_name"][value="Medium"]': MediumExtractor,
'meta[name="generator"][value="blogger"]': BloggerExtractor
};
// Choose an extractor from markers in the document itself.
//
// $ is the query function for the loaded document; it is called with each
// Detectors key and the first key whose result has length > 0 wins.
// Returns the matching Detectors value, or undefined when nothing matches
// or when no query function was supplied.
function detectByHtml($) {
  // getExtractor takes $ as a trailing argument that two-argument callers
  // do not pass; with no document to query there is nothing to detect.
  if (typeof $ !== 'function') {
    return undefined;
  }

  var selector = _Reflect$ownKeys(Detectors).find(function (s) {
    return $(s).length > 0;
  });

  // selector is undefined when no key matched, which yields undefined here.
  return Detectors[selector];
}
// Resolve which extractor to use for a URL.
//
// url       - the URL string; parsed with URL.parse when parsedUrl is falsy.
// parsedUrl - optional pre-parsed URL; only its hostname is read here.
// $         - passed through to detectByHtml in the second return below.
//
// Lookup order: Extractors keyed by the full hostname, then by the last two
// dot-separated labels of the hostname, then the fallbacks in the return.
function getExtractor(url, parsedUrl, $) {
parsedUrl = parsedUrl || URL.parse(url);
var _parsedUrl = parsedUrl,
hostname = _parsedUrl.hostname;
// e.g. "www.example.com" -> "example.com"
var baseDomain = hostname.split('.').slice(-2).join('.');
// NOTE(review): two return statements follow. This looks like the pre- and
// post-change lines of a diff rendered without +/- markers; as written only
// the first one executes and detectByHtml($) is never reached — confirm
// which line is meant to remain.
return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;
return Extractors[hostname] || Extractors[baseDomain] || detectByHtml($) || GenericExtractor;
}
// Remove elements by an array of selectors
@ -5034,7 +5096,7 @@ var Mercury = {
var opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {
var _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, parsedUrl, Extractor, $, metaCache, result, _result, title, next_page_url;
var _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, parsedUrl, $, Extractor, metaCache, result, _result, title, next_page_url;
return _regeneratorRuntime.wrap(function _callee$(_context) {
while (1) {
@ -5061,14 +5123,15 @@ var Mercury = {
return _context.abrupt('return', Errors.badUrl);
case 5:
Extractor = getExtractor(url, parsedUrl);
// console.log(`Using extractor for ${Extractor.domain}`);
_context.next = 8;
_context.next = 7;
return Resource.create(url, html, parsedUrl);
case 8:
case 7:
$ = _context.sent;
Extractor = getExtractor(url, parsedUrl, $);
// console.log(`Using extractor for ${Extractor.domain}`);
// If we found an error creating the resource, return that error
if (!$.failed) {
_context.next = 11;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "mercury-parser",
"version": "1.0.1",
"version": "1.0.2",
"description": "",
"repository": "github:postlight/mercury-parser",
"main": "./dist/mercury.js",

Loading…
Cancel
Save