release: 1.0.7 (#160)

pull/148/head^2 1.0.7
Adam Pash 7 years ago committed by GitHub
parent f13bb721f6
commit e267d57d78

@ -1,5 +1,22 @@
# Mercury Parser Changelog
### 1.0.7 (Mar 15, 2017)
##### Commits
* [[`f13bb721f6`](https://github.com/postlight/mercury-parser/commit/f13bb721f6)] - **feat**: prospect magazine parser (#147) (Janet)
* [[`1b28713cf5`](https://github.com/postlight/mercury-parser/commit/1b28713cf5)] - **feat**: fool.com parser (#158) (Kevin Ngao)
* [[`c18959779d`](https://github.com/postlight/mercury-parser/commit/c18959779d)] - **feat**: forward.com parser (#144) (Janet)
* [[`50e548bac2`](https://github.com/postlight/mercury-parser/commit/50e548bac2)] - **feat**: qdaily parser (#146) (Janet)
* [[`51a4d1d12f`](https://github.com/postlight/mercury-parser/commit/51a4d1d12f)] - **feat**: newrepublic parser shows image on page (#159) (Silas Burton)
* [[`11382ce651`](https://github.com/postlight/mercury-parser/commit/11382ce651)] - **feat**: Slate extractor (#153) (Silas Burton)
* [[`5acaa6ab56`](https://github.com/postlight/mercury-parser/commit/5acaa6ab56)] - **feat**: ici.radio-canada.ca extractor (#156) (Silas Burton)
* [[`4509b341e6`](https://github.com/postlight/mercury-parser/commit/4509b341e6)] - **feat**: better cleanup of atlantic articles (#157) (Silas Burton)
* [[`f2e3f055c2`](https://github.com/postlight/mercury-parser/commit/f2e3f055c2)] - **fix**: an issue with encoding (#154) (Kevin Ngao)
* [[`9b371e51ac`](https://github.com/postlight/mercury-parser/commit/9b371e51ac)] - **feat**: gothamist extractor (#151) (Silas Burton)
* [[`afbef9bc39`](https://github.com/postlight/mercury-parser/commit/afbef9bc39)] - **fix**: Encoding on Body (#143) (Kevin Ngao)
* [[`9d4c883d51`](https://github.com/postlight/mercury-parser/commit/9d4c883d51)] - **release**: 1.0.6 (#142) (Adam Pash)
### 1.0.6 (Feb 9, 2017)
##### Commits

250
dist/mercury.js vendored

@ -2120,7 +2120,7 @@ var TheAtlanticExtractor = {
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean: ['.partner-box']
clean: ['.partner-box', '.callout']
},
date_published: {
@ -2904,7 +2904,7 @@ var NewrepublicComExtractor = {
},
content: {
selectors: ['div.content-body', '.minutes-primary div.content-body'],
selectors: [['.article-cover', 'div.content-body'], ['.minute-image', '.minutes-primary div.content-body']],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
@ -5309,6 +5309,123 @@ var WwwOpposingviewsComExtractor = {
}
};
// Custom extractor configuration for www.prospectmagazine.co.uk.
// Each field lists the selectors used to locate that piece of the article.
var WwwProspectmagazineCoUkExtractor = {
  domain: 'www.prospectmagazine.co.uk',

  title: { selectors: ['.page-title'] },

  author: { selectors: ['.aside_author .title'] },

  // The timezone is declared explicitly alongside the date selector.
  date_published: {
    selectors: ['.post-info'],
    timezone: 'Europe/London'
  },

  dek: { selectors: ['.page-subtitle'] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'article .post_content'
    ],

    // Transforms: rewrite selected nodes before the content is consumed
    // (e.g. unusual lazy-loaded images). None are needed here.
    transforms: {},

    // Clean: selectors whose matches are removed from the result.
    // None are needed here.
    clean: []
  }
};
// Custom extractor configuration for forward.com.
// Each field lists the selectors used to locate that piece of the article.
var ForwardComExtractor = {
  domain: 'forward.com',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.author-name',
      ['meta[name="sailthru.author"]', 'value']
    ]
  },

  date_published: {
    selectors: [
      ['meta[name="date"]', 'value']
    ]
  },

  // No dek selectors have been defined for this site.
  dek: { selectors: [] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      ['.post-item-media-wrap', '.post-item p']
    ],

    // Transforms: rewrite selected nodes before the content is consumed
    // (e.g. unusual lazy-loaded images). None are needed here.
    transforms: {},

    // Clean: selectors whose matches are removed from the result.
    clean: [
      '.donate-box',
      '.message',
      '.subtitle'
    ]
  }
};
// Custom extractor configuration for www.qdaily.com.
// Each field lists the selectors used to locate that piece of the article.
var WwwQdailyComExtractor = {
  domain: 'www.qdaily.com',

  title: {
    selectors: [
      'h2',
      'h2.title'
    ]
  },

  author: { selectors: ['.name'] },

  date_published: {
    selectors: [
      ['.date.smart-date', 'data-origindate']
    ]
  },

  dek: { selectors: ['.excerpt'] },

  lead_image_url: {
    selectors: [
      ['.article-detail-hd img', 'src']
    ]
  },

  content: {
    selectors: ['.detail'],

    // Transforms: rewrite selected nodes before the content is consumed
    // (e.g. unusual lazy-loaded images). None are needed here.
    transforms: {},

    // Clean: selectors whose matches are removed from the result.
    clean: [
      '.lazyload',
      '.lazylad',
      '.lazylood'
    ]
  }
};
var GothamistComExtractor = {
domain: 'gothamist.com',
@ -5357,6 +5474,127 @@ var GothamistComExtractor = {
}
};
// Custom extractor configuration for www.fool.com.
// Each field lists the selectors used to locate that piece of the article.
var WwwFoolComExtractor = {
  domain: 'www.fool.com',
  title: {
    selectors: ['h1']
  },
  author: {
    selectors: ['.author-inline .author-name']
  },
  date_published: {
    selectors: [['meta[name="date"]', 'value']]
  },
  dek: {
    selectors: ['header h2']
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['.article-content'],
    // Is there anything in the content you selected that needs transformed
    // before it's consumable content? E.g., unusual lazy loaded images
    transforms: {
      // Replaces the parent of a caption image with a bare
      // <figure><img/></figure> built from the image's src attribute.
      '.caption img': function captionImg($node) {
        // The src comes from the scraped page, so it is escaped before being
        // interpolated into markup; otherwise a quote or angle bracket in the
        // URL could terminate the attribute early or inject extra markup.
        var src = String($node.attr('src'))
          .replace(/&/g, '&amp;')
          .replace(/"/g, '&quot;')
          .replace(/</g, '&lt;')
          .replace(/>/g, '&gt;');
        $node.parent().replaceWith('<figure><img src="' + src + '"/></figure>');
      },
      // String-valued transform; presumably renames matching nodes to
      // <figcaption> — handled by the transform runner outside this block.
      '.caption': 'figcaption'
    },
    // Is there anything that is in the result that shouldn't be?
    // The clean selectors will remove anything that matches from
    // the result
    clean: ['#pitch']
  }
};
// Custom extractor configuration for www.slate.com.
// Each field lists the selectors used to locate that piece of the article.
var WwwSlateComExtractor = {
  domain: 'www.slate.com',

  title: {
    selectors: [
      '.hed',
      'h1'
    ]
  },

  author: { selectors: ['a[rel=author]'] },

  // The timezone is declared explicitly alongside the date selector.
  date_published: {
    selectors: ['.pub-date'],
    timezone: 'America/New_York'
  },

  dek: { selectors: ['.dek'] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: ['.body'],

    // Transforms: rewrite selected nodes before the content is consumed
    // (e.g. unusual lazy-loaded images). None are needed here.
    transforms: {},

    // Clean: selectors whose matches are removed from the result.
    clean: [
      '.about-the-author',
      '.pullquote',
      '.newsletter-signup-component',
      '.top-comment'
    ]
  }
};
// Custom extractor configuration for ici.radio-canada.ca.
// Each field lists the selectors used to locate that piece of the article.
var IciRadioCanadaCaExtractor = {
  domain: 'ici.radio-canada.ca',

  title: { selectors: ['h1'] },

  author: {
    selectors: [
      ['meta[name="dc.creator"]', 'value']
    ]
  },

  // The timezone is declared explicitly alongside the date selector.
  date_published: {
    selectors: [
      ['meta[name="dc.date.created"]', 'value']
    ],
    timezone: 'America/New_York'
  },

  dek: { selectors: ['.bunker-component.lead'] },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      ['.main-multimedia-item', '.news-story-content']
    ],

    // Transforms: rewrite selected nodes before the content is consumed
    // (e.g. unusual lazy-loaded images). None are needed here.
    transforms: {},

    // Clean: selectors whose matches are removed from the result.
    // None are needed here.
    clean: []
  }
};
var CustomExtractors = Object.freeze({
@ -5444,7 +5682,13 @@ var CustomExtractors = Object.freeze({
WwwLinkedinComExtractor: WwwLinkedinComExtractor,
ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
GothamistComExtractor: GothamistComExtractor
WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
ForwardComExtractor: ForwardComExtractor,
WwwQdailyComExtractor: WwwQdailyComExtractor,
GothamistComExtractor: GothamistComExtractor,
WwwFoolComExtractor: WwwFoolComExtractor,
WwwSlateComExtractor: WwwSlateComExtractor,
IciRadioCanadaCaExtractor: IciRadioCanadaCaExtractor
});
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "mercury-parser",
"version": "1.0.6",
"version": "1.0.7",
"description": "",
"repository": "github:postlight/mercury-parser",
"main": "./dist/mercury.js",
@ -68,7 +68,7 @@
"ora": "^0.3.0",
"phantomjs-polyfill-find": "ptim/phantomjs-polyfill-find",
"phantomjs-polyfill-string-includes": "^1.0.0",
"phantomjs-prebuilt": "^2.1.13",
"phantomjs-prebuilt": "^2.1.14",
"requirejs": "^2.3.2",
"rollup": "^0.36.3",
"rollup-plugin-babel": "^2.6.1",

@ -2248,6 +2248,14 @@ fs-extra@~0.30.0:
path-is-absolute "^1.0.0"
rimraf "^2.2.8"
fs-extra@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-1.0.0.tgz#cd3ce5f7e7cb6145883fcae3191e9877f8587950"
dependencies:
graceful-fs "^4.1.2"
jsonfile "^2.1.0"
klaw "^1.0.0"
fs.realpath@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
@ -4097,7 +4105,21 @@ phantomjs-polyfill-string-includes@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/phantomjs-polyfill-string-includes/-/phantomjs-polyfill-string-includes-1.0.0.tgz#ea180d4bbc24b8d83e477f8ee8893efabcb29393"
phantomjs-prebuilt@^2.1.13, phantomjs-prebuilt@^2.1.7:
phantomjs-prebuilt@^2.1.14:
version "2.1.14"
resolved "https://registry.yarnpkg.com/phantomjs-prebuilt/-/phantomjs-prebuilt-2.1.14.tgz#d53d311fcfb7d1d08ddb24014558f1188c516da0"
dependencies:
es6-promise "~4.0.3"
extract-zip "~1.5.0"
fs-extra "~1.0.0"
hasha "~2.2.0"
kew "~0.7.0"
progress "~1.1.8"
request "~2.79.0"
request-progress "~2.0.1"
which "~1.2.10"
phantomjs-prebuilt@^2.1.7:
version "2.1.13"
resolved "https://registry.yarnpkg.com/phantomjs-prebuilt/-/phantomjs-prebuilt-2.1.13.tgz#66556ad9e965d893ca5a7dc9e763df7e8697f76d"
dependencies:
@ -4423,7 +4445,7 @@ request-promise@^4.1.1:
request-promise-core "1.1.1"
stealthy-require "^1.0.0"
request@^2.55.0, request@^2.72.0, request@^2.75.0:
request@^2.55.0, request@^2.72.0, request@^2.75.0, request@~2.79.0:
version "2.79.0"
resolved "https://registry.yarnpkg.com/request/-/request-2.79.0.tgz#4dfe5bf6be8b8cdc37fcf93e04b65577722710de"
dependencies:

Loading…
Cancel
Save