release: 1.0.9 (#167)

pull/168/head 1.0.9
Adam Pash 7 years ago committed by GitHub
parent 61f0f4e1af
commit e56e8e24cd

@ -1,5 +1,14 @@
# Mercury Parser Changelog
### 1.0.9 (Mar 23, 2017)
##### Commits
* [[`61f0f4e1af`](https://github.com/postlight/mercury-parser/commit/61f0f4e1af)] - **fix**: kept elements being removed (#166) (Adam Pash)
* [[`5741910fdc`](https://github.com/postlight/mercury-parser/commit/5741910fdc)] - **docs**: update changelog (#165) (Adam Pash)
* [[`321c087be6`](https://github.com/postlight/mercury-parser/commit/321c087be6)] - **release**: 1.0.8 (#164) (Adam Pash)
### 1.0.8 (Mar 22, 2017)
##### Commits

27
dist/mercury.js vendored

@ -792,9 +792,6 @@ function stripJunkTags(article, $) {
// any element with a class of mercury-parser-keep
$(tags.join(','), article).not('.' + KEEP_CLASS).remove();
// Remove the mercury-parser-keep class from result
$('.' + KEEP_CLASS, article).removeClass(KEEP_CLASS);
return $;
}
@ -817,7 +814,7 @@ function cleanHOnes$$1(article, $) {
return $;
}
function removeAllButWhitelist($article) {
function removeAllButWhitelist($article, $) {
$article.find('*').each(function (index, node) {
var attrs = getAttrs(node);
@ -830,6 +827,9 @@ function removeAllButWhitelist($article) {
}, {}));
});
// Remove the mercury-parser-keep class from result
$('.' + KEEP_CLASS, $article).removeClass(KEEP_CLASS);
return $article;
}
@ -840,11 +840,11 @@ function removeAllButWhitelist($article) {
// }
// Remove attributes like style or align
function cleanAttributes$$1($article) {
function cleanAttributes$$1($article, $) {
// Grabbing the parent because at this point
// $article will be wrapped in a div which will
// have a score set on it.
return removeAllButWhitelist($article.parent().length ? $article.parent() : $article);
return removeAllButWhitelist($article.parent().length ? $article.parent() : $article, $);
}
function removeEmpty($article, $) {
@ -1402,6 +1402,9 @@ function removeUnlessContent($node, $, weight) {
function cleanTags$$1($article, $) {
$(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
var $node = $(node);
// If marked to keep, skip it
if ($node.hasClass(KEEP_CLASS) || $node.find('.' + KEEP_CLASS).length > 0) return;
var weight = getScore($node);
if (!weight) {
weight = getOrInitScore$$1($node, $);
@ -5261,11 +5264,7 @@ var ObamawhitehouseArchivesGovExtractor = {
// Is there anything in the content you selected that needs to be transformed
// before it's consumable content? E.g., unusual lazy-loaded images
transforms: {
'iframe[src*=youtube]': function iframeSrcYoutube($node) {
$node.parents('.panel-pane').replaceWith($node);
}
},
transforms: {},
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
@ -5843,6 +5842,9 @@ function extractCleanNode(article, _ref) {
// this can sometimes be too aggressive.
if (defaultCleaner) cleanImages(article, $);
// Make links absolute
makeLinksAbsolute$$1(article, $, url);
// Mark elements to keep that would normally be removed.
// E.g., stripJunkTags will remove iframes, so we're going to mark
// YouTube/Vimeo videos as elements we want to keep.
@ -5860,9 +5862,6 @@ function extractCleanNode(article, _ref) {
// Clean headers
cleanHeaders(article, $, title);
// Make links absolute
makeLinksAbsolute$$1(article, $, url);
// We used to clean UL's and OL's here, but it was leading to
// too many in-article lists being removed. Consider a better
// way to detect menus particularly and remove them.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "mercury-parser",
"version": "1.0.8",
"version": "1.0.9",
"description": "",
"repository": "github:postlight/mercury-parser",
"main": "./dist/mercury.js",

Loading…
Cancel
Save