mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-17 03:25:31 +00:00
fix: dek and leadImg should not be html
This commit is contained in:
parent
45ef18ba37
commit
93ca688955
@ -13,8 +13,8 @@ const RootExtractor = {
|
||||
const datePublished = extract({ ...opts, type: 'datePublished', extractor })
|
||||
const author = extract({ ...opts, type: 'author', extractor })
|
||||
const content = extract({ ...opts, type: 'content', extractor, html: true })
|
||||
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor, html: true })
|
||||
const dek = extract({ ...opts, type: 'dek', extractor, html: true })
|
||||
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
|
||||
const dek = extract({ ...opts, type: 'dek', extractor })
|
||||
|
||||
return {
|
||||
title,
|
||||
@ -27,11 +27,11 @@ const RootExtractor = {
|
||||
}
|
||||
|
||||
function extract(opts) {
|
||||
const { type, extractor, $ } = opts
|
||||
const { type, extractor, $, html } = opts
|
||||
|
||||
// If nothing matches the selector,
|
||||
// run the Generic extraction
|
||||
return select($, extractor[type]) ||
|
||||
return select($, extractor[type], html) ||
|
||||
GenericExtractor[type](opts)
|
||||
}
|
||||
|
||||
@ -53,7 +53,7 @@ function select($, extractionOpts, html=false) {
|
||||
$content = cleanBySelectors($content, $, extractionOpts)
|
||||
$content = transformElements($content, $, extractionOpts)
|
||||
|
||||
return $content
|
||||
return $.html($content)
|
||||
} else {
|
||||
return stripTags($(matchingSelector).text(), $)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user