fix: dek and leadImg should not be html

This commit is contained in:
Adam Pash 2016-09-08 11:24:19 -04:00
parent 45ef18ba37
commit 93ca688955

View File

@ -13,8 +13,8 @@ const RootExtractor = {
const datePublished = extract({ ...opts, type: 'datePublished', extractor })
const author = extract({ ...opts, type: 'author', extractor })
const content = extract({ ...opts, type: 'content', extractor, html: true })
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor, html: true })
const dek = extract({ ...opts, type: 'dek', extractor, html: true })
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
const dek = extract({ ...opts, type: 'dek', extractor })
return {
title,
@ -27,11 +27,11 @@ const RootExtractor = {
}
// Extracts the value for `opts.type`: first tries `select` with the
// extractor's entry for that type, and when that result is falsy falls
// back to `GenericExtractor[type](opts)`.
//
// NOTE(review): this block is a diff hunk whose +/- markers were stripped,
// so each changed line appears twice: the pre-change line followed by its
// replacement (the two destructurings and the two `return select` lines).
// Presumably only the second of each pair, which reads `html` from opts
// and passes it to `select`, is the current code; confirm against the
// committed file.
function extract(opts) {
const { type, extractor, $ } = opts
const { type, extractor, $, html } = opts
// If nothing matches the selector,
// run the Generic extraction
return select($, extractor[type]) ||
return select($, extractor[type], html) ||
GenericExtractor[type](opts)
}
@ -53,7 +53,7 @@ function select($, extractionOpts, html=false) {
$content = cleanBySelectors($content, $, extractionOpts)
$content = transformElements($content, $, extractionOpts)
return $content
return $.html($content)
} else {
return stripTags($(matchingSelector).text(), $)
}