fix: dek and leadImg should not be html

This commit is contained in:
Adam Pash 2016-09-08 11:24:19 -04:00
parent 45ef18ba37
commit 93ca688955

View File

@@ -13,8 +13,8 @@ const RootExtractor = {
const datePublished = extract({ ...opts, type: 'datePublished', extractor }) const datePublished = extract({ ...opts, type: 'datePublished', extractor })
const author = extract({ ...opts, type: 'author', extractor }) const author = extract({ ...opts, type: 'author', extractor })
const content = extract({ ...opts, type: 'content', extractor, html: true }) const content = extract({ ...opts, type: 'content', extractor, html: true })
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor, html: true }) const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
const dek = extract({ ...opts, type: 'dek', extractor, html: true }) const dek = extract({ ...opts, type: 'dek', extractor })
return { return {
title, title,
@@ -27,11 +27,11 @@ const RootExtractor = {
} }
function extract(opts) { function extract(opts) {
const { type, extractor, $ } = opts const { type, extractor, $, html } = opts
// If nothing matches the selector, // If nothing matches the selector,
// run the Generic extraction // run the Generic extraction
return select($, extractor[type]) || return select($, extractor[type], html) ||
GenericExtractor[type](opts) GenericExtractor[type](opts)
} }
@@ -53,7 +53,7 @@ function select($, extractionOpts, html=false) {
$content = cleanBySelectors($content, $, extractionOpts) $content = cleanBySelectors($content, $, extractionOpts)
$content = transformElements($content, $, extractionOpts) $content = transformElements($content, $, extractionOpts)
return $content return $.html($content)
} else { } else {
return stripTags($(matchingSelector).text(), $) return stripTags($(matchingSelector).text(), $)
} }