mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-17 03:25:31 +00:00
fix: duplicate key bug
This commit is contained in:
parent
93ca688955
commit
6c6451b34b
@@ -12,13 +12,14 @@ const RootExtractor = {
|
|||||||
const title = extract({ ...opts, type: 'title', extractor })
|
const title = extract({ ...opts, type: 'title', extractor })
|
||||||
const datePublished = extract({ ...opts, type: 'datePublished', extractor })
|
const datePublished = extract({ ...opts, type: 'datePublished', extractor })
|
||||||
const author = extract({ ...opts, type: 'author', extractor })
|
const author = extract({ ...opts, type: 'author', extractor })
|
||||||
const content = extract({ ...opts, type: 'content', extractor, html: true })
|
const content = extract({ ...opts, type: 'content', extractor, extractHtml: true })
|
||||||
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
|
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
|
||||||
const dek = extract({ ...opts, type: 'dek', extractor })
|
const dek = extract({ ...opts, type: 'dek', extractor })
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title,
|
title,
|
||||||
content,
|
content,
|
||||||
|
author,
|
||||||
datePublished,
|
datePublished,
|
||||||
leadImageUrl,
|
leadImageUrl,
|
||||||
dek,
|
dek,
|
||||||
@@ -27,15 +28,15 @@ const RootExtractor = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function extract(opts) {
|
function extract(opts) {
|
||||||
const { type, extractor, $, html } = opts
|
const { type, extractor, $, extractHtml } = opts
|
||||||
|
|
||||||
// If nothing matches the selector,
|
// If nothing matches the selector,
|
||||||
// run the Generic extraction
|
// run the Generic extraction
|
||||||
return select($, extractor[type], html) ||
|
return select($, extractor[type], extractHtml) ||
|
||||||
GenericExtractor[type](opts)
|
GenericExtractor[type](opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
function select($, extractionOpts, html=false) {
|
function select($, extractionOpts, extractHtml=false) {
|
||||||
// Skip if there's not extraction for this type
|
// Skip if there's not extraction for this type
|
||||||
if (!extractionOpts) return
|
if (!extractionOpts) return
|
||||||
|
|
||||||
@@ -44,12 +45,20 @@ function select($, extractionOpts, html=false) {
|
|||||||
const matchingSelector = selectors.find((selector) => {
|
const matchingSelector = selectors.find((selector) => {
|
||||||
return $(selector).length === 1
|
return $(selector).length === 1
|
||||||
})
|
})
|
||||||
|
console.log(matchingSelector)
|
||||||
|
// console.log($(matchingSelector).text())
|
||||||
|
console.log(extractHtml)
|
||||||
if (!matchingSelector) return
|
if (!matchingSelector) return
|
||||||
|
|
||||||
// If the selector type requests html as its return type
|
// If the selector type requests html as its return type
|
||||||
// clean the element with provided cleaning selectors
|
// clean the element with provided cleaning selectors
|
||||||
if (html) {
|
if (extractHtml) {
|
||||||
let $content = $(matchingSelector)
|
let $content = $(matchingSelector)
|
||||||
|
|
||||||
|
// Wrap in div so transformation can take place on root element
|
||||||
|
$content.wrap($('<div></div>'))
|
||||||
|
$content = $content.parent()
|
||||||
|
|
||||||
$content = cleanBySelectors($content, $, extractionOpts)
|
$content = cleanBySelectors($content, $, extractionOpts)
|
||||||
$content = transformElements($content, $, extractionOpts)
|
$content = transformElements($content, $, extractionOpts)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user