mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-17 03:25:31 +00:00
fix: duplicate key bug
This commit is contained in:
parent
93ca688955
commit
6c6451b34b
@ -12,13 +12,14 @@ const RootExtractor = {
|
||||
const title = extract({ ...opts, type: 'title', extractor })
|
||||
const datePublished = extract({ ...opts, type: 'datePublished', extractor })
|
||||
const author = extract({ ...opts, type: 'author', extractor })
|
||||
const content = extract({ ...opts, type: 'content', extractor, html: true })
|
||||
const content = extract({ ...opts, type: 'content', extractor, extractHtml: true })
|
||||
const leadImageUrl = extract({ ...opts, type: 'leadImageUrl', extractor })
|
||||
const dek = extract({ ...opts, type: 'dek', extractor })
|
||||
|
||||
return {
|
||||
title,
|
||||
content,
|
||||
author,
|
||||
datePublished,
|
||||
leadImageUrl,
|
||||
dek,
|
||||
@ -27,15 +28,15 @@ const RootExtractor = {
|
||||
}
|
||||
|
||||
function extract(opts) {
|
||||
const { type, extractor, $, html } = opts
|
||||
const { type, extractor, $, extractHtml } = opts
|
||||
|
||||
// If nothing matches the selector,
|
||||
// run the Generic extraction
|
||||
return select($, extractor[type], html) ||
|
||||
return select($, extractor[type], extractHtml) ||
|
||||
GenericExtractor[type](opts)
|
||||
}
|
||||
|
||||
function select($, extractionOpts, html=false) {
|
||||
function select($, extractionOpts, extractHtml=false) {
|
||||
// Skip if there's not extraction for this type
|
||||
if (!extractionOpts) return
|
||||
|
||||
@ -44,12 +45,20 @@ function select($, extractionOpts, html=false) {
|
||||
const matchingSelector = selectors.find((selector) => {
|
||||
return $(selector).length === 1
|
||||
})
|
||||
console.log(matchingSelector)
|
||||
// console.log($(matchingSelector).text())
|
||||
console.log(extractHtml)
|
||||
if (!matchingSelector) return
|
||||
|
||||
// If the selector type requests html as its return type
|
||||
// clean the element with provided cleaning selectors
|
||||
if (html) {
|
||||
if (extractHtml) {
|
||||
let $content = $(matchingSelector)
|
||||
|
||||
// Wrap in div so transformation can take place on root element
|
||||
$content.wrap($('<div></div>'))
|
||||
$content = $content.parent()
|
||||
|
||||
$content = cleanBySelectors($content, $, extractionOpts)
|
||||
$content = transformElements($content, $, extractionOpts)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user