You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/custom/wikipedia.org/index.js

43 lines
740 B
JavaScript

/**
 * Custom extractor configuration for pages on wikipedia.org.
 *
 * The object is pure configuration: CSS selectors that locate each field,
 * plus transforms and clean-up selectors applied to the article body.
 * NOTE(review): the code that consumes this config is not part of this
 * file; the per-key notes below describe intent as read from the config
 * itself and should be confirmed against the extraction pipeline.
 */
const WikipediaExtractor = {
  domain: 'wikipedia.org',
  content: {
    // Container searched for the article body.
    selectors: [
      '#mw-content-text',
    ],
    // transform top infobox to an image with caption
    transforms: {
      /**
       * Hoists the infobox's lead image to the front of the infobox.
       *
       * Only the first image is hoisted: once the infobox has a direct
       * `img` child, later images are left where they are. Without this
       * guard every image in the infobox would be moved to the front,
       * each one landing ahead of the previous one.
       *
       * @param {object} $node - wrapped `img` element inside an `.infobox`;
       *   must support `.parents()`, and the result must support
       *   `.children()` and `.prepend()` (presumably a cheerio selection —
       *   confirm against the caller).
       * @returns {undefined} nothing; the node is moved in place.
       */
      '.infobox img': ($node) => {
        const $infobox = $node.parents('.infobox');
        if ($infobox.children('img').length === 0) {
          $infobox.prepend($node);
        }
      },
      '.infobox caption': 'figcaption',
      '.infobox': 'figure',
    },
    // Selectors to remove from the extracted content
    clean: [
      '.mw-editsection',
      // Table scaffolding left inside the infobox once it is a <figure>.
      'figure tr, figure td, figure tbody',
      '#toc',
    ],
  },
  // Fixed author string rather than a selector lookup.
  author: 'Wikipedia Contributors',
  title: {
    selectors: [
      'h2.title',
    ],
  },
  datePublished: {
    selectors: [
      '#footer-info-lastmod',
    ],
  },
};
export default WikipediaExtractor;