You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
43 lines
740 B
JavaScript
43 lines
740 B
JavaScript
/**
 * Site-specific extraction rules for pages on wikipedia.org.
 *
 * Each field either carries a fixed value (`author`) or a list of CSS
 * selectors to try. `content` additionally declares transforms applied to
 * matching nodes and a list of selectors to strip from the result.
 *
 * NOTE(review): the engine that consumes this object is not visible from this
 * file; the descriptions below assume transforms run before `clean` and that
 * function transforms are invoked once per matched node — confirm against the
 * extractor runner.
 */
const WikipediaExtractor = {
  domain: 'wikipedia.org',

  content: {
    selectors: [
      '#mw-content-text',
    ],

    // transform top infobox to an image with caption
    transforms: {
      /**
       * Moves the infobox's lead image to the front of the infobox.
       *
       * @param {object} $node - wrapped `.infobox img` node (presumably a
       *   cheerio/jQuery-style selection; it must expose `.parents()`).
       */
      '.infobox img': ($node) => {
        const $infobox = $node.parents('.infobox');

        // Only hoist the first image. Once one <img> is a direct child of the
        // infobox, later images (flags, maps, icons) are left inside the table
        // markup, which the `clean` selectors below target.
        if ($infobox.children('img').length === 0) {
          $infobox.prepend($node);
        }
      },
      '.infobox caption': 'figcaption',
      '.infobox': 'figure',
    },

    // Selectors to remove from the extracted content
    clean: [
      '.mw-editsection',
      // `figure` here is the former `.infobox` (see the transform above).
      'figure tr, figure td, figure tbody',
      '#toc',
    ],
  },

  // Fixed byline used for every article.
  author: 'Wikipedia Contributors',

  title: {
    selectors: [
      'h2.title',
    ],
  },

  datePublished: {
    selectors: [
      '#footer-info-lastmod',
    ],
  },
};

export default WikipediaExtractor;
|