improved wiki extractor

This commit is contained in:
Adam Pash 2016-09-09 12:00:09 -04:00
parent 52e89a0229
commit f2729a5ee6

View File

@@ -7,9 +7,10 @@ const WikipediaExtractor = {
// transform top infobox to an image with caption // transform top infobox to an image with caption
transforms: { transforms: {
'.infobox img': ($node, $) => { '.infobox img': ($node) => {
$node.parents('.infobox').prepend($node) $node.parents('.infobox').prepend($node)
}, },
'.infobox caption': 'figcaption',
'.infobox': 'figure', '.infobox': 'figure',
}, },
@@ -17,6 +18,7 @@ const WikipediaExtractor = {
clean: [ clean: [
'.mw-editsection', '.mw-editsection',
'figure tr, figure td, figure tbody', 'figure tr, figure td, figure tbody',
'#toc',
], ],
}, },