improved wiki extractor

This commit is contained in:
Adam Pash 2016-09-09 12:00:09 -04:00
parent 52e89a0229
commit f2729a5ee6

View File

@@ -7,9 +7,10 @@ const WikipediaExtractor = {
// transform top infobox to an image with caption
transforms: {
'.infobox img': ($node, $) => {
'.infobox img': ($node) => {
$node.parents('.infobox').prepend($node)
},
'.infobox caption': 'figcaption',
'.infobox': 'figure',
},
@@ -17,6 +18,7 @@ const WikipediaExtractor = {
clean: [
'.mw-editsection',
'figure tr, figure td, figure tbody',
'#toc',
],
},