From f2729a5ee6db59c7bd2d8e0555bd2af738f88fea Mon Sep 17 00:00:00 2001
From: Adam Pash
Date: Fri, 9 Sep 2016 12:00:09 -0400
Subject: [PATCH] improved wiki extractor

---
 src/extractor/custom/wikipedia.org/index.js | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/extractor/custom/wikipedia.org/index.js b/src/extractor/custom/wikipedia.org/index.js
index 41f3d006..73c07aca 100644
--- a/src/extractor/custom/wikipedia.org/index.js
+++ b/src/extractor/custom/wikipedia.org/index.js
@@ -7,9 +7,10 @@ const WikipediaExtractor = {
 
     // transform top infobox to an image with caption
     transforms: {
-      '.infobox img': ($node, $) => {
+      '.infobox img': ($node) => {
         $node.parents('.infobox').prepend($node)
       },
+      '.infobox caption': 'figcaption',
       '.infobox': 'figure',
     },
 
@@ -17,6 +18,7 @@ const WikipediaExtractor = {
     clean: [
       '.mw-editsection',
       'figure tr, figure td, figure tbody',
+      '#toc',
     ],
 
   },