fix: added babel-polyfill for bug in Reflect

pull/1/head
Adam Pash 8 years ago
parent 93e844cdfe
commit e1ef25aab1

@ -12,6 +12,7 @@
"devDependencies": {
"babel-plugin-transform-es2015-destructuring": "^6.9.0",
"babel-plugin-transform-object-rest-spread": "^6.8.0",
"babel-polyfill": "^6.13.0",
"babel-preset-es2015": "^6.13.2",
"babel-register": "^6.11.6",
"mocha": "^3.0.2"

@ -1,4 +1,5 @@
import cheerio from 'cheerio'
import 'babel-polyfill'
import extractBestNode from './extract-best-node'
import nodeIsSufficient from '../utils/node-is-sufficient'
@ -14,13 +15,12 @@ const GenericContentExtractor = {
// Entry point for parsing html
parse(html, opts={}) {
let $ = cheerio.load(html)
opts = { ...this.defaultOpts, ...opts }
// TODO: Title is used to clean headers.
// Should be passed from title extraction.
const title = ''
return this.extract($, opts, title)
return this.extract(html, opts, title)
},
// Extract the content for this resource - initially, pass in our
@ -42,7 +42,9 @@ const GenericContentExtractor = {
//
// cleanConditionally: Clean the node to return of some
// superfluous content. Things like forms, ads, etc.
extract($, opts, title) {
extract(html, opts, title) {
let $ = cheerio.load(html)
// Cascade through our extraction-specific flags in an ordered fashion,
// turning them off as we try to extract content.
let node = extractCleanNode(
@ -51,26 +53,28 @@ const GenericContentExtractor = {
opts.cleanConditionally)
if (nodeIsSufficient(node)) {
console.log("success on first run!!!!!")
return this.cleanAndReturnNode(node, $)
} else {
// We didn't succeed on first pass, one by one disable our
// extraction flags and try again.
console.log("no success doing again!!!!!")
for (key in Reflect.ownKeys(opts).filter(key => opts[key] === true)) {
for (const key of Reflect.ownKeys(opts).filter(key => opts[key] === true)) {
opts[key] = false
$ = cheerio.load(html)
node = extractCleanNode(
extractBestNode($, opts),
$,
opts.cleanConditionally)
if (nodeIsSufficient(node)) {
break
}
}
}
return this.cleanAndReturnNode(node, $)
}
return node
return this.cleanAndReturnNode(node, $)
},
// Once we got here, either we're at our last-resort node, or

@ -12,7 +12,7 @@ describe('GenericContentExtractor', () => {
const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8')
const result = clean(GenericContentExtractor.parse(html))
// console.log(result)
console.log(result)
})
})
})

Loading…
Cancel
Save