chore: cleanup

pull/1/head
Adam Pash 8 years ago
parent e1ef25aab1
commit 95085d1a11

@ -24,13 +24,13 @@ const GenericContentExtractor = {
},
// Extract the content for this resource - initially, pass in our
// most restrictive flags which will return the highest quality
// content. On each failure, retry with slightly more lax flags.
// most restrictive opts which will return the highest quality
// content. On each failure, retry with slightly more lax opts.
//
// :param return_type: string. If "node", should return the content
// as a cheerio node rather than as an HTML string.
//
// Flags:
// Opts:
// stripUnlikelyCandidates: Remove any elements that match
// non-article-like criteria first. (For example, does this element
// have a class name of "comment"?)
@ -45,7 +45,7 @@ const GenericContentExtractor = {
extract(html, opts, title) {
let $ = cheerio.load(html)
// Cascade through our extraction-specific flags in an ordered fashion,
// Cascade through our extraction-specific opts in an ordered fashion,
// turning them off as we try to extract content.
let node = extractCleanNode(
extractBestNode($, opts),
@ -56,7 +56,7 @@ const GenericContentExtractor = {
return this.cleanAndReturnNode(node, $)
} else {
// We didn't succeed on the first pass, so one by one disable our
// extraction flags and try again.
// extraction opts and try again.
for (const key of Reflect.ownKeys(opts).filter(key => opts[key] === true)) {
opts[key] = false
$ = cheerio.load(html)
@ -99,22 +99,3 @@ const GenericContentExtractor = {
}
export default GenericContentExtractor
// if node is None:
// return None
//
// print "#######SCORE########"
// print self.high_score
// print self.top_node.tag
// # Remove our scoring information from our content
// if 'score' in node.attrib:
// del node.attrib['score']
// for scored_node in node.xpath('.//*[@score]'):
// del scored_node.attrib['score']
//
// if return_type == "html":
// return normalize_spaces(node_to_html(node))
// else:
// return node
//

Loading…
Cancel
Save