You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mercury-parser/src/extractors/generic/content/scoring/find-top-candidate.js

36 lines
902 B
JavaScript

import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';
import { getScore } from './index';
import mergeSiblings from './merge-siblings';
/**
 * After scores have been calculated, loop through every candidate node
 * (any element carrying a `score` attribute) and pick the one with the
 * highest score.
 *
 * Nodes whose tag name matches NON_TOP_CANDIDATE_TAGS_RE are skipped, and a
 * node only becomes the candidate when its score is strictly greater than
 * the best seen so far (starting from 0), so the first of several equally
 * scored nodes wins and nodes scoring 0 or less are never selected.
 *
 * @param {Function} $ - Selector function for the parsed document
 *   (presumably a Cheerio instance; it is called with selector strings and
 *   with raw nodes, and its results must expose `each`, `first` and `length`).
 * @returns {Object} The result of `mergeSiblings` for the winning node, or,
 *   when no node qualified, the `body` selection if it matched anything and
 *   otherwise the first element of the document.
 */
export default function findTopCandidate($) {
  let $candidate;
  let topScore = 0;

  $('*[score]').each((index, node) => {
    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {
      return;
    }

    const $node = $(node);
    const score = getScore($node);

    if (score > topScore) {
      topScore = score;
      $candidate = $node;
    }
  });

  // If we don't have a candidate, return the body or whatever the first
  // element is. A selection object is truthy even when it matched nothing,
  // so emptiness has to be checked through `length` rather than with `||`.
  if (!$candidate) {
    const $body = $('body');
    return $body.length > 0 ? $body : $('*').first();
  }

  return mergeSiblings($candidate, topScore, $);
}