You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/content/scoring/score-length.js

26 lines
607 B
JavaScript

// Matches a tag name that is exactly "p" or "pre", ignoring case.
const paragraphLikeTagRe = /^(p|pre)$/i;

/**
 * Compute a length-based score bonus for a piece of text.
 *
 * The text length is measured in 50-character chunks. A fixed deduction is
 * subtracted from the chunk count (2 for `p`/`pre` tags, 1.25 for any other
 * tag) and the result is clamped to the range [0, 3].
 *
 * The reason `p` and `pre` receive the larger deduction is not documented;
 * the behavior is carried over as-is from the implementation this was
 * ported from.
 *
 * @param {number} textLength - Length of the text being scored.
 * @param {string} [tagName='p'] - Tag name of the element holding the text.
 * @returns {number} A bonus between 0 and 3 inclusive; 0 when the length is
 *   not greater than zero.
 */
export default function scoreLength(textLength, tagName = 'p') {
  const chunkCount = textLength / 50;

  // Guard clause: zero, negative, and NaN lengths earn no bonus.
  if (!(chunkCount > 0)) {
    return 0;
  }

  const deduction = paragraphLikeTagRe.test(tagName) ? 2 : 1.25;
  const rawBonus = chunkCount - deduction;

  // Clamp the bonus into [0, 3].
  return Math.max(0, Math.min(rawBonus, 3));
}