You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
24 lines
892 B
JavaScript
24 lines
892 B
JavaScript
import difflib from 'difflib';
|
|
|
|
/**
 * Adjust a candidate link's score according to how closely its URL
 * resembles the URL of the current article.
 *
 * The similarity comes from `difflib.SequenceMatcher(...).ratio()`; the
 * fraction by which the two URLs differ is `1 - ratio`. Links that differ
 * by less than 20% receive a bonus, links that differ by more are
 * penalized, at a rate of 2.5 points per percentage point.
 *
 * @param {number} score - Score accumulated so far for this link.
 * @param {string} articleUrl - URL of the article being parsed
 *   (presumably a string; it is passed straight to SequenceMatcher).
 * @param {string} href - URL of the candidate link (same assumption).
 * @returns {number} `score` plus the similarity modifier when `score` is
 *   greater than zero; otherwise `0`.
 */
export default function scoreSimilarity(score, articleUrl, href) {
  // Difference fraction at which the modifier is exactly zero.
  const NEUTRAL_DIFF = 0.2;
  // Points gained or lost per 1.0 (i.e. 100%) of difference from neutral.
  const POINTS_PER_UNIT_DIFF = 250;

  // Do this last and only if we have a real candidate, because the
  // sequence comparison is potentially expensive computationally.
  if (score > 0) {
    const similarity = new difflib.SequenceMatcher(
      null,
      articleUrl,
      href
    ).ratio();

    // Subtract NEUTRAL_DIFF from the difference before scaling, which
    // means that a link less than 20% different gets a bonus instead of
    // a penalty. Ex:
    //    3% different = +42.5 points
    //   10% different = +25 points
    //   20% different =   0 points
    //   30% different = -25 points
    const diffPercent = 1.0 - similarity;
    const diffModifier = -(POINTS_PER_UNIT_DIFF * (diffPercent - NEUTRAL_DIFF));
    return score + diffModifier;
  }

  // Non-positive (or NaN) scores are not real candidates.
  return 0;
}
|