You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/content/scoring/add-score.js

16 lines
285 B
JavaScript

import {
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
getOrInitScore,
setScore,
} from './index';
export default function addScore($node, $, amount) {
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
try {
const score = getOrInitScore($node, $) + amount;
setScore($node, $, score);
} catch (e) {
// Ignoring; error occurs in scoreNode
feat: implemented extractBestNode functionality Squashed commit of the following: commit 9af554dd975ff1778ed70c71fa9bde667fc5f880 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 15:19:32 2016 -0400 feat: add cleanHeaders commit 0dfea98eedc4f97fcbd78866322595c705e20521 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 14:30:49 2016 -0400 fix: scoring parent nodes recursively commit b6e5897a694adeb81e25a905aba72c0f45a8cc94 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 12:47:24 2016 -0400 feat: extract clean node up and running commit fb652c5db13db6bce7271efd68ba4b20515e9549 Author: Adam Pash <adam.pash@gmail.com> Date: Tue Aug 30 09:57:21 2016 -0400 chore: added test for p tags with nested tags (e.g., img, iframe) commit 731d0a2e4d89121dfafad195e9d0911805c4f8e4 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 17:50:33 2016 -0400 feat: extact clean node integrates most functions commit 322bc6534d30feb7c1c08d3813132badc6286b40 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:46:04 2016 -0400 feat: removing empty nodes as defined in constants commit f1d38932ea12a865814d2326970031fcb8515baa Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:33:31 2016 -0400 feat: cleaning attributes from nodes commit 0aa73ada6854af0ecd504bfe3d926a9524787ab5 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 16:09:56 2016 -0400 feat: cleaning h1s from text commit 12d4a309246285c278ce7765e4fbaa8271bb5889 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:52:03 2016 -0400 feat: removing spacer images commit 4e74ff830cc67586560f6fc72e2cfa432a3a2647 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:38:49 2016 -0400 feat: stripping unwanted html from doc commit c774166e90169fd0c1aa89898d3f7a975e82bf0a Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 15:17:32 2016 -0400 feat: removing small images, height attribute from images commit 3a8642f42cda451669c832482c5e1611b1ff2ea9 Author: Adam Pash <adam.pash@gmail.com> Date: Mon Aug 29 12:57:45 2016 -0400 feat: rewrite top level commit a1c03e779234b0aea02206d92ec3dcc15758507e Author: Adam Pash <adam.pash@gmail.com> Date: Fri Aug 26 17:34:36 2016 -0400 in a weird place rn
8 years ago
}
return $node;
}