diff --git a/TODO.md b/TODO.md
index f5c7f8bf..5220876e 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,5 +1,4 @@
 TODO:
-- run makeLinksAbsolute on extracted content before returning
 - remove logic for fetching meta attrs with custom props
 - Resource (fetches page, validates it, cleans it, normalizes meta tags (!), converts lazy-loaded images, makes links absolute, etc)
 - extractNextPageUrl
@@ -12,6 +11,8 @@ TODO:
 - Separate constants into activity-specific folders (dom, scoring)
 
 DONE:
+x cleaning embed and object nodes
+x run makeLinksAbsolute on extracted content before returning
 x add option to fetch attrs in RootExtractor's select method
 x get custom datePublished selector to convert to date object (prob through cleaner)
 x extract and generalize cleaners
diff --git a/src/utils/dom/constants.js b/src/utils/dom/constants.js
index d652422d..76093ab5 100644
--- a/src/utils/dom/constants.js
+++ b/src/utils/dom/constants.js
@@ -9,6 +9,8 @@ export const STRIP_OUTPUT_TAGS = [
   'link',
   'style',
   'hr',
+  'embed',
+  'object',
 ]
 
 // cleanAttributes