From 52e89a0229e952f969b47a248a6aaef2b0ddbca4 Mon Sep 17 00:00:00 2001 From: Adam Pash Date: Fri, 9 Sep 2016 11:58:22 -0400 Subject: [PATCH] fix: cleaning embed and object nodes --- TODO.md | 3 ++- src/utils/dom/constants.js | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index f5c7f8bf..5220876e 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,4 @@ TODO: -- run makeLinksAbsolute on extracted content before returning - remove logic for fetching meta attrs with custom props - Resource (fetches page, validates it, cleans it, normalizes meta tags (!), converts lazy-loaded images, makes links absolute, etc) - extractNextPageUrl @@ -12,6 +11,8 @@ TODO: - Separate constants into activity-specific folders (dom, scoring) DONE: +x cleaning embed and object nodes +x run makeLinksAbsolute on extracted content before returning x add option to fetch attrs in RootExtractor's select method x get custom datePublished selector to convert to date object (prob through cleaner) x extract and generalize cleaners diff --git a/src/utils/dom/constants.js b/src/utils/dom/constants.js index d652422d..76093ab5 100644 --- a/src/utils/dom/constants.js +++ b/src/utils/dom/constants.js @@ -9,6 +9,8 @@ export const STRIP_OUTPUT_TAGS = [ 'link', 'style', 'hr', + 'embed', + 'object', ] // cleanAttributes