Sanitize HTML by default

merge-requests/2/head
gardenapple 3 years ago
parent 68f5a3b6c5
commit a91521846c
No known key found for this signature in database
GPG Key ID: CAF17E9ABE789268

@ -28,14 +28,14 @@ const locale = (
process.env.LC_MESSAGES || process.env.LC_MESSAGES ||
process.env.LANG || process.env.LANG ||
process.env.LANGUAGE || process.env.LANGUAGE ||
'en_US' "en_US"
).replace(/[.:].*/, ''); ).replace(/[.:].*/, '');
const yargs = require("yargs"); const yargs = require("yargs");
const __ = require("y18n")({ const __ = require("y18n")({
locale: locale, locale: locale,
updateFiles: false, updateFiles: false,
directory: path.resolve(__dirname, 'locales') directory: path.resolve(__dirname, "locales")
}).__; }).__;
//JSDOM, fs, Readability, and Readability-readerable are loaded on-demand. //JSDOM, fs, Readability, and Readability-readerable are loaded on-demand.
@ -62,8 +62,8 @@ function setErrored(exitCode) {
// //
const Properties = new Map([ const Properties = new Map([
["html-title", (article, singleLine, document) => ["html-title", (article, singleLine, window) =>
`<h1>${escapeHTML(Properties.get('title')(article, singleLine, document), document)}</h1>` `<h1>${escapeHTML(Properties.get("title")(article, singleLine, window), window.document)}</h1>`
], ],
["title", (article, singleLine) => ["title", (article, singleLine) =>
singleLine ? article.title.replace(/\n+/gm, ' ') : article.title singleLine ? article.title.replace(/\n+/gm, ' ') : article.title
@ -76,8 +76,15 @@ const Properties = new Map([
], ],
["length", article => article.length], ["length", article => article.length],
["dir", article => article.dir], ["dir", article => article.dir],
["html-content", article => article.content], ["text-content", article => article.textContent],
["text-content", article => article.textContent] ["html-content", (article, _, window) => {
if (!args["insane"]) {
const createDOMPurify = require("dompurify");
const DOMPurify = createDOMPurify(window);
return DOMPurify.sanitize(article.content);
}
return article.content;
}]
]); ]);
const LowConfidenceMode = { const LowConfidenceMode = {
@ -219,6 +226,11 @@ let args = yargs
hidden: true, hidden: true,
//deprecated: true //deprecated: true
}) })
.option("insane", {
alias: 'S',
type: "boolean",
desc: __`Don't sanitize HTML`
})
.option("json", { .option("json", {
alias: 'j', alias: 'j',
type: "boolean", type: "boolean",
@ -309,9 +321,9 @@ else
inputFile = inputArg; inputFile = inputArg;
const outputArg = args['output']; const outputArg = args["output"];
const documentURL = args["base"] || inputURL; const documentURL = args["base"] || inputURL;
const outputJSON = args['json']; const outputJSON = args["json"];
let wantedProperties = []; let wantedProperties = [];
@ -321,7 +333,7 @@ if (args["properties"]) {
wantedProperties = args["properties"]; wantedProperties = args["properties"];
wantedPropertiesCustom = true; wantedPropertiesCustom = true;
} else { } else {
wantedProperties = [ 'html-title', 'html-content' ]; wantedProperties = [ "html-title", "html-content" ];
} }
@ -335,13 +347,11 @@ async function read(stream) {
for await (const chunk of stream){ for await (const chunk of stream){
chunks.push(chunk); chunks.push(chunk);
} }
return Buffer.concat(chunks).toString('utf8'); return Buffer.concat(chunks).toString("utf8");
} }
const JSDOM = require("jsdom").JSDOM;
if (inputIsFromStdin) { if (inputIsFromStdin) {
if (!args["quiet"]) { if (!args["quiet"]) {
console.error("Reading..."); console.error("Reading...");
@ -349,11 +359,13 @@ if (inputIsFromStdin) {
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`) console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
} }
read(process.stdin).then(result => { read(process.stdin).then(result => {
const JSDOM = require("jsdom").JSDOM;
onLoadDOM(new JSDOM(result, { url: documentURL })); onLoadDOM(new JSDOM(result, { url: documentURL }));
}); });
} else { } else {
if (!args["quiet"]) if (!args["quiet"])
console.error(__`Retrieving...`); console.error(__`Retrieving...`);
const JSDOM = require("jsdom").JSDOM;
let promiseGetHTML; let promiseGetHTML;
if (inputURL) { if (inputURL) {
promiseGetHTML = JSDOM.fromURL(inputURL) promiseGetHTML = JSDOM.fromURL(inputURL)
@ -379,7 +391,8 @@ function escapeHTML(string, document) {
} }
function onLoadDOM(dom) { function onLoadDOM(dom) {
const document = dom.window.document; const window = dom.window
const document = window.document;
let shouldParseArticle = true; let shouldParseArticle = true;
if (args["low-confidence"] != LowConfidenceMode.force) if (args["low-confidence"] != LowConfidenceMode.force)
@ -413,7 +426,12 @@ function onLoadDOM(dom) {
if (!shouldParseArticle) { if (!shouldParseArticle) {
//Ignore wantedProperties, that should've thrown an error before //Ignore wantedProperties, that should've thrown an error before
writeStream.write(document.documentElement.outerHTML); const createDOMPurify = require("dompurify");
const DOMPurify = createDOMPurify(window);
let outputHTML = document.documentElement.outerHTML;
if (!args["insane"])
outputHTML = DOMPurify.sanitize(outputHTML, {WHOLE_DOCUMENT: true});
writeStream.write(outputHTML);
return; return;
} }
@ -431,16 +449,16 @@ function onLoadDOM(dom) {
let result = {}; let result = {};
if (wantedPropertiesCustom) { if (wantedPropertiesCustom) {
for (propertyName of wantedProperties) for (propertyName of wantedProperties)
result[propertyName] = Properties.get(propertyName)(article, false, document); result[propertyName] = Properties.get(propertyName)(article, false, window);
} else { } else {
for (const [name, func] of Properties) { for (const [name, func] of Properties) {
result[name] = func(article, false, document); result[name] = func(article, false, window);
} }
} }
writeStream.write(JSON.stringify(result)); writeStream.write(JSON.stringify(result));
} else { } else {
for (propertyName of wantedProperties) for (propertyName of wantedProperties)
writeStream.write(Properties.get(propertyName)(article, true, document) + '\n'); writeStream.write(Properties.get(propertyName)(article, true, window) + '\n');
} }
} }

@ -8,6 +8,7 @@
"What to do if Readability.js is uncertain about what the core content actually is": "What to do if Readability.js is uncertain about what the core content actually is", "What to do if Readability.js is uncertain about what the core content actually is": "What to do if Readability.js is uncertain about what the core content actually is",
"Output specific properties of the parsed article": "Output specific properties of the parsed article", "Output specific properties of the parsed article": "Output specific properties of the parsed article",
"Don't output extra information to stderr": "Don't output extra information to stderr", "Don't output extra information to stderr": "Don't output extra information to stderr",
"Don't sanitize HTML": "Don't sanitize HTML",
"Set the document URL when parsing standard input or a local file (this affects relative links)": "Set the document URL when parsing standard input or a local file (this affects relative links)", "Set the document URL when parsing standard input or a local file (this affects relative links)": "Set the document URL when parsing standard input or a local file (this affects relative links)",
"(deprecated) alias for --base": "(deprecated) alias for --base", "(deprecated) alias for --base": "(deprecated) alias for --base",
"Interpret SOURCE as a file name rather than a URL": "Interpret SOURCE as a file name rather than a URL", "Interpret SOURCE as a file name rather than a URL": "Interpret SOURCE as a file name rather than a URL",

@ -8,6 +8,7 @@
"What to do if Readability.js is uncertain about what the core content actually is": "Что делать, когда Readability не может определить целевой контент", "What to do if Readability.js is uncertain about what the core content actually is": "Что делать, когда Readability не может определить целевой контент",
"Output specific properties of the parsed article": "Показать определённые характеристики текста", "Output specific properties of the parsed article": "Показать определённые характеристики текста",
"Don't output extra information to stderr": "Не выдавать лишнюю информацию в стандартный вывод ошибок", "Don't output extra information to stderr": "Не выдавать лишнюю информацию в стандартный вывод ошибок",
"Don't sanitize HTML": "Не убирать \"опасные\" элементы из HTML",
"Set the document URL when parsing standard input or a local file (this affects relative links)": "Указать URL документа при чтении из локального файла или стандартного ввода (влияет на относительные ссылки)", "Set the document URL when parsing standard input or a local file (this affects relative links)": "Указать URL документа при чтении из локального файла или стандартного ввода (влияет на относительные ссылки)",
"(deprecated) alias for --base": "(устаревшый) синоним для --base", "(deprecated) alias for --base": "(устаревшый) синоним для --base",
"Interpret SOURCE as a file name rather than a URL": "Интерпретировать [source] как файл, а не как URL", "Interpret SOURCE as a file name rather than a URL": "Интерпретировать [source] как файл, а не как URL",

7
package-lock.json generated

@ -1,6 +1,6 @@
{ {
"name": "readability-cli", "name": "readability-cli",
"version": "2.0.0-pre", "version": "2.0.0-pre.2",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {
@ -207,6 +207,11 @@
} }
} }
}, },
"dompurify": {
"version": "2.2.6",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.2.6.tgz",
"integrity": "sha512-7b7ZArhhH0SP6W2R9cqK6RjaU82FZ2UPM7RO8qN1b1wyvC/NY1FNWcX1Pu00fFOAnzEORtwXe4bPaClg6pUybQ=="
},
"ecc-jsbn": { "ecc-jsbn": {
"version": "0.1.2", "version": "0.1.2",
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",

@ -25,8 +25,9 @@
"license": "GPL-3.0-only", "license": "GPL-3.0-only",
"dependencies": { "dependencies": {
"@mozilla/readability": "^0.4.1", "@mozilla/readability": "^0.4.1",
"dompurify": "^2.2.6",
"jsdom": "^16.4.0", "jsdom": "^16.4.0",
"yargs": "github:gardenappl/yargs", "y18n": "^5.0.5",
"y18n": "^5.0.5" "yargs": "github:gardenappl/yargs"
} }
} }

Loading…
Cancel
Save