Compare commits

...

12 Commits
v2.3.5 ... main

@ -19,15 +19,44 @@ Here is a before-and-after comparison, using [an article from The Guardian](http
## Installation
**readability-cli** can be installed on any system with [Node.js](https://nodejs.org/en/):
**readability-cli** can run via either [Node.js](https://nodejs.org/en/) or its newer and safer Rust counterpart [Deno](https://deno.land/).
### Node.js
Install the program and its man page:
`npm install -g readability-cli`
(Note to package maintainers: it might be a good idea to provide a symlink, so the man page can be accessed either as `readability-cli(1)` or as `readable(1)`.)
*(Note to package maintainers: it might be a good idea to provide a symlink, so the man page can be accessed either as `readability-cli(1)` or as `readable(1)`)*
### Deno
Deno support is still in development; running the script directly with `deno run <URL>` is not supported.
However, you can clone this Git repository and easily run the `readable.ts` script.
```sh
git clone https://gitlab.com/gardenappl/readability-cli/
cd readability-cli
./readable.ts
```
You can use `deno run` with the locally-downloaded script to fine-tune permissions, for example:
`curl https://example.com | deno run --no-check readable.ts`
By default Deno does not allow reading & writing files or accessing the network, meaning you have to rely on piping data in and out.
Read more about Deno permissions [in their manual](https://deno.land/manual/getting_started/permissions).
*(Package maintainers might consider adding a `readable-sandbox` executable which will run `readable` with restrictions)*
### Arch Linux
Arch Linux users may use the [readability-cli](https://aur.archlinux.org/packages/readability-cli/) AUR package instead.
Arch Linux users may use the "official" AUR packages:
* [nodejs-readability-cli](https://aur.archlinux.org/packages/nodejs-readability-cli/)
* [deno-readability-cli](https://aur.archlinux.org/packages/deno-readability-cli/)
## Usage

@ -0,0 +1,565 @@
/*
Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
Copyright (C) 2022 gardenapple
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
export default async function(
Buffer,
fs,
process,
yargs,
__,
Readability,
isProbablyReaderable,
printVersion,
parseDOM,
parseDOMFromFile,
parseDOMFromURL,
sanitizeDOM,
sanitizeHTML
) {
let errored = false;
function setErrored(exitCode) {
process.exitCode = exitCode;
errored = true;
}
//Process exit statuses passed to setErrored() by the rest of this function.
//NOTE(review): the numbers coincide with the BSD sysexits.h values
//(EX_USAGE, EX_DATAERR, EX_NOINPUT, EX_NOHOST, EX_UNAVAILABLE, EX_NOPERM) - presumably intentional; confirm.
const ExitCodes = {
badUsageCLI: 64, //bad command-line usage (unknown option value, missing input, invalid URL)
dataError: 65, //the document could not or should not be processed
noInput: 66, //input file does not exist (ENOENT)
noHost: 68, //host name could not be resolved (ENOTFOUND)
serviceUnavailable: 69, //HTTP status error or any other failure while loading the input
noPermission: 77 //input file is not accessible (EACCES)
};
//
//Parsing arguments
//
const Properties = new Map([
["html-title", (article, singleLine, document) =>
`<h1>${escapeHTML(Properties.get("title")(article, singleLine, document))}</h1>`
],
["title", (article, singleLine) =>
singleLine ? article.title.replace(/\n+/gm, ' ') : article.title
],
["excerpt", (article, singleLine) =>
singleLine ? article.excerpt.replace(/\n+/gm, ' ') : article.excerpt
],
["byline", (article, singleLine) =>
singleLine ? article.byline.replace(/\n+/gm, ' ') : article.byline
],
["length", article => article.length],
["dir", article => article.dir],
["text-content", article => article.textContent],
["html-content", article => article.content]
]);
//Accepted values of --low-confidence, i.e. what to do when isProbablyReaderable() rejects the document:
// keep  - (used when the option is omitted) output the unprocessed HTML instead of an article
// force - skip the isProbablyReaderable() check and always run Readability
// exit  - report an error and stop
const LowConfidenceMode = {
keep: "keep",
force: "force",
exit: "exit"
};
//Options object passed to the Readability constructor; "keepClasses" is set on it when --keep-classes is given.
const readabilityOptions = {};
//backwards compat with old, comma-separated values
function yargsCompatProperties(args) {
if (args["properties"]) {
let i;
for (i = 0; i < args["properties"].length; i++) {
const property = args["properties"][i];
if (property.indexOf(',') > -1) {
const split = args["properties"][i].split(',');
args["properties"].splice(i, 1, ...split);
continue;
}
if (!Properties.has(property)) {
args["properties"].splice(i, 1);
i--;
if (!args["--"])
args["--"] = [ property ];
else
args["--"].push(property);
}
}
}
}
//Positional sometimes don't get recognized when they're put
//after other arguments, I think it's an oversight in yargs.
function yargsFixPositional(args) {
if (args["--"]) {
if (!args["source"])
args["source"] = args["--"].shift();
args["_"] = args["--"];
}
}
//Declare the command-line interface and parse the arguments.
//User-visible descriptions go through the `__` template tag so they can be localized.
const args = yargs
//yargs' built-in --version handling is turned off; a custom "version" option is declared further down
.version(false)
//Default command with one optional positional: the input source
.command("* [source]", __`Process HTML input`, (yargs) => {
yargs.positional("source", {
desc: __`A file, an http(s) URL, or '-' for standard input`,
type: "string"
});
})
.completion("--completion", false)
//Second argument `true`: run these middlewares before validation
//NOTE(review): relies on yargs' applyBeforeValidation semantics - confirm against the yargs version in use
.middleware([ yargsCompatProperties, yargsFixPositional ], true) //middleware seems to be buggy
.option("help", {
alias: 'h',
type: "boolean",
desc: __`Show help`
})
.option("completion", {
type: "boolean",
desc: __`Print script for bash/zsh completion`,
//not listed in the help output when running under Deno
hidden: typeof Deno !== "undefined"
})
.option("base", {
alias: 'b',
type: "string",
desc: __`Set the document URL when parsing standard input or a local file (this affects relative links)`
})
.option("insane", {
alias: 'S',
type: "boolean",
desc: __`Don't sanitize HTML`
})
.option("insecure", {
alias: 'K',
type: "boolean",
desc: __`Allow invalid SSL certificates`,
//rejected later with an error when running under Deno
hidden: typeof Deno !== "undefined"
})
.option("is-file", {
alias: 'f',
type: "boolean",
desc: __`Interpret SOURCE as a file name rather than a URL`,
default: false,
hidden: true,
//deprecated: true
})
.option("is-url", {
alias: 'U',
type: "boolean",
desc: __`(deprecated) Interpret SOURCE as a URL rather than file name`,
hidden: true,
//deprecated: true
})
.option("json", {
alias: 'j',
type: "boolean",
desc: __`Output properties as a JSON payload`
})
.option("low-confidence", {
alias: 'l',
type: "string",
desc: __`What to do if Readability.js is uncertain about what the core content actually is`,
//same values as LowConfidenceMode; the "keep" default is applied after parsing
choices: ["keep", "force", "exit"]
//default: "no-op", //don't set default because completion won't work
})
.option("keep-classes", {
alias: 'C',
type: "boolean",
desc: __`Preserve all CSS classes for input elements, instead of adapting to Firefox's Reader Mode`
})
.option("output", {
alias: 'o',
type: "string",
desc: __`The file to which the result should be output`
})
.option("properties", {
alias: 'p',
type: "array",
desc: __`Output specific properties of the parsed article`,
choices: Array.from(Properties.keys())
})
.option("proxy", {
alias: 'x',
type: "string",
desc: __`Use specified proxy (can also use HTTPS_PROXY environment variable)`,
//rejected later with an error when running under Deno
hidden: typeof Deno !== "undefined"
})
.option("quiet", {
alias: 'q',
type: "boolean",
desc: __`Don't output extra information to stderr`
})
.option("style", {
alias: 's',
type: "string",
desc: __`Specify .css file for stylesheet`
})
.option("url", {
alias: 'u',
type: "string",
desc: __`(deprecated) alias for --base`,
hidden: true,
//deprecated: true //completion script does not respect this value, so just say it in the description
})
.option("user-agent", {
alias: 'A',
type: "string",
desc: __`Set custom user agent string`
})
.option("version", {
alias: 'V',
type: "boolean",
desc: __`Print version`
})
//On a parsing/validation failure: print the message and record exit status 64;
//the usage text is printed later, once `errored` is checked
.fail((msg, _err, _yargs) => {
console.error(msg);
setErrored(ExitCodes.badUsageCLI);
})
.epilogue(__`See the manual for more info: man readability-cli`)
//help output is wrapped at the terminal width, capped at 100 columns
.wrap(Math.min(yargs.terminalWidth(), 100))
.strict()
.parse();
//
//Post-process the parsed arguments: deprecation notices, early exits, and
//derivation of the input/output settings used by the rest of the function.
//
if (args["is-url"]) {
console.error(__`Note: --is-url option is deprecated.`);
}
if (args["url"]) {
console.error(__`Note: --url option is deprecated, please use --base instead.`);
//--url is kept as an alias: its value overwrites --base
args["base"] = args["url"];
}
//Print the yargs-generated help text
function printUsage() {
yargs.showHelp();
}
//--completion and --version print their output and stop before any input is read
if (args["completion"]) {
yargs.showCompletionScript();
return;
}
if (args["version"]) {
printVersion();
return;
}
//Under Deno, --insecure and --proxy are rejected with a usage error
if (typeof Deno !== "undefined") {
for (const option of ["insecure", "proxy"]) {
if (args[option]) {
console.error(__`Warning: option --${option} is not supported in Deno.`);
setErrored(ExitCodes.badUsageCLI);
return;
}
}
}
if (args["keep-classes"]) {
readabilityOptions["keepClasses"] = true;
}
//--low-confidence falls back to "keep" here rather than through a yargs default
//(see the comment on the option declaration); the alias 'l' is kept in sync by hand
if (!args["low-confidence"]) {
args["low-confidence"] = LowConfidenceMode.keep;
args['l'] = LowConfidenceMode.keep;
} else if (!Object.values(LowConfidenceMode).includes(args["low-confidence"])) {
console.error(__`Unknown mode: ${args["low-confidence"]}\nPlease use one of: keep, force, exit`);
console.error(__`Use --help for more info.`);
setErrored(ExitCodes.badUsageCLI);
return;
}
//Without a SOURCE argument, read standard input - unless stdin is a terminal,
//in which case there is nothing to read and usage is printed instead
let inputArg;
if (!args["source"]) {
if (process.stdin.isTTY) {
console.error(__`No input provided`);
printUsage();
setErrored(ExitCodes.badUsageCLI);
return;
} else {
inputArg = '-'
}
} else {
inputArg = args["source"];
}
//Get input parameter, remove inputArg from args
//Exactly one of inputFile / inputURL / inputIsFromStdin ends up set
let inputFile;
let inputURL;
let inputIsFromStdin = false;
//--is-url: a source without a "scheme://" prefix gets "https://" prepended
if (args["is-url"] && inputArg.search(/^\w+:\/\//) == -1)
inputArg = "https://" + inputArg;
//Anything starting with "scheme://" is a URL unless --is-file is given; '-' means stdin
if (!args["is-file"] && inputArg.search(/^\w+:\/\//) != -1)
inputURL = inputArg;
else if (inputArg == '-')
inputIsFromStdin = true;
else
inputFile = inputArg;
const outputArg = args["output"];
//--base takes precedence over the input URL as the document URL
const documentURL = args["base"] || inputURL;
const outputJSON = args["json"];
//Outside Deno, the proxy falls back to the https_proxy / HTTPS_PROXY / http_proxy environment variables, in that order
let proxy = args["proxy"];
if (!proxy && typeof Deno === "undefined")
proxy = process.env.https_proxy || process.env.HTTPS_PROXY || process.env.http_proxy;
let wantedProperties;
if (args["properties"]) {
wantedProperties = args["properties"];
}
//`errored` may already have been set by the yargs fail handler during parsing
if (errored) {
printUsage();
return;
}
async function read(stream) {
const chunks = [];
for await (const chunk of stream){
chunks.push(chunk);
}
return Buffer.concat(chunks).toString("utf8");
}
let document, window
try {
if (inputIsFromStdin) {
if (!args["quiet"]) {
console.error("Reading...");
if (!documentURL)
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
}
const input = await read(process.stdin);
[document, window] = await parseDOM(input, documentURL);
} else {
if (!args["quiet"])
console.error(__`Retrieving...`);
let parseDOMPromise;
if (inputURL) {
parseDOMPromise = parseDOMFromURL(documentURL, proxy, !args["insecure"], args["user-agent"]);
} else if (inputFile) {
parseDOMPromise = parseDOMFromFile(inputFile, documentURL);
}
[document, window] = await parseDOMPromise;
}
} catch (e) {
let error = e
if (error.error) {
//Nested error?
error = error.error;
}
if (error instanceof TypeError && inputURL) {
console.error(__`Invalid URL: ${inputURL}`);
setErrored(ExitCodes.badUsageCLI);
} else if (error.code == "ENOENT") {
console.error(error.message);
setErrored(ExitCodes.noInput);
} else if (error.code == "EACCES") {
console.error(error.message);
setErrored(ExitCodes.noPermission);
} else if (error.code == "ENOTFOUND") {
console.error(__`Host not found: ${error.hostname}`);
setErrored(ExitCodes.noHost);
} else if (error.statusCode) {
console.error(__`Status error: ${error.response.statusMessage}`);
setErrored(ExitCodes.serviceUnavailable);
} else {
console.error(error.message);
//console.error(error);
setErrored(ExitCodes.serviceUnavailable);
}
return;
}
/**
 * Escape the five characters that are significant in HTML text and attribute
 * values (& " ' < >) as entity references.
 * Adapted from https://stackoverflow.com/a/30970751
 */
function escapeHTML(string) {
	const entities = new Map([
		['&', "&amp;"],
		['"', "&quot;"],
		['\'', "&apos;"],
		['<', "&lt;"],
		['>', "&gt;"]
	]);
	return string.replace(/[&"'<>]/g, match => entities.get(match));
}
async function getHTML(document, window) {
if (args["insane"])
return document.documentElement.outerHTML;
else
return await sanitizeDOM(document, window);
}
//
//Decide whether to run Readability at all, open the output, and parse the article.
//
//Unless --low-confidence=force, isProbablyReaderable() decides whether the document gets processed
let shouldParseArticle = true;
if (args["low-confidence"] != LowConfidenceMode.force)
shouldParseArticle = isProbablyReaderable(document);
if (!shouldParseArticle) {
if (args["low-confidence"] == LowConfidenceMode.exit) {
console.error(__`Not sure if this document should be processed, exiting`);
setErrored(ExitCodes.dataError);
return;
} else {
if (!args["quiet"])
console.error(__`Not sure if this document should be processed. Not processing`);
//Properties only exist for a parsed article, so --json / --properties cannot be honored here
if (args["json"] || wantedProperties) {
console.error(__`Can't output properties`);
setErrored(ExitCodes.dataError);
return;
}
//(already false at this point; the assignment has no effect)
shouldParseArticle = false;
}
}
//Output goes to the --output file if given, to standard output otherwise
let writeStream;
if (outputArg) {
writeStream = fs.createWriteStream(outputArg);
} else {
writeStream = process.stdout;
}
if (!shouldParseArticle) {
//Ignore wantedProperties, that should've thrown an error before
writeStream.write(await getHTML(document, window));
return;
}
if (!args["quiet"])
console.error(__`Processing...`);
const reader = new Readability(document, readabilityOptions);
const article = reader.parse();
//A falsy result means Readability could not extract an article:
//in "keep" mode the document is output as-is, otherwise this is a data error
if (!article) {
if (args["low-confidence"] == LowConfidenceMode.keep) {
if (!args["quiet"])
console.error(__`Couldn't process document.`);
writeStream.write(await getHTML(document, window));
} else {
console.error(__`Couldn't process document.`);
setErrored(ExitCodes.dataError);
}
return;
}
//
//Write the result: a JSON payload, a list of properties, or a complete HTML page.
//

/**
 * Resolve one property of the parsed article for output.
 * "html-content" is passed through sanitizeHTML() unless --insane was given, so that
 * --json and --properties emit the same sanitized markup as the full-page output.
 */
const getOutputProperty = async (propertyName, singleLine) => {
	const value = Properties.get(propertyName)(article, singleLine, document);
	if (propertyName === "html-content" && !args["insane"])
		return await sanitizeHTML(value, window);
	return value;
};

if (outputJSON) {
	//All properties by default, or just the requested ones
	const result = {};
	const propertyNames = wantedProperties || Array.from(Properties.keys());
	for (const propertyName of propertyNames)
		result[propertyName] = await getOutputProperty(propertyName, false);
	writeStream.write(JSON.stringify(result));
} else if (wantedProperties) {
	//One property per line
	for (const propertyName of wantedProperties)
		writeStream.write(await getOutputProperty(propertyName, true) + '\n');
} else {
	const title = Properties.get("title")(article, false, document);
	const author = Properties.get("byline")(article, false, document);
	const content = await getOutputProperty("html-content", false);

	writeStream.write(`<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">`);
	if (args["style"] || !args["keep-classes"]) {
		const cssHref = args["style"] || "chrome://global/skin/aboutReader.css";
		//Escaped so that a quote in the path cannot break out of the attribute
		writeStream.write(`
<link rel="stylesheet" href="${escapeHTML(cssHref)}" type="text/css">`);
	}
	writeStream.write(`
<title>${escapeHTML(title)}</title>
</head>
`
	);
	if (!args["keep-classes"]) {
		//Add a few divs and classes so that Firefox Reader Mode CSS works well
		writeStream.write(`
<body class="light sans-serif loaded" style="--font-size:14pt; --content-width:40em;">
<div class="container" `
		);
		const contentDir = Properties.get("dir")(article, false, document);
		//The direction comes from the input document, so it is escaped before it is put in an attribute
		if (contentDir)
			writeStream.write(`dir="${escapeHTML(contentDir)}">`);
		else
			writeStream.write('>');
		writeStream.write(`
<div class="header reader-header reader-show-element">
<h1 class="reader-title">${escapeHTML(title)}</h1>`);
		if (author) {
			writeStream.write(`
<div class="credits reader-credits">${escapeHTML(author)}</div>`);
		}
		writeStream.write(`
</div>
<hr>
<div class="content">
<div class="moz-reader-content reader-show-element">
`
		);
		writeStream.write(content);
		writeStream.write(`
</div>
</div>
</div>
`
		);
	} else {
		writeStream.write("\n<body>\n");
		writeStream.write(Properties.get("html-title")(article, false, document));
		writeStream.write('\n');
		if (author) {
			writeStream.write(`<p><i>${escapeHTML(author)}</i></p>`);
		}
		writeStream.write("\n<hr>\n");
		writeStream.write(content);
	}
	writeStream.write("\n</body></html>");
}
}

@ -0,0 +1,7 @@
{
"lint": {
"files": {
"exclude": ["node_modules/"]
}
}
}

@ -0,0 +1,168 @@
{
"version": "2",
"remote": {
"https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74",
"https://deno.land/std@0.130.0/_util/os.ts": "49b92edea1e82ba295ec946de8ffd956ed123e2948d9bd1d3e901b04e4307617",
"https://deno.land/std@0.130.0/fmt/colors.ts": "30455035d6d728394781c10755351742dd731e3db6771b1843f9b9e490104d37",
"https://deno.land/std@0.130.0/fmt/printf.ts": "e2c0f72146aed1efecf0c39ab928b26ae493a2278f670a871a0fbdcf36ff3379",
"https://deno.land/std@0.130.0/path/_constants.ts": "df1db3ffa6dd6d1252cc9617e5d72165cd2483df90e93833e13580687b6083c3",
"https://deno.land/std@0.130.0/path/_interface.ts": "ee3b431a336b80cf445441109d089b70d87d5e248f4f90ff906820889ecf8d09",
"https://deno.land/std@0.130.0/path/_util.ts": "c1e9686d0164e29f7d880b2158971d805b6e0efc3110d0b3e24e4b8af2190d2b",
"https://deno.land/std@0.130.0/path/common.ts": "bee563630abd2d97f99d83c96c2fa0cca7cee103e8cb4e7699ec4d5db7bd2633",
"https://deno.land/std@0.130.0/path/glob.ts": "cb5255638de1048973c3e69e420c77dc04f75755524cb3b2e160fe9277d939ee",
"https://deno.land/std@0.130.0/path/mod.ts": "4275129bb766f0e475ecc5246aa35689eeade419d72a48355203f31802640be7",
"https://deno.land/std@0.130.0/path/posix.ts": "663e4a6fe30a145f56aa41a22d95114c4c5582d8b57d2d7c9ed27ad2c47636bb",
"https://deno.land/std@0.130.0/path/separator.ts": "fe1816cb765a8068afb3e8f13ad272351c85cbc739af56dacfc7d93d710fe0f9",
"https://deno.land/std@0.130.0/path/win32.ts": "e7bdf63e8d9982b4d8a01ef5689425c93310ece950e517476e22af10f41a136e",
"https://deno.land/std@0.130.0/testing/_diff.ts": "9d849cd6877694152e01775b2d93f9d6b7aef7e24bfe3bfafc4d7a1ac8e9f392",
"https://deno.land/std@0.130.0/testing/asserts.ts": "b0ef969032882b1f7eb1c7571e313214baa1485f7b61cf35807b2434e254365c",
"https://deno.land/std@0.201.0/assert/assert.ts": "9a97dad6d98c238938e7540736b826440ad8c1c1e54430ca4c4e623e585607ee",
"https://deno.land/std@0.201.0/assert/assertion_error.ts": "4d0bde9b374dfbcbe8ac23f54f567b77024fb67dbb1906a852d67fe050d42f56",
"https://deno.land/std@0.201.0/path/_basename.ts": "057d420c9049821f983f784fd87fa73ac471901fb628920b67972b0f44319343",
"https://deno.land/std@0.201.0/path/_constants.ts": "e49961f6f4f48039c0dfed3c3f93e963ca3d92791c9d478ac5b43183413136e0",
"https://deno.land/std@0.201.0/path/_dirname.ts": "355e297236b2218600aee7a5301b937204c62e12da9db4b0b044993d9e658395",
"https://deno.land/std@0.201.0/path/_extname.ts": "eaaa5aae1acf1f03254d681bd6a8ce42a9cb5b7ff2213a9d4740e8ab31283664",
"https://deno.land/std@0.201.0/path/_format.ts": "4a99270d6810f082e614309164fad75d6f1a483b68eed97c830a506cc589f8b4",
"https://deno.land/std@0.201.0/path/_from_file_url.ts": "6eadfae2e6f63ad9ee46b26db4a1b16583055c0392acedfb50ed2fc694b6f581",
"https://deno.land/std@0.201.0/path/_interface.ts": "6471159dfbbc357e03882c2266d21ef9afdb1e4aa771b0545e90db58a0ba314b",
"https://deno.land/std@0.201.0/path/_is_absolute.ts": "05dac10b5e93c63198b92e3687baa2be178df5321c527dc555266c0f4f51558c",
"https://deno.land/std@0.201.0/path/_join.ts": "815f5e85b042285175b1492dd5781240ce126c23bd97bad6b8211fe7129c538e",
"https://deno.land/std@0.201.0/path/_normalize.ts": "a19ec8706b2707f9dd974662a5cd89fad438e62ab1857e08b314a8eb49a34d81",
"https://deno.land/std@0.201.0/path/_os.ts": "d932f56d41e4f6a6093d56044e29ce637f8dcc43c5a90af43504a889cf1775e3",
"https://deno.land/std@0.201.0/path/_parse.ts": "0f9b0ff43682dd9964eb1c4398610c4e165d8db9d3ac9d594220217adf480cfa",
"https://deno.land/std@0.201.0/path/_relative.ts": "27bdeffb5311a47d85be26d37ad1969979359f7636c5cd9fcf05dcd0d5099dc5",
"https://deno.land/std@0.201.0/path/_resolve.ts": "7a3616f1093735ed327e758313b79c3c04ea921808ca5f19ddf240cb68d0adf6",
"https://deno.land/std@0.201.0/path/_to_file_url.ts": "a141e4a525303e1a3a0c0571fd024552b5f3553a2af7d75d1ff3a503dcbb66d8",
"https://deno.land/std@0.201.0/path/_to_namespaced_path.ts": "0d5f4caa2ed98ef7a8786286df6af804b50e38859ae897b5b5b4c8c5930a75c8",
"https://deno.land/std@0.201.0/path/_util.ts": "4e191b1bac6b3bf0c31aab42e5ca2e01a86ab5a0d2e08b75acf8585047a86221",
"https://deno.land/std@0.201.0/path/basename.ts": "bdfa5a624c6a45564dc6758ef2077f2822978a6dbe77b0a3514f7d1f81362930",
"https://deno.land/std@0.201.0/path/common.ts": "ee7505ab01fd22de3963b64e46cff31f40de34f9f8de1fff6a1bd2fe79380000",
"https://deno.land/std@0.201.0/path/dirname.ts": "b6533f4ee4174a526dec50c279534df5345836dfdc15318400b08c62a62a39dd",
"https://deno.land/std@0.201.0/path/extname.ts": "62c4b376300795342fe1e4746c0de518b4dc9c4b0b4617bfee62a2973a9555cf",
"https://deno.land/std@0.201.0/path/format.ts": "110270b238514dd68455a4c54956215a1aff7e37e22e4427b7771cefe1920aa5",
"https://deno.land/std@0.201.0/path/from_file_url.ts": "9f5cb58d58be14c775ec2e57fc70029ac8b17ed3bd7fe93e475b07280adde0ac",
"https://deno.land/std@0.201.0/path/glob.ts": "593e2c3573883225c25c5a21aaa8e9382a696b8e175ea20a3b6a1471ad17aaed",
"https://deno.land/std@0.201.0/path/is_absolute.ts": "0b92eb35a0a8780e9f16f16bb23655b67dace6a8e0d92d42039e518ee38103c1",
"https://deno.land/std@0.201.0/path/join.ts": "31c5419f23d91655b08ec7aec403f4e4cd1a63d39e28f6e42642ea207c2734f8",
"https://deno.land/std@0.201.0/path/mod.ts": "6e1efb0b13121463aedb53ea51dabf5639a3172ab58c89900bbb72b486872532",
"https://deno.land/std@0.201.0/path/normalize.ts": "6ea523e0040979dd7ae2f1be5bf2083941881a252554c0f32566a18b03021955",
"https://deno.land/std@0.201.0/path/parse.ts": "be8de342bb9e1924d78dc4d93c45215c152db7bf738ec32475560424b119b394",
"https://deno.land/std@0.201.0/path/posix.ts": "0a1c1952d132323a88736d03e92bd236f3ed5f9f079e5823fae07c8d978ee61b",
"https://deno.land/std@0.201.0/path/relative.ts": "8bedac226afd360afc45d451a6c29fabceaf32978526bcb38e0c852661f66c61",
"https://deno.land/std@0.201.0/path/resolve.ts": "133161e4949fc97f9ca67988d51376b0f5eef8968a6372325ab84d39d30b80dc",
"https://deno.land/std@0.201.0/path/separator.ts": "40a3e9a4ad10bef23bc2cd6c610291b6c502a06237c2c4cd034a15ca78dedc1f",
"https://deno.land/std@0.201.0/path/to_file_url.ts": "00e6322373dd51ad109956b775e4e72e5f9fa68ce2c6b04e4af2a6eed3825d31",
"https://deno.land/std@0.201.0/path/to_namespaced_path.ts": "1b1db3055c343ab389901adfbda34e82b7386bcd1c744d54f9c1496ee0fd0c3d",
"https://deno.land/std@0.201.0/path/win32.ts": "8b3f80ef7a462511d5e8020ff490edcaa0a0d118f1b1e9da50e2916bdd73f9dd",
"https://deno.land/x/ammonia@0.3.1/mod.ts": "170075af1b2e2922b2f1229bac4acba5cb824b10e97de4604da55ea492db2e26",
"https://deno.land/x/ammonia@0.3.1/pkg/ammonia_wasm.js": "75a90cc78b52f1f2e4e998c1b574f97097de2d2ee7f3a55dca562c4f93a618e0",
"https://deno.land/x/ammonia@0.3.1/wasm.js": "60a03b400d2ff529d2d3a0a804f10abe564d5ffaa1bf8344c2c27799088f514e",
"https://deno.land/x/cliui@v7.0.4-deno/build/lib/index.js": "fb6030c7b12602a4fca4d81de3ddafa301ba84fd9df73c53de6f3bdda7b482d5",
"https://deno.land/x/cliui@v7.0.4-deno/build/lib/string-utils.js": "b3eb9d2e054a43a3064af17332fb1839a7dadb205c5371af4789616afb1a117f",
"https://deno.land/x/cliui@v7.0.4-deno/deno.ts": "d07bc3338661f8011e3a5fd215061d17a52107a5383c29f40ce0c1ecb8bb8cc3",
"https://deno.land/x/deno_dom@v0.1.38/build/deno-wasm/deno-wasm.js": "98b1ad24a1c13284557917659402202e5c5258ab1431b3f3a82434ad36ffa05a",
"https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm-noinit.ts": "f8798dbf3b3c54d16ec4d22c16e3c63e17f012b6b360179275b1999d8947182f",
"https://deno.land/x/deno_dom@v0.1.38/src/api.ts": "0ff5790f0a3eeecb4e00b7d8fbfa319b165962cf6d0182a65ba90f158d74f7d7",
"https://deno.land/x/deno_dom@v0.1.38/src/constructor-lock.ts": "59714df7e0571ec7bd338903b1f396202771a6d4d7f55a452936bd0de9deb186",
"https://deno.land/x/deno_dom@v0.1.38/src/deserialize.ts": "f4d34514ca00473ca428b69ad437ba345925744b5d791cb9552e2d7a0e7b0439",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/document-fragment.ts": "a40c6e18dd0efcf749a31552c1c9a6f7fa614452245e86ee38fc92ba0235e5ae",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/document.ts": "b8f4e4ccabaaa063d6562a0f2f8dea9c0419515d63d8bd79bfde95f7cd64bd93",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/dom-parser.ts": "609097b426f8c2358f3e5d2bca55ed026cf26cdf86562e94130dfdb0f2537f92",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/element.ts": "77c454e228dfeb5c570da5aa61d91850400116bfa0f5a85505acdd3c667171a4",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/elements/html-template-element.ts": "127bb291bb08afeb7e9a66294a5aa6ff2780f4eb4601fa6f7869fe8b70a81472",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/html-collection.ts": "ae90197f5270c32074926ad6cf30ee07d274d44596c7e413c354880cebce8565",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/node-list.ts": "4c6e4b4585301d4147addaccd90cb5f5a80e8d6290a1ba7058c5e3dfea16e15d",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/node.ts": "3069e6fc93ac4111a136ed68199d76673339842b9751610ba06f111ba7dc10a7",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/custom-api.ts": "852696bd58e534bc41bd3be9e2250b60b67cd95fd28ed16b1deff1d548531a71",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/nwsapi-types.ts": "c43b36c36acc5d32caabaa54fda8c9d239b2b0fcbce9a28efb93c84aa1021698",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/nwsapi.js": "985d7d8fc1eabbb88946b47a1c44c1b2d4aa79ff23c21424219f1528fa27a2ff",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/selectors.ts": "83eab57be2290fb48e3130533448c93c6c61239f2a2f3b85f1917f80ca0fdc75",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/sizzle-types.ts": "78149e2502409989ce861ed636b813b059e16bc267bb543e7c2b26ef43e4798b",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/selectors/sizzle.js": "c3aed60c1045a106d8e546ac2f85cc82e65f62d9af2f8f515210b9212286682a",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/utils-types.ts": "96db30e3e4a75b194201bb9fa30988215da7f91b380fca6a5143e51ece2a8436",
"https://deno.land/x/deno_dom@v0.1.38/src/dom/utils.ts": "55f3e9dc71d6c4a54605888d3f99d26fb0cf9973924709f159252a6933ceeabe",
"https://deno.land/x/deno_dom@v0.1.38/src/parser.ts": "b65eb7e673fa7ca611de871de109655f0aa9fa35ddc1de73df1a5fc2baafc332",
"https://deno.land/x/escalade@v3.0.3/sync.ts": "493bc66563292c5c10c4a75a467a5933f24dad67d74b0f5a87e7b988fe97c104",
"https://deno.land/x/lz4@v0.1.2/mod.ts": "4decfc1a3569d03fd1813bd39128b71c8f082850fe98ecfdde20025772916582",
"https://deno.land/x/lz4@v0.1.2/wasm.js": "b9c65605327ba273f0c76a6dc596ec534d4cda0f0225d7a94ebc606782319e46",
"https://deno.land/x/y18n@v5.0.0-deno/build/lib/index.js": "92c4624714aa508d33c6d21c0b0ffa072369a8b306e5f8c7727662f570bbd026",
"https://deno.land/x/y18n@v5.0.0-deno/deno.ts": "80997f0709a0b43d29931e2b33946f2bbc32b13fd82f80a5409628455427e28d",
"https://deno.land/x/y18n@v5.0.0-deno/lib/platform-shims/deno.ts": "8fa2c96ac03734966260cfd2c5bc240e41725c913e5b64a0297aede09f52b39d",
"https://deno.land/x/y18n@v5.0.8-deno/build/lib/index.js": "6d1e9719542e35baa034477f26f2482d35bbffe6a67e154cfe37b324dc440ab7",
"https://deno.land/x/y18n@v5.0.8-deno/deno.ts": "32748759463b3c621fe5f7a43de044ede4373613dd800f0152f99f4db2c0a992",
"https://deno.land/x/y18n@v5.0.8-deno/lib/platform-shims/deno.ts": "8fa2c96ac03734966260cfd2c5bc240e41725c913e5b64a0297aede09f52b39d",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/argsert.js": "eb085555452eac3ff300935994a42f35d16e04cf698cb775cb5ad4f5653c0627",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/command.js": "6249ffd299e16a1e531ccff13a23aed7b7eef37e20b6e6ab7f254413aece6ca6",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/completion-templates.js": "d9bbed244af4394b786f8abce9efbbdc3777a73458ebd7b6bf23b2495ac11027",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/completion.js": "62e41220b5baa7c082f72638c7eab23a69fff46a78011f2c448e2a2f1fcfd05a",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/middleware.js": "6ab9c953a83264739aa50d7fa6b1ab693500336dfd593b9958865e12beb8bdeb",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/parse-command.js": "327242c0afae207b7aefa13133439e3b321d7db4229febc5b7bd5285770ac7f7",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/typings/common-types.js": "9618b81a86acb88a61fd9988e9bc3ec21c5250d94fc2231ba7d898e71500789d",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/usage.js": "31faaa7aa61e5a57a2cac5a269b773aa8b1fcab2db7cac2f8252396f3ccc2f5e",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/apply-extends.js": "64640dce92669705abead3bdbe2c46c8318c8623843a55e4726fb3c55ff9dd1d",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/is-promise.js": "be45baa3090c5106dd4e442cceef6b357a268783a2ee28ec10fe131a8cd8db72",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/levenshtein.js": "d8638efc3376b5f794b1c8df6ef4f3d484b29d919127c7fdc242400e3cfded91",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/maybe-async-result.js": "31cf4026279e14c87d16faa14ac758f35c8cc5795d29393c5ce07120f5a3caf6",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/obj-filter.js": "5523fb2288d1e86ed48c460e176770b49587554df4ae2405b468c093786b040b",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/set-blocking.js": "6fa8ffc3299f456e42902736bae35fbc1f2dc96b3905a02ba9629f5bd9f80af1",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/utils/which-module.js": "9267633b2c9f8990b2c699101b641e59ae59932e0dee5270613c0508bfa13c5d",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/validation.js": "af040834cb9201d4238bbeb8f673eb2ebaff9611857270524a7c86dfcf2ca51b",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/yargs-factory.js": "05326932b431801d7459d5b14b21f73f13ebd74a8a74e9b7b8cec5f99ba14819",
"https://deno.land/x/yargs@v17.7.2-deno/build/lib/yerror.js": "9729aaa8bce1a0d00c57f470efb2ad76ad2988661bb48f3769e496a3435b4462",
"https://deno.land/x/yargs@v17.7.2-deno/deno.ts": "f3df0bfd08ba367ec36dc59ef6cab1a391ace49ad44387ec5fe5d76289af08af",
"https://deno.land/x/yargs@v17.7.2-deno/lib/platform-shims/deno.ts": "1d3d490a7f3c6f971a44dd92e12a042f988f1b6496df3a9c43ccc69563032dff",
"https://deno.land/x/yargs_parser@v20.2.4-deno/build/lib/string-utils.js": "12fc056b23703bc370aae5b179dc5abee53fca277abc30eaf76f78d2546d6413",
"https://deno.land/x/yargs_parser@v20.2.4-deno/build/lib/tokenize-arg-string.js": "7e0875b11795b8e217386e45f14b24a6e501ebbc62e15aa469aa8829d4d0ee61",
"https://deno.land/x/yargs_parser@v20.2.4-deno/build/lib/yargs-parser.js": "453200a7dfbb002e605d8009b7dad30f2b1d93665e046ab89c073a4fe63dfd48",
"https://deno.land/x/yargs_parser@v20.2.4-deno/deno.ts": "ad53c0c82c3982c4fc5be9472384b259e0a32ce1f7ae0f68de7b2445df5642fc"
},
"npm": {
"specifiers": {
"@mozilla/readability@^0.4.4": "@mozilla/readability@0.4.4",
"user-agents@1.0": "user-agents@1.0.1444"
},
"packages": {
"@mozilla/readability@0.4.4": {
"integrity": "sha512-MCgZyANpJ6msfvVMi6+A0UAsvZj//4OHREYUB9f2087uXHVoU+H+SWhuihvb1beKpM323bReQPRio0WNk2+V6g==",
"dependencies": {}
},
"detect-indent@6.0.0": {
"integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==",
"dependencies": {}
},
"docopt@0.6.2": {
"integrity": "sha512-NqTbaYeE4gA/wU1hdKFdU+AFahpDOpgGLzHP42k6H6DKExJd0A55KEVWYhL9FEmHmgeLvEU2vuKXDuU+4yToOw==",
"dependencies": {}
},
"dot-json@1.3.0": {
"integrity": "sha512-Pu11Prog/Yjf2lBICow82/DSV46n3a2XT1Rqt/CeuhkO1fuacF7xydYhI0SwQx2Ue0jCyLtQzgKPFEO6ewv+bQ==",
"dependencies": {
"detect-indent": "detect-indent@6.0.0",
"docopt": "docopt@0.6.2",
"underscore-keypath": "underscore-keypath@0.0.22"
}
},
"lodash.clonedeep@4.5.0": {
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==",
"dependencies": {}
},
"underscore-keypath@0.0.22": {
"integrity": "sha512-fU7aYj1J2LQd+jqdQ67AlCOZKK3Pl+VErS8fGYcgZG75XB9/bY+RLM+F2xEcKHhHNtLvqqFyXAoZQlLYfec3Xg==",
"dependencies": {
"underscore": "underscore@1.13.6"
}
},
"underscore@1.13.6": {
"integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==",
"dependencies": {}
},
"user-agents@1.0.1444": {
"integrity": "sha512-6WXJ0RZuUKgif1rW5FN02HnpoJ8EzH6COQoXCiVStZEVPz+YnAx3iA48etY3ZD4UwueYN9ALC7j4ayHvYEh7tA==",
"dependencies": {
"dot-json": "dot-json@1.3.0",
"lodash.clonedeep": "lodash.clonedeep@4.5.0"
}
}
}
}
}

@ -4,7 +4,7 @@
Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
Copyright (C) 2021 gardenapple
Copyright (C) 2022 gardenapple
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -20,7 +20,12 @@ Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const fs = require("fs");
const path = require("path");
const process = require("process");
const yargs = require("yargs");
const y18n = require("y18n");
// GNU gettext gives preference to LANGUAGE above all else, but this order is consistent with Yargs:
const locale = (
@ -31,541 +36,79 @@ const locale = (
"en_US"
).replace(/[.:].*/, '');
const yargs = require("yargs");
const __ = require("y18n")({
const __ = y18n({
locale: locale,
updateFiles: false,
directory: path.resolve(__dirname, "locales")
directory: path.resolve(__dirname, "./locales")
}).__;
//JSDOM, fs, Readability, and Readability-readerable are loaded on-demand.
// Process exit codes reported on failure. The man page lists these values
// and points to sysexits(3) for their origin.
const ExitCodes = {
badUsageCLI: 64, // bad CLI arguments
dataError: 65, // data format error: Readability could not parse the document
noInput: 66, // no such file (set for ENOENT)
noHost: 68, // host not found (set for ENOTFOUND)
serviceUnavailable: 69, // URL inaccessible; also the fallback for unrecognized load errors
noPermission: 77 // set for EACCES
};
let errored = false;
/**
 * Record that the run failed.
 *
 * Raises the module-level `errored` flag and stores `exitCode` as the code
 * the process will exit with. Does not terminate the process itself.
 */
function setErrored(exitCode) {
	errored = true;
	process.exitCode = exitCode;
}
//
//Parsing arguments
//
// Maps every name accepted by --properties to a getter called as
// (article, singleLine, window), where `article` is the object returned by
// Readability's parse(). Insertion order matters: the keys are offered as the
// option's `choices` and are iterated when all properties are dumped as JSON.
const Properties = new Map([
// The title wrapped in an <h1>, HTML-escaped via escapeHTML().
["html-title", (article, singleLine, window) =>
`<h1>${escapeHTML(Properties.get("title")(article, singleLine, window), window.document)}</h1>`
],
// For title/excerpt/byline, a truthy singleLine collapses each run of
// newlines into a single space.
["title", (article, singleLine) =>
singleLine ? article.title.replace(/\n+/gm, ' ') : article.title
],
["excerpt", (article, singleLine) =>
singleLine ? article.excerpt.replace(/\n+/gm, ' ') : article.excerpt
],
["byline", (article, singleLine) =>
singleLine ? article.byline.replace(/\n+/gm, ' ') : article.byline
],
["length", article => article.length],
["dir", article => article.dir],
["text-content", article => article.textContent],
// Article HTML, passed through DOMPurify unless --insane was given.
// dompurify is only loaded when sanitizing is actually needed.
["html-content", (article, _, window) => {
if (!args["insane"]) {
const createDOMPurify = require("dompurify");
const DOMPurify = createDOMPurify(window);
return DOMPurify.sanitize(article.content);
}
return article.content;
}]
]);
const LowConfidenceMode = {
keep: "keep",
force: "force",
exit: "exit"
};
const readabilityOptions = {};
//backwards compat with old, comma-separated values
function yargsCompatProperties(args) {
if (args["properties"]) {
for (var i = 0; i < args["properties"].length; i++) {
const property = args["properties"][i];
if (property.indexOf(',') > -1) {
const split = args["properties"][i].split(',');
args["properties"].splice(i, 1, ...split);
continue;
}
if (!Properties.has(property)) {
args["properties"].splice(i, 1);
i--;
if (!args["--"])
args["--"] = [ property ];
else
args["--"].push(property);
}
}
}
}
//Positional sometimes don't get recognized when they're put
//after other arguments, I think it's an oversight in yargs.
function yargsFixPositional(args) {
if (args["--"]) {
if (!args["source"])
args["source"] = args["--"].shift();
args["_"] = args["--"];
}
}
let args = yargs
.version(false)
.command("* [source]", __`Process HTML input`, (yargs) => {
yargs.positional("source", {
desc: __`A file, an http(s) URL, or '-' for standard input`,
type: "string"
});
})
.completion('--completion', false)
.middleware([ yargsCompatProperties, yargsFixPositional ], true) //middleware seems to be buggy
.option("help", {
alias: 'h',
type: "boolean",
desc: __`Show help`
})
.option("completion", {
type: "boolean",
desc: __`Print script for bash/zsh completion`
})
.option("base", {
alias: 'b',
type: "string",
desc: __`Set the document URL when parsing standard input or a local file (this affects relative links)`
})
.option("insane", {
alias: 'S',
type: "boolean",
desc: __`Don't sanitize HTML`
})
.option("insecure", {
alias: 'K',
type: "boolean",
desc: __`Allow invalid SSL certificates`
})
.option("is-file", {
alias: 'f',
type: "boolean",
desc: __`Interpret SOURCE as a file name rather than a URL`,
default: false,
hidden: true,
//deprecated: true
})
.option("is-url", {
alias: 'U',
type: "boolean",
desc: __`(deprecated) Interpret SOURCE as a URL rather than file name`,
hidden: true,
//deprecated: true
})
.option("json", {
alias: 'j',
type: "boolean",
desc: __`Output properties as a JSON payload`
})
.option("low-confidence", {
alias: 'l',
type: "string",
desc: __`What to do if Readability.js is uncertain about what the core content actually is`,
choices: ["keep", "force", "exit"]
//default: "no-op", //don't set default because completion won't work
})
.option("keep-classes", {
alias: 'C',
type: "boolean",
desc: __`Preserve all CSS classes for input elements, instead of adapting to Firefox's Reader Mode`
})
.option("output", {
alias: 'o',
type: "string",
desc: __`The file to which the result should be output`
})
.option("properties", {
alias: 'p',
type: "array",
desc: __`Output specific properties of the parsed article`,
choices: Array.from(Properties.keys())
})
.option("proxy", {
alias: 'x',
type: "string",
desc: __`Use specified proxy (can also use HTTPS_PROXY environment variable)`
})
.option("quiet", {
alias: 'q',
type: "boolean",
desc: __`Don't output extra information to stderr`
})
.option("style", {
alias: 's',
type: "string",
desc: __`Specify .css file for stylesheet`
})
.option("url", {
alias: 'u',
type: "string",
desc: __`(deprecated) alias for --base`,
hidden: true,
//deprecated: true //completion script does not respect this value, so just say it in the description
})
.option("user-agent", {
alias: 'A',
type: "string",
desc: __`Set custom user agent string`
})
.option("version", {
alias: 'V',
type: "boolean",
desc: __`Print version`
})
.epilogue(__`See the manual for more info: man readability-cli`)
.wrap(Math.min(yargs.terminalWidth(), 100))
.strict()
.parse();
if (args["is-url"]) {
console.error(__`Note: --is-url option is deprecated.`);
}
if (args["url"]) {
console.error(__`Note: --url option is deprecated, please use --base instead.`);
args["base"] = args["url"];
}
function printUsage() {
yargs.showHelp();
}
if (args["completion"]) {
yargs.showCompletionScript();
return;
}
const { Readability, isProbablyReaderable } = require("@mozilla/readability");
if (args["version"]) {
function printVersion() {
console.log(`readability-cli v${require("./package.json").version}`);
console.log(`Node.js ${process.version}`);
return;
}
if (args["keep-classes"]) {
readabilityOptions["keepClasses"] = true;
}
if (!args["low-confidence"]) {
args["low-confidence"] = LowConfidenceMode.keep;
args['l'] = LowConfidenceMode.keep;
} else if (!Object.values(LowConfidenceMode).includes(args["low-confidence"])) {
console.error(__`Unknown mode: ${args["low-confidence"]}\nPlease use one of: keep, force, exit`);
console.error(__`Use --help for more info.`);
setErrored(ExitCodes.badUsageCLI);
return;
}
let inputArg;
if (!args["source"]) {
if (process.stdin.isTTY) {
console.error(__`No input provided`);
printUsage();
setErrored(ExitCodes.badUsageCLI);
return;
} else {
inputArg = '-'
}
} else {
inputArg = args["source"];
}
//Get input parameter, remove inputArg from args
let inputFile;
let inputURL;
let inputIsFromStdin = false;
if (args["is-url"] && inputArg.search(/^\w+:\/\//) == -1)
inputArg = "https://" + inputArg;
if (!args["is-file"] && inputArg.search(/^\w+:\/\//) != -1)
inputURL = inputArg;
else if (inputArg == '-')
inputIsFromStdin = true;
else
inputFile = inputArg;
const outputArg = args["output"];
const documentURL = args["base"] || inputURL;
const outputJSON = args["json"];
const proxy = args["proxy"] || process.env.https_proxy || process.env.HTTPS_PROXY || process.env.http_proxy;
let wantedProperties;
if (args["properties"]) {
wantedProperties = args["properties"];
}
if (errored) {
printUsage();
return;
}
async function read(stream) {
const chunks = [];
for await (const chunk of stream){
chunks.push(chunk);
}
return Buffer.concat(chunks).toString("utf8");
}
if (inputIsFromStdin) {
if (!args["quiet"]) {
console.error("Reading...");
if (!documentURL)
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
}
read(process.stdin).then(result => {
const JSDOM = require("jsdom").JSDOM;
onLoadDOM(new JSDOM(result, { url: documentURL }));
async function parseDOMFromURL(url, proxy, strictSSL, userAgent) {
const { JSDOM, ResourceLoader } = require("jsdom");
const resourceLoader = new ResourceLoader({
proxy: proxy,
strictSSL: strictSSL,
userAgent: userAgent
});
} else {
if (!args["quiet"])
console.error(__`Retrieving...`);
const jsdom = require("jsdom");
let promiseGetHTML;
if (inputURL) {
const resourceLoader = new jsdom.ResourceLoader({
proxy: proxy,
strictSSL: !args["insecure"],
userAgent: args["user-agent"]
});
promiseGetHTML = jsdom.JSDOM.fromURL(inputURL, {
resources: resourceLoader
});
} else if (inputFile) {
promiseGetHTML = jsdom.JSDOM.fromFile(inputFile, {
url: documentURL,
// workaround for https://gitlab.com/gardenappl/readability-cli/-/issues/9
contentType: "text/html; charset=utf-8"
});
}
promiseGetHTML.then(onLoadDOM, onLoadDOMError);
}
const { Readability, isProbablyReaderable } = require("@mozilla/readability");
//Taken from https://stackoverflow.com/a/22706073/5701177
function escapeHTML(string, document) {
var p = document.createElement("p");
p.appendChild(document.createTextNode(string));
return p.innerHTML;
}
function onLoadDOM(dom) {
const window = dom.window
let shouldParseArticle = true;
if (args["low-confidence"] != LowConfidenceMode.force)
shouldParseArticle = isProbablyReaderable(window.document);
if (!shouldParseArticle) {
if (args["low-confidence"] == LowConfidenceMode.exit) {
console.error(__`Not sure if this document should be processed, exiting`);
setErrored(ExitCodes.dataError);
return;
} else {
if (!args["quiet"])
console.error(__`Not sure if this document should be processed. Not processing`);
if (args["json"] || wantedProperties) {
console.error(__`Can't output properties`);
setErrored(ExitCodes.dataError);
return;
}
shouldParseArticle = false;
}
}
let writeStream;
if (outputArg) {
const fs = require("fs");
writeStream = fs.createWriteStream(outputArg);
} else {
writeStream = process.stdout;
}
if (!shouldParseArticle) {
//Ignore wantedProperties, that should've thrown an error before
writeStream.write(getHTML(window));
return;
}
if (!args["quiet"])
console.error(__`Processing...`);
const reader = new Readability(window.document, readabilityOptions);
const article = reader.parse();
if (!article) {
if (args["low-confidence"] == LowConfidenceMode.keep) {
if (!args["quiet"])
console.error(__`Couldn't process document.`);
writeStream.write(getHTML(window));
} else {
console.error(__`Couldn't process document.`);
setErrored(ExitCodes.dataError);
}
return;
}
if (outputJSON) {
let result = {};
if (wantedProperties) {
for (propertyName of wantedProperties)
result[propertyName] = Properties.get(propertyName)(article, false, window);
} else {
for (const [name, func] of Properties) {
result[name] = func(article, false, window);
}
}
writeStream.write(JSON.stringify(result));
} else {
if (wantedProperties) {
for (propertyName of wantedProperties)
writeStream.write(Properties.get(propertyName)(article, true, window) + '\n');
} else {
writeStream.write(`<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">`);
if (args["style"] || !args["keep-classes"]) {
const cssHref = args["style"] || "chrome://global/skin/aboutReader.css";
writeStream.write(`
<link rel="stylesheet" href="${cssHref}" type="text/css">`);
}
writeStream.write(`
<title>${escapeHTML(Properties.get("title")(article, false, window), window.document)}</title>
</head>
`
);
if (!args["keep-classes"]) {
//Add a few divs and classes so that Firefox Reader Mode CSS works well
writeStream.write(`
<body class="light sans-serif loaded" style="--font-size:14pt; --content-width:40em;">
<div class="container" `
);
const contentDir = Properties.get("dir")(article, false, window);
if (contentDir)
writeStream.write(`dir="${contentDir}">`);
else
writeStream.write('>');
writeStream.write(`
<div class="header reader-header reader-show-element">
<h1 class="reader-title">${escapeHTML(Properties.get("title")(article, false, window), window.document)}</h1>`);
const author = Properties.get("byline")(article, false, window);
if (author) {
writeStream.write(`
<div class="credits reader-credits">${escapeHTML(author, window.document)}</div>`);
}
writeStream.write(`
</div>
<hr>
<div class="content">
<div class="moz-reader-content reader-show-element">
`
);
writeStream.write(Properties.get("html-content")(article, false, window));
writeStream.write(`
</div>
</div>
</div>
`
);
} else {
writeStream.write("\n<body>\n");
writeStream.write(Properties.get("html-title")(article, false, window));
writeStream.write('\n');
const author = Properties.get("byline")(article, false, window);
if (author) {
writeStream.write(`<p><i>${escapeHTML(author, window.document)}</i></p>`);
}
writeStream.write("\n<hr>\n");
writeStream.write(Properties.get("html-content")(article, false, window));
}
writeStream.write("\n</body></html>");
}
}
const dom = await JSDOM.fromURL(url, {
resources: resourceLoader
});
return [dom.window.document, dom.window];
}
function onLoadDOMError(error) {
if (error.error) {
//Nested error?
error = error.error;
}
if (error instanceof TypeError && inputURL) {
console.error(__`Invalid URL: ${inputURL}`);
setErrored(ExitCodes.badUsageCLI);
} else if (error.code == "ENOENT") {
console.error(error.message);
setErrored(ExitCodes.noInput);
} else if (error.code == "EACCES") {
console.error(error.message);
setErrored(ExitCodes.noPermission);
} else if (error.code == "ENOTFOUND") {
console.error(__`Host not found: ${error.hostname}`);
setErrored(ExitCodes.noHost);
} else if (error.statusCode) {
console.error(__`Status error: ${error.response.statusMessage}`);
setErrored(ExitCodes.serviceUnavailable);
} else {
console.error(error.message);
//console.error(error);
setErrored(ExitCodes.serviceUnavailable);
}
/**
 * Parse an HTML string with JSDOM.
 *
 * jsdom is loaded lazily, on first use.
 *
 * @param html the markup to parse
 * @param url  passed to JSDOM as the document URL
 * @returns a two-element array: [document, window]
 */
function parseDOM(html, url) {
	const jsdom = require("jsdom");
	const { window } = new jsdom.JSDOM(html, { url });
	return [window.document, window];
}
function getHTML(window) {
let html = window.document.documentElement.outerHTML;
if (!args["insane"]) {
const createDOMPurify = require("dompurify");
const DOMPurify = createDOMPurify(window);
return DOMPurify.sanitize(html, {IN_PLACE: true, WHOLE_DOCUMENT: true});
}
return html;
}
/**
 * Load a local HTML file with JSDOM.
 *
 * jsdom is loaded lazily, on first use.
 *
 * @param file path of the file to read
 * @param url  passed to JSDOM as the document URL
 * @returns promise of a two-element array: [document, window]
 */
async function parseDOMFromFile(file, url) {
	const jsdom = require("jsdom");
	const options = {
		url,
		// workaround for https://gitlab.com/gardenappl/readability-cli/-/issues/9
		contentType: "text/html; charset=utf-8"
	};
	const { window } = await jsdom.JSDOM.fromFile(file, options);
	return [window.document, window];
}
/**
 * Sanitize an HTML string with DOMPurify (loaded lazily).
 *
 * @param html   the markup to clean
 * @param window the DOM window DOMPurify is instantiated with
 * @returns the result of DOMPurify.sanitize(html)
 */
function sanitizeHTML(html, window) {
	const purifier = require("dompurify")(window);
	return purifier.sanitize(html);
}
/**
 * Sanitize a whole document in place with DOMPurify (loaded lazily) and
 * return its serialized HTML.
 *
 * The root element is handed to DOMPurify with IN_PLACE and WHOLE_DOCUMENT
 * set; the return value of sanitize() is ignored and the document's
 * outerHTML is read afterwards.
 *
 * @param document the document to clean (mutated)
 * @param window   the DOM window DOMPurify is instantiated with
 */
function sanitizeDOM(document, window) {
	const purifier = require("dompurify")(window);
	const purifyOptions = {IN_PLACE: true, WHOLE_DOCUMENT: true};
	purifier.sanitize(document.documentElement, purifyOptions);
	return document.documentElement.outerHTML;
}
// common.mjs is an ES module (.mjs) while this script uses require(), so it
// is loaded with a dynamic import(). Its default export is the shared entry
// point; it receives the Node-specific modules and helpers defined in this
// file as positional arguments, so the order below must not change.
import("./common.mjs").then((module) => {
const readable = module.default;
readable(
Buffer,
fs,
process,
yargs,
__,
Readability,
isProbablyReaderable,
printVersion,
parseDOM,
parseDOMFromFile,
parseDOMFromURL,
sanitizeDOM,
sanitizeHTML
);
});

@ -34,5 +34,6 @@
"Host not found: '%s'": "Host not found: '%s'",
"Unknown mode: %s\nPlease use one of: keep, force, exit": "Unknown mode: %s\nPlease use one of: keep, force, exit",
"Use --help for more info.": "Use --help for more info.",
"See the manual for more info: man readability-cli": "See the manual for more info: man readability-cli"
"See the manual for more info: man readability-cli": "See the manual for more info: man readability-cli",
"Error: option --%s is not supported in Deno.": "Error: option --%s is not supported in Deno."
}

@ -34,5 +34,6 @@
"Host not found: '%s'": "Сервер не найден: '%s'",
"Unknown mode: %s\nPlease use one of: keep, force, exit": "Неизвестный режим: %s\nПожалуйста, используйте один из: keep, force, exit",
"Use --help for more info.": "Чтобы узнать больше, воспользуйтесь --help",
"See the manual for more info: man readability-cli": "Чтобы узнать больше, загляните в мануал: man readability-cli"
"See the manual for more info: man readability-cli": "Чтобы узнать больше, загляните в мануал: man readability-cli",
"Error: option --%s is not supported in Deno.": "Ошибка: параметр --%s не поддерживается в Deno."
}

1498
package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -1,6 +1,6 @@
{
"name": "readability-cli",
"version": "2.3.5",
"version": "2.4.5",
"description": "Firefox Reader Mode in your terminal - get useful text from a web page using Mozilla's Readability library",
"main": "index.js",
"bin": {
@ -28,13 +28,13 @@
],
"license": "GPL-3.0-only",
"dependencies": {
"@mozilla/readability": "^0.4.1",
"dompurify": "^2.3.4",
"jsdom": "^19.0.0",
"@mozilla/readability": "^0.4.4",
"dompurify": "^3.0.5",
"jsdom": "^22.1.0",
"y18n": "^5.0.8",
"yargs": "^17.3.0"
"yargs": "^17.7.2"
},
"devDependencies": {
"marked-man": "^0.7.0"
"marked-man": "^1.3.1"
}
}

@ -1,4 +1,4 @@
.TH "READABILITY\-CLI" "1" "December 2021" "v2.3.5" ""
.TH "READABILITY\-CLI" "1" "September 2023" "v2.4.5"
.SH "NAME"
\fBreadability-cli\fR \- get useful text from a web page
.SH SYNOPSIS
@ -14,45 +14,52 @@ The \fISOURCE\fR can be a URL, a file, or '\-' for standard input\.
.SH OPTIONS
.P
\fB\-\-help\fP
.RS 0
.RS 1
.IP \(bu 2
Show help message, and exit\.
.RE
.P
\fB\-b\fP, \fB\-\-base\fP \fIURL\fR
.RS 0
.RS 1
.IP \(bu 2
Specify the document's URL\. This affects relative links: they will not work if \fBreadability\-cli\fR does not know the base URL\. You only need this option if you read HTML from a local file, or from standard input\.
.RE
.P
\fB\-i\fP, \fB\-\-insane\fP
.RS 0
\fB\-S\fP, \fB\-\-insane\fP
.RS 1
.IP \(bu 2
Don't sanitize HTML\.
.RE
.P
\fB\-K\fP, \fB\-\-insecure\fP
.RS 0
.RS 1
.IP \(bu 2
Allow invalid SSL certificates\.
(Node\.js version only) Allow invalid SSL certificates\.
.RE
.P
\fB\-j\fP, \fB\-\-json\fP
.RS 0
.RS 1
.IP \(bu 2
Output all known properties of the document as JSON (see \fBProperties\fR subsection)\.
.RE
.P
\fB\-l\fP, \fB\-\-low\-confidence\fP \fIMODE\fR
.RS 0
.RS 1
.IP \(bu 2
What to do if Readability is uncertain about what the core content actually is\. The possible modes are:
.RS
.RS 1
.IP \(bu 2
\fBkeep\fR \- When unsure, don't touch the HTML, output as\-is\.
.IP \(bu 2
@ -67,71 +74,81 @@ The default value is \fBkeep\fR\|\. If the \fB\-\-properties\fP or \fB\-\-json\f
.RE
.P
\fB\-C\fP, \fB\-\-keep\-classes\fP
.RS 0
.RS 1
.IP \(bu 2
Preserve CSS classes for input elements\. By default, CSS classes are stripped, and the input is adapted for Firefox's Reader View\.
.RE
.P
\fB\-o\fP, \fB\-\-output\fP \fIFILE\fR
.RS 0
.RS 1
.IP \(bu 2
Output the result to FILE\.
.RE
.P
\fB\-p\fP, \fB\-\-properties\fP \fIPROPERTIES\fR\|\.\.\.
.RS 0
.RS 1
.IP \(bu 2
Output specific properties of the document (see \fBProperties\fR subsection)\.
.RE
.P
\fB\-x\fP, \fB\-\-proxy\fP \fIURL\fR
.RS 0
.RS 1
.IP \(bu 2
Use specified proxy (can also use \fBHTTPS_PROXY\fP environment variable)\.
(Node\.js version only) Use specified proxy\. Node\.js and Deno can also use the \fBHTTPS_PROXY\fP environment variable\.
.RE
.P
\fB\-q\fP, \fB\-\-quiet\fP
.RS 0
.RS 1
.IP \(bu 2
Don't print extra information\.
.RE
.P
\fB\-s\fP, \fB\-\-style\fP
.RS 0
.RS 1
.IP \(bu 2
Specify \fI\|\.css\fR file for stylesheet\.
.RE
.P
\fB\-A\fP, \fB\-\-user\-agent\fP \fISTRING\fR
.RS 0
.RS 1
.IP \(bu 2
Set custom user agent string\.
.RE
.P
\fB\-V\fP, \fB\-\-version\fP
.RS 0
.RS 1
.IP \(bu 2
Print \fBreadability\-cli\fR and Node\.js version, then exit\.
Print \fBreadability\-cli\fR and Node\.js/Deno version, then exit\.
.RE
.P
\fB\-\-completion\fP
.RS 0
.RS 1
.IP \(bu 2
Print script for shell completion, and exit\. Provides Zsh completion if the current shell is zsh, otherwise provides Bash completion\.
Print script for shell completion, and exit\. Provides Zsh completion if the current shell is zsh, otherwise provides Bash completion\. Currently broken when using Deno\.
.RE
.SS Properties
.P
The \fB\-\-properties\fP option accepts a list of values, separated by spaces\. Suitable values are:
.RS 0
.RS 1
.IP \(bu 2
\fBtitle\fR \- The title of the article\.
.IP \(bu 2
@ -157,38 +174,43 @@ Properties are printed line by line, in the order specified by the user\. Only "
As usual, exit code 0 indicates success, and anything other than 0 is an error\. \fBreadability\-cli\fR uses standard* error codes:
.TS
tab(|) expand nowarn box;
l l.
r l.
T{
Error code
T}|T{
Meaning
T}
_
=
T{
\fB64\fR
T}|T{
Bad CLI arguments
T}
_
T{
\fB65\fR
T}|T{
Data format error: can't parse document using Readability\.
T}
_
T{
\fB66\fR
T}|T{
No such file
T}
_
T{
\fB68\fR
T}|T{
Host not found
T}
_
T{
\fB69\fR
T}|T{
URL inaccessible
T}
_
T{
\fB77\fR
T}|T{
@ -201,11 +223,10 @@ T}
.P
\fBreadability\-cli\fR supports localization, using the environment variables \fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP and \fBLANGUAGE\fP, in that order\. Only one language at a time is supported\.
.P
\fBHTTPS_PROXY\fP will set the HTTPS proxy, as previously stated, however the \fB\-\-proxy\fP option overrides this\. Lowercase \fBhttps_proxy\fP and \fBhttp_proxy\fP are also recognized\.
\fBHTTPS_PROXY\fP will set the HTTPS proxy, as previously stated; however, the \fB\-\-proxy\fP option overrides this\. Node\.js also recognizes lowercase \fBhttps_proxy\fP and \fBhttp_proxy\fP, for compatibility with \fBcurl\fP\|\.
.SH EXAMPLE
.P
\fBRead HTML from a file and output the result to the console:\fR
.P
.RS 2
.nf
readable index\.html
@ -213,7 +234,6 @@ readable index\.html
.RE
.P
\fBFetch a random Wikipedia article, get its title and an excerpt:\fR
.P
.RS 2
.nf
readable https://en\.wikipedia\.org/wiki/Special:Random \-p title,excerpt
@ -221,7 +241,6 @@ readable https://en\.wikipedia\.org/wiki/Special:Random \-p title,excerpt
.RE
.P
\fBFetch a web page and read it in W3M:\fR
.P
.RS 2
.nf
readable https://www\.nytimes\.com/2020/01/18/technology/clearview\-privacy\-facial\-recognition\.html | w3m \-T text/html
@ -229,7 +248,6 @@ readable https://www\.nytimes\.com/2020/01/18/technology/clearview\-privacy\-fac
.RE
.P
\fBDownload a web page using cURL, parse it and output as JSON:\fR
.P
.RS 2
.nf
curl https://github\.com/mozilla/readability | readable \-\-base=https://github\.com/mozilla/readability \-\-json
@ -239,5 +257,8 @@ curl https://github\.com/mozilla/readability | readable \-\-base=https://github\
.P
\fBcurl\fR(1), \fBw3m\fR(1), \fBsysexits\fR(3)
.P
Source code, license, bug tracker and merge requests may be found on GitLab \fIhttps://gitlab\.com/gardenappl/readability\-cli\fR\|\.
Source code, license, bug tracker and merge requests may be found on
.UR https://gitlab.com/gardenappl/readability-cli
.I GitLab
.UE .

@ -22,13 +22,13 @@ The *SOURCE* can be a URL, a file, or '-' for standard input.
* Specify the document's URL. This affects relative links: they will not work if **readability-cli** does not know the base URL. You only need this option if you read HTML from a local file, or from standard input.
`-i`, `--insane`
`-S`, `--insane`
* Don't sanitize HTML.
`-K`, `--insecure`
* Allow invalid SSL certificates.
* (Node.js version only) Allow invalid SSL certificates.
`-j`, `--json`
@ -58,7 +58,7 @@ The *SOURCE* can be a URL, a file, or '-' for standard input.
`-x`, `--proxy` *URL*
* Use specified proxy (can also use `HTTPS_PROXY` environment variable).
* (Node.js version only) Use specified proxy. Node.js and Deno can also use the `HTTPS_PROXY` environment variable.
`-q`, `--quiet`
@ -74,11 +74,11 @@ The *SOURCE* can be a URL, a file, or '-' for standard input.
`-V`, `--version`
* Print **readability-cli** and Node.js version, then exit.
* Print **readability-cli** and Node.js/Deno version, then exit.
`--completion`
* Print script for shell completion, and exit. Provides Zsh completion if the current shell is zsh, otherwise provides Bash completion.
* Print script for shell completion, and exit. Provides Zsh completion if the current shell is zsh, otherwise provides Bash completion. Currently broken when using Deno.
### Properties
@ -114,7 +114,7 @@ As usual, exit code 0 indicates success, and anything other than 0 is an error.
**readability-cli** supports localization, using the environment variables `LC_ALL`, `LC_MESSAGES`, `LANG` and `LANGUAGE`, in that order. Only one language at a time is supported.
`HTTPS_PROXY` will set the HTTPS proxy, as previously stated, however the `--proxy` option overrides this. Lowercase `https_proxy` and `http_proxy` are also recognized.
`HTTPS_PROXY` will set the HTTPS proxy, as previously stated; however, the `--proxy` option overrides this. Node.js also recognizes lowercase `https_proxy` and `http_proxy`, for compatibility with `curl`.
## EXAMPLE

@ -0,0 +1,131 @@
#!/usr/bin/env -S deno run --allow-net --allow-read --allow-write --allow-env=HTTPS_PROXY,LC_ALL,LC_MESSAGES,LANG,LANGUAGE --no-prompt --no-check --
const version = "2.4.5"
import * as path from "https://deno.land/std@0.201.0/path/mod.ts"
import yargs from "https://deno.land/x/yargs@v17.7.2-deno/deno.ts"
import y18n from "https://deno.land/x/y18n@v5.0.8-deno/deno.ts"
import { initParser, DOMParser, DOMParserMimeType, Document, Element } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm-noinit.ts"
import * as ammonia from "https://deno.land/x/ammonia@0.3.1/mod.ts"
import { Buffer } from "node:buffer"
import fs from "node:fs"
import process from "node:process"
import { Readability, isProbablyReaderable } from "npm:@mozilla/readability@^0.4.4"
import UserAgent from "npm:user-agents@1.0"
// GNU gettext gives preference to LANGUAGE above all else, but this order is consistent with Yargs:
// The first non-empty variable wins; "en_US" is the fallback.
const locale = (
Deno.env.get("LC_ALL") ||
Deno.env.get("LC_MESSAGES") ||
Deno.env.get("LANG") ||
Deno.env.get("LANGUAGE") ||
"en_US"
// Drop everything from the first '.' or ':' onwards,
// e.g. "ru_RU.UTF-8" -> "ru_RU", "de:en" -> "de".
).replace(/[.:].*/, '')
// `__` is the translation tag used for all user-facing messages.
// Translations are looked up in the "locales" directory next to this script.
// NOTE(review): updateFiles: false presumably stops y18n from writing
// unknown strings back into the locale files — confirm against y18n docs.
const __ = y18n({
locale: locale,
updateFiles: false,
directory: path.join(path.dirname(path.fromFileUrl(import.meta.url)), "locales")
}).__
/** Print the readability-cli version, then the Deno runtime version, to stdout. */
function printVersion() {
	const lines = [
		`readability-cli v${version}`,
		`Deno ${Deno.version.deno}`
	]
	for (const line of lines)
		console.log(line)
}
/**
 * Download a page with fetch() and parse it via parseDOM().
 *
 * @param url         address of the page; also passed on as the document URL
 * @param _proxy      unused in this implementation
 * @param _strictSSL  unused in this implementation
 * @param userAgent   value for the User-Agent header; when null/undefined a
 *                    random desktop user agent is generated instead
 * @returns whatever parseDOM() resolves to
 * @throws a plain object `{ statusCode, response: { statusMessage } }` when
 *         the HTTP status is not OK. NOTE(review): this is deliberately not
 *         an Error; it presumably mirrors the error shape the shared error
 *         handler inspects (the Node handler visible in this diff reads
 *         exactly these fields) — confirm in common.mjs.
 */
async function parseDOMFromURL(url: string, _proxy: string, _strictSSL: boolean, userAgent: string) {
	// Start initializing the parser now so that it overlaps with the download.
	const initParserPromise = initParser()

	const userAgentString = userAgent ?? new UserAgent({ deviceCategory: "desktop" }).toString()
	const response = await fetch(url, {
		headers: {
			"User-Agent": userAgentString
		}
	})
	if (!response.ok) {
		throw {
			statusCode: response.status,
			response: {
				statusMessage: response.statusText
			}
		}
	}
	const text = await response.text()

	await initParserPromise

	// Keep only the media type, dropping parameters such as "; charset=utf-8".
	// The header may be missing or have no parameters at all; in both cases
	// slicing up to indexOf(';') would give a wrong result, so split instead.
	// An absent or empty header leaves the choice of default to parseDOM().
	const contentType = response.headers.get("Content-Type")
	const mimeType = contentType?.split(';', 1)[0]?.trim().toLowerCase() || undefined
	return parseDOM(text, url, mimeType as DOMParserMimeType | undefined)
}
/**
 * Work out the URL that relative links should be resolved against.
 *
 * Preference order: an absolute `<base href>`, then a relative `<base href>`
 * resolved against the document URL, then the document URL itself.
 * An unusable `<base href>` is ignored. An invalid `url` still throws
 * (TypeError from the URL constructor).
 */
function resolveBaseURL(baseHref: string | null | undefined, url?: string): URL | undefined {
	if (baseHref) {
		try {
			return new URL(baseHref)
		} catch {
			// Not absolute; try resolving it against the document URL below.
		}
		if (url) {
			try {
				return new URL(baseHref, url)
			} catch {
				// Unusable <base href>; fall back to the document URL.
			}
		}
	}
	return url ? new URL(url) : undefined
}

/**
 * Parse an HTML string with deno-dom and make relative `href` attributes
 * absolute.
 *
 * @param html      the markup to parse
 * @param url       URL of the document, used to resolve relative links when
 *                  the markup has no usable `<base href>`
 * @param mimeType  MIME type handed to the parser; defaults to "text/html"
 * @returns a one-element array containing the parsed document
 */
async function parseDOM(html: string, url?: string, mimeType?: DOMParserMimeType) {
	await initParser()
	const document = new DOMParser().parseFromString(html, mimeType ?? "text/html")!

	const baseHref = document.getElementsByTagName("base")[0]?.getAttribute("href")
	const baseURL = resolveBaseURL(baseHref, url)
	if (baseURL) {
		// Depth-first walk over every element, using an explicit stack.
		const nodes: Element[] = [document.documentElement!]
		while (nodes.length > 0) {
			const element = nodes.pop()!
			const href = element.getAttribute("href")
			if (href) {
				try {
					// Try to parse absolute URL
					new URL(href)
				} catch {
					// Assume href is a relative URL
					try {
						element.setAttribute("href", new URL(href, baseURL).href)
					} catch {
						// Neither absolute nor resolvable: leave the attribute
						// as it is rather than abort the whole parse.
					}
				}
			}
			nodes.push(...element.children)
		}
	}

	return [document]
}
/**
 * Read a local file, decode it with a default TextDecoder and parse it
 * via parseDOM().
 *
 * @param file path of the file to read
 * @param url  document URL forwarded to parseDOM()
 */
async function parseDOMFromFile(file: string, url: string) {
	const bytes = await Deno.readFile(file)
	const html = new TextDecoder().decode(bytes)
	return parseDOM(html, url)
}
/**
 * Clean an HTML string with ammonia, awaiting ammonia.init() first.
 *
 * @param html the markup to clean
 * @returns the result of ammonia.clean(html)
 */
async function sanitizeHTML(html: string) {
	await ammonia.init()
	const cleaned = ammonia.clean(html)
	return cleaned
}
/**
 * Serialize the document's root element and clean the resulting markup
 * with sanitizeHTML(). The document itself is not modified here.
 *
 * @param document the parsed document to serialize
 */
async function sanitizeDOM(document: Document) {
	const markup = document.documentElement!.outerHTML
	return await sanitizeHTML(markup)
}
// Shared entry point. It receives the Deno-specific modules and helpers
// defined in this file as positional arguments, so the order below must
// not change.
import readable from "./common.mjs"
await readable(
Buffer,
fs,
process,
yargs(Deno.args),
__,
Readability,
isProbablyReaderable,
printVersion,
parseDOM,
parseDOMFromFile,
parseDOMFromURL,
sanitizeDOM,
sanitizeHTML
)
// A non-zero process.exitCode means an error was recorded; exit explicitly.
// NOTE(review): presumably needed so the recorded code is actually used as
// the exit status under Deno — confirm.
if (process.exitCode) {
process.exit()
}
Loading…
Cancel
Save