You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
291 lines
8.0 KiB
JavaScript
291 lines
8.0 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
/*
|
|
Firefox Reader Mode in your terminal! - CLI tool for Mozilla's Readability library
|
|
Copyright (C) 2020 gardenapple
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
const Readability = require("readability");
|
|
const JSDOM = require("jsdom").JSDOM;
|
|
const parseArgs = require("minimist");
|
|
const fs = require("fs");
|
|
const he = require("he");
|
|
|
|
|
|
/**
 * Exit codes this tool reports through process.exitCode.
 * The numeric values follow the BSD sysexits.h conventions.
 */
const ExitCodes = {
    // The command line was used incorrectly (unknown option, bad value, wrong number of inputs).
    badUsageCLI: 64,
    // The input was invalid: malformed URL or a document Readability could not parse.
    dataError: 65,
    // The input file does not exist.
    noInput: 66,
    // The remote host could not be found, or the HTTP request returned an error status.
    noHost: 68,
    // The input file could not be read because of missing permissions.
    noPermission: 77,
};
|
|
|
|
// Becomes true as soon as any failure has been recorded through setErrored().
let errored = false;

/**
 * Records a failure without terminating the process immediately.
 *
 * Marks the run as errored and stores the code Node will exit with once
 * the event loop drains.
 *
 * @param {number} exitCode - Exit status to report (see ExitCodes).
 */
function setErrored(exitCode) {
    errored = true;
    process.exitCode = exitCode;
}
|
|
|
|
/**
 * Prints the command-line help text to stderr with a single console.error call.
 * The text starts with an empty line and has no trailing newline of its own.
 */
function printUsage() {
    const usageLines = [
        ``,
        `Usage:`,
        `    readable [SOURCE] [options]`,
        `    readable [options] -- [SOURCE]`,
        `    (where SOURCE is a file, an http(s) URL, or '-' for standard input)`,
        ``,
        `Options:`,
        `    --help                     Print help`,
        `    -o  --output OUTPUT_FILE   Output to OUTPUT_FILE`,
        `    -p  --properties PROPS...  Output specific properties of the parsed article`,
        `    -V  --version              Print version`,
        `    -u  --url                  Set the document URL when parsing standard input or a local file (this affects relative links and such)`,
        `    -U  --is-url               Interpret SOURCE as a URL rather than file name`,
        `    -q  --quiet                Don't output extra information to stderr`,
        ``,
        `The --properties option accepts a comma-separated list of values (with no spaces in-between). Suitable values are:`,
        `    html-title    Outputs the article's title, wrapped in an <h1> tag.`,
        `    title         Outputs the title in the format "Title: $TITLE".`,
        `    excerpt       Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT"`,
        `    byline        Author metadata, in the format "Author: $AUTHOR"`,
        `    length        Length of the article in characters, in the format "Length: $LENGTH"`,
        `    dir           Content direction, is either "Direction: ltr" or "Direction: rtl"`,
        `    html-content  Outputs the article's main content as HTML.`,
        `    text-content  Outputs the article's main content as plain text.`,
        ``,
        `Text-content and Html-content are mutually exclusive, and are always printed last.`,
        `Default value is "html-title,html-content".`,
    ];
    console.error(usageLines.join("\n"));
}
|
|
|
|
|
|
|
|
// Options whose values are always kept as strings. '_' (positional arguments)
// and "--" (everything after a literal `--`) are listed so that minimist
// treats the SOURCE argument as a string too.
const stringArgParams = ['_', "--", "output", "properties", "url"];

// Flags that take no value.
const boolArgParams = ["quiet", "help", "version", "is-url"];

// Long option name -> single-letter alias.
const alias = {
    "output": 'o',
    "properties": 'p',
    "version": 'V',
    "url": 'u',
    "is-url": 'U',
    "quiet": 'q'
};

// Parse everything after `node <script>`.
let args = parseArgs(process.argv.slice(2), {
    string: stringArgParams,
    boolean: boolArgParams,
    alias,
    // Collect arguments following `--` into args['--'] instead of args['_'].
    "--": true,
    default: {
        "properties": "html-title,html-content",
        "quiet": false
    }
});
|
|
|
|
|
|
//Minimist's parseArgs accepts a function for handling unknown parameters,
|
|
//but it works in a stupid way, so I'm writing my own.
|
|
|
|
for (var key of Object.keys(args)) {
|
|
if (!stringArgParams.includes(key) && !boolArgParams.includes(key) &&
|
|
!Object.values(alias).includes(key)) {
|
|
console.error(`Unknown argument: ${key}`);
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
|
|
} else if (stringArgParams.includes(key) && args[key] === "") {
|
|
console.error(`Error: no value given for --${key}`);
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
}
|
|
|
|
}
|
|
if (errored) {
|
|
printUsage();
|
|
return;
|
|
}
|
|
|
|
if (args.help) {
|
|
printUsage();
|
|
return;
|
|
} else if (args.version) {
|
|
console.log(`readability-cli v${require("./package.json").version}`);
|
|
console.log(`Node.js ${process.version}`);
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
let inputArg;
|
|
const inputCount = args['_'].length + args['--'].length;
|
|
if (inputCount > 1) {
|
|
console.error("Too many input arguments");
|
|
printUsage();
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
return;
|
|
} else if (inputCount == 0) {
|
|
if (process.stdin.isTTY) {
|
|
console.error("No input provided");
|
|
printUsage();
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
return;
|
|
} else {
|
|
inputArg = '-'
|
|
}
|
|
} else {
|
|
inputArg = (args['_'].length > 0) ? args['_'][0] : args['--'][0];
|
|
}
|
|
|
|
// Classify the input: exactly one of inputURL, inputIsFromStdin or inputFile
// ends up set, depending on what inputArg looks like.
let inputFile;
let inputURL;
let inputIsFromStdin = false;

const hasHttpScheme = inputArg.startsWith("https://") || inputArg.startsWith("http://");

if (args["is-url"] || hasHttpScheme) {
    // --is-url forces URL handling even when no http(s) scheme is present.
    inputURL = inputArg;
} else if (inputArg === '-') {
    inputIsFromStdin = true;
} else {
    inputFile = inputArg;
}

// The positional arguments have been consumed; drop them from args.
delete args['_'];
delete args['--'];

const outputArg = args['output'];
// An explicit --url wins; otherwise fall back to the URL being fetched (if any).
const documentURL = args["url"] || inputURL;
|
|
|
|
|
|
const Properties = {
|
|
htmlTitle: "html-title",
|
|
title: "title",
|
|
excerpt: "excerpt",
|
|
byline: "byline",
|
|
length: "length",
|
|
dir: "dir",
|
|
htmlContent: "html-content",
|
|
textContent: "text-content"
|
|
}
|
|
let wantedProperties = [];
|
|
|
|
if (args.properties) {
|
|
for (var property of args.properties.split(',')) {
|
|
if (Object.values(Properties).includes(property)) {
|
|
wantedProperties.push(property);
|
|
} else {
|
|
console.error(`Invalid property: ${property}`);
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
}
|
|
}
|
|
if (errored) {
|
|
printUsage();
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Load the document from whichever source was selected and hand the
// resulting DOM to onLoadDOM; failures go to onLoadDOMError.
if (inputIsFromStdin) {
    // File descriptor 0 is standard input. The options object belongs to the
    // JSDOM constructor (not to onLoadDOM), otherwise --url is silently
    // ignored for piped input.
    onLoadDOM(new JSDOM(fs.readFileSync(0, 'utf-8'), {
        url: documentURL
    }));
} else {
    if (!args["quiet"])
        console.error("Retrieving...");

    let promiseGetHTML;
    if (inputURL) {
        promiseGetHTML = JSDOM.fromURL(inputURL).catch(error => {
            if (error instanceof TypeError) {
                console.error(`Invalid URL: ${inputURL}`);
                setErrored(ExitCodes.dataError);
                // Already reported: reject with no value so that
                // onLoadDOMError stays quiet.
                return Promise.reject();
            }

            // Any other failure (unknown host, HTTP status error, ...) must
            // reach onLoadDOMError intact so it can be reported.
            return Promise.reject(error);
        });
    } else {
        // Every remaining SOURCE value, including an empty string, is treated
        // as a file name so that a bad path is reported as a load error
        // instead of crashing on an undefined promise.
        promiseGetHTML = JSDOM.fromFile(inputFile, {
            url: documentURL
        });
    }

    promiseGetHTML.then(onLoadDOM, onLoadDOMError);
}
|
|
|
|
/**
 * Runs Readability on a loaded DOM and writes the requested properties.
 *
 * Output goes to the file named by --output when given, otherwise to stdout.
 * Labelled metadata lines come first, then the HTML title, then the article
 * body (HTML if requested, otherwise plain text if requested).
 *
 * @param dom - Object exposing the parsed page as `dom.window.document`.
 */
function onLoadDOM(dom) {
    const pageDocument = dom.window.document;
    if (!args["quiet"]) {
        console.error("Parsing...");
    }

    const article = new Readability(pageDocument).parse();
    if (!article) {
        console.error("Couldn't parse document");
        setErrored(ExitCodes.dataError);
        return;
    }

    const writeStream = outputArg ? fs.createWriteStream(outputArg) : process.stdout;

    // [requested property, printed label, field on the parsed article],
    // listed in the order the lines are emitted.
    const labelledFields = [
        [Properties.title, "Title", "title"],
        [Properties.excerpt, "Excerpt", "excerpt"],
        [Properties.byline, "Author", "byline"],
        [Properties.length, "Length", "length"],
        [Properties.dir, "Direction", "dir"],
    ];
    for (const [property, label, field] of labelledFields) {
        if (wantedProperties.includes(property)) {
            writeStream.write(`${label}: ${article[field]}\n`);
        }
    }

    if (wantedProperties.includes(Properties.htmlTitle)) {
        // The title is plain text, so escape it before wrapping it in markup.
        writeStream.write(`<h1>${he.escape(article.title)}</h1>\n`);
    }

    // HTML and text content are mutually exclusive; HTML wins when both are requested.
    if (wantedProperties.includes(Properties.htmlContent)) {
        writeStream.write(article.content);
    } else if (wantedProperties.includes(Properties.textContent)) {
        writeStream.write(article.textContent);
    }
}
|
|
|
|
/**
 * Reports a failure to load the input document and records an exit code.
 *
 * @param {*} error - The rejection reason. A falsy value means the failure
 *   was already reported by an earlier handler, so nothing is printed.
 */
function onLoadDOMError(error) {
    // Already handled earlier in the promise chain.
    if (!error)
        return;

    if (error.code === "ENOENT") {
        console.error(error.message);
        setErrored(ExitCodes.noInput);
    } else if (error.code === "EACCES") {
        console.error(error.message);
        setErrored(ExitCodes.noPermission);
    } else if (error.error && error.error.code === "ENOTFOUND") {
        console.error(`Host not found: '${error.error.hostname}'`);
        setErrored(ExitCodes.noHost);
    } else if (error.statusCode) {
        // The response object may be missing; fall back to the numeric status
        // rather than throwing while reporting an error.
        const statusText = (error.response && error.response.statusMessage) || error.statusCode;
        console.error(`Status error: ${statusText}`);
        setErrored(ExitCodes.noHost);
    } else {
        console.error(error);
        if (error.stack)
            console.error(error.stack);
        // Unrecognised failure: still exit non-zero so that scripts calling
        // this tool do not mistake it for success.
        setErrored(1);
    }
}
|