You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
readability-cli/index.js

291 lines
8.0 KiB
JavaScript

#!/usr/bin/env node
/*
Firefox Reader Mode in your terminal! - CLI tool for Mozilla's Readability library
Copyright (C) 2020 gardenapple
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const Readability = require("readability");
const JSDOM = require("jsdom").JSDOM;
const parseArgs = require("minimist");
const fs = require("fs");
const he = require("he");
// Exit statuses reported through process.exitCode by the rest of this script.
const ExitCodes = {
    // Malformed command line (unknown option, missing value, wrong number of inputs).
    "badUsageCLI": 64,
    // The input could not be used (unparseable document, invalid URL).
    "dataError": 65,
    // The input file does not exist.
    "noInput": 66,
    // The remote host could not be found, or it answered with an error status.
    "noHost": 68,
    // The input file is not accessible.
    "noPermission": 77,
};
// Turns true as soon as any failure has been recorded through setErrored().
let errored = false;

/**
 * Records a failure: remembers that something went wrong and sets the status
 * the process will exit with once it finishes.
 *
 * @param {number} exitCode - value to store in process.exitCode
 */
function setErrored(exitCode) {
    errored = true;
    process.exitCode = exitCode;
}
/**
 * Writes the command-line help text to standard error in a single call.
 */
function printUsage() {
    // The template literal is deliberately flush-left: its content is emitted verbatim.
    const usageText = `
Usage:
readable [SOURCE] [options]
readable [options] -- [SOURCE]
(where SOURCE is a file, an http(s) URL, or '-' for standard input)
Options:
--help Print help
-o --output OUTPUT_FILE Output to OUTPUT_FILE
-p --properties PROPS... Output specific properties of the parsed article
-V --version Print version
-u --url Set the document URL when parsing standard input or a local file (this affects relative links and such)
-U --is-url Interpret SOURCE as a URL rather than file name
-q --quiet Don't output extra information to stderr
The --properties option accepts a comma-separated list of values (with no spaces in-between). Suitable values are:
html-title Outputs the article's title, wrapped in an <h1> tag.
title Outputs the title in the format "Title: $TITLE".
excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT"
byline Author metadata, in the format "Author: $AUTHOR"
length Length of the article in characters, in the format "Length: $LENGTH"
dir Content direction, is either "Direction: ltr" or "Direction: rtl"
html-content Outputs the article's main content as HTML.
text-content Outputs the article's main content as plain text.
Text-content and Html-content are mutually exclusive, and are always printed last.
Default value is "html-title,html-content".`;
    console.error(usageText);
}
// Names minimist must keep as strings ('_' and '--' hold the positional arguments).
const stringArgParams = ["_", "--", "output", "properties", "url"];
// Names minimist must treat as on/off switches.
const boolArgParams = ["quiet", "help", "version", "is-url"];
// Long option name -> single-letter shortcut.
const alias = {
    output: "o",
    properties: "p",
    version: "V",
    url: "u",
    "is-url": "U",
    quiet: "q",
};
// Settings handed to minimist; "--": true collects everything after a bare
// `--` into args["--"].
const minimistOptions = {
    string: stringArgParams,
    boolean: boolArgParams,
    default: {
        properties: "html-title,html-content",
        quiet: false,
    },
    alias,
    "--": true,
};
let args = parseArgs(process.argv.slice(2), minimistOptions);
//Minimist's parseArgs accepts a function for handling unknown parameters,
//but it works in a stupid way, so I'm writing my own.
for (var key of Object.keys(args)) {
if (!stringArgParams.includes(key) && !boolArgParams.includes(key) &&
!Object.values(alias).includes(key)) {
console.error(`Unknown argument: ${key}`);
setErrored(ExitCodes.badUsageCLI);
} else if (stringArgParams.includes(key) && args[key] === "") {
console.error(`Error: no value given for --${key}`);
setErrored(ExitCodes.badUsageCLI);
}
}
if (errored) {
printUsage();
return;
}
if (args.help) {
printUsage();
return;
} else if (args.version) {
console.log(`readability-cli v${require("./package.json").version}`);
console.log(`Node.js ${process.version}`);
return;
}
let inputArg;
const inputCount = args['_'].length + args['--'].length;
if (inputCount > 1) {
console.error("Too many input arguments");
printUsage();
setErrored(ExitCodes.badUsageCLI);
return;
} else if (inputCount == 0) {
if (process.stdin.isTTY) {
console.error("No input provided");
printUsage();
setErrored(ExitCodes.badUsageCLI);
return;
} else {
inputArg = '-'
}
} else {
inputArg = (args['_'].length > 0) ? args['_'][0] : args['--'][0];
}
// Decide what kind of input SOURCE is: a URL (forced with --is-url or
// recognised by its http(s) scheme), standard input ('-'), or a file name.
let inputFile;
let inputURL;
let inputIsFromStdin = false;
const treatAsURL = args["is-url"] ||
    ["https://", "http://"].some(scheme => inputArg.startsWith(scheme));
if (treatAsURL) {
    inputURL = inputArg;
} else if (inputArg === "-") {
    inputIsFromStdin = true;
} else {
    inputFile = inputArg;
}

// The positional lists are no longer needed once the input has been picked.
delete args["_"];
delete args["--"];

const outputArg = args["output"];
// An explicit --url wins; otherwise the URL the document is fetched from (if any).
const documentURL = args["url"] || inputURL;
const Properties = {
htmlTitle: "html-title",
title: "title",
excerpt: "excerpt",
byline: "byline",
length: "length",
dir: "dir",
htmlContent: "html-content",
textContent: "text-content"
}
let wantedProperties = [];
if (args.properties) {
for (var property of args.properties.split(',')) {
if (Object.values(Properties).includes(property)) {
wantedProperties.push(property);
} else {
console.error(`Invalid property: ${property}`);
setErrored(ExitCodes.badUsageCLI);
}
}
if (errored) {
printUsage();
return;
}
}
// Load the document from the selected source and hand it to onLoadDOM;
// loading failures are reported through onLoadDOMError.
if (inputIsFromStdin) {
    // File descriptor 0 is standard input.
    const html = fs.readFileSync(0, "utf-8");
    let dom;
    try {
        // The options belong to the JSDOM constructor (not to onLoadDOM), so
        // that --url takes effect for standard input as well.
        dom = new JSDOM(html, {
            url: documentURL
        });
    } catch (error) {
        // e.g. an unparseable --url value
        onLoadDOMError(error);
    }
    if (dom)
        onLoadDOM(dom);
} else {
    if (!args["quiet"])
        console.error("Retrieving...");
    let promiseGetHTML;
    // Compare against undefined rather than truthiness so that an empty
    // SOURCE string still reaches a loader and produces a proper error.
    if (inputURL !== undefined) {
        promiseGetHTML = JSDOM.fromURL(inputURL).catch(error => {
            if (error instanceof TypeError) {
                console.error(`Invalid URL: ${inputURL}`);
                setErrored(ExitCodes.dataError);
                // Already reported: reject without a value so that
                // onLoadDOMError stays silent.
                return Promise.reject();
            }
            // Anything else (host not found, HTTP status error, ...) must
            // reach onLoadDOMError instead of being swallowed.
            return Promise.reject(error);
        });
    } else {
        promiseGetHTML = JSDOM.fromFile(inputFile, {
            url: documentURL
        });
    }
    promiseGetHTML.then(onLoadDOM, onLoadDOMError);
}
/**
 * Runs Readability over a loaded document and writes the requested
 * properties to the output file (--output) or to standard output.
 *
 * Metadata lines are written first, then the HTML title, then the main
 * content; html-content takes precedence over text-content when both are
 * requested.
 *
 * @param {object} dom - loaded JSDOM instance; only dom.window.document is used
 */
function onLoadDOM(dom) {
    const document = dom.window.document;
    if (!args["quiet"])
        console.error("Parsing...");

    const reader = new Readability(document);
    const article = reader.parse();
    if (!article) {
        console.error("Couldn't parse document");
        setErrored(ExitCodes.dataError);
        return;
    }

    const writesToFile = Boolean(outputArg);
    const writeStream = writesToFile ? fs.createWriteStream(outputArg) : process.stdout;

    if (wantedProperties.includes(Properties.title)) {
        writeStream.write(`Title: ${article.title}\n`);
    }
    if (wantedProperties.includes(Properties.excerpt)) {
        writeStream.write(`Excerpt: ${article.excerpt}\n`);
    }
    if (wantedProperties.includes(Properties.byline)) {
        writeStream.write(`Author: ${article.byline}\n`);
    }
    if (wantedProperties.includes(Properties.length)) {
        writeStream.write(`Length: ${article.length}\n`);
    }
    if (wantedProperties.includes(Properties.dir)) {
        writeStream.write(`Direction: ${article.dir}\n`);
    }
    if (wantedProperties.includes(Properties.htmlTitle)) {
        // The title is plain text, so escape it before wrapping it in markup.
        writeStream.write(`<h1>${he.escape(article.title)}</h1>\n`);
    }
    if (wantedProperties.includes(Properties.htmlContent)) {
        writeStream.write(article.content);
    } else if (wantedProperties.includes(Properties.textContent)) {
        writeStream.write(article.textContent);
    }

    // Finish the output file so it is flushed and closed. Standard output is
    // owned by the process and is left open.
    if (writesToFile) {
        writeStream.end();
    }
}
/**
 * Reports a failure to load the input document and records the matching
 * exit code through setErrored().
 *
 * @param {*} error - rejection value from the loader. A falsy value means the
 *     loader rejected without an error object (the URL loader does this), in
 *     which case there is nothing to report here.
 */
function onLoadDOMError(error) {
    if (!error)
        return;

    if (error.code === "ENOENT") {
        console.error(error.message);
        setErrored(ExitCodes.noInput);
    } else if (error.code === "EACCES") {
        console.error(error.message);
        setErrored(ExitCodes.noPermission);
    } else if (error.error && error.error.code === "ENOTFOUND") {
        console.error(`Host not found: '${error.error.hostname}'`);
        setErrored(ExitCodes.noHost);
    } else if (error.statusCode) {
        // Prefer the server's status text; fall back to the numeric code when
        // the error carries no response object.
        const hasStatusText = error.response && error.response.statusMessage;
        const statusText = hasStatusText ? error.response.statusMessage : error.statusCode;
        console.error(`Status error: ${statusText}`);
        setErrored(ExitCodes.noHost);
    } else {
        // Unexpected failure. console.error already prints the stack of an
        // Error instance, so the stack is only printed separately for other
        // values that happen to carry one.
        console.error(error);
        if (!(error instanceof Error) && error.stack)
            console.error(error.stack);
        // Make sure the process does not exit with status 0 after a failure.
        setErrored(1);
    }
}