|
|
|
@ -19,7 +19,7 @@ Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
const parseArgs = require("minimist");
|
|
|
|
|
//const parseArgs = require("minimist");
|
|
|
|
|
//JSDOM, fs, Readability, and Readability-readerable are loaded on-demand.
|
|
|
|
|
//To-do: lazy loading?
|
|
|
|
|
|
|
|
|
@ -38,93 +38,244 @@ function setErrored(exitCode) {
|
|
|
|
|
errored = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function printUsage() {
|
|
|
|
|
console.error(`
|
|
|
|
|
Usage:
|
|
|
|
|
readable [SOURCE] [options]
|
|
|
|
|
readable [options] -- [SOURCE]
|
|
|
|
|
(where SOURCE is a file, an http(s) URL, or '-' for standard input)
|
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
-h --help Print help
|
|
|
|
|
-o --output OUTPUT_FILE Output to OUTPUT_FILE
|
|
|
|
|
-p --properties PROPS... Output specific properties of the parsed article
|
|
|
|
|
-V --version Print version
|
|
|
|
|
-u --url Set the document URL when parsing standard input or a local file (this affects relative links)
|
|
|
|
|
-U --is-url Interpret SOURCE as a URL rather than file name
|
|
|
|
|
-q --quiet Don't output extra information to stderr
|
|
|
|
|
-l --low-confidence MODE What to do if Readability.js is uncertain about what the core content actually is
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The --low-confidence option determines what should be done for documents where Readability can't tell what the core content is:
|
|
|
|
|
no-op When unsure, don't touch the HTML, output as-is. If the --properties option is used, this will make the program crash.
|
|
|
|
|
force Process the document even when unsure (may produce really bad output).
|
|
|
|
|
exit When unsure, exit with an error.
|
|
|
|
|
|
|
|
|
|
Default value is "no-op".
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The --properties option accepts a comma-separated list of values (with no spaces in-between). Suitable values are:
|
|
|
|
|
html-title Outputs the article's title, wrapped in an <h1> tag.
|
|
|
|
|
title Outputs the title in the format "Title: $TITLE".
|
|
|
|
|
excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT"
|
|
|
|
|
byline Author metadata, in the format "Author: $AUTHOR"
|
|
|
|
|
length Length of the article in characters, in the format "Length: $LENGTH"
|
|
|
|
|
dir Content direction, is either "Direction: ltr" or "Direction: rtl"
|
|
|
|
|
html-content Outputs the article's main content as HTML.
|
|
|
|
|
text-content Outputs the article's main content as plain text.
|
|
|
|
|
|
|
|
|
|
Text-content and Html-content are mutually exclusive, and are always printed last.
|
|
|
|
|
Default value is "html-title,html-content".`);
|
|
|
|
|
}
|
|
|
|
|
//function printUsage() {
|
|
|
|
|
// console.error(`
|
|
|
|
|
//Usage:
|
|
|
|
|
// readable [SOURCE] [options]
|
|
|
|
|
// readable [options] -- [SOURCE]
|
|
|
|
|
// (where SOURCE is a file, an http(s) URL, or '-' for standard input)
|
|
|
|
|
//
|
|
|
|
|
//Options:
|
|
|
|
|
// -h --help Print help
|
|
|
|
|
// -o --output OUTPUT_FILE Output to OUTPUT_FILE
|
|
|
|
|
// -p --properties PROPS... Output specific properties of the parsed article
|
|
|
|
|
// -V --version Print version
|
|
|
|
|
// -u --url Set the document URL when parsing standard input or a local file (this affects relative links)
|
|
|
|
|
// -U --is-url Interpret SOURCE as a URL rather than file name
|
|
|
|
|
// -q --quiet Don't output extra information to stderr
|
|
|
|
|
// -l --low-confidence MODE What to do if Readability.js is uncertain about what the core content actually is
|
|
|
|
|
//
|
|
|
|
|
//
|
|
|
|
|
//The --low-confidence option determines what should be done for documents where Readability can't tell what the core content is:
|
|
|
|
|
// no-op When unsure, don't touch the HTML, output as-is. If the --properties option is used, this will make the program crash.
|
|
|
|
|
// force Process the document even when unsure (may produce really bad output).
|
|
|
|
|
// exit When unsure, exit with an error.
|
|
|
|
|
//
|
|
|
|
|
//Default value is "no-op".
|
|
|
|
|
//
|
|
|
|
|
//
|
|
|
|
|
//The --properties option accepts a comma-separated list of values (with no spaces in-between). Suitable values are:
|
|
|
|
|
// html-title Outputs the article's title, wrapped in an <h1> tag.
|
|
|
|
|
// title Outputs the title in the format "Title: $TITLE".
|
|
|
|
|
// excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT"
|
|
|
|
|
// byline Author metadata, in the format "Author: $AUTHOR"
|
|
|
|
|
// length Length of the article in characters, in the format "Length: $LENGTH"
|
|
|
|
|
// dir Content direction, is either "Direction: ltr" or "Direction: rtl"
|
|
|
|
|
// html-content Outputs the article's main content as HTML.
|
|
|
|
|
// text-content Outputs the article's main content as plain text.
|
|
|
|
|
//
|
|
|
|
|
//Text-content and Html-content are mutually exclusive, and are always printed last.
|
|
|
|
|
//Default value is "html-title,html-content".`);
|
|
|
|
|
//}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//const stringArgParams = ['_', "--", "low-confidence", "output", "properties", "url"];
|
|
|
|
|
//const boolArgParams = ["quiet", "help", "version", "is-url"];
|
|
|
|
|
//const alias = {
|
|
|
|
|
// "output": 'o',
|
|
|
|
|
// "properties": 'p',
|
|
|
|
|
// "version": 'V',
|
|
|
|
|
// "url": 'u',
|
|
|
|
|
// "is-url": 'U',
|
|
|
|
|
// "quiet": 'q',
|
|
|
|
|
// "low-confidence": 'l',
|
|
|
|
|
// "help": 'h'
|
|
|
|
|
//}
|
|
|
|
|
//
|
|
|
|
|
//let args = parseArgs(process.argv.slice(2), {
|
|
|
|
|
// string: stringArgParams,
|
|
|
|
|
// boolean: boolArgParams,
|
|
|
|
|
// default: {
|
|
|
|
|
// "low-confidence": "no-op",
|
|
|
|
|
// "quiet": false
|
|
|
|
|
// },
|
|
|
|
|
// alias: alias,
|
|
|
|
|
// "--": true
|
|
|
|
|
//});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
////backwards compatibility
|
|
|
|
|
//
|
|
|
|
|
//let shouldSplitNext = false;
|
|
|
|
|
//for (var i = 1; i < process.argv.length; i++) {
|
|
|
|
|
// const arg = process.argv[i];
|
|
|
|
|
// console.log(arg);
|
|
|
|
|
// //Turn comma-separated list into space-separated list
|
|
|
|
|
// let shouldSplit = false;
|
|
|
|
|
//
|
|
|
|
|
// if (shouldSplitNext) {
|
|
|
|
|
// shouldSplitNext = false;
|
|
|
|
|
// shouldSplit = true;
|
|
|
|
|
// } else if (arg.startsWith("--properties") || /-\w*p/.test(arg)) {
|
|
|
|
|
// shouldSplitNext = true;
|
|
|
|
|
// } else if (arg.startsWith("--properties=") || /-\w*p=/.test(arg)) {
|
|
|
|
|
// shouldSplit = true;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// if (shouldSplit) {
|
|
|
|
|
// const split = arg.split(',');
|
|
|
|
|
// process.argv.splice(i, 1, ...split);
|
|
|
|
|
// }
|
|
|
|
|
//}
|
|
|
|
|
//console.log("done");
|
|
|
|
|
//console.log(process.argv);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
//Parsing arguments
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
//Maps the identifier used in code for each article property to the name
//that is accepted on the command line by the --properties option.
const Properties = {
	"htmlTitle": "html-title",
	"title": "title",
	"excerpt": "excerpt",
	"byline": "byline",
	"length": "length",
	"dir": "dir",
	"htmlContent": "html-content",
	"textContent": "text-content"
};
|
|
|
|
|
|
|
|
|
|
const yargs = require("yargs");
|
|
|
|
|
|
|
|
|
|
/**
 * Backwards compatibility with the old, comma-separated --properties syntax.
 *
 * Every entry of args["properties"] that contains a comma is replaced by its
 * individual items (empty items, e.g. from a trailing comma, are dropped).
 * Every entry that is not one of the values of Properties is removed from the
 * list and appended to args["--"], which is created when missing.
 *
 * The "properties" array is edited in place rather than replaced, exactly as
 * before, so any other reference to the same array sees the result.
 *
 * @param {object} args Parsed arguments. Nothing happens when
 *   args["properties"] is not set.
 */
function yargsCompatProperties(args) {
	const properties = args["properties"];
	if (!properties)
		return;

	const knownProperties = Object.values(Properties);
	let i = 0;
	while (i < properties.length) {
		const property = properties[i];

		//Only strings can be comma-separated lists; anything else falls
		//through to the "unknown value" handling below instead of throwing.
		if (typeof property === "string" && property.includes(',')) {
			const items = property.split(',').filter((item) => item !== "");
			properties.splice(i, 1, ...items);
			//Stay on the same index: the first expanded item has to be
			//validated as well.
			continue;
		}

		if (knownProperties.includes(property)) {
			i++;
			continue;
		}

		//Not a property name, so hand it over as a positional argument.
		//The next entry has moved into index i, so i is not advanced.
		properties.splice(i, 1);
		if (!args["--"])
			args["--"] = [ property ];
		else
			args["--"].push(property);
	}
}
|
|
|
|
|
|
|
|
|
|
const stringArgParams = ['_', "--", "low-confidence", "output", "properties", "url"];
|
|
|
|
|
const boolArgParams = ["quiet", "help", "version", "is-url"];
|
|
|
|
|
const alias = {
|
|
|
|
|
"output": 'o',
|
|
|
|
|
"properties": 'p',
|
|
|
|
|
"version": 'V',
|
|
|
|
|
"url": 'u',
|
|
|
|
|
"is-url": 'U',
|
|
|
|
|
"quiet": 'q',
|
|
|
|
|
"low-confidence": 'l',
|
|
|
|
|
"help": 'h'
|
|
|
|
|
/**
 * Positional arguments sometimes don't get recognized when they're put after
 * other arguments (possibly an oversight in yargs). This collects the values
 * found under args["-"] and args["--"] into args["source"].
 *
 * When there is no source yet, it becomes a copy of the stray values;
 * otherwise the stray values are appended to the existing source. The result
 * is always an array. args["--"] is deleted afterwards, args["-"] is kept.
 *
 * NOTE(review): an existing string source is turned into an array here;
 * confirm that the code reading args["source"] accepts an array.
 *
 * @param {object} args Parsed arguments, modified in place.
 */
function yargsFixPositional(args) {
	for (const key of [ "-", "--" ]) {
		const stray = args[key];
		if (!stray)
			continue;

		//[].concat() copies arrays and wraps single values, so the source
		//never aliases args[key] and a string source can be extended.
		if (!args["source"])
			args["source"] = [].concat(stray);
		else
			args["source"] = [].concat(args["source"], stray);

		if (key === "--")
			delete args["--"];
	}
}
|
|
|
|
|
|
|
|
|
|
let args = parseArgs(process.argv.slice(2), {
|
|
|
|
|
string: stringArgParams,
|
|
|
|
|
boolean: boolArgParams,
|
|
|
|
|
default: {
|
|
|
|
|
"low-confidence": "no-op",
|
|
|
|
|
"quiet": false
|
|
|
|
|
},
|
|
|
|
|
alias: alias,
|
|
|
|
|
"--": true
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//Minimist's parseArgs accepts a function for handling unknown parameters,
|
|
|
|
|
//but it works in a stupid way, so I'm writing my own.
|
|
|
|
|
|
|
|
|
|
for (var key of Object.keys(args)) {
|
|
|
|
|
if (!stringArgParams.includes(key) && !boolArgParams.includes(key) &&
|
|
|
|
|
!Object.values(alias).includes(key)) {
|
|
|
|
|
console.error(`Unknown argument: ${key}`);
|
|
|
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
//Command-line definition. Parsing happens right here, at load time, and the
//result is stored in args.
let args = yargs
	//The -V/--version option is declared by hand further down.
	.version(false)
	.parserConfiguration({
		//Hyphenated names such as "low-confidence" are looked up verbatim.
		"camel-case-expansion": false
	})
	.command("* [source]", "Process HTML input", (yargs) => {
		yargs.positional("source", {
			desc: "A file, an http(s) URL, or '-' for standard input",
			type: "string"
		});
	})
	//Both fix-ups are registered with the second argument set to true.
	//NOTE(review): per the yargs docs that flag is applyBeforeValidation,
	//i.e. they run before validation - confirm for the yargs version in use.
	.middleware([ yargsCompatProperties, yargsFixPositional ], true) //middleware seems to be buggy
	.option('c', {
		alias: "completion"
	})
	.option('V', {
		alias: "version",
		type: "boolean",
		desc: "Print version"
	})
	.option('h', {
		alias: "help",
		desc: "Show help"
	})
	.option('o', {
		alias: "output",
		type: "string",
		desc: "The file to which the result should be output"
	})
	.option('l', {
		alias: "low-confidence",
		type: "string",
		desc: "What to do if Readability.js is uncertain about what the core content actually is",
		choices: ["no-op", "force", "exit"],
		default: "no-op"
	})
	.option('p', {
		alias: "properties",
		type: "array",
		desc: "Output specific properties of the parsed article",
		//Same list that yargsCompatProperties validates against.
		choices: Object.values(Properties)
	})
	.option('q', {
		alias: "quiet",
		type: "boolean",
		desc: "Don't output extra information to stderr",
		default: false
	})
	//Help output is wrapped at the terminal width, but never wider than 100.
	.wrap(Math.min(yargs.terminalWidth(), 100))
	.strict()
	.parse();
|
|
|
|
|
|
|
|
|
|
} else if (stringArgParams.includes(key) && args[key] === "") {
|
|
|
|
|
console.error(`Error: no value given for --${key}`);
|
|
|
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//Shows the usage text by delegating to yargs.showHelp(); takes no arguments
//and returns nothing.
function printUsage() {
|
|
|
|
|
yargs.showHelp();
|
|
|
|
|
}
|
|
|
|
|
if (errored) {
|
|
|
|
|
printUsage();
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (args["completion"]) {
|
|
|
|
|
yargs.showCompletionScript();
|
|
|
|
|
process.exit();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////Minimist's parseArgs accepts a function for handling unknown parameters,
|
|
|
|
|
////but it works in a stupid way, so I'm writing my own.
|
|
|
|
|
//
|
|
|
|
|
//for (var key of Object.keys(args)) {
|
|
|
|
|
// if (!stringArgParams.includes(key) && !boolArgParams.includes(key) &&
|
|
|
|
|
// !Object.values(alias).includes(key)) {
|
|
|
|
|
// console.error(`Unknown argument: ${key}`);
|
|
|
|
|
// setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
//
|
|
|
|
|
// } else if (stringArgParams.includes(key) && args[key] === "") {
|
|
|
|
|
// console.error(`Error: no value given for --${key}`);
|
|
|
|
|
// setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
//}
|
|
|
|
|
//if (errored) {
|
|
|
|
|
// printUsage();
|
|
|
|
|
// return;
|
|
|
|
|
//}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (args["help"]) {
|
|
|
|
|
printUsage();
|
|
|
|
|
return;
|
|
|
|
@ -137,13 +288,14 @@ if (args["help"]) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let inputArg;
|
|
|
|
|
const inputCount = args['_'].length + args['--'].length;
|
|
|
|
|
if (inputCount > 1) {
|
|
|
|
|
console.error("Too many input arguments");
|
|
|
|
|
printUsage();
|
|
|
|
|
setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
return;
|
|
|
|
|
} else if (inputCount == 0) {
|
|
|
|
|
//const inputCount = args['_'].length + args['--'].length;
|
|
|
|
|
//if (inputCount > 1) {
|
|
|
|
|
// console.error("Too many input arguments");
|
|
|
|
|
// printUsage();
|
|
|
|
|
// setErrored(ExitCodes.badUsageCLI);
|
|
|
|
|
// return;
|
|
|
|
|
//} else if (inputCount == 0) {
|
|
|
|
|
if (!args["source"]) {
|
|
|
|
|
if (process.stdin.isTTY) {
|
|
|
|
|
console.error("No input provided");
|
|
|
|
|
printUsage();
|
|
|
|
@ -153,7 +305,8 @@ if (inputCount > 1) {
|
|
|
|
|
inputArg = '-'
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
inputArg = (args['_'].length > 0) ? args['_'][0] : args['--'][0];
|
|
|
|
|
// inputArg = (args['_'].length > 0) ? args['_'][0] : args['--'][0];
|
|
|
|
|
inputArg = args["source"];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//Get input parameter, remove inputArg from args
|
|
|
|
@ -177,16 +330,6 @@ const documentURL = args["url"] || inputURL;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//Maps the identifier used in code for each article property to the name
//that is accepted on the command line by the --properties option.
const Properties = {
	"htmlTitle": "html-title",
	"title": "title",
	"excerpt": "excerpt",
	"byline": "byline",
	"length": "length",
	"dir": "dir",
	"htmlContent": "html-content",
	"textContent": "text-content"
};
|
|
|
|
|
let wantedProperties = [];
|
|
|
|
|
let justOutputHtml = false;
|
|
|
|
|
|
|
|
|
|