Rework output

Breaking changes:
* Properties are now printed as a single line (all except html-content
  and text-content)
* Properties are now printed without prefixes or localization
* Properties are now printed in the order given by the user
merge-requests/2/head
gardenapple 3 years ago
parent cdd635d560
commit 68f5a3b6c5
No known key found for this signature in database
GPG Key ID: CAF17E9ABE789268

@ -22,7 +22,7 @@ Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
const path = require("path");
// GNU gettext gives preference to LANGUAGE, but this order is consistent with Yargs:
// GNU gettext gives preference to LANGUAGE above all else, but this order is consistent with Yargs:
const locale = (
process.env.LC_ALL ||
process.env.LC_MESSAGES ||
@ -61,16 +61,24 @@ function setErrored(exitCode) {
//Parsing arguments
//
// Names of the article properties that can be requested for output.
// Keys are the identifiers used in code; values are the strings that
// user-supplied property names are compared against.
const Properties = {
htmlTitle: "html-title",
title: "title",
excerpt: "excerpt",
byline: "byline",
length: "length",
dir: "dir",
htmlContent: "html-content",
textContent: "text-content"
};
// Maps every supported property name to a function that extracts that
// property from a parsed article.
//
// Each extractor is called as (article, singleLine, document):
//   article    - the parsed article object.
//   singleLine - when true, short text properties have their newlines
//                collapsed so the value fits on one output line.
//   document   - DOM document; only "html-title" uses it (for HTML escaping).
//
// Insertion order matters: iterating the Map yields the properties in the
// order listed here.
const Properties = new Map([
	["html-title", (article, singleLine, document) =>
		`<h1>${escapeHTML(Properties.get('title')(article, singleLine, document), document)}</h1>`
	],
	["title", (article, singleLine) =>
		singleLine ? collapseNewlines(article.title) : article.title
	],
	["excerpt", (article, singleLine) =>
		singleLine ? collapseNewlines(article.excerpt) : article.excerpt
	],
	["byline", (article, singleLine) =>
		singleLine ? collapseNewlines(article.byline) : article.byline
	],
	["length", article => article.length],
	["dir", article => article.dir],
	["html-content", article => article.content],
	["text-content", article => article.textContent]
]);

// Replaces every run of newlines in a string with a single space.
// Non-string values are returned unchanged: a metadata field may be
// null/undefined (e.g. a page without a byline), and calling .replace on it
// would throw a TypeError instead of producing output.
function collapseNewlines(value) {
	return typeof value === "string" ? value.replace(/\n+/g, ' ') : value;
}
const LowConfidenceMode = {
noOp: "no-op",
@ -89,7 +97,7 @@ function yargsCompatProperties(args) {
args["properties"].splice(i, 1, ...split);
continue;
}
if (!Object.values(Properties).includes(property)) {
if (!Properties.has(property)) {
args["properties"].splice(i, 1);
i--;
if (!args["--"])
@ -124,10 +132,10 @@ let args = yargs
if (args["properties"] !== undefined) {
const properties = args["properties"];
let possibleProperties = [];
for (var possibleProperty of Object.values(Properties)) {
if (possibleProperty.startsWith(properties[properties.length - 1])
&& !properties.includes(possibleProperty))
possibleProperties.push(possibleProperty);
for (const propertyName of Properties.keys()) {
if (propertyName.startsWith(properties[properties.length - 1])
&& !properties.includes(propertyName))
possibleProperties.push(propertyName);
}
if (possibleProperties.length > 0)
done(possibleProperties);
@ -176,7 +184,7 @@ let args = yargs
alias: 'p',
type: "array",
desc: __`Output specific properties of the parsed article`,
choices: ["html-title", "title", "excerpt", "byline", "length", "dir", "html-content", "text-content"]
choices: Array.from(Properties.keys())
})
.option("quiet", {
alias: 'q',
@ -225,16 +233,16 @@ __`Default value is "no-op".\n` +
'\n' +
'\n' +
__`The --properties option accepts a list of values, separated by spaces. Suitable values are:\n` +
__` html-title Outputs the article's title, wrapped in an <h1> tag.\n` +
__` title Outputs the title in the format "Title: $TITLE".\n` +
__` excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT".\n` +
__` byline Author metadata, in the format "Author: $AUTHOR".\n` +
__` length Length of the article in characters, in the format "Length: $LENGTH".\n` +
__` dir Content direction, is either "Direction: ltr" or "Direction: rtl".\n` +
__` html-content Outputs the article's main content as HTML.\n` +
__` text-content Outputs the article's main content as plain text.\n` +
__` title The title of the article.\n` +
__` html-title The title of the article, wrapped in an <h1> tag.\n` +
__` excerpt Article description, or short excerpt from the content.\n` +
__` byline Data about the page's author.\n` +
__` length Length of the article in characters.\n` +
__` dir Text direction, is either "ltr" for left-to-right or "rtl" for right-to-left.\n` +
__` text-content Output the article's main content as plain text.\n` +
__` html-content Output the article's main content as HTML.\n` +
'\n' +
__`Text-content and Html-content are mutually exclusive, and are always printed last.\n` +
__`Properties are printed line by line, in the order specified by the user. Only "text-content" and "html-content" is printed as multiple lines.\n` +
__`Default value is "html-title html-content".\n`)
.wrap(Math.min(yargs.terminalWidth(), 120))
.strict()
@ -313,12 +321,10 @@ if (args["properties"]) {
wantedProperties = args["properties"];
wantedPropertiesCustom = true;
} else {
wantedProperties = [ Properties.htmlTitle, Properties.htmlContent ];
wantedProperties = [ 'html-title', 'html-content' ];
}
if (errored) {
printUsage();
return;
@ -333,6 +339,9 @@ async function read(stream) {
}
const JSDOM = require("jsdom").JSDOM;
if (inputIsFromStdin) {
if (!args["quiet"]) {
console.error("Reading...");
@ -340,11 +349,9 @@ if (inputIsFromStdin) {
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
}
read(process.stdin).then(result => {
const JSDOM = require("jsdom").JSDOM;
onLoadDOM(new JSDOM(result, { url: documentURL }));
});
} else {
const JSDOM = require("jsdom").JSDOM;
if (!args["quiet"])
console.error(__`Retrieving...`);
let promiseGetHTML;
@ -359,10 +366,13 @@ if (inputIsFromStdin) {
promiseGetHTML.then(onLoadDOM, onLoadDOMError)
}
const { Readability, isProbablyReaderable } = require("@mozilla/readability");
//Taken from https://stackoverflow.com/a/22706073/5701177
function escapeHTML(string, document){
function escapeHTML(string, document) {
var p = document.createElement("p");
p.appendChild(document.createTextNode(string));
return p.innerHTML;
@ -401,68 +411,36 @@ function onLoadDOM(dom) {
}
if (shouldParseArticle) {
if (!args["quiet"])
console.error(__`Processing...`);
if (!shouldParseArticle) {
//Ignore wantedProperties, that should've thrown an error before
writeStream.write(document.documentElement.outerHTML);
return;
}
const reader = new Readability(document);
const article = reader.parse();
if (!article) {
console.error(__`Couldn't process document.`);
setErrored(ExitCodes.dataError);
return;
}
if (outputJSON) {
let result = {};
const jsonProperties = ["title", "excerpt", "byline", "length", "dir"];
for (jsonProperty of jsonProperties) {
if (!wantedPropertiesCustom || wantedProperties.includes(jsonProperty))
result[jsonProperty] = article[jsonProperty];
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.textContent)) {
result[Properties.textContent] = article.textContent;
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlContent)) {
result[Properties.htmlContent] = article.content;
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlTitle)) {
result[Properties.htmlTitle] = `<h1>${escapeHTML(article.title, document)}</h1>`
}
writeStream.write(JSON.stringify(result));
return;
}
if (!args["quiet"])
console.error(__`Processing...`);
if (wantedProperties.includes(Properties.title)) {
writeStream.write(__`Title: ${article.title}\n`);
}
if (wantedProperties.includes(Properties.excerpt)) {
writeStream.write(__`Excerpt: ${article.excerpt}\n`);
}
if (wantedProperties.includes(Properties.byline)) {
writeStream.write(__`Author: ${article.byline}\n`);
}
if (wantedProperties.includes(Properties.length)) {
writeStream.write(__`Length: ${article.length}\n`);
}
if (wantedProperties.includes(Properties.dir)) {
if (article.dir == 'ltr')
writeStream.write(__`Direction: ltr\n`);
else if (article.dir == 'rtl')
writeStream.write(__`Direction: rtl\n`);
else
writeStream.write(__`Direction: ${article.dir}\n`);
}
if (wantedProperties.includes(Properties.htmlTitle)) {
writeStream.write(`<h1>${escapeHTML(article.title, document)}</h1>\n`);
}
if (wantedProperties.includes(Properties.htmlContent)) {
writeStream.write(article.content);
} else if (wantedProperties.includes(Properties.textContent)) {
writeStream.write(article.textContent);
const reader = new Readability(document);
const article = reader.parse();
if (!article) {
console.error(__`Couldn't process document.`);
setErrored(ExitCodes.dataError);
return;
}
if (outputJSON) {
let result = {};
if (wantedPropertiesCustom) {
for (propertyName of wantedProperties)
result[propertyName] = Properties.get(propertyName)(article, false, document);
} else {
for (const [name, func] of Properties) {
result[name] = func(article, false, document);
}
}
writeStream.write(JSON.stringify(result));
} else {
//Ignore wantedProperties, that should've thrown an error before
writeStream.write(document.documentElement.outerHTML);
for (propertyName of wantedProperties)
writeStream.write(Properties.get(propertyName)(article, true, document) + '\n');
}
}

@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit When unsure, exit with an error.\n",
"Default value is \"no-op\".\n": "Default value is \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n",
" html-title Outputs the article's title, wrapped in an <h1> tag.\n": " html-title Outputs the article's title, wrapped in an <h1> tag.\n",
" title Outputs the title in the format \"Title: $TITLE\".\n": " title Outputs the title in the format \"Title: $TITLE\".\n",
" excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n",
" byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Author metadata, in the format \"Author: $AUTHOR\"\n",
" length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Length of the article in characters, in the format \"Length: $LENGTH\"\n",
" dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n",
" html-content Outputs the article's main content as HTML.\n": " html-content Outputs the article's main content as HTML.\n",
" text-content Outputs the article's main content as plain text.\n": " text-content Outputs the article's main content as plain text.\n",
"Text-content and Html-content are mutually exclusive, and are always printed last.\n": "Text-content and Html-content are mutually exclusive, and are always printed last.\n",
" title The title of the article.\n": " title The title of the article.\n",
" html-title The title of the article, wrapped in an <h1> tag.\n": " html-title The title of the article, wrapped in an <h1> tag.\n",
" excerpt Article description, or short excerpt from the content.\n": " excerpt Article description, or short excerpt from the content.\n",
" byline Data about the page's author.\n": " byline Data about the page's author.\n",
" length Length of the article in characters.\n": " length Length of the article in characters.\n",
" dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n",
" text-content Output the article's main content as plain text.\n": " text-content Output the article's main content as plain text.\n",
" html-content Output the article's main content as HTML.\n": " html-content Output the article's main content as HTML.\n",
"Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n",
"Default value is \"html-title html-content\".\n": "Default value is \"html-title html-content\".\n",
"No input provided": "No input provided.",
"Note: --is-url option is deprecated.": "Note: --is-url option is deprecated.",
@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Not sure if this document should be processed, exiting",
"Can't output properties": "Can't output properties",
"Couldn't process document.": "Couldn't process document.",
"Title: %s\n": "Title: %s\n",
"Excerpt: %s\n": "Excerpt: %s\n",
"Author: %s\n": "Author: %s\n",
"Length: %s\n": "Length: %s\n",
"Host not found: '%s'": "Host not found: '%s'",
"Direction: %s\n": "Direction: %s\n",
"Direction: ltr\n": "Direction: ltr\n",
"Direction: rtl\n": "Direction: rtl\n"
"Host not found: '%s'": "Host not found: '%s'"
}

@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit В случае неуверенности, выдать ошибку и выйти.\n",
"Default value is \"no-op\".\n": "По умолчанию выполняется \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "Параметр --properties принимает список значений (разделённых пробелами):\n",
" html-title Outputs the article's title, wrapped in an <h1> tag.\n": " html-title Вывести заголовок статьи в тэге <h1>.\n",
" title Outputs the title in the format \"Title: $TITLE\".\n": " title Вывести заголовок статьи в формате \"Заголовок: $ЗАГОЛОВОК\".\n",
" excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Краткий урывок из статьи, в формате \"Урывок: $УРЫВОК\".\n",
" byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Данные об авторе статьи, в формате \"Автор: $AUTHOR\".\n",
" length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Длинна статьи (количество символов) в формате \"Длинна: $ДЛИННА\".\n",
" dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Направление текста: либо \"Направление: слева направо\", либо \"Направление: справа налево\".\n",
" html-content Outputs the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n",
" text-content Outputs the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n",
"Text-content and Html-content are mutually exclusive, and are always printed last.\n": "text-content и html-content исключают друг друга, и всегда выводятся в конце.\n",
" title The title of the article.\n": " title Заголовок статьи.\n",
" html-title The title of the article, wrapped in an <h1> tag.\n": " html-title Заголовок статьи в тэге <h1>.\n",
" excerpt Article description, or short excerpt from the content.\n": " excerpt Описание контента, или краткий урывок из статьи.\n",
" byline Data about the page's author.\n": " byline Данные об авторе статьи.\n",
" length Length of the article in characters.\n": " length Длина статьи (количество символов).\n",
" dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Направление текста: \"ltr\" если слева направо, \"rtl\" если справа налево.\n",
" text-content Output the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n",
" html-content Output the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n",
"Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Эти данные выводятся в том порядке, который указал пользователь. Всё, кроме \"text-content\" и \"html-content\", выводится как одна строка.\n",
"Default value is \"html-title html-content\".\n": "По умолчанию \"html-title html-content\".\n",
"No input provided": "Данные не введены.",
"Note: --is-url option is deprecated.": "Внимание: параметр --is-url устаревший.",
@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Не уверен, стоит ли обрабатывать этот документ - выхожу",
"Can't output properties": "Не могу определить характеристики",
"Couldn't process document.": "Не удалось обработать документ.",
"Title: %s\n": "Заголовок: %s\n",
"Excerpt: %s\n": "Урывок: %s\n",
"Author: %s\n": "Автор: %s\n",
"Length: %s\n": "Длинна: %s\n",
"Host not found: '%s'": "Сервер не найден: '%s'",
"Direction: %s\n": "Направление: %s\n",
"Direction: ltr\n": "Направление: слева направо\n",
"Direction: rtl\n": "Направление: справа налево\n"
"Host not found: '%s'": "Сервер не найден: '%s'"
}

Loading…
Cancel
Save