Rework output

Breaking changes:
* Each property is now printed on its own single line (except html-content
  and text-content, which may span multiple lines)
* Properties are now printed without prefixes or localization
* Properties are now printed in the order given by the user
merge-requests/2/head
gardenapple 3 years ago
parent cdd635d560
commit 68f5a3b6c5
No known key found for this signature in database
GPG Key ID: CAF17E9ABE789268

@ -22,7 +22,7 @@ Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
const path = require("path"); const path = require("path");
// GNU gettext gives preference to LANGUAGE, but this order is consistent with Yargs: // GNU gettext gives preference to LANGUAGE above all else, but this order is consistent with Yargs:
const locale = ( const locale = (
process.env.LC_ALL || process.env.LC_ALL ||
process.env.LC_MESSAGES || process.env.LC_MESSAGES ||
@ -61,16 +61,24 @@ function setErrored(exitCode) {
//Parsing arguments //Parsing arguments
// //
const Properties = { const Properties = new Map([
htmlTitle: "html-title", ["html-title", (article, singleLine, document) =>
title: "title", `<h1>${escapeHTML(Properties.get('title')(article, singleLine, document), document)}</h1>`
excerpt: "excerpt", ],
byline: "byline", ["title", (article, singleLine) =>
length: "length", singleLine ? article.title.replace(/\n+/gm, ' ') : article.title
dir: "dir", ],
htmlContent: "html-content", ["excerpt", (article, singleLine) =>
textContent: "text-content" singleLine ? article.excerpt.replace(/\n+/gm, ' ') : article.excerpt
}; ],
["byline", (article, singleLine) =>
singleLine ? article.byline.replace(/\n+/gm, ' ') : article.byline
],
["length", article => article.length],
["dir", article => article.dir],
["html-content", article => article.content],
["text-content", article => article.textContent]
]);
const LowConfidenceMode = { const LowConfidenceMode = {
noOp: "no-op", noOp: "no-op",
@ -89,7 +97,7 @@ function yargsCompatProperties(args) {
args["properties"].splice(i, 1, ...split); args["properties"].splice(i, 1, ...split);
continue; continue;
} }
if (!Object.values(Properties).includes(property)) { if (!Properties.has(property)) {
args["properties"].splice(i, 1); args["properties"].splice(i, 1);
i--; i--;
if (!args["--"]) if (!args["--"])
@ -124,10 +132,10 @@ let args = yargs
if (args["properties"] !== undefined) { if (args["properties"] !== undefined) {
const properties = args["properties"]; const properties = args["properties"];
let possibleProperties = []; let possibleProperties = [];
for (var possibleProperty of Object.values(Properties)) { for (const propertyName of Properties.keys()) {
if (possibleProperty.startsWith(properties[properties.length - 1]) if (propertyName.startsWith(properties[properties.length - 1])
&& !properties.includes(possibleProperty)) && !properties.includes(propertyName))
possibleProperties.push(possibleProperty); possibleProperties.push(propertyName);
} }
if (possibleProperties.length > 0) if (possibleProperties.length > 0)
done(possibleProperties); done(possibleProperties);
@ -176,7 +184,7 @@ let args = yargs
alias: 'p', alias: 'p',
type: "array", type: "array",
desc: __`Output specific properties of the parsed article`, desc: __`Output specific properties of the parsed article`,
choices: ["html-title", "title", "excerpt", "byline", "length", "dir", "html-content", "text-content"] choices: Array.from(Properties.keys())
}) })
.option("quiet", { .option("quiet", {
alias: 'q', alias: 'q',
@ -225,16 +233,16 @@ __`Default value is "no-op".\n` +
'\n' + '\n' +
'\n' + '\n' +
__`The --properties option accepts a list of values, separated by spaces. Suitable values are:\n` + __`The --properties option accepts a list of values, separated by spaces. Suitable values are:\n` +
__` html-title Outputs the article's title, wrapped in an <h1> tag.\n` + __` title The title of the article.\n` +
__` title Outputs the title in the format "Title: $TITLE".\n` + __` html-title The title of the article, wrapped in an <h1> tag.\n` +
__` excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT".\n` + __` excerpt Article description, or short excerpt from the content.\n` +
__` byline Author metadata, in the format "Author: $AUTHOR".\n` + __` byline Data about the page's author.\n` +
__` length Length of the article in characters, in the format "Length: $LENGTH".\n` + __` length Length of the article in characters.\n` +
__` dir Content direction, is either "Direction: ltr" or "Direction: rtl".\n` + __` dir Text direction, is either "ltr" for left-to-right or "rtl" for right-to-left.\n` +
__` html-content Outputs the article's main content as HTML.\n` + __` text-content Output the article's main content as plain text.\n` +
__` text-content Outputs the article's main content as plain text.\n` + __` html-content Output the article's main content as HTML.\n` +
'\n' + '\n' +
__`Text-content and Html-content are mutually exclusive, and are always printed last.\n` + __`Properties are printed line by line, in the order specified by the user. Only "text-content" and "html-content" is printed as multiple lines.\n` +
__`Default value is "html-title html-content".\n`) __`Default value is "html-title html-content".\n`)
.wrap(Math.min(yargs.terminalWidth(), 120)) .wrap(Math.min(yargs.terminalWidth(), 120))
.strict() .strict()
@ -313,12 +321,10 @@ if (args["properties"]) {
wantedProperties = args["properties"]; wantedProperties = args["properties"];
wantedPropertiesCustom = true; wantedPropertiesCustom = true;
} else { } else {
wantedProperties = [ Properties.htmlTitle, Properties.htmlContent ]; wantedProperties = [ 'html-title', 'html-content' ];
} }
if (errored) { if (errored) {
printUsage(); printUsage();
return; return;
@ -333,6 +339,9 @@ async function read(stream) {
} }
const JSDOM = require("jsdom").JSDOM;
if (inputIsFromStdin) { if (inputIsFromStdin) {
if (!args["quiet"]) { if (!args["quiet"]) {
console.error("Reading..."); console.error("Reading...");
@ -340,11 +349,9 @@ if (inputIsFromStdin) {
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`) console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
} }
read(process.stdin).then(result => { read(process.stdin).then(result => {
const JSDOM = require("jsdom").JSDOM;
onLoadDOM(new JSDOM(result, { url: documentURL })); onLoadDOM(new JSDOM(result, { url: documentURL }));
}); });
} else { } else {
const JSDOM = require("jsdom").JSDOM;
if (!args["quiet"]) if (!args["quiet"])
console.error(__`Retrieving...`); console.error(__`Retrieving...`);
let promiseGetHTML; let promiseGetHTML;
@ -359,10 +366,13 @@ if (inputIsFromStdin) {
promiseGetHTML.then(onLoadDOM, onLoadDOMError) promiseGetHTML.then(onLoadDOM, onLoadDOMError)
} }
const { Readability, isProbablyReaderable } = require("@mozilla/readability"); const { Readability, isProbablyReaderable } = require("@mozilla/readability");
//Taken from https://stackoverflow.com/a/22706073/5701177 //Taken from https://stackoverflow.com/a/22706073/5701177
function escapeHTML(string, document){ function escapeHTML(string, document) {
var p = document.createElement("p"); var p = document.createElement("p");
p.appendChild(document.createTextNode(string)); p.appendChild(document.createTextNode(string));
return p.innerHTML; return p.innerHTML;
@ -401,68 +411,36 @@ function onLoadDOM(dom) {
} }
if (shouldParseArticle) { if (!shouldParseArticle) {
if (!args["quiet"]) //Ignore wantedProperties, that should've thrown an error before
console.error(__`Processing...`); writeStream.write(document.documentElement.outerHTML);
return;
}
const reader = new Readability(document); if (!args["quiet"])
const article = reader.parse(); console.error(__`Processing...`);
if (!article) {
console.error(__`Couldn't process document.`);
setErrored(ExitCodes.dataError);
return;
}
if (outputJSON) {
let result = {};
const jsonProperties = ["title", "excerpt", "byline", "length", "dir"];
for (jsonProperty of jsonProperties) {
if (!wantedPropertiesCustom || wantedProperties.includes(jsonProperty))
result[jsonProperty] = article[jsonProperty];
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.textContent)) {
result[Properties.textContent] = article.textContent;
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlContent)) {
result[Properties.htmlContent] = article.content;
}
if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlTitle)) {
result[Properties.htmlTitle] = `<h1>${escapeHTML(article.title, document)}</h1>`
}
writeStream.write(JSON.stringify(result));
return;
}
if (wantedProperties.includes(Properties.title)) { const reader = new Readability(document);
writeStream.write(__`Title: ${article.title}\n`); const article = reader.parse();
} if (!article) {
if (wantedProperties.includes(Properties.excerpt)) { console.error(__`Couldn't process document.`);
writeStream.write(__`Excerpt: ${article.excerpt}\n`); setErrored(ExitCodes.dataError);
} return;
if (wantedProperties.includes(Properties.byline)) { }
writeStream.write(__`Author: ${article.byline}\n`); if (outputJSON) {
} let result = {};
if (wantedProperties.includes(Properties.length)) { if (wantedPropertiesCustom) {
writeStream.write(__`Length: ${article.length}\n`); for (propertyName of wantedProperties)
} result[propertyName] = Properties.get(propertyName)(article, false, document);
if (wantedProperties.includes(Properties.dir)) { } else {
if (article.dir == 'ltr') for (const [name, func] of Properties) {
writeStream.write(__`Direction: ltr\n`); result[name] = func(article, false, document);
else if (article.dir == 'rtl') }
writeStream.write(__`Direction: rtl\n`);
else
writeStream.write(__`Direction: ${article.dir}\n`);
}
if (wantedProperties.includes(Properties.htmlTitle)) {
writeStream.write(`<h1>${escapeHTML(article.title, document)}</h1>\n`);
}
if (wantedProperties.includes(Properties.htmlContent)) {
writeStream.write(article.content);
} else if (wantedProperties.includes(Properties.textContent)) {
writeStream.write(article.textContent);
} }
writeStream.write(JSON.stringify(result));
} else { } else {
//Ignore wantedProperties, that should've thrown an error before for (propertyName of wantedProperties)
writeStream.write(document.documentElement.outerHTML); writeStream.write(Properties.get(propertyName)(article, true, document) + '\n');
} }
} }

@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit When unsure, exit with an error.\n", " exit When unsure, exit with an error.\n": " exit When unsure, exit with an error.\n",
"Default value is \"no-op\".\n": "Default value is \"no-op\".\n", "Default value is \"no-op\".\n": "Default value is \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n", "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n",
" html-title Outputs the article's title, wrapped in an <h1> tag.\n": " html-title Outputs the article's title, wrapped in an <h1> tag.\n", " title The title of the article.\n": " title The title of the article.\n",
" title Outputs the title in the format \"Title: $TITLE\".\n": " title Outputs the title in the format \"Title: $TITLE\".\n", " html-title The title of the article, wrapped in an <h1> tag.\n": " html-title The title of the article, wrapped in an <h1> tag.\n",
" excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n", " excerpt Article description, or short excerpt from the content.\n": " excerpt Article description, or short excerpt from the content.\n",
" byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Author metadata, in the format \"Author: $AUTHOR\"\n", " byline Data about the page's author.\n": " byline Data about the page's author.\n",
" length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Length of the article in characters, in the format \"Length: $LENGTH\"\n", " length Length of the article in characters.\n": " length Length of the article in characters.\n",
" dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n", " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n",
" html-content Outputs the article's main content as HTML.\n": " html-content Outputs the article's main content as HTML.\n", " text-content Output the article's main content as plain text.\n": " text-content Output the article's main content as plain text.\n",
" text-content Outputs the article's main content as plain text.\n": " text-content Outputs the article's main content as plain text.\n", " html-content Output the article's main content as HTML.\n": " html-content Output the article's main content as HTML.\n",
"Text-content and Html-content are mutually exclusive, and are always printed last.\n": "Text-content and Html-content are mutually exclusive, and are always printed last.\n", "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n",
"Default value is \"html-title html-content\".\n": "Default value is \"html-title html-content\".\n", "Default value is \"html-title html-content\".\n": "Default value is \"html-title html-content\".\n",
"No input provided": "No input provided.", "No input provided": "No input provided.",
"Note: --is-url option is deprecated.": "Note: --is-url option is deprecated.", "Note: --is-url option is deprecated.": "Note: --is-url option is deprecated.",
@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Not sure if this document should be processed, exiting", "Not sure if this document should be processed, exiting": "Not sure if this document should be processed, exiting",
"Can't output properties": "Can't output properties", "Can't output properties": "Can't output properties",
"Couldn't process document.": "Couldn't process document.", "Couldn't process document.": "Couldn't process document.",
"Title: %s\n": "Title: %s\n", "Host not found: '%s'": "Host not found: '%s'"
"Excerpt: %s\n": "Excerpt: %s\n",
"Author: %s\n": "Author: %s\n",
"Length: %s\n": "Length: %s\n",
"Host not found: '%s'": "Host not found: '%s'",
"Direction: %s\n": "Direction: %s\n",
"Direction: ltr\n": "Direction: ltr\n",
"Direction: rtl\n": "Direction: rtl\n"
} }

@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit В случае неуверенности, выдать ошибку и выйти.\n", " exit When unsure, exit with an error.\n": " exit В случае неуверенности, выдать ошибку и выйти.\n",
"Default value is \"no-op\".\n": "По умолчанию выполняется \"no-op\".\n", "Default value is \"no-op\".\n": "По умолчанию выполняется \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "Параметр --properties принимает список значений (разделённых пробелами):\n", "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "Параметр --properties принимает список значений (разделённых пробелами):\n",
" html-title Outputs the article's title, wrapped in an <h1> tag.\n": " html-title Вывести заголовок статьи в тэге <h1>.\n", " title The title of the article.\n": " title Заголовок статьи.\n",
" title Outputs the title in the format \"Title: $TITLE\".\n": " title Вывести заголовок статьи в формате \"Заголовок: $ЗАГОЛОВОК\".\n", " html-title The title of the article, wrapped in an <h1> tag.\n": " html-title Заголовок статьи в тэге <h1>.\n",
" excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Краткий урывок из статьи, в формате \"Урывок: $УРЫВОК\".\n", " excerpt Article description, or short excerpt from the content.\n": " excerpt Описание контента, или краткий урывок из статьи.\n",
" byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Данные об авторе статьи, в формате \"Автор: $AUTHOR\".\n", " byline Data about the page's author.\n": " byline Данные об авторе статьи.\n",
" length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Длинна статьи (количество символов) в формате \"Длинна: $ДЛИННА\".\n", " length Length of the article in characters.\n": " length Длинна статьи (количество символов).\n",
" dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Направление текста: либо \"Направление: слева направо\", либо \"Направление: справа налево\".\n", " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Направление текста: \"ltr\" если слева направо\", \"rtl\" если справа налево.\n",
" html-content Outputs the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n", " text-content Output the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n",
" text-content Outputs the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n", " html-content Output the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n",
"Text-content and Html-content are mutually exclusive, and are always printed last.\n": "text-content и html-content исключают друг друга, и всегда выводятся в конце.\n", "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Эти данные выводятся в том порядке, который указал пользователь. Всё, кроме \"text-content\" и \"html-content\", выводится как одна строка.\n",
"Default value is \"html-title html-content\".\n": "По умолчанию \"html-title html-content\".\n", "Default value is \"html-title html-content\".\n": "По умолчанию \"html-title html-content\".\n",
"No input provided": "Данные не введены.", "No input provided": "Данные не введены.",
"Note: --is-url option is deprecated.": "Внимание: параметр --is-url устаревший.", "Note: --is-url option is deprecated.": "Внимание: параметр --is-url устаревший.",
@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Не уверен, стоит ли обрабатывать этот документ - выхожу", "Not sure if this document should be processed, exiting": "Не уверен, стоит ли обрабатывать этот документ - выхожу",
"Can't output properties": "Не могу определить характеристики", "Can't output properties": "Не могу определить характеристики",
"Couldn't process document.": "Не удалось обработать документ.", "Couldn't process document.": "Не удалось обработать документ.",
"Title: %s\n": "Заголовок: %s\n", "Host not found: '%s'": "Сервер не найден: '%s'"
"Excerpt: %s\n": "Урывок: %s\n",
"Author: %s\n": "Автор: %s\n",
"Length: %s\n": "Длинна: %s\n",
"Host not found: '%s'": "Сервер не найден: '%s'",
"Direction: %s\n": "Направление: %s\n",
"Direction: ltr\n": "Направление: слева направо\n",
"Direction: rtl\n": "Направление: справа налево\n"
} }

Loading…
Cancel
Save