diff --git a/index.js b/index.js
index f35ca7f..74fc312 100755
--- a/index.js
+++ b/index.js
@@ -22,7 +22,7 @@ Firefox Reader Mode in your terminal! CLI tool for Mozilla's Readability library
const path = require("path");
-// GNU gettext gives preference to LANGUAGE, but this order is consistent with Yargs:
+// GNU gettext gives preference to LANGUAGE above all else, but this order is consistent with Yargs:
const locale = (
process.env.LC_ALL ||
process.env.LC_MESSAGES ||
@@ -61,16 +61,24 @@ function setErrored(exitCode) {
//Parsing arguments
//
-const Properties = {
- htmlTitle: "html-title",
- title: "title",
- excerpt: "excerpt",
- byline: "byline",
- length: "length",
- dir: "dir",
- htmlContent: "html-content",
- textContent: "text-content"
-};
+const Properties = new Map([
+ ["html-title", (article, singleLine, document) =>
+ `
${escapeHTML(Properties.get('title')(article, singleLine, document), document)}
`
+ ],
+ ["title", (article, singleLine) =>
+ singleLine ? article.title.replace(/\n+/gm, ' ') : article.title
+ ],
+ ["excerpt", (article, singleLine) =>
+ singleLine ? article.excerpt.replace(/\n+/gm, ' ') : article.excerpt
+ ],
+ ["byline", (article, singleLine) =>
+ singleLine ? article.byline.replace(/\n+/gm, ' ') : article.byline
+ ],
+ ["length", article => article.length],
+ ["dir", article => article.dir],
+ ["html-content", article => article.content],
+ ["text-content", article => article.textContent]
+]);
const LowConfidenceMode = {
noOp: "no-op",
@@ -89,7 +97,7 @@ function yargsCompatProperties(args) {
args["properties"].splice(i, 1, ...split);
continue;
}
- if (!Object.values(Properties).includes(property)) {
+ if (!Properties.has(property)) {
args["properties"].splice(i, 1);
i--;
if (!args["--"])
@@ -124,10 +132,10 @@ let args = yargs
if (args["properties"] !== undefined) {
const properties = args["properties"];
let possibleProperties = [];
- for (var possibleProperty of Object.values(Properties)) {
- if (possibleProperty.startsWith(properties[properties.length - 1])
- && !properties.includes(possibleProperty))
- possibleProperties.push(possibleProperty);
+ for (const propertyName of Properties.keys()) {
+ if (propertyName.startsWith(properties[properties.length - 1])
+ && !properties.includes(propertyName))
+ possibleProperties.push(propertyName);
}
if (possibleProperties.length > 0)
done(possibleProperties);
@@ -176,7 +184,7 @@ let args = yargs
alias: 'p',
type: "array",
desc: __`Output specific properties of the parsed article`,
- choices: ["html-title", "title", "excerpt", "byline", "length", "dir", "html-content", "text-content"]
+ choices: Array.from(Properties.keys())
})
.option("quiet", {
alias: 'q',
@@ -225,16 +233,16 @@ __`Default value is "no-op".\n` +
'\n' +
'\n' +
__`The --properties option accepts a list of values, separated by spaces. Suitable values are:\n` +
-__` html-title Outputs the article's title, wrapped in an tag.\n` +
-__` title Outputs the title in the format "Title: $TITLE".\n` +
-__` excerpt Article description, or short excerpt from the content, in the format "Excerpt: $EXCERPT".\n` +
-__` byline Author metadata, in the format "Author: $AUTHOR".\n` +
-__` length Length of the article in characters, in the format "Length: $LENGTH".\n` +
-__` dir Content direction, is either "Direction: ltr" or "Direction: rtl".\n` +
-__` html-content Outputs the article's main content as HTML.\n` +
-__` text-content Outputs the article's main content as plain text.\n` +
+__` title The title of the article.\n` +
+__` html-title The title of the article, wrapped in an tag.\n` +
+__` excerpt Article description, or short excerpt from the content.\n` +
+__` byline Data about the page's author.\n` +
+__` length Length of the article in characters.\n` +
+__` dir Text direction, is either "ltr" for left-to-right or "rtl" for right-to-left.\n` +
+__` text-content Output the article's main content as plain text.\n` +
+__` html-content Output the article's main content as HTML.\n` +
'\n' +
-__`Text-content and Html-content are mutually exclusive, and are always printed last.\n` +
+__`Properties are printed line by line, in the order specified by the user. Only "text-content" and "html-content" is printed as multiple lines.\n` +
__`Default value is "html-title html-content".\n`)
.wrap(Math.min(yargs.terminalWidth(), 120))
.strict()
@@ -313,12 +321,10 @@ if (args["properties"]) {
wantedProperties = args["properties"];
wantedPropertiesCustom = true;
} else {
- wantedProperties = [ Properties.htmlTitle, Properties.htmlContent ];
+ wantedProperties = [ 'html-title', 'html-content' ];
}
-
-
if (errored) {
printUsage();
return;
@@ -333,6 +339,9 @@ async function read(stream) {
}
+
+const JSDOM = require("jsdom").JSDOM;
+
if (inputIsFromStdin) {
if (!args["quiet"]) {
console.error("Reading...");
@@ -340,11 +349,9 @@ if (inputIsFromStdin) {
console.error(__`Warning: piping input with unknown URL. This means that relative links will be broken. Supply the --base parameter to fix.`)
}
read(process.stdin).then(result => {
- const JSDOM = require("jsdom").JSDOM;
onLoadDOM(new JSDOM(result, { url: documentURL }));
});
} else {
- const JSDOM = require("jsdom").JSDOM;
if (!args["quiet"])
console.error(__`Retrieving...`);
let promiseGetHTML;
@@ -359,10 +366,13 @@ if (inputIsFromStdin) {
promiseGetHTML.then(onLoadDOM, onLoadDOMError)
}
+
+
+
const { Readability, isProbablyReaderable } = require("@mozilla/readability");
//Taken from https://stackoverflow.com/a/22706073/5701177
-function escapeHTML(string, document){
+function escapeHTML(string, document) {
var p = document.createElement("p");
p.appendChild(document.createTextNode(string));
return p.innerHTML;
@@ -401,68 +411,36 @@ function onLoadDOM(dom) {
}
- if (shouldParseArticle) {
- if (!args["quiet"])
- console.error(__`Processing...`);
+ if (!shouldParseArticle) {
+ //Ignore wantedProperties, that should've thrown an error before
+ writeStream.write(document.documentElement.outerHTML);
+ return;
+ }
- const reader = new Readability(document);
- const article = reader.parse();
- if (!article) {
- console.error(__`Couldn't process document.`);
- setErrored(ExitCodes.dataError);
- return;
- }
- if (outputJSON) {
- let result = {};
- const jsonProperties = ["title", "excerpt", "byline", "length", "dir"];
- for (jsonProperty of jsonProperties) {
- if (!wantedPropertiesCustom || wantedProperties.includes(jsonProperty))
- result[jsonProperty] = article[jsonProperty];
- }
- if (!wantedPropertiesCustom || wantedProperties.includes(Properties.textContent)) {
- result[Properties.textContent] = article.textContent;
- }
- if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlContent)) {
- result[Properties.htmlContent] = article.content;
- }
- if (!wantedPropertiesCustom || wantedProperties.includes(Properties.htmlTitle)) {
- result[Properties.htmlTitle] = `${escapeHTML(article.title, document)}
`
- }
- writeStream.write(JSON.stringify(result));
- return;
- }
+ if (!args["quiet"])
+ console.error(__`Processing...`);
- if (wantedProperties.includes(Properties.title)) {
- writeStream.write(__`Title: ${article.title}\n`);
- }
- if (wantedProperties.includes(Properties.excerpt)) {
- writeStream.write(__`Excerpt: ${article.excerpt}\n`);
- }
- if (wantedProperties.includes(Properties.byline)) {
- writeStream.write(__`Author: ${article.byline}\n`);
- }
- if (wantedProperties.includes(Properties.length)) {
- writeStream.write(__`Length: ${article.length}\n`);
- }
- if (wantedProperties.includes(Properties.dir)) {
- if (article.dir == 'ltr')
- writeStream.write(__`Direction: ltr\n`);
- else if (article.dir == 'rtl')
- writeStream.write(__`Direction: rtl\n`);
- else
- writeStream.write(__`Direction: ${article.dir}\n`);
- }
- if (wantedProperties.includes(Properties.htmlTitle)) {
- writeStream.write(`${escapeHTML(article.title, document)}
\n`);
- }
- if (wantedProperties.includes(Properties.htmlContent)) {
- writeStream.write(article.content);
- } else if (wantedProperties.includes(Properties.textContent)) {
- writeStream.write(article.textContent);
+ const reader = new Readability(document);
+ const article = reader.parse();
+ if (!article) {
+ console.error(__`Couldn't process document.`);
+ setErrored(ExitCodes.dataError);
+ return;
+ }
+ if (outputJSON) {
+ let result = {};
+ if (wantedPropertiesCustom) {
+ for (propertyName of wantedProperties)
+ result[propertyName] = Properties.get(propertyName)(article, false, document);
+ } else {
+ for (const [name, func] of Properties) {
+ result[name] = func(article, false, document);
+ }
}
+ writeStream.write(JSON.stringify(result));
} else {
- //Ignore wantedProperties, that should've thrown an error before
- writeStream.write(document.documentElement.outerHTML);
+ for (propertyName of wantedProperties)
+ writeStream.write(Properties.get(propertyName)(article, true, document) + '\n');
}
}
diff --git a/locales/en.json b/locales/en.json
index 832f824..a935c35 100644
--- a/locales/en.json
+++ b/locales/en.json
@@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit When unsure, exit with an error.\n",
"Default value is \"no-op\".\n": "Default value is \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "The --properties option accepts a list of values, separated by spaces. Suitable values are:\n",
- " html-title Outputs the article's title, wrapped in an tag.\n": " html-title Outputs the article's title, wrapped in an tag.\n",
- " title Outputs the title in the format \"Title: $TITLE\".\n": " title Outputs the title in the format \"Title: $TITLE\".\n",
- " excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n",
- " byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Author metadata, in the format \"Author: $AUTHOR\"\n",
- " length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Length of the article in characters, in the format \"Length: $LENGTH\"\n",
- " dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n",
- " html-content Outputs the article's main content as HTML.\n": " html-content Outputs the article's main content as HTML.\n",
- " text-content Outputs the article's main content as plain text.\n": " text-content Outputs the article's main content as plain text.\n",
- "Text-content and Html-content are mutually exclusive, and are always printed last.\n": "Text-content and Html-content are mutually exclusive, and are always printed last.\n",
+ " title The title of the article.\n": " title The title of the article.\n",
+ " html-title The title of the article, wrapped in an tag.\n": " html-title The title of the article, wrapped in an tag.\n",
+ " excerpt Article description, or short excerpt from the content.\n": " excerpt Article description, or short excerpt from the content.\n",
+ " byline Data about the page's author.\n": " byline Data about the page's author.\n",
+ " length Length of the article in characters.\n": " length Length of the article in characters.\n",
+ " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n",
+ " text-content Output the article's main content as plain text.\n": " text-content Output the article's main content as plain text.\n",
+ " html-content Output the article's main content as HTML.\n": " html-content Output the article's main content as HTML.\n",
+ "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n",
"Default value is \"html-title html-content\".\n": "Default value is \"html-title html-content\".\n",
"No input provided": "No input provided.",
"Note: --is-url option is deprecated.": "Note: --is-url option is deprecated.",
@@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Not sure if this document should be processed, exiting",
"Can't output properties": "Can't output properties",
"Couldn't process document.": "Couldn't process document.",
- "Title: %s\n": "Title: %s\n",
- "Excerpt: %s\n": "Excerpt: %s\n",
- "Author: %s\n": "Author: %s\n",
- "Length: %s\n": "Length: %s\n",
- "Host not found: '%s'": "Host not found: '%s'",
- "Direction: %s\n": "Direction: %s\n",
- "Direction: ltr\n": "Direction: ltr\n",
- "Direction: rtl\n": "Direction: rtl\n"
+ "Host not found: '%s'": "Host not found: '%s'"
}
diff --git a/locales/ru.json b/locales/ru.json
index bcf119c..62e2159 100644
--- a/locales/ru.json
+++ b/locales/ru.json
@@ -19,15 +19,15 @@
" exit When unsure, exit with an error.\n": " exit В случае неуверенности, выдать ошибку и выйти.\n",
"Default value is \"no-op\".\n": "По умолчанию выполняется \"no-op\".\n",
"The --properties option accepts a list of values, separated by spaces. Suitable values are:\n": "Параметр --properties принимает список значений (разделённых пробелами):\n",
- " html-title Outputs the article's title, wrapped in an tag.\n": " html-title Вывести заголовок статьи в тэге .\n",
- " title Outputs the title in the format \"Title: $TITLE\".\n": " title Вывести заголовок статьи в формате \"Заголовок: $ЗАГОЛОВОК\".\n",
- " excerpt Article description, or short excerpt from the content, in the format \"Excerpt: $EXCERPT\"\n": " excerpt Краткий урывок из статьи, в формате \"Урывок: $УРЫВОК\".\n",
- " byline Author metadata, in the format \"Author: $AUTHOR\"\n": " byline Данные об авторе статьи, в формате \"Автор: $AUTHOR\".\n",
- " length Length of the article in characters, in the format \"Length: $LENGTH\"\n": " length Длинна статьи (количество символов) в формате \"Длинна: $ДЛИННА\".\n",
- " dir Content direction, is either \"Direction: ltr\" or \"Direction: rtl\"\n": " dir Направление текста: либо \"Направление: слева направо\", либо \"Направление: справа налево\".\n",
- " html-content Outputs the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n",
- " text-content Outputs the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n",
- "Text-content and Html-content are mutually exclusive, and are always printed last.\n": "text-content и html-content исключают друг друга, и всегда выводятся в конце.\n",
+ " title The title of the article.\n": " title Заголовок статьи.\n",
+ " html-title The title of the article, wrapped in an tag.\n": " html-title Заголовок статьи в тэге .\n",
+ " excerpt Article description, or short excerpt from the content.\n": " excerpt Описание контента, или краткий урывок из статьи.\n",
+ " byline Data about the page's author.\n": " byline Данные об авторе статьи.\n",
+ " length Length of the article in characters.\n": " length Длинна статьи (количество символов).\n",
+ " dir Text direction, is either \"ltr\" for left-to-right or \"rtl\" for right-to-left.\n": " dir Направление текста: \"ltr\" если слева направо\", \"rtl\" если справа налево.\n",
+ " text-content Output the article's main content as plain text.\n": " text-content Вывести главный контент страницы в виде обычного текста.\n",
+ " html-content Output the article's main content as HTML.\n": " html-content Вывести главный контент страницы в виде HTML.\n",
+ "Properties are printed line by line, in the order specified by the user. Only \"text-content\" and \"html-content\" is printed as multiple lines.\n": "Эти данные выводятся в том порядке, который указал пользователь. Всё, кроме \"text-content\" и \"html-content\", выводится как одна строка.\n",
"Default value is \"html-title html-content\".\n": "По умолчанию \"html-title html-content\".\n",
"No input provided": "Данные не введены.",
"Note: --is-url option is deprecated.": "Внимание: параметр --is-url устаревший.",
@@ -41,12 +41,5 @@
"Not sure if this document should be processed, exiting": "Не уверен, стоит ли обрабатывать этот документ - выхожу",
"Can't output properties": "Не могу определить характеристики",
"Couldn't process document.": "Не удалось обработать документ.",
- "Title: %s\n": "Заголовок: %s\n",
- "Excerpt: %s\n": "Урывок: %s\n",
- "Author: %s\n": "Автор: %s\n",
- "Length: %s\n": "Длинна: %s\n",
- "Host not found: '%s'": "Сервер не найден: '%s'",
- "Direction: %s\n": "Направление: %s\n",
- "Direction: ltr\n": "Направление: слева направо\n",
- "Direction: rtl\n": "Направление: справа налево\n"
+ "Host not found: '%s'": "Сервер не найден: '%s'"
}