mirror of
https://github.com/koreader/koreader
synced 2024-11-02 15:40:16 +00:00
d64e143297
Strings are prefilled by msgmerge with closely matching ones to reduce the amount of labor required by the translator. Often the string requires only minor adjustment, making the process faster, and when the string isn't a good match it's no big deal. However, these so-called fuzzy strings shouldn't be treated as if they were actually translated. Fixes the effect seen in <https://github.com/koreader/koreader/issues/5806>.
408 lines
14 KiB
Lua
408 lines
14 KiB
Lua
--[[--
|
|
A pure Lua implementation of a gettext subset.
|
|
|
|
Example:
|
|
local _ = require("gettext") -- @{gettext.gettext|gettext}()
|
|
local C_ = _.pgettext -- @{pgettext}()
|
|
local N_ = _.ngettext -- @{ngettext}()
|
|
local NC_ = _.npgettext -- @{npgettext}()
|
|
local T = require("ffi/util").template -- @{ffi.util.template}()
|
|
|
|
-- The most common use case with regular @{gettext.gettext|gettext}().
|
|
local simple_string = _("item")
|
|
|
|
-- A more complex example. The correct plural form will be automatically
|
|
-- selected by @{ngettext}() based on the number.
|
|
local numbered_string = T(N_("1 item", "%1 items", num_items), num_items)
|
|
|
|
It's required to pass along the number twice, because @{ngettext}() doesn't do anything with placeholders.
|
|
See @{ffi.util.template}() for more information about the template function.
|
|
--]]
|
|
|
|
local isAndroid, android = pcall(require, "android")
|
|
local logger = require("logger")
|
|
|
|
-- Module state and configuration.
-- `translation` maps a msgid to its translated string (or, for plural
-- entries, to a table indexed by plural form number); `context` maps a
-- msgctxt to a table of the same shape.
local GetText = {
    -- Initialised here so that pgettext()/npgettext() can be called before
    -- changeLang() has ever run; they would otherwise index a nil field.
    context = {},
    translation = {},
    current_lang = "C",
    dirname = "l10n",
    textdomain = "koreader",
    plural_default = "n != 1",
}
|
|
|
|
-- Metatable for GetText. The functions stored in `__index` are reachable as
-- fields of the module table once the metatable has been attached.
local GetText_mt = { __index = {} }
|
|
|
|
-- wrapUntranslated() will be overridden by bidi.lua when the UI language is
-- RTL, to wrap untranslated English strings as LTR-isolated segments.
-- It should do nothing when the UI language is LTR.
-- Note: this won't be possible if we switch from our Lua GetText to
-- GetText through FFI (but hopefully, RTL languages will be fully
-- translated by then).

--- Identity wrapper: returns the untranslated text unchanged.
function GetText.wrapUntranslated_nowrap(text)
    return text
end

-- Default to the no-op wrapper.
GetText.wrapUntranslated = GetText.wrapUntranslated_nowrap
|
|
|
|
--[[--
Returns a translation.

Falls back to the untranslated msgid, passed through `wrapUntranslated()`,
when no translation is stored for it.

@function gettext

@string msgid

@treturn string translation

@usage
    local _ = require("gettext")
    local translation = _("A meaningful message.")
--]]
function GetText_mt.__call(gettext, msgid)
    local translated = gettext.translation[msgid]
    if translated then
        return translated
    end
    return gettext.wrapUntranslated(msgid)
end
|
|
|
|
-- Replacement for the character that follows a backslash in a PO string.
-- A backslash directly followed by a newline yields nothing at all.
local C_ESCAPE_REPLACEMENTS = {
    ["\n"] = "",
    ["a"] = "\a",
    ["b"] = "\b",
    ["f"] = "\f",
    ["n"] = "\n",
    ["r"] = "\r",
    ["t"] = "\t",
    ["v"] = "\v",
    ["0"] = "\0", -- shouldn't happen, though
}

--- Resolves a single C-style escape.
-- @string what the character following the backslash
-- @treturn string the escaped character, or `what` itself when it is not a
-- known escape letter (so `\\` and `\"` yield `\` and `"`)
local function c_escape(what)
    local replacement = C_ESCAPE_REPLACEMENTS[what]
    if replacement ~= nil then
        return replacement
    end
    return what
end
|
|
|
|
--- Converts C logical operators to Lua.
-- `&&` and `||` are replaced by `and` / `or` padded with spaces, so that
-- expressions written without whitespace (e.g. `n>1&&n<5`) do not end up
-- with the keyword glued to its operands. `!=` becomes `~=`.
-- @string logical_str C expression
-- @treturn string the same expression using Lua operators
local function logicalCtoLua(logical_str)
    -- The extra parentheses drop gsub's second return value (the match count).
    logical_str = (logical_str:gsub("&&", " and "))
    logical_str = (logical_str:gsub("!=", "~="))
    logical_str = (logical_str:gsub("||", " or "))
    return logical_str
end
|
|
|
|
--- Default getPlural function (the `n != 1` rule).
-- @int n
-- @treturn int 0 when n is exactly 1, otherwise 1
local function getDefaultPlural(n)
    -- 0 is truthy in Lua, so the and/or idiom is safe here.
    return (n == 1) and 0 or 1
end
|
|
|
|
--- Generates a proper Lua function out of logical gettext math tests.
-- @tparam table pl_tests the gettext plural expression, split on `:`
-- @param nplurals number of plural forms (currently unused, kept for the
-- existing call signature)
-- @string plural_default expression for which getDefaultPlural is returned directly
-- @treturn ?function a function mapping n to a plural form index, or nil
-- when no function could be built (the caller then falls back to its default)
local function getPluralFunc(pl_tests, nplurals, plural_default)
    local plural_func_str

    if #pl_tests > 1 then
        -- the return function() stuff is a bit of loadstring trickery
        local parts = { "return function(n) if " }
        for i = 1, #pl_tests do
            local pl_test = logicalCtoLua(pl_tests[i])

            if tonumber(pl_test) ~= nil then
                -- no condition, just a number
                pl_test = " else return "..pl_test
            elseif i > 1 then
                pl_test = " elseif "..pl_test
            end
            -- C ternary "cond ? value" becomes "cond then return value"
            pl_test = (pl_test:gsub("%?", " then return"))

            -- append to plural function
            parts[#parts + 1] = pl_test
        end
        parts[#parts + 1] = " end end"
        plural_func_str = table.concat(parts)
    else
        local pl_test = pl_tests[1]
        if pl_test == nil then
            -- Nothing to build a function from.
            return nil
        end
        -- Ensure JIT compiled function if we're dealing with one of the many simpler languages.
        -- After all, loadstring won't be.
        -- Potential workaround: write to file and use require.
        if pl_test == plural_default then
            return getDefaultPlural
        end
        if tonumber(pl_test) ~= nil then
            -- language with no plural forms
            plural_func_str = "return function(n) return "..pl_test.." end"
        else
            pl_test = logicalCtoLua(pl_test)
            plural_func_str = "return function(n) if "..pl_test.." then return 1 else return 0 end end"
        end
    end
    logger.dbg("gettext: plural function", plural_func_str)

    -- loadstring only exists in Lua 5.1/LuaJIT; later versions use load.
    local compile = loadstring or load
    local chunk, err = compile(plural_func_str)
    if not chunk then
        -- The generated source did not compile; report it instead of
        -- calling nil, and let the caller use its fallback.
        logger.dbg("gettext: invalid plural function:", err)
        return nil
    end
    return chunk()
end
|
|
|
|
--- Stores one translated string in the lookup tables.
-- Entries with a non-empty msgctxt go to GetText.context[msgctxt], all
-- others to GetText.translation. An empty translation is stored as nil.
-- @param msgctxt context string, or nil / "" for none
-- @string msgid
-- @string msgstr translation, still containing C-style escapes
-- @param n plural form index, or nil for a non-plural entry
local function addTranslation(msgctxt, msgid, msgstr, n)
    -- translated string
    local text = string.gsub(msgstr, "\\(.)", c_escape)
    if text == "" then
        text = nil
    end

    -- Pick the table this entry belongs to.
    local target
    if msgctxt and msgctxt ~= "" then
        target = GetText.context[msgctxt]
        if not target then
            target = {}
            GetText.context[msgctxt] = target
        end
    else
        target = GetText.translation
    end

    if n then
        -- Plural entry: one slot per plural form.
        local forms = target[msgid]
        if not forms then
            forms = {}
            target[msgid] = forms
        end
        forms[n] = text
    else
        target[msgid] = text
    end
end
|
|
|
|
-- for PO file syntax, see
-- https://www.gnu.org/software/gettext/manual/html_node/PO-Files.html
-- we only implement a sane subset for now

-- Sets GetText.getPlural from the Plural-Forms field of a PO header.
-- Falls back to getDefaultPlural whenever the field is missing, cannot be
-- parsed, or no plural function can be built from it.
local function loadPluralForms(header)
    local plural_forms = header:match("Plural%-Forms: (.*)")
    if not plural_forms then
        GetText.getPlural = getDefaultPlural
        return
    end

    local nplurals = tonumber(plural_forms:match("nplurals=([0-9]+);")) or 2
    local plurals = plural_forms:match("plural=%((.*)%);")

    -- Hardcoded workaround for Romanian which has 3 plural forms.
    if plurals == "n == 1) ? 0 : ((n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2" then
        plurals = "n == 1 ? 0 : (n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2"
    end

    if not plurals then
        -- Some languages (e.g., Arabic) may not use parentheses.
        -- However, the following more inclusive match is more likely
        -- to accidentally include junk and seldom relevant.
        -- We might also be dealing with a language without plurals.
        -- That would look like `plural=0`.
        plurals = plural_forms:match("plural=(.*);")
    end

    if not plurals or plurals:find("[^n!=%%<>&:%(%)|?0-9 ]") then
        -- missing, or we don't trust this input: go with default instead
        plurals = GetText.plural_default
    end

    local pl_tests = {}
    for pl_test in plurals:gmatch("[^:]+") do
        table.insert(pl_tests, pl_test)
    end

    -- A malformed expression must not abort loading the whole language.
    local ok, plural_func = pcall(getPluralFunc, pl_tests, nplurals, GetText.plural_default)
    if not ok then
        logger.dbg("gettext: cannot build plural function:", plural_func)
        plural_func = nil
    end
    GetText.getPlural = plural_func or getDefaultPlural
end

-- Switches the active language by loading
-- <dirname>/<new_lang>/<textdomain>.po into the lookup tables.
-- Entries flagged as fuzzy are ignored entirely.
-- Returns false when the PO file cannot be opened, nothing otherwise.
function GetText_mt.__index.changeLang(new_lang)
    GetText.context = {}
    GetText.translation = {}
    GetText.current_lang = "C"
    -- Drop the plural rule of any previously loaded language, so it is not
    -- applied to untranslated strings or to the next language.
    GetText.getPlural = getDefaultPlural

    -- the "C" locale disables localization altogether
    -- can be various things such as `en_US` or `en_US:en`
    if new_lang == "C" or new_lang == nil or new_lang == ""
       or new_lang:match("^en_US") == "en_US" then return end

    -- strip encoding suffix in locale like "zh_CN.utf8"
    -- (find() yields the position of the character before the dot; when
    -- there is no dot it yields nil and sub() keeps the whole string)
    new_lang = new_lang:sub(1, new_lang:find(".%."))

    local file = GetText.dirname .. "/" .. new_lang .. "/" .. GetText.textdomain .. ".po"
    local po = io.open(file, "r")

    if not po then
        logger.dbg("cannot open translation file:", file)
        return false
    end

    local data = {}
    local fuzzy = false
    local headers
    local what = nil
    while true do
        local line = po:read("*l")
        if line == nil or line == "" then
            -- A blank line (or EOF) terminates the current entry.
            if data.msgid and data.msgid_plural and data["msgstr[0]"] then
                for k, v in pairs(data) do
                    local n = tonumber(k:match("msgstr%[([0-9]+)%]"))
                    if n and v then
                        addTranslation(data.msgctxt, data.msgid, v, n)
                    end
                end
            elseif data.msgid and data.msgstr and data.msgstr ~= "" then
                -- header
                if not headers and data.msgid == "" then
                    headers = data.msgstr
                    loadPluralForms(headers)
                end

                addTranslation(data.msgctxt, data.msgid, data.msgstr)
            end
            -- stop at EOF:
            if line == nil then break end
            data = {}
            what = nil
            -- The fuzzy flag belongs to a single entry.
            fuzzy = false
        elseif line:match("^#") then
            -- comment; only the flags comment ("#, ...") is of interest
            if line:match("^#,") and line:find("fuzzy", 1, true) then
                fuzzy = true
            end
        elseif not fuzzy then
            -- Every line of a fuzzy entry is skipped, so that neither a
            -- msgctxt nor a multi-line msgid lets part of it slip through.
            -- new data item (msgid, msgstr, ...)
            local w, s = line:match("^%s*([%a_%[%]0-9]+)%s+\"(.*)\"%s*$")
            if w then
                what = w
            else
                -- string continuation
                s = line:match("^%s*\"(.*)\"%s*$")
            end
            if what and s then
                -- unescape \n or msgid won't match
                s = s:gsub("\\n", "\n")
                -- unescape " or msgid won't match
                s = s:gsub('\\"', '"')
                data[what] = (data[what] or "") .. s
            end
        end
    end
    po:close()
    GetText.current_lang = new_lang
end
|
|
|
|
-- Default plural rule, found through the metatable's __index whenever the
-- GetText table itself has no getPlural field; changeLang() assigns
-- GetText.getPlural after reading a PO header.
GetText_mt.__index.getPlural = getDefaultPlural
|
|
|
|
--[[--
Returns a plural form.

Many languages have more forms than just singular and plural. This function
abstracts the complexity away. The translation can contain as many
pluralizations as it requires.

See [gettext plural forms](https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html)
and [translating plural forms](https://www.gnu.org/software/gettext/manual/html_node/Translating-plural-forms.html)
for more information.

It's required to pass along the number twice, because @{ngettext}() doesn't do anything with placeholders.
See @{ffi.util.template}() for more information about the template function.

@function ngettext

@string msgid
@string msgid_plural
@int n

@treturn string translation

@usage
    local _ = require("gettext")
    local N_ = _.ngettext
    local T = require("ffi/util").template

    local items_string = T(N_("1 item", "%1 items", num_items), num_items)
--]]
function GetText_mt.__index.ngettext(msgid, msgid_plural, n)
    local plural = GetText.getPlural(n)

    local forms = GetText.translation[msgid]
    local translated = forms and forms[plural]
    if translated then
        return translated
    end

    -- Untranslated: form 0 maps to the singular msgid, any other form to
    -- the plural one.
    if plural == 0 then
        return GetText.wrapUntranslated(msgid)
    end
    return GetText.wrapUntranslated(msgid_plural)
end
|
|
|
|
--[[--
Returns a context-disambiguated plural form.

This is the logical combination between @{ngettext}() and @{pgettext}().
Please refer there for more information.

@function npgettext

@string msgctxt
@string msgid
@string msgid_plural
@int n

@treturn string translation

@usage
    local _ = require("gettext")
    local NC_ = _.npgettext
    local T = require("ffi/util").template

    local statistics_items_string = T(NC_("Statistics", "1 item", "%1 items", num_items), num_items)
    local books_items_string = T(NC_("Books", "1 item", "%1 items", num_items), num_items)
--]]
function GetText_mt.__index.npgettext(msgctxt, msgid, msgid_plural, n)
    local plural = GetText.getPlural(n)

    local in_context = GetText.context[msgctxt]
    local forms = in_context and in_context[msgid]
    local translated = forms and forms[plural]
    if translated then
        return translated
    end

    -- Untranslated: form 0 maps to the singular msgid, any other form to
    -- the plural one.
    if plural == 0 then
        return GetText.wrapUntranslated(msgid)
    end
    return GetText.wrapUntranslated(msgid_plural)
end
|
|
|
|
--[[--
Returns a context-disambiguated translation.

The same string might occur multiple times, but require a different translation based on context.
An example within KOReader is **Pages** meaning *page styles* (within the context of style tweaks)
and **Pages** meaning *number of pages*.

We generally don't apply context unless a conflict is known. This is only likely to occur with
short strings, of which of course there are many.

See [gettext contexts](https://www.gnu.org/software/gettext/manual/html_node/Contexts.html) for more information.

@function pgettext

@string msgctxt
@string msgid

@treturn string translation

@usage
    local _ = require("gettext")
    local C_ = _.pgettext

    local copy_file = C_("File", "Copy")
    local copy_text = C_("Text", "Copy")
--]]
function GetText_mt.__index.pgettext(msgctxt, msgid)
    local in_context = GetText.context[msgctxt]
    local translated = in_context and in_context[msgid]
    if translated then
        return translated
    end
    return GetText.wrapUntranslated(msgid)
end
|
|
|
|
setmetatable(GetText, GetText_mt)

-- Load the language named by the first of these environment variables
-- that is set; the remaining ones are then ignored.
for _, var_name in ipairs({ "LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG" }) do
    local locale = os.getenv(var_name)
    if locale then
        GetText.changeLang(locale)
        break
    end
end
|
|
|
|
-- On Android, take the language and country from the app configuration
-- and load "<language>_<country>".
if isAndroid then
    local ffi = require("ffi")
    -- One 16-byte scratch buffer, reused for both queries.
    local scratch = ffi.new("char[?]", 16)

    android.lib.AConfiguration_getLanguage(android.app.config, scratch)
    local language_code = ffi.string(scratch)

    android.lib.AConfiguration_getCountry(android.app.config, scratch)
    local country_code = ffi.string(scratch)

    if language_code and country_code then
        local locale = language_code.."_"..country_code
        GetText.changeLang(locale)
    end
end
|
|
|
|
return GetText
|