2
0
mirror of https://github.com/koreader/koreader synced 2024-11-16 06:12:56 +00:00

Fix translator, enable selected text translation

Update translator.lua to use a Google Translate API endpoint that still works.
Add a method to show translations (main and alternates) in
a TextViewer.
Re-enable "Translate" button in text selection/highlight buttons
dialog.
The target language can be set with a manually added setting:
    translator_target_language = "fr"
and falls back to the UI language when that setting is absent.
This commit is contained in:
poire-z 2018-12-16 18:02:38 +01:00 committed by Frans de Jonge
parent 8e56f8cb7c
commit 22b7f17cd8
3 changed files with 606 additions and 108 deletions

View File

@ -487,13 +487,13 @@ end
function ReaderHighlight:translate(selected_text)
if selected_text.text ~= "" then
self.ui:handleEvent(Event:new("TranslateText", self, selected_text.text))
self:onTranslateText(selected_text.text)
-- or we will do OCR
else
local text = self.ui.document:getOCRText(self.hold_pos.page, selected_text)
logger.dbg("OCRed text:", text)
if text and text ~= "" then
self.ui:handleEvent(Event:new("TranslateText", self, text))
self:onTranslateText(text)
else
UIManager:show(InfoMessage:new{
text = info_message_ocr_text,
@ -502,6 +502,11 @@ function ReaderHighlight:translate(selected_text)
end
end
--- Shows the translation of `text` through the translator module.
-- The module is required inside the handler, so it is only loaded when a
-- translation is actually requested.
-- @string text text to translate
function ReaderHighlight:onTranslateText(text)
    require("ui/translator"):showTranslation(text)
end
function ReaderHighlight:onHoldRelease()
if self.hold_start_tv then
local hold_duration = TimeVal.now() - self.hold_start_tv
@ -516,89 +521,94 @@ function ReaderHighlight:onHoldRelease()
end
if self.selected_text then
logger.dbg("show highlight dialog")
self.highlight_dialog = ButtonDialog:new{
buttons = {
local highlight_buttons = {
{
{
{
text = _("Highlight"),
callback = function()
self:saveHighlight()
self:onClose()
end,
},
{
text = _("Add Note"),
enabled = false,
callback = function()
self:addNote()
self:onClose()
end,
},
text = _("Highlight"),
callback = function()
self:saveHighlight()
self:onClose()
end,
},
{
{
text = "Copy",
enabled = Device:hasClipboard(),
callback = function()
Device.input.setClipboardText(self.selected_text.text)
end,
},
{
text = _("View HTML"),
enabled = not self.ui.document.info.has_pages,
callback = function()
self:viewSelectionHTML()
end,
},
--[[
{
text = _("Translate"),
enabled = false,
callback = function()
self:translate(self.selected_text)
self:onClose()
end,
},
--]]
},
{
{
text = _("Wikipedia"),
callback = function()
UIManager:scheduleIn(0.1, function()
self:lookupWikipedia()
-- We don't call self:onClose(), we need the highlight
-- to still be there, as we may Highlight it from the
-- dict lookup widget
end)
end,
},
{
text = _("Dictionary"),
callback = function()
self:onHighlightDictLookup()
-- We don't call self:onClose(), same reason as above
end,
},
},
{
{
text = _("Follow Link"),
enabled = self.selected_link ~= nil,
callback = function()
self.ui.link:onGotoLink(self.selected_link)
self:onClose()
end,
},
{
text = _("Search"),
callback = function()
self:onHighlightSearch()
UIManager:close(self.highlight_dialog)
end,
},
text = _("Add Note"),
enabled = false,
callback = function()
self:addNote()
self:onClose()
end,
},
},
{
{
text = "Copy",
enabled = Device:hasClipboard(),
callback = function()
Device.input.setClipboardText(self.selected_text.text)
end,
},
{
text = _("View HTML"),
enabled = not self.ui.document.info.has_pages,
callback = function()
self:viewSelectionHTML()
end,
},
},
{
{
text = _("Wikipedia"),
callback = function()
UIManager:scheduleIn(0.1, function()
self:lookupWikipedia()
-- We don't call self:onClose(), we need the highlight
-- to still be there, as we may Highlight it from the
-- dict lookup widget
end)
end,
},
{
text = _("Dictionary"),
callback = function()
self:onHighlightDictLookup()
-- We don't call self:onClose(), same reason as above
end,
},
},
{
{
text = _("Translate"),
callback = function()
self:translate(self.selected_text)
-- We don't call self:onClose(), so one can still see
-- the highlighted text when moving the translated
-- text window, and also if NetworkMgr:promptWifiOn()
-- is needed, so the user can just tap again on this
-- button and does not need to select the text again.
end,
},
{
text = _("Search"),
callback = function()
self:onHighlightSearch()
UIManager:close(self.highlight_dialog)
end,
},
},
}
if self.selected_link ~= nil then
table.insert(highlight_buttons, { -- for now, a single button in an added row
{
text = _("Follow Link"),
callback = function()
self.ui.link:onGotoLink(self.selected_link)
self:onClose()
end,
},
})
end
self.highlight_dialog = ButtonDialog:new{
buttons = highlight_buttons,
tap_close_callback = function() self:handleEvent(Event:new("Tap")) end,
}
UIManager:show(self.highlight_dialog)

View File

@ -1,42 +1,83 @@
--[[--
This module translates text using Google Translate.
<http://translate.google.com/translate_a/t?client=z&ie=UTF-8&oe=UTF-8&hl=en&tl=en&text=hello>
<https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=fr&dt=t&q=alea%20jacta%20est>
--]]
-- Other useful implementations and discussions:
-- https://github.com/ssut/py-googletrans/blob/master/googletrans/client.py
-- https://stackoverflow.com/questions/26714426/what-is-the-meaning-of-google-translate-query-params
local JSON = require("json")
local logger = require("logger")
local Translator = {
trans_servers = {
"http://translate.google.cn",
"http://translate.google.com",
},
trans_path = "/translate_a/t",
trans_params = {
client = "z", -- client z returns normal JSON result
ie = "UTF-8",
oe = "UTF-8",
hl = "en",
tl = "en",
sl = nil, -- we don't specify source languagae to detect language
},
default_lang = "en",
trans_servers = {
"https://translate.googleapis.com/",
-- "http://translate.google.cn",
},
trans_path = "/translate_a/single",
trans_params = {
client = "gtx", -- (using "t" raises 403 Forbidden)
ie = "UTF-8", -- input encoding
oe = "UTF-8", -- output encoding
sl = "auto", -- source language (we need to specify "auto" to detect language)
tl = "en", -- target language
hl = "en", -- ?
otf = 1, -- ?
ssel = 0, -- ?
tsel = 0, -- ?
-- tk = "" -- auth token
dt = { -- what we want in result
"t", -- translation of source text
"at", -- alternate translations
-- Next options only give additional results when text is a single word
-- "bd", -- dictionary (articles, reverse translations, etc)
-- "ex", -- examples
-- "ld", -- ?
-- "md", -- definitions of source text
-- "qca", -- ?
-- "rw", -- "see also" list
-- "rm", -- transcription / transliteration of source and translated texts
-- "ss", -- synonyms of source text, if it's one word
}
-- q = text to translate
},
default_lang = "en",
}
--- Returns the base URL of the translation server to query.
-- A saved `trans_server` setting takes precedence; otherwise the first
-- entry of `trans_servers` is used.
-- @treturn string server URL
function Translator:getTransServer()
    local configured = G_reader_settings:readSetting("trans_server")
    if configured then
        return configured
    end
    return self.trans_servers[1]
end
--- Returns the language code translations should be made into.
-- Lookup order:
--   1. the manually added `translator_target_language` setting (returned as is),
--   2. the UI `language` setting, reduced to its leading part and lowercased,
--   3. `"en"`.
-- An empty string in either setting is treated as "not set", so the result
-- is never an empty string.
-- @treturn string language code (`"en"`, `"fr"`, …)
function Translator:getTargetLanguage()
    -- One can manually set their preferred target language
    local lang = G_reader_settings:readSetting("translator_target_language")
    if type(lang) == "string" and lang ~= "" then
        return lang
    end

    -- Fall back to the UI language the user has selected
    lang = G_reader_settings:readSetting("language")
    if type(lang) ~= "string" or lang == "" then
        -- "" is truthy in Lua: without this check it would be returned as
        -- the target language instead of the "en" default.
        return "en"
    end

    -- Convert "zh-CN" and "zh-TW" to "zh".
    -- NOTE(review): "_" is accepted as a separator too, on the assumption that
    -- the UI setting may hold gettext-style names such as "zh_CN" — confirm.
    lang = lang:match("^(.*)[-_]") or lang
    if lang == "" or lang == "C" then
        -- "C" (or nothing left before the separator) carries no language.
        return "en"
    end
    return lang:lower()
end
--[[--
Returns decoded JSON table from translate server.
@string text
@string target_lang
@string source_lang
@string text
@treturn string result, or nil
--]]
function Translator:loadPage(target_lang, source_lang, text)
function Translator:loadPage(text, target_lang, source_lang)
local socket = require('socket')
local url = require('socket.url')
local http = require('socket.http')
@ -48,16 +89,27 @@ function Translator:loadPage(target_lang, source_lang, text)
self.trans_params.tl = target_lang
self.trans_params.sl = source_lang
for k,v in pairs(self.trans_params) do
query = query .. k .. '=' .. v .. '&'
if type(v) == "table" then
for _, v2 in ipairs(v) do
query = query .. k .. '=' .. v2 .. '&'
end
else
query = query .. k .. '=' .. v .. '&'
end
end
local parsed = url.parse(self:getTransServer())
parsed.path = self.trans_path
parsed.query = query .. "text=" .. url.escape(text)
parsed.query = query .. "q=" .. url.escape(text)
-- HTTP request
request['url'] = url.build(parsed)
logger.dbg("Calling", request.url)
request['method'] = 'GET'
request['sink'] = ltn12.sink.table(sink)
-- We may try to set a common User-Agent if it happens we're 403 Forbidden
-- request['headers'] = {
-- ["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
-- }
http.TIMEOUT, https.TIMEOUT = 10, 10
local httpRequest = parsed.scheme == 'http' and http.request or https.request
-- first argument returned by skip is code
@ -74,9 +126,15 @@ function Translator:loadPage(target_lang, source_lang, text)
end
local content = table.concat(sink)
if content ~= "" and string.sub(content, 1,1) == "{" then
local ok, result = pcall(JSON.decode, content)
-- logger.dbg("translator content:", content)
local first_char = content:sub(1, 1)
if content ~= "" and (first_char == "{" or first_char == "[") then
-- Get nil instead of functions for 'null' by using JSON.decode.simple
-- (so the result can be fully serialized when used
-- with Trapper:dismissableRunInSubprocess())
local ok, result = pcall(JSON.decode, content, JSON.decode.simple)
if ok and result then
logger.dbg("translator json:", result)
return result
else
logger.warn("translator error:", result)
@ -85,6 +143,20 @@ function Translator:loadPage(target_lang, source_lang, text)
logger.warn("not JSON in translator response:", content)
end
end
-- The JSON result is a list of 9 to 15 items:
-- 1: translation
-- 2: all-translations
-- 3: original-language
-- 6: possible-translations
-- 7: confidence
-- 8: possible-mistakes
-- 9: language
-- 12: synonyms
-- 13: definitions
-- 14: examples
-- 15: see-also
-- Depending on the 'dt' parameters used, some may be null or absent.
-- See bottom of this file for some sample results.
--[[--
Tries to automatically detect language of `text`.
@ -93,9 +165,9 @@ Tries to automatically detect language of `text`.
@treturn string lang (`"en"`, `"fr"`, ``)
--]]
function Translator:detect(text)
local result = self:loadPage("en", nil, text)
if result then
local src_lang = result.src
local result = self:loadPage(text, "en", "auto")
if result and result[3] then
local src_lang = result[3]
logger.dbg("detected language:", src_lang)
return src_lang
else
@ -103,4 +175,419 @@ function Translator:detect(text)
end
end
--[[--
Translate text, returns translation as a single string.

The translated parts of every segment in the first item of the server result
are concatenated in order. Segments without a string translation are skipped.

@string text text to translate
@string target_lang[opt] (`"en"`, `"fr"`, …), defaults to `Translator:getTargetLanguage()`
@string source_lang[opt="auto"] (`"en"`, `"fr"`, …) or `"auto"` to auto-detect source language
@treturn string translated text, or nil
--]]
function Translator:translate(text, target_lang, source_lang)
    target_lang = target_lang or self:getTargetLanguage()
    source_lang = source_lang or "auto"

    local result = self:loadPage(text, target_lang, source_lang)
    local segments = result and result[1]
    if type(segments) ~= "table" then
        return nil
    end

    local translated = {}
    for _, segment in ipairs(segments) do
        -- Same defensive checks as in _showTranslation(): only string parts
        -- are kept, so table.concat() can not raise on an unexpected value.
        local part = type(segment) == "table" and segment[1] or nil
        if type(part) == "string" then
            translated[#translated + 1] = part
        end
    end
    return table.concat(translated)
end
--[[--
Show translated text in TextViewer, with alternate translations.

When the device is not online, the user is prompted to turn Wi-Fi on and
nothing else is done.

@string text text to translate
@string target_lang[opt] (`"en"`, `"fr"`, …)
@string source_lang[opt="auto"] (`"en"`, `"fr"`, …) or `"auto"` to auto-detect source language
--]]
function Translator:showTranslation(text, target_lang, source_lang)
    local NetworkMgr = require("ui/network/manager")
    if NetworkMgr:isOnline() then
        -- Run the real work through Trapper, so the translation service
        -- query can be interrupted.
        require("ui/trapper"):wrap(function()
            self:_showTranslation(text, target_lang, source_lang)
        end)
    else
        NetworkMgr:promptWifiOn()
    end
end
--- Queries the translation service and shows the result in a TextViewer.
-- The query runs via `Trapper:dismissableRunInSubprocess()`; `showTranslation()`
-- calls this function wrapped with `Trapper:wrap()`.
-- Shows an InfoMessage instead when the query is interrupted or fails.
-- @string text text to translate
-- @string target_lang[opt] defaults to `Translator:getTargetLanguage()`
-- @string source_lang[opt="auto"] replaced by the language reported in the
--   result when there is one
function Translator:_showTranslation(text, target_lang, source_lang)
    local InfoMessage = require("ui/widget/infomessage")
    local TextViewer = require("ui/widget/textviewer")
    local Trapper = require("ui/trapper")
    local UIManager = require("ui/uimanager")
    local util = require("util")
    local Screen = require("device").screen
    local T = require("ffi/util").template
    local _ = require("gettext")

    if not target_lang then
        target_lang = self:getTargetLanguage()
    end
    if not source_lang then
        source_lang = "auto"
    end

    local completed, result = Trapper:dismissableRunInSubprocess(function()
        return self:loadPage(text, target_lang, source_lang)
    end, _("Querying translation service…"))
    if not completed then
        UIManager:show(InfoMessage:new{
            text = _("Translation interrupted.")
        })
        return
    end
    if type(result) ~= "table" then
        UIManager:show(InfoMessage:new{
            text = _("Translation failed.")
        })
        return
    end

    -- Only use the reported source language when it is a string, as
    -- :upper() is called on it for the title below.
    if type(result[3]) == "string" then
        source_lang = result[3]
    end

    local output = {}
    -- For both main and alternate translations, we may get multiple slices
    -- of the original text and its translations.
    local segments = result[1]
    if type(segments) == "table" and #segments > 0 then
        -- Main translation: we can make a single string from the multiple parts
        -- for easier quick reading
        local source = {}
        local translated = {}
        for __, r in ipairs(segments) do
            -- A segment that is not a table contributes empty strings
            local is_table = type(r) == "table"
            local s = is_table and type(r[2]) == "string" and r[2] or ""
            local t = is_table and type(r[1]) == "string" and r[1] or ""
            table.insert(source, s)
            table.insert(translated, t)
        end
        table.insert(output, "" .. table.concat(source, " "))
        table.insert(output, "" .. table.concat(translated, " "))
    end

    local alternatives = result[6]
    if type(alternatives) == "table" and #alternatives > 0 then
        -- Alternative translations:
        table.insert(output, "________")
        for __, r in ipairs(alternatives) do
            if type(r) == "table" and type(r[3]) == "table" then
                local s = type(r[1]) == "string" and r[1]:gsub("\n", "") or ""
                table.insert(output, "" .. s)
                for j, rt in ipairs(r[3]) do
                    -- Use number in solid black circle symbol (U+2776...277F);
                    -- the 10th symbol is reused for any further alternative.
                    local symbol = util.unicodeCodepointToUtf8(10101 + (j < 10 and j or 10))
                    local alt = type(rt) == "table" and rt[1] or nil
                    local t = type(alt) == "string" and alt:gsub("\n", "") or ""
                    table.insert(output, symbol .. " " .. t)
                end
            end
            -- Blank line between the alternatives of successive slices
            table.insert(output, "")
        end
    end

    -- table.insert(output, require("dump")(result)) -- for debugging
    UIManager:show(TextViewer:new{
        title = T(_("Translation from %1 to %2"), source_lang:upper(), target_lang:upper()),
        text = table.concat(output, "\n"),
        -- Floored, so the widget is given a whole number of pixels
        height = math.floor(Screen:getHeight() * 3/4),
    })
end
return Translator
-- Sample JSON results:
--
-- Multiple words result:
-- {
-- [1] = {
-- [1] = {
-- [1] = "I know you did not destroy your King's house, because then you had none. ",
-- [2] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.",
-- [5] = 3,
-- ["n"] = 5
-- },
-- [2] = {
-- [1] = "But you can not deny that you destroyed a royal palace. ",
-- [2] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.",
-- [5] = 3,
-- ["n"] = 5
-- },
-- [3] = {
-- [1] = "If the king is dead, then the kingdom remains, just as a ship remains, whose helmsman has fallen",
-- [2] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist",
-- [5] = 3,
-- ["n"] = 5
-- }
-- },
-- [3] = "de",
-- [6] = {
-- [1] = {
-- [1] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.",
-- [3] = {
-- [1] = {
-- [1] = "I know you did not destroy your King's house, because then you had none.",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- },
-- [2] = {
-- [1] = "I know that you have not destroyed your king house, because at that time you had not any.",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- }
-- },
-- [4] = {
-- [1] = {
-- [1] = 0,
-- [2] = 91
-- }
-- },
-- [5] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.",
-- [6] = 0,
-- [7] = 0
-- },
-- [2] = {
-- [1] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.",
-- [3] = {
-- [1] = {
-- [1] = "But you can not deny that you destroyed a royal palace.",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- },
-- [2] = {
-- [1] = "But you can not deny that you have destroyed a royal palace.",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- }
-- },
-- [4] = {
-- [1] = {
-- [1] = 0,
-- [2] = 72
-- }
-- },
-- [5] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.",
-- [6] = 0,
-- [7] = 0
-- },
-- [3] = {
-- [1] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist",
-- [3] = {
-- [1] = {
-- [1] = "If the king is dead, then the kingdom remains, just as a ship remains, whose helmsman has fallen",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- },
-- [2] = {
-- [1] = "yet the king dead, remains the kingdom stand remains as a ship the helmsman has fallen",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- }
-- },
-- [4] = {
-- [1] = {
-- [1] = 0,
-- [2] = 114
-- }
-- },
-- [5] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist",
-- [6] = 0,
-- [7] = 0
-- }
-- },
-- [7] = 1,
-- [9] = {
-- [1] = {
-- [1] = "de"
-- },
-- [3] = {
-- [1] = 1
-- },
-- [4] = {
-- [1] = "de"
-- }
-- },
-- ["n"] = 9
-- }
--
-- Single word result with all dt= enabled:
-- {
-- [1] = {
-- [1] = {
-- [1] = "fork",
-- [2] = "fourchette",
-- [5] = 0,
-- ["n"] = 5
-- }
-- },
-- [2] = {
-- [1] = {
-- [1] = "noun",
-- [2] = {
-- [1] = "fork"
-- },
-- [3] = {
-- [1] = {
-- [1] = "fork",
-- [2] = {
-- [1] = "fourche",
-- [2] = "fourchette",
-- [3] = "embranchement",
-- [4] = "chariot",
-- [5] = "chariot à fourche"
-- },
-- [4] = 0.21967085
-- }
-- },
-- [4] = "fourchette",
-- [5] = 1
-- }
-- },
-- [3] = "fr",
-- [6] = {
-- [1] = {
-- [1] = "fourchette",
-- [3] = {
-- [1] = {
-- [1] = "fork",
-- [2] = 1000,
-- [3] = true,
-- [4] = false
-- },
-- [2] = {
-- [1] = "band",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- },
-- [3] = {
-- [1] = "bracket",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- },
-- [4] = {
-- [1] = "range",
-- [2] = 0,
-- [3] = true,
-- [4] = false
-- }
-- },
-- [4] = {
-- [1] = {
-- [1] = 0,
-- [2] = 10
-- }
-- },
-- [5] = "fourchette",
-- [6] = 0,
-- [7] = 1
-- }
-- },
-- [7] = 1,
-- [9] = {
-- [1] = {
-- [1] = "fr"
-- },
-- [3] = {
-- [1] = 1
-- },
-- [4] = {
-- [1] = "fr"
-- }
-- },
-- [12] = {
-- [1] = {
-- [1] = "noun",
-- [2] = {
-- [1] = {
-- [1] = {
-- [1] = "ramification",
-- [2] = "enfourchure"
-- },
-- [2] = ""
-- },
-- [2] = {
-- [1] = {
-- [1] = "échéance",
-- [2] = "bande"
-- },
-- [2] = ""
-- },
-- [3] = {
-- [1] = {
-- [1] = "ramification",
-- [2] = "jambe"
-- },
-- [2] = ""
-- },
-- [4] = {
-- [1] = {
-- [1] = "bifurcation"
-- },
-- [2] = ""
-- },
-- [5] = {
-- [1] = {
-- [1] = "fourche",
-- [2] = "bifurcation",
-- [3] = "entrejambe"
-- },
-- [2] = ""
-- },
-- [6] = {
-- [1] = {
-- [1] = "fourche",
-- [2] = "bifurcation"
-- },
-- [2] = ""
-- }
-- },
-- [3] = "fourchette"
-- }
-- },
-- [13] = {
-- [1] = {
-- [1] = "noun",
-- [2] = {
-- [1] = {
-- [1] = "Ustensile de table.",
-- [2] = "12518.0",
-- [3] = "Des fourchettes, des couteaux et des cuillères ."
-- },
-- [2] = {
-- [1] = "Ecart entre deux valeurs.",
-- [2] = "12518.1",
-- [3] = "La fourchette des prix ."
-- }
-- },
-- [3] = "fourchette"
-- }
-- },
-- [14] = {
-- [1] = {
-- [1] = {
-- [1] = "La <b>fourchette</b> des prix .",
-- [5] = 3,
-- [6] = "12518.1",
-- ["n"] = 6
-- }
-- }
-- },
-- ["n"] = 14
-- }

View File

@ -7,21 +7,22 @@ describe("Translator module", function()
Translator = require("ui/translator")
end)
it("should return server", function()
assert.is.same("http://translate.google.cn", Translator:getTransServer())
assert.is.same("https://translate.googleapis.com/", Translator:getTransServer())
G_reader_settings:saveSetting("trans_server", "http://translate.google.nl")
G_reader_settings:flush()
assert.is.same("http://translate.google.nl", Translator:getTransServer())
G_reader_settings:delSetting("trans_server")
G_reader_settings:flush()
end)
it("should return translation #notest #nocov", function()
local translation_result = Translator:loadPage("en", "nl", dutch_wikipedia_text)
-- add " #notest #nocov" to the it("description string") when it does not work anymore
it("should return translation", function()
local translation_result = Translator:translate(dutch_wikipedia_text, "en")
assert.is.truthy(translation_result)
-- while some minor variation in the translation is possible it should
-- be between about 100 and 130 characters
assert.is_true(#translation_result > 50 and #translation_result < 200)
end)
it("should autodetect language #notest #nocov", function()
it("should autodetect language", function()
local detect_result = Translator:detect(dutch_wikipedia_text)
assert.is.same("nl", detect_result)
end)