Add Lingva translation support in search (#360)

* Add support for Lingva translations in results

Searches that contain the word "translate" and are normal search queries
(i.e. not news/images/video/etc.) now embed an iframe pointing to a Lingva URL
that translates the user's search using their configured search language.

The Lingva URL can be configured using the WHOOGLE_ALT_TL env var; otherwise
it falls back to the official Lingva instance URL (lingva.ml).

For more info, visit https://github.com/TheDavidDelta/lingva-translate

* Add basic test for lingva results

* Allow user specified lingva instances through csp frame-src

* Fix pep8 issue
This commit is contained in:
Ben Busby 2021-06-15 10:14:42 -04:00 committed by GitHub
parent 82ccace647
commit bcb1d8ecc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 109 additions and 42 deletions

View File

@ -55,6 +55,8 @@ ARG instagram_alt='bibliogram.art/u'
ENV WHOOGLE_ALT_IG=$instagram_alt
ARG reddit_alt='libredd.it'
ENV WHOOGLE_ALT_RD=$reddit_alt
ARG translate_alt='lingva.ml'
ENV WHOOGLE_ALT_TL=$translate_alt
WORKDIR /whoogle

View File

@ -193,6 +193,7 @@ Description=Whoogle
#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org
#Environment=WHOOGLE_ALT_IG=bibliogram.art/u
#Environment=WHOOGLE_ALT_RD=libredd.it
#Environment=WHOOGLE_ALT_TL=lingva.ml
# Load values from dotenv only
#Environment=WHOOGLE_DOTENV=1
Type=simple
@ -311,6 +312,7 @@ There are a few optional environment variables available for customizing a Whoog
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
### Config Environment Variables
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.

View File

@ -65,6 +65,11 @@
"value": "libredd.it",
"required": false
},
"WHOOGLE_ALT_TL": {
"description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.",
"value": "lingva.ml",
"required": false
},
"WHOOGLE_CONFIG_COUNTRY": {
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
"value": "",

View File

@ -52,7 +52,17 @@ app.config['BANG_PATH'] = os.getenv(
app.config['BANG_FILE'] = os.path.join(
app.config['BANG_PATH'],
'bangs.json')
# The Google Translate alternative is treated a bit differently from the
# social media site alternatives, in that it is used for any
# translation-related search.
translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://lingva.ml')
if not translate_url.startswith('http'):
translate_url = 'https://' + translate_url
app.config['TRANSLATE_URL'] = translate_url
app.config['CSP'] = 'default-src \'none\';' \
'frame-src ' + translate_url + ';' \
'manifest-src \'self\';' \
'img-src \'self\' data:;' \
'style-src \'self\' \'unsafe-inline\';' \

View File

@ -230,6 +230,12 @@ def search():
if search_util.feeling_lucky:
return redirect(response, code=303)
# If the user is attempting to translate a string, determine the target
# language code used when formatting the Lingva URL
localization_lang = g.user_config.get_localization_lang()
translation = app.config['TRANSLATIONS'][localization_lang]
translate_to = localization_lang.replace('lang_', '')
# Return 503 if temporarily blocked by captcha
resp_code = 503 if has_captcha(str(response)) else 200
@ -238,9 +244,17 @@ def search():
query=urlparse.unquote(query),
search_type=search_util.search_type,
config=g.user_config,
translation=app.config['TRANSLATIONS'][
g.user_config.get_localization_lang()
],
lingva_url=app.config['TRANSLATE_URL'],
translation=translation,
translate_to=translate_to,
translate_str=query.replace(
'translate', ''
).replace(
translation['translate'], ''
),
is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only
response=response,
version_number=app.config['VERSION_NUMBER'],
search_header=(render_template(

View File

@ -25,3 +25,9 @@ details summary {
padding: 10px;
font-weight: bold;
}
#lingva-iframe {
width: 100%;
height: 650px;
border: 0;
}

View File

@ -1,4 +1,33 @@
{
"lang_en": {
"search": "Search",
"config": "Configuration",
"config-country": "Filter Results by Country",
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
"config-lang": "Interface Language",
"config-lang-search": "Search Language",
"config-near": "Near",
"config-near-help": "City Name",
"config-block": "Block",
"config-block-help": "Comma-separated site list",
"config-nojs": "Show NoJS Links",
"config-dark": "Dark Mode",
"config-safe": "Safe Search",
"config-alts": "Replace Social Media Links",
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
"config-new-tab": "Open Links in New Tab",
"config-images": "Full Size Image Search",
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
"config-tor": "Use Tor",
"config-get-only": "GET Requests Only",
"config-url": "Root URL",
"config-css": "Custom CSS",
"load": "Load",
"apply": "Apply",
"save-as": "Save As...",
"github-link": "View on GitHub",
"translate": "translate"
},
"lang_nl": {
"search": "Zoeken",
"config": "Instellingen",
@ -25,7 +54,8 @@
"load": "Laden",
"apply": "Opslaan",
"save-as": "Opslaan Als...",
"github-link": "Bekijk op GitHub"
"github-link": "Bekijk op GitHub",
"translate": "vertalen"
},
"lang_de": {
"search": "Suchen",
@ -53,35 +83,8 @@
"load": "Laden",
"apply": "Übernehmen",
"save-as": "Speichern unter...",
"github-link": "Auf GitHub öffnen"
},
"lang_en": {
"search": "Search",
"config": "Configuration",
"config-country": "Filter Results by Country",
"config-country-help": "Note: If enabled, a website will only appear in the search results if it is *hosted* in the selected country.",
"config-lang": "Interface Language",
"config-lang-search": "Search Language",
"config-near": "Near",
"config-near-help": "City Name",
"config-block": "Block",
"config-block-help": "Comma-separated site list",
"config-nojs": "Show NoJS Links",
"config-dark": "Dark Mode",
"config-safe": "Safe Search",
"config-alts": "Replace Social Media Links",
"config-alts-help": "Replaces Twitter/YouTube/Instagram/etc links with privacy respecting alternatives.",
"config-new-tab": "Open Links in New Tab",
"config-images": "Full Size Image Search",
"config-images-help": "(Experimental) Adds the 'View Image' option to desktop image searches. This will cause image result thumbnails to be lower resolution.",
"config-tor": "Use Tor",
"config-get-only": "GET Requests Only",
"config-url": "Root URL",
"config-css": "Custom CSS",
"load": "Load",
"apply": "Apply",
"save-as": "Save As...",
"github-link": "View on GitHub"
"github-link": "Auf GitHub öffnen",
"translate": "übersetzen"
},
"lang_es": {
"search": "Buscar",
@ -109,7 +112,8 @@
"load": "Cargar",
"apply": "Aplicar",
"save-as": "Guardar como...",
"github-link": "Ver en GitHub"
"github-link": "Ver en GitHub",
"translate": "traducir"
},
"lang_it": {
"search": "Cerca",
@ -137,7 +141,8 @@
"load": "Carica",
"apply": "Applica",
"save-as": "Salva Come...",
"github-link": "Guarda su GitHub"
"github-link": "Guarda su GitHub",
"translate": "tradurre"
},
"lang_pt": {
"search": "Buscar",
@ -165,7 +170,8 @@
"load": "Carregar",
"apply": "Aplicar",
"save-as": "Salvar Como...",
"github-link": "Ver no GitHub"
"github-link": "Ver no GitHub",
"translate": "traduzir"
},
"lang_zh-CN": {
"search": "搜索",
@ -193,7 +199,8 @@
"load": "载入",
"apply": "应用",
"save-as": "另存为...",
"github-link": "在 GitHub 上查看"
"github-link": "在 GitHub 上查看",
"translate": "翻译"
},
"lang_si": {
"search": "සොයන්න",
@ -221,6 +228,7 @@
"load": "පූරනය කරන්න",
"apply": "යොදන්න",
"save-as": "...ලෙස සුරකින්න",
"github-link": "ගිට්හබ් හි බලන්න"
"github-link": "ගිට්හබ් හි බලන්න",
"translate": "පරිවර්තනය කරන්න"
}
}

View File

@ -15,6 +15,12 @@
</head>
<body>
{{ search_header|safe }}
{% if is_translation %}
<iframe
id="lingva-iframe"
src="{{ lingva_url }}/auto/{{ translate_to }}/{{ translate_str }}">
</iframe>
{% endif %}
{{ response|safe }}
</body>
<footer>

View File

@ -37,6 +37,7 @@ services:
#- WHOOGLE_ALT_YT=invidious.snopyta.org
#- WHOOGLE_ALT_IG=bibliogram.art/u
#- WHOOGLE_ALT_RD=libredd.it
#- WHOOGLE_ALT_TL=lingva.ml
#env_file: # Alternatively, load variables from whoogle.env
#- whoogle.env
ports:

View File

@ -35,8 +35,9 @@ def test_get_results(client):
# Depending on the search, there can be more
# than 10 result divs
assert len(get_search_results(rv.data)) >= 10
assert len(get_search_results(rv.data)) <= 15
results = get_search_results(rv.data)
assert len(results) >= 10
assert len(results) <= 15
def test_post_results(client):
@ -45,8 +46,19 @@ def test_post_results(client):
# Depending on the search, there can be more
# than 10 result divs
assert len(get_search_results(rv.data)) >= 10
assert len(get_search_results(rv.data)) <= 15
results = get_search_results(rv.data)
assert len(results) >= 10
assert len(results) <= 15
def test_translate_search(client):
# Post a "translate ..." query and check that the response embeds an
# iframe pointing at the expected Lingva path.
rv = client.post('/search', data=dict(q='translate hola'))
assert rv._status_code == 200
# Pretty weak test, but better than nothing: it only looks for substrings
# in the raw response body rather than parsing the returned HTML.
# NOTE(review): the space before "hola" is presumably what is left after
# the "translate" keyword is removed from the query, and "en" presumably
# comes from the default localization language -- confirm against the view.
str_data = str(rv.data)
assert 'iframe' in str_data
assert 'lingva.ml/auto/en/ hola' in str_data
def test_block_results(client):

View File

@ -8,6 +8,7 @@
#WHOOGLE_ALT_YT=invidious.snopyta.org
#WHOOGLE_ALT_IG=bibliogram.art/u
#WHOOGLE_ALT_RD=libredd.it
#WHOOGLE_ALT_TL=lingva.ml
#WHOOGLE_USER=""
#WHOOGLE_PASS=""
#WHOOGLE_PROXY_USER=""