Merge pull request #748 from a01200356/languages

[mod] Allow users to search in most engine supported languages
Merged by Adam Tauber 8 years ago (committed via GitHub)
commit 9743bde25e

@@ -43,7 +43,7 @@ generally made searx better:
 - Kang-min Liu
 - Kirill Isakov
 - Guilhem Bonnefille
-- Marc Abonce Seguin
+- Marc Abonce Seguin @a01200356
 - @jibe-b
 - Christian Pietsch @pietsch
 - @Maxqia
@@ -55,7 +55,6 @@ generally made searx better:
 - Ammar Najjar @ammarnajjar
 - @stepshal
 - François Revol @mmuman
-- marc @a01200356
 - Harry Wood @harry-wood
 - Thomas Renard @threnard
 - Pydo `<https://github.com/pydo>`_

@@ -81,17 +81,17 @@ def searx_bang(full_query):
         engine_query = full_query.getSearchQuery()[1:]

         for lc in language_codes:
-            lang_id, lang_name, country = map(str.lower, lc)
+            lang_id, lang_name, country, english_name = map(str.lower, lc)

             # check if query starts with language-id
             if lang_id.startswith(engine_query):
                 if len(engine_query) <= 2:
-                    results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
+                    results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
                 else:
                     results.append(':{lang_id}'.format(lang_id=lang_id))

             # check if query starts with language name
-            if lang_name.startswith(engine_query):
+            if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
                 results.append(':{lang_name}'.format(lang_name=lang_name))

             # check if query starts with country
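Illustration (not part of the commit): a minimal standalone sketch of the bang completion above, operating on the new four-field language tuples; the helper name and sample data are hypothetical.

def language_bang_suggestions(engine_query, language_codes):
    suggestions = []
    for lc in language_codes:
        lang_id, lang_name, country, english_name = (s.lower() for s in lc)
        # short queries complete to the bare language code, longer ones to the full id
        if lang_id.startswith(engine_query):
            suggestions.append(':' + (lang_id.split('-')[0] if len(engine_query) <= 2 else lang_id))
        # the English name is now matched as well as the native one
        if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
            suggestions.append(':' + lang_name)
    return suggestions

codes = ((u"de-CH", u"Deutsch", u"Schweiz", u"German"),)
assert language_bang_suggestions('de', codes) == [':de', ':deutsch']
assert language_bang_suggestions('germ', codes) == [':deutsch']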

File diff suppressed because one or more lines are too long

@@ -20,6 +20,8 @@ from os.path import realpath, dirname
 import sys
 from flask_babel import gettext
 from operator import itemgetter
+from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
 categories = {'general': []}

+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
+
 engine_shortcuts = {}
 engine_default_args = {'paging': False,
                        'categories': ['general'],
                        'language_support': True,
+                       'supported_languages': [],
                        'safesearch': False,
                        'timeout': settings['outgoing']['request_timeout'],
                        'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
                            .format(engine.name, engine_attr))
             sys.exit(1)

+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,
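Illustration (not part of the commit): a minimal sketch of how load_engine() wires language data onto an engine module, shown with stand-in objects (SimpleNamespace and a fake HTTP getter) instead of real searx engines.

from types import SimpleNamespace

def wire_language_support(engine, engines_languages, http_get):
    # copy the pre-fetched list from engines_languages.json, if present
    if engine.name in engines_languages:
        engine.supported_languages = engines_languages[engine.name]
    # expose a zero-argument fetcher when the module defines the parser
    if hasattr(engine, '_fetch_supported_languages'):
        engine.fetch_supported_languages = (
            lambda: engine._fetch_supported_languages(http_get(engine.supported_languages_url)))
    return engine

dummy = SimpleNamespace(name='dummy',
                        supported_languages=[],
                        supported_languages_url='https://example.org/langs',
                        _fetch_supported_languages=lambda resp: resp.text.split(','))
fake_get = lambda url: SimpleNamespace(text='en,fr')

wired = wire_language_support(dummy, {'dummy': ['en', 'de']}, fake_get)
assert wired.supported_languages == ['en', 'de']
assert wired.fetch_supported_languages() == ['en', 'fr']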

@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'

 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
 def locale_to_lang_code(locale):
-    if locale.find('_') >= 0:
-        locale = locale.split('_')[0]
+    if locale.find('-') >= 0:
+        locale = locale.split('-')[0]
     return locale
@@ -95,6 +95,7 @@ main_langs = {
     'uk': 'Українська',
     'zh': '简体中文'
 }
+supported_languages = dict(lang_urls, **main_langs)

 # do search-request

@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'

 # search-url
 base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

     if params['language'] != 'all':
-        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+        query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
                                          query.decode('utf-8')).encode('utf-8')

     search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="limit-languages"]//input')
+    for option in options:
+        code = option.xpath('./@id')[0].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages
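Illustration (not part of the commit): the patched request() keeps only the language half of a 'pt-BR'-style code when building Bing's 'language:XX' query prefix; a small hypothetical helper makes that visible.

def bing_language_prefix(language, query):
    # mirror the split('-')[0].upper() from the hunk above
    return u'language:{} {}'.format(language.split('-')[0].upper(), query)

assert bing_language_prefix('pt-BR', u'searx') == u'language:PT searx'
assert bing_language_prefix('de', u'searx') == u'language:DE searx'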

@@ -19,6 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url

 # engine dependent config
 categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']

     search_path = search_string.format(
         query=urlencode({'q': query}),

@@ -17,6 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url

 # engine dependent config
 categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']

     params['url'] = _get_url(query, language, offset, params['time_range'])

@@ -15,6 +15,7 @@
 from urllib import urlencode
 from json import loads
 from datetime import datetime
+from requests import get

 # engine dependent config
 categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'

+supported_languages_url = 'https://api.dailymotion.com/languages'
+
 # do search-request
 def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+
+    response_json = loads(resp.text)
+
+    for language in response_json['list']:
+        supported_languages[language['code']] = {}
+
+        name = language['native_name']
+        if name:
+            supported_languages[language['code']]['name'] = name
+        english_name = language['name']
+        if english_name:
+            supported_languages[language['code']]['english_name'] = english_name
+
+    return supported_languages
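Illustration (not part of the commit): the dictionary shape _fetch_supported_languages builds from Dailymotion's /languages payload, using a made-up, already-decoded sample.

sample = {'list': [{'code': 'ca', 'name': 'Catalan', 'native_name': u'Català'},
                   {'code': 'la', 'name': 'Latin', 'native_name': None}]}

languages = {}
for lang in sample['list']:
    entry = {}
    if lang['native_name']:
        entry['name'] = lang['native_name']
    if lang['name']:
        entry['english_name'] = lang['name']
    languages[lang['code']] = entry

# native names are kept when present, the English name always survives
assert languages == {'ca': {'name': u'Català', 'english_name': 'Catalan'},
                     'la': {'english_name': 'Latin'}}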

@@ -15,13 +15,15 @@
 from urllib import urlencode
 from lxml.html import fromstring
+from requests import get
+from json import loads
 from searx.engines.xpath import extract_text
-from searx.languages import language_codes

 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
 time_range_support = True

 # search-url
@@ -46,19 +48,31 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 30

+    # custom fixes for languages
     if params['language'] == 'all':
         locale = None
+    elif params['language'][:2] == 'ja':
+        locale = 'jp-jp'
+    elif params['language'][:2] == 'sl':
+        locale = 'sl-sl'
+    elif params['language'] == 'zh-TW':
+        locale = 'tw-tzh'
+    elif params['language'] == 'zh-HK':
+        locale = 'hk-tzh'
+    elif params['language'][-2:] == 'SA':
+        locale = 'xa-' + params['language'].split('-')[0]
+    elif params['language'][-2:] == 'GB':
+        locale = 'uk-' + params['language'].split('-')[0]
     else:
-        locale = params['language'].split('_')
+        locale = params['language'].split('-')
         if len(locale) == 2:
             # country code goes first
             locale = locale[1].lower() + '-' + locale[0].lower()
         else:
             # tries to get a country code from language
             locale = locale[0].lower()
-            lang_codes = [x[0] for x in language_codes]
-            for lc in lang_codes:
-                lc = lc.split('_')
+            for lc in supported_languages:
+                lc = lc.split('-')
                 if locale == lc[0]:
                     locale = lc[1].lower() + '-' + lc[0].lower()
                     break
@@ -102,3 +116,17 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+    # response is a js file with regions as an embedded object
+    response_page = resp.text
+    response_page = response_page[response_page.find('regions:{') + 8:]
+    response_page = response_page[:response_page.find('}') + 1]
+
+    regions_json = loads(response_page)
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+    return supported_languages
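Illustration (not part of the commit): a standalone sketch of the region mapping request() now performs for DuckDuckGo (searx 'de-CH' becomes DDG's country-first 'ch-de'); the default supported_languages list and helper name are made up, and only some of the special cases are shown.

def to_ddg_region(language, supported_languages=('de-CH', 'en-US', 'en-GB')):
    special = {'zh-TW': 'tw-tzh', 'zh-HK': 'hk-tzh'}
    if language in special:
        return special[language]
    if language.endswith('GB'):
        return 'uk-' + language.split('-')[0]
    parts = language.split('-')
    if len(parts) == 2:
        # country code goes first on DuckDuckGo
        return parts[1].lower() + '-' + parts[0].lower()
    # bare language code: borrow a country from the supported list
    for code in supported_languages:
        if code.split('-')[0] == parts[0]:
            return code.split('-')[1].lower() + '-' + parts[0].lower()
    return parts[0].lower()

assert to_ddg_region('de-CH') == 'ch-de'
assert to_ddg_region('en-GB') == 'uk-en'
assert to_ddg_region('de') == 'ch-de'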

@@ -4,6 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url

 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
 def request(query, params):
     params['url'] = url.format(query=urlencode({'q': query}))
-    params['headers']['Accept-Language'] = params['language']
+    params['headers']['Accept-Language'] = params['language'].split('-')[0]
     return params

@@ -14,6 +14,7 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
+from lxml.html import fromstring

 # engine dependent config
 categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
 title_xpath = './/title'
 content_xpath = './/sum'

+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+
 # do search-request
 def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'xx'
     else:
-        language = params['language'][0:2]
+        language = params['language'].replace('-', '_').lower()
+        if language.split('-')[0] != 'zh':
+            language = language.split('-')[0]

     if params['safesearch'] >= 1:
         safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    links = dom.xpath('//span[@id="menu2"]/a')
+    for link in links:
+        code = link.xpath('./@href')[0][-2:]
+        if code != 'xx' and code not in supported_languages:
+            supported_languages.append(code)
+
+    return supported_languages

@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
 maps_path = '/maps'
 redirect_path = '/url'
 images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'

 # specific xpath variables
 results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
         language = 'en'
         country = 'US'
         url_lang = ''
+    elif params['language'][:2] == 'jv':
+        language = 'jw'
+        country = 'ID'
+        url_lang = 'lang_jw'
     else:
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         if len(language_array) == 2:
             country = language_array[1]
         else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
     retval = retval + '</table>'
     return retval
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
+    for option in options:
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
+        supported_languages[code] = {"name": name}
+
+    return supported_languages

@@ -12,6 +12,8 @@
 from lxml import html
 from urllib import urlencode
+from json import loads
+from searx.engines.google import _fetch_supported_languages, supported_languages_url

 # search-url
 categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
         search_options=urlencode(search_options))

     if params['language'] != 'all':
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         params['url'] += '&lr=lang_' + language_array[0]

     return params

@@ -46,7 +46,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en'
     else:
-        language = params['language'].split('_')[0]
+        language = params['language'].split('-')[0]

     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
     if any(x[1] == 'language' for x in format_strings):

@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'

 # list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']

 # do search-request
@@ -37,7 +37,7 @@ def request(query, params):
     if params['language'] != 'all':
         language = params['language'].split('_')[0]
-        if language in allowed_languages:
+        if language in supported_languages:
             params['url'] = params['url'] + "&lang=" + language

     # using searx User-Agent

@@ -46,7 +46,7 @@ def request(query, params):
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&locale=' + params['language'].lower()
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()

     return params

@@ -47,7 +47,7 @@ def request(query, params):
     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])

     return params

@@ -22,7 +22,7 @@ language = ""

 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'

 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
     search_lang = ""

-    if resp.search_params['language'] != 'all':
-        search_lang = [lc[1]
+    # dirty fix for languages named differenly in their site
+    if resp.search_params['language'][:2] == 'fa':
+        search_lang = 'Farsi'
+    elif resp.search_params['language'] == 'pt-BR':
+        search_lang = 'Brazilian'
+    elif resp.search_params['language'] != 'all':
+        search_lang = [lc[3]
                        for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+        search_lang = search_lang[0].split(' (')[0]

     # parse results
     for result in dom.xpath(results_xpath):

@@ -13,6 +13,7 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
+from lxml.html import fromstring

 # engine dependent config
 categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'

+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
     if params['language'] == 'all':
         ui_language = 'browser'
         region = 'browser'
+    elif params['language'].split('-')[0] == 'no':
+        region = 'nb-NO'
     else:
-        region = params['language'].replace('_', '-')
-        ui_language = params['language'].split('_')[0]
+        region = params['language']
+        ui_language = params['language'].split('-')[0]

     search_path = search_string.format(
         query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
+    for option in options:
+        code = option.xpath('./@data-val')[0]
+        supported_languages.append(code)
+
+    return supported_languages

@@ -40,7 +40,7 @@ def request(query, params):
     # set language if specified
     if params['language'] != 'all':
-        params['cookies']['lang'] = params['language'].split('_')[0]
+        params['cookies']['lang'] = params['language'].split('-')[0]
     else:
         params['cookies']['lang'] = 'en'

@@ -14,6 +14,8 @@
 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url

 from json import loads
 from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 def request(query, params):
-    language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
@@ -70,7 +72,7 @@ def response(resp):
     html = fromstring(resp.content)
     wikidata_ids = html.xpath(wikidata_ids_xpath)

-    language = resp.search_params['language'].split('_')[0]
+    language = resp.search_params['language'].split('-')[0]
     if language == 'all':
         language = 'en'

@@ -12,6 +12,8 @@
 from json import loads
 from urllib import urlencode, quote
+from lxml.html import fromstring
+
 # search-url
 base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
     '&explaintext'\
     '&pithumbsize=300'\
     '&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

 # set language in base_url
 def url_lang(lang):
-    if lang == 'all':
+    lang = lang.split('-')[0]
+    if lang == 'all' or lang not in supported_languages:
         language = 'en'
     else:
-        language = lang.split('_')[0]
+        language = lang
     return base_url.format(language=language)
@@ -111,3 +115,24 @@ def response(resp):
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})

     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = fromstring(resp.text)
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
+    for table in tables:
+        # exclude header row
+        trs = table.xpath('.//tr')[1:]
+        for tr in trs:
+            td = tr.xpath('./td')
+            code = td[3].xpath('./a')[0].text
+            name = td[2].xpath('./a')[0].text
+            english_name = td[1].xpath('./a')[0].text
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+
+            # exclude languages with too few articles
+            if articles >= 100000:
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+    return supported_languages
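Illustration (not part of the commit): the effect of the new url_lang() fallback, shown with a made-up set of supported codes and a hypothetical helper name.

def wikipedia_subdomain(lang, supported):
    # strip the region, then fall back to English when the wiki is not in the list
    lang = lang.split('-')[0]
    if lang == 'all' or lang not in supported:
        return 'https://en.wikipedia.org/'
    return 'https://{0}.wikipedia.org/'.format(lang)

assert wikipedia_subdomain('pt-BR', {'pt', 'en'}) == 'https://pt.wikipedia.org/'
assert wikipedia_subdomain('tlh', {'pt', 'en'}) == 'https://en.wikipedia.org/'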

@@ -53,7 +53,7 @@ def request(query, params):
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+        params['url'] += '&lr=lang_' + params['language'].split('-')[0]

     return params

@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'

+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
 url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
 def _get_language(params):
     if params['language'] == 'all':
         return 'en'
-    return params['language'].split('_')[0]
+    elif params['language'][:2] == 'zh':
+        if params['language'] == 'zh' or params['language'] == 'zh-CH':
+            return 'szh'
+        else:
+            return 'tzh'
+    else:
+        return params['language'].split('-')[0]

 # do search-request
@@ -132,3 +140,15 @@ def response(resp):
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+    for option in options:
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages
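Illustration (not part of the commit): the Chinese special-casing in _get_language(), condensed into a hypothetical helper (the 'zh-CH' literal is reproduced exactly as committed).

def yahoo_language(language):
    if language == 'all':
        return 'en'
    if language[:2] == 'zh':
        # simplified vs. traditional Chinese get distinct Yahoo codes
        return 'szh' if language in ('zh', 'zh-CH') else 'tzh'
    return language.split('-')[0]

assert yahoo_language('zh-TW') == 'tzh'
assert yahoo_language('fr-CA') == 'fr'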

@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser

@@ -22,7 +22,9 @@ language_support = True  # TODO
 default_tld = 'com'
 language_map = {'ru': 'ru',
-                'ua': 'uk',
+                'ua': 'ua',
+                'be': 'by',
+                'kk': 'kz',
                 'tr': 'com.tr'}

 # search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
 def request(query, params):
-    lang = params['language'].split('_')[0]
+    lang = params['language'].split('-')[0]

     host = base_url.format(tld=language_map.get(lang) or default_tld)

     params['url'] = host + search_url.format(page=params['pageno'] - 1,
                                              query=urlencode({'text': query}))

@@ -36,7 +36,7 @@ def request(query, params):
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]

     return params

@ -1,78 +1,131 @@
''' # -*- coding: utf-8 -*-
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
# list of language codes # list of language codes
# this file is generated automatically by utils/update_search_languages.py
language_codes = ( language_codes = (
("ar_XA", "Arabic", "Arabia"), (u"af", u"Afrikaans", u"", u""),
("bg_BG", "Bulgarian", "Bulgaria"), (u"am", u"አማርኛ", u"", u"Amharic"),
("cs_CZ", "Czech", "Czech Republic"), (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
("da_DK", "Danish", "Denmark"), (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
("de_AT", "German", "Austria"), (u"be", u"Беларуская", u"", u"Belarusian"),
("de_CH", "German", "Switzerland"), (u"bg-BG", u"Български", u"България", u"Bulgarian"),
("de_DE", "German", "Germany"), (u"bn", u"বাংলা", u"", u"Bengali"),
("el_GR", "Greek", "Greece"), (u"br", u"Brezhoneg", u"", u"Breton"),
("en_AU", "English", "Australia"), (u"bs", u"Bosnian", u"", u"Bosnian"),
("en_CA", "English", "Canada"), (u"ca", u"Català", u"", u"Catalan"),
("en_GB", "English", "United Kingdom"), (u"ca-CT", u"Català", u"", u"Catalan"),
("en_ID", "English", "Indonesia"), (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
("en_IE", "English", "Ireland"), (u"ce", u"Нохчийн", u"", u"Chechen"),
("en_IN", "English", "India"), (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
("en_MY", "English", "Malaysia"), (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
("en_NZ", "English", "New Zealand"), (u"cy", u"Cymraeg", u"", u"Welsh"),
("en_PH", "English", "Philippines"), (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
("en_SG", "English", "Singapore"), (u"de", u"Deutsch", u"", u"German"),
("en_US", "English", "United States"), (u"de-AT", u"Deutsch", u"Österreich", u"German"),
("en_XA", "English", "Arabia"), (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
("en_ZA", "English", "South Africa"), (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
("es_AR", "Spanish", "Argentina"), (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
("es_CL", "Spanish", "Chile"), (u"en", u"English", u"", u"English"),
("es_ES", "Spanish", "Spain"), (u"en-AU", u"English", u"Australia", u"English"),
("es_MX", "Spanish", "Mexico"), (u"en-CA", u"English", u"Canada", u"English"),
("es_US", "Spanish", "United States"), (u"en-GB", u"English", u"United Kingdom", u"English"),
("es_XL", "Spanish", "Latin America"), (u"en-ID", u"English", u"Indonesia", u"English"),
("et_EE", "Estonian", "Estonia"), (u"en-IE", u"English", u"Ireland", u"English"),
("fi_FI", "Finnish", "Finland"), (u"en-IN", u"English", u"India", u"English"),
("fr_BE", "French", "Belgium"), (u"en-MY", u"English", u"Malaysia", u"English"),
("fr_CA", "French", "Canada"), (u"en-NZ", u"English", u"New Zealand", u"English"),
("fr_CH", "French", "Switzerland"), (u"en-PH", u"English", u"Philippines", u"English"),
("fr_FR", "French", "France"), (u"en-SG", u"English", u"Singapore", u"English"),
("he_IL", "Hebrew", "Israel"), (u"en-US", u"English", u"United States", u"English"),
("hr_HR", "Croatian", "Croatia"), (u"en-ZA", u"English", u"South Africa", u"English"),
("hu_HU", "Hungarian", "Hungary"), (u"eo", u"Esperanto", u"", u"Esperanto"),
("it_IT", "Italian", "Italy"), (u"es", u"Español", u"", u"Spanish"),
("ja_JP", "Japanese", "Japan"), (u"es-AR", u"Español", u"Argentina", u"Spanish"),
("ko_KR", "Korean", "Korea"), (u"es-CL", u"Español", u"Chile", u"Spanish"),
("lt_LT", "Lithuanian", "Lithuania"), (u"es-CO", u"Español", u"Colombia", u"Spanish"),
("lv_LV", "Latvian", "Latvia"), (u"es-ES", u"Español", u"España", u"Spanish"),
("nb_NO", "Norwegian", "Norway"), (u"es-MX", u"Español", u"México", u"Spanish"),
("nl_BE", "Dutch", "Belgium"), (u"es-PE", u"Español", u"Perú", u"Spanish"),
("nl_NL", "Dutch", "Netherlands"), (u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
("oc_OC", "Occitan", "Occitan"), (u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
("pl_PL", "Polish", "Poland"), (u"eu", u"Euskara", u"", u"Basque"),
("pt_BR", "Portuguese", "Brazil"), (u"fa", u"فارسی", u"", u"Persian"),
("pt_PT", "Portuguese", "Portugal"), (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
("ro_RO", "Romanian", "Romania"), (u"fr", u"Français", u"", u"French"),
("ru_RU", "Russian", "Russia"), (u"fr-BE", u"Français", u"Belgique", u"French"),
("sk_SK", "Slovak", "Slovak Republic"), (u"fr-CA", u"Français", u"Canada", u"French"),
("sl_SL", "Slovenian", "Slovenia"), (u"fr-CH", u"Français", u"Suisse", u"French"),
("sv_SE", "Swedish", "Sweden"), (u"fr-FR", u"Français", u"France", u"French"),
("th_TH", "Thai", "Thailand"), (u"ga", u"Gaeilge", u"", u"Irish"),
("tr_TR", "Turkish", "Turkey"), (u"gl", u"Galego", u"", u"Galician"),
("uk_UA", "Ukrainian", "Ukraine"), (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
("zh_CN", "Chinese", "China"), (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
("zh_HK", "Chinese", "Hong Kong SAR"), (u"hi", u"हिन्दी", u"", u"Hindi"),
("zh_TW", "Chinese", "Taiwan")) (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
(u"hy", u"Հայերեն", u"", u"Armenian"),
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
(u"is", u"Íslenska", u"", u""),
(u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
(u"it-IT", u"Italiano", u"Italia", u"Italian"),
(u"iw", u"עברית", u"", u""),
(u"ja-JP", u"日本語", u"日本", u"Japanese"),
(u"ka", u"ქართული", u"", u"Georgian"),
(u"kk", u"Қазақша", u"", u"Kazakh"),
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"ko-KR", u"한국어", u"대한민국", u"Korean"),
(u"la", u"Latina", u"", u"Latin"),
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
(u"mi", u"Reo Māori", u"", u"Maori"),
(u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"mk", u"Македонски", u"", u"Macedonian"),
(u"mn", u"Монгол", u"", u"Mongolian"),
(u"mr", u"मराठी", u"", u"Marathi"),
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
(u"mt", u"Malti", u"", u"Maltese"),
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian"),
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
(u"oc", u"Occitan", u"", u"Occitan"),
(u"or", u"Oriya", u"", u"Oriya"),
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
(u"ps", u"Pushto", u"", u"Pushto"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"ro-RO", u"Română", u"România", u"Romanian"),
(u"ru-RU", u"Русский", u"Россия", u"Russian"),
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
(u"sw", u"Kiswahili", u"", u""),
(u"ta", u"தமிழ்", u"", u"Tamil"),
(u"th-TH", u"ไทย", u"ไทย", u"Thai"),
(u"ti", u"ትግርኛ", u"", u"Tigrinya"),
(u"tl-PH", u"Filipino", u"Pilipinas", u""),
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
(u"tt", u"Татарча", u"", u"Tatar"),
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
(u"ur", u"اردو", u"", u"Urdu"),
(u"uz", u"Ozbek", u"", u"Uzbek"),
(u"ve", u"Venda", u"", u"Venda"),
(u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
(u"vo", u"Volapük", u"", u"Volapük"),
(u"wa", u"Walon", u"", u"Walloon"),
(u"war", u"Winaray", u"", u"Waray-Waray"),
(u"xh", u"Xhosa", u"", u"Xhosa"),
(u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"),
(u"zh-HK", u"中文", u"香港", u"Chinese"),
(u"zh-TW", u"中文", u"台湾", u"Chinese"),
(u"zu", u"Isi-Zulu", u"", u"Zulu")
)
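Illustration (not part of the commit): each entry now carries (code, native name, country, English name); for example, an English-name lookup keyed by the bare language code, built from a few entries copied from the list above.

sample_codes = ((u"de-CH", u"Deutsch", u"Schweiz", u"German"),
                (u"en-GB", u"English", u"United Kingdom", u"English"),
                (u"fa", u"فارسی", u"", u"Persian"))

english_names = {code.split('-')[0]: english
                 for code, native, country, english in sample_codes}
assert english_names == {'de': 'German', 'en': 'English', 'fa': 'Persian'}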

@@ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting):
         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)


+class SearchLanguageSetting(EnumStringSetting):
+    """Available choices may change, so user's value may not be in choices anymore"""
+
+    def parse(self, data):
+        if data not in self.choices and data != self.value:
+            # hack to give some backwards compatibility with old language cookies
+            data = str(data).replace('_', '-')
+            lang = data.split('-')[0]
+            if data in self.choices:
+                pass
+            elif lang in self.choices:
+                data = lang
+            elif data == 'ar-XA':
+                data = 'ar-SA'
+            else:
+                data = self.value
+        self.value = data
+
+
 class MapSetting(Setting):
     """Setting of a value that has to be translated in order to be storable"""
@@ -216,8 +235,8 @@ class Preferences(object):
         super(Preferences, self).__init__()

         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
-                                   'language': EnumStringSetting(settings['search']['language'],
-                                                                 choices=LANGUAGE_CODES),
+                                   'language': SearchLanguageSetting(settings['search']['language'],
+                                                                     choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
                                                                choices=settings['locales'].keys() + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
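Illustration (not part of the commit): what SearchLanguageSetting.parse() does with an old underscore-style language cookie, expressed as a plain function; the choices list is made up.

def normalize_language_cookie(data, choices, default):
    if data not in choices and data != default:
        # hack to give some backwards compatibility with old language cookies
        data = str(data).replace('_', '-')
        lang = data.split('-')[0]
        if data in choices:
            pass
        elif lang in choices:
            data = lang
        elif data == 'ar-XA':
            data = 'ar-SA'
        else:
            data = default
    return data

choices = ['all', 'en', 'de', 'de-CH', 'ar-SA']
assert normalize_language_cookie('de_CH', choices, 'all') == 'de-CH'
assert normalize_language_cookie('ar_XA', choices, 'all') == 'ar-SA'
assert normalize_language_cookie('pt_BR', choices, 'all') == 'all'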

@@ -71,21 +71,24 @@ class RawTextQuery(object):
             # check if any language-code is equal with
             # declared language-codes
             for lc in language_codes:
-                lang_id, lang_name, country = map(str.lower, lc)
+                lang_id, lang_name, country, english_name = map(unicode.lower, lc)

                 # if correct language-code is found
                 # set it as new search-language
                 if lang == lang_id\
                    or lang_id.startswith(lang)\
                    or lang == lang_name\
+                   or lang == english_name\
                    or lang.replace('_', ' ') == country:
                     parse_next = True
-                    self.languages.append(lang)
-                    break
+                    self.languages.append(lang_id)
+
+                    # to ensure best match (first match is not necessarily the best one)
+                    if lang == lang_id:
+                        break

         # this force a engine or category
         if query_part[0] == '!' or query_part[0] == '?':
-            prefix = query_part[1:].replace('_', ' ')
+            prefix = query_part[1:].replace('-', ' ')

             # check if prefix is equal with engine shortcut
             if prefix in engine_shortcuts:
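Illustration (not part of the commit): the matching loop above keeps scanning so an exact id match wins over an earlier prefix or name match; a standalone, hypothetical helper shows the same behaviour.

def select_language(lang, language_codes):
    selected = None
    lang = lang.lower()
    for lc in language_codes:
        lang_id, lang_name, country, english_name = (s.lower() for s in lc)
        if lang == lang_id or lang_id.startswith(lang) or lang == lang_name \
                or lang == english_name or lang.replace('_', ' ') == country:
            selected = lang_id
            if lang == lang_id:
                break
    return selected

codes = ((u"de-AT", u"Deutsch", u"Österreich", u"German"),
         (u"de", u"Deutsch", u"", u"German"))
assert select_language('de', codes) == 'de'      # exact id beats the earlier 'de-AT' prefix match
assert select_language('german', codes) == 'de'  # English name matched; the last match is kept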

@@ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form):
     # set query
     query = raw_text_query.getSearchQuery()

-    # get last selected language in query, if possible
+    # set specific language if set on request, query or preferences
     # TODO support search with multible languages
     if len(raw_text_query.languages):
         query_lang = raw_text_query.languages[-1]
+    elif 'language' in form:
+        query_lang = form.get('language')
+    else:
+        query_lang = preferences.get_value('language')

     query_time_range = form.get('time_range')
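Illustration (not part of the commit): the language precedence introduced above, with plain dicts standing in for the form data and the Preferences object.

def resolve_query_language(query_languages, form, preferences):
    if query_languages:                  # ':lang' modifier in the query wins
        return query_languages[-1]
    if 'language' in form:               # then the language field of the request
        return form['language']
    return preferences['language']       # finally the saved preference

assert resolve_query_language(['de-CH'], {'language': 'fr-FR'}, {'language': 'en'}) == 'de-CH'
assert resolve_query_language([], {'language': 'fr-FR'}, {'language': 'en'}) == 'fr-FR'
assert resolve_query_language([], {}, {'language': 'en'}) == 'en'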

@@ -15,5 +15,10 @@ $(document).ready(function() {
                 $('#search_form').submit();
             }
         });
+        $('#language').change(function(e) {
+            if($('#q').val()) {
+                $('#search_form').submit();
+            }
+        });
     }
 });

@@ -13,9 +13,9 @@
             <legend>{{ _('Search language') }}</legend>
             <p>
             <select name='language'>
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
                 {% endfor %}
             </select>
             </p>

@@ -14,9 +14,9 @@
             <legend>{{ _('Search language') }}</legend>
             <p>
             <select name='language'>
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
                 {% endfor %}
             </select>
             </p>

@@ -6,4 +6,5 @@
 <div id="advanced-search-container">
     {% include 'oscar/categories.html' %}
     {% include 'oscar/time-range.html' %}
+    {% include 'oscar/languages.html' %}
 </div>

@@ -0,0 +1,12 @@
{% if preferences %}
<select class="form-control" name='language'>
{% else %}
<select class="time_range" id='language' name='language'>
{% endif %}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
</option>
{% endfor %}
</select>

@@ -40,12 +40,7 @@
                 {% set language_label = _('Search language') %}
                 {% set language_info = _('What language do you prefer for search?') %}
                 {{ preferences_item_header(language_info, language_label, rtl) }}
-                <select class="form-control" name='language'>
-                    <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                    {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                    <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
-                    {% endfor %}
-                </select>
+                {% include 'oscar/languages.html' %}
                 {{ preferences_item_footer(language_info, language_label, rtl) }}

                 {% set locale_label = _('Interface language') %}
@@ -153,6 +148,7 @@
                             <th>{{ _("Allow") }}</th>
                             <th>{{ _("Engine name") }}</th>
                             <th>{{ _("Shortcut") }}</th>
+                            <th>{{ _("Language support") }}</th>
                             <th>{{ _("SafeSearch") }}</th>
                             <th>{{ _("Time range") }}</th>
                             <th>{{ _("Avg. time") }}</th>
@@ -161,6 +157,7 @@
                             <th>{{ _("Max time") }}</th>
                             <th>{{ _("Avg. time") }}</th>
                             <th>{{ _("SafeSearch") }}</th>
+                            <th>{{ _("Language support") }}</th>
                             <th>{{ _("Shortcut") }}</th>
                             <th>{{ _("Engine name") }}</th>
                             <th>{{ _("Allow") }}</th>
@@ -175,6 +172,7 @@
                                 </td>
                                 <th>{{ search_engine.name }}</th>
                                 <td>{{ shortcuts[search_engine.name] }}</td>
+                                <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
                                 <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
                                 <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
                                 <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -183,6 +181,7 @@
                                 <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
                                 <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
                                 <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
+                                <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
                                 <td>{{ shortcuts[search_engine.name] }}</td>
                                 <th>{{ search_engine.name }}</th>
                                 <td class="onoff-checkbox">

@@ -9,9 +9,9 @@
             <legend>{{ _('Search language') }}</legend>
             <p>
             <select name='language'>
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
                 {% endfor %}
             </select>
             </p>

@@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))

+    kwargs['language_codes'] = language_codes
+
+    if 'current_language' not in kwargs:
+        kwargs['current_language'] = request.preferences.get_value('language')
+
     # override url_for function in templates
     kwargs['url_for'] = url_for_theme
@@ -510,6 +514,7 @@ def index():
         answers=result_container.answers,
         infoboxes=result_container.infoboxes,
         paging=result_container.paging,
+        current_language=search_query.lang,
         base_url=get_base_url(),
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -552,7 +557,7 @@ def autocompleter():
     if not language or language == 'all':
         language = 'en'
     else:
-        language = language.split('_')[0]
+        language = language.split('-')[0]

     # run autocompletion
     raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
@@ -615,9 +620,7 @@ def preferences():
     return render('preferences.html',
                   locales=settings['locales'],
                   current_locale=get_locale(),
-                  current_language=lang,
                   image_proxy=image_proxy,
-                  language_codes=language_codes,
                   engines_by_category=categories,
                   stats=stats,
                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@@ -627,7 +630,8 @@ def preferences():
                   themes=themes,
                   plugins=plugins,
                   allowed_plugins=allowed_plugins,
-                  theme=get_current_theme_name())
+                  theme=get_current_theme_name(),
+                  preferences=True)

 @app.route('/image_proxy', methods=['GET'])

@@ -101,11 +101,11 @@ Change search language
    Page Should Contain  about
    Page Should Contain  preferences
    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Automatic
-    Select From List  language  Turkish (Turkey) - tr_TR
+    List Selection Should Be  language  Default language
+    Select From List  language  Türkçe (Türkiye) - tr-TR
    Submit Preferences
    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Turkish (Turkey) - tr_TR
+    List Selection Should Be  language  Türkçe (Türkiye) - tr-TR

Change autocomplete
    Page Should Contain  about

@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = bing._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<form>
<div id="limit-languages">
<div>
<div><input id="es" value="es"></input></div>
</div>
<div>
<div><input id="pt_BR" value="pt_BR"></input></div>
<div><input id="pt_PT" value="pt_PT"></input></div>
</div>
</div>
</form>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = bing._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('es', languages)
self.assertIn('pt-BR', languages)
self.assertIn('pt-PT', languages)

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
        results = dailymotion.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
json = r"""
{"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
"localized_name":"Afrikaans","display_name":"Afrikaans"},
{"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
"localized_name":"Arabic","display_name":"Arabic"},
{"code":"la","name":"Latin","native_name":null,
"localized_name":"Latin","display_name":"Latin"}
]}
"""
response = mock.Mock(text=json)
languages = dailymotion._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('af', languages)
self.assertIn('ar', languages)
self.assertIn('la', languages)
self.assertEqual(type(languages['af']), dict)
self.assertEqual(type(languages['ar']), dict)
self.assertEqual(type(languages['la']), dict)
self.assertIn('name', languages['af'])
self.assertIn('name', languages['ar'])
self.assertNotIn('name', languages['la'])
self.assertIn('english_name', languages['af'])
self.assertIn('english_name', languages['ar'])
self.assertIn('english_name', languages['la'])
self.assertEqual(languages['af']['name'], 'Afrikaans')
self.assertEqual(languages['af']['english_name'], 'Afrikaans')
self.assertEqual(languages['ar']['name'], u'العربية')
self.assertEqual(languages['ar']['english_name'], 'Arabic')
self.assertEqual(languages['la']['english_name'], 'Latin')

@@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase):
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
-        dicto['language'] = 'de_CH'
+        dicto['language'] = 'de-CH'
        dicto['time_range'] = ''
        params = duckduckgo.request(query, dicto)
        self.assertIn('url', params)
@@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase):
        self.assertIn('duckduckgo.com', params['url'])
        self.assertIn('ch-de', params['url'])
# when ddg uses non standard code
dicto['language'] = 'en-GB'
params = duckduckgo.request(query, dicto)
self.assertIn('uk-en', params['url'])
# no country given
duckduckgo.supported_languages = ['de-CH', 'en-US']
dicto['language'] = 'de'
params = duckduckgo.request(query, dicto)
self.assertIn('ch-de', params['url'])
    def test_no_url_in_request_year_time_range(self):
        dicto = defaultdict(dict)
        query = 'test_query'
@@ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
        self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
js = """some code...regions:{
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
response = mock.Mock(text=js)
languages = duckduckgo._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 5)
self.assertIn('wt-WT', languages)
self.assertIn('es-AR', languages)
self.assertIn('en-AU', languages)
self.assertIn('de-AT', languages)
self.assertIn('fr-BE', languages)

@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'es'
params = duckduckgo_definitions.request(query, dicto) params = duckduckgo_definitions.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])
self.assertIn('duckduckgo.com', params['url']) self.assertIn('duckduckgo.com', params['url'])
self.assertIn('headers', params)
self.assertIn('Accept-Language', params['headers'])
self.assertIn('es', params['headers']['Accept-Language'])
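The new assertions only require that the instant-answer request forwards the UI language as an Accept-Language header. A sketch of what request() might add (hypothetical helper, URL construction omitted, and params['headers'] assumed to already be a dict):

    def add_accept_language(params):
        # send only the bare language, so 'es-ES' still produces 'es'
        params['headers']['Accept-Language'] = params['language'].split('-')[0]
        return params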
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, duckduckgo_definitions.response, None) self.assertRaises(AttributeError, duckduckgo_definitions.response, None)

@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase):
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
self.assertTrue('gigablast.com' in params['url']) self.assertTrue('gigablast.com' in params['url'])
self.assertTrue('xx' in params['url'])
dicto['language'] = 'en-US'
params = gigablast.request(query, dicto)
self.assertTrue('en' in params['url'])
self.assertFalse('en-US' in params['url'])
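The qlang behaviour described by the new assertions is simple enough to sketch (hypothetical helper; the real request() builds the full Gigablast URL):

    def gigablast_qlang(language):
        # Gigablast takes a bare language code; 'xx' stands for "any language"
        if not language:
            return 'xx'
        return language.split('-')[0]

gigablast_qlang('en-US') returns 'en', which is why the test also checks that 'en-US' itself never appears in the URL.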
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, gigablast.response, None) self.assertRaises(AttributeError, gigablast.response, None)
@ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016') self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com') self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.') self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = gigablast._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<span id="menu2">
<a href="/search?&rxikd=1&qlang=xx"></a>
<a href="/search?&rxikd=1&qlang=en"></a>
<a href="/search?&rxikd=1&qlang=fr"></a>
</span>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = gigablast._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 2)
self.assertIn('en', languages)
self.assertIn('fr', languages)
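A sketch of a parser that would produce this list from the mocked preferences page (helper name made up; lxml assumed, as elsewhere in searx):

    from lxml.html import fromstring

    def parse_gigablast_languages(html_text):
        dom = fromstring(html_text)
        supported_languages = []
        for href in dom.xpath('//span[@id="menu2"]/a/@href'):
            if 'qlang=' not in href:
                continue
            code = href.split('qlang=')[-1]
            # skip the 'xx' placeholder, which means "any language"
            if code != 'xx':
                supported_languages.append(code)
        return supported_languages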

@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr_FR' dicto['language'] = 'fr-FR'
dicto['time_range'] = '' dicto['time_range'] = ''
params = google.request(query, dicto) params = google.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '') self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '') self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 0)
html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('en', languages)
self.assertIn('zh-CN', languages)
self.assertIn('zh-TW', languages)
self.assertEquals(type(languages['en']), dict)
self.assertEquals(type(languages['zh-CN']), dict)
self.assertEquals(type(languages['zh-TW']), dict)
self.assertIn('name', languages['en'])
self.assertIn('name', languages['zh-CN'])
self.assertIn('name', languages['zh-TW'])
self.assertEquals(languages['en']['name'], 'English')
self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')
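The mocked preferences page encodes each language as a span whose id is the code prefixed with 't'. A sketch of a parser matching the assertions (hypothetical helper, not the engine's actual code):

    from lxml.html import fromstring

    def parse_google_languages(html_text):
        dom = fromstring(html_text)
        supported_languages = {}
        for span in dom.xpath('//table//span[@id]'):
            code = span.get('id')[1:]  # drop the leading 't': 'tzh-CN' -> 'zh-CN'
            supported_languages[code] = {'name': span.text_content()}
        return supported_languages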

@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 0 dicto['pageno'] = 0
dicto['language'] = 'fr_FR' dicto['language'] = 'fr-FR'
qwant.categories = [''] qwant.categories = ['']
params = qwant.request(query, dicto) params = qwant.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)

@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr-FR'
params = subtitleseeker.request(query, dicto) params = subtitleseeker.request(query, dicto)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
@ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
def test_response(self): def test_response(self):
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['language'] = 'fr_FR' dicto['language'] = 'fr-FR'
response = mock.Mock(search_params=dicto) response = mock.Mock(search_params=dicto)
self.assertRaises(AttributeError, subtitleseeker.response, None) self.assertRaises(AttributeError, subtitleseeker.response, None)
@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase):
self.assertIn('1039 Subs', results[0]['content']) self.assertIn('1039 Subs', results[0]['content'])
self.assertIn('Alternative Title', results[0]['content']) self.assertIn('Alternative Title', results[0]['content'])
dicto['language'] = 'pt-BR'
results = subtitleseeker.response(response)
self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
html = """ html = """
<div class="boxRows"> <div class="boxRows">
<div class="boxRowsInner" style="width:600px;"> <div class="boxRowsInner" style="width:600px;">

@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'de_DE' dicto['language'] = 'de-DE'
params = swisscows.request(query, dicto) params = swisscows.request(query, dicto)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html') self.assertEqual(results[2]['template'], 'images.html')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 0)
html = """
<html>
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
</ul>
</div>
</div>
</html>
"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('de-CH', languages)
self.assertIn('fr-CH', languages)
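Here the locale codes sit in data-val attributes, so a single xpath is enough for a sketch (hypothetical helper):

    from lxml.html import fromstring

    def parse_swisscows_regions(html_text):
        # every region option carries its locale in a data-val attribute
        return fromstring(html_text).xpath('//div[@id="regions-popup"]//a/@data-val')

Note that the 'browser' placeholder also lands in the list (which is why the expected length is 3); the generator script at the end of this diff later rejects it because is_dialect() filters out entries whose bare code is longer than three characters.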

@ -8,9 +8,11 @@ from searx.testing import SearxTestCase
class TestWikipediaEngine(SearxTestCase): class TestWikipediaEngine(SearxTestCase):
def test_request(self): def test_request(self):
wikipedia.supported_languages = ['fr', 'en']
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['language'] = 'fr_FR' dicto['language'] = 'fr-FR'
params = wikipedia.request(query, dicto) params = wikipedia.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])
@ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase):
params = wikipedia.request(query, dicto) params = wikipedia.request(query, dicto)
self.assertIn('en', params['url']) self.assertIn('en', params['url'])
dicto['language'] = 'xx'
params = wikipedia.request(query, dicto)
self.assertIn('en', params['url'])
def test_response(self): def test_response(self):
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['language'] = 'fr' dicto['language'] = 'fr'
@ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2) self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊') self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content']) self.assertIn(u'披头士乐队...', results[1]['content'])
def test_fetch_supported_languages(self):
html = u"""<html></html>"""
response = mock.Mock(text=html)
languages = wikipedia._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 0)
html = u"""
<html>
<body>
<div>
<div>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Swedish</a></td>
<td><a>Svenska</a></td>
<td><a>sv</a></td>
<td><a><b>3000000</b></a></td>
</tr>
<tr>
<td>3</td>
<td><a>Cebuano</a></td>
<td><a>Sinugboanong Binisaya</a></td>
<td><a>ceb</a></td>
<td><a><b>3000000</b></a></td>
</tr>
</tbody>
</table>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Norwegian (Bokmål)</a></td>
<td><a>Norsk (Bokmål)</a></td>
<td><a>no</a></td>
<td><a><b>100000</b></a></td>
</tr>
</tbody>
</table>
</div>
</div>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = wikipedia._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('sv', languages)
self.assertIn('ceb', languages)
self.assertIn('no', languages)
self.assertEqual(type(languages['sv']), dict)
self.assertEqual(type(languages['ceb']), dict)
self.assertEqual(type(languages['no']), dict)
self.assertIn('name', languages['sv'])
self.assertIn('english_name', languages['sv'])
self.assertIn('articles', languages['sv'])
self.assertEqual(languages['sv']['name'], 'Svenska')
self.assertEqual(languages['sv']['english_name'], 'Swedish')
self.assertEqual(languages['sv']['articles'], 3000000)
self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
self.assertEqual(languages['ceb']['articles'], 3000000)
self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
self.assertEqual(languages['no']['articles'], 100000)
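The expected dict can be derived from the article-count tables in the mock. A sketch (hypothetical helper, not the engine module's parser):

    from lxml.html import fromstring

    def parse_wikipedia_languages(html_text):
        dom = fromstring(html_text)
        supported_languages = {}
        # each row: rank, English name, local name, language code, article count
        for row in dom.xpath('//table[contains(@class, "sortable")]//tbody/tr'):
            cells = row.xpath('./td')
            if len(cells) < 5:
                continue
            code = cells[3].text_content().strip()
            supported_languages[code] = {
                'english_name': cells[1].text_content().strip(),
                'name': cells[2].text_content().strip(),
                'articles': int(cells[4].text_content().replace(',', '')),
            }
        return supported_languages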

@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response) results = yahoo.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = yahoo._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<div>
<div id="yschlang">
<span>
<label><input value="lang_ar"></input></label>
</span>
<span>
<label><input value="lang_zh_chs"></input></label>
<label><input value="lang_zh_cht"></input></label>
</span>
</div>
</div>
</html>
"""
response = mock.Mock(text=html)
languages = yahoo._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('ar', languages)
self.assertIn('zh-chs', languages)
self.assertIn('zh-cht', languages)
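A sketch of the corresponding parser (hypothetical helper): the input values carry a lang_ prefix and use underscores, so they only need a prefix strip and an underscore-to-dash swap:

    from lxml.html import fromstring

    def parse_yahoo_languages(html_text):
        dom = fromstring(html_text)
        supported_languages = []
        for value in dom.xpath('//div[@id="yschlang"]//input/@value'):
            # 'lang_zh_chs' -> 'zh-chs'
            supported_languages.append(value.replace('lang_', '', 1).replace('_', '-'))
        return supported_languages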

@ -1,4 +1,4 @@
from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
MultipleChoiceSetting, PluginsSetting, ValidationException) MultipleChoiceSetting, PluginsSetting, ValidationException)
from searx.testing import SearxTestCase from searx.testing import SearxTestCase
@ -88,6 +88,27 @@ class TestSettings(SearxTestCase):
setting.parse('2') setting.parse('2')
self.assertEquals(setting.get_value(), ['2']) self.assertEquals(setting.get_value(), ['2'])
# search language settings
def test_lang_setting_valid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('de')
self.assertEquals(setting.get_value(), 'de')
def test_lang_setting_invalid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('xx')
self.assertEquals(setting.get_value(), 'all')
def test_lang_setting_old_cookie_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_XA')
self.assertEquals(setting.get_value(), 'es')
def test_lang_setting_old_cookie_format(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_ES')
self.assertEquals(setting.get_value(), 'es-ES')
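These four cases describe the whole contract of the new setting: keep valid choices, fall back to the default for unknown codes, and translate old underscore-style cookie values. A standalone stand-in with the same observable behaviour (the real class lives in searx.preferences and, per the import above, sits alongside EnumStringSetting; the attribute names here are assumptions):

    class SearchLanguageSettingSketch(object):
        def __init__(self, default_value, choices):
            self.value = default_value
            self.choices = choices

        def parse(self, data):
            if data not in self.choices:
                lang = data.replace('_', '-')          # old cookies used es_ES style
                if lang in self.choices:
                    data = lang                        # es_ES -> es-ES
                elif lang.split('-')[0] in self.choices:
                    data = lang.split('-')[0]          # es_XA -> es
                else:
                    data = self.value                  # unknown code -> keep the default
            self.value = data

        def get_value(self):
            return self.value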
# plugins settings # plugins settings
def test_plugins_setting_all_default_enabled(self): def test_plugins_setting_all_default_enabled(self):
plugin1 = PluginStub('plugin1', True) plugin1 = PluginStub('plugin1', True)

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
# This script generates languages.py from intersecting each engine's supported languages.
#
# The country names are obtained from http://api.geonames.org which requires registering as a user.
#
# Output files (engines_languages.json and languages.py)
# are written to the current directory so the real data files are not overwritten if something goes wrong.
from requests import get
from urllib import urlencode
from lxml.html import fromstring
from json import loads, dumps
import io
from sys import path
path.append('../searx') # noqa
from searx.engines import engines
# Geonames API for country names.
geonames_user = '' # ADD USER NAME HERE
country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
# Output files.
engines_languages_file = 'engines_languages.json'
languages_file = 'languages.py'
engines_languages = {}
languages = {}
# To filter out invalid codes and dialects.
def valid_code(lang_code):
# filter invalid codes
# sl-SL is technically not invalid, but still a mistake
invalid_codes = ['sl-SL', 'wt-WT', 'jw']
invalid_countries = ['UK', 'XA', 'XL']
if lang_code[:2] == 'xx'\
or lang_code in invalid_codes\
or lang_code[-2:] in invalid_countries\
or is_dialect(lang_code):
return False
return True
# Language codes with any additional tags other than language and country.
def is_dialect(lang_code):
lang_code = lang_code.split('-')
if len(lang_code) > 2 or len(lang_code[0]) > 3:
return True
if len(lang_code) == 2 and len(lang_code[1]) > 2:
return True
return False
# Get country name in specified language.
def get_country_name(locale):
if geonames_user == '':
return ''
locale = locale.split('-')
if len(locale) != 2:
return ''
url = country_names_url.format(parameters=urlencode({'lang': locale[0],
'country': locale[1],
'username': geonames_user}))
response = get(url)
json = loads(response.text)
content = json.get('geonames', None)
if content is None or len(content) != 1:
print "No country name found for " + locale[0] + "-" + locale[1]
return ''
return content[0].get('countryName', '')
# Fetches the supported languages for each engine and writes them to a json file.
def fetch_supported_languages():
for engine_name in engines:
if hasattr(engines[engine_name], 'fetch_supported_languages'):
try:
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
except Exception as e:
print e
# write json file
with io.open(engines_languages_file, "w", encoding="utf-8") as f:
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
# Join all language lists.
# Iterate all languages supported by each engine.
def join_language_lists():
# include wikipedia first for more accurate language names
languages.update({code: lang for code, lang
in engines_languages['wikipedia'].iteritems()
if valid_code(code)})
for engine_name in engines_languages:
for locale in engines_languages[engine_name]:
if not valid_code(locale):
continue
# if language is not on list or if it has no name yet
if locale not in languages or not languages[locale].get('name'):
if isinstance(engines_languages[engine_name], dict):
languages[locale] = engines_languages[engine_name][locale]
else:
languages[locale] = {}
# get locales that have no name or country yet
for locale in languages.keys():
# try to get language names
if not languages[locale].get('name'):
name = languages.get(locale.split('-')[0], {}).get('name', None)
if name:
languages[locale]['name'] = name
else:
# filter out locales with no name
del languages[locale]
continue
# try to get language name in english
if not languages[locale].get('english_name'):
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
# try to get country name
if locale.find('-') > 0 and not languages[locale].get('country'):
languages[locale]['country'] = get_country_name(locale) or ''
# Remove countryless language if language is featured in only one country.
def filter_single_country_languages():
prev_lang = None
countries = 0
for code in sorted(languages):
lang = code.split('-')[0]
if lang == prev_lang:
countries += 1
else:
if prev_lang is not None and countries == 1:
del languages[prev_lang]
countries = 0
prev_lang = lang
# also check the last language group
if prev_lang is not None and countries == 1:
del languages[prev_lang]
# Write languages.py.
def write_languages_file():
new_file = open(languages_file, 'w')
file_content = '# -*- coding: utf-8 -*-\n'\
+ '# list of language codes\n'\
+ '# this file is generated automatically by utils/update_search_languages.py\n'\
+ '\nlanguage_codes = ('
for code in sorted(languages):
file_content += '\n (u"' + code + '"'\
+ ', u"' + languages[code]['name'].split(' (')[0] + '"'\
+ ', u"' + languages[code].get('country', '') + '"'\
+ ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),'
# remove last comma
file_content = file_content[:-1]
file_content += '\n)\n'
new_file.write(file_content.encode('utf8'))
new_file.close()
if __name__ == "__main__":
fetch_supported_languages()
join_language_lists()
filter_single_country_languages()
write_languages_file()
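For reference, write_languages_file() above emits four-field tuples (code, native name, country, English name). A hypothetical excerpt of the generated languages.py, with the concrete rows being illustrative only:

    # -*- coding: utf-8 -*-
    # list of language codes
    # this file is generated automatically by utils/update_search_languages.py

    language_codes = (
        (u"af", u"Afrikaans", u"", u"Afrikaans"),
        (u"de", u"Deutsch", u"", u"German"),
        (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
    )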