Add tests for _fetch_supported_languages in engines

and refactor the method so it can be tested without making network requests
dependabot/pip/master/sphinx-6.1.3
marc 8 years ago
parent e0c270bd72
commit af35eee10b

File diff suppressed because one or more lines are too long

Binary file not shown.

@ -21,6 +21,7 @@ import sys
from flask_babel import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@ -79,9 +80,6 @@ def load_engine(engine_data):
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
@ -91,6 +89,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
setattr(engine, 'fetch_supported_languages',
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
engine.stats = {
'result_count': 0,
'search_count': 0,

@ -15,7 +15,6 @@
from urllib import urlencode
from lxml import html
from requests import get
from searx.engines.xpath import extract_text
# engine dependent config
@ -86,10 +85,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options:
code = option.xpath('./@id')[0].replace('_', '-')

@ -19,7 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']

@ -17,7 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']

@ -80,11 +80,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
response_json = loads(response.text)
response_json = loads(resp.text)
for language in response_json['list']:
supported_languages[language['code']] = {}

@ -119,11 +119,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
response = get(supported_languages_url)
def _fetch_supported_languages(resp):
# response is a js file with regions as an embedded object
response_page = response.text
response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1]

@ -4,7 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import fetch_supported_languages
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'

@ -14,7 +14,6 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -91,10 +90,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a')
for link in links:
code = link.xpath('./@href')[0][-2:]

@ -12,7 +12,6 @@ import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
from requests import get
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
@ -364,14 +363,13 @@ def attributes_to_html(attributes):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = html.fromstring(response.text)
options = dom.xpath('//select[@name="hl"]/option')
dom = html.fromstring(resp.text)
options = dom.xpath('//table//td/font/label/span')
for option in options:
code = option.xpath('./@value')[0].split('-')[0]
name = option.text[:-1].title()
code = option.xpath('./@id')[0][1:]
name = option.text.title()
supported_languages[code] = {"name": name}
return supported_languages

@ -13,7 +13,7 @@
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import fetch_supported_languages
from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']

@ -13,7 +13,6 @@
from json import loads
from urllib import urlencode, unquote
import re
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -25,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@ -113,10 +114,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(base_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]

@ -15,7 +15,7 @@ from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import fetch_supported_languages
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring

@ -12,7 +12,6 @@
from json import loads
from urllib import urlencode, quote
from requests import get
from lxml.html import fromstring
@ -119,10 +118,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
tables = dom.xpath('//table[contains(@class,"sortable")]')
for table in tables:
# exclude header row

@ -14,7 +14,6 @@
from urllib import urlencode
from urlparse import unquote
from lxml import html
from requests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
@ -144,13 +143,12 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
for option in options:
code = option.xpath('./@value')[0][5:]
code = option.xpath('./@value')[0][5:].replace('_', '-')
supported_languages.append(code)
return supported_languages

@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, fetch_supported_languages
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser

@ -3,36 +3,27 @@
# this file is generated automatically by utils/update_search_languages.py
language_codes = (
(u"ach", u"Acoli", u"", u""),
(u"af", u"Afrikaans", u"", u""),
(u"ak", u"Akan", u"", u""),
(u"am", u"አማርኛ", u"", u""),
(u"am", u"አማርኛ", u"", u"Amharic"),
(u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
(u"ban", u"Balinese", u"", u""),
(u"be", u"Беларуская", u"", u"Belarusian"),
(u"bem", u"Ichibemba", u"", u""),
(u"bg-BG", u"Български", u"България", u"Bulgarian"),
(u"bn", u"বাংলা", u"", u""),
(u"br", u"Brezhoneg", u"", u""),
(u"bs", u"Bosanski", u"", u""),
(u"bn", u"বাংলা", u"", u"Bengali"),
(u"br", u"Brezhoneg", u"", u"Breton"),
(u"bs", u"Bosnian", u"", u"Bosnian"),
(u"ca", u"Català", u"", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"ce", u"Нохчийн", u"", u"Chechen"),
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
(u"chr", u"ᏣᎳᎩ", u"", u""),
(u"ckb", u"Central Kurdish", u"", u""),
(u"co", u"Corsican", u"", u""),
(u"crs", u"Seychellois Creole", u"", u""),
(u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
(u"cy", u"Cymraeg", u"", u""),
(u"cy", u"Cymraeg", u"", u"Welsh"),
(u"da-DK", u"Dansk", u"Danmark", u"Danish"),
(u"de", u"Deutsch", u"", u"German"),
(u"de-AT", u"Deutsch", u"Österreich", u"German"),
(u"de-CH", u"Deutsch", u"Schweiz", u"German"),
(u"de-DE", u"Deutsch", u"Deutschland", u"German"),
(u"ee", u"Eʋegbe", u"", u""),
(u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
(u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"),
@ -60,30 +51,20 @@ language_codes = (
(u"eu", u"Euskara", u"", u"Basque"),
(u"fa", u"فارسی", u"", u"Persian"),
(u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
(u"fo", u"Føroyskt", u"", u""),
(u"fr", u"Français", u"", u"French"),
(u"fr-BE", u"Français", u"Belgique", u"French"),
(u"fr-CA", u"Français", u"Canada", u"French"),
(u"fr-CH", u"Français", u"Suisse", u"French"),
(u"fr-FR", u"Français", u"France", u"French"),
(u"fy", u"West-Frysk", u"", u""),
(u"ga", u"Gaeilge", u"", u""),
(u"gaa", u"Ga", u"", u""),
(u"gd", u"Gàidhlig", u"", u""),
(u"ga", u"Gaeilge", u"", u"Irish"),
(u"gl", u"Galego", u"", u"Galician"),
(u"gn", u"Guarani", u"", u""),
(u"gu", u"ગુજરાતી", u"", u""),
(u"ha", u"Hausa", u"", u""),
(u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
(u"gu", u"ગુજરાતી", u"", u"Gujarati"),
(u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
(u"hi", u"हिन्दी", u"", u"Hindi"),
(u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
(u"ht", u"Haitian Creole", u"", u""),
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
(u"hy", u"Հայերեն", u"", u"Armenian"),
(u"ia", u"Interlingua", u"", u""),
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
(u"ig", u"Igbo", u"", u""),
(u"is", u"Íslenska", u"", u""),
(u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@ -91,86 +72,48 @@ language_codes = (
(u"iw", u"עברית", u"", u""),
(u"ja-JP", u"日本語", u"日本", u"Japanese"),
(u"ka", u"ქართული", u"", u"Georgian"),
(u"kg", u"Kongo", u"", u""),
(u"kk", u"Қазақша", u"", u"Kazakh"),
(u"km", u"ខ្មែរ", u"", u""),
(u"kn", u"ಕನ್ನಡ", u"", u""),
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"ko-KR", u"한국어", u"대한민국", u"Korean"),
(u"kri", u"Krio", u"", u""),
(u"ky", u"Кыргызча", u"", u""),
(u"la", u"Latina", u"", u"Latin"),
(u"lg", u"Luganda", u"", u""),
(u"ln", u"Lingála", u"", u""),
(u"lo", u"ລາວ", u"", u""),
(u"loz", u"Lozi", u"", u""),
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
(u"lua", u"Luba-Lulua", u"", u""),
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
(u"mfe", u"Kreol Morisien", u"", u""),
(u"mg", u"Malagasy", u"", u""),
(u"mi", u"Maori", u"", u""),
(u"mi", u"Reo Māori", u"", u"Maori"),
(u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"mk", u"Македонски", u"", u""),
(u"ml", u"മലയാളം", u"", u""),
(u"mn", u"Монгол", u"", u""),
(u"mr", u"मराठी", u"", u""),
(u"mk", u"Македонски", u"", u"Macedonian"),
(u"mn", u"Монгол", u"", u"Mongolian"),
(u"mr", u"मराठी", u"", u"Marathi"),
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
(u"mt", u"Malti", u"", u""),
(u"my", u"ဗမာ", u"", u""),
(u"mt", u"Malti", u"", u"Maltese"),
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
(u"ne", u"नेपाली", u"", u""),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian"),
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
(u"nso", u"Northern Sotho", u"", u""),
(u"ny", u"Nyanja", u"", u""),
(u"nyn", u"Runyankore", u"", u""),
(u"oc", u"Occitan", u"", u""),
(u"om", u"Oromoo", u"", u""),
(u"or", u"ଓଡ଼ିଆ", u"", u""),
(u"pa", u"ਪੰਜਾਬੀ", u"", u""),
(u"pcm", u"Nigerian Pidgin", u"", u""),
(u"oc", u"Occitan", u"", u"Occitan"),
(u"or", u"Oriya", u"", u"Oriya"),
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
(u"ps", u"پښتو", u"", u""),
(u"ps", u"Pushto", u"", u"Pushto"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"qu", u"Runasimi", u"", u""),
(u"rm", u"Rumantsch", u"", u""),
(u"rn", u"Ikirundi", u"", u""),
(u"ro-RO", u"Română", u"România", u"Romanian"),
(u"ru-RU", u"Русский", u"Россия", u"Russian"),
(u"rw", u"Kinyarwanda", u"", u""),
(u"sd", u"Sindhi", u"", u""),
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"si", u"සිංහල", u"", u""),
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sn", u"Chishona", u"", u""),
(u"so", u"Soomaali", u"", u""),
(u"sq", u"Shqip", u"", u""),
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"st", u"Southern Sotho", u"", u""),
(u"su", u"Sundanese", u"", u""),
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
(u"sw", u"Kiswahili", u"", u""),
(u"ta", u"தமிழ்", u"", u""),
(u"te", u"తెలుగు", u"", u""),
(u"tg", u"Tajik", u"", u""),
(u"ta", u"தமிழ்", u"", u"Tamil"),
(u"th-TH", u"ไทย", u"ไทย", u"Thai"),
(u"ti", u"ትግርኛ", u"", u""),
(u"tk", u"Turkmen", u"", u""),
(u"ti", u"ትግርኛ", u"", u"Tigrinya"),
(u"tl-PH", u"Filipino", u"Pilipinas", u""),
(u"tlh", u"Klingon", u"", u""),
(u"tn", u"Tswana", u"", u""),
(u"to", u"Lea Fakatonga", u"", u""),
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
(u"tt", u"Tatar", u"", u""),
(u"tum", u"Tumbuka", u"", u""),
(u"tw", u"Twi", u"", u""),
(u"ug", u"ئۇيغۇرچە", u"", u""),
(u"tt", u"Татарча", u"", u"Tatar"),
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
(u"ur", u"اردو", u"", u"Urdu"),
(u"uz", u"Ozbek", u"", u"Uzbek"),
@ -179,13 +122,10 @@ language_codes = (
(u"vo", u"Volapük", u"", u"Volapük"),
(u"wa", u"Walon", u"", u"Walloon"),
(u"war", u"Winaray", u"", u"Waray-Waray"),
(u"wo", u"Wolof", u"", u""),
(u"xh", u"Xhosa", u"", u""),
(u"yi", u"ייִדיש", u"", u""),
(u"yo", u"Èdè Yorùbá", u"", u""),
(u"xh", u"Xhosa", u"", u"Xhosa"),
(u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u""),
(u"zh-HK", u"中文", u"香港", u"Chinese"),
(u"zh-TW", u"中文", u"台湾", u"Chinese"),
(u"zu", u"Isizulu", u"", u"")
(u"zh-TW", u"中文", u"台湾", u""),
(u"zu", u"Isi-Zulu", u"", u"Zulu")
)

@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This should be the title')
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    # Exercises bing._fetch_supported_languages with canned responses, so no
    # network request is involved.

    # A page without the language form must produce an empty list.
    empty_page = """<html></html>"""
    empty_resp = mock.Mock(text=empty_page)
    no_languages = bing._fetch_supported_languages(empty_resp)
    self.assertEqual(type(no_languages), list)
    self.assertEqual(len(no_languages), 0)

    # Three <input> elements nested under the "limit-languages" container.
    page = """
<html>
<body>
<form>
<div id="limit-languages">
<div>
<div><input id="es" value="es"></input></div>
</div>
<div>
<div><input id="pt_BR" value="pt_BR"></input></div>
<div><input id="pt_PT" value="pt_PT"></input></div>
</div>
</div>
</form>
</body>
</html>
"""
    page_resp = mock.Mock(text=page)
    languages = bing._fetch_supported_languages(page_resp)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    # Underscores in the input ids are expected to come back as hyphens.
    for expected_code in ('es', 'pt-BR', 'pt-PT'):
        self.assertIn(expected_code, languages)

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import dailymotion
@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
results = dailymotion.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    # Feeds a canned JSON payload to dailymotion._fetch_supported_languages.
    # The "la" entry has a null native_name and is expected to end up
    # without a 'name' key.
    payload = r"""
{"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
"localized_name":"Afrikaans","display_name":"Afrikaans"},
{"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
"localized_name":"Arabic","display_name":"Arabic"},
{"code":"la","name":"Latin","native_name":null,
"localized_name":"Latin","display_name":"Latin"}
]}
"""
    fake_resp = mock.Mock(text=payload)
    languages = dailymotion._fetch_supported_languages(fake_resp)
    all_codes = ('af', 'ar', 'la')

    # overall shape: a dict with one dict entry per language code
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)
    for code in all_codes:
        self.assertIn(code, languages)
    for code in all_codes:
        self.assertEqual(type(languages[code]), dict)

    # native name is present only where the payload provided one
    for code in ('af', 'ar'):
        self.assertIn('name', languages[code])
    self.assertNotIn('name', languages['la'])

    # the English name is always present
    for code in all_codes:
        self.assertIn('english_name', languages[code])

    # exact values
    afrikaans = languages['af']
    arabic = languages['ar']
    self.assertEqual(afrikaans['name'], 'Afrikaans')
    self.assertEqual(afrikaans['english_name'], 'Afrikaans')
    self.assertEqual(arabic['name'], u'العربية')
    self.assertEqual(arabic['english_name'], 'Arabic')
    self.assertEqual(languages['la']['english_name'], 'Latin')

@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    # The response is a JavaScript snippet with an embedded "regions" object.
    # Its "<region>-<language>" keys are expected to come back as
    # "<language>-<REGION>" codes.
    script = """some code...regions:{
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
    fake_resp = mock.Mock(text=script)
    languages = duckduckgo._fetch_supported_languages(fake_resp)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 5)
    for expected_code in ('wt-WT', 'es-AR', 'en-AU', 'de-AT', 'fr-BE'):
        self.assertIn(expected_code, languages)

@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    # Exercises gigablast._fetch_supported_languages with canned pages.

    # No language menu at all: expect an empty list.
    blank_page = """<html></html>"""
    blank_resp = mock.Mock(text=blank_page)
    nothing = gigablast._fetch_supported_languages(blank_resp)
    self.assertEqual(type(nothing), list)
    self.assertEqual(len(nothing), 0)

    # Three links in the menu. Only two codes are expected back, so the
    # "xx" entry must not be reported.
    menu_page = """
<html>
<body>
<span id="menu2">
<a href="/search?&rxikd=1&qlang=xx"></a>
<a href="/search?&rxikd=1&qlang=en"></a>
<a href="/search?&rxikd=1&qlang=fr"></a>
</span>
</body>
</html>
"""
    menu_resp = mock.Mock(text=menu_page)
    languages = gigablast._fetch_supported_languages(menu_resp)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 2)
    for expected_code in ('en', 'fr'):
        self.assertIn(expected_code, languages)

@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
def test_fetch_supported_languages(self):
    # Exercises google._fetch_supported_languages with canned pages, so no
    # network request is involved.

    # A page without the language table must produce an empty dict.
    html = """<html></html>"""
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 0)

    # Three <span> labels; the expected codes are the span ids without
    # their leading "t" ("ten" -> "en", "tzh-CN" -> "zh-CN").
    html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)

    self.assertIn('en', languages)
    self.assertIn('zh-CN', languages)
    self.assertIn('zh-TW', languages)

    # assertEqual replaces the deprecated assertEquals alias, matching the
    # rest of this test and its sibling engine tests.
    self.assertEqual(type(languages['en']), dict)
    self.assertEqual(type(languages['zh-CN']), dict)
    self.assertEqual(type(languages['zh-TW']), dict)

    self.assertIn('name', languages['en'])
    self.assertIn('name', languages['zh-CN'])
    self.assertIn('name', languages['zh-TW'])

    self.assertEqual(languages['en']['name'], 'English')
    self.assertEqual(languages['zh-CN']['name'], u'中文 (简体)')
    self.assertEqual(languages['zh-TW']['name'], u'中文 (繁體)')

@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html')
def test_fetch_supported_languages(self):
    # Exercises swisscows._fetch_supported_languages with canned pages.

    # No regions popup: expect an empty list.
    blank_page = """<html></html>"""
    blank_resp = mock.Mock(text=blank_page)
    nothing = swisscows._fetch_supported_languages(blank_resp)
    self.assertEqual(type(nothing), list)
    self.assertEqual(len(nothing), 0)

    # Three entries in the popup. All three are counted, but only the two
    # locale codes are checked by value.
    popup_page = """
<html>
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
</ul>
</div>
</div>
</html>
"""
    popup_resp = mock.Mock(text=popup_page)
    languages = swisscows._fetch_supported_languages(popup_resp)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    for expected_code in ('de-CH', 'fr-CH'):
        self.assertIn(expected_code, languages)

@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content'])
def test_fetch_supported_languages(self):
    # Exercises wikipedia._fetch_supported_languages with canned pages.

    # No sortable tables: expect an empty dict.
    blank_page = u"""<html></html>"""
    blank_resp = mock.Mock(text=blank_page)
    nothing = wikipedia._fetch_supported_languages(blank_resp)
    self.assertEqual(type(nothing), dict)
    self.assertEqual(len(nothing), 0)

    # Two sortable tables holding three language rows in total.
    tables_page = u"""
<html>
<body>
<div>
<div>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Swedish</a></td>
<td><a>Svenska</a></td>
<td><a>sv</a></td>
<td><a><b>3000000</b></a></td>
</tr>
<tr>
<td>3</td>
<td><a>Cebuano</a></td>
<td><a>Sinugboanong Binisaya</a></td>
<td><a>ceb</a></td>
<td><a><b>3000000</b></a></td>
</tr>
</tbody>
</table>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Norwegian (Bokmål)</a></td>
<td><a>Norsk (Bokmål)</a></td>
<td><a>no</a></td>
<td><a><b>100000</b></a></td>
</tr>
</tbody>
</table>
</div>
</div>
</body>
</html>
"""
    tables_resp = mock.Mock(text=tables_page)
    languages = wikipedia._fetch_supported_languages(tables_resp)

    # (code, local name, English name, article count) for every table row
    expected_rows = (
        ('sv', 'Svenska', 'Swedish', 3000000),
        ('ceb', 'Sinugboanong Binisaya', 'Cebuano', 3000000),
        ('no', u'Norsk (Bokmål)', u'Norwegian (Bokmål)', 100000),
    )

    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)
    for code, _, _, _ in expected_rows:
        self.assertIn(code, languages)
    for code, _, _, _ in expected_rows:
        self.assertEqual(type(languages[code]), dict)

    # key presence is only checked on the first entry
    for key in ('name', 'english_name', 'articles'):
        self.assertIn(key, languages['sv'])

    for code, local_name, english_name, article_count in expected_rows:
        entry = languages[code]
        self.assertEqual(entry['name'], local_name)
        self.assertEqual(entry['english_name'], english_name)
        self.assertEqual(entry['articles'], article_count)

@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    # Exercises yahoo._fetch_supported_languages with canned pages.

    # No language selector: expect an empty list.
    blank_page = """<html></html>"""
    blank_resp = mock.Mock(text=blank_page)
    nothing = yahoo._fetch_supported_languages(blank_resp)
    self.assertEqual(type(nothing), list)
    self.assertEqual(len(nothing), 0)

    # Three inputs under "yschlang". The expected codes drop the "lang_"
    # prefix and use hyphens in place of the remaining underscores.
    selector_page = """
<html>
<div>
<div id="yschlang">
<span>
<label><input value="lang_ar"></input></label>
</span>
<span>
<label><input value="lang_zh_chs"></input></label>
<label><input value="lang_zh_cht"></input></label>
</span>
</div>
</div>
</html>
"""
    selector_resp = mock.Mock(text=selector_page)
    languages = yahoo._fetch_supported_languages(selector_resp)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    for expected_code in ('ar', 'zh-chs', 'zh-cht'):
        self.assertIn(expected_code, languages)

@ -84,7 +84,7 @@ def fetch_supported_languages():
# write json file
f = io.open(engines_languages_file, "w", encoding="utf-8")
f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
f.close()
@ -110,18 +110,22 @@ def join_language_lists():
else:
languages[locale] = {}
# get locales that have no name yet
# get locales that have no name or country yet
for locale in languages.keys():
if not languages[locale].get('name'):
# try to get language and country names
# try to get language names
name = languages.get(locale.split('-')[0], {}).get('name', None)
if name:
languages[locale]['name'] = name
languages[locale]['country'] = get_country_name(locale) or ''
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
else:
# filter out locales with no name
del languages[locale]
continue
# try to get country name
if locale.find('-') > 0 and not languages[locale].get('country'):
languages[locale]['country'] = get_country_name(locale) or ''
# Remove countryless language if language is featured in only one country.

Loading…
Cancel
Save