2015-02-11 16:16:52 +00:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGoogleEngine(SearxTestCase):
    """Unit tests for the ``searx.engines.google`` engine module.

    Covers request building (``google.request``), result parsing
    (``google.response`` and ``google.parse_images``) and parsing of the
    supported-language page (``google._fetch_supported_languages``).
    """

    # URL carried by mocked responses unless a test passes its own.
    DEFAULT_RESPONSE_URL = 'https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr'

    def mock_response(self, text, url=None):
        """Build a mock HTTP response object for ``google.response``.

        :param text: body exposed through the mock's ``text`` attribute.
        :param url: value of the mock's ``url`` attribute; defaults to
            ``DEFAULT_RESPONSE_URL`` when omitted.
        :returns: a ``mock.Mock`` whose ``search_params.get(...)`` always
            returns ``'www.google.com'``.
        """
        if url is None:
            url = self.DEFAULT_RESPONSE_URL
        response = mock.Mock(text=text, url=url)
        response.search_params = mock.Mock()
        response.search_params.get = mock.Mock(return_value='www.google.com')
        return response

    def test_request(self):
        """``google.request`` puts host and language into URL and headers."""
        # NOTE: these assignments overwrite module-level attributes of the
        # engine and are not restored after the test.
        google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
        google.language_aliases = {'he': 'iw'}

        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['time_range'] = ''

        # (requested language, expected hostname, expected language code)
        cases = [
            ('fr-FR', 'google.fr', 'fr'),
            ('en-US', 'google.com', 'en'),
            ('zh', 'google.com', 'zh-CN'),
            ('he', 'google.com', 'iw'),
        ]
        for language, hostname, lang_code in cases:
            dicto['language'] = language
            params = google.request(query, dicto)
            self.assertIn('url', params)
            self.assertIn(query, params['url'])
            self.assertIn(hostname, params['url'])
            self.assertIn(lang_code, params['url'])
            self.assertIn(lang_code, params['headers']['Accept-Language'])

    def test_response(self):
        """``google.response`` parses results, suggestions and error pages."""
        # Anything that is not a response-like object must be rejected.
        for invalid in (None, [], '', '[]'):
            self.assertRaises(AttributeError, google.response, invalid)

        response = self.mock_response('<html></html>')
        self.assertEqual(google.response(response), [])

        html = """
        <div class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                    .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        # Of the five "g" blocks only the first is expected as a result; the
        # second entry is the suggestion taken from the trailing <p> element.
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup without any Google result container yields no results.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)

        # Responses coming from a "sorry" URL must raise RuntimeWarning.
        sorry_urls = (
            'https://sorry.google.com',
            'https://www.google.com/sorry/IndexRedirect',
        )
        for sorry_url in sorry_urls:
            response = self.mock_response('<html></html>', url=sorry_url)
            self.assertRaises(RuntimeWarning, google.response, response)

    def test_parse_images(self):
        """``google.parse_images`` extracts one image result from the DOM."""
        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        # ``lxml.html`` is a submodule: it is only guaranteed to be available
        # when imported explicitly (see the module-level imports).
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom, 'www.google.com')
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')

    def test_fetch_supported_languages(self):
        """``google._fetch_supported_languages`` maps codes to language info."""
        html = """<html></html>"""
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertIsInstance(languages, dict)
        self.assertEqual(len(languages), 0)

        html = u"""
        <html>
            <body>
                <table>
                    <tbody>
                        <tr>
                            <td>
                                <font>
                                    <label>
                                        <span id="ten">English</span>
                                    </label>
                                </font>
                            </td>
                            <td>
                                <font>
                                    <label>
                                        <span id="tzh-CN">中文 (简体)</span>
                                    </label>
                                    <label>
                                        <span id="tzh-TW">中文 (繁體)</span>
                                    </label>
                                </font>
                            </td>
                        </tr>
                    </tbody>
                </table>
            </body>
        </html>
        """
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertIsInstance(languages, dict)
        self.assertEqual(len(languages), 3)

        # (language code, expected display name)
        expected_names = [
            ('en', 'English'),
            ('zh-CN', u'中文 (简体)'),
            ('zh-TW', u'中文 (繁體)'),
        ]
        for code, name in expected_names:
            self.assertIn(code, languages)
            self.assertIsInstance(languages[code], dict)
            self.assertIn('name', languages[code])
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(languages[code]['name'], name)
|