From ba590de7f1506b8b8e281403dc47b6dcdbe3ead1 Mon Sep 17 00:00:00 2001 From: Noemi Vanyi Date: Tue, 19 Jul 2016 09:37:02 +0200 Subject: [PATCH] [fix] deviantart engine xpaths --- searx/engines/deviantart.py | 9 +++-- tests/unit/engines/test_deviantart.py | 47 +++++---------------------- 2 files changed, 12 insertions(+), 44 deletions(-) diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index c7816b9bc..70761370c 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -50,11 +50,10 @@ def response(resp): regex = re.compile(r'\/200H\/') # parse results - for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): - link = result.xpath('.//a[contains(@class, "thumb")]')[0] - url = urljoin(base_url, link.attrib.get('href')) - title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') - title = extract_text(title_links[0]) + for result in dom.xpath('.//span[@class="thumb wide"]'): + link = result.xpath('.//a[@class="torpedo-thumb-link"]')[0] + url = link.attrib.get('href') + title = extract_text(result.xpath('.//span[@class="title"]')) thumbnail_src = link.xpath('.//img')[0].attrib.get('src') img_src = regex.sub('/', thumbnail_src) diff --git a/tests/unit/engines/test_deviantart.py b/tests/unit/engines/test_deviantart.py index 78a391334..c00bb8fab 100644 --- a/tests/unit/engines/test_deviantart.py +++ b/tests/unit/engines/test_deviantart.py @@ -28,44 +28,13 @@ class TestDeviantartEngine(SearxTestCase): self.assertEqual(deviantart.response(response), []) html = """ -
- - - - - - - Test - - - - - - - Title of image - - - - 5 years ago - - in Animation - - - - More Like This - - - +
+ + + + + Title of image
""" response = mock.Mock(text=html) @@ -73,7 +42,7 @@ class TestDeviantartEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'Title of image') - self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url') + self.assertEqual(results[0]['url'], 'https://url.of.image') self.assertNotIn('content', results[0]) self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')