From 442772a6c5011f5dfd1e9d651d935b179cd14568 Mon Sep 17 00:00:00 2001 From: dimqua Date: Mon, 3 Dec 2018 00:39:04 +0300 Subject: [PATCH] remove findx engine (#1452) --- searx/engines/findx.py | 115 ----------------------------------------- searx/settings.yml | 18 ------- 2 files changed, 133 deletions(-) delete mode 100644 searx/engines/findx.py diff --git a/searx/engines/findx.py b/searx/engines/findx.py deleted file mode 100644 index 87c9d503c..000000000 --- a/searx/engines/findx.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -FindX (General, Images, Videos) - -@website https://www.findx.com -@provide-api no -@using-api no -@results HTML -@stable no -@parse url, title, content, embedded, img_src, thumbnail_src -""" - -from dateutil import parser -from json import loads -import re - -from lxml import html - -from searx import logger -from searx.engines.xpath import extract_text -from searx.engines.youtube_noapi import base_youtube_url, embedded_url -from searx.url_utils import urlencode - - -paging = True -results_xpath = '//script[@id="initial-state"]' -search_url = 'https://www.findx.com/{category}?{q}' -type_map = { - 'none': 'web', - 'general': 'web', - 'images': 'images', - 'videos': 'videos', -} - - -def request(query, params): - params['url'] = search_url.format( - category=type_map[params['category']], - q=urlencode({ - 'q': query, - 'page': params['pageno'] - }) - ) - return params - - -def response(resp): - dom = html.fromstring(resp.text) - results_raw_json = dom.xpath(results_xpath) - results_json = loads(extract_text(results_raw_json)) - - if len(results_json['web']['results']) > 0: - return _general_results(results_json['web']['results']['webSearch']['results']) - - if len(results_json['images']['results']) > 0: - return _images_results(results_json['images']['results']) - - if len(results_json['video']['results']) > 0: - return _videos_results(results_json['video']['results']) - - return [] - - -def _general_results(general_results): - results = [] - for result in general_results: - results.append({ - 'url': result['url'], - 'title': result['title'], - 'content': result['sum'], - }) - return results - - -def _images_results(image_results): - results = [] - for result in image_results: - results.append({ - 'url': result['sourceURL'], - 'title': result['title'], - 'content': result['source'], - 'thumbnail_src': _extract_url(result['assets']['thumb']['url']), - 'img_src': _extract_url(result['assets']['file']['url']), - 'template': 'images.html', - }) - return results - - -def _videos_results(video_results): - results = [] - for result in video_results: - if not result['kind'].startswith('youtube'): - logger.warn('Unknown video kind in findx: {}'.format(result['kind'])) - continue - - description = result['snippet']['description'] - if len(description) > 300: - description = description[:300] + '...' - - results.append({ - 'url': base_youtube_url + result['id'], - 'title': result['snippet']['title'], - 'content': description, - 'thumbnail': _extract_url(result['snippet']['thumbnails']['default']['url']), - 'publishedDate': parser.parse(result['snippet']['publishedAt']), - 'embedded': embedded_url.format(videoid=result['id']), - 'template': 'videos.html', - }) - return results - - -def _extract_url(url): - matching = re.search('(/https?://[^)]+)', url) - if matching: - return matching.group(0)[1:] - return '' diff --git a/searx/settings.yml b/searx/settings.yml index dda054e21..c223e5915 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -218,24 +218,6 @@ engines: shortcut : fd disabled : True - - name : findx - engine : findx - shortcut : fx - categories : general - disabled : True - - - name : findx images - engine : findx - shortcut : fxi - categories : images - disabled : True - - - name : findx videos - engine : findx - shortcut : fxv - categories : videos - disabled : True - - name : flickr categories : images shortcut : fl