From 696c35d2c33d60eb7b965fe017e08a00a939d4c4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 5 Sep 2023 20:25:13 +0200 Subject: [PATCH] [fix] engine - duckduckgo_images / determination of vqd value incorrect Signed-off-by: Markus Heiser --- searx/engines/duckduckgo.py | 16 ++++++---------- searx/engines/duckduckgo_images.py | 12 ++++-------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 8349ad8e3..edd586f78 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -25,7 +25,6 @@ from searx.utils import ( from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx import redisdb from searx.enginelib.traits import EngineTraits -from searx.exceptions import SearxEngineAPIException if TYPE_CHECKING: import logging @@ -78,7 +77,7 @@ def cache_vqd(query, value): c.set(key, value, ex=600) -def get_vqd(query, headers): +def get_vqd(query): """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the response. @@ -94,13 +93,10 @@ def get_vqd(query, headers): logger.debug("re-use cached vqd value: %s", value) return value - query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query})) - res = get(query_url, headers=headers) - content = res.text # type: ignore - if content.find('vqd=\"') == -1: - raise SearxEngineAPIException('Request failed') - value = content[content.find('vqd=\"') + 5 :] - value = value[: value.find('\'')] + query_url = 'https://lite.duckduckgo.com/lite/?{args}'.format(args=urlencode({'q': query})) + res = get(query_url) + doc = lxml.html.fromstring(res.text) + value = doc.xpath("//input[@name='vqd']/@value")[0] logger.debug("new vqd value: %s", value) cache_vqd(query, value) return value @@ -240,7 +236,7 @@ def request(query, params): params['data']['dc'] = offset + 1 # request needs a vqd argument - params['data']['vqd'] = get_vqd(query, params["headers"]) + params['data']['vqd'] = get_vqd(query) # initial page does not have additional data in the input form if params['pageno'] > 1: diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index d8a6f1340..7e7f133b1 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -50,7 +50,8 @@ def request(query, params): 'o': 'json', # 'u': 'bing', 'l': eng_region, - 'vqd': get_vqd(query, params["headers"]), + 'f': ',,,,,', + 'vqd': get_vqd(query), } if params['pageno'] > 1: @@ -59,7 +60,6 @@ def request(query, params): params['cookies']['ad'] = eng_lang # zh_CN params['cookies']['ah'] = eng_region # "us-en,de-de" params['cookies']['l'] = eng_region # "hk-tzh" - logger.debug("cookies: %s", params['cookies']) safe_search = safesearch_cookies.get(params['safesearch']) if safe_search is not None: @@ -68,13 +68,9 @@ def request(query, params): if safe_search is not None: args['p'] = safe_search # "-1", "1" + logger.debug("cookies: %s", params['cookies']) args = urlencode(args) - params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,') - - params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01' - params['headers']['Referer'] = 'https://duckduckgo.com/' - params['headers']['X-Requested-With'] = 'XMLHttpRequest' - logger.debug("headers: %s", params['headers']) + params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args) return params