diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 8d024fed..5c4681cd 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -4,6 +4,7 @@ - https://github.com/searx/searx/issues/2019#issuecomment-648227442 """ +# pylint: disable=too-many-branches import re from urllib.parse import urlencode, urlparse, parse_qs @@ -74,7 +75,6 @@ def request(query, params): def response(resp): - results = [] result_len = 0 @@ -84,12 +84,20 @@ def response(resp): url_to_resolve = [] url_to_resolve_index = [] - for i, result in enumerate(eval_xpath_list(dom, '//li[@class="b_algo"]')): + for i, result in enumerate(eval_xpath_list(dom, '//li[contains(@class, "b_algo")]')): link = eval_xpath(result, './/h2/a')[0] url = link.attrib.get('href') title = extract_text(link) - content = extract_text(eval_xpath(result, './/p')) + + # Make sure that the first paragraph is free of `a` link elements and `span.algoSlug_icon` elements + content = eval_xpath(result, '(.//p)[1]') + for p in content: + for e in p.xpath('.//a'): + e.getparent().remove(e) + for e in p.xpath('.//span[@class="algoSlug_icon"]'): + e.getparent().remove(e) + content = extract_text(content) # get the real URL either using the URL shown to user or following the Bing URL if url.startswith('https://www.bing.com/ck/a?'):