From 9ee99423fe22550ef566245ef23e2a9e8ee76c27 Mon Sep 17 00:00:00 2001
From: ahmad-alkadri
Date: Tue, 3 Jan 2023 22:59:01 +0100
Subject: [PATCH] [fix] Bing-Web engine: XPath to get the wikipedia result

Modify the XPath selector to get the wikipedia result, plus small fixes.

About the result content: especially with the Wikipedia result we'd get
several paragraph elements; only the first paragraph is taken and
displayed in the search result.
---
 searx/engines/bing.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 8d024fed..5c4681cd 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -4,6 +4,7 @@
 
 - https://github.com/searx/searx/issues/2019#issuecomment-648227442
 """
+# pylint: disable=too-many-branches
 
 import re
 from urllib.parse import urlencode, urlparse, parse_qs
@@ -74,7 +75,6 @@ def request(query, params):
 
 
 def response(resp):
-
     results = []
     result_len = 0
 
@@ -84,12 +84,20 @@ def response(resp):
 
     url_to_resolve = []
     url_to_resolve_index = []
-    for i, result in enumerate(eval_xpath_list(dom, '//li[@class="b_algo"]')):
+    for i, result in enumerate(eval_xpath_list(dom, '//li[contains(@class, "b_algo")]')):
 
         link = eval_xpath(result, './/h2/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = extract_text(eval_xpath(result, './/p'))
+
+        # Make sure that the element is free of <a> links and <span class="algoSlug_icon"> elements
+        content = eval_xpath(result, '(.//p)[1]')
+        for p in content:
+            for e in p.xpath('.//a'):
+                e.getparent().remove(e)
+            for e in p.xpath('.//span[@class="algoSlug_icon"]'):
+                e.getparent().remove(e)
+        content = extract_text(content)
 
         # get the real URL either using the URL shown to user or following the Bing URL
         if url.startswith('https://www.bing.com/ck/a?'):
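
For context, below is a small self-contained sketch (not part of the patch) of how the patched selector and content cleanup behave. It uses lxml directly instead of searx's eval_xpath/extract_text helpers; the sample markup, the extra class token b_vtl_deeplinks, and the [more] link are made up for illustration, while the XPath expressions are the ones from the patch.

# Illustrative sketch only -- simplified, hypothetical Bing markup.
from lxml import html

SAMPLE_PAGE = """
<html><body><ol id="b_results">
  <li class="b_algo b_vtl_deeplinks">
    <h2><a href="https://en.wikipedia.org/wiki/Example">Example - Wikipedia</a></h2>
    <p>Example is the first paragraph of the article <a href="https://en.wikipedia.org/wiki/Example">[more]</a><span class="algoSlug_icon">W</span></p>
    <p>A second paragraph that should not end up in the result content.</p>
  </li>
</ol></body></html>
"""

dom = html.fromstring(SAMPLE_PAGE)

# The old selector '//li[@class="b_algo"]' misses this <li> because its class
# attribute carries an extra token; contains(@class, "b_algo") still matches.
for result in dom.xpath('//li[contains(@class, "b_algo")]'):
    # Take only the first <p>, then drop links and icon spans before reading
    # the text (lxml's remove() also drops the removed element's tail text).
    content = result.xpath('(.//p)[1]')
    for p in content:
        for e in p.xpath('.//a'):
            e.getparent().remove(e)
        for e in p.xpath('.//span[@class="algoSlug_icon"]'):
            e.getparent().remove(e)
    print(' '.join(p.text_content() for p in content).strip())
    # prints: Example is the first paragraph of the article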