searxng/searx/engines/ask.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Ask.com"""

import json
import re
from urllib.parse import urlencode

from lxml import html

# Engine metadata
about = {
    "website": "https://www.ask.com/",
    "wikidata_id": "Q847564",
    # No official API is used: results are taken from the HTML page.
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine configuration
categories = ["general"]
# request() forwards the page number as the "page" query parameter.
paging = True

# URL the search query string is appended to
base_url = "https://www.ask.com/web"


def request(query, params):
    """Store the Ask.com search URL for ``query`` in ``params``.

    The query text and the page number read from ``params["pageno"]`` are
    URL-encoded as the ``q`` and ``page`` arguments and appended to
    ``base_url``.  The resulting URL is written to ``params["url"]``.

    Returns the same ``params`` mapping that was passed in.
    """
    encoded_args = urlencode({"q": query, "page": params["pageno"]})
    params["url"] = f"{base_url}?{encoded_args}"
    return params


# Body of a double-quoted string value.  Backslash escapes are consumed as a
# unit, so an escaped quote (\") inside a value does not end the match early.
# Like the "." it replaces, the pattern never spans a line break.
_JSON_STRING = r'"((?:[^"\\\n]|\\.)*)"'
_URL_RE = re.compile(r'"url":' + _JSON_STRING)
_TITLE_RE = re.compile(r'"title":' + _JSON_STRING)
_ABSTRACT_RE = re.compile(r'"abstract":' + _JSON_STRING)


def _decode_json_string(raw):
    """Decode the escape sequences in the body of a JSON string literal.

    ``raw`` is the text between the surrounding quotes.  If it cannot be
    parsed as a JSON string, it is returned unchanged.
    """
    if "\\" not in raw:
        # Nothing to decode
        return raw
    try:
        return json.loads(f'"{raw}"')
    except ValueError:
        # Not valid JSON (e.g. a JavaScript-only escape): keep the raw text
        # rather than dropping the result.
        return raw


def response(resp):
    """Extract search results from the Ask.com result page in ``resp.text``.

    The page text is scanned for ``"url":"..."``, ``"title":"..."`` and
    ``"abstract":"..."`` pairs.  Matched values are decoded as JSON strings,
    so escapes such as ``\\"`` or ``\\u0026`` appear as the characters they
    stand for.

    Returns a list of dicts with the keys ``url``, ``title`` and ``content``.
    """
    text = html.fromstring(resp.text).text_content()

    urls = [_decode_json_string(value) for value in _URL_RE.findall(text)]
    # The first three "title" matches are skipped.
    # NOTE(review): presumably they do not belong to search results -- confirm
    # against the current page layout.
    titles = [_decode_json_string(value) for value in _TITLE_RE.findall(text)[3:]]
    abstracts = [_decode_json_string(value) for value in _ABSTRACT_RE.findall(text)]

    # NOTE(review): the three lists are matched independently and paired by
    # position; zip() stops at the shortest one.
    return [
        {
            "url": url,
            "title": title,
            "content": content,
        }
        for url, title, content in zip(urls, titles, abstracts)
        # Related searches shouldn't be in the search results
        if "&qo=relatedSearchNarrow" not in url
    ]
[feat] engine: implementation of ask.com 2024-02-07 22:18:13 +00:00			`# SPDX-License-Identifier: AGPL-3.0-or-later`
			`# lint: pylint`
			`"""Ask.com"""`

			`from urllib.parse import urlencode`
			`import re`
			`from lxml import html`

			`# Metadata`
			`about = {`
			`"website": "https://www.ask.com/",`
			`"wikidata_id": 'Q847564',`
			`"official_api_documentation": None,`
			`"use_official_api": False,`
			`"require_api_key": False,`
			`"results": "HTML",`
			`}`

			`# Engine Configuration`
			`categories = ['general']`
			`paging = True`

			`# Base URL`
			`base_url = "https://www.ask.com/web"`


			`def request(query, params):`

			`query_params = {`
			`"q": query,`
			`"page": params["pageno"],`
			`}`

			`params["url"] = f"{base_url}?{urlencode(query_params)}"`
			`return params`


			`def response(resp):`

			`text = html.fromstring(resp.text).text_content()`
			`urls_match = re.findall(r'"url":"(.*?)"', text)`
			`titles_match = re.findall(r'"title":"(.*?)"', text)[3:]`
			`content_match = re.findall(r'"abstract":"(.*?)"', text)`

			`results = [`
			`{`
			`"url": url,`
			`"title": title,`
			`"content": content,`
			`}`
			`for url, title, content in zip(urls_match, titles_match, content_match)`
			`if "&qo=relatedSearchNarrow" not in url`
			`# Related searches shouldn't be in the search results: www.ask.com/web&q=related`
			`]`

			`return results`