diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index e1f676dd..528f8d21 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,28 +1,20 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google images engine. +"""This is the implementation of the google images engine using the google +internal API used by the Google Go Android app. -.. admonition:: Content-Security-Policy (CSP) +This internal API offers results in - This engine needs to allow images from the `data URLs`_ (prefixed with the - ``data:`` scheme):: +- JSON (_fmt:json) +- Protobuf (_fmt:pb) +- Protobuf compressed? (_fmt:pc) +- HTML (_fmt:html) +- Protobuf encoded in JSON (_fmt:jspb). - Header set Content-Security-Policy "img-src 'self' data: ;" - -.. _data URLs: - https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs """ -import re -from urllib.parse import urlencode, unquote -from lxml import html - -from searx.utils import ( - eval_xpath, - eval_xpath_list, - eval_xpath_getindex, - extract_text, -) +from urllib.parse import urlencode +from json import loads from searx.engines.google import ( get_lang_info, @@ -42,12 +34,12 @@ about = { "official_api_documentation": 'https://developers.google.com/custom-search', "use_official_api": False, "require_api_key": False, - "results": 'HTML', + "results": 'JSON', } # engine dependent config categories = ['images', 'web'] -paging = False +paging = True use_locale_domain = True time_range_support = True safesearch = True @@ -56,74 +48,8 @@ send_accept_language_header = True filter_mapping = {0: 'images', 1: 'active', 2: 'active'} -def scrap_out_thumbs(dom): - """Scrap out thumbnail data from - # - # - # The second script contains the URLs of the images. - - # The AF_initDataCallback(..) 
is called with very large dictionary, that - # looks like JSON but it is not JSON since it contains JS variables and - # constants like 'null' (we can't use a JSON parser for). - # - # The alternative is to parse the entire