From eeb0998787733c86dd5435f266e36d967b41708e Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 10:08:41 +0200 Subject: [PATCH 1/8] [mod] refactor: move Flask proxy fix to searx.flaskfix module --- searx/flaskfix.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++ searx/webapp.py | 71 ++----------------------------------------- 2 files changed, 79 insertions(+), 69 deletions(-) create mode 100644 searx/flaskfix.py diff --git a/searx/flaskfix.py b/searx/flaskfix.py new file mode 100644 index 00000000..c069df45 --- /dev/null +++ b/searx/flaskfix.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-module-docstring,missing-function-docstring + +from urllib.parse import urlparse + +from werkzeug.middleware.proxy_fix import ProxyFix +from werkzeug.serving import WSGIRequestHandler + +from searx import settings + + +class ReverseProxyPathFix: + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. + + http://flask.pocoo.org/snippets/35/ + + In nginx: + location /myprefix { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + + :param wsgi_app: the WSGI application + ''' + # pylint: disable=too-few-public-methods + + def __init__(self, wsgi_app): + + self.wsgi_app = wsgi_app + self.script_name = None + self.scheme = None + self.server = None + + if settings['server']['base_url']: + + # If base_url is specified, then the values parsed from it are given + # preference over any Flask's generics. 
+ + base_url = urlparse(settings['server']['base_url']) + self.script_name = base_url.path + if self.script_name.endswith('/'): + # remove trailing slash to avoid infinite redirect on the index + # see https://github.com/searx/searx/issues/2729 + self.script_name = self.script_name[:-1] + self.scheme = base_url.scheme + self.server = base_url.netloc + + def __call__(self, environ, start_response): + script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] + + scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + + server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '') + if server: + environ['HTTP_HOST'] = server + return self.wsgi_app(environ, start_response) + + +def patch_application(app): + # serve pages with HTTP/1.1 + WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) + # patch app to handle non root url-s behind proxy & wsgi + app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app)) diff --git a/searx/webapp.py b/searx/webapp.py index 9b97b3ef..db15c95c 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -28,9 +28,6 @@ from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module -from werkzeug.middleware.proxy_fix import ProxyFix -from werkzeug.serving import WSGIRequestHandler - import flask from flask import ( @@ -106,6 +103,7 @@ from searx.metrics import ( histogram, counter, ) +from searx.flaskfix import patch_application # renaming names from searx imports ... 
@@ -137,9 +135,6 @@ if sys.version_info[0] < 3: logger = logger.getChild('webapp') -# serve pages with HTTP/1.1 -WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) - # check secret_key if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey': logger.error('server.secret_key is not changed. Please use something else instead of ultrasecretkey.') @@ -1338,70 +1333,8 @@ def run(): ], ) - -class ReverseProxyPathFix: - '''Wrap the application in this middleware and configure the - front-end server to add these headers, to let you quietly bind - this to a URL other than / and to an HTTP scheme that is - different than what is used locally. - - http://flask.pocoo.org/snippets/35/ - - In nginx: - location /myprefix { - proxy_pass http://127.0.0.1:8000; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Scheme $scheme; - proxy_set_header X-Script-Name /myprefix; - } - - :param wsgi_app: the WSGI application - ''' - # pylint: disable=too-few-public-methods - - def __init__(self, wsgi_app): - - self.wsgi_app = wsgi_app - self.script_name = None - self.scheme = None - self.server = None - - if settings['server']['base_url']: - - # If base_url is specified, then these values from are given - # preference over any Flask's generics. 
- - base_url = urlparse(settings['server']['base_url']) - self.script_name = base_url.path - if self.script_name.endswith('/'): - # remove trailing slash to avoid infinite redirect on the index - # see https://github.com/searx/searx/issues/2729 - self.script_name = self.script_name[:-1] - self.scheme = base_url.scheme - self.server = base_url.netloc - - def __call__(self, environ, start_response): - script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '') - if script_name: - environ['SCRIPT_NAME'] = script_name - path_info = environ['PATH_INFO'] - if path_info.startswith(script_name): - environ['PATH_INFO'] = path_info[len(script_name):] - - scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') - if scheme: - environ['wsgi.url_scheme'] = scheme - - server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '') - if server: - environ['HTTP_HOST'] = server - return self.wsgi_app(environ, start_response) - - application = app -# patch app to handle non root url-s behind proxy & wsgi -app.wsgi_app = ReverseProxyPathFix(ProxyFix(application.wsgi_app)) +patch_application(app) if __name__ == "__main__": run() From aedf0aa5feda5c7a944e7d057de7e948eaf84df5 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 10:14:00 +0200 Subject: [PATCH 2/8] [mod] remove searx.webapp.get_base_url function see the result of: git grep "base_url" searx/templates "base_url" is not used any more. --- searx/webapp.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index db15c95c..9c4c2a02 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -359,10 +359,6 @@ def extract_domain(url): return urlparse(url)[1] -def get_base_url(): - return url_for('index', _external=True) - - def get_current_theme_name(override=None): """Returns theme name. 
@@ -632,7 +628,6 @@ def index_error(output_format, error_message): results=[], q=request.form['q'] if 'q' in request.form else '', number_of_results=0, - base_url=get_base_url(), error_message=error_message, override_theme='__common__', ) @@ -810,7 +805,6 @@ def search(): suggestions=result_container.suggestions, q=request.form['q'], number_of_results=number_of_results, - base_url=get_base_url(), override_theme='__common__', ) return Response(response_rss, mimetype='text/xml') @@ -848,7 +842,6 @@ def search(): current_language=match_language(search_query.lang, LANGUAGE_CODES, fallback=request.preferences.get_value("language")), - base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())], timeout_limit=request.form.get('timeout_limit', None) @@ -1075,7 +1068,6 @@ def preferences(): allowed_plugins=allowed_plugins, theme=get_current_theme_name(), preferences_url_params=request.preferences.get_as_url_params(), - base_url=get_base_url(), locked_preferences=settings['preferences']['lock'], preferences=True) From 15567955b4bd03dc9e848b263495e88720654283 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 10:18:44 +0200 Subject: [PATCH 3/8] [mod] remove check of Python 2. 
--- searx/webapp.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 9c4c2a02..b6f0c28c 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -129,10 +129,6 @@ else: setproctitle.setthreadtitle(self._name) threading.Thread.__init__ = new_thread_init -if sys.version_info[0] < 3: - print('\033[1;31m Python2 is no longer supported\033[0m') - sys.exit(1) - logger = logger.getChild('webapp') # check secret_key From 91856e8f6a55ba88d928a2ddc2facfd66c1208d2 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 10:34:32 +0200 Subject: [PATCH 4/8] [mod] refactor searx.webapp.render: remove unused parameters --- searx/webapp.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index b6f0c28c..44c68d66 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -451,8 +451,7 @@ def render(template_name, override_theme=None, **kwargs): _get_ordered_categories() if x in enabled_categories] - if 'autocomplete' not in kwargs: - kwargs['autocomplete'] = request.preferences.get_value('autocomplete') + kwargs['autocomplete'] = request.preferences.get_value('autocomplete') locale = request.preferences.get_value('locale') @@ -485,8 +484,6 @@ def render(template_name, override_theme=None, **kwargs): kwargs['theme'] = get_current_theme_name(override=override_theme) - kwargs['template_name'] = template_name - kwargs['cookies'] = request.cookies kwargs['errors'] = request.errors From cfa07b0a9a49d9efc072ba5c322798b5aaf9c673 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 11:17:55 +0200 Subject: [PATCH 5/8] [mod] refactor searx.webapp.render no functional change, reorder declarations --- searx/webapp.py | 73 ++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 44c68d66..d35ccf4c 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -440,74 +440,65 @@ def 
get_translations(): def render(template_name, override_theme=None, **kwargs): - disabled_engines = request.preferences.engines.get_disabled() + # values from the HTTP requests + kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint + kwargs['cookies'] = request.cookies + kwargs['errors'] = request.errors - enabled_categories = set(category for engine_name in engines - for category in engines[engine_name].categories - if (engine_name, category) not in disabled_engines) + # values from the preferences + kwargs['preferences'] = request.preferences + kwargs['method'] = request.preferences.get_value('method') + kwargs['autocomplete'] = request.preferences.get_value('autocomplete') + kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') + kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) + kwargs['theme'] = get_current_theme_name(override=override_theme) if 'categories' not in kwargs: + disabled_engines = request.preferences.engines.get_disabled() + enabled_categories = set(category for engine_name in engines + for category in engines[engine_name].categories + if (engine_name, category) not in disabled_engines) kwargs['categories'] = [x for x in _get_ordered_categories() if x in enabled_categories] - kwargs['autocomplete'] = request.preferences.get_value('autocomplete') + # i18n + kwargs['language_codes'] = languages # from searx.languages + kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) locale = request.preferences.get_value('locale') - if locale in rtl_locales and 'rtl' not in kwargs: kwargs['rtl'] = True - - kwargs['searx_version'] = VERSION_STRING - - kwargs['method'] = request.preferences.get_value('method') - - kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) - - kwargs['language_codes'] = languages if 'current_language' not in kwargs: kwargs['current_language'] = match_language(request.preferences.get_value('language'), LANGUAGE_CODES) - # 
override url_for function in templates - kwargs['url_for'] = url_for_theme - - kwargs['image_proxify'] = image_proxify - - kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None - kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True) - - kwargs['opensearch_url'] = url_for('opensearch') + '?' \ - + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) - - kwargs['get_result_template'] = get_result_template - - kwargs['theme'] = get_current_theme_name(override=override_theme) - - kwargs['cookies'] = request.cookies - - kwargs['errors'] = request.errors - - kwargs['instance_name'] = settings['general']['instance_name'] - - kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') - - kwargs['preferences'] = request.preferences - + # values from settings kwargs['search_formats'] = [ x for x in settings['search']['formats'] if x != 'html'] + # brand + kwargs['instance_name'] = settings['general']['instance_name'] + kwargs['searx_version'] = VERSION_STRING kwargs['brand'] = brand - kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) + # helpers to create links to other pages + kwargs['url_for'] = url_for_theme # override url_for function in templates + kwargs['image_proxify'] = image_proxify + kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None + kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True) + kwargs['get_result_template'] = get_result_template + kwargs['opensearch_url'] = url_for('opensearch') + '?' 
\ + + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) + # scripts from plugins kwargs['scripts'] = set() - kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint for plugin in request.user_plugins: for script in plugin.js_dependencies: kwargs['scripts'].add(script) + # styles from plugins kwargs['styles'] = set() for plugin in request.user_plugins: for css in plugin.css_dependencies: From 0e23113f706aa3b123f93054e55bdf52e928a5b0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 11:22:46 +0200 Subject: [PATCH 6/8] [mod] remove unused filter searx.webapp.extract_domain it was used in the pix-art theme --- searx/webapp.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index d35ccf4c..9c237420 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,10 +17,7 @@ from html import escape from io import StringIO import urllib -from urllib.parse import ( - urlencode, - urlparse, -) +from urllib.parse import urlencode import httpx @@ -349,12 +346,6 @@ def code_highlighter(codelines, language=None): return html_code -# Extract domain from url -@app.template_filter('extract_domain') -def extract_domain(url): - return urlparse(url)[1] - - def get_current_theme_name(override=None): """Returns theme name. 
From 70bbd14b1ae122f2d1db7b8907d47394d549a149 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 11:34:36 +0200 Subject: [PATCH 7/8] [mod] move hook to set Unix thread name into searx.unixthreadname requires setproctitle (but optional) --- searx/__init__.py | 1 + searx/unixthreadname.py | 20 ++++++++++++++++++++ searx/webapp.py | 15 --------------- 3 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 searx/unixthreadname.py diff --git a/searx/__init__.py b/searx/__init__.py index a65266de..8452dd7b 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -5,6 +5,7 @@ from os.path import dirname, abspath import logging +import searx.unixthreadname import searx.settings_loader from searx.settings_defaults import settings_set_defaults diff --git a/searx/unixthreadname.py b/searx/unixthreadname.py new file mode 100644 index 00000000..9a6f5381 --- /dev/null +++ b/searx/unixthreadname.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +""" +If setproctitle is installed, 
+set Unix thread name with the Python thread name +""" + +try: + import setproctitle +except ImportError: + pass +else: + import threading + old_thread_init = threading.Thread.__init__ + + def new_thread_init(self, *args, **kwargs): + # pylint: disable=protected-access, disable=c-extension-no-member, disable=missing-function-docstring + old_thread_init(self, *args, **kwargs) + setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init diff --git a/searx/webapp.py b/searx/webapp.py index 9c237420..9ce23d3c 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -111,21 +111,6 @@ from searx.network import stream as http_stream from searx.search.checker import get_result as checker_get_result from searx.settings_loader import get_default_settings_path -# set Unix thread name -try: - import setproctitle -except ImportError: - pass -else: - import threading - old_thread_init = threading.Thread.__init__ - - def new_thread_init(self, *args, **kwargs): - # pylint: disable=protected-access, disable=c-extension-no-member - old_thread_init(self, *args, **kwargs) - setproctitle.setthreadtitle(self._name) - threading.Thread.__init__ = new_thread_init - logger = logger.getChild('webapp') # check secret_key From 2a109d0a5fb55a5cc7f9d2efdda5c7389e5f172a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 8 Jun 2021 11:56:30 +0200 Subject: [PATCH 8/8] [mod] refactor searx.webapp.render render automatically adds these variables to the template context: * advanced_search * all_categories * categories before render was checking if the variable was already set but it is actually never set by the callers --- searx/webapp.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 9ce23d3c..d33fd0f5 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -415,6 +415,22 @@ def get_translations(): } +def _get_ordered_categories(): + ordered_categories = 
list(settings['ui']['categories_order']) + ordered_categories.extend(x for x in sorted(categories.keys()) if x not in ordered_categories) + return ordered_categories + + +def _get_enable_categories(all_categories): + disabled_engines = request.preferences.engines.get_disabled() + enabled_categories = set(category for engine_name in engines + for category in engines[engine_name].categories + if (engine_name, category) not in disabled_engines) + return [x for x in + all_categories + if x in enabled_categories] + + def render(template_name, override_theme=None, **kwargs): # values from the HTTP requests kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint @@ -426,17 +442,11 @@ def render(template_name, override_theme=None, **kwargs): kwargs['method'] = request.preferences.get_value('method') kwargs['autocomplete'] = request.preferences.get_value('autocomplete') kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') + kwargs['advanced_search'] = request.preferences.get_value('advanced_search') kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) kwargs['theme'] = get_current_theme_name(override=override_theme) - - if 'categories' not in kwargs: - disabled_engines = request.preferences.engines.get_disabled() - enabled_categories = set(category for engine_name in engines - for category in engines[engine_name].categories - if (engine_name, category) not in disabled_engines) - kwargs['categories'] = [x for x in - _get_ordered_categories() - if x in enabled_categories] + kwargs['all_categories'] = _get_ordered_categories() + kwargs['categories'] = _get_enable_categories(kwargs['all_categories']) # i18n kwargs['language_codes'] = languages # from searx.languages @@ -488,12 +498,6 @@ def render(template_name, override_theme=None, **kwargs): return result -def _get_ordered_categories(): - ordered_categories = list(settings['ui']['categories_order']) - ordered_categories.extend(x for x in sorted(categories.keys()) 
if x not in ordered_categories) - return ordered_categories - - @app.before_request def pre_request(): request.start_time = default_timer() # pylint: disable=assigning-non-slot @@ -605,9 +609,6 @@ def index_error(output_format, error_message): def index(): """Render index page.""" - # UI - advanced_search = request.preferences.get_value('advanced_search') - # redirect to search if there's a query in the request if request.form.get('q'): query = ('?' + request.query_string.decode()) if request.query_string else '' @@ -616,7 +617,6 @@ def index(): return render( 'index.html', selected_categories=get_selected_categories(request.preferences, request.form), - advanced_search=advanced_search, ) @@ -642,7 +642,6 @@ def search(): if output_format == 'html': return render( 'index.html', - advanced_search=request.preferences.get_value('advanced_search'), selected_categories=get_selected_categories(request.preferences, request.form), ) return index_error(output_format, 'No query'), 400 @@ -1008,7 +1007,6 @@ def preferences(): # return render('preferences.html', selected_categories=get_selected_categories(request.preferences, request.form), - all_categories=_get_ordered_categories(), locales=settings['locales'], current_locale=request.preferences.get_value("locale"), image_proxy=image_proxy,