Merge pull request #2132 from dalf/update_pr_1967

search.suspended_time settings: bug fixes
This commit is contained in:
Alexandre Flament 2023-01-29 20:48:43 +01:00 committed by GitHub
commit 9d102fb08f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 63 additions and 30 deletions

View File

@ -110,6 +110,13 @@ Global Settings
default_lang: ""
ban_time_on_fail: 5
max_ban_time_on_fail: 120
suspended_times:
SearxEngineAccessDenied: 86400
SearxEngineCaptcha: 86400
SearxEngineTooManyRequests: 3600
cf_SearxEngineCaptcha: 1296000
cf_SearxEngineAccessDenied: 86400
recaptcha_SearxEngineCaptcha: 604800
formats:
- html
@ -159,6 +166,25 @@ Global Settings
``max_ban_time_on_fail``:
Max ban time in seconds after engine errors.
``suspended_times``:
Engine suspension time after error (in seconds; set to 0 to disable)
``SearxEngineAccessDenied``: 86400
For error "Access denied" and "HTTP error [402, 403]"
``SearxEngineCaptcha``: 86400
For error "CAPTCHA"
``SearxEngineTooManyRequests``: 3600
For error "Too many request" and "HTTP error 429"
Cloudflare CAPTCHA:
- ``cf_SearxEngineCaptcha``: 1296000
- ``cf_SearxEngineAccessDenied``: 86400
Google CAPTCHA:
- ``recaptcha_SearxEngineCaptcha``: 604800
``formats``:
Result formats available from web, remove format to deny access (use lower
case).
@ -168,6 +194,7 @@ Global Settings
- ``json``
- ``rss``
.. _settings server:
``server:``

View File

@ -0,0 +1,8 @@
.. _searx.exceptions:
==================
SearXNG Exceptions
==================
.. automodule:: searx.exceptions
:members:

View File

@ -62,8 +62,7 @@ sc_code = ''
def raise_captcha(resp):
if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
# suspend CAPTCHA for 7 days
raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
raise SearxEngineCaptchaException()
def get_sc_code(headers):

View File

@ -1,29 +1,19 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2017- by Alexandre Flament, <alex@al-f.net>
'''
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Exception types raised by SearXNG modules.
"""
from typing import Optional, Union
class SearxException(Exception):
pass
"""Base SearXNG exception."""
class SearxParameterException(SearxException):
"""Raised when a query is missing a required parameter."""
def __init__(self, name, value):
if value == '' or value is None:
message = 'Empty ' + name + ' parameter'
@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""The website is blocking the access"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
"""This setting contains the default suspended time (default 86400 sec / 1
day)."""
def __init__(self, suspended_time=None, message='Access denied'):
def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
"""Generic exception to raise when an engine denies access to the results.
:param suspended_time: How long the engine is going to be suspended, in
seconds. Defaults to None.
:type suspended_time: int, None
:param message: Internal message. Defaults to ``Access denied``
:type message: str
"""
suspended_time = suspended_time or self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time))
self.suspended_time = suspended_time
self.message = message
def _get_default_suspended_time(self):
from searx import get_setting
from searx import get_setting # pylint: disable=C0415
return get_setting(self.SUSPEND_TIME_SETTING)
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
"""The website has returned a CAPTCHA
By default, searx stops sending requests to this engine for 1 day.
"""
"""The website has returned a CAPTCHA."""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
"""This setting contains the default suspended time (default 86400 sec / 1
day)."""
def __init__(self, suspended_time=None, message='CAPTCHA'):
super().__init__(message=message, suspended_time=suspended_time)
@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
"""This setting contains the default suspended time (default 3600 sec / 1
hour)."""
def __init__(self, suspended_time=None, message='Too many request'):
super().__init__(message=message, suspended_time=suspended_time)

View File

@ -72,9 +72,7 @@ def raise_for_httperror(resp):
if resp.status_code and resp.status_code >= 400:
raise_for_captcha(resp)
if resp.status_code in (402, 403):
raise SearxEngineAccessDeniedException(
message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
)
raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
if resp.status_code == 429:
raise SearxEngineTooManyRequestsException()
resp.raise_for_status()

View File

@ -45,7 +45,7 @@ search:
ban_time_on_fail: 5
# max ban time in seconds after engine errors
max_ban_time_on_fail: 120
suspend_times:
suspended_times:
# Engine suspension time after error (in seconds; set to 0 to disable)
# For error "Access denied" and "HTTP error [402, 403]"
SearxEngineAccessDenied: 86400