forked from Archives/searxng
The checker requires Redis
Remove the abstraction in searx.shared.SharedDict. Implement a basic and dedicated scheduler for the checker using a Redis script.dependabot/pip/master/sphinx-6.1.3
parent
d764d94a70
commit
fe419e355b
@ -0,0 +1,36 @@
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- This script is not a string in scheduler.py, so editors can provide syntax highlighting.
--
-- Arguments (ARGV), all expressed in seconds:
--   ARGV[1], ARGV[2]: start_after_from, start_after_to
--                     bounds of the delay before the very first run
--   ARGV[3], ARGV[4]: every_from, every_to
--                     bounds of the delay between two consecutive runs
--
-- Returns a two-element array { call_now, delay }:
--   call_now: true when the caller must run the checker now, false otherwise
--   delay:    number of seconds between now and the next scheduled call

-- The Redis KEY is defined here and not in Python on purpose:
-- only this Lua script can read and update this key to avoid lock and concurrency issues.
local redis_key = 'SearXNG_checker_next_call_ts'

-- TIME, GET and ARGV all yield strings: convert them explicitly so that the
-- arithmetic and the comparisons below are numeric and not lexicographic.
local now = tonumber(redis.call('TIME')[1])
local start_after_from = assert(tonumber(ARGV[1]), 'start_after_from must be a number')
local start_after_to = assert(tonumber(ARGV[2]), 'start_after_to must be a number')
local every_from = assert(tonumber(ARGV[3]), 'every_from must be a number')
local every_to = assert(tonumber(ARGV[4]), 'every_to must be a number')

-- Return a random integer delay in [lower, upper].
-- An empty or reversed interval yields `lower` instead of raising
-- "interval is empty", which math.random does for such bounds.
-- NOTE(review): depending on the Redis version, math.random may be seeded
-- identically at each script execution -- confirm if real randomness matters.
local function random_delay(lower, upper)
    if upper <= lower then
        return lower
    end
    return math.random(lower, upper)
end

-- GET returns false when the key does not exist; a value that is not a
-- number is handled like a missing key so the schedule is initialized again.
local stored = redis.call('GET', redis_key)
local next_call_ts = stored and tonumber(stored) or nil

if not next_call_ts then
    -- the scheduler has never run on this Redis instance, so:
    -- 1/ the scheduler does not run now
    -- 2/ the next call is a random time between start_after_from and start_after_to
    local initial_delay = random_delay(start_after_from, start_after_to)
    redis.call('SET', redis_key, now + initial_delay)
    return { false, initial_delay }
end

-- next_call_ts is defined
-- --> if now is lower than next_call_ts then we don't run the embedded checker
-- --> if now is higher then we update next_call_ts and ask to run the embedded checker now.
local call_now = next_call_ts <= now
if call_now then
    -- the checker runs now, define the timestamp of the next call:
    -- this is a random delay between every_from and every_to
    local periodic_delay = random_delay(every_from, every_to)
    next_call_ts = next_call_ts + periodic_delay
    if next_call_ts <= now then
        -- the stored timestamp was stale (no worker for a long time):
        -- schedule from now, otherwise the returned delay would be negative
        next_call_ts = now + periodic_delay
    end
    redis.call('SET', redis_key, next_call_ts)
end
return { call_now, next_call_ts - now }
|
@ -0,0 +1,57 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
"""Lame scheduler which uses Redis as a source of truth:
|
||||
* the Redis key SearXNG_checker_next_call_ts contains the next time the embedded checker should run.
|
||||
* to avoid lock, a unique Redis script reads and updates the Redis key SearXNG_checker_next_call_ts.
|
||||
* this Redis script returns a list of two elements:
|
||||
* the first one is a boolean. If True, the embedded checker must run now in this worker.
|
||||
* the second element is the delay in seconds to wait before the next call to the Redis script.
|
||||
|
||||
This scheduler is not generic on purpose: if more features are required, a dedicated scheduler must be used
|
||||
(= a better scheduler should not use the web workers)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import importlib
|
||||
from typing import Callable
|
||||
|
||||
from searx.shared.redisdb import client as get_redis_client
|
||||
from searx.redislib import lua_script_storage
|
||||
|
||||
|
||||
logger = logging.getLogger('searx.search.checker')
|
||||
|
||||
|
||||
def scheduler_function(start_after_from: int, start_after_to: int, every_from: int, every_to: int, callback: Callable):
    """Run the checker periodically. The function never returns.

    Parameters:
    * start_after_from and start_after_to: when to call "callback" for the first time on the Redis instance
    * every_from and every_to: after the first call, how often to call "callback"
    * callback: called without arguments each time the Redis script asks this worker to run the checker;
      any exception it raises is logged and the loop keeps running

    There is no issue:
    * to call this function in multiple workers
    * to kill workers at any time as long as there is at least one worker
    """
    # "import importlib" alone does not guarantee that the "resources" submodule is loaded
    from importlib import resources  # pylint: disable=import-outside-toplevel

    scheduler_now_script = resources.read_text(__package__, "scheduler.lua")
    while True:
        # ask the Redis script what to do
        # the script says
        # * if the checker must run now.
        # * how long to wait before calling the script again (it can be called earlier, but not later).
        script = lua_script_storage(get_redis_client(), scheduler_now_script)
        call_now, wait_time = script(args=[start_after_from, start_after_to, every_from, every_to])

        # does the worker run the checker now?
        if call_now:
            # run the checker
            try:
                callback()
            except Exception:  # pylint: disable=broad-except
                logger.exception("Error calling the embedded checker")
            # only the worker that runs the checker displays the wait_time
            logger.info("Next call to the checker in %s seconds", wait_time)
        # wait until the next call; time.sleep() raises ValueError on a negative
        # value, so a delay that is already in the past is clamped to zero
        time.sleep(max(0, wait_time))
|
@ -1,39 +1,6 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Initialization of a *shared* storage.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import importlib
|
||||
|
||||
logger = logging.getLogger('searx.shared')
|
||||
|
||||
__all__ = ['SharedDict', 'schedule']
|
||||
|
||||
# Select the shared storage backend:
# * uwsgi is not importable: in-process implementation (shared_simple)
# * uwsgi is importable but its cache does not work: in-process storage, scheduling disabled
# * uwsgi is importable and its cache works: uwsgi implementation (shared_uwsgi)
try:
    uwsgi = importlib.import_module('uwsgi')
except Exception:  # pylint: disable=broad-except
    # no uwsgi
    # ("except Exception" instead of a bare "except": KeyboardInterrupt and SystemExit must propagate)
    from .shared_simple import SimpleSharedDict as SharedDict, schedule

    logger.info('Use shared_simple implementation')
else:
    try:
        # write then read back a dummy entry to check that the uwsgi cache is usable
        uwsgi.cache_update('dummy', b'dummy')
        if uwsgi.cache_get('dummy') != b'dummy':
            raise Exception('the uwsgi cache did not return the value just stored')
    except Exception:  # pylint: disable=broad-except
        # uwsgi.ini configuration problem: disable all scheduling
        logger.error(
            'uwsgi.ini configuration error, add this line to your uwsgi.ini\n'
            'cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1'
        )
        from .shared_simple import SimpleSharedDict as SharedDict

        def schedule(delay, func, *args):  # pylint: disable=unused-argument
            """Scheduling is disabled: nothing is registered and False is returned."""
            return False

    else:
        # uwsgi
        from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule

        logger.info('Use shared_uwsgi implementation')

storage = SharedDict()
|
||||
from . import redisdb
|
||||
|
@ -1,22 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pyright: strict
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SharedDict(ABC):
    """Abstract interface of a key/value storage for integers and strings.

    Concrete subclasses must implement the four accessors below.
    """

    @abstractmethod
    def get_int(self, key: str) -> Optional[int]:
        """Read the integer stored under ``key`` (the return annotation allows None)."""

    @abstractmethod
    def set_int(self, key: str, value: int):
        """Store the integer ``value`` under ``key``."""

    @abstractmethod
    def get_str(self, key: str) -> Optional[str]:
        """Read the string stored under ``key`` (the return annotation allows None)."""

    @abstractmethod
    def set_str(self, key: str, value: str):
        """Store the string ``value`` under ``key``."""
|
@ -1,40 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from . import shared_abstract
|
||||
|
||||
|
||||
class SimpleSharedDict(shared_abstract.SharedDict):
    """SharedDict implementation backed by a plain dict held in ``self.d``."""

    __slots__ = ('d',)

    def __init__(self):
        self.d = {}

    def get_int(self, key: str) -> Optional[int]:
        """Return the value stored under ``key``, or None when the key is absent."""
        return self.d.get(key)

    def set_int(self, key: str, value: int):
        """Store ``value`` under ``key``."""
        self.d[key] = value

    def get_str(self, key: str) -> Optional[str]:
        """Return the value stored under ``key``, or None when the key is absent."""
        return self.d.get(key)

    def set_str(self, key: str, value: str):
        """Store ``value`` under ``key``."""
        self.d[key] = value
|
||||
|
||||
|
||||
def schedule(delay, func, *args):
    """Call ``func(*args)`` every ``delay`` seconds from daemon timer threads.

    The first call happens ``delay`` seconds after this function returns.
    Each firing arms the next timer before calling ``func``.
    Always returns True.
    """

    def arm_timer():
        timer = threading.Timer(delay, on_timer)
        timer.daemon = True
        timer.start()

    def on_timer():
        # re-arm first, then run the scheduled function
        arm_timer()
        func(*args)

    arm_timer()
    return True
|
@ -1,64 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
import uwsgi # pyright: ignore # pylint: disable=E0401
|
||||
from . import shared_abstract
|
||||
|
||||
|
||||
_last_signal = 10
|
||||
|
||||
|
||||
class UwsgiCacheSharedDict(shared_abstract.SharedDict):
    """SharedDict implementation stored in the uwsgi cache.

    Integers are stored as unsigned big-endian bytes, strings as UTF-8 bytes.
    """

    def get_int(self, key: str) -> Optional[int]:
        """Return the integer stored under ``key``, or None when the cache has no such key."""
        value = uwsgi.cache_get(key)
        if value is None:
            return None
        return int.from_bytes(value, 'big')

    def set_int(self, key: str, value: int):
        """Store the non-negative integer ``value`` under ``key``.

        Raises OverflowError when ``value`` is negative (unsigned encoding).
        """
        # keep the historical width of 4 bytes, widen only when the value needs it:
        # get_int() decodes any width, so values >= 2**32 no longer overflow
        size = max(4, (value.bit_length() + 7) // 8)
        uwsgi.cache_update(key, value.to_bytes(size, 'big'))

    def get_str(self, key: str) -> Optional[str]:
        """Return the string stored under ``key``, or None when the cache has no such key."""
        value = uwsgi.cache_get(key)
        if value is None:
            return None
        return value.decode('utf-8')

    def set_str(self, key: str, value: str):
        """Store ``value`` under ``key``, encoded as UTF-8."""
        uwsgi.cache_update(key, value.encode('utf-8'))
|
||||
|
||||
|
||||
def schedule(delay, func, *args):
    """Register a uwsgi timer that calls ``func(*args)`` every ``delay`` seconds.

    Can be implemented using a spooler.
    https://uwsgi-docs.readthedocs.io/en/latest/PythonDecorators.html

    To make the uwsgi configuration simple, use the alternative implementation.

    A new signal number is taken from the module-level ``_last_signal`` counter.
    The time of the last run is kept in the uwsgi cache, so ``func`` is skipped
    when it has already run less than ``delay`` seconds ago.
    Always returns True.
    """
    global _last_signal

    def sighandler(signum):
        current_ts = int(time.time())
        cache_key = 'scheduler_call_time_signal_' + str(signum)
        uwsgi.lock()
        try:
            last_run = uwsgi.cache_get(cache_key)
            if last_run is not None and current_ts - int.from_bytes(last_run, 'big') < delay:
                # already run less than `delay` seconds ago: nothing to do
                return
            uwsgi.cache_update(cache_key, current_ts.to_bytes(4, 'big'))
        finally:
            uwsgi.unlock()
        # the lock is released before the scheduled function runs
        func(*args)

    signal_num = _last_signal
    _last_signal = signal_num + 1
    uwsgi.register_signal(signal_num, 'worker', sighandler)
    uwsgi.add_timer(signal_num, delay)
    return True
|
Loading…
Reference in New Issue