forked from Archives/searxng
Merge pull request #1856 from dalf/checker_requires_redis
The checker requires Redis
dependabot/pip/master/sphinx-6.1.3
commit
d37afb8ab9
@ -0,0 +1,36 @@
|
|||||||
|
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- This script is not a string in scheduler.py, so editors can provide syntax highlighting.
--
-- Arguments (all delays are in seconds):
--   ARGV[1], ARGV[2]: lower / upper bound of the random delay before the very first run
--   ARGV[3], ARGV[4]: lower / upper bound of the random delay between two runs
--
-- Returns a table of two elements: { call_now, delay }
--   call_now: true when the caller must run the embedded checker now
--   delay:    difference in seconds between the stored next call timestamp and now

-- The Redis KEY is defined here and not in Python on purpose:
-- only this LUA script can read and update this key to avoid lock and concurrency issues.
local redis_key = 'SearXNG_checker_next_call_ts'

-- TIME returns strings: convert the seconds part so comparisons below are numeric
local now = tonumber(redis.call('TIME')[1])
local start_after_from = ARGV[1]
local start_after_to = ARGV[2]
local every_from = ARGV[3]
local every_to = ARGV[4]

local next_call_ts = redis.call('GET', redis_key)

if (next_call_ts == false or next_call_ts == nil) then
    -- the scheduler has never run on this Redis instance, so:
    -- 1/ the scheduler does not run now
    -- 2/ the next call is a random time between start_after_from and start_after_to
    local initial_delay = math.random(start_after_from, start_after_to)
    redis.call('SET', redis_key, now + initial_delay)
    -- return the delay that was just stored
    -- (the undeclared name "delay" was returned here before, which is not a usable value)
    return { false, initial_delay }
end

-- next_call_ts is defined
-- --> if now is lower than next_call_ts then we don't run the embedded checker
-- --> if now is higher then we update next_call_ts and ask to run the embedded checker now.
-- GET returns a string: compare as numbers, not lexicographically
next_call_ts = tonumber(next_call_ts)
local call_now = next_call_ts <= now
if call_now then
    -- the checker runs now, define the timestamp of the next call:
    -- this is a random delay between every_from and every_to
    local periodic_delay = math.random(every_from, every_to)
    -- INCRBY adds to the stored timestamp (not to "now") and returns the new value
    next_call_ts = redis.call('INCRBY', redis_key, periodic_delay)
end
return { call_now, next_call_ts - now }
|
@ -0,0 +1,57 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
# pylint: disable=missing-module-docstring
|
||||||
|
"""Lame scheduler which uses Redis as a source of truth:
|
||||||
|
* the Redis key SearXNG_checker_next_call_ts contains the next time the embedded checker should run.
|
||||||
|
* to avoid lock, a unique Redis script reads and updates the Redis key SearXNG_checker_next_call_ts.
|
||||||
|
* this Redis script returns a list of two elements:
|
||||||
|
* the first one is a boolean. If True, the embedded checker must run now in this worker.
|
||||||
|
* the second element is the delay in seconds to wait before the next call to the Redis script.
|
||||||
|
|
||||||
|
This scheduler is not generic on purpose: if more features are required, a dedicated scheduler must be used
|
||||||
|
(= a better scheduler should not use the web workers)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import importlib
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
from searx.shared.redisdb import client as get_redis_client
|
||||||
|
from searx.redislib import lua_script_storage
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger('searx.search.checker')
|
||||||
|
|
||||||
|
|
||||||
|
def scheduler_function(start_after_from: int, start_after_to: int, every_from: int, every_to: int, callback: Callable):
    """Run the checker periodically. The function never returns.

    Parameters:
    * start_after_from and start_after_to: when to call "callback" for the first time on the Redis instance
    * every_from and every_to: after the first call, how often to call "callback"
    * callback: called without argument; any exception it raises is logged and ignored

    The four integers are passed unchanged to the Redis script "scheduler.lua", which
    decides whether this worker runs "callback" now and how long to wait afterwards.

    There is no issue:
    * to call this function in multiple workers
    * to kill workers at any time as long as there is at least one worker
    """
    # "import importlib" alone does not guarantee that the "resources" submodule is loaded
    from importlib import resources  # pylint: disable=import-outside-toplevel

    scheduler_now_script = resources.read_text(__package__, "scheduler.lua")
    while True:
        # ask the Redis script what to do
        # the script says
        # * if the checker must run now.
        # * how long to wait before calling the script again (it can be called earlier, but not later).
        script = lua_script_storage(get_redis_client(), scheduler_now_script)
        call_now, wait_time = script(args=[start_after_from, start_after_to, every_from, every_to])

        # the script returns "next call timestamp - now": the value is negative when the
        # stored timestamp is still in the past, and time.sleep() rejects negative values.
        wait_time = max(0, wait_time)

        # does the worker run the checker now?
        if call_now:
            # run the checker
            try:
                callback()
            except Exception:  # pylint: disable=broad-except
                logger.exception("Error calling the embedded checker")
            # only the worker that ran the checker displays the wait_time
            logger.info("Next call to the checker in %s seconds", wait_time)
        # wait until the next call
        time.sleep(wait_time)
|
@ -1,39 +1,6 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Initialization of a *shared* storage.
|
||||||
|
"""
|
||||||
|
|
||||||
import logging
|
from . import redisdb
|
||||||
import importlib
|
|
||||||
|
|
||||||
logger = logging.getLogger('searx.shared')
|
|
||||||
|
|
||||||
__all__ = ['SharedDict', 'schedule']
|
|
||||||
|
|
||||||
# Select the SharedDict / schedule implementation at import time:
# * uwsgi can not be imported          --> shared_simple
# * uwsgi is there but its cache fails --> SimpleSharedDict, scheduling disabled
# * uwsgi is there and its cache works --> shared_uwsgi
try:
    uwsgi = importlib.import_module('uwsgi')
except ImportError:
    # no uwsgi
    # (ImportError only: a bare "except" would also hide SystemExit / KeyboardInterrupt)
    from .shared_simple import SimpleSharedDict as SharedDict, schedule

    logger.info('Use shared_simple implementation')
else:
    try:
        # self-test: write a value into the uwsgi cache and read it back
        uwsgi.cache_update('dummy', b'dummy')
        if uwsgi.cache_get('dummy') != b'dummy':
            raise RuntimeError('uwsgi cache self-test failed')
    except Exception:  # pylint: disable=broad-except
        # uwsgi.ini configuration problem: disable all scheduling
        logger.error(
            'uwsgi.ini configuration error, add this line to your uwsgi.ini\n'
            'cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1'
        )
        from .shared_simple import SimpleSharedDict as SharedDict

        def schedule(delay, func, *args):
            """Scheduling is disabled: nothing is registered and False is returned."""
            return False

    else:
        # uwsgi
        from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule

        logger.info('Use shared_uwsgi implementation')

# module-level storage instance built from the selected implementation
storage = SharedDict()
|
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# pyright: strict
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
class SharedDict(ABC):
    """Abstract key/value storage with separate accessors for int and str values.

    Concrete subclasses must implement the four methods below.
    """

    @abstractmethod
    def get_int(self, key: str) -> Optional[int]:
        """Return the integer stored under ``key`` (Optional: may be None)."""

    @abstractmethod
    def set_int(self, key: str, value: int):
        """Store the integer ``value`` under ``key``."""

    @abstractmethod
    def get_str(self, key: str) -> Optional[str]:
        """Return the string stored under ``key`` (Optional: may be None)."""

    @abstractmethod
    def set_str(self, key: str, value: str):
        """Store the string ``value`` under ``key``."""
|
|
@ -1,40 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
|
|
||||||
import threading
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from . import shared_abstract
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleSharedDict(shared_abstract.SharedDict):
    """SharedDict implementation backed by a plain dict held by the instance."""

    __slots__ = ('d',)

    def __init__(self):
        # the only storage: key -> value, int and str values share the same dict
        self.d = {}

    def get_int(self, key: str) -> Optional[int]:
        """Return the value stored under ``key``, or None when the key is unknown."""
        try:
            return self.d[key]
        except KeyError:
            return None

    def set_int(self, key: str, value: int):
        """Store ``value`` under ``key``."""
        self.d[key] = value

    def get_str(self, key: str) -> Optional[str]:
        """Return the value stored under ``key``, or None when the key is unknown."""
        try:
            return self.d[key]
        except KeyError:
            return None

    def set_str(self, key: str, value: str):
        """Store ``value`` under ``key``."""
        self.d[key] = value
|
|
||||||
|
|
||||||
|
|
||||||
def schedule(delay, func, *args):
    """Call ``func(*args)`` every ``delay`` seconds from daemon timer threads.

    The next timer is armed before ``func`` is called.  Always returns True.
    """

    def fire():
        # re-arm first, then do the actual work
        arm()
        func(*args)

    def arm():
        timer = threading.Timer(delay, fire)
        timer.daemon = True
        timer.start()

    arm()
    return True
|
|
@ -1,64 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
|
|
||||||
import time
|
|
||||||
from typing import Optional
|
|
||||||
import uwsgi # pyright: ignore # pylint: disable=E0401
|
|
||||||
from . import shared_abstract
|
|
||||||
|
|
||||||
|
|
||||||
_last_signal = 10
|
|
||||||
|
|
||||||
|
|
||||||
class UwsgiCacheSharedDict(shared_abstract.SharedDict):
    """SharedDict implementation which stores the values in the uwsgi cache.

    Integers are written as 4 bytes, big endian; strings are written UTF-8 encoded.
    """

    def get_int(self, key: str) -> Optional[int]:
        """Return the cached bytes of ``key`` decoded as a big endian integer, None if nothing is cached."""
        raw = uwsgi.cache_get(key)
        return None if raw is None else int.from_bytes(raw, 'big')

    def set_int(self, key: str, value: int):
        """Cache ``value`` under ``key`` as 4 big endian bytes."""
        uwsgi.cache_update(key, value.to_bytes(4, 'big'))

    def get_str(self, key: str) -> Optional[str]:
        """Return the cached bytes of ``key`` decoded as UTF-8, None if nothing is cached."""
        raw = uwsgi.cache_get(key)
        return None if raw is None else raw.decode('utf-8')

    def set_str(self, key: str, value: str):
        """Cache ``value`` under ``key``, UTF-8 encoded."""
        uwsgi.cache_update(key, value.encode('utf-8'))
|
|
||||||
|
|
||||||
|
|
||||||
def schedule(delay, func, *args):
    """Register a uwsgi timer which calls ``func(*args)``, at most once per ``delay`` seconds.

    Can be implemented using a spooler.
    https://uwsgi-docs.readthedocs.io/en/latest/PythonDecorators.html

    To make the uwsgi configuration simple, use the alternative implementation.

    Each call takes the next signal number from the module-level counter.
    Always returns True.
    """
    global _last_signal

    def on_signal(signum):
        timestamp = int(time.time())
        cache_key = f'scheduler_call_time_signal_{signum}'
        # the read / compare / write of the last call time is done under the uwsgi lock
        uwsgi.lock()
        try:
            previous = uwsgi.cache_get(cache_key)
            if previous is not None and timestamp - int.from_bytes(previous, 'big') < delay:
                # a call was already recorded less than "delay" seconds ago: skip this one
                return
            uwsgi.cache_update(cache_key, timestamp.to_bytes(4, 'big'))
        finally:
            uwsgi.unlock()
        # the lock is released before the real work starts
        func(*args)

    signal_num = _last_signal
    _last_signal = signal_num + 1
    uwsgi.register_signal(signal_num, 'worker', on_signal)
    uwsgi.add_timer(signal_num, delay)
    return True
|
|
Loading…
Reference in New Issue