From 4103908a8da9fa315ffa1762f94ff39e4ad772e8 Mon Sep 17 00:00:00 2001 From: Jeff Becker Date: Thu, 27 Oct 2022 13:51:45 -0400 Subject: [PATCH] system layer manager (llarp::sys::service_manager) the win32 and sd_notify components provided a disjointed set of similar high level functionality so we consolidate these duplicate code paths into one that has the same lifecycle regardless of platform to reduce complexity of this feature. this new component is responsible for reporting state changes to the system layer and optionally propagating state change to lokinet requested by the system layer (used by windows service). --- daemon/lokinet.cpp | 108 ++++++-------------------- include/llarp.hpp | 1 + llarp/CMakeLists.txt | 9 ++- llarp/context.cpp | 8 ++ llarp/linux/sd_service_manager.cpp | 60 +++++++++++++++ llarp/router/abstractrouter.hpp | 3 + llarp/router/router.cpp | 118 ++++++++++++++--------------- llarp/router/router.hpp | 4 + llarp/util/nop_service_manager.cpp | 7 ++ llarp/util/service_manager.hpp | 118 +++++++++++++++++++++++++++++ llarp/win32/service_manager.cpp | 85 +++++++++++++++++++++ llarp/win32/service_manager.hpp | 24 ++++++ test/check_main.cpp | 2 + 13 files changed, 400 insertions(+), 147 deletions(-) create mode 100644 llarp/linux/sd_service_manager.cpp create mode 100644 llarp/util/nop_service_manager.cpp create mode 100644 llarp/util/service_manager.hpp create mode 100644 llarp/win32/service_manager.cpp create mode 100644 llarp/win32/service_manager.hpp diff --git a/daemon/lokinet.cpp b/daemon/lokinet.cpp index 391495509..8eb799f29 100644 --- a/daemon/lokinet.cpp +++ b/daemon/lokinet.cpp @@ -7,7 +7,10 @@ #include #ifdef _WIN32 +#include #include +#else +#include #endif #include @@ -21,19 +24,18 @@ int lokinet_main(int, char**); #ifdef _WIN32 -#include extern "C" LONG FAR PASCAL win32_signal_handler(EXCEPTION_POINTERS*); extern "C" VOID FAR PASCAL win32_daemon_entry(DWORD, LPTSTR*); -BOOL ReportSvcStatus(DWORD, DWORD, DWORD); + VOID insert_description(); -SERVICE_STATUS SvcStatus; -SERVICE_STATUS_HANDLE SvcStatusHandle; -bool start_as_daemon = false; + #endif +bool run_as_daemon{false}; + static auto logcat = llarp::log::Cat("main"); std::shared_ptr ctx; std::promise exit_code; @@ -84,9 +86,6 @@ install_win32_daemon() llarp::LogError("Cannot install service ", GetLastError()); return; } - // just put the flag here. we eat it later on and specify the - // config path in the daemon entry point - StringCchCat(szPath.data(), 1024, " --win32-daemon"); // Get a handle to the SCM database. schSCManager = OpenSCManager( @@ -294,37 +293,6 @@ run_main_context(std::optional confFile, const llarp::RuntimeOptions o } #ifdef _WIN32 -void -TellWindowsServiceStopped() -{ - ::WSACleanup(); - if (not start_as_daemon) - return; - - llarp::LogInfo("Telling Windows the service has stopped."); - if (not ReportSvcStatus(SERVICE_STOPPED, NO_ERROR, 0)) - { - auto error_code = GetLastError(); - if (error_code == ERROR_INVALID_DATA) - llarp::LogError( - "SetServiceStatus failed: \"The specified service status structure is invalid.\""); - else if (error_code == ERROR_INVALID_HANDLE) - llarp::LogError("SetServiceStatus failed: \"The specified handle is invalid.\""); - else - llarp::LogError("SetServiceStatus failed with an unknown error."); - } -} - -class WindowsServiceStopped -{ - public: - WindowsServiceStopped() = default; - - ~WindowsServiceStopped() - { - TellWindowsServiceStopped(); - } -}; /// minidump generation for windows jizz /// will make a coredump when there is an unhandled exception @@ -370,9 +338,9 @@ main(int argc, char* argv[]) #else SERVICE_TABLE_ENTRY DispatchTable[] = { {strdup("lokinet"), (LPSERVICE_MAIN_FUNCTION)win32_daemon_entry}, {NULL, NULL}}; - if (lstrcmpi(argv[1], "--win32-daemon") == 0) + if (std::string{argv[1]} == "--win32-daemon") { - start_as_daemon = true; + run_as_daemon = true; StartServiceCtrlDispatcher(DispatchTable); } else @@ -383,6 +351,10 @@ main(int argc, char* argv[]) int lokinet_main(int argc, char** argv) { + // if we are not running as a service disable reporting + if (llarp::platform::is_windows and not run_as_daemon) + llarp::sys::service_manager->disable(); + if (auto result = Lokinet_INIT()) return result; @@ -398,7 +370,6 @@ lokinet_main(int argc, char** argv) opts.showBanner = false; #ifdef _WIN32 - WindowsServiceStopped stopped_raii; if (startWinsock()) return -1; SetConsoleCtrlHandler(handle_signal_win32, TRUE); @@ -545,13 +516,9 @@ lokinet_main(int argc, char** argv) SetUnhandledExceptionFilter(&GenerateDump); #endif - std::thread main_thread{[&] { run_main_context(configFile, opts); }}; + std::thread main_thread{[configFile, opts] { run_main_context(configFile, opts); }}; auto ftr = exit_code.get_future(); -#ifdef _WIN32 - ReportSvcStatus(SERVICE_RUNNING, NO_ERROR, 0); -#endif - do { // do periodic non lokinet related tasks here @@ -582,9 +549,7 @@ lokinet_main(int argc, char** argv) llarp::log::critical(deadlock_cat, wtf); llarp::log::flush(); } -#ifdef _WIN32 - TellWindowsServiceStopped(); -#endif + llarp::sys::service_manager->failed(); std::abort(); } } while (ftr.wait_for(std::chrono::seconds(1)) != std::future_status::ready); @@ -609,6 +574,7 @@ lokinet_main(int argc, char** argv) } llarp::log::flush(); + llarp::sys::service_manager->stopped(); if (ctx) { ctx.reset(); @@ -617,29 +583,6 @@ lokinet_main(int argc, char** argv) } #ifdef _WIN32 -BOOL -ReportSvcStatus(DWORD dwCurrentState, DWORD dwWin32ExitCode, DWORD dwWaitHint) -{ - static DWORD dwCheckPoint = 1; - - // Fill in the SERVICE_STATUS structure. - SvcStatus.dwCurrentState = dwCurrentState; - SvcStatus.dwWin32ExitCode = dwWin32ExitCode; - SvcStatus.dwWaitHint = dwWaitHint; - - if (dwCurrentState == SERVICE_START_PENDING) - SvcStatus.dwControlsAccepted = 0; - else - SvcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP; - - if ((dwCurrentState == SERVICE_RUNNING) || (dwCurrentState == SERVICE_STOPPED)) - SvcStatus.dwCheckPoint = 0; - else - SvcStatus.dwCheckPoint = dwCheckPoint++; - - // Report the status of the service to the SCM. - return SetServiceStatus(SvcStatusHandle, &SvcStatus); -} VOID FAR PASCAL SvcCtrlHandler(DWORD dwCtrl) @@ -651,14 +594,13 @@ SvcCtrlHandler(DWORD dwCtrl) case SERVICE_CONTROL_STOP: // tell service we are stopping llarp::log::info(logcat, "Windows service controller gave SERVICE_CONTROL_STOP"); - ReportSvcStatus(SERVICE_STOP_PENDING, NO_ERROR, 0); - // do the actual tear down - handle_signal(SIGINT); + llarp::sys::service_manager->system_changed_our_state(llarp::sys::ServiceState::Stopping); return; case SERVICE_CONTROL_INTERROGATE: // report status - SetServiceStatus(SvcStatusHandle, &SvcStatus); + llarp::log::debug(logcat, "Got win32 service interrogate signal"); + llarp::sys::service_manager->report_changed_state(); return; default: @@ -673,21 +615,15 @@ VOID FAR PASCAL win32_daemon_entry(DWORD, LPTSTR* argv) { // Register the handler function for the service - SvcStatusHandle = RegisterServiceCtrlHandler("lokinet", SvcCtrlHandler); + auto* svc = dynamic_cast(llarp::sys::service_manager); + svc->handle = RegisterServiceCtrlHandler("lokinet", SvcCtrlHandler); - if (!SvcStatusHandle) + if (svc->handle == nullptr) { llarp::LogError("failed to register daemon control handler"); return; } - // These SERVICE_STATUS members remain as set here - SvcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS; - SvcStatus.dwServiceSpecificExitCode = 0; - - // Report initial status to the SCM - ReportSvcStatus(SERVICE_START_PENDING, NO_ERROR, 3000); - // we hard code the args to lokinet_main. // we yoink argv[0] (lokinet.exe path) and pass in the new args. std::array args = { diff --git a/include/llarp.hpp b/include/llarp.hpp index 402ffa52e..cb8ca495b 100644 --- a/include/llarp.hpp +++ b/include/llarp.hpp @@ -45,6 +45,7 @@ namespace llarp std::shared_ptr nodedb = nullptr; std::string nodedb_dir; + Context(); virtual ~Context() = default; void diff --git a/llarp/CMakeLists.txt b/llarp/CMakeLists.txt index 2e3c558df..77542c105 100644 --- a/llarp/CMakeLists.txt +++ b/llarp/CMakeLists.txt @@ -49,17 +49,23 @@ target_link_libraries(lokinet-platform PUBLIC lokinet-cryptography lokinet-util target_link_libraries(lokinet-platform PRIVATE oxenmq::oxenmq) if (ANDROID) - target_sources(lokinet-platform PRIVATE android/ifaddrs.c) + target_sources(lokinet-platform PRIVATE android/ifaddrs.c util/nop_service_manager.cpp) endif() if(CMAKE_SYSTEM_NAME MATCHES "Linux") target_sources(lokinet-platform PRIVATE linux/dbus.cpp) + if(WITH_SYSTEMD) + target_sources(lokinet-platform PRIVATE linux/sd_service_manager.cpp) + else() + target_sources(lokinet-platform PRIVATE util/nop_service_manager.cpp) + endif() endif() if (WIN32) target_sources(lokinet-platform PRIVATE net/win32.cpp vpn/win32.cpp + win32/service_manager.cpp win32/exec.cpp) add_library(lokinet-win32 STATIC win32/dll.cpp @@ -312,6 +318,7 @@ endif() if(APPLE) add_subdirectory(apple) + target_sources(lokinet-platform PRIVATE util/nop_system_manager.cpp) endif() file(GLOB_RECURSE docs_SRC */*.hpp *.hpp) diff --git a/llarp/context.cpp b/llarp/context.cpp index 9e818e589..1901afc6e 100644 --- a/llarp/context.cpp +++ b/llarp/context.cpp @@ -12,6 +12,8 @@ #include "service/context.hpp" #include "util/logging.hpp" +#include + #include #include #include @@ -213,4 +215,10 @@ namespace llarp loop.reset(); } + Context::Context() + { + // service_manager is a global and context isnt + llarp::sys::service_manager->give_context(this); + } + } // namespace llarp diff --git a/llarp/linux/sd_service_manager.cpp b/llarp/linux/sd_service_manager.cpp new file mode 100644 index 000000000..1ea2e8fbe --- /dev/null +++ b/llarp/linux/sd_service_manager.cpp @@ -0,0 +1,60 @@ +#include + +#include +#include +#include +#include +#include + +namespace llarp::sys +{ + class SD_Manager : public I_SystemLayerManager + { + llarp::sys::ServiceState m_State{ServiceState::Initial}; + + public: + /// change our state and report it to the system layer + void + we_changed_our_state(ServiceState st) override + { + assert(m_State != st); + m_State = st; + report_changed_state(); + } + + void + report_changed_state() override + { + if (m_State == ServiceState::Running) + { + ::sd_notify(0, "READY=1"); + return; + } + if (m_State == ServiceState::Stopping) + { + ::sd_notify(0, "STOPPING=1"); + return; + } + } + + void + report_periodic_stats() override + { + if (m_Context and m_Context->router and not m_disable) + { + auto status = fmt::format("WATCHDOG=1\nSTATUS={}", m_Context->router->status_line()); + ::sd_notify(0, status.c_str()); + } + } + + void + system_changed_our_state(ServiceState) override + { + // not applicable on systemd + } + }; + + SD_Manager _manager{}; + I_SystemLayerManager* const service_manager = &_manager; + +} // namespace llarp::sys diff --git a/llarp/router/abstractrouter.hpp b/llarp/router/abstractrouter.hpp index 7f6a368a7..162b50616 100644 --- a/llarp/router/abstractrouter.hpp +++ b/llarp/router/abstractrouter.hpp @@ -361,6 +361,9 @@ namespace llarp virtual void GossipRCIfNeeded(const RouterContact rc) = 0; + virtual std::string + status_line() = 0; + /// Templated convenience function to generate a RouterHive event and /// delegate to non-templated (and overridable) function for handling. template diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp index d0edc5cfa..86e5a6990 100644 --- a/llarp/router/router.cpp +++ b/llarp/router/router.cpp @@ -393,6 +393,8 @@ namespace llarp bool Router::Configure(std::shared_ptr c, bool isSNode, std::shared_ptr nodedb) { + llarp::sys::service_manager->starting(); + m_Config = std::move(c); auto& conf = *m_Config; @@ -870,6 +872,58 @@ namespace llarp m_LastStatsReport = now; } + std::string + Router::status_line() + { + std::string status; + auto out = std::back_inserter(status); + fmt::format_to(out, "v{}", llarp::VERSION_STR); + if (IsServiceNode()) + { + fmt::format_to( + out, + " snode | known/svc/clients: {}/{}/{}", + nodedb()->NumLoaded(), + NumberOfConnectedRouters(), + NumberOfConnectedClients()); + fmt::format_to( + out, + " | {} active paths | block {} ", + pathContext().CurrentTransitPaths(), + (m_lokidRpcClient ? m_lokidRpcClient->BlockHeight() : 0)); + auto maybe_last = _rcGossiper.LastGossipAt(); + fmt::format_to( + out, + " | gossip: (next/last) {} / {}", + short_time_from_now(_rcGossiper.NextGossipAt()), + maybe_last ? short_time_from_now(*maybe_last) : "never"); + } + else + { + fmt::format_to( + out, + " client | known/connected: {}/{}", + nodedb()->NumLoaded(), + NumberOfConnectedRouters()); + + if (auto ep = hiddenServiceContext().GetDefault()) + { + fmt::format_to( + out, + " | paths/endpoints {}/{}", + pathContext().CurrentOwnedPaths(), + ep->UniqueEndpoints()); + + if (auto success_rate = ep->CurrentBuildStats().SuccessRatio(); success_rate < 0.5) + { + fmt::format_to( + out, " [ !!! Low Build Success Rate ({:.1f}%) !!! ]", (100.0 * success_rate)); + } + }; + } + return status; + } + void Router::Tick() { @@ -884,57 +938,7 @@ namespace llarp Thaw(); } -#if defined(WITH_SYSTEMD) - { - std::string status; - auto out = std::back_inserter(status); - fmt::format_to(out, "WATCHDOG=1\nSTATUS=v{}", llarp::VERSION_STR); - if (IsServiceNode()) - { - fmt::format_to( - out, - " snode | known/svc/clients: {}/{}/{}", - nodedb()->NumLoaded(), - NumberOfConnectedRouters(), - NumberOfConnectedClients()); - fmt::format_to( - out, - " | {} active paths | block {} ", - pathContext().CurrentTransitPaths(), - (m_lokidRpcClient ? m_lokidRpcClient->BlockHeight() : 0)); - auto maybe_last = _rcGossiper.LastGossipAt(); - fmt::format_to( - out, - " | gossip: (next/last) {} / {}", - short_time_from_now(_rcGossiper.NextGossipAt()), - maybe_last ? short_time_from_now(*maybe_last) : "never"); - } - else - { - fmt::format_to( - out, - " client | known/connected: {}/{}", - nodedb()->NumLoaded(), - NumberOfConnectedRouters()); - - if (auto ep = hiddenServiceContext().GetDefault()) - { - fmt::format_to( - out, - " | paths/endpoints {}/{}", - pathContext().CurrentOwnedPaths(), - ep->UniqueEndpoints()); - - if (auto success_rate = ep->CurrentBuildStats().SuccessRatio(); success_rate < 0.5) - { - fmt::format_to( - out, " [ !!! Low Build Success Rate ({:.1f}%) !!! ]", (100.0 * success_rate)); - } - }; - } - ::sd_notify(0, status.c_str()); - } -#endif + llarp::sys::service_manager->report_periodic_stats(); m_PathBuildLimiter.Decay(now); @@ -1399,9 +1403,6 @@ namespace llarp m_RoutePoker->Start(this); _running.store(true); _startedAt = Now(); -#if defined(WITH_SYSTEMD) - ::sd_notify(0, "READY=1"); -#endif if (whitelistRouters) { // do service node testing if we are in service node whitelist mode @@ -1474,6 +1475,7 @@ namespace llarp } }); } + llarp::sys::service_manager->ready(); return _running; } @@ -1525,9 +1527,7 @@ namespace llarp if (log::get_level_default() != log::Level::off) log::reset_level(log::Level::info); LogWarn("stopping router hard"); -#if defined(WITH_SYSTEMD) - sd_notify(0, "STOPPING=1\nSTATUS=Shutting down HARD"); -#endif + llarp::sys::service_manager->stopping(); hiddenServiceContext().StopAll(); _exitContext.Stop(); StopLinks(); @@ -1546,9 +1546,7 @@ namespace llarp if (log::get_level_default() != log::Level::off) log::reset_level(log::Level::info); LogInfo("stopping router"); -#if defined(WITH_SYSTEMD) - sd_notify(0, "STOPPING=1\nSTATUS=Shutting down"); -#endif + llarp::sys::service_manager->stopping(); hiddenServiceContext().StopAll(); _exitContext.Stop(); paths.PumpUpstream(); diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp index 53b24e4aa..3e86cff07 100644 --- a/llarp/router/router.hpp +++ b/llarp/router/router.hpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -315,6 +316,9 @@ namespace llarp RCLookupHandler _rcLookupHandler; RCGossiper _rcGossiper; + std::string + status_line() override; + using Clock_t = std::chrono::steady_clock; using TimePoint_t = Clock_t::time_point; diff --git a/llarp/util/nop_service_manager.cpp b/llarp/util/nop_service_manager.cpp new file mode 100644 index 000000000..e07025087 --- /dev/null +++ b/llarp/util/nop_service_manager.cpp @@ -0,0 +1,7 @@ +#include "service_manager.hpp" + +namespace llarp::sys +{ + NOP_SystemLayerHandler _manager{}; + I_SystemLayerManager* const service_manager = &_manager; +} // namespace llarp::sys diff --git a/llarp/util/service_manager.hpp b/llarp/util/service_manager.hpp new file mode 100644 index 000000000..d4f66eb22 --- /dev/null +++ b/llarp/util/service_manager.hpp @@ -0,0 +1,118 @@ +#pragma once + +namespace llarp +{ + struct Context; +} + +namespace llarp::sys +{ + + // what state lokinet will report we are in to the system layer + enum class ServiceState + { + Initial, + Starting, + Running, + Stopping, + Stopped, + HardStop, + Failed, + }; + + /// interface type for interacting with the os dependant system layer + class I_SystemLayerManager + { + protected: + bool m_disable{false}; + llarp::Context* m_Context{nullptr}; + + /// change our state and report it to the system layer + virtual void + we_changed_our_state(ServiceState st) = 0; + + public: + virtual ~I_SystemLayerManager() = default; + + /// disable all reporting to system layer + inline void + disable() + { + m_disable = true; + } + + /// give our current lokinet context to the system layer manager + inline void + give_context(llarp::Context* ctx) + { + m_Context = ctx; + } + + /// system told us to enter this state + virtual void + system_changed_our_state(ServiceState st) = 0; + + /// report our current state to the system layer + virtual void + report_changed_state() = 0; + + /// report our stats on each timer tick + virtual void + report_periodic_stats(){}; + + void + starting() + { + if (m_disable) + return; + we_changed_our_state(ServiceState::Starting); + } + + void + ready() + { + if (m_disable) + return; + we_changed_our_state(ServiceState::Running); + } + + void + stopping() + { + if (m_disable) + return; + we_changed_our_state(ServiceState::Stopping); + } + + void + stopped() + { + if (m_disable) + return; + we_changed_our_state(ServiceState::Stopped); + } + + void + failed() + { + if (m_disable) + return; + we_changed_our_state(ServiceState::Failed); + } + }; + + extern I_SystemLayerManager* const service_manager; + + class NOP_SystemLayerHandler : public I_SystemLayerManager + { + protected: + void + we_changed_our_state(ServiceState) override + {} + + public: + void + report_changed_state() override{}; + void system_changed_our_state(ServiceState) override{}; + }; +} // namespace llarp::sys diff --git a/llarp/win32/service_manager.cpp b/llarp/win32/service_manager.cpp new file mode 100644 index 000000000..d2c6342ab --- /dev/null +++ b/llarp/win32/service_manager.cpp @@ -0,0 +1,85 @@ +#include +#include +#include "service_manager.hpp" +#include +#include +#include +#include + +namespace llarp::sys +{ + + namespace + { + + std::optional + to_win32_state(ServiceState st) + { + switch (st) + { + case ServiceState::Starting: + return SERVICE_START_PENDING; + case ServiceState::Running: + return SERVICE_RUNNING; + case ServiceState::Stopping: + return SERVICE_STOP_PENDING; + case ServiceState::Stopped: + return SERVICE_STOPPED; + default: + return std::nullopt; + } + } + } // namespace + + SVC_Manager::SVC_Manager() + { + _status.dwServiceType = SERVICE_WIN32_OWN_PROCESS; + } + + void + SVC_Manager::system_changed_our_state(ServiceState st) + { + if (m_disable) + return; + if (st == ServiceState::Stopping) + { + we_changed_our_state(st); + m_Context->HandleSignal(SIGINT); + } + } + + void + SVC_Manager::report_changed_state() + { + if (m_disable) + return; + SetServiceStatus(handle, &_status); + } + + void + SVC_Manager::we_changed_our_state(ServiceState st) + { + if (st == ServiceState::Failed) + { + _status.dwWin32ExitCode = ERROR_SERVICE_SPECIFIC_ERROR; + _status.dwServiceSpecificExitCode = 2; // TODO: propagate more info ? + report_changed_state(); + } + else if (auto maybe_state = to_win32_state(st)) + { + auto new_state = *maybe_state; + assert(_status.dwCurrentState != new_state); + _status.dwCurrentState = new_state; + // tell windows it takes 5s at most to start or stop + if (st == ServiceState::Starting or st == ServiceState::Stopping) + _status.dwCheckPoint++; + else + _status.dwCheckPoint = 0; + + report_changed_state(); + } + } + + SVC_Manager _manager{}; + I_SystemLayerManager* const service_manager = &_manager; +} // namespace llarp::sys diff --git a/llarp/win32/service_manager.hpp b/llarp/win32/service_manager.hpp new file mode 100644 index 000000000..567c354ac --- /dev/null +++ b/llarp/win32/service_manager.hpp @@ -0,0 +1,24 @@ +#pragma once +#include +namespace llarp::sys +{ + + class SVC_Manager : public I_SystemLayerManager + { + SERVICE_STATUS _status; + + public: + SERVICE_STATUS_HANDLE handle; + + SVC_Manager(); + + void + system_changed_our_state(ServiceState st) override; + + void + report_changed_state() override; + + void + we_changed_our_state(ServiceState st) override; + }; +} // namespace llarp::sys diff --git a/test/check_main.cpp b/test/check_main.cpp index e98bdcbfb..87299c833 100644 --- a/test/check_main.cpp +++ b/test/check_main.cpp @@ -2,6 +2,7 @@ #include #include +#include #ifdef _WIN32 #include @@ -23,6 +24,7 @@ startWinsock() int main(int argc, char* argv[]) { + llarp::sys::service_manager->disable(); llarp::log::reset_level(llarp::log::Level::off); #ifdef _WIN32