system layer manager (llarp::sys::service_manager)

the win32 and sd_notify components each provided a disjoint set of
similar high-level functionality, so we consolidate these duplicate
code paths into one that has the same lifecycle regardless of
platform, reducing the complexity of this feature.

this new component is responsible for reporting lokinet's state changes
to the system layer and, optionally, for propagating state changes
requested by the system layer back to lokinet (used by the windows service).
pull/2045/head
Jeff Becker 2 years ago
parent a7f3c3595b
commit 4103908a8d
No known key found for this signature in database
GPG Key ID: 025C02EE3A092F2D

@ -7,7 +7,10 @@
#include <llarp/util/str.hpp>
#ifdef _WIN32
#include <llarp/win32/service_manager.hpp>
#include <dbghelp.h>
#else
#include <llarp/util/service_manager.hpp>
#endif
#include <csignal>
@ -21,19 +24,18 @@ int
lokinet_main(int, char**);
#ifdef _WIN32
#include <strsafe.h>
extern "C" LONG FAR PASCAL
win32_signal_handler(EXCEPTION_POINTERS*);
extern "C" VOID FAR PASCAL
win32_daemon_entry(DWORD, LPTSTR*);
BOOL ReportSvcStatus(DWORD, DWORD, DWORD);
VOID
insert_description();
SERVICE_STATUS SvcStatus;
SERVICE_STATUS_HANDLE SvcStatusHandle;
bool start_as_daemon = false;
#endif
bool run_as_daemon{false};
static auto logcat = llarp::log::Cat("main");
std::shared_ptr<llarp::Context> ctx;
std::promise<int> exit_code;
@ -84,9 +86,6 @@ install_win32_daemon()
llarp::LogError("Cannot install service ", GetLastError());
return;
}
// just put the flag here. we eat it later on and specify the
// config path in the daemon entry point
StringCchCat(szPath.data(), 1024, " --win32-daemon");
// Get a handle to the SCM database.
schSCManager = OpenSCManager(
@ -294,37 +293,6 @@ run_main_context(std::optional<fs::path> confFile, const llarp::RuntimeOptions o
}
#ifdef _WIN32
/// Clean up winsock and, when start_as_daemon is set, report SERVICE_STOPPED
/// through ReportSvcStatus, logging the reason if that report fails.
void
TellWindowsServiceStopped()
{
  ::WSACleanup();

  if (start_as_daemon)
  {
    llarp::LogInfo("Telling Windows the service has stopped.");

    const bool reported = ReportSvcStatus(SERVICE_STOPPED, NO_ERROR, 0);
    if (reported)
      return;

    // the report failed: translate the error code into a log message
    switch (GetLastError())
    {
      case ERROR_INVALID_DATA:
        llarp::LogError(
            "SetServiceStatus failed: \"The specified service status structure is invalid.\"");
        break;
      case ERROR_INVALID_HANDLE:
        llarp::LogError("SetServiceStatus failed: \"The specified handle is invalid.\"");
        break;
      default:
        llarp::LogError("SetServiceStatus failed with an unknown error.");
        break;
    }
  }
}
/// Scope guard: calls TellWindowsServiceStopped() when the object is destroyed.
class WindowsServiceStopped
{
 public:
  WindowsServiceStopped() = default;

  ~WindowsServiceStopped() { TellWindowsServiceStopped(); }
};
/// minidump generation for windows jizz
/// will make a coredump when there is an unhandled exception
@ -370,9 +338,9 @@ main(int argc, char* argv[])
#else
SERVICE_TABLE_ENTRY DispatchTable[] = {
{strdup("lokinet"), (LPSERVICE_MAIN_FUNCTION)win32_daemon_entry}, {NULL, NULL}};
if (lstrcmpi(argv[1], "--win32-daemon") == 0)
if (std::string{argv[1]} == "--win32-daemon")
{
start_as_daemon = true;
run_as_daemon = true;
StartServiceCtrlDispatcher(DispatchTable);
}
else
@ -383,6 +351,10 @@ main(int argc, char* argv[])
int
lokinet_main(int argc, char** argv)
{
// if we are not running as a service disable reporting
if (llarp::platform::is_windows and not run_as_daemon)
llarp::sys::service_manager->disable();
if (auto result = Lokinet_INIT())
return result;
@ -398,7 +370,6 @@ lokinet_main(int argc, char** argv)
opts.showBanner = false;
#ifdef _WIN32
WindowsServiceStopped stopped_raii;
if (startWinsock())
return -1;
SetConsoleCtrlHandler(handle_signal_win32, TRUE);
@ -545,13 +516,9 @@ lokinet_main(int argc, char** argv)
SetUnhandledExceptionFilter(&GenerateDump);
#endif
std::thread main_thread{[&] { run_main_context(configFile, opts); }};
std::thread main_thread{[configFile, opts] { run_main_context(configFile, opts); }};
auto ftr = exit_code.get_future();
#ifdef _WIN32
ReportSvcStatus(SERVICE_RUNNING, NO_ERROR, 0);
#endif
do
{
// do periodic non lokinet related tasks here
@ -582,9 +549,7 @@ lokinet_main(int argc, char** argv)
llarp::log::critical(deadlock_cat, wtf);
llarp::log::flush();
}
#ifdef _WIN32
TellWindowsServiceStopped();
#endif
llarp::sys::service_manager->failed();
std::abort();
}
} while (ftr.wait_for(std::chrono::seconds(1)) != std::future_status::ready);
@ -609,6 +574,7 @@ lokinet_main(int argc, char** argv)
}
llarp::log::flush();
llarp::sys::service_manager->stopped();
if (ctx)
{
ctx.reset();
@ -617,29 +583,6 @@ lokinet_main(int argc, char** argv)
}
#ifdef _WIN32
/// Update the global SvcStatus for the given state and pass it to
/// SetServiceStatus.
///
/// @param dwCurrentState   the SERVICE_* state to report
/// @param dwWin32ExitCode  exit code stored alongside the state
/// @param dwWaitHint       wait hint stored alongside the state
/// @return the result of SetServiceStatus
BOOL
ReportSvcStatus(DWORD dwCurrentState, DWORD dwWin32ExitCode, DWORD dwWaitHint)
{
  // keeps counting up across calls while a transition is in progress
  static DWORD dwCheckPoint = 1;

  const bool start_pending = (dwCurrentState == SERVICE_START_PENDING);
  const bool settled =
      (dwCurrentState == SERVICE_RUNNING) || (dwCurrentState == SERVICE_STOPPED);

  SvcStatus.dwCurrentState = dwCurrentState;
  SvcStatus.dwWin32ExitCode = dwWin32ExitCode;
  SvcStatus.dwWaitHint = dwWaitHint;

  // no controls are accepted while start is pending; stop is accepted otherwise
  SvcStatus.dwControlsAccepted = start_pending ? 0 : SERVICE_ACCEPT_STOP;

  // running/stopped reset the checkpoint, every other state bumps it
  SvcStatus.dwCheckPoint = settled ? 0 : dwCheckPoint++;

  return SetServiceStatus(SvcStatusHandle, &SvcStatus);
}
VOID FAR PASCAL
SvcCtrlHandler(DWORD dwCtrl)
@ -651,14 +594,13 @@ SvcCtrlHandler(DWORD dwCtrl)
case SERVICE_CONTROL_STOP:
// tell service we are stopping
llarp::log::info(logcat, "Windows service controller gave SERVICE_CONTROL_STOP");
ReportSvcStatus(SERVICE_STOP_PENDING, NO_ERROR, 0);
// do the actual tear down
handle_signal(SIGINT);
llarp::sys::service_manager->system_changed_our_state(llarp::sys::ServiceState::Stopping);
return;
case SERVICE_CONTROL_INTERROGATE:
// report status
SetServiceStatus(SvcStatusHandle, &SvcStatus);
llarp::log::debug(logcat, "Got win32 service interrogate signal");
llarp::sys::service_manager->report_changed_state();
return;
default:
@ -673,21 +615,15 @@ VOID FAR PASCAL
win32_daemon_entry(DWORD, LPTSTR* argv)
{
// Register the handler function for the service
SvcStatusHandle = RegisterServiceCtrlHandler("lokinet", SvcCtrlHandler);
auto* svc = dynamic_cast<llarp::sys::SVC_Manager*>(llarp::sys::service_manager);
svc->handle = RegisterServiceCtrlHandler("lokinet", SvcCtrlHandler);
if (!SvcStatusHandle)
if (svc->handle == nullptr)
{
llarp::LogError("failed to register daemon control handler");
return;
}
// These SERVICE_STATUS members remain as set here
SvcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
SvcStatus.dwServiceSpecificExitCode = 0;
// Report initial status to the SCM
ReportSvcStatus(SERVICE_START_PENDING, NO_ERROR, 3000);
// we hard code the args to lokinet_main.
// we yoink argv[0] (lokinet.exe path) and pass in the new args.
std::array args = {

@ -45,6 +45,7 @@ namespace llarp
std::shared_ptr<NodeDB> nodedb = nullptr;
std::string nodedb_dir;
Context();
virtual ~Context() = default;
void

@ -49,17 +49,23 @@ target_link_libraries(lokinet-platform PUBLIC lokinet-cryptography lokinet-util
target_link_libraries(lokinet-platform PRIVATE oxenmq::oxenmq)
if (ANDROID)
target_sources(lokinet-platform PRIVATE android/ifaddrs.c)
target_sources(lokinet-platform PRIVATE android/ifaddrs.c util/nop_service_manager.cpp)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
target_sources(lokinet-platform PRIVATE linux/dbus.cpp)
if(WITH_SYSTEMD)
target_sources(lokinet-platform PRIVATE linux/sd_service_manager.cpp)
else()
target_sources(lokinet-platform PRIVATE util/nop_service_manager.cpp)
endif()
endif()
if (WIN32)
target_sources(lokinet-platform PRIVATE
net/win32.cpp
vpn/win32.cpp
win32/service_manager.cpp
win32/exec.cpp)
add_library(lokinet-win32 STATIC
win32/dll.cpp
@ -312,6 +318,7 @@ endif()
# Apple builds link the no-op service manager. The source file is
# util/nop_service_manager.cpp, the same path the Android and non-systemd
# Linux branches reference.
if(APPLE)
  add_subdirectory(apple)
  target_sources(lokinet-platform PRIVATE util/nop_service_manager.cpp)
endif()
file(GLOB_RECURSE docs_SRC */*.hpp *.hpp)

@ -12,6 +12,8 @@
#include "service/context.hpp"
#include "util/logging.hpp"
#include <llarp/util/service_manager.hpp>
#include <cxxopts.hpp>
#include <csignal>
#include <stdexcept>
@ -213,4 +215,10 @@ namespace llarp
loop.reset();
}
/// Construct the context and hand a pointer to it to the global service manager.
Context::Context()
{
// service_manager is a global and context isnt
// NOTE(review): the manager stores this raw pointer and nothing visible here
// clears it again -- confirm it is not used after this context is destroyed.
llarp::sys::service_manager->give_context(this);
}
} // namespace llarp

@ -0,0 +1,60 @@
#include <llarp/util/service_manager.hpp>
#include <systemd/sd-daemon.h>
#include <cassert>
#include <llarp.hpp>
#include <llarp/router/router.hpp>
#include <llarp/util/logging.hpp>
namespace llarp::sys
{
/// systemd flavour of the system layer manager: forwards lokinet's lifecycle
/// state and periodic status line to the service manager through sd_notify.
class SD_Manager : public I_SystemLayerManager
{
  /// the state most recently passed to we_changed_our_state()
  llarp::sys::ServiceState m_State{ServiceState::Initial};

 public:
  /// Record our new state and report it to the system layer.
  ///
  /// Reporting the state we are already in is tolerated rather than asserted
  /// on: the router calls stopping() from more than one shutdown path, so the
  /// same state may arrive twice and must not abort a debug build.
  void
  we_changed_our_state(ServiceState st) override
  {
    m_State = st;
    report_changed_state();
  }

  /// Send the sd_notify message that corresponds to the current state.
  /// Only Running and Stopping have a message; other states send nothing.
  void
  report_changed_state() override
  {
    switch (m_State)
    {
      case ServiceState::Running:
        ::sd_notify(0, "READY=1");
        break;
      case ServiceState::Stopping:
        ::sd_notify(0, "STOPPING=1");
        break;
      default:
        break;
    }
  }

  /// Send a watchdog keep-alive together with the router's status line.
  /// Does nothing when reporting is disabled or no router is available yet.
  void
  report_periodic_stats() override
  {
    if (m_Context and m_Context->router and not m_disable)
    {
      auto status = fmt::format("WATCHDOG=1\nSTATUS={}", m_Context->router->status_line());
      ::sd_notify(0, status.c_str());
    }
  }

  void
  system_changed_our_state(ServiceState) override
  {
    // not applicable on systemd
  }
};
// the one systemd-backed manager object in this translation unit
SD_Manager _manager{};
// definition of the global manager pointer declared in service_manager.hpp
I_SystemLayerManager* const service_manager = &_manager;
} // namespace llarp::sys

@ -361,6 +361,9 @@ namespace llarp
virtual void
GossipRCIfNeeded(const RouterContact rc) = 0;
virtual std::string
status_line() = 0;
/// Templated convenience function to generate a RouterHive event and
/// delegate to non-templated (and overridable) function for handling.
template <class EventType, class... Params>

@ -393,6 +393,8 @@ namespace llarp
bool
Router::Configure(std::shared_ptr<Config> c, bool isSNode, std::shared_ptr<NodeDB> nodedb)
{
llarp::sys::service_manager->starting();
m_Config = std::move(c);
auto& conf = *m_Config;
@ -870,6 +872,58 @@ namespace llarp
m_LastStatsReport = now;
}
/// Build a one-line, human readable summary of this router.
///
/// The line always starts with the version. A service node then appends its
/// known/connected counts, transit paths, block height and gossip timing; a
/// client appends known/connected counts and, if a default endpoint exists,
/// path/endpoint counts plus a warning when build success drops below 50%.
std::string
Router::status_line()
{
  std::string line;
  auto sink = std::back_inserter(line);

  fmt::format_to(sink, "v{}", llarp::VERSION_STR);

  if (not IsServiceNode())
  {
    fmt::format_to(
        sink,
        " client | known/connected: {}/{}",
        nodedb()->NumLoaded(),
        NumberOfConnectedRouters());

    auto default_ep = hiddenServiceContext().GetDefault();
    if (not default_ep)
      return line;

    fmt::format_to(
        sink,
        " | paths/endpoints {}/{}",
        pathContext().CurrentOwnedPaths(),
        default_ep->UniqueEndpoints());

    const auto success_rate = default_ep->CurrentBuildStats().SuccessRatio();
    if (success_rate < 0.5)
    {
      fmt::format_to(
          sink, " [ !!! Low Build Success Rate ({:.1f}%) !!! ]", (100.0 * success_rate));
    }
    return line;
  }

  // service node summary
  fmt::format_to(
      sink,
      " snode | known/svc/clients: {}/{}/{}",
      nodedb()->NumLoaded(),
      NumberOfConnectedRouters(),
      NumberOfConnectedClients());

  fmt::format_to(
      sink,
      " | {} active paths | block {} ",
      pathContext().CurrentTransitPaths(),
      (m_lokidRpcClient ? m_lokidRpcClient->BlockHeight() : 0));

  auto last_gossip = _rcGossiper.LastGossipAt();
  fmt::format_to(
      sink,
      " | gossip: (next/last) {} / {}",
      short_time_from_now(_rcGossiper.NextGossipAt()),
      last_gossip ? short_time_from_now(*last_gossip) : "never");

  return line;
}
void
Router::Tick()
{
@ -884,57 +938,7 @@ namespace llarp
Thaw();
}
#if defined(WITH_SYSTEMD)
{
std::string status;
auto out = std::back_inserter(status);
fmt::format_to(out, "WATCHDOG=1\nSTATUS=v{}", llarp::VERSION_STR);
if (IsServiceNode())
{
fmt::format_to(
out,
" snode | known/svc/clients: {}/{}/{}",
nodedb()->NumLoaded(),
NumberOfConnectedRouters(),
NumberOfConnectedClients());
fmt::format_to(
out,
" | {} active paths | block {} ",
pathContext().CurrentTransitPaths(),
(m_lokidRpcClient ? m_lokidRpcClient->BlockHeight() : 0));
auto maybe_last = _rcGossiper.LastGossipAt();
fmt::format_to(
out,
" | gossip: (next/last) {} / {}",
short_time_from_now(_rcGossiper.NextGossipAt()),
maybe_last ? short_time_from_now(*maybe_last) : "never");
}
else
{
fmt::format_to(
out,
" client | known/connected: {}/{}",
nodedb()->NumLoaded(),
NumberOfConnectedRouters());
if (auto ep = hiddenServiceContext().GetDefault())
{
fmt::format_to(
out,
" | paths/endpoints {}/{}",
pathContext().CurrentOwnedPaths(),
ep->UniqueEndpoints());
if (auto success_rate = ep->CurrentBuildStats().SuccessRatio(); success_rate < 0.5)
{
fmt::format_to(
out, " [ !!! Low Build Success Rate ({:.1f}%) !!! ]", (100.0 * success_rate));
}
};
}
::sd_notify(0, status.c_str());
}
#endif
llarp::sys::service_manager->report_periodic_stats();
m_PathBuildLimiter.Decay(now);
@ -1399,9 +1403,6 @@ namespace llarp
m_RoutePoker->Start(this);
_running.store(true);
_startedAt = Now();
#if defined(WITH_SYSTEMD)
::sd_notify(0, "READY=1");
#endif
if (whitelistRouters)
{
// do service node testing if we are in service node whitelist mode
@ -1474,6 +1475,7 @@ namespace llarp
}
});
}
llarp::sys::service_manager->ready();
return _running;
}
@ -1525,9 +1527,7 @@ namespace llarp
if (log::get_level_default() != log::Level::off)
log::reset_level(log::Level::info);
LogWarn("stopping router hard");
#if defined(WITH_SYSTEMD)
sd_notify(0, "STOPPING=1\nSTATUS=Shutting down HARD");
#endif
llarp::sys::service_manager->stopping();
hiddenServiceContext().StopAll();
_exitContext.Stop();
StopLinks();
@ -1546,9 +1546,7 @@ namespace llarp
if (log::get_level_default() != log::Level::off)
log::reset_level(log::Level::info);
LogInfo("stopping router");
#if defined(WITH_SYSTEMD)
sd_notify(0, "STOPPING=1\nSTATUS=Shutting down");
#endif
llarp::sys::service_manager->stopping();
hiddenServiceContext().StopAll();
_exitContext.Stop();
paths.PumpUpstream();

@ -35,6 +35,7 @@
#include <llarp/util/status.hpp>
#include <llarp/util/str.hpp>
#include <llarp/util/time.hpp>
#include <llarp/util/service_manager.hpp>
#include <functional>
#include <list>
@ -315,6 +316,9 @@ namespace llarp
RCLookupHandler _rcLookupHandler;
RCGossiper _rcGossiper;
std::string
status_line() override;
using Clock_t = std::chrono::steady_clock;
using TimePoint_t = Clock_t::time_point;

@ -0,0 +1,7 @@
#include "service_manager.hpp"
namespace llarp::sys
{
// the one no-op manager object in this translation unit
NOP_SystemLayerHandler _manager{};
// definition of the global manager pointer declared in service_manager.hpp
I_SystemLayerManager* const service_manager = &_manager;
} // namespace llarp::sys

@ -0,0 +1,118 @@
#pragma once
namespace llarp
{
struct Context;
}
namespace llarp::sys
{
// what state lokinet will report we are in to the system layer
enum class ServiceState
{
// state before any transition has been reported
Initial,
// reported by starting()
Starting,
// reported by ready()
Running,
// reported by stopping()
Stopping,
// reported by stopped()
Stopped,
// NOTE(review): no helper in this header reports this state -- presumably
// reserved for a hard shutdown; confirm intended use
HardStop,
// reported by failed()
Failed,
};
/// Interface between lokinet and the OS dependent system layer.
///
/// Lokinet announces lifecycle changes through starting() / ready() /
/// stopping() / stopped() / failed(); each forwards to the implementation's
/// we_changed_our_state() unless reporting was switched off with disable().
class I_SystemLayerManager
{
 protected:
  /// when true the lifecycle helpers report nothing
  bool m_disable{false};
  /// context passed to give_context(); stored as given, never deleted here
  llarp::Context* m_Context{nullptr};

  /// change our state and report it to the system layer
  virtual void
  we_changed_our_state(ServiceState st) = 0;

 public:
  virtual ~I_SystemLayerManager() = default;

  /// disable all reporting to system layer
  void
  disable()
  {
    m_disable = true;
  }

  /// give our current lokinet context to the system layer manager
  void
  give_context(llarp::Context* ctx)
  {
    m_Context = ctx;
  }

  /// system told us to enter this state
  virtual void
  system_changed_our_state(ServiceState st) = 0;

  /// report our current state to the system layer
  virtual void
  report_changed_state() = 0;

  /// report our stats on each timer tick; does nothing unless overridden
  virtual void
  report_periodic_stats()
  {}

  /// announce that we are starting up
  void
  starting()
  {
    announce(ServiceState::Starting);
  }

  /// announce that we are up and running
  void
  ready()
  {
    announce(ServiceState::Running);
  }

  /// announce that we have begun shutting down
  void
  stopping()
  {
    announce(ServiceState::Stopping);
  }

  /// announce that we have finished shutting down
  void
  stopped()
  {
    announce(ServiceState::Stopped);
  }

  /// announce that we have failed
  void
  failed()
  {
    announce(ServiceState::Failed);
  }

 private:
  /// forward a state change to the implementation unless reporting is disabled
  void
  announce(ServiceState st)
  {
    if (not m_disable)
      we_changed_our_state(st);
  }
};
/// the global manager instance; defined in the platform specific
/// implementation source that gets compiled in
extern I_SystemLayerManager* const service_manager;
/// Manager implementation in which every hook is a no-op.
class NOP_SystemLayerHandler : public I_SystemLayerManager
{
 protected:
  /// state changes are accepted and ignored
  void
  we_changed_our_state(ServiceState) override
  {}

 public:
  /// nothing to report
  void
  report_changed_state() override
  {}

  /// requests from the system layer are ignored
  void
  system_changed_our_state(ServiceState) override
  {}
};
} // namespace llarp::sys

@ -0,0 +1,85 @@
#include <windows.h>
#include <llarp.hpp>
#include "service_manager.hpp"
#include <dbghelp.h>
#include <cassert>
#include <csignal>
#include <optional>
namespace llarp::sys
{
namespace
{
/// Map a lokinet service state onto the matching win32 SERVICE_* state code.
///
/// @return the win32 state for Starting, Running, Stopping and Stopped;
///         std::nullopt for every other state
std::optional<DWORD>
to_win32_state(ServiceState st)
{
  if (st == ServiceState::Starting)
    return SERVICE_START_PENDING;
  if (st == ServiceState::Running)
    return SERVICE_RUNNING;
  if (st == ServiceState::Stopping)
    return SERVICE_STOP_PENDING;
  if (st == ServiceState::Stopped)
    return SERVICE_STOPPED;

  return std::nullopt;
}
} // namespace
/// Set the service type in the status structure; no other code in this file
/// writes dwServiceType afterwards.
SVC_Manager::SVC_Manager()
{
_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
}
/// Handle a state change requested by the system layer.
/// Only a request to stop is acted upon, and only while reporting is enabled.
void
SVC_Manager::system_changed_our_state(ServiceState st)
{
  if (m_disable or st != ServiceState::Stopping)
    return;

  // report the new state first, then signal the context
  we_changed_our_state(st);
  // NOTE(review): m_Context is dereferenced without a null check -- confirm a
  // context has always been given before a stop request can arrive
  m_Context->HandleSignal(SIGINT);
}
/// Pass the current status structure to SetServiceStatus, unless reporting
/// is disabled.
void
SVC_Manager::report_changed_state()
{
  if (not m_disable)
    SetServiceStatus(handle, &_status);
}
/// Record a state change made by lokinet and report it to the service
/// control manager.
///
/// Failed only fills in the service specific exit code. States that
/// to_win32_state() cannot map are ignored. Reporting the state we are
/// already in is allowed: a stop requested by the system layer reports
/// Stopping here before the router reports it again, and each repeat of a
/// pending state bumps the checkpoint.
///
/// @param st the state lokinet has entered
void
SVC_Manager::we_changed_our_state(ServiceState st)
{
  // tell windows it takes 5s at most to start or stop
  constexpr DWORD pending_wait_hint_ms = 5000;

  if (st == ServiceState::Failed)
  {
    _status.dwWin32ExitCode = ERROR_SERVICE_SPECIFIC_ERROR;
    _status.dwServiceSpecificExitCode = 2;  // TODO: propagate more info ?
    report_changed_state();
    return;
  }

  const auto maybe_state = to_win32_state(st);
  if (not maybe_state)
    return;

  const bool pending = st == ServiceState::Starting or st == ServiceState::Stopping;

  _status.dwCurrentState = *maybe_state;

  // without SERVICE_ACCEPT_STOP the service never advertises that it accepts
  // a stop request; accept nothing only while start is still pending
  _status.dwControlsAccepted = (st == ServiceState::Starting) ? 0 : SERVICE_ACCEPT_STOP;

  // a wait hint only makes sense while a transition is in progress
  _status.dwWaitHint = pending ? pending_wait_hint_ms : 0;

  // bump the checkpoint on every pending report, reset it once settled
  if (pending)
    _status.dwCheckPoint++;
  else
    _status.dwCheckPoint = 0;

  report_changed_state();
}
// the one windows service manager object in this translation unit
SVC_Manager _manager{};
// definition of the global manager pointer declared in service_manager.hpp
I_SystemLayerManager* const service_manager = &_manager;
} // namespace llarp::sys

@ -0,0 +1,24 @@
#pragma once
#include <llarp/util/service_manager.hpp>
namespace llarp::sys
{
/// Windows service flavour of the system layer manager: keeps a
/// SERVICE_STATUS structure up to date and passes it to SetServiceStatus.
class SVC_Manager : public I_SystemLayerManager
{
  /// status structure handed to SetServiceStatus. Value-initialized so that
  /// fields read before being written (e.g. the checkpoint counter) start
  /// from zero for every instance, not only ones with static storage.
  SERVICE_STATUS _status{};

 public:
  /// handle used for SetServiceStatus; empty until the owner assigns the
  /// result of RegisterServiceCtrlHandler to it
  SERVICE_STATUS_HANDLE handle{};

  SVC_Manager();

  /// system told us to enter this state
  void
  system_changed_our_state(ServiceState st) override;

  /// report our current state to the system layer
  void
  report_changed_state() override;

  /// change our state and report it to the system layer
  void
  we_changed_our_state(ServiceState st) override;
};
} // namespace llarp::sys

@ -2,6 +2,7 @@
#include <catch2/catch.hpp>
#include <util/logging.hpp>
#include <util/service_manager.hpp>
#ifdef _WIN32
#include <winsock2.h>
@ -23,6 +24,7 @@ startWinsock()
int
main(int argc, char* argv[])
{
llarp::sys::service_manager->disable();
llarp::log::reset_level(llarp::log::Level::off);
#ifdef _WIN32

Loading…
Cancel
Save