From c5e787b8cb16510d7472870395724778a5b118bd Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 14 Oct 2022 20:55:21 -0300 Subject: [PATCH] Oxend error ping + unfunded tracking Currently (from a recent PR) we aren't pinging oxend if not active, but that behaviour ended up being quite wrong because lokinet needs to ping even when decommissioned or deregistered (when decommissioned we need the ping to get commissioned again, and if not registered we need the ping to get past the "lokinet isn't pinging" nag screen to prepare a registration). This considerably revises the pinging behaviour: - We ping oxend *unless* there is a specific error with our connections (i.e. we *should* be establishing peer connections but don't have any) - If we do have such an error, we send a new oxend "error" ping to report the error to oxend and get oxend to hold off on sending uptime proofs. Along the way this also changes how we handle the current node state: instead of just tracking deregistered/decommissioned, we now track three states: - LooksRegistered -- which means the SN is known to the network (but not necessarily active or fully staked) - LooksFunded -- which means it is known *and* is fully funded, but not necessarily active - LooksDecommissioned -- which means it is known, funded, and not currently active (which implies decommissioned). The funded (or more precisely, unfunded) state is now tracked in rc_lookup_handler in a "greenlist" -- i.e. new SNs that are so new (i.e. "green") that they aren't even fully staked or active yet. --- llarp/router/abstractrouter.hpp | 18 ++++---- llarp/router/i_rc_lookup_handler.hpp | 10 ++++- llarp/router/rc_lookup_handler.cpp | 40 ++++++++++++------ llarp/router/rc_lookup_handler.hpp | 21 +++++++++- llarp/router/router.cpp | 49 +++++++++++++--------- llarp/router/router.hpp | 28 ++++++++----- llarp/rpc/lokid_rpc_client.cpp | 62 +++++++++++++++------------- 7 files changed, 147 insertions(+), 81 deletions(-) diff --git a/llarp/router/abstractrouter.hpp b/llarp/router/abstractrouter.hpp index 022e6767a..7f6a368a7 100644 --- a/llarp/router/abstractrouter.hpp +++ b/llarp/router/abstractrouter.hpp @@ -199,14 +199,12 @@ namespace llarp virtual bool IsServiceNode() const = 0; - virtual bool - IsActiveServiceNode() const = 0; - - /// If we are running as a service node and appear active, i.e. registered and not - /// decommissioned, we should *not* ping core if we know of too few peers, to indicate to core - /// we are not in a good state. - virtual bool - ShouldPingOxen() const = 0; + /// Called to determine if we're in a bad state (which gets reported to our oxend) that should + /// prevent uptime proofs from going out to the network (so that the error state gets noticed). + /// Currently this means we require a decent number of peers whenever we are fully staked + /// (active or decommed). + virtual std::optional + OxendErrorState() const = 0; virtual bool StartRpcServer() = 0; @@ -315,7 +313,9 @@ namespace llarp /// set router's service node whitelist virtual void SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) = 0; + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& unfundedlist) = 0; virtual std::unordered_set GetRouterWhitelist() const = 0; diff --git a/llarp/router/i_rc_lookup_handler.hpp b/llarp/router/i_rc_lookup_handler.hpp index ffc5a3a4b..4efee04c6 100644 --- a/llarp/router/i_rc_lookup_handler.hpp +++ b/llarp/router/i_rc_lookup_handler.hpp @@ -34,7 +34,9 @@ namespace llarp virtual void SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) = 0; + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& greenlist) = 0; virtual void GetRC(const RouterID& router, RCRequestCallback callback, bool forceLookup = false) = 0; @@ -48,6 +50,12 @@ namespace llarp virtual bool IsGreylisted(const RouterID& remote) const = 0; + virtual bool + IsGreenlisted(const RouterID& remote) const = 0; + + virtual bool + IsRegistered(const RouterID& remote) const = 0; + virtual bool CheckRC(const RouterContact& rc) const = 0; diff --git a/llarp/router/rc_lookup_handler.cpp b/llarp/router/rc_lookup_handler.cpp index 48150090f..dc0285972 100644 --- a/llarp/router/rc_lookup_handler.cpp +++ b/llarp/router/rc_lookup_handler.cpp @@ -32,26 +32,28 @@ namespace llarp whitelistRouters.erase(router); } + static void + loadColourList(std::unordered_set& beigelist, const std::vector& new_beige) + { + beigelist.clear(); + beigelist.insert(new_beige.begin(), new_beige.end()); + } + void RCLookupHandler::SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& greenlist) { if (whitelist.empty()) return; util::Lock l(_mutex); - whitelistRouters.clear(); - greylistRouters.clear(); - for (auto& router : whitelist) - { - whitelistRouters.emplace(router); - } - for (auto& router : greylist) - { - greylistRouters.emplace(router); - } + loadColourList(whitelistRouters, whitelist); + loadColourList(greylistRouters, greylist); + loadColourList(greenlistRouters, greenlist); - LogInfo("lokinet service node list now has ", whitelistRouters.size(), " routers"); + LogInfo("lokinet service node list now has ", whitelistRouters.size(), " active routers"); } bool @@ -140,6 +142,20 @@ namespace llarp return greylistRouters.count(remote); } + bool + RCLookupHandler::IsGreenlisted(const RouterID& remote) const + { + util::Lock lock{_mutex}; + return greenlistRouters.count(remote); + } + + bool + RCLookupHandler::IsRegistered(const RouterID& remote) const + { + util::Lock lock{_mutex}; + return whitelistRouters.count(remote) || greylistRouters.count(remote) || greenlistRouters.count(remote); + } + bool RCLookupHandler::PathIsAllowed(const RouterID& remote) const { diff --git a/llarp/router/rc_lookup_handler.hpp b/llarp/router/rc_lookup_handler.hpp index 8beee74b7..184b8d102 100644 --- a/llarp/router/rc_lookup_handler.hpp +++ b/llarp/router/rc_lookup_handler.hpp @@ -42,8 +42,11 @@ namespace llarp void SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) override - EXCLUDES(_mutex); + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& greenlist + + ) override EXCLUDES(_mutex); bool HaveReceivedWhitelist() const override; @@ -61,6 +64,16 @@ namespace llarp bool IsGreylisted(const RouterID& remote) const override EXCLUDES(_mutex); + // "greenlist" = new routers (i.e. "green") that aren't fully funded yet + bool + IsGreenlisted(const RouterID& remote) const override EXCLUDES(_mutex); + + // registered just means that there is at least an operator stake, but doesn't require the node + // be fully funded, active, or not decommed. (In other words: it is any of the white, grey, or + // green list). + bool + IsRegistered(const RouterID& remote) const override EXCLUDES(_mutex); + bool CheckRC(const RouterContact& rc) const override; @@ -134,8 +147,12 @@ namespace llarp bool useWhitelist = false; bool isServiceNode = false; + // whitelist = active routers std::unordered_set whitelistRouters GUARDED_BY(_mutex); + // greylist = fully funded, but decommissioned routers std::unordered_set greylistRouters GUARDED_BY(_mutex); + // greenlist = registered but not fully-staked routers + std::unordered_set greenlistRouters GUARDED_BY(_mutex); using TimePoint = std::chrono::steady_clock::time_point; std::unordered_map _routerLookupTimes; diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp index 0349e1179..b272e36e8 100644 --- a/llarp/router/router.cpp +++ b/llarp/router/router.cpp @@ -471,16 +471,14 @@ namespace llarp return nodedb()->NumLoaded() < KnownPeerWarningThreshold; } - bool - Router::IsActiveServiceNode() const + std::optional + Router::OxendErrorState() const { - return IsServiceNode() and not(LooksDeregistered() or LooksDecommissioned()); - } - - bool - Router::ShouldPingOxen() const - { - return IsActiveServiceNode() and not TooFewPeers(); + // If we're in the white or gray list then we *should* be establishing connections to other + // routers, so if we have almost no peers then something is almost certainly wrong. + if (LooksFunded() and TooFewPeers()) + return "too few peer connections; lokinet is not adequately connected to the network"; + return std::nullopt; } void @@ -508,10 +506,17 @@ namespace llarp } bool - Router::LooksDeregistered() const + Router::LooksFunded() const { return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist() - and not _rcLookupHandler.SessionIsAllowed(pubkey()); + and _rcLookupHandler.SessionIsAllowed(pubkey()); + } + + bool + Router::LooksRegistered() const + { + return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist() + and _rcLookupHandler.IsRegistered(pubkey()); } bool @@ -1061,12 +1066,16 @@ namespace llarp if (now >= m_NextDecommissionWarn) { constexpr auto DecommissionWarnInterval = 5min; - if (auto dereg = LooksDeregistered(); dereg or decom) + if (auto registered = LooksRegistered(), funded = LooksFunded(); + not(registered and funded and not decom)) { - // complain about being deregistered - LogError( - "We are running as a service node but we seem to be ", - dereg ? "deregistered" : "decommissioned"); + // complain about being deregistered/decommed/unfunded + log::error( + logcat, + "We are running as a service node but we seem to be {}", + not registered ? "deregistered" + : decom ? "decommissioned" + : "not fully staked"); m_NextDecommissionWarn = now + DecommissionWarnInterval; } else if (isSvcNode and TooFewPeers()) @@ -1081,7 +1090,7 @@ namespace llarp // if we need more sessions to routers and we are not a service node kicked from the network // we shall connect out to others - if (connected < connectToNum and not LooksDeregistered()) + if (connected < connectToNum and LooksFunded()) { size_t dlt = connectToNum - connected; LogDebug("connecting to ", dlt, " random routers to keep alive"); @@ -1233,9 +1242,11 @@ namespace llarp void Router::SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& unfundedlist) { - _rcLookupHandler.SetRouterWhitelist(whitelist, greylist); + _rcLookupHandler.SetRouterWhitelist(whitelist, greylist, unfundedlist); } bool diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp index 8eb751410..53b24e4aa 100644 --- a/llarp/router/router.hpp +++ b/llarp/router/router.hpp @@ -143,7 +143,9 @@ namespace llarp void SetRouterWhitelist( - const std::vector& whitelist, const std::vector& greylist) override; + const std::vector& whitelist, + const std::vector& greylist, + const std::vector& unfunded) override; std::unordered_set GetRouterWhitelist() const override @@ -203,9 +205,16 @@ namespace llarp bool LooksDecommissioned() const; - /// return true if we look like we are a deregistered service node + /// return true if we look like we are a registered, fully-staked service node (either active or + /// decommissioned). This condition determines when we are allowed to (and attempt to) connect + /// to other peers when running as a service node. bool - LooksDeregistered() const; + LooksFunded() const; + + /// return true if we a registered service node; not that this only requires a partial stake, + /// and does not imply that this service node is *active* or fully funded. + bool + LooksRegistered() const; /// return true if we look like we are allowed and able to test other routers bool @@ -378,12 +387,8 @@ namespace llarp bool IsServiceNode() const override; - /// return true if service node *and* not deregistered or decommissioned - bool - IsActiveServiceNode() const override; - - bool - ShouldPingOxen() const override; + std::optional + OxendErrorState() const override; void Close(); @@ -556,8 +561,11 @@ namespace llarp bool m_isServiceNode = false; + // Delay warning about being decommed/dereged until we've had enough time to sync up with oxend + static constexpr auto DECOMM_WARNING_STARTUP_DELAY = 15s; + llarp_time_t m_LastStatsReport = 0s; - llarp_time_t m_NextDecommissionWarn = 0s; + llarp_time_t m_NextDecommissionWarn = time_now_ms() + DECOMM_WARNING_STARTUP_DELAY; std::shared_ptr m_keyManager; std::shared_ptr m_peerDb; diff --git a/llarp/rpc/lokid_rpc_client.cpp b/llarp/rpc/lokid_rpc_client.cpp index 78b3e47ff..a40c33383 100644 --- a/llarp/rpc/lokid_rpc_client.cpp +++ b/llarp/rpc/lokid_rpc_client.cpp @@ -174,25 +174,27 @@ namespace llarp auto makePingRequest = [self = shared_from_this()]() { // send a ping PubKey pk{}; - bool should_ping = false; - if (auto r = self->m_Router.lock()) - { - pk = r->pubkey(); - should_ping = r->ShouldPingOxen(); - } - if (should_ping) - { - nlohmann::json payload = { - {"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())}, - {"version", {VERSION[0], VERSION[1], VERSION[2]}}}; - self->Request( - "admin.lokinet_ping", - [](bool success, std::vector data) { - (void)data; - LogDebug("Received response for ping. Successful: ", success); - }, - payload.dump()); - } + auto r = self->m_Router.lock(); + if (not r) + return; // router has gone away, maybe shutting down? + + pk = r->pubkey(); + + nlohmann::json payload = { + {"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())}, + {"version", {VERSION[0], VERSION[1], VERSION[2]}}}; + + if (auto err = r->OxendErrorState()) + payload["error"] = *err; + + self->Request( + "admin.lokinet_ping", + [](bool success, std::vector data) { + (void)data; + LogDebug("Received response for ping. Successful: ", success); + }, + payload.dump()); + // subscribe to block updates self->Request("sub.block", [](bool success, std::vector data) { if (data.empty() or not success) @@ -216,18 +218,13 @@ namespace llarp LokidRpcClient::HandleNewServiceNodeList(const nlohmann::json& j) { std::unordered_map keymap; - std::vector activeNodeList, nonActiveNodeList; + std::vector activeNodeList, decommNodeList, unfundedNodeList; if (not j.is_array()) throw std::runtime_error{ "Invalid service node list: expected array of service node states"}; for (auto& snode : j) { - // Skip unstaked snodes: - if (const auto funded_itr = snode.find("funded"); funded_itr == snode.end() - or not funded_itr->is_boolean() or not funded_itr->get()) - continue; - const auto ed_itr = snode.find("pubkey_ed25519"); if (ed_itr == snode.end() or not ed_itr->is_string()) continue; @@ -238,6 +235,10 @@ namespace llarp if (active_itr == snode.end() or not active_itr->is_boolean()) continue; const bool active = active_itr->get(); + const auto funded_itr = snode.find("funded"); + if (funded_itr == snode.end() or not funded_itr->is_boolean()) + continue; + const bool funded = funded_itr->get(); RouterID rid; PubKey pk; @@ -246,7 +247,10 @@ namespace llarp continue; keymap[rid] = pk; - (active ? activeNodeList : nonActiveNodeList).push_back(std::move(rid)); + (active ? activeNodeList + : funded ? decommNodeList + : unfundedNodeList) + .push_back(std::move(rid)); } if (activeNodeList.empty()) @@ -254,17 +258,19 @@ namespace llarp LogWarn("got empty service node list, ignoring."); return; } + // inform router about the new list if (auto router = m_Router.lock()) { auto& loop = router->loop(); loop->call([this, active = std::move(activeNodeList), - inactive = std::move(nonActiveNodeList), + decomm = std::move(decommNodeList), + unfunded = std::move(unfundedNodeList), keymap = std::move(keymap), router = std::move(router)]() mutable { m_KeyMap = std::move(keymap); - router->SetRouterWhitelist(active, inactive); + router->SetRouterWhitelist(active, decomm, unfunded); }); } else