Oxend error ping + unfunded tracking

Currently (from a recent PR) we aren't pinging oxend if not active, but
that behaviour ended up being quite wrong because lokinet needs to ping
even when decommissioned or deregistered (when decommissioned we need
the ping to get commissioned again, and if not registered we need the
ping to get past the "lokinet isn't pinging" nag screen to prepare a
registration).

This considerably revises the pinging behaviour:

- We ping oxend *unless* there is a specific error with our connections
  (i.e. we *should* be establishing peer connections but don't have any)
- If we do have such an error, we send a new oxend "error" ping to
  report the error to oxend and get oxend to hold off on sending uptime
  proofs.

Along the way this also changes how we handle the current node state:
instead of just tracking deregistered/decommissioned, we now track three
states:

- LooksRegistered -- which means the SN is known to the network (but not
  necessarily active or fully staked)
- LooksFunded -- which means it is known *and* is fully funded, but not
  necessarily active
- LooksDecommissioned -- which means it is known, funded, and not
  currently active (which implies decommissioned).

The funded (or more precisely, unfunded) state is now tracked in
rc_lookup_handler in a "greenlist" -- i.e. new SNs that are so new (i.e.
"green") that they aren't even fully staked or active yet.
pull/2015/head
Jason Rhinelander 2 years ago
parent bd869b3b07
commit c5e787b8cb

@ -199,14 +199,12 @@ namespace llarp
virtual bool
IsServiceNode() const = 0;
virtual bool
IsActiveServiceNode() const = 0;
/// If we are running as a service node and appear active, i.e. registered and not
/// decommissioned, we should *not* ping core if we know of too few peers, to indicate to core
/// we are not in a good state.
virtual bool
ShouldPingOxen() const = 0;
/// Called to determine if we're in a bad state (which gets reported to our oxend) that should
/// prevent uptime proofs from going out to the network (so that the error state gets noticed).
/// Currently this means we require a decent number of peers whenever we are fully staked
/// (active or decommed).
virtual std::optional<std::string>
OxendErrorState() const = 0;
virtual bool
StartRpcServer() = 0;
@ -315,7 +313,9 @@ namespace llarp
/// set router's service node whitelist
virtual void
SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) = 0;
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfundedlist) = 0;
virtual std::unordered_set<RouterID>
GetRouterWhitelist() const = 0;

@ -34,7 +34,9 @@ namespace llarp
virtual void
SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) = 0;
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist) = 0;
virtual void
GetRC(const RouterID& router, RCRequestCallback callback, bool forceLookup = false) = 0;
@ -48,6 +50,12 @@ namespace llarp
virtual bool
IsGreylisted(const RouterID& remote) const = 0;
virtual bool
IsGreenlisted(const RouterID& remote) const = 0;
virtual bool
IsRegistered(const RouterID& remote) const = 0;
virtual bool
CheckRC(const RouterContact& rc) const = 0;

@ -32,26 +32,28 @@ namespace llarp
whitelistRouters.erase(router);
}
static void
loadColourList(std::unordered_set<RouterID>& beigelist, const std::vector<RouterID>& new_beige)
{
beigelist.clear();
beigelist.insert(new_beige.begin(), new_beige.end());
}
void
RCLookupHandler::SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist)
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist)
{
if (whitelist.empty())
return;
util::Lock l(_mutex);
whitelistRouters.clear();
greylistRouters.clear();
for (auto& router : whitelist)
{
whitelistRouters.emplace(router);
}
for (auto& router : greylist)
{
greylistRouters.emplace(router);
}
loadColourList(whitelistRouters, whitelist);
loadColourList(greylistRouters, greylist);
loadColourList(greenlistRouters, greenlist);
LogInfo("lokinet service node list now has ", whitelistRouters.size(), " routers");
LogInfo("lokinet service node list now has ", whitelistRouters.size(), " active routers");
}
bool
@ -140,6 +142,20 @@ namespace llarp
return greylistRouters.count(remote);
}
bool
RCLookupHandler::IsGreenlisted(const RouterID& remote) const
{
util::Lock lock{_mutex};
return greenlistRouters.count(remote);
}
bool
RCLookupHandler::IsRegistered(const RouterID& remote) const
{
util::Lock lock{_mutex};
return whitelistRouters.count(remote) || greylistRouters.count(remote) || greenlistRouters.count(remote);
}
bool
RCLookupHandler::PathIsAllowed(const RouterID& remote) const
{

@ -42,8 +42,11 @@ namespace llarp
void
SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) override
EXCLUDES(_mutex);
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& greenlist
) override EXCLUDES(_mutex);
bool
HaveReceivedWhitelist() const override;
@ -61,6 +64,16 @@ namespace llarp
bool
IsGreylisted(const RouterID& remote) const override EXCLUDES(_mutex);
// "greenlist" = new routers (i.e. "green") that aren't fully funded yet
bool
IsGreenlisted(const RouterID& remote) const override EXCLUDES(_mutex);
// registered just means that there is at least an operator stake, but doesn't require the node
// be fully funded, active, or not decommed. (In other words: it is any of the white, grey, or
// green list).
bool
IsRegistered(const RouterID& remote) const override EXCLUDES(_mutex);
bool
CheckRC(const RouterContact& rc) const override;
@ -134,8 +147,12 @@ namespace llarp
bool useWhitelist = false;
bool isServiceNode = false;
// whitelist = active routers
std::unordered_set<RouterID> whitelistRouters GUARDED_BY(_mutex);
// greylist = fully funded, but decommissioned routers
std::unordered_set<RouterID> greylistRouters GUARDED_BY(_mutex);
// greenlist = registered but not fully-staked routers
std::unordered_set<RouterID> greenlistRouters GUARDED_BY(_mutex);
using TimePoint = std::chrono::steady_clock::time_point;
std::unordered_map<RouterID, TimePoint> _routerLookupTimes;

@ -471,16 +471,14 @@ namespace llarp
return nodedb()->NumLoaded() < KnownPeerWarningThreshold;
}
bool
Router::IsActiveServiceNode() const
std::optional<std::string>
Router::OxendErrorState() const
{
return IsServiceNode() and not(LooksDeregistered() or LooksDecommissioned());
}
bool
Router::ShouldPingOxen() const
{
return IsActiveServiceNode() and not TooFewPeers();
// If we're in the white or gray list then we *should* be establishing connections to other
// routers, so if we have almost no peers then something is almost certainly wrong.
if (LooksFunded() and TooFewPeers())
return "too few peer connections; lokinet is not adequately connected to the network";
return std::nullopt;
}
void
@ -508,10 +506,17 @@ namespace llarp
}
bool
Router::LooksDeregistered() const
Router::LooksFunded() const
{
return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist()
and not _rcLookupHandler.SessionIsAllowed(pubkey());
and _rcLookupHandler.SessionIsAllowed(pubkey());
}
bool
Router::LooksRegistered() const
{
return IsServiceNode() and whitelistRouters and _rcLookupHandler.HaveReceivedWhitelist()
and _rcLookupHandler.IsRegistered(pubkey());
}
bool
@ -1061,12 +1066,16 @@ namespace llarp
if (now >= m_NextDecommissionWarn)
{
constexpr auto DecommissionWarnInterval = 5min;
if (auto dereg = LooksDeregistered(); dereg or decom)
if (auto registered = LooksRegistered(), funded = LooksFunded();
not(registered and funded and not decom))
{
// complain about being deregistered
LogError(
"We are running as a service node but we seem to be ",
dereg ? "deregistered" : "decommissioned");
// complain about being deregistered/decommed/unfunded
log::error(
logcat,
"We are running as a service node but we seem to be {}",
not registered ? "deregistered"
: decom ? "decommissioned"
: "not fully staked");
m_NextDecommissionWarn = now + DecommissionWarnInterval;
}
else if (isSvcNode and TooFewPeers())
@ -1081,7 +1090,7 @@ namespace llarp
// if we need more sessions to routers and we are not a service node kicked from the network
// we shall connect out to others
if (connected < connectToNum and not LooksDeregistered())
if (connected < connectToNum and LooksFunded())
{
size_t dlt = connectToNum - connected;
LogDebug("connecting to ", dlt, " random routers to keep alive");
@ -1233,9 +1242,11 @@ namespace llarp
void
Router::SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist)
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfundedlist)
{
_rcLookupHandler.SetRouterWhitelist(whitelist, greylist);
_rcLookupHandler.SetRouterWhitelist(whitelist, greylist, unfundedlist);
}
bool

@ -143,7 +143,9 @@ namespace llarp
void
SetRouterWhitelist(
const std::vector<RouterID>& whitelist, const std::vector<RouterID>& greylist) override;
const std::vector<RouterID>& whitelist,
const std::vector<RouterID>& greylist,
const std::vector<RouterID>& unfunded) override;
std::unordered_set<RouterID>
GetRouterWhitelist() const override
@ -203,9 +205,16 @@ namespace llarp
bool
LooksDecommissioned() const;
/// return true if we look like we are a deregistered service node
/// return true if we look like we are a registered, fully-staked service node (either active or
/// decommissioned). This condition determines when we are allowed to (and attempt to) connect
/// to other peers when running as a service node.
bool
LooksDeregistered() const;
LooksFunded() const;
/// return true if we a registered service node; not that this only requires a partial stake,
/// and does not imply that this service node is *active* or fully funded.
bool
LooksRegistered() const;
/// return true if we look like we are allowed and able to test other routers
bool
@ -378,12 +387,8 @@ namespace llarp
bool
IsServiceNode() const override;
/// return true if service node *and* not deregistered or decommissioned
bool
IsActiveServiceNode() const override;
bool
ShouldPingOxen() const override;
std::optional<std::string>
OxendErrorState() const override;
void
Close();
@ -556,8 +561,11 @@ namespace llarp
bool m_isServiceNode = false;
// Delay warning about being decommed/dereged until we've had enough time to sync up with oxend
static constexpr auto DECOMM_WARNING_STARTUP_DELAY = 15s;
llarp_time_t m_LastStatsReport = 0s;
llarp_time_t m_NextDecommissionWarn = 0s;
llarp_time_t m_NextDecommissionWarn = time_now_ms() + DECOMM_WARNING_STARTUP_DELAY;
std::shared_ptr<llarp::KeyManager> m_keyManager;
std::shared_ptr<PeerDb> m_peerDb;

@ -174,25 +174,27 @@ namespace llarp
auto makePingRequest = [self = shared_from_this()]() {
// send a ping
PubKey pk{};
bool should_ping = false;
if (auto r = self->m_Router.lock())
{
pk = r->pubkey();
should_ping = r->ShouldPingOxen();
}
if (should_ping)
{
nlohmann::json payload = {
{"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())},
{"version", {VERSION[0], VERSION[1], VERSION[2]}}};
self->Request(
"admin.lokinet_ping",
[](bool success, std::vector<std::string> data) {
(void)data;
LogDebug("Received response for ping. Successful: ", success);
},
payload.dump());
}
auto r = self->m_Router.lock();
if (not r)
return; // router has gone away, maybe shutting down?
pk = r->pubkey();
nlohmann::json payload = {
{"pubkey_ed25519", oxenc::to_hex(pk.begin(), pk.end())},
{"version", {VERSION[0], VERSION[1], VERSION[2]}}};
if (auto err = r->OxendErrorState())
payload["error"] = *err;
self->Request(
"admin.lokinet_ping",
[](bool success, std::vector<std::string> data) {
(void)data;
LogDebug("Received response for ping. Successful: ", success);
},
payload.dump());
// subscribe to block updates
self->Request("sub.block", [](bool success, std::vector<std::string> data) {
if (data.empty() or not success)
@ -216,18 +218,13 @@ namespace llarp
LokidRpcClient::HandleNewServiceNodeList(const nlohmann::json& j)
{
std::unordered_map<RouterID, PubKey> keymap;
std::vector<RouterID> activeNodeList, nonActiveNodeList;
std::vector<RouterID> activeNodeList, decommNodeList, unfundedNodeList;
if (not j.is_array())
throw std::runtime_error{
"Invalid service node list: expected array of service node states"};
for (auto& snode : j)
{
// Skip unstaked snodes:
if (const auto funded_itr = snode.find("funded"); funded_itr == snode.end()
or not funded_itr->is_boolean() or not funded_itr->get<bool>())
continue;
const auto ed_itr = snode.find("pubkey_ed25519");
if (ed_itr == snode.end() or not ed_itr->is_string())
continue;
@ -238,6 +235,10 @@ namespace llarp
if (active_itr == snode.end() or not active_itr->is_boolean())
continue;
const bool active = active_itr->get<bool>();
const auto funded_itr = snode.find("funded");
if (funded_itr == snode.end() or not funded_itr->is_boolean())
continue;
const bool funded = funded_itr->get<bool>();
RouterID rid;
PubKey pk;
@ -246,7 +247,10 @@ namespace llarp
continue;
keymap[rid] = pk;
(active ? activeNodeList : nonActiveNodeList).push_back(std::move(rid));
(active ? activeNodeList
: funded ? decommNodeList
: unfundedNodeList)
.push_back(std::move(rid));
}
if (activeNodeList.empty())
@ -254,17 +258,19 @@ namespace llarp
LogWarn("got empty service node list, ignoring.");
return;
}
// inform router about the new list
if (auto router = m_Router.lock())
{
auto& loop = router->loop();
loop->call([this,
active = std::move(activeNodeList),
inactive = std::move(nonActiveNodeList),
decomm = std::move(decommNodeList),
unfunded = std::move(unfundedNodeList),
keymap = std::move(keymap),
router = std::move(router)]() mutable {
m_KeyMap = std::move(keymap);
router->SetRouterWhitelist(active, inactive);
router->SetRouterWhitelist(active, decomm, unfunded);
});
}
else

Loading…
Cancel
Save