Sort out peerstats receive <-> expiry windows

pull/1312/head
Stephen Shelton 4 years ago
parent 4b4284ccf4
commit aa1c8f257f
No known key found for this signature in database
GPG Key ID: EE4BADACCE8B631C

@ -145,32 +145,85 @@ namespace llarp
return itr->second;
}
/// Assume we receive an RC at some point `R` in time which was signed at some point `S` in time
/// and expires at some point `E` in time, as depicted below:
///
/// +-----------------------------+
/// | signed rc | <- useful lifetime of RC
/// +-----------------------------+
/// ^ [ . . . . . . . . ] <----------- window in which we receive this RC gossiped to us
/// | ^ ^
/// | | |
/// S R E
///
/// One useful metric from this is the difference between (E - R), the useful contact time of this
/// RC. As we track this metric over time, the high and low watermarks serve to tell us how
/// quickly we receive signed RCs from a given router and how close to expiration they are when
/// we receive them. The latter is particularly useful, and should always be a positive number for
/// a healthy router. A negative number indicates that we are receiving an expired RC.
///
/// TODO: we actually discard expired RCs, so we currently would not detect a negative value for
/// (E - R)
///
/// Another related metric is the distance between a newly received RC and the previous RC's
/// expiration, which represents how close we came to having no useful RC to work with. This
/// should be a high (positive) number for a healthy router, and if negative indicates that we
/// had no way to contact this router for a period of time.
///
/// E1 E2 E3
/// | | |
/// v | |
/// +-----------------------------+ | |
/// | signed rc 1 | | |
/// +-----------------------------+ | |
/// [ . . . . . ] v |
/// ^ +-----------------------------+ |
/// | | signed rc 2 | |
/// | +-----------------------------+ |
/// | [ . . . . . . . . . . ] v
/// | ^ +-----------------------------+
/// | | | signed rc 3 |
/// | | +-----------------------------+
/// | | [ . . ]
/// | | ^
/// | | |
/// R1 R2 R3
///
/// Example: the delta between (E1 - R2) is healthy, but the delta between (E2 - R3) is indicates
/// that we had a brief period of time where we had no valid (non-expired) RC for this router
/// (because it is negative).
void
PeerDb::handleGossipedRC(const RouterContact& rc, llarp_time_t now)
{
std::lock_guard gaurd(m_statsLock);
LogWarn("Handling gossiped RC", rc);
RouterID id(rc.pubkey);
auto& stats = m_peerStats[id];
stats.routerId = id.ToString();
if (stats.lastRCUpdated < rc.last_updated.count())
const bool isNewRC = (stats.lastRCUpdated < rc.last_updated.count());
if (isNewRC)
{
if (stats.numDistinctRCsReceived > 0)
stats.numDistinctRCsReceived++;
if (stats.numDistinctRCsReceived > 1)
{
// we track max expiry as the delta between (time received - last expiration time),
// and this value will often be negative for a healthy router
const int64_t prevRCExpiration = (stats.lastRCUpdated + RouterContact::Lifetime.count());
// we track max expiry as the delta between (last expiration time - time received),
// and this value will be negative for an unhealthy router
// TODO: handle case where new RC is also expired? just ignore?
int64_t expiry = (now.count() - (stats.lastRCUpdated + RouterContact::Lifetime.count()));
stats.mostExpiredRCMs = std::max(stats.mostExpiredRCMs, expiry);
int64_t expiry = (prevRCExpiration - now.count());
if (stats.numDistinctRCsReceived == 1)
if (stats.numDistinctRCsReceived == 2)
stats.mostExpiredRCMs = expiry;
else
stats.mostExpiredRCMs = std::max(stats.mostExpiredRCMs, expiry);
stats.mostExpiredRCMs = std::min(stats.mostExpiredRCMs, expiry);
}
stats.numDistinctRCsReceived++;
stats.lastRCUpdated = rc.last_updated.count();
stats.stale = true;
}
@ -206,10 +259,8 @@ namespace llarp
std::vector<util::StatusObject> statsObjs;
statsObjs.reserve(m_peerStats.size());
LogInfo("Building peer stats...");
for (const auto& pair : m_peerStats)
{
LogInfo("Stat here");
statsObjs.push_back(pair.second.toJson());
}

@ -3,6 +3,7 @@
#include <numeric>
#include <catch2/catch.hpp>
#include "peerstats/types.hpp"
#include "router_contact.hpp"
#include "util/time.hpp"
@ -147,8 +148,70 @@ TEST_CASE("Test PeerDb handleGossipedRC", "[PeerDb]")
db.handleGossipedRC(rc, now);
stats = db.getCurrentPeerStats(id);
// new RC received at 9sec, making it (expiration time - 9 sec) expired (a negative number)
CHECK(stats.value().mostExpiredRCMs == (9s - (now + rcLifetime)).count());
// should be (previous expiration time - new received time)
CHECK(stats.value().mostExpiredRCMs == ((10s + rcLifetime) - now).count());
CHECK(stats.value().numDistinctRCsReceived == 2);
CHECK(stats.value().lastRCUpdated == 11000);
}
TEST_CASE("Test PeerDb handleGossipedRC expiry calcs", "[PeerDb]")
{
const llarp::RouterID id = llarp::test::makeBuf<llarp::RouterID>(0xF9);
// see comments in peer_db.cpp above PeerDb::handleGossipedRC() for some context around these
// tests and esp. these numbers
const llarp_time_t ref = 48h;
const llarp_time_t rcLifetime = llarp::RouterContact::Lifetime;
// rc1, first rc received
const llarp_time_t s1 = ref;
const llarp_time_t r1 = s1 + 30s;
const llarp_time_t e1 = s1 + rcLifetime;
llarp::RouterContact rc1;
rc1.pubkey = llarp::PubKey(id);
rc1.last_updated = s1;
// rc2, second rc received
// received "healthily", with lots of room to spare before rc1 expires
const llarp_time_t s2 = s1 + 8h;
const llarp_time_t r2 = s2 + 30s; // healthy recv time
const llarp_time_t e2 = s2 + rcLifetime;
llarp::RouterContact rc2;
rc2.pubkey = llarp::PubKey(id);
rc2.last_updated = s2;
// rc3, third rc received
// received "unhealthily" (after rc2 expires)
const llarp_time_t s3 = s2 + 8h;
const llarp_time_t r3 = e2 + 1h; // received after e2
const llarp_time_t e3 = s3 + rcLifetime;
llarp::RouterContact rc3;
rc3.pubkey = llarp::PubKey(id);
rc3.last_updated = s3;
llarp::PeerDb db;
db.handleGossipedRC(rc1, r1);
auto stats1 = db.getCurrentPeerStats(id);
CHECK(stats1.has_value());
CHECK(stats1.value().mostExpiredRCMs == 0);
CHECK(stats1.value().numDistinctRCsReceived == 1);
CHECK(stats1.value().lastRCUpdated == s1.count());
db.handleGossipedRC(rc2, r2);
auto stats2 = db.getCurrentPeerStats(id);
CHECK(stats2.has_value());
CHECK(stats2.value().mostExpiredRCMs == (e1 - r2).count());
CHECK(stats2.value().mostExpiredRCMs > 0); // ensure positive indicates healthy
CHECK(stats2.value().numDistinctRCsReceived == 2);
CHECK(stats2.value().lastRCUpdated == s2.count());
db.handleGossipedRC(rc3, r3);
auto stats3 = db.getCurrentPeerStats(id);
CHECK(stats3.has_value());
CHECK(stats3.value().mostExpiredRCMs == (e2 - r3).count());
CHECK(
stats3.value().mostExpiredRCMs < 0); // ensure negative indicates unhealthy and we use min()
CHECK(stats3.value().numDistinctRCsReceived == 3);
CHECK(stats3.value().lastRCUpdated == s3.count());
}

Loading…
Cancel
Save