diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp
index fb763b7fa..700cdc35c 100644
--- a/llarp/nodedb.cpp
+++ b/llarp/nodedb.cpp
@@ -193,43 +193,60 @@ namespace llarp
   }
 
   bool
-  NodeDB::process_fetched_rcs(std::vector& rcs)
+  NodeDB::process_fetched_rcs(std::set& rcs)
   {
-    std::unordered_set inter_set;
+    std::set confirmed_set, unconfirmed_set;
 
+    // the intersection of local RC's and received RC's is our confirmed set
     std::set_intersection(
         known_rcs.begin(),
         known_rcs.end(),
         rcs.begin(),
         rcs.end(),
-        std::inserter(inter_set, inter_set.begin()));
+        std::inserter(confirmed_set, confirmed_set.begin()));
+
+    // the received RC's that are NOT in the confirmed set make up our unconfirmed set
+    std::set_difference(
+        rcs.begin(),
+        rcs.end(),
+        confirmed_set.begin(),
+        confirmed_set.end(),
+        std::inserter(unconfirmed_set, unconfirmed_set.begin()));
 
     // the total number of rcs received
     const auto num_received = static_cast(rcs.size());
     // the number of returned "good" rcs (that are also found locally)
-    const auto inter_size = inter_set.size();
-    // the number of rcs currently held locally
-    const auto local_count = static_cast(known_rcs.size());
+    const auto inter_size = confirmed_set.size();
 
     const auto fetch_threshold = (double)inter_size / num_received;
-    const auto local_alignment = (double)inter_size / local_count;
 
-    /** We are checking 3 things here:
+    /** We are checking 2 things here:
         1) The number of "good" rcs is above MIN_GOOD_RC_FETCH_TOTAL
         2) The ratio of "good" rcs to total received is above MIN_GOOD_RC_FETCH_THRESHOLD
-        3) The ratio of received and found locally to total found locally is above
-           LOCAL_RC_ALIGNMENT_THRESHOLD
     */
-    return inter_size > MIN_GOOD_RC_FETCH_TOTAL and fetch_threshold > MIN_GOOD_RC_FETCH_THRESHOLD
-        and local_alignment > LOCAL_RC_ALIGNMENT_THRESHOLD;
+    bool success = false;
+    if (success =
+            inter_size > MIN_GOOD_RC_FETCH_TOTAL and fetch_threshold > MIN_GOOD_RC_FETCH_THRESHOLD;
+        success)
+    {
+      // set rcs to be intersection set
+      rcs = std::move(confirmed_set);
+
+      process_results(std::move(unconfirmed_set), unconfirmed_rcs, known_rcs);
+    }
+
+    return success;
   }
 
   bool
-  NodeDB::ingest_fetched_rcs(std::vector rcs, rc_time timestamp)
+  NodeDB::ingest_fetched_rcs(std::set rcs, rc_time timestamp)
   {
     // if we are not bootstrapping, we should check the rc's against the ones we currently hold
     if (not _using_bootstrap_fallback)
-    {}
+    {
+      if (not process_fetched_rcs(rcs))
+        return false;
+    }
 
     for (auto& rc : rcs)
       put_rc_if_newer(std::move(rc), timestamp);
@@ -237,19 +254,6 @@ namespace llarp
     return true;
   }
 
-  void
-  NodeDB::ingest_rid_fetch_responses(const RouterID& source, std::unordered_set rids)
-  {
-    if (rids.empty())
-    {
-      fail_sources.insert(source);
-      return;
-    }
-
-    for (const auto& rid : rids)
-      fetch_counters[rid] += 1;
-  }
-
   /** We only call into this function after ensuring two conditions:
         1) We have received all 12 responses from the queried RouterID sources, whether
           that response was a timeout or not
@@ -269,12 +273,14 @@ namespace llarp
   bool
   NodeDB::process_fetched_rids()
   {
-    std::unordered_set union_set, intersection_set;
+    std::set union_set, confirmed_set, unconfirmed_set;
 
     for (const auto& [rid, count] : fetch_counters)
     {
       if (count > MIN_RID_FETCH_FREQ)
         union_set.insert(rid);
+      else
+        unconfirmed_set.insert(rid);
     }
 
     // get the intersection of accepted rids and local rids
@@ -283,33 +289,45 @@ namespace llarp
         known_rids.end(),
         union_set.begin(),
         union_set.end(),
-        std::inserter(intersection_set, intersection_set.begin()));
+        std::inserter(confirmed_set, confirmed_set.begin()));
 
     // the total number of rids received
     const auto num_received = (double)fetch_counters.size();
     // the total number of received AND accepted rids
     const auto union_size = union_set.size();
-    // the number of rids currently held locally
-    const auto local_count = (double)known_rids.size();
-    // the number of accepted rids that are also found locally
-    const auto inter_size = (double)intersection_set.size();
 
     const auto fetch_threshold = (double)union_size / num_received;
-    const auto local_alignment = (double)inter_size / local_count;
 
-    /** We are checking 2, potentially 3 things here:
+    /** We are checking 2 things here:
         1) The ratio of received/accepted to total received is above GOOD_RID_FETCH_THRESHOLD.
            This tells us how well the rid source's sets of rids "agree" with one another
         2) The total number received is above MIN_RID_FETCH_TOTAL. This ensures that we are
            receiving a sufficient amount to make a comparison of any sorts
-        3) If we are not bootstrapping, then the ratio of received/accepted found locally to
-           the total number locally held is above LOCAL_RID_ALIGNMENT_THRESHOLD. This gives us
-           an estimate of how "aligned" the rid source's set of rid's is to ours
     */
-    return (fetch_threshold > GOOD_RID_FETCH_THRESHOLD) and (union_size > MIN_GOOD_RID_FETCH_TOTAL)
-            and (not _using_bootstrap_fallback)
-        ? local_alignment > LOCAL_RID_ALIGNMENT_THRESHOLD
-        : true;
+    bool success = false;
+    if (success = (fetch_threshold > GOOD_RID_FETCH_THRESHOLD)
+            and (union_size > MIN_GOOD_RID_FETCH_TOTAL);
+        success)
+    {
+      process_results(std::move(unconfirmed_set), unconfirmed_rids, known_rids);
+
+      known_rids.merge(confirmed_set);
+    }
+
+    return success;
+  }
+
+  void
+  NodeDB::ingest_rid_fetch_responses(const RouterID& source, std::set rids)
+  {
+    if (rids.empty())
+    {
+      fail_sources.insert(source);
+      return;
+    }
+
+    for (const auto& rid : rids)
+      fetch_counters[rid] += 1;
   }
 
   void
@@ -375,10 +393,10 @@ namespace llarp
             auto btlc = btdc.require("rcs"sv);
             auto timestamp = rc_time{std::chrono::seconds{btdc.require("time"sv)}};
 
-            std::vector rcs;
+            std::set rcs;
 
             while (not btlc.is_finished())
-              rcs.emplace_back(btlc.consume_dict_consumer());
+              rcs.emplace(btlc.consume_dict_consumer());
 
             // if process_fetched_rcs returns false, then the trust model rejected the fetched RC's
             fetch_rcs_result(initial, not ingest_fetched_rcs(std::move(rcs), timestamp));
@@ -447,7 +465,7 @@ namespace llarp
                         "Failed to verify signature for fetch RouterIDs response."};
                 });
 
-            std::unordered_set router_ids;
+            std::set router_ids;
 
             for (const auto& s : router_id_strings)
             {
@@ -644,7 +662,7 @@ namespace llarp
             return;
           }
 
-          std::unordered_set rids;
+          std::set rids;
 
           try
           {
@@ -707,7 +725,7 @@ namespace llarp
   }
 
   void
-  NodeDB::reselect_router_id_sources(std::unordered_set specific)
+  NodeDB::reselect_router_id_sources(std::set specific)
   {
     replace_subset(rid_sources, specific, known_rids, RID_SOURCE_COUNT, csrng);
   }
diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp
index d476298f3..0cd66e74a 100644
--- a/llarp/nodedb.hpp
+++ b/llarp/nodedb.hpp
@@ -29,8 +29,6 @@ namespace llarp
   inline constexpr size_t MIN_GOOD_RC_FETCH_TOTAL{};
   // the ratio of returned rcs found locally to to total returned should be above this ratio
   inline constexpr double MIN_GOOD_RC_FETCH_THRESHOLD{};
-  // the ratio of returned rcs that are found locally to total held locally should above this ratio
-  inline constexpr double LOCAL_RC_ALIGNMENT_THRESHOLD{};
 
   /* RID Fetch Constants */
   inline constexpr size_t MIN_ACTIVE_RIDS{24};
@@ -39,14 +37,11 @@ namespace llarp
   // upper limit on how many rid fetch requests to rid sources can fail
   inline constexpr size_t MAX_RID_ERRORS{4};
   // each returned rid must appear this number of times across all responses
-  inline constexpr int MIN_RID_FETCH_FREQ{6};
+  inline constexpr int MIN_RID_FETCH_FREQ{RID_SOURCE_COUNT - MAX_RID_ERRORS - 1};
   // the total number of accepted returned rids should be above this number
   inline constexpr size_t MIN_GOOD_RID_FETCH_TOTAL{};
   // the ratio of accepted:rejected rids must be above this ratio
   inline constexpr double GOOD_RID_FETCH_THRESHOLD{};
-  // if we are not bootstrapping, the ratio of accepted:local rids must be above this ratio
-  inline constexpr double LOCAL_RID_ALIGNMENT_THRESHOLD{};
-
   /* Bootstrap Constants */
   // the number of rc's we query the bootstrap for
   inline constexpr size_t BOOTSTRAP_SOURCE_COUNT{50};
@@ -55,8 +50,53 @@ namespace llarp
   // if all bootstraps fail, router will trigger re-bootstrapping after this cooldown
   inline constexpr auto BOOTSTRAP_COOLDOWN{1min};
 
+  /* Other Constants */
+  // the maximum number of RC/RID fetches that can pass w/o an unconfirmed rc/rid appearing
+  inline constexpr int MAX_CONFIRMATION_ATTEMPTS{5};
+  // threshold amount of verifications to promote an unconfirmed rc/rid
+  inline constexpr int CONFIRMATION_THRESHOLD{3};
+
   inline constexpr auto FLUSH_INTERVAL{5min};
 
+  template <
+      typename ID_t,
+      std::enable_if_t || std::is_same_v, int> = 0>
+  struct Unconfirmed
+  {
+    const ID_t id;
+    mutable int attempts = 0;       // mutable: updated in place while stored in a std::set
+    mutable int verifications = 0;  // mutable: like attempts, not part of ordering/equality
+
+    Unconfirmed() = delete;
+    Unconfirmed(const ID_t& obj) : id{obj}
+    {}
+    Unconfirmed(ID_t&& obj) : id{std::move(obj)}
+    {}
+
+    int
+    strikes() const
+    {
+      return attempts;
+    }
+
+    operator bool() const
+    {
+      return verifications >= CONFIRMATION_THRESHOLD;
+    }
+
+    bool
+    operator==(const Unconfirmed& other) const
+    {
+      return id == other.id;
+    }
+
+    bool
+    operator<(const Unconfirmed& other) const
+    {
+      return id < other.id;
+    }
+  };
+
   class NodeDB
   {
     Router& _router;
@@ -73,14 +113,22 @@ namespace llarp
             populated during startup and RouterID fetching. This is meant to represent the
             client instance's most recent perspective of the network, and record which RouterID's
             were recently "active" and connected to
+          - unconfirmed_rids: holds new rids returned in fetch requests to be verified by subsequent
+            fetch requests
           - known_rcs: populated during startup and when RC's are updated both during gossip
             and periodic RC fetching
+          - unconfirmed_rcs: holds new rcs to be verified by subsequent fetch requests, similar to
+            the unconfirmed_rids container
           - rc_lookup: holds all the same rc's as known_rcs, but can be used to look them up by
-            their rid. Deleting an rid key deletes the corresponding rc in known_rcs
+            their rid
     */
-    std::unordered_set known_rids;
-    std::unordered_set known_rcs;
-    std::unordered_map rc_lookup;  // TODO: look into this again
+    std::set known_rids;
+    std::set> unconfirmed_rids;
+
+    std::set known_rcs;
+    std::set> unconfirmed_rcs;
+
+    std::map rc_lookup;
 
     /** RouterID lists
         - white: active routers
@@ -92,18 +140,18 @@ namespace llarp
     std::unordered_set router_greenlist;
 
     // All registered relays (service nodes)
-    std::unordered_set registered_routers;
+    std::set registered_routers;
     // timing (note: Router holds the variables for last rc and rid request times)
     std::unordered_map last_rc_update_times;
     // if populated from a config file, lists specific exclusively used as path first-hops
-    std::unordered_set _pinned_edges;
+    std::set _pinned_edges;
     // source of "truth" for RC updating. This relay will also mediate requests to the
     // 12 selected active RID's for RID fetching
     RouterID fetch_source;
     // set of 12 randomly selected RID's from the client's set of routers
-    std::unordered_set rid_sources{};
+    std::set rid_sources{};
     // logs the RID's that resulted in an error during RID fetching
-    std::unordered_set fail_sources{};
+    std::set fail_sources{};
     // tracks the number of times each rid appears in the above responses
     std::unordered_map fetch_counters{};
 
@@ -152,13 +200,13 @@ namespace llarp
     }
 
     bool
-    ingest_fetched_rcs(std::vector rcs, rc_time timestamp);
+    ingest_fetched_rcs(std::set rcs, rc_time timestamp);
 
     bool
-    process_fetched_rcs(std::vector& rcs);
+    process_fetched_rcs(std::set& rcs);
 
     void
-    ingest_rid_fetch_responses(const RouterID& source, std::unordered_set ids = {});
+    ingest_rid_fetch_responses(const RouterID& source, std::set ids = {});
 
     bool
     process_fetched_rids();
@@ -182,7 +230,7 @@ namespace llarp
     void
     fetch_rids_result(bool initial = false);
 
-    // Bootstrap fallback
+    // Bootstrap fallback fetching
     void
     fallback_to_bootstrap();
     void
@@ -192,7 +240,7 @@ namespace llarp
     // if only specific RID's need to be re-selected; to re-select all, pass the member
     // variable ::known_rids
     void
-    reselect_router_id_sources(std::unordered_set specific);
+    reselect_router_id_sources(std::set specific);
 
     void
     set_router_whitelist(
@@ -230,7 +278,7 @@ namespace llarp
     bool
     is_first_hop_allowed(const RouterID& remote) const;
 
-    std::unordered_set&
+    std::set&
     pinned_edges()
     {
       return _pinned_edges;
@@ -257,13 +305,13 @@ namespace llarp
       return router_greylist;
     }
 
-    const std::unordered_set&
+    const std::set&
     get_registered_routers() const
     {
       return registered_routers;
     }
 
-    const std::unordered_set&
+    const std::set&
     get_rcs() const
     {
       return known_rcs;
@@ -336,33 +384,14 @@ namespace llarp
     std::optional>
     get_n_random_rcs_conditional(size_t n, std::function hook) const;
 
-    template
-    std::optional
-    GetRandom(Filter visit) const
-    {
-      return _router.loop()->call_get([visit, this]() mutable -> std::optional {
-        std::vector rcs{known_rcs.begin(), known_rcs.end()};
-
-        std::shuffle(rcs.begin(), rcs.end(), llarp::csrng);
-
-        for (const auto& entry : known_rcs)
-        {
-          if (visit(entry))
-            return entry;
-        }
-
-        return std::nullopt;
-      });
-    }
-
     // Updates `current` to not contain any of the elements of `replace` and resamples (up to
     // `target_size`) from population to refill it.
     template
     void
     replace_subset(
-        std::unordered_set& current,
-        const std::unordered_set& replace,
-        std::unordered_set population,
+        std::set& current,
+        const std::set& replace,
+        std::set population,
         size_t target_size,
         RNG&& rng)
     {
@@ -423,6 +452,58 @@ namespace llarp
       });
     }
 
+    /// Reconciles the tracked-but-untrusted entries in `container` against `unconfirmed`, the
+    /// ids from the latest fetch that are not yet known locally:
+    ///  - an entry that reappeared gains a verification and has its attempts counter reset;
+    ///    at CONFIRMATION_THRESHOLD verifications it is promoted and leaves `container`
+    ///  - an entry that did not reappear gains an attempt and is dropped once it reaches
+    ///    MAX_CONFIRMATION_ATTEMPTS
+    ///  - ids still left in `unconfirmed` afterwards are new and start being tracked
+    template <
+        typename ID_t,
+        std::enable_if_t || std::is_same_v, int> = 0>
+    void
+    process_results(
+        std::set unconfirmed, std::set>& container, std::set& known)
+    {
+      for (auto itr = container.begin(); itr != container.end();)
+      {
+        const auto found = unconfirmed.find(itr->id);
+
+        if (found == unconfirmed.end())
+        {
+          // not seen this round: one more missed attempt, dropped after too many
+          if (++itr->attempts >= MAX_CONFIRMATION_ATTEMPTS)
+            itr = container.erase(itr);
+          else
+            ++itr;
+          continue;
+        }
+
+        // already tracked, so it must not be re-inserted as a new entry below
+        unconfirmed.erase(found);
+
+        if (++itr->verifications < CONFIRMATION_THRESHOLD)
+        {
+          // reset attempt counter and continue
+          itr->attempts = 0;
+          ++itr;
+          continue;
+        }
+
+        if constexpr (std::is_same_v)
+          put_rc_if_newer(itr->id);
+        else
+          known.emplace(itr->id);
+
+        // erase() returns the next valid iterator; the erased element is not touched again
+        itr = container.erase(itr);
+      }
+
+      for (const auto& id : unconfirmed)
+        container.emplace(id);
+    }
+
     /// remove rcs that are older than we want to keep.  For relays, this is when
     /// they become "outdated" (i.e. 12hrs).  Clients will hang on to them until
     /// they are fully "expired" (i.e. 30 days), as the client may go offline for
@@ -441,3 +522,14 @@ namespace llarp
     put_rc_if_newer(RemoteRC rc, rc_time now = time_point_now());
   };
 }  // namespace llarp
+
+namespace std
+{
+  template <>
+  struct hash> : public hash
+  {};
+
+  template <>
+  struct hash> : hash
+  {};
+}  // namespace std
diff --git a/llarp/router_contact.hpp b/llarp/router_contact.hpp
index 1812f76dc..ce99d3822 100644
--- a/llarp/router_contact.hpp
+++ b/llarp/router_contact.hpp
@@ -366,22 +366,10 @@ namespace std
   };
 
   template <>
-  struct hash final : public hash
-  {
-    size_t
-    operator()(const llarp::RouterContact& r) const override
-    {
-      return std::hash{}(r.router_id());
-    }
-  };
+  struct hash : public hash
+  {};
 
   template <>
   struct hash final : public hash
-  {
-    size_t
-    operator()(const llarp::RouterContact& r) const override
-    {
-      return std::hash{}(r.router_id());
-    }
-  };
+  {};
 }  // namespace std