lokinet/llarp/profiling.cpp

322 lines
7.6 KiB
C++
Raw Normal View History

#include "profiling.hpp"
2023-10-19 21:59:57 +00:00
#include "util/file.hpp"
#include <oxenc/bt_producer.h>
#include <oxenc/bt_serialize.h>
using oxenc::bt_dict_consumer;
using oxenc::bt_dict_producer;
namespace llarp
{
// Logging category for this file; passed as the first argument to log::warning() below.
static auto logcat = log::Cat("profiling");
/// Builds a profile from a bencoded dict by handing the consumer to BDecode().
RouterProfile::RouterProfile(bt_dict_consumer dict)
{
BDecode(std::move(dict));
}
/// Serialises this profile's counters into `dict` under single-letter keys.
/// Keys are appended in ascending order (g, p, q, s, t, u, v); BDecode() probes
/// them in the same order, so keep both functions in sync when adding a field.
void
RouterProfile::BEncode(bt_dict_producer& dict) const
{
dict.append("g", connectGoodCount);
dict.append("p", pathSuccessCount);
dict.append("q", pathTimeoutCount);
dict.append("s", pathFailCount);
dict.append("t", connectTimeoutCount);
// the timestamp is stored as its raw tick count
dict.append("u", lastUpdated.count());
dict.append("v", version);
}
/// Reads the fields written by BEncode() back out of `dict`.
/// A member is assigned only when skip_until() reports its key; otherwise the
/// member keeps whatever value it already had.
/// NOTE(review): keys are probed in ascending order, presumably because the
/// consumer only moves forward -- confirm against oxenc before reordering.
void
RouterProfile::BDecode(bt_dict_consumer dict)
{
if (dict.skip_until("g"))
connectGoodCount = dict.consume_integer<uint64_t>();
if (dict.skip_until("p"))
pathSuccessCount = dict.consume_integer<uint64_t>();
if (dict.skip_until("q"))
pathTimeoutCount = dict.consume_integer<uint64_t>();
if (dict.skip_until("s"))
pathFailCount = dict.consume_integer<uint64_t>();
if (dict.skip_until("t"))
connectTimeoutCount = dict.consume_integer<uint64_t>();
// rebuild the timestamp from the tick count stored by BEncode()
if (dict.skip_until("u"))
lastUpdated = llarp_time_t{dict.consume_integer<uint64_t>()};
if (dict.skip_until("v"))
version = dict.consume_integer<uint64_t>();
}
2019-03-04 17:03:18 +00:00
void
RouterProfile::Decay()
2019-03-04 17:03:18 +00:00
{
connectGoodCount /= 2;
connectTimeoutCount /= 2;
pathSuccessCount /= 2;
pathFailCount /= 2;
pathTimeoutCount /= 2;
2019-04-16 17:30:07 +00:00
lastDecay = llarp::time_now_ms();
2019-03-04 17:03:18 +00:00
}
/// Periodic maintenance: runs Decay() once more than 30 seconds have passed
/// since the last decay.
void
RouterProfile::Tick()
{
  static constexpr auto decayPeriod = 30s;
  const auto currentTime = llarp::time_now_ms();

  // nothing to do when the last decay is not strictly in the past
  if (not(lastDecay < currentTime))
    return;

  if (currentTime - lastDecay > decayPeriod)
    Decay();
}
/// Overall verdict from the connect and path counters.
/// The path criterion is always `pathSuccessCount * chances > pathFailCount`.
/// Once connect timeouts exceed `chances`, successful connects must also
/// outnumber the timeouts.
bool
RouterProfile::IsGood(uint64_t chances) const
{
  const bool pathsOk = (pathSuccessCount * chances) > pathFailCount;

  // few connect timeouts: only the path criterion applies
  if (not(connectTimeoutCount > chances))
    return pathsOk;

  return connectTimeoutCount < connectGoodCount and pathsOk;
}
/// Shared success/failure heuristic.
/// - With at least one failure and `fails + success >= chances` samples, the
///   verdict is the integer quotient `success / fails` exceeding 1, which holds
///   only when successes are at least double the failures.
/// - Otherwise a router with no successes is good while `fails < chances`.
/// - Otherwise it is good.
static bool constexpr checkIsGood(uint64_t fails, uint64_t success, uint64_t chances)
{
  const bool enoughSamples = (fails + success) >= chances;
  if (fails != 0 and enoughSamples)
  {
    // integer division truncates, so a quotient of 2 or more is required
    return (success / fails) >= 2;
  }
  // no failures yet, or too few samples to judge by ratio
  return success != 0 or fails < chances;
}
/// Applies checkIsGood() to the connect counters: connect timeouts count as
/// failures and good connects count as successes.
bool
RouterProfile::IsGoodForConnect(uint64_t chances) const
{
return checkIsGood(connectTimeoutCount, connectGoodCount, chances);
}
2019-04-16 11:44:55 +00:00
/// Path-building verdict: fails outright once path timeouts exceed `chances`;
/// otherwise applies checkIsGood() to the path fail/success counters.
bool
RouterProfile::IsGoodForPath(uint64_t chances) const
{
  const bool timeoutsAcceptable = not(pathTimeoutCount > chances);
  return timeoutsAcceptable and checkIsGood(pathFailCount, pathSuccessCount, chances);
}
2019-05-24 02:01:36 +00:00
Profiling::Profiling() : m_DisableProfiling(false)
Config file improvements (#1397) * Config file API/comment improvements API improvements: ================= Make the config API use position-independent tag parameters (Required, Default{123}, MultiValue) rather than a sequence of bools with overloads. For example, instead of: conf.defineOption<int>("a", "b", false, true, 123, [] { ... }); you now write: conf.defineOption<int>("a", "b", MultiValue, Default{123}, [] { ... }); The tags are: - Required - MultiValue - Default{value} plus new abilities (see below): - Hidden - RelayOnly - ClientOnly - Comment{"line1", "line2", "line3"} Made option definition more powerful: ===================================== - `Hidden` allows you to define an option that won't show up in the generated config file if it isn't set. - `RelayOnly`/`ClientOnly` sets up an option that is only accepted and only shows up for relay or client configs. (If neither is specified the option shows up in both modes). - `Comment{...}` lets the option comments be specified as part of the defineOption. Comment improvements ==================== - Rewrote comments for various options to expand on details. - Inlined all the comments with the option definitions. - Several options that were missing comments got comments added. - Made various options for deprecated and or internal options hidden by default so that they don't show up in a default config file. - show the section comment (but not option comments) *after* the [section] tag instead of before it as it makes more sense that way (particularly for the [bind] section which has a new long comment to describe how it works). Disable profiling by default ============================ We had this weird state where we use and store profiling by default but never *load* it when starting up. This commit makes us just not use profiling at all unless explicitly enabled. Other misc changes: =================== - change default worker threads to 0 (= num cpus) instead of 1, and fix it to allow 0. 
- Actually apply worker-threads option - fixed default data-dir value erroneously having quotes around it - reordered ifname/ifaddr/mapaddr (was previously mapaddr/ifaddr/ifname) as mapaddr is a sort of specialization of ifaddr and so makes more sense to come after it (particularly because it now references ifaddr in its help message). - removed peer-stats option (since we always require it for relays and never use it for clients) - removed router profiles filename option (this doesn't need to be configurable) - removed defunct `service-node-seed` option - Change default logging output file to "" (which means stdout), and also made "-" work for stdout. * Router hive compilation fixes * Comments for SNApp SRV settings in ini file * Add extra blank line after section comments * Better deprecated option handling Allow {client,relay}-only options in {relay,client} configs to be specified as implicitly deprecated options: they warn, and don't set anything. Add an explicit `Deprecated` tag and move deprecated option handling into definition.cpp. * Move backwards compat options into section definitions Keep the "addBackwardsCompatibleConfigOptions" only for options in sections that no longer exist. * Fix INI parsing issues & C++17-ify - don't allow inline comments because it seems they aren't allowed in ini formats in general, and is going to cause problems if there is a comment character in a value (e.g. an exit auth string). Additionally it was breaking on a line such as: # some comment; see? because it was treating only `; see?` as the comment and then producing an error message about the rest of the line being invalid. - make section parsing stricter: the `[` and `]` have to be at the beginning at end of the line now (after stripping whitespace). - Move whitespace stripping to the top since everything in here does it. 
- chop off string_view suffix/prefix rather than maintaining position values - fix potential infinite loop/segfault when given a line such as `]foo[` * Make config parsing failure fatal Load() LogError's and returns false on failure, so we weren't aborting on config file errors. * Formatting: allow `{}` for empty functions/structs Instead of using two lines when empty: { } * Make default dns bind 127.0.0.1 on non-Linux * Don't show empty section; fix tests We can conceivably have sections that only make sense for clients or relays, and so want to completely omit that section if we have no options for the type of config being generated. Also fixes missing empty lines between tests. Co-authored-by: Thomas Winget <tewinget@gmail.com>
2020-10-07 22:22:58 +00:00
{}
/// Sets the disable flag; while it is set the IsBad*() queries return false
/// without consulting any profile.
void
Profiling::Disable()
{
m_DisableProfiling.store(true);
}
/// Clears the disable flag so the IsBad*() queries consult stored profiles again.
void
Profiling::Enable()
{
m_DisableProfiling.store(false);
}
2019-04-16 11:44:55 +00:00
bool
Profiling::IsBadForConnect(const RouterID& r, uint64_t chances)
2019-04-16 11:44:55 +00:00
{
if (m_DisableProfiling.load())
return false;
util::Lock lock{m_ProfilesMutex};
2019-04-16 11:44:55 +00:00
auto itr = m_Profiles.find(r);
if (itr == m_Profiles.end())
2019-04-16 11:44:55 +00:00
return false;
return not itr->second.IsGoodForConnect(chances);
2019-04-16 11:44:55 +00:00
}
bool
Profiling::IsBadForPath(const RouterID& r, uint64_t chances)
2019-04-16 11:44:55 +00:00
{
if (m_DisableProfiling.load())
return false;
util::Lock lock{m_ProfilesMutex};
2019-04-16 11:44:55 +00:00
auto itr = m_Profiles.find(r);
if (itr == m_Profiles.end())
2019-04-16 11:44:55 +00:00
return false;
return not itr->second.IsGoodForPath(chances);
2019-04-16 11:44:55 +00:00
}
/// Reports whether router `r` is considered bad overall.
/// Returns false when profiling is disabled or no profile exists for `r`;
/// otherwise the negation of RouterProfile::IsGood(chances).
bool
Profiling::IsBad(const RouterID& r, uint64_t chances)
{
  if (m_DisableProfiling.load())
    return false;

  util::Lock lock{m_ProfilesMutex};
  const auto found = m_Profiles.find(r);
  // unknown routers get the benefit of the doubt
  return found != m_Profiles.end() and not found->second.IsGood(chances);
}
2019-03-04 17:03:18 +00:00
void
Profiling::Tick()
{
De-abseil, part 2: mutex, locks, (most) time - util::Mutex is now a std::shared_timed_mutex, which is capable of exclusive and shared locks. - util::Lock is still present as a std::lock_guard<util::Mutex>. - the locking annotations are preserved, but updated to the latest supported by clang rather than using abseil's older/deprecated ones. - ACQUIRE_LOCK macro is gone since we don't pass mutexes by pointer into locks anymore (WTF abseil). - ReleasableLock is gone. Instead there are now some llarp::util helper methods to obtain unique and/or shared locks: - `auto lock = util::unique_lock(mutex);` gets an RAII-but-also unlockable object (std::unique_lock<T>, with T inferred from `mutex`). - `auto lock = util::shared_lock(mutex);` gets an RAII shared (i.e. "reader") lock of the mutex. - `auto lock = util::unique_locks(mutex1, mutex2, mutex3);` can be used to atomically lock multiple mutexes at once (returning a tuple of the locks). This are templated on the mutex which makes them a bit more flexible than using a concrete type: they can be used for any type of lockable mutex, not only util::Mutex. (Some of the code here uses them for getting locks around a std::mutex). Until C++17, using the RAII types is painfully verbose: ```C++ // pre-C++17 - needing to figure out the mutex type here is annoying: std::unique_lock<util::Mutex> lock(mutex); // pre-C++17 and even more verbose (but at least the type isn't needed): std::unique_lock<decltype(mutex)> lock(mutex); // our compromise: auto lock = util::unique_lock(mutex); // C++17: std::unique_lock lock(mutex); ``` All of these functions will also warn (under gcc or clang) if you discard the return value. You can also do fancy things like `auto l = util::unique_lock(mutex, std::adopt_lock)` (which lets a lock take over an already-locked mutex). - metrics code is gone, which also removes a big pile of code that was only used by metrics: - llarp::util::Scheduler - llarp::thread::TimerQueue - llarp::util::Stopwatch
2020-02-21 17:21:11 +00:00
util::Lock lock(m_ProfilesMutex);
for (auto& [rid, profile] : m_Profiles)
profile.Tick();
2019-03-04 17:03:18 +00:00
}
void
Profiling::MarkConnectTimeout(const RouterID& r)
{
util::Lock lock{m_ProfilesMutex};
auto& profile = m_Profiles[r];
profile.connectTimeoutCount += 1;
profile.lastUpdated = llarp::time_now_ms();
}
void
Profiling::MarkConnectSuccess(const RouterID& r)
{
util::Lock lock{m_ProfilesMutex};
auto& profile = m_Profiles[r];
profile.connectGoodCount += 1;
profile.lastUpdated = llarp::time_now_ms();
}
2019-03-31 15:25:13 +00:00
void
Profiling::ClearProfile(const RouterID& r)
{
util::Lock lock{m_ProfilesMutex};
2019-03-31 15:25:13 +00:00
m_Profiles.erase(r);
}
void
Profiling::MarkHopFail(const RouterID& r)
{
util::Lock lock{m_ProfilesMutex};
auto& profile = m_Profiles[r];
profile.pathFailCount += 1;
profile.lastUpdated = llarp::time_now_ms();
}
/// Records a path failure against every hop of `p` except the first, and
/// refreshes the last-updated time of each hop it touches.
void
Profiling::MarkPathFail(path::Path* p)
{
  util::Lock lock{m_ProfilesMutex};
  bool atFirstHop = true;
  for (const auto& hop : p->hops)
  {
    // the first hop is exempt because we are connected to it directly
    if (atFirstHop)
    {
      atFirstHop = false;
      continue;
    }
    auto& entry = m_Profiles[hop.rc.pubkey];
    ++entry.pathFailCount;
    entry.lastUpdated = llarp::time_now_ms();
  }
}
/// Records a path timeout against every hop of `p` (first hop included) and
/// refreshes each hop's last-updated time.
void
Profiling::MarkPathTimeout(path::Path* p)
{
  util::Lock lock{m_ProfilesMutex};
  for (const auto& hop : p->hops)
  {
    auto& entry = m_Profiles[hop.rc.pubkey];
    ++entry.pathTimeoutCount;
    entry.lastUpdated = llarp::time_now_ms();
  }
}
/// Credits every hop of `p` for a successfully built path.
/// For each hop: the fail count is halved, the timeout count is reset to zero,
/// the success count grows by the number of hops in the path, and the
/// last-updated time is refreshed.
void
Profiling::MarkPathSuccess(path::Path* p)
{
  util::Lock lock{m_ProfilesMutex};
  const auto hopCount = p->hops.size();
  for (const auto& hop : p->hops)
  {
    auto& entry = m_Profiles[hop.rc.pubkey];
    // a success redeems earlier trouble: halve the fails, forget the timeouts
    entry.pathFailCount /= 2;
    entry.pathTimeoutCount = 0;
    // the credit scales with path length
    entry.pathSuccessCount += hopCount;
    entry.lastUpdated = llarp::time_now_ms();
  }
}
/// Bencodes all profiles and writes them to `fpath`.
/// Encoding runs under the profiles mutex; the file write happens after the
/// lock is released. Encode and write failures are logged as warnings.
/// @return true on success (m_LastSave is then set to the current time),
///         false if encoding or writing threw.
bool
Profiling::Save(const fs::path fpath)
{
  std::string encoded;
  {
    util::Lock lock{m_ProfilesMutex};

    // per-profile size bound plus slack
    // NOTE(review): the 32 and 8 presumably cover the key and bencode framing -- confirm
    const auto capacity = (m_Profiles.size() * (RouterProfile::MaxSize + 32 + 8)) + 8;
    encoded.resize(capacity);

    bt_dict_producer producer{encoded.data(), encoded.size()};
    try
    {
      BEncode(producer);
    }
    catch (const std::exception& e)
    {
      log::warning(logcat, "Failed to encode profiling data: {}", e.what());
      return false;
    }

    // trim the buffer down to the bytes actually produced
    encoded.resize(producer.end() - encoded.data());
  }

  try
  {
    util::dump_file(fpath, encoded);
  }
  catch (const std::exception& e)
  {
    log::warning(logcat, "Failed to save profiling data to {}: {}", fpath, e.what());
    return false;
  }

  m_LastSave = llarp::time_now_ms();
  return true;
}
/// Appends one sub-dict per stored profile to `dict`, keyed by the router ID's
/// byte view, and lets each RouterProfile encode itself into its sub-dict.
/// Takes no lock itself; Save() calls it with m_ProfilesMutex held.
void
Profiling::BEncode(bt_dict_producer& dict) const
{
  for (const auto& [r_id, profile] : m_Profiles)
  {
    // RouterProfile::BEncode takes a non-const lvalue reference, which the
    // temporary returned by append_dict() cannot bind to, so name it first.
    // The sub-dict is finished when `subdict` goes out of scope.
    auto subdict = dict.append_dict(r_id.ToView());
    profile.BEncode(subdict);
  }
}
/// Replaces the profile table with the entries decoded from `dict`.
/// Every top-level key must be exactly RouterID::SIZE bytes long; its value is
/// a sub-dict handed to the RouterProfile constructor.
/// Entries are decoded into a scratch table that is swapped in only after the
/// whole dict has been read, so a throw part-way through leaves the existing
/// profiles untouched instead of cleared or half-loaded.
/// Takes no lock itself; Load() calls it with m_ProfilesMutex held.
/// @throws std::invalid_argument when a key is not RouterID::SIZE bytes long
void
Profiling::BDecode(bt_dict_consumer dict)
{
  decltype(m_Profiles) decoded;
  while (dict)
  {
    auto [rid, subdict] = dict.next_dict_consumer();
    if (rid.size() != RouterID::SIZE)
      throw std::invalid_argument{"invalid RouterID"};
    decoded.emplace(reinterpret_cast<const byte_t*>(rid.data()), subdict);
  }
  // commit only after everything decoded cleanly
  m_Profiles.swap(decoded);
}
/// Reads `fname` and decodes its bencoded contents into the profile table.
/// The file is read before the profiles mutex is taken; decoding runs under it.
/// @return true on success (m_LastSave is then set to the current time),
///         false, after logging a warning, if reading or decoding threw.
bool
Profiling::Load(const fs::path fname)
{
  try
  {
    std::string contents = util::slurp_file(fname);
    util::Lock lock{m_ProfilesMutex};
    BDecode(bt_dict_consumer{contents});
  }
  catch (const std::exception& e)
  {
    log::warning(logcat, "failed to load router profiles from {}: {}", fname, e.what());
    return false;
  }

  m_LastSave = llarp::time_now_ms();
  return true;
}
2019-03-25 15:41:37 +00:00
/// True once more than one minute separates `now` from m_LastSave.
bool
Profiling::ShouldSave(llarp_time_t now) const
{
  const auto sinceLastSave = now - m_LastSave;
  return sinceLastSave > 1min;
}
} // namespace llarp