Fix crashy race condition in shutdown

Query->Cancel() will remove the Query, but that introduces a race
condition where unbound may still try to invoke the callback (with a
no-longer-valid pointer) if we do it before the ub_ctx_delete call.

Move to it afterwards so that we only cancel things that unbound didn't
pull/2045/head
Jason Rhinelander 2 years ago committed by Jeff Becker
parent 9aa6b64c1e
commit b8678a767e
No known key found for this signature in database
GPG Key ID: 025C02EE3A092F2D

@ -22,7 +22,7 @@ namespace llarp::dns
static auto logcat = log::Cat("dns"); static auto logcat = log::Cat("dns");
void void
QueryJob_Base::Cancel() const QueryJob_Base::Cancel()
{ {
Message reply{m_Query}; Message reply{m_Query};
reply.AddServFail(); reply.AddServFail();
@ -108,8 +108,8 @@ namespace llarp::dns
std::weak_ptr<Resolver> parent; std::weak_ptr<Resolver> parent;
int id{}; int id{};
virtual void void
SendReply(llarp::OwnedBuffer replyBuf) const override; SendReply(llarp::OwnedBuffer replyBuf) override;
}; };
/// Resolver_Base that uses libunbound /// Resolver_Base that uses libunbound
@ -126,7 +126,7 @@ namespace llarp::dns
#endif #endif
std::optional<SockAddr> m_LocalAddr; std::optional<SockAddr> m_LocalAddr;
std::unordered_map<int, std::shared_ptr<Query>> m_Pending; std::unordered_set<std::shared_ptr<Query>> m_Pending;
struct ub_result_deleter struct ub_result_deleter
{ {
@ -149,10 +149,7 @@ namespace llarp::dns
// take ownership of ub_result // take ownership of ub_result
std::unique_ptr<ub_result, ub_result_deleter> result{_result}; std::unique_ptr<ub_result, ub_result_deleter> result{_result};
// borrow query // borrow query
auto weak_query = static_cast<Query*>(data)->weak_from_this(); auto* query = static_cast<Query*>(data);
auto query = weak_query.lock();
if (not query)
return;
if (err) if (err)
{ {
// some kind of error from upstream // some kind of error from upstream
@ -344,9 +341,9 @@ namespace llarp::dns
} }
void void
RemovePending(int id) RemovePending(const std::shared_ptr<Query>& query)
{ {
m_Pending.erase(id); m_Pending.erase(query);
} }
void void
@ -423,16 +420,17 @@ namespace llarp::dns
#endif #endif
if (m_ctx) if (m_ctx)
{ {
log::debug(logcat, "cancelling {} pending queries", m_Pending.size());
// cancel pending queries
for (const auto& [id, query] : m_Pending)
query->Cancel();
if (auto err = ::ub_wait(m_ctx))
log::warning(logcat, "issue tearing down unbound: {}", ub_strerror(err));
::ub_ctx_delete(m_ctx); ::ub_ctx_delete(m_ctx);
m_ctx = nullptr; m_ctx = nullptr;
// destroy any outstanding queries that unbound hasn't fired yet
if (not m_Pending.empty())
{
log::debug(logcat, "cancelling {} pending queries", m_Pending.size());
for (const auto& query : m_Pending)
query->Cancel();
m_Pending.clear();
}
} }
} }
@ -525,35 +523,33 @@ namespace llarp::dns
q.qclass, q.qclass,
tmp.get(), tmp.get(),
&Resolver::Callback, &Resolver::Callback,
&tmp->id)) nullptr))
{ {
log::warning( log::warning(
logcat, "failed to send upstream query with libunbound: {}", ub_strerror(err)); logcat, "failed to send upstream query with libunbound: {}", ub_strerror(err));
tmp->Cancel(); tmp->Cancel();
} }
else else
m_Pending.emplace(tmp->id, tmp); m_Pending.insert(std::move(tmp));
return true; return true;
} }
}; };
void void
Query::SendReply(llarp::OwnedBuffer replyBuf) const Query::SendReply(llarp::OwnedBuffer replyBuf)
{ {
if (m_Done.test_and_set())
return;
auto parent_ptr = parent.lock(); auto parent_ptr = parent.lock();
if (parent_ptr) if (parent_ptr)
{ {
parent_ptr->call([parent_ptr, parent_ptr->call(
id = id, [self = shared_from_this(), parent_ptr = std::move(parent_ptr), buf = replyBuf.copy()] {
src = src, self->src->SendTo(self->askerAddr, self->resolverAddr, OwnedBuffer::copy_from(buf));
from = resolverAddr, // remove query
to = askerAddr, parent_ptr->RemovePending(self);
buf = replyBuf.copy()] { });
src->SendTo(to, from, OwnedBuffer::copy_from(buf));
// remove query
parent_ptr->RemovePending(id);
});
} }
else else
log::error(logcat, "no parent"); log::error(logcat, "no parent");

@ -17,6 +17,9 @@ namespace llarp::dns
/// the original dns query /// the original dns query
Message m_Query; Message m_Query;
/// True if we've sent a reply (including via a call to cancel)
std::atomic_flag m_Done = ATOMIC_FLAG_INIT;
public: public:
explicit QueryJob_Base(Message query) : m_Query{std::move(query)} explicit QueryJob_Base(Message query) : m_Query{std::move(query)}
{} {}
@ -37,11 +40,11 @@ namespace llarp::dns
/// cancel this operation and inform anyone who cares /// cancel this operation and inform anyone who cares
void void
Cancel() const; Cancel();
/// send a raw buffer back to the querier /// send a raw buffer back to the querier
virtual void virtual void
SendReply(llarp::OwnedBuffer replyBuf) const = 0; SendReply(llarp::OwnedBuffer replyBuf) = 0;
}; };
class PacketSource_Base class PacketSource_Base
@ -130,7 +133,7 @@ namespace llarp::dns
{} {}
void void
SendReply(llarp::OwnedBuffer replyBuf) const override SendReply(llarp::OwnedBuffer replyBuf) override
{ {
src->SendTo(asker, resolver, std::move(replyBuf)); src->SendTo(asker, resolver, std::move(replyBuf));
} }

Loading…
Cancel
Save