Fix crashy race condition in shutdown

Query->Cancel() will remove the Query, but that introduces a race
condition where unbound may still try to invoke the callback (with a
no-longer-valid pointer) if we do it before the ub_ctx_delete call.

Move to it afterwards so that we only cancel things that unbound didn't
pull/2045/head
Jason Rhinelander 2 years ago committed by Jeff Becker
parent 9aa6b64c1e
commit b8678a767e
No known key found for this signature in database
GPG Key ID: 025C02EE3A092F2D

@ -22,7 +22,7 @@ namespace llarp::dns
static auto logcat = log::Cat("dns");
void
QueryJob_Base::Cancel() const
QueryJob_Base::Cancel()
{
Message reply{m_Query};
reply.AddServFail();
@ -108,8 +108,8 @@ namespace llarp::dns
std::weak_ptr<Resolver> parent;
int id{};
virtual void
SendReply(llarp::OwnedBuffer replyBuf) const override;
void
SendReply(llarp::OwnedBuffer replyBuf) override;
};
/// Resolver_Base that uses libunbound
@ -126,7 +126,7 @@ namespace llarp::dns
#endif
std::optional<SockAddr> m_LocalAddr;
std::unordered_map<int, std::shared_ptr<Query>> m_Pending;
std::unordered_set<std::shared_ptr<Query>> m_Pending;
struct ub_result_deleter
{
@ -149,10 +149,7 @@ namespace llarp::dns
// take ownership of ub_result
std::unique_ptr<ub_result, ub_result_deleter> result{_result};
// borrow query
auto weak_query = static_cast<Query*>(data)->weak_from_this();
auto query = weak_query.lock();
if (not query)
return;
auto* query = static_cast<Query*>(data);
if (err)
{
// some kind of error from upstream
@ -344,9 +341,9 @@ namespace llarp::dns
}
void
RemovePending(int id)
RemovePending(const std::shared_ptr<Query>& query)
{
m_Pending.erase(id);
m_Pending.erase(query);
}
void
@ -423,16 +420,17 @@ namespace llarp::dns
#endif
if (m_ctx)
{
log::debug(logcat, "cancelling {} pending queries", m_Pending.size());
// cancel pending queries
for (const auto& [id, query] : m_Pending)
query->Cancel();
if (auto err = ::ub_wait(m_ctx))
log::warning(logcat, "issue tearing down unbound: {}", ub_strerror(err));
::ub_ctx_delete(m_ctx);
m_ctx = nullptr;
// destroy any outstanding queries that unbound hasn't fired yet
if (not m_Pending.empty())
{
log::debug(logcat, "cancelling {} pending queries", m_Pending.size());
for (const auto& query : m_Pending)
query->Cancel();
m_Pending.clear();
}
}
}
@ -525,35 +523,33 @@ namespace llarp::dns
q.qclass,
tmp.get(),
&Resolver::Callback,
&tmp->id))
nullptr))
{
log::warning(
logcat, "failed to send upstream query with libunbound: {}", ub_strerror(err));
tmp->Cancel();
}
else
m_Pending.emplace(tmp->id, tmp);
m_Pending.insert(std::move(tmp));
return true;
}
};
void
Query::SendReply(llarp::OwnedBuffer replyBuf) const
Query::SendReply(llarp::OwnedBuffer replyBuf)
{
if (m_Done.test_and_set())
return;
auto parent_ptr = parent.lock();
if (parent_ptr)
{
parent_ptr->call([parent_ptr,
id = id,
src = src,
from = resolverAddr,
to = askerAddr,
buf = replyBuf.copy()] {
src->SendTo(to, from, OwnedBuffer::copy_from(buf));
// remove query
parent_ptr->RemovePending(id);
});
parent_ptr->call(
[self = shared_from_this(), parent_ptr = std::move(parent_ptr), buf = replyBuf.copy()] {
self->src->SendTo(self->askerAddr, self->resolverAddr, OwnedBuffer::copy_from(buf));
// remove query
parent_ptr->RemovePending(self);
});
}
else
log::error(logcat, "no parent");

@ -17,6 +17,9 @@ namespace llarp::dns
/// the original dns query
Message m_Query;
/// True if we've sent a reply (including via a call to cancel)
std::atomic_flag m_Done = ATOMIC_FLAG_INIT;
public:
explicit QueryJob_Base(Message query) : m_Query{std::move(query)}
{}
@ -37,11 +40,11 @@ namespace llarp::dns
/// cancel this operation and inform anyone who cares
void
Cancel() const;
Cancel();
/// send a raw buffer back to the querier
virtual void
SendReply(llarp::OwnedBuffer replyBuf) const = 0;
SendReply(llarp::OwnedBuffer replyBuf) = 0;
};
class PacketSource_Base
@ -130,7 +133,7 @@ namespace llarp::dns
{}
void
SendReply(llarp::OwnedBuffer replyBuf) const override
SendReply(llarp::OwnedBuffer replyBuf) override
{
src->SendTo(asker, resolver, std::move(replyBuf));
}

Loading…
Cancel
Save