Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deadlock in InboundLedgers and NetworkOPs #5124

Merged
merged 6 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions include/xrpl/basics/scope.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define RIPPLE_BASICS_SCOPE_H_INCLUDED

#include <exception>
#include <mutex>
#include <type_traits>
#include <utility>

Expand Down Expand Up @@ -186,6 +187,70 @@ class scope_success
template <class EF>
scope_success(EF) -> scope_success<EF>;

/**
Automatically unlocks and re-locks a unique_lock object.

This is the reverse of a std::unique_lock object - instead of locking the
mutex for the lifetime of this object, it unlocks it.

Make sure you don't try to unlock mutexes that aren't actually locked!

This is essentially a less-versatile boost::reverse_lock.

e.g. @code

std::mutex mut;

for (;;)
{
std::unique_lock myScopedLock{mut};
// mut is now locked

... do some stuff with it locked ..

while (xyz)
{
... do some stuff with it locked ..

scope_unlock unlocker{myScopedLock};

// mut is now unlocked for the remainder of this block,
// and re-locked at the end.

...do some stuff with it unlocked ...
} // mut gets locked here.

} // mut gets unlocked here
@endcode
Comment on lines +221 to +224
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to drive the point home:

Suggested change
} // mut gets locked here.
} // mut gets unlocked here
@endcode
} // mut gets locked here.
... do some more stuff with it locked ...
} // mut gets unlocked here
@endcode

*/

template <class Mutex>
class scope_unlock
{
std::unique_lock<Mutex>* plock;

public:
explicit scope_unlock(std::unique_lock<Mutex>& lock) noexcept(true)
: plock(&lock)
{
assert(plock->owns_lock());
plock->unlock();
}

// Immovable type
scope_unlock(scope_unlock const&) = delete;
scope_unlock&
operator=(scope_unlock const&) = delete;

~scope_unlock() noexcept(true)
{
plock->lock();
}
};

template <class Mutex>
scope_unlock(std::unique_lock<Mutex>&) -> scope_unlock<Mutex>;

} // namespace ripple

#endif
5 changes: 3 additions & 2 deletions src/xrpld/app/ledger/detail/InboundLedgers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
#include <xrpld/perflog/PerfLog.h>
#include <xrpl/basics/DecayingSample.h>
#include <xrpl/basics/Log.h>
#include <xrpl/basics/scope.h>
#include <xrpl/beast/container/aged_map.h>
#include <xrpl/beast/core/LexicalCast.h>
#include <xrpl/protocol/jss.h>

#include <exception>
#include <memory>
#include <mutex>
Expand Down Expand Up @@ -139,7 +141,7 @@ class InboundLedgersImp : public InboundLedgers
if (pendingAcquires_.contains(hash))
return;
pendingAcquires_.insert(hash);
lock.unlock();
scope_unlock unlock(lock);
acquire(hash, seq, reason);
}
catch (std::exception const& e)
Expand All @@ -154,7 +156,6 @@ class InboundLedgersImp : public InboundLedgers
<< "Unknown exception thrown for acquiring new inbound ledger "
<< hash;
}
lock.lock();
pendingAcquires_.erase(hash);
}

Expand Down
90 changes: 6 additions & 84 deletions src/xrpld/app/ledger/detail/LedgerMaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@
#include <xrpl/basics/UptimeClock.h>
#include <xrpl/basics/contract.h>
#include <xrpl/basics/safe_cast.h>
#include <xrpl/basics/scope.h>
#include <xrpl/protocol/BuildInfo.h>
#include <xrpl/protocol/HashPrefix.h>
#include <xrpl/protocol/digest.h>
#include <xrpl/resource/Fees.h>

#include <algorithm>
#include <cassert>
#include <chrono>
Expand All @@ -60,86 +62,6 @@

namespace ripple {

namespace {

//==============================================================================
/**
Automatically unlocks and re-locks a unique_lock object.

This is the reverse of a std::unique_lock object - instead of locking the
mutex for the lifetime of this object, it unlocks it.

Make sure you don't try to unlock mutexes that aren't actually locked!

This is essentially a less-versatile boost::reverse_lock.

e.g. @code

std::mutex mut;

for (;;)
{
std::unique_lock myScopedLock{mut};
// mut is now locked

... do some stuff with it locked ..

while (xyz)
{
... do some stuff with it locked ..

ScopedUnlock unlocker{myScopedLock};

// mut is now unlocked for the remainder of this block,
// and re-locked at the end.

...do some stuff with it unlocked ...
} // mut gets locked here.

} // mut gets unlocked here
@endcode
*/
template <class MutexType>
class ScopedUnlock
{
std::unique_lock<MutexType>& lock_;

public:
/** Creates a ScopedUnlock.

As soon as it is created, this will unlock the unique_lock, and
when the ScopedLock object is deleted, the unique_lock will
be re-locked.

Make sure this object is created and deleted by the same thread,
otherwise there are no guarantees what will happen! Best just to use it
as a local stack object, rather than creating on the heap.
*/
explicit ScopedUnlock(std::unique_lock<MutexType>& lock) : lock_(lock)
{
assert(lock_.owns_lock());
lock_.unlock();
}

ScopedUnlock(ScopedUnlock const&) = delete;
ScopedUnlock&
operator=(ScopedUnlock const&) = delete;

/** Destructor.

The unique_lock will be locked after the destructor is called.

Make sure this object is created and deleted by the same thread,
otherwise there are no guarantees what will happen!
*/
~ScopedUnlock() noexcept(false)
{
lock_.lock();
}
};

} // namespace

// Don't catch up more than 100 ledgers (cannot exceed 256)
static constexpr int MAX_LEDGER_GAP{100};

Expand Down Expand Up @@ -1336,7 +1258,7 @@
auto valLedger = mValidLedger.get();
std::uint32_t valSeq = valLedger->info().seq;

ScopedUnlock sul{sl};
scope_unlock sul{sl};
try
{
for (std::uint32_t seq = pubSeq; seq <= valSeq; ++seq)
Expand Down Expand Up @@ -1882,7 +1804,7 @@
InboundLedger::Reason reason,
std::unique_lock<std::recursive_mutex>& sl)
{
ScopedUnlock sul{sl};
scope_unlock sul{sl};

Check warning on line 1807 in src/xrpld/app/ledger/detail/LedgerMaster.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/ledger/detail/LedgerMaster.cpp#L1807

Added line #L1807 was not covered by tests
if (auto hash = getLedgerHashForHistory(missing, reason))
{
assert(hash->isNonZero());
Expand Down Expand Up @@ -2052,7 +1974,7 @@
for (auto const& ledger : pubLedgers)
{
{
ScopedUnlock sul{sl};
scope_unlock sul{sl};
JLOG(m_journal.debug())
<< "tryAdvance publishing seq " << ledger->info().seq;
setFullLedger(ledger, true, true);
Expand All @@ -2061,7 +1983,7 @@
setPubLedger(ledger);

{
ScopedUnlock sul{sl};
scope_unlock sul{sl};
app_.getOPs().pubLedger(ledger);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/xrpld/app/misc/NetworkOPs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include <xrpl/basics/UptimeClock.h>
#include <xrpl/basics/mulDiv.h>
#include <xrpl/basics/safe_cast.h>
#include <xrpl/basics/scope.h>
#include <xrpl/beast/rfc2616.h>
#include <xrpl/beast/utility/rngfill.h>
#include <xrpl/crypto/RFC1751.h>
Expand Down Expand Up @@ -2310,7 +2311,7 @@
bypassAccept = BypassAccept::yes;
else
pendingValidations_.insert(val->getLedgerHash());
lock.unlock();
scope_unlock unlock(lock);

Check warning on line 2314 in src/xrpld/app/misc/NetworkOPs.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/misc/NetworkOPs.cpp#L2314

Added line #L2314 was not covered by tests
handleNewValidation(app_, val, source, bypassAccept, m_journal);
}
catch (std::exception const& e)
Expand All @@ -2327,10 +2328,9 @@
}
if (bypassAccept == BypassAccept::no)
{
lock.lock();
pendingValidations_.erase(val->getLedgerHash());
lock.unlock();
}
lock.unlock();

pubValidation(val);

Expand Down
Loading