Skip to content

Commit

Permalink
Use PyMutex instead of std::mutex in free-threaded build. (#5219)
Browse files Browse the repository at this point in the history
* Use PyMutex instead of std::mutex in free-threaded build.

PyMutex is now part of the public C API as of 3.13.0b3 and generally has
slightly less overhead than std::mutex.

* style: pre-commit fixes

* Fix instance_map_shard padding

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
colesbury and pre-commit-ci[bot] authored Jul 2, 2024
1 parent b21b049 commit bb05e08
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions include/pybind11/detail/internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,20 +148,35 @@ struct override_hash {

// Unordered multimap keyed by `const void *` with `instance *` values; being a multimap, one key
// may be associated with more than one instance entry.
using instance_map = std::unordered_multimap<const void *, instance *>;

#ifdef Py_GIL_DISABLED
// Adapter that gives CPython's PyMutex the lock()/unlock() member functions required by the
// BasicLockable concept, so it can be managed by RAII guards such as std::unique_lock.
class pymutex {
public:
    // Starts from a value-initialized PyMutex.
    pymutex() : mutex({}) {}

    // Acquires the underlying PyMutex via PyMutex_Lock.
    void lock() {
        PyMutex_Lock(&mutex);
    }

    // Releases the underlying PyMutex via PyMutex_Unlock.
    void unlock() {
        PyMutex_Unlock(&mutex);
    }

private:
    PyMutex mutex;
};

// Instance map shards are used to reduce mutex contention in free-threaded Python.
// Each shard bundles one instance map with the mutex that guards it, followed by filler bytes
// chosen so that the members plus padding add up to a multiple of 64 bytes.
// NOTE(review): this listing is a rendered diff, so both the pre-change (std::mutex) and the
// post-change (pymutex) member and padding lines appear below — confirm which set is current.
struct instance_map_shard {
std::mutex mutex;
instance_map registered_instances;
pymutex mutex;
// alignas(64) would be better, but causes compile errors in macOS before 10.14 (see #5200)
// When the member sizes already sum to a multiple of 64, the expression yields 64 rather than
// 0, so the array length is never zero.
char padding[64 - (sizeof(std::mutex) + sizeof(instance_map)) % 64];
char padding[64 - (sizeof(instance_map) + sizeof(pymutex)) % 64];
};

// Compile-time guard: fails the build if the padding arithmetic above does not leave the shard
// size at a multiple of 64 bytes.
static_assert(sizeof(instance_map_shard) % 64 == 0,
"instance_map_shard size is not a multiple of 64 bytes");
#endif

/// Internal data structure used to track registered instances and types.
/// Whenever binary incompatible changes are made to this structure,
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
struct internals {
#ifdef Py_GIL_DISABLED
std::mutex mutex;
pymutex mutex;
#endif
// std::type_index -> pybind11's type information
type_map<type_info *> registered_types_cpp;
Expand Down Expand Up @@ -614,7 +629,7 @@ inline local_internals &get_local_internals() {
}

// PYBIND11_LOCK_INTERNALS(internals): when Py_GIL_DISABLED is defined, declares a local
// std::unique_lock named `lock` constructed on (internals).mutex; otherwise it expands to nothing.
// NOTE(review): both the pre-change (std::mutex) and post-change (pymutex) definitions appear
// below because this listing is a rendered diff — confirm which one is current.
#ifdef Py_GIL_DISABLED
# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<std::mutex> lock((internals).mutex)
# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<pymutex> lock((internals).mutex)
#else
# define PYBIND11_LOCK_INTERNALS(internals)
#endif
Expand Down Expand Up @@ -651,7 +666,7 @@ inline auto with_instance_map(const void *ptr,
auto idx = static_cast<size_t>(hash & internals.instance_shards_mask);

auto &shard = internals.instance_shards[idx];
std::unique_lock<std::mutex> lock(shard.mutex);
std::unique_lock<pymutex> lock(shard.mutex);
return cb(shard.registered_instances);
#else
(void) ptr;
Expand All @@ -667,7 +682,7 @@ inline size_t num_registered_instances() {
size_t count = 0;
for (size_t i = 0; i <= internals.instance_shards_mask; ++i) {
auto &shard = internals.instance_shards[i];
std::unique_lock<std::mutex> lock(shard.mutex);
std::unique_lock<pymutex> lock(shard.mutex);
count += shard.registered_instances.size();
}
return count;
Expand Down

0 comments on commit bb05e08

Please sign in to comment.