From 01615c985a34a077e0d671ff977bb081e3df32b2 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Fri, 9 Aug 2024 16:23:22 +0200 Subject: [PATCH 01/16] Iterate over dict items in `DictIterator` --- src/types/dict.rs | 73 +++++++++++------------------------------------ 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 9b7d8697d20..ddaa0896386 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -1,4 +1,4 @@ -use super::PyMapping; +use super::{PyIterator, PyMapping, PyTuple}; use crate::err::{self, PyErr, PyResult}; use crate::ffi::Py_ssize_t; use crate::ffi_ptr_ext::FfiPtrExt; @@ -402,10 +402,8 @@ fn dict_len(dict: &Bound<'_, PyDict>) -> Py_ssize_t { /// PyO3 implementation of an iterator for a Python `dict` object. pub struct BoundDictIterator<'py> { - dict: Bound<'py, PyDict>, - ppos: ffi::Py_ssize_t, - di_used: ffi::Py_ssize_t, - len: ffi::Py_ssize_t, + iter: Bound<'py, PyIterator>, + remaining: usize, } impl<'py> Iterator for BoundDictIterator<'py> { @@ -413,50 +411,13 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] fn next(&mut self) -> Option { - let ma_used = dict_len(&self.dict); - - // These checks are similar to what CPython does. - // - // If the dimension of the dict changes e.g. key-value pairs are removed - // or added during iteration, this will panic next time when `next` is called - if self.di_used != ma_used { - self.di_used = -1; - panic!("dictionary changed size during iteration"); - }; - - // If the dict is changed in such a way that the length remains constant - // then this will panic at the end of iteration - similar to this: - // - // d = {"a":1, "b":2, "c": 3} - // - // for k, v in d.items(): - // d[f"{k}_"] = 4 - // del d[k] - // print(k) - // - if self.len == -1 { - self.di_used = -1; - panic!("dictionary keys changed during iteration"); - }; - - let mut key: *mut ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut ffi::PyObject = std::ptr::null_mut(); - - if unsafe { ffi::PyDict_Next(self.dict.as_ptr(), &mut self.ppos, &mut key, &mut value) } - != 0 - { - self.len -= 1; - let py = self.dict.py(); - // Safety: - // - PyDict_Next returns borrowed values - // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null - Some(( - unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), - unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), - )) - } else { - None - } + self.remaining = self.remaining.saturating_sub(1); + self.iter.next().map(Result::unwrap).map(|pair| { + let tuple = pair.downcast::().unwrap(); + let key = tuple.get_item(0).unwrap(); + let value = tuple.get_item(1).unwrap(); + (key, value) + }) } #[inline] @@ -468,19 +429,16 @@ impl<'py> Iterator for BoundDictIterator<'py> { impl ExactSizeIterator for BoundDictIterator<'_> { fn len(&self) -> usize { - self.len as usize + self.remaining } } impl<'py> BoundDictIterator<'py> { fn new(dict: Bound<'py, PyDict>) -> Self { - let len = dict_len(&dict); - BoundDictIterator { - dict, - ppos: 0, - di_used: len, - len, - } + let remaining = dict_len(&dict) as usize; + let iter = PyIterator::from_object(&dict.items()).unwrap(); + + BoundDictIterator { iter, remaining } } } @@ -553,6 +511,7 @@ mod borrowed_iter { impl<'a, 'py> BorrowedDictIter<'a, 'py> { pub(super) fn new(dict: Borrowed<'a, 'py, PyDict>) -> Self { let len = dict_len(&dict); + BorrowedDictIter { dict, ppos: 0, len } } } From addfbc320b4ab38873dffbff44043f70112fb6bd Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Thu, 29 Aug 2024 00:23:15 +0200 Subject: [PATCH 02/16] Use python instead of C API to get dict items --- src/types/dict.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index ddaa0896386..6c19c20c639 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -436,7 +436,8 @@ impl ExactSizeIterator for BoundDictIterator<'_> { impl<'py> BoundDictIterator<'py> { fn new(dict: Bound<'py, PyDict>) -> Self { let remaining = dict_len(&dict) as usize; - let iter = PyIterator::from_object(&dict.items()).unwrap(); + let items = dict.call_method0(intern!(dict.py(), "items")).unwrap(); + let iter = PyIterator::from_object(&items).unwrap(); BoundDictIterator { iter, remaining } } From ce3a916e71c1062cc3d0c9741623e18cd55bb58b Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Thu, 29 Aug 2024 18:23:27 +0200 Subject: [PATCH 03/16] Use plain dict iterator when not on free-threaded & not a subclass --- src/types/dict.rs | 115 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 97 insertions(+), 18 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 6c19c20c639..711bbd7f4e9 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -1,11 +1,9 @@ -use super::{PyIterator, PyMapping, PyTuple}; use crate::err::{self, PyErr, PyResult}; use crate::ffi::Py_ssize_t; use crate::ffi_ptr_ext::FfiPtrExt; use crate::instance::{Borrowed, Bound}; use crate::py_result_ext::PyResultExt; -use crate::types::any::PyAnyMethods; -use crate::types::{PyAny, PyList}; +use crate::types::{PyAny, PyAnyMethods, PyIterator, PyList, PyMapping, PyTuple, PyTupleMethods}; use crate::{ffi, BoundObject, IntoPyObject, Python}; /// Represents a Python `dict`. @@ -401,9 +399,23 @@ fn dict_len(dict: &Bound<'_, PyDict>) -> Py_ssize_t { } /// PyO3 implementation of an iterator for a Python `dict` object. -pub struct BoundDictIterator<'py> { - iter: Bound<'py, PyIterator>, - remaining: usize, +pub enum BoundDictIterator<'py> { + /// Iterator over the items of the dictionary, obtained by calling the python method `items()`. + #[allow(missing_docs)] + ItemIter { + iter: Bound<'py, PyIterator>, + remaining: ffi::Py_ssize_t, + }, + /// Iterator over the items of the dictionary, using the C-API `PyDict_Next`. This variant is + /// only used when the dictionary is an exact instance of `PyDict` and the GIL enabled. + #[allow(missing_docs)] + #[cfg(not(Py_GIL_DISABLED))] + DictIter { + dict: Bound<'py, PyDict>, + ppos: ffi::Py_ssize_t, + di_used: ffi::Py_ssize_t, + remaining: ffi::Py_ssize_t, + }, } impl<'py> Iterator for BoundDictIterator<'py> { @@ -411,13 +423,67 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] fn next(&mut self) -> Option { - self.remaining = self.remaining.saturating_sub(1); - self.iter.next().map(Result::unwrap).map(|pair| { - let tuple = pair.downcast::().unwrap(); - let key = tuple.get_item(0).unwrap(); - let value = tuple.get_item(1).unwrap(); - (key, value) - }) + match self { + BoundDictIterator::ItemIter { iter, remaining } => { + *remaining = remaining.saturating_sub(1); + iter.next().map(Result::unwrap).map(|tuple| { + let tuple = tuple.downcast::().unwrap(); + let key = tuple.get_item(0).unwrap(); + let value = tuple.get_item(1).unwrap(); + (key, value) + }) + } + #[cfg(not(Py_GIL_DISABLED))] + BoundDictIterator::DictIter { + dict, + ppos, + di_used, + remaining, + } => { + let ma_used = dict_len(dict); + + // These checks are similar to what CPython does. + // + // If the dimension of the dict changes e.g. key-value pairs are removed + // or added during iteration, this will panic next time when `next` is called + if *di_used != ma_used { + *di_used = -1; + panic!("dictionary changed size during iteration"); + }; + + // If the dict is changed in such a way that the length remains constant + // then this will panic at the end of iteration - similar to this: + // + // d = {"a":1, "b":2, "c": 3} + // + // for k, v in d.items(): + // d[f"{k}_"] = 4 + // del d[k] + // print(k) + // + if *remaining == -1 { + *di_used = -1; + panic!("dictionary keys changed during iteration"); + }; + + let mut key: *mut ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut ffi::PyObject = std::ptr::null_mut(); + + if unsafe { ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) } != 0 { + *remaining -= 1; + let py = dict.py(); + // Safety: + // - PyDict_Next returns borrowed values + // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null + Some(( + unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), + unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), + )) + } else { + None + } + } + } } #[inline] @@ -429,17 +495,31 @@ impl<'py> Iterator for BoundDictIterator<'py> { impl ExactSizeIterator for BoundDictIterator<'_> { fn len(&self) -> usize { - self.remaining + match self { + BoundDictIterator::ItemIter { remaining, .. } => *remaining as usize, + #[cfg(not(Py_GIL_DISABLED))] + BoundDictIterator::DictIter { remaining, .. } => *remaining as usize, + } } } impl<'py> BoundDictIterator<'py> { fn new(dict: Bound<'py, PyDict>) -> Self { - let remaining = dict_len(&dict) as usize; + let remaining = dict_len(&dict); + + #[cfg(not(Py_GIL_DISABLED))] + if dict.is_exact_instance_of::() { + return BoundDictIterator::DictIter { + dict, + ppos: 0, + di_used: remaining, + remaining, + }; + }; + let items = dict.call_method0(intern!(dict.py(), "items")).unwrap(); let iter = PyIterator::from_object(&items).unwrap(); - - BoundDictIterator { iter, remaining } + BoundDictIterator::ItemIter { iter, remaining } } } @@ -512,7 +592,6 @@ mod borrowed_iter { impl<'a, 'py> BorrowedDictIter<'a, 'py> { pub(super) fn new(dict: Borrowed<'a, 'py, PyDict>) -> Self { let len = dict_len(&dict); - BorrowedDictIter { dict, ppos: 0, len } } } From 8d9b37b809d0c91d51cc2e285cb7d6b0752663bd Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 3 Sep 2024 18:04:01 +0200 Subject: [PATCH 04/16] Copy dict on free-threaded builds to prevent concurrent modifications --- src/types/dict.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 711bbd7f4e9..fceb10902e2 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -409,7 +409,6 @@ pub enum BoundDictIterator<'py> { /// Iterator over the items of the dictionary, using the C-API `PyDict_Next`. This variant is /// only used when the dictionary is an exact instance of `PyDict` and the GIL enabled. #[allow(missing_docs)] - #[cfg(not(Py_GIL_DISABLED))] DictIter { dict: Bound<'py, PyDict>, ppos: ffi::Py_ssize_t, @@ -433,7 +432,6 @@ impl<'py> Iterator for BoundDictIterator<'py> { (key, value) }) } - #[cfg(not(Py_GIL_DISABLED))] BoundDictIterator::DictIter { dict, ppos, @@ -497,7 +495,6 @@ impl ExactSizeIterator for BoundDictIterator<'_> { fn len(&self) -> usize { match self { BoundDictIterator::ItemIter { remaining, .. } => *remaining as usize, - #[cfg(not(Py_GIL_DISABLED))] BoundDictIterator::DictIter { remaining, .. } => *remaining as usize, } } @@ -507,8 +504,12 @@ impl<'py> BoundDictIterator<'py> { fn new(dict: Bound<'py, PyDict>) -> Self { let remaining = dict_len(&dict); - #[cfg(not(Py_GIL_DISABLED))] if dict.is_exact_instance_of::() { + // Copy the dictionary if the GIL is disabled, as we can't guarantee that the dictionary + // won't be modified during iteration. + #[cfg(Py_GIL_DISABLED)] + let dict = dict.copy().unwrap(); + return BoundDictIterator::DictIter { dict, ppos: 0, From 6c05268c55e14836c03706090d639f41ff9a7570 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 3 Sep 2024 20:43:48 +0200 Subject: [PATCH 05/16] Add test for dict subclass iters --- src/types/dict.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index fceb10902e2..04e25ef1e55 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -666,7 +666,9 @@ where #[cfg(test)] mod tests { use super::*; - use crate::types::PyTuple; + use crate::tests::common::generate_unique_module_name; + use crate::types::{PyModule, PyTuple}; + use pyo3_ffi::c_str; use std::collections::{BTreeMap, HashMap}; #[test] @@ -977,6 +979,44 @@ mod tests { }); } + #[test] + fn test_iter_subclass() { + Python::with_gil(|py| { + let mut v = HashMap::new(); + v.insert(7, 32); + v.insert(8, 42); + v.insert(9, 123); + let dict = v.into_pyobject(py).unwrap(); + + let module = PyModule::from_code( + py, + c_str!("class DictSubclass(dict): pass"), + c_str!("test.py"), + &generate_unique_module_name("test"), + ) + .unwrap(); + + let subclass = module + .getattr("DictSubclass") + .unwrap() + .call1((dict,)) + .unwrap() + .downcast_into::() + .unwrap(); + + let mut key_sum = 0; + let mut value_sum = 0; + let iter = subclass.iter(); + assert!(matches!(iter, BoundDictIterator::ItemIter { .. })); + for (key, value) in iter { + key_sum += key.extract::().unwrap(); + value_sum += value.extract::().unwrap(); + } + assert_eq!(7 + 8 + 9, key_sum); + assert_eq!(32 + 42 + 123, value_sum); + }); + } + #[test] fn test_iter_bound() { Python::with_gil(|py| { From 5224d5f1b8e8bf2a37eb83c569a759d797c4d8a5 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Fri, 27 Sep 2024 10:45:15 +0200 Subject: [PATCH 06/16] Implement `PyDict::locked_for_each` --- src/types/dict.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/types/dict.rs b/src/types/dict.rs index 04e25ef1e55..3574b0c2cda 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -178,6 +178,15 @@ pub trait PyDictMethods<'py>: crate::sealed::Sealed { /// so long as the set of keys does not change. fn iter(&self) -> BoundDictIterator<'py>; + /// Iterates over the contents of this dictionary while holding a critical section on the dict. + /// This is useful when the GIL is disabled and the dictionary is shared between threads. + /// It is not guaranteed that the dictionary will not be modified during iteration when the + /// closure calls arbitrary Python code that releases the current critical section. + #[cfg(Py_GIL_DISABLED)] + fn locked_for_each(&self, closure: F) -> PyResult<()> + where + F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>; + /// Returns `self` cast as a `PyMapping`. fn as_mapping(&self) -> &Bound<'py, PyMapping>; @@ -355,6 +364,25 @@ impl<'py> PyDictMethods<'py> for Bound<'py, PyDict> { BoundDictIterator::new(self.clone()) } + #[cfg(Py_GIL_DISABLED)] + fn locked_for_each(&self, closure: F) -> PyResult<()> + where + F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + for (key, value) in self { + if let Err(err) = closure(key, value) { + unsafe { ffi::PyCriticalSection_End(&mut section) }; + return Err(err); + } + } + + unsafe { ffi::PyCriticalSection_End(&mut section) }; + Ok(()) + } + fn as_mapping(&self) -> &Bound<'py, PyMapping> { unsafe { self.downcast_unchecked() } } From c7ca632c1f90be835d8c8d59ae9a129cd9d3aa06 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Fri, 27 Sep 2024 10:50:26 +0200 Subject: [PATCH 07/16] Lock `BoundDictIterator::next` on each iteration --- src/types/dict.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 3574b0c2cda..e0c8e717bb0 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -466,6 +466,13 @@ impl<'py> Iterator for BoundDictIterator<'py> { di_used, remaining, } => { + #[cfg(Py_GIL_DISABLED)] + let mut section = unsafe { std::mem::zeroed() }; + #[cfg(Py_GIL_DISABLED)] + unsafe { + ffi::PyCriticalSection_Begin(&mut section, op) + }; + let ma_used = dict_len(dict); // These checks are similar to what CPython does. @@ -495,7 +502,10 @@ impl<'py> Iterator for BoundDictIterator<'py> { let mut key: *mut ffi::PyObject = std::ptr::null_mut(); let mut value: *mut ffi::PyObject = std::ptr::null_mut(); - if unsafe { ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) } != 0 { + let result = if unsafe { + ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) + } != 0 + { *remaining -= 1; let py = dict.py(); // Safety: @@ -507,7 +517,14 @@ impl<'py> Iterator for BoundDictIterator<'py> { )) } else { None + }; + + #[cfg(Py_GIL_DISABLED)] + unsafe { + ffi::PyCriticalSection_End(&mut section); } + + result } } } From 59615ca5a14dea306f80c4eb8173dfba8fd96811 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Fri, 27 Sep 2024 10:31:51 +0200 Subject: [PATCH 08/16] Implement locked `fold` & `try_fold` --- src/lib.rs | 5 +++- src/types/dict.rs | 67 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7de32ca264f..520d332381e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,8 @@ #![warn(missing_docs)] -#![cfg_attr(feature = "nightly", feature(auto_traits, negative_impls))] +#![cfg_attr( + feature = "nightly", + feature(auto_traits, negative_impls, try_trait_v2) +)] #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] // Deny some lints in doctests. // Use `#[allow(...)]` locally to override. diff --git a/src/types/dict.rs b/src/types/dict.rs index e0c8e717bb0..e264440bc3d 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -435,7 +435,7 @@ pub enum BoundDictIterator<'py> { remaining: ffi::Py_ssize_t, }, /// Iterator over the items of the dictionary, using the C-API `PyDict_Next`. This variant is - /// only used when the dictionary is an exact instance of `PyDict` and the GIL enabled. + /// only used when the dictionary is an exact instance of `PyDict`. #[allow(missing_docs)] DictIter { dict: Bound<'py, PyDict>, @@ -534,6 +534,66 @@ impl<'py> Iterator for BoundDictIterator<'py> { let len = self.len(); (len, Some(len)) } + + #[inline] + #[cfg(Py_GIL_DISABLED)] + fn fold(mut self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + let op = match self { + BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), + BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), + }; + + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, op) }; + + let mut accum = init; + for x in &mut self { + accum = f(accum, x); + } + unsafe { + ffi::PyCriticalSection_End(&mut section); + } + accum + } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, feature = "nightly"))] + fn try_fold(&mut self, init: B, mut f: F) -> R + where + Self: Sized, + F: FnMut(B, Self::Item) -> R, + R: std::ops::Try, + { + use std::ops::ControlFlow; + + let op = match self { + BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), + BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), + }; + + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, op) }; + + let mut accum = init; + + for x in &mut self { + match f(accum, x).branch() { + ControlFlow::Continue(a) => accum = a, + ControlFlow::Break(err) => { + unsafe { ffi::PyCriticalSection_End(&mut section) } + return R::from_residual(err); + } + } + } + unsafe { + ffi::PyCriticalSection_End(&mut section); + } + R::from_output(accum) + } } impl ExactSizeIterator for BoundDictIterator<'_> { @@ -550,11 +610,6 @@ impl<'py> BoundDictIterator<'py> { let remaining = dict_len(&dict); if dict.is_exact_instance_of::() { - // Copy the dictionary if the GIL is disabled, as we can't guarantee that the dictionary - // won't be modified during iteration. - #[cfg(Py_GIL_DISABLED)] - let dict = dict.copy().unwrap(); - return BoundDictIterator::DictIter { dict, ppos: 0, From bb4392b77b159340a1f56c2d54b4d3376a0f6cff Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Sun, 29 Sep 2024 15:59:04 +0200 Subject: [PATCH 09/16] Implement `all`,`any`,`find`,`find_map`,`position` when not on nightly --- src/types/dict.rs | 178 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 157 insertions(+), 21 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index e264440bc3d..0a0a74e04b8 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -5,6 +5,8 @@ use crate::instance::{Borrowed, Bound}; use crate::py_result_ext::PyResultExt; use crate::types::{PyAny, PyAnyMethods, PyIterator, PyList, PyMapping, PyTuple, PyTupleMethods}; use crate::{ffi, BoundObject, IntoPyObject, Python}; +#[cfg(Py_GIL_DISABLED)] +use std::ops::ControlFlow; /// Represents a Python `dict`. /// @@ -470,7 +472,7 @@ impl<'py> Iterator for BoundDictIterator<'py> { let mut section = unsafe { std::mem::zeroed() }; #[cfg(Py_GIL_DISABLED)] unsafe { - ffi::PyCriticalSection_Begin(&mut section, op) + ffi::PyCriticalSection_Begin(&mut section, dict.as_ptr()); }; let ma_used = dict_len(dict); @@ -542,21 +544,14 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(B, Self::Item) -> B, { - let op = match self { - BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), - BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), - }; - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, op) }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; let mut accum = init; for x in &mut self { accum = f(accum, x); } - unsafe { - ffi::PyCriticalSection_End(&mut section); - } + unsafe { ffi::PyCriticalSection_End(&mut section) }; accum } @@ -568,15 +563,8 @@ impl<'py> Iterator for BoundDictIterator<'py> { F: FnMut(B, Self::Item) -> R, R: std::ops::Try, { - use std::ops::ControlFlow; - - let op = match self { - BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), - BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), - }; - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, op) }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; let mut accum = init; @@ -589,11 +577,150 @@ impl<'py> Iterator for BoundDictIterator<'py> { } } } - unsafe { - ffi::PyCriticalSection_End(&mut section); - } + unsafe { ffi::PyCriticalSection_End(&mut section) }; R::from_output(accum) } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn all(&mut self, f: F) -> bool + where + Self: Sized, + F: FnMut(Self::Item) -> bool, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + #[inline] + fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { + move |(), x| { + if f(x) { + ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) + } + } + } + let result = self.try_fold((), check(f)) == ControlFlow::Continue(()); + unsafe { ffi::PyCriticalSection_End(&mut section) }; + result + } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn any(&mut self, f: F) -> bool + where + Self: Sized, + F: FnMut(Self::Item) -> bool, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + #[inline] + fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { + move |(), x| { + if f(x) { + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) + } + } + } + + let result = self.try_fold((), check(f)) == ControlFlow::Break(()); + unsafe { ffi::PyCriticalSection_End(&mut section) }; + result + } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn find

(&mut self, predicate: P) -> Option + where + Self: Sized, + P: FnMut(&Self::Item) -> bool, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + #[inline] + fn check(mut predicate: impl FnMut(&T) -> bool) -> impl FnMut((), T) -> ControlFlow { + move |(), x| { + if predicate(&x) { + ControlFlow::Break(x) + } else { + ControlFlow::Continue(()) + } + } + } + + let result = match self.try_fold((), check(predicate)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + }; + unsafe { ffi::PyCriticalSection_End(&mut section) }; + result + } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn find_map(&mut self, f: F) -> Option + where + Self: Sized, + F: FnMut(Self::Item) -> Option, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + #[inline] + fn check(mut f: impl FnMut(T) -> Option) -> impl FnMut((), T) -> ControlFlow { + move |(), x| match f(x) { + Some(x) => ControlFlow::Break(x), + None => ControlFlow::Continue(()), + } + } + + let result = match self.try_fold((), check(f)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + }; + unsafe { ffi::PyCriticalSection_End(&mut section) }; + result + } + + #[inline] + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn position

(&mut self, predicate: P) -> Option + where + Self: Sized, + P: FnMut(Self::Item) -> bool, + { + let mut section = unsafe { std::mem::zeroed() }; + unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; + + #[inline] + fn check<'a, T>( + mut predicate: impl FnMut(T) -> bool + 'a, + acc: &'a mut usize, + ) -> impl FnMut((), T) -> ControlFlow + 'a { + move |_, x| { + if predicate(x) { + ControlFlow::Break(*acc) + } else { + *acc += 1; + ControlFlow::Continue(()) + } + } + } + + let mut acc = 0; + let result = match self.try_fold((), check(predicate, &mut acc)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + }; + + unsafe { ffi::PyCriticalSection_End(&mut section) }; + result + } } impl ExactSizeIterator for BoundDictIterator<'_> { @@ -622,6 +749,15 @@ impl<'py> BoundDictIterator<'py> { let iter = PyIterator::from_object(&items).unwrap(); BoundDictIterator::ItemIter { iter, remaining } } + + #[inline] + #[cfg(Py_GIL_DISABLED)] + fn as_ptr(&self) -> *mut ffi::PyObject { + match self { + BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), + BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), + } + } } impl<'py> IntoIterator for Bound<'py, PyDict> { From 2996df7fd05d282c0906dbd8833134a72437cdb2 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Sun, 29 Sep 2024 17:36:16 +0200 Subject: [PATCH 10/16] Add changelog --- newsfragments/4439.changed.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 newsfragments/4439.changed.md diff --git a/newsfragments/4439.changed.md b/newsfragments/4439.changed.md new file mode 100644 index 00000000000..9cb01a4d2b8 --- /dev/null +++ b/newsfragments/4439.changed.md @@ -0,0 +1,3 @@ +* Make `PyDict` iterator compatible with free-threaded build +* Added `PyDict::locked_for_each` method to iterate on free-threaded builds to prevent the dict being mutated during iteration +* Iterate over `dict.items()` when dict is subclassed from `PyDict` From 5bdbfb2f5971cf4b2d821157b43739e9d0d7224b Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Sun, 29 Sep 2024 22:06:33 +0200 Subject: [PATCH 11/16] Use critical section wrapper --- src/types/dict.rs | 267 ++++++++++++++++++++-------------------------- 1 file changed, 118 insertions(+), 149 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 0a0a74e04b8..8322e291f7a 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -366,23 +366,22 @@ impl<'py> PyDictMethods<'py> for Bound<'py, PyDict> { BoundDictIterator::new(self.clone()) } - #[cfg(Py_GIL_DISABLED)] - fn locked_for_each(&self, closure: F) -> PyResult<()> + #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn locked_for_each(&self, f: F) -> PyResult<()> where F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - for (key, value) in self { - if let Err(err) = closure(key, value) { - unsafe { ffi::PyCriticalSection_End(&mut section) }; - return Err(err); - } + #[cfg(feature = "nightly")] + { + self.iter().try_for_each(|(key, value)| f(key, value)) } - unsafe { ffi::PyCriticalSection_End(&mut section) }; - Ok(()) + #[cfg(not(feature = "nightly"))] + { + crate::sync::with_critical_section(self, || { + self.iter().try_for_each(|(key, value)| f(key, value)) + }) + } } fn as_mapping(&self) -> &Bound<'py, PyMapping> { @@ -452,10 +451,13 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] fn next(&mut self) -> Option { - match self { - BoundDictIterator::ItemIter { iter, remaining } => { + self.with_critical_section(|iter| match iter { + BoundDictIterator::ItemIter { + iter: ref mut py_iter, + ref mut remaining, + } => { *remaining = remaining.saturating_sub(1); - iter.next().map(Result::unwrap).map(|tuple| { + py_iter.next().map(Result::unwrap).map(|tuple| { let tuple = tuple.downcast::().unwrap(); let key = tuple.get_item(0).unwrap(); let value = tuple.get_item(1).unwrap(); @@ -463,18 +465,11 @@ impl<'py> Iterator for BoundDictIterator<'py> { }) } BoundDictIterator::DictIter { - dict, - ppos, - di_used, - remaining, + ref mut dict, + ref mut ppos, + ref mut di_used, + ref mut remaining, } => { - #[cfg(Py_GIL_DISABLED)] - let mut section = unsafe { std::mem::zeroed() }; - #[cfg(Py_GIL_DISABLED)] - unsafe { - ffi::PyCriticalSection_Begin(&mut section, dict.as_ptr()); - }; - let ma_used = dict_len(dict); // These checks are similar to what CPython does. @@ -504,10 +499,7 @@ impl<'py> Iterator for BoundDictIterator<'py> { let mut key: *mut ffi::PyObject = std::ptr::null_mut(); let mut value: *mut ffi::PyObject = std::ptr::null_mut(); - let result = if unsafe { - ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) - } != 0 - { + if unsafe { ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) } != 0 { *remaining -= 1; let py = dict.py(); // Safety: @@ -519,16 +511,9 @@ impl<'py> Iterator for BoundDictIterator<'py> { )) } else { None - }; - - #[cfg(Py_GIL_DISABLED)] - unsafe { - ffi::PyCriticalSection_End(&mut section); } - - result } - } + }) } #[inline] @@ -544,15 +529,13 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(B, Self::Item) -> B, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - let mut accum = init; - for x in &mut self { - accum = f(accum, x); - } - unsafe { ffi::PyCriticalSection_End(&mut section) }; - accum + self.with_critical_section(|mut iter| { + let mut accum = init; + for x in &mut iter { + accum = f(accum, x); + } + accum + }) } #[inline] @@ -563,22 +546,13 @@ impl<'py> Iterator for BoundDictIterator<'py> { F: FnMut(B, Self::Item) -> R, R: std::ops::Try, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - let mut accum = init; - - for x in &mut self { - match f(accum, x).branch() { - ControlFlow::Continue(a) => accum = a, - ControlFlow::Break(err) => { - unsafe { ffi::PyCriticalSection_End(&mut section) } - return R::from_residual(err); - } + self.with_critical_section(|mut iter| { + let mut accum = init; + for x in &mut iter { + accum = f(accum, x)? } - } - unsafe { ffi::PyCriticalSection_End(&mut section) }; - R::from_output(accum) + R::from_output(accum) + }) } #[inline] @@ -588,22 +562,19 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(Self::Item) -> bool, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - #[inline] - fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { - move |(), x| { - if f(x) { - ControlFlow::Continue(()) - } else { - ControlFlow::Break(()) + self.with_critical_section(|iter| { + #[inline] + fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { + move |(), x| { + if f(x) { + ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) + } } } - } - let result = self.try_fold((), check(f)) == ControlFlow::Continue(()); - unsafe { ffi::PyCriticalSection_End(&mut section) }; - result + iter.try_fold((), check(f)) == ControlFlow::Continue(()) + }) } #[inline] @@ -613,23 +584,20 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(Self::Item) -> bool, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - #[inline] - fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { - move |(), x| { - if f(x) { - ControlFlow::Break(()) - } else { - ControlFlow::Continue(()) + self.with_critical_section(|iter| { + #[inline] + fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { + move |(), x| { + if f(x) { + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) + } } } - } - let result = self.try_fold((), check(f)) == ControlFlow::Break(()); - unsafe { ffi::PyCriticalSection_End(&mut section) }; - result + iter.try_fold((), check(f)) == ControlFlow::Break(()) + }) } #[inline] @@ -639,26 +607,25 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, P: FnMut(&Self::Item) -> bool, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - #[inline] - fn check(mut predicate: impl FnMut(&T) -> bool) -> impl FnMut((), T) -> ControlFlow { - move |(), x| { - if predicate(&x) { - ControlFlow::Break(x) - } else { - ControlFlow::Continue(()) + self.with_critical_section(|iter| { + #[inline] + fn check( + mut predicate: impl FnMut(&T) -> bool, + ) -> impl FnMut((), T) -> ControlFlow { + move |(), x| { + if predicate(&x) { + ControlFlow::Break(x) + } else { + ControlFlow::Continue(()) + } } } - } - let result = match self.try_fold((), check(predicate)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), - }; - unsafe { ffi::PyCriticalSection_End(&mut section) }; - result + match iter.try_fold((), check(predicate)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + } + }) } #[inline] @@ -668,23 +635,22 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(Self::Item) -> Option, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - #[inline] - fn check(mut f: impl FnMut(T) -> Option) -> impl FnMut((), T) -> ControlFlow { - move |(), x| match f(x) { - Some(x) => ControlFlow::Break(x), - None => ControlFlow::Continue(()), + self.with_critical_section(|iter| { + #[inline] + fn check( + mut f: impl FnMut(T) -> Option, + ) -> impl FnMut((), T) -> ControlFlow { + move |(), x| match f(x) { + Some(x) => ControlFlow::Break(x), + None => ControlFlow::Continue(()), + } } - } - let result = match self.try_fold((), check(f)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), - }; - unsafe { ffi::PyCriticalSection_End(&mut section) }; - result + match iter.try_fold((), check(f)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + } + }) } #[inline] @@ -694,32 +660,28 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, P: FnMut(Self::Item) -> bool, { - let mut section = unsafe { std::mem::zeroed() }; - unsafe { ffi::PyCriticalSection_Begin(&mut section, self.as_ptr()) }; - - #[inline] - fn check<'a, T>( - mut predicate: impl FnMut(T) -> bool + 'a, - acc: &'a mut usize, - ) -> impl FnMut((), T) -> ControlFlow + 'a { - move |_, x| { - if predicate(x) { - ControlFlow::Break(*acc) - } else { - *acc += 1; - ControlFlow::Continue(()) + self.with_critical_section(|iter| { + #[inline] + fn check<'a, T>( + mut predicate: impl FnMut(T) -> bool + 'a, + acc: &'a mut usize, + ) -> impl FnMut((), T) -> ControlFlow + 'a { + move |_, x| { + if predicate(x) { + ControlFlow::Break(*acc) + } else { + *acc += 1; + ControlFlow::Continue(()) + } } } - } - let mut acc = 0; - let result = match self.try_fold((), check(predicate, &mut acc)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), - }; - - unsafe { ffi::PyCriticalSection_End(&mut section) }; - result + let mut acc = 0; + match iter.try_fold((), check(predicate, &mut acc)) { + ControlFlow::Continue(_) => None, + ControlFlow::Break(x) => Some(x), + } + }) } } @@ -751,11 +713,18 @@ impl<'py> BoundDictIterator<'py> { } #[inline] - #[cfg(Py_GIL_DISABLED)] - fn as_ptr(&self) -> *mut ffi::PyObject { + fn with_critical_section(&mut self, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { match self { - BoundDictIterator::ItemIter { ref iter, .. } => iter.as_ptr(), - BoundDictIterator::DictIter { ref dict, .. } => dict.as_ptr(), + BoundDictIterator::ItemIter { .. } => f(self), + #[cfg(not(Py_GIL_DISABLED))] + BoundDictIterator::DictIter { .. } => f(self), + #[cfg(Py_GIL_DISABLED)] + BoundDictIterator::DictIter { ref dict, .. } => { + crate::sync::with_critical_section(dict.clone().as_ref(), || f(self)) + } } } } From a656c8bc91f6763a7316ed115bf05258785f6193 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Mon, 21 Oct 2024 16:58:15 +0200 Subject: [PATCH 12/16] Make `dict::locked_for_each` available on all builds --- src/types/dict.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 8322e291f7a..8c2dd3b812e 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -184,7 +184,6 @@ pub trait PyDictMethods<'py>: crate::sealed::Sealed { /// This is useful when the GIL is disabled and the dictionary is shared between threads. /// It is not guaranteed that the dictionary will not be modified during iteration when the /// closure calls arbitrary Python code that releases the current critical section. - #[cfg(Py_GIL_DISABLED)] fn locked_for_each(&self, closure: F) -> PyResult<()> where F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>; @@ -365,14 +364,15 @@ impl<'py> PyDictMethods<'py> for Bound<'py, PyDict> { fn iter(&self) -> BoundDictIterator<'py> { BoundDictIterator::new(self.clone()) } - - #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] + fn locked_for_each(&self, f: F) -> PyResult<()> where F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>, { #[cfg(feature = "nightly")] { + // We don't need a critical section when the nightly feature is enabled because + // try_for_each is locked by the implementation of try_fold. self.iter().try_for_each(|(key, value)| f(key, value)) } From d6d029c929cbf007490438ce04a9139d56611aea Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 22 Oct 2024 11:08:34 +0200 Subject: [PATCH 13/16] Remove item() iter --- src/types/dict.rs | 202 ++++++++++++++-------------------------------- 1 file changed, 59 insertions(+), 143 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 8c2dd3b812e..56828db9d1a 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -3,7 +3,7 @@ use crate::ffi::Py_ssize_t; use crate::ffi_ptr_ext::FfiPtrExt; use crate::instance::{Borrowed, Bound}; use crate::py_result_ext::PyResultExt; -use crate::types::{PyAny, PyAnyMethods, PyIterator, PyList, PyMapping, PyTuple, PyTupleMethods}; +use crate::types::{PyAny, PyAnyMethods, PyList, PyMapping}; use crate::{ffi, BoundObject, IntoPyObject, Python}; #[cfg(Py_GIL_DISABLED)] use std::ops::ControlFlow; @@ -364,7 +364,7 @@ impl<'py> PyDictMethods<'py> for Bound<'py, PyDict> { fn iter(&self) -> BoundDictIterator<'py> { BoundDictIterator::new(self.clone()) } - + fn locked_for_each(&self, f: F) -> PyResult<()> where F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>, @@ -428,22 +428,11 @@ fn dict_len(dict: &Bound<'_, PyDict>) -> Py_ssize_t { } /// PyO3 implementation of an iterator for a Python `dict` object. -pub enum BoundDictIterator<'py> { - /// Iterator over the items of the dictionary, obtained by calling the python method `items()`. - #[allow(missing_docs)] - ItemIter { - iter: Bound<'py, PyIterator>, - remaining: ffi::Py_ssize_t, - }, - /// Iterator over the items of the dictionary, using the C-API `PyDict_Next`. This variant is - /// only used when the dictionary is an exact instance of `PyDict`. - #[allow(missing_docs)] - DictIter { - dict: Bound<'py, PyDict>, - ppos: ffi::Py_ssize_t, - di_used: ffi::Py_ssize_t, - remaining: ffi::Py_ssize_t, - }, +pub struct BoundDictIterator<'py> { + dict: Bound<'py, PyDict>, + ppos: ffi::Py_ssize_t, + di_used: ffi::Py_ssize_t, + remaining: ffi::Py_ssize_t, } impl<'py> Iterator for BoundDictIterator<'py> { @@ -451,67 +440,50 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] fn next(&mut self) -> Option { - self.with_critical_section(|iter| match iter { - BoundDictIterator::ItemIter { - iter: ref mut py_iter, - ref mut remaining, - } => { - *remaining = remaining.saturating_sub(1); - py_iter.next().map(Result::unwrap).map(|tuple| { - let tuple = tuple.downcast::().unwrap(); - let key = tuple.get_item(0).unwrap(); - let value = tuple.get_item(1).unwrap(); - (key, value) - }) - } - BoundDictIterator::DictIter { - ref mut dict, - ref mut ppos, - ref mut di_used, - ref mut remaining, - } => { - let ma_used = dict_len(dict); - - // These checks are similar to what CPython does. - // - // If the dimension of the dict changes e.g. key-value pairs are removed - // or added during iteration, this will panic next time when `next` is called - if *di_used != ma_used { - *di_used = -1; - panic!("dictionary changed size during iteration"); - }; + crate::sync::with_critical_section(self.dict.as_any(), || { + let ma_used = dict_len(&self.dict); + + // These checks are similar to what CPython does. + // + // If the dimension of the dict changes e.g. key-value pairs are removed + // or added during iteration, this will panic next time when `next` is called + if self.di_used != ma_used { + self.di_used = -1; + panic!("dictionary changed size during iteration"); + }; - // If the dict is changed in such a way that the length remains constant - // then this will panic at the end of iteration - similar to this: - // - // d = {"a":1, "b":2, "c": 3} - // - // for k, v in d.items(): - // d[f"{k}_"] = 4 - // del d[k] - // print(k) - // - if *remaining == -1 { - *di_used = -1; - panic!("dictionary keys changed during iteration"); - }; + // If the dict is changed in such a way that the length remains constant + // then this will panic at the end of iteration - similar to this: + // + // d = {"a":1, "b":2, "c": 3} + // + // for k, v in d.items(): + // d[f"{k}_"] = 4 + // del d[k] + // print(k) + // + if self.remaining == -1 { + self.di_used = -1; + panic!("dictionary keys changed during iteration"); + }; - let mut key: *mut ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut ffi::PyObject = std::ptr::null_mut(); - - if unsafe { ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) } != 0 { - *remaining -= 1; - let py = dict.py(); - // Safety: - // - PyDict_Next returns borrowed values - // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null - Some(( - unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), - unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), - )) - } else { - None - } + let mut key: *mut ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut ffi::PyObject = std::ptr::null_mut(); + + if unsafe { ffi::PyDict_Next(self.dict.as_ptr(), &mut self.ppos, &mut key, &mut value) } + != 0 + { + self.remaining -= 1; + let py = self.dict.py(); + // Safety: + // - PyDict_Next returns borrowed values + // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null + Some(( + unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), + unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), + )) + } else { + None } }) } @@ -687,10 +659,7 @@ impl<'py> Iterator for BoundDictIterator<'py> { impl ExactSizeIterator for BoundDictIterator<'_> { fn len(&self) -> usize { - match self { - BoundDictIterator::ItemIter { remaining, .. } => *remaining as usize, - BoundDictIterator::DictIter { remaining, .. } => *remaining as usize, - } + self.remaining as usize } } @@ -698,34 +667,21 @@ impl<'py> BoundDictIterator<'py> { fn new(dict: Bound<'py, PyDict>) -> Self { let remaining = dict_len(&dict); - if dict.is_exact_instance_of::() { - return BoundDictIterator::DictIter { - dict, - ppos: 0, - di_used: remaining, - remaining, - }; - }; - - let items = dict.call_method0(intern!(dict.py(), "items")).unwrap(); - let iter = PyIterator::from_object(&items).unwrap(); - BoundDictIterator::ItemIter { iter, remaining } + Self { + dict, + ppos: 0, + di_used: remaining, + remaining, + } } + #[cfg(Py_GIL_DISABLED)] #[inline] fn with_critical_section(&mut self, f: F) -> R where F: FnOnce(&mut Self) -> R, { - match self { - BoundDictIterator::ItemIter { .. } => f(self), - #[cfg(not(Py_GIL_DISABLED))] - BoundDictIterator::DictIter { .. } => f(self), - #[cfg(Py_GIL_DISABLED)] - BoundDictIterator::DictIter { ref dict, .. } => { - crate::sync::with_critical_section(dict.clone().as_ref(), || f(self)) - } - } + crate::sync::with_critical_section(&self.dict.clone(), || f(self)) } } @@ -871,9 +827,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::tests::common::generate_unique_module_name; - use crate::types::{PyModule, PyTuple}; - use pyo3_ffi::c_str; + use crate::types::PyTuple; use std::collections::{BTreeMap, HashMap}; #[test] @@ -1184,44 +1138,6 @@ mod tests { }); } - #[test] - fn test_iter_subclass() { - Python::with_gil(|py| { - let mut v = HashMap::new(); - v.insert(7, 32); - v.insert(8, 42); - v.insert(9, 123); - let dict = v.into_pyobject(py).unwrap(); - - let module = PyModule::from_code( - py, - c_str!("class DictSubclass(dict): pass"), - c_str!("test.py"), - &generate_unique_module_name("test"), - ) - .unwrap(); - - let subclass = module - .getattr("DictSubclass") - .unwrap() - .call1((dict,)) - .unwrap() - .downcast_into::() - .unwrap(); - - let mut key_sum = 0; - let mut value_sum = 0; - let iter = subclass.iter(); - assert!(matches!(iter, BoundDictIterator::ItemIter { .. })); - for (key, value) in iter { - key_sum += key.extract::().unwrap(); - value_sum += value.extract::().unwrap(); - } - assert_eq!(7 + 8 + 9, key_sum); - assert_eq!(32 + 42 + 123, value_sum); - }); - } - #[test] fn test_iter_bound() { Python::with_gil(|py| { From 080499626be29ea975ccaafe1234e645db2cc887 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 22 Oct 2024 12:01:03 +0200 Subject: [PATCH 14/16] Add tests for `PyDict::iter()` reducers --- src/types/dict.rs | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/types/dict.rs b/src/types/dict.rs index 56828db9d1a..cd2cc3f4fad 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -1561,4 +1561,107 @@ mod tests { ); }) } + + #[test] + fn test_iter_all() { + Python::with_gil(|py| { + let dict = [(1, true), (2, true), (3, true)].into_py_dict(py).unwrap(); + assert!(dict.iter().all(|(_, v)| v.extract::().unwrap())); + + let dict = [(1, true), (2, false), (3, true)].into_py_dict(py).unwrap(); + assert!(!dict.iter().all(|(_, v)| v.extract::().unwrap())); + }); + } + + #[test] + fn test_iter_any() { + Python::with_gil(|py| { + let dict = [(1, true), (2, false), (3, false)] + .into_py_dict(py) + .unwrap(); + assert!(dict.iter().any(|(_, v)| v.extract::().unwrap())); + + let dict = [(1, false), (2, false), (3, false)] + .into_py_dict(py) + .unwrap(); + assert!(!dict.iter().any(|(_, v)| v.extract::().unwrap())); + }); + } + + #[test] + #[allow(clippy::search_is_some)] + fn test_iter_find() { + Python::with_gil(|py| { + let dict = [(1, false), (2, true), (3, false)] + .into_py_dict(py) + .unwrap(); + + assert_eq!( + Some((2, true)), + dict.iter() + .find(|(_, v)| v.extract::().unwrap()) + .map(|(k, v)| (k.extract().unwrap(), v.extract().unwrap())) + ); + + let dict = [(1, false), (2, false), (3, false)] + .into_py_dict(py) + .unwrap(); + + assert!(dict + .iter() + .find(|(_, v)| v.extract::().unwrap()) + .is_none()); + }); + } + + #[test] + #[allow(clippy::search_is_some)] + fn test_iter_position() { + Python::with_gil(|py| { + let dict = [(1, false), (2, false), (3, true)] + .into_py_dict(py) + .unwrap(); + assert_eq!( + Some(2), + dict.iter().position(|(_, v)| v.extract::().unwrap()) + ); + + let dict = [(1, false), (2, false), (3, false)] + .into_py_dict(py) + .unwrap(); + assert!(dict + .iter() + .position(|(_, v)| v.extract::().unwrap()) + .is_none()); + }); + } + + #[test] + fn test_iter_fold() { + Python::with_gil(|py| { + let dict = [(1, 1), (2, 2), (3, 3)].into_py_dict(py).unwrap(); + let sum = dict + .iter() + .fold(0, |acc, (_, v)| acc + v.extract::().unwrap()); + assert_eq!(sum, 6); + }); + } + + #[test] + fn test_iter_try_fold() { + Python::with_gil(|py| { + let dict = [(1, 1), (2, 2), (3, 3)].into_py_dict(py).unwrap(); + let sum = dict + .iter() + .try_fold(0, |acc, (_, v)| PyResult::Ok(acc + v.extract::()?)) + .unwrap(); + assert_eq!(sum, 6); + + let dict = [(1, "foo"), (2, "bar")].into_py_dict(py).unwrap(); + assert!(dict + .iter() + .try_fold(0, |acc, (_, v)| PyResult::Ok(acc + v.extract::()?)) + .is_err()); + }); + } } From 3d1ec80212a4e460d62bb595525130b6e8ccc7dd Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 22 Oct 2024 14:35:37 +0200 Subject: [PATCH 15/16] Add more docs to locked_for_each --- src/types/dict.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/types/dict.rs b/src/types/dict.rs index cd2cc3f4fad..56260df2616 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -184,6 +184,11 @@ pub trait PyDictMethods<'py>: crate::sealed::Sealed { /// This is useful when the GIL is disabled and the dictionary is shared between threads. /// It is not guaranteed that the dictionary will not be modified during iteration when the /// closure calls arbitrary Python code that releases the current critical section. + /// + /// This method is a small performance optimization over `.iter().try_for_each()` when the + /// nightly feature is not enabled because we cannot implement an optimised version of + /// `iter().try_fold()` on stable yet. If your iteration is infallible then this method has the + /// same performance as `.iter().for_each()`. fn locked_for_each(&self, closure: F) -> PyResult<()> where F: Fn(Bound<'py, PyAny>, Bound<'py, PyAny>) -> PyResult<()>; From e76c7b331e08f30ab1c023a76b9d7d44dc75d417 Mon Sep 17 00:00:00 2001 From: Bas Schoenmaeckers Date: Tue, 22 Oct 2024 17:29:27 +0200 Subject: [PATCH 16/16] Move iter implementation into inner struct --- src/types/dict.rs | 267 ++++++++++++++++++++++------------------------ 1 file changed, 125 insertions(+), 142 deletions(-) diff --git a/src/types/dict.rs b/src/types/dict.rs index 56260df2616..6987e4525ec 100644 --- a/src/types/dict.rs +++ b/src/types/dict.rs @@ -5,8 +5,6 @@ use crate::instance::{Borrowed, Bound}; use crate::py_result_ext::PyResultExt; use crate::types::{PyAny, PyAnyMethods, PyList, PyMapping}; use crate::{ffi, BoundObject, IntoPyObject, Python}; -#[cfg(Py_GIL_DISABLED)] -use std::ops::ControlFlow; /// Represents a Python `dict`. /// @@ -435,9 +433,86 @@ fn dict_len(dict: &Bound<'_, PyDict>) -> Py_ssize_t { /// PyO3 implementation of an iterator for a Python `dict` object. pub struct BoundDictIterator<'py> { dict: Bound<'py, PyDict>, - ppos: ffi::Py_ssize_t, - di_used: ffi::Py_ssize_t, - remaining: ffi::Py_ssize_t, + inner: DictIterImpl, +} + +enum DictIterImpl { + DictIter { + ppos: ffi::Py_ssize_t, + di_used: ffi::Py_ssize_t, + remaining: ffi::Py_ssize_t, + }, +} + +impl DictIterImpl { + #[inline] + fn next<'py>( + &mut self, + dict: &Bound<'py, PyDict>, + ) -> Option<(Bound<'py, PyAny>, Bound<'py, PyAny>)> { + match self { + Self::DictIter { + di_used, + remaining, + ppos, + .. + } => crate::sync::with_critical_section(dict, || { + let ma_used = dict_len(dict); + + // These checks are similar to what CPython does. + // + // If the dimension of the dict changes e.g. key-value pairs are removed + // or added during iteration, this will panic next time when `next` is called + if *di_used != ma_used { + *di_used = -1; + panic!("dictionary changed size during iteration"); + }; + + // If the dict is changed in such a way that the length remains constant + // then this will panic at the end of iteration - similar to this: + // + // d = {"a":1, "b":2, "c": 3} + // + // for k, v in d.items(): + // d[f"{k}_"] = 4 + // del d[k] + // print(k) + // + if *remaining == -1 { + *di_used = -1; + panic!("dictionary keys changed during iteration"); + }; + + let mut key: *mut ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut ffi::PyObject = std::ptr::null_mut(); + + if unsafe { ffi::PyDict_Next(dict.as_ptr(), ppos, &mut key, &mut value) } != 0 { + *remaining -= 1; + let py = dict.py(); + // Safety: + // - PyDict_Next returns borrowed values + // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null + Some(( + unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), + unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), + )) + } else { + None + } + }), + } + } + + #[cfg(Py_GIL_DISABLED)] + #[inline] + fn with_critical_section(&mut self, dict: &Bound<'_, PyDict>, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + match self { + Self::DictIter { .. } => crate::sync::with_critical_section(dict, || f(self)), + } + } } impl<'py> Iterator for BoundDictIterator<'py> { @@ -445,52 +520,7 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] fn next(&mut self) -> Option { - crate::sync::with_critical_section(self.dict.as_any(), || { - let ma_used = dict_len(&self.dict); - - // These checks are similar to what CPython does. - // - // If the dimension of the dict changes e.g. key-value pairs are removed - // or added during iteration, this will panic next time when `next` is called - if self.di_used != ma_used { - self.di_used = -1; - panic!("dictionary changed size during iteration"); - }; - - // If the dict is changed in such a way that the length remains constant - // then this will panic at the end of iteration - similar to this: - // - // d = {"a":1, "b":2, "c": 3} - // - // for k, v in d.items(): - // d[f"{k}_"] = 4 - // del d[k] - // print(k) - // - if self.remaining == -1 { - self.di_used = -1; - panic!("dictionary keys changed during iteration"); - }; - - let mut key: *mut ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut ffi::PyObject = std::ptr::null_mut(); - - if unsafe { ffi::PyDict_Next(self.dict.as_ptr(), &mut self.ppos, &mut key, &mut value) } - != 0 - { - self.remaining -= 1; - let py = self.dict.py(); - // Safety: - // - PyDict_Next returns borrowed values - // - we have already checked that `PyDict_Next` succeeded, so we can assume these to be non-null - Some(( - unsafe { key.assume_borrowed_unchecked(py) }.to_owned(), - unsafe { value.assume_borrowed_unchecked(py) }.to_owned(), - )) - } else { - None - } - }) + self.inner.next(&self.dict) } #[inline] @@ -506,9 +536,9 @@ impl<'py> Iterator for BoundDictIterator<'py> { Self: Sized, F: FnMut(B, Self::Item) -> B, { - self.with_critical_section(|mut iter| { + self.inner.with_critical_section(&self.dict, |inner| { let mut accum = init; - for x in &mut iter { + while let Some(x) = inner.next(&self.dict) { accum = f(accum, x); } accum @@ -523,9 +553,9 @@ impl<'py> Iterator for BoundDictIterator<'py> { F: FnMut(B, Self::Item) -> R, R: std::ops::Try, { - self.with_critical_section(|mut iter| { + self.inner.with_critical_section(&self.dict, |inner| { let mut accum = init; - for x in &mut iter { + while let Some(x) = inner.next(&self.dict) { accum = f(accum, x)? } R::from_output(accum) @@ -534,137 +564,97 @@ impl<'py> Iterator for BoundDictIterator<'py> { #[inline] #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] - fn all(&mut self, f: F) -> bool + fn all(&mut self, mut f: F) -> bool where Self: Sized, F: FnMut(Self::Item) -> bool, { - self.with_critical_section(|iter| { - #[inline] - fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { - move |(), x| { - if f(x) { - ControlFlow::Continue(()) - } else { - ControlFlow::Break(()) - } + self.inner.with_critical_section(&self.dict, |inner| { + while let Some(x) = inner.next(&self.dict) { + if !f(x) { + return false; } } - iter.try_fold((), check(f)) == ControlFlow::Continue(()) + true }) } #[inline] #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] - fn any(&mut self, f: F) -> bool + fn any(&mut self, mut f: F) -> bool where Self: Sized, F: FnMut(Self::Item) -> bool, { - self.with_critical_section(|iter| { - #[inline] - fn check(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> ControlFlow<()> { - move |(), x| { - if f(x) { - ControlFlow::Break(()) - } else { - ControlFlow::Continue(()) - } + self.inner.with_critical_section(&self.dict, |inner| { + while let Some(x) = inner.next(&self.dict) { + if f(x) { + return true; } } - - iter.try_fold((), check(f)) == ControlFlow::Break(()) + false }) } #[inline] #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] - fn find

(&mut self, predicate: P) -> Option + fn find

(&mut self, mut predicate: P) -> Option where Self: Sized, P: FnMut(&Self::Item) -> bool, { - self.with_critical_section(|iter| { - #[inline] - fn check( - mut predicate: impl FnMut(&T) -> bool, - ) -> impl FnMut((), T) -> ControlFlow { - move |(), x| { - if predicate(&x) { - ControlFlow::Break(x) - } else { - ControlFlow::Continue(()) - } + self.inner.with_critical_section(&self.dict, |inner| { + while let Some(x) = inner.next(&self.dict) { + if predicate(&x) { + return Some(x); } } - - match iter.try_fold((), check(predicate)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), - } + None }) } #[inline] #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] - fn find_map(&mut self, f: F) -> Option + fn find_map(&mut self, mut f: F) -> Option where Self: Sized, F: FnMut(Self::Item) -> Option, { - self.with_critical_section(|iter| { - #[inline] - fn check( - mut f: impl FnMut(T) -> Option, - ) -> impl FnMut((), T) -> ControlFlow { - move |(), x| match f(x) { - Some(x) => ControlFlow::Break(x), - None => ControlFlow::Continue(()), + self.inner.with_critical_section(&self.dict, |inner| { + while let Some(x) = inner.next(&self.dict) { + if let found @ Some(_) = f(x) { + return found; } } - - match iter.try_fold((), check(f)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), - } + None }) } #[inline] #[cfg(all(Py_GIL_DISABLED, not(feature = "nightly")))] - fn position

(&mut self, predicate: P) -> Option + fn position

(&mut self, mut predicate: P) -> Option where Self: Sized, P: FnMut(Self::Item) -> bool, { - self.with_critical_section(|iter| { - #[inline] - fn check<'a, T>( - mut predicate: impl FnMut(T) -> bool + 'a, - acc: &'a mut usize, - ) -> impl FnMut((), T) -> ControlFlow + 'a { - move |_, x| { - if predicate(x) { - ControlFlow::Break(*acc) - } else { - *acc += 1; - ControlFlow::Continue(()) - } - } - } - + self.inner.with_critical_section(&self.dict, |inner| { let mut acc = 0; - match iter.try_fold((), check(predicate, &mut acc)) { - ControlFlow::Continue(_) => None, - ControlFlow::Break(x) => Some(x), + while let Some(x) = inner.next(&self.dict) { + if predicate(x) { + return Some(acc); + } + acc += 1; } + None }) } } impl ExactSizeIterator for BoundDictIterator<'_> { fn len(&self) -> usize { - self.remaining as usize + match self.inner { + DictIterImpl::DictIter { remaining, .. } => remaining as usize, + } } } @@ -674,20 +664,13 @@ impl<'py> BoundDictIterator<'py> { Self { dict, - ppos: 0, - di_used: remaining, - remaining, + inner: DictIterImpl::DictIter { + ppos: 0, + di_used: remaining, + remaining, + }, } } - - #[cfg(Py_GIL_DISABLED)] - #[inline] - fn with_critical_section(&mut self, f: F) -> R - where - F: FnOnce(&mut Self) -> R, - { - crate::sync::with_critical_section(&self.dict.clone(), || f(self)) - } } impl<'py> IntoIterator for Bound<'py, PyDict> {