From b33986abaaf1094c8d0b90e51e2bdad64eb997c5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 24 Oct 2024 15:38:23 -0700 Subject: [PATCH] Make CALL_ALLOC_AND_ENTER_INIT thread-safe - Modify `get_init_for_simple_managed_python_class` to return both init as well as the type version at the time of lookup. - Modify caching logic to verify that the current version of the type matches the version at the time of lookup. This prevents potentially caching a stale value if we race with an update to __init__. - Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the borrowed reference to __init__ that is stored in the cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes: 1. The type version is cleared before the reference in the MRO to __init__ is destroyed. 2. If the reference in (1) was the last reference then the __init__ method will be queued for deletion the next time GC runs. 3. GC requires stopping the world, which forces a synchronizes-with operation between all threads. 4. If the GC collects the cached __init__, then type's version will have been updated *and* the update will be visible to all threads, so the guard cannot pass. - There are no escaping calls in between loading from the specialization cache and pushing the frame. This is a requirement for the default build. --- Include/internal/pycore_object.h | 2 ++ Objects/typeobject.c | 21 +++++++++++++++ Python/bytecodes.c | 4 +-- Python/specialize.c | 46 ++++++++++++++++++++------------ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 6bf81158197d09..8bd1edac33d545 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -777,6 +777,8 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, unsigned int *); +extern int _PyType_CacheInitForSpecialization(PyTypeObject *, PyObject *, + unsigned int); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 747deaa36d4560..135d5b87e2f8f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5643,6 +5643,27 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return res; } + +int +_PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init, + unsigned int tp_version) +{ + if (!init || !tp_version) { + return 0; + } + int can_cache = type->tp_version_tag == tp_version; + BEGIN_TYPE_LOCK(); + #ifdef Py_GIL_DISABLED + can_cache = can_cache && _PyObject_HasDeferredRefcount(init); + #endif + if (can_cache) { + PyHeapTypeObject *ht = (PyHeapTypeObject*) type; + FT_ATOMIC_STORE_PTR_RELAXED(ht->_spec_cache.init, init); + } + END_TYPE_LOCK(); + return can_cache; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e56d3c70c202b9..a8fd621cb4c7bc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3688,10 +3688,10 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null[0])); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); diff --git a/Python/specialize.c b/Python/specialize.c index 36677da2103f52..3acd38a951c3f1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1920,38 +1920,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD cache->counter = adaptive_counter_cooldown(); } -/* Returns a borrowed reference. - * The reference is only valid if guarded by a type version check. - */ -static PyFunctionObject * -get_init_for_simple_managed_python_class(PyTypeObject *tp) +/* Returns a strong reference. */ +static PyObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version) { assert(tp->tp_new == PyBaseObject_Type.tp_new); if (tp->tp_alloc != PyType_GenericAlloc) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN); return NULL; } - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { + unsigned long tp_flags = PyType_GetFlags(tp); + if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES); return NULL; } - if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) { /* Is this possible? */ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR); return NULL; } - PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__)); + PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version); if (init == NULL || !PyFunction_Check(init)) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON); + Py_XDECREF(init); return NULL; } int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE); + Py_DECREF(init); return NULL; } - ((PyHeapTypeObject *)tp)->_spec_cache.init = init; - return (PyFunctionObject *)init; + return init; } static void @@ -1984,21 +1984,23 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (Py_TYPE(tp) != &PyType_Type) { goto generic; } - #ifndef Py_GIL_DISABLED if (tp->tp_new == PyBaseObject_Type.tp_new) { - PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); - if (type_get_version(tp, CALL) == 0) { - unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION); + unsigned int tp_version = 0; + PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); + if (!tp_version) { + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + Py_XDECREF(init); return; } - if (init != NULL) { + if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) { _PyCallCache cache; - write_u32(cache.func_version, tp->tp_version_tag); + write_u32(cache.func_version, tp_version); specialize(instr, CALL_ALLOC_AND_ENTER_INIT, &cache); + Py_DECREF(init); return; } + Py_XDECREF(init); } - #endif generic: specialize(instr, CALL_NON_PY_GENERAL, NULL); } @@ -2808,6 +2810,13 @@ static const PyBytesObject no_location = { .ob_sval = { NO_LOC_4 } }; +#ifdef Py_GIL_DISABLED +static _PyCodeArray init_cleanup_tlbc = { + .size = 1, + .entries = {(char*) &_Py_InitCleanup.co_code_adaptive}, +}; +#endif + const struct _PyCode8 _Py_InitCleanup = { _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), @@ -2823,6 +2832,9 @@ const struct _PyCode8 _Py_InitCleanup = { ._co_firsttraceable = 4, .co_stacksize = 2, .co_framesize = 2 + FRAME_SPECIALS_SIZE, +#ifdef Py_GIL_DISABLED + .co_tlbc = &init_cleanup_tlbc, +#endif .co_code_adaptive = { EXIT_INIT_CHECK, 0, RETURN_VALUE, 0,