diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 5379ac3abba227..73adceb54ffcc9 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -679,6 +679,18 @@ xml.parsers.expat .. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack +abc +--- + +* Reduce memory usage of :func:`issubclass` checks for classes inheriting abstract classes. + + :class:`abc.ABCMeta` hook ``__subclasscheck__`` now includes + a guard which is triggered then the hook is called from a parent class + (``issubclass(cls, RootClass)`` -> ``issubclass(cls, NestedClass)`` -> ...). + This guard prevents adding ``cls`` to ``NestedClass`` positive and negative caches, + preventing memory bloat in some cases (thousands of classes inherited from ABC). + + (Contributed by Maxim Martynov in :gh:`92810`.) zlib ---- diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index c31c33657002ec..3c1e53e123095e 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -45,6 +45,8 @@ extern "C" { _Py_atomic_load_uint16_relaxed(&value) #define FT_ATOMIC_LOAD_UINT32_RELAXED(value) \ _Py_atomic_load_uint32_relaxed(&value) +#define FT_ATOMIC_LOAD_UINT64_RELAXED(value) \ + _Py_atomic_load_uint64_relaxed(&value) #define FT_ATOMIC_LOAD_ULONG_RELAXED(value) \ _Py_atomic_load_ulong_relaxed(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ @@ -61,6 +63,8 @@ extern "C" { _Py_atomic_store_uint16_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) \ _Py_atomic_store_uint32_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT64_RELAXED(value, new_value) \ + _Py_atomic_store_uint64_relaxed(&value, new_value) #define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) \ _Py_atomic_store_char_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_CHAR_RELAXED(value) \ @@ -111,6 +115,8 @@ extern "C" { _Py_atomic_load_ullong_relaxed(&value) #define FT_ATOMIC_ADD_SSIZE(value, new_value) \ (void)_Py_atomic_add_ssize(&value, new_value) +#define FT_ATOMIC_ADD_UINT64(value, new_value) \ + (void)_Py_atomic_add_uint64(&value, new_value) #define FT_MUTEX_LOCK(lock) PyMutex_Lock(lock) #define FT_MUTEX_UNLOCK(lock) PyMutex_Unlock(lock) @@ -128,6 +134,7 @@ extern "C" { #define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT16_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT32_RELAXED(value) value +#define FT_ATOMIC_LOAD_UINT64_RELAXED(value) value #define FT_ATOMIC_LOAD_ULONG_RELAXED(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value @@ -136,6 +143,7 @@ extern "C" { #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT64_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_CHAR_RELAXED(value) value #define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) value @@ -161,6 +169,7 @@ extern "C" { #define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) value #define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_ADD_SSIZE(value, new_value) (void)(value += new_value) +#define FT_ATOMIC_ADD_UINT64(value, new_value) (void)(value += new_value) #define FT_MUTEX_LOCK(lock) do {} while (0) #define FT_MUTEX_UNLOCK(lock) do {} while (0) diff --git a/Lib/_py_abc.py b/Lib/_py_abc.py index c870ae9048b4f1..113adde22944bf 100644 --- a/Lib/_py_abc.py +++ b/Lib/_py_abc.py @@ -49,6 +49,7 @@ def __new__(mcls, name, bases, namespace, /, **kwargs): cls._abc_cache = WeakSet() cls._abc_negative_cache = WeakSet() cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter + cls._abc_issubclasscheck_recursive = False return cls def register(cls, subclass): @@ -65,7 +66,10 @@ def register(cls, subclass): if issubclass(cls, subclass): # This would create a cycle, which is bad for the algorithm below raise RuntimeError("Refusing to create an inheritance cycle") + # Add registry entry cls._abc_registry.add(subclass) + # Automatically include cache entry + cls._abc_cache.add(subclass) ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache return subclass @@ -139,9 +143,25 @@ def __subclasscheck__(cls, subclass): return True # Check if it's a subclass of a subclass (recursive) for scls in cls.__subclasses__(): - if issubclass(subclass, scls): - cls._abc_cache.add(subclass) + # If inside recursive issubclass check, avoid adding classes + # to any cache because this may drastically increase memory usage. + # Unfortunately, issubclass/__subclasscheck__ don't accept third + # argument with context, so using global context within ABCMeta. + # This is done only on first method call, next will use cache anyway. + scls_is_abc = hasattr(scls, "_abc_issubclasscheck_recursive") + if scls_is_abc: + scls._abc_issubclasscheck_recursive = True + try: + # Perform recursive check + result = issubclass(subclass, scls) + finally: + if scls_is_abc: + scls._abc_issubclasscheck_recursive = False + if result: + if not cls._abc_issubclasscheck_recursive: + cls._abc_cache.add(subclass) return True # No dice; update negative cache - cls._abc_negative_cache.add(subclass) + if not cls._abc_issubclasscheck_recursive: + cls._abc_negative_cache.add(subclass) return False diff --git a/Lib/test/test_abc.py b/Lib/test/test_abc.py index 80ee9e0ba56e75..3cf7c7529e51ba 100644 --- a/Lib/test/test_abc.py +++ b/Lib/test/test_abc.py @@ -70,6 +70,25 @@ def foo(): return 4 class TestABC(unittest.TestCase): + def check_isinstance(self, obj, target_class): + self.assertIsInstance(obj, target_class) + self.assertIsInstance(obj, (target_class,)) + self.assertIsInstance(obj, target_class | target_class) + + def check_not_isinstance(self, obj, target_class): + self.assertNotIsInstance(obj, target_class) + self.assertNotIsInstance(obj, (target_class,)) + self.assertNotIsInstance(obj, target_class | target_class) + + def check_issubclass(self, klass, target_class): + self.assertIsSubclass(klass, target_class) + self.assertIsSubclass(klass, (target_class,)) + self.assertIsSubclass(klass, target_class | target_class) + + def check_not_issubclass(self, klass, target_class): + self.assertNotIsSubclass(klass, target_class) + self.assertNotIsSubclass(klass, (target_class,)) + self.assertNotIsSubclass(klass, target_class | target_class) def test_ABC_helper(self): # create an ABC using the helper class and perform basic checks @@ -270,29 +289,75 @@ def x(self): class C(metaclass=meta): pass + def test_isinstance_direct_inheritance(self): + class A(metaclass=abc_ABCMeta): + pass + class B(A): + pass + class C(A): + pass + + a = A() + b = B() + c = C() + # trigger caching + for _ in range(2): + self.check_isinstance(a, A) + self.check_not_isinstance(a, B) + self.check_not_isinstance(a, C) + + self.check_isinstance(b, B) + self.check_isinstance(b, A) + self.check_not_isinstance(b, C) + + self.check_isinstance(c, C) + self.check_isinstance(c, A) + self.check_not_isinstance(c, B) + + self.check_issubclass(B, A) + self.check_issubclass(C, A) + self.check_not_issubclass(B, C) + self.check_not_issubclass(C, B) + self.check_not_issubclass(A, B) + self.check_not_issubclass(A, C) + def test_registration_basics(self): class A(metaclass=abc_ABCMeta): pass class B(object): pass + + a = A() b = B() - self.assertNotIsSubclass(B, A) - self.assertNotIsSubclass(B, (A,)) - self.assertNotIsInstance(b, A) - self.assertNotIsInstance(b, (A,)) + # trigger caching + for _ in range(2): + self.check_not_issubclass(B, A) + self.check_not_isinstance(b, A) + + self.check_not_issubclass(A, B) + self.check_not_isinstance(a, B) + B1 = A.register(B) - self.assertIsSubclass(B, A) - self.assertIsSubclass(B, (A,)) - self.assertIsInstance(b, A) - self.assertIsInstance(b, (A,)) - self.assertIs(B1, B) + # trigger caching + for _ in range(2): + self.check_issubclass(B, A) + self.check_isinstance(b, A) + self.assertIs(B1, B) + + self.check_not_issubclass(A, B) + self.check_not_isinstance(a, B) + class C(B): pass + c = C() - self.assertIsSubclass(C, A) - self.assertIsSubclass(C, (A,)) - self.assertIsInstance(c, A) - self.assertIsInstance(c, (A,)) + # trigger caching + for _ in range(2): + self.check_issubclass(C, A) + self.check_isinstance(c, A) + + self.check_not_issubclass(A, C) + self.check_not_isinstance(a, C) def test_register_as_class_deco(self): class A(metaclass=abc_ABCMeta): @@ -377,39 +442,73 @@ class A(metaclass=abc_ABCMeta): pass self.assertIsSubclass(A, A) self.assertIsSubclass(A, (A,)) + class B(metaclass=abc_ABCMeta): pass self.assertNotIsSubclass(A, B) self.assertNotIsSubclass(A, (B,)) self.assertNotIsSubclass(B, A) self.assertNotIsSubclass(B, (A,)) + class C(metaclass=abc_ABCMeta): pass A.register(B) class B1(B): pass - self.assertIsSubclass(B1, A) - self.assertIsSubclass(B1, (A,)) + # trigger caching + for _ in range(2): + self.assertIsSubclass(B1, A) + self.assertIsSubclass(B1, (A,)) + class C1(C): pass B1.register(C1) - self.assertNotIsSubclass(C, B) - self.assertNotIsSubclass(C, (B,)) - self.assertNotIsSubclass(C, B1) - self.assertNotIsSubclass(C, (B1,)) - self.assertIsSubclass(C1, A) - self.assertIsSubclass(C1, (A,)) - self.assertIsSubclass(C1, B) - self.assertIsSubclass(C1, (B,)) - self.assertIsSubclass(C1, B1) - self.assertIsSubclass(C1, (B1,)) + # trigger caching + for _ in range(2): + self.assertNotIsSubclass(C, B) + self.assertNotIsSubclass(C, (B,)) + self.assertNotIsSubclass(C, B1) + self.assertNotIsSubclass(C, (B1,)) + self.assertIsSubclass(C1, A) + self.assertIsSubclass(C1, (A,)) + self.assertIsSubclass(C1, B) + self.assertIsSubclass(C1, (B,)) + self.assertIsSubclass(C1, B1) + self.assertIsSubclass(C1, (B1,)) + C1.register(int) class MyInt(int): pass - self.assertIsSubclass(MyInt, A) - self.assertIsSubclass(MyInt, (A,)) - self.assertIsInstance(42, A) - self.assertIsInstance(42, (A,)) + # trigger caching + for _ in range(2): + self.assertIsSubclass(MyInt, A) + self.assertIsSubclass(MyInt, (A,)) + self.assertIsInstance(42, A) + self.assertIsInstance(42, (A,)) + + def test_custom_subclasses(self): + class A: pass + class B: pass + + class Parent1(metaclass=abc_ABCMeta): + @classmethod + def __subclasses__(cls): + return [A] + + class Parent2(metaclass=abc_ABCMeta): + __subclasses__ = lambda: [A] + + # trigger caching + for _ in range(2): + self.check_isinstance(A(), Parent1) + self.check_issubclass(A, Parent1) + self.check_not_isinstance(B(), Parent1) + self.check_not_issubclass(B, Parent1) + + self.check_isinstance(A(), Parent2) + self.check_issubclass(A, Parent2) + self.check_not_isinstance(B(), Parent2) + self.check_not_issubclass(B, Parent2) def test_issubclass_bad_arguments(self): class A(metaclass=abc_ABCMeta): @@ -460,8 +559,32 @@ class S(metaclass=abc_ABCMeta): with self.assertRaisesRegex(CustomError, exc_msg): issubclass(int, S) - def test_subclasshook(self): + def test_issubclass_bad_class(self): class A(metaclass=abc.ABCMeta): + pass + + A._abc_impl = 1 + error_msg = "_abc_impl is set to a wrong type" + with self.assertRaisesRegex(TypeError, error_msg): + issubclass(A, A) + + class B(metaclass=_py_abc.ABCMeta): + pass + + B._abc_cache = 1 + error_msg = "argument of type 'int' is not a container or iterable" + with self.assertRaisesRegex(TypeError, error_msg): + issubclass(B, B) + + class C(metaclass=_py_abc.ABCMeta): + pass + + C._abc_negative_cache = 1 + with self.assertRaisesRegex(TypeError, error_msg): + issubclass(C, C) + + def test_subclasshook(self): + class A(metaclass=abc_ABCMeta): @classmethod def __subclasshook__(cls, C): if cls is A: @@ -478,6 +601,26 @@ class C: self.assertNotIsSubclass(C, A) self.assertNotIsSubclass(C, (A,)) + def test_subclasshook_exception(self): + # Check that issubclass() propagates exceptions raised by + # __subclasshook__. + class CustomError(Exception): ... + exc_msg = "exception from __subclasshook__" + class A(metaclass=abc_ABCMeta): + @classmethod + def __subclasshook__(cls, C): + raise CustomError(exc_msg) + with self.assertRaisesRegex(CustomError, exc_msg): + issubclass(A, A) + class B(A): + pass + with self.assertRaisesRegex(CustomError, exc_msg): + issubclass(B, A) + class C: + pass + with self.assertRaisesRegex(CustomError, exc_msg): + issubclass(C, A) + def test_all_new_methods_are_called(self): class A(metaclass=abc_ABCMeta): pass @@ -522,7 +665,6 @@ def foo(self): self.assertEqual(A.__abstractmethods__, set()) A() - def test_update_new_abstractmethods(self): class A(metaclass=abc_ABCMeta): @abc.abstractmethod diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index f440fc28ee7b7d..1786f052f37923 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -353,6 +353,28 @@ class B: with support.infinite_recursion(25): self.assertRaises(RecursionError, issubclass, X(), int) + def test_custom_subclasses_are_ignored(self): + class A: pass + class B: pass + + class Parent1: + @classmethod + def __subclasses__(cls): + return [A, B] + + class Parent2: + __subclasses__ = lambda: [A, B] + + self.assertNotIsInstance(A(), Parent1) + self.assertNotIsInstance(B(), Parent1) + self.assertNotIsSubclass(A, Parent1) + self.assertNotIsSubclass(B, Parent1) + + self.assertNotIsInstance(A(), Parent2) + self.assertNotIsInstance(B(), Parent2) + self.assertNotIsSubclass(A, Parent2) + self.assertNotIsSubclass(B, Parent2) + def blowstack(fxn, arg, compare_to): # Make sure that calling isinstance with a deeply nested tuple for its diff --git a/Misc/NEWS.d/next/Library/2025-06-13-17-22-34.gh-issue-92810.Tb6x6C.rst b/Misc/NEWS.d/next/Library/2025-06-13-17-22-34.gh-issue-92810.Tb6x6C.rst new file mode 100644 index 00000000000000..13cd06eb821bf4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-13-17-22-34.gh-issue-92810.Tb6x6C.rst @@ -0,0 +1,2 @@ +Reduce memory usage by :meth:`~type.__subclasscheck__` +for :class:`abc.ABCMeta` and large class trees. diff --git a/Modules/_abc.c b/Modules/_abc.c index f87a5c702946bc..defa219b98acf0 100644 --- a/Modules/_abc.c +++ b/Modules/_abc.c @@ -35,21 +35,13 @@ get_abc_state(PyObject *module) static inline uint64_t get_invalidation_counter(_abcmodule_state *state) { -#ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint64(&state->abc_invalidation_counter); -#else - return state->abc_invalidation_counter; -#endif + return FT_ATOMIC_LOAD_UINT64_RELAXED(state->abc_invalidation_counter); } static inline void increment_invalidation_counter(_abcmodule_state *state) { -#ifdef Py_GIL_DISABLED - _Py_atomic_add_uint64(&state->abc_invalidation_counter, 1); -#else - state->abc_invalidation_counter++; -#endif + FT_ATOMIC_ADD_UINT64(state->abc_invalidation_counter, 1); } /* This object stores internal state for ABCs. @@ -65,6 +57,7 @@ typedef struct { PyObject *_abc_cache; PyObject *_abc_negative_cache; uint64_t _abc_negative_cache_version; + uint8_t _abc_issubclasscheck_recursive; } _abc_data; #define _abc_data_CAST(op) ((_abc_data *)(op)) @@ -72,21 +65,31 @@ typedef struct { static inline uint64_t get_cache_version(_abc_data *impl) { -#ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint64(&impl->_abc_negative_cache_version); -#else - return impl->_abc_negative_cache_version; -#endif + return FT_ATOMIC_LOAD_UINT64_RELAXED(impl->_abc_negative_cache_version); } static inline void set_cache_version(_abc_data *impl, uint64_t version) { -#ifdef Py_GIL_DISABLED - _Py_atomic_store_uint64(&impl->_abc_negative_cache_version, version); -#else - impl->_abc_negative_cache_version = version; -#endif + FT_ATOMIC_STORE_UINT64_RELAXED(impl->_abc_negative_cache_version, version); +} + +static inline uint8_t +is_issubclasscheck_recursive(_abc_data *impl) +{ + return FT_ATOMIC_LOAD_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive); +} + +static inline void +set_issubclasscheck_recursive(_abc_data *impl) +{ + FT_ATOMIC_STORE_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive, 1); +} + +static inline void +unset_issubclasscheck_recursive(_abc_data *impl) +{ + FT_ATOMIC_STORE_UINT8_RELAXED(impl->_abc_issubclasscheck_recursive, 0); } static int @@ -139,6 +142,7 @@ abc_data_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->_abc_cache = NULL; self->_abc_negative_cache = NULL; self->_abc_negative_cache_version = get_invalidation_counter(state); + self->_abc_issubclasscheck_recursive = 0; return (PyObject *) self; } @@ -177,6 +181,30 @@ _get_impl(PyObject *module, PyObject *self) return (_abc_data *)impl; } +/* If class is inherited from ABC, set data to point to internal ABC state of class, and return 1. + If object is not inherited from ABC, return 0. + If error is encountered, return -1. + */ +static int +_get_optional_impl(_abcmodule_state *state, PyObject *self, _abc_data **data) +{ + assert(data != NULL); + PyObject *impl = NULL; + int res = PyObject_GetOptionalAttr(self, &_Py_ID(_abc_impl), &impl); + if (res <= 0) { + *data = NULL; + return res; + } + if (!Py_IS_TYPE(impl, state->_abc_data_type)) { + PyErr_SetString(PyExc_TypeError, "_abc_impl is set to a wrong type"); + Py_DECREF(impl); + *data = NULL; + return -1; + } + *data = (_abc_data *)impl; + return 1; +} + static int _in_weak_set(_abc_data *impl, PyObject **pset, PyObject *obj) { @@ -347,11 +375,12 @@ _abc__get_dump(PyObject *module, PyObject *self) } PyObject *res; Py_BEGIN_CRITICAL_SECTION(impl); - res = Py_BuildValue("NNNK", + res = Py_BuildValue("NNNKK", PySet_New(impl->_abc_registry), PySet_New(impl->_abc_cache), PySet_New(impl->_abc_negative_cache), - get_cache_version(impl)); + get_cache_version(impl), + is_issubclasscheck_recursive(impl)); Py_END_CRITICAL_SECTION(); Py_DECREF(impl); return res; @@ -584,10 +613,16 @@ _abc__abc_register_impl(PyObject *module, PyObject *self, PyObject *subclass) if (impl == NULL) { return NULL; } + // Add registry entry if (_add_to_weak_set(impl, &impl->_abc_registry, subclass) < 0) { Py_DECREF(impl); return NULL; } + // Automatically include cache entry + if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) { + Py_DECREF(impl); + return NULL; + } Py_DECREF(impl); /* Invalidate negative cache */ @@ -818,23 +853,53 @@ _abc__abc_subclasscheck_impl(PyObject *module, PyObject *self, if (scls == NULL) { goto end; } + + _abc_data *scls_impl; + int scls_is_abc = _get_optional_impl(state, scls, &scls_impl); + if (scls_is_abc < 0) { + Py_DECREF(scls); + goto end; + } + if (scls_is_abc > 0) { + /* + If inside recursive issubclass check, avoid adding classes + to any cache because this may drastically increase memory usage. + Unfortunately, issubclass/__subclasscheck__ don't accept third + argument with context, so using global context within ABCMeta. + This is done only on first method call, next will use cache. + */ + set_issubclasscheck_recursive(scls_impl); + } + + // Perform recursive check int r = PyObject_IsSubclass(subclass, scls); Py_DECREF(scls); + + if (scls_is_abc > 0) { + // reset recursion guard even if exception was raised in __subclasscheck__ + unset_issubclasscheck_recursive(scls_impl); + } + Py_XDECREF(scls_impl); + + if (r < 0) { + goto end; + } if (r > 0) { - if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) { - goto end; + if (!is_issubclasscheck_recursive(impl)) { + if (_add_to_weak_set(impl, &impl->_abc_cache, subclass) < 0) { + goto end; + } } result = Py_True; goto end; } - if (r < 0) { - goto end; - } } /* No dice; update negative cache. */ - if (_add_to_weak_set(impl, &impl->_abc_negative_cache, subclass) < 0) { - goto end; + if (!is_issubclasscheck_recursive(impl)) { + if (_add_to_weak_set(impl, &impl->_abc_negative_cache, subclass) < 0) { + goto end; + } } result = Py_False; @@ -849,7 +914,7 @@ static int subclasscheck_check_registry(_abc_data *impl, PyObject *subclass, PyObject **result) { - // Fast path: check subclass is in weakref directly. + // Fast path: check subclass is in weakset directly. int ret = _in_weak_set(impl, &impl->_abc_registry, subclass); if (ret < 0) { *result = NULL;