https://github.com/python/cpython/commit/8227883d1f1bbb6560e5f175d7ee49f013c094bd
commit: 8227883d1f1bbb6560e5f175d7ee49f013c094bd
branch: main
author: Alex Waygood <[email protected]>
committer: AlexWaygood <[email protected]>
date: 2024-04-24T15:55:02+01:00
summary:

gh-118013: Use weakrefs for the cache key in `inspect._shadowed_dict` (#118202)

files:
A Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst
M Doc/whatsnew/3.12.rst
M Lib/inspect.py
M Lib/test/libregrtest/utils.py
M Lib/test/test_inspect/test_inspect.py

diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index ce3d9ec6a29de8..8757311a484257 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -734,8 +734,7 @@ inspect
 
 * The performance of :func:`inspect.getattr_static` has been considerably
   improved. Most calls to the function should be at least 2x faster than they
-  were in Python 3.11, and some may be 6x faster or more. (Contributed by Alex
-  Waygood in :gh:`103193`.)
+  were in Python 3.11. (Contributed by Alex Waygood in :gh:`103193`.)
 
 itertools
 ---------
@@ -1006,8 +1005,8 @@ typing
   :func:`runtime-checkable protocols <typing.runtime_checkable>` has changed
   significantly. Most ``isinstance()`` checks against protocols with only a few
   members should be at least 2x faster than in 3.11, and some may be 20x
-  faster or more. However, ``isinstance()`` checks against protocols with 
fourteen
-  or more members may be slower than in Python 3.11. (Contributed by Alex
+  faster or more. However, ``isinstance()`` checks against protocols with many
+  members may be slower than in Python 3.11. (Contributed by Alex
   Waygood in :gh:`74690` and :gh:`103193`.)
 
 * All :data:`typing.TypedDict` and :data:`typing.NamedTuple` classes now have 
the
diff --git a/Lib/inspect.py b/Lib/inspect.py
index 422c09a92ad141..3c346b27b1f06d 100644
--- a/Lib/inspect.py
+++ b/Lib/inspect.py
@@ -160,6 +160,7 @@
 from keyword import iskeyword
 from operator import attrgetter
 from collections import namedtuple, OrderedDict
+from weakref import ref as make_weakref
 
 # Create constants for the compiler flags in Include/code.h
 # We try to get them from dis to avoid duplication
@@ -1832,9 +1833,16 @@ def _check_class(klass, attr):
             return entry.__dict__[attr]
     return _sentinel
 
+
 @functools.lru_cache()
-def _shadowed_dict_from_mro_tuple(mro):
-    for entry in mro:
+def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro):
+    for weakref_entry in weakref_mro:
+        # Normally we'd have to check whether the result of weakref_entry()
+        # is None here, in case the object the weakref is pointing to has died.
+        # In this specific case, however, we know that the only caller of this
+        # function is `_shadowed_dict()`, and that therefore this weakref is
+        # guaranteed to point to an object that is still alive.
+        entry = weakref_entry()
         dunder_dict = _get_dunder_dict_of_class(entry)
         if '__dict__' in dunder_dict:
             class_dict = dunder_dict['__dict__']
@@ -1844,8 +1852,19 @@ def _shadowed_dict_from_mro_tuple(mro):
                 return class_dict
     return _sentinel
 
+
 def _shadowed_dict(klass):
-    return _shadowed_dict_from_mro_tuple(_static_getmro(klass))
+    # gh-118013: the inner function here is decorated with lru_cache for
+    # performance reasons, *but* make sure not to pass strong references
+    # to the items in the mro. Doing so can lead to unexpected memory
+    # consumption in cases where classes are dynamically created and
+    # destroyed, and the dynamically created classes happen to be the only
+    # objects that hold strong references to other objects that take up a
+    # significant amount of memory.
+    return _shadowed_dict_from_weakref_mro_tuple(
+        *[make_weakref(entry) for entry in _static_getmro(klass)]
+    )
+
 
 def getattr_static(obj, attr, default=_sentinel):
     """Retrieve attributes without triggering dynamic lookup via the
diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py
index 791f996127ea58..8253d330b95b81 100644
--- a/Lib/test/libregrtest/utils.py
+++ b/Lib/test/libregrtest/utils.py
@@ -275,7 +275,7 @@ def clear_caches():
     except KeyError:
         pass
     else:
-        inspect._shadowed_dict_from_mro_tuple.cache_clear()
+        inspect._shadowed_dict_from_weakref_mro_tuple.cache_clear()
         inspect._filesbymodname.clear()
         inspect.modulesbyfile.clear()
 
diff --git a/Lib/test/test_inspect/test_inspect.py 
b/Lib/test/test_inspect/test_inspect.py
index b2265e44e0c79b..169d1edb706fc3 100644
--- a/Lib/test/test_inspect/test_inspect.py
+++ b/Lib/test/test_inspect/test_inspect.py
@@ -4,6 +4,7 @@
 import copy
 import datetime
 import functools
+import gc
 import importlib
 import inspect
 import io
@@ -25,6 +26,7 @@
 import unittest
 import unittest.mock
 import warnings
+import weakref
 
 
 try:
@@ -2302,6 +2304,13 @@ def __dict__(self):
         self.assertEqual(inspect.getattr_static(foo, 'a'), 3)
         self.assertFalse(test.called)
 
+        class Bar(Foo): pass
+
+        bar = Bar()
+        bar.a = 5
+        self.assertEqual(inspect.getattr_static(bar, 'a'), 3)
+        self.assertFalse(test.called)
+
     def test_mutated_mro(self):
         test = self
         test.called = False
@@ -2406,6 +2415,21 @@ def __getattribute__(self, attr):
 
         self.assertFalse(test.called)
 
+    def test_cache_does_not_cause_classes_to_persist(self):
+        # regression test for gh-118013:
+        # check that the internal _shadowed_dict cache does not cause
+        # dynamically created classes to have extended lifetimes even
+        # when no other strong references to those classes remain.
+        # Since these classes can themselves hold strong references to
+        # other objects, this can cause unexpected memory consumption.
+        class Foo: pass
+        Foo.instance = Foo()
+        weakref_to_class = weakref.ref(Foo)
+        inspect.getattr_static(Foo.instance, 'whatever', 'irrelevant')
+        del Foo
+        gc.collect()
+        self.assertIsNone(weakref_to_class())
+
 
 class TestGetGeneratorState(unittest.TestCase):
 
diff --git 
a/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst 
b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst
new file mode 100644
index 00000000000000..8eb68ebe99ba15
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst
@@ -0,0 +1,9 @@
+Fix regression introduced in gh-103193 that meant that calling
+:func:`inspect.getattr_static` on an instance would cause a strong reference
+to that instance's class to persist in an internal cache in the
+:mod:`inspect` module. This caused unexpected memory consumption if the
+class was dynamically created, the class held strong references to other
+objects which took up a significant amount of memory, and the cache
+contained the sole strong reference to the class. The fix for the regression
+leads to a slowdown in :func:`getattr_static`, but the function should still
+be signficantly faster than it was in Python 3.11. Patch by Alex Waygood.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to