https://github.com/python/cpython/commit/3203a7412977b8da3aba2770308136a37f48c927
commit: 3203a7412977b8da3aba2770308136a37f48c927
branch: main
author: Eddie Elizondo <[email protected]>
committer: encukou <[email protected]>
date: 2024-08-15T11:55:09Z
summary:

gh-113190: Reenable non-debug interned string cleanup (GH-113601)

files:
A Misc/NEWS.d/next/Core and 
Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst
M Doc/c-api/init.rst
M Doc/whatsnew/3.14.rst
M Lib/test/_test_embed_structseq.py
M Objects/unicodeobject.c

diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst
index 1fab3f577f2f89..567b5f6855a48a 100644
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@@ -394,8 +394,7 @@ Initializing and finalizing the interpreter
    Undo all initializations made by :c:func:`Py_Initialize` and subsequent use 
of
    Python/C API functions, and destroy all sub-interpreters (see
    :c:func:`Py_NewInterpreter` below) that were created and not yet destroyed 
since
-   the last call to :c:func:`Py_Initialize`.  Ideally, this frees all memory
-   allocated by the Python interpreter.  This is a no-op when called for a 
second
+   the last call to :c:func:`Py_Initialize`.  This is a no-op when called for 
a second
    time (without calling :c:func:`Py_Initialize` again first).
 
    Since this is the reverse of :c:func:`Py_Initialize`, it should be called
@@ -407,6 +406,12 @@ Initializing and finalizing the interpreter
    If there were errors during finalization (flushing buffered data),
    ``-1`` is returned.
 
+   Note that Python will do a best effort at freeing all memory allocated by 
the Python
+   interpreter.  Therefore, any C-Extension should make sure to correctly 
clean up all
+   of the preveiously allocated PyObjects before using them in subsequent 
calls to
+   :c:func:`Py_Initialize`.  Otherwise it could introduce vulnerabilities and 
incorrect
+   behavior.
+
    This function is provided for a number of reasons.  An embedding application
    might want to restart Python without having to restart the application 
itself.
    An application that has loaded the Python interpreter from a dynamically
@@ -421,10 +426,11 @@ Initializing and finalizing the interpreter
    loaded extension modules loaded by Python are not unloaded.  Small amounts 
of
    memory allocated by the Python interpreter may not be freed (if you find a 
leak,
    please report it).  Memory tied up in circular references between objects 
is not
-   freed.  Some memory allocated by extension modules may not be freed.  Some
-   extensions may not work properly if their initialization routine is called 
more
-   than once; this can happen if an application calls :c:func:`Py_Initialize` 
and
-   :c:func:`Py_FinalizeEx` more than once.
+   freed.  Interned strings will all be deallocated regarldess of their 
reference count.
+   Some memory allocated by extension modules may not be freed.  Some 
extensions may not
+   work properly if their initialization routine is called more than once; 
this can
+   happen if an application calls :c:func:`Py_Initialize` and 
:c:func:`Py_FinalizeEx`
+   more than once.
 
    .. audit-event:: cpython._PySys_ClearAuditHooks "" c.Py_FinalizeEx
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 267860712b9967..088f70d9e9fad4 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -419,6 +419,16 @@ New Features
   which has an ambiguous return value.
   (Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.)
 
+* :c:func:`Py_Finalize` now deletes all interned strings. This
+  is backwards incompatible to any C-Extension that holds onto an interned
+  string after a call to :c:func:`Py_Finalize` and is then reused after a
+  call to :c:func:`Py_Initialize`.  Any issues arising from this behavior will
+  normally result in crashes during the exectuion of the subsequent call to
+  :c:func:`Py_Initialize` from accessing uninitialized memory. To fix, use
+  an address sanitizer to identify any use-after-free coming from
+  an interned string and deallocate it during module shutdown.
+  (Contribued by Eddie Elizondo in :gh:`113601`.)
+
 Porting to Python 3.14
 ----------------------
 
diff --git a/Lib/test/_test_embed_structseq.py 
b/Lib/test/_test_embed_structseq.py
index 834daa4df55fec..868f9f83e8be77 100644
--- a/Lib/test/_test_embed_structseq.py
+++ b/Lib/test/_test_embed_structseq.py
@@ -1,31 +1,27 @@
 import sys
 import types
+import unittest
 
-# Note: This test file can't import `unittest` since the runtime can't
-# currently guarantee that it will not leak memory. Doing so will mark
-# the test as passing but with reference leaks. This can safely import
-# the `unittest` library once there's a strict guarantee of no leaks
-# during runtime shutdown.
 
 # bpo-46417: Test that structseq types used by the sys module are still
 # valid when Py_Finalize()/Py_Initialize() are called multiple times.
-class TestStructSeq:
+class TestStructSeq(unittest.TestCase):
     # test PyTypeObject members
-    def _check_structseq(self, obj_type):
+    def check_structseq(self, obj_type):
         # ob_refcnt
-        assert sys.getrefcount(obj_type) > 1
+        self.assertGreaterEqual(sys.getrefcount(obj_type), 1)
         # tp_base
-        assert issubclass(obj_type, tuple)
+        self.assertTrue(issubclass(obj_type, tuple))
         # tp_bases
-        assert obj_type.__bases__ == (tuple,)
+        self.assertEqual(obj_type.__bases__, (tuple,))
         # tp_dict
-        assert isinstance(obj_type.__dict__, types.MappingProxyType)
+        self.assertIsInstance(obj_type.__dict__, types.MappingProxyType)
         # tp_mro
-        assert obj_type.__mro__ == (obj_type, tuple, object)
+        self.assertEqual(obj_type.__mro__, (obj_type, tuple, object))
         # tp_name
-        assert isinstance(type.__name__, str)
+        self.assertIsInstance(type.__name__, str)
         # tp_subclasses
-        assert obj_type.__subclasses__() == []
+        self.assertEqual(obj_type.__subclasses__(), [])
 
     def test_sys_attrs(self):
         for attr_name in (
@@ -36,23 +32,23 @@ def test_sys_attrs(self):
             'thread_info',    # ThreadInfoType
             'version_info',   # VersionInfoType
         ):
-            attr = getattr(sys, attr_name)
-            self._check_structseq(type(attr))
+            with self.subTest(attr=attr_name):
+                attr = getattr(sys, attr_name)
+                self.check_structseq(type(attr))
 
     def test_sys_funcs(self):
         func_names = ['get_asyncgen_hooks']  # AsyncGenHooksType
         if hasattr(sys, 'getwindowsversion'):
             func_names.append('getwindowsversion')  # WindowsVersionType
         for func_name in func_names:
-            func = getattr(sys, func_name)
-            obj = func()
-            self._check_structseq(type(obj))
+            with self.subTest(func=func_name):
+                func = getattr(sys, func_name)
+                obj = func()
+                self.check_structseq(type(obj))
 
 
 try:
-    tests = TestStructSeq()
-    tests.test_sys_attrs()
-    tests.test_sys_funcs()
+    unittest.main()
 except SystemExit as exc:
     if exc.args[0] != 0:
         raise
diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst b/Misc/NEWS.d/next/Core 
and Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst
new file mode 100644
index 00000000000000..4c12870c3df548
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and 
Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst 
@@ -0,0 +1 @@
+:c:func:`Py_Finalize` now deletes all interned strings.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index da9c5857cc3bee..148d3e55bf830e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15623,19 +15623,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
         int shared = 0;
         switch (PyUnicode_CHECK_INTERNED(s)) {
         case SSTATE_INTERNED_IMMORTAL:
-            /* Make immortal interned strings mortal again.
-             *
-             * Currently, the runtime is not able to guarantee that it can exit
-             * without allocations that carry over to a future initialization
-             * of Python within the same process. i.e:
-             *   ./python -X showrefcount -c 'import itertools'
-             *   [237 refs, 237 blocks]
-             *
-             * This should remain disabled (`Py_DEBUG` only) until there is a
-             * strict guarantee that no memory will be left after
-             * `Py_Finalize`.
-             */
-#ifdef Py_DEBUG
+            /* Make immortal interned strings mortal again. */
             // Skip the Immortal Instance check and restore
             // the two references (key and value) ignored
             // by PyUnicode_InternInPlace().
@@ -15648,7 +15636,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
 #ifdef INTERNED_STATS
             total_length += PyUnicode_GET_LENGTH(s);
 #endif
-#endif // Py_DEBUG
             break;
         case SSTATE_INTERNED_IMMORTAL_STATIC:
             /* It is shared between interpreters, so we should unmark it

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to