https://github.com/python/cpython/commit/bfd9c3ea5336f2898e3c58e0c3999bfc25d7da89
commit: bfd9c3ea5336f2898e3c58e0c3999bfc25d7da89
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: ericsnowcurrently <[email protected]>
date: 2024-05-22T12:09:48-06:00
summary:

[3.13] gh-119213: Be More Careful About _PyArg_Parser.kwtuple Across 
Interpreters (gh-119331) (gh-119410)

_PyArg_Parser holds static global data generated for modules by Argument 
Clinic.  The _PyArg_Parser.kwtuple field is a tuple object, even though it's 
stored within a static global.  In some cases the tuple is statically allocated 
and thus it's okay that it gets shared by multiple interpreters.  However, in 
other cases the tuple is set lazily, allocated from the heap using the active 
interprepreter at the point the tuple is needed.

This is a problem once that interpreter is destroyed since 
_PyArg_Parser.kwtuple becomes at dangling pointer, leading to crashes.  It 
isn't a problem if the tuple is allocated under the main interpreter, since its 
lifetime is bound to the lifetime of the runtime.  The solution here is to 
temporarily switch to the main interpreter.  The alternative would be to always 
statically allocate the tuple.

This change also fixes a bug where only the most recent parser was added to the 
global linked list.

(cherry picked from commit 81865002aee8eaaeb3c7e402f86183afa6de77bf)

Co-authored-by: Eric Snow <[email protected]>

files:
A Misc/NEWS.d/next/Core and 
Builtins/2024-05-21-11-27-14.gh-issue-119213.nxjxrt.rst
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject_generated.h
M Lib/test/test_capi/test_getargs.py
M Modules/_testinternalcapi.c
M Modules/clinic/_testinternalcapi.c.h
M Python/getargs.c
M Tools/clinic/libclinic/parse_args.py

diff --git a/Include/internal/pycore_global_objects_fini_generated.h 
b/Include/internal/pycore_global_objects_fini_generated.h
index ca7355b2b61aa7..45a1429ad499a2 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1221,6 +1221,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sort));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source_traceback));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(spam));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src_dir_fd));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stacklevel));
diff --git a/Include/internal/pycore_global_strings.h 
b/Include/internal/pycore_global_strings.h
index fbb25285f0f282..86d9ffdd72fe42 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -710,6 +710,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(sort)
         STRUCT_FOR_ID(source)
         STRUCT_FOR_ID(source_traceback)
+        STRUCT_FOR_ID(spam)
         STRUCT_FOR_ID(src)
         STRUCT_FOR_ID(src_dir_fd)
         STRUCT_FOR_ID(stacklevel)
diff --git a/Include/internal/pycore_runtime_init_generated.h 
b/Include/internal/pycore_runtime_init_generated.h
index 508da40c53422d..d52d2938bc08b5 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1219,6 +1219,7 @@ extern "C" {
     INIT_ID(sort), \
     INIT_ID(source), \
     INIT_ID(source_traceback), \
+    INIT_ID(spam), \
     INIT_ID(src), \
     INIT_ID(src_dir_fd), \
     INIT_ID(stacklevel), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h 
b/Include/internal/pycore_unicodeobject_generated.h
index cc2fc15ac5cabf..c4f79bf91e57b3 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1971,6 +1971,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     string = &_Py_ID(source_traceback);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
+    string = &_Py_ID(spam);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    _PyUnicode_InternInPlace(interp, &string);
     string = &_Py_ID(src);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
diff --git a/Lib/test/test_capi/test_getargs.py 
b/Lib/test/test_capi/test_getargs.py
index e710400f75c235..232aa2a80025dc 100644
--- a/Lib/test/test_capi/test_getargs.py
+++ b/Lib/test/test_capi/test_getargs.py
@@ -4,11 +4,17 @@
 import sys
 from test import support
 from test.support import import_helper
+from test.support import script_helper
 from test.support import warnings_helper
 # Skip this test if the _testcapi module isn't available.
 _testcapi = import_helper.import_module('_testcapi')
 from _testcapi import getargs_keywords, getargs_keyword_only
 
+try:
+    import _testinternalcapi
+except ImportError:
+    _testinternalcapi = NULL
+
 # > How about the following counterproposal. This also changes some of
 # > the other format codes to be a little more regular.
 # >
@@ -1346,6 +1352,33 @@ def test_nested_tuple(self):
                     "argument 1 must be sequence of length 1, not 0"):
                 parse(((),), {}, '(' + f + ')', ['a'])
 
+    @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi')
+    def test_gh_119213(self):
+        rc, out, err = script_helper.assert_python_ok("-c", """if True:
+            from test import support
+            script = '''if True:
+                import _testinternalcapi
+                _testinternalcapi.gh_119213_getargs(spam='eggs')
+                '''
+            config = dict(
+                allow_fork=False,
+                allow_exec=False,
+                allow_threads=True,
+                allow_daemon_threads=False,
+                use_main_obmalloc=False,
+                gil=2,
+                check_multi_interp_extensions=True,
+            )
+            rc = support.run_in_subinterp_with_config(script, **config)
+            assert rc == 0
+
+            # The crash is different if the interpreter was not destroyed 
first.
+            #interpid = _testinternalcapi.create_interpreter()
+            #rc = _testinternalcapi.exec_interpreter(interpid, script)
+            #assert rc == 0
+            """)
+        self.assertEqual(rc, 0)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2024-05-21-11-27-14.gh-issue-119213.nxjxrt.rst b/Misc/NEWS.d/next/Core 
and Builtins/2024-05-21-11-27-14.gh-issue-119213.nxjxrt.rst
new file mode 100644
index 00000000000000..e9073b4ba08798
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and 
Builtins/2024-05-21-11-27-14.gh-issue-119213.nxjxrt.rst 
@@ -0,0 +1,3 @@
+Non-builtin modules built with argument clinic were crashing if used in a
+subinterpreter before the main interpreter.  The objects that were causing
+the problem by leaking between interpreters carelessly have been fixed.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index e209c7e05264f2..129c136906739d 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2006,6 +2006,25 @@ has_inline_values(PyObject *self, PyObject *obj)
     Py_RETURN_FALSE;
 }
 
+
+/*[clinic input]
+gh_119213_getargs
+
+    spam: object = None
+
+Test _PyArg_Parser.kwtuple
+[clinic start generated code]*/
+
+static PyObject *
+gh_119213_getargs_impl(PyObject *module, PyObject *spam)
+/*[clinic end generated code: output=d8d9c95d5b446802 input=65ef47511da80fc2]*/
+{
+    // It must never have been called in the main interprer
+    assert(!_Py_IsMainInterpreter(PyInterpreterState_Get()));
+    return Py_NewRef(spam);
+}
+
+
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -2096,6 +2115,7 @@ static PyMethodDef module_functions[] = {
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
 #endif
+    GH_119213_GETARGS_METHODDEF
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Modules/clinic/_testinternalcapi.c.h 
b/Modules/clinic/_testinternalcapi.c.h
index a61858561d5ef8..dcca9ecf735723 100644
--- a/Modules/clinic/_testinternalcapi.c.h
+++ b/Modules/clinic/_testinternalcapi.c.h
@@ -300,4 +300,64 @@ _testinternalcapi_test_long_numbits(PyObject *module, 
PyObject *Py_UNUSED(ignore
 {
     return _testinternalcapi_test_long_numbits_impl(module);
 }
-/*[clinic end generated code: output=9804015d77d36c77 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(gh_119213_getargs__doc__,
+"gh_119213_getargs($module, /, spam=None)\n"
+"--\n"
+"\n"
+"Test _PyArg_Parser.kwtuple");
+
+#define GH_119213_GETARGS_METHODDEF    \
+    {"gh_119213_getargs", _PyCFunction_CAST(gh_119213_getargs), 
METH_FASTCALL|METH_KEYWORDS, gh_119213_getargs__doc__},
+
+static PyObject *
+gh_119213_getargs_impl(PyObject *module, PyObject *spam);
+
+static PyObject *
+gh_119213_getargs(PyObject *module, PyObject *const *args, Py_ssize_t nargs, 
PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(spam), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"spam", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "gh_119213_getargs",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[1];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 
0;
+    PyObject *spam = Py_None;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 
0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    spam = args[0];
+skip_optional_pos:
+    return_value = gh_119213_getargs_impl(module, spam);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=4d0770a1c20fbf40 input=a9049054013a1b77]*/
diff --git a/Python/getargs.c b/Python/getargs.c
index 539925e471f54c..f9a836679fe55e 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -7,6 +7,7 @@
 #include "pycore_dict.h"          // _PyDict_HasOnlyStringKeys()
 #include "pycore_modsupport.h"    // export _PyArg_NoKeywords()
 #include "pycore_pylifecycle.h"   // _PyArg_Fini
+#include "pycore_pystate.h"       // _Py_IsMainInterpreter()
 #include "pycore_tuple.h"         // _PyTuple_ITEMS()
 #include "pycore_pyerrors.h"      // _Py_CalculateSuggestions()
 
@@ -1947,7 +1948,23 @@ _parser_init(void *arg)
     int owned;
     PyObject *kwtuple = parser->kwtuple;
     if (kwtuple == NULL) {
+        /* We may temporarily switch to the main interpreter to avoid
+         * creating a tuple that could outlive its owning interpreter. */
+        PyThreadState *save_tstate = NULL;
+        PyThreadState *temp_tstate = NULL;
+        if (!_Py_IsMainInterpreter(PyInterpreterState_Get())) {
+            temp_tstate = PyThreadState_New(_PyInterpreterState_Main());
+            if (temp_tstate == NULL) {
+                return -1;
+            }
+            save_tstate = PyThreadState_Swap(temp_tstate);
+        }
         kwtuple = new_kwtuple(keywords, len, pos);
+        if (temp_tstate != NULL) {
+            PyThreadState_Clear(temp_tstate);
+            (void)PyThreadState_Swap(save_tstate);
+            PyThreadState_Delete(temp_tstate);
+        }
         if (kwtuple == NULL) {
             return -1;
         }
@@ -1969,8 +1986,8 @@ _parser_init(void *arg)
     parser->next = _Py_atomic_load_ptr(&_PyRuntime.getargs.static_parsers);
     do {
         // compare-exchange updates parser->next on failure
-    } while 
(_Py_atomic_compare_exchange_ptr(&_PyRuntime.getargs.static_parsers,
-                                             &parser->next, parser));
+    } while 
(!_Py_atomic_compare_exchange_ptr(&_PyRuntime.getargs.static_parsers,
+                                              &parser->next, parser));
     return 0;
 }
 
diff --git a/Tools/clinic/libclinic/parse_args.py 
b/Tools/clinic/libclinic/parse_args.py
index 905f2a0ba94f4c..7ac88bd0458b82 100644
--- a/Tools/clinic/libclinic/parse_args.py
+++ b/Tools/clinic/libclinic/parse_args.py
@@ -51,6 +51,8 @@ def declare_parser(
             #endif
         """
     else:
+        # XXX Why do we not statically allocate the tuple
+        # for non-builtin modules?
         declarations = """
             #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to