https://github.com/python/cpython/commit/6edb2ddb5f3695cf4938979d645f31d7fba43ec8
commit: 6edb2ddb5f3695cf4938979d645f31d7fba43ec8
branch: main
author: Sebastian Pipping <[email protected]>
committer: picnixz <[email protected]>
date: 2025-10-05T17:37:42+02:00
summary:

gh-139400: Make sure that parent parsers outlive their subparsers in `pyexpat` (#139403)

* Modules/pyexpat.c: Disallow collection of in-use parent parsers.

Within libexpat, a parser created via `XML_ExternalEntityParserCreate`
is relying on its parent parser throughout its entire lifetime.
Prior to this fix, it was possible for the parent parser to be
garbage-collected too early.

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
M Lib/test/test_pyexpat.py
M Modules/pyexpat.c

diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 8e0f7374b26fd0..b4ce72dfd51774 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -771,6 +771,42 @@ def resolve_entity(context, base, system_id, public_id):
         self.assertEqual(handler_call_args, [("bar", "baz")])
 
 
+class ParentParserLifetimeTest(unittest.TestCase):
+    """
+    Subparsers make use of their parent XML_Parser inside of Expat.
+    As a result, parent parsers need to outlive subparsers.
+
+    See https://github.com/python/cpython/issues/139400.
+    """
+
+    def test_parent_parser_outlives_its_subparsers__single(self):
+        parser = expat.ParserCreate()
+        subparser = parser.ExternalEntityParserCreate(None)
+
+        # Now try to cause garbage collection of the parent parser
+        # while it's still being referenced by a related subparser.
+        del parser
+
+    def test_parent_parser_outlives_its_subparsers__multiple(self):
+        parser = expat.ParserCreate()
+        subparser_one = parser.ExternalEntityParserCreate(None)
+        subparser_two = parser.ExternalEntityParserCreate(None)
+
+        # Now try to cause garbage collection of the parent parser
+        # while it's still being referenced by a related subparser.
+        del parser
+
+    def test_parent_parser_outlives_its_subparsers__chain(self):
+        parser = expat.ParserCreate()
+        subparser = parser.ExternalEntityParserCreate(None)
+        subsubparser = subparser.ExternalEntityParserCreate(None)
+
+        # Now try to cause garbage collection of the parent parsers
+        # while they are still being referenced by a related subparser.
+        del parser
+        del subparser
+
+
 class ReparseDeferralTest(unittest.TestCase):
     def test_getter_setter_round_trip(self):
         parser = expat.ParserCreate()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
new file mode 100644
index 00000000000000..a5dea3b5f8147a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
@@ -0,0 +1,4 @@
+:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
+garbage-collected once they are no longer referenced by subparsers created
+by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
+Patch by Sebastian Pipping.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 9949e185dce9c7..9c252be9cf22b2 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -76,6 +76,15 @@ typedef struct {
     PyObject_HEAD
 
     XML_Parser itself;
+    /*
+     * Strong reference to a parent `xmlparseobject` if this parser
+     * is a child parser. Set to NULL if this parser is a root parser.
+     * This is needed to keep the parent parser alive as long as it has
+     * at least one child parser.
+     *
+     * See https://github.com/python/cpython/issues/139400 for details.
+     */
+    PyObject *parent;
     int ordered_attributes;     /* Return attributes as a list. */
     int specified_attributes;   /* Report only specified attributes. */
     int in_callback;            /* Is a callback active? */
@@ -1067,6 +1076,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
         return NULL;
     }
 
+    // The new subparser will make use of the parent XML_Parser inside of Expat.
+    // So we need to take subparsers into account with the reference counting
+    // of their parent parser.
+    Py_INCREF(self);
+
     new_parser->buffer_size = self->buffer_size;
     new_parser->buffer_used = 0;
     new_parser->buffer = NULL;
@@ -1076,6 +1090,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     new_parser->ns_prefixes = self->ns_prefixes;
     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
                                                         encoding);
+    new_parser->parent = (PyObject *)self;
     new_parser->handlers = 0;
     new_parser->intern = Py_XNewRef(self->intern);
 
@@ -1083,11 +1098,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
         if (new_parser->buffer == NULL) {
             Py_DECREF(new_parser);
+            Py_DECREF(self);
             return PyErr_NoMemory();
         }
     }
     if (!new_parser->itself) {
         Py_DECREF(new_parser);
+        Py_DECREF(self);
         return PyErr_NoMemory();
     }
 
@@ -1101,6 +1118,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     new_parser->handlers = PyMem_New(PyObject *, i);
     if (!new_parser->handlers) {
         Py_DECREF(new_parser);
+        Py_DECREF(self);
         return PyErr_NoMemory();
     }
     clear_handlers(new_parser, 1);
@@ -1481,6 +1499,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
     /* namespace_separator is either NULL or contains one char + \0 */
     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
                                        namespace_separator);
+    self->parent = NULL;
     if (self->itself == NULL) {
         PyErr_SetString(PyExc_RuntimeError,
                         "XML_ParserCreate failed");
@@ -1517,6 +1536,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
     for (size_t i = 0; handler_info[i].name != NULL; i++) {
         Py_VISIT(self->handlers[i]);
     }
+    Py_VISIT(self->parent);
     Py_VISIT(Py_TYPE(op));
     return 0;
 }
@@ -1527,6 +1547,10 @@ xmlparse_clear(PyObject *op)
     xmlparseobject *self = xmlparseobject_CAST(op);
     clear_handlers(self, 0);
     Py_CLEAR(self->intern);
+    // NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
+    //       XML_ParserFree(self->itself), or a subparser could lose its parent
+    //       XML_Parser while still making use of it internally.
+    //       https://github.com/python/cpython/issues/139400
     return 0;
 }
 
@@ -1540,6 +1564,7 @@ xmlparse_dealloc(PyObject *op)
         XML_ParserFree(self->itself);
     }
     self->itself = NULL;
+    Py_CLEAR(self->parent);
 
     if (self->handlers != NULL) {
         PyMem_Free(self->handlers);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to