https://github.com/python/cpython/commit/6edb2ddb5f3695cf4938979d645f31d7fba43ec8
commit: 6edb2ddb5f3695cf4938979d645f31d7fba43ec8
branch: main
author: Sebastian Pipping <[email protected]>
committer: picnixz <[email protected]>
date: 2025-10-05T17:37:42+02:00
summary:
gh-139400: Make sure that parent parsers outlive their subparsers in `pyexpat` (#139403)
* Modules/pyexpat.c: Disallow collection of in-use parent parsers.
Within libexpat, a parser created via `XML_ExternalEntityParserCreate`
is relying on its parent parser throughout its entire lifetime.
Prior to this fix, it was possible for the parent parser to be
garbage-collected too early.
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
M Lib/test/test_pyexpat.py
M Modules/pyexpat.c
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 8e0f7374b26fd0..b4ce72dfd51774 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -771,6 +771,42 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])
+class ParentParserLifetimeTest(unittest.TestCase):
+ """
+ Subparsers make use of their parent XML_Parser inside of Expat.
+ As a result, parent parsers need to outlive subparsers.
+
+ See https://github.com/python/cpython/issues/139400.
+ """
+
+ def test_parent_parser_outlives_its_subparsers__single(self):
+ parser = expat.ParserCreate()
+ subparser = parser.ExternalEntityParserCreate(None)
+
+ # Now try to cause garbage collection of the parent parser
+ # while it's still being referenced by a related subparser.
+ del parser
+
+ def test_parent_parser_outlives_its_subparsers__multiple(self):
+ parser = expat.ParserCreate()
+ subparser_one = parser.ExternalEntityParserCreate(None)
+ subparser_two = parser.ExternalEntityParserCreate(None)
+
+ # Now try to cause garbage collection of the parent parser
+ # while it's still being referenced by a related subparser.
+ del parser
+
+ def test_parent_parser_outlives_its_subparsers__chain(self):
+ parser = expat.ParserCreate()
+ subparser = parser.ExternalEntityParserCreate(None)
+ subsubparser = subparser.ExternalEntityParserCreate(None)
+
+ # Now try to cause garbage collection of the parent parsers
+ # while they are still being referenced by a related subparser.
+ del parser
+ del subparser
+
+
class ReparseDeferralTest(unittest.TestCase):
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
new file mode 100644
index 00000000000000..a5dea3b5f8147a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-29-00-01-28.gh-issue-139400.X2T-jO.rst
@@ -0,0 +1,4 @@
+:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
+garbage-collected once they are no longer referenced by subparsers created
+by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
+Patch by Sebastian Pipping.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 9949e185dce9c7..9c252be9cf22b2 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -76,6 +76,15 @@ typedef struct {
PyObject_HEAD
XML_Parser itself;
+ /*
+ * Strong reference to a parent `xmlparseobject` if this parser
+ * is a child parser. Set to NULL if this parser is a root parser.
+ * This is needed to keep the parent parser alive as long as it has
+ * at least one child parser.
+ *
+ * See https://github.com/python/cpython/issues/139400 for details.
+ */
+ PyObject *parent;
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
@@ -1067,6 +1076,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
return NULL;
}
+ // The new subparser will make use of the parent XML_Parser inside of Expat.
+ // So we need to take subparsers into account with the reference counting
+ // of their parent parser.
+ Py_INCREF(self);
+
new_parser->buffer_size = self->buffer_size;
new_parser->buffer_used = 0;
new_parser->buffer = NULL;
@@ -1076,6 +1090,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
+ new_parser->parent = (PyObject *)self;
new_parser->handlers = 0;
new_parser->intern = Py_XNewRef(self->intern);
@@ -1083,11 +1098,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
if (new_parser->buffer == NULL) {
Py_DECREF(new_parser);
+ Py_DECREF(self);
return PyErr_NoMemory();
}
}
if (!new_parser->itself) {
Py_DECREF(new_parser);
+ Py_DECREF(self);
return PyErr_NoMemory();
}
@@ -1101,6 +1118,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->handlers = PyMem_New(PyObject *, i);
if (!new_parser->handlers) {
Py_DECREF(new_parser);
+ Py_DECREF(self);
return PyErr_NoMemory();
}
clear_handlers(new_parser, 1);
@@ -1481,6 +1499,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
namespace_separator);
+ self->parent = NULL;
if (self->itself == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"XML_ParserCreate failed");
@@ -1517,6 +1536,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
for (size_t i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(self->handlers[i]);
}
+ Py_VISIT(self->parent);
Py_VISIT(Py_TYPE(op));
return 0;
}
@@ -1527,6 +1547,10 @@ xmlparse_clear(PyObject *op)
xmlparseobject *self = xmlparseobject_CAST(op);
clear_handlers(self, 0);
Py_CLEAR(self->intern);
+ // NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
+ // XML_ParserFree(self->itself), or a subparser could lose its parent
+ // XML_Parser while still making use of it internally.
+ // https://github.com/python/cpython/issues/139400
return 0;
}
@@ -1540,6 +1564,7 @@ xmlparse_dealloc(PyObject *op)
XML_ParserFree(self->itself);
}
self->itself = NULL;
+ Py_CLEAR(self->parent);
if (self->handlers != NULL) {
PyMem_Free(self->handlers);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]