https://github.com/python/cpython/commit/ce01ab536f22a3cf095d621f3b3579c1e3567859
commit: ce01ab536f22a3cf095d621f3b3579c1e3567859
branch: main
author: Sam Gross <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-01-23T20:14:46Z
summary:
gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269)
The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.
files:
A Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
M Lib/xml/etree/ElementTree.py
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 42574eefd81beb..ae6575028be11c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@
import collections
import collections.abc
import contextlib
+import weakref
from . import ElementPath
@@ -1223,13 +1224,14 @@ def iterparse(source, events=None, parser=None):
# parser argument of iterparse is removed, this can be killed.
pullparser = XMLPullParser(events=events, _parser=parser)
- def iterator(source):
+ if not hasattr(source, "read"):
+ source = open(source, "rb")
+ close_source = True
+ else:
close_source = False
+
+ def iterator(source):
try:
- if not hasattr(source, "read"):
- source = open(source, "rb")
- close_source = True
- yield None
while True:
yield from pullparser.read_events()
# load event buffer
@@ -1239,18 +1241,23 @@ def iterator(source):
pullparser.feed(data)
root = pullparser._close_and_return_root()
yield from pullparser.read_events()
- it.root = root
+ it = wr()
+ if it is not None:
+ it.root = root
finally:
if close_source:
source.close()
class IterParseIterator(collections.abc.Iterator):
__next__ = iterator(source).__next__
- it = IterParseIterator()
- it.root = None
- del iterator, IterParseIterator
- next(it)
+ def __del__(self):
+ if close_source:
+ source.close()
+
+ it = IterParseIterator()
+ wr = weakref.ref(it)
+ del IterParseIterator
return it
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
new file mode 100644
index 00000000000000..9b69b5deb1b5a0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]