https://github.com/python/cpython/commit/8a978a75c2d1e6d081e8f89a240e356be10c7714
commit: 8a978a75c2d1e6d081e8f89a240e356be10c7714
branch: 3.11
author: Inada Naoki <[email protected]>
committer: hugovk <[email protected]>
date: 2024-08-09T17:04:36Z
summary:

[3.11] gh-119506: fix _io.TextIOWrapper.write() write during flush (#119507) 
(#120314)

Co-authored-by: Hugo van Kemenade <[email protected]>
fix _io.TextIOWrapper.write() write during flush (#119507)

files:
A Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst
M Lib/test/test_io.py
M Modules/_io/textio.c

diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 537c9fa7b9835b..1e444d2566e855 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -3960,6 +3960,28 @@ def write(self, data):
         t.write("x"*chunk_size)
         self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], 
buf._write_stack)
 
+    def test_issue119506(self):
+        chunk_size = 8192
+
+        class MockIO(self.MockRawIO):
+            written = False
+            def write(self, data):
+                if not self.written:
+                    self.written = True
+                    t.write("middle")
+                return super().write(data)
+
+        buf = MockIO()
+        t = self.TextIOWrapper(buf)
+        t.write("abc")
+        t.write("def")
+        # writing data which size >= chunk_size cause flushing buffer before 
write.
+        t.write("g" * chunk_size)
+        t.flush()
+
+        self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size],
+                         buf._write_stack)
+
 
 class PyTextIOWrapperTest(TextIOWrapperTest):
     io = pyio
diff --git 
a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst 
b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst
new file mode 100644
index 00000000000000..f9b764ae0c49b3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst
@@ -0,0 +1 @@
+Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the 
method is called again during flushing internal buffer.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 3de4c06704b8b9..ba69e2afd27570 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1701,34 +1701,56 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject 
*text)
         bytes_len = PyBytes_GET_SIZE(b);
     }
 
-    if (self->pending_bytes == NULL) {
-        self->pending_bytes_count = 0;
-        self->pending_bytes = b;
-    }
-    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
-        // Prevent to concatenate more than chunk_size data.
-        if (_textiowrapper_writeflush(self) < 0) {
-            Py_DECREF(b);
-            return NULL;
+    // We should avoid concatinating huge data.
+    // Flush the buffer before adding b to the buffer if b is not small.
+    // https://github.com/python/cpython/issues/87426
+    if (bytes_len >= self->chunk_size) {
+        // _textiowrapper_writeflush() calls buffer.write().
+        // self->pending_bytes can be appended during buffer->write()
+        // or other thread.
+        // We need to loop until buffer becomes empty.
+        // https://github.com/python/cpython/issues/118138
+        // https://github.com/python/cpython/issues/119506
+        while (self->pending_bytes != NULL) {
+            if (_textiowrapper_writeflush(self) < 0) {
+                Py_DECREF(b);
+                return NULL;
+            }
         }
-        self->pending_bytes = b;
     }
-    else if (!PyList_CheckExact(self->pending_bytes)) {
-        PyObject *list = PyList_New(2);
-        if (list == NULL) {
-            Py_DECREF(b);
-            return NULL;
-        }
-        PyList_SET_ITEM(list, 0, self->pending_bytes);
-        PyList_SET_ITEM(list, 1, b);
-        self->pending_bytes = list;
+
+    if (self->pending_bytes == NULL) {
+        assert(self->pending_bytes_count == 0);
+        self->pending_bytes = b;
     }
     else {
-        if (PyList_Append(self->pending_bytes, b) < 0) {
-            Py_DECREF(b);
-            return NULL;
+        if (!PyList_CheckExact(self->pending_bytes)) {
+            PyObject *list = PyList_New(0);
+            if (list == NULL) {
+                Py_DECREF(b);
+                return NULL;
+            }
+            // PyList_New() may trigger GC and other thread may call write().
+            // So, we need to check the self->pending_bytes is a list again.
+            if (PyList_CheckExact(self->pending_bytes)) {
+                // Releasing empty list won't trigger GC and/or __del__.
+                Py_DECREF(list);
+            }
+            else {
+                if (PyList_Append(list, self->pending_bytes) < 0) {
+                    Py_DECREF(list);
+                    Py_DECREF(b);
+                    return NULL;
+                }
+                Py_SETREF(self->pending_bytes, list);
+            }
         }
+
+        int ret = PyList_Append(self->pending_bytes, b);
         Py_DECREF(b);
+        if (ret < 0) {
+            return NULL;
+        }
     }
 
     self->pending_bytes_count += bytes_len;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to