https://github.com/python/cpython/commit/f927204f64b3f8dbecec784e05bc8e25d2a78b2e
commit: f927204f64b3f8dbecec784e05bc8e25d2a78b2e
branch: main
author: Cody Maloney <[email protected]>
committer: vstinner <[email protected]>
date: 2025-01-30T11:14:23Z
summary:
gh-129005: Align FileIO.readall() allocation (#129458)
Both `_io.FileIO.readall()` and `_pyio.FileIO.readall()` now use a
pre-allocated buffer of length `bufsize`, fill it using readinto(), and
have matching "expand buffer" logic.
On my machine this takes:
`./python -m test -M8g -uall test_largefile -m test_large_read -v`
from ~3.7 seconds to ~3.4 seconds.
files:
A Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst
M Lib/_pyio.py
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 023478aa78c6a0..76a27910da4d5f 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1674,22 +1674,31 @@ def readall(self):
except OSError:
pass
- result = bytearray()
+ result = bytearray(bufsize)
+ bytes_read = 0
while True:
- if len(result) >= bufsize:
- bufsize = len(result)
- bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
- n = bufsize - len(result)
+ if bytes_read >= bufsize:
+ # Parallels _io/fileio.c new_buffersize
+ if bufsize > 65536:
+ addend = bufsize >> 3
+ else:
+ addend = bufsize + 256
+ if addend < DEFAULT_BUFFER_SIZE:
+ addend = DEFAULT_BUFFER_SIZE
+ bufsize += addend
+ result[bytes_read:bufsize] = b'\0'
+ assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
try:
- chunk = os.read(self._fd, n)
+ n = os.readinto(self._fd, memoryview(result)[bytes_read:])
except BlockingIOError:
- if result:
+ if bytes_read > 0:
break
return None
- if not chunk: # reached the end of the file
+ if n == 0: # reached the end of the file
break
- result += chunk
+ bytes_read += n
+ del result[bytes_read:]
return bytes(result)
def readinto(self, buffer):
diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst
new file mode 100644
index 00000000000000..c76fb05e196f87
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst
@@ -0,0 +1,2 @@
+``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer using
+the same algorithm ``_io.FileIO.readall()`` uses.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]