https://github.com/python/cpython/commit/e265ce8a563ba7f91c5ada0592de8cb85622b433
commit: e265ce8a563ba7f91c5ada0592de8cb85622b433
branch: main
author: Cody Maloney <[email protected]>
committer: encukou <[email protected]>
date: 2025-11-20T08:49:05+01:00
summary:
gh-139871: Optimize small takes in bytearray.take_bytes (GH-141741)
When less than half of the buffer is taken, just copy that small part out
rather than doing a big alloc + memmove + big shrink.
files:
M Lib/test/test_bytes.py
M Objects/bytearrayobject.c
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 86898bfcab9135..7ca38bb8c8421e 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -1524,6 +1524,32 @@ def test_take_bytes(self):
self.assertRaises(BufferError, ba.take_bytes)
self.assertEqual(ba.take_bytes(), b'abc')
+ @support.cpython_only # tests an implementation detail
+ def test_take_bytes_optimization(self):
+ # Validate optimization around taking lots of little chunks out of a
+ # much bigger buffer. Save work by only copying a little rather than
+ # moving a lot.
+ ba = bytearray(b'abcdef' + b'0' * 1000)
+ start_alloc = ba.__alloc__()
+
+ # Take two bytes at a time, checking alloc doesn't change.
+ self.assertEqual(ba.take_bytes(2), b'ab')
+ self.assertEqual(ba.__alloc__(), start_alloc)
+ self.assertEqual(len(ba), 4 + 1000)
+ self.assertEqual(ba.take_bytes(2), b'cd')
+ self.assertEqual(ba.__alloc__(), start_alloc)
+ self.assertEqual(len(ba), 2 + 1000)
+ self.assertEqual(ba.take_bytes(2), b'ef')
+ self.assertEqual(ba.__alloc__(), start_alloc)
+ self.assertEqual(len(ba), 0 + 1000)
+ self.assertEqual(ba.__alloc__(), start_alloc)
+
+ # Take over half, alloc shrinks to exact size.
+ self.assertEqual(ba.take_bytes(501), b'0' * 501)
+ self.assertEqual(len(ba), 499)
+ bytes_header_size = sys.getsizeof(b'')
+ self.assertEqual(ba.__alloc__(), 499 + bytes_header_size)
+
def test_setitem(self):
def setitem_as_mapping(b, i, val):
b[i] = val
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 99bfdec89f6c3a..99e1c9b13f7879 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self,
PyObject *n)
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
}
- // Copy remaining bytes to a new bytes.
Py_ssize_t remaining_length = size - to_take;
+ // optimization: If taking less than leaving, just copy the small to_take
+ // portion out and move ob_start.
+ if (to_take < remaining_length) {
+ PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take);
+ if (ret == NULL) {
+ return NULL;
+ }
+ self->ob_start += to_take;
+ Py_SET_SIZE(self, remaining_length);
+ return ret;
+ }
+
+ // Copy remaining bytes to a new bytes.
PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
remaining_length);
if (remaining == NULL) {
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]