https://github.com/python/cpython/commit/732224e1139f7ed4fe0259a2dad900f84910949e commit: 732224e1139f7ed4fe0259a2dad900f84910949e branch: main author: Cody Maloney <[email protected]> committer: encukou <[email protected]> date: 2025-11-13T13:19:44Z summary:
gh-139871: Add `bytearray.take_bytes([n])` to efficiently extract `bytes` (GH-140128) Update `bytearray` to contain a `bytes` and provide a zero-copy path to "extract" the `bytes`. This allows making several code paths more efficient. This does not move any codepaths to make use of this new API. The documentation changes include common code patterns which can be made more efficient with this API. --- When just changing `bytearray` to contain `bytes` I ran pyperformance on a `--with-lto --enable-optimizations --with-static-libpython` build and don't see any major speedups or slowdowns with this; all seems to be in the noise of my machine (Generally changes under 5% or benchmarks that don't touch bytes/bytearray). Co-authored-by: Victor Stinner <[email protected]> Co-authored-by: Maurycy Pawłowski-Wieroński <[email protected]> files: A Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst M Doc/library/stdtypes.rst M Doc/whatsnew/3.15.rst M Include/cpython/bytearrayobject.h M Include/internal/pycore_bytesobject.h M Lib/test/test_bytes.py M Lib/test/test_capi/test_bytearray.py M Lib/test/test_sys.py M Objects/bytearrayobject.c M Objects/bytesobject.c M Objects/clinic/bytearrayobject.c.h diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 97e7e08364e0bd..c539345e598777 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -3173,6 +3173,30 @@ objects. .. versionadded:: 3.14 + .. method:: take_bytes(n=None, /) + + Remove the first *n* bytes from the bytearray and return them as an immutable + :class:`bytes`. + By default (if *n* is ``None``), return all bytes and clear the bytearray. + + If *n* is negative, index from the end and take the first :func:`len` + plus *n* bytes. If *n* is out of bounds, raise :exc:`IndexError`. + + Taking less than the full length will leave remaining bytes in the + :class:`bytearray`, which requires a copy. If the remaining bytes should be + discarded, use :func:`~bytearray.resize` or :keyword:`del` to truncate + then :func:`~bytearray.take_bytes` without a size. + + .. impl-detail:: + + Taking all bytes is a zero-copy operation. + + .. versionadded:: next + + See the :ref:`What's New <whatsnew315-bytearray-take-bytes>` entry for + common code patterns which can be optimized with + :func:`bytearray.take_bytes`. + Since bytearray objects are sequences of integers (akin to a list), for a bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytearray object of length 1. (This contrasts with text strings, where diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 3cb766978a7217..d7c9a41eeb2759 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -307,6 +307,86 @@ Other language changes not only integers or floats, although this does not improve precision. (Contributed by Serhiy Storchaka in :gh:`67795`.) +.. _whatsnew315-bytearray-take-bytes: + +* Added :meth:`bytearray.take_bytes(n=None, /) <bytearray.take_bytes>` to take + bytes out of a :class:`bytearray` without copying. This enables optimizing code + which must return :class:`bytes` after working with a mutable buffer of bytes + such as data buffering, network protocol parsing, encoding, decoding, + and compression. Common code patterns which can be optimized with + :func:`~bytearray.take_bytes` are listed below. + + (Contributed by Cody Maloney in :gh:`139871`.) + + .. list-table:: Suggested Optimizing Refactors + :header-rows: 1 + + * - Description + - Old + - New + + * - Return :class:`bytes` after working with :class:`bytearray` + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return bytes(buffer) + + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return buffer.take_bytes() + + * - Empty a buffer getting the bytes + - .. code:: python + + buffer = bytearray(1024) + ... + data = bytes(buffer) + buffer.clear() + + - .. code:: python + + buffer = bytearray(1024) + ... + data = buffer.take_bytes() + + * - Split a buffer at a specific separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n + 1]) + del buffer[:n + 1] + assert data == b'abc' + assert buffer == bytearray(b'def') + + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = buffer.take_bytes(n + 1) + + * - Split a buffer at a specific separator; discard after the separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n]) + buffer.clear() + assert data == b'abc' + assert len(buffer) == 0 + + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + buffer.resize(n) + data = buffer.take_bytes() + * Many functions related to compiling or parsing Python code, such as :func:`compile`, :func:`ast.parse`, :func:`symtable.symtable`, and :func:`importlib.abc.InspectLoader.source_to_code`, now allow to pass diff --git a/Include/cpython/bytearrayobject.h b/Include/cpython/bytearrayobject.h index 4dddef713ce097..1edd082074206c 100644 --- a/Include/cpython/bytearrayobject.h +++ b/Include/cpython/bytearrayobject.h @@ -5,25 +5,25 @@ /* Object layout */ typedef struct { PyObject_VAR_HEAD - Py_ssize_t ob_alloc; /* How many bytes allocated in ob_bytes */ + /* How many bytes allocated in ob_bytes + + In the current implementation this is equivalent to Py_SIZE(ob_bytes_object). + The value is always loaded and stored atomically for thread safety. + There are API compatibilty concerns with removing so keeping for now. */ + Py_ssize_t ob_alloc; char *ob_bytes; /* Physical backing buffer */ char *ob_start; /* Logical start inside ob_bytes */ Py_ssize_t ob_exports; /* How many buffer exports */ + PyObject *ob_bytes_object; /* PyBytes for zero-copy bytes conversion */ } PyByteArrayObject; -PyAPI_DATA(char) _PyByteArray_empty_string[]; - /* Macros and static inline functions, trading safety for speed */ #define _PyByteArray_CAST(op) \ (assert(PyByteArray_Check(op)), _Py_CAST(PyByteArrayObject*, op)) static inline char* PyByteArray_AS_STRING(PyObject *op) { - PyByteArrayObject *self = _PyByteArray_CAST(op); - if (Py_SIZE(self)) { - return self->ob_start; - } - return _PyByteArray_empty_string; + return _PyByteArray_CAST(op)->ob_start; } #define PyByteArray_AS_STRING(self) PyByteArray_AS_STRING(_PyObject_CAST(self)) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index c7bc53b6073770..8e8fa696ee0350 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -60,6 +60,14 @@ PyAPI_FUNC(void) _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src); +/* _PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation + for a bytes object of length n should request PyBytesObject_SIZE + n bytes. + + Using _PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves + 3 or 7 bytes per bytes object allocation on a typical system. +*/ +#define _PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) + /* --- PyBytesWriter ------------------------------------------------------ */ struct PyBytesWriter { diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index e012042159d223..86898bfcab9135 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1397,6 +1397,16 @@ def test_clear(self): b.append(ord('p')) self.assertEqual(b, b'p') + # Cleared object should be empty. + b = bytearray(b'abc') + b.clear() + self.assertEqual(b.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(b), base_size) + c = b.copy() + self.assertEqual(c.__alloc__(), 0) + self.assertEqual(sys.getsizeof(c), base_size) + def test_copy(self): b = bytearray(b'abc') bb = b.copy() @@ -1458,6 +1468,61 @@ def test_resize(self): self.assertRaises(MemoryError, bytearray().resize, sys.maxsize) self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize) + def test_take_bytes(self): + ba = bytearray(b'ab') + self.assertEqual(ba.take_bytes(), b'ab') + self.assertEqual(len(ba), 0) + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(ba.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(ba), base_size) + + # Positive and negative slicing. + ba = bytearray(b'abcdef') + self.assertEqual(ba.take_bytes(1), b'a') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-5), b'') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-3), b'bc') + self.assertEqual(ba, bytearray(b'def')) + self.assertEqual(len(ba), 3) + self.assertEqual(ba.take_bytes(3), b'def') + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(len(ba), 0) + + # Take nothing from emptiness. + self.assertEqual(ba.take_bytes(0), b'') + self.assertEqual(ba.take_bytes(), b'') + self.assertEqual(ba.take_bytes(None), b'') + + # Out of bounds, bad take value. + self.assertRaises(IndexError, ba.take_bytes, -1) + self.assertRaises(TypeError, ba.take_bytes, 3.14) + ba = bytearray(b'abcdef') + self.assertRaises(IndexError, ba.take_bytes, 7) + + # Offset between physical and logical start (ob_bytes != ob_start). + ba = bytearray(b'abcde') + del ba[:2] + self.assertEqual(ba, bytearray(b'cde')) + self.assertEqual(ba.take_bytes(), b'cde') + + # Overallocation at end. + ba = bytearray(b'abcde') + del ba[-2:] + self.assertEqual(ba, bytearray(b'abc')) + self.assertEqual(ba.take_bytes(), b'abc') + ba = bytearray(b'abcde') + ba.resize(4) + self.assertEqual(ba.take_bytes(), b'abcd') + + # Take of a bytearray with references should fail. + ba = bytearray(b'abc') + with memoryview(ba) as mv: + self.assertRaises(BufferError, ba.take_bytes) + self.assertEqual(ba.take_bytes(), b'abc') def test_setitem(self): def setitem_as_mapping(b, i, val): @@ -2564,6 +2629,18 @@ def zfill(b, a): c = a.zfill(0x400000) assert not c or c[-1] not in (0xdd, 0xcd) + def take_bytes(b, a): # MODIFIES! + b.wait() + c = a.take_bytes() + assert not c or c[0] == 48 # '0' + + def take_bytes_n(b, a): # MODIFIES! + b.wait() + try: + c = a.take_bytes(10) + assert c == b'0123456789' + except IndexError: pass + def check(funcs, a=None, *args): if a is None: a = bytearray(b'0' * 0x400000) @@ -2625,6 +2702,10 @@ def check(funcs, a=None, *args): check([clear] + [startswith] * 10) check([clear] + [strip] * 10) + check([clear] + [take_bytes] * 10) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 0x400)) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 5)) + check([clear] + [contains] * 10) check([clear] + [subscript] * 10) check([clear2] + [ass_subscript2] * 10, None, bytearray(b'0' * 0x400000)) diff --git a/Lib/test/test_capi/test_bytearray.py b/Lib/test/test_capi/test_bytearray.py index 52565ea34c61b8..cb7ad8b22252d9 100644 --- a/Lib/test/test_capi/test_bytearray.py +++ b/Lib/test/test_capi/test_bytearray.py @@ -1,3 +1,4 @@ +import sys import unittest from test.support import import_helper @@ -55,7 +56,9 @@ def test_fromstringandsize(self): self.assertEqual(fromstringandsize(b'', 0), bytearray()) self.assertEqual(fromstringandsize(NULL, 0), bytearray()) self.assertEqual(len(fromstringandsize(NULL, 3)), 3) - self.assertRaises(MemoryError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, + PY_SSIZE_T_MAX-sys.getsizeof(b'') + 1) self.assertRaises(SystemError, fromstringandsize, b'abc', -1) self.assertRaises(SystemError, fromstringandsize, b'abc', PY_SSIZE_T_MIN) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 3ceed019ac43cf..9d3248d972e8d1 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1583,7 +1583,7 @@ def test_objecttypes(self): samples = [b'', b'u'*100000] for sample in samples: x = bytearray(sample) - check(x, vsize('n2Pi') + x.__alloc__()) + check(x, vsize('n2PiP') + x.__alloc__()) # bytearray_iterator check(iter(bytearray()), size('nP')) # bytes diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst new file mode 100644 index 00000000000000..d4b8578afe3afc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst @@ -0,0 +1,2 @@ +Update :class:`bytearray` to use a :class:`bytes` under the hood as its buffer +and add :func:`bytearray.take_bytes` to take it out. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index a73bfff340ce48..99bfdec89f6c3a 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -17,8 +17,8 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=5535b77c37a119e0]*/ -/* For PyByteArray_AS_STRING(). */ -char _PyByteArray_empty_string[] = ""; +/* Max number of bytes a bytearray can contain */ +#define PyByteArray_SIZE_MAX ((Py_ssize_t)(PY_SSIZE_T_MAX - _PyBytesObject_SIZE)) /* Helpers */ @@ -43,6 +43,14 @@ _getbytevalue(PyObject* arg, int *value) return 1; } +static void +bytearray_reinit_from_bytes(PyByteArrayObject *self, Py_ssize_t size, + Py_ssize_t alloc) { + self->ob_bytes = self->ob_start = PyBytes_AS_STRING(self->ob_bytes_object); + Py_SET_SIZE(self, size); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, alloc); +} + static int bytearray_getbuffer_lock_held(PyObject *self, Py_buffer *view, int flags) { @@ -127,7 +135,6 @@ PyObject * PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) { PyByteArrayObject *new; - Py_ssize_t alloc; if (size < 0) { PyErr_SetString(PyExc_SystemError, @@ -135,34 +142,31 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) return NULL; } - /* Prevent buffer overflow when setting alloc to size+1. */ - if (size == PY_SSIZE_T_MAX) { - return PyErr_NoMemory(); - } - new = PyObject_New(PyByteArrayObject, &PyByteArray_Type); - if (new == NULL) + if (new == NULL) { return NULL; + } + + /* Fill values used in bytearray_dealloc. + + In an optimized build the memory isn't zeroed and ob_exports would be + uninitialized when when PyBytes_FromStringAndSize errored leading to + intermittent test failures. */ + new->ob_exports = 0; + + /* Optimization: size=0 bytearray should not allocate space - if (size == 0) { - new->ob_bytes = NULL; - alloc = 0; + PyBytes_FromStringAndSize returns the empty bytes global when size=0 so + no allocation occurs. */ + new->ob_bytes_object = PyBytes_FromStringAndSize(NULL, size); + if (new->ob_bytes_object == NULL) { + Py_DECREF(new); + return NULL; } - else { - alloc = size + 1; - new->ob_bytes = PyMem_Malloc(alloc); - if (new->ob_bytes == NULL) { - Py_DECREF(new); - return PyErr_NoMemory(); - } - if (bytes != NULL && size > 0) - memcpy(new->ob_bytes, bytes, size); - new->ob_bytes[size] = '\0'; /* Trailing null byte */ + bytearray_reinit_from_bytes(new, size, size); + if (bytes != NULL && size > 0) { + memcpy(new->ob_bytes, bytes, size); } - Py_SET_SIZE(new, size); - new->ob_alloc = alloc; - new->ob_start = new->ob_bytes; - new->ob_exports = 0; return (PyObject *)new; } @@ -189,7 +193,6 @@ static int bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); - void *sval; PyByteArrayObject *obj = ((PyByteArrayObject *)self); /* All computations are done unsigned to avoid integer overflows (see issue #22335). */ @@ -214,16 +217,17 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) return -1; } - if (size + logical_offset + 1 <= alloc) { + if (size + logical_offset <= alloc) { /* Current buffer is large enough to host the requested size, decide on a strategy. */ if (size < alloc / 2) { /* Major downsize; resize down to exact size */ - alloc = size + 1; + alloc = size; } else { /* Minor downsize; quick exit */ Py_SET_SIZE(self, size); + /* Add mid-buffer null; end provided by bytes. */ PyByteArray_AS_STRING(self)[size] = '\0'; /* Trailing null */ return 0; } @@ -236,38 +240,36 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) } else { /* Major upsize; resize up to exact size */ - alloc = size + 1; + alloc = size; } } - if (alloc > PY_SSIZE_T_MAX) { + if (alloc > PyByteArray_SIZE_MAX) { PyErr_NoMemory(); return -1; } + /* Re-align data to the start of the allocation. */ if (logical_offset > 0) { - sval = PyMem_Malloc(alloc); - if (sval == NULL) { - PyErr_NoMemory(); - return -1; - } - memcpy(sval, PyByteArray_AS_STRING(self), - Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self))); - PyMem_Free(obj->ob_bytes); - } - else { - sval = PyMem_Realloc(obj->ob_bytes, alloc); - if (sval == NULL) { - PyErr_NoMemory(); - return -1; - } + /* optimization tradeoff: This is faster than a new allocation when + the number of bytes being removed in a resize is small; for large + size changes it may be better to just make a new bytes object as + _PyBytes_Resize will do a malloc + memcpy internally. */ + memmove(obj->ob_bytes, obj->ob_start, + Py_MIN(requested_size, Py_SIZE(self))); } - obj->ob_bytes = obj->ob_start = sval; - Py_SET_SIZE(self, size); - FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc); - obj->ob_bytes[size] = '\0'; /* Trailing null byte */ + int ret = _PyBytes_Resize(&obj->ob_bytes_object, alloc); + if (ret == -1) { + obj->ob_bytes_object = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + size = alloc = 0; + } + bytearray_reinit_from_bytes(obj, size, alloc); + if (alloc != size) { + /* Add mid-buffer null; end provided by bytes. */ + obj->ob_bytes[size] = '\0'; + } - return 0; + return ret; } int @@ -295,7 +297,7 @@ PyByteArray_Concat(PyObject *a, PyObject *b) goto done; } - if (va.len > PY_SSIZE_T_MAX - vb.len) { + if (va.len > PyByteArray_SIZE_MAX - vb.len) { PyErr_NoMemory(); goto done; } @@ -339,7 +341,7 @@ bytearray_iconcat_lock_held(PyObject *op, PyObject *other) } Py_ssize_t size = Py_SIZE(self); - if (size > PY_SSIZE_T_MAX - vo.len) { + if (size > PyByteArray_SIZE_MAX - vo.len) { PyBuffer_Release(&vo); return PyErr_NoMemory(); } @@ -373,7 +375,7 @@ bytearray_repeat_lock_held(PyObject *op, Py_ssize_t count) count = 0; } const Py_ssize_t mysize = Py_SIZE(self); - if (count > 0 && mysize > PY_SSIZE_T_MAX / count) { + if (count > 0 && mysize > PyByteArray_SIZE_MAX / count) { return PyErr_NoMemory(); } Py_ssize_t size = mysize * count; @@ -409,7 +411,7 @@ bytearray_irepeat_lock_held(PyObject *op, Py_ssize_t count) } const Py_ssize_t mysize = Py_SIZE(self); - if (count > 0 && mysize > PY_SSIZE_T_MAX / count) { + if (count > 0 && mysize > PyByteArray_SIZE_MAX / count) { return PyErr_NoMemory(); } const Py_ssize_t size = mysize * count; @@ -585,7 +587,7 @@ bytearray_setslice_linear(PyByteArrayObject *self, buf = PyByteArray_AS_STRING(self); } else if (growth > 0) { - if (Py_SIZE(self) > (Py_ssize_t)PY_SSIZE_T_MAX - growth) { + if (Py_SIZE(self) > PyByteArray_SIZE_MAX - growth) { PyErr_NoMemory(); return -1; } @@ -899,6 +901,13 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, PyObject *it; PyObject *(*iternext)(PyObject *); + /* First __init__; set ob_bytes_object so ob_bytes is always non-null. */ + if (self->ob_bytes_object == NULL) { + self->ob_bytes_object = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + bytearray_reinit_from_bytes(self, 0, 0); + self->ob_exports = 0; + } + if (Py_SIZE(self) != 0) { /* Empty previous contents (yes, do this first of all!) */ if (PyByteArray_Resize((PyObject *)self, 0) < 0) @@ -1169,9 +1178,7 @@ bytearray_dealloc(PyObject *op) "deallocated bytearray object has exported buffers"); PyErr_Print(); } - if (self->ob_bytes != 0) { - PyMem_Free(self->ob_bytes); - } + Py_XDECREF(self->ob_bytes_object); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1491,6 +1498,82 @@ bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size) } +/*[clinic input] +@critical_section +bytearray.take_bytes + n: object = None + Bytes to take, negative indexes from end. None indicates all bytes. + / +Take *n* bytes from the bytearray and return them as a bytes object. +[clinic start generated code]*/ + +static PyObject * +bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n) +/*[clinic end generated code: output=3147fbc0bbbe8d94 input=b15b5172cdc6deda]*/ +{ + Py_ssize_t to_take; + Py_ssize_t size = Py_SIZE(self); + if (Py_IsNone(n)) { + to_take = size; + } + // Integer index, from start (zero, positive) or end (negative). + else if (_PyIndex_Check(n)) { + to_take = PyNumber_AsSsize_t(n, PyExc_IndexError); + if (to_take == -1 && PyErr_Occurred()) { + return NULL; + } + if (to_take < 0) { + to_take += size; + } + } + else { + PyErr_SetString(PyExc_TypeError, "n must be an integer or None"); + return NULL; + } + + if (to_take < 0 || to_take > size) { + PyErr_Format(PyExc_IndexError, + "can't take %zd bytes outside size %zd", + to_take, size); + return NULL; + } + + // Exports may change the contents. No mutable bytes allowed. + if (!_canresize(self)) { + return NULL; + } + + if (to_take == 0 || size == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + + // Copy remaining bytes to a new bytes. + Py_ssize_t remaining_length = size - to_take; + PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take, + remaining_length); + if (remaining == NULL) { + return NULL; + } + + // If the bytes are offset inside the buffer must first align. + if (self->ob_start != self->ob_bytes) { + memmove(self->ob_bytes, self->ob_start, to_take); + self->ob_start = self->ob_bytes; + } + + if (_PyBytes_Resize(&self->ob_bytes_object, to_take) == -1) { + Py_DECREF(remaining); + return NULL; + } + + // Point the bytearray towards the buffer with the remaining data. + PyObject *result = self->ob_bytes_object; + self->ob_bytes_object = remaining; + bytearray_reinit_from_bytes(self, remaining_length, remaining_length); + return result; +} + + /*[clinic input] @critical_section bytearray.translate @@ -1868,11 +1951,6 @@ bytearray_insert_impl(PyByteArrayObject *self, Py_ssize_t index, int item) Py_ssize_t n = Py_SIZE(self); char *buf; - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to bytearray"); - return NULL; - } if (bytearray_resize_lock_held((PyObject *)self, n + 1) < 0) return NULL; buf = PyByteArray_AS_STRING(self); @@ -1987,11 +2065,6 @@ bytearray_append_impl(PyByteArrayObject *self, int item) { Py_ssize_t n = Py_SIZE(self); - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to bytearray"); - return NULL; - } if (bytearray_resize_lock_held((PyObject *)self, n + 1) < 0) return NULL; @@ -2099,16 +2172,16 @@ bytearray_extend_impl(PyByteArrayObject *self, PyObject *iterable_of_ints) if (len >= buf_size) { Py_ssize_t addition; - if (len == PY_SSIZE_T_MAX) { + if (len == PyByteArray_SIZE_MAX) { Py_DECREF(it); Py_DECREF(bytearray_obj); return PyErr_NoMemory(); } addition = len >> 1; - if (addition > PY_SSIZE_T_MAX - len - 1) - buf_size = PY_SSIZE_T_MAX; + if (addition > PyByteArray_SIZE_MAX - len) + buf_size = PyByteArray_SIZE_MAX; else - buf_size = len + addition + 1; + buf_size = len + addition; if (bytearray_resize_lock_held((PyObject *)bytearray_obj, buf_size) < 0) { Py_DECREF(it); Py_DECREF(bytearray_obj); @@ -2405,7 +2478,11 @@ static PyObject * bytearray_alloc(PyObject *op, PyObject *Py_UNUSED(ignored)) { PyByteArrayObject *self = _PyByteArray_CAST(op); - return PyLong_FromSsize_t(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc)); + Py_ssize_t alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc); + if (alloc > 0) { + alloc += _PyBytesObject_SIZE; + } + return PyLong_FromSsize_t(alloc); } /*[clinic input] @@ -2601,9 +2678,13 @@ static PyObject * bytearray_sizeof_impl(PyByteArrayObject *self) /*[clinic end generated code: output=738abdd17951c427 input=e27320fd98a4bc5a]*/ { - size_t res = _PyObject_SIZE(Py_TYPE(self)); - res += (size_t)FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc) * sizeof(char); - return PyLong_FromSize_t(res); + Py_ssize_t res = _PyObject_SIZE(Py_TYPE(self)); + Py_ssize_t alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc); + if (alloc > 0) { + res += _PyBytesObject_SIZE + alloc; + } + + return PyLong_FromSsize_t(res); } static PySequenceMethods bytearray_as_sequence = { @@ -2686,6 +2767,7 @@ static PyMethodDef bytearray_methods[] = { BYTEARRAY_STARTSWITH_METHODDEF BYTEARRAY_STRIP_METHODDEF {"swapcase", bytearray_swapcase, METH_NOARGS, _Py_swapcase__doc__}, + BYTEARRAY_TAKE_BYTES_METHODDEF {"title", bytearray_title, METH_NOARGS, _Py_title__doc__}, BYTEARRAY_TRANSLATE_METHODDEF {"upper", bytearray_upper, METH_NOARGS, _Py_upper__doc__}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2b9513abe91956..2b0925017f29e4 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -25,13 +25,7 @@ class bytes "PyBytesObject *" "&PyBytes_Type" #include "clinic/bytesobject.c.h" -/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation - for a bytes object of length n should request PyBytesObject_SIZE + n bytes. - - Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves - 3 or 7 bytes per bytes object allocation on a typical system. -*/ -#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) +#define PyBytesObject_SIZE _PyBytesObject_SIZE /* Forward declaration */ static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 6f13865177dde5..be704ccf68f669 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -631,6 +631,43 @@ bytearray_resize(PyObject *self, PyObject *arg) return return_value; } +PyDoc_STRVAR(bytearray_take_bytes__doc__, +"take_bytes($self, n=None, /)\n" +"--\n" +"\n" +"Take *n* bytes from the bytearray and return them as a bytes object.\n" +"\n" +" n\n" +" Bytes to take, negative indexes from end. None indicates all bytes."); + +#define BYTEARRAY_TAKE_BYTES_METHODDEF \ + {"take_bytes", _PyCFunction_CAST(bytearray_take_bytes), METH_FASTCALL, bytearray_take_bytes__doc__}, + +static PyObject * +bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n); + +static PyObject * +bytearray_take_bytes(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *n = Py_None; + + if (!_PyArg_CheckPositional("take_bytes", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + n = args[0]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = bytearray_take_bytes_impl((PyByteArrayObject *)self, n); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(bytearray_translate__doc__, "translate($self, table, /, delete=b\'\')\n" "--\n" @@ -1796,4 +1833,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl((PyByteArrayObject *)self); } -/*[clinic end generated code: output=fdfe41139c91e409 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5eddefde2a001ceb input=a9049054013a1b77]*/ _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
