https://github.com/python/cpython/commit/0c1a42cf9c8cd0d4534d5c1d58f118ce7c5c446e
commit: 0c1a42cf9c8cd0d4534d5c1d58f118ce7c5c446e
branch: main
author: Serhiy Storchaka <[email protected]>
committer: encukou <[email protected]>
date: 2024-03-25T16:32:11+01:00
summary:

gh-87193: Support bytes objects with refcount > 1 in _PyBytes_Resize() 
(GH-117160)

Create a new bytes object and destroy the old one if it has refcount > 1.

files:
A Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst
A Modules/_testcapi/bytes.c
M Doc/c-api/bytes.rst
M Lib/test/test_capi/test_bytes.py
M Modules/Setup.stdlib.in
M Modules/_testcapi/parts.h
M Modules/_testcapimodule.c
M Objects/bytesobject.c
M Objects/fileobject.c
M PCbuild/_testcapi.vcxproj
M PCbuild/_testcapi.vcxproj.filters

diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst
index 4790d3b2da4375..bca78a9c369385 100644
--- a/Doc/c-api/bytes.rst
+++ b/Doc/c-api/bytes.rst
@@ -191,10 +191,10 @@ called with a non-bytes parameter.
 
 .. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize)
 
-   A way to resize a bytes object even though it is "immutable". Only use this
-   to build up a brand new bytes object; don't use this if the bytes may 
already
-   be known in other parts of the code.  It is an error to call this function 
if
-   the refcount on the input bytes object is not one. Pass the address of an
+   Resize a bytes object. *newsize* will be the new length of the bytes object.
+   You can think of it as creating a new bytes object and destroying the old
+   one, only more efficiently.
+   Pass the address of an
    existing bytes object as an lvalue (it may be written into), and the new 
size
    desired.  On success, *\*bytes* holds the resized bytes object and ``0`` is
    returned; the address in *\*bytes* may differ from its input value.  If the
diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py
index a2ba7708f8fd26..f14d5545c829e5 100644
--- a/Lib/test/test_capi/test_bytes.py
+++ b/Lib/test/test_capi/test_bytes.py
@@ -2,6 +2,7 @@
 from test.support import import_helper
 
 _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
+_testcapi = import_helper.import_module('_testcapi')
 from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX
 
 NULL = None
@@ -217,6 +218,35 @@ def test_decodeescape(self):
         # CRASHES decodeescape(b'abc', NULL, -1)
         # CRASHES decodeescape(NULL, NULL, 1)
 
+    def test_resize(self):
+        """Test _PyBytes_Resize()"""
+        resize = _testcapi.bytes_resize
+
+        for new in True, False:
+            self.assertEqual(resize(b'abc', 0, new), b'')
+            self.assertEqual(resize(b'abc', 1, new), b'a')
+            self.assertEqual(resize(b'abc', 2, new), b'ab')
+            self.assertEqual(resize(b'abc', 3, new), b'abc')
+            b = resize(b'abc', 4, new)
+            self.assertEqual(len(b), 4)
+            self.assertEqual(b[:3], b'abc')
+
+            self.assertEqual(resize(b'a', 0, new), b'')
+            self.assertEqual(resize(b'a', 1, new), b'a')
+            b = resize(b'a', 2, new)
+            self.assertEqual(len(b), 2)
+            self.assertEqual(b[:1], b'a')
+
+            self.assertEqual(resize(b'', 0, new), b'')
+            self.assertEqual(len(resize(b'', 1, new)), 1)
+            self.assertEqual(len(resize(b'', 2, new)), 2)
+
+        self.assertRaises(SystemError, resize, b'abc', -1, False)
+        self.assertRaises(SystemError, resize, bytearray(b'abc'), 3, False)
+
+        # CRASHES resize(NULL, 0, False)
+        # CRASHES resize(NULL, 3, False)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/C 
API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst b/Misc/NEWS.d/next/C 
API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst
new file mode 100644
index 00000000000000..cb921a9c7bf36e
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst      
@@ -0,0 +1,3 @@
+:c:func:`_PyBytes_Resize` can now be called for bytes objects with reference
+count > 1, including 1-byte bytes objects. It creates a new bytes object and
+destroys the old one if it has reference count > 1.
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 09d6f3b2bb7e8d..ff5c05f88d0d40 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -162,7 +162,7 @@
 @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c 
_xxtestfuzz/fuzzer.c
 @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
 @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c 
_testinternalcapi/test_lock.c _testinternalcapi/pytime.c 
_testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
-@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c 
_testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c 
_testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c 
_testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c 
_testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c 
_testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c 
_testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c 
_testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c
+@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c 
_testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c 
_testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c 
_testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c 
_testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c 
_testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c 
_testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c 
_testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c 
_testcapi/bytes.c
 @MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c 
_testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c 
_testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c 
_testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c 
_testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c 
_testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c 
_testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
 @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
 @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c
new file mode 100644
index 00000000000000..02294d8887abb7
--- /dev/null
+++ b/Modules/_testcapi/bytes.c
@@ -0,0 +1,53 @@
+#include "parts.h"
+#include "util.h"
+
+
+/* Test _PyBytes_Resize() */
+static PyObject *
+bytes_resize(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *obj;
+    Py_ssize_t newsize;
+    int new;
+
+    if (!PyArg_ParseTuple(args, "Onp", &obj, &newsize, &new))
+        return NULL;
+
+    NULLABLE(obj);
+    if (new) {
+        assert(obj != NULL);
+        assert(PyBytes_CheckExact(obj));
+        PyObject *newobj = PyBytes_FromStringAndSize(NULL, PyBytes_Size(obj));
+        if (newobj == NULL) {
+            return NULL;
+        }
+        memcpy(PyBytes_AsString(newobj), PyBytes_AsString(obj), 
PyBytes_Size(obj));
+        obj = newobj;
+    }
+    else {
+        Py_XINCREF(obj);
+    }
+    if (_PyBytes_Resize(&obj, newsize) < 0) {
+        assert(obj == NULL);
+    }
+    else {
+        assert(obj != NULL);
+    }
+    return obj;
+}
+
+
+static PyMethodDef test_methods[] = {
+    {"bytes_resize", bytes_resize, METH_VARARGS},
+    {NULL},
+};
+
+int
+_PyTestCapi_Init_Bytes(PyObject *m)
+{
+    if (PyModule_AddFunctions(m, test_methods) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h
index f9bdd830775a75..e7c868f6bcff6e 100644
--- a/Modules/_testcapi/parts.h
+++ b/Modules/_testcapi/parts.h
@@ -31,6 +31,7 @@
 int _PyTestCapi_Init_Vectorcall(PyObject *module);
 int _PyTestCapi_Init_Heaptype(PyObject *module);
 int _PyTestCapi_Init_Abstract(PyObject *module);
+int _PyTestCapi_Init_Bytes(PyObject *module);
 int _PyTestCapi_Init_Unicode(PyObject *module);
 int _PyTestCapi_Init_GetArgs(PyObject *module);
 int _PyTestCapi_Init_DateTime(PyObject *module);
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 16b5e1d257eed2..3c30381be6d538 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -3971,6 +3971,9 @@ PyInit__testcapi(void)
     if (_PyTestCapi_Init_Abstract(m) < 0) {
         return NULL;
     }
+    if (_PyTestCapi_Init_Bytes(m) < 0) {
+        return NULL;
+    }
     if (_PyTestCapi_Init_Unicode(m) < 0) {
         return NULL;
     }
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 26227dd251122d..256e01f54f0782 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3025,11 +3025,9 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
 
 
 /* The following function breaks the notion that bytes are immutable:
-   it changes the size of a bytes object.  We get away with this only if there
-   is only one module referencing the object.  You can also think of it
+   it changes the size of a bytes object.  You can think of it
    as creating a new bytes object and destroying the old one, only
-   more efficiently.  In any case, don't use this if the bytes object may
-   already be known to some other part of the code...
+   more efficiently.
    Note that if there's not enough memory to resize the bytes object, the
    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
    memory" exception is set, and -1 is returned.  Else (on success) 0 is
@@ -3045,28 +3043,40 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
     PyBytesObject *sv;
     v = *pv;
     if (!PyBytes_Check(v) || newsize < 0) {
-        goto error;
+        *pv = 0;
+        Py_DECREF(v);
+        PyErr_BadInternalCall();
+        return -1;
     }
-    if (Py_SIZE(v) == newsize) {
+    Py_ssize_t oldsize = PyBytes_GET_SIZE(v);
+    if (oldsize == newsize) {
         /* return early if newsize equals to v->ob_size */
         return 0;
     }
-    if (Py_SIZE(v) == 0) {
-        if (newsize == 0) {
-            return 0;
-        }
+    if (oldsize == 0) {
         *pv = _PyBytes_FromSize(newsize, 0);
         Py_DECREF(v);
         return (*pv == NULL) ? -1 : 0;
     }
-    if (Py_REFCNT(v) != 1) {
-        goto error;
-    }
     if (newsize == 0) {
         *pv = bytes_get_empty();
         Py_DECREF(v);
         return 0;
     }
+    if (Py_REFCNT(v) != 1) {
+        if (oldsize < newsize) {
+            *pv = _PyBytes_FromSize(newsize, 0);
+            if (*pv) {
+                memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize);
+            }
+        }
+        else {
+            *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize);
+        }
+        Py_DECREF(v);
+        return (*pv == NULL) ? -1 : 0;
+    }
+
 #ifdef Py_TRACE_REFS
     _Py_ForgetReference(v);
 #endif
@@ -3089,11 +3099,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
     sv->ob_shash = -1;          /* invalidate cached hash value */
 _Py_COMP_DIAG_POP
     return 0;
-error:
-    *pv = 0;
-    Py_DECREF(v);
-    PyErr_BadInternalCall();
-    return -1;
 }
 
 
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index e30ab952dff571..bae49d367b65ee 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -80,13 +80,7 @@ PyFile_GetLine(PyObject *f, int n)
                             "EOF when reading a line");
         }
         else if (s[len-1] == '\n') {
-            if (Py_REFCNT(result) == 1)
-                _PyBytes_Resize(&result, len-1);
-            else {
-                PyObject *v;
-                v = PyBytes_FromStringAndSize(s, len-1);
-                Py_SETREF(result, v);
-            }
+            (void) _PyBytes_Resize(&result, len-1);
         }
     }
     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj
index 6522cb1fcf5c63..615d73d5e003b4 100644
--- a/PCbuild/_testcapi.vcxproj
+++ b/PCbuild/_testcapi.vcxproj
@@ -98,6 +98,7 @@
     <ClCompile Include="..\Modules\_testcapi\vectorcall.c" />
     <ClCompile Include="..\Modules\_testcapi\heaptype.c" />
     <ClCompile Include="..\Modules\_testcapi\abstract.c" />
+    <ClCompile Include="..\Modules\_testcapi\bytes.c" />
     <ClCompile Include="..\Modules\_testcapi\unicode.c" />
     <ClCompile Include="..\Modules\_testcapi\dict.c" />
     <ClCompile Include="..\Modules\_testcapi\set.c" />
diff --git a/PCbuild/_testcapi.vcxproj.filters 
b/PCbuild/_testcapi.vcxproj.filters
index 772a9a861517ec..0c11e918556ff5 100644
--- a/PCbuild/_testcapi.vcxproj.filters
+++ b/PCbuild/_testcapi.vcxproj.filters
@@ -30,6 +30,9 @@
     <ClCompile Include="..\Modules\_testcapi\abstract.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Modules\_testcapi\bytes.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\Modules\_testcapi\unicode.c">
       <Filter>Source Files</Filter>
     </ClCompile>

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to