https://github.com/python/cpython/commit/c9a79a02a8dce6058cfde2916cc4573c2c913faa
commit: c9a79a02a8dce6058cfde2916cc4573c2c913faa
branch: main
author: Victor Stinner <[email protected]>
committer: vstinner <[email protected]>
date: 2025-09-24T16:15:34+02:00
summary:
gh-139156: Use PyBytesWriter in _PyUnicode_EncodeCharmap() (#139251)
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the
PyBytesWriter API.
Add _PyBytesWriter_GetSize() and _PyBytesWriter_GetData() static
inline functions.
files:
M Include/internal/pycore_bytesobject.h
M Objects/bytesobject.c
M Objects/unicodeobject.c
diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index 6c6e2ed21e3761..c7bc53b6073770 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -73,6 +73,26 @@ struct PyBytesWriter {
// Export for '_testcapi' shared extension
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(Py_ssize_t size);
+static inline Py_ssize_t
+_PyBytesWriter_GetSize(PyBytesWriter *writer)
+{
+ return writer->size;
+}
+
+static inline char*
+_PyBytesWriter_GetData(PyBytesWriter *writer)
+{
+ if (writer->obj == NULL) {
+ return writer->small_buffer;
+ }
+ else if (writer->use_bytearray) {
+ return PyByteArray_AS_STRING(writer->obj);
+ }
+ else {
+ return PyBytes_AS_STRING(writer->obj);
+ }
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 91d20cb9afa7ba..de8ab26db1e966 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3480,15 +3480,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
static inline char*
byteswriter_data(PyBytesWriter *writer)
{
- if (writer->obj == NULL) {
- return writer->small_buffer;
- }
- else if (writer->use_bytearray) {
- return PyByteArray_AS_STRING(writer->obj);
- }
- else {
- return PyBytes_AS_STRING(writer->obj);
- }
+ return _PyBytesWriter_GetData(writer);
}
@@ -3710,7 +3702,7 @@ PyBytesWriter_GetData(PyBytesWriter *writer)
Py_ssize_t
PyBytesWriter_GetSize(PyBytesWriter *writer)
{
- return writer->size;
+ return _PyBytesWriter_GetSize(writer);
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5799d92211aa97..2714df329d32bb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8842,15 +8842,13 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping,
unsigned char *replace)
}
static int
-charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
+charmapencode_resize(PyBytesWriter *writer, Py_ssize_t *outpos, Py_ssize_t requiredsize)
{
- Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
+ Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
/* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2*outsize)
- requiredsize = 2*outsize;
- if (_PyBytes_Resize(outobj, requiredsize))
- return -1;
- return 0;
+ if (requiredsize < 2 * outsize)
+ requiredsize = 2 * outsize;
+ return PyBytesWriter_Resize(writer, requiredsize);
}
typedef enum charmapencode_result {
@@ -8864,12 +8862,12 @@ typedef enum charmapencode_result {
reallocation error occurred. The caller must decref the result */
static charmapencode_result
charmapencode_output(Py_UCS4 c, PyObject *mapping,
- PyObject **outobj, Py_ssize_t *outpos)
+ PyBytesWriter *writer, Py_ssize_t *outpos)
{
PyObject *rep;
unsigned char replace;
char *outstart;
- Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
+ Py_ssize_t outsize = _PyBytesWriter_GetSize(writer);
if (Py_IS_TYPE(mapping, &EncodingMapType)) {
int res = encoding_map_lookup(c, mapping);
@@ -8877,9 +8875,9 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
if (res == -1)
return enc_FAILED;
if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize))
+ if (charmapencode_resize(writer, outpos, requiredsize))
return enc_EXCEPTION;
- outstart = PyBytes_AS_STRING(*outobj);
+ outstart = _PyBytesWriter_GetData(writer);
outstart[(*outpos)++] = (char)res;
return enc_SUCCESS;
}
@@ -8894,11 +8892,11 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
if (PyLong_Check(rep)) {
Py_ssize_t requiredsize = *outpos+1;
if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize)) {
+ if (charmapencode_resize(writer, outpos, requiredsize)) {
Py_DECREF(rep);
return enc_EXCEPTION;
}
- outstart = PyBytes_AS_STRING(*outobj);
+ outstart = _PyBytesWriter_GetData(writer);
outstart[(*outpos)++] = (char)replace;
}
else {
@@ -8906,11 +8904,11 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
Py_ssize_t requiredsize = *outpos+repsize;
if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize)) {
+ if (charmapencode_resize(writer, outpos, requiredsize)) {
Py_DECREF(rep);
return enc_EXCEPTION;
}
- outstart = PyBytes_AS_STRING(*outobj);
+ outstart = _PyBytesWriter_GetData(writer);
memcpy(outstart + *outpos, repchars, repsize);
*outpos += repsize;
}
@@ -8926,7 +8924,7 @@ charmap_encoding_error(
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject,
_Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
- PyObject **res, Py_ssize_t *respos)
+ PyBytesWriter *writer, Py_ssize_t *respos)
{
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
Py_ssize_t size, repsize;
@@ -8981,7 +8979,7 @@ charmap_encoding_error(
case _Py_ERROR_REPLACE:
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
- x = charmapencode_output('?', mapping, res, respos);
+ x = charmapencode_output('?', mapping, writer, respos);
if (x==enc_EXCEPTION) {
return -1;
}
@@ -9002,7 +9000,7 @@ charmap_encoding_error(
char *cp;
sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
for (cp = buffer; *cp; ++cp) {
- x = charmapencode_output(*cp, mapping, res, respos);
+ x = charmapencode_output(*cp, mapping, writer, respos);
if (x==enc_EXCEPTION)
return -1;
else if (x==enc_FAILED) {
@@ -9022,17 +9020,17 @@ charmap_encoding_error(
return -1;
if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */
- Py_ssize_t outsize = PyBytes_Size(*res);
+ Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
Py_ssize_t requiredsize;
repsize = PyBytes_Size(repunicode);
requiredsize = *respos + repsize;
if (requiredsize > outsize)
/* Make room for all additional bytes. */
- if (charmapencode_resize(res, respos, requiredsize)) {
+ if (charmapencode_resize(writer, respos, requiredsize)) {
Py_DECREF(repunicode);
return -1;
}
- memcpy(PyBytes_AsString(*res) + *respos,
+ memcpy((char*)PyBytesWriter_GetData(writer) + *respos,
PyBytes_AsString(repunicode), repsize);
*respos += repsize;
*inpos = newpos;
@@ -9045,7 +9043,7 @@ charmap_encoding_error(
kind = PyUnicode_KIND(repunicode);
for (index = 0; index < repsize; index++) {
Py_UCS4 repch = PyUnicode_READ(kind, data, index);
- x = charmapencode_output(repch, mapping, res, respos);
+ x = charmapencode_output(repch, mapping, writer, respos);
if (x==enc_EXCEPTION) {
Py_DECREF(repunicode);
return -1;
@@ -9067,65 +9065,64 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
PyObject *mapping,
const char *errors)
{
- /* output object */
- PyObject *res = NULL;
- /* current input position */
- Py_ssize_t inpos = 0;
- Py_ssize_t size;
- /* current output position */
- Py_ssize_t respos = 0;
- PyObject *error_handler_obj = NULL;
- PyObject *exc = NULL;
- _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
- const void *data;
- int kind;
-
- size = PyUnicode_GET_LENGTH(unicode);
- data = PyUnicode_DATA(unicode);
- kind = PyUnicode_KIND(unicode);
-
/* Default to Latin-1 */
- if (mapping == NULL)
+ if (mapping == NULL) {
return unicode_encode_ucs1(unicode, errors, 256);
+ }
+
+ Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+ if (size == 0) {
+ return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
+ }
+ const void *data = PyUnicode_DATA(unicode);
+ int kind = PyUnicode_KIND(unicode);
+ PyObject *error_handler_obj = NULL;
+ PyObject *exc = NULL;
+
+ /* output object */
+ PyBytesWriter *writer;
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
- res = PyBytes_FromStringAndSize(NULL, size);
- if (res == NULL)
+ writer = PyBytesWriter_Create(size);
+ if (writer == NULL) {
goto onError;
- if (size == 0)
- return res;
+ }
+
+ /* current input position */
+ Py_ssize_t inpos = 0;
+ /* current output position */
+ Py_ssize_t respos = 0;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
while (inpos<size) {
Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
/* try to encode it */
- charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
+ charmapencode_result x = charmapencode_output(ch, mapping, writer, &respos);
if (x==enc_EXCEPTION) /* error */
goto onError;
if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(unicode, &inpos, mapping,
&exc,
&error_handler, &error_handler_obj,
errors,
- &res, &respos)) {
+ writer, &respos)) {
goto onError;
}
}
- else
+ else {
/* done with this character => adjust input position */
++inpos;
+ }
}
- /* Resize if we allocated to much */
- if (respos<PyBytes_GET_SIZE(res))
- if (_PyBytes_Resize(&res, respos) < 0)
- goto onError;
-
Py_XDECREF(exc);
Py_XDECREF(error_handler_obj);
- return res;
+
+ /* Resize if we allocated too much */
+ return PyBytesWriter_FinishWithSize(writer, respos);
onError:
- Py_XDECREF(res);
+ PyBytesWriter_Discard(writer);
Py_XDECREF(exc);
Py_XDECREF(error_handler_obj);
return NULL;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]