https://github.com/python/cpython/commit/a471a32f4b59c549378a7c1de96e0eba036ec77e
commit: a471a32f4b59c549378a7c1de96e0eba036ec77e
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-01-14T14:44:53+02:00
summary:
gh-143214: Add the wrapcol parameter in binascii.b2a_base64() and
base64.b64encode() (GH-143216)
files:
A Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst
M Doc/library/base64.rst
M Doc/library/binascii.rst
M Doc/whatsnew/3.15.rst
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject_generated.h
M Lib/base64.py
M Lib/email/base64mime.py
M Lib/email/contentmanager.py
M Lib/plistlib.py
M Lib/ssl.py
M Lib/test/test_base64.py
M Lib/test/test_binascii.py
M Modules/binascii.c
M Modules/clinic/binascii.c.h
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 2d901824335145..c07a5369b6f495 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -51,7 +51,7 @@ The :rfc:`4648` encodings are suitable for encoding binary
data so that it can b
safely sent by email, used as parts of URLs, or included as part of an HTTP
POST request.
-.. function:: b64encode(s, altchars=None)
+.. function:: b64encode(s, altchars=None, *, wrapcol=0)
Encode the :term:`bytes-like object` *s* using Base64 and return the encoded
:class:`bytes`.
@@ -61,9 +61,16 @@ POST request.
This allows an application to e.g. generate URL or filesystem safe Base64
strings. The default is ``None``, for which the standard Base64 alphabet
is used.
+ If *wrapcol* is non-zero, insert a newline (``b'\n'``) character
+ after at most every *wrapcol* characters.
+ If *wrapcol* is zero (default), do not insert any newlines.
+
May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. Raises a
:exc:`TypeError` if *altchars* is not a :term:`bytes-like object`.
+ .. versionchanged:: next
+ Added the *wrapcol* parameter.
+
.. function:: b64decode(s, altchars=None, validate=False)
@@ -214,9 +221,9 @@ Refer to the documentation of the individual functions for
more information.
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
feature is not supported by the "standard" Ascii85 encoding.
- *wrapcol* controls whether the output should have newline (``b'\n'``)
- characters added to it. If this is non-zero, each output line will be
- at most this many characters long, excluding the trailing newline.
+ If *wrapcol* is non-zero, insert a newline (``b'\n'``) character
+ after at most every *wrapcol* characters.
+ If *wrapcol* is zero (default), do not insert any newlines.
*pad* controls whether the input is padded to a multiple of 4
before encoding. Note that the ``btoa`` implementation always pads.
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index 1bab785684bbab..d467a7159ed26e 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -58,7 +58,7 @@ The :mod:`binascii` module defines the following functions:
Valid base64:
- * Conforms to :rfc:`3548`.
+ * Conforms to :rfc:`4648`.
* Contains only characters from the base64 alphabet.
* Contains no excess data after padding (including excess padding,
newlines, etc.).
* Does not start with a padding.
@@ -67,15 +67,24 @@ The :mod:`binascii` module defines the following functions:
Added the *strict_mode* parameter.
-.. function:: b2a_base64(data, *, newline=True)
+.. function:: b2a_base64(data, *, wrapcol=0, newline=True)
- Convert binary data to a line of ASCII characters in base64 coding. The return
- value is the converted line, including a newline char if *newline* is
- true. The output of this function conforms to :rfc:`3548`.
+ Convert binary data to a line(s) of ASCII characters in base64 coding,
+ as specified in :rfc:`4648`.
+
+ If *wrapcol* is non-zero, insert a newline (``b'\n'``) character
+ after at most every *wrapcol* characters.
+ If *wrapcol* is zero (default), do not insert any newlines.
+
+ If *newline* is true (default), a newline character will be added
+ at the end of the output.
.. versionchanged:: 3.6
Added the *newline* parameter.
+ .. versionchanged:: next
+ Added the *wrapcol* parameter.
+
.. function:: a2b_qp(data, header=False)
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 95549d5fa7b2b4..08afda053f72e0 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -435,12 +435,22 @@ argparse
inline code when color output is enabled.
(Contributed by Savannah Ostrowski in :gh:`142390`.)
-base64 & binascii
------------------
+base64
+------
+
+* Added the *pad* parameter in :func:`~base64.z85encode`.
+ (Contributed by Hauke Dämpfling in :gh:`143103`.)
+
+* Added the *wrapcol* parameter in :func:`~base64.b64encode`.
+ (Contributed by Serhiy Storchaka in :gh:`143214`.)
+
+
+binascii
+--------
+
+* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
+ (Contributed by Serhiy Storchaka in :gh:`143214`.)
-* CPython's underlying base64 implementation now encodes 2x faster and decodes 3x
- faster thanks to simple CPU pipelining optimizations.
- (Contributed by Gregory P. Smith & Serhiy Storchaka in :gh:`143262`.)
calendar
--------
@@ -878,6 +888,13 @@ Optimizations
(Contributed by Chris Eibl, Ken Jin, and Brandt Bucher in :gh:`143068`.
Special thanks to the MSVC team including Hulon Jenkins.)
+base64 & binascii
+-----------------
+
+* CPython's underlying base64 implementation now encodes 2x faster and decodes 3x
+ faster thanks to simple CPU pipelining optimizations.
+ (Contributed by Gregory P. Smith and Serhiy Storchaka in :gh:`143262`.)
+
csv
---
diff --git a/Include/internal/pycore_global_objects_fini_generated.h
b/Include/internal/pycore_global_objects_fini_generated.h
index e625bf2fef1912..705721021e9f49 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -2142,6 +2142,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wrapcol));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through));
diff --git a/Include/internal/pycore_global_strings.h
b/Include/internal/pycore_global_strings.h
index 771f0f8cb4ad87..7c2f44ef6dbe7a 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -865,6 +865,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(which)
STRUCT_FOR_ID(who)
STRUCT_FOR_ID(withdata)
+ STRUCT_FOR_ID(wrapcol)
STRUCT_FOR_ID(writable)
STRUCT_FOR_ID(write)
STRUCT_FOR_ID(write_through)
diff --git a/Include/internal/pycore_runtime_init_generated.h
b/Include/internal/pycore_runtime_init_generated.h
index 499a2569b9a06c..6e7bad986dbeda 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -2140,6 +2140,7 @@ extern "C" {
INIT_ID(which), \
INIT_ID(who), \
INIT_ID(withdata), \
+ INIT_ID(wrapcol), \
INIT_ID(writable), \
INIT_ID(write), \
INIT_ID(write_through), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h
b/Include/internal/pycore_unicodeobject_generated.h
index 1375f46018f943..660115931da0a0 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -3240,6 +3240,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp)
{
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(wrapcol);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(writable);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/base64.py b/Lib/base64.py
index c2fdee8eab9690..e62ae6aff580fa 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -45,14 +45,17 @@ def _bytes_from_decode_data(s):
# Base64 encoding/decoding uses binascii
-def b64encode(s, altchars=None):
+def b64encode(s, altchars=None, *, wrapcol=0):
"""Encode the bytes-like object s using Base64 and return a bytes object.
Optional altchars should be a byte string of length 2 which specifies an
alternative alphabet for the '+' and '/' characters. This allows an
application to e.g. generate url or filesystem safe Base64 strings.
+
+ If wrapcol is non-zero, insert a newline (b'\\n') character after at most
+ every wrapcol characters.
"""
- encoded = binascii.b2a_base64(s, newline=False)
+ encoded = binascii.b2a_base64(s, wrapcol=wrapcol, newline=False)
if altchars is not None:
assert len(altchars) == 2, repr(altchars)
return encoded.translate(bytes.maketrans(b'+/', altchars))
@@ -327,9 +330,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False,
adobe=False):
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
feature is not supported by the "standard" Adobe encoding.
- wrapcol controls whether the output should have newline (b'\\n') characters
- added to it. If this is non-zero, each output line will be at most this
- many characters long, excluding the trailing newline.
+ If wrapcol is non-zero, insert a newline (b'\\n') character after at most
+ every wrapcol characters.
pad controls whether the input is padded to a multiple of 4 before
encoding. Note that the btoa implementation always pads.
@@ -566,11 +568,10 @@ def encodebytes(s):
"""Encode a bytestring into a bytes object containing multiple lines
of base-64 data."""
_input_type_check(s)
- pieces = []
- for i in range(0, len(s), MAXBINSIZE):
- chunk = s[i : i + MAXBINSIZE]
- pieces.append(binascii.b2a_base64(chunk))
- return b"".join(pieces)
+ result = binascii.b2a_base64(s, wrapcol=MAXLINESIZE)
+ if result == b'\n':
+ return b''
+ return result
def decodebytes(s):
diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py
index a5a3f737a97b51..5766f9ad655bc3 100644
--- a/Lib/email/base64mime.py
+++ b/Lib/email/base64mime.py
@@ -83,16 +83,15 @@ def body_encode(s, maxlinelen=76, eol=NL):
if not s:
return ""
- encvec = []
- max_unencoded = maxlinelen * 3 // 4
- for i in range(0, len(s), max_unencoded):
- # BAW: should encode() inherit b2a_base64()'s dubious behavior in
- # adding a newline to the encoded string?
- enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
- if enc.endswith(NL) and eol != NL:
- enc = enc[:-1] + eol
- encvec.append(enc)
- return EMPTYSTRING.join(encvec)
+ if not eol:
+ return b2a_base64(s, newline=False).decode("ascii")
+
+ # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+ # adding a newline to the encoded string?
+ enc = b2a_base64(s, wrapcol=maxlinelen).decode("ascii")
+ if eol != NL:
+ enc = enc.replace(NL, eol)
+ return enc
def decode(string):
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
index 11d1536db27d79..13fcb9787f1f32 100644
--- a/Lib/email/contentmanager.py
+++ b/Lib/email/contentmanager.py
@@ -129,19 +129,6 @@ def _finalize_set(msg, disposition, filename, cid, params):
msg.set_param(key, value)
-# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug
-# fix in the calculation of unencoded_bytes_per_line). It would be nice to
-# drop both this and quoprimime.body_encode in favor of enhanced binascii
-# routines that accepted a max_line_length parameter.
-def _encode_base64(data, max_line_length):
- encoded_lines = []
- unencoded_bytes_per_line = max_line_length // 4 * 3
- for i in range(0, len(data), unencoded_bytes_per_line):
- thisline = data[i:i+unencoded_bytes_per_line]
- encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
- return ''.join(encoded_lines)
-
-
def _encode_text(string, charset, cte, policy):
# If max_line_length is 0 or None, there is no limit.
maxlen = policy.max_line_length or sys.maxsize
@@ -176,7 +163,7 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
maxlen)
elif cte == 'base64':
- data = _encode_base64(embedded_body(lines), maxlen)
+ data = binascii.b2a_base64(embedded_body(lines),
wrapcol=maxlen).decode('ascii')
else:
raise ValueError("Unknown content transfer encoding {}".format(cte))
return cte, data
@@ -234,7 +221,8 @@ def set_bytes_content(msg, data, maintype, subtype,
cte='base64',
params=None, headers=None):
_prepare_set(msg, maintype, subtype, headers)
if cte == 'base64':
- data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
+ data = binascii.b2a_base64(data, wrapcol=msg.policy.max_line_length)
+ data = data.decode('ascii')
elif cte == 'quoted-printable':
# XXX: quoprimime.body_encode won't encode newline characters in data,
# so we can't use it. This means max_line_length is ignored. Another
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index 5b2b4e42c95a83..cae38672f641b7 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -122,13 +122,7 @@ def __hash__(self):
r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
def _encode_base64(s, maxlinelength=76):
- # copied from base64.encodebytes(), with added maxlinelength argument
- maxbinsize = (maxlinelength//4)*3
- pieces = []
- for i in range(0, len(s), maxbinsize):
- chunk = s[i : i + maxbinsize]
- pieces.append(binascii.b2a_base64(chunk))
- return b''.join(pieces)
+ return binascii.b2a_base64(s, wrapcol=maxlinelength, newline=False)
def _decode_base64(s):
if isinstance(s, str):
@@ -382,11 +376,10 @@ def write_value(self, value):
def write_bytes(self, data):
self.begin_element("data")
self._indent_level -= 1
- maxlinelength = max(
- 16,
- 76 - len((self.indent * self._indent_level).expandtabs()))
-
- for line in _encode_base64(data, maxlinelength).split(b"\n"):
+ wrapcol = 76 - len((self.indent * self._indent_level).expandtabs())
+ wrapcol = max(16, wrapcol)
+ encoded = binascii.b2a_base64(data, wrapcol=wrapcol, newline=False)
+ for line in encoded.split(b"\n"):
if line:
self.writeln(line)
self._indent_level += 1
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 67a2990b2817e2..612b32cd0765ec 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -1534,11 +1534,8 @@ def DER_cert_to_PEM_cert(der_cert_bytes):
"""Takes a certificate in binary DER format and returns the
PEM version of it as a string."""
- f = str(base64.standard_b64encode(der_cert_bytes), 'ASCII', 'strict')
- ss = [PEM_HEADER]
- ss += [f[i:i+64] for i in range(0, len(f), 64)]
- ss.append(PEM_FOOTER + '\n')
- return '\n'.join(ss)
+ f = str(base64.b64encode(der_cert_bytes, wrapcol=64), 'ASCII')
+ return f'{PEM_HEADER}\n{f}\n{PEM_FOOTER}\n'
def PEM_cert_to_DER_cert(pem_cert_string):
"""Takes a certificate in ASCII PEM format and returns the
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 288caf663e8321..120c5824a42a40 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -2,9 +2,10 @@
import base64
import binascii
import string
+import sys
import os
from array import array
-from test.support import cpython_only
+from test.support import cpython_only, check_impl_detail
from test.support import os_helper
from test.support import script_helper
from test.support.import_helper import ensure_lazy_imports
@@ -172,6 +173,7 @@ def test_b64encode(self):
b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
+
# Test with arbitrary alternative characters
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=b'*$'),
b'01a*b$cd')
eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=bytearray(b'*$')),
@@ -207,6 +209,31 @@ def test_b64encode(self):
b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
self.check_encode_type_errors(base64.urlsafe_b64encode)
+ def test_b64encode_wrapcol(self):
+ eq = self.assertEqual
+ b = b'www.python.org'
+ eq(base64.b64encode(b, wrapcol=0), b'd3d3LnB5dGhvbi5vcmc=')
+ eq(base64.b64encode(b, wrapcol=8), b'd3d3LnB5\ndGhvbi5v\ncmc=')
+ eq(base64.b64encode(b, wrapcol=11), b'd3d3LnB5\ndGhvbi5v\ncmc=')
+ eq(base64.b64encode(b, wrapcol=76), b'd3d3LnB5dGhvbi5vcmc=')
+ eq(base64.b64encode(b, wrapcol=1), b'd3d3\nLnB5\ndGhv\nbi5v\ncmc=')
+ eq(base64.b64encode(b, wrapcol=sys.maxsize), b'd3d3LnB5dGhvbi5vcmc=')
+ if check_impl_detail():
+ eq(base64.b64encode(b, wrapcol=sys.maxsize*2),
+ b'd3d3LnB5dGhvbi5vcmc=')
+ with self.assertRaises(OverflowError):
+ base64.b64encode(b, wrapcol=2**1000)
+ with self.assertRaises(ValueError):
+ base64.b64encode(b, wrapcol=-8)
+ with self.assertRaises(TypeError):
+ base64.b64encode(b, wrapcol=8.0)
+ with self.assertRaises(TypeError):
+ base64.b64encode(b, wrapcol='8')
+ with self.assertRaises(TypeError):
+ base64.b64encode(b, wrapcol=None)
+ eq(base64.b64encode(b'', wrapcol=0), b'')
+ eq(base64.b64encode(b'', wrapcol=8), b'')
+
def test_b64decode(self):
eq = self.assertEqual
@@ -614,18 +641,46 @@ def test_a85encode(self):
self.assertRaises(TypeError, base64.a85encode, "")
- eq(base64.a85encode(b"www.python.org", wrapcol=7, adobe=False),
- b'GB\\6`E-\nZP=Df.1\nGEb>')
- eq(base64.a85encode(b"\0\0\0\0www.python.org", wrapcol=7, adobe=False),
- b'zGB\\6`E\n-ZP=Df.\n1GEb>')
- eq(base64.a85encode(b"www.python.org", wrapcol=7, adobe=True),
- b'<~GB\\6`\nE-ZP=Df\n.1GEb>\n~>')
-
eq(base64.a85encode(b' '*8, foldspaces=True, adobe=False), b'yy')
eq(base64.a85encode(b' '*7, foldspaces=True, adobe=False), b'y+<Vd')
eq(base64.a85encode(b' '*6, foldspaces=True, adobe=False), b'y+<U')
eq(base64.a85encode(b' '*5, foldspaces=True, adobe=False), b'y+9')
+ def test_a85encode_wrapcol(self):
+ eq = self.assertEqual
+ b = b'www.python.org'
+ eq(base64.a85encode(b, wrapcol=0), b'GB\\6`E-ZP=Df.1GEb>')
+ eq(base64.a85encode(b, wrapcol=7), b'GB\\6`E-\nZP=Df.1\nGEb>')
+ eq(base64.a85encode(b"\0\0\0\0www.python.org", wrapcol=7),
+ b'zGB\\6`E\n-ZP=Df.\n1GEb>')
+ eq(base64.a85encode(b, wrapcol=75), b'GB\\6`E-ZP=Df.1GEb>')
+ eq(base64.a85encode(b, wrapcol=1),
+ b'G\nB\n\\\n6\n`\nE\n-\nZ\nP\n=\nD\nf\n.\n1\nG\nE\nb\n>')
+ eq(base64.a85encode(b, wrapcol=7, adobe=True),
+ b'<~GB\\6`\nE-ZP=Df\n.1GEb>\n~>')
+ eq(base64.a85encode(b, wrapcol=1),
+ b'G\nB\n\\\n6\n`\nE\n-\nZ\nP\n=\nD\nf\n.\n1\nG\nE\nb\n>')
+ eq(base64.a85encode(b, wrapcol=1, adobe=True),
+ b'<~\nGB\n\\6\n`E\n-Z\nP=\nDf\n.1\nGE\nb>\n~>')
+ eq(base64.a85encode(b, wrapcol=sys.maxsize), b'GB\\6`E-ZP=Df.1GEb>')
+ if check_impl_detail():
+ eq(base64.a85encode(b, wrapcol=2**1000), b'GB\\6`E-ZP=Df.1GEb>')
+ eq(base64.a85encode(b, wrapcol=-7),
+ b'G\nB\n\\\n6\n`\nE\n-\nZ\nP\n=\nD\nf\n.\n1\nG\nE\nb\n>')
+ eq(base64.a85encode(b, wrapcol=-7, adobe=True),
+ b'<~\nGB\n\\6\n`E\n-Z\nP=\nDf\n.1\nGE\nb>\n~>')
+ with self.assertRaises(TypeError):
+ base64.a85encode(b, wrapcol=7.0)
+ with self.assertRaises(TypeError):
+ base64.a85encode(b, wrapcol='7')
+ if check_impl_detail():
+ eq(base64.a85encode(b, wrapcol=None), b'GB\\6`E-ZP=Df.1GEb>')
+ eq(base64.a85encode(b'', wrapcol=0), b'')
+ eq(base64.a85encode(b'', wrapcol=7), b'')
+ eq(base64.a85encode(b'', wrapcol=1, adobe=True), b'<~\n~>')
+ eq(base64.a85encode(b'', wrapcol=3, adobe=True), b'<~\n~>')
+ eq(base64.a85encode(b'', wrapcol=4, adobe=True), b'<~~>')
+
def test_b85encode(self):
eq = self.assertEqual
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 7ed7d7c47b6de1..47e1e6ab035a17 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -4,7 +4,8 @@
import binascii
import array
import re
-from test.support import bigmemtest, _1G, _4G
+import sys
+from test.support import bigmemtest, _1G, _4G, check_impl_detail
from test.support.hypothesis_helper import hypothesis
@@ -479,6 +480,45 @@ def test_b2a_base64_newline(self):
b'aGVsbG8=\n')
self.assertEqual(binascii.b2a_base64(b, newline=False),
b'aGVsbG8=')
+ b = self.type2test(b'')
+ self.assertEqual(binascii.b2a_base64(b), b'\n')
+ self.assertEqual(binascii.b2a_base64(b, newline=True), b'\n')
+ self.assertEqual(binascii.b2a_base64(b, newline=False), b'')
+
+ def test_b2a_base64_wrapcol(self):
+ b = self.type2test(b'www.python.org')
+ self.assertEqual(binascii.b2a_base64(b),
+ b'd3d3LnB5dGhvbi5vcmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=0),
+ b'd3d3LnB5dGhvbi5vcmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=8),
+ b'd3d3LnB5\ndGhvbi5v\ncmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=11),
+ b'd3d3LnB5\ndGhvbi5v\ncmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=76),
+ b'd3d3LnB5dGhvbi5vcmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False),
+ b'd3d3LnB5\ndGhvbi5v\ncmc=')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=1),
+ b'd3d3\nLnB5\ndGhv\nbi5v\ncmc=\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize),
+ b'd3d3LnB5dGhvbi5vcmc=\n')
+ if check_impl_detail():
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize*2),
+ b'd3d3LnB5dGhvbi5vcmc=\n')
+ with self.assertRaises(OverflowError):
+ binascii.b2a_base64(b, wrapcol=2**1000)
+ with self.assertRaises(ValueError):
+ binascii.b2a_base64(b, wrapcol=-8)
+ with self.assertRaises(TypeError):
+ binascii.b2a_base64(b, wrapcol=8.0)
+ with self.assertRaises(TypeError):
+ binascii.b2a_base64(b, wrapcol='8')
+ b = self.type2test(b'')
+ self.assertEqual(binascii.b2a_base64(b), b'\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=0), b'\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=8), b'\n')
+ self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False), b'')
@hypothesis.given(
binary=hypothesis.strategies.binary(),
diff --git
a/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst
b/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst
new file mode 100644
index 00000000000000..0936c4d8e4f708
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst
@@ -0,0 +1,2 @@
+Add the *wrapcol* parameter in :func:`binascii.b2a_base64` and
+:func:`base64.b64encode`.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index a0a2960eef5ab0..c569d3187f2e67 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -272,6 +272,32 @@ ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
return Py_CLEANUP_SUPPORTED;
}
+/* Insert '\n' after every `width` bytes of data[0..size), in place, working
+ * backwards from the end.  `width` must be non-zero and the buffer must have
+ * room for the (size - 1) / width extra bytes.  No newline follows the last
+ * line.  Returns the new size, including the inserted newlines. */
+static Py_ssize_t
+wraplines(unsigned char *data, Py_ssize_t size, size_t width)
+{
+ if ((size_t)size <= width) { /* fits on a single line: nothing to insert */
+ return size;
+ }
+ unsigned char *src = data + size; /* end of the unwrapped data */
+ Py_ssize_t newlines = (size - 1) / width; /* separators between lines */
+ Py_ssize_t line_len = size - newlines * width; /* last line: 1..width bytes */
+ size += newlines;
+ unsigned char *dst = data + size; /* end of the wrapped data */
+
+ while ((src -= line_len) != data) { /* stop when only the first line is left */
+ dst -= line_len;
+ memmove(dst, src, line_len); /* source and destination may overlap */
+ *--dst = '\n';
+ line_len = width; /* every line before the last one is full */
+ }
+ assert(dst == data + width); /* the first line never moves */
+ return size;
+}
+
#include "clinic/binascii.c.h"
/*[clinic input]
@@ -622,39 +648,44 @@ binascii.b2a_base64
data: Py_buffer
/
*
+ wrapcol: size_t = 0
newline: bool = True
Base64-code line of data.
[clinic start generated code]*/
static PyObject *
-binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
-/*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/
+binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol,
+ int newline)
+/*[clinic end generated code: output=2edc7311a9515eac input=2ee4214e6d489e2e]*/
{
- const unsigned char *bin_data;
- Py_ssize_t bin_len;
- binascii_state *state;
-
- bin_data = data->buf;
- bin_len = data->len;
-
+ const unsigned char *bin_data = data->buf;
+ Py_ssize_t bin_len = data->len;
assert(bin_len >= 0);
- if ( bin_len > BASE64_MAXBIN ) {
- state = get_binascii_state(module);
- if (state == NULL) {
- return NULL;
+ /* Each group of 3 bytes (rounded up) gets encoded as 4 characters,
+ * not counting newlines.
+ * Note that 'b' gets encoded as 'Yg==' (1 in, 4 out).
+ *
+ * Use unsigned integer arithmetic to avoid signed integer overflow.
+ */
+ size_t out_len = ((size_t)bin_len + 2u) / 3u * 4u;
+ if (out_len > PY_SSIZE_T_MAX) {
+ goto toolong;
+ }
+ if (wrapcol && out_len) {
+ /* Each line should encode a whole number of bytes. */
+ wrapcol = wrapcol < 4 ? 4 : wrapcol / 4 * 4;
+ out_len += (out_len - 1u) / wrapcol;
+ if (out_len > PY_SSIZE_T_MAX) {
+ goto toolong;
}
- PyErr_SetString(state->Error, "Too much data for base64 line");
- return NULL;
}
-
- /* We're lazy and allocate too much (fixed up later).
- "+2" leaves room for up to two pad characters.
- Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
- Py_ssize_t out_len = bin_len*2 + 2;
if (newline) {
out_len++;
+ if (out_len > PY_SSIZE_T_MAX) {
+ goto toolong;
+ }
}
PyBytesWriter *writer = PyBytesWriter_Create(out_len);
if (writer == NULL) {
@@ -687,10 +718,22 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer
*data, int newline)
*ascii_data++ = BASE64_PAD;
}
+ if (wrapcol) {
+ unsigned char *start = PyBytesWriter_GetData(writer);
+ ascii_data = start + wraplines(start, ascii_data - start, wrapcol);
+ }
if (newline)
*ascii_data++ = '\n'; /* Append a courtesy newline */
return PyBytesWriter_FinishWithPointer(writer, ascii_data);
+
+toolong:;
+ binascii_state *state = get_binascii_state(module);
+ if (state == NULL) {
+ return NULL;
+ }
+ PyErr_SetString(state->Error, "Too much data for base64");
+ return NULL;
}
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index ce29e0d11a45cd..524f5fc93d0c21 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -6,6 +6,7 @@ preserve
# include "pycore_gc.h" // PyGC_Head
# include "pycore_runtime.h" // _Py_ID()
#endif
+#include "pycore_long.h" // _PyLong_Size_t_Converter()
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
PyDoc_STRVAR(binascii_a2b_uu__doc__,
@@ -193,7 +194,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
}
PyDoc_STRVAR(binascii_b2a_base64__doc__,
-"b2a_base64($module, data, /, *, newline=True)\n"
+"b2a_base64($module, data, /, *, wrapcol=0, newline=True)\n"
"--\n"
"\n"
"Base64-code line of data.");
@@ -202,7 +203,8 @@ PyDoc_STRVAR(binascii_b2a_base64__doc__,
{"b2a_base64", _PyCFunction_CAST(binascii_b2a_base64),
METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base64__doc__},
static PyObject *
-binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline);
+binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol,
+ int newline);
static PyObject *
binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs,
PyObject *kwnames)
@@ -210,7 +212,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
- #define NUM_KEYWORDS 1
+ #define NUM_KEYWORDS 2
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
@@ -219,7 +221,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
- .ob_item = { &_Py_ID(newline), },
+ .ob_item = { &_Py_ID(wrapcol), &_Py_ID(newline), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -228,16 +230,17 @@ binascii_b2a_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
- static const char * const _keywords[] = {"", "newline", NULL};
+ static const char * const _keywords[] = {"", "wrapcol", "newline", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "b2a_base64",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
- PyObject *argsbuf[2];
+ PyObject *argsbuf[3];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) -
1;
Py_buffer data = {NULL, NULL};
+ size_t wrapcol = 0;
int newline = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
@@ -251,12 +254,20 @@ binascii_b2a_base64(PyObject *module, PyObject *const
*args, Py_ssize_t nargs, P
if (!noptargs) {
goto skip_optional_kwonly;
}
- newline = PyObject_IsTrue(args[1]);
+ if (args[1]) {
+ if (!_PyLong_Size_t_Converter(args[1], &wrapcol)) {
+ goto exit;
+ }
+ if (!--noptargs) {
+ goto skip_optional_kwonly;
+ }
+ }
+ newline = PyObject_IsTrue(args[2]);
if (newline < 0) {
goto exit;
}
skip_optional_kwonly:
- return_value = binascii_b2a_base64_impl(module, &data, newline);
+ return_value = binascii_b2a_base64_impl(module, &data, wrapcol, newline);
exit:
/* Cleanup for data */
@@ -812,4 +823,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args,
Py_ssize_t nargs, PyObj
return return_value;
}
-/*[clinic end generated code: output=fba6a71e0d7d092f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=644ccdc8e0d56e65 input=a9049054013a1b77]*/
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]