Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-cbor2 for openSUSE:Factory checked in at 2026-03-25 21:19:44 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-cbor2 (Old) and /work/SRC/openSUSE:Factory/.python-cbor2.new.8177 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-cbor2" Wed Mar 25 21:19:44 2026 rev:21 rq:1342387 version:5.9.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-cbor2/python-cbor2.changes 2026-01-07 16:01:26.499184530 +0100 +++ /work/SRC/openSUSE:Factory/.python-cbor2.new.8177/python-cbor2.changes 2026-03-27 06:47:39.157385949 +0100 @@ -1,0 +2,18 @@ +Tue Mar 24 11:58:11 UTC 2026 - Markéta Machová <[email protected]> + +- Update to 5.9.0 (CVE-2026-26209, bsc#1260367) + * Added the max_depth decoder parameter to limit the maximum allowed + nesting level of containers, with a default value of 400 levels. + * Changed the default read_size from 4096 to 1 for backwards + compatibility. + * Fixed C encoder not respecting string referencing when encoding + string-type datetimes. + * Fixed a missed check for an exception in the C implementation of + CBOREncoder.encode_shared(). + * Fixed two reference/memory leaks in the C extension's long string + decoder. + * Fixed C decoder ignoring the str_errors setting when decoding + strings, and improved string decoding performance by using stack + allocation for small strings and purging unnecessary conditionals. 
+ +------------------------------------------------------------------- Old: ---- cbor2-5.8.0.tar.gz New: ---- cbor2-5.9.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-cbor2.spec ++++++ --- /var/tmp/diff_new_pack.4pXurH/_old 2026-03-27 06:47:39.701408405 +0100 +++ /var/tmp/diff_new_pack.4pXurH/_new 2026-03-27 06:47:39.705408570 +0100 @@ -23,7 +23,7 @@ %endif %{?sle15_python_module_pythons} Name: python-cbor2 -Version: 5.8.0 +Version: 5.9.0 Release: 0 Summary: Pure Python CBOR (de)serializer with extensive tag support License: MIT ++++++ cbor2-5.8.0.tar.gz -> cbor2-5.9.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/.github/FUNDING.yml new/cbor2-5.9.0/.github/FUNDING.yml --- old/cbor2-5.8.0/.github/FUNDING.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/cbor2-5.9.0/.github/FUNDING.yml 2026-03-22 16:49:24.000000000 +0100 @@ -0,0 +1 @@ +tidelift: pypi/cbor2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/.github/SECURITY.md new/cbor2-5.9.0/.github/SECURITY.md --- old/cbor2-5.8.0/.github/SECURITY.md 1970-01-01 01:00:00.000000000 +0100 +++ new/cbor2-5.9.0/.github/SECURITY.md 2026-03-22 16:49:24.000000000 +0100 @@ -0,0 +1,16 @@ +# Security Policy + +## Supported Versions + +The latest major/minor versions will get security updates and bug fixes. +Earlier versions are unsupported. + +| Version | Supported | +|---------| ------------------ | +| 5.9.x | :white_check_mark: | +| < 5.9.x | :x: | + +## Reporting a Vulnerability + +See the [security](https://github.com/agronholm/cbor2/security) +section to view existing security advisories or to report a new vulnerability. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/.pre-commit-config.yaml new/cbor2-5.9.0/.pre-commit-config.yaml --- old/cbor2-5.8.0/.pre-commit-config.yaml 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/.pre-commit-config.yaml 2026-03-22 16:49:24.000000000 +0100 @@ -16,9 +16,9 @@ - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.10 + rev: v0.15.6 hooks: - - id: ruff + - id: ruff-check args: [--fix, --show-fixes] - id: ruff-format diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/PKG-INFO new/cbor2-5.9.0/PKG-INFO --- old/cbor2-5.8.0/PKG-INFO 2025-12-30 19:37:23.094826700 +0100 +++ new/cbor2-5.9.0/PKG-INFO 2026-03-22 16:49:28.831297000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: cbor2 -Version: 5.8.0 +Version: 5.9.0 Summary: CBOR (de)serializer with extensive tag support Author-email: Alex Grönholm <[email protected]> License-Expression: MIT @@ -37,6 +37,9 @@ .. image:: https://readthedocs.org/projects/cbor2/badge/?version=latest :target: https://cbor2.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status +.. image:: https://tidelift.com/badges/package/pypi/cbor2 + :target: https://tidelift.com/subscription/pkg/pypi-cbor2 + :alt: Tidelift About ===== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/README.rst new/cbor2-5.9.0/README.rst --- old/cbor2-5.8.0/README.rst 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/README.rst 2026-03-22 16:49:24.000000000 +0100 @@ -10,6 +10,9 @@ .. image:: https://readthedocs.org/projects/cbor2/badge/?version=latest :target: https://cbor2.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status +.. 
image:: https://tidelift.com/badges/package/pypi/cbor2 + :target: https://tidelift.com/subscription/pkg/pypi-cbor2 + :alt: Tidelift About ===== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/SECURITY.md new/cbor2-5.9.0/SECURITY.md --- old/cbor2-5.8.0/SECURITY.md 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/SECURITY.md 1970-01-01 01:00:00.000000000 +0100 @@ -1,16 +0,0 @@ -# Security Policy - -## Supported Versions - -The latest major/minor versions will get security updates and bug fixes. -Earlier versions are unsupported. - -| Version | Supported | -|----------| ------------------ | -| 5.7.x | :white_check_mark: | -| < 5.7.x | :x: | - -## Reporting a Vulnerability - -See the [security](https://github.com/agronholm/cbor2/security) -section to view existing security advisories or to report a new vulnerability. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/cbor2/_decoder.py new/cbor2-5.9.0/cbor2/_decoder.py --- old/cbor2-5.8.0/cbor2/_decoder.py 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/cbor2/_decoder.py 2026-03-22 16:49:24.000000000 +0100 @@ -4,14 +4,14 @@ import struct import sys from codecs import getincrementaldecoder -from collections.abc import Callable, Generator, Mapping, Sequence -from contextlib import contextmanager +from collections.abc import Callable, Mapping, Sequence from datetime import date, datetime, timedelta, timezone from io import BytesIO -from typing import IO, TYPE_CHECKING, Any, TypeVar, cast, overload +from typing import IO, TYPE_CHECKING, Any, Literal, TypeVar, cast, overload from ._types import ( CBORDecodeEOF, + CBORDecodeError, CBORDecodeValueError, CBORSimpleValue, CBORTag, @@ -25,7 +25,6 @@ from email.message import Message from fractions import Fraction from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network - from typing import Literal from uuid import UUID T = TypeVar("T") @@ 
-60,18 +59,23 @@ "_immutable", "_str_errors", "_stringref_namespace", + "_max_depth", "_decode_depth", ) _fp: IO[bytes] _fp_read: Callable[[int], bytes] + _str_errors: str def __init__( self, fp: IO[bytes], tag_hook: Callable[[CBORDecoder, CBORTag], Any] | None = None, object_hook: Callable[[CBORDecoder, dict[Any, Any]], Any] | None = None, - str_errors: Literal["strict", "error", "replace"] = "strict", + str_errors: str = "strict", + read_size: int = 1, + *, + max_depth: int = 400, ): """ :param fp: @@ -90,6 +94,15 @@ :param str_errors: determines how to handle unicode decoding errors (see the `Error Handlers`_ section in the standard library documentation for details) + :param read_size: + the minimum number of bytes to read at a time. + Setting this to a higher value like 4096 improves performance, + but is likely to read past the end of the CBOR value, advancing the stream + position beyond the decoded data. This only matters if you need to reuse the + stream after decoding. + Ignored in the pure Python implementation, but included for API compatibility. + :param max_depth: + the maximum allowed container nesting depth .. 
_Error Handlers: https://docs.python.org/3/library/codecs.html#error-handlers @@ -102,6 +115,7 @@ self._shareables: list[object] = [] self._stringref_namespace: list[str | bytes] | None = None self._immutable = False + self._max_depth = max_depth self._decode_depth = 0 @property @@ -151,17 +165,19 @@ raise ValueError("object_hook must be None or a callable") @property - def str_errors(self) -> Literal["strict", "error", "replace"]: + def str_errors(self) -> str: return self._str_errors @str_errors.setter - def str_errors(self, value: Literal["strict", "error", "replace"]) -> None: - if value in ("strict", "error", "replace"): + def str_errors(self, value: str) -> None: + if value == "error": + self._str_errors = "strict" + elif value in ("strict", "error", "replace", "backslashreplace", "surrogateescape"): self._str_errors = value else: raise ValueError( - f"invalid str_errors value {value!r} (must be one of 'strict', " - "'error', or 'replace')" + f"invalid str_errors value {value!r} (must be 'strict', 'error', 'replace', " + f"'backslashreplace' or 'surrogateescape')" ) def set_shareable(self, value: T) -> T: @@ -209,13 +225,24 @@ return data - def _decode(self, immutable: bool = False, unshared: bool = False) -> Any: + def decode(self, immutable: bool = False, unshared: bool = False) -> Any: + """ + Decode the next value from the stream. 
+ + :raises CBORDecodeError: if there is any problem decoding the stream + + """ + if self._decode_depth > self._max_depth: + raise CBORDecodeError(f"maximum container nesting depth ({self._max_depth}) exceeded") + if immutable: old_immutable = self._immutable self._immutable = True if unshared: old_index = self._share_index self._share_index = None + + self._decode_depth += 1 try: initial_byte = self.read(1)[0] major_type = initial_byte >> 5 @@ -228,34 +255,12 @@ if unshared: self._share_index = old_index - @contextmanager - def _decoding_context(self) -> Generator[None]: - """ - Context manager for tracking decode depth and clearing shared state. - - Shared state is cleared at the end of each top-level decode to prevent - shared references from leaking between independent decode operations. - Nested calls (from hooks) must preserve the state. - """ - self._decode_depth += 1 - try: - yield - finally: self._decode_depth -= 1 assert self._decode_depth >= 0 if self._decode_depth == 0: self._shareables.clear() self._share_index = None - def decode(self) -> object: - """ - Decode the next value from the stream. - - :raises CBORDecodeError: if there is any problem decoding the stream - """ - with self._decoding_context(): - return self._decode() - def decode_from_bytes(self, buf: bytes) -> object: """ Wrap the given bytestring as a file and call :meth:`decode` with it as @@ -265,13 +270,12 @@ object needs to be decoded separately from the rest but while still taking advantage of the shared value registry. """ - with self._decoding_context(): - with BytesIO(buf) as fp: - old_fp = self.fp - self.fp = fp - retval = self._decode() - self.fp = old_fp - return retval + with BytesIO(buf) as fp: + old_fp = self.fp + self.fp = fp + retval = self.decode() + self.fp = old_fp + return retval @overload def _decode_length(self, subtype: int) -> int: ... 
@@ -422,7 +426,7 @@ if not self._immutable: self.set_shareable(items) while True: - value = self._decode(unshared=True) + value = self.decode(unshared=True) if value is break_marker: break else: @@ -436,7 +440,7 @@ self.set_shareable(items) for index in range(length): - items.append(self._decode(unshared=True)) + items.append(self.decode(unshared=True)) if self._immutable: items_tuple = tuple(items) @@ -453,17 +457,17 @@ dictionary: dict[Any, Any] = {} self.set_shareable(dictionary) while True: - key = self._decode(immutable=True, unshared=True) + key = self.decode(immutable=True, unshared=True) if key is break_marker: break else: - dictionary[key] = self._decode(unshared=True) + dictionary[key] = self.decode(unshared=True) else: dictionary = {} self.set_shareable(dictionary) for _ in range(length): - key = self._decode(immutable=True, unshared=True) - dictionary[key] = self._decode(unshared=True) + key = self.decode(immutable=True, unshared=True) + dictionary[key] = self.decode(unshared=True) if self._object_hook: dictionary = self._object_hook(self, dictionary) @@ -483,7 +487,7 @@ tag = CBORTag(tagnum, None) self.set_shareable(tag) - tag.value = self._decode(unshared=True) + tag.value = self.decode(unshared=True) if self._tag_hook: tag = self._tag_hook(self, tag) @@ -508,17 +512,17 @@ # def decode_epoch_date(self) -> date: # Semantic tag 100 - value = self._decode() + value = self.decode() return self.set_shareable(date.fromordinal(value + 719163)) def decode_date_string(self) -> date: # Semantic tag 1004 - value = self._decode() + value = self.decode() return self.set_shareable(date.fromisoformat(value)) def decode_datetime_string(self) -> datetime: # Semantic tag 0 - value = self._decode() + value = self.decode() match = timestamp_re.match(value) if match: ( @@ -566,7 +570,7 @@ def decode_epoch_datetime(self) -> datetime: # Semantic tag 1 - value = self._decode() + value = self.decode() try: tmp = datetime.fromtimestamp(value, timezone.utc) @@ -579,7 +583,7 @@ 
# Semantic tag 2 from binascii import hexlify - value = self._decode() + value = self.decode() if not isinstance(value, bytes): raise CBORDecodeValueError("invalid bignum value " + str(value)) @@ -594,7 +598,7 @@ from decimal import Decimal try: - exp, sig = self._decode() + exp, sig = self.decode() except (TypeError, ValueError) as e: raise CBORDecodeValueError("Incorrect tag 4 payload") from e tmp = Decimal(sig).as_tuple() @@ -605,7 +609,7 @@ from decimal import Decimal try: - exp, sig = self._decode() + exp, sig = self.decode() except (TypeError, ValueError) as e: raise CBORDecodeValueError("Incorrect tag 5 payload") from e @@ -616,7 +620,7 @@ if self._stringref_namespace is None: raise CBORDecodeValueError("string reference outside of namespace") - index: int = self._decode() + index: int = self.decode() try: value = self._stringref_namespace[index] except IndexError: @@ -630,13 +634,13 @@ self._share_index = len(self._shareables) self._shareables.append(None) try: - return self._decode() + return self.decode() finally: self._share_index = old_index def decode_sharedref(self) -> Any: # Semantic tag 29 - value = self._decode(unshared=True) + value = self.decode(unshared=True) try: shared = self._shareables[value] except IndexError: @@ -649,7 +653,7 @@ def decode_complex(self) -> complex: # Semantic tag 43000 - inputval = self._decode(immutable=True, unshared=True) + inputval = self.decode(immutable=True, unshared=True) try: value = complex(*inputval) except TypeError as exc: @@ -666,7 +670,7 @@ # Semantic tag 30 from fractions import Fraction - inputval = self._decode(immutable=True, unshared=True) + inputval = self.decode(immutable=True, unshared=True) try: value = Fraction(*inputval) except (TypeError, ZeroDivisionError) as exc: @@ -682,7 +686,7 @@ def decode_regexp(self) -> re.Pattern[str]: # Semantic tag 35 try: - value = re.compile(self._decode()) + value = re.compile(self.decode()) except re.error as exc: raise CBORDecodeValueError("error decoding regular 
expression") from exc @@ -693,7 +697,7 @@ from email.parser import Parser try: - value = Parser().parsestr(self._decode()) + value = Parser().parsestr(self.decode()) except TypeError as exc: raise CBORDecodeValueError("error decoding MIME message") from exc @@ -704,7 +708,7 @@ from uuid import UUID try: - value = UUID(bytes=self._decode()) + value = UUID(bytes=self.decode()) except (TypeError, ValueError) as exc: raise CBORDecodeValueError("error decoding UUID value") from exc @@ -714,16 +718,16 @@ # Semantic tag 256 old_namespace = self._stringref_namespace self._stringref_namespace = [] - value = self._decode() + value = self.decode() self._stringref_namespace = old_namespace return value def decode_set(self) -> set[Any] | frozenset[Any]: # Semantic tag 258 if self._immutable: - return self.set_shareable(frozenset(self._decode(immutable=True))) + return self.set_shareable(frozenset(self.decode(immutable=True))) else: - return self.set_shareable(set(self._decode(immutable=True))) + return self.set_shareable(set(self.decode(immutable=True))) def decode_ipaddress(self) -> IPv4Address | IPv6Address | CBORTag: # Semantic tag 260 @@ -756,7 +760,7 @@ def decode_self_describe_cbor(self) -> Any: # Semantic tag 55799 - return self._decode() + return self.decode() # # Special decoders (major tag 7) @@ -829,6 +833,9 @@ tag_hook: Callable[[CBORDecoder, CBORTag], Any] | None = None, object_hook: Callable[[CBORDecoder, dict[Any, Any]], Any] | None = None, str_errors: Literal["strict", "error", "replace"] = "strict", + read_size: int = 1, + *, + max_depth: int = 400, ) -> Any: """ Deserialize an object from a bytestring. @@ -847,6 +854,12 @@ :param str_errors: determines how to handle unicode decoding errors (see the `Error Handlers`_ section in the standard library documentation for details) + :param read_size: + the minimum number of bytes to read at a time. + Setting this to a higher value like 4096 improves performance. 
+ Ignored in the pure Python implementation, but included for API compatibility. + :param max_depth: + the maximum allowed container nesting depth :return: the deserialized object @@ -855,7 +868,12 @@ """ with BytesIO(s) as fp: return CBORDecoder( - fp, tag_hook=tag_hook, object_hook=object_hook, str_errors=str_errors + fp, + tag_hook=tag_hook, + object_hook=object_hook, + str_errors=str_errors, + read_size=read_size, + max_depth=max_depth, ).decode() @@ -864,6 +882,9 @@ tag_hook: Callable[[CBORDecoder, CBORTag], Any] | None = None, object_hook: Callable[[CBORDecoder, dict[Any, Any]], Any] | None = None, str_errors: Literal["strict", "error", "replace"] = "strict", + read_size: int = 1, + *, + max_depth: int = 400, ) -> Any: """ Deserialize an object from an open file. @@ -882,6 +903,15 @@ :param str_errors: determines how to handle unicode decoding errors (see the `Error Handlers`_ section in the standard library documentation for details) + :param read_size: + the minimum number of bytes to read at a time. + Setting this to a higher value like 4096 improves performance, + but is likely to read past the end of the CBOR value, advancing the stream + position beyond the decoded data. This only matters if you need to reuse the + stream after decoding. + Ignored in the pure Python implementation, but included for API compatibility. 
+ :param max_depth: + the maximum allowed container nesting depth :return: the deserialized object @@ -889,5 +919,10 @@ """ return CBORDecoder( - fp, tag_hook=tag_hook, object_hook=object_hook, str_errors=str_errors + fp, + tag_hook=tag_hook, + object_hook=object_hook, + str_errors=str_errors, + read_size=read_size, + max_depth=max_depth, ).decode() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/cbor2.egg-info/PKG-INFO new/cbor2-5.9.0/cbor2.egg-info/PKG-INFO --- old/cbor2-5.8.0/cbor2.egg-info/PKG-INFO 2025-12-30 19:37:23.000000000 +0100 +++ new/cbor2-5.9.0/cbor2.egg-info/PKG-INFO 2026-03-22 16:49:28.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: cbor2 -Version: 5.8.0 +Version: 5.9.0 Summary: CBOR (de)serializer with extensive tag support Author-email: Alex Grönholm <[email protected]> License-Expression: MIT @@ -37,6 +37,9 @@ .. image:: https://readthedocs.org/projects/cbor2/badge/?version=latest :target: https://cbor2.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status +.. 
image:: https://tidelift.com/badges/package/pypi/cbor2 + :target: https://tidelift.com/subscription/pkg/pypi-cbor2 + :alt: Tidelift About ===== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/cbor2.egg-info/SOURCES.txt new/cbor2-5.9.0/cbor2.egg-info/SOURCES.txt --- old/cbor2-5.8.0/cbor2.egg-info/SOURCES.txt 2025-12-30 19:37:23.000000000 +0100 +++ new/cbor2-5.9.0/cbor2.egg-info/SOURCES.txt 2026-03-22 16:49:28.000000000 +0100 @@ -3,9 +3,10 @@ .readthedocs.yml LICENSE.txt README.rst -SECURITY.md pyproject.toml setup.py +.github/FUNDING.yml +.github/SECURITY.md .github/dependabot.yml .github/pull_request_template.md .github/ISSUE_TEMPLATE/bug_report.yaml @@ -59,4 +60,5 @@ tests/test_decoder.py tests/test_encoder.py tests/test_tool.py -tests/test_types.py \ No newline at end of file +tests/test_types.py +tests/fuzzers/loads_fuzzer.py \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/docs/usage.rst new/cbor2-5.9.0/docs/usage.rst --- old/cbor2-5.8.0/docs/usage.rst 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/docs/usage.rst 2026-03-22 16:49:24.000000000 +0100 @@ -74,6 +74,17 @@ .. warning:: Support for string referencing is rare in other CBOR implementations, so think carefully whether you want to enable it. +Performance tuning +------------------ + +By default, the decoder only reads the exact amount of bytes it needs. But this can negatively +impact the performance due to the potentially large number of individual read operations. +To make it faster, you can pass a different ``read_size`` parameter (say, 4096), to :func:`load`, +:func:`loads` or :class:`CBORDecoder`. + +.. warning:: If the input stream contains data other than the CBOR stream, that data (or parts of) + may be lost. 
+ Tag support ----------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/docs/versionhistory.rst new/cbor2-5.9.0/docs/versionhistory.rst --- old/cbor2-5.8.0/docs/versionhistory.rst 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/docs/versionhistory.rst 2026-03-22 16:49:24.000000000 +0100 @@ -5,34 +5,56 @@ This library adheres to `Semantic Versioning 2.0 <http://semver.org/>`_. +**5.9.0** (2026-03-22) + +- Added the ``max_depth`` decoder parameter to limit the maximum allowed nesting level of + containers, with a default value of 400 levels (CVE-2026-26209) +- Changed the default ``read_size`` from 4096 to 1 for backwards compatibility. + The buffered reads introduced in 5.8.0 could cause issues when code needs to + access the stream position after decoding. Users can opt-in to faster decoding + by passing ``read_size=4096`` when they don't need to access the stream directly + after decoding. Added a direct read path for ``read_size=1`` to avoid buffer + management overhead. + (`#275 <https://github.com/agronholm/cbor2/pull/275>`_; PR by @andreer) +- Fixed C encoder not respecting string referencing when encoding string-type datetimes (tag 0) + (`#254 <https://github.com/agronholm/cbor2/issues/254>`_) +- Fixed a missed check for an exception in the C implementation of ``CBOREncoder.encode_shared()`` + (`#287 <https://github.com/agronholm/cbor2/issues/287>`_) +- Fixed two reference/memory leaks in the C extension's long string decoder + (`#290 <https://github.com/agronholm/cbor2/pull/290>`_ PR by @killiancowan82) +- Fixed C decoder ignoring the ``str_errors`` setting when decoding strings, and improved + string decoding performance by using stack allocation for small strings and eliminating + unnecessary conditionals. Benchmarks show 9-17% faster deserialization. 
+ (`#255 <https://github.com/agronholm/cbor2/issues/255>`_; PR by @andreer) + **5.8.0** (2025-12-30) - Added readahead buffering to C decoder for improved performance. The decoder now uses a 4 KB buffer by default to reduce the number of read calls. Benchmarks show 20-140% performance improvements for decoding operations. - (`#268+ <https://github.com/agronholm/cbor2/pull/268>`_; PR by @andreer) + (`#268 <https://github.com/agronholm/cbor2/pull/268>`_; PR by @andreer) - Fixed Python decoder not preserving share index when decoding array items containing nested shareable tags, causing shared references to resolve to wrong objects - (`#267+ <https://github.com/agronholm/cbor2/pull/267>`_; PR by @andreer) + (`#267 <https://github.com/agronholm/cbor2/pull/267>`_; PR by @andreer) - Reset shared reference state at the start of each top-level encode/decode operation - (`#266+ <https://github.com/agronholm/cbor2/pull/266>`_; PR by @andreer) + (`#266 <https://github.com/agronholm/cbor2/pull/266>`_; PR by @andreer) **5.7.1** (2025-10-24) - Improved performance on decoding large definite bytestrings - (#240 <https://github.com/agronholm/cbor2/issues/240>_; PR by @dwpaley) + (`#240 <https://github.com/agronholm/cbor2/issues/240>`_; PR by @dwpaley) - Fixed a read(-1) vulnerability caused by boundary handling error - (#264 <https://github.com/agronholm/cbor2/issues/264>_; PR by @tylzh97) + (`#264 <https://github.com/agronholm/cbor2/issues/264>`_; PR by @tylzh97) **5.7.0** (2025-08-14) - Added support for Python 3.14 (no free-threading support yet, sorry) - Dropped support for Python 3.8 - (`#247+ <https://github.com/agronholm/cbor2/pull/247>`_; PR by @hugovk) + (`#247 <https://github.com/agronholm/cbor2/pull/247>`_; PR by @hugovk) - Added support for encoding indefinite containers - (`#256+ <https://github.com/agronholm/cbor2/pull/256>`_; PR by @CZDanol) + (`#256 <https://github.com/agronholm/cbor2/pull/256>`_; PR by @CZDanol) - Added complex number support (tag 43000) - (`#249+ 
<https://github.com/agronholm/cbor2/pull/249>`_; PR by @chillenb) + (`#249 <https://github.com/agronholm/cbor2/pull/249>`_; PR by @chillenb) **5.6.5** (2024-10-09) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/pyproject.toml new/cbor2-5.9.0/pyproject.toml --- old/cbor2-5.8.0/pyproject.toml 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/pyproject.toml 2026-03-22 16:49:24.000000000 +0100 @@ -53,6 +53,9 @@ benchmarks = [ "pytest-benchmark==4.0.0", ] +fuzz = [ + "atheris", +] [tool.setuptools.packages.find] include = ["cbor2"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/scripts/ref_leak_test.py new/cbor2-5.9.0/scripts/ref_leak_test.py --- old/cbor2-5.8.0/scripts/ref_leak_test.py 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/scripts/ref_leak_test.py 2026-03-22 16:49:24.000000000 +0100 @@ -85,6 +85,8 @@ ), ("tag", {}, c_cbor2.CBORTag(1, 1)), ("nestedtag", {}, {c_cbor2.CBORTag(1, 1): 1}), + ("longstr_128k", {}, "x" * 131072), + ("longstr_multi_utf8", {}, ("a" * 65535 + "€") * 2), ] Leaks = namedtuple("Leaks", ("count", "comparison")) @@ -105,7 +107,7 @@ # NOTE: Filter pointing to the op() line in the loop below, because we're # only interested in memory allocated by that line. Naturally, if this file # is edited, the lineno parameter below must be adjusted! 
- only_op = tracemalloc.Filter(True, __file__, lineno=102, all_frames=True) + only_op = tracemalloc.Filter(True, __file__, lineno=119, all_frames=True) tracemalloc.start(10) try: # Perform a pre-run of op so that any one-time memory allocation diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/source/decoder.c new/cbor2-5.9.0/source/decoder.c --- old/cbor2-5.8.0/source/decoder.c 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/source/decoder.c 2026-03-22 16:49:24.000000000 +0100 @@ -34,6 +34,9 @@ // copied from cpython/Objects/bytesobject.c for bounds checks #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) +// Threshold for using stack allocation vs heap allocation for short strings +#define SMALL_STRING_STACK_THRESHOLD 256 + enum DecodeOption { DECODE_NORMAL = 0, DECODE_IMMUTABLE = 1, @@ -47,6 +50,10 @@ static int _CBORDecoder_set_object_hook(CBORDecoderObject *, PyObject *, void *); static int _CBORDecoder_set_str_errors(CBORDecoderObject *, PyObject *, void *); +// Forward declarations for read dispatch functions +static int fp_read_unbuffered(CBORDecoderObject *, char *, Py_ssize_t); +static int fp_read_buffered(CBORDecoderObject *, char *, Py_ssize_t); + static PyObject * decode(CBORDecoderObject *, DecodeOptions); static PyObject * decode_bytestring(CBORDecoderObject *, uint8_t); static PyObject * decode_string(CBORDecoderObject *, uint8_t); @@ -102,7 +109,6 @@ Py_CLEAR(self->object_hook); Py_CLEAR(self->shareables); Py_CLEAR(self->stringref_namespace); - Py_CLEAR(self->str_errors); if (self->readahead) { PyMem_Free(self->readahead); self->readahead = NULL; @@ -148,7 +154,8 @@ self->tag_hook = Py_None; Py_INCREF(Py_None); self->object_hook = Py_None; - self->str_errors = PyBytes_FromString("strict"); + self->str_errors = NULL; // NULL means strict mode + self->max_depth = CBOR2_DEFAULT_MAX_DEPTH; self->immutable = false; self->shared_index = -1; self->decode_depth = 0; @@ -156,6 
+163,7 @@ self->readahead_size = 0; self->read_pos = 0; self->read_len = 0; + self->fp_read = fp_read_unbuffered; // default, will be set properly in init } return (PyObject *) self; error: @@ -165,19 +173,19 @@ // CBORDecoder.__init__(self, fp=None, tag_hook=None, object_hook=None, -// str_errors='strict', read_size=4096) +// str_errors='strict', read_size=1, *, max_depth=400) int CBORDecoder_init(CBORDecoderObject *self, PyObject *args, PyObject *kwargs) { static char *keywords[] = { - "fp", "tag_hook", "object_hook", "str_errors", "read_size", NULL + "fp", "tag_hook", "object_hook", "str_errors", "read_size", "max_depth", NULL }; PyObject *fp = NULL, *tag_hook = NULL, *object_hook = NULL, *str_errors = NULL; Py_ssize_t read_size = CBOR2_DEFAULT_READ_SIZE; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOn", keywords, - &fp, &tag_hook, &object_hook, &str_errors, &read_size)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOnn", keywords, + &fp, &tag_hook, &object_hook, &str_errors, &read_size, &self->max_depth)) return -1; if (read_size < 1) { @@ -234,7 +242,8 @@ return -1; } - if (self->readahead == NULL || self->readahead_size != read_size) { + // Skip buffer allocation for read_size=1 (direct read path doesn't use buffer) + if (read_size > 1 && (self->readahead == NULL || self->readahead_size != read_size)) { new_buffer = (char *)PyMem_Malloc(read_size); if (!new_buffer) { Py_DECREF(read); @@ -255,8 +264,15 @@ if (new_buffer) { PyMem_Free(self->readahead); self->readahead = new_buffer; - self->readahead_size = read_size; + } else if (read_size == 1 && self->readahead != NULL) { + // Free existing buffer when switching to direct read path (read_size=1) + PyMem_Free(self->readahead); + self->readahead = NULL; } + self->readahead_size = read_size; + + // Set read dispatch function - eliminates runtime check on every read + self->fp_read = (read_size == 1) ? 
fp_read_unbuffered : fp_read_buffered; return 0; } @@ -348,9 +364,8 @@ static PyObject * _CBORDecoder_get_str_errors(CBORDecoderObject *self, void *closure) { - return PyUnicode_DecodeASCII( - PyBytes_AS_STRING(self->str_errors), - PyBytes_GET_SIZE(self->str_errors), "strict"); + const char *mode = self->str_errors ? self->str_errors : "strict"; + return PyUnicode_FromString(mode); } @@ -359,30 +374,46 @@ _CBORDecoder_set_str_errors(CBORDecoderObject *self, PyObject *value, void *closure) { - PyObject *tmp, *bytes; - if (!value) { PyErr_SetString(PyExc_AttributeError, "cannot delete str_errors attribute"); return -1; } if (PyUnicode_Check(value)) { - bytes = PyUnicode_AsASCIIString(value); + PyObject *bytes = PyUnicode_AsASCIIString(value); if (bytes) { - if (!strcmp(PyBytes_AS_STRING(bytes), "strict") || - !strcmp(PyBytes_AS_STRING(bytes), "error") || - !strcmp(PyBytes_AS_STRING(bytes), "replace")) { - tmp = self->str_errors; - self->str_errors = bytes; - Py_DECREF(tmp); + const char *mode = PyBytes_AS_STRING(bytes); + if (!strcmp(mode, "strict") || !strcmp(mode, "error")) { + self->str_errors = NULL; + Py_DECREF(bytes); + return 0; + } + if (!strcmp(mode, "replace")) { + self->str_errors = "replace"; + Py_DECREF(bytes); + return 0; + } + if (!strcmp(mode, "ignore")) { + self->str_errors = "ignore"; + Py_DECREF(bytes); + return 0; + } + if (!strcmp(mode, "backslashreplace")) { + self->str_errors = "backslashreplace"; + Py_DECREF(bytes); + return 0; + } + if (!strcmp(mode, "surrogateescape")) { + self->str_errors = "surrogateescape"; + Py_DECREF(bytes); return 0; } Py_DECREF(bytes); } } PyErr_Format(PyExc_ValueError, - "invalid str_errors value %R (must be one of 'strict', " - "'error', or 'replace')", value); + "invalid str_errors value %R (must be 'strict', 'error', 'replace', " + "'backslashreplace' or 'surrogateescape')", value); return -1; } @@ -448,9 +479,25 @@ return bytes_read; } -// Read into caller's buffer using the readahead buffer +// Unbuffered read - 
used when read_size=1 (backwards compatible mode) +// This matches the 5.7.1 behavior with no runtime overhead +static int +fp_read_unbuffered(CBORDecoderObject *self, char *buf, Py_ssize_t size) +{ + Py_ssize_t bytes_read = fp_read_bytes(self, buf, size); + if (bytes_read == size) + return 0; + if (bytes_read >= 0) + PyErr_Format( + _CBOR2_CBORDecodeEOF, + "premature end of stream (expected to read %zd bytes, " + "got %zd instead)", size, bytes_read); + return -1; +} + +// Buffered read - used when read_size > 1 for improved performance static int -fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size) +fp_read_buffered(CBORDecoderObject *self, char *buf, Py_ssize_t size) { Py_ssize_t available, to_copy, remaining, total_copied; @@ -508,7 +555,7 @@ if (!ret) return NULL; - if (fp_read(self, PyBytes_AS_STRING(ret), size) == -1) { + if (self->fp_read(self, PyBytes_AS_STRING(ret), size) == -1) { Py_DECREF(ret); return NULL; } @@ -529,7 +576,7 @@ return NULL; ret = PyBytes_FromStringAndSize(NULL, len); if (ret) { - if (fp_read(self, PyBytes_AS_STRING(ret), len) == -1) { + if (self->fp_read(self, PyBytes_AS_STRING(ret), len) == -1) { Py_DECREF(ret); ret = NULL; } @@ -577,19 +624,19 @@ if (subtype < 24) { *length = subtype; } else if (subtype == 24) { - if (fp_read(self, value.u8.buf, sizeof(uint8_t)) == -1) + if (self->fp_read(self, value.u8.buf, sizeof(uint8_t)) == -1) return -1; *length = value.u8.value; } else if (subtype == 25) { - if (fp_read(self, value.u16.buf, sizeof(uint16_t)) == -1) + if (self->fp_read(self, value.u16.buf, sizeof(uint16_t)) == -1) return -1; *length = be16toh(value.u16.value); } else if (subtype == 26) { - if (fp_read(self, value.u32.buf, sizeof(uint32_t)) == -1) + if (self->fp_read(self, value.u32.buf, sizeof(uint32_t)) == -1) return -1; *length = be32toh(value.u32.value); } else { - if (fp_read(self, value.u64.buf, sizeof(uint64_t)) == -1) + if (self->fp_read(self, value.u64.buf, sizeof(uint64_t)) == -1) return -1; *length = 
be64toh(value.u64.value); } @@ -753,7 +800,7 @@ list = PyList_New(0); if (list) { while (1) { - if (fp_read(self, &lead.byte, 1) == -1) + if (self->fp_read(self, &lead.byte, 1) == -1) break; if (lead.major == 2 && lead.subtype != 31) { ret = decode_bytestring(self, lead.subtype); @@ -831,13 +878,22 @@ static PyObject * decode_definite_short_string(CBORDecoderObject *self, Py_ssize_t length) { - PyObject *bytes_obj = fp_read_object(self, length); - if (!bytes_obj) + char stack_buf[SMALL_STRING_STACK_THRESHOLD]; + char *buf = (length <= SMALL_STRING_STACK_THRESHOLD) ? stack_buf : PyMem_Malloc(length); + if (!buf) + return PyErr_NoMemory(); + + if (self->fp_read(self, buf, length) == -1) { + if (buf != stack_buf) + PyMem_Free(buf); return NULL; + } + + PyObject *ret = PyUnicode_DecodeUTF8(buf, length, self->str_errors); + + if (buf != stack_buf) + PyMem_Free(buf); - const char *bytes = PyBytes_AS_STRING(bytes_obj); - PyObject *ret = PyUnicode_FromStringAndSize(bytes, length); - Py_DECREF(bytes_obj); if (ret && string_namespace_add(self, ret, length) == -1) { Py_DECREF(ret); return NULL; @@ -895,18 +951,19 @@ } consumed = chunk_length; // workaround for https://github.com/python/cpython/issues/99612 - string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, NULL, &consumed); + string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, self->str_errors, &consumed); if (!string) goto error; if (ret) { // Concatenate the result to the existing result PyObject *joined = PyUnicode_Concat(ret, string); + Py_DECREF(string); + string = NULL; if (!joined) goto error; - Py_DECREF(string); - string = NULL; + Py_DECREF(ret); ret = joined; } else { // Set the result to the decoded string @@ -936,8 +993,34 @@ chunk = NULL; } - if (ret && string_namespace_add(self, ret, length) == -1) - goto error; + // Handle any remaining bytes in the buffer (incomplete UTF-8 sequences) + if (buffer_length > 0) { + string = PyUnicode_DecodeUTF8(buffer, buffer_length, 
self->str_errors); + if (!string) + goto error; + + if (ret) { + PyObject *joined = PyUnicode_Concat(ret, string); + Py_DECREF(string); + string = NULL; + if (!joined) + goto error; + + Py_DECREF(ret); + ret = joined; + } else { + ret = string; + string = NULL; + } + } + + if (buffer) + PyMem_Free(buffer); + + if (ret && string_namespace_add(self, ret, length) == -1) { + Py_DECREF(ret); + return NULL; + } return ret; error: @@ -960,7 +1043,7 @@ list = PyList_New(0); if (list) { while (1) { - if (fp_read(self, &lead.byte, 1) == -1) + if (self->fp_read(self, &lead.byte, 1) == -1) break; if (lead.major == 3 && lead.subtype != 31) { ret = decode_string(self, lead.subtype); @@ -2065,7 +2148,7 @@ PyObject *tag, *ret = NULL; uint8_t buf; - if (fp_read(self, (char*)&buf, sizeof(uint8_t)) == 0) { + if (self->fp_read(self, (char*)&buf, sizeof(uint8_t)) == 0) { tag = PyStructSequence_New(&CBORSimpleValueType); if (tag) { PyStructSequence_SET_ITEM(tag, 0, PyLong_FromLong(buf)); @@ -2091,7 +2174,7 @@ char buf[sizeof(uint16_t)]; } u; - if (fp_read(self, u.buf, sizeof(uint16_t)) == 0) + if (self->fp_read(self, u.buf, sizeof(uint16_t)) == 0) ret = PyFloat_FromDouble(unpack_float16(u.i)); set_shareable(self, ret); return ret; @@ -2109,7 +2192,7 @@ char buf[sizeof(float)]; } u; - if (fp_read(self, u.buf, sizeof(float)) == 0) { + if (self->fp_read(self, u.buf, sizeof(float)) == 0) { u.i = be32toh(u.i); ret = PyFloat_FromDouble(u.f); } @@ -2129,7 +2212,7 @@ char buf[sizeof(double)]; } u; - if (fp_read(self, u.buf, sizeof(double)) == 0) { + if (self->fp_read(self, u.buf, sizeof(double)) == 0) { u.i = be64toh(u.i); ret = PyFloat_FromDouble(u.f); } @@ -2155,10 +2238,18 @@ self->shared_index = -1; } + if (self->decode_depth == self->max_depth) { + PyErr_Format( + _CBOR2_CBORDecodeError, + "maximum container nesting depth (%u) exceeded", self->max_depth); + return NULL; + } + if (Py_EnterRecursiveCall(" in CBORDecoder.decode")) return NULL; - if (fp_read(self, &lead.byte, 1) == 0) { + 
self->decode_depth++; + if (self->fp_read(self, &lead.byte, 1) == 0) { switch (lead.major) { case 0: ret = decode_uint(self, lead.subtype); break; case 1: ret = decode_negint(self, lead.subtype); break; @@ -2173,6 +2264,8 @@ } Py_LeaveRecursiveCall(); + self->decode_depth--; + if (options & DECODE_IMMUTABLE) self->immutable = old_immutable; if (options & DECODE_UNSHARED) @@ -2197,10 +2290,7 @@ CBORDecoder_decode(CBORDecoderObject *self) { PyObject *ret; - self->decode_depth++; ret = decode(self, DECODE_NORMAL); - self->decode_depth--; - assert(self->decode_depth >= 0); if (self->decode_depth == 0) { clear_shareable_state(self); } @@ -2224,7 +2314,6 @@ if (!buf) return NULL; - self->decode_depth++; save_read = self->read; Py_INCREF(save_read); // Keep alive while we use a different read method save_read_pos = self->read_pos; @@ -2244,7 +2333,6 @@ } Py_DECREF(save_read); Py_DECREF(buf); - self->decode_depth--; return NULL; } @@ -2253,7 +2341,6 @@ Py_XDECREF(self->read); // Decrement BytesIO read method self->read = save_read; // Restore saved read (already has correct refcount) Py_DECREF(buf); - self->decode_depth--; if (is_nested) { PyMem_Free(self->readahead); @@ -2262,7 +2349,6 @@ self->read_pos = save_read_pos; self->read_len = save_read_len; - assert(self->decode_depth >= 0); if (self->decode_depth == 0) { clear_shareable_state(self); } @@ -2414,13 +2500,12 @@ " :class:`dict` object. The return value is substituted for the dict\n" " in the deserialized output.\n" ":param read_size:\n" -" the size of the read buffer (default 4096). The decoder reads from\n" -" the stream in chunks of this size for performance. This means the\n" -" stream position may advance beyond the bytes actually decoded. For\n" -" large values (bytestrings, text strings), reads may be larger than\n" -" ``read_size``. 
Code that needs to read from the stream after\n" -" decoding should use :meth:`decode_from_bytes` instead, or set\n" -" ``read_size=1`` to disable buffering (at a performance cost).\n" +" the minimum number of bytes to read at a time.\n" +" Setting this to a higher value like 4096 improves performance,\n" +" but is likely to read past the end of the CBOR value, advancing the stream\n" +" position beyond the decoded data. This only matters if you need to reuse the\n" +" stream after decoding.\n" +" Ignored in the pure Python implementation, but included for API compatibility.\n" "\n" ".. _CBOR: https://cbor.io/\n" ); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/source/decoder.h new/cbor2-5.9.0/source/decoder.h --- old/cbor2-5.8.0/source/decoder.h 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/source/decoder.h 2026-03-22 16:49:24.000000000 +0100 @@ -3,17 +3,26 @@ #include <stdbool.h> #include <stdint.h> -// Default readahead buffer size for streaming reads -#define CBOR2_DEFAULT_READ_SIZE 4096 +// Default readahead buffer size for streaming reads. +// Set to 1 for backwards compatibility (no buffering). 
+#define CBOR2_DEFAULT_READ_SIZE 1 +#define CBOR2_DEFAULT_MAX_DEPTH 400 -typedef struct { +// Forward declaration for function pointer typedef +struct CBORDecoderObject_; + +// Function pointer type for read dispatch (eliminates runtime check) +typedef int (*fp_read_fn)(struct CBORDecoderObject_ *, char *, Py_ssize_t); + +typedef struct CBORDecoderObject_ { PyObject_HEAD PyObject *read; // cached read() method of fp PyObject *tag_hook; PyObject *object_hook; PyObject *shareables; PyObject *stringref_namespace; - PyObject *str_errors; + const char *str_errors; // NULL for strict, "replace" for replace mode + Py_ssize_t max_depth; bool immutable; Py_ssize_t shared_index; Py_ssize_t decode_depth; @@ -23,6 +32,9 @@ Py_ssize_t readahead_size; // size of allocated buffer Py_ssize_t read_pos; // current position in buffer Py_ssize_t read_len; // valid bytes in buffer + + // Read dispatch - points to unbuffered or buffered implementation + fp_read_fn fp_read; } CBORDecoderObject; extern PyTypeObject CBORDecoderType; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/source/encoder.c new/cbor2-5.9.0/source/encoder.c --- old/cbor2-5.8.0/source/encoder.c 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/source/encoder.c 2026-03-22 16:49:24.000000000 +0100 @@ -1004,36 +1004,6 @@ static PyObject * -encode_datestr(CBOREncoderObject *self, PyObject *datestr) -{ - const char *buf; - Py_ssize_t length, match; - - match = PyUnicode_Tailmatch( - datestr, _CBOR2_str_utc_suffix, PyUnicode_GET_LENGTH(datestr) - 6, - PyUnicode_GET_LENGTH(datestr), 1); - if (match != -1) { - buf = PyUnicode_AsUTF8AndSize(datestr, &length); - if (buf) { - if (fp_write(self, "\xC0", 1) == 0) { - if (match) { - if (encode_length(self, 3, length - 5) == 0) - if (fp_write(self, buf, length - 6) == 0) - if (fp_write(self, "Z", 1) == 0) - Py_RETURN_NONE; - } else { - if (encode_length(self, 3, length) == 0) - if (fp_write(self, buf, length) == 0) - 
Py_RETURN_NONE; - } - } - } - } - return NULL; -} - - -static PyObject * encode_timestamp(CBOREncoderObject *self, PyObject *timestamp) { PyObject *ret = NULL; @@ -1095,8 +1065,16 @@ } else { tmp = PyObject_CallMethodObjArgs( value, _CBOR2_str_isoformat, NULL); - if (tmp) - ret = encode_datestr(self, tmp); + if (tmp) { + PyObject *replaced = PyUnicode_Replace( + tmp, _CBOR2_str_utc_suffix, _CBOR2_str_z, 1); + if (replaced) { + Py_DECREF(tmp); + tmp = replaced; + if (fp_write(self, "\xc0", 1) == 0) + ret = CBOREncoder_encode_string(self, tmp); + } + } } Py_XDECREF(tmp); Py_DECREF(value); @@ -1344,7 +1322,10 @@ if (tuple) { if (PyDict_SetItem(self->shared, id, tuple) == 0) { ret = encoder(self, value); - PyDict_DelItem(self->shared, id); + if (PyDict_DelItem(self->shared, id) == -1) { + Py_XDECREF(ret); + ret = NULL; + } } Py_DECREF(tuple); } @@ -1960,10 +1941,12 @@ // Don't generate string references when sorting keys self->string_referencing = false; - if (PyDict_Check(value)) + if (PyDict_Check(value)) { list = dict_to_canonical_list(self, value); - else + } + else { list = mapping_to_canonical_list(self, value); + } self->string_referencing = string_referencing_old; if (list) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/source/module.c new/cbor2-5.9.0/source/module.c --- old/cbor2-5.8.0/source/module.c 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/source/module.c 2026-03-22 16:49:24.000000000 +0100 @@ -648,6 +648,7 @@ PyObject *_CBOR2_str_utc_suffix = NULL; PyObject *_CBOR2_str_UUID = NULL; PyObject *_CBOR2_str_write = NULL; +PyObject *_CBOR2_str_z = NULL; PyObject *_CBOR2_CBORError = NULL; PyObject *_CBOR2_CBOREncodeError = NULL; @@ -992,6 +993,9 @@ if (!_CBOR2_str_utc_suffix && !(_CBOR2_str_utc_suffix = PyUnicode_InternFromString("+00:00"))) goto error; + if (!_CBOR2_str_z && + !(_CBOR2_str_z = PyUnicode_InternFromString("Z"))) + goto error; if (!_CBOR2_str_datetimestr_re && 
!(_CBOR2_str_datetimestr_re = PyUnicode_InternFromString( "^(\\d{4})-(\\d\\d)-(\\d\\d)T" // Y-m-d diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/source/module.h new/cbor2-5.9.0/source/module.h --- old/cbor2-5.8.0/source/module.h 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/source/module.h 2026-03-22 16:49:24.000000000 +0100 @@ -80,6 +80,7 @@ extern PyObject *_CBOR2_str_utc_suffix; extern PyObject *_CBOR2_str_UUID; extern PyObject *_CBOR2_str_write; +extern PyObject *_CBOR2_str_z; // Exception classes extern PyObject *_CBOR2_CBORError; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/tests/fuzzers/loads_fuzzer.py new/cbor2-5.9.0/tests/fuzzers/loads_fuzzer.py --- old/cbor2-5.8.0/tests/fuzzers/loads_fuzzer.py 1970-01-01 01:00:00.000000000 +0100 +++ new/cbor2-5.9.0/tests/fuzzers/loads_fuzzer.py 2026-03-22 16:49:24.000000000 +0100 @@ -0,0 +1,19 @@ +import sys + +import atheris + +# _cbor2 ensures the C library is imported +from _cbor2 import loads + + +def test_one_input(data: bytes): + try: + loads(data) + except Exception: + # We're searching for memory corruption, not Python exceptions + pass + + +if __name__ == "__main__": + atheris.Setup(sys.argv, test_one_input) + atheris.Fuzz() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/tests/test_decoder.py new/cbor2-5.9.0/tests/test_decoder.py --- old/cbor2-5.8.0/tests/test_decoder.py 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/tests/test_decoder.py 2026-03-22 16:49:24.000000000 +0100 @@ -123,6 +123,36 @@ assert impl.load(fp=stream) == 1 +def test_stream_position_after_decode(impl): + """Test that stream position is exactly at end of decoded CBOR value.""" + # CBOR: integer 1 (1 byte: 0x01) followed by extra data + cbor_data = b"\x01" + extra_data = b"extra" + with BytesIO(cbor_data + extra_data) as stream: + decoder = 
impl.CBORDecoder(stream) + result = decoder.decode() + assert result == 1 + # Stream position should be exactly at end of CBOR data + assert stream.tell() == len(cbor_data) + # Should be able to read the extra data + assert stream.read() == extra_data + + +class TestMaximumDepth: + def test_default(self, impl) -> None: + with pytest.raises( + impl.CBORDecodeError, + match="maximum container nesting depth \\(400\\) exceeded", + ): + impl.loads(b"\x81" * 401 + b"\x80") + + def test_explicit(self, impl) -> None: + with pytest.raises( + impl.CBORDecodeError, match=r"maximum container nesting depth \(9\) exceeded" + ): + impl.loads(b"\x81" * 10 + b"\x80", max_depth=9) + + @pytest.mark.parametrize( "payload, expected", [ @@ -1174,3 +1204,76 @@ assert result[0] == [1, 2, 3] assert result[1] == "after" assert result[2] == "final" + + +def test_str_errors_error_alias(impl): + """'error' is not a valid Python string error handler, normalize to 'strict'.""" + with BytesIO(b"\x65hello") as stream: + decoder = impl.CBORDecoder(stream, str_errors="error") + assert decoder.str_errors == "strict" + + +def test_str_errors_invalid_mode(impl): + payload = b"\x65hello" + with pytest.raises(ValueError, match="invalid str_errors value 'invalid'"): + impl.loads(payload, str_errors="invalid") + + +@pytest.mark.parametrize( + "mode, expected", + [ + ("strict", None), # Should raise exception + ("replace", "hello\ufffdworld"), # Should replace invalid byte with U+FFFD + ], + ids=["strict_mode", "replace_mode"], +) +def test_str_errors_handling(impl, mode, expected): + invalid_utf8 = b"\x6bhello\xffworld" # \xFF is invalid UTF-8 + + if expected is None: + with pytest.raises(impl.CBORDecodeValueError, match="error decoding unicode string"): + impl.loads(invalid_utf8, str_errors=mode) + else: + result = impl.loads(invalid_utf8, str_errors=mode) + assert result == expected + assert len(result) == 11 + assert result[5] == "\ufffd" + + +@pytest.mark.parametrize( + "payload, mode, expected", + [ +
(b"\x66hello\xff", "replace", "hello\ufffd"), # <=256 bytes: stack path + ( + b"\x79\x01\x05" + b"a" * 260 + b"\xff", + "replace", + "a" * 260 + "\ufffd", + ), # >256: heap path + ], + ids=["short_string", "long_string"], +) +def test_str_errors_different_lengths(impl, payload, mode, expected): + """Tests both stack (<=256 bytes) and heap (>256 bytes) allocation paths.""" + result = impl.loads(payload, str_errors=mode) + assert result == expected + assert result[-1] == "\ufffd" + + +def test_str_errors_long_string_over_65536_bytes(impl): + """Issue #255: str_errors not respected for strings >65536 bytes.""" + # 65537 bytes: 65536 'a' + 1 invalid UTF-8 byte + payload = unhexlify("7a00010001" + "61" * 65536 + "c3") + result = impl.loads(payload, str_errors="replace") + assert len(result) == 65537 + assert result[-1] == "\ufffd" + + +def test_str_errors_long_string_invalid_middle(impl): + """Test str_errors with invalid UTF-8 in the middle of a long string.""" + # 65536 'a' + invalid byte + 65536 'b' = 131073 bytes + payload = unhexlify("7a00020001" + "61" * 65536 + "c3" + "62" * 65536) + result = impl.loads(payload, str_errors="replace") + assert len(result) == 131073 + assert result[65536] == "\ufffd" + assert result[:65536] == "a" * 65536 + assert result[65537:] == "b" * 65536 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.8.0/tests/test_encoder.py new/cbor2-5.9.0/tests/test_encoder.py --- old/cbor2-5.8.0/tests/test_encoder.py 2025-12-30 19:37:15.000000000 +0100 +++ new/cbor2-5.9.0/tests/test_encoder.py 2026-03-22 16:49:24.000000000 +0100 @@ -674,6 +674,12 @@ assert impl.dumps(value, string_referencing=True, canonical=True) == expected +def test_encode_stringrefs_datetime(impl): + value = [datetime(2026, 1, 19, tzinfo=timezone.utc), "abc", "abc"] + expected = unhexlify("D9010083C074323032362D30312D31395430303A30303A30305A63616263D81901") + assert impl.dumps(value, string_referencing=True) == expected + + 
@pytest.mark.parametrize("tag", [-1, 2**64, "f"], ids=["too small", "too large", "wrong type"]) def test_invalid_tag(impl, tag): with pytest.raises(TypeError):
