Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-cbor2 for openSUSE:Factory checked in at 2026-01-03 17:27:58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-cbor2 (Old) and /work/SRC/openSUSE:Factory/.python-cbor2.new.1928 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-cbor2" Sat Jan 3 17:27:58 2026 rev:19 rq:1325153 version:5.8.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-cbor2/python-cbor2.changes 2025-10-30 17:11:41.411370358 +0100 +++ /work/SRC/openSUSE:Factory/.python-cbor2.new.1928/python-cbor2.changes 2026-01-03 17:28:30.738425008 +0100 @@ -1,0 +2,11 @@ +Fri Jan 2 10:14:38 UTC 2026 - Markéta Machová <[email protected]> + +- Update to 5.8.0 + * Added readahead buffering to C decoder for improved performance. + * Fixed Python decoder not preserving share index when decoding + array items containing nested shareable tags, causing shared + references to resolve to wrong objects. + * Reset shared reference state at the start of each top-level + encode/decode operation. + +------------------------------------------------------------------- Old: ---- cbor2-5.7.1.tar.gz New: ---- cbor2-5.8.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-cbor2.spec ++++++ --- /var/tmp/diff_new_pack.gOQeQT/_old 2026-01-03 17:28:32.706505527 +0100 +++ /var/tmp/diff_new_pack.gOQeQT/_new 2026-01-03 17:28:32.714505854 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-cbor2 # -# Copyright (c) 2025 SUSE LLC and contributors +# Copyright (c) 2026 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -23,7 +23,7 @@ %endif %{?sle15_python_module_pythons} Name: python-cbor2 -Version: 5.7.1 +Version: 5.8.0 Release: 0 Summary: Pure Python CBOR (de)serializer with extensive tag support License: MIT ++++++ cbor2-5.7.1.tar.gz -> cbor2-5.8.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/.github/dependabot.yml new/cbor2-5.8.0/.github/dependabot.yml --- old/cbor2-5.7.1/.github/dependabot.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/cbor2-5.8.0/.github/dependabot.yml 2025-12-30 19:37:15.000000000 +0100 @@ -0,0 +1,13 @@ +# Keep GitHub Actions up to date with GitHub's Dependabot... +# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + groups: + github-actions: + patterns: + - "*" # Group all Actions updates into a single larger pull request + schedule: + interval: quarterly diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/.github/workflows/publish.yml new/cbor2-5.8.0/.github/workflows/publish.yml --- old/cbor2-5.7.1/.github/workflows/publish.yml 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/.github/workflows/publish.yml 2025-12-30 19:37:15.000000000 +0100 @@ -17,21 +17,20 @@ - ubuntu-24.04-arm - windows-latest - windows-11-arm - - macos-13 - macos-latest runs-on: ${{ matrix.os }} environment: release steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build wheels - uses: pypa/[email protected] + uses: pypa/[email protected] env: CBOR2_BUILD_C_EXTENSION: "1" CIBW_SKIP: "*t-*" CIBW_ARCHS: "auto64" CIBW_TEST_GROUPS: "test" CIBW_TEST_COMMAND: "python -m pytest {project}/tests" - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }} path: wheelhouse/*.whl @@ -40,9 +39,9 @@ runs-on: ubuntu-latest environment: release steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.x - name: Install dependencies @@ -51,7 +50,7 @@ run: python -m build . env: CBOR2_BUILD_C_EXTENSION: "0" - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: sdist path: dist/* @@ -66,7 +65,7 @@ id-token: write steps: - name: Download generated packaging artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 - name: Move the packages to dist/ run: | mkdir dist @@ -83,7 +82,7 @@ permissions: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - id: changelog uses: agronholm/release-notes@v1 with: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/.github/workflows/test.yml new/cbor2-5.8.0/.github/workflows/test.yml --- old/cbor2-5.7.1/.github/workflows/test.yml 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/.github/workflows/test.yml 2025-12-30 19:37:15.000000000 +0100 @@ -23,9 +23,9 @@ python-version: "3.13" runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} allow-prereleases: true diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/.pre-commit-config.yaml new/cbor2-5.8.0/.pre-commit-config.yaml --- old/cbor2-5.7.1/.pre-commit-config.yaml 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/.pre-commit-config.yaml 2025-12-30 19:37:15.000000000 +0100 @@ -5,7 +5,7 @@ # * Run "pre-commit install". repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: check-toml - id: check-yaml @@ -16,14 +16,14 @@ - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.7 + rev: v0.14.10 hooks: - id: ruff args: [--fix, --show-fixes] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 + rev: v1.19.1 hooks: - id: mypy additional_dependencies: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/PKG-INFO new/cbor2-5.8.0/PKG-INFO --- old/cbor2-5.7.1/PKG-INFO 2025-10-24 11:16:23.524546000 +0200 +++ new/cbor2-5.8.0/PKG-INFO 2025-12-30 19:37:23.094826700 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: cbor2 -Version: 5.7.1 +Version: 5.8.0 Summary: CBOR (de)serializer with extensive tag support Author-email: Alex Grönholm <[email protected]> License-Expression: MIT diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/SECURITY.md new/cbor2-5.8.0/SECURITY.md --- old/cbor2-5.7.1/SECURITY.md 1970-01-01 01:00:00.000000000 +0100 +++ new/cbor2-5.8.0/SECURITY.md 2025-12-30 19:37:15.000000000 +0100 @@ -0,0 +1,16 @@ +# Security Policy + +## Supported Versions + +The latest major/minor versions will get security updates and bug fixes. +Earlier versions are unsupported. + +| Version | Supported | +|----------| ------------------ | +| 5.7.x | :white_check_mark: | +| < 5.7.x | :x: | + +## Reporting a Vulnerability + +See the [security](https://github.com/agronholm/cbor2/security) +section to view existing security advisories or to report a new vulnerability. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/cbor2/_decoder.py new/cbor2-5.8.0/cbor2/_decoder.py --- old/cbor2-5.7.1/cbor2/_decoder.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/cbor2/_decoder.py 2025-12-30 19:37:15.000000000 +0100 @@ -4,7 +4,8 @@ import struct import sys from codecs import getincrementaldecoder -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Callable, Generator, Mapping, Sequence +from contextlib import contextmanager from datetime import date, datetime, timedelta, timezone from io import BytesIO from typing import IO, TYPE_CHECKING, Any, TypeVar, cast, overload @@ -59,6 +60,7 @@ "_immutable", "_str_errors", "_stringref_namespace", + "_decode_depth", ) _fp: IO[bytes] @@ -100,6 +102,7 @@ self._shareables: list[object] = [] self._stringref_namespace: list[str | bytes] | None = None self._immutable = False + self._decode_depth = 0 @property def immutable(self) -> bool: @@ -141,7 +144,7 @@ return self._object_hook @object_hook.setter - def object_hook(self, value: Callable[[CBORDecoder, Mapping[Any, Any]], Any] | None) -> None: + def object_hook(self, value: Callable[[CBORDecoder, dict[Any, Any]], Any] | None) -> None: if value is None or callable(value): self._object_hook = value else: @@ -225,13 +228,33 @@ if unshared: self._share_index = old_index + @contextmanager + def _decoding_context(self) -> Generator[None]: + """ + Context manager for tracking decode depth and clearing shared state. + + Shared state is cleared at the end of each top-level decode to prevent + shared references from leaking between independent decode operations. + Nested calls (from hooks) must preserve the state. + """ + self._decode_depth += 1 + try: + yield + finally: + self._decode_depth -= 1 + assert self._decode_depth >= 0 + if self._decode_depth == 0: + self._shareables.clear() + self._share_index = None + def decode(self) -> object: """ Decode the next value from the stream. :raises CBORDecodeError: if there is any problem decoding the stream """ - return self._decode() + with self._decoding_context(): + return self._decode() def decode_from_bytes(self, buf: bytes) -> object: """ @@ -242,12 +265,13 @@ object needs to be decoded separately from the rest but while still taking advantage of the shared value registry. """ - with BytesIO(buf) as fp: - old_fp = self.fp - self.fp = fp - retval = self._decode() - self.fp = old_fp - return retval + with self._decoding_context(): + with BytesIO(buf) as fp: + old_fp = self.fp + self.fp = fp + retval = self._decode() + self.fp = old_fp + return retval @overload def _decode_length(self, subtype: int) -> int: ... @@ -398,7 +422,7 @@ if not self._immutable: self.set_shareable(items) while True: - value = self._decode() + value = self._decode(unshared=True) if value is break_marker: break else: @@ -412,7 +436,7 @@ self.set_shareable(items) for index in range(length): - items.append(self._decode()) + items.append(self._decode(unshared=True)) if self._immutable: items_tuple = tuple(items) @@ -596,7 +620,7 @@ try: value = self._stringref_namespace[index] except IndexError: - raise CBORDecodeValueError("string reference %d not found" % index) + raise CBORDecodeValueError(f"string reference {index} not found") return value @@ -616,10 +640,10 @@ try: shared = self._shareables[value] except IndexError: - raise CBORDecodeValueError("shared reference %d not found" % value) + raise CBORDecodeValueError(f"shared reference {value} not found") if shared is None: - raise CBORDecodeValueError("shared value %d has not been initialized" % value) + raise CBORDecodeValueError(f"shared value {value} has not been initialized") else: return shared diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/cbor2/_encoder.py new/cbor2-5.8.0/cbor2/_encoder.py --- old/cbor2-5.7.1/cbor2/_encoder.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/cbor2/_encoder.py 2025-12-30 19:37:15.000000000 +0100 @@ -124,6 +124,7 @@ "string_namespacing", "_string_references", "indefinite_containers", + "_encode_depth", ) _fp: IO[bytes] @@ -188,6 +189,7 @@ int, tuple[object, int | None] ] = {} # indexes used for value sharing self._string_references: dict[str | bytes, int] = {} # indexes used for string references + self._encode_depth = 0 self._encoders = default_encoders.copy() if canonical: self._encoders.update(canonical_encoders) @@ -303,6 +305,24 @@ """ self._fp_write(data) + @contextmanager + def _encoding_context(self) -> Generator[None]: + """ + Context manager for tracking encode depth and clearing shared state. + + Shared state is cleared at the end of each top-level encode to prevent + shared references from leaking between independent encode operations. + Nested calls (from hooks) must preserve the state. + """ + self._encode_depth += 1 + try: + yield + finally: + self._encode_depth -= 1 + if self._encode_depth == 0: + self._shared_containers.clear() + self._string_references.clear() + def encode(self, obj: Any) -> None: """ Encode the given object using CBOR. @@ -310,6 +330,16 @@ :param obj: the object to encode """ + with self._encoding_context(): + self._encode_value(obj) + + def _encode_value(self, obj: Any) -> None: + """ + Internal fast path for encoding - used by built-in encoders. + + External code should use encode() instead, which properly manages + shared state between independent encode operations. + """ obj_type = obj.__class__ encoder = self._encoders.get(obj_type) or self._find_encoder(obj_type) or self._default if not encoder: @@ -459,7 +489,7 @@ def encode_array(self, value: Sequence[Any]) -> None: self.encode_length(4, len(value) if not self.indefinite_containers else None) for item in value: - self.encode(item) + self._encode_value(item) if self.indefinite_containers: self.encode_break() @@ -468,8 +498,8 @@ def encode_map(self, value: Mapping[Any, Any]) -> None: self.encode_length(5, len(value) if not self.indefinite_containers else None) for key, val in value.items(): - self.encode(key) - self.encode(val) + self._encode_value(key) + self._encode_value(val) if self.indefinite_containers: self.encode_break() @@ -494,10 +524,10 @@ # String referencing requires that the order encoded is # the same as the order emitted so string references are # generated after an order is determined - self.encode(realkey) + self._encode_value(realkey) else: self._fp_write(sortkey[1]) - self.encode(value) + self._encode_value(value) if self.indefinite_containers: self.encode_break() @@ -511,7 +541,7 @@ self._string_references = {} self.encode_length(6, value.tag) - self.encode(value.value) + self._encode_value(value.value) self.string_referencing = old_string_referencing self._string_references = old_string_references @@ -527,7 +557,7 @@ value = value.replace(tzinfo=self._timezone) else: raise CBOREncodeValueError( - f"naive datetime {value!r} encountered and no default timezone " "has been set" + f"naive datetime {value!r} encountered and no default timezone has been set" ) if self.datetime_as_timestamp: @@ -574,7 +604,7 @@ def encode_stringref(self, value: str | bytes) -> None: # Semantic tag 25 if not self._stringref(value): - self.encode(value) + self._encode_value(value) def encode_rational(self, value: Fraction) -> None: # Semantic tag 30 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/cbor2/tool.py new/cbor2-5.8.0/cbor2/tool.py --- old/cbor2-5.7.1/cbor2/tool.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/cbor2/tool.py 2025-12-30 19:37:15.000000000 +0100 @@ -114,11 +114,11 @@ k = str(k) if isinstance(v, dict): - v = key_to_str(v, dict_ids) + rval[k] = key_to_str(v, dict_ids) elif isinstance(v, (tuple, list, set)): - v = [key_to_str(x, dict_ids) for x in v] - - rval[k] = v + rval[k] = [key_to_str(x, dict_ids) for x in v] + else: + rval[k] = v return rval diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/cbor2.egg-info/PKG-INFO new/cbor2-5.8.0/cbor2.egg-info/PKG-INFO --- old/cbor2-5.7.1/cbor2.egg-info/PKG-INFO 2025-10-24 11:16:23.000000000 +0200 +++ new/cbor2-5.8.0/cbor2.egg-info/PKG-INFO 2025-12-30 19:37:23.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: cbor2 -Version: 5.7.1 +Version: 5.8.0 Summary: CBOR (de)serializer with extensive tag support Author-email: Alex Grönholm <[email protected]> License-Expression: MIT diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/cbor2.egg-info/SOURCES.txt new/cbor2-5.8.0/cbor2.egg-info/SOURCES.txt --- old/cbor2-5.7.1/cbor2.egg-info/SOURCES.txt 2025-10-24 11:16:23.000000000 +0200 +++ new/cbor2-5.8.0/cbor2.egg-info/SOURCES.txt 2025-12-30 19:37:23.000000000 +0100 @@ -3,8 +3,10 @@ .readthedocs.yml LICENSE.txt README.rst +SECURITY.md pyproject.toml setup.py +.github/dependabot.yml .github/pull_request_template.md .github/ISSUE_TEMPLATE/bug_report.yaml .github/ISSUE_TEMPLATE/config.yml diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/docs/versionhistory.rst new/cbor2-5.8.0/docs/versionhistory.rst --- old/cbor2-5.7.1/docs/versionhistory.rst 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/docs/versionhistory.rst 2025-12-30 19:37:15.000000000 +0100 @@ -3,7 +3,19 @@ .. currentmodule:: cbor2 -This library adheres to `Semantic Versioning <https://semver.org/>`_. +This library adheres to `Semantic Versioning 2.0 <http://semver.org/>`_. + +**5.8.0** (2025-12-30) + +- Added readahead buffering to C decoder for improved performance. + The decoder now uses a 4 KB buffer by default to reduce the number of read calls. + Benchmarks show 20-140% performance improvements for decoding operations. + (`#268+ <https://github.com/agronholm/cbor2/pull/268>`_; PR by @andreer) +- Fixed Python decoder not preserving share index when decoding array items containing + nested shareable tags, causing shared references to resolve to wrong objects + (`#267+ <https://github.com/agronholm/cbor2/pull/267>`_; PR by @andreer) +- Reset shared reference state at the start of each top-level encode/decode operation + (`#266+ <https://github.com/agronholm/cbor2/pull/266>`_; PR by @andreer) **5.7.1** (2025-10-24) @@ -16,11 +28,11 @@ - Added support for Python 3.14 (no free-threading support yet, sorry) - Dropped support for Python 3.8 - (#247 <https://github.com/agronholm/cbor2/pull/247>_; PR by @hugovk) + (`#247+ <https://github.com/agronholm/cbor2/pull/247>`_; PR by @hugovk) - Added support for encoding indefinite containers - (#256 <https://github.com/agronholm/cbor2/pull/256>_; PR by @CZDanol) + (`#256+ <https://github.com/agronholm/cbor2/pull/256>`_; PR by @CZDanol) - Added complex number support (tag 43000) - (#249 <https://github.com/agronholm/cbor2/pull/249>_; PR by @chillenb) + (`#249+ <https://github.com/agronholm/cbor2/pull/249>`_; PR by @chillenb) **5.6.5** (2024-10-09) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/scripts/ref_leak_test.py new/cbor2-5.8.0/scripts/ref_leak_test.py --- old/cbor2-5.7.1/scripts/ref_leak_test.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/scripts/ref_leak_test.py 2025-12-30 19:37:15.000000000 +0100 @@ -145,20 +145,15 @@ def format_leaks(result): if result.objgraph.comparison: - return "%d objs (/%d)" % ( - sum(leak[-1] for leak in result.objgraph.comparison), - result.objgraph.count, - ) + num_objs = sum(leak[-1] for leak in result.objgraph.comparison) + return f"{num_objs} objs (/{result.objgraph.count})" elif result.malloc.comparison and ( result.malloc.count < result.malloc.comparison[0].size_diff ): # Running the loop always results in *some* memory allocation, but as # long as the bytes allocated are less than the number of loops it's # unlikely to be an actual leak - return "%d bytes (/%d)" % ( - result.malloc.comparison[0].size_diff, - result.malloc.count, - ) + return f"{result.malloc.comparison[0].size_diff} bytes (/{result.malloc.count})" else: return "-" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/source/decoder.c new/cbor2-5.8.0/source/decoder.c --- old/cbor2-5.7.1/source/decoder.c 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/source/decoder.c 2025-12-30 19:37:15.000000000 +0100 @@ -42,6 +42,7 @@ typedef uint8_t DecodeOptions; static int _CBORDecoder_set_fp(CBORDecoderObject *, PyObject *, void *); +static int _CBORDecoder_set_fp_with_read_size(CBORDecoderObject *, PyObject *, Py_ssize_t); static int _CBORDecoder_set_tag_hook(CBORDecoderObject *, PyObject *, void *); static int _CBORDecoder_set_object_hook(CBORDecoderObject *, PyObject *, void *); static int _CBORDecoder_set_str_errors(CBORDecoderObject *, PyObject *, void *); @@ -102,6 +103,13 @@ Py_CLEAR(self->shareables); Py_CLEAR(self->stringref_namespace); Py_CLEAR(self->str_errors); + if (self->readahead) { + PyMem_Free(self->readahead); + self->readahead = NULL; + self->readahead_size = 0; + } + self->read_pos = 0; + self->read_len = 0; return 0; } @@ -143,6 +151,11 @@ self->str_errors = PyBytes_FromString("strict"); self->immutable = false; self->shared_index = -1; + self->decode_depth = 0; + self->readahead = NULL; + self->readahead_size = 0; + self->read_pos = 0; + self->read_len = 0; } return (PyObject *) self; error: @@ -152,21 +165,27 @@ // CBORDecoder.__init__(self, fp=None, tag_hook=None, object_hook=None, -// str_errors='strict') +// str_errors='strict', read_size=4096) int CBORDecoder_init(CBORDecoderObject *self, PyObject *args, PyObject *kwargs) { static char *keywords[] = { - "fp", "tag_hook", "object_hook", "str_errors", NULL + "fp", "tag_hook", "object_hook", "str_errors", "read_size", NULL }; PyObject *fp = NULL, *tag_hook = NULL, *object_hook = NULL, *str_errors = NULL; + Py_ssize_t read_size = CBOR2_DEFAULT_READ_SIZE; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOO", keywords, - &fp, &tag_hook, &object_hook, &str_errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOn", keywords, + &fp, &tag_hook, &object_hook, &str_errors, &read_size)) return -1; - if (_CBORDecoder_set_fp(self, fp, NULL) == -1) + if (read_size < 1) { + PyErr_SetString(PyExc_ValueError, "read_size must be at least 1"); + return -1; + } + + if (_CBORDecoder_set_fp_with_read_size(self, fp, read_size) == -1) return -1; if (tag_hook && _CBORDecoder_set_tag_hook(self, tag_hook, NULL) == -1) return -1; @@ -197,11 +216,12 @@ } -// CBORDecoder._set_fp(self, value) +// Internal: set fp with configurable read size static int -_CBORDecoder_set_fp(CBORDecoderObject *self, PyObject *value, void *closure) +_CBORDecoder_set_fp_with_read_size(CBORDecoderObject *self, PyObject *value, Py_ssize_t read_size) { PyObject *tmp, *read; + char *new_buffer = NULL; if (!value) { PyErr_SetString(PyExc_AttributeError, "cannot delete fp attribute"); @@ -214,13 +234,43 @@ return -1; } + if (self->readahead == NULL || self->readahead_size != read_size) { + new_buffer = (char *)PyMem_Malloc(read_size); + if (!new_buffer) { + Py_DECREF(read); + PyErr_NoMemory(); + return -1; + } + } + // See notes in encoder.c / _CBOREncoder_set_fp tmp = self->read; self->read = read; Py_DECREF(tmp); + + self->read_pos = 0; + self->read_len = 0; + + // Replace buffer (size changed or was NULL) + if (new_buffer) { + PyMem_Free(self->readahead); + self->readahead = new_buffer; + self->readahead_size = read_size; + } + return 0; } +// CBORDecoder._set_fp(self, value) - property setter uses default read size +static int +_CBORDecoder_set_fp(CBORDecoderObject *self, PyObject *value, void *closure) +{ + // Use existing readahead_size if already allocated, otherwise use default + Py_ssize_t read_size = (self->readahead_size > 0) ? + self->readahead_size : CBOR2_DEFAULT_READ_SIZE; + return _CBORDecoder_set_fp_with_read_size(self, value, read_size); +} + // CBORDecoder._get_tag_hook(self) static PyObject * @@ -376,45 +426,93 @@ } } -static PyObject * -fp_read_object(CBORDecoderObject *self, const Py_ssize_t size) +// Read directly into caller's buffer (bypassing readahead buffer) +static Py_ssize_t +fp_read_bytes(CBORDecoderObject *self, char *buf, Py_ssize_t size) { - PyObject *ret = NULL; - PyObject *obj, *size_obj; - size_obj = PyLong_FromSsize_t(size); - if (size_obj) { - obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL); - Py_DECREF(size_obj); - if (obj) { - assert(PyBytes_CheckExact(obj)); - if (PyBytes_GET_SIZE(obj) == (Py_ssize_t) size) { - ret = obj; + PyObject *size_obj = PyLong_FromSsize_t(size); + if (!size_obj) + return -1; + + PyObject *obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL); + Py_DECREF(size_obj); + if (!obj) + return -1; + + assert(PyBytes_CheckExact(obj)); + Py_ssize_t bytes_read = PyBytes_GET_SIZE(obj); + if (bytes_read > 0) + memcpy(buf, PyBytes_AS_STRING(obj), bytes_read); + + Py_DECREF(obj); + return bytes_read; +} + +// Read into caller's buffer using the readahead buffer +static int +fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size) +{ + Py_ssize_t available, to_copy, remaining, total_copied; + + remaining = size; + total_copied = 0; + + while (remaining > 0) { + available = self->read_len - self->read_pos; + + if (available > 0) { + // Copy from buffer + to_copy = (available < remaining) ? available : remaining; + memcpy(buf + total_copied, self->readahead + self->read_pos, to_copy); + self->read_pos += to_copy; + total_copied += to_copy; + remaining -= to_copy; + } else { + Py_ssize_t bytes_read; + + if (remaining >= self->readahead_size) { + // Large remaining: read directly into destination, bypass buffer + bytes_read = fp_read_bytes(self, buf + total_copied, remaining); + if (bytes_read > 0) { + total_copied += bytes_read; + remaining -= bytes_read; + } } else { - PyErr_Format( - _CBOR2_CBORDecodeEOF, - "premature end of stream (expected to read %zd bytes, " - "got %zd instead)", size, PyBytes_GET_SIZE(obj)); - Py_DECREF(obj); + // Small remaining: refill buffer + self->read_pos = 0; + self->read_len = 0; + bytes_read = fp_read_bytes(self, self->readahead, self->readahead_size); + if (bytes_read > 0) + self->read_len = bytes_read; + } + + if (bytes_read <= 0) { + if (bytes_read == 0) + PyErr_Format( + _CBOR2_CBORDecodeEOF, + "premature end of stream (expected to read %zd bytes, " + "got %zd instead)", size, total_copied); + return -1; } } } - return ret; -} + return 0; +} -static int -fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size) +// Read and return as PyBytes object +static PyObject * +fp_read_object(CBORDecoderObject *self, const Py_ssize_t size) { - int ret = -1; - PyObject *obj = fp_read_object(self, size); - if (obj) { - char *data = PyBytes_AS_STRING(obj); - if (data) { - memcpy(buf, data, size); - ret = 0; - } - Py_DECREF(obj); + PyObject *ret = PyBytes_FromStringAndSize(NULL, size); + if (!ret) + return NULL; + + if (fp_read(self, PyBytes_AS_STRING(ret), size) == -1) { + Py_DECREF(ret); + return NULL; } + return ret; } @@ -2083,11 +2181,30 @@ } +// Reset shared state at the end of each top-level decode to prevent +// shared references from leaking between independent decode operations. +// Nested calls (from hooks) must preserve the state. +static inline void +clear_shareable_state(CBORDecoderObject *self) +{ + PyList_SetSlice(self->shareables, 0, PY_SSIZE_T_MAX, NULL); + self->shared_index = -1; +} + + // CBORDecoder.decode(self) -> obj PyObject * CBORDecoder_decode(CBORDecoderObject *self) { - return decode(self, DECODE_NORMAL); + PyObject *ret; + self->decode_depth++; + ret = decode(self, DECODE_NORMAL); + self->decode_depth--; + assert(self->decode_depth >= 0); + if (self->decode_depth == 0) { + clear_shareable_state(self); + } + return ret; } @@ -2096,21 +2213,59 @@ CBORDecoder_decode_from_bytes(CBORDecoderObject *self, PyObject *data) { PyObject *save_read, *buf, *ret = NULL; + bool is_nested = (self->decode_depth > 0); + Py_ssize_t save_read_pos = 0, save_read_len = 0; + char *save_buffer = NULL; if (!_CBOR2_BytesIO && _CBOR2_init_BytesIO() == -1) return NULL; - save_read = self->read; buf = PyObject_CallFunctionObjArgs(_CBOR2_BytesIO, data, NULL); - if (buf) { - self->read = PyObject_GetAttr(buf, _CBOR2_str_read); - if (self->read) { - ret = decode(self, DECODE_NORMAL); - Py_DECREF(self->read); + if (!buf) + return NULL; + + self->decode_depth++; + save_read = self->read; + Py_INCREF(save_read); // Keep alive while we use a different read method + save_read_pos = self->read_pos; + save_read_len = self->read_len; + + // Save buffer pointer if nested + if (is_nested) { + save_buffer = self->readahead; + self->readahead = NULL; // Prevent setter from freeing saved buffer + } + + // Set up BytesIO decoder - setter handles buffer allocation + if (_CBORDecoder_set_fp_with_read_size(self, buf, self->readahead_size) == -1) { + if (is_nested) { + PyMem_Free(self->readahead); + self->readahead = save_buffer; } + Py_DECREF(save_read); Py_DECREF(buf); + self->decode_depth--; + return NULL; + } + + ret = decode(self, DECODE_NORMAL); + + Py_XDECREF(self->read); // Decrement BytesIO read method + self->read = save_read; // Restore saved read (already has correct refcount) + Py_DECREF(buf); + self->decode_depth--; + + if (is_nested) { + PyMem_Free(self->readahead); + self->readahead = save_buffer; + } + self->read_pos = save_read_pos; + self->read_len = save_read_len; + + assert(self->decode_depth >= 0); + if (self->decode_depth == 0) { + clear_shareable_state(self); } - self->read = save_read; return ret; } @@ -2258,6 +2413,14 @@ " dictionary. This callback is invoked for each deserialized\n" " :class:`dict` object. The return value is substituted for the dict\n" " in the deserialized output.\n" +":param read_size:\n" +" the size of the read buffer (default 4096). The decoder reads from\n" +" the stream in chunks of this size for performance. This means the\n" +" stream position may advance beyond the bytes actually decoded. For\n" +" large values (bytestrings, text strings), reads may be larger than\n" +" ``read_size``. Code that needs to read from the stream after\n" +" decoding should use :meth:`decode_from_bytes` instead, or set\n" +" ``read_size=1`` to disable buffering (at a performance cost).\n" "\n" ".. _CBOR: https://cbor.io/\n" ); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/source/decoder.h new/cbor2-5.8.0/source/decoder.h --- old/cbor2-5.7.1/source/decoder.h 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/source/decoder.h 2025-12-30 19:37:15.000000000 +0100 @@ -3,6 +3,9 @@ #include <stdbool.h> #include <stdint.h> +// Default readahead buffer size for streaming reads +#define CBOR2_DEFAULT_READ_SIZE 4096 + typedef struct { PyObject_HEAD PyObject *read; // cached read() method of fp @@ -13,6 +16,13 @@ PyObject *str_errors; bool immutable; Py_ssize_t shared_index; + Py_ssize_t decode_depth; + + // Readahead buffer for streaming + char *readahead; // allocated buffer + Py_ssize_t readahead_size; // size of allocated buffer + Py_ssize_t read_pos; // current position in buffer + Py_ssize_t read_len; // valid bytes in buffer } CBORDecoderObject; extern PyTypeObject CBORDecoderType; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/source/encoder.c new/cbor2-5.8.0/source/encoder.c --- old/cbor2-5.7.1/source/encoder.c 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/source/encoder.c 2025-12-30 19:37:15.000000000 +0100 @@ -114,6 +114,7 @@ self->string_referencing = false; self->string_namespacing = false; self->indefinite_containers = false; + self->encode_depth = 0; } return (PyObject *) self; } @@ -2132,17 +2133,35 @@ } +// Reset shared state at the end of each top-level encode to prevent +// shared references from leaking between independent encode operations. +// Nested calls (from hooks or recursive encoding) must preserve the state. +static inline void +clear_shared_state(CBOREncoderObject *self) +{ + PyDict_Clear(self->shared); + PyDict_Clear(self->string_references); +} + + // CBOREncoder.encode(self, value) PyObject * CBOREncoder_encode(CBOREncoderObject *self, PyObject *value) { PyObject *ret; - // TODO reset shared dict? - if (Py_EnterRecursiveCall(" in CBOREncoder.encode")) + self->encode_depth++; + if (Py_EnterRecursiveCall(" in CBOREncoder.encode")) { + self->encode_depth--; return NULL; + } ret = encode(self, value); Py_LeaveRecursiveCall(); + self->encode_depth--; + assert(self->encode_depth >= 0); + if (self->encode_depth == 0) { + clear_shared_state(self); + } return ret; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/source/encoder.h new/cbor2-5.8.0/source/encoder.h --- old/cbor2-5.7.1/source/encoder.h 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/source/encoder.h 2025-12-30 19:37:15.000000000 +0100 @@ -25,6 +25,7 @@ bool string_referencing; bool string_namespacing; bool indefinite_containers; + Py_ssize_t encode_depth; } CBOREncoderObject; extern PyTypeObject CBOREncoderType; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/tests/test_decoder.py new/cbor2-5.8.0/tests/test_decoder.py --- old/cbor2-5.7.1/tests/test_decoder.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/tests/test_decoder.py 2025-12-30 19:37:15.000000000 +0100 @@ -732,15 +732,13 @@ def test_bad_ipnetwork(impl): with pytest.raises(impl.CBORDecodeError) as exc: impl.loads(unhexlify("d90105a244c0a80064181844c0a800001818")) - assert str(exc.value).endswith( - "invalid ipnetwork value %r" % {b"\xc0\xa8\x00d": 24, b"\xc0\xa8\x00\x00": 24} - ) + invalid_value = {b"\xc0\xa8\x00d": 24, b"\xc0\xa8\x00\x00": 24} + assert str(exc.value).endswith(f"invalid ipnetwork value {invalid_value!r}") assert isinstance(exc, ValueError) with pytest.raises(impl.CBORDecodeError) as exc: impl.loads(unhexlify("d90105a144c0a80064420102")) - assert str(exc.value).endswith( - "invalid ipnetwork value %r" % {b"\xc0\xa8\x00d": b"\x01\x02"} - ) + invalid_value = {b"\xc0\xa8\x00d": b"\x01\x02"} + assert str(exc.value).endswith(f"invalid ipnetwork value {invalid_value}") assert isinstance(exc, ValueError) @@ -780,6 +778,12 @@ assert decoded == {0: decoded} +def test_nested_shareable_in_array(impl): + decoded = impl.loads(unhexlify("82d81c82d81c61616162d81d00")) + assert decoded == [["a", "b"], ["a", "b"]] + assert decoded[0] is decoded[1] + + def test_string_ref(impl): decoded = impl.loads(unhexlify("d9010085656669727374d81900667365636f6e64d81900d81901")) assert isinstance(decoded, list) @@ -1022,3 +1026,151 @@ dummy_path.write_bytes(payload) with dummy_path.open("rb") as f: impl.load(f) + + +class TestDecoderReuse: + """ + Tests for correct behavior when reusing CBORDecoder instances. + """ + + def test_decoder_reuse_resets_shared_refs(self, impl): + """ + Shared references should be scoped to a single decode operation, + not persist across multiple decodes on the same decoder instance. + """ + # Message with shareable tag (28) + msg1 = impl.dumps(impl.CBORTag(28, "first_value")) + + # Message with sharedref tag (29) referencing index 0 + msg2 = impl.dumps(impl.CBORTag(29, 0)) + + # Reuse decoder across messages + decoder = impl.CBORDecoder(BytesIO(msg1)) + result1 = decoder.decode() + assert result1 == "first_value" + + # Second decode should fail - sharedref(0) doesn't exist in this context + decoder.fp = BytesIO(msg2) + with pytest.raises(impl.CBORDecodeValueError, match="shared reference"): + decoder.decode() + + def test_decode_from_bytes_resets_shared_refs(self, impl): + """ + decode_from_bytes should also reset shared references between calls. + """ + msg1 = impl.dumps(impl.CBORTag(28, "value")) + msg2 = impl.dumps(impl.CBORTag(29, 0)) + + decoder = impl.CBORDecoder(BytesIO(b"")) + decoder.decode_from_bytes(msg1) + + with pytest.raises(impl.CBORDecodeValueError, match="shared reference"): + decoder.decode_from_bytes(msg2) + + def test_shared_refs_within_single_decode(self, impl): + """ + Shared references must work correctly within a single decode operation. + + Note: This tests non-cyclic sibling references [shareable(x), sharedref(0)], + which is a different pattern from test_cyclic_array/test_cyclic_map that + test self-referencing structures like shareable([sharedref(0)]). + """ + # [shareable("hello"), sharedref(0)] -> ["hello", "hello"] + data = unhexlify( + "82" # array(2) + "d81c" # tag(28) shareable + "65" # text(5) + "68656c6c6f" # "hello" + "d81d" # tag(29) sharedref + "00" # unsigned(0) + ) + + result = impl.loads(data) + assert result == ["hello", "hello"] + assert result[0] is result[1] # Same object reference + + +def test_decode_from_bytes_in_hook_preserves_buffer(impl): + """Test that calling decode_from_bytes from a hook preserves stream buffer state. + + This is a documented use case from docs/customizing.rst where hooks decode + embedded CBOR data. Before the fix, the stream's readahead buffer would be + corrupted, causing subsequent reads to fail or return wrong data. + """ + + def tag_hook(decoder, tag): + if tag.tag == 999: + # Decode embedded CBOR (documented pattern) + return decoder.decode_from_bytes(tag.value) + return tag + + # Test data: array with [tag(999, embedded_cbor), "after_hook", "final"] + # embedded_cbor encodes: [1, 2, 3] + data = unhexlify( + "83" # array(3) + "d903e7" # tag(999) + "44" # bytes(4) + "83010203" # embedded: array [1, 2, 3] + "6a" # text(10) + "61667465725f686f6f6b" # "after_hook" + "65" # text(5) + "66696e616c" # "final" + ) + + # Decode from stream (not bytes) to use readahead buffer + stream = BytesIO(data) + decoder = impl.CBORDecoder(stream, tag_hook=tag_hook) + result = decoder.decode() + + # Verify all values decoded correctly + assert result == [[1, 2, 3], "after_hook", "final"] + + # First element should be the decoded embedded CBOR + assert result[0] == [1, 2, 3] + # Second element should be "after_hook" (not corrupted) + assert result[1] == "after_hook" + # Third element should be "final" + assert result[2] == "final" + + +def test_decode_from_bytes_deeply_nested_in_hook(impl): + """Test deeply nested decode_from_bytes calls preserve buffer state. + + This tests tag(999, tag(888, tag(777, [1,2,3]))) where each tag value + is embedded CBOR that triggers the hook recursively. + + Before the fix, even a single level would corrupt the buffer. With multiple + levels, the buffer would be completely corrupted, mixing data from different + BytesIO objects and the original stream. + """ + + def tag_hook(decoder, tag): + if tag.tag in [999, 888, 777]: + # Recursively decode embedded CBOR + return decoder.decode_from_bytes(tag.value) + return tag + + # Test data: [tag(999, tag(888, tag(777, [1,2,3]))), "after", "final"] + # Each tag contains embedded CBOR + data = unhexlify( + "83" # array(3) + "d903e7" # tag(999) + "4c" # bytes(12) + "d9037848d903094483010203" # embedded: tag(888, tag(777, [1,2,3])) + "65" # text(5) + "6166746572" # "after" + "65" # text(5) + "66696e616c" # "final" + ) + + # Decode from stream to use readahead buffer + stream = BytesIO(data) + decoder = impl.CBORDecoder(stream, tag_hook=tag_hook) + result = decoder.decode() + + # With the fix: all three levels of nesting work correctly + # Without the fix: buffer corruption at each level, test fails + assert result == [[1, 2, 3], "after", "final"] + assert result[0] == [1, 2, 3] + assert result[1] == "after" + assert result[2] == "final" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/cbor2-5.7.1/tests/test_encoder.py new/cbor2-5.8.0/tests/test_encoder.py --- old/cbor2-5.7.1/tests/test_encoder.py 2025-10-24 11:16:14.000000000 +0200 +++ new/cbor2-5.8.0/tests/test_encoder.py 2025-12-30 19:37:15.000000000 +0100 @@ -669,16 +669,7 @@ def test_encode_stringrefs_dict(impl): value = {"aaaa": "mmmm", "bbbb": "bbbb", "cccc": "aaaa", "mmmm": "aaaa"} expected = unhexlify( - "d90100" - "a4" - "6461616161" - "646d6d6d6d" - "6462626262" - "d81902" - "6463636363" - "d81900" - "d81901" - "d81900" + "d90100a46461616161646d6d6d6d6462626262d819026463636363d81900d81901d81900" ) assert impl.dumps(value, string_referencing=True, canonical=True) == expected @@ -717,3 +708,73 @@ expected = b"\xbf\xff" assert impl.dumps({}, indefinite_containers=True) == expected assert impl.dumps({}, indefinite_containers=True, canonical=True) == expected + + +class TestEncoderReuse: + """ + Tests for correct behavior when reusing CBOREncoder instances. + """ + + def test_encoder_reuse_resets_shared_containers(self, impl): + """ + Shared container tracking should be scoped to a single encode operation, + not persist across multiple encodes on the same encoder instance. + """ + fp = BytesIO() + encoder = impl.CBOREncoder(fp, value_sharing=True) + shared_obj = ["hello"] + + # First encode: object is tracked in shared containers + encoder.encode([shared_obj, shared_obj]) + + # Second encode on new fp: should produce valid standalone CBOR + # (not a sharedref pointing to stale first-encode data) + encoder.fp = BytesIO() + encoder.encode(shared_obj) + second_output = encoder.fp.getvalue() + + # The second output must be decodable on its own + result = impl.loads(second_output) + assert result == ["hello"] + + def test_encode_to_bytes_resets_shared_containers(self, impl): + """ + encode_to_bytes should also reset shared container tracking between calls. + """ + fp = BytesIO() + encoder = impl.CBOREncoder(fp, value_sharing=True) + shared_obj = ["hello"] + + # First encode + encoder.encode_to_bytes([shared_obj, shared_obj]) + + # Second encode should produce valid standalone CBOR + result_bytes = encoder.encode_to_bytes(shared_obj) + result = impl.loads(result_bytes) + assert result == ["hello"] + + def test_encoder_hook_does_not_reset_state(self, impl): + """ + When a custom encoder hook calls encode(), the shared container + tracking should be preserved (not reset mid-operation). + """ + + class Custom: + def __init__(self, value): + self.value = value + + def custom_encoder(encoder, obj): + # Hook encodes the wrapped value + encoder.encode(obj.value) + + # Encode a Custom wrapping a list + data = impl.dumps(Custom(["a", "b"]), default=custom_encoder) + + # Verify the output decodes correctly + result = impl.loads(data) + assert result == ["a", "b"] + + # Test nested Custom objects - hook should work recursively + data2 = impl.dumps(Custom(Custom(["x"])), default=custom_encoder) + result2 = impl.loads(data2) + assert result2 == ["x"]
