This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/main by this push:
     new 6560287  Ensure archive members limit can be disabled, and catch more 
widely
6560287 is described below

commit 6560287bb3881d2d44103e92b774b7a989ee4634
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Jan 28 15:50:56 2026 +0000

    Ensure archive members limit can be disabled, and catch more widely
---
 atr/tarzip.py                           |  23 +++--
 atr/tasks/checks/license.py             | 116 +++++++++++++----------
 atr/tasks/checks/targz.py               |   4 +
 tests/unit/test_archive_member_limit.py | 160 ++++++++++++++++++++++++++++++++
 4 files changed, 243 insertions(+), 60 deletions(-)

diff --git a/atr/tarzip.py b/atr/tarzip.py
index 7ad6a66..20e9919 100644
--- a/atr/tarzip.py
+++ b/atr/tarzip.py
@@ -111,24 +111,27 @@ class ArchiveContext[ArchiveT: (tarfile.TarFile, 
zipfile.ZipFile)]:
 
     def __iter__(self) -> Iterator[TarMember | ZipMember]:
         count = 0
+        enforce_limit = self._max_members > 0
         match self._archive_obj:
             case tarfile.TarFile() as tf:
                 for member_orig in tf:
                     if member_orig.isdev():
                         continue
-                    count += 1
-                    if count > self._max_members:
-                        raise ArchiveMemberLimitExceededError(
-                            f"Archive contains too many members: exceeded 
limit of {self._max_members}"
-                        )
+                    if enforce_limit:
+                        count += 1
+                        if count > self._max_members:
+                            raise ArchiveMemberLimitExceededError(
+                                f"Archive contains too many members: exceeded 
limit of {self._max_members}"
+                            )
                     yield TarMember(member_orig)
             case zipfile.ZipFile() as zf:
                 for member_orig in zf.infolist():
-                    count += 1
-                    if count > self._max_members:
-                        raise ArchiveMemberLimitExceededError(
-                            f"Archive contains too many members: exceeded 
limit of {self._max_members}"
-                        )
+                    if enforce_limit:
+                        count += 1
+                        if count > self._max_members:
+                            raise ArchiveMemberLimitExceededError(
+                                f"Archive contains too many members: exceeded 
limit of {self._max_members}"
+                            )
                     yield ZipMember(member_orig)
 
     def extractfile(self, member_wrapper: Member) -> IO[bytes] | None:
diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index 0e1a0c9..d9482f7 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -221,27 +221,35 @@ def _files_check_core_logic(artifact_path: str, 
is_podling: bool) -> Iterator[Re
     disclaimer_found = False
 
     # Check for license files in the root directory
-    with tarzip.open_archive(artifact_path) as archive:
-        for member in archive:
-            if member.name and member.name.split("/")[-1].startswith("._"):
-                # Metadata convention
-                continue
-
-            if member.name.count("/") > 1:
-                # Skip files in subdirectories
-                continue
-
-            filename = os.path.basename(member.name)
-            if filename == "LICENSE":
-                # TODO: Check length, should be 11,358 bytes
-                license_diff = _files_check_core_logic_license(archive, member)
-                license_results[filename] = license_diff
-            elif filename == "NOTICE":
-                # TODO: Check length doesn't exceed some preset
-                notice_ok, notice_issues, notice_preamble = 
_files_check_core_logic_notice(archive, member)
-                notice_results[filename] = (notice_ok, notice_issues, 
notice_preamble)
-            elif filename in {"DISCLAIMER", "DISCLAIMER-WIP"}:
-                disclaimer_found = True
+    try:
+        with tarzip.open_archive(artifact_path) as archive:
+            for member in archive:
+                if member.name and member.name.split("/")[-1].startswith("._"):
+                    # Metadata convention
+                    continue
+
+                if member.name.count("/") > 1:
+                    # Skip files in subdirectories
+                    continue
+
+                filename = os.path.basename(member.name)
+                if filename == "LICENSE":
+                    # TODO: Check length, should be 11,358 bytes
+                    license_diff = _files_check_core_logic_license(archive, 
member)
+                    license_results[filename] = license_diff
+                elif filename == "NOTICE":
+                    # TODO: Check length doesn't exceed some preset
+                    notice_ok, notice_issues, notice_preamble = 
_files_check_core_logic_notice(archive, member)
+                    notice_results[filename] = (notice_ok, notice_issues, 
notice_preamble)
+                elif filename in {"DISCLAIMER", "DISCLAIMER-WIP"}:
+                    disclaimer_found = True
+    except tarzip.ArchiveMemberLimitExceededError as e:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message=f"Archive has too many members: {e}",
+            data={"error": str(e)},
+        )
+        return
 
     yield from _license_results(license_results)
     yield from _notice_results(notice_results)
@@ -320,7 +328,7 @@ def _get_file_extension(filename: str) -> str | None:
     return ext[1:].lower()
 
 
-def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], 
excludes_source: str) -> Iterator[Result]:
+def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], 
excludes_source: str) -> Iterator[Result]:  # noqa: C901
     """Verify Apache License headers in source files within an archive."""
     # We could modify @Lucas-C/pre-commit-hooks instead for this
     # But hopefully this will be robust enough, at least for testing
@@ -341,34 +349,42 @@ def _headers_check_core_logic(artifact_path: str, 
ignore_lines: list[str], exclu
     # log.info(f"Ignore lines: {ignore_lines}")
 
     # Check files in the archive
-    with tarzip.open_archive(artifact_path) as archive:
-        for member in archive:
-            if member.name and member.name.split("/")[-1].startswith("._"):
-                # Metadata convention
-                continue
-
-            ignore_path = "/" + artifact_basename + "/" + 
member.name.lstrip("/")
-            matcher = util.create_path_matcher(ignore_lines, 
pathlib.Path(ignore_path), pathlib.Path("/"))
-            # log.info(f"Checking {ignore_path} with matcher {matcher}")
-            if matcher(ignore_path):
-                # log.info(f"Skipping {ignore_path} because it matches the 
ignore list")
-                continue
-
-            match _headers_check_core_logic_process_file(archive, member):
-                case ArtifactResult() | MemberResult() as result:
-                    artifact_data.files_checked += 1
-                    match result.status:
-                        case sql.CheckResultStatus.SUCCESS:
-                            artifact_data.files_with_valid_headers += 1
-                        case sql.CheckResultStatus.FAILURE:
-                            artifact_data.files_with_invalid_headers += 1
-                        case sql.CheckResultStatus.WARNING:
-                            artifact_data.files_with_invalid_headers += 1
-                        case sql.CheckResultStatus.EXCEPTION:
-                            artifact_data.files_with_invalid_headers += 1
-                    yield result
-                case MemberSkippedResult():
-                    artifact_data.files_skipped += 1
+    try:
+        with tarzip.open_archive(artifact_path) as archive:
+            for member in archive:
+                if member.name and member.name.split("/")[-1].startswith("._"):
+                    # Metadata convention
+                    continue
+
+                ignore_path = "/" + artifact_basename + "/" + 
member.name.lstrip("/")
+                matcher = util.create_path_matcher(ignore_lines, 
pathlib.Path(ignore_path), pathlib.Path("/"))
+                # log.info(f"Checking {ignore_path} with matcher {matcher}")
+                if matcher(ignore_path):
+                    # log.info(f"Skipping {ignore_path} because it matches the 
ignore list")
+                    continue
+
+                match _headers_check_core_logic_process_file(archive, member):
+                    case ArtifactResult() | MemberResult() as result:
+                        artifact_data.files_checked += 1
+                        match result.status:
+                            case sql.CheckResultStatus.SUCCESS:
+                                artifact_data.files_with_valid_headers += 1
+                            case sql.CheckResultStatus.FAILURE:
+                                artifact_data.files_with_invalid_headers += 1
+                            case sql.CheckResultStatus.WARNING:
+                                artifact_data.files_with_invalid_headers += 1
+                            case sql.CheckResultStatus.EXCEPTION:
+                                artifact_data.files_with_invalid_headers += 1
+                        yield result
+                    case MemberSkippedResult():
+                        artifact_data.files_skipped += 1
+    except tarzip.ArchiveMemberLimitExceededError as e:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message=f"Archive has too many members: {e}",
+            data={"error": str(e)},
+        )
+        return
 
     yield ArtifactResult(
         status=sql.CheckResultStatus.SUCCESS,
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 18ae3f4..fd90463 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -43,6 +43,8 @@ async def integrity(args: checks.FunctionArguments) -> 
results.Results | None:
     try:
         size = await asyncio.to_thread(archives.total_size, 
str(artifact_abs_path), chunk_size)
         await recorder.success("Able to read all entries of the archive using 
tarfile", {"size": size})
+    except tarzip.ArchiveMemberLimitExceededError as e:
+        await recorder.failure(f"Archive has too many members: {e}", {"error": 
str(e)})
     except Exception as e:
         await recorder.failure("Unable to read all entries of the archive 
using tarfile", {"error": str(e)})
     return None
@@ -99,6 +101,8 @@ async def structure(args: checks.FunctionArguments) -> 
results.Results | None:
                 f"Root directory '{root}' does not match expected name 
'{expected_root}'",
                 {"root": root, "expected": expected_root},
             )
+    except tarzip.ArchiveMemberLimitExceededError as e:
+        await recorder.failure(f"Archive has too many members: {e}", {"error": 
str(e)})
     except RootDirectoryError as e:
         await recorder.warning("Could not get the root directory of the 
archive", {"error": str(e)})
     except Exception as e:
diff --git a/tests/unit/test_archive_member_limit.py 
b/tests/unit/test_archive_member_limit.py
index e805d68..d085b1d 100644
--- a/tests/unit/test_archive_member_limit.py
+++ b/tests/unit/test_archive_member_limit.py
@@ -15,17 +15,67 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import datetime
 import io
+import pathlib
 import tarfile
 import zipfile
 
 import pytest
 
 import atr.archives as archives
+import atr.models.sql as sql
 import atr.tarzip as tarzip
+import atr.tasks.checks as checks
+import atr.tasks.checks.license as license_checks
+import atr.tasks.checks.targz as targz
 import atr.tasks.checks.zipformat as zipformat
 
 
+class _RecorderStub(checks.Recorder):
+    def __init__(self, path: pathlib.Path):
+        super().__init__(
+            checker="tests.unit.test_archive_member_limit",
+            project_name="test",
+            version_name="test",
+            revision_number="00001",
+            primary_rel_path=None,
+            member_rel_path=None,
+            afresh=False,
+        )
+        self._path = path
+        self.messages: list[tuple[str, str, dict | None]] = []
+
+    async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | 
None:
+        return self._path if (rel_path is None) else self._path / rel_path
+
+    async def primary_path_is_binary(self) -> bool:
+        return False
+
+    async def _add(
+        self,
+        status: sql.CheckResultStatus,
+        message: str,
+        data: object,
+        primary_rel_path: str | None = None,
+        member_rel_path: str | None = None,
+    ) -> sql.CheckResult:
+        self.messages.append((status.value, message, data if isinstance(data, 
dict) else None))
+        return sql.CheckResult(
+            id=0,
+            release_name=self.release_name,
+            revision_number=self.revision_number,
+            checker=self.checker,
+            primary_rel_path=primary_rel_path,
+            member_rel_path=member_rel_path,
+            created=datetime.datetime.now(datetime.UTC),
+            status=status,
+            message=message,
+            data=data,
+            input_hash=None,
+        )
+
+
 def test_extract_wraps_member_limit(tmp_path, monkeypatch):
     archive_path = tmp_path / "sample.tar"
     _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -45,6 +95,62 @@ def test_extract_wraps_member_limit(tmp_path, monkeypatch):
     assert "too many members" in str(excinfo.value).lower()
 
 
+def test_license_files_reports_member_limit(tmp_path, monkeypatch):
+    archive_path = tmp_path / "sample.tar"
+    _make_tar(archive_path, ["LICENSE", "NOTICE", "README.txt"])
+
+    original_open = tarzip.open_archive
+
+    def limited_open(path: str, *args, **kwargs):
+        return original_open(path, max_members=2)
+
+    monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    results = list(license_checks._files_check_core_logic(str(archive_path), 
is_podling=False))
+    assert any(
+        isinstance(result, license_checks.ArtifactResult) and ("too many 
members" in result.message.lower())
+        for result in results
+    )
+
+
+def test_license_headers_reports_member_limit(tmp_path, monkeypatch):
+    archive_path = tmp_path / "sample.tar"
+    _make_tar(archive_path, ["main.py", "README.txt", "extra.txt"])
+
+    original_open = tarzip.open_archive
+
+    def limited_open(path: str, *args, **kwargs):
+        return original_open(path, max_members=2)
+
+    monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    results = list(license_checks._headers_check_core_logic(str(archive_path), 
[], "none"))
+    assert any(
+        isinstance(result, license_checks.ArtifactResult) and ("too many 
members" in result.message.lower())
+        for result in results
+    )
+
+
+def test_open_archive_disables_member_limit_for_negative(tmp_path):
+    archive_path = tmp_path / "sample.zip"
+    _make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
+
+    with tarzip.open_archive(str(archive_path), max_members=-1) as archive:
+        members = list(archive)
+
+    assert len(members) == 3
+
+
+def test_open_archive_disables_member_limit_for_zero(tmp_path):
+    archive_path = tmp_path / "sample.tar"
+    _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
+
+    with tarzip.open_archive(str(archive_path), max_members=0) as archive:
+        members = list(archive)
+
+    assert len(members) == 3
+
+
 def test_open_archive_enforces_member_limit_tar(tmp_path):
     archive_path = tmp_path / "sample.tar"
     _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -63,6 +169,45 @@ def test_open_archive_enforces_member_limit_zip(tmp_path):
             list(archive)
 
 
[email protected]
+async def test_targz_integrity_reports_member_limit(tmp_path, monkeypatch):
+    archive_path = tmp_path / "sample.tar"
+    _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
+    recorder = _RecorderStub(archive_path)
+
+    original_open = tarzip.open_archive
+
+    def limited_open(path: str, *args, **kwargs):
+        return original_open(path, max_members=2)
+
+    monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    args = await _args_for(recorder)
+    await targz.integrity(args)
+
+    assert any("too many members" in message.lower() for _, message, _ in 
recorder.messages)
+
+
[email protected]
+async def test_targz_structure_reports_member_limit(tmp_path, monkeypatch):
+    archive_path = tmp_path / "sample.tar"
+    # Must include the root directory here
+    _make_tar(archive_path, ["sample/a.txt", "sample/b.txt", "sample/c.txt"])
+    recorder = _RecorderStub(archive_path)
+
+    original_open = tarzip.open_archive
+
+    def limited_open(path: str, *args, **kwargs):
+        return original_open(path, max_members=2)
+
+    monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    args = await _args_for(recorder)
+    await targz.structure(args)
+
+    assert any("too many members" in message.lower() for _, message, _ in 
recorder.messages)
+
+
 def test_zipformat_integrity_reports_member_limit(tmp_path, monkeypatch):
     archive_path = tmp_path / "sample.zip"
     _make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -93,6 +238,21 @@ def test_zipformat_structure_reports_member_limit(tmp_path, 
monkeypatch):
     assert "too many members" in result.get("error", "").lower()
 
 
+async def _args_for(recorder: _RecorderStub) -> checks.FunctionArguments:
+    async def _recorder() -> checks.Recorder:
+        return recorder
+
+    return checks.FunctionArguments(
+        recorder=_recorder,
+        asf_uid="",
+        project_name="test",
+        version_name="test",
+        revision_number="00001",
+        primary_rel_path=None,
+        extra_args={},
+    )
+
+
 def _make_tar(path, members: list[str]) -> None:
     with tarfile.open(path, "w") as tf:
         for name in members:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to